Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
23 changes: 18 additions & 5 deletions src/pyspector/_rust_core/src/analysis/ast_analysis.rs
Original file line number Diff line number Diff line change
@@ -1,26 +1,39 @@
use crate::ast_parser::AstNode;
use crate::issues::Issue;
use crate::rules::{RuleSet, Rule};
use crate::rules::{RuleSet, Rule, Defaults};

/// Main entry point for AST scanning.
///
/// Collects the subset of rules that carry an `ast_match` pattern and walks
/// the tree once, threading the ruleset's global `defaults` down so file-level
/// exclusions are honored at every node.
pub fn scan_ast(ast: &AstNode, file_path: &str, content: &str, ruleset: &RuleSet) -> Vec<Issue> {
    let mut issues = Vec::new();
    let ast_rules: Vec<&Rule> = ruleset.rules.iter()
        .filter(|r| r.ast_match.is_some())
        .collect();

    // Nothing to do when no rule uses AST matching — skip the walk entirely.
    if ast_rules.is_empty() { return issues; }

    walk_ast(ast, file_path, content, &ast_rules, &ruleset.defaults, &mut issues);
    issues
}

// Recursively walks the AST, checking each node against the rules
fn walk_ast(node: &AstNode, file_path: &str, content: &str, rules: &[&Rule], issues: &mut Vec<Issue>) {
fn walk_ast(node: &AstNode, file_path: &str, content: &str, rules: &[&Rule], defaults: &Defaults, issues: &mut Vec<Issue>) {
for rule in rules.iter() {
// Respect global defaults + rule-level exclude_file_pattern
if rule.is_file_excluded(file_path, defaults) {
continue;
}

if let Some(match_pattern) = &rule.ast_match {
if check_node_match(node, match_pattern) {
let line_content = content.lines().nth(node.lineno.saturating_sub(1) as usize).unwrap_or("").to_string();

// Respect exclude_pattern on the matched line
if let Some(exclude) = &rule.exclude_pattern {
if exclude.is_match(&line_content) {
continue;
}
}

issues.push(Issue::new(
rule.id.clone(),
rule.description.clone(),
Expand All @@ -38,7 +51,7 @@ fn walk_ast(node: &AstNode, file_path: &str, content: &str, rules: &[&Rule], iss
// Recurse into children
for child_list in node.children.values() {
for child_node in child_list {
walk_ast(child_node, file_path, content, rules, issues);
walk_ast(child_node, file_path, content, rules, defaults, issues);
}
}
}
Expand Down
11 changes: 11 additions & 0 deletions src/pyspector/_rust_core/src/analysis/config_analysis.rs
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,11 @@ pub fn scan_file(file_path: &str, content: &str, ruleset: &RuleSet) -> Vec<Issue
}
}

// Respect global defaults + rule-level exclude_file_pattern
if rule.is_file_excluded(file_path, &ruleset.defaults) {
continue;
}

// Regex pattern matching with comment/string filtering
if let Some(pattern) = &rule.pattern {
for (i, line) in lines.iter().enumerate() {
Expand All @@ -27,6 +32,12 @@ pub fn scan_file(file_path: &str, content: &str, ruleset: &RuleSet) -> Vec<Issue
}

if pattern.is_match(line) {
// Skip if the line also matches the exclude pattern
if let Some(exclude) = &rule.exclude_pattern {
if exclude.is_match(line) {
continue;
}
}
issues.push(Issue::new(
rule.id.clone(),
rule.description.clone(),
Expand Down
13 changes: 12 additions & 1 deletion src/pyspector/_rust_core/src/analysis/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,18 @@ pub struct AnalysisContext<'a> {
pub py_files: &'a [PythonFile],
}

pub fn run_analysis(context: AnalysisContext) -> Vec<Issue> {
pub fn run_analysis(mut context: AnalysisContext) -> Vec<Issue> {
// Apply disabled_rule_ids from [defaults] before scanning
if !context.ruleset.defaults.disabled_rule_ids.is_empty() {
let disabled: std::collections::HashSet<&str> = context.ruleset.defaults
.disabled_rule_ids.iter().map(|s| s.as_str()).collect();
let before = context.ruleset.rules.len();
context.ruleset.rules.retain(|r| !disabled.contains(r.id.as_str()));
let removed = before - context.ruleset.rules.len();
if removed > 0 {
println!("[*] Disabled {} rules via [defaults].disabled_rule_ids", removed);
}
}
println!("[*] Starting analysis with {} rules", context.ruleset.rules.len());

let root_path = Path::new(&context.root_path);
Expand Down
42 changes: 42 additions & 0 deletions src/pyspector/_rust_core/src/rules.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,20 @@ use serde::Deserialize;
use crate::issues::Severity;
use regex::Regex;

/// Global configuration inherited by every rule unless overridden at the
/// rule level.
#[derive(Debug, Deserialize, Default, Clone)]
pub struct Defaults {
    /// File-path globs (e.g. "*tests*", "*/fixtures/*") excluded from ALL
    /// rules; individual rules may stack their own `exclude_file_pattern`
    /// on top of these.
    #[serde(default)]
    pub exclude_file_patterns: Vec<String>,
    /// IDs of rules switched off entirely for this codebase (too noisy).
    /// Equivalent to deleting the rule definition, but reversible — remove
    /// an ID from this list to re-enable the rule.
    #[serde(default)]
    pub disabled_rule_ids: Vec<String>,
}

#[derive(Debug, Deserialize, Clone)]
pub struct Rule {
pub id: String,
Expand All @@ -13,10 +27,35 @@ pub struct Rule {
pub remediation: String,
#[serde(with = "serde_regex", default)]
pub pattern: Option<Regex>,
#[serde(with = "serde_regex", default)]
pub exclude_pattern: Option<Regex>,
#[serde(default)]
pub ast_match: Option<String>,
#[serde(default)]
pub file_pattern: Option<String>,
/// Rule-level glob to exclude specific files (stacks on top of [defaults]).
#[serde(default)]
pub exclude_file_pattern: Option<String>,
}

impl Rule {
    /// Reports whether `file_path` should be skipped for this rule — either
    /// because one of the global default exclusion globs matches it, or
    /// because the rule's own `exclude_file_pattern` does.
    pub fn is_file_excluded(&self, file_path: &str, defaults: &Defaults) -> bool {
        // Global default exclusions take precedence and short-circuit.
        let globally_excluded = defaults
            .exclude_file_patterns
            .iter()
            .any(|glob| wildmatch::WildMatch::new(glob).matches(file_path));

        globally_excluded
            || self
                .exclude_file_pattern
                .as_deref()
                .map_or(false, |glob| wildmatch::WildMatch::new(glob).matches(file_path))
    }
}

fn default_confidence() -> String { "Medium".to_string() }
Expand Down Expand Up @@ -47,6 +86,9 @@ pub struct TaintSanitizerRule {

#[derive(Debug, Deserialize)]
pub struct RuleSet {
/// Global defaults inherited by every rule.
#[serde(default)]
pub defaults: Defaults,
#[serde(default, rename = "rule")]
pub rules: Vec<Rule>,
#[serde(default, rename = "taint_source")]
Expand Down
72 changes: 66 additions & 6 deletions src/pyspector/rules/built-in-rules.toml
Original file line number Diff line number Diff line change
@@ -1,5 +1,55 @@
# PySpector Built-in Security Rules

# -------------------------------------------
# SECTION: Global Defaults (inherited by every rule)
# -------------------------------------------
[defaults]
# File-path globs excluded from ALL rules unless a rule opts out.
# Add paths here instead of repeating exclude_file_pattern on each rule.
exclude_file_patterns = [
"*tests*", # test directories and test_*.py / *_test.py files
"*fixtures*", # fixture data
"*testdata*", # test data
"*conftest*", # pytest configuration
]

# Rules disabled globally because they produce 100% false positives by flagging
# every use of a Python built-in function (len, isinstance, super, str, etc.).
# These rules have no security value on their own without taint analysis.
# Re-enable any of these per-project by removing the ID from this list.
disabled_rule_ids = [
# Python built-in functions — not security sinks without taint context
"ABS1089", "ALL1107", "ANY1104", "BOOL1035", "BYTEARRAY1008", "BYTES1005",
"CALLABLE1131", "CAPITALIZE954", "CASEFOLD918", "CHR1017", "CLASSMETHOD1125",
"COUNT909", "DECODE882", "DICT1050", "DIR849", "DIVMOD1098",
"ENCODE885", "ENDSWITH900", "ENUMERATE1059", "FILTER1068", "FIND903",
"FLOAT1029", "FROZENSET1053", "HASH1137", "HEX1020", "ID1134",
"INDEX906", "INT1038", "ISALPHA972", "ISASCII975", "ISDIGIT981",
"ISIDENTIFIER984", "ISINSTANCE855", "ISPRINTABLE993", "ISSPACE996",
"ISUPPER1002", "ITER1110", "JOIN876", "LEN1101", "LIST1041",
"LJUST930", "LOWER888", "LSTRIP957", "MAP1065", "MAX1083",
"MEMORYVIEW1011", "MIN1086", "NEXT1113", "ORD1014", "PARTITION936",
"PRINT1146", "PROPERTY1119", "RANGE1056", "REDUCE1071", "REMOVEPREFIX963",
"REMOVESUFFIX966", "REPLACE879", "REPR858", "REVERSED1077", "RJUST933",
"ROUND1092", "RPARTITION939", "RSPLIT942", "RSTRIP960", "SET1047",
"SLICE1116", "SORTED1074", "SPLIT873", "SPLITLINES945", "STARTSWITH897",
"STATICMETHOD1122", "STR861", "STRIP894", "SUM1080", "SUPER1128",
"TITLE951", "TRANSLATE912", "TUPLE1044", "TYPE852", "UPPER891",
"VARS840", "ZIP1062",
# Medium-noise rules: too broad without taint analysis
"FSTRING867", # every f-string is NOT an injection risk
"GETATTR828", # every getattr() is NOT unsafe
"SETATTR831", # every setattr() is NOT unsafe
"HASATTR837", # every hasattr() is NOT a disclosure risk
"DELATTR834", # every delattr() is NOT unsafe
"FORMAT864", # every .format() is NOT an injection risk
"DJG513", # csrf_exempt covered by CSRF747 already
"MIME786", # HttpResponse with content_type is not a vulnerability
"BRUTE765", # login_required is not "missing brute force protection"
"INFO738", # traceback.print_exc is not information disclosure by itself
"SER522", # serializers.serialize() is not inherently unsafe
]

# -------------------------------------------
# SECTION: Taint Analysis Rules
# -------------------------------------------
Expand Down Expand Up @@ -90,6 +140,8 @@ severity = "High"
remediation = "Use 'yaml.safe_load()' instead of 'yaml.load()'."
ast_match = "Call(func.value.id=yaml, func.attr=load)"
file_pattern = "*.py"
# Do not flag when SafeLoader or BaseLoader is explicitly passed
exclude_pattern = "Loader\\s*=\\s*(yaml\\.)?(Safe|Base)Loader"

# -------------------------------------------
# SECTION: Cryptographic Failures (OWASP A02:2021)
Expand Down Expand Up @@ -163,6 +215,8 @@ severity = "High"
remediation = "Always use 'yaml.safe_load()' to prevent arbitrary code execution from malicious YAML."
pattern = "^\\s*[^#]*yaml\\.load" # This regex ignores comment lines
file_pattern = "*.py"
# Do not flag when SafeLoader or safe_load is used
exclude_pattern = "Loader\\s*=\\s*(yaml\\.)?(Safe|Base)Loader|yaml\\.safe_load"

[[rule]]
id = "PY303"
Expand Down Expand Up @@ -434,8 +488,9 @@ file_pattern = "*.ini"
[[rule]]
id = "PY511"
description = "JSON deserialization without validation."
severity = "High"
remediation = "Validate JSON data before processing and implement schema validation."
severity = "Low"
confidence = "Low"
remediation = "json.loads() is safe from code execution. Only flag if the result feeds into eval/exec/pickle."
ast_match = "Call(func.value.id=json, func.attr=loads)"
file_pattern = "*.py"

Expand Down Expand Up @@ -470,6 +525,8 @@ severity = "High"
remediation = "Dynamic code compilation can be dangerous. Validate all inputs and consider static alternatives."
ast_match = "Call(func.attr=compile)"
file_pattern = "*.py"
# re.compile() and sql compiler.compile() are not Python code execution
exclude_pattern = "re\\.compile|regex\\.compile|compiler\\.compile|self\\.compile"

[[rule]]
id = "DOM516"
Expand Down Expand Up @@ -634,9 +691,9 @@ file_pattern = "*.conf"
[[rule]]
id = "JSON612"
description = "JSON parsing without input validation."
severity = "High"
confidence = "Medium"
remediation = "Implement JSON schema validation and sanitize input data before parsing."
severity = "Low"
confidence = "Low"
remediation = "json.loads() is safe from code execution. Only flag if result feeds into eval/exec/pickle."
ast_match = "Call(func.value.id=json, func.attr=loads)"
file_pattern = "*.py"

Expand Down Expand Up @@ -684,6 +741,7 @@ confidence = "Medium"
remediation = "Avoid compile() function with untrusted input. Use static code analysis instead."
ast_match = "Call(func.attr=compile)"
file_pattern = "*.py"
exclude_pattern = "re\\.compile|regex\\.compile|compiler\\.compile|self\\.compile"

[[rule]]
id = "PERM650"
Expand Down Expand Up @@ -729,6 +787,7 @@ confidence = "Medium"
remediation = "Avoid dynamic code compilation. Consider static analysis or predefined code patterns."
ast_match = "Call(func.attr=compile)"
file_pattern = "*.py"
exclude_pattern = "re\\.compile|regex\\.compile|compiler\\.compile|self\\.compile"

[[rule]]
id = "SHELL675"
Expand Down Expand Up @@ -916,7 +975,8 @@ description = "Session fixation vulnerability in session handling."
severity = "High"
confidence = "Medium"
remediation = "Regenerate session IDs after authentication to prevent fixation attacks."
pattern = "session\\[.*\\]\\s*=.*request\\."
# Writing data to a session is NOT session fixation. Only flag direct session key assignment from request.
pattern = "session\\.session_key\\s*=.*request\\."
file_pattern = "*.py"

[[rule]]
Expand Down
Loading
Loading