Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
23 changes: 18 additions & 5 deletions src/pyspector/_rust_core/src/analysis/ast_analysis.rs
Original file line number Diff line number Diff line change
@@ -1,26 +1,39 @@
use crate::ast_parser::AstNode;
use crate::issues::Issue;
use crate::rules::{RuleSet, Rule};
use crate::rules::{RuleSet, Rule, Defaults};

/// Main entry point for AST scanning.
///
/// Collects the subset of rules that carry an `ast_match` pattern and walks
/// the tree once, threading the ruleset's global `defaults` down so file-level
/// exclusions are honored at every node.
pub fn scan_ast(ast: &AstNode, file_path: &str, content: &str, ruleset: &RuleSet) -> Vec<Issue> {
    let mut issues = Vec::new();
    let ast_rules: Vec<&Rule> = ruleset.rules.iter()
        .filter(|r| r.ast_match.is_some())
        .collect();

    // Nothing to do when no rule uses AST matching — skip the walk entirely.
    if ast_rules.is_empty() { return issues; }

    walk_ast(ast, file_path, content, &ast_rules, &ruleset.defaults, &mut issues);
    issues
}

// Recursively walks the AST, checking each node against the rules
fn walk_ast(node: &AstNode, file_path: &str, content: &str, rules: &[&Rule], issues: &mut Vec<Issue>) {
fn walk_ast(node: &AstNode, file_path: &str, content: &str, rules: &[&Rule], defaults: &Defaults, issues: &mut Vec<Issue>) {
for rule in rules.iter() {
// Respect global defaults + rule-level exclude_file_pattern
if rule.is_file_excluded(file_path, defaults) {
continue;
}

if let Some(match_pattern) = &rule.ast_match {
if check_node_match(node, match_pattern) {
let line_content = content.lines().nth(node.lineno.saturating_sub(1) as usize).unwrap_or("").to_string();

// Respect exclude_pattern on the matched line
if let Some(exclude) = &rule.exclude_pattern {
if exclude.is_match(&line_content) {
continue;
}
}

issues.push(Issue::new(
rule.id.clone(),
rule.description.clone(),
Expand All @@ -38,7 +51,7 @@ fn walk_ast(node: &AstNode, file_path: &str, content: &str, rules: &[&Rule], iss
// Recurse into children
for child_list in node.children.values() {
for child_node in child_list {
walk_ast(child_node, file_path, content, rules, issues);
walk_ast(child_node, file_path, content, rules, defaults, issues);
}
}
}
Expand Down
11 changes: 11 additions & 0 deletions src/pyspector/_rust_core/src/analysis/config_analysis.rs
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,11 @@ pub fn scan_file(file_path: &str, content: &str, ruleset: &RuleSet) -> Vec<Issue
}
}

// Respect global defaults + rule-level exclude_file_pattern
if rule.is_file_excluded(file_path, &ruleset.defaults) {
continue;
}

// Regex pattern matching with comment/string filtering
if let Some(pattern) = &rule.pattern {
for (i, line) in lines.iter().enumerate() {
Expand All @@ -27,6 +32,12 @@ pub fn scan_file(file_path: &str, content: &str, ruleset: &RuleSet) -> Vec<Issue
}

if pattern.is_match(line) {
// Skip if the line also matches the exclude pattern
if let Some(exclude) = &rule.exclude_pattern {
if exclude.is_match(line) {
continue;
}
}
issues.push(Issue::new(
rule.id.clone(),
rule.description.clone(),
Expand Down
13 changes: 12 additions & 1 deletion src/pyspector/_rust_core/src/analysis/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,18 @@ pub struct AnalysisContext<'a> {
pub py_files: &'a [PythonFile],
}

pub fn run_analysis(context: AnalysisContext) -> Vec<Issue> {
pub fn run_analysis(mut context: AnalysisContext) -> Vec<Issue> {
// Apply disabled_rule_ids from [defaults] before scanning
if !context.ruleset.defaults.disabled_rule_ids.is_empty() {
let disabled: std::collections::HashSet<&str> = context.ruleset.defaults
.disabled_rule_ids.iter().map(|s| s.as_str()).collect();
let before = context.ruleset.rules.len();
context.ruleset.rules.retain(|r| !disabled.contains(r.id.as_str()));
let removed = before - context.ruleset.rules.len();
if removed > 0 {
println!("[*] Disabled {} rules via [defaults].disabled_rule_ids", removed);
}
}
println!("[*] Starting analysis with {} rules", context.ruleset.rules.len());

let root_path = Path::new(&context.root_path);
Expand Down
42 changes: 42 additions & 0 deletions src/pyspector/_rust_core/src/rules.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,20 @@ use serde::Deserialize;
use crate::issues::Severity;
use regex::Regex;

/// Global configuration inherited by every rule unless overridden at the
/// rule level.
#[derive(Debug, Deserialize, Default, Clone)]
pub struct Defaults {
    /// File-path globs (e.g. "*tests*", "*/fixtures/*") excluded from ALL
    /// rules; individual rules may stack their own `exclude_file_pattern`
    /// on top of these.
    #[serde(default)]
    pub exclude_file_patterns: Vec<String>,
    /// IDs of rules switched off entirely for this codebase (too noisy).
    /// Equivalent to deleting the rule definition, but reversible — remove
    /// an ID from this list to re-enable the rule.
    #[serde(default)]
    pub disabled_rule_ids: Vec<String>,
}

#[derive(Debug, Deserialize, Clone)]
pub struct Rule {
pub id: String,
Expand All @@ -13,10 +27,35 @@ pub struct Rule {
pub remediation: String,
#[serde(with = "serde_regex", default)]
pub pattern: Option<Regex>,
#[serde(with = "serde_regex", default)]
pub exclude_pattern: Option<Regex>,
#[serde(default)]
pub ast_match: Option<String>,
#[serde(default)]
pub file_pattern: Option<String>,
/// Rule-level glob to exclude specific files (stacks on top of [defaults]).
#[serde(default)]
pub exclude_file_pattern: Option<String>,
}

impl Rule {
    /// Reports whether `file_path` should be skipped for this rule — either
    /// because one of the global default exclusion globs matches it, or
    /// because the rule's own `exclude_file_pattern` does.
    pub fn is_file_excluded(&self, file_path: &str, defaults: &Defaults) -> bool {
        // Global default exclusions take precedence and short-circuit.
        let globally_excluded = defaults
            .exclude_file_patterns
            .iter()
            .any(|glob| wildmatch::WildMatch::new(glob).matches(file_path));

        globally_excluded
            || self
                .exclude_file_pattern
                .as_deref()
                .map_or(false, |glob| wildmatch::WildMatch::new(glob).matches(file_path))
    }
}

fn default_confidence() -> String { "Medium".to_string() }
Expand Down Expand Up @@ -47,6 +86,9 @@ pub struct TaintSanitizerRule {

#[derive(Debug, Deserialize)]
pub struct RuleSet {
/// Global defaults inherited by every rule.
#[serde(default)]
pub defaults: Defaults,
#[serde(default, rename = "rule")]
pub rules: Vec<Rule>,
#[serde(default, rename = "taint_source")]
Expand Down
72 changes: 66 additions & 6 deletions src/pyspector/rules/built-in-rules.toml
Original file line number Diff line number Diff line change
@@ -1,5 +1,55 @@
# PySpector Built-in Security Rules

# -------------------------------------------
# SECTION: Global Defaults (inherited by every rule)
# -------------------------------------------
[defaults]
# File-path globs excluded from ALL rules unless a rule opts out.
# Add paths here instead of repeating exclude_file_pattern on each rule.
exclude_file_patterns = [
"*tests*", # test directories and test_*.py / *_test.py files
"*fixtures*", # fixture data
"*testdata*", # test data
"*conftest*", # pytest configuration
]

# Rules disabled globally because they produce 100% false positives by flagging
# every use of a Python built-in function (len, isinstance, super, str, etc.).
# These rules have no security value on their own without taint analysis.
# Re-enable any of these per-project by removing the ID from this list.
disabled_rule_ids = [
# Python built-in functions — not security sinks without taint context
"ABS1089", "ALL1107", "ANY1104", "BOOL1035", "BYTEARRAY1008", "BYTES1005",
"CALLABLE1131", "CAPITALIZE954", "CASEFOLD918", "CHR1017", "CLASSMETHOD1125",
"COUNT909", "DECODE882", "DICT1050", "DIR849", "DIVMOD1098",
"ENCODE885", "ENDSWITH900", "ENUMERATE1059", "FILTER1068", "FIND903",
"FLOAT1029", "FROZENSET1053", "HASH1137", "HEX1020", "ID1134",
"INDEX906", "INT1038", "ISALPHA972", "ISASCII975", "ISDIGIT981",
"ISIDENTIFIER984", "ISINSTANCE855", "ISPRINTABLE993", "ISSPACE996",
"ISUPPER1002", "ITER1110", "JOIN876", "LEN1101", "LIST1041",
"LJUST930", "LOWER888", "LSTRIP957", "MAP1065", "MAX1083",
"MEMORYVIEW1011", "MIN1086", "NEXT1113", "ORD1014", "PARTITION936",
"PRINT1146", "PROPERTY1119", "RANGE1056", "REDUCE1071", "REMOVEPREFIX963",
"REMOVESUFFIX966", "REPLACE879", "REPR858", "REVERSED1077", "RJUST933",
"ROUND1092", "RPARTITION939", "RSPLIT942", "RSTRIP960", "SET1047",
"SLICE1116", "SORTED1074", "SPLIT873", "SPLITLINES945", "STARTSWITH897",
"STATICMETHOD1122", "STR861", "STRIP894", "SUM1080", "SUPER1128",
"TITLE951", "TRANSLATE912", "TUPLE1044", "TYPE852", "UPPER891",
"VARS840", "ZIP1062",
# Medium-noise rules: too broad without taint analysis
"FSTRING867", # every f-string is NOT an injection risk
"GETATTR828", # every getattr() is NOT unsafe
"SETATTR831", # every setattr() is NOT unsafe
"HASATTR837", # every hasattr() is NOT a disclosure risk
"DELATTR834", # every delattr() is NOT unsafe
"FORMAT864", # every .format() is NOT an injection risk
"DJG513", # csrf_exempt covered by CSRF747 already
"MIME786", # HttpResponse with content_type is not a vulnerability
"BRUTE765", # login_required is not "missing brute force protection"
"INFO738", # traceback.print_exc is not information disclosure by itself
"SER522", # serializers.serialize() is not inherently unsafe
]

# -------------------------------------------
# SECTION: Taint Analysis Rules
# -------------------------------------------
Expand Down Expand Up @@ -90,6 +140,8 @@ severity = "High"
remediation = "Use 'yaml.safe_load()' instead of 'yaml.load()'."
ast_match = "Call(func.value.id=yaml, func.attr=load)"
file_pattern = "*.py"
# Do not flag when SafeLoader or BaseLoader is explicitly passed
exclude_pattern = "Loader\\s*=\\s*(yaml\\.)?(Safe|Base)Loader"

# -------------------------------------------
# SECTION: Cryptographic Failures (OWASP A02:2021)
Expand Down Expand Up @@ -163,6 +215,8 @@ severity = "High"
remediation = "Always use 'yaml.safe_load()' to prevent arbitrary code execution from malicious YAML."
pattern = "^\\s*[^#]*yaml\\.load" # This regex ignores comment lines
file_pattern = "*.py"
# Do not flag when SafeLoader or safe_load is used
exclude_pattern = "Loader\\s*=\\s*(yaml\\.)?(Safe|Base)Loader|yaml\\.safe_load"

[[rule]]
id = "PY303"
Expand Down Expand Up @@ -434,8 +488,9 @@ file_pattern = "*.ini"
[[rule]]
id = "PY511"
description = "JSON deserialization without validation."
severity = "High"
remediation = "Validate JSON data before processing and implement schema validation."
severity = "Low"
confidence = "Low"
remediation = "json.loads() is safe from code execution. Only flag if the result feeds into eval/exec/pickle."
ast_match = "Call(func.value.id=json, func.attr=loads)"
file_pattern = "*.py"

Expand Down Expand Up @@ -470,6 +525,8 @@ severity = "High"
remediation = "Dynamic code compilation can be dangerous. Validate all inputs and consider static alternatives."
ast_match = "Call(func.attr=compile)"
file_pattern = "*.py"
# re.compile() and sql compiler.compile() are not Python code execution
exclude_pattern = "re\\.compile|regex\\.compile|compiler\\.compile|self\\.compile"

[[rule]]
id = "DOM516"
Expand Down Expand Up @@ -634,9 +691,9 @@ file_pattern = "*.conf"
[[rule]]
id = "JSON612"
description = "JSON parsing without input validation."
severity = "High"
confidence = "Medium"
remediation = "Implement JSON schema validation and sanitize input data before parsing."
severity = "Low"
confidence = "Low"
remediation = "json.loads() is safe from code execution. Only flag if result feeds into eval/exec/pickle."
ast_match = "Call(func.value.id=json, func.attr=loads)"
file_pattern = "*.py"

Expand Down Expand Up @@ -684,6 +741,7 @@ confidence = "Medium"
remediation = "Avoid compile() function with untrusted input. Use static code analysis instead."
ast_match = "Call(func.attr=compile)"
file_pattern = "*.py"
exclude_pattern = "re\\.compile|regex\\.compile|compiler\\.compile|self\\.compile"

[[rule]]
id = "PERM650"
Expand Down Expand Up @@ -729,6 +787,7 @@ confidence = "Medium"
remediation = "Avoid dynamic code compilation. Consider static analysis or predefined code patterns."
ast_match = "Call(func.attr=compile)"
file_pattern = "*.py"
exclude_pattern = "re\\.compile|regex\\.compile|compiler\\.compile|self\\.compile"

[[rule]]
id = "SHELL675"
Expand Down Expand Up @@ -916,7 +975,8 @@ description = "Session fixation vulnerability in session handling."
severity = "High"
confidence = "Medium"
remediation = "Regenerate session IDs after authentication to prevent fixation attacks."
pattern = "session\\[.*\\]\\s*=.*request\\."
# Writing data to a session is NOT session fixation. Only flag direct session key assignment from request.
pattern = "session\\.session_key\\s*=.*request\\."
file_pattern = "*.py"

[[rule]]
Expand Down
Loading
Loading