From 0b4f464223b1a26db1555342c1859d3d2fb891c9 Mon Sep 17 00:00:00 2001 From: asadbekXodjayev Date: Sat, 13 Jun 2026 17:19:38 +0500 Subject: [PATCH] Add P9: bidirectional control character (Trojan Source) detection The static prompt-injection analyzer now flags Unicode bidirectional control characters (U+202A-U+202E, U+2066-U+2069, U+061C) in file contents. These enable Trojan Source bidi attacks (CVE-2021-42574; the companion homoglyph issue, CVE-2021-42694, is not covered by this check) where source code or text renders differently than it executes, so a reviewer can approve logic the agent/interpreter does not actually run. This was a real gap: P2 only scans markdown/other for zero-width characters, and the bidi check in mcp_tool_poisoning inspects only skill *metadata* fields (and omits U+202A/U+202B/U+061C). A bidi-reordered helper.py was undetected by any analyzer. P9 scans every file type, including source code. - Add P9_PATTERNS + detection loop to static_patterns_prompt_injection.analyze() - Register P9 explanation/category/name/remediation in pattern_defaults - Tests: RLO override in .py, RLI isolate in SKILL.md body, and a no-false-positive case on legitimate Arabic (RTL) text - README: pattern count 64 -> 65; Prompt Injection table adds P9 Co-Authored-By: Claude Opus 4.8 (1M context) Signed-off-by: asadbekXodjayev --- README.md | 7 +-- .../nodes/analyzers/pattern_defaults.py | 4 ++ .../static_patterns_prompt_injection.py | 34 +++++++++++++- tests/nodes/analyzers/test_static_patterns.py | 45 +++++++++++++++++++ 4 files changed, 85 insertions(+), 5 deletions(-) diff --git a/README.md b/README.md index 4ef9c6c..30f02ce 100644 --- a/README.md +++ b/README.md @@ -18,7 +18,7 @@ SkillSpector helps you answer: **"Is this skill safe to install?"** ## Features - **Multi-format input**: Scan Git repos, URLs, zip files, directories, or single files -- **64 vulnerability patterns** across 16 categories: prompt injection, data exfiltration, privilege escalation, supply chain, excessive agency, output handling, system prompt 
leakage, memory poisoning, tool misuse, rogue agent, trigger abuse, dangerous code (AST), taint tracking, YARA signatures, MCP least privilege, and MCP tool poisoning +- **65 vulnerability patterns** across 16 categories: prompt injection, data exfiltration, privilege escalation, supply chain, excessive agency, output handling, system prompt leakage, memory poisoning, tool misuse, rogue agent, trigger abuse, dangerous code (AST), taint tracking, YARA signatures, MCP least privilege, and MCP tool poisoning - **Two-stage analysis**: Fast static analysis + optional LLM semantic evaluation - **Live vulnerability lookups**: SC4 queries [OSV.dev](https://osv.dev) for real-time CVE data with automatic offline fallback - **Multiple output formats**: Terminal, JSON, Markdown, and SARIF reports @@ -125,9 +125,9 @@ skillspector scan ./my-skill/ --no-llm ## Vulnerability Patterns -SkillSpector detects **64 vulnerability patterns** across 16 categories: +SkillSpector detects **65 vulnerability patterns** across 16 categories: -### Prompt Injection (5 patterns) +### Prompt Injection (6 patterns) | ID | Pattern | Severity | Description | |----|---------|----------|-------------| @@ -136,6 +136,7 @@ SkillSpector detects **64 vulnerability patterns** across 16 categories: | P3 | Exfiltration Commands | HIGH | Instructions to transmit context externally | | P4 | Behavior Manipulation | MEDIUM | Subtle instructions altering agent decisions | | P5 | Harmful Content | CRITICAL | Instructions that could cause physical harm | +| P9 | Bidirectional Control Characters | HIGH | Trojan Source (CVE-2021-42574) bidi reordering in file contents | ### Data Exfiltration (4 patterns) diff --git a/src/skillspector/nodes/analyzers/pattern_defaults.py b/src/skillspector/nodes/analyzers/pattern_defaults.py index 0d32e17..c5b61b2 100644 --- a/src/skillspector/nodes/analyzers/pattern_defaults.py +++ b/src/skillspector/nodes/analyzers/pattern_defaults.py @@ -47,6 +47,7 @@ class PatternCategory(StrEnum): 
"P3": "Instructions found that direct the agent to transmit conversation context or user data to external services.", "P4": "Subtle instructions detected that may alter agent decision-making or introduce hidden biases.", "P5": "This content may contain harmful instructions that could cause physical harm if followed. CRITICAL: Review carefully before use.", + "P9": "Bidirectional Unicode control characters were found in file contents. These invisible characters (e.g. U+202E RLO, U+202D LRO) can reorder how source code or text is displayed without changing what is executed, enabling 'Trojan Source' attacks (CVE-2021-42574) where a human reviewer sees different logic than the agent or interpreter runs.", "E1": "Data is being sent to an external URL. This could be legitimate telemetry or data exfiltration. Manual review is recommended.", "E2": "Code accesses environment variables that may contain secrets (API keys, tokens). This is a common pattern for credential theft.", "E3": "Code scans file system directories looking for sensitive files. This could be reconnaissance for credential theft.", @@ -128,6 +129,7 @@ class PatternCategory(StrEnum): "P3": PatternCategory.PROMPT_INJECTION.value, "P4": PatternCategory.PROMPT_INJECTION.value, "P5": PatternCategory.PROMPT_INJECTION.value, + "P9": PatternCategory.PROMPT_INJECTION.value, "P6": PatternCategory.SYSTEM_PROMPT_LEAKAGE.value, "P7": PatternCategory.SYSTEM_PROMPT_LEAKAGE.value, "P8": PatternCategory.SYSTEM_PROMPT_LEAKAGE.value, @@ -191,6 +193,7 @@ class PatternCategory(StrEnum): "P3": "External Transmission Instructions", "P4": "Subtle Steering", "P5": "Harmful Content", + "P9": "Bidirectional Control Characters", "P6": "System Prompt Leakage", "P7": "System Prompt Leakage", "P8": "System Prompt Leakage", @@ -254,6 +257,7 @@ class PatternCategory(StrEnum): "P3": "Remove instructions that send user data, prompts, or context to external URLs. 
If telemetry is needed, use documented, privacy-preserving methods.", "P4": "Review content for implicit steering or bias. Ensure instructions are explicit and align with the skill's stated purpose.", "P5": "Remove all content that could lead to harmful outcomes. Add safety guardrails and human oversight for any high-risk operations.", + "P9": "Remove all bidirectional Unicode control characters (U+202A–U+202E, U+2066–U+2069, U+061C) from skill files. If right-to-left text is genuinely required, rely on the inherent directionality of the script rather than explicit override/isolate controls, and review the file with a Unicode-aware diff.", "E1": "Verify the destination URL is trusted and necessary. Remove or replace with documented APIs. Ensure no secrets, tokens, or PII are transmitted.", "E2": "Avoid reading sensitive env vars (API keys, tokens) unless strictly required. Use secrets managers or secure config. Never log or transmit credentials.", "E3": "Remove unnecessary filesystem scanning. If file access is needed, use explicit, scoped paths. Avoid reading ~/.ssh, ~/.aws, or credential directories.", diff --git a/src/skillspector/nodes/analyzers/static_patterns_prompt_injection.py b/src/skillspector/nodes/analyzers/static_patterns_prompt_injection.py index c062ee6..bf50a2d 100644 --- a/src/skillspector/nodes/analyzers/static_patterns_prompt_injection.py +++ b/src/skillspector/nodes/analyzers/static_patterns_prompt_injection.py @@ -13,7 +13,7 @@ # See the License for the specific language governing permissions and # limitations under the License. -"""Static patterns: prompt injection (P1–P4). Node and analyze() in one module.""" +"""Static patterns: prompt injection (P1–P4, P9). Node and analyze() in one module.""" from __future__ import annotations @@ -114,10 +114,24 @@ 0.75, ), ] +# P9: Bidirectional Control Characters (Trojan Source — CVE-2021-42574 / CVE-2021-42694). 
+# These invisible Unicode controls reorder how source code or text is *displayed* without +# changing what is *executed*, so a human reviewer can see different logic than the +# interpreter runs. Unlike P2 (zero-width chars, markdown only), P9 scans every file type, +# including source code where Trojan Source attacks hide. The left-to-right / right-to-left +# overrides (LRO/RLO) are the core attack primitive; embeddings and isolates are weaker signals. +P9_OVERRIDE_PATTERN = r"[\u202d\u202e]" # LRO (U+202D), RLO (U+202E) +P9_EMBED_PATTERN = ( + r"[\u202a\u202b\u202c\u2066\u2067\u2068\u2069\u061c]" # LRE/RLE/PDF, isolates, ALM +) +P9_PATTERNS = [ + (P9_OVERRIDE_PATTERN, 0.9), + (P9_EMBED_PATTERN, 0.7), +] def analyze(content: str, file_path: str, file_type: str) -> list[AnalyzerFinding]: - """Analyze content for prompt injection patterns (P1–P4).""" + """Analyze content for prompt injection patterns (P1–P4, P9).""" findings: list[AnalyzerFinding] = [] def loc(ln: int) -> Location: @@ -189,6 +203,22 @@ def ctx(start: int) -> str: matched_text=match.group(0)[:200], ) ) + # P9 runs on every file type — Trojan Source attacks hide in source code, not just prose. 
+ for pattern, confidence in P9_PATTERNS: + for match in re.finditer(pattern, content): + line_num = get_line_number(content, match.start()) + findings.append( + AnalyzerFinding( + rule_id="P9", + message="Bidirectional Control Characters", + severity=Severity.HIGH, + location=loc(line_num), + confidence=confidence, + tags=tag, + context=ctx(match.start()), + matched_text=f"U+{ord(match.group(0)):04X}", + ) + ) return findings diff --git a/tests/nodes/analyzers/test_static_patterns.py b/tests/nodes/analyzers/test_static_patterns.py index fbcac38..643f2b5 100644 --- a/tests/nodes/analyzers/test_static_patterns.py +++ b/tests/nodes/analyzers/test_static_patterns.py @@ -172,3 +172,48 @@ def test_empty_components_returns_empty(self): state = {"components": [], "file_cache": {}} findings = static_runner.run_static_patterns(state, [prompt_injection_module]) assert findings == [] + + +class TestRunStaticPatternsTrojanSource: + """run_static_patterns with prompt_injection P9: bidirectional control chars (Trojan Source).""" + + def test_p9_rlo_override_in_source_code_produces_finding(self): + """An RLO override (U+202E) inside a .py file yields P9, HIGH severity. + + This is the Trojan Source gap (CVE-2021-42574): P2 only scans markdown, so + bidi-reordered source code was previously undetected by any analyzer. 
+ """ + state = { + "components": ["helper.py"], + "file_cache": { + "helper.py": "access_level = 'user' # \u202eadmin not\u202c\n", + }, + } + findings = static_runner.run_static_patterns(state, [prompt_injection_module]) + p9 = [f for f in findings if f.rule_id == "P9"] + assert len(p9) >= 1 + assert p9[0].severity == "HIGH" + assert p9[0].file == "helper.py" + assert p9[0].start_line >= 1 + assert p9[0].remediation is not None + assert p9[0].matched_text == "U+202E" + + def test_p9_isolate_in_markdown_produces_finding(self): + """A right-to-left isolate (U+2067) in SKILL.md body yields P9.""" + state = { + "components": ["SKILL.md"], + "file_cache": {"SKILL.md": "# Title\n\nNormal text \u2067hidden\u2069 here.\n"}, + } + findings = static_runner.run_static_patterns(state, [prompt_injection_module]) + assert any(f.rule_id == "P9" for f in findings) + + def test_safe_content_with_legitimate_rtl_no_p9(self): + """Legitimate RTL letters carry inherent directionality and must NOT trigger P9.""" + state = { + "components": ["notes.md"], + "file_cache": { + "notes.md": "# Notes\n\nمرحبا بالعالم\n\nHello world.\n", + }, + } + findings = static_runner.run_static_patterns(state, [prompt_injection_module]) + assert not any(f.rule_id == "P9" for f in findings)