diff --git a/src/skillspector/nodes/analyzers/static_patterns_data_exfiltration.py b/src/skillspector/nodes/analyzers/static_patterns_data_exfiltration.py index 0f0fa81..0a548af 100644 --- a/src/skillspector/nodes/analyzers/static_patterns_data_exfiltration.py +++ b/src/skillspector/nodes/analyzers/static_patterns_data_exfiltration.py @@ -25,7 +25,7 @@ from skillspector.state import AnalyzerNodeResponse, SkillspectorState from . import static_runner -from .common import get_context, get_line_number, is_code_example +from .common import get_context, get_line_number from .pattern_defaults import PatternCategory logger = get_logger(__name__) @@ -196,13 +196,9 @@ def ctx(start: int) -> str: matched_text=match.group(0)[:200], ) ) - # E5: cloud-storage exfiltration. Filtered through is_code_example() because - # upload calls commonly appear in SKILL.md docs and examples. + # E5: cloud-storage exfiltration. Example filtering is delegated to the runner. for pattern, confidence in E5_PATTERNS: for match in re.finditer(pattern, content, re.IGNORECASE | re.MULTILINE): - context = ctx(match.start()) - if is_code_example(context): - continue line_num = get_line_number(content, match.start()) findings.append( AnalyzerFinding( @@ -212,7 +208,7 @@ def ctx(start: int) -> str: location=loc(line_num), confidence=confidence, tags=tag, - context=context, + context=ctx(match.start()), matched_text=match.group(0)[:200], ) ) diff --git a/src/skillspector/nodes/analyzers/static_patterns_tool_misuse.py b/src/skillspector/nodes/analyzers/static_patterns_tool_misuse.py index c588450..b71b97e 100644 --- a/src/skillspector/nodes/analyzers/static_patterns_tool_misuse.py +++ b/src/skillspector/nodes/analyzers/static_patterns_tool_misuse.py @@ -32,7 +32,7 @@ from skillspector.state import AnalyzerNodeResponse, SkillspectorState from . import static_runner -from .common import get_context, get_line_number, is_code_example +from .common import get_context, get_line_number from .pattern_defaults import PatternCategory logger = get_logger(__name__) @@ -280,13 +280,9 @@ def ctx(start: int) -> str: matched_text=match.group(0)[:200], ) ) - # TM4: privileged K8s workload. Filtered through is_code_example() because - # privileged/hostPath fields commonly appear in SKILL.md docs and examples. + # TM4: privileged K8s workload. Example filtering is delegated to the runner. for pattern, confidence in TM4_PATTERNS: for match in re.finditer(pattern, content, re.IGNORECASE | re.MULTILINE): - context_text = ctx(match.start()) - if is_code_example(context_text): - continue line_num = get_line_number(content, match.start()) findings.append( AnalyzerFinding( @@ -296,7 +292,7 @@ def ctx(start: int) -> str: location=loc(line_num), confidence=confidence, tags=tag, - context=context_text, + context=ctx(match.start()), matched_text=match.group(0)[:200], ) ) diff --git a/tests/nodes/analyzers/test_static_patterns.py b/tests/nodes/analyzers/test_static_patterns.py index 05f4e22..ae97fcc 100644 --- a/tests/nodes/analyzers/test_static_patterns.py +++ b/tests/nodes/analyzers/test_static_patterns.py @@ -272,6 +272,21 @@ def test_e5_benign_client_creation_no_finding(self): findings = static_runner.run_static_patterns(state, [data_exfiltration_module]) assert not any(f.rule_id == "E5" for f in findings) + def test_e5_example_marker_in_executable_still_fires(self): + """An example marker near an upload in an executable .py must NOT suppress E5. + + Example filtering belongs to the runner, which only downweights (does not + skip) executables — so a nearby '# for example' cannot be used to evade E5. + """ + state = { + "components": ["up.py"], + "file_cache": { + "up.py": "# for example\ns3.put_object(Bucket='x', Key='k', Body=d)", + }, + } + findings = static_runner.run_static_patterns(state, [data_exfiltration_module]) + assert any(f.rule_id == "E5" for f in findings) + def test_eval_dataset_prose_is_not_scanned_for_static_patterns(self): """Eval datasets are test-case data, not installed skill code.""" for dataset_path in ("evals/evals.json", "eval/dataset.yaml"): diff --git a/tests/unit/test_patterns_new.py b/tests/unit/test_patterns_new.py index de2f678..b8ab5a1 100644 --- a/tests/unit/test_patterns_new.py +++ b/tests/unit/test_patterns_new.py @@ -732,9 +732,13 @@ def test_tm4_benign_workload_not_flagged(self) -> None: ) assert not any(f.rule_id == "TM4" for f in tm_mod.analyze(content, "ds.yaml", "yaml")) - def test_tm4_documentation_example_excluded(self) -> None: - content = "For example, never set privileged: true in your manifests." - assert not any(f.rule_id == "TM4" for f in tm_mod.analyze(content, "README.md", "markdown")) + def test_tm4_example_marker_not_self_filtered(self) -> None: + """analyze() no longer self-filters on example markers — the shared runner + handles that (suppressing non-executable docs, only downweighting + executables). So a nearby '# for example' marker cannot bypass TM4; the + finding is still produced at the analyzer level.""" + content = "# for example\nprivileged: true" + assert any(f.rule_id == "TM4" for f in tm_mod.analyze(content, "ds.yaml", "yaml")) def test_safe_content_produces_no_findings(self) -> None: findings = tm_mod.analyze(