Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@
from skillspector.state import AnalyzerNodeResponse, SkillspectorState

from . import static_runner
from .common import get_context, get_line_number, is_code_example
from .common import get_context, get_line_number
from .pattern_defaults import PatternCategory

logger = get_logger(__name__)
Expand Down Expand Up @@ -196,13 +196,9 @@ def ctx(start: int) -> str:
matched_text=match.group(0)[:200],
)
)
# E5: cloud-storage exfiltration. Filtered through is_code_example() because
# upload calls commonly appear in SKILL.md docs and examples.
# E5: cloud-storage exfiltration. Example filtering is delegated to the runner.
for pattern, confidence in E5_PATTERNS:
for match in re.finditer(pattern, content, re.IGNORECASE | re.MULTILINE):
context = ctx(match.start())
if is_code_example(context):
continue
line_num = get_line_number(content, match.start())
findings.append(
AnalyzerFinding(
Expand All @@ -212,7 +208,7 @@ def ctx(start: int) -> str:
location=loc(line_num),
confidence=confidence,
tags=tag,
context=context,
context=ctx(match.start()),
matched_text=match.group(0)[:200],
)
)
Expand Down
10 changes: 3 additions & 7 deletions src/skillspector/nodes/analyzers/static_patterns_tool_misuse.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,7 @@
from skillspector.state import AnalyzerNodeResponse, SkillspectorState

from . import static_runner
from .common import get_context, get_line_number, is_code_example
from .common import get_context, get_line_number
from .pattern_defaults import PatternCategory

logger = get_logger(__name__)
Expand Down Expand Up @@ -280,13 +280,9 @@ def ctx(start: int) -> str:
matched_text=match.group(0)[:200],
)
)
# TM4: privileged K8s workload. Filtered through is_code_example() because
# privileged/hostPath fields commonly appear in SKILL.md docs and examples.
# TM4: privileged K8s workload. Example filtering is delegated to the runner.
for pattern, confidence in TM4_PATTERNS:
for match in re.finditer(pattern, content, re.IGNORECASE | re.MULTILINE):
context_text = ctx(match.start())
if is_code_example(context_text):
continue
line_num = get_line_number(content, match.start())
findings.append(
AnalyzerFinding(
Expand All @@ -296,7 +292,7 @@ def ctx(start: int) -> str:
location=loc(line_num),
confidence=confidence,
tags=tag,
context=context_text,
context=ctx(match.start()),
matched_text=match.group(0)[:200],
)
)
Expand Down
15 changes: 15 additions & 0 deletions tests/nodes/analyzers/test_static_patterns.py
Original file line number Diff line number Diff line change
Expand Up @@ -272,6 +272,21 @@ def test_e5_benign_client_creation_no_finding(self):
findings = static_runner.run_static_patterns(state, [data_exfiltration_module])
assert not any(f.rule_id == "E5" for f in findings)

def test_e5_example_marker_in_executable_still_fires(self):
"""An example marker near an upload in an executable .py must NOT suppress E5.

Example filtering belongs to the runner, which only downweights (does not
skip) executables — so a nearby '# for example' cannot be used to evade E5.
"""
state = {
"components": ["up.py"],
"file_cache": {
"up.py": "# for example\ns3.put_object(Bucket='x', Key='k', Body=d)",
},
}
findings = static_runner.run_static_patterns(state, [data_exfiltration_module])
assert any(f.rule_id == "E5" for f in findings)

def test_eval_dataset_prose_is_not_scanned_for_static_patterns(self):
"""Eval datasets are test-case data, not installed skill code."""
for dataset_path in ("evals/evals.json", "eval/dataset.yaml"):
Expand Down
10 changes: 7 additions & 3 deletions tests/unit/test_patterns_new.py
Original file line number Diff line number Diff line change
Expand Up @@ -732,9 +732,13 @@ def test_tm4_benign_workload_not_flagged(self) -> None:
)
assert not any(f.rule_id == "TM4" for f in tm_mod.analyze(content, "ds.yaml", "yaml"))

def test_tm4_documentation_example_excluded(self) -> None:
content = "For example, never set privileged: true in your manifests."
assert not any(f.rule_id == "TM4" for f in tm_mod.analyze(content, "README.md", "markdown"))
def test_tm4_example_marker_not_self_filtered(self) -> None:
"""analyze() no longer self-filters on example markers — the shared runner
handles that (suppressing non-executable docs, only downweighting
executables). So a nearby '# for example' marker cannot bypass TM4; the
finding is still produced at the analyzer level."""
content = "# for example\nprivileged: true"
assert any(f.rule_id == "TM4" for f in tm_mod.analyze(content, "ds.yaml", "yaml"))

def test_safe_content_produces_no_findings(self) -> None:
findings = tm_mod.analyze(
Expand Down