Skip to content

Commit 6e10004

Browse files
committed
Fix PDF report triggering on casual conversation
The keyword matcher was too loose — generic words like 'exploit' and 'vulnerability' appear in capability listings too. It now requires all three of the following:
1. A report-level phrase (e.g. 'security audit').
2. At least 2 finding-level keywords (e.g. 'cve-', 'high risk').
3. Results language (e.g. 'found', 'detected').
This prevents PDFs from being generated when the model just lists what it can do.
1 parent 7f3b2ca commit 6e10004

1 file changed

Lines changed: 41 additions & 17 deletions

File tree

rust/crates/rusty-claude-cli/src/report.rs

Lines changed: 41 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -13,46 +13,70 @@ const DIM: &str = "\x1b[90m";
1313
const BOLD: &str = "\x1b[1m";
1414
const RESET: &str = "\x1b[0m";
1515

16-
/// Security-related keywords that indicate an audit report.
17-
const AUDIT_KEYWORDS: &[&str] = &[
16+
/// Strong indicators — phrases that only appear in actual audit reports,
17+
/// not in casual "what can you do?" capability listings.
18+
const REPORT_PHRASES: &[&str] = &[
1819
"security audit",
19-
"vulnerability",
20-
"exploit",
21-
"severity",
20+
"penetration test",
21+
"security report",
22+
"audit report",
2223
"threat assessment",
23-
"findings",
24+
"vulnerability assessment",
25+
"security assessment",
26+
];
27+
28+
/// Finding-level keywords that signal real scan results (not capabilities).
29+
const FINDING_KEYWORDS: &[&str] = &[
2430
"cve-",
2531
"critical risk",
2632
"high risk",
2733
"medium risk",
2834
"low risk",
29-
"attack surface",
30-
"penetration test",
31-
"security report",
32-
"audit report",
33-
"reconnaissance",
35+
"severity:",
36+
"finding:",
3437
"hardcoded secret",
35-
"injection",
3638
"misconfiguration",
39+
"open port",
40+
"exposed service",
3741
];
3842

39-
/// Minimum keyword matches required to trigger PDF generation.
40-
const MIN_KEYWORD_MATCHES: usize = 3;
43+
/// Past-tense / results language — real reports describe what was found.
44+
const RESULTS_LANGUAGE: &[&str] = &[
45+
"found",
46+
"detected",
47+
"discovered",
48+
"identified",
49+
"vulnerable to",
50+
"is exposed",
51+
"was found",
52+
"were found",
53+
"recommendation",
54+
"remediation",
55+
];
4156

4257
/// Minimum response length to consider for report generation.
43-
const MIN_REPORT_LENGTH: usize = 500;
58+
const MIN_REPORT_LENGTH: usize = 600;
4459

4560
/// Check if the AI response looks like a security audit report.
61+
///
62+
/// To avoid false positives on casual capability listings, we require:
63+
/// - At least 1 report-level phrase (e.g. "security audit"), AND
64+
/// - At least 2 finding-level keywords (e.g. "cve-", "high risk"), AND
65+
/// - At least 1 results-language marker (e.g. "found", "detected")
4666
pub fn is_audit_report(text: &str) -> bool {
4767
if text.len() < MIN_REPORT_LENGTH {
4868
return false;
4969
}
5070
let lower = text.to_lowercase();
51-
let matches = AUDIT_KEYWORDS
71+
72+
let has_report_phrase = REPORT_PHRASES.iter().any(|kw| lower.contains(kw));
73+
let finding_hits = FINDING_KEYWORDS
5274
.iter()
5375
.filter(|kw| lower.contains(*kw))
5476
.count();
55-
matches >= MIN_KEYWORD_MATCHES
77+
let has_results_lang = RESULTS_LANGUAGE.iter().any(|kw| lower.contains(kw));
78+
79+
has_report_phrase && finding_hits >= 2 && has_results_lang
5680
}
5781

5882
/// Strip ANSI escape codes from text.

0 commit comments

Comments
 (0)