@@ -13,46 +13,70 @@ const DIM: &str = "\x1b[90m";
/// ANSI escape sequence (SGR 1) that switches the terminal to bold text.
const BOLD: &str = "\x1b[1m";
/// ANSI escape sequence (SGR 0) that resets all terminal text attributes.
const RESET: &str = "\x1b[0m";
1515
/// Strong indicators — phrases that only appear in actual audit reports,
/// not in casual "what can you do?" capability listings.
///
/// Entries are lowercase because `is_audit_report` searches for them as
/// plain substrings of the lowercased response text.
const REPORT_PHRASES: &[&str] = &[
    "security audit",
    "penetration test",
    "security report",
    "audit report",
    "threat assessment",
    "vulnerability assessment",
    "security assessment",
];
27+
/// Finding-level keywords that signal real scan results (not capabilities).
///
/// Entries are lowercase because `is_audit_report` searches for them as
/// plain substrings of the lowercased response text. Each entry counts at
/// most once there, no matter how often it occurs in the text.
const FINDING_KEYWORDS: &[&str] = &[
    "cve-",
    "critical risk",
    "high risk",
    "medium risk",
    "low risk",
    "severity:",
    "finding:",
    "hardcoded secret",
    "misconfiguration",
    "open port",
    "exposed service",
];
3842
/// Past-tense / results language — real reports describe what was found.
///
/// Entries are lowercase because `is_audit_report` searches for them as
/// plain substrings of the lowercased response text.
///
/// NOTE(review): with substring matching, "found" already matches every text
/// that contains "was found" or "were found", and it also matches words such
/// as "foundation". The longer entries are kept so the list stays unchanged
/// for any other consumer.
const RESULTS_LANGUAGE: &[&str] = &[
    "found",
    "detected",
    "discovered",
    "identified",
    "vulnerable to",
    "is exposed",
    "was found",
    "were found",
    "recommendation",
    "remediation",
];
4156
/// Minimum response length to consider for report generation.
///
/// Compared against `str::len`, so the unit is UTF-8 bytes, not characters.
const MIN_REPORT_LENGTH: usize = 600;
4459
4560/// Check if the AI response looks like a security audit report.
61+ ///
62+ /// To avoid false positives on casual capability listings, we require:
63+ /// - At least 1 report-level phrase (e.g. "security audit"), AND
64+ /// - At least 2 finding-level keywords (e.g. "cve-", "high risk"), AND
65+ /// - At least 1 results-language marker (e.g. "found", "detected")
4666pub fn is_audit_report ( text : & str ) -> bool {
4767 if text. len ( ) < MIN_REPORT_LENGTH {
4868 return false ;
4969 }
5070 let lower = text. to_lowercase ( ) ;
51- let matches = AUDIT_KEYWORDS
71+
72+ let has_report_phrase = REPORT_PHRASES . iter ( ) . any ( |kw| lower. contains ( kw) ) ;
73+ let finding_hits = FINDING_KEYWORDS
5274 . iter ( )
5375 . filter ( |kw| lower. contains ( * kw) )
5476 . count ( ) ;
55- matches >= MIN_KEYWORD_MATCHES
77+ let has_results_lang = RESULTS_LANGUAGE . iter ( ) . any ( |kw| lower. contains ( kw) ) ;
78+
79+ has_report_phrase && finding_hits >= 2 && has_results_lang
5680}
5781
5882/// Strip ANSI escape codes from text.
0 commit comments