From 8aa23ee41c57f60f7b02b650b85485d6c85dce6c Mon Sep 17 00:00:00 2001 From: Microck Date: Thu, 5 Mar 2026 09:45:57 +0000 Subject: [PATCH] feat: add comprehensive logging across scan pipeline Added structured logging to improve observability and debugging: - MalwareBazaar: hash lookups, API responses, matches - Configuration: signature/YARA rule loading with counts - YARA scanning: rule matches with severity and paths - Profile building: metadata extraction and loader detection - Scan pipeline: stage transitions, verdicts, overrides - HTTP endpoints: upload/scan request tracking All logging uses structured fields (upload_id, scan_id, etc.) for consistent querying and analysis. Nightshift-Task: logging-audit Nightshift-Ref: https://github.com/marcus/nightshift --- LOGGING_AUDIT.md | 138 +++++++++++++++++++++++++++++++++++++++++++ src/analysis/yara.rs | 27 ++++++++- src/lib.rs | 15 ++++- src/main.rs | 14 ++++- src/malwarebazaar.rs | 25 +++++++- src/profile.rs | 26 ++++++++ src/scan.rs | 125 +++++++++++++++++++++++++++++++-------- 7 files changed, 341 insertions(+), 29 deletions(-) create mode 100644 LOGGING_AUDIT.md diff --git a/LOGGING_AUDIT.md b/LOGGING_AUDIT.md new file mode 100644 index 0000000..cdb81f4 --- /dev/null +++ b/LOGGING_AUDIT.md @@ -0,0 +1,138 @@ +# Logging Audit Report + +## Executive Summary + +This report audits the logging quality and completeness across the Jarspect codebase. + +## Current State + +### Files with Logging +- `src/main.rs` - Uses `tracing::info!` (4 occurrences) +- `src/scan.rs` - Uses `tracing::warn!` (4 occurrences) +- `src/verdict.rs` - Uses `tracing::warn!` (7 occurrences) +- `src/analysis/archive.rs` - Uses `tracing::debug!` (6 occurrences) + +### Files WITHOUT Logging +- `src/malwarebazaar.rs` - CRITICAL: HTTP operations unlogged +- `src/lib.rs` - CRITICAL: Config loading unlogged +- `src/analysis/yara.rs` - HIGH: Scans and matches unlogged +- `src/profile.rs` - MEDIUM: Profile building unlogged +- `src/detectors/*` (all) - MEDIUM: Detector execution unlogged + +## Findings + +### 1. MalwareBazaar Integration (CRITICAL) +**Issue**: No logging for external HTTP operations +**Impact**: Cannot debug hash lookup failures, API errors, or rate limits +**Recommendation**: Add structured logging for: +- Hash lookup attempts (info) +- API responses (debug) +- Failures (warn) +- Rate limiting events (warn) + +### 2. Configuration Loading (CRITICAL) +**Issue**: No logging for signature and YARA rule loading +**Impact**: Cannot diagnose startup failures or misconfigurations +**Recommendation**: Add logging for: +- Signature files being loaded (info) +- Signature counts (info) +- YARA rule compilation (debug) +- Validation failures (warn) + +### 3. YARA Scanning (HIGH) +**Issue**: No logging of scan operations or matches +**Impact**: Cannot track which rules are being evaluated +**Recommendation**: Add logging for: +- Scan start/end (debug) +- Matched rules (debug with rule ID, severity, file path) + +### 4. Profile Building (MEDIUM) +**Issue**: No logging of metadata extraction +**Impact**: Cannot verify which mod format was detected +**Recommendation**: Add logging for: +- Detected loader (info) +- Metadata fields extracted (debug) +- Parsing failures (warn) + +### 5. Detector Execution (MEDIUM) +**Issue**: No logging of detector runs +**Impact**: Cannot see which detectors found evidence +**Recommendation**: Add logging for: +- Detector start/end (debug) +- Evidence found counts (debug) +- Skip reasons (trace) + +### 6. Scan Pipeline (HIGH) +**Issue**: Missing progress logging between scan stages +**Impact**: Cannot track where in the pipeline a scan is +**Recommendation**: Add logging for: +- Intake completion (info with file/class counts) +- Static analysis start (info) +- YARA scan start (info) +- Detector run start (info) +- Verdict determination (info) +- Scan completion (info with result) + +### 7. Archive Analysis (LOW - GOOD) +**Current state**: Uses debug logging for budget limits +**Status**: Adequate for current needs +**No changes required** + +## Logging Level Guidelines + +### info +- Scan start/end with identifiers +- High-level stage transitions +- Configuration loading results +- External service calls (MalwareBazaar, AI API) +- Errors affecting scan outcome + +### warn +- Retryable failures (with backoff details) +- Graceful degradation scenarios +- Non-critical parsing failures with fallbacks +- Rate limiting + +### debug +- Detailed operation progress +- File/class counts +- Detector evidence found +- YARA rule matches +- Individual detector runs + +### trace +- Very detailed diagnostics +- Skip reasons for optimization +- Individual record processing + +## Structured Logging Fields + +Use structured fields for consistency: + +### Common fields +- `scan_id`: UUID for the scan operation +- `upload_id`: Upload identifier +- `file_count`: Number of files processed +- `class_count`: Number of .class files +- `jar_size_bytes`: Size of the JAR file +- `duration_ms`: Operation duration +- `error`: Error details +- `retry_count`: Number of retry attempts +- `backoff_ms`: Backoff duration + +### MalwareBazaar-specific +- `sha256_hash`: Hash being looked up +- `family`: Malware family name (if found) +- `tags_count`: Number of tags +- `api_call_count`: Number of API calls made + +### YARA-specific +- `rule_id`: YARA rule identifier +- `severity`: Rule severity +- `file_path`: Matched file path +- `rulepack`: rulepack kind (demo/prod) + +### Detector-specific +- `detector_id`: Detector identifier +- `evidence_count`: Number of evidence items found +- `severity`: Finding severity diff --git a/src/analysis/yara.rs b/src/analysis/yara.rs index 56257f2..793343a 100644 --- a/src/analysis/yara.rs +++ b/src/analysis/yara.rs @@ -1,4 +1,5 @@ use anyhow::Result; +use tracing::{debug, info}; use yara_x::{MetaValue, Rule, Rules, Scanner}; use crate::analysis::ArchiveEntry; @@ -57,25 +58,47 @@ pub fn scan_yara_rulepacks( packs: &[YaraRulepack], ) -> Result> { let mut findings = Vec::new(); + let total_entries = entries.len(); for pack in packs { + debug!( + pack = %pack.kind.as_str(), + entries_count = total_entries, + "YARA scan started" + ); + for entry in entries { let mut scanner = Scanner::new(&pack.rules); let scan_results = scanner.scan(entry.bytes.as_slice())?; for rule in scan_results.matching_rules() { + let severity = derive_rule_severity(&rule, pack.kind); + debug!( + rule_id = rule.identifier(), + severity, + pack = %pack.kind.as_str(), + entry_path = %entry.path, + "YARA rule matched" + ); findings.push(( entry.path.clone(), YaraFinding { pack: pack.kind, rule_identifier: rule.identifier().to_string(), - severity: derive_rule_severity(&rule, pack.kind).to_string(), + severity: severity.to_string(), evidence: build_match_evidence(&rule), }, )); } } + + debug!( + pack = %pack.kind.as_str(), + matches_count = findings.len(), + "YARA scan complete" + ); } + info!(total_findings = findings.len(), "YARA scan complete"); Ok(findings) } @@ -174,7 +197,7 @@ fn build_match_evidence(rule: &Rule<'_, '_>) -> String { mod tests { use yara_x::Compiler; - use super::{RulepackKind, YaraRulepack, scan_yara_rulepacks}; + use super::{scan_yara_rulepacks, RulepackKind, YaraRulepack}; use crate::analysis::ArchiveEntry; #[test] diff --git a/src/lib.rs b/src/lib.rs index 53d57b6..a1cc135 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -5,6 +5,7 @@ use std::sync::Arc; use anyhow::{Context, Result}; use regex::Regex; use serde::{Deserialize, Serialize}; +use tracing::{debug, info, warn}; use yara_x::Compiler; pub mod analysis; @@ -444,13 +445,17 @@ pub fn load_signatures( for pack in packs { let path = signatures_path_for_pack(cwd, pack.as_str()); + debug!(path = %path.display(), pack = %pack.as_str(), "loading signature corpus"); let payload = std::fs::read_to_string(&path) .with_context(|| format!("Failed to read signature corpus: {}", path.display()))?; let mut parsed: Vec = serde_json::from_str(&payload) .with_context(|| format!("Invalid signature JSON: {}", path.display()))?; + let count = parsed.len(); signatures.append(&mut parsed); + info!(pack = %pack.as_str(), count, "loaded signature corpus"); } + info!(total_signatures = signatures.len(), "signature loading complete"); Ok(signatures) } @@ -462,18 +467,26 @@ pub fn load_yara_rules( for pack in packs { let path = yara_path_for_pack(cwd, pack.as_str()); + debug!(path = %path.display(), pack = %pack.as_str(), "loading YARA rules"); let source = std::fs::read_to_string(&path) .with_context(|| format!("Failed to read YARA rules: {}", path.display()))?; let mut compiler = Compiler::new(); compiler .add_source(source.as_str()) .with_context(|| format!("Failed compiling YARA rules from {}", path.display()))?; + let rules = compiler.build(); + let rule_count = rules.iter().count(); loaded_rulepacks.push(analysis::YaraRulepack { kind: *pack, - rules: compiler.build(), + rules, }); + info!(pack = %pack.as_str(), rule_count, "compiled YARA rules"); } + info!( + total_rulepacks = loaded_rulepacks.len(), + "YARA rule loading complete" + ); Ok(loaded_rulepacks) } diff --git a/src/main.rs b/src/main.rs index 5fea760..70384cc 100644 --- a/src/main.rs +++ b/src/main.rs @@ -11,7 +11,7 @@ use axum::{Json, Router}; use serde_json::Value; use tokio::fs; use tower_http::services::ServeDir; -use tracing::info; +use tracing::{debug, info}; use uuid::Uuid; use jarspect::verdict::AiConfig; @@ -192,6 +192,9 @@ async fn upload( State(state): State, mut multipart: Multipart, ) -> Result, AppError> { + let upload_id = Uuid::new_v4().simple().to_string(); + info!(upload_id = %upload_id, "upload request received"); + let mut filename = None; let mut bytes = None; @@ -221,12 +224,18 @@ async fn upload( } let content = bytes.ok_or_else(|| AppError::bad_request("Missing upload file bytes"))?; - let upload_id = Uuid::new_v4().simple().to_string(); let output_path = state.uploads_dir.join(format!("{upload_id}.jar")); fs::write(&output_path, &content) .await .map_err(|e| AppError::internal(format!("Failed to persist upload: {e}")))?; + info!( + upload_id = %upload_id, + filename, + size_bytes = content.len(), + "upload persisted" + ); + Ok(Json(serde_json::json!({ "upload_id": upload_id, "filename": filename, @@ -239,6 +248,7 @@ async fn scan( State(state): State, Json(request): Json, ) -> Result, AppError> { + info!(upload_id = %request.upload_id, "scan request received"); match jarspect::run_scan(&state, request, None).await { Ok(scan_payload) => Ok(Json(scan_payload)), Err(error) => Err(map_scan_error(error)), diff --git a/src/malwarebazaar.rs b/src/malwarebazaar.rs index 53f2e23..1d322c3 100644 --- a/src/malwarebazaar.rs +++ b/src/malwarebazaar.rs @@ -1,9 +1,10 @@ -use std::time::Duration; +use std::time::{Duration, Instant}; use anyhow::{Context, Result}; use reqwest::Client; use serde::{Deserialize, Serialize}; use serde_json::Value; +use tracing::{debug, info, warn}; const MALWAREBAZAAR_API_URL: &str = "https://mb-api.abuse.ch/api/v1/"; @@ -42,6 +43,9 @@ pub async fn check_hash(sha256: &str, api_key: &str) -> Result Result Result Result String { fn extract_mod_metadata(entries: &[ArchiveEntry]) -> ModMetadata { if let Some(metadata) = parse_fabric_metadata(entries) { + debug!( + loader = "fabric", + mod_id = ?metadata.mod_id, + name = ?metadata.name, + "detected fabric mod metadata" + ); return metadata; } if let Some(metadata) = parse_forge_metadata(entries) { + debug!( + loader = ?metadata.loader, + mod_id = ?metadata.mod_id, + name = ?metadata.name, + "detected forge/neoforge mod metadata" + ); return metadata; } if let Some(metadata) = parse_mcmod_info(entries) { + debug!( + loader = "legacy-forge", + mod_id = ?metadata.mod_id, + name = ?metadata.name, + "detected mcmod.info metadata" + ); return metadata; } if let Some(metadata) = parse_spigot_metadata(entries) { + debug!( + loader = "spigot", + mod_id = ?metadata.mod_id, + name = ?metadata.name, + "detected spigot plugin metadata" + ); return metadata; } + debug!("no mod metadata found"); ModMetadata::default() } diff --git a/src/scan.rs b/src/scan.rs index 4f1f5c2..d82ac94 100644 --- a/src/scan.rs +++ b/src/scan.rs @@ -1,7 +1,7 @@ use anyhow::{Context, Result}; use sha2::{Digest, Sha256}; use tokio::fs; -use tracing::warn; +use tracing::{info, warn}; use uuid::Uuid; use crate::analysis; @@ -58,10 +58,25 @@ pub async fn run_scan( anyhow::bail!("Upload not found") } + let scan_id = build_scan_id(scan_id_override)?; + info!( + upload_id = %request.upload_id, + scan_id = %scan_id, + "scan started" + ); + let bytes = fs::read(&upload_path) .await .with_context(|| format!("Failed to read upload: {}", upload_path.display()))?; let sha256_hash = format!("{:x}", Sha256::digest(bytes.as_slice())); + let jar_size = bytes.len(); + info!( + upload_id = %request.upload_id, + scan_id = %scan_id, + jar_size_bytes = jar_size, + sha256 = %sha256_hash, + "upload loaded" + ); let mb_mode = malwarebazaar_match_mode(); let malwarebazaar_match = if malwarebazaar_hash_match_enabled() { @@ -224,23 +239,47 @@ pub async fn run_scan( } }; + let class_count = entries + .iter() + .filter(|entry| entry.path.ends_with(".class")) + .count(); + let intake = IntakeResult { upload_id: request.upload_id.clone(), storage_path: upload_path.to_string_lossy().into_owned(), file_count: entries.len(), - class_file_count: entries - .iter() - .filter(|entry| entry.path.ends_with(".class")) - .count(), + class_file_count: class_count, }; + info!( + upload_id = %request.upload_id, + scan_id = %scan_id, + file_count = intake.file_count, + class_count, + "archive intake complete" + ); + let bytecode_evidence = Some(analysis::extract_bytecode_evidence(&entries)); + + info!( + upload_id = %request.upload_id, + scan_id = %scan_id, + "static analysis started" + ); let static_findings = crate::run_static_analysis( &entries, bytecode_evidence.as_ref(), &state.signatures, &state.yara_rulepacks, )?; + + info!( + upload_id = %request.upload_id, + scan_id = %scan_id, + matches_count = static_findings.matches.len(), + "static analysis complete" + ); + let capability_profile = profile::build_profile( &static_findings, &entries, @@ -281,31 +320,61 @@ pub async fn run_scan( let (mut ai_verdict, mut method) = match state.ai_config.as_ref() { Some(ai_config) => match verdict::ai_verdict(&capability_profile, &static_findings, ai_config).await { - Ok(verdict) => (verdict, "ai_verdict".to_string()), + Ok(verdict) => { + info!( + upload_id = %request.upload_id, + scan_id = %scan_id, + result = %verdict.result, + confidence = verdict.confidence, + risk_score = verdict.risk_score, + "AI verdict determined" + ); + (verdict, "ai_verdict".to_string()) + }, Err(error) => { warn!(error = %error, "AI verdict failed; falling back to heuristic verdict"); - ( - verdict::heuristic_verdict( - &static_findings, - &capability_profile, - "AI verdict failed.", - ), - "heuristic_fallback".to_string(), - ) + let verdict = verdict::heuristic_verdict( + &static_findings, + &capability_profile, + "AI verdict failed.", + ); + info!( + upload_id = %request.upload_id, + scan_id = %scan_id, + result = %verdict.result, + confidence = verdict.confidence, + risk_score = verdict.risk_score, + "heuristic verdict determined" + ); + (verdict, "heuristic_fallback".to_string()) } }, - None => ( - verdict::heuristic_verdict( + None => { + let verdict = verdict::heuristic_verdict( &static_findings, &capability_profile, "AI configuration missing.", - ), - "heuristic_fallback".to_string(), - ), + ); + info!( + upload_id = %request.upload_id, + scan_id = %scan_id, + result = %verdict.result, + confidence = verdict.confidence, + risk_score = verdict.risk_score, + "heuristic verdict determined (no AI)" + ); + (verdict, "heuristic_fallback".to_string()) + }, }; if let Some(reason) = high_confidence_static_reason(&static_findings) { if ai_verdict.result != "MALICIOUS" { + info!( + upload_id = %request.upload_id, + scan_id = %scan_id, + reason, + "static override applied" + ); ai_verdict.result = "MALICIOUS".to_string(); ai_verdict.confidence = ai_verdict.confidence.max(0.9); ai_verdict.risk_score = ai_verdict.risk_score.max(90); @@ -318,7 +387,7 @@ pub async fn run_scan( } let response = ScanRunResponse { - scan_id: build_scan_id(scan_id_override)?, + scan_id: scan_id, sha256: Some(sha256_hash), verdict: Verdict { result: ai_verdict.result, @@ -337,8 +406,18 @@ pub async fn run_scan( intake, }; - persist_scan_result(state, &response).await?; - Ok(response) + info!( + upload_id = %request.upload_id, + scan_id = %response.scan_id, + result = %response.verdict.result, + method = %response.verdict.method, + confidence = response.verdict.confidence, + risk_score = response.verdict.risk_score, + "scan complete" + ); + + persist_scan_result(state, &response).await?; + Ok(response) } fn best_effort_text(bytes: &[u8]) -> Option { @@ -407,7 +486,7 @@ async fn lookup_malwarebazaar( match malwarebazaar::check_hash(sha256_hash, api_key).await { Ok(result) => result, Err(error) => { - warn!(error = %error, "MalwareBazaar lookup failed; continuing with local analysis"); + warn!(error = %error, sha256 = %sha256_hash, "MalwareBazaar lookup failed; continuing with local analysis"); None } }