use std::sync::Arc; use compliance_core::models::{Finding, FindingStatus}; use crate::llm::LlmClient; use crate::pipeline::orchestrator::GraphContext; const TRIAGE_SYSTEM_PROMPT: &str = r#"You are a security finding triage expert. Analyze the following security finding with its code context and determine the appropriate action. Actions: - "confirm": The finding is a true positive at the reported severity. Keep as-is. - "downgrade": The finding is real but over-reported. Lower severity recommended. - "upgrade": The finding is under-reported. Higher severity recommended. - "dismiss": The finding is a false positive. Should be removed. Consider: - Is the code in a test, example, or generated file? (lower confidence for test code) - Does the surrounding code context confirm or refute the finding? - Is the finding actionable by a developer? - Would a real attacker be able to exploit this? Respond in JSON format: {"action": "confirm|downgrade|upgrade|dismiss", "confidence": 0-10, "rationale": "brief explanation", "remediation": "optional fix suggestion"}"#; pub async fn triage_findings( llm: &Arc, findings: &mut Vec, graph_context: Option<&GraphContext>, ) -> usize { let mut passed = 0; for finding in findings.iter_mut() { let file_classification = classify_file_path(finding.file_path.as_deref()); let mut user_prompt = format!( "Scanner: {}\nRule: {}\nSeverity: {}\nTitle: {}\nDescription: {}\nFile: {}\nLine: {}\nCode: {}\nFile classification: {}", finding.scanner, finding.rule_id.as_deref().unwrap_or("N/A"), finding.severity, finding.title, finding.description, finding.file_path.as_deref().unwrap_or("N/A"), finding.line_number.map(|n| n.to_string()).unwrap_or_else(|| "N/A".to_string()), finding.code_snippet.as_deref().unwrap_or("N/A"), file_classification, ); // Enrich with surrounding code context if possible if let Some(context) = read_surrounding_context(finding) { user_prompt.push_str(&format!( "\n\n--- Surrounding Code (50 lines) ---\n{context}" )); } // Enrich with graph context if available if let Some(ctx) = graph_context { if let Some(impact) = ctx .impacts .iter() .find(|i| i.finding_id == finding.fingerprint) { user_prompt.push_str(&format!( "\n\n--- Code Graph Context ---\n\ Blast radius: {} nodes affected\n\ Entry points affected: {}\n\ Direct callers: {}\n\ Communities affected: {}\n\ Call chains: {}", impact.blast_radius, if impact.affected_entry_points.is_empty() { "none".to_string() } else { impact.affected_entry_points.join(", ") }, if impact.direct_callers.is_empty() { "none".to_string() } else { impact.direct_callers.join(", ") }, impact.affected_communities.len(), impact.call_chains.len(), )); } } match llm .chat(TRIAGE_SYSTEM_PROMPT, &user_prompt, Some(0.1)) .await { Ok(response) => { let cleaned = response.trim(); let cleaned = if cleaned.starts_with("```") { cleaned .trim_start_matches("```json") .trim_start_matches("```") .trim_end_matches("```") .trim() } else { cleaned }; if let Ok(result) = serde_json::from_str::(cleaned) { // Apply file-path confidence adjustment let adjusted_confidence = adjust_confidence(result.confidence, &file_classification); finding.confidence = Some(adjusted_confidence); finding.triage_action = Some(result.action.clone()); finding.triage_rationale = Some(result.rationale); if let Some(remediation) = result.remediation { finding.remediation = Some(remediation); } match result.action.as_str() { "dismiss" => { finding.status = FindingStatus::FalsePositive; } "downgrade" => { // Downgrade severity by one level finding.severity = downgrade_severity(&finding.severity); finding.status = FindingStatus::Triaged; passed += 1; } "upgrade" => { finding.severity = upgrade_severity(&finding.severity); finding.status = FindingStatus::Triaged; passed += 1; } _ => { // "confirm" or unknown — keep as-is if adjusted_confidence >= 3.0 { finding.status = FindingStatus::Triaged; passed += 1; } else { finding.status = FindingStatus::FalsePositive; } } } } else { // Parse failure — keep the finding finding.status = FindingStatus::Triaged; passed += 1; tracing::warn!( "Failed to parse triage response for {}: {response}", finding.fingerprint ); } } Err(e) => { // On LLM error, keep the finding tracing::warn!("LLM triage failed for {}: {e}", finding.fingerprint); finding.status = FindingStatus::Triaged; passed += 1; } } } // Remove false positives findings.retain(|f| f.status != FindingStatus::FalsePositive); passed } /// Read ~50 lines of surrounding code from the file at the finding's location fn read_surrounding_context(finding: &Finding) -> Option { let file_path = finding.file_path.as_deref()?; let line = finding.line_number? as usize; // Try to read the file — this works because the repo is cloned locally let content = std::fs::read_to_string(file_path).ok()?; let lines: Vec<&str> = content.lines().collect(); let start = line.saturating_sub(25); let end = (line + 25).min(lines.len()); Some( lines[start..end] .iter() .enumerate() .map(|(i, l)| format!("{:>4} | {}", start + i + 1, l)) .collect::>() .join("\n"), ) } /// Classify a file path to inform triage confidence adjustment fn classify_file_path(path: Option<&str>) -> String { let path = match path { Some(p) => p.to_lowercase(), None => return "unknown".to_string(), }; if path.contains("/test/") || path.contains("/tests/") || path.contains("_test.") || path.contains(".test.") || path.contains(".spec.") || path.contains("/fixtures/") || path.contains("/testdata/") { return "test".to_string(); } if path.contains("/example") || path.contains("/examples/") || path.contains("/demo/") || path.contains("/sample") { return "example".to_string(); } if path.contains("/generated/") || path.contains("/gen/") || path.contains(".generated.") || path.contains(".pb.go") || path.contains("_generated.rs") { return "generated".to_string(); } if path.contains("/vendor/") || path.contains("/node_modules/") || path.contains("/third_party/") { return "vendored".to_string(); } "production".to_string() } /// Adjust confidence based on file classification fn adjust_confidence(raw_confidence: f64, classification: &str) -> f64 { let multiplier = match classification { "test" => 0.5, "example" => 0.6, "generated" => 0.3, "vendored" => 0.4, _ => 1.0, }; raw_confidence * multiplier } fn downgrade_severity( severity: &compliance_core::models::Severity, ) -> compliance_core::models::Severity { use compliance_core::models::Severity; match severity { Severity::Critical => Severity::High, Severity::High => Severity::Medium, Severity::Medium => Severity::Low, Severity::Low => Severity::Info, Severity::Info => Severity::Info, } } fn upgrade_severity( severity: &compliance_core::models::Severity, ) -> compliance_core::models::Severity { use compliance_core::models::Severity; match severity { Severity::Info => Severity::Low, Severity::Low => Severity::Medium, Severity::Medium => Severity::High, Severity::High => Severity::Critical, Severity::Critical => Severity::Critical, } } #[derive(serde::Deserialize)] struct TriageResult { #[serde(default = "default_action")] action: String, #[serde(default)] confidence: f64, #[serde(default)] rationale: String, remediation: Option, } fn default_action() -> String { "confirm".to_string() }