feat: findings refinement, new scanners, and deployment tooling (#6)

2026-03-09 12:53:12 +00:00
parent 32e5fc21e7
commit 46bf9de549
40 changed files with 2048 additions and 118 deletions
--- a/compliance-agent/src/llm/mod.rs
+++ b/compliance-agent/src/llm/mod.rs
@@ -5,6 +5,7 @@ pub mod descriptions;
 pub mod fixes;
 #[allow(dead_code)]
 pub mod pr_review;
+pub mod review_prompts;
 pub mod triage;

 pub use client::LlmClient;
--- a/compliance-agent/src/llm/review_prompts.rs
+++ b/compliance-agent/src/llm/review_prompts.rs
@@ -0,0 +1,77 @@
+// System prompts for multi-pass LLM code review.
+// Each pass focuses on a different aspect to avoid overloading a single prompt.
+
+/// System prompt for the logic/correctness review pass.
+///
+/// Restricts the model to logic and correctness issues and asks for a JSON
+/// array of findings (`title`, `description`, `severity` of
+/// `high|medium|low`, `file`, `line`, `suggestion`), or `[]` when there are
+/// none.
+pub const LOGIC_REVIEW_PROMPT: &str = r#"You are a senior software engineer reviewing code changes. Focus ONLY on logic and correctness issues.
+
+Look for:
+- Off-by-one errors, wrong comparisons, missing edge cases
+- Incorrect control flow (unreachable code, missing returns, wrong loop conditions)
+- Race conditions or concurrency bugs
+- Resource leaks (unclosed handles, missing cleanup)
+- Wrong variable used (copy-paste errors)
+- Incorrect error handling (swallowed errors, wrong error type)
+
+Ignore: style, naming, formatting, documentation, minor improvements.
+
+For each issue found, respond with a JSON array:
+[{"title": "...", "description": "...", "severity": "high|medium|low", "file": "...", "line": N, "suggestion": "..."}]
+
+If no issues found, respond with: []"#;
+
+/// System prompt for the security review pass.
+///
+/// Restricts the model to security vulnerabilities and asks for a JSON array
+/// of findings (`title`, `description`, `severity` of
+/// `critical|high|medium`, `file`, `line`, `cwe`, `suggestion`), or `[]` when
+/// there are none. This is the only pass whose schema carries a `cwe` field.
+pub const SECURITY_REVIEW_PROMPT: &str = r#"You are a security engineer reviewing code changes. Focus ONLY on security vulnerabilities.
+
+Look for:
+- Injection vulnerabilities (SQL, command, XSS, template injection)
+- Authentication/authorization bypasses
+- Sensitive data exposure (logging secrets, hardcoded credentials)
+- Insecure cryptography (weak algorithms, predictable randomness)
+- Path traversal, SSRF, open redirects
+- Unsafe deserialization
+- Missing input validation at trust boundaries
+
+Ignore: code style, performance, general quality.
+
+For each issue found, respond with a JSON array:
+[{"title": "...", "description": "...", "severity": "critical|high|medium", "file": "...", "line": N, "cwe": "CWE-XXX", "suggestion": "..."}]
+
+If no issues found, respond with: []"#;
+
+/// System prompt for the project-convention review pass.
+///
+/// Restricts the model to convention deviations that indicate likely bugs or
+/// maintenance problems, reported only with high confidence, as a JSON array
+/// of findings (`title`, `description`, `severity` of `medium|low`, `file`,
+/// `line`, `suggestion`), or `[]` when there are none.
+pub const CONVENTION_REVIEW_PROMPT: &str = r#"You are a code reviewer checking adherence to project conventions. Focus ONLY on patterns that indicate likely bugs or maintenance problems.
+
+Look for:
+- Inconsistent error handling patterns within the same module
+- Public API that doesn't follow the project's established patterns
+- Missing or incorrect type annotations that could cause runtime issues
+- Anti-patterns specific to the language (e.g. unwrap in Rust library code, any in TypeScript)
+
+Do NOT report: minor style preferences, documentation gaps, formatting.
+Only report issues with HIGH confidence that they deviate from the visible codebase conventions.
+
+For each issue found, respond with a JSON array:
+[{"title": "...", "description": "...", "severity": "medium|low", "file": "...", "line": N, "suggestion": "..."}]
+
+If no issues found, respond with: []"#;
+
+/// System prompt for the complexity review pass.
+///
+/// Restricts the model to high-risk complexity problems (long functions, deep
+/// nesting, complex boolean expressions, many parameters, duplication) and
+/// asks for a JSON array of findings (`title`, `description`, `severity` of
+/// `medium|low`, `file`, `line`, `suggestion`), or `[]` when there are none.
+pub const COMPLEXITY_REVIEW_PROMPT: &str = r#"You are reviewing code changes for excessive complexity that could lead to bugs.
+
+Look for:
+- Functions over 50 lines that should be decomposed
+- Deeply nested control flow (4+ levels)
+- Complex boolean expressions that are hard to reason about
+- Functions with 5+ parameters
+- Code duplication within the changed files
+
+Only report complexity issues that are HIGH risk for future bugs. Ignore acceptable complexity in configuration, CLI argument parsing, or generated code.
+
+For each issue found, respond with a JSON array:
+[{"title": "...", "description": "...", "severity": "medium|low", "file": "...", "line": N, "suggestion": "..."}]
+
+If no issues found, respond with: []"#;
+
+/// Every review pass as a `(pass name, system prompt)` pair.
+///
+/// The entries are listed in the order logic, security, convention,
+/// complexity; each name is paired with the prompt constant of the same
+/// theme defined in this module.
+pub const REVIEW_PASSES: &[(&str, &str)] = &[
+    ("logic", LOGIC_REVIEW_PROMPT),
+    ("security", SECURITY_REVIEW_PROMPT),
+    ("convention", CONVENTION_REVIEW_PROMPT),
+    ("complexity", COMPLEXITY_REVIEW_PROMPT),
+];
--- a/compliance-agent/src/llm/triage.rs
+++ b/compliance-agent/src/llm/triage.rs
@@ -5,13 +5,22 @@ use compliance_core::models::{Finding, FindingStatus};
 use crate::llm::LlmClient;
 use crate::pipeline::orchestrator::GraphContext;

-const TRIAGE_SYSTEM_PROMPT: &str = r#"You are a security finding triage expert. Analyze the following security finding and determine:
-1. Is this a true positive? (yes/no)
-2. Confidence score (0-10, where 10 is highest confidence this is a real issue)
-3. Brief remediation suggestion (1-2 sentences)
+const TRIAGE_SYSTEM_PROMPT: &str = r#"You are a security finding triage expert. Analyze the following security finding with its code context and determine the appropriate action.
+
+Actions:
+- "confirm": The finding is a true positive at the reported severity. Keep as-is.
+- "downgrade": The finding is real but over-reported. Lower severity recommended.
+- "upgrade": The finding is under-reported. Higher severity recommended.
+- "dismiss": The finding is a false positive. Should be removed.
+
+Consider:
+- Is the code in a test, example, or generated file? (lower confidence for test code)
+- Does the surrounding code context confirm or refute the finding?
+- Is the finding actionable by a developer?
+- Would a real attacker be able to exploit this?

 Respond in JSON format:
-{"true_positive": true/false, "confidence": N, "remediation": "..."}"#;
+{"action": "confirm|downgrade|upgrade|dismiss", "confidence": 0-10, "rationale": "brief explanation", "remediation": "optional fix suggestion"}"#;

 pub async fn triage_findings(
    llm: &Arc<LlmClient>,
@@ -21,8 +30,10 @@ pub async fn triage_findings(
    let mut passed = 0;

    for finding in findings.iter_mut() {
+        let file_classification = classify_file_path(finding.file_path.as_deref());
+
        let mut user_prompt = format!(
-            "Scanner: {}\nRule: {}\nSeverity: {}\nTitle: {}\nDescription: {}\nFile: {}\nLine: {}\nCode: {}",
+            "Scanner: {}\nRule: {}\nSeverity: {}\nTitle: {}\nDescription: {}\nFile: {}\nLine: {}\nCode: {}\nFile classification: {}",
            finding.scanner,
            finding.rule_id.as_deref().unwrap_or("N/A"),
            finding.severity,
@@ -31,8 +42,16 @@ pub async fn triage_findings(
            finding.file_path.as_deref().unwrap_or("N/A"),
            finding.line_number.map(|n| n.to_string()).unwrap_or_else(|| "N/A".to_string()),
            finding.code_snippet.as_deref().unwrap_or("N/A"),
+            file_classification,
        );

+        // Enrich with surrounding code context if possible
+        if let Some(context) = read_surrounding_context(finding) {
+            user_prompt.push_str(&format!(
+                "\n\n--- Surrounding Code (50 lines) ---\n{context}"
+            ));
+        }
+
        // Enrich with graph context if available
        if let Some(ctx) = graph_context {
            if let Some(impact) = ctx
@@ -69,32 +88,55 @@ pub async fn triage_findings(
            .await
        {
            Ok(response) => {
-                // Strip markdown code fences if present (e.g. ```json ... ```)
                let cleaned = response.trim();
                let cleaned = if cleaned.starts_with("```") {
-                    let inner = cleaned
+                    cleaned
                        .trim_start_matches("```json")
                        .trim_start_matches("```")
                        .trim_end_matches("```")
-                        .trim();
-                    inner
+                        .trim()
                } else {
                    cleaned
                };
                if let Ok(result) = serde_json::from_str::<TriageResult>(cleaned) {
-                    finding.confidence = Some(result.confidence);
+                    // Apply file-path confidence adjustment
+                    let adjusted_confidence =
+                        adjust_confidence(result.confidence, &file_classification);
+                    finding.confidence = Some(adjusted_confidence);
+                    finding.triage_action = Some(result.action.clone());
+                    finding.triage_rationale = Some(result.rationale);
+
                    if let Some(remediation) = result.remediation {
                        finding.remediation = Some(remediation);
                    }

-                    if result.confidence >= 3.0 {
-                        finding.status = FindingStatus::Triaged;
-                        passed += 1;
-                    } else {
-                        finding.status = FindingStatus::FalsePositive;
+                    match result.action.as_str() {
+                        "dismiss" => {
+                            finding.status = FindingStatus::FalsePositive;
+                        }
+                        "downgrade" => {
+                            // Downgrade severity by one level
+                            finding.severity = downgrade_severity(&finding.severity);
+                            finding.status = FindingStatus::Triaged;
+                            passed += 1;
+                        }
+                        "upgrade" => {
+                            finding.severity = upgrade_severity(&finding.severity);
+                            finding.status = FindingStatus::Triaged;
+                            passed += 1;
+                        }
+                        _ => {
+                            // "confirm" or unknown — keep as-is
+                            if adjusted_confidence >= 3.0 {
+                                finding.status = FindingStatus::Triaged;
+                                passed += 1;
+                            } else {
+                                finding.status = FindingStatus::FalsePositive;
+                            }
+                        }
                    }
                } else {
-                    // If LLM response doesn't parse, keep the finding
+                    // Parse failure — keep the finding
                    finding.status = FindingStatus::Triaged;
                    passed += 1;
                    tracing::warn!(
@@ -117,12 +159,122 @@ pub async fn triage_findings(
    passed
 }

+/// Read up to 50 lines of source surrounding the finding's reported line.
+///
+/// The window covers the 1-based lines `line - 24 ..= line + 25`, clipped to
+/// the file, and each line is rendered as `"{line_no:>4} | {text}"`.
+///
+/// Returns `None` when the finding has no file path or line number, when the
+/// file cannot be read as UTF-8 text, or when the window is empty (for
+/// example when the reported line lies far beyond the end of the file).
+fn read_surrounding_context(finding: &Finding) -> Option<String> {
+    // Number of lines taken on each side of the reported line.
+    const CONTEXT_RADIUS: usize = 25;
+
+    let file_path = finding.file_path.as_deref()?;
+    let line = finding.line_number? as usize;
+
+    // The path is used verbatim, so this only succeeds when it resolves from
+    // the process's working directory (or is absolute).
+    // NOTE(review): this is a blocking read reached from the async
+    // `triage_findings`; consider an async or `spawn_blocking` read.
+    let content = std::fs::read_to_string(file_path).ok()?;
+    let lines: Vec<&str> = content.lines().collect();
+
+    let start = line.saturating_sub(CONTEXT_RADIUS);
+    let end = line.saturating_add(CONTEXT_RADIUS).min(lines.len());
+
+    // A stale or bogus line number can leave `start` past `end`; checked
+    // slicing yields `None` there instead of panicking like `lines[start..end]`.
+    let window = lines.get(start..end).filter(|w| !w.is_empty())?;
+
+    Some(
+        window
+            .iter()
+            .enumerate()
+            .map(|(offset, text)| format!("{:>4} | {}", start + offset + 1, text))
+            .collect::<Vec<_>>()
+            .join("\n"),
+    )
+}
+
+/// Classify a file path to inform triage confidence adjustment.
+///
+/// Returns one of `"unknown"` (no path), `"test"`, `"example"`,
+/// `"generated"`, `"vendored"` or `"production"`. Categories are checked in
+/// that order and the first one with a matching marker wins.
+///
+/// Matching is case-insensitive. The path is normalised first: backslashes
+/// become forward slashes and a leading `/` is prepended, so that directory
+/// markers such as `/tests/` also match root-relative paths like
+/// `tests/foo.rs` and Windows-style paths like `src\tests\foo.rs`.
+fn classify_file_path(path: Option<&str>) -> String {
+    // (label, substrings that identify the category), in priority order.
+    const CATEGORIES: &[(&str, &[&str])] = &[
+        (
+            "test",
+            &[
+                "/test/",
+                "/tests/",
+                "_test.",
+                ".test.",
+                ".spec.",
+                "/fixtures/",
+                "/testdata/",
+            ],
+        ),
+        // "/example" also covers "/examples/"; "/sample" covers "/samples/".
+        ("example", &["/example", "/demo/", "/sample"]),
+        (
+            "generated",
+            &[
+                "/generated/",
+                "/gen/",
+                ".generated.",
+                ".pb.go",
+                "_generated.rs",
+            ],
+        ),
+        ("vendored", &["/vendor/", "/node_modules/", "/third_party/"]),
+    ];
+
+    let raw = match path {
+        Some(p) => p,
+        None => return "unknown".to_string(),
+    };
+
+    // Normalise so every directory marker can rely on a leading '/'.
+    let normalized = format!("/{}", raw.to_lowercase().replace('\\', "/"));
+
+    CATEGORIES
+        .iter()
+        .find(|(_, markers)| markers.iter().any(|marker| normalized.contains(*marker)))
+        .map_or("production", |(label, _)| *label)
+        .to_string()
+}
+
+/// Scale a raw confidence score by a weight tied to the file classification.
+///
+/// `"test"`, `"example"`, `"generated"` and `"vendored"` are weighted by 0.5,
+/// 0.6, 0.3 and 0.4 respectively; any other classification leaves the score
+/// unchanged (weight 1.0).
+fn adjust_confidence(raw_confidence: f64, classification: &str) -> f64 {
+    const WEIGHTS: [(&str, f64); 4] = [
+        ("test", 0.5),
+        ("example", 0.6),
+        ("generated", 0.3),
+        ("vendored", 0.4),
+    ];
+
+    let weight = WEIGHTS
+        .iter()
+        .find(|(label, _)| *label == classification)
+        .map_or(1.0, |(_, factor)| *factor);
+
+    raw_confidence * weight
+}
+
+/// Return the severity one level below `severity`.
+///
+/// The ladder is Critical → High → Medium → Low → Info; `Info` is the floor
+/// and maps to itself.
+fn downgrade_severity(
+    severity: &compliance_core::models::Severity,
+) -> compliance_core::models::Severity {
+    use compliance_core::models::Severity as Level;
+
+    match severity {
+        Level::Critical => Level::High,
+        Level::High => Level::Medium,
+        Level::Medium => Level::Low,
+        // Already at (or stepping onto) the lowest rung.
+        Level::Low | Level::Info => Level::Info,
+    }
+}
+
+/// Return the severity one level above `severity`.
+///
+/// The ladder is Info → Low → Medium → High → Critical; `Critical` is the
+/// ceiling and maps to itself.
+fn upgrade_severity(
+    severity: &compliance_core::models::Severity,
+) -> compliance_core::models::Severity {
+    use compliance_core::models::Severity as Level;
+
+    match severity {
+        Level::Info => Level::Low,
+        Level::Low => Level::Medium,
+        Level::Medium => Level::High,
+        // Already at (or stepping onto) the highest rung.
+        Level::High | Level::Critical => Level::Critical,
+    }
+}
+
 #[derive(serde::Deserialize)]
 struct TriageResult {
-    #[serde(default)]
-    #[allow(dead_code)]
-    true_positive: bool,
+    #[serde(default = "default_action")]
+    action: String,
    #[serde(default)]
    confidence: f64,
+    #[serde(default)]
+    rationale: String,
    remediation: Option<String>,
 }
+
+/// Fallback triage action, `"confirm"`, referenced by the
+/// `#[serde(default = "default_action")]` attribute on `TriageResult::action`.
+fn default_action() -> String {
+    String::from("confirm")
+}