feat: pentest onboarding — streaming, browser automation, reports, user cleanup (#16)

Complete pentest feature overhaul: SSE streaming, session-persistent browser tool (CDP), AES-256 credential encryption, auto-screenshots in reports, code-level remediation correlation, SAST triage chunking, context window optimization, test user cleanup (Keycloak/Auth0/Okta), wizard dropdowns, attack chain improvements, architecture docs with Mermaid diagrams.

Co-authored-by: Sharang Parnerkar <parnerkarsharang@gmail.com>
Reviewed-on: #16
2026-03-17 20:32:20 +00:00
parent 11e1c5f438
commit c461faa2fb
57 changed files with 8844 additions and 2423 deletions
@@ -5,7 +5,10 @@ use compliance_core::models::{Finding, FindingStatus};
 use crate::llm::LlmClient;
 use crate::pipeline::orchestrator::GraphContext;

-const TRIAGE_SYSTEM_PROMPT: &str = r#"You are a security finding triage expert. Analyze the following security finding with its code context and determine the appropriate action.
+/// Maximum number of findings to include in a single LLM triage call.
+const TRIAGE_CHUNK_SIZE: usize = 30;
+
+const TRIAGE_SYSTEM_PROMPT: &str = r#"You are a security finding triage expert. Analyze each of the following security findings with its code context and determine the appropriate action.

 Actions:
 - "confirm": The finding is a true positive at the reported severity. Keep as-is.
@@ -19,8 +22,8 @@ Consider:
 - Is the finding actionable by a developer?
 - Would a real attacker be able to exploit this?

-Respond in JSON format:
-{"action": "confirm|downgrade|upgrade|dismiss", "confidence": 0-10, "rationale": "brief explanation", "remediation": "optional fix suggestion"}"#;
+Respond with a JSON array, one entry per finding in the same order they were presented:
+[{"id": "<fingerprint>", "action": "confirm|downgrade|upgrade|dismiss", "confidence": 0-10, "rationale": "brief explanation", "remediation": "optional fix suggestion"}, ...]"#;

 pub async fn triage_findings(
    llm: &Arc<LlmClient>,
@@ -29,60 +32,76 @@ pub async fn triage_findings(
 ) -> usize {
    let mut passed = 0;

-    for finding in findings.iter_mut() {
-        let file_classification = classify_file_path(finding.file_path.as_deref());
+    // Process findings in chunks to avoid overflowing the LLM context window.
+    for chunk_start in (0..findings.len()).step_by(TRIAGE_CHUNK_SIZE) {
+        let chunk_end = (chunk_start + TRIAGE_CHUNK_SIZE).min(findings.len());
+        let chunk = &mut findings[chunk_start..chunk_end];

-        let mut user_prompt = format!(
-            "Scanner: {}\nRule: {}\nSeverity: {}\nTitle: {}\nDescription: {}\nFile: {}\nLine: {}\nCode: {}\nFile classification: {}",
-            finding.scanner,
-            finding.rule_id.as_deref().unwrap_or("N/A"),
-            finding.severity,
-            finding.title,
-            finding.description,
-            finding.file_path.as_deref().unwrap_or("N/A"),
-            finding.line_number.map(|n| n.to_string()).unwrap_or_else(|| "N/A".to_string()),
-            finding.code_snippet.as_deref().unwrap_or("N/A"),
-            file_classification,
-        );
+        // Build a combined prompt for the entire chunk.
+        let mut user_prompt = String::new();
+        let mut file_classifications: Vec<String> = Vec::new();
+
+        for (i, finding) in chunk.iter().enumerate() {
+            let file_classification = classify_file_path(finding.file_path.as_deref());

-        // Enrich with surrounding code context if possible
-        if let Some(context) = read_surrounding_context(finding) {
            user_prompt.push_str(&format!(
-                "\n\n--- Surrounding Code (50 lines) ---\n{context}"
+                "\n--- Finding {} (id: {}) ---\nScanner: {}\nRule: {}\nSeverity: {}\nTitle: {}\nDescription: {}\nFile: {}\nLine: {}\nCode: {}\nFile classification: {}",
+                i + 1,
+                finding.fingerprint,
+                finding.scanner,
+                finding.rule_id.as_deref().unwrap_or("N/A"),
+                finding.severity,
+                finding.title,
+                finding.description,
+                finding.file_path.as_deref().unwrap_or("N/A"),
+                finding.line_number.map(|n| n.to_string()).unwrap_or_else(|| "N/A".to_string()),
+                finding.code_snippet.as_deref().unwrap_or("N/A"),
+                file_classification,
            ));
-        }

-        // Enrich with graph context if available
-        if let Some(ctx) = graph_context {
-            if let Some(impact) = ctx
-                .impacts
-                .iter()
-                .find(|i| i.finding_id == finding.fingerprint)
-            {
+            // Enrich with surrounding code context if possible
+            if let Some(context) = read_surrounding_context(finding) {
                user_prompt.push_str(&format!(
-                    "\n\n--- Code Graph Context ---\n\
-                     Blast radius: {} nodes affected\n\
-                     Entry points affected: {}\n\
-                     Direct callers: {}\n\
-                     Communities affected: {}\n\
-                     Call chains: {}",
-                    impact.blast_radius,
-                    if impact.affected_entry_points.is_empty() {
-                        "none".to_string()
-                    } else {
-                        impact.affected_entry_points.join(", ")
-                    },
-                    if impact.direct_callers.is_empty() {
-                        "none".to_string()
-                    } else {
-                        impact.direct_callers.join(", ")
-                    },
-                    impact.affected_communities.len(),
-                    impact.call_chains.len(),
+                    "\n\n--- Surrounding Code (50 lines) ---\n{context}"
                ));
            }
+
+            // Enrich with graph context if available
+            if let Some(ctx) = graph_context {
+                if let Some(impact) = ctx
+                    .impacts
+                    .iter()
+                    .find(|im| im.finding_id == finding.fingerprint)
+                {
+                    user_prompt.push_str(&format!(
+                        "\n\n--- Code Graph Context ---\n\
+                         Blast radius: {} nodes affected\n\
+                         Entry points affected: {}\n\
+                         Direct callers: {}\n\
+                         Communities affected: {}\n\
+                         Call chains: {}",
+                        impact.blast_radius,
+                        if impact.affected_entry_points.is_empty() {
+                            "none".to_string()
+                        } else {
+                            impact.affected_entry_points.join(", ")
+                        },
+                        if impact.direct_callers.is_empty() {
+                            "none".to_string()
+                        } else {
+                            impact.direct_callers.join(", ")
+                        },
+                        impact.affected_communities.len(),
+                        impact.call_chains.len(),
+                    ));
+                }
+            }
+
+            user_prompt.push('\n');
+            file_classifications.push(file_classification);
        }

+        // Send the batch to the LLM.
        match llm
            .chat(TRIAGE_SYSTEM_PROMPT, &user_prompt, Some(0.1))
            .await
@@ -98,58 +117,77 @@ pub async fn triage_findings(
                } else {
                    cleaned
                };
-                if let Ok(result) = serde_json::from_str::<TriageResult>(cleaned) {
-                    // Apply file-path confidence adjustment
-                    let adjusted_confidence =
-                        adjust_confidence(result.confidence, &file_classification);
-                    finding.confidence = Some(adjusted_confidence);
-                    finding.triage_action = Some(result.action.clone());
-                    finding.triage_rationale = Some(result.rationale);

-                    if let Some(remediation) = result.remediation {
-                        finding.remediation = Some(remediation);
-                    }
-
-                    match result.action.as_str() {
-                        "dismiss" => {
-                            finding.status = FindingStatus::FalsePositive;
-                        }
-                        "downgrade" => {
-                            // Downgrade severity by one level
-                            finding.severity = downgrade_severity(&finding.severity);
-                            finding.status = FindingStatus::Triaged;
-                            passed += 1;
-                        }
-                        "upgrade" => {
-                            finding.severity = upgrade_severity(&finding.severity);
-                            finding.status = FindingStatus::Triaged;
-                            passed += 1;
-                        }
-                        _ => {
-                            // "confirm" or unknown — keep as-is
-                            if adjusted_confidence >= 3.0 {
+                match serde_json::from_str::<Vec<TriageResult>>(cleaned) {
+                    Ok(results) => {
+                        for (idx, finding) in chunk.iter_mut().enumerate() {
+                            // Match result by position; fall back to keeping the finding.
+                            let Some(result) = results.get(idx) else {
                                finding.status = FindingStatus::Triaged;
                                passed += 1;
-                            } else {
-                                finding.status = FindingStatus::FalsePositive;
+                                continue;
+                            };
+
+                            let file_classification = file_classifications
+                                .get(idx)
+                                .map(|s| s.as_str())
+                                .unwrap_or("unknown");
+
+                            let adjusted_confidence =
+                                adjust_confidence(result.confidence, file_classification);
+                            finding.confidence = Some(adjusted_confidence);
+                            finding.triage_action = Some(result.action.clone());
+                            finding.triage_rationale = Some(result.rationale.clone());
+
+                            if let Some(ref remediation) = result.remediation {
+                                finding.remediation = Some(remediation.clone());
+                            }
+
+                            match result.action.as_str() {
+                                "dismiss" => {
+                                    finding.status = FindingStatus::FalsePositive;
+                                }
+                                "downgrade" => {
+                                    finding.severity = downgrade_severity(&finding.severity);
+                                    finding.status = FindingStatus::Triaged;
+                                    passed += 1;
+                                }
+                                "upgrade" => {
+                                    finding.severity = upgrade_severity(&finding.severity);
+                                    finding.status = FindingStatus::Triaged;
+                                    passed += 1;
+                                }
+                                _ => {
+                                    // "confirm" or unknown — keep as-is
+                                    if adjusted_confidence >= 3.0 {
+                                        finding.status = FindingStatus::Triaged;
+                                        passed += 1;
+                                    } else {
+                                        finding.status = FindingStatus::FalsePositive;
+                                    }
+                                }
                            }
                        }
                    }
-                } else {
-                    // Parse failure — keep the finding
-                    finding.status = FindingStatus::Triaged;
-                    passed += 1;
-                    tracing::warn!(
-                        "Failed to parse triage response for {}: {response}",
-                        finding.fingerprint
-                    );
+                    Err(_) => {
+                        // Batch parse failure — keep all findings in the chunk.
+                        tracing::warn!(
+                            "Failed to parse batch triage response for chunk starting at {chunk_start}: {cleaned}"
+                        );
+                        for finding in chunk.iter_mut() {
+                            finding.status = FindingStatus::Triaged;
+                            passed += 1;
+                        }
+                    }
                }
            }
            Err(e) => {
-                // On LLM error, keep the finding
-                tracing::warn!("LLM triage failed for {}: {e}", finding.fingerprint);
-                finding.status = FindingStatus::Triaged;
-                passed += 1;
+                // On LLM error, keep all findings in the chunk.
+                tracing::warn!("LLM batch triage failed for chunk starting at {chunk_start}: {e}");
+                for finding in chunk.iter_mut() {
+                    finding.status = FindingStatus::Triaged;
+                    passed += 1;
+                }
            }
        }
    }
@@ -266,6 +304,10 @@ fn upgrade_severity(

 #[derive(serde::Deserialize)]
 struct TriageResult {
+    /// Finding fingerprint echoed back by the LLM (optional).
+    #[serde(default)]
+    #[allow(dead_code)]
+    id: String,
    #[serde(default = "default_action")]
    action: String,
    #[serde(default)]