feat: pentest onboarding — streaming, browser automation, reports, user cleanup (#16)
All checks were successful
CI / Check (push) Has been skipped
CI / Detect Changes (push) Successful in 7s
CI / Deploy Agent (push) Successful in 2s
CI / Deploy Dashboard (push) Successful in 2s
CI / Deploy Docs (push) Successful in 2s
CI / Deploy MCP (push) Successful in 2s

Complete pentest feature overhaul: SSE streaming, session-persistent browser tool (CDP), AES-256 credential encryption, auto-screenshots in reports, code-level remediation correlation, SAST triage chunking, context window optimization, test user cleanup (Keycloak/Auth0/Okta), wizard dropdowns, attack chain improvements, and architecture docs with Mermaid diagrams.

Co-authored-by: Sharang Parnerkar <parnerkarsharang@gmail.com>
Reviewed-on: #16
This commit was merged in pull request #16.
This commit is contained in:
2026-03-17 20:32:20 +00:00
parent 11e1c5f438
commit c461faa2fb
57 changed files with 8844 additions and 2423 deletions

View File

@@ -5,7 +5,10 @@ use compliance_core::models::{Finding, FindingStatus};
use crate::llm::LlmClient;
use crate::pipeline::orchestrator::GraphContext;
const TRIAGE_SYSTEM_PROMPT: &str = r#"You are a security finding triage expert. Analyze the following security finding with its code context and determine the appropriate action.
/// Maximum number of findings to include in a single LLM triage call.
///
/// `triage_findings` walks the findings slice in windows of this size and
/// builds one combined prompt per window, so that a large set of findings
/// does not overflow the LLM context window.
const TRIAGE_CHUNK_SIZE: usize = 30;
const TRIAGE_SYSTEM_PROMPT: &str = r#"You are a security finding triage expert. Analyze each of the following security findings with its code context and determine the appropriate action.
Actions:
- "confirm": The finding is a true positive at the reported severity. Keep as-is.
@@ -19,8 +22,8 @@ Consider:
- Is the finding actionable by a developer?
- Would a real attacker be able to exploit this?
Respond in JSON format:
{"action": "confirm|downgrade|upgrade|dismiss", "confidence": 0-10, "rationale": "brief explanation", "remediation": "optional fix suggestion"}"#;
Respond with a JSON array, one entry per finding in the same order they were presented:
[{"id": "<fingerprint>", "action": "confirm|downgrade|upgrade|dismiss", "confidence": 0-10, "rationale": "brief explanation", "remediation": "optional fix suggestion"}, ...]"#;
pub async fn triage_findings(
llm: &Arc<LlmClient>,
@@ -29,60 +32,76 @@ pub async fn triage_findings(
) -> usize {
let mut passed = 0;
for finding in findings.iter_mut() {
let file_classification = classify_file_path(finding.file_path.as_deref());
// Process findings in chunks to avoid overflowing the LLM context window.
for chunk_start in (0..findings.len()).step_by(TRIAGE_CHUNK_SIZE) {
let chunk_end = (chunk_start + TRIAGE_CHUNK_SIZE).min(findings.len());
let chunk = &mut findings[chunk_start..chunk_end];
let mut user_prompt = format!(
"Scanner: {}\nRule: {}\nSeverity: {}\nTitle: {}\nDescription: {}\nFile: {}\nLine: {}\nCode: {}\nFile classification: {}",
finding.scanner,
finding.rule_id.as_deref().unwrap_or("N/A"),
finding.severity,
finding.title,
finding.description,
finding.file_path.as_deref().unwrap_or("N/A"),
finding.line_number.map(|n| n.to_string()).unwrap_or_else(|| "N/A".to_string()),
finding.code_snippet.as_deref().unwrap_or("N/A"),
file_classification,
);
// Build a combined prompt for the entire chunk.
let mut user_prompt = String::new();
let mut file_classifications: Vec<String> = Vec::new();
for (i, finding) in chunk.iter().enumerate() {
let file_classification = classify_file_path(finding.file_path.as_deref());
// Enrich with surrounding code context if possible
if let Some(context) = read_surrounding_context(finding) {
user_prompt.push_str(&format!(
"\n\n--- Surrounding Code (50 lines) ---\n{context}"
"\n--- Finding {} (id: {}) ---\nScanner: {}\nRule: {}\nSeverity: {}\nTitle: {}\nDescription: {}\nFile: {}\nLine: {}\nCode: {}\nFile classification: {}",
i + 1,
finding.fingerprint,
finding.scanner,
finding.rule_id.as_deref().unwrap_or("N/A"),
finding.severity,
finding.title,
finding.description,
finding.file_path.as_deref().unwrap_or("N/A"),
finding.line_number.map(|n| n.to_string()).unwrap_or_else(|| "N/A".to_string()),
finding.code_snippet.as_deref().unwrap_or("N/A"),
file_classification,
));
}
// Enrich with graph context if available
if let Some(ctx) = graph_context {
if let Some(impact) = ctx
.impacts
.iter()
.find(|i| i.finding_id == finding.fingerprint)
{
// Enrich with surrounding code context if possible
if let Some(context) = read_surrounding_context(finding) {
user_prompt.push_str(&format!(
"\n\n--- Code Graph Context ---\n\
Blast radius: {} nodes affected\n\
Entry points affected: {}\n\
Direct callers: {}\n\
Communities affected: {}\n\
Call chains: {}",
impact.blast_radius,
if impact.affected_entry_points.is_empty() {
"none".to_string()
} else {
impact.affected_entry_points.join(", ")
},
if impact.direct_callers.is_empty() {
"none".to_string()
} else {
impact.direct_callers.join(", ")
},
impact.affected_communities.len(),
impact.call_chains.len(),
"\n\n--- Surrounding Code (50 lines) ---\n{context}"
));
}
// Enrich with graph context if available
if let Some(ctx) = graph_context {
if let Some(impact) = ctx
.impacts
.iter()
.find(|im| im.finding_id == finding.fingerprint)
{
user_prompt.push_str(&format!(
"\n\n--- Code Graph Context ---\n\
Blast radius: {} nodes affected\n\
Entry points affected: {}\n\
Direct callers: {}\n\
Communities affected: {}\n\
Call chains: {}",
impact.blast_radius,
if impact.affected_entry_points.is_empty() {
"none".to_string()
} else {
impact.affected_entry_points.join(", ")
},
if impact.direct_callers.is_empty() {
"none".to_string()
} else {
impact.direct_callers.join(", ")
},
impact.affected_communities.len(),
impact.call_chains.len(),
));
}
}
user_prompt.push('\n');
file_classifications.push(file_classification);
}
// Send the batch to the LLM.
match llm
.chat(TRIAGE_SYSTEM_PROMPT, &user_prompt, Some(0.1))
.await
@@ -98,58 +117,77 @@ pub async fn triage_findings(
} else {
cleaned
};
if let Ok(result) = serde_json::from_str::<TriageResult>(cleaned) {
// Apply file-path confidence adjustment
let adjusted_confidence =
adjust_confidence(result.confidence, &file_classification);
finding.confidence = Some(adjusted_confidence);
finding.triage_action = Some(result.action.clone());
finding.triage_rationale = Some(result.rationale);
if let Some(remediation) = result.remediation {
finding.remediation = Some(remediation);
}
match result.action.as_str() {
"dismiss" => {
finding.status = FindingStatus::FalsePositive;
}
"downgrade" => {
// Downgrade severity by one level
finding.severity = downgrade_severity(&finding.severity);
finding.status = FindingStatus::Triaged;
passed += 1;
}
"upgrade" => {
finding.severity = upgrade_severity(&finding.severity);
finding.status = FindingStatus::Triaged;
passed += 1;
}
_ => {
// "confirm" or unknown — keep as-is
if adjusted_confidence >= 3.0 {
match serde_json::from_str::<Vec<TriageResult>>(cleaned) {
Ok(results) => {
for (idx, finding) in chunk.iter_mut().enumerate() {
// Match result by position; fall back to keeping the finding.
let Some(result) = results.get(idx) else {
finding.status = FindingStatus::Triaged;
passed += 1;
} else {
finding.status = FindingStatus::FalsePositive;
continue;
};
let file_classification = file_classifications
.get(idx)
.map(|s| s.as_str())
.unwrap_or("unknown");
let adjusted_confidence =
adjust_confidence(result.confidence, file_classification);
finding.confidence = Some(adjusted_confidence);
finding.triage_action = Some(result.action.clone());
finding.triage_rationale = Some(result.rationale.clone());
if let Some(ref remediation) = result.remediation {
finding.remediation = Some(remediation.clone());
}
match result.action.as_str() {
"dismiss" => {
finding.status = FindingStatus::FalsePositive;
}
"downgrade" => {
finding.severity = downgrade_severity(&finding.severity);
finding.status = FindingStatus::Triaged;
passed += 1;
}
"upgrade" => {
finding.severity = upgrade_severity(&finding.severity);
finding.status = FindingStatus::Triaged;
passed += 1;
}
_ => {
// "confirm" or unknown — keep as-is
if adjusted_confidence >= 3.0 {
finding.status = FindingStatus::Triaged;
passed += 1;
} else {
finding.status = FindingStatus::FalsePositive;
}
}
}
}
}
} else {
// Parse failure — keep the finding
finding.status = FindingStatus::Triaged;
passed += 1;
tracing::warn!(
"Failed to parse triage response for {}: {response}",
finding.fingerprint
);
Err(_) => {
// Batch parse failure — keep all findings in the chunk.
tracing::warn!(
"Failed to parse batch triage response for chunk starting at {chunk_start}: {cleaned}"
);
for finding in chunk.iter_mut() {
finding.status = FindingStatus::Triaged;
passed += 1;
}
}
}
}
Err(e) => {
// On LLM error, keep the finding
tracing::warn!("LLM triage failed for {}: {e}", finding.fingerprint);
finding.status = FindingStatus::Triaged;
passed += 1;
// On LLM error, keep all findings in the chunk.
tracing::warn!("LLM batch triage failed for chunk starting at {chunk_start}: {e}");
for finding in chunk.iter_mut() {
finding.status = FindingStatus::Triaged;
passed += 1;
}
}
}
}
@@ -266,6 +304,10 @@ fn upgrade_severity(
#[derive(serde::Deserialize)]
struct TriageResult {
/// Finding fingerprint echoed back by the LLM (optional).
#[serde(default)]
#[allow(dead_code)]
id: String,
#[serde(default = "default_action")]
action: String,
#[serde(default)]