From c0f9ba467c56c90db0f577af4f5f67437e8e2a04 Mon Sep 17 00:00:00 2001 From: Sharang Parnerkar Date: Wed, 11 Mar 2026 19:43:14 +0100 Subject: [PATCH] feat: add code-awareness to pentest orchestrator Connect SAST findings, SBOM/CVE data, and code knowledge graph entry points to the LLM pentest orchestrator so it can prioritize attacks based on known vulnerabilities and code structure. Co-Authored-By: Claude Opus 4.6 --- compliance-agent/src/pentest/orchestrator.rs | 362 ++++++++++++++++--- 1 file changed, 317 insertions(+), 45 deletions(-) diff --git a/compliance-agent/src/pentest/orchestrator.rs b/compliance-agent/src/pentest/orchestrator.rs index b8ff183..8da2eef 100644 --- a/compliance-agent/src/pentest/orchestrator.rs +++ b/compliance-agent/src/pentest/orchestrator.rs @@ -1,9 +1,13 @@ use std::sync::Arc; +use futures_util::StreamExt; +use mongodb::bson::doc; use tokio::sync::broadcast; use compliance_core::models::dast::DastTarget; +use compliance_core::models::finding::{Finding, FindingStatus, Severity}; use compliance_core::models::pentest::*; +use compliance_core::models::sbom::SbomEntry; use compliance_core::traits::pentest_tool::PentestToolContext; use compliance_dast::ToolRegistry; @@ -50,8 +54,14 @@ impl PentestOrchestrator { .map(|oid| oid.to_hex()) .unwrap_or_default(); - // Build system prompt - let system_prompt = self.build_system_prompt(session, target); + // Gather code-awareness context from linked repo + let (sast_findings, sbom_entries, code_context) = + self.gather_repo_context(target).await; + + // Build system prompt with code context + let system_prompt = self + .build_system_prompt(session, target, &sast_findings, &sbom_entries, &code_context) + .await; // Build tool definitions for LLM let tool_defs: Vec = self @@ -85,13 +95,13 @@ impl PentestOrchestrator { let user_msg = PentestMessage::user(session_id.clone(), initial_message.to_string()); let _ = self.db.pentest_messages().insert_one(&user_msg).await; - // Build tool context + // Build 
tool context with real data let tool_context = PentestToolContext { target: target.clone(), session_id: session_id.clone(), - sast_findings: Vec::new(), - sbom_entries: Vec::new(), - code_context: Vec::new(), + sast_findings, + sbom_entries, + code_context, rate_limit: target.rate_limit, allow_destructive: target.allow_destructive, }; @@ -102,7 +112,6 @@ impl PentestOrchestrator { let mut total_successes = 0u32; for _iteration in 0..max_iterations { - // Call LLM with tools let response = self .llm .chat_with_tools(messages.clone(), &tool_defs, Some(0.2), Some(8192)) @@ -110,17 +119,13 @@ impl PentestOrchestrator { match response { LlmResponse::Content(content) => { - // Store assistant message let msg = PentestMessage::assistant(session_id.clone(), content.clone()); let _ = self.db.pentest_messages().insert_one(&msg).await; - - // Emit message event let _ = self.event_tx.send(PentestEvent::Message { content: content.clone(), }); - // Add to messages messages.push(ChatMessage { role: "assistant".to_string(), content: Some(content.clone()), @@ -128,7 +133,6 @@ impl PentestOrchestrator { tool_call_id: None, }); - // Check if the LLM considers itself done let done_indicators = [ "pentest complete", "testing complete", @@ -144,12 +148,9 @@ impl PentestOrchestrator { { break; } - - // If not done, break and wait for user input break; } LlmResponse::ToolCalls(tool_calls) => { - // Build the assistant message with tool_calls let tc_requests: Vec = tool_calls .iter() .map(|tc| ToolCallRequest { @@ -170,12 +171,10 @@ impl PentestOrchestrator { tool_call_id: None, }); - // Execute each tool call for tc in &tool_calls { total_tool_calls += 1; let node_id = uuid::Uuid::new_v4().to_string(); - // Create attack chain node let mut node = AttackChainNode::new( session_id.clone(), node_id.clone(), @@ -187,14 +186,12 @@ impl PentestOrchestrator { node.started_at = Some(chrono::Utc::now()); let _ = self.db.attack_chain_nodes().insert_one(&node).await; - // Emit tool start event let _ 
= self.event_tx.send(PentestEvent::ToolStart { node_id: node_id.clone(), tool_name: tc.name.clone(), input: tc.arguments.clone(), }); - // Execute the tool let result = if let Some(tool) = self.tool_registry.get(&tc.name) { match tool.execute(tc.arguments.clone(), &tool_context).await { Ok(result) => { @@ -202,13 +199,11 @@ impl PentestOrchestrator { let findings_count = result.findings.len() as u32; total_findings += findings_count; - // Store findings for mut finding in result.findings { finding.scan_run_id = session_id.clone(); finding.session_id = Some(session_id.clone()); let _ = self.db.dast_findings().insert_one(&finding).await; - let _ = self.event_tx.send(PentestEvent::Finding { finding_id: finding @@ -220,23 +215,21 @@ impl PentestOrchestrator { }); } - // Emit tool complete event let _ = self.event_tx.send(PentestEvent::ToolComplete { node_id: node_id.clone(), summary: result.summary.clone(), findings_count, }); - // Update attack chain node let _ = self .db .attack_chain_nodes() .update_one( - mongodb::bson::doc! { + doc! { "session_id": &session_id, "node_id": &node_id, }, - mongodb::bson::doc! { "$set": { + doc! { "$set": { "status": "completed", "tool_output": mongodb::bson::to_bson(&result.data) .unwrap_or(mongodb::bson::Bson::Null), @@ -253,22 +246,20 @@ impl PentestOrchestrator { .to_string() } Err(e) => { - // Update node as failed let _ = self .db .attack_chain_nodes() .update_one( - mongodb::bson::doc! { + doc! { "session_id": &session_id, "node_id": &node_id, }, - mongodb::bson::doc! { "$set": { + doc! 
{ "$set": { "status": "failed", "completed_at": mongodb::bson::DateTime::now(), }}, ) .await; - format!("Tool execution failed: {e}") } } @@ -276,7 +267,6 @@ impl PentestOrchestrator { format!("Unknown tool: {}", tc.name) }; - // Add tool result to messages messages.push(ChatMessage { role: "tool".to_string(), content: Some(result), @@ -285,14 +275,13 @@ impl PentestOrchestrator { }); } - // Update session stats if let Some(sid) = session.id { let _ = self .db .pentest_sessions() .update_one( - mongodb::bson::doc! { "_id": sid }, - mongodb::bson::doc! { "$set": { + doc! { "_id": sid }, + doc! { "$set": { "tool_invocations": total_tool_calls as i64, "tool_successes": total_successes as i64, "findings_count": total_findings as i64, @@ -304,14 +293,13 @@ impl PentestOrchestrator { } } - // Mark session as completed if let Some(sid) = session.id { let _ = self .db .pentest_sessions() .update_one( - mongodb::bson::doc! { "_id": sid }, - mongodb::bson::doc! { "$set": { + doc! { "_id": sid }, + doc! { "$set": { "status": "completed", "completed_at": mongodb::bson::DateTime::now(), "tool_invocations": total_tool_calls as i64, @@ -332,7 +320,159 @@ impl PentestOrchestrator { Ok(()) } - fn build_system_prompt(&self, session: &PentestSession, target: &DastTarget) -> String { + // ── Code-Awareness: Gather context from linked repo ───────── + + /// Fetch SAST findings, SBOM entries (with CVEs), and code graph entry points + /// for the repo linked to this DAST target. 
+    async fn gather_repo_context(
+        &self,
+        target: &DastTarget,
+    ) -> (Vec<Finding>, Vec<SbomEntry>, Vec<CodeContextHint>) {
+        // Without a linked repository there is nothing to look up, so every
+        // context list comes back empty.
+        let Some(repo_id) = &target.repo_id else {
+            return (Vec::new(), Vec::new(), Vec::new());
+        };
+
+        let sast_findings = self.fetch_sast_findings(repo_id).await;
+        let sbom_entries = self.fetch_vulnerable_sbom(repo_id).await;
+        // The code-context hints are linked against the SAST findings, so this
+        // lookup has to run after `fetch_sast_findings`.
+        let code_context = self.fetch_code_context(repo_id, &sast_findings).await;
+
+        tracing::info!(
+            repo_id,
+            sast_findings = sast_findings.len(),
+            vulnerable_deps = sbom_entries.len(),
+            code_hints = code_context.len(),
+            "Gathered code-awareness context for pentest"
+        );
+
+        (sast_findings, sbom_entries, code_context)
+    }
+
+    /// Fetch open/triaged SAST findings for the repo (not false positives or resolved).
+    ///
+    /// Returns at most 100 findings. The lookup is best-effort: if the query
+    /// itself fails an empty list is returned, and if the cursor yields an error
+    /// part-way through, the findings read so far are returned. Both cases are
+    /// logged as warnings instead of being propagated to the caller.
+    async fn fetch_sast_findings(&self, repo_id: &str) -> Vec<Finding> {
+        let cursor = self
+            .db
+            .findings()
+            .find(doc! {
+                "repo_id": repo_id,
+                "status": { "$in": ["open", "triaged"] },
+            })
+            // NOTE(review): if `severity` is stored as a string, this sort is
+            // lexicographic rather than by severity rank — confirm against the
+            // `Severity` serialization before relying on the order.
+            .sort(doc! { "severity": -1 })
+            .limit(100)
+            .await;
+
+        let mut cursor = match cursor {
+            Ok(c) => c,
+            Err(e) => {
+                tracing::warn!("Failed to fetch SAST findings for pentest: {e}");
+                return Vec::new();
+            }
+        };
+
+        let mut results = Vec::new();
+        while let Some(item) = cursor.next().await {
+            match item {
+                Ok(finding) => results.push(finding),
+                Err(e) => {
+                    // Reading stops at the first cursor error; log it so a
+                    // truncated result set is not mistaken for a complete one.
+                    let read = results.len();
+                    tracing::warn!(
+                        "Stopped reading SAST findings for pentest after {read} results: {e}"
+                    );
+                    break;
+                }
+            }
+        }
+        results
+    }
+
+    /// Fetch SBOM entries that have known vulnerabilities.
+    ///
+    /// Returns at most 50 entries whose `known_vulnerabilities` field exists and
+    /// is not an empty array. Best-effort like `fetch_sast_findings`: a failed
+    /// query yields an empty list, a cursor error yields the entries read so
+    /// far, and both are logged as warnings.
+    async fn fetch_vulnerable_sbom(&self, repo_id: &str) -> Vec<SbomEntry> {
+        let cursor = self
+            .db
+            .sbom_entries()
+            .find(doc! {
+                "repo_id": repo_id,
+                "known_vulnerabilities": { "$exists": true, "$ne": [] },
+            })
+            .limit(50)
+            .await;
+
+        let mut cursor = match cursor {
+            Ok(c) => c,
+            Err(e) => {
+                tracing::warn!("Failed to fetch vulnerable SBOM entries: {e}");
+                return Vec::new();
+            }
+        };
+
+        let mut results = Vec::new();
+        while let Some(item) = cursor.next().await {
+            match item {
+                Ok(entry) => results.push(entry),
+                Err(e) => {
+                    // Reading stops at the first cursor error; log it so a
+                    // truncated result set is not mistaken for a complete one.
+                    let read = results.len();
+                    tracing::warn!(
+                        "Stopped reading vulnerable SBOM entries after {read} results: {e}"
+                    );
+                    break;
+                }
+            }
+        }
+        results
+    }
+
+    /// Build CodeContextHint objects from the code knowledge graph.
+    /// Maps entry points to their source files and links SAST findings.
+ async fn fetch_code_context( + &self, + repo_id: &str, + sast_findings: &[Finding], + ) -> Vec { + // Get entry point nodes from the code graph + let cursor = self + .db + .graph_nodes() + .find(doc! { + "repo_id": repo_id, + "is_entry_point": true, + }) + .limit(50) + .await; + + let nodes = match cursor { + Ok(mut c) => { + let mut results = Vec::new(); + while let Some(Ok(n)) = c.next().await { + results.push(n); + } + results + } + Err(_) => return Vec::new(), + }; + + // Build hints by matching graph nodes to SAST findings by file path + nodes + .into_iter() + .map(|node| { + // Find SAST findings in the same file + let linked_vulns: Vec = sast_findings + .iter() + .filter(|f| { + f.file_path.as_deref() == Some(&node.file_path) + }) + .map(|f| { + format!( + "[{}] {}: {} (line {})", + f.severity, + f.scanner, + f.title, + f.line_number.unwrap_or(0) + ) + }) + .collect(); + + CodeContextHint { + endpoint_pattern: node.qualified_name.clone(), + handler_function: node.name.clone(), + file_path: node.file_path.clone(), + code_snippet: String::new(), // Could fetch from embeddings + known_vulnerabilities: linked_vulns, + } + }) + .collect() + } + + // ── System Prompt Builder ─────────────────────────────────── + + async fn build_system_prompt( + &self, + session: &PentestSession, + target: &DastTarget, + sast_findings: &[Finding], + sbom_entries: &[SbomEntry], + code_context: &[CodeContextHint], + ) -> String { let tool_names = self.tool_registry.list_names().join(", "); let strategy_guidance = match session.strategy { PentestStrategy::Quick => { @@ -352,6 +492,121 @@ impl PentestOrchestrator { } }; + // Build SAST findings section + let sast_section = if sast_findings.is_empty() { + String::from("No SAST findings available for this target.") + } else { + let critical = sast_findings + .iter() + .filter(|f| f.severity == Severity::Critical) + .count(); + let high = sast_findings + .iter() + .filter(|f| f.severity == Severity::High) + .count(); + + let mut 
section = format!( + "{} open findings ({} critical, {} high):\n", + sast_findings.len(), + critical, + high + ); + + // List the most important findings (critical/high first, up to 20) + for f in sast_findings.iter().take(20) { + let file_info = f + .file_path + .as_ref() + .map(|p| { + format!( + " in {}:{}", + p, + f.line_number.unwrap_or(0) + ) + }) + .unwrap_or_default(); + let status_note = match f.status { + FindingStatus::Triaged => " [TRIAGED]", + _ => "", + }; + section.push_str(&format!( + "- [{sev}] {title}{file}{status}\n", + sev = f.severity, + title = f.title, + file = file_info, + status = status_note, + )); + if let Some(cwe) = &f.cwe { + section.push_str(&format!(" CWE: {cwe}\n")); + } + } + if sast_findings.len() > 20 { + section.push_str(&format!( + "... and {} more findings\n", + sast_findings.len() - 20 + )); + } + section + }; + + // Build SBOM/CVE section + let sbom_section = if sbom_entries.is_empty() { + String::from("No vulnerable dependencies identified.") + } else { + let mut section = format!( + "{} dependencies with known vulnerabilities:\n", + sbom_entries.len() + ); + for entry in sbom_entries.iter().take(15) { + let cve_ids: Vec<&str> = entry + .known_vulnerabilities + .iter() + .map(|v| v.id.as_str()) + .collect(); + section.push_str(&format!( + "- {} {} ({}): {}\n", + entry.name, + entry.version, + entry.package_manager, + cve_ids.join(", ") + )); + } + if sbom_entries.len() > 15 { + section.push_str(&format!( + "... 
and {} more vulnerable dependencies\n", + sbom_entries.len() - 15 + )); + } + section + }; + + // Build code context section + let code_section = if code_context.is_empty() { + String::from("No code knowledge graph available for this target.") + } else { + let with_vulns = code_context + .iter() + .filter(|c| !c.known_vulnerabilities.is_empty()) + .count(); + + let mut section = format!( + "{} entry points identified ({} with linked SAST findings):\n", + code_context.len(), + with_vulns + ); + + for hint in code_context.iter().take(20) { + section.push_str(&format!( + "- {} ({})\n", + hint.endpoint_pattern, hint.file_path + )); + for vuln in &hint.known_vulnerabilities { + section.push_str(&format!(" SAST: {vuln}\n")); + } + } + section + }; + format!( r#"You are an expert penetration tester conducting an authorized security assessment. @@ -361,33 +616,50 @@ impl PentestOrchestrator { - **Type**: {target_type} - **Rate Limit**: {rate_limit} req/s - **Destructive Tests Allowed**: {allow_destructive} +- **Linked Repository**: {repo_linked} ## Strategy {strategy_guidance} +## SAST Findings (Static Analysis) +{sast_section} + +## Vulnerable Dependencies (SBOM) +{sbom_section} + +## Code Entry Points (Knowledge Graph) +{code_section} + ## Available Tools {tool_names} ## Instructions -1. Start by running reconnaissance and crawling to understand the target. -2. Based on what you discover, select appropriate vulnerability scanning tools. -3. For each tool invocation, provide the discovered endpoints and parameters. -4. Analyze tool results and chain findings — if you find one vulnerability, explore whether it enables others. -5. When testing is complete, provide a summary of all findings with severity and remediation recommendations. -6. Always explain your reasoning before invoking each tool. -7. Focus on actionable findings with evidence. Avoid false positives. -8. When you have completed all relevant testing, say "Testing complete" followed by a final summary. +1. 
Start by running reconnaissance (recon tool) to fingerprint the target and discover technologies. +2. Run the OpenAPI parser to discover API endpoints from specs. +3. Check infrastructure: DNS, DMARC, TLS, security headers, cookies, CSP, CORS. +4. Based on SAST findings, prioritize testing endpoints where vulnerabilities were found in code. +5. For each vulnerability type found in SAST, use the corresponding DAST tool to verify exploitability. +6. If vulnerable dependencies are listed, try to trigger known CVE conditions against the running application. +7. Test rate limiting on critical endpoints (login, API). +8. Check for console.log leakage in frontend JavaScript. +9. Analyze tool results and chain findings — if one vulnerability enables others, explore the chain. +10. When testing is complete, provide a structured summary with severity and remediation. +11. Always explain your reasoning before invoking each tool. +12. When done, say "Testing complete" followed by a final summary. ## Important - This is an authorized penetration test. All testing is permitted within the target scope. - Respect the rate limit of {rate_limit} requests per second. - Only use destructive tests if explicitly allowed ({allow_destructive}). +- Use SAST findings to guide your testing — they tell you WHERE in the code vulnerabilities exist. +- Use SBOM data to understand what technologies and versions the target runs. "#, target_name = target.name, base_url = target.base_url, target_type = target.target_type, rate_limit = target.rate_limit, allow_destructive = target.allow_destructive, + repo_linked = target.repo_id.as_deref().unwrap_or("None"), ) } }