From c0f9ba467c56c90db0f577af4f5f67437e8e2a04 Mon Sep 17 00:00:00 2001
From: Sharang Parnerkar <parnerkarsharang@gmail.com>
Date: Wed, 11 Mar 2026 19:43:14 +0100
Subject: [PATCH] feat: add code-awareness to pentest orchestrator

Connect SAST findings, SBOM/CVE data, and code knowledge graph entry
points to the LLM pentest orchestrator so it can prioritize attacks
based on known vulnerabilities and code structure.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
---
 compliance-agent/src/pentest/orchestrator.rs | 362 ++++++++++++++++---
 1 file changed, 317 insertions(+), 45 deletions(-)
diff --git a/compliance-agent/src/pentest/orchestrator.rs b/compliance-agent/src/pentest/orchestrator.rs
index b8ff183..8da2eef 100644
--- a/compliance-agent/src/pentest/orchestrator.rs
+++ b/compliance-agent/src/pentest/orchestrator.rs
@@ -1,9 +1,13 @@
 use std::sync::Arc;
 
+use futures_util::StreamExt;
+use mongodb::bson::doc;
 use tokio::sync::broadcast;
 
 use compliance_core::models::dast::DastTarget;
+use compliance_core::models::finding::{Finding, FindingStatus, Severity};
 use compliance_core::models::pentest::*;
+use compliance_core::models::sbom::SbomEntry;
 use compliance_core::traits::pentest_tool::PentestToolContext;
 use compliance_dast::ToolRegistry;
 
@@ -50,8 +54,14 @@ impl PentestOrchestrator {
             .map(|oid| oid.to_hex())
             .unwrap_or_default();
 
-        // Build system prompt
-        let system_prompt = self.build_system_prompt(session, target);
+        // Gather code-awareness context from linked repo
+        let (sast_findings, sbom_entries, code_context) =
+            self.gather_repo_context(target).await;
+
+        // Build system prompt with code context
+        let system_prompt = self
+            .build_system_prompt(session, target, &sast_findings, &sbom_entries, &code_context)
+            .await;
 
         // Build tool definitions for LLM
         let tool_defs: Vec<ToolDefinition> = self
@@ -85,13 +95,13 @@ impl PentestOrchestrator {
         let user_msg = PentestMessage::user(session_id.clone(), initial_message.to_string());
         let _ = self.db.pentest_messages().insert_one(&user_msg).await;
 
-        // Build tool context
+        // Build tool context with real data
         let tool_context = PentestToolContext {
             target: target.clone(),
             session_id: session_id.clone(),
-            sast_findings: Vec::new(),
-            sbom_entries: Vec::new(),
-            code_context: Vec::new(),
+            sast_findings,
+            sbom_entries,
+            code_context,
             rate_limit: target.rate_limit,
             allow_destructive: target.allow_destructive,
         };
@@ -102,7 +112,6 @@ impl PentestOrchestrator {
         let mut total_successes = 0u32;
 
         for _iteration in 0..max_iterations {
-            // Call LLM with tools
             let response = self
                 .llm
                 .chat_with_tools(messages.clone(), &tool_defs, Some(0.2), Some(8192))
@@ -110,17 +119,13 @@ impl PentestOrchestrator {
 
             match response {
                 LlmResponse::Content(content) => {
-                    // Store assistant message
                     let msg =
                         PentestMessage::assistant(session_id.clone(), content.clone());
                     let _ = self.db.pentest_messages().insert_one(&msg).await;
-
-                    // Emit message event
                     let _ = self.event_tx.send(PentestEvent::Message {
                         content: content.clone(),
                     });
 
-                    // Add to messages
                     messages.push(ChatMessage {
                         role: "assistant".to_string(),
                         content: Some(content.clone()),
@@ -128,7 +133,6 @@ impl PentestOrchestrator {
                         tool_call_id: None,
                     });
 
-                    // Check if the LLM considers itself done
                     let done_indicators = [
                         "pentest complete",
                         "testing complete",
@@ -144,12 +148,9 @@ impl PentestOrchestrator {
                     {
                         break;
                     }
-
-                    // If not done, break and wait for user input
                     break;
                 }
                 LlmResponse::ToolCalls(tool_calls) => {
-                    // Build the assistant message with tool_calls
                     let tc_requests: Vec<ToolCallRequest> = tool_calls
                         .iter()
                         .map(|tc| ToolCallRequest {
@@ -170,12 +171,10 @@ impl PentestOrchestrator {
                         tool_call_id: None,
                     });
 
-                    // Execute each tool call
                     for tc in &tool_calls {
                         total_tool_calls += 1;
                         let node_id = uuid::Uuid::new_v4().to_string();
 
-                        // Create attack chain node
                         let mut node = AttackChainNode::new(
                             session_id.clone(),
                             node_id.clone(),
@@ -187,14 +186,12 @@ impl PentestOrchestrator {
                         node.started_at = Some(chrono::Utc::now());
                         let _ = self.db.attack_chain_nodes().insert_one(&node).await;
 
-                        // Emit tool start event
                         let _ = self.event_tx.send(PentestEvent::ToolStart {
                             node_id: node_id.clone(),
                             tool_name: tc.name.clone(),
                             input: tc.arguments.clone(),
                         });
 
-                        // Execute the tool
                         let result = if let Some(tool) = self.tool_registry.get(&tc.name) {
                             match tool.execute(tc.arguments.clone(), &tool_context).await {
                                 Ok(result) => {
@@ -202,13 +199,11 @@ impl PentestOrchestrator {
                                     let findings_count = result.findings.len() as u32;
                                     total_findings += findings_count;
 
-                                    // Store findings
                                     for mut finding in result.findings {
                                         finding.scan_run_id = session_id.clone();
                                         finding.session_id = Some(session_id.clone());
                                         let _ =
                                             self.db.dast_findings().insert_one(&finding).await;
-
                                         let _ =
                                             self.event_tx.send(PentestEvent::Finding {
                                                 finding_id: finding
@@ -220,23 +215,21 @@ impl PentestOrchestrator {
                                             });
                                     }
 
-                                    // Emit tool complete event
                                     let _ = self.event_tx.send(PentestEvent::ToolComplete {
                                         node_id: node_id.clone(),
                                         summary: result.summary.clone(),
                                         findings_count,
                                     });
 
-                                    // Update attack chain node
                                     let _ = self
                                         .db
                                         .attack_chain_nodes()
                                         .update_one(
-                                            mongodb::bson::doc! {
+                                            doc! {
                                                 "session_id": &session_id,
                                                 "node_id": &node_id,
                                             },
-                                            mongodb::bson::doc! { "$set": {
+                                            doc! { "$set": {
                                                 "status": "completed",
                                                 "tool_output": mongodb::bson::to_bson(&result.data)
                                                     .unwrap_or(mongodb::bson::Bson::Null),
@@ -253,22 +246,20 @@ impl PentestOrchestrator {
                                     .to_string()
                                 }
                                 Err(e) => {
-                                    // Update node as failed
                                     let _ = self
                                         .db
                                         .attack_chain_nodes()
                                         .update_one(
-                                            mongodb::bson::doc! {
+                                            doc! {
                                                 "session_id": &session_id,
                                                 "node_id": &node_id,
                                             },
-                                            mongodb::bson::doc! { "$set": {
+                                            doc! { "$set": {
                                                 "status": "failed",
                                                 "completed_at": mongodb::bson::DateTime::now(),
                                             }},
                                         )
                                         .await;
-
                                     format!("Tool execution failed: {e}")
                                 }
                             }
@@ -276,7 +267,6 @@ impl PentestOrchestrator {
                             format!("Unknown tool: {}", tc.name)
                         };
 
-                        // Add tool result to messages
                         messages.push(ChatMessage {
                             role: "tool".to_string(),
                             content: Some(result),
@@ -285,14 +275,13 @@ impl PentestOrchestrator {
                         });
                     }
 
-                    // Update session stats
                     if let Some(sid) = session.id {
                         let _ = self
                             .db
                             .pentest_sessions()
                             .update_one(
-                                mongodb::bson::doc! { "_id": sid },
-                                mongodb::bson::doc! { "$set": {
+                                doc! { "_id": sid },
+                                doc! { "$set": {
                                     "tool_invocations": total_tool_calls as i64,
                                     "tool_successes": total_successes as i64,
                                     "findings_count": total_findings as i64,
@@ -304,14 +293,13 @@ impl PentestOrchestrator {
             }
         }
 
-        // Mark session as completed
         if let Some(sid) = session.id {
             let _ = self
                 .db
                 .pentest_sessions()
                 .update_one(
-                    mongodb::bson::doc! { "_id": sid },
-                    mongodb::bson::doc! { "$set": {
+                    doc! { "_id": sid },
+                    doc! { "$set": {
                         "status": "completed",
                         "completed_at": mongodb::bson::DateTime::now(),
                         "tool_invocations": total_tool_calls as i64,
@@ -332,7 +320,159 @@ impl PentestOrchestrator {
         Ok(())
     }
 
-    fn build_system_prompt(&self, session: &PentestSession, target: &DastTarget) -> String {
+    // ── Code-Awareness: Gather context from linked repo ─────────
+
+    /// Collect the code-awareness inputs for `target`: SAST findings, vulnerable SBOM
+    /// entries, and code-graph hints. Returns three empty lists when no repo is linked.
+    async fn gather_repo_context(
+        &self,
+        target: &DastTarget,
+    ) -> (Vec<Finding>, Vec<SbomEntry>, Vec<CodeContextHint>) {
+        // Nothing can be looked up without a linked repository.
+        let repo_id = match target.repo_id.as_ref() {
+            Some(id) => id,
+            None => return Default::default(),
+        };
+        // The code hints are built from the SAST findings, so those are fetched first.
+        let sast_findings = self.fetch_sast_findings(repo_id).await;
+        let sbom_entries = self.fetch_vulnerable_sbom(repo_id).await;
+        let code_context = self.fetch_code_context(repo_id, &sast_findings).await;
+        tracing::info!(
+            repo_id,
+            sast_findings = sast_findings.len(),
+            vulnerable_deps = sbom_entries.len(),
+            code_hints = code_context.len(),
+            "Gathered code-awareness context for pentest"
+        );
+        (sast_findings, sbom_entries, code_context)
+    }
+
+    /// Fetch up to 100 open/triaged SAST findings for the repo (not false positives or
+    /// resolved), sorted by `severity` descending. Errors are logged; partial results kept.
+    async fn fetch_sast_findings(&self, repo_id: &str) -> Vec<Finding> {
+        let cursor = self
+            .db
+            .findings()
+            .find(doc! { "repo_id": repo_id, "status": { "$in": ["open", "triaged"] } })
+            .sort(doc! { "severity": -1 })
+            .limit(100)
+            .await;
+        let mut results = Vec::new();
+        match cursor {
+            Ok(mut c) => {
+                while let Some(item) = c.next().await {
+                    match item {
+                        Ok(f) => results.push(f),
+                        Err(e) => {
+                            tracing::warn!("Stopped reading SAST findings for pentest: {e}");
+                            break;
+                        }
+                    }
+                }
+            }
+            Err(e) => tracing::warn!("Failed to fetch SAST findings for pentest: {e}"),
+        }
+        results
+    }
+
+    /// Fetch up to 50 SBOM entries with known vulnerabilities; errors are logged (best-effort).
+    async fn fetch_vulnerable_sbom(&self, repo_id: &str) -> Vec<SbomEntry> {
+        let has_vulns = doc! { "$exists": true, "$ne": [] };
+        let cursor = self
+            .db
+            .sbom_entries()
+            .find(doc! { "repo_id": repo_id, "known_vulnerabilities": has_vulns })
+            .limit(50)
+            .await;
+        let mut results = Vec::new();
+        match cursor {
+            Ok(mut c) => {
+                while let Some(item) = c.next().await {
+                    match item {
+                        Ok(entry) => results.push(entry),
+                        Err(e) => {
+                            tracing::warn!("Stopped reading vulnerable SBOM entries: {e}");
+                            break;
+                        }
+                    }
+                }
+            }
+            Err(e) => tracing::warn!("Failed to fetch vulnerable SBOM entries: {e}"),
+        }
+        results
+    }
+
+    /// Build `CodeContextHint`s from up to 50 code-graph entry points of the repo.
+    /// Each hint lists the given SAST findings whose file path equals the node's file path.
+    async fn fetch_code_context(
+        &self,
+        repo_id: &str,
+        sast_findings: &[Finding],
+    ) -> Vec<CodeContextHint> {
+        // Get entry point nodes from the code graph
+        let cursor = self
+            .db
+            .graph_nodes()
+            .find(doc! { "repo_id": repo_id, "is_entry_point": true })
+            .limit(50)
+            .await;
+        let mut nodes = Vec::new();
+        match cursor {
+            Ok(mut c) => {
+                while let Some(item) = c.next().await {
+                    match item {
+                        Ok(node) => nodes.push(node),
+                        Err(e) => {
+                            tracing::warn!("Stopped reading code graph entry points: {e}");
+                            break;
+                        }
+                    }
+                }
+            }
+            // Log instead of silently discarding; the empty node list yields no hints.
+            Err(e) => tracing::warn!("Failed to fetch code graph entry points: {e}"),
+        }
+
+        // Build hints by matching graph nodes to SAST findings by file path
+        nodes
+            .into_iter()
+            .map(|node| {
+                let linked_vulns: Vec<String> = sast_findings
+                    .iter()
+                    .filter(|f| f.file_path.as_deref() == Some(&node.file_path))
+                    .map(|f| {
+                        format!(
+                            "[{}] {}: {} (line {})",
+                            f.severity,
+                            f.scanner,
+                            f.title,
+                            f.line_number.unwrap_or(0)
+                        )
+                    })
+                    .collect();
+
+                // `node` is owned here, so its fields are moved rather than cloned.
+                CodeContextHint {
+                    endpoint_pattern: node.qualified_name,
+                    handler_function: node.name,
+                    file_path: node.file_path,
+                    code_snippet: String::new(), // Could fetch from embeddings
+                    known_vulnerabilities: linked_vulns,
+                }
+            })
+            .collect()
+    }
+
+    // ── System Prompt Builder ───────────────────────────────────
+
+    async fn build_system_prompt(
+        &self,
+        session: &PentestSession,
+        target: &DastTarget,
+        sast_findings: &[Finding],
+        sbom_entries: &[SbomEntry],
+        code_context: &[CodeContextHint],
+    ) -> String {
         let tool_names = self.tool_registry.list_names().join(", ");
         let strategy_guidance = match session.strategy {
             PentestStrategy::Quick => {
@@ -352,6 +492,121 @@ impl PentestOrchestrator {
             }
         };
 
+        // Build SAST findings section
+        let sast_section = if sast_findings.is_empty() {
+            String::from("No SAST findings available for this target.")
+        } else {
+            let critical = sast_findings
+                .iter()
+                .filter(|f| f.severity == Severity::Critical)
+                .count();
+            let high = sast_findings
+                .iter()
+                .filter(|f| f.severity == Severity::High)
+                .count();
+
+            let mut section = format!(
+                "{} open findings ({} critical, {} high):\n",
+                sast_findings.len(),
+                critical,
+                high
+            );
+
+            // List the most important findings (critical/high first, up to 20)
+            for f in sast_findings.iter().take(20) {
+                let file_info = f
+                    .file_path
+                    .as_ref()
+                    .map(|p| {
+                        format!(
+                            " in {}:{}",
+                            p,
+                            f.line_number.unwrap_or(0)
+                        )
+                    })
+                    .unwrap_or_default();
+                let status_note = match f.status {
+                    FindingStatus::Triaged => " [TRIAGED]",
+                    _ => "",
+                };
+                section.push_str(&format!(
+                    "- [{sev}] {title}{file}{status}\n",
+                    sev = f.severity,
+                    title = f.title,
+                    file = file_info,
+                    status = status_note,
+                ));
+                if let Some(cwe) = &f.cwe {
+                    section.push_str(&format!("  CWE: {cwe}\n"));
+                }
+            }
+            if sast_findings.len() > 20 {
+                section.push_str(&format!(
+                    "... and {} more findings\n",
+                    sast_findings.len() - 20
+                ));
+            }
+            section
+        };
+
+        // Build SBOM/CVE section
+        let sbom_section = if sbom_entries.is_empty() {
+            String::from("No vulnerable dependencies identified.")
+        } else {
+            let mut section = format!(
+                "{} dependencies with known vulnerabilities:\n",
+                sbom_entries.len()
+            );
+            for entry in sbom_entries.iter().take(15) {
+                let cve_ids: Vec<&str> = entry
+                    .known_vulnerabilities
+                    .iter()
+                    .map(|v| v.id.as_str())
+                    .collect();
+                section.push_str(&format!(
+                    "- {} {} ({}): {}\n",
+                    entry.name,
+                    entry.version,
+                    entry.package_manager,
+                    cve_ids.join(", ")
+                ));
+            }
+            if sbom_entries.len() > 15 {
+                section.push_str(&format!(
+                    "... and {} more vulnerable dependencies\n",
+                    sbom_entries.len() - 15
+                ));
+            }
+            section
+        };
+
+        // Build code context section
+        let code_section = if code_context.is_empty() {
+            String::from("No code knowledge graph available for this target.")
+        } else {
+            let with_vulns = code_context
+                .iter()
+                .filter(|c| !c.known_vulnerabilities.is_empty())
+                .count();
+
+            let mut section = format!(
+                "{} entry points identified ({} with linked SAST findings):\n",
+                code_context.len(),
+                with_vulns
+            );
+
+            for hint in code_context.iter().take(20) {
+                section.push_str(&format!(
+                    "- {} ({})\n",
+                    hint.endpoint_pattern, hint.file_path
+                ));
+                for vuln in &hint.known_vulnerabilities {
+                    section.push_str(&format!("  SAST: {vuln}\n"));
+                }
+            }
+            section
+        };
+
         format!(
             r#"You are an expert penetration tester conducting an authorized security assessment.
 
@@ -361,33 +616,50 @@ impl PentestOrchestrator {
 - **Type**: {target_type}
 - **Rate Limit**: {rate_limit} req/s
 - **Destructive Tests Allowed**: {allow_destructive}
+- **Linked Repository**: {repo_linked}
 
 ## Strategy
 {strategy_guidance}
 
+## SAST Findings (Static Analysis)
+{sast_section}
+
+## Vulnerable Dependencies (SBOM)
+{sbom_section}
+
+## Code Entry Points (Knowledge Graph)
+{code_section}
+
 ## Available Tools
 {tool_names}
 
 ## Instructions
-1. Start by running reconnaissance and crawling to understand the target.
-2. Based on what you discover, select appropriate vulnerability scanning tools.
-3. For each tool invocation, provide the discovered endpoints and parameters.
-4. Analyze tool results and chain findings — if you find one vulnerability, explore whether it enables others.
-5. When testing is complete, provide a summary of all findings with severity and remediation recommendations.
-6. Always explain your reasoning before invoking each tool.
-7. Focus on actionable findings with evidence. Avoid false positives.
-8. When you have completed all relevant testing, say "Testing complete" followed by a final summary.
+1. Start by running reconnaissance (recon tool) to fingerprint the target and discover technologies.
+2. Run the OpenAPI parser to discover API endpoints from specs.
+3. Check infrastructure: DNS, DMARC, TLS, security headers, cookies, CSP, CORS.
+4. Based on SAST findings, prioritize testing endpoints where vulnerabilities were found in code.
+5. For each vulnerability type found in SAST, use the corresponding DAST tool to verify exploitability.
+6. If vulnerable dependencies are listed, try to trigger known CVE conditions against the running application.
+7. Test rate limiting on critical endpoints (login, API).
+8. Check for console.log leakage in frontend JavaScript.
+9. Analyze tool results and chain findings — if one vulnerability enables others, explore the chain.
+10. When testing is complete, provide a structured summary with severity and remediation.
+11. Always explain your reasoning before invoking each tool.
+12. When done, say "Testing complete" followed by a final summary.
 
 ## Important
 - This is an authorized penetration test. All testing is permitted within the target scope.
 - Respect the rate limit of {rate_limit} requests per second.
 - Only use destructive tests if explicitly allowed ({allow_destructive}).
+- Use SAST findings to guide your testing — they tell you WHERE in the code vulnerabilities exist.
+- Use SBOM data to understand what technologies and versions the target runs.
 "#,
             target_name = target.name,
             base_url = target.base_url,
             target_type = target.target_type,
             rate_limit = target.rate_limit,
             allow_destructive = target.allow_destructive,
+            repo_linked = target.repo_id.as_deref().unwrap_or("None"),
         )
     }
 }