feat: add code-awareness to pentest orchestrator

Connect SAST findings, SBOM/CVE data, and code knowledge graph entry points to the LLM pentest orchestrator so it can prioritize attacks based on known vulnerabilities and code structure.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-03-11 19:43:14 +01:00
parent 71d8741e10
commit c0f9ba467c
1 changed files with 317 additions and 45 deletions
@@ -1,9 +1,13 @@
 use std::sync::Arc;

+use futures_util::StreamExt;
+use mongodb::bson::doc;
 use tokio::sync::broadcast;

 use compliance_core::models::dast::DastTarget;
+use compliance_core::models::finding::{Finding, FindingStatus, Severity};
 use compliance_core::models::pentest::*;
+use compliance_core::models::sbom::SbomEntry;
 use compliance_core::traits::pentest_tool::PentestToolContext;
 use compliance_dast::ToolRegistry;

@@ -50,8 +54,14 @@ impl PentestOrchestrator {
            .map(|oid| oid.to_hex())
            .unwrap_or_default();

-        // Build system prompt
-        let system_prompt = self.build_system_prompt(session, target);
+        // Gather code-awareness context from linked repo
+        let (sast_findings, sbom_entries, code_context) =
+            self.gather_repo_context(target).await;
+
+        // Build system prompt with code context
+        let system_prompt = self
+            .build_system_prompt(session, target, &sast_findings, &sbom_entries, &code_context)
+            .await;

        // Build tool definitions for LLM
        let tool_defs: Vec<ToolDefinition> = self
@@ -85,13 +95,13 @@ impl PentestOrchestrator {
        let user_msg = PentestMessage::user(session_id.clone(), initial_message.to_string());
        let _ = self.db.pentest_messages().insert_one(&user_msg).await;

-        // Build tool context
+        // Build tool context with real data
        let tool_context = PentestToolContext {
            target: target.clone(),
            session_id: session_id.clone(),
-            sast_findings: Vec::new(),
-            sbom_entries: Vec::new(),
-            code_context: Vec::new(),
+            sast_findings,
+            sbom_entries,
+            code_context,
            rate_limit: target.rate_limit,
            allow_destructive: target.allow_destructive,
        };
@@ -102,7 +112,6 @@ impl PentestOrchestrator {
        let mut total_successes = 0u32;

        for _iteration in 0..max_iterations {
-            // Call LLM with tools
            let response = self
                .llm
                .chat_with_tools(messages.clone(), &tool_defs, Some(0.2), Some(8192))
@@ -110,17 +119,13 @@ impl PentestOrchestrator {

            match response {
                LlmResponse::Content(content) => {
-                    // Store assistant message
                    let msg =
                        PentestMessage::assistant(session_id.clone(), content.clone());
                    let _ = self.db.pentest_messages().insert_one(&msg).await;
-
-                    // Emit message event
                    let _ = self.event_tx.send(PentestEvent::Message {
                        content: content.clone(),
                    });

-                    // Add to messages
                    messages.push(ChatMessage {
                        role: "assistant".to_string(),
                        content: Some(content.clone()),
@@ -128,7 +133,6 @@ impl PentestOrchestrator {
                        tool_call_id: None,
                    });

-                    // Check if the LLM considers itself done
                    let done_indicators = [
                        "pentest complete",
                        "testing complete",
@@ -144,12 +148,9 @@ impl PentestOrchestrator {
                    {
                        break;
                    }
-
-                    // If not done, break and wait for user input
                    break;
                }
                LlmResponse::ToolCalls(tool_calls) => {
-                    // Build the assistant message with tool_calls
                    let tc_requests: Vec<ToolCallRequest> = tool_calls
                        .iter()
                        .map(|tc| ToolCallRequest {
@@ -170,12 +171,10 @@ impl PentestOrchestrator {
                        tool_call_id: None,
                    });

-                    // Execute each tool call
                    for tc in &tool_calls {
                        total_tool_calls += 1;
                        let node_id = uuid::Uuid::new_v4().to_string();

-                        // Create attack chain node
                        let mut node = AttackChainNode::new(
                            session_id.clone(),
                            node_id.clone(),
@@ -187,14 +186,12 @@ impl PentestOrchestrator {
                        node.started_at = Some(chrono::Utc::now());
                        let _ = self.db.attack_chain_nodes().insert_one(&node).await;

-                        // Emit tool start event
                        let _ = self.event_tx.send(PentestEvent::ToolStart {
                            node_id: node_id.clone(),
                            tool_name: tc.name.clone(),
                            input: tc.arguments.clone(),
                        });

-                        // Execute the tool
                        let result = if let Some(tool) = self.tool_registry.get(&tc.name) {
                            match tool.execute(tc.arguments.clone(), &tool_context).await {
                                Ok(result) => {
@@ -202,13 +199,11 @@ impl PentestOrchestrator {
                                    let findings_count = result.findings.len() as u32;
                                    total_findings += findings_count;

-                                    // Store findings
                                    for mut finding in result.findings {
                                        finding.scan_run_id = session_id.clone();
                                        finding.session_id = Some(session_id.clone());
                                        let _ =
                                            self.db.dast_findings().insert_one(&finding).await;
-
                                        let _ =
                                            self.event_tx.send(PentestEvent::Finding {
                                                finding_id: finding
@@ -220,23 +215,21 @@ impl PentestOrchestrator {
                                            });
                                    }

-                                    // Emit tool complete event
                                    let _ = self.event_tx.send(PentestEvent::ToolComplete {
                                        node_id: node_id.clone(),
                                        summary: result.summary.clone(),
                                        findings_count,
                                    });

-                                    // Update attack chain node
                                    let _ = self
                                        .db
                                        .attack_chain_nodes()
                                        .update_one(
-                                            mongodb::bson::doc! {
+                                            doc! {
                                                "session_id": &session_id,
                                                "node_id": &node_id,
                                            },
-                                            mongodb::bson::doc! { "$set": {
+                                            doc! { "$set": {
                                                "status": "completed",
                                                "tool_output": mongodb::bson::to_bson(&result.data)
                                                    .unwrap_or(mongodb::bson::Bson::Null),
@@ -253,22 +246,20 @@ impl PentestOrchestrator {
                                    .to_string()
                                }
                                Err(e) => {
-                                    // Update node as failed
                                    let _ = self
                                        .db
                                        .attack_chain_nodes()
                                        .update_one(
-                                            mongodb::bson::doc! {
+                                            doc! {
                                                "session_id": &session_id,
                                                "node_id": &node_id,
                                            },
-                                            mongodb::bson::doc! { "$set": {
+                                            doc! { "$set": {
                                                "status": "failed",
                                                "completed_at": mongodb::bson::DateTime::now(),
                                            }},
                                        )
                                        .await;
-
                                    format!("Tool execution failed: {e}")
                                }
                            }
@@ -276,7 +267,6 @@ impl PentestOrchestrator {
                            format!("Unknown tool: {}", tc.name)
                        };

-                        // Add tool result to messages
                        messages.push(ChatMessage {
                            role: "tool".to_string(),
                            content: Some(result),
@@ -285,14 +275,13 @@ impl PentestOrchestrator {
                        });
                    }

-                    // Update session stats
                    if let Some(sid) = session.id {
                        let _ = self
                            .db
                            .pentest_sessions()
                            .update_one(
-                                mongodb::bson::doc! { "_id": sid },
-                                mongodb::bson::doc! { "$set": {
+                                doc! { "_id": sid },
+                                doc! { "$set": {
                                    "tool_invocations": total_tool_calls as i64,
                                    "tool_successes": total_successes as i64,
                                    "findings_count": total_findings as i64,
@@ -304,14 +293,13 @@ impl PentestOrchestrator {
            }
        }

-        // Mark session as completed
        if let Some(sid) = session.id {
            let _ = self
                .db
                .pentest_sessions()
                .update_one(
-                    mongodb::bson::doc! { "_id": sid },
-                    mongodb::bson::doc! { "$set": {
+                    doc! { "_id": sid },
+                    doc! { "$set": {
                        "status": "completed",
                        "completed_at": mongodb::bson::DateTime::now(),
                        "tool_invocations": total_tool_calls as i64,
@@ -332,7 +320,159 @@ impl PentestOrchestrator {
        Ok(())
    }

-    fn build_system_prompt(&self, session: &PentestSession, target: &DastTarget) -> String {
+    // ── Code-Awareness: Gather context from linked repo ─────────
+
+    /// Collect the code-awareness inputs for a pentest run: SAST findings,
+    /// SBOM entries with known vulnerabilities, and code-graph entry-point
+    /// hints for the repository linked to `target`.
+    ///
+    /// Returns three empty vectors when the target has no linked repository.
+    async fn gather_repo_context(
+        &self,
+        target: &DastTarget,
+    ) -> (Vec<Finding>, Vec<SbomEntry>, Vec<CodeContextHint>) {
+        let repo_id = match &target.repo_id {
+            Some(id) => id,
+            None => return (Vec::new(), Vec::new(), Vec::new()),
+        };
+
+        // The code hints are linked against the SAST findings, so the findings
+        // must be available before the hints are built.
+        let findings = self.fetch_sast_findings(repo_id).await;
+        let vulnerable_deps = self.fetch_vulnerable_sbom(repo_id).await;
+        let hints = self.fetch_code_context(repo_id, &findings).await;
+
+        tracing::info!(
+            repo_id,
+            sast_findings = findings.len(),
+            vulnerable_deps = vulnerable_deps.len(),
+            code_hints = hints.len(),
+            "Gathered code-awareness context for pentest"
+        );
+
+        (findings, vulnerable_deps, hints)
+    }
+
+    /// Fetch up to 100 SAST findings for the repo whose status is `open` or
+    /// `triaged`, sorted by the `severity` field in descending order.
+    ///
+    /// Best-effort: a failed query or a failed cursor read is logged as a
+    /// warning, and whatever was collected so far (possibly nothing) is returned.
+    async fn fetch_sast_findings(&self, repo_id: &str) -> Vec<Finding> {
+        let cursor = self
+            .db
+            .findings()
+            .find(doc! {
+                "repo_id": repo_id,
+                "status": { "$in": ["open", "triaged"] },
+            })
+            // NOTE(review): if `severity` is stored as a string, this sort is
+            // lexicographic rather than by criticality — confirm the stored type.
+            .sort(doc! { "severity": -1 })
+            .limit(100)
+            .await;
+
+        let mut cursor = match cursor {
+            Ok(c) => c,
+            Err(e) => {
+                tracing::warn!("Failed to fetch SAST findings for pentest: {e}");
+                return Vec::new();
+            }
+        };
+
+        let mut results = Vec::new();
+        while let Some(item) = cursor.next().await {
+            match item {
+                Ok(finding) => results.push(finding),
+                Err(e) => {
+                    // Stop at the first bad item, but log it so a truncated
+                    // result set is not mistaken for a complete one.
+                    tracing::warn!("Failed to read SAST finding from cursor: {e}");
+                    break;
+                }
+            }
+        }
+        results
+    }
+
+    /// Fetch up to 50 SBOM entries for the repo whose `known_vulnerabilities`
+    /// field exists and is not an empty array.
+    ///
+    /// Best-effort: a failed query or a failed cursor read is logged as a
+    /// warning, and whatever was collected so far (possibly nothing) is returned.
+    async fn fetch_vulnerable_sbom(&self, repo_id: &str) -> Vec<SbomEntry> {
+        let cursor = self
+            .db
+            .sbom_entries()
+            .find(doc! {
+                "repo_id": repo_id,
+                "known_vulnerabilities": { "$exists": true, "$ne": [] },
+            })
+            .limit(50)
+            .await;
+
+        let mut cursor = match cursor {
+            Ok(c) => c,
+            Err(e) => {
+                tracing::warn!("Failed to fetch vulnerable SBOM entries: {e}");
+                return Vec::new();
+            }
+        };
+
+        let mut results = Vec::new();
+        while let Some(item) = cursor.next().await {
+            match item {
+                Ok(entry) => results.push(entry),
+                Err(e) => {
+                    // Stop at the first bad item, but log it so a truncated
+                    // result set is not mistaken for a complete one.
+                    tracing::warn!("Failed to read vulnerable SBOM entry from cursor: {e}");
+                    break;
+                }
+            }
+        }
+        results
+    }
+
+    /// Build `CodeContextHint` objects from the code knowledge graph.
+    ///
+    /// Loads up to 50 graph nodes flagged `is_entry_point` for the repo and, for
+    /// each node, attaches a one-line description of every SAST finding whose
+    /// `file_path` equals the node's `file_path`.
+    ///
+    /// Best-effort: a failed query yields an empty vector and a failed cursor
+    /// read stops the collection early; both are logged as warnings.
+    async fn fetch_code_context(
+        &self,
+        repo_id: &str,
+        sast_findings: &[Finding],
+    ) -> Vec<CodeContextHint> {
+        // Get entry point nodes from the code graph
+        let cursor = self
+            .db
+            .graph_nodes()
+            .find(doc! {
+                "repo_id": repo_id,
+                "is_entry_point": true,
+            })
+            .limit(50)
+            .await;
+
+        let mut cursor = match cursor {
+            Ok(c) => c,
+            Err(e) => {
+                // Log like the sibling fetchers do instead of failing silently.
+                tracing::warn!("Failed to fetch code graph entry points: {e}");
+                return Vec::new();
+            }
+        };
+
+        let mut nodes = Vec::new();
+        while let Some(item) = cursor.next().await {
+            match item {
+                Ok(node) => nodes.push(node),
+                Err(e) => {
+                    // Stop at the first bad item, but log it so a truncated
+                    // result set is not mistaken for a complete one.
+                    tracing::warn!("Failed to read code graph node from cursor: {e}");
+                    break;
+                }
+            }
+        }
+
+        // Build hints by matching graph nodes to SAST findings by file path
+        nodes
+            .into_iter()
+            .map(|node| {
+                // Find SAST findings in the same file
+                let linked_vulns: Vec<String> = sast_findings
+                    .iter()
+                    .filter(|f| {
+                        f.file_path.as_deref() == Some(&node.file_path)
+                    })
+                    .map(|f| {
+                        format!(
+                            "[{}] {}: {} (line {})",
+                            f.severity,
+                            f.scanner,
+                            f.title,
+                            // Findings without a line number are rendered as line 0.
+                            f.line_number.unwrap_or(0)
+                        )
+                    })
+                    .collect();
+
+                CodeContextHint {
+                    endpoint_pattern: node.qualified_name.clone(),
+                    handler_function: node.name.clone(),
+                    file_path: node.file_path.clone(),
+                    code_snippet: String::new(), // Could fetch from embeddings
+                    known_vulnerabilities: linked_vulns,
+                }
+            })
+            .collect()
+    }
+
+    // ── System Prompt Builder ───────────────────────────────────
+
+    async fn build_system_prompt(
+        &self,
+        session: &PentestSession,
+        target: &DastTarget,
+        sast_findings: &[Finding],
+        sbom_entries: &[SbomEntry],
+        code_context: &[CodeContextHint],
+    ) -> String {
        let tool_names = self.tool_registry.list_names().join(", ");
        let strategy_guidance = match session.strategy {
            PentestStrategy::Quick => {
@@ -352,6 +492,121 @@ impl PentestOrchestrator {
            }
        };

+        // Build SAST findings section
+        let sast_section = if sast_findings.is_empty() {
+            String::from("No SAST findings available for this target.")
+        } else {
+            let critical = sast_findings
+                .iter()
+                .filter(|f| f.severity == Severity::Critical)
+                .count();
+            let high = sast_findings
+                .iter()
+                .filter(|f| f.severity == Severity::High)
+                .count();
+
+            let mut section = format!(
+                "{} open findings ({} critical, {} high):\n",
+                sast_findings.len(),
+                critical,
+                high
+            );
+
+            // List the most important findings (critical/high first, up to 20)
+            for f in sast_findings.iter().take(20) {
+                let file_info = f
+                    .file_path
+                    .as_ref()
+                    .map(|p| {
+                        format!(
+                            " in {}:{}",
+                            p,
+                            f.line_number.unwrap_or(0)
+                        )
+                    })
+                    .unwrap_or_default();
+                let status_note = match f.status {
+                    FindingStatus::Triaged => " [TRIAGED]",
+                    _ => "",
+                };
+                section.push_str(&format!(
+                    "- [{sev}] {title}{file}{status}\n",
+                    sev = f.severity,
+                    title = f.title,
+                    file = file_info,
+                    status = status_note,
+                ));
+                if let Some(cwe) = &f.cwe {
+                    section.push_str(&format!("  CWE: {cwe}\n"));
+                }
+            }
+            if sast_findings.len() > 20 {
+                section.push_str(&format!(
+                    "... and {} more findings\n",
+                    sast_findings.len() - 20
+                ));
+            }
+            section
+        };
+
+        // Build SBOM/CVE section
+        let sbom_section = if sbom_entries.is_empty() {
+            String::from("No vulnerable dependencies identified.")
+        } else {
+            let mut section = format!(
+                "{} dependencies with known vulnerabilities:\n",
+                sbom_entries.len()
+            );
+            for entry in sbom_entries.iter().take(15) {
+                let cve_ids: Vec<&str> = entry
+                    .known_vulnerabilities
+                    .iter()
+                    .map(|v| v.id.as_str())
+                    .collect();
+                section.push_str(&format!(
+                    "- {} {} ({}): {}\n",
+                    entry.name,
+                    entry.version,
+                    entry.package_manager,
+                    cve_ids.join(", ")
+                ));
+            }
+            if sbom_entries.len() > 15 {
+                section.push_str(&format!(
+                    "... and {} more vulnerable dependencies\n",
+                    sbom_entries.len() - 15
+                ));
+            }
+            section
+        };
+
+        // Build code context section
+        let code_section = if code_context.is_empty() {
+            String::from("No code knowledge graph available for this target.")
+        } else {
+            let with_vulns = code_context
+                .iter()
+                .filter(|c| !c.known_vulnerabilities.is_empty())
+                .count();
+
+            let mut section = format!(
+                "{} entry points identified ({} with linked SAST findings):\n",
+                code_context.len(),
+                with_vulns
+            );
+
+            for hint in code_context.iter().take(20) {
+                section.push_str(&format!(
+                    "- {} ({})\n",
+                    hint.endpoint_pattern, hint.file_path
+                ));
+                for vuln in &hint.known_vulnerabilities {
+                    section.push_str(&format!("  SAST: {vuln}\n"));
+                }
+            }
+            section
+        };
+
        format!(
            r#"You are an expert penetration tester conducting an authorized security assessment.

@@ -361,33 +616,50 @@ impl PentestOrchestrator {
 - **Type**: {target_type}
 - **Rate Limit**: {rate_limit} req/s
 - **Destructive Tests Allowed**: {allow_destructive}
+- **Linked Repository**: {repo_linked}

 ## Strategy
 {strategy_guidance}

+## SAST Findings (Static Analysis)
+{sast_section}
+
+## Vulnerable Dependencies (SBOM)
+{sbom_section}
+
+## Code Entry Points (Knowledge Graph)
+{code_section}
+
 ## Available Tools
 {tool_names}

 ## Instructions
-1. Start by running reconnaissance and crawling to understand the target.
-2. Based on what you discover, select appropriate vulnerability scanning tools.
-3. For each tool invocation, provide the discovered endpoints and parameters.
-4. Analyze tool results and chain findings — if you find one vulnerability, explore whether it enables others.
-5. When testing is complete, provide a summary of all findings with severity and remediation recommendations.
-6. Always explain your reasoning before invoking each tool.
-7. Focus on actionable findings with evidence. Avoid false positives.
-8. When you have completed all relevant testing, say "Testing complete" followed by a final summary.
+1. Start by running reconnaissance (recon tool) to fingerprint the target and discover technologies.
+2. Run the OpenAPI parser to discover API endpoints from specs.
+3. Check infrastructure: DNS, DMARC, TLS, security headers, cookies, CSP, CORS.
+4. Based on SAST findings, prioritize testing endpoints where vulnerabilities were found in code.
+5. For each vulnerability type found in SAST, use the corresponding DAST tool to verify exploitability.
+6. If vulnerable dependencies are listed, try to trigger known CVE conditions against the running application.
+7. Test rate limiting on critical endpoints (login, API).
+8. Check for console.log leakage in frontend JavaScript.
+9. Analyze tool results and chain findings — if one vulnerability enables others, explore the chain.
+10. When testing is complete, provide a structured summary with severity and remediation.
+11. Always explain your reasoning before invoking each tool.
+12. When done, say "Testing complete" followed by a final summary.

 ## Important
 - This is an authorized penetration test. All testing is permitted within the target scope.
 - Respect the rate limit of {rate_limit} requests per second.
 - Only use destructive tests if explicitly allowed ({allow_destructive}).
+- Use SAST findings to guide your testing — they tell you WHERE in the code vulnerabilities exist.
+- Use SBOM data to understand what technologies and versions the target runs.
 "#,
            target_name = target.name,
            base_url = target.base_url,
            target_type = target.target_type,
            rate_limit = target.rate_limit,
            allow_destructive = target.allow_destructive,
+            repo_linked = target.repo_id.as_deref().unwrap_or("None"),
        )
    }
 }