compliance-scanner-agent/compliance-agent/src/pentest/context.rs

use futures_util::StreamExt;
use mongodb::bson::doc;

use compliance_core::models::dast::DastTarget;
use compliance_core::models::finding::Finding;
use compliance_core::models::pentest::CodeContextHint;
use compliance_core::models::sbom::SbomEntry;

use super::orchestrator::PentestOrchestrator;

impl PentestOrchestrator {
    /// Fetch SAST findings, SBOM entries (with CVEs), and code graph entry points
    /// for the repo linked to this DAST target.
    pub(crate) async fn gather_repo_context(
        &self,
        target: &DastTarget,
    ) -> (Vec<Finding>, Vec<SbomEntry>, Vec<CodeContextHint>) {
        let Some(repo_id) = &target.repo_id else {
            return (Vec::new(), Vec::new(), Vec::new());
        };

        let sast_findings = self.fetch_sast_findings(repo_id).await;
        let sbom_entries = self.fetch_vulnerable_sbom(repo_id).await;
        let code_context = self.fetch_code_context(repo_id, &sast_findings).await;

        tracing::info!(
            repo_id,
            sast_findings = sast_findings.len(),
            vulnerable_deps = sbom_entries.len(),
            code_hints = code_context.len(),
            "Gathered code-awareness context for pentest"
        );

        (sast_findings, sbom_entries, code_context)
    }

    /// Fetch open/triaged SAST findings for the repo (not false positives or resolved)
    async fn fetch_sast_findings(&self, repo_id: &str) -> Vec<Finding> {
        let cursor = self
            .db
            .findings()
            .find(doc! {
                "repo_id": repo_id,
                "status": { "$in": ["open", "triaged"] },
            })
            .sort(doc! { "severity": -1 })
            .limit(100)
            .await;

        match cursor {
            Ok(mut c) => {
                let mut results = Vec::new();
                while let Some(Ok(f)) = c.next().await {
                    results.push(f);
                }
                results
            }
            Err(e) => {
                tracing::warn!("Failed to fetch SAST findings for pentest: {e}");
                Vec::new()
            }
        }
    }

    /// Fetch SBOM entries that have known vulnerabilities
    async fn fetch_vulnerable_sbom(&self, repo_id: &str) -> Vec<SbomEntry> {
        let cursor = self
            .db
            .sbom_entries()
            .find(doc! {
                "repo_id": repo_id,
                "known_vulnerabilities": { "$exists": true, "$ne": [] },
            })
            .limit(50)
            .await;

        match cursor {
            Ok(mut c) => {
                let mut results = Vec::new();
                while let Some(Ok(e)) = c.next().await {
                    results.push(e);
                }
                results
            }
            Err(e) => {
                tracing::warn!("Failed to fetch vulnerable SBOM entries: {e}");
                Vec::new()
            }
        }
    }

    /// Build CodeContextHint objects from the code knowledge graph.
    /// Maps entry points to their source files and links SAST findings.
    async fn fetch_code_context(
        &self,
        repo_id: &str,
        sast_findings: &[Finding],
    ) -> Vec<CodeContextHint> {
        // Get entry point nodes from the code graph
        let cursor = self
            .db
            .graph_nodes()
            .find(doc! {
                "repo_id": repo_id,
                "is_entry_point": true,
            })
            .limit(50)
            .await;

        let nodes = match cursor {
            Ok(mut c) => {
                let mut results = Vec::new();
                while let Some(Ok(n)) = c.next().await {
                    results.push(n);
                }
                results
            }
            Err(_) => return Vec::new(),
        };

        // Build hints by matching graph nodes to SAST findings by file path
        nodes
            .into_iter()
            .map(|node| {
                // Find SAST findings in the same file
                let linked_vulns: Vec<String> = sast_findings
                    .iter()
                    .filter(|f| f.file_path.as_deref() == Some(&node.file_path))
                    .map(|f| {
                        format!(
                            "[{}] {}: {} (line {})",
                            f.severity,
                            f.scanner,
                            f.title,
                            f.line_number.unwrap_or(0)
                        )
                    })
                    .collect();

                CodeContextHint {
                    endpoint_pattern: node.qualified_name.clone(),
                    handler_function: node.name.clone(),
                    file_path: node.file_path.clone(),
                    code_snippet: String::new(), // Could fetch from embeddings
                    known_vulnerabilities: linked_vulns,
                }
            })
            .collect()
    }
}