refactor: modularize codebase and add 404 unit tests

Split large files into focused modules across all crates while maintaining API compatibility via re-exports. Add comprehensive unit tests covering core models, pipeline parsers, LLM triage, DAST security tools, graph algorithms, and MCP parameter validation.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-03-12 16:59:05 +01:00
parent acc5b86aa4
commit 4e95fd7016
89 changed files with 11855 additions and 6032 deletions
--- a/compliance-agent/src/pipeline/sbom/mod.rs
+++ b/compliance-agent/src/pipeline/sbom/mod.rs
@@ -0,0 +1,216 @@
+mod cargo_audit;
+mod syft;
+
+use std::path::Path;
+
+use compliance_core::models::{SbomEntry, ScanType, VulnRef};
+use compliance_core::traits::{ScanOutput, Scanner};
+use compliance_core::CoreError;
+
+/// Scanner that assembles a software bill of materials (SBOM) for a repository.
+///
+/// Entries are collected from Syft, Cargo entries are then enriched with
+/// license data from `cargo metadata`, and `cargo-audit` advisories are merged
+/// into the matching entries.
+pub struct SbomScanner;
+
+impl Scanner for SbomScanner {
+    /// Name under which this scanner identifies itself.
+    fn name(&self) -> &str {
+        "sbom"
+    }
+
+    /// Scan category produced by this scanner.
+    fn scan_type(&self) -> ScanType {
+        ScanType::Sbom
+    }
+
+    /// Build the SBOM for the checkout at `repo_path`.
+    ///
+    /// Every external tool is best-effort: a failing step is logged as a
+    /// warning and skipped, so this body always returns `Ok`. The returned
+    /// [`ScanOutput`] carries no findings, only SBOM entries.
+    #[tracing::instrument(skip_all)]
+    async fn scan(&self, repo_path: &Path, repo_id: &str) -> Result<ScanOutput, CoreError> {
+        // Lock files have to exist before Syft runs so it can see the full dependency tree.
+        generate_lockfiles(repo_path).await;
+
+        // Syft supplies the base entry list; if it fails the list simply stays empty.
+        let mut sbom_entries: Vec<SbomEntry> = Vec::new();
+        let syft_result = syft::run_syft(repo_path, repo_id).await;
+        match syft_result {
+            Err(e) => tracing::warn!("syft failed: {e}"),
+            Ok(syft_entries) => sbom_entries.extend(syft_entries),
+        }
+
+        // Fill in licenses for Cargo entries from cargo metadata.
+        enrich_cargo_licenses(repo_path, &mut sbom_entries).await;
+
+        // Attach Rust-specific advisories reported by cargo-audit.
+        let audit_result = cargo_audit::run_cargo_audit(repo_path, repo_id).await;
+        match audit_result {
+            Err(e) => tracing::warn!("cargo-audit skipped: {e}"),
+            Ok(vulns) => merge_audit_vulns(&mut sbom_entries, vulns),
+        }
+
+        let findings = Vec::new();
+        Ok(ScanOutput {
+            findings,
+            sbom_entries,
+        })
+    }
+}
+
+/// Generate missing lock files so Syft can resolve the full dependency tree.
+/// This handles repos that gitignore their lock files (common for Rust libraries).
+///
+/// Each ecosystem is handled independently and on a best-effort basis: a
+/// failure is logged as a warning and never aborts the scan.
+///
+/// * Cargo: runs `cargo generate-lockfile` when `Cargo.toml` exists without
+///   `Cargo.lock`.
+/// * Python: runs `pip-compile` on `pyproject.toml` when none of
+///   `requirements.txt`, `requirements-lock.txt`, `poetry.lock` or
+///   `Pipfile.lock` exists. Projects that only ship `setup.py` / `setup.cfg`
+///   are left untouched; no lock file is generated for them here.
+/// * npm: runs `npm install --package-lock-only --ignore-scripts` when
+///   `package.json` exists without an npm, yarn or pnpm lock file.
+#[tracing::instrument(skip_all)]
+async fn generate_lockfiles(repo_path: &Path) {
+    // Cargo: generate Cargo.lock if Cargo.toml exists without it
+    if repo_path.join("Cargo.toml").exists() && !repo_path.join("Cargo.lock").exists() {
+        tracing::info!("generating Cargo.lock for SBOM scan");
+        let result = tokio::process::Command::new("cargo")
+            .args(["generate-lockfile"])
+            .current_dir(repo_path)
+            // Set RUSTC_WRAPPER to the empty string for the child process.
+            .env("RUSTC_WRAPPER", "")
+            .output()
+            .await;
+        match result {
+            Ok(o) if o.status.success() => tracing::info!("Cargo.lock generated"),
+            Ok(o) => tracing::warn!(
+                "cargo generate-lockfile failed: {}",
+                String::from_utf8_lossy(&o.stderr)
+            ),
+            Err(e) => tracing::warn!("cargo generate-lockfile error: {e}"),
+        }
+    }
+
+    // pip: compile a requirements.txt from pyproject.toml when no lock-like
+    // file is present. There is no fallback: if pip-compile cannot run or
+    // fails, Syft is left to parse pyproject.toml on its own.
+    let has_pip_lock = repo_path.join("requirements.txt").exists()
+        || repo_path.join("requirements-lock.txt").exists()
+        || repo_path.join("poetry.lock").exists()
+        || repo_path.join("Pipfile.lock").exists();
+    if repo_path.join("pyproject.toml").exists() && !has_pip_lock {
+        tracing::info!("attempting to generate pip requirements for SBOM scan");
+        let result = tokio::process::Command::new("pip-compile")
+            .args([
+                "--quiet",
+                "--output-file",
+                "requirements.txt",
+                "pyproject.toml",
+            ])
+            .current_dir(repo_path)
+            .output()
+            .await;
+        match result {
+            Ok(o) if o.status.success() => {
+                tracing::info!("requirements.txt generated via pip-compile")
+            }
+            // Report the tool's stderr, matching the cargo and npm branches.
+            Ok(o) => tracing::warn!(
+                "pip-compile failed, Syft will parse pyproject.toml directly: {}",
+                String::from_utf8_lossy(&o.stderr)
+            ),
+            // Spawning failed, e.g. pip-tools is not installed.
+            Err(e) => tracing::warn!(
+                "pip-compile could not be run, Syft will parse pyproject.toml directly: {e}"
+            ),
+        }
+    }
+
+    // npm: generate package-lock.json if package.json exists without it
+    let has_npm_lock = repo_path.join("package-lock.json").exists()
+        || repo_path.join("yarn.lock").exists()
+        || repo_path.join("pnpm-lock.yaml").exists();
+    if repo_path.join("package.json").exists() && !has_npm_lock {
+        tracing::info!("generating package-lock.json for SBOM scan");
+        let result = tokio::process::Command::new("npm")
+            // --ignore-scripts keeps package lifecycle scripts from running.
+            .args(["install", "--package-lock-only", "--ignore-scripts"])
+            .current_dir(repo_path)
+            .output()
+            .await;
+        match result {
+            Ok(o) if o.status.success() => tracing::info!("package-lock.json generated"),
+            Ok(o) => tracing::warn!(
+                "npm install --package-lock-only failed: {}",
+                String::from_utf8_lossy(&o.stderr)
+            ),
+            Err(e) => tracing::warn!("npm lock generation error: {e}"),
+        }
+    }
+}
+
+/// Fill in missing licenses on Cargo SBOM entries using `cargo metadata`.
+/// Syft doesn't read license data from Cargo.lock, so we fill it in.
+///
+/// Does nothing when `repo_path` has no `Cargo.toml` or when `entries` holds
+/// no entry whose package manager is `cargo`. Any failure to run or parse
+/// `cargo metadata` is logged as a warning and leaves `entries` untouched.
+/// Entries that already carry a license are never overwritten.
+#[tracing::instrument(skip_all)]
+async fn enrich_cargo_licenses(repo_path: &Path, entries: &mut [SbomEntry]) {
+    // Nothing to do unless this is a Cargo project with Cargo entries.
+    let is_cargo_project = repo_path.join("Cargo.toml").exists();
+    if !is_cargo_project || !entries.iter().any(|e| e.package_manager == "cargo") {
+        return;
+    }
+
+    let spawn_result = tokio::process::Command::new("cargo")
+        .args(["metadata", "--format-version", "1"])
+        .current_dir(repo_path)
+        .env("RUSTC_WRAPPER", "")
+        .output()
+        .await;
+    let output = match spawn_result {
+        Err(e) => {
+            tracing::warn!("cargo metadata error: {e}");
+            return;
+        }
+        Ok(o) if !o.status.success() => {
+            tracing::warn!(
+                "cargo metadata failed: {}",
+                String::from_utf8_lossy(&o.stderr)
+            );
+            return;
+        }
+        Ok(o) => o,
+    };
+
+    let meta = match serde_json::from_slice::<CargoMetadata>(&output.stdout) {
+        Err(e) => {
+            tracing::warn!("failed to parse cargo metadata: {e}");
+            return;
+        }
+        Ok(parsed) => parsed,
+    };
+
+    // Index licenses by (name, version); packages without a license are left out.
+    let mut license_by_pkg: std::collections::HashMap<(&str, &str), &str> =
+        std::collections::HashMap::new();
+    for pkg in &meta.packages {
+        if let Some(license) = pkg.license.as_deref() {
+            license_by_pkg.insert((pkg.name.as_str(), pkg.version.as_str()), license);
+        }
+    }
+
+    // Only Cargo entries that still lack a license are candidates.
+    for entry in entries
+        .iter_mut()
+        .filter(|e| e.package_manager == "cargo" && e.license.is_none())
+    {
+        let key = (entry.name.as_str(), entry.version.as_str());
+        if let Some(license) = license_by_pkg.get(&key) {
+            entry.license = Some((*license).to_owned());
+        }
+    }
+}
+
+/// Attach `cargo-audit` advisories to the matching Cargo SBOM entries.
+///
+/// For each advisory, the first entry whose package manager is `cargo` and
+/// whose name equals the advisory's package receives a [`VulnRef`] with source
+/// `"cargo-audit"`, no severity, and the advisory URL. Advisories with no
+/// matching entry are dropped.
+///
+/// Matching is restricted to Cargo entries because the SBOM can contain
+/// packages from several ecosystems; a name-only match could otherwise pin a
+/// Rust advisory on an unrelated npm or Python package of the same name.
+///
+/// NOTE(review): matching ignores the package version, so when several
+/// versions of one crate are present only the first entry is annotated.
+/// Tightening this needs version data from `AuditVuln` (confirm it is
+/// available).
+fn merge_audit_vulns(entries: &mut [SbomEntry], vulns: Vec<cargo_audit::AuditVuln>) {
+    for vuln in vulns {
+        let target = entries
+            .iter_mut()
+            .find(|e| e.package_manager == "cargo" && e.name == vuln.package);
+        if let Some(entry) = target {
+            entry.known_vulnerabilities.push(VulnRef {
+                id: vuln.id.clone(),
+                source: "cargo-audit".to_string(),
+                severity: None,
+                url: Some(vuln.url),
+            });
+        }
+    }
+}
+
+// Cargo metadata types
+/// Top-level shape of the `cargo metadata --format-version 1` JSON output,
+/// declaring only the field this module reads.
+#[derive(serde::Deserialize)]
+struct CargoMetadata {
+    /// Packages listed under the `packages` key of the metadata document.
+    packages: Vec<CargoPackage>,
+}
+
+/// One package record from `cargo metadata`, reduced to the fields used to
+/// build the `(name, version) -> license` lookup in `enrich_cargo_licenses`.
+#[derive(serde::Deserialize)]
+struct CargoPackage {
+    /// Package name; compared against `SbomEntry::name`.
+    name: String,
+    /// Package version string; compared against `SbomEntry::version`.
+    version: String,
+    /// License string reported for the package, if any.
+    license: Option<String>,
+}