From daff5812a6980611a3bd4d36c22f469fb1cc3b5a Mon Sep 17 00:00:00 2001 From: Sharang Parnerkar Date: Tue, 10 Mar 2026 12:37:29 +0000 Subject: [PATCH] fix: SBOM multi-ecosystem support with correct package managers and licenses (#8) --- Dockerfile.agent | 13 +- compliance-agent/src/api/handlers/mod.rs | 31 +++ compliance-agent/src/api/routes.rs | 1 + compliance-agent/src/pipeline/orchestrator.rs | 10 +- compliance-agent/src/pipeline/sbom.rs | 228 +++++++++++++++++- .../src/infrastructure/sbom.rs | 24 ++ compliance-dashboard/src/pages/sbom.rs | 61 +++-- 7 files changed, 340 insertions(+), 28 deletions(-) diff --git a/Dockerfile.agent b/Dockerfile.agent index 054c35b..b4cee75 100644 --- a/Dockerfile.agent +++ b/Dockerfile.agent @@ -5,7 +5,18 @@ COPY . . RUN cargo build --release -p compliance-agent FROM debian:bookworm-slim -RUN apt-get update && apt-get install -y ca-certificates libssl3 git curl python3 python3-pip && rm -rf /var/lib/apt/lists/* +RUN apt-get update && apt-get install -y ca-certificates libssl3 git curl python3 python3-pip npm golang-go php-cli && rm -rf /var/lib/apt/lists/* + +# Install Cargo (minimal, for cargo metadata / cargo audit / generate-lockfile) +RUN curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs | sh -s -- -y --default-toolchain stable --profile minimal +ENV PATH="/root/.cargo/bin:${PATH}" +RUN cargo install cargo-audit + +# Install Composer for PHP dependency resolution +RUN curl -sS https://getcomposer.org/installer | php -- --install-dir=/usr/local/bin --filename=composer + +# Install Bundler for Ruby dependency resolution +RUN apt-get update && apt-get install -y ruby && rm -rf /var/lib/apt/lists/* && gem install bundler # Install syft for SBOM generation RUN curl -sSfL https://raw.githubusercontent.com/anchore/syft/main/install.sh | sh -s -- -b /usr/local/bin diff --git a/compliance-agent/src/api/handlers/mod.rs b/compliance-agent/src/api/handlers/mod.rs index a9a8801..497a737 100644 --- a/compliance-agent/src/api/handlers/mod.rs +++ b/compliance-agent/src/api/handlers/mod.rs @@ -554,6 +554,37 @@ pub async fn update_finding_feedback( Ok(Json(serde_json::json!({ "status": "updated" }))) } +pub async fn sbom_filters( + Extension(agent): AgentExt, +) -> Result, StatusCode> { + let db = &agent.db; + + let managers: Vec = db + .sbom_entries() + .distinct("package_manager", doc! {}) + .await + .unwrap_or_default() + .into_iter() + .filter_map(|v| v.as_str().map(|s| s.to_string())) + .filter(|s| !s.is_empty() && s != "unknown" && s != "file") + .collect(); + + let licenses: Vec = db + .sbom_entries() + .distinct("license", doc! {}) + .await + .unwrap_or_default() + .into_iter() + .filter_map(|v| v.as_str().map(|s| s.to_string())) + .filter(|s| !s.is_empty()) + .collect(); + + Ok(Json(serde_json::json!({ + "package_managers": managers, + "licenses": licenses, + }))) +} + pub async fn list_sbom( Extension(agent): AgentExt, Query(filter): Query, diff --git a/compliance-agent/src/api/routes.rs b/compliance-agent/src/api/routes.rs index f355040..bf6877a 100644 --- a/compliance-agent/src/api/routes.rs +++ b/compliance-agent/src/api/routes.rs @@ -36,6 +36,7 @@ pub fn build_router() -> Router { patch(handlers::update_finding_feedback), ) .route("/api/v1/sbom", get(handlers::list_sbom)) + .route("/api/v1/sbom/filters", get(handlers::sbom_filters)) .route("/api/v1/sbom/export", get(handlers::export_sbom)) .route("/api/v1/sbom/licenses", get(handlers::license_summary)) .route("/api/v1/sbom/diff", get(handlers::sbom_diff)) diff --git a/compliance-agent/src/pipeline/orchestrator.rs b/compliance-agent/src/pipeline/orchestrator.rs index 6452c09..b377c32 100644 --- a/compliance-agent/src/pipeline/orchestrator.rs +++ b/compliance-agent/src/pipeline/orchestrator.rs @@ -263,7 +263,15 @@ impl PipelineOrchestrator { } } - // Persist SBOM entries (upsert by repo_id + name + version) + // Remove stale SBOM entries for this repo before reinserting + if !sbom_entries.is_empty() { + self.db + .sbom_entries() + .delete_many(doc! { "repo_id": &repo.id }) + .await?; + } + + // Persist SBOM entries for entry in &sbom_entries { let filter = doc! { "repo_id": &entry.repo_id, diff --git a/compliance-agent/src/pipeline/sbom.rs b/compliance-agent/src/pipeline/sbom.rs index ab67663..e404b3c 100644 --- a/compliance-agent/src/pipeline/sbom.rs +++ b/compliance-agent/src/pipeline/sbom.rs @@ -18,12 +18,18 @@ impl Scanner for SbomScanner { async fn scan(&self, repo_path: &Path, repo_id: &str) -> Result { let mut entries = Vec::new(); + // Generate missing lock files so Syft can resolve the full dependency tree + generate_lockfiles(repo_path).await; + // Run syft for SBOM generation match run_syft(repo_path, repo_id).await { Ok(syft_entries) => entries.extend(syft_entries), Err(e) => tracing::warn!("syft failed: {e}"), } + // Enrich Cargo entries with license info from cargo metadata + enrich_cargo_licenses(repo_path, &mut entries).await; + // Run cargo-audit for Rust-specific vulns match run_cargo_audit(repo_path, repo_id).await { Ok(vulns) => merge_audit_vulns(&mut entries, vulns), @@ -37,10 +43,153 @@ impl Scanner for SbomScanner { } } +/// Generate missing lock files so Syft can resolve the full dependency tree. +/// This handles repos that gitignore their lock files (common for Rust libraries). +async fn generate_lockfiles(repo_path: &Path) { + // Cargo: generate Cargo.lock if Cargo.toml exists without it + if repo_path.join("Cargo.toml").exists() && !repo_path.join("Cargo.lock").exists() { + tracing::info!("generating Cargo.lock for SBOM scan"); + let result = tokio::process::Command::new("cargo") + .args(["generate-lockfile"]) + .current_dir(repo_path) + .output() + .await; + match result { + Ok(o) if o.status.success() => tracing::info!("Cargo.lock generated"), + Ok(o) => tracing::warn!( + "cargo generate-lockfile failed: {}", + String::from_utf8_lossy(&o.stderr) + ), + Err(e) => tracing::warn!("cargo generate-lockfile error: {e}"), + } + } + + // pip: generate a requirements lock if only pyproject.toml / setup.py exists + let has_pip_manifest = repo_path.join("pyproject.toml").exists() + || repo_path.join("setup.py").exists() + || repo_path.join("setup.cfg").exists(); + let has_pip_lock = repo_path.join("requirements.txt").exists() + || repo_path.join("requirements-lock.txt").exists() + || repo_path.join("poetry.lock").exists() + || repo_path.join("Pipfile.lock").exists(); + if has_pip_manifest && !has_pip_lock { + // Try pip-compile (pip-tools) first, fall back to pip freeze approach + tracing::info!("attempting to generate pip requirements for SBOM scan"); + if repo_path.join("pyproject.toml").exists() { + let result = tokio::process::Command::new("pip-compile") + .args([ + "--quiet", + "--output-file", + "requirements.txt", + "pyproject.toml", + ]) + .current_dir(repo_path) + .output() + .await; + match result { + Ok(o) if o.status.success() => { + tracing::info!("requirements.txt generated via pip-compile") + } + _ => tracing::warn!( + "pip-compile not available or failed, Syft will parse pyproject.toml directly" + ), + } + } + } + + // npm: generate package-lock.json if package.json exists without it + let has_npm_lock = repo_path.join("package-lock.json").exists() + || repo_path.join("yarn.lock").exists() + || repo_path.join("pnpm-lock.yaml").exists(); + if repo_path.join("package.json").exists() && !has_npm_lock { + tracing::info!("generating package-lock.json for SBOM scan"); + let result = tokio::process::Command::new("npm") + .args(["install", "--package-lock-only", "--ignore-scripts"]) + .current_dir(repo_path) + .output() + .await; + match result { + Ok(o) if o.status.success() => tracing::info!("package-lock.json generated"), + Ok(o) => tracing::warn!( + "npm install --package-lock-only failed: {}", + String::from_utf8_lossy(&o.stderr) + ), + Err(e) => tracing::warn!("npm lock generation error: {e}"), + } + } +} + +/// Enrich Cargo SBOM entries with license info from `cargo metadata`. +/// Syft doesn't read license data from Cargo.lock, so we fill it in. +async fn enrich_cargo_licenses(repo_path: &Path, entries: &mut [SbomEntry]) { + if !repo_path.join("Cargo.toml").exists() { + return; + } + + let has_cargo_entries = entries.iter().any(|e| e.package_manager == "cargo"); + if !has_cargo_entries { + return; + } + + let output = match tokio::process::Command::new("cargo") + .args(["metadata", "--format-version", "1"]) + .current_dir(repo_path) + .output() + .await + { + Ok(o) if o.status.success() => o, + Ok(o) => { + tracing::warn!( + "cargo metadata failed: {}", + String::from_utf8_lossy(&o.stderr) + ); + return; + } + Err(e) => { + tracing::warn!("cargo metadata error: {e}"); + return; + } + }; + + let meta: CargoMetadata = match serde_json::from_slice(&output.stdout) { + Ok(m) => m, + Err(e) => { + tracing::warn!("failed to parse cargo metadata: {e}"); + return; + } + }; + + // Build a lookup: (name, version) -> license + let license_map: std::collections::HashMap<(&str, &str), &str> = meta + .packages + .iter() + .filter_map(|p| { + p.license + .as_deref() + .map(|l| (p.name.as_str(), p.version.as_str(), l)) + }) + .map(|(n, v, l)| ((n, v), l)) + .collect(); + + for entry in entries.iter_mut() { + if entry.package_manager != "cargo" || entry.license.is_some() { + continue; + } + if let Some(license) = license_map.get(&(entry.name.as_str(), entry.version.as_str())) { + entry.license = Some(license.to_string()); + } + } +} + async fn run_syft(repo_path: &Path, repo_id: &str) -> Result, CoreError> { let output = tokio::process::Command::new("syft") .arg(repo_path) .args(["-o", "cyclonedx-json"]) + // Enable remote license lookups for all ecosystems + .env("SYFT_GOLANG_SEARCH_REMOTE_LICENSES", "true") + .env("SYFT_JAVASCRIPT_SEARCH_REMOTE_LICENSES", "true") + .env("SYFT_PYTHON_SEARCH_REMOTE_LICENSES", "true") + .env("SYFT_JAVA_USE_NETWORK", "true") .output() .await .map_err(|e| CoreError::Scanner { @@ -62,22 +211,19 @@ async fn run_syft(repo_path: &Path, repo_id: &str) -> Result, Cor .unwrap_or_default() .into_iter() .map(|c| { + let package_manager = c + .purl + .as_deref() + .and_then(extract_ecosystem_from_purl) + .unwrap_or_else(|| "unknown".to_string()); let mut entry = SbomEntry::new( repo_id.to_string(), c.name, c.version.unwrap_or_else(|| "unknown".to_string()), - c.component_type.unwrap_or_else(|| "library".to_string()), + package_manager, ); entry.purl = c.purl; - entry.license = c.licenses.and_then(|ls| { - ls.first().and_then(|l| { - l.license.as_ref().map(|lic| { - lic.id - .clone() - .unwrap_or_else(|| lic.name.clone().unwrap_or_default()) - }) - }) - }); + entry.license = c.licenses.and_then(|ls| extract_license(&ls)); entry }) .collect(); @@ -144,6 +290,7 @@ struct CdxComponent { name: String, version: Option, #[serde(rename = "type")] + #[allow(dead_code)] component_type: Option, purl: Option, licenses: Option>, @@ -152,6 +299,8 @@ struct CdxComponent { #[derive(serde::Deserialize)] struct CdxLicenseWrapper { license: Option, + /// SPDX license expression (e.g. "MIT OR Apache-2.0") + expression: Option, } #[derive(serde::Deserialize)] @@ -188,3 +337,62 @@ struct AuditVuln { id: String, url: String, } + +// Cargo metadata types +#[derive(serde::Deserialize)] +struct CargoMetadata { + packages: Vec, +} + +#[derive(serde::Deserialize)] +struct CargoPackage { + name: String, + version: String, + license: Option, +} + +/// Extract the best license string from CycloneDX license entries. +/// Handles three formats: expression ("MIT OR Apache-2.0"), license.id ("MIT"), license.name ("MIT License"). +fn extract_license(entries: &[CdxLicenseWrapper]) -> Option { + // First pass: look for SPDX expressions (most precise for dual-licensed packages) + for entry in entries { + if let Some(ref expr) = entry.expression { + if !expr.is_empty() { + return Some(expr.clone()); + } + } + } + // Second pass: collect license.id or license.name from all entries + let parts: Vec = entries + .iter() + .filter_map(|e| { + e.license.as_ref().and_then(|lic| { + lic.id + .clone() + .or_else(|| lic.name.clone()) + .filter(|s| !s.is_empty()) + }) + }) + .collect(); + if parts.is_empty() { + return None; + } + Some(parts.join(" OR ")) +} + +/// Extract the ecosystem/package-manager from a PURL string. +/// e.g. "pkg:npm/lodash@4.17.21" → "npm", "pkg:cargo/serde@1.0" → "cargo" +fn extract_ecosystem_from_purl(purl: &str) -> Option { + let rest = purl.strip_prefix("pkg:")?; + let ecosystem = rest.split('/').next()?; + if ecosystem.is_empty() { + return None; + } + // Normalise common PURL types to user-friendly names + let normalised = match ecosystem { + "golang" => "go", + "pypi" => "pip", + _ => ecosystem, + }; + Some(normalised.to_string()) +} diff --git a/compliance-dashboard/src/infrastructure/sbom.rs b/compliance-dashboard/src/infrastructure/sbom.rs index c5fb0a2..f0dc2b6 100644 --- a/compliance-dashboard/src/infrastructure/sbom.rs +++ b/compliance-dashboard/src/infrastructure/sbom.rs @@ -77,8 +77,32 @@ pub struct SbomDiffResponse { pub data: SbomDiffResultData, } +#[derive(Debug, Clone, Serialize, Deserialize, Default)] +pub struct SbomFiltersResponse { + pub package_managers: Vec, + pub licenses: Vec, +} + // ── Server functions ── +#[server] +pub async fn fetch_sbom_filters() -> Result { + let state: super::server_state::ServerState = + dioxus_fullstack::FullstackContext::extract().await?; + + let url = format!("{}/api/v1/sbom/filters", state.agent_api_url); + let resp = reqwest::get(&url) + .await + .map_err(|e| ServerFnError::new(e.to_string()))?; + let text = resp + .text() + .await + .map_err(|e| ServerFnError::new(e.to_string()))?; + let body: SbomFiltersResponse = serde_json::from_str(&text) + .map_err(|e| ServerFnError::new(format!("Parse error: {e} — body: {text}")))?; + Ok(body) +} + #[server] pub async fn fetch_sbom_filtered( repo_id: Option, diff --git a/compliance-dashboard/src/pages/sbom.rs b/compliance-dashboard/src/pages/sbom.rs index 1779334..bae5756 100644 --- a/compliance-dashboard/src/pages/sbom.rs +++ b/compliance-dashboard/src/pages/sbom.rs @@ -36,6 +36,9 @@ pub fn SbomPage() -> Element { .ok() }); + // ── Dynamic filter options (package managers + licenses from DB) ── + let sbom_filters = use_resource(|| async { fetch_sbom_filters().await.ok() }); + // ── SBOM list (filtered) ── let sbom = use_resource(move || { let p = page(); @@ -132,14 +135,20 @@ pub fn SbomPage() -> Element { class: "sbom-filter-select", onchange: move |e| { pm_filter.set(e.value()); page.set(1); }, option { value: "", "All Managers" } - option { value: "npm", "npm" } - option { value: "cargo", "Cargo" } - option { value: "pip", "pip" } - option { value: "go", "Go" } - option { value: "maven", "Maven" } - option { value: "nuget", "NuGet" } - option { value: "composer", "Composer" } - option { value: "gem", "RubyGems" } + { + match &*sbom_filters.read() { + Some(Some(f)) => rsx! { + for pm in &f.package_managers { + { + let val = pm.clone(); + let label = pm_display_name(&val); + rsx! { option { value: "{val}", "{label}" } } + } + } + }, + _ => rsx! {}, + } + } } input { class: "sbom-filter-input", @@ -166,14 +175,19 @@ pub fn SbomPage() -> Element { class: "sbom-filter-select", onchange: move |e| { license_filter.set(e.value()); page.set(1); }, option { value: "", "All Licenses" } - option { value: "MIT", "MIT" } - option { value: "Apache-2.0", "Apache 2.0" } - option { value: "BSD-3-Clause", "BSD 3-Clause" } - option { value: "ISC", "ISC" } - option { value: "GPL-3.0", "GPL 3.0" } - option { value: "GPL-2.0", "GPL 2.0" } - option { value: "LGPL-2.1", "LGPL 2.1" } - option { value: "MPL-2.0", "MPL 2.0" } + { + match &*sbom_filters.read() { + Some(Some(f)) => rsx! { + for lic in &f.licenses { + { + let val = lic.clone(); + rsx! { option { value: "{val}", "{val}" } } + } + } + }, + _ => rsx! {}, + } + } } // ── Export button ── @@ -633,6 +647,21 @@ pub fn SbomPage() -> Element { } } +fn pm_display_name(pm: &str) -> &str { + match pm { + "npm" => "npm", + "cargo" => "Cargo", + "pip" => "pip", + "go" | "golang" => "Go", + "maven" => "Maven", + "nuget" => "NuGet", + "composer" => "Composer", + "gem" => "RubyGems", + "github" => "GitHub Actions", + other => other, + } +} + fn license_css_class(license: Option<&str>) -> &'static str { match license { Some(l) => {