use std::path::Path; use compliance_core::models::SbomEntry; use compliance_core::CoreError; #[tracing::instrument(skip_all, fields(repo_id = %repo_id))] pub(super) async fn run_syft(repo_path: &Path, repo_id: &str) -> Result, CoreError> { let output = tokio::process::Command::new("syft") .arg(repo_path) .args(["-o", "cyclonedx-json"]) // Enable remote license lookups for all ecosystems .env("SYFT_GOLANG_SEARCH_REMOTE_LICENSES", "true") .env("SYFT_JAVASCRIPT_SEARCH_REMOTE_LICENSES", "true") .env("SYFT_PYTHON_SEARCH_REMOTE_LICENSES", "true") .env("SYFT_JAVA_USE_NETWORK", "true") .output() .await .map_err(|e| CoreError::Scanner { scanner: "syft".to_string(), source: Box::new(e), })?; if !output.status.success() { let stderr = String::from_utf8_lossy(&output.stderr); return Err(CoreError::Scanner { scanner: "syft".to_string(), source: format!("syft exited with {}: {stderr}", output.status).into(), }); } let cdx: CycloneDxBom = serde_json::from_slice(&output.stdout)?; let entries = cdx .components .unwrap_or_default() .into_iter() .map(|c| { let package_manager = c .purl .as_deref() .and_then(extract_ecosystem_from_purl) .unwrap_or_else(|| "unknown".to_string()); let mut entry = SbomEntry::new( repo_id.to_string(), c.name, c.version.unwrap_or_else(|| "unknown".to_string()), package_manager, ); entry.purl = c.purl; entry.license = c.licenses.and_then(|ls| extract_license(&ls)); entry }) .collect(); Ok(entries) } // CycloneDX JSON types #[derive(serde::Deserialize)] struct CycloneDxBom { components: Option>, } #[derive(serde::Deserialize)] struct CdxComponent { name: String, version: Option, #[serde(rename = "type")] #[allow(dead_code)] component_type: Option, purl: Option, licenses: Option>, } #[derive(serde::Deserialize)] struct CdxLicenseWrapper { license: Option, /// SPDX license expression (e.g. "MIT OR Apache-2.0") expression: Option, } #[derive(serde::Deserialize)] struct CdxLicense { id: Option, name: Option, } /// Extract the best license string from CycloneDX license entries. /// Handles three formats: expression ("MIT OR Apache-2.0"), license.id ("MIT"), license.name ("MIT License"). fn extract_license(entries: &[CdxLicenseWrapper]) -> Option { // First pass: look for SPDX expressions (most precise for dual-licensed packages) for entry in entries { if let Some(ref expr) = entry.expression { if !expr.is_empty() { return Some(expr.clone()); } } } // Second pass: collect license.id or license.name from all entries let parts: Vec = entries .iter() .filter_map(|e| { e.license.as_ref().and_then(|lic| { lic.id .clone() .or_else(|| lic.name.clone()) .filter(|s| !s.is_empty()) }) }) .collect(); if parts.is_empty() { return None; } Some(parts.join(" OR ")) } /// Extract the ecosystem/package-manager from a PURL string. /// e.g. "pkg:npm/lodash@4.17.21" -> "npm", "pkg:cargo/serde@1.0" -> "cargo" fn extract_ecosystem_from_purl(purl: &str) -> Option { let rest = purl.strip_prefix("pkg:")?; let ecosystem = rest.split('/').next()?; if ecosystem.is_empty() { return None; } // Normalise common PURL types to user-friendly names let normalised = match ecosystem { "golang" => "go", "pypi" => "pip", _ => ecosystem, }; Some(normalised.to_string()) } #[cfg(test)] mod tests { use super::*; // --- extract_ecosystem_from_purl tests --- #[test] fn purl_npm() { assert_eq!( extract_ecosystem_from_purl("pkg:npm/lodash@4.17.21"), Some("npm".to_string()) ); } #[test] fn purl_cargo() { assert_eq!( extract_ecosystem_from_purl("pkg:cargo/serde@1.0.197"), Some("cargo".to_string()) ); } #[test] fn purl_golang_normalised() { assert_eq!( extract_ecosystem_from_purl("pkg:golang/github.com/gin-gonic/gin@1.9.1"), Some("go".to_string()) ); } #[test] fn purl_pypi_normalised() { assert_eq!( extract_ecosystem_from_purl("pkg:pypi/requests@2.31.0"), Some("pip".to_string()) ); } #[test] fn purl_maven() { assert_eq!( extract_ecosystem_from_purl("pkg:maven/org.apache.commons/commons-lang3@3.14.0"), Some("maven".to_string()) ); } #[test] fn purl_missing_prefix() { assert_eq!(extract_ecosystem_from_purl("npm/lodash@4.17.21"), None); } #[test] fn purl_empty_ecosystem() { assert_eq!(extract_ecosystem_from_purl("pkg:/lodash@4.17.21"), None); } #[test] fn purl_empty_string() { assert_eq!(extract_ecosystem_from_purl(""), None); } #[test] fn purl_just_prefix() { assert_eq!(extract_ecosystem_from_purl("pkg:"), None); } // --- extract_license tests --- #[test] fn license_from_expression() { let entries = vec![CdxLicenseWrapper { license: None, expression: Some("MIT OR Apache-2.0".to_string()), }]; assert_eq!( extract_license(&entries), Some("MIT OR Apache-2.0".to_string()) ); } #[test] fn license_from_id() { let entries = vec![CdxLicenseWrapper { license: Some(CdxLicense { id: Some("MIT".to_string()), name: None, }), expression: None, }]; assert_eq!(extract_license(&entries), Some("MIT".to_string())); } #[test] fn license_from_name_fallback() { let entries = vec![CdxLicenseWrapper { license: Some(CdxLicense { id: None, name: Some("MIT License".to_string()), }), expression: None, }]; assert_eq!(extract_license(&entries), Some("MIT License".to_string())); } #[test] fn license_expression_preferred_over_id() { let entries = vec![ CdxLicenseWrapper { license: Some(CdxLicense { id: Some("MIT".to_string()), name: None, }), expression: None, }, CdxLicenseWrapper { license: None, expression: Some("MIT AND Apache-2.0".to_string()), }, ]; // Expression should be preferred (first pass finds it) assert_eq!( extract_license(&entries), Some("MIT AND Apache-2.0".to_string()) ); } #[test] fn license_multiple_ids_joined() { let entries = vec![ CdxLicenseWrapper { license: Some(CdxLicense { id: Some("MIT".to_string()), name: None, }), expression: None, }, CdxLicenseWrapper { license: Some(CdxLicense { id: Some("Apache-2.0".to_string()), name: None, }), expression: None, }, ]; assert_eq!( extract_license(&entries), Some("MIT OR Apache-2.0".to_string()) ); } #[test] fn license_empty_entries() { let entries: Vec = vec![]; assert_eq!(extract_license(&entries), None); } #[test] fn license_all_empty_strings() { let entries = vec![CdxLicenseWrapper { license: Some(CdxLicense { id: Some(String::new()), name: Some(String::new()), }), expression: Some(String::new()), }]; assert_eq!(extract_license(&entries), None); } #[test] fn license_none_fields() { let entries = vec![CdxLicenseWrapper { license: None, expression: None, }]; assert_eq!(extract_license(&entries), None); } // --- CycloneDX deserialization tests --- #[test] fn deserialize_cyclonedx_bom() { let json = r#"{ "components": [ { "name": "serde", "version": "1.0.197", "type": "library", "purl": "pkg:cargo/serde@1.0.197", "licenses": [ {"expression": "MIT OR Apache-2.0"} ] } ] }"#; let bom: CycloneDxBom = serde_json::from_str(json).unwrap(); let components = bom.components.unwrap(); assert_eq!(components.len(), 1); assert_eq!(components[0].name, "serde"); assert_eq!(components[0].version, Some("1.0.197".to_string())); assert_eq!( components[0].purl, Some("pkg:cargo/serde@1.0.197".to_string()) ); } #[test] fn deserialize_cyclonedx_no_components() { let json = r#"{}"#; let bom: CycloneDxBom = serde_json::from_str(json).unwrap(); assert!(bom.components.is_none()); } #[test] fn deserialize_cyclonedx_minimal_component() { let json = r#"{"components": [{"name": "foo"}]}"#; let bom: CycloneDxBom = serde_json::from_str(json).unwrap(); let c = &bom.components.unwrap()[0]; assert_eq!(c.name, "foo"); assert!(c.version.is_none()); assert!(c.purl.is_none()); assert!(c.licenses.is_none()); } }