refactor: modularize codebase and add 404 unit tests (#13)
CI / Format (push) Successful in 4s
CI / Clippy (push) Successful in 4m19s
CI / Security Audit (push) Successful in 1m44s
CI / Tests (push) Successful in 5m15s
CI / Detect Changes (push) Successful in 5s
CI / Deploy Agent (push) Successful in 2s
CI / Deploy Dashboard (push) Successful in 2s
CI / Deploy Docs (push) Has been skipped
CI / Deploy MCP (push) Successful in 2s

This commit was merged in pull request #13.
This commit is contained in:
2026-03-13 08:03:45 +00:00
parent acc5b86aa4
commit 3bb690e5bb
89 changed files with 11884 additions and 6046 deletions
@@ -0,0 +1,72 @@
use std::path::Path;
use compliance_core::CoreError;
pub(super) struct AuditVuln {
pub package: String,
pub id: String,
pub url: String,
}
#[tracing::instrument(skip_all)]
pub(super) async fn run_cargo_audit(
repo_path: &Path,
_repo_id: &str,
) -> Result<Vec<AuditVuln>, CoreError> {
let cargo_lock = repo_path.join("Cargo.lock");
if !cargo_lock.exists() {
return Ok(Vec::new());
}
let output = tokio::process::Command::new("cargo")
.args(["audit", "--json"])
.current_dir(repo_path)
.env("RUSTC_WRAPPER", "")
.output()
.await
.map_err(|e| CoreError::Scanner {
scanner: "cargo-audit".to_string(),
source: Box::new(e),
})?;
let result: CargoAuditOutput =
serde_json::from_slice(&output.stdout).unwrap_or_else(|_| CargoAuditOutput {
vulnerabilities: CargoAuditVulns { list: Vec::new() },
});
let vulns = result
.vulnerabilities
.list
.into_iter()
.map(|v| AuditVuln {
package: v.advisory.package,
id: v.advisory.id,
url: v.advisory.url,
})
.collect();
Ok(vulns)
}
// Cargo audit types
#[derive(serde::Deserialize)]
struct CargoAuditOutput {
vulnerabilities: CargoAuditVulns,
}
#[derive(serde::Deserialize)]
struct CargoAuditVulns {
list: Vec<CargoAuditEntry>,
}
#[derive(serde::Deserialize)]
struct CargoAuditEntry {
advisory: CargoAuditAdvisory,
}
#[derive(serde::Deserialize)]
struct CargoAuditAdvisory {
id: String,
package: String,
url: String,
}
+216
View File
@@ -0,0 +1,216 @@
mod cargo_audit;
mod syft;
use std::path::Path;
use compliance_core::models::{SbomEntry, ScanType, VulnRef};
use compliance_core::traits::{ScanOutput, Scanner};
use compliance_core::CoreError;
pub struct SbomScanner;
impl Scanner for SbomScanner {
fn name(&self) -> &str {
"sbom"
}
fn scan_type(&self) -> ScanType {
ScanType::Sbom
}
#[tracing::instrument(skip_all)]
async fn scan(&self, repo_path: &Path, repo_id: &str) -> Result<ScanOutput, CoreError> {
let mut entries = Vec::new();
// Generate missing lock files so Syft can resolve the full dependency tree
generate_lockfiles(repo_path).await;
// Run syft for SBOM generation
match syft::run_syft(repo_path, repo_id).await {
Ok(syft_entries) => entries.extend(syft_entries),
Err(e) => tracing::warn!("syft failed: {e}"),
}
// Enrich Cargo entries with license info from cargo metadata
enrich_cargo_licenses(repo_path, &mut entries).await;
// Run cargo-audit for Rust-specific vulns
match cargo_audit::run_cargo_audit(repo_path, repo_id).await {
Ok(vulns) => merge_audit_vulns(&mut entries, vulns),
Err(e) => tracing::warn!("cargo-audit skipped: {e}"),
}
Ok(ScanOutput {
findings: Vec::new(),
sbom_entries: entries,
})
}
}
/// Generate missing lock files so Syft can resolve the full dependency tree.
/// This handles repos that gitignore their lock files (common for Rust libraries).
#[tracing::instrument(skip_all)]
async fn generate_lockfiles(repo_path: &Path) {
// Cargo: generate Cargo.lock if Cargo.toml exists without it
if repo_path.join("Cargo.toml").exists() && !repo_path.join("Cargo.lock").exists() {
tracing::info!("generating Cargo.lock for SBOM scan");
let result = tokio::process::Command::new("cargo")
.args(["generate-lockfile"])
.current_dir(repo_path)
.env("RUSTC_WRAPPER", "")
.output()
.await;
match result {
Ok(o) if o.status.success() => tracing::info!("Cargo.lock generated"),
Ok(o) => tracing::warn!(
"cargo generate-lockfile failed: {}",
String::from_utf8_lossy(&o.stderr)
),
Err(e) => tracing::warn!("cargo generate-lockfile error: {e}"),
}
}
// pip: generate a requirements lock if only pyproject.toml / setup.py exists
let has_pip_manifest = repo_path.join("pyproject.toml").exists()
|| repo_path.join("setup.py").exists()
|| repo_path.join("setup.cfg").exists();
let has_pip_lock = repo_path.join("requirements.txt").exists()
|| repo_path.join("requirements-lock.txt").exists()
|| repo_path.join("poetry.lock").exists()
|| repo_path.join("Pipfile.lock").exists();
if has_pip_manifest && !has_pip_lock {
// Try pip-compile (pip-tools) first, fall back to pip freeze approach
tracing::info!("attempting to generate pip requirements for SBOM scan");
if repo_path.join("pyproject.toml").exists() {
let result = tokio::process::Command::new("pip-compile")
.args([
"--quiet",
"--output-file",
"requirements.txt",
"pyproject.toml",
])
.current_dir(repo_path)
.output()
.await;
match result {
Ok(o) if o.status.success() => {
tracing::info!("requirements.txt generated via pip-compile")
}
_ => tracing::warn!(
"pip-compile not available or failed, Syft will parse pyproject.toml directly"
),
}
}
}
// npm: generate package-lock.json if package.json exists without it
let has_npm_lock = repo_path.join("package-lock.json").exists()
|| repo_path.join("yarn.lock").exists()
|| repo_path.join("pnpm-lock.yaml").exists();
if repo_path.join("package.json").exists() && !has_npm_lock {
tracing::info!("generating package-lock.json for SBOM scan");
let result = tokio::process::Command::new("npm")
.args(["install", "--package-lock-only", "--ignore-scripts"])
.current_dir(repo_path)
.output()
.await;
match result {
Ok(o) if o.status.success() => tracing::info!("package-lock.json generated"),
Ok(o) => tracing::warn!(
"npm install --package-lock-only failed: {}",
String::from_utf8_lossy(&o.stderr)
),
Err(e) => tracing::warn!("npm lock generation error: {e}"),
}
}
}
/// Enrich Cargo SBOM entries with license info from `cargo metadata`.
/// Syft doesn't read license data from Cargo.lock, so we fill it in.
#[tracing::instrument(skip_all)]
async fn enrich_cargo_licenses(repo_path: &Path, entries: &mut [SbomEntry]) {
if !repo_path.join("Cargo.toml").exists() {
return;
}
let has_cargo_entries = entries.iter().any(|e| e.package_manager == "cargo");
if !has_cargo_entries {
return;
}
let output = match tokio::process::Command::new("cargo")
.args(["metadata", "--format-version", "1"])
.current_dir(repo_path)
.env("RUSTC_WRAPPER", "")
.output()
.await
{
Ok(o) if o.status.success() => o,
Ok(o) => {
tracing::warn!(
"cargo metadata failed: {}",
String::from_utf8_lossy(&o.stderr)
);
return;
}
Err(e) => {
tracing::warn!("cargo metadata error: {e}");
return;
}
};
let meta: CargoMetadata = match serde_json::from_slice(&output.stdout) {
Ok(m) => m,
Err(e) => {
tracing::warn!("failed to parse cargo metadata: {e}");
return;
}
};
// Build a lookup: (name, version) -> license
let license_map: std::collections::HashMap<(&str, &str), &str> = meta
.packages
.iter()
.filter_map(|p| {
p.license
.as_deref()
.map(|l| (p.name.as_str(), p.version.as_str(), l))
})
.map(|(n, v, l)| ((n, v), l))
.collect();
for entry in entries.iter_mut() {
if entry.package_manager != "cargo" || entry.license.is_some() {
continue;
}
if let Some(license) = license_map.get(&(entry.name.as_str(), entry.version.as_str())) {
entry.license = Some(license.to_string());
}
}
}
fn merge_audit_vulns(entries: &mut [SbomEntry], vulns: Vec<cargo_audit::AuditVuln>) {
for vuln in vulns {
if let Some(entry) = entries.iter_mut().find(|e| e.name == vuln.package) {
entry.known_vulnerabilities.push(VulnRef {
id: vuln.id.clone(),
source: "cargo-audit".to_string(),
severity: None,
url: Some(vuln.url),
});
}
}
}
// Cargo metadata types
#[derive(serde::Deserialize)]
struct CargoMetadata {
packages: Vec<CargoPackage>,
}
#[derive(serde::Deserialize)]
struct CargoPackage {
name: String,
version: String,
license: Option<String>,
}
+355
View File
@@ -0,0 +1,355 @@
use std::path::Path;
use compliance_core::models::SbomEntry;
use compliance_core::CoreError;
#[tracing::instrument(skip_all, fields(repo_id = %repo_id))]
pub(super) async fn run_syft(repo_path: &Path, repo_id: &str) -> Result<Vec<SbomEntry>, CoreError> {
let output = tokio::process::Command::new("syft")
.arg(repo_path)
.args(["-o", "cyclonedx-json"])
// Enable remote license lookups for all ecosystems
.env("SYFT_GOLANG_SEARCH_REMOTE_LICENSES", "true")
.env("SYFT_JAVASCRIPT_SEARCH_REMOTE_LICENSES", "true")
.env("SYFT_PYTHON_SEARCH_REMOTE_LICENSES", "true")
.env("SYFT_JAVA_USE_NETWORK", "true")
.output()
.await
.map_err(|e| CoreError::Scanner {
scanner: "syft".to_string(),
source: Box::new(e),
})?;
if !output.status.success() {
let stderr = String::from_utf8_lossy(&output.stderr);
return Err(CoreError::Scanner {
scanner: "syft".to_string(),
source: format!("syft exited with {}: {stderr}", output.status).into(),
});
}
let cdx: CycloneDxBom = serde_json::from_slice(&output.stdout)?;
let entries = cdx
.components
.unwrap_or_default()
.into_iter()
.map(|c| {
let package_manager = c
.purl
.as_deref()
.and_then(extract_ecosystem_from_purl)
.unwrap_or_else(|| "unknown".to_string());
let mut entry = SbomEntry::new(
repo_id.to_string(),
c.name,
c.version.unwrap_or_else(|| "unknown".to_string()),
package_manager,
);
entry.purl = c.purl;
entry.license = c.licenses.and_then(|ls| extract_license(&ls));
entry
})
.collect();
Ok(entries)
}
// CycloneDX JSON types
#[derive(serde::Deserialize)]
struct CycloneDxBom {
components: Option<Vec<CdxComponent>>,
}
#[derive(serde::Deserialize)]
struct CdxComponent {
name: String,
version: Option<String>,
#[serde(rename = "type")]
#[allow(dead_code)]
component_type: Option<String>,
purl: Option<String>,
licenses: Option<Vec<CdxLicenseWrapper>>,
}
#[derive(serde::Deserialize)]
struct CdxLicenseWrapper {
license: Option<CdxLicense>,
/// SPDX license expression (e.g. "MIT OR Apache-2.0")
expression: Option<String>,
}
#[derive(serde::Deserialize)]
struct CdxLicense {
id: Option<String>,
name: Option<String>,
}
/// Extract the best license string from CycloneDX license entries.
/// Handles three formats: expression ("MIT OR Apache-2.0"), license.id ("MIT"), license.name ("MIT License").
fn extract_license(entries: &[CdxLicenseWrapper]) -> Option<String> {
// First pass: look for SPDX expressions (most precise for dual-licensed packages)
for entry in entries {
if let Some(ref expr) = entry.expression {
if !expr.is_empty() {
return Some(expr.clone());
}
}
}
// Second pass: collect license.id or license.name from all entries
let parts: Vec<String> = entries
.iter()
.filter_map(|e| {
e.license.as_ref().and_then(|lic| {
lic.id
.clone()
.or_else(|| lic.name.clone())
.filter(|s| !s.is_empty())
})
})
.collect();
if parts.is_empty() {
return None;
}
Some(parts.join(" OR "))
}
/// Extract the ecosystem/package-manager from a PURL string.
/// e.g. "pkg:npm/lodash@4.17.21" -> "npm", "pkg:cargo/serde@1.0" -> "cargo"
fn extract_ecosystem_from_purl(purl: &str) -> Option<String> {
let rest = purl.strip_prefix("pkg:")?;
let ecosystem = rest.split('/').next()?;
if ecosystem.is_empty() {
return None;
}
// Normalise common PURL types to user-friendly names
let normalised = match ecosystem {
"golang" => "go",
"pypi" => "pip",
_ => ecosystem,
};
Some(normalised.to_string())
}
#[cfg(test)]
mod tests {
use super::*;
// --- extract_ecosystem_from_purl tests ---
#[test]
fn purl_npm() {
assert_eq!(
extract_ecosystem_from_purl("pkg:npm/lodash@4.17.21"),
Some("npm".to_string())
);
}
#[test]
fn purl_cargo() {
assert_eq!(
extract_ecosystem_from_purl("pkg:cargo/serde@1.0.197"),
Some("cargo".to_string())
);
}
#[test]
fn purl_golang_normalised() {
assert_eq!(
extract_ecosystem_from_purl("pkg:golang/github.com/gin-gonic/gin@1.9.1"),
Some("go".to_string())
);
}
#[test]
fn purl_pypi_normalised() {
assert_eq!(
extract_ecosystem_from_purl("pkg:pypi/requests@2.31.0"),
Some("pip".to_string())
);
}
#[test]
fn purl_maven() {
assert_eq!(
extract_ecosystem_from_purl("pkg:maven/org.apache.commons/commons-lang3@3.14.0"),
Some("maven".to_string())
);
}
#[test]
fn purl_missing_prefix() {
assert_eq!(extract_ecosystem_from_purl("npm/lodash@4.17.21"), None);
}
#[test]
fn purl_empty_ecosystem() {
assert_eq!(extract_ecosystem_from_purl("pkg:/lodash@4.17.21"), None);
}
#[test]
fn purl_empty_string() {
assert_eq!(extract_ecosystem_from_purl(""), None);
}
#[test]
fn purl_just_prefix() {
assert_eq!(extract_ecosystem_from_purl("pkg:"), None);
}
// --- extract_license tests ---
#[test]
fn license_from_expression() {
let entries = vec![CdxLicenseWrapper {
license: None,
expression: Some("MIT OR Apache-2.0".to_string()),
}];
assert_eq!(
extract_license(&entries),
Some("MIT OR Apache-2.0".to_string())
);
}
#[test]
fn license_from_id() {
let entries = vec![CdxLicenseWrapper {
license: Some(CdxLicense {
id: Some("MIT".to_string()),
name: None,
}),
expression: None,
}];
assert_eq!(extract_license(&entries), Some("MIT".to_string()));
}
#[test]
fn license_from_name_fallback() {
let entries = vec![CdxLicenseWrapper {
license: Some(CdxLicense {
id: None,
name: Some("MIT License".to_string()),
}),
expression: None,
}];
assert_eq!(extract_license(&entries), Some("MIT License".to_string()));
}
#[test]
fn license_expression_preferred_over_id() {
let entries = vec![
CdxLicenseWrapper {
license: Some(CdxLicense {
id: Some("MIT".to_string()),
name: None,
}),
expression: None,
},
CdxLicenseWrapper {
license: None,
expression: Some("MIT AND Apache-2.0".to_string()),
},
];
// Expression should be preferred (first pass finds it)
assert_eq!(
extract_license(&entries),
Some("MIT AND Apache-2.0".to_string())
);
}
#[test]
fn license_multiple_ids_joined() {
let entries = vec![
CdxLicenseWrapper {
license: Some(CdxLicense {
id: Some("MIT".to_string()),
name: None,
}),
expression: None,
},
CdxLicenseWrapper {
license: Some(CdxLicense {
id: Some("Apache-2.0".to_string()),
name: None,
}),
expression: None,
},
];
assert_eq!(
extract_license(&entries),
Some("MIT OR Apache-2.0".to_string())
);
}
#[test]
fn license_empty_entries() {
let entries: Vec<CdxLicenseWrapper> = vec![];
assert_eq!(extract_license(&entries), None);
}
#[test]
fn license_all_empty_strings() {
let entries = vec![CdxLicenseWrapper {
license: Some(CdxLicense {
id: Some(String::new()),
name: Some(String::new()),
}),
expression: Some(String::new()),
}];
assert_eq!(extract_license(&entries), None);
}
#[test]
fn license_none_fields() {
let entries = vec![CdxLicenseWrapper {
license: None,
expression: None,
}];
assert_eq!(extract_license(&entries), None);
}
// --- CycloneDX deserialization tests ---
#[test]
fn deserialize_cyclonedx_bom() {
let json = r#"{
"components": [
{
"name": "serde",
"version": "1.0.197",
"type": "library",
"purl": "pkg:cargo/serde@1.0.197",
"licenses": [
{"expression": "MIT OR Apache-2.0"}
]
}
]
}"#;
let bom: CycloneDxBom = serde_json::from_str(json).unwrap();
let components = bom.components.unwrap();
assert_eq!(components.len(), 1);
assert_eq!(components[0].name, "serde");
assert_eq!(components[0].version, Some("1.0.197".to_string()));
assert_eq!(
components[0].purl,
Some("pkg:cargo/serde@1.0.197".to_string())
);
}
#[test]
fn deserialize_cyclonedx_no_components() {
let json = r#"{}"#;
let bom: CycloneDxBom = serde_json::from_str(json).unwrap();
assert!(bom.components.is_none());
}
#[test]
fn deserialize_cyclonedx_minimal_component() {
let json = r#"{"components": [{"name": "foo"}]}"#;
let bom: CycloneDxBom = serde_json::from_str(json).unwrap();
let c = &bom.components.unwrap()[0];
assert_eq!(c.name, "foo");
assert!(c.version.is_none());
assert!(c.purl.is_none());
assert!(c.licenses.is_none());
}
}