Initial commit: Compliance Scanner Agent

Autonomous security and compliance scanning agent for git repositories.
Features: SAST (Semgrep), SBOM (Syft), CVE monitoring (OSV.dev/NVD),
GDPR/OAuth pattern detection, LLM triage, issue creation (GitHub/GitLab/Jira),
PR reviews, and Dioxus fullstack dashboard.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
Sharang Parnerkar
2026-03-02 13:30:17 +01:00
commit 0867e401bc
97 changed files with 11750 additions and 0 deletions

View File

@@ -0,0 +1,199 @@
use compliance_core::models::{CveAlert, CveSource, SbomEntry, VulnRef};
use compliance_core::CoreError;
/// Looks up known vulnerabilities for SBOM entries and enriches them with
/// CVSS scores.
pub struct CveScanner {
/// HTTP client used for the OSV.dev, NVD and SearXNG requests.
http: reqwest::Client,
/// Base URL of a SearXNG instance; when `None`, `search_context` returns
/// an empty list without issuing a request.
searxng_url: Option<String>,
/// Optional NVD API key, sent as the `apiKey` request header when present.
nvd_api_key: Option<String>,
}
impl CveScanner {
pub fn new(http: reqwest::Client, searxng_url: Option<String>, nvd_api_key: Option<String>) -> Self {
Self { http, searxng_url, nvd_api_key }
}
/// Queries OSV.dev for every entry that carries a package URL, records each
/// hit on the entry itself and returns one [`CveAlert`] per hit.
///
/// Every alert is afterwards enriched with a CVSS base score from NVD on a
/// best-effort basis: a failed or empty NVD lookup leaves `cvss_score`
/// untouched.
///
/// # Errors
/// Propagates the error returned by the OSV.dev batch query.
pub async fn scan_dependencies(
    &self,
    repo_id: &str,
    entries: &mut [SbomEntry],
) -> Result<Vec<CveAlert>, CoreError> {
    // `query_osv_batch` only sends entries that have a purl, so its results are
    // positional relative to that filtered subset, not to `entries` itself.
    // Remember the original positions so each result lands on the right entry.
    let queried: Vec<usize> = entries
        .iter()
        .enumerate()
        .filter(|(_, entry)| entry.purl.is_some())
        .map(|(idx, _)| idx)
        .collect();

    // Batch query OSV.dev
    let osv_results = self.query_osv_batch(entries).await?;

    let mut alerts = Vec::new();
    for (idx, vulns) in queried.into_iter().zip(osv_results) {
        let Some(entry) = entries.get_mut(idx) else {
            continue;
        };
        for vuln in vulns {
            entry.known_vulnerabilities.push(VulnRef {
                id: vuln.id.clone(),
                source: "osv".to_string(),
                severity: vuln.severity,
                url: Some(format!("https://osv.dev/vulnerability/{}", vuln.id)),
            });
            let mut alert = CveAlert::new(
                vuln.id,
                repo_id.to_string(),
                entry.name.clone(),
                entry.version.clone(),
                CveSource::Osv,
            );
            alert.summary = vuln.summary;
            alerts.push(alert);
        }
    }

    // Enrich with NVD CVSS scores; lookup failures are deliberately ignored.
    for alert in &mut alerts {
        if let Ok(Some(cvss)) = self.query_nvd(&alert.cve_id).await {
            alert.cvss_score = Some(cvss);
        }
    }
    Ok(alerts)
}
/// Sends one OSV.dev `querybatch` request covering every entry that has a purl.
///
/// The outer `Vec` holds one element per result returned by the service, in
/// response order; entries without a purl are never queried. A non-success
/// HTTP status is logged and yields an empty result rather than an error.
///
/// # Errors
/// Returns `CoreError::Http` when the request cannot be sent or the response
/// body cannot be decoded.
async fn query_osv_batch(&self, entries: &[SbomEntry]) -> Result<Vec<Vec<OsvVuln>>, CoreError> {
    let mut queries = Vec::new();
    for purl in entries.iter().filter_map(|entry| entry.purl.as_ref()) {
        queries.push(serde_json::json!({
            "package": { "purl": purl }
        }));
    }
    if queries.is_empty() {
        return Ok(Vec::new());
    }

    let payload = serde_json::json!({ "queries": queries });
    let request = self
        .http
        .post("https://api.osv.dev/v1/querybatch")
        .json(&payload);
    let response = match request.send().await {
        Ok(response) => response,
        Err(e) => return Err(CoreError::Http(format!("OSV.dev request failed: {e}"))),
    };

    let status = response.status();
    if !status.is_success() {
        let body = response.text().await.unwrap_or_default();
        tracing::warn!("OSV.dev returned {status}: {body}");
        return Ok(Vec::new());
    }

    let parsed = response
        .json::<OsvBatchResponse>()
        .await
        .map_err(|e| CoreError::Http(format!("Failed to parse OSV.dev response: {e}")))?;

    let mut per_query = Vec::with_capacity(parsed.results.len());
    for result in parsed.results {
        let mut vulns = Vec::new();
        for raw in result.vulns.unwrap_or_default() {
            // Severity, when present, is a string under `database_specific.severity`.
            let severity = raw
                .database_specific
                .as_ref()
                .and_then(|extra| extra.get("severity"))
                .and_then(|value| value.as_str())
                .map(String::from);
            vulns.push(OsvVuln {
                id: raw.id,
                summary: raw.summary,
                severity,
            });
        }
        per_query.push(vulns);
    }
    Ok(per_query)
}
/// Fetches the CVSS v3.1 base score for `cve_id` from the NVD REST API.
///
/// Returns `Ok(None)` without issuing a request when the identifier does not
/// start with `CVE-`, and also when NVD answers with a non-success status or
/// the response carries no v3.1 metric.
///
/// # Errors
/// Returns `CoreError::Http` when the request cannot be sent or the body is
/// not valid JSON.
async fn query_nvd(&self, cve_id: &str) -> Result<Option<f64>, CoreError> {
    if !cve_id.starts_with("CVE-") {
        return Ok(None);
    }

    let endpoint = format!("https://services.nvd.nist.gov/rest/json/cves/2.0?cveId={cve_id}");
    let request = match self.nvd_api_key.as_deref() {
        Some(key) => self.http.get(&endpoint).header("apiKey", key),
        None => self.http.get(&endpoint),
    };

    let response = request
        .send()
        .await
        .map_err(|e| CoreError::Http(format!("NVD request failed: {e}")))?;
    if !response.status().is_success() {
        return Ok(None);
    }

    let payload: serde_json::Value = response
        .json()
        .await
        .map_err(|e| CoreError::Http(format!("Failed to parse NVD response: {e}")))?;

    // Walk vulnerabilities[0].cve.metrics.cvssMetricV31[0].cvssData.baseScore.
    let first_cve = payload["vulnerabilities"]
        .as_array()
        .and_then(|items| items.first());
    let first_metric = first_cve
        .and_then(|item| item["cve"]["metrics"]["cvssMetricV31"].as_array())
        .and_then(|metrics| metrics.first());
    Ok(first_metric.and_then(|metric| metric["cvssData"]["baseScore"].as_f64()))
}
pub async fn search_context(&self, cve_id: &str) -> Result<Vec<String>, CoreError> {
let Some(searxng_url) = &self.searxng_url else {
return Ok(Vec::new());
};
let url = format!("{}/search?q={cve_id}&format=json&engines=duckduckgo", searxng_url.trim_end_matches('/'));
let resp = self.http.get(&url).send().await.map_err(|e| {
CoreError::Http(format!("SearXNG request failed: {e}"))
})?;
if !resp.status().is_success() {
return Ok(Vec::new());
}
let body: serde_json::Value = resp.json().await.unwrap_or_default();
let results = body["results"]
.as_array()
.map(|arr| {
arr.iter()
.take(5)
.filter_map(|r| r["url"].as_str().map(String::from))
.collect()
})
.unwrap_or_default();
Ok(results)
}
}
/// Top-level body of an OSV.dev `querybatch` response.
#[derive(serde::Deserialize)]
struct OsvBatchResponse {
/// One result object per answered query.
results: Vec<OsvBatchResult>,
}
/// Result for a single query inside an OSV.dev batch response.
#[derive(serde::Deserialize)]
struct OsvBatchResult {
/// Vulnerabilities reported for the query; a missing field is treated as
/// an empty list by the caller.
vulns: Option<Vec<OsvVulnEntry>>,
}
/// Raw vulnerability object as deserialized from the OSV.dev response.
#[derive(serde::Deserialize)]
struct OsvVulnEntry {
/// Vulnerability identifier.
id: String,
/// Optional human-readable summary.
summary: Option<String>,
/// Free-form JSON; a string `severity` key is read from it when present.
database_specific: Option<serde_json::Value>,
}
/// Internal, flattened view of an OSV vulnerability.
struct OsvVuln {
/// Vulnerability identifier; also used to build the osv.dev detail URL.
id: String,
/// Optional human-readable summary copied onto the alert.
summary: Option<String>,
/// Severity string taken from `database_specific.severity`, if any.
severity: Option<String>,
}

View File

@@ -0,0 +1,10 @@
use sha2::{Digest, Sha256};
/// Returns the hex-encoded SHA-256 digest of `parts`, where every part is
/// followed by a `|` byte before hashing.
///
/// The same sequence of parts always yields the same fingerprint.
pub fn compute_fingerprint(parts: &[&str]) -> String {
    // Assemble the exact byte stream ("part|part|...|") and hash it in one go.
    let mut material = Vec::new();
    for part in parts {
        material.extend_from_slice(part.as_bytes());
        material.push(b'|');
    }
    hex::encode(Sha256::digest(&material))
}

View File

@@ -0,0 +1,100 @@
use std::path::{Path, PathBuf};
use git2::{FetchOptions, Repository};
use crate::error::AgentError;
/// Helper for cloning and updating local checkouts of tracked repositories.
pub struct GitOps {
/// Directory under which each repository is checked out, one
/// sub-directory per repository name.
base_path: PathBuf,
}
impl GitOps {
pub fn new(base_path: &str) -> Self {
Self {
base_path: PathBuf::from(base_path),
}
}
/// Ensures an up-to-date checkout of `git_url` exists at
/// `<base_path>/<repo_name>` and returns that path.
///
/// An existing directory is updated via `fetch`; otherwise the repository is
/// cloned. When the clone fails, the partially created directory is removed
/// again so that the next call retries the clone instead of trying to fetch
/// into a directory that is not a repository.
///
/// # Errors
/// Returns the underlying I/O or git error.
pub fn clone_or_fetch(&self, git_url: &str, repo_name: &str) -> Result<PathBuf, AgentError> {
    let repo_path = self.base_path.join(repo_name);
    if repo_path.exists() {
        self.fetch(&repo_path)?;
        return Ok(repo_path);
    }

    std::fs::create_dir_all(&repo_path)?;
    if let Err(err) = Repository::clone(git_url, &repo_path) {
        // Leaving the directory behind would route every later call into the
        // fetch branch, which can never succeed on a non-repository.
        if let Err(cleanup) = std::fs::remove_dir_all(&repo_path) {
            tracing::warn!(
                "Failed to remove partial clone at {}: {cleanup}",
                repo_path.display()
            );
        }
        return Err(err.into());
    }
    tracing::info!("Cloned {git_url} to {}", repo_path.display());
    Ok(repo_path)
}
/// Fetches from `origin` and moves the currently checked-out branch to the
/// fetched commit.
///
/// The reference that `HEAD` resolves to is overwritten with the commit
/// recorded in `FETCH_HEAD`, and the working tree is then force-checked-out,
/// which discards local modifications.
fn fetch(&self, repo_path: &Path) -> Result<(), AgentError> {
    let repository = Repository::open(repo_path)?;

    // An empty refspec list makes the remote use its configured refspecs.
    let mut origin = repository.find_remote("origin")?;
    let mut options = FetchOptions::new();
    let no_refspecs: &[&str] = &[];
    origin.fetch(no_refspecs, Some(&mut options), None)?;

    // Resolve the commit that was just fetched.
    let fetched_ref = repository.find_reference("FETCH_HEAD")?;
    let target = repository.reference_to_annotated_commit(&fetched_ref)?.id();

    // Point the current branch (or HEAD itself as a fallback name) at it.
    let current = repository.head()?;
    let branch_name = current.name().unwrap_or("HEAD");
    repository.reference(branch_name, target, true, "fast-forward")?;

    let mut checkout = git2::build::CheckoutBuilder::default();
    checkout.force();
    repository.checkout_head(Some(&mut checkout))?;

    tracing::info!("Fetched and fast-forwarded {}", repo_path.display());
    Ok(())
}
/// Returns the commit id that `HEAD` of the repository at `repo_path`
/// resolves to, formatted as a hex string.
pub fn get_head_sha(repo_path: &Path) -> Result<String, AgentError> {
    let repository = Repository::open(repo_path)?;
    let head_oid = repository.head()?.peel_to_commit()?.id();
    Ok(head_oid.to_string())
}
/// Reports whether the repository's `HEAD` differs from `last_sha`.
///
/// Returns `true` when `last_sha` is `None` (nothing recorded yet) or when it
/// is not equal to the current `HEAD` commit id.
pub fn has_new_commits(repo_path: &Path, last_sha: Option<&str>) -> Result<bool, AgentError> {
    let head_sha = Self::get_head_sha(repo_path)?;
    Ok(last_sha != Some(head_sha.as_str()))
}
/// Lists the paths touched between two commits of the repository at
/// `repo_path`.
///
/// The trees of `old_sha` and `new_sha` are diffed and, for every delta, the
/// path of its "new" side is collected (lossily converted to UTF-8).
pub fn get_changed_files(
    repo_path: &Path,
    old_sha: &str,
    new_sha: &str,
) -> Result<Vec<String>, AgentError> {
    let repository = Repository::open(repo_path)?;

    let before_commit = repository.find_commit(git2::Oid::from_str(old_sha)?)?;
    let after_commit = repository.find_commit(git2::Oid::from_str(new_sha)?)?;
    let before = before_commit.tree()?;
    let after = after_commit.tree()?;

    let diff = repository.diff_tree_to_tree(Some(&before), Some(&after), None)?;
    let changed: Vec<String> = diff
        .deltas()
        .filter_map(|delta| {
            delta
                .new_file()
                .path()
                .map(|path| path.to_string_lossy().to_string())
        })
        .collect();
    Ok(changed)
}
}

View File

@@ -0,0 +1,7 @@
pub mod cve;
pub mod dedup;
pub mod git;
pub mod orchestrator;
pub mod patterns;
pub mod sbom;
pub mod semgrep;

View File

@@ -0,0 +1,252 @@
use std::sync::Arc;
use mongodb::bson::doc;
use compliance_core::models::*;
use compliance_core::traits::Scanner;
use compliance_core::AgentConfig;
use crate::database::Database;
use crate::error::AgentError;
use crate::llm::LlmClient;
use crate::pipeline::cve::CveScanner;
use crate::pipeline::git::GitOps;
use crate::pipeline::patterns::{GdprPatternScanner, OAuthPatternScanner};
use crate::pipeline::sbom::SbomScanner;
use crate::pipeline::semgrep::SemgrepScanner;
/// Drives a complete scan of one tracked repository and records the outcome.
pub struct PipelineOrchestrator {
/// Agent configuration (clone base path, SearXNG URL, NVD API key, ...).
config: AgentConfig,
/// Handle to the collections the pipeline reads and writes.
db: Database,
/// Shared LLM client used for the triage stage.
llm: Arc<LlmClient>,
/// HTTP client handed to the CVE scanner.
http: reqwest::Client,
}
impl PipelineOrchestrator {
pub fn new(
config: AgentConfig,
db: Database,
llm: Arc<LlmClient>,
http: reqwest::Client,
) -> Self {
Self { config, db, llm, http }
}
pub async fn run(
&self,
repo_id: &str,
trigger: ScanTrigger,
) -> Result<(), AgentError> {
// Look up the repository
let repo = self
.db
.repositories()
.find_one(doc! { "_id": mongodb::bson::oid::ObjectId::parse_str(repo_id).map_err(|e| AgentError::Other(e.to_string()))? })
.await?
.ok_or_else(|| AgentError::Other(format!("Repository {repo_id} not found")))?;
// Create scan run
let scan_run = ScanRun::new(repo_id.to_string(), trigger);
let insert = self.db.scan_runs().insert_one(&scan_run).await?;
let scan_run_id = insert.inserted_id.as_object_id()
.map(|id| id.to_hex())
.unwrap_or_default();
let result = self.run_pipeline(&repo, &scan_run_id).await;
// Update scan run status
match &result {
Ok(count) => {
self.db.scan_runs().update_one(
doc! { "_id": &insert.inserted_id },
doc! {
"$set": {
"status": "completed",
"current_phase": "completed",
"new_findings_count": *count as i64,
"completed_at": mongodb::bson::DateTime::now(),
}
},
).await?;
}
Err(e) => {
self.db.scan_runs().update_one(
doc! { "_id": &insert.inserted_id },
doc! {
"$set": {
"status": "failed",
"error_message": e.to_string(),
"completed_at": mongodb::bson::DateTime::now(),
}
},
).await?;
}
}
result.map(|_| ())
}
/// Executes every scan stage for `repo` and persists the results.
///
/// Stages: change detection, Semgrep SAST, SBOM generation, CVE scanning,
/// GDPR/OAuth pattern scanning, LLM triage, persistence of findings, SBOM
/// entries and CVE alerts, and finally the repository bookkeeping update.
/// A failing scanner is logged and treated as "no results"; git and database
/// failures abort the run.
///
/// Returns the number of findings that were newly inserted, or 0 when `HEAD`
/// has not moved since the last scanned commit.
async fn run_pipeline(
    &self,
    repo: &TrackedRepository,
    scan_run_id: &str,
) -> Result<u32, AgentError> {
    let repo_id = repo.id.as_ref()
        .map(|id| id.to_hex())
        .unwrap_or_default();

    // Stage 0: Change detection
    tracing::info!("[{repo_id}] Stage 0: Change detection");
    let git_ops = GitOps::new(&self.config.git_clone_base_path);
    let repo_path = git_ops.clone_or_fetch(&repo.git_url, &repo.name)?;
    if !GitOps::has_new_commits(&repo_path, repo.last_scanned_commit.as_deref())? {
        tracing::info!("[{repo_id}] No new commits, skipping scan");
        return Ok(0);
    }
    let current_sha = GitOps::get_head_sha(&repo_path)?;
    let mut all_findings: Vec<Finding> = Vec::new();

    // Stage 1: Semgrep SAST
    tracing::info!("[{repo_id}] Stage 1: Semgrep SAST");
    self.update_phase(scan_run_id, "sast").await;
    let semgrep = SemgrepScanner;
    match semgrep.scan(&repo_path, &repo_id).await {
        Ok(output) => all_findings.extend(output.findings),
        Err(e) => tracing::warn!("[{repo_id}] Semgrep failed: {e}"),
    }

    // Stage 2: SBOM Generation
    tracing::info!("[{repo_id}] Stage 2: SBOM Generation");
    self.update_phase(scan_run_id, "sbom_generation").await;
    let sbom_scanner = SbomScanner;
    let mut sbom_entries = match sbom_scanner.scan(&repo_path, &repo_id).await {
        Ok(output) => output.sbom_entries,
        Err(e) => {
            tracing::warn!("[{repo_id}] SBOM generation failed: {e}");
            Vec::new()
        }
    };

    // Stage 3: CVE Scanning
    tracing::info!("[{repo_id}] Stage 3: CVE Scanning");
    self.update_phase(scan_run_id, "cve_scanning").await;
    let cve_scanner = CveScanner::new(
        self.http.clone(),
        self.config.searxng_url.clone(),
        self.config.nvd_api_key.as_ref().map(|k| {
            use secrecy::ExposeSecret;
            k.expose_secret().to_string()
        }),
    );
    let cve_alerts = match cve_scanner.scan_dependencies(&repo_id, &mut sbom_entries).await {
        Ok(alerts) => alerts,
        Err(e) => {
            tracing::warn!("[{repo_id}] CVE scanning failed: {e}");
            Vec::new()
        }
    };

    // Stage 4: Pattern Scanning (GDPR + OAuth)
    tracing::info!("[{repo_id}] Stage 4: Pattern Scanning");
    self.update_phase(scan_run_id, "pattern_scanning").await;
    let gdpr = GdprPatternScanner::new();
    match gdpr.scan(&repo_path, &repo_id).await {
        Ok(output) => all_findings.extend(output.findings),
        Err(e) => tracing::warn!("[{repo_id}] GDPR pattern scan failed: {e}"),
    }
    let oauth = OAuthPatternScanner::new();
    match oauth.scan(&repo_path, &repo_id).await {
        Ok(output) => all_findings.extend(output.findings),
        Err(e) => tracing::warn!("[{repo_id}] OAuth pattern scan failed: {e}"),
    }

    // Stage 5: LLM Triage
    tracing::info!("[{repo_id}] Stage 5: LLM Triage ({} findings)", all_findings.len());
    self.update_phase(scan_run_id, "llm_triage").await;
    let triaged = crate::llm::triage::triage_findings(&self.llm, &mut all_findings).await;
    tracing::info!("[{repo_id}] Triaged: {triaged} findings passed confidence threshold");

    // Dedup against existing findings and insert new ones
    let mut new_count = 0u32;
    for mut finding in all_findings {
        finding.scan_run_id = Some(scan_run_id.to_string());
        // Check if fingerprint already exists
        let existing = self
            .db
            .findings()
            .find_one(doc! { "fingerprint": &finding.fingerprint })
            .await?;
        if existing.is_none() {
            self.db.findings().insert_one(&finding).await?;
            new_count += 1;
        }
    }

    // Persist SBOM entries (upsert by repo_id + name + version)
    for entry in &sbom_entries {
        // An entry that cannot be serialized is skipped: sending an empty
        // update document would be rejected by the server and abort the scan.
        let fields = match mongodb::bson::to_document(entry) {
            Ok(fields) => fields,
            Err(e) => {
                tracing::warn!(
                    "[{repo_id}] Skipping SBOM entry {}@{}: serialization failed: {e}",
                    entry.name,
                    entry.version
                );
                continue;
            }
        };
        let filter = doc! {
            "repo_id": &entry.repo_id,
            "name": &entry.name,
            "version": &entry.version,
        };
        self.db
            .sbom_entries()
            .update_one(filter, doc! { "$set": fields })
            .upsert(true)
            .await?;
    }

    // Persist CVE alerts (upsert by cve_id + repo_id)
    for alert in &cve_alerts {
        let fields = match mongodb::bson::to_document(alert) {
            Ok(fields) => fields,
            Err(e) => {
                tracing::warn!(
                    "[{repo_id}] Skipping CVE alert {}: serialization failed: {e}",
                    alert.cve_id
                );
                continue;
            }
        };
        let filter = doc! {
            "cve_id": &alert.cve_id,
            "repo_id": &alert.repo_id,
        };
        self.db
            .cve_alerts()
            .update_one(filter, doc! { "$set": fields })
            .upsert(true)
            .await?;
    }

    // Stage 6: Issue Creation
    tracing::info!("[{repo_id}] Stage 6: Issue Creation");
    self.update_phase(scan_run_id, "issue_creation").await;
    // Issue creation is handled by the trackers module - deferred to agent

    // Stage 7: Update repository
    self.db.repositories().update_one(
        doc! { "_id": repo.id },
        doc! {
            "$set": {
                "last_scanned_commit": &current_sha,
                "updated_at": mongodb::bson::DateTime::now(),
            },
            "$inc": { "findings_count": i64::from(new_count) },
        },
    ).await?;
    tracing::info!("[{repo_id}] Scan complete: {new_count} new findings");
    Ok(new_count)
}
async fn update_phase(&self, scan_run_id: &str, phase: &str) {
if let Ok(oid) = mongodb::bson::oid::ObjectId::parse_str(scan_run_id) {
let _ = self.db.scan_runs().update_one(
doc! { "_id": oid },
doc! {
"$set": { "current_phase": phase },
"$push": { "phases_completed": phase },
},
).await;
}
}
}

View File

@@ -0,0 +1,226 @@
use std::path::Path;
use compliance_core::models::{Finding, ScanType, Severity};
use compliance_core::traits::{ScanOutput, Scanner};
use compliance_core::CoreError;
use regex::Regex;
use crate::pipeline::dedup;
/// Scanner that holds the GDPR-related pattern rules.
pub struct GdprPatternScanner {
/// Rules applied by `scan`.
patterns: Vec<PatternRule>,
}
/// Scanner that holds the OAuth-related pattern rules.
pub struct OAuthPatternScanner {
/// Rules applied by `scan`.
patterns: Vec<PatternRule>,
}
struct PatternRule {
id: String,
title: String,
description: String,
pattern: Regex,
severity: Severity,
file_extensions: Vec<String>,
}
impl GdprPatternScanner {
pub fn new() -> Self {
let patterns = vec![
PatternRule {
id: "gdpr-pii-logging".to_string(),
title: "PII data potentially logged".to_string(),
description: "Logging statements that may contain personally identifiable information (email, SSN, phone, IP address).".to_string(),
pattern: Regex::new(r#"(?i)(log|print|console\.|logger\.|tracing::)\s*[\.(].*\b(email|ssn|social.?security|phone.?number|ip.?addr|passport|date.?of.?birth|credit.?card)\b"#).unwrap_or_else(|_| Regex::new("^$").unwrap()),
severity: Severity::High,
file_extensions: vec!["rs", "py", "js", "ts", "java", "go", "rb"].into_iter().map(String::from).collect(),
},
PatternRule {
id: "gdpr-no-consent".to_string(),
title: "Data collection without apparent consent mechanism".to_string(),
description: "Data collection endpoint that doesn't reference consent or opt-in mechanisms.".to_string(),
pattern: Regex::new(r#"(?i)(collect|store|save|persist|record).*\b(personal|user.?data|pii|biometric)\b"#).unwrap_or_else(|_| Regex::new("^$").unwrap()),
severity: Severity::Medium,
file_extensions: vec!["rs", "py", "js", "ts", "java", "go"].into_iter().map(String::from).collect(),
},
PatternRule {
id: "gdpr-no-delete-endpoint".to_string(),
title: "Missing data deletion capability".to_string(),
description: "User data models or controllers without corresponding deletion endpoints (right to erasure).".to_string(),
pattern: Regex::new(r#"(?i)(class|struct|model)\s+User(?!.*[Dd]elete)"#).unwrap_or_else(|_| Regex::new("^$").unwrap()),
severity: Severity::Medium,
file_extensions: vec!["rs", "py", "js", "ts", "java", "go", "rb"].into_iter().map(String::from).collect(),
},
PatternRule {
id: "gdpr-hardcoded-retention".to_string(),
title: "Hardcoded data retention period".to_string(),
description: "Data retention periods should be configurable for GDPR compliance.".to_string(),
pattern: Regex::new(r#"(?i)(retention|ttl|expire|keep.?for)\s*[=:]\s*\d+"#).unwrap_or_else(|_| Regex::new("^$").unwrap()),
severity: Severity::Low,
file_extensions: vec!["rs", "py", "js", "ts", "java", "go", "yaml", "yml", "toml", "json"].into_iter().map(String::from).collect(),
},
];
Self { patterns }
}
}
impl Scanner for GdprPatternScanner {
fn name(&self) -> &str {
"gdpr-patterns"
}
fn scan_type(&self) -> ScanType {
ScanType::Gdpr
}
async fn scan(&self, repo_path: &Path, repo_id: &str) -> Result<ScanOutput, CoreError> {
let findings = scan_with_patterns(repo_path, repo_id, &self.patterns, ScanType::Gdpr, "gdpr-patterns")?;
Ok(ScanOutput {
findings,
sbom_entries: Vec::new(),
})
}
}
impl OAuthPatternScanner {
pub fn new() -> Self {
let patterns = vec![
PatternRule {
id: "oauth-implicit-grant".to_string(),
title: "OAuth implicit grant flow detected".to_string(),
description: "Implicit grant flow is deprecated and insecure. Use authorization code flow with PKCE instead.".to_string(),
pattern: Regex::new(r#"(?i)(response_type\s*[=:]\s*["']?token|grant_type\s*[=:]\s*["']?implicit)"#).unwrap_or_else(|_| Regex::new("^$").unwrap()),
severity: Severity::High,
file_extensions: vec!["rs", "py", "js", "ts", "java", "go", "yaml", "yml", "json"].into_iter().map(String::from).collect(),
},
PatternRule {
id: "oauth-missing-pkce".to_string(),
title: "OAuth flow without PKCE".to_string(),
description: "Authorization code flow should use PKCE (code_challenge/code_verifier) for public clients.".to_string(),
pattern: Regex::new(r#"(?i)authorization.?code(?!.*code.?challenge)(?!.*pkce)"#).unwrap_or_else(|_| Regex::new("^$").unwrap()),
severity: Severity::Medium,
file_extensions: vec!["rs", "py", "js", "ts", "java", "go"].into_iter().map(String::from).collect(),
},
PatternRule {
id: "oauth-token-localstorage".to_string(),
title: "Token stored in localStorage".to_string(),
description: "Storing tokens in localStorage is vulnerable to XSS. Use httpOnly cookies or secure session storage.".to_string(),
pattern: Regex::new(r#"(?i)localStorage\.(set|get)Item\s*\(\s*["'].*token"#).unwrap_or_else(|_| Regex::new("^$").unwrap()),
severity: Severity::High,
file_extensions: vec!["js", "ts", "jsx", "tsx"].into_iter().map(String::from).collect(),
},
PatternRule {
id: "oauth-token-url".to_string(),
title: "Token passed in URL parameters".to_string(),
description: "Tokens in URLs can leak via referrer headers, server logs, and browser history.".to_string(),
pattern: Regex::new(r#"(?i)(access_token|bearer)\s*[=]\s*.*\b(url|query|param|href)\b"#).unwrap_or_else(|_| Regex::new("^$").unwrap()),
severity: Severity::High,
file_extensions: vec!["rs", "py", "js", "ts", "java", "go"].into_iter().map(String::from).collect(),
},
];
Self { patterns }
}
}
impl Scanner for OAuthPatternScanner {
fn name(&self) -> &str {
"oauth-patterns"
}
fn scan_type(&self) -> ScanType {
ScanType::OAuth
}
async fn scan(&self, repo_path: &Path, repo_id: &str) -> Result<ScanOutput, CoreError> {
let findings = scan_with_patterns(repo_path, repo_id, &self.patterns, ScanType::OAuth, "oauth-patterns")?;
Ok(ScanOutput {
findings,
sbom_entries: Vec::new(),
})
}
}
fn scan_with_patterns(
repo_path: &Path,
repo_id: &str,
patterns: &[PatternRule],
scan_type: ScanType,
scanner_name: &str,
) -> Result<Vec<Finding>, CoreError> {
let mut findings = Vec::new();
for entry in walkdir(repo_path)? {
let path = entry.path();
if !path.is_file() {
continue;
}
let ext = path
.extension()
.and_then(|e| e.to_str())
.unwrap_or("")
.to_string();
let content = match std::fs::read_to_string(path) {
Ok(c) => c,
Err(_) => continue, // skip binary files
};
let relative_path = path
.strip_prefix(repo_path)
.unwrap_or(path)
.to_string_lossy()
.to_string();
for pattern in patterns {
if !pattern.file_extensions.contains(&ext) {
continue;
}
for (line_num, line) in content.lines().enumerate() {
if pattern.pattern.is_match(line) {
let fingerprint = dedup::compute_fingerprint(&[
repo_id,
&pattern.id,
&relative_path,
&(line_num + 1).to_string(),
]);
let mut finding = Finding::new(
repo_id.to_string(),
fingerprint,
scanner_name.to_string(),
scan_type.clone(),
pattern.title.clone(),
pattern.description.clone(),
pattern.severity.clone(),
);
finding.rule_id = Some(pattern.id.clone());
finding.file_path = Some(relative_path.clone());
finding.line_number = Some((line_num + 1) as u32);
finding.code_snippet = Some(line.to_string());
findings.push(finding);
}
}
}
}
Ok(findings)
}
/// Recursively lists everything below `path`, pruning common non-source
/// directories (VCS metadata, dependency caches, build output).
///
/// The root itself is never pruned, so a repository whose own directory is
/// called e.g. `build` or `vendor` is still scanned. Entries that cannot be
/// read are silently skipped.
fn walkdir(path: &Path) -> Result<Vec<walkdir::DirEntry>, CoreError> {
    const SKIP_NAMES: [&str; 8] = [
        ".git",
        "node_modules",
        "target",
        "vendor",
        ".venv",
        "__pycache__",
        "dist",
        "build",
    ];
    let entries: Vec<_> = walkdir::WalkDir::new(path)
        .into_iter()
        .filter_entry(|e| {
            // The predicate is also invoked for the root entry (depth 0);
            // rejecting it would discard the whole tree.
            if e.depth() == 0 {
                return true;
            }
            let name = e.file_name().to_string_lossy();
            !SKIP_NAMES.contains(&name.as_ref())
        })
        .filter_map(|e| e.ok())
        .collect();
    Ok(entries)
}

View File

@@ -0,0 +1,186 @@
use std::path::Path;
use compliance_core::models::{SbomEntry, ScanType, VulnRef};
use compliance_core::traits::{ScanOutput, Scanner};
use compliance_core::CoreError;
/// Scanner that builds the SBOM with syft and annotates it with cargo-audit
/// advisories.
pub struct SbomScanner;
impl Scanner for SbomScanner {
    fn name(&self) -> &str {
        "sbom"
    }

    fn scan_type(&self) -> ScanType {
        ScanType::Sbom
    }

    /// Produces the SBOM entries for `repo_path`.
    ///
    /// Both external tools are optional: a failure of either one is logged
    /// and the scan still succeeds with whatever was collected.
    async fn scan(&self, repo_path: &Path, repo_id: &str) -> Result<ScanOutput, CoreError> {
        // Components come from syft ...
        let mut sbom_entries = match run_syft(repo_path, repo_id).await {
            Ok(found) => found,
            Err(e) => {
                tracing::warn!("syft failed: {e}");
                Vec::new()
            }
        };

        // ... and Rust advisories from cargo-audit are attached to them.
        match run_cargo_audit(repo_path, repo_id).await {
            Err(e) => tracing::warn!("cargo-audit skipped: {e}"),
            Ok(advisories) => merge_audit_vulns(&mut sbom_entries, advisories),
        }

        Ok(ScanOutput {
            findings: Vec::new(),
            sbom_entries,
        })
    }
}
/// Runs `syft <repo_path> -o cyclonedx-json` and converts the reported
/// components into SBOM entries.
///
/// A missing version becomes `"unknown"` and a missing component type becomes
/// `"library"`. The license is taken from the first license wrapper: its `id`
/// if present, otherwise its `name`, otherwise an empty string.
///
/// # Errors
/// Returns `CoreError::Scanner` when syft cannot be started or exits with a
/// failure status, and a JSON error when its output cannot be parsed.
async fn run_syft(repo_path: &Path, repo_id: &str) -> Result<Vec<SbomEntry>, CoreError> {
    let mut command = tokio::process::Command::new("syft");
    command.arg(repo_path).arg("-o").arg("cyclonedx-json");

    let output = match command.output().await {
        Ok(output) => output,
        Err(e) => {
            return Err(CoreError::Scanner {
                scanner: "syft".to_string(),
                source: Box::new(e),
            })
        }
    };
    if !output.status.success() {
        let stderr = String::from_utf8_lossy(&output.stderr);
        return Err(CoreError::Scanner {
            scanner: "syft".to_string(),
            source: format!("syft exited with {}: {stderr}", output.status).into(),
        });
    }

    let bom: CycloneDxBom = serde_json::from_slice(&output.stdout)?;

    let mut entries = Vec::new();
    for component in bom.components.unwrap_or_default() {
        let version = component.version.unwrap_or_else(|| "unknown".to_string());
        let kind = component
            .component_type
            .unwrap_or_else(|| "library".to_string());
        let mut entry = SbomEntry::new(repo_id.to_string(), component.name, version, kind);
        entry.purl = component.purl;

        // Only the first license wrapper is consulted.
        let first_license = component
            .licenses
            .as_ref()
            .and_then(|wrappers| wrappers.first())
            .and_then(|wrapper| wrapper.license.as_ref());
        entry.license = first_license.map(|license| match (&license.id, &license.name) {
            (Some(id), _) => id.clone(),
            (None, Some(name)) => name.clone(),
            (None, None) => String::new(),
        });

        entries.push(entry);
    }
    Ok(entries)
}
async fn run_cargo_audit(repo_path: &Path, _repo_id: &str) -> Result<Vec<AuditVuln>, CoreError> {
let cargo_lock = repo_path.join("Cargo.lock");
if !cargo_lock.exists() {
return Ok(Vec::new());
}
let output = tokio::process::Command::new("cargo")
.args(["audit", "--json"])
.current_dir(repo_path)
.output()
.await
.map_err(|e| CoreError::Scanner {
scanner: "cargo-audit".to_string(),
source: Box::new(e),
})?;
let result: CargoAuditOutput = serde_json::from_slice(&output.stdout)
.unwrap_or_else(|_| CargoAuditOutput { vulnerabilities: CargoAuditVulns { list: Vec::new() } });
let vulns = result
.vulnerabilities
.list
.into_iter()
.map(|v| AuditVuln {
package: v.advisory.package,
id: v.advisory.id,
url: v.advisory.url,
})
.collect();
Ok(vulns)
}
/// Attaches each cargo-audit advisory to the first SBOM entry whose name
/// equals the advisory's package name.
///
/// Advisories for packages that are not in `entries` are dropped.
// NOTE(review): matching is by name only, so with several versions of one
// crate in the SBOM the advisory lands on whichever entry comes first.
fn merge_audit_vulns(entries: &mut [SbomEntry], vulns: Vec<AuditVuln>) {
    for vuln in vulns {
        let Some(entry) = entries.iter_mut().find(|e| e.name == vuln.package) else {
            continue;
        };
        entry.known_vulnerabilities.push(VulnRef {
            id: vuln.id,
            source: "cargo-audit".to_string(),
            severity: None,
            url: Some(vuln.url),
        });
    }
}
// CycloneDX JSON types
/// Subset of a CycloneDX BOM document that is read from syft's output.
#[derive(serde::Deserialize)]
struct CycloneDxBom {
/// Listed components; a missing field is treated as an empty list.
components: Option<Vec<CdxComponent>>,
}
/// One component of a CycloneDX BOM.
#[derive(serde::Deserialize)]
struct CdxComponent {
/// Component name.
name: String,
/// Component version; replaced by `"unknown"` when absent.
version: Option<String>,
/// Value of the JSON `type` field; replaced by `"library"` when absent.
#[serde(rename = "type")]
component_type: Option<String>,
/// Package URL, copied onto the SBOM entry as-is.
purl: Option<String>,
/// License wrappers; only the first one is consulted.
licenses: Option<Vec<CdxLicenseWrapper>>,
}
/// Element of a component's `licenses` array.
#[derive(serde::Deserialize)]
struct CdxLicenseWrapper {
/// The wrapped license object, if the element has one.
license: Option<CdxLicense>,
}
/// License description inside a license wrapper.
#[derive(serde::Deserialize)]
struct CdxLicense {
/// License identifier; preferred over `name` when both are present.
id: Option<String>,
/// License name, used when `id` is absent.
name: Option<String>,
}
// Cargo audit types
/// Subset of the `cargo audit --json` report that is read.
#[derive(serde::Deserialize)]
struct CargoAuditOutput {
/// The report's `vulnerabilities` section.
vulnerabilities: CargoAuditVulns,
}
/// The `vulnerabilities` section of a cargo-audit report.
#[derive(serde::Deserialize)]
struct CargoAuditVulns {
/// Individual vulnerability entries.
list: Vec<CargoAuditEntry>,
}
/// One vulnerability entry of a cargo-audit report.
#[derive(serde::Deserialize)]
struct CargoAuditEntry {
/// Advisory describing the vulnerability.
advisory: CargoAuditAdvisory,
}
/// Advisory fields that are read from a cargo-audit entry.
// NOTE(review): all three fields are required here; if cargo-audit can emit an
// advisory without `url`, parsing of the whole report fails — confirm against
// the tool's JSON output.
#[derive(serde::Deserialize)]
struct CargoAuditAdvisory {
/// Advisory identifier.
id: String,
/// Name of the affected package.
package: String,
/// Link to the advisory.
url: String,
}
/// Internal, flattened view of a cargo-audit advisory.
struct AuditVuln {
/// Name of the affected package; matched against SBOM entry names.
package: String,
/// Advisory identifier.
id: String,
/// Link to the advisory.
url: String,
}

View File

@@ -0,0 +1,110 @@
use std::path::Path;
use compliance_core::models::{Finding, ScanType, Severity};
use compliance_core::traits::{ScanOutput, Scanner};
use compliance_core::CoreError;
use crate::pipeline::dedup;
/// Scanner that runs the `semgrep` command-line tool and converts its JSON
/// results into findings.
pub struct SemgrepScanner;
impl Scanner for SemgrepScanner {
    fn name(&self) -> &str {
        "semgrep"
    }

    fn scan_type(&self) -> ScanType {
        ScanType::Sast
    }

    /// Runs `semgrep --config=auto --json --quiet <repo_path>` and maps every
    /// result to a [`Finding`].
    ///
    /// A failing exit status combined with empty stdout is logged and yields
    /// an empty output; otherwise stdout is parsed regardless of the exit
    /// status.
    ///
    /// # Errors
    /// Returns `CoreError::Scanner` when semgrep cannot be started and a JSON
    /// error when its output cannot be parsed.
    async fn scan(&self, repo_path: &Path, repo_id: &str) -> Result<ScanOutput, CoreError> {
        let output = tokio::process::Command::new("semgrep")
            .args(["--config=auto", "--json", "--quiet"])
            .arg(repo_path)
            .output()
            .await
            .map_err(|e| CoreError::Scanner {
                scanner: "semgrep".to_string(),
                source: Box::new(e),
            })?;
        if !output.status.success() && output.stdout.is_empty() {
            let stderr = String::from_utf8_lossy(&output.stderr);
            tracing::warn!("Semgrep exited with {}: {stderr}", output.status);
            return Ok(ScanOutput::default());
        }

        let result: SemgrepOutput = serde_json::from_slice(&output.stdout)?;
        let findings = result
            .results
            .into_iter()
            .map(|r| {
                let severity = match r.extra.severity.as_str() {
                    "ERROR" => Severity::High,
                    "WARNING" => Severity::Medium,
                    "INFO" => Severity::Low,
                    _ => Severity::Info,
                };
                let fingerprint = dedup::compute_fingerprint(&[
                    repo_id,
                    &r.check_id,
                    &r.path,
                    &r.start.line.to_string(),
                ]);
                // Rule metadata may give `cwe` as a single string or as a list
                // of strings; for a list the first string entry is used.
                let cwe = r
                    .extra
                    .metadata
                    .as_ref()
                    .and_then(|m| m.get("cwe"))
                    .and_then(|value| match value {
                        serde_json::Value::String(s) => Some(s.clone()),
                        serde_json::Value::Array(items) => items
                            .iter()
                            .find_map(|item| item.as_str())
                            .map(str::to_owned),
                        _ => None,
                    });
                // The rule message doubles as both title and description.
                let mut finding = Finding::new(
                    repo_id.to_string(),
                    fingerprint,
                    "semgrep".to_string(),
                    ScanType::Sast,
                    r.extra.message.clone(),
                    r.extra.message,
                    severity,
                );
                finding.rule_id = Some(r.check_id);
                finding.file_path = Some(r.path);
                finding.line_number = Some(r.start.line);
                finding.code_snippet = Some(r.extra.lines);
                finding.cwe = cwe;
                finding
            })
            .collect();
        Ok(ScanOutput {
            findings,
            sbom_entries: Vec::new(),
        })
    }
}
/// Subset of semgrep's JSON report that is read.
#[derive(serde::Deserialize)]
struct SemgrepOutput {
/// Individual rule matches.
results: Vec<SemgrepResult>,
}
/// One rule match from semgrep's report.
#[derive(serde::Deserialize)]
struct SemgrepResult {
/// Identifier of the rule that matched; stored as the finding's rule id.
check_id: String,
/// Path of the matched file exactly as reported by semgrep.
path: String,
/// Start position of the match.
start: SemgrepPosition,
/// Message, severity, matched lines and rule metadata.
extra: SemgrepExtra,
}
/// Position inside a file as reported by semgrep.
#[derive(serde::Deserialize)]
struct SemgrepPosition {
/// Line number, stored unchanged as the finding's line number.
line: u32,
}
/// The `extra` object attached to a semgrep result.
#[derive(serde::Deserialize)]
struct SemgrepExtra {
/// Rule message; used as both title and description of the finding.
message: String,
/// Severity label; `ERROR`, `WARNING` and `INFO` are mapped explicitly,
/// anything else becomes `Severity::Info`.
severity: String,
/// Matched source text, stored as the finding's code snippet.
lines: String,
/// Free-form rule metadata; the `cwe` key is read from it. Defaults to
/// `None` when the field is missing.
#[serde(default)]
metadata: Option<serde_json::Value>,
}