use std::path::Path; use compliance_core::models::{Finding, ScanType, Severity}; use compliance_core::traits::{ScanOutput, Scanner}; use compliance_core::CoreError; use crate::pipeline::dedup; pub struct SemgrepScanner; impl Scanner for SemgrepScanner { fn name(&self) -> &str { "semgrep" } fn scan_type(&self) -> ScanType { ScanType::Sast } async fn scan(&self, repo_path: &Path, repo_id: &str) -> Result { let output = tokio::process::Command::new("semgrep") .args(["--config=auto", "--json", "--quiet"]) .arg(repo_path) .output() .await .map_err(|e| CoreError::Scanner { scanner: "semgrep".to_string(), source: Box::new(e), })?; if !output.status.success() && output.stdout.is_empty() { let stderr = String::from_utf8_lossy(&output.stderr); tracing::warn!("Semgrep exited with {}: {stderr}", output.status); return Ok(ScanOutput::default()); } let result: SemgrepOutput = serde_json::from_slice(&output.stdout)?; let findings = result .results .into_iter() .map(|r| { let severity = match r.extra.severity.as_str() { "ERROR" => Severity::High, "WARNING" => Severity::Medium, "INFO" => Severity::Low, _ => Severity::Info, }; let fingerprint = dedup::compute_fingerprint(&[ repo_id, &r.check_id, &r.path, &r.start.line.to_string(), ]); let mut finding = Finding::new( repo_id.to_string(), fingerprint, "semgrep".to_string(), ScanType::Sast, r.extra.message.clone(), r.extra.message, severity, ); finding.rule_id = Some(r.check_id); finding.file_path = Some(r.path); finding.line_number = Some(r.start.line); finding.code_snippet = Some(r.extra.lines); finding.cwe = r .extra .metadata .and_then(|m| m.get("cwe").and_then(|v| v.as_str()).map(|s| s.to_string())); finding }) .collect(); Ok(ScanOutput { findings, sbom_entries: Vec::new(), }) } } #[derive(serde::Deserialize)] struct SemgrepOutput { results: Vec, } #[derive(serde::Deserialize)] struct SemgrepResult { check_id: String, path: String, start: SemgrepPosition, extra: SemgrepExtra, } #[derive(serde::Deserialize)] struct SemgrepPosition { line: u32, } #[derive(serde::Deserialize)] struct SemgrepExtra { message: String, severity: String, lines: String, #[serde(default)] metadata: Option, }