use std::path::Path; use compliance_core::models::{Finding, ScanType, Severity}; use compliance_core::traits::{ScanOutput, Scanner}; use compliance_core::CoreError; use crate::pipeline::dedup; pub struct GitleaksScanner; impl Scanner for GitleaksScanner { fn name(&self) -> &str { "gitleaks" } fn scan_type(&self) -> ScanType { ScanType::SecretDetection } #[tracing::instrument(skip_all)] async fn scan(&self, repo_path: &Path, repo_id: &str) -> Result { let output = tokio::process::Command::new("gitleaks") .args([ "detect", "--source", ".", "--report-format", "json", "--report-path", "/dev/stdout", "--no-banner", "--exit-code", "0", ]) .current_dir(repo_path) .output() .await .map_err(|e| CoreError::Scanner { scanner: "gitleaks".to_string(), source: Box::new(e), })?; if output.stdout.is_empty() { return Ok(ScanOutput::default()); } let results: Vec = serde_json::from_slice(&output.stdout).unwrap_or_default(); let findings = results .into_iter() .filter(|r| !is_allowlisted(&r.file)) .map(|r| { let severity = match r.rule_id.as_str() { s if s.contains("private-key") => Severity::Critical, s if s.contains("token") || s.contains("password") || s.contains("secret") => { Severity::High } s if s.contains("api-key") => Severity::High, _ => Severity::Medium, }; let fingerprint = dedup::compute_fingerprint(&[ repo_id, &r.rule_id, &r.file, &r.start_line.to_string(), ]); let title = format!("Secret detected: {}", r.description); let description = format!( "Potential secret ({}) found in {}:{}. Match: {}", r.rule_id, r.file, r.start_line, r.r#match.chars().take(80).collect::(), ); let mut finding = Finding::new( repo_id.to_string(), fingerprint, "gitleaks".to_string(), ScanType::SecretDetection, title, description, severity, ); finding.rule_id = Some(r.rule_id); finding.file_path = Some(r.file); finding.line_number = Some(r.start_line); finding.code_snippet = Some(r.r#match); finding }) .collect(); Ok(ScanOutput { findings, sbom_entries: Vec::new(), }) } } /// Skip files that commonly contain example/placeholder secrets fn is_allowlisted(file_path: &str) -> bool { let lower = file_path.to_lowercase(); lower.ends_with(".env.example") || lower.ends_with(".env.sample") || lower.ends_with(".env.template") || lower.contains("/test/") || lower.contains("/tests/") || lower.contains("/fixtures/") || lower.contains("/testdata/") || lower.contains("mock") || lower.ends_with("_test.go") || lower.ends_with(".test.ts") || lower.ends_with(".test.js") || lower.ends_with(".spec.ts") || lower.ends_with(".spec.js") } #[derive(serde::Deserialize)] #[serde(rename_all = "PascalCase")] struct GitleaksResult { description: String, #[serde(rename = "RuleID")] rule_id: String, file: String, start_line: u32, #[serde(rename = "Match")] r#match: String, } #[cfg(test)] mod tests { use super::*; // --- is_allowlisted tests --- #[test] fn allowlisted_env_example_files() { assert!(is_allowlisted(".env.example")); assert!(is_allowlisted("config/.env.sample")); assert!(is_allowlisted("deploy/.ENV.TEMPLATE")); } #[test] fn allowlisted_test_directories() { assert!(is_allowlisted("src/test/config.json")); assert!(is_allowlisted("src/tests/fixtures.rs")); assert!(is_allowlisted("data/fixtures/secret.txt")); assert!(is_allowlisted("pkg/testdata/key.pem")); } #[test] fn allowlisted_mock_files() { assert!(is_allowlisted("src/mock_service.py")); assert!(is_allowlisted("lib/MockAuth.java")); } #[test] fn allowlisted_test_suffixes() { assert!(is_allowlisted("auth_test.go")); assert!(is_allowlisted("auth.test.ts")); assert!(is_allowlisted("auth.test.js")); assert!(is_allowlisted("auth.spec.ts")); assert!(is_allowlisted("auth.spec.js")); } #[test] fn not_allowlisted_regular_files() { assert!(!is_allowlisted("src/main.rs")); assert!(!is_allowlisted("config/.env")); assert!(!is_allowlisted("lib/auth.ts")); assert!(!is_allowlisted("deploy/secrets.yaml")); } #[test] fn not_allowlisted_partial_matches() { // "test" as substring in a non-directory context should not match assert!(!is_allowlisted("src/attestation.rs")); assert!(!is_allowlisted("src/contest/data.json")); } // --- GitleaksResult deserialization tests --- #[test] fn deserialize_gitleaks_result() { let json = r#"{ "Description": "AWS Access Key", "RuleID": "aws-access-key", "File": "src/config.rs", "StartLine": 10, "Match": "AKIAIOSFODNN7EXAMPLE" }"#; let result: GitleaksResult = serde_json::from_str(json).unwrap(); assert_eq!(result.description, "AWS Access Key"); assert_eq!(result.rule_id, "aws-access-key"); assert_eq!(result.file, "src/config.rs"); assert_eq!(result.start_line, 10); assert_eq!(result.r#match, "AKIAIOSFODNN7EXAMPLE"); } #[test] fn deserialize_gitleaks_result_array() { let json = r#"[ { "Description": "Generic Secret", "RuleID": "generic-secret", "File": "app.py", "StartLine": 5, "Match": "password=hunter2" } ]"#; let results: Vec = serde_json::from_str(json).unwrap(); assert_eq!(results.len(), 1); assert_eq!(results[0].rule_id, "generic-secret"); } #[test] fn severity_mapping_private_key() { // Verify the severity logic from the scan method let rule_id = "some-private-key-rule"; assert!(rule_id.contains("private-key")); } #[test] fn severity_mapping_token_password_secret() { for keyword in &["token", "password", "secret"] { let rule_id = format!("some-{}-rule", keyword); assert!( rule_id.contains("token") || rule_id.contains("password") || rule_id.contains("secret"), "Expected '{rule_id}' to match token/password/secret" ); } } }