Files
compliance-scanner-agent/compliance-agent/src/pipeline/gitleaks.rs
Sharang Parnerkar 3bb690e5bb
All checks were successful
CI / Format (push) Successful in 4s
CI / Clippy (push) Successful in 4m19s
CI / Security Audit (push) Successful in 1m44s
CI / Detect Changes (push) Successful in 5s
CI / Tests (push) Successful in 5m15s
CI / Deploy Agent (push) Successful in 2s
CI / Deploy Dashboard (push) Successful in 2s
CI / Deploy Docs (push) Has been skipped
CI / Deploy MCP (push) Successful in 2s
refactor: modularize codebase and add 404 unit tests (#13)
2026-03-13 08:03:45 +00:00

239 lines
7.4 KiB
Rust

use std::path::Path;
use compliance_core::models::{Finding, ScanType, Severity};
use compliance_core::traits::{ScanOutput, Scanner};
use compliance_core::CoreError;
use crate::pipeline::dedup;
/// Secret-detection scanner that shells out to the `gitleaks` command-line tool.
///
/// The type carries no state; all behavior lives in its `Scanner` implementation.
pub struct GitleaksScanner;
impl Scanner for GitleaksScanner {
fn name(&self) -> &str {
"gitleaks"
}
fn scan_type(&self) -> ScanType {
ScanType::SecretDetection
}
#[tracing::instrument(skip_all)]
async fn scan(&self, repo_path: &Path, repo_id: &str) -> Result<ScanOutput, CoreError> {
let output = tokio::process::Command::new("gitleaks")
.args([
"detect",
"--source",
".",
"--report-format",
"json",
"--report-path",
"/dev/stdout",
"--no-banner",
"--exit-code",
"0",
])
.current_dir(repo_path)
.output()
.await
.map_err(|e| CoreError::Scanner {
scanner: "gitleaks".to_string(),
source: Box::new(e),
})?;
if output.stdout.is_empty() {
return Ok(ScanOutput::default());
}
let results: Vec<GitleaksResult> =
serde_json::from_slice(&output.stdout).unwrap_or_default();
let findings = results
.into_iter()
.filter(|r| !is_allowlisted(&r.file))
.map(|r| {
let severity = match r.rule_id.as_str() {
s if s.contains("private-key") => Severity::Critical,
s if s.contains("token") || s.contains("password") || s.contains("secret") => {
Severity::High
}
s if s.contains("api-key") => Severity::High,
_ => Severity::Medium,
};
let fingerprint = dedup::compute_fingerprint(&[
repo_id,
&r.rule_id,
&r.file,
&r.start_line.to_string(),
]);
let title = format!("Secret detected: {}", r.description);
let description = format!(
"Potential secret ({}) found in {}:{}. Match: {}",
r.rule_id,
r.file,
r.start_line,
r.r#match.chars().take(80).collect::<String>(),
);
let mut finding = Finding::new(
repo_id.to_string(),
fingerprint,
"gitleaks".to_string(),
ScanType::SecretDetection,
title,
description,
severity,
);
finding.rule_id = Some(r.rule_id);
finding.file_path = Some(r.file);
finding.line_number = Some(r.start_line);
finding.code_snippet = Some(r.r#match);
finding
})
.collect();
Ok(ScanOutput {
findings,
sbom_entries: Vec::new(),
})
}
}
/// Returns `true` for files that commonly contain example/placeholder secrets.
///
/// Matching is case-insensitive. A path is allowlisted when any of these hold:
///
/// * it ends with an env-template suffix (`.env.example`, `.env.sample`,
///   `.env.template`) or a test-file suffix (`_test.go`, `.test.ts`, `.test.js`,
///   `.spec.ts`, `.spec.js`);
/// * it contains the substring `mock` anywhere;
/// * any directory component (every `/`-separated component except the last) is
///   `test`, `tests`, `fixtures` or `testdata`. This includes directories at the
///   start of a relative path such as `tests/data.json`, which a plain
///   `"/tests/"` substring check would miss.
fn is_allowlisted(file_path: &str) -> bool {
    const ALLOWED_SUFFIXES: [&str; 8] = [
        ".env.example",
        ".env.sample",
        ".env.template",
        "_test.go",
        ".test.ts",
        ".test.js",
        ".spec.ts",
        ".spec.js",
    ];

    let lower = file_path.to_lowercase();

    if lower.contains("mock") || ALLOWED_SUFFIXES.iter().any(|&suffix| lower.ends_with(suffix)) {
        return true;
    }

    // Drop the final component (the file name) so only directories are compared;
    // a file that merely happens to be called `test` is not allowlisted.
    let mut directories = lower.split('/');
    directories.next_back();
    directories.any(|dir| matches!(dir, "test" | "tests" | "fixtures" | "testdata"))
}
/// One entry of the JSON report produced by gitleaks.
///
/// Only the report keys the scanner reads are declared here; each field is
/// mapped explicitly to the key name used in the report.
#[derive(serde::Deserialize)]
struct GitleaksResult {
    /// Identifier of the rule that matched (report key `RuleID`).
    #[serde(rename = "RuleID")]
    rule_id: String,
    /// Description text of the match (report key `Description`).
    #[serde(rename = "Description")]
    description: String,
    /// Path of the file containing the match (report key `File`).
    #[serde(rename = "File")]
    file: String,
    /// Line on which the match starts (report key `StartLine`).
    #[serde(rename = "StartLine")]
    start_line: u32,
    /// The matched text itself (report key `Match`).
    #[serde(rename = "Match")]
    r#match: String,
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Asserts that every path in `paths` is allowlisted.
    fn assert_all_allowlisted(paths: &[&str]) {
        for &path in paths {
            assert!(is_allowlisted(path));
        }
    }

    /// Asserts that no path in `paths` is allowlisted.
    fn assert_none_allowlisted(paths: &[&str]) {
        for &path in paths {
            assert!(!is_allowlisted(path));
        }
    }

    // --- is_allowlisted tests ---

    #[test]
    fn allowlisted_env_example_files() {
        assert_all_allowlisted(&[".env.example", "config/.env.sample", "deploy/.ENV.TEMPLATE"]);
    }

    #[test]
    fn allowlisted_test_directories() {
        assert_all_allowlisted(&[
            "src/test/config.json",
            "src/tests/fixtures.rs",
            "data/fixtures/secret.txt",
            "pkg/testdata/key.pem",
        ]);
    }

    #[test]
    fn allowlisted_mock_files() {
        assert_all_allowlisted(&["src/mock_service.py", "lib/MockAuth.java"]);
    }

    #[test]
    fn allowlisted_test_suffixes() {
        assert_all_allowlisted(&[
            "auth_test.go",
            "auth.test.ts",
            "auth.test.js",
            "auth.spec.ts",
            "auth.spec.js",
        ]);
    }

    #[test]
    fn not_allowlisted_regular_files() {
        assert_none_allowlisted(&[
            "src/main.rs",
            "config/.env",
            "lib/auth.ts",
            "deploy/secrets.yaml",
        ]);
    }

    #[test]
    fn not_allowlisted_partial_matches() {
        // "test" appearing inside a longer name must not count as a test directory.
        assert_none_allowlisted(&["src/attestation.rs", "src/contest/data.json"]);
    }

    // --- GitleaksResult deserialization tests ---

    #[test]
    fn deserialize_gitleaks_result() {
        let json = r#"{
            "Description": "AWS Access Key",
            "RuleID": "aws-access-key",
            "File": "src/config.rs",
            "StartLine": 10,
            "Match": "AKIAIOSFODNN7EXAMPLE"
        }"#;
        let GitleaksResult {
            description,
            rule_id,
            file,
            start_line,
            r#match,
        } = serde_json::from_str(json).unwrap();
        assert_eq!(description, "AWS Access Key");
        assert_eq!(rule_id, "aws-access-key");
        assert_eq!(file, "src/config.rs");
        assert_eq!(start_line, 10);
        assert_eq!(r#match, "AKIAIOSFODNN7EXAMPLE");
    }

    #[test]
    fn deserialize_gitleaks_result_array() {
        let json = r#"[
            {
                "Description": "Generic Secret",
                "RuleID": "generic-secret",
                "File": "app.py",
                "StartLine": 5,
                "Match": "password=hunter2"
            }
        ]"#;
        let parsed: Vec<GitleaksResult> = serde_json::from_str(json).unwrap();
        assert_eq!(parsed.len(), 1);
        assert_eq!(parsed[0].rule_id, "generic-secret");
    }

    // --- severity keyword tests ---
    // NOTE: these check the substring predicates on sample rule IDs only; they do
    // not call into `scan`, so they do not exercise the actual severity mapping.

    #[test]
    fn severity_mapping_private_key() {
        assert!("some-private-key-rule".contains("private-key"));
    }

    #[test]
    fn severity_mapping_token_password_secret() {
        let keywords = ["token", "password", "secret"];
        for keyword in keywords.iter() {
            let rule_id = format!("some-{}-rule", keyword);
            let matched = rule_id.contains("token")
                || rule_id.contains("password")
                || rule_id.contains("secret");
            assert!(
                matched,
                "Expected '{rule_id}' to match token/password/secret"
            );
        }
    }
}