Initial commit: Compliance Scanner Agent
Autonomous security and compliance scanning agent for git repositories. Features: SAST (Semgrep), SBOM (Syft), CVE monitoring (OSV.dev/NVD), GDPR/OAuth pattern detection, LLM triage, issue creation (GitHub/GitLab/Jira), PR reviews, and Dioxus fullstack dashboard. Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
226
compliance-agent/src/pipeline/patterns.rs
Normal file
226
compliance-agent/src/pipeline/patterns.rs
Normal file
@@ -0,0 +1,226 @@
|
||||
use std::path::Path;
|
||||
|
||||
use compliance_core::models::{Finding, ScanType, Severity};
|
||||
use compliance_core::traits::{ScanOutput, Scanner};
|
||||
use compliance_core::CoreError;
|
||||
use regex::Regex;
|
||||
|
||||
use crate::pipeline::dedup;
|
||||
|
||||
pub struct GdprPatternScanner {
|
||||
patterns: Vec<PatternRule>,
|
||||
}
|
||||
|
||||
pub struct OAuthPatternScanner {
|
||||
patterns: Vec<PatternRule>,
|
||||
}
|
||||
|
||||
struct PatternRule {
|
||||
id: String,
|
||||
title: String,
|
||||
description: String,
|
||||
pattern: Regex,
|
||||
severity: Severity,
|
||||
file_extensions: Vec<String>,
|
||||
}
|
||||
|
||||
impl GdprPatternScanner {
|
||||
pub fn new() -> Self {
|
||||
let patterns = vec![
|
||||
PatternRule {
|
||||
id: "gdpr-pii-logging".to_string(),
|
||||
title: "PII data potentially logged".to_string(),
|
||||
description: "Logging statements that may contain personally identifiable information (email, SSN, phone, IP address).".to_string(),
|
||||
pattern: Regex::new(r#"(?i)(log|print|console\.|logger\.|tracing::)\s*[\.(].*\b(email|ssn|social.?security|phone.?number|ip.?addr|passport|date.?of.?birth|credit.?card)\b"#).unwrap_or_else(|_| Regex::new("^$").unwrap()),
|
||||
severity: Severity::High,
|
||||
file_extensions: vec!["rs", "py", "js", "ts", "java", "go", "rb"].into_iter().map(String::from).collect(),
|
||||
},
|
||||
PatternRule {
|
||||
id: "gdpr-no-consent".to_string(),
|
||||
title: "Data collection without apparent consent mechanism".to_string(),
|
||||
description: "Data collection endpoint that doesn't reference consent or opt-in mechanisms.".to_string(),
|
||||
pattern: Regex::new(r#"(?i)(collect|store|save|persist|record).*\b(personal|user.?data|pii|biometric)\b"#).unwrap_or_else(|_| Regex::new("^$").unwrap()),
|
||||
severity: Severity::Medium,
|
||||
file_extensions: vec!["rs", "py", "js", "ts", "java", "go"].into_iter().map(String::from).collect(),
|
||||
},
|
||||
PatternRule {
|
||||
id: "gdpr-no-delete-endpoint".to_string(),
|
||||
title: "Missing data deletion capability".to_string(),
|
||||
description: "User data models or controllers without corresponding deletion endpoints (right to erasure).".to_string(),
|
||||
pattern: Regex::new(r#"(?i)(class|struct|model)\s+User(?!.*[Dd]elete)"#).unwrap_or_else(|_| Regex::new("^$").unwrap()),
|
||||
severity: Severity::Medium,
|
||||
file_extensions: vec!["rs", "py", "js", "ts", "java", "go", "rb"].into_iter().map(String::from).collect(),
|
||||
},
|
||||
PatternRule {
|
||||
id: "gdpr-hardcoded-retention".to_string(),
|
||||
title: "Hardcoded data retention period".to_string(),
|
||||
description: "Data retention periods should be configurable for GDPR compliance.".to_string(),
|
||||
pattern: Regex::new(r#"(?i)(retention|ttl|expire|keep.?for)\s*[=:]\s*\d+"#).unwrap_or_else(|_| Regex::new("^$").unwrap()),
|
||||
severity: Severity::Low,
|
||||
file_extensions: vec!["rs", "py", "js", "ts", "java", "go", "yaml", "yml", "toml", "json"].into_iter().map(String::from).collect(),
|
||||
},
|
||||
];
|
||||
Self { patterns }
|
||||
}
|
||||
}
|
||||
|
||||
impl Scanner for GdprPatternScanner {
|
||||
fn name(&self) -> &str {
|
||||
"gdpr-patterns"
|
||||
}
|
||||
|
||||
fn scan_type(&self) -> ScanType {
|
||||
ScanType::Gdpr
|
||||
}
|
||||
|
||||
async fn scan(&self, repo_path: &Path, repo_id: &str) -> Result<ScanOutput, CoreError> {
|
||||
let findings = scan_with_patterns(repo_path, repo_id, &self.patterns, ScanType::Gdpr, "gdpr-patterns")?;
|
||||
Ok(ScanOutput {
|
||||
findings,
|
||||
sbom_entries: Vec::new(),
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
impl OAuthPatternScanner {
|
||||
pub fn new() -> Self {
|
||||
let patterns = vec![
|
||||
PatternRule {
|
||||
id: "oauth-implicit-grant".to_string(),
|
||||
title: "OAuth implicit grant flow detected".to_string(),
|
||||
description: "Implicit grant flow is deprecated and insecure. Use authorization code flow with PKCE instead.".to_string(),
|
||||
pattern: Regex::new(r#"(?i)(response_type\s*[=:]\s*["']?token|grant_type\s*[=:]\s*["']?implicit)"#).unwrap_or_else(|_| Regex::new("^$").unwrap()),
|
||||
severity: Severity::High,
|
||||
file_extensions: vec!["rs", "py", "js", "ts", "java", "go", "yaml", "yml", "json"].into_iter().map(String::from).collect(),
|
||||
},
|
||||
PatternRule {
|
||||
id: "oauth-missing-pkce".to_string(),
|
||||
title: "OAuth flow without PKCE".to_string(),
|
||||
description: "Authorization code flow should use PKCE (code_challenge/code_verifier) for public clients.".to_string(),
|
||||
pattern: Regex::new(r#"(?i)authorization.?code(?!.*code.?challenge)(?!.*pkce)"#).unwrap_or_else(|_| Regex::new("^$").unwrap()),
|
||||
severity: Severity::Medium,
|
||||
file_extensions: vec!["rs", "py", "js", "ts", "java", "go"].into_iter().map(String::from).collect(),
|
||||
},
|
||||
PatternRule {
|
||||
id: "oauth-token-localstorage".to_string(),
|
||||
title: "Token stored in localStorage".to_string(),
|
||||
description: "Storing tokens in localStorage is vulnerable to XSS. Use httpOnly cookies or secure session storage.".to_string(),
|
||||
pattern: Regex::new(r#"(?i)localStorage\.(set|get)Item\s*\(\s*["'].*token"#).unwrap_or_else(|_| Regex::new("^$").unwrap()),
|
||||
severity: Severity::High,
|
||||
file_extensions: vec!["js", "ts", "jsx", "tsx"].into_iter().map(String::from).collect(),
|
||||
},
|
||||
PatternRule {
|
||||
id: "oauth-token-url".to_string(),
|
||||
title: "Token passed in URL parameters".to_string(),
|
||||
description: "Tokens in URLs can leak via referrer headers, server logs, and browser history.".to_string(),
|
||||
pattern: Regex::new(r#"(?i)(access_token|bearer)\s*[=]\s*.*\b(url|query|param|href)\b"#).unwrap_or_else(|_| Regex::new("^$").unwrap()),
|
||||
severity: Severity::High,
|
||||
file_extensions: vec!["rs", "py", "js", "ts", "java", "go"].into_iter().map(String::from).collect(),
|
||||
},
|
||||
];
|
||||
Self { patterns }
|
||||
}
|
||||
}
|
||||
|
||||
impl Scanner for OAuthPatternScanner {
|
||||
fn name(&self) -> &str {
|
||||
"oauth-patterns"
|
||||
}
|
||||
|
||||
fn scan_type(&self) -> ScanType {
|
||||
ScanType::OAuth
|
||||
}
|
||||
|
||||
async fn scan(&self, repo_path: &Path, repo_id: &str) -> Result<ScanOutput, CoreError> {
|
||||
let findings = scan_with_patterns(repo_path, repo_id, &self.patterns, ScanType::OAuth, "oauth-patterns")?;
|
||||
Ok(ScanOutput {
|
||||
findings,
|
||||
sbom_entries: Vec::new(),
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
fn scan_with_patterns(
|
||||
repo_path: &Path,
|
||||
repo_id: &str,
|
||||
patterns: &[PatternRule],
|
||||
scan_type: ScanType,
|
||||
scanner_name: &str,
|
||||
) -> Result<Vec<Finding>, CoreError> {
|
||||
let mut findings = Vec::new();
|
||||
|
||||
for entry in walkdir(repo_path)? {
|
||||
let path = entry.path();
|
||||
if !path.is_file() {
|
||||
continue;
|
||||
}
|
||||
|
||||
let ext = path
|
||||
.extension()
|
||||
.and_then(|e| e.to_str())
|
||||
.unwrap_or("")
|
||||
.to_string();
|
||||
|
||||
let content = match std::fs::read_to_string(path) {
|
||||
Ok(c) => c,
|
||||
Err(_) => continue, // skip binary files
|
||||
};
|
||||
|
||||
let relative_path = path
|
||||
.strip_prefix(repo_path)
|
||||
.unwrap_or(path)
|
||||
.to_string_lossy()
|
||||
.to_string();
|
||||
|
||||
for pattern in patterns {
|
||||
if !pattern.file_extensions.contains(&ext) {
|
||||
continue;
|
||||
}
|
||||
|
||||
for (line_num, line) in content.lines().enumerate() {
|
||||
if pattern.pattern.is_match(line) {
|
||||
let fingerprint = dedup::compute_fingerprint(&[
|
||||
repo_id,
|
||||
&pattern.id,
|
||||
&relative_path,
|
||||
&(line_num + 1).to_string(),
|
||||
]);
|
||||
|
||||
let mut finding = Finding::new(
|
||||
repo_id.to_string(),
|
||||
fingerprint,
|
||||
scanner_name.to_string(),
|
||||
scan_type.clone(),
|
||||
pattern.title.clone(),
|
||||
pattern.description.clone(),
|
||||
pattern.severity.clone(),
|
||||
);
|
||||
finding.rule_id = Some(pattern.id.clone());
|
||||
finding.file_path = Some(relative_path.clone());
|
||||
finding.line_number = Some((line_num + 1) as u32);
|
||||
finding.code_snippet = Some(line.to_string());
|
||||
|
||||
findings.push(finding);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
Ok(findings)
|
||||
}
|
||||
|
||||
fn walkdir(path: &Path) -> Result<Vec<walkdir::DirEntry>, CoreError> {
|
||||
// Simple recursive file walk, skipping hidden dirs and common non-source dirs
|
||||
let skip_dirs = [".git", "node_modules", "target", "vendor", ".venv", "__pycache__", "dist", "build"];
|
||||
|
||||
let entries: Vec<_> = walkdir::WalkDir::new(path)
|
||||
.into_iter()
|
||||
.filter_entry(|e| {
|
||||
let name = e.file_name().to_string_lossy();
|
||||
!skip_dirs.contains(&name.as_ref())
|
||||
})
|
||||
.filter_map(|e| e.ok())
|
||||
.collect();
|
||||
|
||||
Ok(entries)
|
||||
}
|
||||
Reference in New Issue
Block a user