All checks were successful
CI / Format (push) Successful in 4s
CI / Clippy (push) Successful in 4m19s
CI / Security Audit (push) Successful in 1m44s
CI / Detect Changes (push) Successful in 5s
CI / Tests (push) Successful in 5m15s
CI / Deploy Agent (push) Successful in 2s
CI / Deploy Dashboard (push) Successful in 2s
CI / Deploy Docs (push) Has been skipped
CI / Deploy MCP (push) Successful in 2s
415 lines
15 KiB
Rust
415 lines
15 KiB
Rust
use std::path::Path;
|
|
|
|
use compliance_core::models::{Finding, ScanType, Severity};
|
|
use compliance_core::traits::{ScanOutput, Scanner};
|
|
use compliance_core::CoreError;
|
|
use regex::Regex;
|
|
|
|
use crate::pipeline::dedup;
|
|
|
|
fn compile_regex(pattern: &str) -> Regex {
|
|
Regex::new(pattern).unwrap_or_else(|e| {
|
|
tracing::warn!("Invalid regex pattern '{pattern}': {e}, using empty fallback");
|
|
// SAFETY: "^$" is a known-valid regex that matches only empty strings
|
|
#[allow(clippy::unwrap_used)]
|
|
Regex::new("^$").unwrap()
|
|
})
|
|
}
|
|
|
|
pub struct GdprPatternScanner {
|
|
patterns: Vec<PatternRule>,
|
|
}
|
|
|
|
pub struct OAuthPatternScanner {
|
|
patterns: Vec<PatternRule>,
|
|
}
|
|
|
|
struct PatternRule {
|
|
id: String,
|
|
title: String,
|
|
description: String,
|
|
pattern: Regex,
|
|
severity: Severity,
|
|
file_extensions: Vec<String>,
|
|
}
|
|
|
|
impl GdprPatternScanner {
|
|
pub fn new() -> Self {
|
|
let patterns = vec![
|
|
PatternRule {
|
|
id: "gdpr-pii-logging".to_string(),
|
|
title: "PII data potentially logged".to_string(),
|
|
description: "Logging statements that may contain personally identifiable information (email, SSN, phone, IP address).".to_string(),
|
|
pattern: compile_regex(r#"(?i)(log|print|console\.|logger\.|tracing::)\s*[\.(].*\b(email|ssn|social.?security|phone.?number|ip.?addr|passport|date.?of.?birth|credit.?card)\b"#),
|
|
severity: Severity::High,
|
|
file_extensions: vec!["rs", "py", "js", "ts", "java", "go", "rb"].into_iter().map(String::from).collect(),
|
|
},
|
|
PatternRule {
|
|
id: "gdpr-no-consent".to_string(),
|
|
title: "Data collection without apparent consent mechanism".to_string(),
|
|
description: "Data collection endpoint that doesn't reference consent or opt-in mechanisms.".to_string(),
|
|
pattern: compile_regex(r#"(?i)(collect|store|save|persist|record).*\b(personal|user.?data|pii|biometric)\b"#),
|
|
severity: Severity::Medium,
|
|
file_extensions: vec!["rs", "py", "js", "ts", "java", "go"].into_iter().map(String::from).collect(),
|
|
},
|
|
PatternRule {
|
|
id: "gdpr-no-delete-endpoint".to_string(),
|
|
title: "Missing data deletion capability".to_string(),
|
|
description: "User data models or controllers without corresponding deletion endpoints (right to erasure).".to_string(),
|
|
pattern: compile_regex(r#"(?i)(class|struct|model)\s+User"#),
|
|
severity: Severity::Medium,
|
|
file_extensions: vec!["rs", "py", "js", "ts", "java", "go", "rb"].into_iter().map(String::from).collect(),
|
|
},
|
|
PatternRule {
|
|
id: "gdpr-hardcoded-retention".to_string(),
|
|
title: "Hardcoded data retention period".to_string(),
|
|
description: "Data retention periods should be configurable for GDPR compliance.".to_string(),
|
|
pattern: compile_regex(r#"(?i)(retention|ttl|expire|keep.?for)\s*[=:]\s*\d+"#),
|
|
severity: Severity::Low,
|
|
file_extensions: vec!["rs", "py", "js", "ts", "java", "go", "yaml", "yml", "toml", "json"].into_iter().map(String::from).collect(),
|
|
},
|
|
];
|
|
Self { patterns }
|
|
}
|
|
}
|
|
|
|
impl Scanner for GdprPatternScanner {
|
|
fn name(&self) -> &str {
|
|
"gdpr-patterns"
|
|
}
|
|
|
|
fn scan_type(&self) -> ScanType {
|
|
ScanType::Gdpr
|
|
}
|
|
|
|
#[tracing::instrument(skip_all)]
|
|
async fn scan(&self, repo_path: &Path, repo_id: &str) -> Result<ScanOutput, CoreError> {
|
|
let findings = scan_with_patterns(
|
|
repo_path,
|
|
repo_id,
|
|
&self.patterns,
|
|
ScanType::Gdpr,
|
|
"gdpr-patterns",
|
|
)?;
|
|
Ok(ScanOutput {
|
|
findings,
|
|
sbom_entries: Vec::new(),
|
|
})
|
|
}
|
|
}
|
|
|
|
impl OAuthPatternScanner {
|
|
pub fn new() -> Self {
|
|
let patterns = vec![
|
|
PatternRule {
|
|
id: "oauth-implicit-grant".to_string(),
|
|
title: "OAuth implicit grant flow detected".to_string(),
|
|
description: "Implicit grant flow is deprecated and insecure. Use authorization code flow with PKCE instead.".to_string(),
|
|
pattern: compile_regex(r#"(?i)(response_type\s*[=:]\s*["']?token|grant_type\s*[=:]\s*["']?implicit)"#),
|
|
severity: Severity::High,
|
|
file_extensions: vec!["rs", "py", "js", "ts", "java", "go", "yaml", "yml", "json"].into_iter().map(String::from).collect(),
|
|
},
|
|
PatternRule {
|
|
id: "oauth-missing-pkce".to_string(),
|
|
title: "OAuth flow without PKCE".to_string(),
|
|
description: "Authorization code flow should use PKCE (code_challenge/code_verifier) for public clients.".to_string(),
|
|
pattern: compile_regex(r#"(?i)authorization.?code"#),
|
|
severity: Severity::Medium,
|
|
file_extensions: vec!["rs", "py", "js", "ts", "java", "go"].into_iter().map(String::from).collect(),
|
|
},
|
|
PatternRule {
|
|
id: "oauth-token-localstorage".to_string(),
|
|
title: "Token stored in localStorage".to_string(),
|
|
description: "Storing tokens in localStorage is vulnerable to XSS. Use httpOnly cookies or secure session storage.".to_string(),
|
|
pattern: compile_regex(r#"(?i)localStorage\.(set|get)Item\s*\(\s*["'].*token"#),
|
|
severity: Severity::High,
|
|
file_extensions: vec!["js", "ts", "jsx", "tsx"].into_iter().map(String::from).collect(),
|
|
},
|
|
PatternRule {
|
|
id: "oauth-token-url".to_string(),
|
|
title: "Token passed in URL parameters".to_string(),
|
|
description: "Tokens in URLs can leak via referrer headers, server logs, and browser history.".to_string(),
|
|
pattern: compile_regex(r#"(?i)(access_token|bearer)\s*[=]\s*.*\b(url|query|param|href)\b"#),
|
|
severity: Severity::High,
|
|
file_extensions: vec!["rs", "py", "js", "ts", "java", "go"].into_iter().map(String::from).collect(),
|
|
},
|
|
];
|
|
Self { patterns }
|
|
}
|
|
}
|
|
|
|
impl Scanner for OAuthPatternScanner {
|
|
fn name(&self) -> &str {
|
|
"oauth-patterns"
|
|
}
|
|
|
|
fn scan_type(&self) -> ScanType {
|
|
ScanType::OAuth
|
|
}
|
|
|
|
#[tracing::instrument(skip_all)]
|
|
async fn scan(&self, repo_path: &Path, repo_id: &str) -> Result<ScanOutput, CoreError> {
|
|
let findings = scan_with_patterns(
|
|
repo_path,
|
|
repo_id,
|
|
&self.patterns,
|
|
ScanType::OAuth,
|
|
"oauth-patterns",
|
|
)?;
|
|
Ok(ScanOutput {
|
|
findings,
|
|
sbom_entries: Vec::new(),
|
|
})
|
|
}
|
|
}
|
|
|
|
fn scan_with_patterns(
|
|
repo_path: &Path,
|
|
repo_id: &str,
|
|
patterns: &[PatternRule],
|
|
scan_type: ScanType,
|
|
scanner_name: &str,
|
|
) -> Result<Vec<Finding>, CoreError> {
|
|
let mut findings = Vec::new();
|
|
|
|
for entry in walkdir(repo_path)? {
|
|
let path = entry.path();
|
|
if !path.is_file() {
|
|
continue;
|
|
}
|
|
|
|
let ext = path
|
|
.extension()
|
|
.and_then(|e| e.to_str())
|
|
.unwrap_or("")
|
|
.to_string();
|
|
|
|
let content = match std::fs::read_to_string(path) {
|
|
Ok(c) => c,
|
|
Err(_) => continue, // skip binary files
|
|
};
|
|
|
|
let relative_path = path
|
|
.strip_prefix(repo_path)
|
|
.unwrap_or(path)
|
|
.to_string_lossy()
|
|
.to_string();
|
|
|
|
for pattern in patterns {
|
|
if !pattern.file_extensions.contains(&ext) {
|
|
continue;
|
|
}
|
|
|
|
for (line_num, line) in content.lines().enumerate() {
|
|
if pattern.pattern.is_match(line) {
|
|
let fingerprint = dedup::compute_fingerprint(&[
|
|
repo_id,
|
|
&pattern.id,
|
|
&relative_path,
|
|
&(line_num + 1).to_string(),
|
|
]);
|
|
|
|
let mut finding = Finding::new(
|
|
repo_id.to_string(),
|
|
fingerprint,
|
|
scanner_name.to_string(),
|
|
scan_type.clone(),
|
|
pattern.title.clone(),
|
|
pattern.description.clone(),
|
|
pattern.severity.clone(),
|
|
);
|
|
finding.rule_id = Some(pattern.id.clone());
|
|
finding.file_path = Some(relative_path.clone());
|
|
finding.line_number = Some((line_num + 1) as u32);
|
|
finding.code_snippet = Some(line.to_string());
|
|
|
|
findings.push(finding);
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
Ok(findings)
|
|
}
|
|
|
|
/// Recursively collects the directory entries below `path`, pruning common
/// VCS, dependency and build-output directories.
///
/// Any entry (other than the root itself) whose file name equals one of the
/// names in `SKIP_DIRS` is excluded, and directories excluded this way are not
/// descended into. Entries that fail to be read during traversal are dropped
/// silently, so with the current implementation this function always returns
/// `Ok`.
fn walkdir(path: &Path) -> Result<Vec<walkdir::DirEntry>, CoreError> {
    /// Names that are never scanned. Only the hidden directories listed here
    /// are skipped; other dot-directories are still walked.
    const SKIP_DIRS: [&str; 8] = [
        ".git",
        "node_modules",
        "target",
        "vendor",
        ".venv",
        "__pycache__",
        "dist",
        "build",
    ];

    let entries: Vec<_> = walkdir::WalkDir::new(path)
        .into_iter()
        .filter_entry(|entry| {
            // `filter_entry` is also invoked for the root (depth 0). Without
            // this guard a repository checked out into a directory that is
            // itself called e.g. "build" or "dist" would be pruned entirely
            // and never scanned.
            if entry.depth() == 0 {
                return true;
            }
            let name = entry.file_name().to_string_lossy();
            !SKIP_DIRS.contains(&name.as_ref())
        })
        .filter_map(|entry| entry.ok())
        .collect();

    Ok(entries)
}
|
|
|
|
#[cfg(test)]
mod tests {
    use super::*;

    /// Asserts that `rule` matches every line in `samples`.
    fn assert_matches_all(rule: &PatternRule, samples: &[&str]) {
        for sample in samples {
            assert!(
                rule.pattern.is_match(sample),
                "rule {} should match {:?}",
                rule.id,
                sample
            );
        }
    }

    /// Asserts that `rule` matches none of the lines in `samples`.
    fn assert_matches_none(rule: &PatternRule, samples: &[&str]) {
        for sample in samples {
            assert!(
                !rule.pattern.is_match(sample),
                "rule {} should not match {:?}",
                rule.id,
                sample
            );
        }
    }

    /// Asserts that `rule` targets every extension in `extensions`.
    fn assert_covers(rule: &PatternRule, extensions: &[&str]) {
        for ext in extensions {
            assert!(
                rule.file_extensions.contains(&ext.to_string()),
                "rule {} should cover .{} files",
                rule.id,
                ext
            );
        }
    }

    /// Asserts that `rule` targets none of the extensions in `extensions`.
    fn assert_excludes(rule: &PatternRule, extensions: &[&str]) {
        for ext in extensions {
            assert!(
                !rule.file_extensions.contains(&ext.to_string()),
                "rule {} should not cover .{} files",
                rule.id,
                ext
            );
        }
    }

    // --- compile_regex tests ---

    #[test]
    fn compile_regex_valid_pattern() {
        let word = compile_regex(r"\bfoo\b");
        assert!(word.is_match("hello foo bar"));
        assert!(!word.is_match("foobar"));
    }

    #[test]
    fn compile_regex_invalid_pattern_returns_fallback() {
        // A pattern that fails to compile yields the "^$" fallback, which
        // matches the empty string and nothing else.
        let fallback = compile_regex(r"[invalid");
        assert!(fallback.is_match(""));
        assert!(!fallback.is_match("anything"));
    }

    // --- GDPR pattern tests ---

    #[test]
    fn gdpr_pii_logging_matches() {
        let scanner = GdprPatternScanner::new();
        // patterns[0] is gdpr-pii-logging:
        // (log|print|console\.|logger\.|tracing::)\s*[\.(].*\b(pii_keyword)\b
        assert_matches_all(
            &scanner.patterns[0],
            &[
                "console.log(email)",
                "console.log(user.ssn)",
                "print(phone_number)",
                "tracing::(ip_addr)",
                "log.debug(credit_card)",
            ],
        );
    }

    #[test]
    fn gdpr_pii_logging_no_false_positive() {
        let scanner = GdprPatternScanner::new();
        // Logging without PII keywords, and PII keywords without logging,
        // must both be left alone.
        assert_matches_none(
            &scanner.patterns[0],
            &[
                "logger.info(\"request completed\")",
                "let email = user.email;",
            ],
        );
    }

    #[test]
    fn gdpr_no_consent_matches() {
        let scanner = GdprPatternScanner::new();
        // patterns[1] is gdpr-no-consent
        assert_matches_all(
            &scanner.patterns[1],
            &[
                "collect personal data",
                "store user_data in db",
                "save pii to disk",
            ],
        );
    }

    #[test]
    fn gdpr_user_model_matches() {
        let scanner = GdprPatternScanner::new();
        // patterns[2] is gdpr-no-delete-endpoint
        assert_matches_all(
            &scanner.patterns[2],
            &["struct User {", "class User(Model):"],
        );
    }

    #[test]
    fn gdpr_hardcoded_retention_matches() {
        let scanner = GdprPatternScanner::new();
        // patterns[3] is gdpr-hardcoded-retention
        assert_matches_all(
            &scanner.patterns[3],
            &["retention = 30", "ttl: 3600", "expire = 86400"],
        );
    }

    // --- OAuth pattern tests ---

    #[test]
    fn oauth_implicit_grant_matches() {
        let scanner = OAuthPatternScanner::new();
        // patterns[0] is oauth-implicit-grant
        assert_matches_all(
            &scanner.patterns[0],
            &[
                "response_type = \"token\"",
                "grant_type: implicit",
                "response_type='token'",
            ],
        );
    }

    #[test]
    fn oauth_implicit_grant_no_false_positive() {
        let scanner = OAuthPatternScanner::new();
        assert_matches_none(
            &scanner.patterns[0],
            &[
                "response_type = \"code\"",
                "grant_type: authorization_code",
            ],
        );
    }

    #[test]
    fn oauth_authorization_code_matches() {
        let scanner = OAuthPatternScanner::new();
        // patterns[1] is oauth-missing-pkce
        assert_matches_all(
            &scanner.patterns[1],
            &["uses authorization_code flow", "authorization code grant"],
        );
    }

    #[test]
    fn oauth_token_localstorage_matches() {
        let scanner = OAuthPatternScanner::new();
        // patterns[2] is oauth-token-localstorage
        assert_matches_all(
            &scanner.patterns[2],
            &[
                "localStorage.setItem('access_token', tok)",
                "localStorage.getItem(\"refresh_token\")",
            ],
        );
    }

    #[test]
    fn oauth_token_localstorage_no_false_positive() {
        let scanner = OAuthPatternScanner::new();
        assert_matches_none(
            &scanner.patterns[2],
            &[
                "localStorage.setItem('theme', 'dark')",
                "sessionStorage.setItem('token', t)",
            ],
        );
    }

    #[test]
    fn oauth_token_url_matches() {
        let scanner = OAuthPatternScanner::new();
        // patterns[3] is oauth-token-url
        assert_matches_all(
            &scanner.patterns[3],
            &["access_token = build_url(query)", "bearer = url.param"],
        );
    }

    // --- Pattern rule file extension filtering ---

    #[test]
    fn gdpr_patterns_cover_common_languages() {
        let scanner = GdprPatternScanner::new();
        for rule in &scanner.patterns {
            assert_covers(rule, &["rs"]);
        }
    }

    #[test]
    fn oauth_localstorage_only_js_ts() {
        let scanner = OAuthPatternScanner::new();
        // patterns[2] is oauth-token-localstorage
        let rule = &scanner.patterns[2];
        assert_covers(rule, &["js", "ts"]);
        assert_excludes(rule, &["rs", "py"]);
    }
}
|