Fix formatting and clippy warnings across workspace

- Run cargo fmt on all crates
- Fix regex patterns using unsupported lookahead in patterns.rs
- Replace unwrap() calls with compile_regex() helper
- Fix never type fallback in GitHub tracker
- Fix redundant field name in findings page
- Allow enum_variant_names for Dioxus Route enum
- Fix &mut Vec -> &mut [T] clippy lint in sbom.rs
- Mark unused-but-intended APIs with #[allow(dead_code)]

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-03-02 17:41:03 +01:00
parent 62196e5d74
commit 03ee69834d
37 changed files with 519 additions and 220 deletions
@@ -3,13 +3,22 @@ use compliance_core::CoreError;

 pub struct CveScanner {
    http: reqwest::Client,
+    #[allow(dead_code)]
    searxng_url: Option<String>,
    nvd_api_key: Option<String>,
 }

 impl CveScanner {
-    pub fn new(http: reqwest::Client, searxng_url: Option<String>, nvd_api_key: Option<String>) -> Self {
-        Self { http, searxng_url, nvd_api_key }
+    pub fn new(
+        http: reqwest::Client,
+        searxng_url: Option<String>,
+        nvd_api_key: Option<String>,
+    ) -> Self {
+        Self {
+            http,
+            searxng_url,
+            nvd_api_key,
+        }
    }

    pub async fn scan_dependencies(
@@ -87,9 +96,10 @@ impl CveScanner {
            return Ok(Vec::new());
        }

-        let result: OsvBatchResponse = resp.json().await.map_err(|e| {
-            CoreError::Http(format!("Failed to parse OSV.dev response: {e}"))
-        })?;
+        let result: OsvBatchResponse = resp
+            .json()
+            .await
+            .map_err(|e| CoreError::Http(format!("Failed to parse OSV.dev response: {e}")))?;

        let vulns = result
            .results
@@ -101,8 +111,9 @@ impl CveScanner {
                    .map(|v| OsvVuln {
                        id: v.id,
                        summary: v.summary,
-                        severity: v.database_specific
-                            .and_then(|d| d.get("severity").and_then(|s| s.as_str()).map(String::from)),
+                        severity: v.database_specific.and_then(|d| {
+                            d.get("severity").and_then(|s| s.as_str()).map(String::from)
+                        }),
                    })
                    .collect()
            })
@@ -123,17 +134,19 @@ impl CveScanner {
            req = req.header("apiKey", key.as_str());
        }

-        let resp = req.send().await.map_err(|e| {
-            CoreError::Http(format!("NVD request failed: {e}"))
-        })?;
+        let resp = req
+            .send()
+            .await
+            .map_err(|e| CoreError::Http(format!("NVD request failed: {e}")))?;

        if !resp.status().is_success() {
            return Ok(None);
        }

-        let body: serde_json::Value = resp.json().await.map_err(|e| {
-            CoreError::Http(format!("Failed to parse NVD response: {e}"))
-        })?;
+        let body: serde_json::Value = resp
+            .json()
+            .await
+            .map_err(|e| CoreError::Http(format!("Failed to parse NVD response: {e}")))?;

        // Extract CVSS v3.1 base score
        let score = body["vulnerabilities"]
@@ -146,15 +159,22 @@ impl CveScanner {
        Ok(score)
    }

+    #[allow(dead_code)]
    pub async fn search_context(&self, cve_id: &str) -> Result<Vec<String>, CoreError> {
        let Some(searxng_url) = &self.searxng_url else {
            return Ok(Vec::new());
        };

-        let url = format!("{}/search?q={cve_id}&format=json&engines=duckduckgo", searxng_url.trim_end_matches('/'));
-        let resp = self.http.get(&url).send().await.map_err(|e| {
-            CoreError::Http(format!("SearXNG request failed: {e}"))
-        })?;
+        let url = format!(
+            "{}/search?q={cve_id}&format=json&engines=duckduckgo",
+            searxng_url.trim_end_matches('/')
+        );
+        let resp = self
+            .http
+            .get(&url)
+            .send()
+            .await
+            .map_err(|e| CoreError::Http(format!("SearXNG request failed: {e}")))?;

        if !resp.status().is_success() {
            return Ok(Vec::new());
@@ -41,12 +41,7 @@ impl GitOps {
        let head_ref = repo.head()?;
        let head_name = head_ref.name().unwrap_or("HEAD");

-        repo.reference(
-            head_name,
-            fetch_commit.id(),
-            true,
-            "fast-forward",
-        )?;
+        repo.reference(head_name, fetch_commit.id(), true, "fast-forward")?;
        repo.checkout_head(Some(git2::build::CheckoutBuilder::default().force()))?;

        tracing::info!("Fetched and fast-forwarded {}", repo_path.display());
@@ -68,6 +63,7 @@ impl GitOps {
        }
    }

+    #[allow(dead_code)]
    pub fn get_changed_files(
        repo_path: &Path,
        old_sha: &str,
@@ -29,14 +29,15 @@ impl PipelineOrchestrator {
        llm: Arc<LlmClient>,
        http: reqwest::Client,
    ) -> Self {
-        Self { config, db, llm, http }
+        Self {
+            config,
+            db,
+            llm,
+            http,
+        }
    }

-    pub async fn run(
-        &self,
-        repo_id: &str,
-        trigger: ScanTrigger,
-    ) -> Result<(), AgentError> {
+    pub async fn run(&self, repo_id: &str, trigger: ScanTrigger) -> Result<(), AgentError> {
        // Look up the repository
        let repo = self
            .db
@@ -48,7 +49,9 @@ impl PipelineOrchestrator {
        // Create scan run
        let scan_run = ScanRun::new(repo_id.to_string(), trigger);
        let insert = self.db.scan_runs().insert_one(&scan_run).await?;
-        let scan_run_id = insert.inserted_id.as_object_id()
+        let scan_run_id = insert
+            .inserted_id
+            .as_object_id()
            .map(|id| id.to_hex())
            .unwrap_or_default();

@@ -57,29 +60,35 @@ impl PipelineOrchestrator {
        // Update scan run status
        match &result {
            Ok(count) => {
-                self.db.scan_runs().update_one(
-                    doc! { "_id": &insert.inserted_id },
-                    doc! {
-                        "$set": {
-                            "status": "completed",
-                            "current_phase": "completed",
-                            "new_findings_count": *count as i64,
-                            "completed_at": mongodb::bson::DateTime::now(),
-                        }
-                    },
-                ).await?;
+                self.db
+                    .scan_runs()
+                    .update_one(
+                        doc! { "_id": &insert.inserted_id },
+                        doc! {
+                            "$set": {
+                                "status": "completed",
+                                "current_phase": "completed",
+                                "new_findings_count": *count as i64,
+                                "completed_at": mongodb::bson::DateTime::now(),
+                            }
+                        },
+                    )
+                    .await?;
            }
            Err(e) => {
-                self.db.scan_runs().update_one(
-                    doc! { "_id": &insert.inserted_id },
-                    doc! {
-                        "$set": {
-                            "status": "failed",
-                            "error_message": e.to_string(),
-                            "completed_at": mongodb::bson::DateTime::now(),
-                        }
-                    },
-                ).await?;
+                self.db
+                    .scan_runs()
+                    .update_one(
+                        doc! { "_id": &insert.inserted_id },
+                        doc! {
+                            "$set": {
+                                "status": "failed",
+                                "error_message": e.to_string(),
+                                "completed_at": mongodb::bson::DateTime::now(),
+                            }
+                        },
+                    )
+                    .await?;
            }
        }

@@ -91,9 +100,7 @@ impl PipelineOrchestrator {
        repo: &TrackedRepository,
        scan_run_id: &str,
    ) -> Result<u32, AgentError> {
-        let repo_id = repo.id.as_ref()
-            .map(|id| id.to_hex())
-            .unwrap_or_default();
+        let repo_id = repo.id.as_ref().map(|id| id.to_hex()).unwrap_or_default();

        // Stage 0: Change detection
        tracing::info!("[{repo_id}] Stage 0: Change detection");
@@ -140,7 +147,10 @@ impl PipelineOrchestrator {
                k.expose_secret().to_string()
            }),
        );
-        let cve_alerts = match cve_scanner.scan_dependencies(&repo_id, &mut sbom_entries).await {
+        let cve_alerts = match cve_scanner
+            .scan_dependencies(&repo_id, &mut sbom_entries)
+            .await
+        {
            Ok(alerts) => alerts,
            Err(e) => {
                tracing::warn!("[{repo_id}] CVE scanning failed: {e}");
@@ -163,7 +173,10 @@ impl PipelineOrchestrator {
        }

        // Stage 5: LLM Triage
-        tracing::info!("[{repo_id}] Stage 5: LLM Triage ({} findings)", all_findings.len());
+        tracing::info!(
+            "[{repo_id}] Stage 5: LLM Triage ({} findings)",
+            all_findings.len()
+        );
        self.update_phase(scan_run_id, "llm_triage").await;
        let triaged = crate::llm::triage::triage_findings(&self.llm, &mut all_findings).await;
        tracing::info!("[{repo_id}] Triaged: {triaged} findings passed confidence threshold");
@@ -223,16 +236,19 @@ impl PipelineOrchestrator {
        // Issue creation is handled by the trackers module - deferred to agent

        // Stage 7: Update repository
-        self.db.repositories().update_one(
-            doc! { "_id": repo.id },
-            doc! {
-                "$set": {
-                    "last_scanned_commit": &current_sha,
-                    "updated_at": mongodb::bson::DateTime::now(),
+        self.db
+            .repositories()
+            .update_one(
+                doc! { "_id": repo.id },
+                doc! {
+                    "$set": {
+                        "last_scanned_commit": &current_sha,
+                        "updated_at": mongodb::bson::DateTime::now(),
+                    },
+                    "$inc": { "findings_count": new_count as i64 },
                },
-                "$inc": { "findings_count": new_count as i64 },
-            },
-        ).await?;
+            )
+            .await?;

        tracing::info!("[{repo_id}] Scan complete: {new_count} new findings");
        Ok(new_count)
@@ -240,13 +256,17 @@ impl PipelineOrchestrator {

    async fn update_phase(&self, scan_run_id: &str, phase: &str) {
        if let Ok(oid) = mongodb::bson::oid::ObjectId::parse_str(scan_run_id) {
-            let _ = self.db.scan_runs().update_one(
-                doc! { "_id": oid },
-                doc! {
-                    "$set": { "current_phase": phase },
-                    "$push": { "phases_completed": phase },
-                },
-            ).await;
+            let _ = self
+                .db
+                .scan_runs()
+                .update_one(
+                    doc! { "_id": oid },
+                    doc! {
+                        "$set": { "current_phase": phase },
+                        "$push": { "phases_completed": phase },
+                    },
+                )
+                .await;
        }
    }
 }
@@ -7,6 +7,15 @@ use regex::Regex;

 use crate::pipeline::dedup;

+fn compile_regex(pattern: &str) -> Regex {
+    Regex::new(pattern).unwrap_or_else(|e| {
+        tracing::warn!("Invalid regex pattern '{pattern}': {e}, using empty fallback");
+        // SAFETY: "^$" is a known-valid regex that matches only empty strings
+        #[allow(clippy::unwrap_used)]
+        Regex::new("^$").unwrap()
+    })
+}
+
 pub struct GdprPatternScanner {
    patterns: Vec<PatternRule>,
 }
@@ -31,7 +40,7 @@ impl GdprPatternScanner {
                id: "gdpr-pii-logging".to_string(),
                title: "PII data potentially logged".to_string(),
                description: "Logging statements that may contain personally identifiable information (email, SSN, phone, IP address).".to_string(),
-                pattern: Regex::new(r#"(?i)(log|print|console\.|logger\.|tracing::)\s*[\.(].*\b(email|ssn|social.?security|phone.?number|ip.?addr|passport|date.?of.?birth|credit.?card)\b"#).unwrap_or_else(|_| Regex::new("^$").unwrap()),
+                pattern: compile_regex(r#"(?i)(log|print|console\.|logger\.|tracing::)\s*[\.(].*\b(email|ssn|social.?security|phone.?number|ip.?addr|passport|date.?of.?birth|credit.?card)\b"#),
                severity: Severity::High,
                file_extensions: vec!["rs", "py", "js", "ts", "java", "go", "rb"].into_iter().map(String::from).collect(),
            },
@@ -39,7 +48,7 @@ impl GdprPatternScanner {
                id: "gdpr-no-consent".to_string(),
                title: "Data collection without apparent consent mechanism".to_string(),
                description: "Data collection endpoint that doesn't reference consent or opt-in mechanisms.".to_string(),
-                pattern: Regex::new(r#"(?i)(collect|store|save|persist|record).*\b(personal|user.?data|pii|biometric)\b"#).unwrap_or_else(|_| Regex::new("^$").unwrap()),
+                pattern: compile_regex(r#"(?i)(collect|store|save|persist|record).*\b(personal|user.?data|pii|biometric)\b"#),
                severity: Severity::Medium,
                file_extensions: vec!["rs", "py", "js", "ts", "java", "go"].into_iter().map(String::from).collect(),
            },
@@ -47,7 +56,7 @@ impl GdprPatternScanner {
                id: "gdpr-no-delete-endpoint".to_string(),
                title: "Missing data deletion capability".to_string(),
                description: "User data models or controllers without corresponding deletion endpoints (right to erasure).".to_string(),
-                pattern: Regex::new(r#"(?i)(class|struct|model)\s+User(?!.*[Dd]elete)"#).unwrap_or_else(|_| Regex::new("^$").unwrap()),
+                pattern: compile_regex(r#"(?i)(class|struct|model)\s+User"#),
                severity: Severity::Medium,
                file_extensions: vec!["rs", "py", "js", "ts", "java", "go", "rb"].into_iter().map(String::from).collect(),
            },
@@ -55,7 +64,7 @@ impl GdprPatternScanner {
                id: "gdpr-hardcoded-retention".to_string(),
                title: "Hardcoded data retention period".to_string(),
                description: "Data retention periods should be configurable for GDPR compliance.".to_string(),
-                pattern: Regex::new(r#"(?i)(retention|ttl|expire|keep.?for)\s*[=:]\s*\d+"#).unwrap_or_else(|_| Regex::new("^$").unwrap()),
+                pattern: compile_regex(r#"(?i)(retention|ttl|expire|keep.?for)\s*[=:]\s*\d+"#),
                severity: Severity::Low,
                file_extensions: vec!["rs", "py", "js", "ts", "java", "go", "yaml", "yml", "toml", "json"].into_iter().map(String::from).collect(),
            },
@@ -74,7 +83,13 @@ impl Scanner for GdprPatternScanner {
    }

    async fn scan(&self, repo_path: &Path, repo_id: &str) -> Result<ScanOutput, CoreError> {
-        let findings = scan_with_patterns(repo_path, repo_id, &self.patterns, ScanType::Gdpr, "gdpr-patterns")?;
+        let findings = scan_with_patterns(
+            repo_path,
+            repo_id,
+            &self.patterns,
+            ScanType::Gdpr,
+            "gdpr-patterns",
+        )?;
        Ok(ScanOutput {
            findings,
            sbom_entries: Vec::new(),
@@ -89,7 +104,7 @@ impl OAuthPatternScanner {
                id: "oauth-implicit-grant".to_string(),
                title: "OAuth implicit grant flow detected".to_string(),
                description: "Implicit grant flow is deprecated and insecure. Use authorization code flow with PKCE instead.".to_string(),
-                pattern: Regex::new(r#"(?i)(response_type\s*[=:]\s*["']?token|grant_type\s*[=:]\s*["']?implicit)"#).unwrap_or_else(|_| Regex::new("^$").unwrap()),
+                pattern: compile_regex(r#"(?i)(response_type\s*[=:]\s*["']?token|grant_type\s*[=:]\s*["']?implicit)"#),
                severity: Severity::High,
                file_extensions: vec!["rs", "py", "js", "ts", "java", "go", "yaml", "yml", "json"].into_iter().map(String::from).collect(),
            },
@@ -97,7 +112,7 @@ impl OAuthPatternScanner {
                id: "oauth-missing-pkce".to_string(),
                title: "OAuth flow without PKCE".to_string(),
                description: "Authorization code flow should use PKCE (code_challenge/code_verifier) for public clients.".to_string(),
-                pattern: Regex::new(r#"(?i)authorization.?code(?!.*code.?challenge)(?!.*pkce)"#).unwrap_or_else(|_| Regex::new("^$").unwrap()),
+                pattern: compile_regex(r#"(?i)authorization.?code"#),
                severity: Severity::Medium,
                file_extensions: vec!["rs", "py", "js", "ts", "java", "go"].into_iter().map(String::from).collect(),
            },
@@ -105,7 +120,7 @@ impl OAuthPatternScanner {
                id: "oauth-token-localstorage".to_string(),
                title: "Token stored in localStorage".to_string(),
                description: "Storing tokens in localStorage is vulnerable to XSS. Use httpOnly cookies or secure session storage.".to_string(),
-                pattern: Regex::new(r#"(?i)localStorage\.(set|get)Item\s*\(\s*["'].*token"#).unwrap_or_else(|_| Regex::new("^$").unwrap()),
+                pattern: compile_regex(r#"(?i)localStorage\.(set|get)Item\s*\(\s*["'].*token"#),
                severity: Severity::High,
                file_extensions: vec!["js", "ts", "jsx", "tsx"].into_iter().map(String::from).collect(),
            },
@@ -113,7 +128,7 @@ impl OAuthPatternScanner {
                id: "oauth-token-url".to_string(),
                title: "Token passed in URL parameters".to_string(),
                description: "Tokens in URLs can leak via referrer headers, server logs, and browser history.".to_string(),
-                pattern: Regex::new(r#"(?i)(access_token|bearer)\s*[=]\s*.*\b(url|query|param|href)\b"#).unwrap_or_else(|_| Regex::new("^$").unwrap()),
+                pattern: compile_regex(r#"(?i)(access_token|bearer)\s*[=]\s*.*\b(url|query|param|href)\b"#),
                severity: Severity::High,
                file_extensions: vec!["rs", "py", "js", "ts", "java", "go"].into_iter().map(String::from).collect(),
            },
@@ -132,7 +147,13 @@ impl Scanner for OAuthPatternScanner {
    }

    async fn scan(&self, repo_path: &Path, repo_id: &str) -> Result<ScanOutput, CoreError> {
-        let findings = scan_with_patterns(repo_path, repo_id, &self.patterns, ScanType::OAuth, "oauth-patterns")?;
+        let findings = scan_with_patterns(
+            repo_path,
+            repo_id,
+            &self.patterns,
+            ScanType::OAuth,
+            "oauth-patterns",
+        )?;
        Ok(ScanOutput {
            findings,
            sbom_entries: Vec::new(),
@@ -211,7 +232,16 @@ fn scan_with_patterns(

 fn walkdir(path: &Path) -> Result<Vec<walkdir::DirEntry>, CoreError> {
    // Simple recursive file walk, skipping hidden dirs and common non-source dirs
-    let skip_dirs = [".git", "node_modules", "target", "vendor", ".venv", "__pycache__", "dist", "build"];
+    let skip_dirs = [
+        ".git",
+        "node_modules",
+        "target",
+        "vendor",
+        ".venv",
+        "__pycache__",
+        "dist",
+        "build",
+    ];

    let entries: Vec<_> = walkdir::WalkDir::new(path)
        .into_iter()
@@ -72,7 +72,9 @@ async fn run_syft(repo_path: &Path, repo_id: &str) -> Result<Vec<SbomEntry>, Cor
            entry.license = c.licenses.and_then(|ls| {
                ls.first().and_then(|l| {
                    l.license.as_ref().map(|lic| {
-                        lic.id.clone().unwrap_or_else(|| lic.name.clone().unwrap_or_default())
+                        lic.id
+                            .clone()
+                            .unwrap_or_else(|| lic.name.clone().unwrap_or_default())
                    })
                })
            });
@@ -99,8 +101,10 @@ async fn run_cargo_audit(repo_path: &Path, _repo_id: &str) -> Result<Vec<AuditVu
            source: Box::new(e),
        })?;

-    let result: CargoAuditOutput = serde_json::from_slice(&output.stdout)
-        .unwrap_or_else(|_| CargoAuditOutput { vulnerabilities: CargoAuditVulns { list: Vec::new() } });
+    let result: CargoAuditOutput =
+        serde_json::from_slice(&output.stdout).unwrap_or_else(|_| CargoAuditOutput {
+            vulnerabilities: CargoAuditVulns { list: Vec::new() },
+        });

    let vulns = result
        .vulnerabilities
@@ -116,7 +120,7 @@ async fn run_cargo_audit(repo_path: &Path, _repo_id: &str) -> Result<Vec<AuditVu
    Ok(vulns)
 }

-fn merge_audit_vulns(entries: &mut Vec<SbomEntry>, vulns: Vec<AuditVuln>) {
+fn merge_audit_vulns(entries: &mut [SbomEntry], vulns: Vec<AuditVuln>) {
    for vuln in vulns {
        if let Some(entry) = entries.iter_mut().find(|e| e.name == vuln.package) {
            entry.known_vulnerabilities.push(VulnRef {
@@ -66,11 +66,10 @@ impl Scanner for SemgrepScanner {
                finding.file_path = Some(r.path);
                finding.line_number = Some(r.start.line);
                finding.code_snippet = Some(r.extra.lines);
-                finding.cwe = r.extra.metadata.and_then(|m| {
-                    m.get("cwe")
-                        .and_then(|v| v.as_str())
-                        .map(|s| s.to_string())
-                });
+                finding.cwe = r
+                    .extra
+                    .metadata
+                    .and_then(|m| m.get("cwe").and_then(|v| v.as_str()).map(|s| s.to_string()));
                finding
            })
            .collect();