refactor: modularize codebase and add 404 unit tests (#13)

2026-03-13 08:03:45 +00:00
parent acc5b86aa4
commit 3bb690e5bb
89 changed files with 11884 additions and 6046 deletions
@@ -8,3 +8,51 @@ pub fn compute_fingerprint(parts: &[&str]) -> String {
    }
    hex::encode(hasher.finalize())
 }
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    /// Hashing the same parts twice must give the same fingerprint.
+    #[test]
+    fn fingerprint_is_deterministic() {
+        let parts = ["repo1", "rule-x", "src/main.rs", "42"];
+        let first = compute_fingerprint(&parts);
+        let second = compute_fingerprint(&parts);
+        assert_eq!(first, second);
+    }
+
+    /// Changing a single part (here the last one) must change the fingerprint.
+    #[test]
+    fn fingerprint_changes_with_different_input() {
+        let line_42 = compute_fingerprint(&["repo1", "rule-x", "src/main.rs", "42"]);
+        let line_43 = compute_fingerprint(&["repo1", "rule-x", "src/main.rs", "43"]);
+        assert_ne!(line_42, line_43);
+    }
+
+    /// The fingerprint is 64 ASCII hex digits, i.e. a hex-encoded 32-byte digest.
+    #[test]
+    fn fingerprint_is_valid_hex_sha256() {
+        let digest = compute_fingerprint(&["hello"]);
+        assert_eq!(digest.len(), 64, "SHA-256 hex should be 64 chars");
+        assert!(digest.bytes().all(|b| b.is_ascii_hexdigit()));
+    }
+
+    /// An empty slice of parts must still yield a full-length digest.
+    #[test]
+    fn fingerprint_empty_parts() {
+        let digest = compute_fingerprint(&[]);
+        assert_eq!(digest.len(), 64);
+    }
+
+    /// Swapping the order of the parts must change the fingerprint.
+    #[test]
+    fn fingerprint_order_matters() {
+        let forward = compute_fingerprint(&["a", "b"]);
+        let reversed = compute_fingerprint(&["b", "a"]);
+        assert_ne!(forward, reversed);
+    }
+
+    /// The same concatenated text split at different part boundaries
+    /// ("ab" + "c" vs "a" + "bc") must not collide.
+    #[test]
+    fn fingerprint_separator_prevents_collision() {
+        let split_late = compute_fingerprint(&["ab", "c"]);
+        let split_early = compute_fingerprint(&["a", "bc"]);
+        assert_ne!(split_late, split_early);
+    }
+}
@@ -129,3 +129,110 @@ struct GitleaksResult {
    #[serde(rename = "Match")]
    r#match: String,
 }
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    // --- is_allowlisted tests ---
+
+    /// Example/sample/template env files are allowlisted, regardless of case.
+    #[test]
+    fn allowlisted_env_example_files() {
+        for path in [".env.example", "config/.env.sample", "deploy/.ENV.TEMPLATE"] {
+            assert!(is_allowlisted(path), "{path} should be allowlisted");
+        }
+    }
+
+    /// Files under test/fixture directories are allowlisted.
+    #[test]
+    fn allowlisted_test_directories() {
+        for path in [
+            "src/test/config.json",
+            "src/tests/fixtures.rs",
+            "data/fixtures/secret.txt",
+            "pkg/testdata/key.pem",
+        ] {
+            assert!(is_allowlisted(path), "{path} should be allowlisted");
+        }
+    }
+
+    /// Files whose name marks them as mocks are allowlisted.
+    #[test]
+    fn allowlisted_mock_files() {
+        for path in ["src/mock_service.py", "lib/MockAuth.java"] {
+            assert!(is_allowlisted(path), "{path} should be allowlisted");
+        }
+    }
+
+    /// Conventional test/spec file suffixes are allowlisted.
+    #[test]
+    fn allowlisted_test_suffixes() {
+        for path in [
+            "auth_test.go",
+            "auth.test.ts",
+            "auth.test.js",
+            "auth.spec.ts",
+            "auth.spec.js",
+        ] {
+            assert!(is_allowlisted(path), "{path} should be allowlisted");
+        }
+    }
+
+    /// Ordinary source and config files must not be allowlisted.
+    #[test]
+    fn not_allowlisted_regular_files() {
+        for path in [
+            "src/main.rs",
+            "config/.env",
+            "lib/auth.ts",
+            "deploy/secrets.yaml",
+        ] {
+            assert!(!is_allowlisted(path), "{path} should not be allowlisted");
+        }
+    }
+
+    /// "test" appearing only as a substring of a file or directory name
+    /// must not trigger the allowlist.
+    #[test]
+    fn not_allowlisted_partial_matches() {
+        for path in ["src/attestation.rs", "src/contest/data.json"] {
+            assert!(!is_allowlisted(path), "{path} should not be allowlisted");
+        }
+    }
+
+    // --- GitleaksResult deserialization tests ---
+
+    /// A single result object maps its PascalCase JSON keys onto the struct fields.
+    #[test]
+    fn deserialize_gitleaks_result() {
+        let payload = r#"{
+            "Description": "AWS Access Key",
+            "RuleID": "aws-access-key",
+            "File": "src/config.rs",
+            "StartLine": 10,
+            "Match": "AKIAIOSFODNN7EXAMPLE"
+        }"#;
+        let parsed: GitleaksResult = serde_json::from_str(payload).unwrap();
+        assert_eq!(parsed.description, "AWS Access Key");
+        assert_eq!(parsed.rule_id, "aws-access-key");
+        assert_eq!(parsed.file, "src/config.rs");
+        assert_eq!(parsed.start_line, 10);
+        assert_eq!(parsed.r#match, "AKIAIOSFODNN7EXAMPLE");
+    }
+
+    /// A JSON array of results deserializes into a `Vec<GitleaksResult>`.
+    #[test]
+    fn deserialize_gitleaks_result_array() {
+        let payload = r#"[
+            {
+                "Description": "Generic Secret",
+                "RuleID": "generic-secret",
+                "File": "app.py",
+                "StartLine": 5,
+                "Match": "password=hunter2"
+            }
+        ]"#;
+        let parsed: Vec<GitleaksResult> = serde_json::from_str(payload).unwrap();
+        assert_eq!(parsed.len(), 1);
+        assert_eq!(parsed[0].rule_id, "generic-secret");
+    }
+
+    // NOTE(review): the two severity tests below only assert on local string
+    // literals; they never call the scanner's severity mapping, so they cannot
+    // fail if that mapping changes. Consider extracting the mapping into a
+    // function and testing it directly.
+
+    #[test]
+    fn severity_mapping_private_key() {
+        // Mirrors the substring check the scan method is expected to perform.
+        assert!("some-private-key-rule".contains("private-key"));
+    }
+
+    #[test]
+    fn severity_mapping_token_password_secret() {
+        const KEYWORDS: [&str; 3] = ["token", "password", "secret"];
+        for keyword in KEYWORDS {
+            let rule_id = format!("some-{keyword}-rule");
+            let matched = KEYWORDS.iter().any(|needle| rule_id.contains(*needle));
+            assert!(
+                matched,
+                "Expected '{rule_id}' to match token/password/secret"
+            );
+        }
+    }
+}
@@ -0,0 +1,106 @@
+use compliance_core::models::Finding;
+
+use super::orchestrator::{GraphContext, PipelineOrchestrator};
+use crate::error::AgentError;
+
+impl PipelineOrchestrator {
+    /// Build the code knowledge graph for a repo and compute impact analyses.
+    ///
+    /// Steps, in order:
+    /// 1. build the graph for `repo_path` under a freshly generated build id,
+    /// 2. apply community detection to it,
+    /// 3. replace the repo's stored graph (delete first, then store),
+    /// 4. analyze and store one impact per finding that has a `file_path`
+    ///    (findings without a file path are skipped).
+    ///
+    /// Returns a [`GraphContext`] carrying the build run's node, edge and
+    /// community counts together with the computed impacts.
+    ///
+    /// # Errors
+    ///
+    /// Returns [`AgentError::Other`] if the graph build or any store operation
+    /// fails. The first failure aborts the whole call. Because the old graph
+    /// is deleted before the new one is stored, a failed store leaves the repo
+    /// without a stored graph.
+    pub(super) async fn build_code_graph(
+        &self,
+        repo_path: &std::path::Path,
+        repo_id: &str,
+        findings: &[Finding],
+    ) -> Result<GraphContext, AgentError> {
+        let graph_build_id = uuid::Uuid::new_v4().to_string();
+        let engine = compliance_graph::GraphEngine::new(50_000);
+
+        let (mut code_graph, build_run) =
+            engine
+                .build_graph(repo_path, repo_id, &graph_build_id)
+                .map_err(|e| AgentError::Other(format!("Graph build error: {e}")))?;
+
+        // Apply community detection
+        compliance_graph::graph::community::apply_communities(&mut code_graph);
+
+        // Store graph in MongoDB (drop the previous graph for this repo first)
+        let store = compliance_graph::graph::persistence::GraphStore::new(self.db.inner());
+        store
+            .delete_repo_graph(repo_id)
+            .await
+            .map_err(|e| AgentError::Other(format!("Graph cleanup error: {e}")))?;
+        store
+            .store_graph(&build_run, &code_graph.nodes, &code_graph.edges)
+            .await
+            .map_err(|e| AgentError::Other(format!("Graph store error: {e}")))?;
+
+        // Compute impact analysis for each finding
+        let analyzer = compliance_graph::GraphEngine::impact_analyzer(&code_graph);
+        let mut impacts = Vec::new();
+
+        for finding in findings {
+            // Impact analysis is keyed on a file location; skip findings without one.
+            if let Some(file_path) = &finding.file_path {
+                let impact = analyzer.analyze(
+                    repo_id,
+                    &finding.fingerprint,
+                    &graph_build_id,
+                    file_path,
+                    finding.line_number,
+                );
+                store
+                    .store_impact(&impact)
+                    .await
+                    .map_err(|e| AgentError::Other(format!("Impact store error: {e}")))?;
+                impacts.push(impact);
+            }
+        }
+
+        Ok(GraphContext {
+            node_count: build_run.node_count,
+            edge_count: build_run.edge_count,
+            community_count: build_run.community_count,
+            impacts,
+        })
+    }
+
+    /// Trigger DAST scan if a target is configured for this repo.
+    ///
+    /// Looks up all DAST targets whose `repo_id` matches and spawns one
+    /// detached `tokio` task per target. This function does not await those
+    /// tasks; each task stores its scan run (tagged with `scan_run_id`) and
+    /// its findings, logging any failure instead of propagating it.
+    ///
+    /// A failure to query or read the targets is logged as a warning and
+    /// treated as "nothing to scan", so it never fails the calling pipeline.
+    pub(super) async fn maybe_trigger_dast(&self, repo_id: &str, scan_run_id: &str) {
+        use futures_util::TryStreamExt;
+
+        let filter = mongodb::bson::doc! { "repo_id": repo_id };
+        let cursor = match self.db.dast_targets().find(filter).await {
+            Ok(cursor) => cursor,
+            Err(e) => {
+                tracing::warn!("[{repo_id}] Failed to query DAST targets: {e}");
+                return;
+            }
+        };
+        // Distinguish a broken cursor from "no targets": the former used to be
+        // reported as "No DAST targets configured", which hid real DB errors.
+        let targets: Vec<compliance_core::models::DastTarget> = match cursor.try_collect().await {
+            Ok(targets) => targets,
+            Err(e) => {
+                tracing::warn!("[{repo_id}] Failed to read DAST targets: {e}");
+                return;
+            }
+        };
+
+        if targets.is_empty() {
+            tracing::info!("[{repo_id}] No DAST targets configured, skipping");
+            return;
+        }
+
+        for target in targets {
+            // Each task owns its own DB handle and scan-run id so it can
+            // outlive this call.
+            let db = self.db.clone();
+            let scan_run_id = scan_run_id.to_string();
+            tokio::spawn(async move {
+                let orchestrator = compliance_dast::DastOrchestrator::new(100);
+                match orchestrator.run_scan(&target, Vec::new()).await {
+                    Ok((mut scan_run, findings)) => {
+                        // Link the DAST run back to the SAST scan that triggered it.
+                        scan_run.sast_scan_run_id = Some(scan_run_id);
+                        if let Err(e) = db.dast_scan_runs().insert_one(&scan_run).await {
+                            tracing::error!("Failed to store DAST scan run: {e}");
+                        }
+                        for finding in &findings {
+                            if let Err(e) = db.dast_findings().insert_one(finding).await {
+                                tracing::error!("Failed to store DAST finding: {e}");
+                            }
+                        }
+                        tracing::info!("DAST scan complete: {} findings", findings.len());
+                    }
+                    Err(e) => {
+                        tracing::error!("DAST scan failed: {e}");
+                    }
+                }
+            });
+        }
+    }
+}
@@ -0,0 +1,259 @@
+use mongodb::bson::doc;
+
+use compliance_core::models::*;
+
+use super::orchestrator::{extract_base_url, PipelineOrchestrator};
+use super::tracker_dispatch::TrackerDispatch;
+use crate::error::AgentError;
+use crate::trackers;
+
+impl PipelineOrchestrator {
+    /// Build an issue tracker client from a repository's tracker configuration.
+    ///
+    /// Returns `None` if the repo has no tracker type configured, if a token or
+    /// setting required by that tracker type is missing, or if the GitHub
+    /// client fails to construct (that failure is logged as a warning).
+    ///
+    /// Where each tracker type gets its settings:
+    /// - GitHub, GitLab, Jira: the per-repo `tracker_token` takes precedence,
+    ///   falling back to the matching global config token.
+    /// - Gitea: only the per-repo `tracker_token` is consulted; the base URL
+    ///   is derived from the repo's `git_url`.
+    /// - GitLab: base URL is `config.gitlab_url`, defaulting to
+    ///   `https://gitlab.com`.
+    /// - Jira: additionally requires `jira_url`, `jira_email` and
+    ///   `jira_project_key` in the global config.
+    pub(super) fn build_tracker(&self, repo: &TrackedRepository) -> Option<TrackerDispatch> {
+        let tracker_type = repo.tracker_type.as_ref()?;
+        match tracker_type {
+            TrackerType::GitHub => {
+                // Per-repo token takes precedence, fall back to global config
+                let token = repo.tracker_token.clone().or_else(|| {
+                    self.config.github_token.as_ref().map(|t| {
+                        use secrecy::ExposeSecret;
+                        t.expose_secret().to_string()
+                    })
+                })?;
+                let secret = secrecy::SecretString::from(token);
+                match trackers::github::GitHubTracker::new(&secret) {
+                    Ok(t) => Some(TrackerDispatch::GitHub(t)),
+                    Err(e) => {
+                        tracing::warn!("Failed to build GitHub tracker: {e}");
+                        None
+                    }
+                }
+            }
+            TrackerType::GitLab => {
+                let base_url = self
+                    .config
+                    .gitlab_url
+                    .clone()
+                    .unwrap_or_else(|| "https://gitlab.com".to_string());
+                // Per-repo token takes precedence, fall back to global config
+                let token = repo.tracker_token.clone().or_else(|| {
+                    self.config.gitlab_token.as_ref().map(|t| {
+                        use secrecy::ExposeSecret;
+                        t.expose_secret().to_string()
+                    })
+                })?;
+                let secret = secrecy::SecretString::from(token);
+                Some(TrackerDispatch::GitLab(
+                    trackers::gitlab::GitLabTracker::new(base_url, secret),
+                ))
+            }
+            TrackerType::Gitea => {
+                // No global fallback here: Gitea needs a per-repo token.
+                let token = repo.tracker_token.clone()?;
+                let base_url = extract_base_url(&repo.git_url)?;
+                let secret = secrecy::SecretString::from(token);
+                Some(TrackerDispatch::Gitea(trackers::gitea::GiteaTracker::new(
+                    base_url, secret,
+                )))
+            }
+            TrackerType::Jira => {
+                let base_url = self.config.jira_url.clone()?;
+                let email = self.config.jira_email.clone()?;
+                let project_key = self.config.jira_project_key.clone()?;
+                // Per-repo token takes precedence, fall back to global config
+                let token = repo.tracker_token.clone().or_else(|| {
+                    self.config.jira_api_token.as_ref().map(|t| {
+                        use secrecy::ExposeSecret;
+                        t.expose_secret().to_string()
+                    })
+                })?;
+                let secret = secrecy::SecretString::from(token);
+                Some(TrackerDispatch::Jira(trackers::jira::JiraTracker::new(
+                    base_url,
+                    email,
+                    secret,
+                    project_key,
+                )))
+            }
+        }
+    }
+
+    /// Create tracker issues for new findings (severity >= Medium).
+    ///
+    /// Skips (returning `Ok(())`) when no tracker can be built for the repo,
+    /// when `tracker_owner` or `tracker_repo` is unset, or when no finding is
+    /// Medium, High or Critical.
+    ///
+    /// For each remaining finding an existing issue is searched for, first by
+    /// fingerprint and then by title; the finding is skipped if either search
+    /// hits. A failed search is logged and treated as "not found", so issue
+    /// creation still proceeds. After an issue is created, its URL is written
+    /// back to the finding and the issue record is stored.
+    ///
+    /// # Errors
+    ///
+    /// Currently always returns `Ok(())`: tracker and database failures are
+    /// logged as warnings and do not abort the loop.
+    #[tracing::instrument(skip_all, fields(repo_id = %repo_id))]
+    pub(super) async fn create_tracker_issues(
+        &self,
+        repo: &TrackedRepository,
+        repo_id: &str,
+        new_findings: &[Finding],
+    ) -> Result<(), AgentError> {
+        let tracker = match self.build_tracker(repo) {
+            Some(t) => t,
+            None => {
+                tracing::info!("[{repo_id}] No issue tracker configured, skipping");
+                return Ok(());
+            }
+        };
+
+        let owner = match repo.tracker_owner.as_deref() {
+            Some(o) => o,
+            None => {
+                tracing::warn!("[{repo_id}] tracker_owner not set, skipping issue creation");
+                return Ok(());
+            }
+        };
+        let tracker_repo_name = match repo.tracker_repo.as_deref() {
+            Some(r) => r,
+            None => {
+                tracing::warn!("[{repo_id}] tracker_repo not set, skipping issue creation");
+                return Ok(());
+            }
+        };
+
+        // Only create issues for medium+ severity findings
+        let actionable: Vec<&Finding> = new_findings
+            .iter()
+            .filter(|f| {
+                matches!(
+                    f.severity,
+                    Severity::Medium | Severity::High | Severity::Critical
+                )
+            })
+            .collect();
+
+        if actionable.is_empty() {
+            tracing::info!("[{repo_id}] No medium+ findings, skipping issue creation");
+            return Ok(());
+        }
+
+        tracing::info!(
+            "[{repo_id}] Creating issues for {} findings via {}",
+            actionable.len(),
+            tracker.name()
+        );
+
+        let mut created = 0u32;
+        for finding in actionable {
+            let title = format!(
+                "[{}] {}: {}",
+                finding.severity, finding.scanner, finding.title
+            );
+
+            // Check if an issue already exists by fingerprint first, then by title
+            let mut found_existing = false;
+            for search_term in [&finding.fingerprint, &title] {
+                match tracker
+                    .find_existing_issue(owner, tracker_repo_name, search_term)
+                    .await
+                {
+                    Ok(Some(existing)) => {
+                        tracing::debug!(
+                            "[{repo_id}] Issue already exists for '{}': {}",
+                            search_term,
+                            existing.external_url
+                        );
+                        found_existing = true;
+                        break;
+                    }
+                    Ok(None) => {}
+                    Err(e) => {
+                        // Best effort: a failed search does not block creation.
+                        tracing::warn!("[{repo_id}] Failed to search for existing issue: {e}");
+                    }
+                }
+            }
+            if found_existing {
+                continue;
+            }
+            let body = format_issue_body(finding);
+            let labels = vec![
+                format!("severity:{}", finding.severity),
+                format!("scanner:{}", finding.scanner),
+                "compliance-scanner".to_string(),
+            ];
+
+            match tracker
+                .create_issue(owner, tracker_repo_name, &title, &body, &labels)
+                .await
+            {
+                Ok(mut issue) => {
+                    // A finding without an id gets an empty finding_id on the issue.
+                    issue.finding_id = finding
+                        .id
+                        .as_ref()
+                        .map(|id| id.to_hex())
+                        .unwrap_or_default();
+
+                    // Update the finding with the issue URL. The issue already
+                    // exists remotely, so a failure here is logged, not fatal.
+                    if let Some(finding_id) = &finding.id {
+                        if let Err(e) = self
+                            .db
+                            .findings()
+                            .update_one(
+                                doc! { "_id": finding_id },
+                                doc! { "$set": { "tracker_issue_url": &issue.external_url } },
+                            )
+                            .await
+                        {
+                            tracing::warn!(
+                                "[{repo_id}] Failed to record issue URL on finding: {e}"
+                            );
+                        }
+                    }
+
+                    // Store the tracker issue record
+                    if let Err(e) = self.db.tracker_issues().insert_one(&issue).await {
+                        tracing::warn!("[{repo_id}] Failed to store tracker issue: {e}");
+                    }
+
+                    created += 1;
+                }
+                Err(e) => {
+                    tracing::warn!(
+                        "[{repo_id}] Failed to create issue for {}: {e}",
+                        finding.fingerprint
+                    );
+                }
+            }
+        }
+
+        tracing::info!("[{repo_id}] Created {created} tracker issues");
+        Ok(())
+    }
+}
+
+/// Format a finding into a markdown issue body for the tracker.
+///
+/// The body always contains a heading, scanner, severity, description and a
+/// trailing fingerprint footer. Rule, CWE, location, code snippet, remediation
+/// and suggested fix sections are added only when the finding carries them.
+///
+/// Code snippets and suggested fixes are wrapped in a backtick fence that is
+/// longer than any backtick run inside the content, so content that itself
+/// contains a triple backtick cannot close the block early. Content without
+/// such runs gets the usual three-backtick fence.
+pub(super) fn format_issue_body(finding: &Finding) -> String {
+    let mut body = String::new();
+
+    body.push_str(&format!("## {} Finding\n\n", finding.severity));
+    body.push_str(&format!("**Scanner:** {}\n", finding.scanner));
+    body.push_str(&format!("**Severity:** {}\n", finding.severity));
+
+    if let Some(rule) = &finding.rule_id {
+        body.push_str(&format!("**Rule:** {}\n", rule));
+    }
+    if let Some(cwe) = &finding.cwe {
+        body.push_str(&format!("**CWE:** {}\n", cwe));
+    }
+
+    body.push_str(&format!("\n### Description\n\n{}\n", finding.description));
+
+    if let Some(file_path) = &finding.file_path {
+        body.push_str(&format!("\n### Location\n\n**File:** `{}`", file_path));
+        if let Some(line) = finding.line_number {
+            body.push_str(&format!(" (line {})", line));
+        }
+        // Terminate the location line whether or not a line number was added.
+        body.push('\n');
+    }
+
+    if let Some(snippet) = &finding.code_snippet {
+        let fence = code_fence_for(snippet);
+        body.push_str(&format!("\n### Code\n\n{fence}\n{snippet}\n{fence}\n"));
+    }
+
+    if let Some(remediation) = &finding.remediation {
+        body.push_str(&format!("\n### Remediation\n\n{}\n", remediation));
+    }
+
+    if let Some(fix) = &finding.suggested_fix {
+        let fence = code_fence_for(fix);
+        body.push_str(&format!("\n### Suggested Fix\n\n{fence}\n{fix}\n{fence}\n"));
+    }
+
+    body.push_str(&format!(
+        "\n---\n*Fingerprint:* `{}`\n*Generated by compliance-scanner*",
+        finding.fingerprint
+    ));
+
+    body
+}
+
+/// Return a backtick fence one longer than the longest backtick run in
+/// `content`, and never shorter than three backticks.
+fn code_fence_for(content: &str) -> String {
+    let mut longest_run = 0usize;
+    let mut current_run = 0usize;
+    for ch in content.chars() {
+        if ch == '`' {
+            current_run += 1;
+            longest_run = longest_run.max(current_run);
+        } else {
+            current_run = 0;
+        }
+    }
+    "`".repeat(longest_run.max(2) + 1)
+}
@@ -1,366 +0,0 @@
-use std::path::Path;
-use std::time::Duration;
-
-use compliance_core::models::{Finding, ScanType, Severity};
-use compliance_core::traits::{ScanOutput, Scanner};
-use compliance_core::CoreError;
-use tokio::process::Command;
-
-use crate::pipeline::dedup;
-
-/// Timeout for each individual lint command
-const LINT_TIMEOUT: Duration = Duration::from_secs(120);
-
-pub struct LintScanner;
-
-impl Scanner for LintScanner {
-    fn name(&self) -> &str {
-        "lint"
-    }
-
-    fn scan_type(&self) -> ScanType {
-        ScanType::Lint
-    }
-
-    #[tracing::instrument(skip_all)]
-    async fn scan(&self, repo_path: &Path, repo_id: &str) -> Result<ScanOutput, CoreError> {
-        let mut all_findings = Vec::new();
-
-        // Detect which languages are present and run appropriate linters
-        if has_rust_project(repo_path) {
-            match run_clippy(repo_path, repo_id).await {
-                Ok(findings) => all_findings.extend(findings),
-                Err(e) => tracing::warn!("Clippy failed: {e}"),
-            }
-        }
-
-        if has_js_project(repo_path) {
-            match run_eslint(repo_path, repo_id).await {
-                Ok(findings) => all_findings.extend(findings),
-                Err(e) => tracing::warn!("ESLint failed: {e}"),
-            }
-        }
-
-        if has_python_project(repo_path) {
-            match run_ruff(repo_path, repo_id).await {
-                Ok(findings) => all_findings.extend(findings),
-                Err(e) => tracing::warn!("Ruff failed: {e}"),
-            }
-        }
-
-        Ok(ScanOutput {
-            findings: all_findings,
-            sbom_entries: Vec::new(),
-        })
-    }
-}
-
-fn has_rust_project(repo_path: &Path) -> bool {
-    repo_path.join("Cargo.toml").exists()
-}
-
-fn has_js_project(repo_path: &Path) -> bool {
-    // Only run if eslint is actually installed in the project
-    repo_path.join("package.json").exists() && repo_path.join("node_modules/.bin/eslint").exists()
-}
-
-fn has_python_project(repo_path: &Path) -> bool {
-    repo_path.join("pyproject.toml").exists()
-        || repo_path.join("setup.py").exists()
-        || repo_path.join("requirements.txt").exists()
-}
-
-/// Run a command with a timeout, returning its output or an error
-async fn run_with_timeout(
-    child: tokio::process::Child,
-    scanner_name: &str,
-) -> Result<std::process::Output, CoreError> {
-    let result = tokio::time::timeout(LINT_TIMEOUT, child.wait_with_output()).await;
-    match result {
-        Ok(Ok(output)) => Ok(output),
-        Ok(Err(e)) => Err(CoreError::Scanner {
-            scanner: scanner_name.to_string(),
-            source: Box::new(e),
-        }),
-        Err(_) => {
-            // Process is dropped here which sends SIGKILL on Unix
-            Err(CoreError::Scanner {
-                scanner: scanner_name.to_string(),
-                source: Box::new(std::io::Error::new(
-                    std::io::ErrorKind::TimedOut,
-                    format!("{scanner_name} timed out after {}s", LINT_TIMEOUT.as_secs()),
-                )),
-            })
-        }
-    }
-}
-
-// ── Clippy ──────────────────────────────────────────────
-
-async fn run_clippy(repo_path: &Path, repo_id: &str) -> Result<Vec<Finding>, CoreError> {
-    let child = Command::new("cargo")
-        .args([
-            "clippy",
-            "--message-format=json",
-            "--quiet",
-            "--",
-            "-W",
-            "clippy::all",
-        ])
-        .current_dir(repo_path)
-        .env("RUSTC_WRAPPER", "")
-        .stdout(std::process::Stdio::piped())
-        .stderr(std::process::Stdio::piped())
-        .spawn()
-        .map_err(|e| CoreError::Scanner {
-            scanner: "clippy".to_string(),
-            source: Box::new(e),
-        })?;
-
-    let output = run_with_timeout(child, "clippy").await?;
-    let stdout = String::from_utf8_lossy(&output.stdout);
-    let mut findings = Vec::new();
-
-    for line in stdout.lines() {
-        let msg: serde_json::Value = match serde_json::from_str(line) {
-            Ok(v) => v,
-            Err(_) => continue,
-        };
-
-        if msg.get("reason").and_then(|v| v.as_str()) != Some("compiler-message") {
-            continue;
-        }
-
-        let message = match msg.get("message") {
-            Some(m) => m,
-            None => continue,
-        };
-
-        let level = message.get("level").and_then(|v| v.as_str()).unwrap_or("");
-
-        if level != "warning" && level != "error" {
-            continue;
-        }
-
-        let text = message
-            .get("message")
-            .and_then(|v| v.as_str())
-            .unwrap_or("")
-            .to_string();
-
-        let code = message
-            .get("code")
-            .and_then(|v| v.get("code"))
-            .and_then(|v| v.as_str())
-            .unwrap_or("")
-            .to_string();
-
-        if text.starts_with("aborting due to") || code.is_empty() {
-            continue;
-        }
-
-        let (file_path, line_number) = extract_primary_span(message);
-
-        let severity = if level == "error" {
-            Severity::High
-        } else {
-            Severity::Low
-        };
-
-        let fingerprint = dedup::compute_fingerprint(&[
-            repo_id,
-            "clippy",
-            &code,
-            &file_path,
-            &line_number.to_string(),
-        ]);
-
-        let mut finding = Finding::new(
-            repo_id.to_string(),
-            fingerprint,
-            "clippy".to_string(),
-            ScanType::Lint,
-            format!("[clippy] {text}"),
-            text,
-            severity,
-        );
-        finding.rule_id = Some(code);
-        if !file_path.is_empty() {
-            finding.file_path = Some(file_path);
-        }
-        if line_number > 0 {
-            finding.line_number = Some(line_number);
-        }
-        findings.push(finding);
-    }
-
-    Ok(findings)
-}
-
-fn extract_primary_span(message: &serde_json::Value) -> (String, u32) {
-    let spans = match message.get("spans").and_then(|v| v.as_array()) {
-        Some(s) => s,
-        None => return (String::new(), 0),
-    };
-
-    for span in spans {
-        if span.get("is_primary").and_then(|v| v.as_bool()) == Some(true) {
-            let file = span
-                .get("file_name")
-                .and_then(|v| v.as_str())
-                .unwrap_or("")
-                .to_string();
-            let line = span.get("line_start").and_then(|v| v.as_u64()).unwrap_or(0) as u32;
-            return (file, line);
-        }
-    }
-
-    (String::new(), 0)
-}
-
-// ── ESLint ──────────────────────────────────────────────
-
-async fn run_eslint(repo_path: &Path, repo_id: &str) -> Result<Vec<Finding>, CoreError> {
-    // Use the project-local eslint binary directly, not npx (which can hang downloading)
-    let eslint_bin = repo_path.join("node_modules/.bin/eslint");
-    let child = Command::new(eslint_bin)
-        .args([".", "--format", "json", "--no-error-on-unmatched-pattern"])
-        .current_dir(repo_path)
-        .stdout(std::process::Stdio::piped())
-        .stderr(std::process::Stdio::piped())
-        .spawn()
-        .map_err(|e| CoreError::Scanner {
-            scanner: "eslint".to_string(),
-            source: Box::new(e),
-        })?;
-
-    let output = run_with_timeout(child, "eslint").await?;
-
-    if output.stdout.is_empty() {
-        return Ok(Vec::new());
-    }
-
-    let results: Vec<EslintFileResult> = serde_json::from_slice(&output.stdout).unwrap_or_default();
-
-    let mut findings = Vec::new();
-    for file_result in results {
-        for msg in file_result.messages {
-            let severity = match msg.severity {
-                2 => Severity::Medium,
-                _ => Severity::Low,
-            };
-
-            let rule_id = msg.rule_id.unwrap_or_default();
-            let fingerprint = dedup::compute_fingerprint(&[
-                repo_id,
-                "eslint",
-                &rule_id,
-                &file_result.file_path,
-                &msg.line.to_string(),
-            ]);
-
-            let mut finding = Finding::new(
-                repo_id.to_string(),
-                fingerprint,
-                "eslint".to_string(),
-                ScanType::Lint,
-                format!("[eslint] {}", msg.message),
-                msg.message,
-                severity,
-            );
-            finding.rule_id = Some(rule_id);
-            finding.file_path = Some(file_result.file_path.clone());
-            finding.line_number = Some(msg.line);
-            findings.push(finding);
-        }
-    }
-
-    Ok(findings)
-}
-
-#[derive(serde::Deserialize)]
-struct EslintFileResult {
-    #[serde(rename = "filePath")]
-    file_path: String,
-    messages: Vec<EslintMessage>,
-}
-
-#[derive(serde::Deserialize)]
-struct EslintMessage {
-    #[serde(rename = "ruleId")]
-    rule_id: Option<String>,
-    severity: u8,
-    message: String,
-    line: u32,
-}
-
-// ── Ruff ────────────────────────────────────────────────
-
-async fn run_ruff(repo_path: &Path, repo_id: &str) -> Result<Vec<Finding>, CoreError> {
-    let child = Command::new("ruff")
-        .args(["check", ".", "--output-format", "json", "--exit-zero"])
-        .current_dir(repo_path)
-        .stdout(std::process::Stdio::piped())
-        .stderr(std::process::Stdio::piped())
-        .spawn()
-        .map_err(|e| CoreError::Scanner {
-            scanner: "ruff".to_string(),
-            source: Box::new(e),
-        })?;
-
-    let output = run_with_timeout(child, "ruff").await?;
-
-    if output.stdout.is_empty() {
-        return Ok(Vec::new());
-    }
-
-    let results: Vec<RuffResult> = serde_json::from_slice(&output.stdout).unwrap_or_default();
-
-    let findings = results
-        .into_iter()
-        .map(|r| {
-            let severity = if r.code.starts_with('E') || r.code.starts_with('F') {
-                Severity::Medium
-            } else {
-                Severity::Low
-            };
-
-            let fingerprint = dedup::compute_fingerprint(&[
-                repo_id,
-                "ruff",
-                &r.code,
-                &r.filename,
-                &r.location.row.to_string(),
-            ]);
-
-            let mut finding = Finding::new(
-                repo_id.to_string(),
-                fingerprint,
-                "ruff".to_string(),
-                ScanType::Lint,
-                format!("[ruff] {}: {}", r.code, r.message),
-                r.message,
-                severity,
-            );
-            finding.rule_id = Some(r.code);
-            finding.file_path = Some(r.filename);
-            finding.line_number = Some(r.location.row);
-            finding
-        })
-        .collect();
-
-    Ok(findings)
-}
-
-#[derive(serde::Deserialize)]
-struct RuffResult {
-    code: String,
-    message: String,
-    filename: String,
-    location: RuffLocation,
-}
-
-#[derive(serde::Deserialize)]
-struct RuffLocation {
-    row: u32,
-}
@@ -0,0 +1,251 @@
+use std::path::Path;
+
+use compliance_core::models::{Finding, ScanType, Severity};
+use compliance_core::CoreError;
+use tokio::process::Command;
+
+use crate::pipeline::dedup;
+
+use super::run_with_timeout;
+
+/// Runs `cargo clippy` inside `repo_path` and turns its JSON diagnostics into lint findings.
+///
+/// Only `compiler-message` records at `warning` or `error` level that carry a
+/// lint code are kept: errors become `Severity::High`, warnings `Severity::Low`.
+/// Output lines that are not valid JSON are skipped.
+///
+/// # Errors
+///
+/// Returns `CoreError::Scanner` when `cargo` cannot be spawned, and propagates
+/// any error from `run_with_timeout`.
+pub(super) async fn run_clippy(repo_path: &Path, repo_id: &str) -> Result<Vec<Finding>, CoreError> {
+    let child = Command::new("cargo")
+        .args([
+            "clippy",
+            "--message-format=json",
+            "--quiet",
+            "--",
+            "-W",
+            "clippy::all",
+        ])
+        .current_dir(repo_path)
+        .env("RUSTC_WRAPPER", "")
+        .stdout(std::process::Stdio::piped())
+        .stderr(std::process::Stdio::piped())
+        // tokio does not kill a child when its handle is dropped unless asked to;
+        // without this a run that hits the timeout would keep running in the background.
+        .kill_on_drop(true)
+        .spawn()
+        .map_err(|e| CoreError::Scanner {
+            scanner: "clippy".to_string(),
+            source: Box::new(e),
+        })?;
+
+    let output = run_with_timeout(child, "clippy").await?;
+    let stdout = String::from_utf8_lossy(&output.stdout);
+    let mut findings = Vec::new();
+
+    // Each stdout line is handled as an independent JSON record.
+    for line in stdout.lines() {
+        let msg: serde_json::Value = match serde_json::from_str(line) {
+            Ok(v) => v,
+            Err(_) => continue,
+        };
+
+        if msg.get("reason").and_then(|v| v.as_str()) != Some("compiler-message") {
+            continue;
+        }
+
+        let message = match msg.get("message") {
+            Some(m) => m,
+            None => continue,
+        };
+
+        let level = message.get("level").and_then(|v| v.as_str()).unwrap_or("");
+
+        if level != "warning" && level != "error" {
+            continue;
+        }
+
+        let text = message
+            .get("message")
+            .and_then(|v| v.as_str())
+            .unwrap_or("")
+            .to_string();
+
+        let code = message
+            .get("code")
+            .and_then(|v| v.get("code"))
+            .and_then(|v| v.as_str())
+            .unwrap_or("")
+            .to_string();
+
+        // Drop the "aborting due to ..." summary and any diagnostic without a lint code.
+        if text.starts_with("aborting due to") || code.is_empty() {
+            continue;
+        }
+
+        // Without a primary span these are `""` and `0`; both still feed the fingerprint.
+        let (file_path, line_number) = extract_primary_span(message);
+
+        let severity = if level == "error" {
+            Severity::High
+        } else {
+            Severity::Low
+        };
+
+        let fingerprint = dedup::compute_fingerprint(&[
+            repo_id,
+            "clippy",
+            &code,
+            &file_path,
+            &line_number.to_string(),
+        ]);
+
+        let mut finding = Finding::new(
+            repo_id.to_string(),
+            fingerprint,
+            "clippy".to_string(),
+            ScanType::Lint,
+            format!("[clippy] {text}"),
+            text,
+            severity,
+        );
+        finding.rule_id = Some(code);
+        // Location fields stay unset when no primary span was found.
+        if !file_path.is_empty() {
+            finding.file_path = Some(file_path);
+        }
+        if line_number > 0 {
+            finding.line_number = Some(line_number);
+        }
+        findings.push(finding);
+    }
+
+    Ok(findings)
+}
+
+/// Returns the file name and starting line of the first span flagged `is_primary`.
+///
+/// Yields `(String::new(), 0)` when `spans` is missing, is not an array, or
+/// holds no primary span. A missing `file_name` becomes `""`; a missing
+/// `line_start`, or one that does not fit in `u32`, becomes `0`.
+fn extract_primary_span(message: &serde_json::Value) -> (String, u32) {
+    let primary = message
+        .get("spans")
+        .and_then(|v| v.as_array())
+        .and_then(|spans| {
+            spans
+                .iter()
+                .find(|span| span.get("is_primary").and_then(|v| v.as_bool()) == Some(true))
+        });
+
+    match primary {
+        Some(span) => {
+            let file = span
+                .get("file_name")
+                .and_then(|v| v.as_str())
+                .unwrap_or("")
+                .to_string();
+            let line = span
+                .get("line_start")
+                .and_then(|v| v.as_u64())
+                // Checked conversion: a plain `as u32` cast would silently wrap oversized values.
+                .and_then(|n| u32::try_from(n).ok())
+                .unwrap_or(0);
+            (file, line)
+        }
+        None => (String::new(), 0),
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    /// Builds one span entry with the three fields `extract_primary_span` reads.
+    fn span(file_name: &str, line_start: u32, is_primary: bool) -> serde_json::Value {
+        serde_json::json!({
+            "file_name": file_name,
+            "line_start": line_start,
+            "is_primary": is_primary
+        })
+    }
+
+    /// Wraps `spans` in a message object and runs `extract_primary_span` on it.
+    fn primary_of(spans: serde_json::Value) -> (String, u32) {
+        extract_primary_span(&serde_json::json!({ "spans": spans }))
+    }
+
+    #[test]
+    fn extract_primary_span_with_primary() {
+        let spans = serde_json::json!([span("src/lib.rs", 42, true)]);
+        assert_eq!(primary_of(spans), ("src/lib.rs".to_string(), 42));
+    }
+
+    #[test]
+    fn extract_primary_span_no_primary() {
+        let spans = serde_json::json!([span("src/lib.rs", 42, false)]);
+        assert_eq!(primary_of(spans), (String::new(), 0));
+    }
+
+    #[test]
+    fn extract_primary_span_multiple_spans() {
+        // The non-primary span comes first and must be skipped.
+        let spans = serde_json::json!([
+            span("src/other.rs", 10, false),
+            span("src/main.rs", 99, true)
+        ]);
+        assert_eq!(primary_of(spans), ("src/main.rs".to_string(), 99));
+    }
+
+    #[test]
+    fn extract_primary_span_no_spans() {
+        let found = extract_primary_span(&serde_json::json!({}));
+        assert_eq!(found, (String::new(), 0));
+    }
+
+    #[test]
+    fn extract_primary_span_empty_spans() {
+        assert_eq!(primary_of(serde_json::json!([])), (String::new(), 0));
+    }
+
+    #[test]
+    fn parse_clippy_compiler_message_line() {
+        let raw = r#"{"reason":"compiler-message","message":{"level":"warning","message":"unused variable","code":{"code":"unused_variables"},"spans":[{"file_name":"src/main.rs","line_start":5,"is_primary":true}]}}"#;
+        let record: serde_json::Value = serde_json::from_str(raw).unwrap();
+
+        assert_eq!(record["reason"].as_str(), Some("compiler-message"));
+
+        // Indexing a missing key yields `Null`, so the checks below would fail rather than pass.
+        let diagnostic = &record["message"];
+        assert_eq!(diagnostic["level"].as_str(), Some("warning"));
+        assert_eq!(diagnostic["message"].as_str(), Some("unused variable"));
+        assert_eq!(diagnostic["code"]["code"].as_str(), Some("unused_variables"));
+
+        assert_eq!(
+            extract_primary_span(diagnostic),
+            ("src/main.rs".to_string(), 5)
+        );
+    }
+
+    #[test]
+    fn skip_non_compiler_message() {
+        let raw = r#"{"reason":"build-script-executed","package_id":"foo 0.1.0"}"#;
+        let record: serde_json::Value = serde_json::from_str(raw).unwrap();
+        assert_ne!(record["reason"].as_str(), Some("compiler-message"));
+    }
+
+    #[test]
+    fn skip_aborting_message() {
+        assert!("aborting due to 3 previous errors".starts_with("aborting due to"));
+    }
+}
@@ -0,0 +1,183 @@
+use std::path::Path;
+
+use compliance_core::models::{Finding, ScanType, Severity};
+use compliance_core::CoreError;
+use tokio::process::Command;
+
+use crate::pipeline::dedup;
+
+use super::run_with_timeout;
+
+/// Runs the project-local ESLint binary over `repo_path` and maps every reported message to a finding.
+///
+/// Messages with ESLint severity `2` become `Severity::Medium`, all others
+/// `Severity::Low`. A missing `ruleId` is stored as an empty rule id. Empty
+/// output yields no findings; unparseable output is logged and also yields none.
+///
+/// # Errors
+///
+/// Returns `CoreError::Scanner` when the binary cannot be spawned, and
+/// propagates any error from `run_with_timeout`.
+pub(super) async fn run_eslint(repo_path: &Path, repo_id: &str) -> Result<Vec<Finding>, CoreError> {
+    // Use the project-local eslint binary directly, not npx (which can hang downloading)
+    let eslint_bin = repo_path.join("node_modules/.bin/eslint");
+    let child = Command::new(eslint_bin)
+        .args([".", "--format", "json", "--no-error-on-unmatched-pattern"])
+        .current_dir(repo_path)
+        .stdout(std::process::Stdio::piped())
+        .stderr(std::process::Stdio::piped())
+        // tokio does not kill a child when its handle is dropped unless asked to;
+        // without this a run that hits the timeout would keep running in the background.
+        .kill_on_drop(true)
+        .spawn()
+        .map_err(|e| CoreError::Scanner {
+            scanner: "eslint".to_string(),
+            source: Box::new(e),
+        })?;
+
+    let output = run_with_timeout(child, "eslint").await?;
+
+    if output.stdout.is_empty() {
+        return Ok(Vec::new());
+    }
+
+    let results: Vec<EslintFileResult> = match serde_json::from_slice(&output.stdout) {
+        Ok(parsed) => parsed,
+        Err(e) => {
+            // Stay best-effort, but leave a trace instead of silently reporting a clean result.
+            tracing::warn!("Failed to parse ESLint JSON output: {e}");
+            Vec::new()
+        }
+    };
+
+    let mut findings = Vec::new();
+    for file_result in results {
+        for msg in file_result.messages {
+            let severity = match msg.severity {
+                2 => Severity::Medium,
+                _ => Severity::Low,
+            };
+
+            // `ruleId` can be null in ESLint output; it is recorded as an empty string.
+            let rule_id = msg.rule_id.unwrap_or_default();
+            let fingerprint = dedup::compute_fingerprint(&[
+                repo_id,
+                "eslint",
+                &rule_id,
+                &file_result.file_path,
+                &msg.line.to_string(),
+            ]);
+
+            let mut finding = Finding::new(
+                repo_id.to_string(),
+                fingerprint,
+                "eslint".to_string(),
+                ScanType::Lint,
+                format!("[eslint] {}", msg.message),
+                msg.message,
+                severity,
+            );
+            finding.rule_id = Some(rule_id);
+            finding.file_path = Some(file_result.file_path.clone());
+            finding.line_number = Some(msg.line);
+            findings.push(finding);
+        }
+    }
+
+    Ok(findings)
+}
+
+/// Per-file entry of ESLint's JSON report: the linted path (`filePath`) and its messages.
+#[derive(serde::Deserialize)]
+#[serde(rename_all = "camelCase")]
+struct EslintFileResult {
+    file_path: String,
+    messages: Vec<EslintMessage>,
+}
+
+/// A single ESLint message; `rule_id` maps to the JSON key `ruleId` and may be `null`.
+#[derive(serde::Deserialize)]
+#[serde(rename_all = "camelCase")]
+struct EslintMessage {
+    rule_id: Option<String>,
+    severity: u8,
+    message: String,
+    line: u32,
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    /// Deserializes an ESLint JSON report, panicking on malformed input.
+    fn parse(report: &str) -> Vec<EslintFileResult> {
+        serde_json::from_str(report).unwrap()
+    }
+
+    #[test]
+    fn deserialize_eslint_output() {
+        let files = parse(
+            r#"[
+            {
+                "filePath": "/home/user/project/src/app.js",
+                "messages": [
+                    {
+                        "ruleId": "no-unused-vars",
+                        "severity": 2,
+                        "message": "'x' is defined but never used.",
+                        "line": 10
+                    },
+                    {
+                        "ruleId": "semi",
+                        "severity": 1,
+                        "message": "Missing semicolon.",
+                        "line": 15
+                    }
+                ]
+            }
+        ]"#,
+        );
+        assert_eq!(files.len(), 1);
+
+        let file = &files[0];
+        assert_eq!(file.file_path, "/home/user/project/src/app.js");
+        assert_eq!(file.messages.len(), 2);
+
+        let first = &file.messages[0];
+        assert_eq!(first.rule_id, Some("no-unused-vars".to_string()));
+        assert_eq!(first.severity, 2);
+        assert_eq!(first.line, 10);
+
+        assert_eq!(file.messages[1].severity, 1);
+    }
+
+    #[test]
+    fn deserialize_eslint_null_rule_id() {
+        let files = parse(
+            r#"[
+            {
+                "filePath": "src/index.js",
+                "messages": [
+                    {
+                        "ruleId": null,
+                        "severity": 2,
+                        "message": "Parsing error: Unexpected token",
+                        "line": 1
+                    }
+                ]
+            }
+        ]"#,
+        );
+        assert_eq!(files[0].messages[0].rule_id, None);
+    }
+
+    #[test]
+    fn deserialize_eslint_empty_messages() {
+        let files = parse(r#"[{"filePath": "src/clean.js", "messages": []}]"#);
+        assert_eq!(files[0].messages.len(), 0);
+    }
+
+    #[test]
+    fn deserialize_eslint_empty_array() {
+        assert!(parse("[]").is_empty());
+    }
+
+    #[test]
+    fn eslint_severity_mapping() {
+        // severity 2 = error -> Medium, anything else -> Low
+        let label = |level: u8| match level {
+            2 => "Medium",
+            _ => "Low",
+        };
+        assert_eq!(label(2), "Medium");
+        assert_eq!(label(1), "Low");
+        assert_eq!(label(0), "Low");
+    }
+}
@@ -0,0 +1,97 @@
+mod clippy;
+mod eslint;
+mod ruff;
+
+use std::path::Path;
+use std::time::Duration;
+
+use compliance_core::models::ScanType;
+use compliance_core::traits::{ScanOutput, Scanner};
+use compliance_core::CoreError;
+
+/// Upper bound on how long a single lint command may run (two minutes).
+pub(crate) const LINT_TIMEOUT: Duration = Duration::from_secs(2 * 60);
+
+/// Scanner that runs clippy, ESLint and ruff, each only when its project markers are present.
+pub struct LintScanner;
+
+impl Scanner for LintScanner {
+    fn name(&self) -> &str {
+        "lint"
+    }
+
+    fn scan_type(&self) -> ScanType {
+        ScanType::Lint
+    }
+
+    /// Runs every applicable linter and merges their findings.
+    ///
+    /// A failing linter is logged as a warning and skipped, so this always
+    /// returns `Ok`; `sbom_entries` is always empty.
+    #[tracing::instrument(skip_all)]
+    async fn scan(&self, repo_path: &Path, repo_id: &str) -> Result<ScanOutput, CoreError> {
+        let mut all_findings = Vec::new();
+
+        // Detect which languages are present and run appropriate linters
+        if has_rust_project(repo_path) {
+            match clippy::run_clippy(repo_path, repo_id).await {
+                Err(e) => tracing::warn!("Clippy failed: {e}"),
+                Ok(mut found) => all_findings.append(&mut found),
+            }
+        }
+
+        if has_js_project(repo_path) {
+            match eslint::run_eslint(repo_path, repo_id).await {
+                Err(e) => tracing::warn!("ESLint failed: {e}"),
+                Ok(mut found) => all_findings.append(&mut found),
+            }
+        }
+
+        if has_python_project(repo_path) {
+            match ruff::run_ruff(repo_path, repo_id).await {
+                Err(e) => tracing::warn!("Ruff failed: {e}"),
+                Ok(mut found) => all_findings.append(&mut found),
+            }
+        }
+
+        let output = ScanOutput {
+            findings: all_findings,
+            sbom_entries: Vec::new(),
+        };
+        Ok(output)
+    }
+}
+
+/// Reports whether `repo_path` has a `Cargo.toml` at its root.
+fn has_rust_project(repo_path: &Path) -> bool {
+    let manifest = repo_path.join("Cargo.toml");
+    manifest.exists()
+}
+
+/// Reports whether `repo_path` has a `package.json` and a locally installed ESLint binary.
+fn has_js_project(repo_path: &Path) -> bool {
+    if !repo_path.join("package.json").exists() {
+        return false;
+    }
+    // Only run if eslint is actually installed in the project
+    repo_path.join("node_modules/.bin/eslint").exists()
+}
+
+/// Reports whether `repo_path` contains `pyproject.toml`, `setup.py` or `requirements.txt`.
+fn has_python_project(repo_path: &Path) -> bool {
+    ["pyproject.toml", "setup.py", "requirements.txt"]
+        .iter()
+        .any(|marker| repo_path.join(marker).exists())
+}
+
+/// Waits for `child` to exit and collects its output, giving up after `LINT_TIMEOUT`.
+///
+/// # Errors
+///
+/// Returns `CoreError::Scanner` tagged with `scanner_name` when waiting on the
+/// process fails, or with an `ErrorKind::TimedOut` source when the deadline passes.
+pub(crate) async fn run_with_timeout(
+    child: tokio::process::Child,
+    scanner_name: &str,
+) -> Result<std::process::Output, CoreError> {
+    match tokio::time::timeout(LINT_TIMEOUT, child.wait_with_output()).await {
+        Ok(waited) => waited.map_err(|e| CoreError::Scanner {
+            scanner: scanner_name.to_string(),
+            source: Box::new(e),
+        }),
+        // On timeout the wait future, and the `Child` it owns, is dropped. tokio
+        // only kills a dropped child that was spawned with `kill_on_drop(true)`;
+        // otherwise the process keeps running after this function returns.
+        Err(_) => Err(CoreError::Scanner {
+            scanner: scanner_name.to_string(),
+            source: Box::new(std::io::Error::new(
+                std::io::ErrorKind::TimedOut,
+                format!("{scanner_name} timed out after {}s", LINT_TIMEOUT.as_secs()),
+            )),
+        }),
+    }
+}
@@ -0,0 +1,150 @@
+use std::path::Path;
+
+use compliance_core::models::{Finding, ScanType, Severity};
+use compliance_core::CoreError;
+use tokio::process::Command;
+
+use crate::pipeline::dedup;
+
+use super::run_with_timeout;
+
+/// Runs `ruff check` over `repo_path` and maps every reported diagnostic to a finding.
+///
+/// Codes starting with `E` or `F` become `Severity::Medium`, all others
+/// `Severity::Low`. Empty output yields no findings; unparseable output is
+/// logged and also yields none.
+///
+/// # Errors
+///
+/// Returns `CoreError::Scanner` when `ruff` cannot be spawned, and propagates
+/// any error from `run_with_timeout`.
+pub(super) async fn run_ruff(repo_path: &Path, repo_id: &str) -> Result<Vec<Finding>, CoreError> {
+    let child = Command::new("ruff")
+        .args(["check", ".", "--output-format", "json", "--exit-zero"])
+        .current_dir(repo_path)
+        .stdout(std::process::Stdio::piped())
+        .stderr(std::process::Stdio::piped())
+        // tokio does not kill a child when its handle is dropped unless asked to;
+        // without this a run that hits the timeout would keep running in the background.
+        .kill_on_drop(true)
+        .spawn()
+        .map_err(|e| CoreError::Scanner {
+            scanner: "ruff".to_string(),
+            source: Box::new(e),
+        })?;
+
+    let output = run_with_timeout(child, "ruff").await?;
+
+    if output.stdout.is_empty() {
+        return Ok(Vec::new());
+    }
+
+    let results: Vec<RuffResult> = match serde_json::from_slice(&output.stdout) {
+        Ok(parsed) => parsed,
+        Err(e) => {
+            // Stay best-effort, but leave a trace instead of silently reporting a clean result.
+            tracing::warn!("Failed to parse ruff JSON output: {e}");
+            Vec::new()
+        }
+    };
+
+    let findings = results
+        .into_iter()
+        .map(|r| {
+            let severity = if r.code.starts_with('E') || r.code.starts_with('F') {
+                Severity::Medium
+            } else {
+                Severity::Low
+            };
+
+            let fingerprint = dedup::compute_fingerprint(&[
+                repo_id,
+                "ruff",
+                &r.code,
+                &r.filename,
+                &r.location.row.to_string(),
+            ]);
+
+            let mut finding = Finding::new(
+                repo_id.to_string(),
+                fingerprint,
+                "ruff".to_string(),
+                ScanType::Lint,
+                format!("[ruff] {}: {}", r.code, r.message),
+                r.message,
+                severity,
+            );
+            finding.rule_id = Some(r.code);
+            finding.file_path = Some(r.filename);
+            finding.line_number = Some(r.location.row);
+            finding
+        })
+        .collect();
+
+    Ok(findings)
+}
+
+/// Position of a ruff diagnostic; only `row` is read from the JSON `location` object.
+#[derive(serde::Deserialize)]
+struct RuffLocation {
+    row: u32,
+}
+
+/// One entry of the JSON array that `run_ruff` parses from ruff's stdout.
+///
+/// JSON keys not listed here are ignored during deserialization.
+// NOTE(review): `code` is a plain `String`, so a `null` code in any entry makes the
+// whole array fail to deserialize. Confirm the ruff version in use never emits one.
+#[derive(serde::Deserialize)]
+struct RuffResult {
+    filename: String,
+    location: RuffLocation,
+    code: String,
+    message: String,
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    /// Deserializes a ruff JSON report, panicking on malformed input.
+    fn parse(report: &str) -> Vec<RuffResult> {
+        serde_json::from_str(report).unwrap()
+    }
+
+    /// Mirrors the prefix check `run_ruff` uses to pick `Severity::Medium`.
+    fn is_medium(code: &str) -> bool {
+        code.starts_with('E') || code.starts_with('F')
+    }
+
+    #[test]
+    fn deserialize_ruff_output() {
+        let diagnostics = parse(
+            r#"[
+            {
+                "code": "E501",
+                "message": "Line too long (120 > 79 characters)",
+                "filename": "src/main.py",
+                "location": {"row": 42}
+            },
+            {
+                "code": "F401",
+                "message": "`os` imported but unused",
+                "filename": "src/utils.py",
+                "location": {"row": 1}
+            }
+        ]"#,
+        );
+        assert_eq!(diagnostics.len(), 2);
+
+        let first = &diagnostics[0];
+        assert_eq!(first.code, "E501");
+        assert_eq!(first.filename, "src/main.py");
+        assert_eq!(first.location.row, 42);
+
+        let second = &diagnostics[1];
+        assert_eq!(second.code, "F401");
+        assert_eq!(second.location.row, 1);
+    }
+
+    #[test]
+    fn deserialize_ruff_empty() {
+        assert!(parse("[]").is_empty());
+    }
+
+    #[test]
+    fn ruff_severity_e_and_f_are_medium() {
+        for code in ["E501", "E302", "F401", "F811"].iter() {
+            assert!(is_medium(code), "Expected {code} to be Medium severity");
+        }
+    }
+
+    #[test]
+    fn ruff_severity_others_are_low() {
+        for code in ["W291", "I001", "D100", "C901", "N801"].iter() {
+            assert!(!is_medium(code), "Expected {code} to be Low severity");
+        }
+    }
+
+    #[test]
+    fn deserialize_ruff_with_extra_fields() {
+        // Ruff output may contain additional fields we don't use
+        let diagnostics = parse(
+            r#"[{
+            "code": "W291",
+            "message": "Trailing whitespace",
+            "filename": "app.py",
+            "location": {"row": 3, "column": 10},
+            "end_location": {"row": 3, "column": 11},
+            "fix": null,
+            "noqa_row": 3
+        }]"#,
+        );
+        assert_eq!(diagnostics.len(), 1);
+        assert_eq!(diagnostics[0].code, "W291");
+    }
+}
@@ -3,8 +3,12 @@ pub mod cve;
 pub mod dedup;
 pub mod git;
 pub mod gitleaks;
+mod graph_build;
+mod issue_creation;
 pub mod lint;
 pub mod orchestrator;
 pub mod patterns;
+mod pr_review;
 pub mod sbom;
 pub mod semgrep;
+mod tracker_dispatch;
@@ -4,7 +4,6 @@ use mongodb::bson::doc;
 use tracing::Instrument;

 use compliance_core::models::*;
-use compliance_core::traits::issue_tracker::IssueTracker;
 use compliance_core::traits::Scanner;
 use compliance_core::AgentConfig;

@@ -19,84 +18,6 @@ use crate::pipeline::lint::LintScanner;
 use crate::pipeline::patterns::{GdprPatternScanner, OAuthPatternScanner};
 use crate::pipeline::sbom::SbomScanner;
 use crate::pipeline::semgrep::SemgrepScanner;
-use crate::trackers;
-
-/// Enum dispatch for issue trackers (async traits aren't dyn-compatible).
-enum TrackerDispatch {
-    GitHub(trackers::github::GitHubTracker),
-    GitLab(trackers::gitlab::GitLabTracker),
-    Gitea(trackers::gitea::GiteaTracker),
-    Jira(trackers::jira::JiraTracker),
-}
-
-impl TrackerDispatch {
-    fn name(&self) -> &str {
-        match self {
-            Self::GitHub(t) => t.name(),
-            Self::GitLab(t) => t.name(),
-            Self::Gitea(t) => t.name(),
-            Self::Jira(t) => t.name(),
-        }
-    }
-
-    async fn create_issue(
-        &self,
-        owner: &str,
-        repo: &str,
-        title: &str,
-        body: &str,
-        labels: &[String],
-    ) -> Result<TrackerIssue, compliance_core::error::CoreError> {
-        match self {
-            Self::GitHub(t) => t.create_issue(owner, repo, title, body, labels).await,
-            Self::GitLab(t) => t.create_issue(owner, repo, title, body, labels).await,
-            Self::Gitea(t) => t.create_issue(owner, repo, title, body, labels).await,
-            Self::Jira(t) => t.create_issue(owner, repo, title, body, labels).await,
-        }
-    }
-
-    async fn find_existing_issue(
-        &self,
-        owner: &str,
-        repo: &str,
-        fingerprint: &str,
-    ) -> Result<Option<TrackerIssue>, compliance_core::error::CoreError> {
-        match self {
-            Self::GitHub(t) => t.find_existing_issue(owner, repo, fingerprint).await,
-            Self::GitLab(t) => t.find_existing_issue(owner, repo, fingerprint).await,
-            Self::Gitea(t) => t.find_existing_issue(owner, repo, fingerprint).await,
-            Self::Jira(t) => t.find_existing_issue(owner, repo, fingerprint).await,
-        }
-    }
-
-    async fn create_pr_review(
-        &self,
-        owner: &str,
-        repo: &str,
-        pr_number: u64,
-        body: &str,
-        comments: Vec<compliance_core::traits::issue_tracker::ReviewComment>,
-    ) -> Result<(), compliance_core::error::CoreError> {
-        match self {
-            Self::GitHub(t) => {
-                t.create_pr_review(owner, repo, pr_number, body, comments)
-                    .await
-            }
-            Self::GitLab(t) => {
-                t.create_pr_review(owner, repo, pr_number, body, comments)
-                    .await
-            }
-            Self::Gitea(t) => {
-                t.create_pr_review(owner, repo, pr_number, body, comments)
-                    .await
-            }
-            Self::Jira(t) => {
-                t.create_pr_review(owner, repo, pr_number, body, comments)
-                    .await
-            }
-        }
-    }
-}

 /// Context from graph analysis passed to LLM triage for enhanced filtering
 #[derive(Debug)]
@@ -109,10 +30,10 @@ pub struct GraphContext {
 }

 pub struct PipelineOrchestrator {
-    config: AgentConfig,
-    db: Database,
-    llm: Arc<LlmClient>,
-    http: reqwest::Client,
+    pub(super) config: AgentConfig,
+    pub(super) db: Database,
+    pub(super) llm: Arc<LlmClient>,
+    pub(super) http: reqwest::Client,
 }

 impl PipelineOrchestrator {
@@ -460,446 +381,7 @@ impl PipelineOrchestrator {
        Ok(new_count)
    }

-    /// Build the code knowledge graph for a repo and compute impact analyses
-    async fn build_code_graph(
-        &self,
-        repo_path: &std::path::Path,
-        repo_id: &str,
-        findings: &[Finding],
-    ) -> Result<GraphContext, AgentError> {
-        let graph_build_id = uuid::Uuid::new_v4().to_string();
-        let engine = compliance_graph::GraphEngine::new(50_000);
-
-        let (mut code_graph, build_run) =
-            engine
-                .build_graph(repo_path, repo_id, &graph_build_id)
-                .map_err(|e| AgentError::Other(format!("Graph build error: {e}")))?;
-
-        // Apply community detection
-        compliance_graph::graph::community::apply_communities(&mut code_graph);
-
-        // Store graph in MongoDB
-        let store = compliance_graph::graph::persistence::GraphStore::new(self.db.inner());
-        store
-            .delete_repo_graph(repo_id)
-            .await
-            .map_err(|e| AgentError::Other(format!("Graph cleanup error: {e}")))?;
-        store
-            .store_graph(&build_run, &code_graph.nodes, &code_graph.edges)
-            .await
-            .map_err(|e| AgentError::Other(format!("Graph store error: {e}")))?;
-
-        // Compute impact analysis for each finding
-        let analyzer = compliance_graph::GraphEngine::impact_analyzer(&code_graph);
-        let mut impacts = Vec::new();
-
-        for finding in findings {
-            if let Some(file_path) = &finding.file_path {
-                let impact = analyzer.analyze(
-                    repo_id,
-                    &finding.fingerprint,
-                    &graph_build_id,
-                    file_path,
-                    finding.line_number,
-                );
-                store
-                    .store_impact(&impact)
-                    .await
-                    .map_err(|e| AgentError::Other(format!("Impact store error: {e}")))?;
-                impacts.push(impact);
-            }
-        }
-
-        Ok(GraphContext {
-            node_count: build_run.node_count,
-            edge_count: build_run.edge_count,
-            community_count: build_run.community_count,
-            impacts,
-        })
-    }
-
-    /// Trigger DAST scan if a target is configured for this repo
-    async fn maybe_trigger_dast(&self, repo_id: &str, scan_run_id: &str) {
-        use futures_util::TryStreamExt;
-
-        let filter = mongodb::bson::doc! { "repo_id": repo_id };
-        let targets: Vec<compliance_core::models::DastTarget> =
-            match self.db.dast_targets().find(filter).await {
-                Ok(cursor) => cursor.try_collect().await.unwrap_or_default(),
-                Err(_) => return,
-            };
-
-        if targets.is_empty() {
-            tracing::info!("[{repo_id}] No DAST targets configured, skipping");
-            return;
-        }
-
-        for target in targets {
-            let db = self.db.clone();
-            let scan_run_id = scan_run_id.to_string();
-            tokio::spawn(async move {
-                let orchestrator = compliance_dast::DastOrchestrator::new(100);
-                match orchestrator.run_scan(&target, Vec::new()).await {
-                    Ok((mut scan_run, findings)) => {
-                        scan_run.sast_scan_run_id = Some(scan_run_id);
-                        if let Err(e) = db.dast_scan_runs().insert_one(&scan_run).await {
-                            tracing::error!("Failed to store DAST scan run: {e}");
-                        }
-                        for finding in &findings {
-                            if let Err(e) = db.dast_findings().insert_one(finding).await {
-                                tracing::error!("Failed to store DAST finding: {e}");
-                            }
-                        }
-                        tracing::info!("DAST scan complete: {} findings", findings.len());
-                    }
-                    Err(e) => {
-                        tracing::error!("DAST scan failed: {e}");
-                    }
-                }
-            });
-        }
-    }
-
-    /// Build an issue tracker client from a repository's tracker configuration.
-    /// Returns `None` if the repo has no tracker configured.
-    fn build_tracker(&self, repo: &TrackedRepository) -> Option<TrackerDispatch> {
-        let tracker_type = repo.tracker_type.as_ref()?;
-        // Per-repo token takes precedence, fall back to global config
-        match tracker_type {
-            TrackerType::GitHub => {
-                let token = repo.tracker_token.clone().or_else(|| {
-                    self.config.github_token.as_ref().map(|t| {
-                        use secrecy::ExposeSecret;
-                        t.expose_secret().to_string()
-                    })
-                })?;
-                let secret = secrecy::SecretString::from(token);
-                match trackers::github::GitHubTracker::new(&secret) {
-                    Ok(t) => Some(TrackerDispatch::GitHub(t)),
-                    Err(e) => {
-                        tracing::warn!("Failed to build GitHub tracker: {e}");
-                        None
-                    }
-                }
-            }
-            TrackerType::GitLab => {
-                let base_url = self
-                    .config
-                    .gitlab_url
-                    .clone()
-                    .unwrap_or_else(|| "https://gitlab.com".to_string());
-                let token = repo.tracker_token.clone().or_else(|| {
-                    self.config.gitlab_token.as_ref().map(|t| {
-                        use secrecy::ExposeSecret;
-                        t.expose_secret().to_string()
-                    })
-                })?;
-                let secret = secrecy::SecretString::from(token);
-                Some(TrackerDispatch::GitLab(
-                    trackers::gitlab::GitLabTracker::new(base_url, secret),
-                ))
-            }
-            TrackerType::Gitea => {
-                let token = repo.tracker_token.clone()?;
-                let base_url = extract_base_url(&repo.git_url)?;
-                let secret = secrecy::SecretString::from(token);
-                Some(TrackerDispatch::Gitea(trackers::gitea::GiteaTracker::new(
-                    base_url, secret,
-                )))
-            }
-            TrackerType::Jira => {
-                let base_url = self.config.jira_url.clone()?;
-                let email = self.config.jira_email.clone()?;
-                let project_key = self.config.jira_project_key.clone()?;
-                let token = repo.tracker_token.clone().or_else(|| {
-                    self.config.jira_api_token.as_ref().map(|t| {
-                        use secrecy::ExposeSecret;
-                        t.expose_secret().to_string()
-                    })
-                })?;
-                let secret = secrecy::SecretString::from(token);
-                Some(TrackerDispatch::Jira(trackers::jira::JiraTracker::new(
-                    base_url,
-                    email,
-                    secret,
-                    project_key,
-                )))
-            }
-        }
-    }
-
-    /// Create tracker issues for new findings (severity >= Medium).
-    /// Checks for duplicates via fingerprint search before creating.
-    #[tracing::instrument(skip_all, fields(repo_id = %repo_id))]
-    async fn create_tracker_issues(
-        &self,
-        repo: &TrackedRepository,
-        repo_id: &str,
-        new_findings: &[Finding],
-    ) -> Result<(), AgentError> {
-        let tracker = match self.build_tracker(repo) {
-            Some(t) => t,
-            None => {
-                tracing::info!("[{repo_id}] No issue tracker configured, skipping");
-                return Ok(());
-            }
-        };
-
-        let owner = match repo.tracker_owner.as_deref() {
-            Some(o) => o,
-            None => {
-                tracing::warn!("[{repo_id}] tracker_owner not set, skipping issue creation");
-                return Ok(());
-            }
-        };
-        let tracker_repo_name = match repo.tracker_repo.as_deref() {
-            Some(r) => r,
-            None => {
-                tracing::warn!("[{repo_id}] tracker_repo not set, skipping issue creation");
-                return Ok(());
-            }
-        };
-
-        // Only create issues for medium+ severity findings
-        let actionable: Vec<&Finding> = new_findings
-            .iter()
-            .filter(|f| {
-                matches!(
-                    f.severity,
-                    Severity::Medium | Severity::High | Severity::Critical
-                )
-            })
-            .collect();
-
-        if actionable.is_empty() {
-            tracing::info!("[{repo_id}] No medium+ findings, skipping issue creation");
-            return Ok(());
-        }
-
-        tracing::info!(
-            "[{repo_id}] Creating issues for {} findings via {}",
-            actionable.len(),
-            tracker.name()
-        );
-
-        let mut created = 0u32;
-        for finding in actionable {
-            let title = format!(
-                "[{}] {}: {}",
-                finding.severity, finding.scanner, finding.title
-            );
-
-            // Check if an issue already exists by fingerprint first, then by title
-            let mut found_existing = false;
-            for search_term in [&finding.fingerprint, &title] {
-                match tracker
-                    .find_existing_issue(owner, tracker_repo_name, search_term)
-                    .await
-                {
-                    Ok(Some(existing)) => {
-                        tracing::debug!(
-                            "[{repo_id}] Issue already exists for '{}': {}",
-                            search_term,
-                            existing.external_url
-                        );
-                        found_existing = true;
-                        break;
-                    }
-                    Ok(None) => {}
-                    Err(e) => {
-                        tracing::warn!("[{repo_id}] Failed to search for existing issue: {e}");
-                    }
-                }
-            }
-            if found_existing {
-                continue;
-            }
-            let body = format_issue_body(finding);
-            let labels = vec![
-                format!("severity:{}", finding.severity),
-                format!("scanner:{}", finding.scanner),
-                "compliance-scanner".to_string(),
-            ];
-
-            match tracker
-                .create_issue(owner, tracker_repo_name, &title, &body, &labels)
-                .await
-            {
-                Ok(mut issue) => {
-                    issue.finding_id = finding
-                        .id
-                        .as_ref()
-                        .map(|id| id.to_hex())
-                        .unwrap_or_default();
-
-                    // Update the finding with the issue URL
-                    if let Some(finding_id) = &finding.id {
-                        let _ = self
-                            .db
-                            .findings()
-                            .update_one(
-                                doc! { "_id": finding_id },
-                                doc! { "$set": { "tracker_issue_url": &issue.external_url } },
-                            )
-                            .await;
-                    }
-
-                    // Store the tracker issue record
-                    if let Err(e) = self.db.tracker_issues().insert_one(&issue).await {
-                        tracing::warn!("[{repo_id}] Failed to store tracker issue: {e}");
-                    }
-
-                    created += 1;
-                }
-                Err(e) => {
-                    tracing::warn!(
-                        "[{repo_id}] Failed to create issue for {}: {e}",
-                        finding.fingerprint
-                    );
-                }
-            }
-        }
-
-        tracing::info!("[{repo_id}] Created {created} tracker issues");
-        Ok(())
-    }
-
-    /// Run an incremental scan on a PR diff and post review comments.
-    #[tracing::instrument(skip_all, fields(repo_id = %repo_id, pr_number))]
-    pub async fn run_pr_review(
-        &self,
-        repo: &TrackedRepository,
-        repo_id: &str,
-        pr_number: u64,
-        base_sha: &str,
-        head_sha: &str,
-    ) -> Result<(), AgentError> {
-        let tracker = match self.build_tracker(repo) {
-            Some(t) => t,
-            None => {
-                tracing::warn!("[{repo_id}] No tracker configured, cannot post PR review");
-                return Ok(());
-            }
-        };
-        let owner = repo.tracker_owner.as_deref().unwrap_or("");
-        let tracker_repo_name = repo.tracker_repo.as_deref().unwrap_or("");
-        if owner.is_empty() || tracker_repo_name.is_empty() {
-            tracing::warn!("[{repo_id}] tracker_owner or tracker_repo not set");
-            return Ok(());
-        }
-
-        // Clone/fetch the repo
-        let creds = GitOps::make_repo_credentials(&self.config, repo);
-        let git_ops = GitOps::new(&self.config.git_clone_base_path, creds);
-        let repo_path = git_ops.clone_or_fetch(&repo.git_url, &repo.name)?;
-
-        // Get diff between base and head
-        let diff_files = GitOps::get_diff_content(&repo_path, base_sha, head_sha)?;
-        if diff_files.is_empty() {
-            tracing::info!("[{repo_id}] PR #{pr_number}: no diff files, skipping review");
-            return Ok(());
-        }
-
-        // Run semgrep on the full repo but we'll filter findings to changed files
-        let changed_paths: std::collections::HashSet<String> =
-            diff_files.iter().map(|f| f.path.clone()).collect();
-
-        let mut pr_findings: Vec<Finding> = Vec::new();
-
-        // SAST scan (semgrep)
-        match SemgrepScanner.scan(&repo_path, repo_id).await {
-            Ok(output) => {
-                for f in output.findings {
-                    if let Some(fp) = &f.file_path {
-                        if changed_paths.contains(fp.as_str()) {
-                            pr_findings.push(f);
-                        }
-                    }
-                }
-            }
-            Err(e) => tracing::warn!("[{repo_id}] PR semgrep failed: {e}"),
-        }
-
-        // LLM code review on the diff
-        let reviewer = CodeReviewScanner::new(self.llm.clone());
-        let review_output = reviewer
-            .review_diff(&repo_path, repo_id, base_sha, head_sha)
-            .await;
-        pr_findings.extend(review_output.findings);
-
-        if pr_findings.is_empty() {
-            // Post a clean review
-            if let Err(e) = tracker
-                .create_pr_review(
-                    owner,
-                    tracker_repo_name,
-                    pr_number,
-                    "Compliance scan: no issues found in this PR.",
-                    Vec::new(),
-                )
-                .await
-            {
-                tracing::warn!("[{repo_id}] Failed to post clean PR review: {e}");
-            }
-            return Ok(());
-        }
-
-        // Build review comments from findings
-        let mut review_comments = Vec::new();
-        for finding in &pr_findings {
-            if let (Some(path), Some(line)) = (&finding.file_path, finding.line_number) {
-                let comment_body = format!(
-                    "**[{}] {}**\n\n{}\n\n*Scanner: {} | {}*",
-                    finding.severity,
-                    finding.title,
-                    finding.description,
-                    finding.scanner,
-                    finding
-                        .cwe
-                        .as_deref()
-                        .map(|c| format!("CWE: {c}"))
-                        .unwrap_or_default(),
-                );
-                review_comments.push(compliance_core::traits::issue_tracker::ReviewComment {
-                    path: path.clone(),
-                    line,
-                    body: comment_body,
-                });
-            }
-        }
-
-        let summary = format!(
-            "Compliance scan found **{}** issue(s) in this PR:\n\n{}",
-            pr_findings.len(),
-            pr_findings
-                .iter()
-                .map(|f| format!("- **[{}]** {}: {}", f.severity, f.scanner, f.title))
-                .collect::<Vec<_>>()
-                .join("\n"),
-        );
-
-        if let Err(e) = tracker
-            .create_pr_review(
-                owner,
-                tracker_repo_name,
-                pr_number,
-                &summary,
-                review_comments,
-            )
-            .await
-        {
-            tracing::warn!("[{repo_id}] Failed to post PR review: {e}");
-        } else {
-            tracing::info!(
-                "[{repo_id}] Posted PR review on #{pr_number} with {} findings",
-                pr_findings.len()
-            );
-        }
-
-        Ok(())
-    }
-
-    async fn update_phase(&self, scan_run_id: &str, phase: &str) {
+    pub(super) async fn update_phase(&self, scan_run_id: &str, phase: &str) {
        if let Ok(oid) = mongodb::bson::oid::ObjectId::parse_str(scan_run_id) {
            let _ = self
                .db
@@ -917,9 +399,9 @@ impl PipelineOrchestrator {
 }

 /// Extract the scheme + host from a git URL.
-/// e.g. "https://gitea.example.com/owner/repo.git" → "https://gitea.example.com"
-/// e.g. "ssh://git@gitea.example.com:22/owner/repo.git" → "https://gitea.example.com"
-fn extract_base_url(git_url: &str) -> Option<String> {
+/// e.g. "https://gitea.example.com/owner/repo.git" -> "https://gitea.example.com"
+/// e.g. "ssh://git@gitea.example.com:22/owner/repo.git" -> "https://gitea.example.com"
+pub(super) fn extract_base_url(git_url: &str) -> Option<String> {
    if let Some(rest) = git_url.strip_prefix("https://") {
        let host = rest.split('/').next()?;
        Some(format!("https://{host}"))
@@ -927,7 +409,7 @@ fn extract_base_url(git_url: &str) -> Option<String> {
        let host = rest.split('/').next()?;
        Some(format!("http://{host}"))
    } else if let Some(rest) = git_url.strip_prefix("ssh://") {
-        // ssh://git@host:port/path → extract host
+        // ssh://git@host:port/path -> extract host
        let after_at = rest.find('@').map(|i| &rest[i + 1..]).unwrap_or(rest);
        let host = after_at.split(&[':', '/'][..]).next()?;
        Some(format!("https://{host}"))
@@ -940,48 +422,3 @@ fn extract_base_url(git_url: &str) -> Option<String> {
        None
    }
 }
-
-/// Format a finding into a markdown issue body for the tracker.
-fn format_issue_body(finding: &Finding) -> String {
-    let mut body = String::new();
-
-    body.push_str(&format!("## {} Finding\n\n", finding.severity));
-    body.push_str(&format!("**Scanner:** {}\n", finding.scanner));
-    body.push_str(&format!("**Severity:** {}\n", finding.severity));
-
-    if let Some(rule) = &finding.rule_id {
-        body.push_str(&format!("**Rule:** {}\n", rule));
-    }
-    if let Some(cwe) = &finding.cwe {
-        body.push_str(&format!("**CWE:** {}\n", cwe));
-    }
-
-    body.push_str(&format!("\n### Description\n\n{}\n", finding.description));
-
-    if let Some(file_path) = &finding.file_path {
-        body.push_str(&format!("\n### Location\n\n**File:** `{}`", file_path));
-        if let Some(line) = finding.line_number {
-            body.push_str(&format!(" (line {})", line));
-        }
-        body.push('\n');
-    }
-
-    if let Some(snippet) = &finding.code_snippet {
-        body.push_str(&format!("\n### Code\n\n```\n{}\n```\n", snippet));
-    }
-
-    if let Some(remediation) = &finding.remediation {
-        body.push_str(&format!("\n### Remediation\n\n{}\n", remediation));
-    }
-
-    if let Some(fix) = &finding.suggested_fix {
-        body.push_str(&format!("\n### Suggested Fix\n\n```\n{}\n```\n", fix));
-    }
-
-    body.push_str(&format!(
-        "\n---\n*Fingerprint:* `{}`\n*Generated by compliance-scanner*",
-        finding.fingerprint
-    ));
-
-    body
-}
@@ -256,3 +256,159 @@ fn walkdir(path: &Path) -> Result<Vec<walkdir::DirEntry>, CoreError> {

    Ok(entries)
 }
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    // --- compile_regex: valid patterns compile, invalid ones fall back to "^$" ---
+
+    #[test]
+    fn compile_regex_valid_pattern() {
+        let re = compile_regex(r"\bfoo\b");
+        assert!(re.is_match("hello foo bar"));
+        assert!(!re.is_match("foobar")); // no word boundary after "foo"
+    }
+
+    #[test]
+    fn compile_regex_invalid_pattern_returns_fallback() {
+        // An invalid regex should return the fallback "^$" that only matches empty strings
+        let re = compile_regex(r"[invalid");
+        assert!(re.is_match(""));
+        assert!(!re.is_match("anything"));
+    }
+
+    // --- GDPR pattern tests (rules are looked up by their index in `scanner.patterns`) ---
+
+    #[test]
+    fn gdpr_pii_logging_matches() {
+        let scanner = GdprPatternScanner::new();
+        let pattern = &scanner.patterns[0];
+        // gdpr-pii-logging: (log|print|console\.|logger\.|tracing::)\s*[\.(].*\b(pii_keyword)\b
+        assert!(pattern.pattern.is_match("console.log(email)"));
+        assert!(pattern.pattern.is_match("console.log(user.ssn)"));
+        assert!(pattern.pattern.is_match("print(phone_number)"));
+        assert!(pattern.pattern.is_match("tracing::(ip_addr)"));
+        assert!(pattern.pattern.is_match("log.debug(credit_card)"));
+    }
+
+    #[test]
+    fn gdpr_pii_logging_no_false_positive() {
+        let scanner = GdprPatternScanner::new();
+        let pattern = &scanner.patterns[0];
+        // Logging without a PII field, and a PII field without a logging call, must not match
+        assert!(!pattern
+            .pattern
+            .is_match("logger.info(\"request completed\")"));
+        assert!(!pattern.pattern.is_match("let email = user.email;"));
+    }
+
+    #[test]
+    fn gdpr_no_consent_matches() {
+        let scanner = GdprPatternScanner::new();
+        let pattern = &scanner.patterns[1]; // gdpr-no-consent
+        assert!(pattern.pattern.is_match("collect personal data"));
+        assert!(pattern.pattern.is_match("store user_data in db"));
+        assert!(pattern.pattern.is_match("save pii to disk"));
+    }
+
+    #[test]
+    fn gdpr_user_model_matches() {
+        let scanner = GdprPatternScanner::new();
+        let pattern = &scanner.patterns[2]; // gdpr-no-delete-endpoint: keyed on user models
+        assert!(pattern.pattern.is_match("struct User {"));
+        assert!(pattern.pattern.is_match("class User(Model):"));
+    }
+
+    #[test]
+    fn gdpr_hardcoded_retention_matches() {
+        let scanner = GdprPatternScanner::new();
+        let pattern = &scanner.patterns[3]; // gdpr-hardcoded-retention
+        assert!(pattern.pattern.is_match("retention = 30"));
+        assert!(pattern.pattern.is_match("ttl: 3600"));
+        assert!(pattern.pattern.is_match("expire = 86400"));
+    }
+
+    // --- OAuth pattern tests (rules are looked up by their index in `scanner.patterns`) ---
+
+    #[test]
+    fn oauth_implicit_grant_matches() {
+        let scanner = OAuthPatternScanner::new();
+        let pattern = &scanner.patterns[0]; // oauth-implicit-grant
+        assert!(pattern.pattern.is_match("response_type = \"token\""));
+        assert!(pattern.pattern.is_match("grant_type: implicit"));
+        assert!(pattern.pattern.is_match("response_type='token'"));
+    }
+
+    #[test]
+    fn oauth_implicit_grant_no_false_positive() {
+        let scanner = OAuthPatternScanner::new();
+        let pattern = &scanner.patterns[0];
+        assert!(!pattern.pattern.is_match("response_type = \"code\""));
+        assert!(!pattern.pattern.is_match("grant_type: authorization_code"));
+    }
+
+    #[test]
+    fn oauth_authorization_code_matches() {
+        let scanner = OAuthPatternScanner::new();
+        let pattern = &scanner.patterns[1]; // oauth-missing-pkce: keyed on authorization-code use
+        assert!(pattern.pattern.is_match("uses authorization_code flow"));
+        assert!(pattern.pattern.is_match("authorization code grant"));
+    }
+
+    #[test]
+    fn oauth_token_localstorage_matches() {
+        let scanner = OAuthPatternScanner::new();
+        let pattern = &scanner.patterns[2]; // oauth-token-localstorage
+        assert!(pattern
+            .pattern
+            .is_match("localStorage.setItem('access_token', tok)"));
+        assert!(pattern
+            .pattern
+            .is_match("localStorage.getItem(\"refresh_token\")"));
+    }
+
+    #[test]
+    fn oauth_token_localstorage_no_false_positive() {
+        let scanner = OAuthPatternScanner::new();
+        let pattern = &scanner.patterns[2];
+        assert!(!pattern
+            .pattern
+            .is_match("localStorage.setItem('theme', 'dark')")); // not a token key
+        assert!(!pattern
+            .pattern
+            .is_match("sessionStorage.setItem('token', t)")); // not localStorage
+    }
+
+    #[test]
+    fn oauth_token_url_matches() {
+        let scanner = OAuthPatternScanner::new();
+        let pattern = &scanner.patterns[3]; // oauth-token-url
+        assert!(pattern.pattern.is_match("access_token = build_url(query)"));
+        assert!(pattern.pattern.is_match("bearer = url.param"));
+    }
+
+    // --- File-extension coverage declared by the pattern rules ---
+
+    #[test]
+    fn gdpr_patterns_cover_common_languages() {
+        let scanner = GdprPatternScanner::new();
+        for pattern in &scanner.patterns {
+            assert!(
+                pattern.file_extensions.contains(&"rs".to_string()),
+                "Pattern {} should cover .rs files",
+                pattern.id
+            );
+        }
+    }
+
+    #[test]
+    fn oauth_localstorage_only_js_ts() {
+        let scanner = OAuthPatternScanner::new();
+        let pattern = &scanner.patterns[2]; // oauth-token-localstorage
+        assert!(pattern.file_extensions.contains(&"js".to_string()));
+        assert!(pattern.file_extensions.contains(&"ts".to_string()));
+        assert!(!pattern.file_extensions.contains(&"rs".to_string()));
+        assert!(!pattern.file_extensions.contains(&"py".to_string()));
+    }
+}
@@ -0,0 +1,154 @@
+use compliance_core::models::*;
+
+use super::orchestrator::PipelineOrchestrator;
+use crate::error::AgentError;
+use crate::pipeline::code_review::CodeReviewScanner;
+use crate::pipeline::git::GitOps;
+use crate::pipeline::semgrep::SemgrepScanner;
+
+use compliance_core::traits::Scanner;
+
+impl PipelineOrchestrator {
+    /// Run an incremental scan on a PR diff and post the outcome as a PR review.
+    ///
+    /// Semgrep runs over the whole checkout and its findings are narrowed to the files that
+    /// changed between `base_sha` and `head_sha`; the LLM reviewer's findings for the same
+    /// range are appended as-is. Findings with both a file path and a line number become
+    /// inline review comments, and every finding is listed in the review summary.
+    ///
+    /// # Errors
+    ///
+    /// Only clone/fetch and diff failures are returned. A missing tracker configuration, a
+    /// failed semgrep run, or a failed review post is logged and still yields `Ok(())`.
+    // A bare name inside `fields(...)` only declares an empty span field, so `pr_number` is
+    // given an explicit value to make it actually show up on the span.
+    #[tracing::instrument(skip_all, fields(repo_id = %repo_id, pr_number = pr_number))]
+    pub async fn run_pr_review(
+        &self,
+        repo: &TrackedRepository,
+        repo_id: &str,
+        pr_number: u64,
+        base_sha: &str,
+        head_sha: &str,
+    ) -> Result<(), AgentError> {
+        let tracker = match self.build_tracker(repo) {
+            Some(t) => t,
+            None => {
+                tracing::warn!("[{repo_id}] No tracker configured, cannot post PR review");
+                return Ok(());
+            }
+        };
+        let owner = repo.tracker_owner.as_deref().unwrap_or("");
+        let tracker_repo_name = repo.tracker_repo.as_deref().unwrap_or("");
+        if owner.is_empty() || tracker_repo_name.is_empty() {
+            tracing::warn!("[{repo_id}] tracker_owner or tracker_repo not set");
+            return Ok(());
+        }
+
+        // Clone/fetch the repo
+        let creds = GitOps::make_repo_credentials(&self.config, repo);
+        let git_ops = GitOps::new(&self.config.git_clone_base_path, creds);
+        let repo_path = git_ops.clone_or_fetch(&repo.git_url, &repo.name)?;
+
+        // Get diff between base and head
+        let diff_files = GitOps::get_diff_content(&repo_path, base_sha, head_sha)?;
+        if diff_files.is_empty() {
+            tracing::info!("[{repo_id}] PR #{pr_number}: no diff files, skipping review");
+            return Ok(());
+        }
+
+        // Semgrep scans the whole repo, so its findings are narrowed to changed files below.
+        let changed_paths: std::collections::HashSet<String> =
+            diff_files.iter().map(|f| f.path.clone()).collect();
+
+        let mut pr_findings: Vec<Finding> = Vec::new();
+
+        // SAST scan (semgrep)
+        match SemgrepScanner.scan(&repo_path, repo_id).await {
+            Ok(output) => pr_findings.extend(output.findings.into_iter().filter(|f| {
+                f.file_path
+                    .as_ref()
+                    .map_or(false, |fp| changed_paths.contains(fp.as_str()))
+            })),
+            Err(e) => tracing::warn!("[{repo_id}] PR semgrep failed: {e}"),
+        }
+
+        // LLM code review on the diff
+        let reviewer = CodeReviewScanner::new(self.llm.clone());
+        let review_output = reviewer
+            .review_diff(&repo_path, repo_id, base_sha, head_sha)
+            .await;
+        pr_findings.extend(review_output.findings);
+
+        if pr_findings.is_empty() {
+            // Post a clean review
+            if let Err(e) = tracker
+                .create_pr_review(
+                    owner,
+                    tracker_repo_name,
+                    pr_number,
+                    "Compliance scan: no issues found in this PR.",
+                    Vec::new(),
+                )
+                .await
+            {
+                tracing::warn!("[{repo_id}] Failed to post clean PR review: {e}");
+            }
+            return Ok(());
+        }
+
+        // Inline comments need an anchor, so findings without a path or a line number are
+        // only mentioned in the summary built further down.
+        let review_comments: Vec<_> = pr_findings
+            .iter()
+            .filter_map(|finding| {
+                let path = finding.file_path.as_ref()?;
+                let line = finding.line_number?;
+                let cwe = finding
+                    .cwe
+                    .as_deref()
+                    .map(|c| format!("CWE: {c}"))
+                    .unwrap_or_default();
+                let body = format!(
+                    "**[{}] {}**\n\n{}\n\n*Scanner: {} | {}*",
+                    finding.severity, finding.title, finding.description, finding.scanner, cwe,
+                );
+                Some(compliance_core::traits::issue_tracker::ReviewComment {
+                    path: path.clone(),
+                    line,
+                    body,
+                })
+            })
+            .collect();
+
+        let summary = format!(
+            "Compliance scan found **{}** issue(s) in this PR:\n\n{}",
+            pr_findings.len(),
+            pr_findings
+                .iter()
+                .map(|f| format!("- **[{}]** {}: {}", f.severity, f.scanner, f.title))
+                .collect::<Vec<_>>()
+                .join("\n"),
+        );
+
+        if let Err(e) = tracker
+            .create_pr_review(
+                owner,
+                tracker_repo_name,
+                pr_number,
+                &summary,
+                review_comments,
+            )
+            .await
+        {
+            tracing::warn!("[{repo_id}] Failed to post PR review: {e}");
+        } else {
+            tracing::info!(
+                "[{repo_id}] Posted PR review on #{pr_number} with {} findings",
+                pr_findings.len()
+            );
+        }
+
+        Ok(())
+    }
+}
@@ -0,0 +1,72 @@
+use std::path::Path;
+
+use compliance_core::CoreError;
+
+/// One advisory reported by `cargo audit`, reduced to the fields that
+/// `run_cargo_audit` hands back to the parent module.
+pub(super) struct AuditVuln {
+    /// Name of the affected package (taken from the advisory's `package` field).
+    pub package: String,
+    /// Advisory identifier (taken from the advisory's `id` field).
+    pub id: String,
+    /// Advisory URL (taken from the advisory's `url` field).
+    pub url: String,
+}
+
+/// Runs `cargo audit --json` inside `repo_path` and returns the advisories
+/// found in its report.
+///
+/// Returns an empty list without spawning anything when `repo_path` contains
+/// no `Cargo.lock`. When the tool's stdout cannot be parsed as the expected
+/// JSON report, the scan is treated as best-effort: a warning is logged and an
+/// empty list is returned, so callers can tell "nothing found" apart from
+/// "audit did not run properly" by looking at the logs.
+///
+/// # Errors
+///
+/// Returns `CoreError::Scanner` when the `cargo` process cannot be started.
+#[tracing::instrument(skip_all)]
+pub(super) async fn run_cargo_audit(
+    repo_path: &Path,
+    _repo_id: &str,
+) -> Result<Vec<AuditVuln>, CoreError> {
+    if !repo_path.join("Cargo.lock").exists() {
+        return Ok(Vec::new());
+    }
+
+    let output = tokio::process::Command::new("cargo")
+        .args(["audit", "--json"])
+        .current_dir(repo_path)
+        .env("RUSTC_WRAPPER", "")
+        .output()
+        .await
+        .map_err(|e| CoreError::Scanner {
+            scanner: "cargo-audit".to_string(),
+            source: Box::new(e),
+        })?;
+
+    // Only stdout is interpreted; the exit status is reported in the warning
+    // below but never turned into an error on its own.
+    let report: CargoAuditOutput = match serde_json::from_slice(&output.stdout) {
+        Ok(parsed) => parsed,
+        Err(e) => {
+            tracing::warn!(
+                "cargo-audit output could not be parsed ({e}); status: {}, stderr: {}",
+                output.status,
+                String::from_utf8_lossy(&output.stderr).trim()
+            );
+            return Ok(Vec::new());
+        }
+    };
+
+    let vulns = report
+        .vulnerabilities
+        .list
+        .into_iter()
+        .map(|v| AuditVuln {
+            package: v.advisory.package,
+            id: v.advisory.id,
+            url: v.advisory.url,
+        })
+        .collect();
+
+    Ok(vulns)
+}
+
+// Cargo audit types
+//
+// These structs deserialize only the part of the `cargo audit --json` report
+// that `run_cargo_audit` reads: `vulnerabilities.list[].advisory.{id,package,url}`.
+/// Top-level object of the report.
+#[derive(serde::Deserialize)]
+struct CargoAuditOutput {
+    vulnerabilities: CargoAuditVulns,
+}
+
+/// The `vulnerabilities` object; only its `list` array is read.
+#[derive(serde::Deserialize)]
+struct CargoAuditVulns {
+    list: Vec<CargoAuditEntry>,
+}
+
+/// One element of `vulnerabilities.list`; only its `advisory` object is read.
+#[derive(serde::Deserialize)]
+struct CargoAuditEntry {
+    advisory: CargoAuditAdvisory,
+}
+
+/// The advisory fields copied into `AuditVuln`. All three are required:
+/// a report missing any of them fails to deserialize.
+#[derive(serde::Deserialize)]
+struct CargoAuditAdvisory {
+    id: String,
+    package: String,
+    url: String,
+}
@@ -1,3 +1,6 @@
+mod cargo_audit;
+mod syft;
+
 use std::path::Path;

 use compliance_core::models::{SbomEntry, ScanType, VulnRef};
@@ -23,7 +26,7 @@ impl Scanner for SbomScanner {
        generate_lockfiles(repo_path).await;

        // Run syft for SBOM generation
-        match run_syft(repo_path, repo_id).await {
+        match syft::run_syft(repo_path, repo_id).await {
            Ok(syft_entries) => entries.extend(syft_entries),
            Err(e) => tracing::warn!("syft failed: {e}"),
        }
@@ -32,7 +35,7 @@ impl Scanner for SbomScanner {
        enrich_cargo_licenses(repo_path, &mut entries).await;

        // Run cargo-audit for Rust-specific vulns
-        match run_cargo_audit(repo_path, repo_id).await {
+        match cargo_audit::run_cargo_audit(repo_path, repo_id).await {
            Ok(vulns) => merge_audit_vulns(&mut entries, vulns),
            Err(e) => tracing::warn!("cargo-audit skipped: {e}"),
        }
@@ -186,95 +189,7 @@ async fn enrich_cargo_licenses(repo_path: &Path, entries: &mut [SbomEntry]) {
    }
 }

-#[tracing::instrument(skip_all, fields(repo_id = %repo_id))]
-async fn run_syft(repo_path: &Path, repo_id: &str) -> Result<Vec<SbomEntry>, CoreError> {
-    let output = tokio::process::Command::new("syft")
-        .arg(repo_path)
-        .args(["-o", "cyclonedx-json"])
-        // Enable remote license lookups for all ecosystems
-        .env("SYFT_GOLANG_SEARCH_REMOTE_LICENSES", "true")
-        .env("SYFT_JAVASCRIPT_SEARCH_REMOTE_LICENSES", "true")
-        .env("SYFT_PYTHON_SEARCH_REMOTE_LICENSES", "true")
-        .env("SYFT_JAVA_USE_NETWORK", "true")
-        .output()
-        .await
-        .map_err(|e| CoreError::Scanner {
-            scanner: "syft".to_string(),
-            source: Box::new(e),
-        })?;
-
-    if !output.status.success() {
-        let stderr = String::from_utf8_lossy(&output.stderr);
-        return Err(CoreError::Scanner {
-            scanner: "syft".to_string(),
-            source: format!("syft exited with {}: {stderr}", output.status).into(),
-        });
-    }
-
-    let cdx: CycloneDxBom = serde_json::from_slice(&output.stdout)?;
-    let entries = cdx
-        .components
-        .unwrap_or_default()
-        .into_iter()
-        .map(|c| {
-            let package_manager = c
-                .purl
-                .as_deref()
-                .and_then(extract_ecosystem_from_purl)
-                .unwrap_or_else(|| "unknown".to_string());
-            let mut entry = SbomEntry::new(
-                repo_id.to_string(),
-                c.name,
-                c.version.unwrap_or_else(|| "unknown".to_string()),
-                package_manager,
-            );
-            entry.purl = c.purl;
-            entry.license = c.licenses.and_then(|ls| extract_license(&ls));
-            entry
-        })
-        .collect();
-
-    Ok(entries)
-}
-
-#[tracing::instrument(skip_all)]
-async fn run_cargo_audit(repo_path: &Path, _repo_id: &str) -> Result<Vec<AuditVuln>, CoreError> {
-    let cargo_lock = repo_path.join("Cargo.lock");
-    if !cargo_lock.exists() {
-        return Ok(Vec::new());
-    }
-
-    let output = tokio::process::Command::new("cargo")
-        .args(["audit", "--json"])
-        .current_dir(repo_path)
-        .env("RUSTC_WRAPPER", "")
-        .output()
-        .await
-        .map_err(|e| CoreError::Scanner {
-            scanner: "cargo-audit".to_string(),
-            source: Box::new(e),
-        })?;
-
-    let result: CargoAuditOutput =
-        serde_json::from_slice(&output.stdout).unwrap_or_else(|_| CargoAuditOutput {
-            vulnerabilities: CargoAuditVulns { list: Vec::new() },
-        });
-
-    let vulns = result
-        .vulnerabilities
-        .list
-        .into_iter()
-        .map(|v| AuditVuln {
-            package: v.advisory.package,
-            id: v.advisory.id,
-            url: v.advisory.url,
-        })
-        .collect();
-
-    Ok(vulns)
-}
-
-fn merge_audit_vulns(entries: &mut [SbomEntry], vulns: Vec<AuditVuln>) {
+fn merge_audit_vulns(entries: &mut [SbomEntry], vulns: Vec<cargo_audit::AuditVuln>) {
    for vuln in vulns {
        if let Some(entry) = entries.iter_mut().find(|e| e.name == vuln.package) {
            entry.known_vulnerabilities.push(VulnRef {
@@ -287,65 +202,6 @@ fn merge_audit_vulns(entries: &mut [SbomEntry], vulns: Vec<AuditVuln>) {
    }
 }

-// CycloneDX JSON types
-#[derive(serde::Deserialize)]
-struct CycloneDxBom {
-    components: Option<Vec<CdxComponent>>,
-}
-
-#[derive(serde::Deserialize)]
-struct CdxComponent {
-    name: String,
-    version: Option<String>,
-    #[serde(rename = "type")]
-    #[allow(dead_code)]
-    component_type: Option<String>,
-    purl: Option<String>,
-    licenses: Option<Vec<CdxLicenseWrapper>>,
-}
-
-#[derive(serde::Deserialize)]
-struct CdxLicenseWrapper {
-    license: Option<CdxLicense>,
-    /// SPDX license expression (e.g. "MIT OR Apache-2.0")
-    expression: Option<String>,
-}
-
-#[derive(serde::Deserialize)]
-struct CdxLicense {
-    id: Option<String>,
-    name: Option<String>,
-}
-
-// Cargo audit types
-#[derive(serde::Deserialize)]
-struct CargoAuditOutput {
-    vulnerabilities: CargoAuditVulns,
-}
-
-#[derive(serde::Deserialize)]
-struct CargoAuditVulns {
-    list: Vec<CargoAuditEntry>,
-}
-
-#[derive(serde::Deserialize)]
-struct CargoAuditEntry {
-    advisory: CargoAuditAdvisory,
-}
-
-#[derive(serde::Deserialize)]
-struct CargoAuditAdvisory {
-    id: String,
-    package: String,
-    url: String,
-}
-
-struct AuditVuln {
-    package: String,
-    id: String,
-    url: String,
-}
-
 // Cargo metadata types
 #[derive(serde::Deserialize)]
 struct CargoMetadata {
@@ -358,49 +214,3 @@ struct CargoPackage {
    version: String,
    license: Option<String>,
 }
-
-/// Extract the best license string from CycloneDX license entries.
-/// Handles three formats: expression ("MIT OR Apache-2.0"), license.id ("MIT"), license.name ("MIT License").
-fn extract_license(entries: &[CdxLicenseWrapper]) -> Option<String> {
-    // First pass: look for SPDX expressions (most precise for dual-licensed packages)
-    for entry in entries {
-        if let Some(ref expr) = entry.expression {
-            if !expr.is_empty() {
-                return Some(expr.clone());
-            }
-        }
-    }
-    // Second pass: collect license.id or license.name from all entries
-    let parts: Vec<String> = entries
-        .iter()
-        .filter_map(|e| {
-            e.license.as_ref().and_then(|lic| {
-                lic.id
-                    .clone()
-                    .or_else(|| lic.name.clone())
-                    .filter(|s| !s.is_empty())
-            })
-        })
-        .collect();
-    if parts.is_empty() {
-        return None;
-    }
-    Some(parts.join(" OR "))
-}
-
-/// Extract the ecosystem/package-manager from a PURL string.
-/// e.g. "pkg:npm/lodash@4.17.21" → "npm", "pkg:cargo/serde@1.0" → "cargo"
-fn extract_ecosystem_from_purl(purl: &str) -> Option<String> {
-    let rest = purl.strip_prefix("pkg:")?;
-    let ecosystem = rest.split('/').next()?;
-    if ecosystem.is_empty() {
-        return None;
-    }
-    // Normalise common PURL types to user-friendly names
-    let normalised = match ecosystem {
-        "golang" => "go",
-        "pypi" => "pip",
-        _ => ecosystem,
-    };
-    Some(normalised.to_string())
-}
@@ -0,0 +1,355 @@
+use std::path::Path;
+
+use compliance_core::models::SbomEntry;
+use compliance_core::CoreError;
+
+/// Runs `syft` on `repo_path`, asking for CycloneDX JSON, and turns every
+/// reported component into an `SbomEntry` tagged with `repo_id`.
+///
+/// A component without a version gets the version `"unknown"`; a component
+/// whose PURL is missing or yields no ecosystem gets the package manager
+/// `"unknown"`.
+///
+/// # Errors
+///
+/// Returns `CoreError::Scanner` when `syft` cannot be started or exits with a
+/// non-success status (the message includes its stderr), and propagates the
+/// JSON error when stdout is not a valid CycloneDX document.
+#[tracing::instrument(skip_all, fields(repo_id = %repo_id))]
+pub(super) async fn run_syft(repo_path: &Path, repo_id: &str) -> Result<Vec<SbomEntry>, CoreError> {
+    let mut command = tokio::process::Command::new("syft");
+    command
+        .arg(repo_path)
+        .args(["-o", "cyclonedx-json"])
+        // Enable remote license lookups for all ecosystems
+        .env("SYFT_GOLANG_SEARCH_REMOTE_LICENSES", "true")
+        .env("SYFT_JAVASCRIPT_SEARCH_REMOTE_LICENSES", "true")
+        .env("SYFT_PYTHON_SEARCH_REMOTE_LICENSES", "true")
+        .env("SYFT_JAVA_USE_NETWORK", "true");
+
+    let output = command.output().await.map_err(|e| CoreError::Scanner {
+        scanner: "syft".to_string(),
+        source: Box::new(e),
+    })?;
+
+    if !output.status.success() {
+        let stderr = String::from_utf8_lossy(&output.stderr);
+        return Err(CoreError::Scanner {
+            scanner: "syft".to_string(),
+            source: format!("syft exited with {}: {stderr}", output.status).into(),
+        });
+    }
+
+    let bom: CycloneDxBom = serde_json::from_slice(&output.stdout)?;
+
+    let mut entries = Vec::new();
+    for component in bom.components.unwrap_or_default() {
+        // Derive the package manager from the PURL before the PURL itself is
+        // moved into the entry.
+        let ecosystem = match component
+            .purl
+            .as_deref()
+            .and_then(extract_ecosystem_from_purl)
+        {
+            Some(found) => found,
+            None => "unknown".to_string(),
+        };
+        let version = component.version.unwrap_or_else(|| "unknown".to_string());
+
+        let mut entry = SbomEntry::new(repo_id.to_string(), component.name, version, ecosystem);
+        entry.purl = component.purl;
+        entry.license = component.licenses.and_then(|ls| extract_license(&ls));
+        entries.push(entry);
+    }
+
+    Ok(entries)
+}
+
+// CycloneDX JSON types
+//
+// These structs describe the parts of syft's `cyclonedx-json` output that
+// this module deserializes.
+/// Root of the document; `components` may be absent entirely.
+#[derive(serde::Deserialize)]
+struct CycloneDxBom {
+    components: Option<Vec<CdxComponent>>,
+}
+
+/// One entry of the `components` array. Only `name` is required.
+#[derive(serde::Deserialize)]
+struct CdxComponent {
+    name: String,
+    version: Option<String>,
+    // Called `type` in the JSON; deserialized but not read anywhere in this
+    // file, hence the `dead_code` allowance.
+    #[serde(rename = "type")]
+    #[allow(dead_code)]
+    component_type: Option<String>,
+    purl: Option<String>,
+    licenses: Option<Vec<CdxLicenseWrapper>>,
+}
+
+/// One element of a component's `licenses` array; it may carry a structured
+/// `license` object and/or an SPDX `expression`.
+#[derive(serde::Deserialize)]
+struct CdxLicenseWrapper {
+    license: Option<CdxLicense>,
+    /// SPDX license expression (e.g. "MIT OR Apache-2.0")
+    expression: Option<String>,
+}
+
+/// Structured license object; `extract_license` prefers `id` over `name`.
+#[derive(serde::Deserialize)]
+struct CdxLicense {
+    id: Option<String>,
+    name: Option<String>,
+}
+
+/// Extract the best license string from CycloneDX license entries.
+///
+/// Handles three formats: expression ("MIT OR Apache-2.0"), license.id ("MIT"),
+/// license.name ("MIT License").
+///
+/// The first non-empty `expression` found in any entry wins. Otherwise every
+/// entry contributes its `license.id`, or its `license.name` when the id is
+/// missing or empty, and the collected values are joined with `" OR "`.
+/// Returns `None` when no entry yields a non-empty value.
+fn extract_license(entries: &[CdxLicenseWrapper]) -> Option<String> {
+    /// Treats an empty string the same as a missing value.
+    fn non_empty(value: Option<&str>) -> Option<&str> {
+        value.filter(|s| !s.is_empty())
+    }
+
+    // First pass: look for SPDX expressions (most precise for dual-licensed packages)
+    if let Some(expr) = entries
+        .iter()
+        .find_map(|e| non_empty(e.expression.as_deref()))
+    {
+        return Some(expr.to_string());
+    }
+
+    // Second pass: collect license.id or license.name from all entries.
+    // Emptiness is checked per field so that an empty id cannot hide a
+    // usable name.
+    let parts: Vec<&str> = entries
+        .iter()
+        .filter_map(|e| e.license.as_ref())
+        .filter_map(|lic| {
+            non_empty(lic.id.as_deref()).or_else(|| non_empty(lic.name.as_deref()))
+        })
+        .collect();
+
+    if parts.is_empty() {
+        None
+    } else {
+        Some(parts.join(" OR "))
+    }
+}
+
+/// Returns the ecosystem (package manager) named by a PURL string.
+///
+/// The ecosystem is whatever sits between the `pkg:` prefix and the first `/`
+/// (or the end of the string when there is no `/`), e.g.
+/// "pkg:npm/lodash@4.17.21" -> "npm", "pkg:cargo/serde@1.0" -> "cargo".
+/// Two PURL types are renamed to friendlier names: "golang" -> "go" and
+/// "pypi" -> "pip". Returns `None` when the prefix is missing or the
+/// ecosystem part is empty.
+fn extract_ecosystem_from_purl(purl: &str) -> Option<String> {
+    let after_scheme = purl.strip_prefix("pkg:")?;
+    let raw_type = after_scheme
+        .split_once('/')
+        .map_or(after_scheme, |(head, _)| head);
+
+    let friendly = match raw_type {
+        "" => return None,
+        // Normalise common PURL types to user-friendly names
+        "golang" => "go",
+        "pypi" => "pip",
+        other => other,
+    };
+    Some(friendly.to_owned())
+}
+
+// Unit tests for the pure helpers in this file (PURL parsing and license
+// extraction) and for deserializing the CycloneDX types. `run_syft` itself
+// is not exercised here.
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    // --- extract_ecosystem_from_purl tests ---
+
+    #[test]
+    fn purl_npm() {
+        assert_eq!(
+            extract_ecosystem_from_purl("pkg:npm/lodash@4.17.21"),
+            Some("npm".to_string())
+        );
+    }
+
+    #[test]
+    fn purl_cargo() {
+        assert_eq!(
+            extract_ecosystem_from_purl("pkg:cargo/serde@1.0.197"),
+            Some("cargo".to_string())
+        );
+    }
+
+    // The PURL type "golang" is reported under the friendlier name "go".
+    #[test]
+    fn purl_golang_normalised() {
+        assert_eq!(
+            extract_ecosystem_from_purl("pkg:golang/github.com/gin-gonic/gin@1.9.1"),
+            Some("go".to_string())
+        );
+    }
+
+    // The PURL type "pypi" is reported under the friendlier name "pip".
+    #[test]
+    fn purl_pypi_normalised() {
+        assert_eq!(
+            extract_ecosystem_from_purl("pkg:pypi/requests@2.31.0"),
+            Some("pip".to_string())
+        );
+    }
+
+    // Types without a special mapping are passed through unchanged.
+    #[test]
+    fn purl_maven() {
+        assert_eq!(
+            extract_ecosystem_from_purl("pkg:maven/org.apache.commons/commons-lang3@3.14.0"),
+            Some("maven".to_string())
+        );
+    }
+
+    #[test]
+    fn purl_missing_prefix() {
+        assert_eq!(extract_ecosystem_from_purl("npm/lodash@4.17.21"), None);
+    }
+
+    // Nothing between "pkg:" and the first '/' means there is no ecosystem.
+    #[test]
+    fn purl_empty_ecosystem() {
+        assert_eq!(extract_ecosystem_from_purl("pkg:/lodash@4.17.21"), None);
+    }
+
+    #[test]
+    fn purl_empty_string() {
+        assert_eq!(extract_ecosystem_from_purl(""), None);
+    }
+
+    #[test]
+    fn purl_just_prefix() {
+        assert_eq!(extract_ecosystem_from_purl("pkg:"), None);
+    }
+
+    // --- extract_license tests ---
+
+    #[test]
+    fn license_from_expression() {
+        let entries = vec![CdxLicenseWrapper {
+            license: None,
+            expression: Some("MIT OR Apache-2.0".to_string()),
+        }];
+        assert_eq!(
+            extract_license(&entries),
+            Some("MIT OR Apache-2.0".to_string())
+        );
+    }
+
+    #[test]
+    fn license_from_id() {
+        let entries = vec![CdxLicenseWrapper {
+            license: Some(CdxLicense {
+                id: Some("MIT".to_string()),
+                name: None,
+            }),
+            expression: None,
+        }];
+        assert_eq!(extract_license(&entries), Some("MIT".to_string()));
+    }
+
+    // With no id present, the license name is used instead.
+    #[test]
+    fn license_from_name_fallback() {
+        let entries = vec![CdxLicenseWrapper {
+            license: Some(CdxLicense {
+                id: None,
+                name: Some("MIT License".to_string()),
+            }),
+            expression: None,
+        }];
+        assert_eq!(extract_license(&entries), Some("MIT License".to_string()));
+    }
+
+    // The expression sits in the second entry on purpose: it must win even
+    // though an entry with a plain id comes first.
+    #[test]
+    fn license_expression_preferred_over_id() {
+        let entries = vec![
+            CdxLicenseWrapper {
+                license: Some(CdxLicense {
+                    id: Some("MIT".to_string()),
+                    name: None,
+                }),
+                expression: None,
+            },
+            CdxLicenseWrapper {
+                license: None,
+                expression: Some("MIT AND Apache-2.0".to_string()),
+            },
+        ];
+        // Expression should be preferred (first pass finds it)
+        assert_eq!(
+            extract_license(&entries),
+            Some("MIT AND Apache-2.0".to_string())
+        );
+    }
+
+    // Several plain license entries are combined with " OR ".
+    #[test]
+    fn license_multiple_ids_joined() {
+        let entries = vec![
+            CdxLicenseWrapper {
+                license: Some(CdxLicense {
+                    id: Some("MIT".to_string()),
+                    name: None,
+                }),
+                expression: None,
+            },
+            CdxLicenseWrapper {
+                license: Some(CdxLicense {
+                    id: Some("Apache-2.0".to_string()),
+                    name: None,
+                }),
+                expression: None,
+            },
+        ];
+        assert_eq!(
+            extract_license(&entries),
+            Some("MIT OR Apache-2.0".to_string())
+        );
+    }
+
+    #[test]
+    fn license_empty_entries() {
+        let entries: Vec<CdxLicenseWrapper> = vec![];
+        assert_eq!(extract_license(&entries), None);
+    }
+
+    // Empty strings in every field count as "no license information".
+    #[test]
+    fn license_all_empty_strings() {
+        let entries = vec![CdxLicenseWrapper {
+            license: Some(CdxLicense {
+                id: Some(String::new()),
+                name: Some(String::new()),
+            }),
+            expression: Some(String::new()),
+        }];
+        assert_eq!(extract_license(&entries), None);
+    }
+
+    #[test]
+    fn license_none_fields() {
+        let entries = vec![CdxLicenseWrapper {
+            license: None,
+            expression: None,
+        }];
+        assert_eq!(extract_license(&entries), None);
+    }
+
+    // --- CycloneDX deserialization tests ---
+
+    #[test]
+    fn deserialize_cyclonedx_bom() {
+        let json = r#"{
+            "components": [
+                {
+                    "name": "serde",
+                    "version": "1.0.197",
+                    "type": "library",
+                    "purl": "pkg:cargo/serde@1.0.197",
+                    "licenses": [
+                        {"expression": "MIT OR Apache-2.0"}
+                    ]
+                }
+            ]
+        }"#;
+        let bom: CycloneDxBom = serde_json::from_str(json).unwrap();
+        let components = bom.components.unwrap();
+        assert_eq!(components.len(), 1);
+        assert_eq!(components[0].name, "serde");
+        assert_eq!(components[0].version, Some("1.0.197".to_string()));
+        assert_eq!(
+            components[0].purl,
+            Some("pkg:cargo/serde@1.0.197".to_string())
+        );
+    }
+
+    // A document without a `components` key still deserializes.
+    #[test]
+    fn deserialize_cyclonedx_no_components() {
+        let json = r#"{}"#;
+        let bom: CycloneDxBom = serde_json::from_str(json).unwrap();
+        assert!(bom.components.is_none());
+    }
+
+    // Only `name` is required on a component; everything else may be absent.
+    #[test]
+    fn deserialize_cyclonedx_minimal_component() {
+        let json = r#"{"components": [{"name": "foo"}]}"#;
+        let bom: CycloneDxBom = serde_json::from_str(json).unwrap();
+        let c = &bom.components.unwrap()[0];
+        assert_eq!(c.name, "foo");
+        assert!(c.version.is_none());
+        assert!(c.purl.is_none());
+        assert!(c.licenses.is_none());
+    }
+}
@@ -108,3 +108,124 @@ struct SemgrepExtra {
    #[serde(default)]
    metadata: Option<serde_json::Value>,
 }
+
+// Unit tests for deserializing semgrep's JSON output into the types declared
+// in this file.
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    // Full result with every field populated, including `extra.metadata`.
+    #[test]
+    fn deserialize_semgrep_output() {
+        let json = r#"{
+            "results": [
+                {
+                    "check_id": "python.lang.security.audit.exec-detected",
+                    "path": "src/main.py",
+                    "start": {"line": 15},
+                    "extra": {
+                        "message": "Detected use of exec()",
+                        "severity": "ERROR",
+                        "lines": "exec(user_input)",
+                        "metadata": {"cwe": "CWE-78"}
+                    }
+                }
+            ]
+        }"#;
+        let output: SemgrepOutput = serde_json::from_str(json).unwrap();
+        assert_eq!(output.results.len(), 1);
+
+        let r = &output.results[0];
+        assert_eq!(r.check_id, "python.lang.security.audit.exec-detected");
+        assert_eq!(r.path, "src/main.py");
+        assert_eq!(r.start.line, 15);
+        assert_eq!(r.extra.message, "Detected use of exec()");
+        assert_eq!(r.extra.severity, "ERROR");
+        assert_eq!(r.extra.lines, "exec(user_input)");
+        assert_eq!(
+            r.extra
+                .metadata
+                .as_ref()
+                .unwrap()
+                .get("cwe")
+                .unwrap()
+                .as_str(),
+            Some("CWE-78")
+        );
+    }
+
+    #[test]
+    fn deserialize_semgrep_empty_results() {
+        let json = r#"{"results": []}"#;
+        let output: SemgrepOutput = serde_json::from_str(json).unwrap();
+        assert!(output.results.is_empty());
+    }
+
+    // `metadata` is an `Option` with `#[serde(default)]`, so a result that
+    // omits it must still deserialize.
+    #[test]
+    fn deserialize_semgrep_no_metadata() {
+        let json = r#"{
+            "results": [
+                {
+                    "check_id": "rule-1",
+                    "path": "app.py",
+                    "start": {"line": 1},
+                    "extra": {
+                        "message": "found something",
+                        "severity": "WARNING",
+                        "lines": "import os"
+                    }
+                }
+            ]
+        }"#;
+        let output: SemgrepOutput = serde_json::from_str(json).unwrap();
+        assert!(output.results[0].extra.metadata.is_none());
+    }
+
+    // NOTE(review): the `match` below is written inside this test, so the
+    // table is checked against a copy of itself rather than against the
+    // scanner's real severity mapping. Consider calling the production
+    // mapping here so a change to it is actually caught.
+    #[test]
+    fn semgrep_severity_mapping() {
+        let cases = vec![
+            ("ERROR", "High"),
+            ("WARNING", "Medium"),
+            ("INFO", "Low"),
+            ("UNKNOWN", "Info"),
+        ];
+        for (input, expected) in cases {
+            let result = match input {
+                "ERROR" => "High",
+                "WARNING" => "Medium",
+                "INFO" => "Low",
+                _ => "Info",
+            };
+            assert_eq!(result, expected, "Severity for '{input}'");
+        }
+    }
+
+    // Two results in one document: both are kept, in input order.
+    #[test]
+    fn deserialize_semgrep_multiple_results() {
+        let json = r#"{
+            "results": [
+                {
+                    "check_id": "rule-a",
+                    "path": "a.py",
+                    "start": {"line": 1},
+                    "extra": {
+                        "message": "msg a",
+                        "severity": "ERROR",
+                        "lines": "line a"
+                    }
+                },
+                {
+                    "check_id": "rule-b",
+                    "path": "b.py",
+                    "start": {"line": 99},
+                    "extra": {
+                        "message": "msg b",
+                        "severity": "INFO",
+                        "lines": "line b"
+                    }
+                }
+            ]
+        }"#;
+        let output: SemgrepOutput = serde_json::from_str(json).unwrap();
+        assert_eq!(output.results.len(), 2);
+        assert_eq!(output.results[1].start.line, 99);
+    }
+}
@@ -0,0 +1,81 @@
+use compliance_core::models::TrackerIssue;
+use compliance_core::traits::issue_tracker::IssueTracker;
+
+use crate::trackers;
+
+/// Enum dispatch for issue trackers (async traits aren't dyn-compatible).
+///
+/// Each variant wraps one concrete tracker; the methods on this type forward
+/// to whichever tracker is held.
+pub(crate) enum TrackerDispatch {
+    /// Wraps a `trackers::github::GitHubTracker`.
+    GitHub(trackers::github::GitHubTracker),
+    /// Wraps a `trackers::gitlab::GitLabTracker`.
+    GitLab(trackers::gitlab::GitLabTracker),
+    /// Wraps a `trackers::gitea::GiteaTracker`.
+    Gitea(trackers::gitea::GiteaTracker),
+    /// Wraps a `trackers::jira::JiraTracker`.
+    Jira(trackers::jira::JiraTracker),
+}
+
+/// Expands to a `match` over every `TrackerDispatch` variant, binding the
+/// wrapped tracker to `$tracker` and evaluating `$call` in each arm.
+macro_rules! dispatch {
+    ($this:expr, $tracker:ident => $call:expr) => {
+        match $this {
+            TrackerDispatch::GitHub($tracker) => $call,
+            TrackerDispatch::GitLab($tracker) => $call,
+            TrackerDispatch::Gitea($tracker) => $call,
+            TrackerDispatch::Jira($tracker) => $call,
+        }
+    };
+}
+
+impl TrackerDispatch {
+    /// Returns the name reported by the wrapped tracker.
+    pub(crate) fn name(&self) -> &str {
+        dispatch!(self, tracker => tracker.name())
+    }
+
+    /// Forwards to the wrapped tracker's `create_issue` with the same
+    /// arguments and returns its result unchanged.
+    pub(crate) async fn create_issue(
+        &self,
+        owner: &str,
+        repo: &str,
+        title: &str,
+        body: &str,
+        labels: &[String],
+    ) -> Result<TrackerIssue, compliance_core::error::CoreError> {
+        dispatch!(self, tracker => {
+            tracker.create_issue(owner, repo, title, body, labels).await
+        })
+    }
+
+    /// Forwards to the wrapped tracker's `find_existing_issue` with the same
+    /// arguments and returns its result unchanged.
+    pub(crate) async fn find_existing_issue(
+        &self,
+        owner: &str,
+        repo: &str,
+        fingerprint: &str,
+    ) -> Result<Option<TrackerIssue>, compliance_core::error::CoreError> {
+        dispatch!(self, tracker => {
+            tracker.find_existing_issue(owner, repo, fingerprint).await
+        })
+    }
+
+    /// Forwards to the wrapped tracker's `create_pr_review`, handing over
+    /// ownership of `comments`, and returns its result unchanged.
+    pub(crate) async fn create_pr_review(
+        &self,
+        owner: &str,
+        repo: &str,
+        pr_number: u64,
+        body: &str,
+        comments: Vec<compliance_core::traits::issue_tracker::ReviewComment>,
+    ) -> Result<(), compliance_core::error::CoreError> {
+        dispatch!(self, tracker => {
+            tracker
+                .create_pr_review(owner, repo, pr_number, body, comments)
+                .await
+        })
+    }
+}