feat: findings refinement, new scanners, and deployment tooling (#6)
Some checks failed
CI / Format (push) Successful in 3s
CI / Clippy (push) Successful in 4m3s
CI / Security Audit (push) Successful in 1m38s
CI / Tests (push) Successful in 4m44s
CI / Detect Changes (push) Successful in 2s
CI / Deploy Agent (push) Successful in 2s
CI / Deploy Dashboard (push) Successful in 2s
CI / Deploy Docs (push) Has been skipped
CI / Deploy MCP (push) Failing after 2s
Some checks failed
CI / Format (push) Successful in 3s
CI / Clippy (push) Successful in 4m3s
CI / Security Audit (push) Successful in 1m38s
CI / Tests (push) Successful in 4m44s
CI / Detect Changes (push) Successful in 2s
CI / Deploy Agent (push) Successful in 2s
CI / Deploy Dashboard (push) Successful in 2s
CI / Deploy Docs (push) Has been skipped
CI / Deploy MCP (push) Failing after 2s
This commit was merged in pull request #6.
This commit is contained in:
186
compliance-agent/src/pipeline/code_review.rs
Normal file
186
compliance-agent/src/pipeline/code_review.rs
Normal file
@@ -0,0 +1,186 @@
|
||||
use std::path::Path;
|
||||
use std::sync::Arc;
|
||||
|
||||
use compliance_core::models::{Finding, ScanType, Severity};
|
||||
use compliance_core::traits::ScanOutput;
|
||||
|
||||
use crate::llm::review_prompts::REVIEW_PASSES;
|
||||
use crate::llm::LlmClient;
|
||||
use crate::pipeline::dedup;
|
||||
use crate::pipeline::git::{DiffFile, GitOps};
|
||||
|
||||
pub struct CodeReviewScanner {
|
||||
llm: Arc<LlmClient>,
|
||||
}
|
||||
|
||||
impl CodeReviewScanner {
|
||||
pub fn new(llm: Arc<LlmClient>) -> Self {
|
||||
Self { llm }
|
||||
}
|
||||
|
||||
/// Run multi-pass LLM code review on the diff between old and new commits.
|
||||
pub async fn review_diff(
|
||||
&self,
|
||||
repo_path: &Path,
|
||||
repo_id: &str,
|
||||
old_sha: &str,
|
||||
new_sha: &str,
|
||||
) -> ScanOutput {
|
||||
let diff_files = match GitOps::get_diff_content(repo_path, old_sha, new_sha) {
|
||||
Ok(files) => files,
|
||||
Err(e) => {
|
||||
tracing::warn!("Failed to extract diff for code review: {e}");
|
||||
return ScanOutput::default();
|
||||
}
|
||||
};
|
||||
|
||||
if diff_files.is_empty() {
|
||||
return ScanOutput::default();
|
||||
}
|
||||
|
||||
let mut all_findings = Vec::new();
|
||||
|
||||
// Chunk diff files into groups to avoid exceeding context limits
|
||||
let chunks = chunk_diff_files(&diff_files, 8000);
|
||||
|
||||
for (pass_name, system_prompt) in REVIEW_PASSES {
|
||||
for chunk in &chunks {
|
||||
let user_prompt = format!(
|
||||
"Review the following code changes:\n\n{}",
|
||||
chunk
|
||||
.iter()
|
||||
.map(|f| format!("--- {} ---\n{}", f.path, f.hunks))
|
||||
.collect::<Vec<_>>()
|
||||
.join("\n\n")
|
||||
);
|
||||
|
||||
match self.llm.chat(system_prompt, &user_prompt, Some(0.1)).await {
|
||||
Ok(response) => {
|
||||
let parsed = parse_review_response(&response, pass_name, repo_id, chunk);
|
||||
all_findings.extend(parsed);
|
||||
}
|
||||
Err(e) => {
|
||||
tracing::warn!("Code review pass '{pass_name}' failed: {e}");
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
ScanOutput {
|
||||
findings: all_findings,
|
||||
sbom_entries: Vec::new(),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Group diff files into chunks that fit within a token budget (rough char estimate)
|
||||
fn chunk_diff_files(files: &[DiffFile], max_chars: usize) -> Vec<Vec<&DiffFile>> {
|
||||
let mut chunks: Vec<Vec<&DiffFile>> = Vec::new();
|
||||
let mut current_chunk: Vec<&DiffFile> = Vec::new();
|
||||
let mut current_size = 0;
|
||||
|
||||
for file in files {
|
||||
if current_size + file.hunks.len() > max_chars && !current_chunk.is_empty() {
|
||||
chunks.push(std::mem::take(&mut current_chunk));
|
||||
current_size = 0;
|
||||
}
|
||||
current_chunk.push(file);
|
||||
current_size += file.hunks.len();
|
||||
}
|
||||
|
||||
if !current_chunk.is_empty() {
|
||||
chunks.push(current_chunk);
|
||||
}
|
||||
|
||||
chunks
|
||||
}
|
||||
|
||||
fn parse_review_response(
|
||||
response: &str,
|
||||
pass_name: &str,
|
||||
repo_id: &str,
|
||||
chunk: &[&DiffFile],
|
||||
) -> Vec<Finding> {
|
||||
let cleaned = response.trim();
|
||||
let cleaned = if cleaned.starts_with("```") {
|
||||
cleaned
|
||||
.trim_start_matches("```json")
|
||||
.trim_start_matches("```")
|
||||
.trim_end_matches("```")
|
||||
.trim()
|
||||
} else {
|
||||
cleaned
|
||||
};
|
||||
|
||||
let issues: Vec<ReviewIssue> = match serde_json::from_str(cleaned) {
|
||||
Ok(v) => v,
|
||||
Err(_) => {
|
||||
if cleaned != "[]" {
|
||||
tracing::debug!("Failed to parse {pass_name} review response: {cleaned}");
|
||||
}
|
||||
return Vec::new();
|
||||
}
|
||||
};
|
||||
|
||||
issues
|
||||
.into_iter()
|
||||
.filter(|issue| {
|
||||
// Verify the file exists in the diff chunk
|
||||
chunk.iter().any(|f| f.path == issue.file)
|
||||
})
|
||||
.map(|issue| {
|
||||
let severity = match issue.severity.as_str() {
|
||||
"critical" => Severity::Critical,
|
||||
"high" => Severity::High,
|
||||
"medium" => Severity::Medium,
|
||||
"low" => Severity::Low,
|
||||
_ => Severity::Info,
|
||||
};
|
||||
|
||||
let fingerprint = dedup::compute_fingerprint(&[
|
||||
repo_id,
|
||||
"code-review",
|
||||
pass_name,
|
||||
&issue.file,
|
||||
&issue.line.to_string(),
|
||||
&issue.title,
|
||||
]);
|
||||
|
||||
let description = if let Some(suggestion) = &issue.suggestion {
|
||||
format!("{}\n\nSuggested fix: {}", issue.description, suggestion)
|
||||
} else {
|
||||
issue.description.clone()
|
||||
};
|
||||
|
||||
let mut finding = Finding::new(
|
||||
repo_id.to_string(),
|
||||
fingerprint,
|
||||
format!("code-review/{pass_name}"),
|
||||
ScanType::CodeReview,
|
||||
issue.title,
|
||||
description,
|
||||
severity,
|
||||
);
|
||||
finding.rule_id = Some(format!("review/{pass_name}"));
|
||||
finding.file_path = Some(issue.file);
|
||||
finding.line_number = Some(issue.line);
|
||||
finding.cwe = issue.cwe;
|
||||
finding.suggested_fix = issue.suggestion;
|
||||
finding
|
||||
})
|
||||
.collect()
|
||||
}
|
||||
|
||||
/// A single issue reported by the LLM in a review-pass JSON response.
#[derive(serde::Deserialize)]
struct ReviewIssue {
    // Short title; also part of the dedup fingerprint.
    title: String,
    // Full explanation of the issue.
    description: String,
    // One of "critical" / "high" / "medium" / "low"; anything else maps to Info.
    severity: String,
    // File path the issue refers to; must match a path in the diff chunk.
    file: String,
    // Line number; defaults to 0 when the model omits it.
    #[serde(default)]
    line: u32,
    // Optional CWE identifier, copied onto the finding.
    #[serde(default)]
    cwe: Option<String>,
    // Optional suggested fix; appended to the finding description.
    #[serde(default)]
    suggestion: Option<String>,
}
|
||||
@@ -64,6 +64,8 @@ impl CveScanner {
|
||||
}
|
||||
|
||||
async fn query_osv_batch(&self, entries: &[SbomEntry]) -> Result<Vec<Vec<OsvVuln>>, CoreError> {
|
||||
const OSV_BATCH_SIZE: usize = 500;
|
||||
|
||||
let queries: Vec<_> = entries
|
||||
.iter()
|
||||
.filter_map(|e| {
|
||||
@@ -79,32 +81,34 @@ impl CveScanner {
|
||||
return Ok(Vec::new());
|
||||
}
|
||||
|
||||
let body = serde_json::json!({ "queries": queries });
|
||||
let mut all_vulns: Vec<Vec<OsvVuln>> = Vec::with_capacity(queries.len());
|
||||
|
||||
let resp = self
|
||||
.http
|
||||
.post("https://api.osv.dev/v1/querybatch")
|
||||
.json(&body)
|
||||
.send()
|
||||
.await
|
||||
.map_err(|e| CoreError::Http(format!("OSV.dev request failed: {e}")))?;
|
||||
for chunk in queries.chunks(OSV_BATCH_SIZE) {
|
||||
let body = serde_json::json!({ "queries": chunk });
|
||||
|
||||
if !resp.status().is_success() {
|
||||
let status = resp.status();
|
||||
let body = resp.text().await.unwrap_or_default();
|
||||
tracing::warn!("OSV.dev returned {status}: {body}");
|
||||
return Ok(Vec::new());
|
||||
}
|
||||
let resp = self
|
||||
.http
|
||||
.post("https://api.osv.dev/v1/querybatch")
|
||||
.json(&body)
|
||||
.send()
|
||||
.await
|
||||
.map_err(|e| CoreError::Http(format!("OSV.dev request failed: {e}")))?;
|
||||
|
||||
let result: OsvBatchResponse = resp
|
||||
.json()
|
||||
.await
|
||||
.map_err(|e| CoreError::Http(format!("Failed to parse OSV.dev response: {e}")))?;
|
||||
if !resp.status().is_success() {
|
||||
let status = resp.status();
|
||||
let body = resp.text().await.unwrap_or_default();
|
||||
tracing::warn!("OSV.dev returned {status}: {body}");
|
||||
// Push empty results for this chunk so indices stay aligned
|
||||
all_vulns.extend(std::iter::repeat_with(Vec::new).take(chunk.len()));
|
||||
continue;
|
||||
}
|
||||
|
||||
let vulns = result
|
||||
.results
|
||||
.into_iter()
|
||||
.map(|r| {
|
||||
let result: OsvBatchResponse = resp
|
||||
.json()
|
||||
.await
|
||||
.map_err(|e| CoreError::Http(format!("Failed to parse OSV.dev response: {e}")))?;
|
||||
|
||||
let chunk_vulns = result.results.into_iter().map(|r| {
|
||||
r.vulns
|
||||
.unwrap_or_default()
|
||||
.into_iter()
|
||||
@@ -116,10 +120,12 @@ impl CveScanner {
|
||||
}),
|
||||
})
|
||||
.collect()
|
||||
})
|
||||
.collect();
|
||||
});
|
||||
|
||||
Ok(vulns)
|
||||
all_vulns.extend(chunk_vulns);
|
||||
}
|
||||
|
||||
Ok(all_vulns)
|
||||
}
|
||||
|
||||
async fn query_nvd(&self, cve_id: &str) -> Result<Option<f64>, CoreError> {
|
||||
|
||||
@@ -1,17 +1,80 @@
|
||||
use std::path::{Path, PathBuf};
|
||||
|
||||
use git2::{FetchOptions, Repository};
|
||||
use git2::{Cred, FetchOptions, RemoteCallbacks, Repository};
|
||||
|
||||
use crate::error::AgentError;
|
||||
|
||||
/// Credentials for accessing a private repository.
///
/// All fields are optional; with none set, git operations fall back to the
/// default credential helper.
#[derive(Debug, Clone, Default)]
pub struct RepoCredentials {
    /// Path to the SSH private key (for SSH URLs); only used if the file exists.
    pub ssh_key_path: Option<String>,
    /// Auth token / password (for HTTPS URLs)
    pub auth_token: Option<String>,
    /// Username for HTTPS auth (defaults to "x-access-token")
    pub auth_username: Option<String>,
}
|
||||
|
||||
impl RepoCredentials {
|
||||
pub(crate) fn make_callbacks(&self) -> RemoteCallbacks<'_> {
|
||||
let mut callbacks = RemoteCallbacks::new();
|
||||
let ssh_key = self.ssh_key_path.clone();
|
||||
let token = self.auth_token.clone();
|
||||
let username = self.auth_username.clone();
|
||||
|
||||
callbacks.credentials(move |_url, username_from_url, allowed_types| {
|
||||
// SSH key authentication
|
||||
if allowed_types.contains(git2::CredentialType::SSH_KEY) {
|
||||
if let Some(ref key_path) = ssh_key {
|
||||
let key = Path::new(key_path);
|
||||
if key.exists() {
|
||||
let user = username_from_url.unwrap_or("git");
|
||||
return Cred::ssh_key(user, None, key, None);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// HTTPS userpass authentication
|
||||
if allowed_types.contains(git2::CredentialType::USER_PASS_PLAINTEXT) {
|
||||
if let Some(ref tok) = token {
|
||||
let user = username.as_deref().unwrap_or("x-access-token");
|
||||
return Cred::userpass_plaintext(user, tok);
|
||||
}
|
||||
}
|
||||
|
||||
Cred::default()
|
||||
});
|
||||
|
||||
callbacks
|
||||
}
|
||||
|
||||
fn fetch_options(&self) -> FetchOptions<'_> {
|
||||
let mut fetch_opts = FetchOptions::new();
|
||||
if self.has_credentials() {
|
||||
fetch_opts.remote_callbacks(self.make_callbacks());
|
||||
}
|
||||
fetch_opts
|
||||
}
|
||||
|
||||
fn has_credentials(&self) -> bool {
|
||||
self.ssh_key_path
|
||||
.as_ref()
|
||||
.map(|p| Path::new(p).exists())
|
||||
.unwrap_or(false)
|
||||
|| self.auth_token.is_some()
|
||||
}
|
||||
}
|
||||
|
||||
pub struct GitOps {
|
||||
base_path: PathBuf,
|
||||
credentials: RepoCredentials,
|
||||
}
|
||||
|
||||
impl GitOps {
|
||||
pub fn new(base_path: &str) -> Self {
|
||||
pub fn new(base_path: &str, credentials: RepoCredentials) -> Self {
|
||||
Self {
|
||||
base_path: PathBuf::from(base_path),
|
||||
credentials,
|
||||
}
|
||||
}
|
||||
|
||||
@@ -22,17 +85,25 @@ impl GitOps {
|
||||
self.fetch(&repo_path)?;
|
||||
} else {
|
||||
std::fs::create_dir_all(&repo_path)?;
|
||||
Repository::clone(git_url, &repo_path)?;
|
||||
self.clone_repo(git_url, &repo_path)?;
|
||||
tracing::info!("Cloned {git_url} to {}", repo_path.display());
|
||||
}
|
||||
|
||||
Ok(repo_path)
|
||||
}
|
||||
|
||||
fn clone_repo(&self, git_url: &str, repo_path: &Path) -> Result<(), AgentError> {
|
||||
let mut builder = git2::build::RepoBuilder::new();
|
||||
let fetch_opts = self.credentials.fetch_options();
|
||||
builder.fetch_options(fetch_opts);
|
||||
builder.clone(git_url, repo_path)?;
|
||||
Ok(())
|
||||
}
|
||||
|
||||
fn fetch(&self, repo_path: &Path) -> Result<(), AgentError> {
|
||||
let repo = Repository::open(repo_path)?;
|
||||
let mut remote = repo.find_remote("origin")?;
|
||||
let mut fetch_opts = FetchOptions::new();
|
||||
let mut fetch_opts = self.credentials.fetch_options();
|
||||
remote.fetch(&[] as &[&str], Some(&mut fetch_opts), None)?;
|
||||
|
||||
// Fast-forward to origin/HEAD
|
||||
@@ -48,6 +119,15 @@ impl GitOps {
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Test that we can access a remote repository (used during add validation)
|
||||
pub fn test_access(git_url: &str, credentials: &RepoCredentials) -> Result<(), AgentError> {
|
||||
let mut remote = git2::Remote::create_detached(git_url)?;
|
||||
let callbacks = credentials.make_callbacks();
|
||||
remote.connect_auth(git2::Direction::Fetch, Some(callbacks), None)?;
|
||||
remote.disconnect()?;
|
||||
Ok(())
|
||||
}
|
||||
|
||||
pub fn get_head_sha(repo_path: &Path) -> Result<String, AgentError> {
|
||||
let repo = Repository::open(repo_path)?;
|
||||
let head = repo.head()?;
|
||||
@@ -63,6 +143,62 @@ impl GitOps {
|
||||
}
|
||||
}
|
||||
|
||||
/// Extract structured diff content between two commits
|
||||
pub fn get_diff_content(
|
||||
repo_path: &Path,
|
||||
old_sha: &str,
|
||||
new_sha: &str,
|
||||
) -> Result<Vec<DiffFile>, AgentError> {
|
||||
let repo = Repository::open(repo_path)?;
|
||||
let old_commit = repo.find_commit(git2::Oid::from_str(old_sha)?)?;
|
||||
let new_commit = repo.find_commit(git2::Oid::from_str(new_sha)?)?;
|
||||
|
||||
let old_tree = old_commit.tree()?;
|
||||
let new_tree = new_commit.tree()?;
|
||||
|
||||
let diff = repo.diff_tree_to_tree(Some(&old_tree), Some(&new_tree), None)?;
|
||||
|
||||
let mut diff_files: Vec<DiffFile> = Vec::new();
|
||||
|
||||
diff.print(git2::DiffFormat::Patch, |delta, _hunk, line| {
|
||||
let file_path = delta
|
||||
.new_file()
|
||||
.path()
|
||||
.map(|p| p.to_string_lossy().to_string())
|
||||
.unwrap_or_default();
|
||||
|
||||
// Find or create the DiffFile entry
|
||||
let idx = if let Some(pos) = diff_files.iter().position(|f| f.path == file_path) {
|
||||
pos
|
||||
} else {
|
||||
diff_files.push(DiffFile {
|
||||
path: file_path,
|
||||
hunks: String::new(),
|
||||
});
|
||||
diff_files.len() - 1
|
||||
};
|
||||
let diff_file = &mut diff_files[idx];
|
||||
|
||||
let prefix = match line.origin() {
|
||||
'+' => "+",
|
||||
'-' => "-",
|
||||
' ' => " ",
|
||||
_ => "",
|
||||
};
|
||||
|
||||
let content = std::str::from_utf8(line.content()).unwrap_or("");
|
||||
diff_file.hunks.push_str(prefix);
|
||||
diff_file.hunks.push_str(content);
|
||||
|
||||
true
|
||||
})?;
|
||||
|
||||
// Filter out binary files and very large diffs
|
||||
diff_files.retain(|f| !f.hunks.is_empty() && f.hunks.len() < 50_000);
|
||||
|
||||
Ok(diff_files)
|
||||
}
|
||||
|
||||
#[allow(dead_code)]
|
||||
pub fn get_changed_files(
|
||||
repo_path: &Path,
|
||||
@@ -94,3 +230,10 @@ impl GitOps {
|
||||
Ok(files)
|
||||
}
|
||||
}
|
||||
|
||||
/// A file changed between two commits with its diff content
#[derive(Debug, Clone)]
pub struct DiffFile {
    // Repo-relative path taken from the new side of the diff.
    pub path: String,
    // Concatenated patch text for all hunks of this file, with
    // `+` / `-` / ` ` line markers preserved.
    pub hunks: String,
}
|
||||
|
||||
130
compliance-agent/src/pipeline/gitleaks.rs
Normal file
130
compliance-agent/src/pipeline/gitleaks.rs
Normal file
@@ -0,0 +1,130 @@
|
||||
use std::path::Path;
|
||||
|
||||
use compliance_core::models::{Finding, ScanType, Severity};
|
||||
use compliance_core::traits::{ScanOutput, Scanner};
|
||||
use compliance_core::CoreError;
|
||||
|
||||
use crate::pipeline::dedup;
|
||||
|
||||
pub struct GitleaksScanner;
|
||||
|
||||
impl Scanner for GitleaksScanner {
|
||||
fn name(&self) -> &str {
|
||||
"gitleaks"
|
||||
}
|
||||
|
||||
fn scan_type(&self) -> ScanType {
|
||||
ScanType::SecretDetection
|
||||
}
|
||||
|
||||
async fn scan(&self, repo_path: &Path, repo_id: &str) -> Result<ScanOutput, CoreError> {
|
||||
let output = tokio::process::Command::new("gitleaks")
|
||||
.args([
|
||||
"detect",
|
||||
"--source",
|
||||
".",
|
||||
"--report-format",
|
||||
"json",
|
||||
"--report-path",
|
||||
"/dev/stdout",
|
||||
"--no-banner",
|
||||
"--exit-code",
|
||||
"0",
|
||||
])
|
||||
.current_dir(repo_path)
|
||||
.output()
|
||||
.await
|
||||
.map_err(|e| CoreError::Scanner {
|
||||
scanner: "gitleaks".to_string(),
|
||||
source: Box::new(e),
|
||||
})?;
|
||||
|
||||
if output.stdout.is_empty() {
|
||||
return Ok(ScanOutput::default());
|
||||
}
|
||||
|
||||
let results: Vec<GitleaksResult> =
|
||||
serde_json::from_slice(&output.stdout).unwrap_or_default();
|
||||
|
||||
let findings = results
|
||||
.into_iter()
|
||||
.filter(|r| !is_allowlisted(&r.file))
|
||||
.map(|r| {
|
||||
let severity = match r.rule_id.as_str() {
|
||||
s if s.contains("private-key") => Severity::Critical,
|
||||
s if s.contains("token") || s.contains("password") || s.contains("secret") => {
|
||||
Severity::High
|
||||
}
|
||||
s if s.contains("api-key") => Severity::High,
|
||||
_ => Severity::Medium,
|
||||
};
|
||||
|
||||
let fingerprint = dedup::compute_fingerprint(&[
|
||||
repo_id,
|
||||
&r.rule_id,
|
||||
&r.file,
|
||||
&r.start_line.to_string(),
|
||||
]);
|
||||
|
||||
let title = format!("Secret detected: {}", r.description);
|
||||
let description = format!(
|
||||
"Potential secret ({}) found in {}:{}. Match: {}",
|
||||
r.rule_id,
|
||||
r.file,
|
||||
r.start_line,
|
||||
r.r#match.chars().take(80).collect::<String>(),
|
||||
);
|
||||
|
||||
let mut finding = Finding::new(
|
||||
repo_id.to_string(),
|
||||
fingerprint,
|
||||
"gitleaks".to_string(),
|
||||
ScanType::SecretDetection,
|
||||
title,
|
||||
description,
|
||||
severity,
|
||||
);
|
||||
finding.rule_id = Some(r.rule_id);
|
||||
finding.file_path = Some(r.file);
|
||||
finding.line_number = Some(r.start_line);
|
||||
finding.code_snippet = Some(r.r#match);
|
||||
finding
|
||||
})
|
||||
.collect();
|
||||
|
||||
Ok(ScanOutput {
|
||||
findings,
|
||||
sbom_entries: Vec::new(),
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
/// Skip files that commonly contain example/placeholder secrets
fn is_allowlisted(file_path: &str) -> bool {
    // File name patterns that mark templates and test sources.
    const SUFFIXES: [&str; 8] = [
        ".env.example",
        ".env.sample",
        ".env.template",
        "_test.go",
        ".test.ts",
        ".test.js",
        ".spec.ts",
        ".spec.js",
    ];
    // Path fragments that mark test/fixture/mock directories or files.
    const FRAGMENTS: [&str; 5] = ["/test/", "/tests/", "/fixtures/", "/testdata/", "mock"];

    let lower = file_path.to_lowercase();
    SUFFIXES.iter().any(|s| lower.ends_with(s)) || FRAGMENTS.iter().any(|s| lower.contains(s))
}
|
||||
|
||||
/// One leak record from the gitleaks JSON report.
///
/// Report fields are PascalCase; `RuleID` and `Match` are pinned with
/// explicit renames (the automatic PascalCase mapping would render `rule_id`
/// as `RuleId`).
#[derive(serde::Deserialize)]
#[serde(rename_all = "PascalCase")]
struct GitleaksResult {
    // Human-readable rule description, used in the finding title.
    description: String,
    #[serde(rename = "RuleID")]
    rule_id: String,
    // Path of the file containing the match.
    file: String,
    // Line where the match starts.
    start_line: u32,
    // Matched text; truncated to 80 chars when shown in descriptions.
    #[serde(rename = "Match")]
    r#match: String,
}
|
||||
364
compliance-agent/src/pipeline/lint.rs
Normal file
364
compliance-agent/src/pipeline/lint.rs
Normal file
@@ -0,0 +1,364 @@
|
||||
use std::path::Path;
|
||||
use std::time::Duration;
|
||||
|
||||
use compliance_core::models::{Finding, ScanType, Severity};
|
||||
use compliance_core::traits::{ScanOutput, Scanner};
|
||||
use compliance_core::CoreError;
|
||||
use tokio::process::Command;
|
||||
|
||||
use crate::pipeline::dedup;
|
||||
|
||||
/// Timeout for each individual lint command
|
||||
const LINT_TIMEOUT: Duration = Duration::from_secs(120);
|
||||
|
||||
pub struct LintScanner;
|
||||
|
||||
impl Scanner for LintScanner {
|
||||
fn name(&self) -> &str {
|
||||
"lint"
|
||||
}
|
||||
|
||||
fn scan_type(&self) -> ScanType {
|
||||
ScanType::Lint
|
||||
}
|
||||
|
||||
async fn scan(&self, repo_path: &Path, repo_id: &str) -> Result<ScanOutput, CoreError> {
|
||||
let mut all_findings = Vec::new();
|
||||
|
||||
// Detect which languages are present and run appropriate linters
|
||||
if has_rust_project(repo_path) {
|
||||
match run_clippy(repo_path, repo_id).await {
|
||||
Ok(findings) => all_findings.extend(findings),
|
||||
Err(e) => tracing::warn!("Clippy failed: {e}"),
|
||||
}
|
||||
}
|
||||
|
||||
if has_js_project(repo_path) {
|
||||
match run_eslint(repo_path, repo_id).await {
|
||||
Ok(findings) => all_findings.extend(findings),
|
||||
Err(e) => tracing::warn!("ESLint failed: {e}"),
|
||||
}
|
||||
}
|
||||
|
||||
if has_python_project(repo_path) {
|
||||
match run_ruff(repo_path, repo_id).await {
|
||||
Ok(findings) => all_findings.extend(findings),
|
||||
Err(e) => tracing::warn!("Ruff failed: {e}"),
|
||||
}
|
||||
}
|
||||
|
||||
Ok(ScanOutput {
|
||||
findings: all_findings,
|
||||
sbom_entries: Vec::new(),
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
/// A repo is treated as a Rust project if a top-level Cargo manifest exists.
fn has_rust_project(repo_path: &Path) -> bool {
    let manifest = repo_path.join("Cargo.toml");
    manifest.exists()
}
|
||||
|
||||
/// A repo counts as a JS project only when both a package.json and a
/// project-local eslint binary are present.
fn has_js_project(repo_path: &Path) -> bool {
    // Only run if eslint is actually installed in the project
    let has_manifest = repo_path.join("package.json").exists();
    let has_eslint = repo_path.join("node_modules/.bin/eslint").exists();
    has_manifest && has_eslint
}
|
||||
|
||||
/// A repo counts as a Python project if any common project marker file exists.
fn has_python_project(repo_path: &Path) -> bool {
    ["pyproject.toml", "setup.py", "requirements.txt"]
        .iter()
        .any(|marker| repo_path.join(marker).exists())
}
|
||||
|
||||
/// Run a command with a timeout, returning its output or an error
|
||||
async fn run_with_timeout(
|
||||
child: tokio::process::Child,
|
||||
scanner_name: &str,
|
||||
) -> Result<std::process::Output, CoreError> {
|
||||
let result = tokio::time::timeout(LINT_TIMEOUT, child.wait_with_output()).await;
|
||||
match result {
|
||||
Ok(Ok(output)) => Ok(output),
|
||||
Ok(Err(e)) => Err(CoreError::Scanner {
|
||||
scanner: scanner_name.to_string(),
|
||||
source: Box::new(e),
|
||||
}),
|
||||
Err(_) => {
|
||||
// Process is dropped here which sends SIGKILL on Unix
|
||||
Err(CoreError::Scanner {
|
||||
scanner: scanner_name.to_string(),
|
||||
source: Box::new(std::io::Error::new(
|
||||
std::io::ErrorKind::TimedOut,
|
||||
format!("{scanner_name} timed out after {}s", LINT_TIMEOUT.as_secs()),
|
||||
)),
|
||||
})
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// ── Clippy ──────────────────────────────────────────────
|
||||
|
||||
async fn run_clippy(repo_path: &Path, repo_id: &str) -> Result<Vec<Finding>, CoreError> {
|
||||
let child = Command::new("cargo")
|
||||
.args([
|
||||
"clippy",
|
||||
"--message-format=json",
|
||||
"--quiet",
|
||||
"--",
|
||||
"-W",
|
||||
"clippy::all",
|
||||
])
|
||||
.current_dir(repo_path)
|
||||
.stdout(std::process::Stdio::piped())
|
||||
.stderr(std::process::Stdio::piped())
|
||||
.spawn()
|
||||
.map_err(|e| CoreError::Scanner {
|
||||
scanner: "clippy".to_string(),
|
||||
source: Box::new(e),
|
||||
})?;
|
||||
|
||||
let output = run_with_timeout(child, "clippy").await?;
|
||||
let stdout = String::from_utf8_lossy(&output.stdout);
|
||||
let mut findings = Vec::new();
|
||||
|
||||
for line in stdout.lines() {
|
||||
let msg: serde_json::Value = match serde_json::from_str(line) {
|
||||
Ok(v) => v,
|
||||
Err(_) => continue,
|
||||
};
|
||||
|
||||
if msg.get("reason").and_then(|v| v.as_str()) != Some("compiler-message") {
|
||||
continue;
|
||||
}
|
||||
|
||||
let message = match msg.get("message") {
|
||||
Some(m) => m,
|
||||
None => continue,
|
||||
};
|
||||
|
||||
let level = message.get("level").and_then(|v| v.as_str()).unwrap_or("");
|
||||
|
||||
if level != "warning" && level != "error" {
|
||||
continue;
|
||||
}
|
||||
|
||||
let text = message
|
||||
.get("message")
|
||||
.and_then(|v| v.as_str())
|
||||
.unwrap_or("")
|
||||
.to_string();
|
||||
|
||||
let code = message
|
||||
.get("code")
|
||||
.and_then(|v| v.get("code"))
|
||||
.and_then(|v| v.as_str())
|
||||
.unwrap_or("")
|
||||
.to_string();
|
||||
|
||||
if text.starts_with("aborting due to") || code.is_empty() {
|
||||
continue;
|
||||
}
|
||||
|
||||
let (file_path, line_number) = extract_primary_span(message);
|
||||
|
||||
let severity = if level == "error" {
|
||||
Severity::High
|
||||
} else {
|
||||
Severity::Low
|
||||
};
|
||||
|
||||
let fingerprint = dedup::compute_fingerprint(&[
|
||||
repo_id,
|
||||
"clippy",
|
||||
&code,
|
||||
&file_path,
|
||||
&line_number.to_string(),
|
||||
]);
|
||||
|
||||
let mut finding = Finding::new(
|
||||
repo_id.to_string(),
|
||||
fingerprint,
|
||||
"clippy".to_string(),
|
||||
ScanType::Lint,
|
||||
format!("[clippy] {text}"),
|
||||
text,
|
||||
severity,
|
||||
);
|
||||
finding.rule_id = Some(code);
|
||||
if !file_path.is_empty() {
|
||||
finding.file_path = Some(file_path);
|
||||
}
|
||||
if line_number > 0 {
|
||||
finding.line_number = Some(line_number);
|
||||
}
|
||||
findings.push(finding);
|
||||
}
|
||||
|
||||
Ok(findings)
|
||||
}
|
||||
|
||||
fn extract_primary_span(message: &serde_json::Value) -> (String, u32) {
|
||||
let spans = match message.get("spans").and_then(|v| v.as_array()) {
|
||||
Some(s) => s,
|
||||
None => return (String::new(), 0),
|
||||
};
|
||||
|
||||
for span in spans {
|
||||
if span.get("is_primary").and_then(|v| v.as_bool()) == Some(true) {
|
||||
let file = span
|
||||
.get("file_name")
|
||||
.and_then(|v| v.as_str())
|
||||
.unwrap_or("")
|
||||
.to_string();
|
||||
let line = span.get("line_start").and_then(|v| v.as_u64()).unwrap_or(0) as u32;
|
||||
return (file, line);
|
||||
}
|
||||
}
|
||||
|
||||
(String::new(), 0)
|
||||
}
|
||||
|
||||
// ── ESLint ──────────────────────────────────────────────
|
||||
|
||||
async fn run_eslint(repo_path: &Path, repo_id: &str) -> Result<Vec<Finding>, CoreError> {
|
||||
// Use the project-local eslint binary directly, not npx (which can hang downloading)
|
||||
let eslint_bin = repo_path.join("node_modules/.bin/eslint");
|
||||
let child = Command::new(eslint_bin)
|
||||
.args([".", "--format", "json", "--no-error-on-unmatched-pattern"])
|
||||
.current_dir(repo_path)
|
||||
.stdout(std::process::Stdio::piped())
|
||||
.stderr(std::process::Stdio::piped())
|
||||
.spawn()
|
||||
.map_err(|e| CoreError::Scanner {
|
||||
scanner: "eslint".to_string(),
|
||||
source: Box::new(e),
|
||||
})?;
|
||||
|
||||
let output = run_with_timeout(child, "eslint").await?;
|
||||
|
||||
if output.stdout.is_empty() {
|
||||
return Ok(Vec::new());
|
||||
}
|
||||
|
||||
let results: Vec<EslintFileResult> = serde_json::from_slice(&output.stdout).unwrap_or_default();
|
||||
|
||||
let mut findings = Vec::new();
|
||||
for file_result in results {
|
||||
for msg in file_result.messages {
|
||||
let severity = match msg.severity {
|
||||
2 => Severity::Medium,
|
||||
_ => Severity::Low,
|
||||
};
|
||||
|
||||
let rule_id = msg.rule_id.unwrap_or_default();
|
||||
let fingerprint = dedup::compute_fingerprint(&[
|
||||
repo_id,
|
||||
"eslint",
|
||||
&rule_id,
|
||||
&file_result.file_path,
|
||||
&msg.line.to_string(),
|
||||
]);
|
||||
|
||||
let mut finding = Finding::new(
|
||||
repo_id.to_string(),
|
||||
fingerprint,
|
||||
"eslint".to_string(),
|
||||
ScanType::Lint,
|
||||
format!("[eslint] {}", msg.message),
|
||||
msg.message,
|
||||
severity,
|
||||
);
|
||||
finding.rule_id = Some(rule_id);
|
||||
finding.file_path = Some(file_result.file_path.clone());
|
||||
finding.line_number = Some(msg.line);
|
||||
findings.push(finding);
|
||||
}
|
||||
}
|
||||
|
||||
Ok(findings)
|
||||
}
|
||||
|
||||
/// Per-file result object from `eslint --format json`.
#[derive(serde::Deserialize)]
struct EslintFileResult {
    #[serde(rename = "filePath")]
    file_path: String,
    // Individual lint messages reported for this file.
    messages: Vec<EslintMessage>,
}

/// A single ESLint diagnostic.
#[derive(serde::Deserialize)]
struct EslintMessage {
    // Rule name; may be absent (e.g. for parse failures).
    #[serde(rename = "ruleId")]
    rule_id: Option<String>,
    // Numeric severity; 2 is mapped to Medium, anything else to Low.
    severity: u8,
    message: String,
    line: u32,
}
|
||||
|
||||
// ── Ruff ────────────────────────────────────────────────
|
||||
|
||||
async fn run_ruff(repo_path: &Path, repo_id: &str) -> Result<Vec<Finding>, CoreError> {
|
||||
let child = Command::new("ruff")
|
||||
.args(["check", ".", "--output-format", "json", "--exit-zero"])
|
||||
.current_dir(repo_path)
|
||||
.stdout(std::process::Stdio::piped())
|
||||
.stderr(std::process::Stdio::piped())
|
||||
.spawn()
|
||||
.map_err(|e| CoreError::Scanner {
|
||||
scanner: "ruff".to_string(),
|
||||
source: Box::new(e),
|
||||
})?;
|
||||
|
||||
let output = run_with_timeout(child, "ruff").await?;
|
||||
|
||||
if output.stdout.is_empty() {
|
||||
return Ok(Vec::new());
|
||||
}
|
||||
|
||||
let results: Vec<RuffResult> = serde_json::from_slice(&output.stdout).unwrap_or_default();
|
||||
|
||||
let findings = results
|
||||
.into_iter()
|
||||
.map(|r| {
|
||||
let severity = if r.code.starts_with('E') || r.code.starts_with('F') {
|
||||
Severity::Medium
|
||||
} else {
|
||||
Severity::Low
|
||||
};
|
||||
|
||||
let fingerprint = dedup::compute_fingerprint(&[
|
||||
repo_id,
|
||||
"ruff",
|
||||
&r.code,
|
||||
&r.filename,
|
||||
&r.location.row.to_string(),
|
||||
]);
|
||||
|
||||
let mut finding = Finding::new(
|
||||
repo_id.to_string(),
|
||||
fingerprint,
|
||||
"ruff".to_string(),
|
||||
ScanType::Lint,
|
||||
format!("[ruff] {}: {}", r.code, r.message),
|
||||
r.message,
|
||||
severity,
|
||||
);
|
||||
finding.rule_id = Some(r.code);
|
||||
finding.file_path = Some(r.filename);
|
||||
finding.line_number = Some(r.location.row);
|
||||
finding
|
||||
})
|
||||
.collect();
|
||||
|
||||
Ok(findings)
|
||||
}
|
||||
|
||||
/// One diagnostic from `ruff check --output-format json`.
#[derive(serde::Deserialize)]
struct RuffResult {
    // Rule code, e.g. "E501"; E/F-prefixed codes map to Medium severity.
    code: String,
    message: String,
    filename: String,
    location: RuffLocation,
}

/// Source position of a ruff diagnostic.
#[derive(serde::Deserialize)]
struct RuffLocation {
    // Line the diagnostic points at.
    row: u32,
}
|
||||
@@ -1,6 +1,9 @@
|
||||
pub mod code_review;
|
||||
pub mod cve;
|
||||
pub mod dedup;
|
||||
pub mod git;
|
||||
pub mod gitleaks;
|
||||
pub mod lint;
|
||||
pub mod orchestrator;
|
||||
pub mod patterns;
|
||||
pub mod sbom;
|
||||
|
||||
@@ -9,8 +9,11 @@ use compliance_core::AgentConfig;
|
||||
use crate::database::Database;
|
||||
use crate::error::AgentError;
|
||||
use crate::llm::LlmClient;
|
||||
use crate::pipeline::code_review::CodeReviewScanner;
|
||||
use crate::pipeline::cve::CveScanner;
|
||||
use crate::pipeline::git::GitOps;
|
||||
use crate::pipeline::git::{GitOps, RepoCredentials};
|
||||
use crate::pipeline::gitleaks::GitleaksScanner;
|
||||
use crate::pipeline::lint::LintScanner;
|
||||
use crate::pipeline::patterns::{GdprPatternScanner, OAuthPatternScanner};
|
||||
use crate::pipeline::sbom::SbomScanner;
|
||||
use crate::pipeline::semgrep::SemgrepScanner;
|
||||
@@ -114,7 +117,12 @@ impl PipelineOrchestrator {
|
||||
|
||||
// Stage 0: Change detection
|
||||
tracing::info!("[{repo_id}] Stage 0: Change detection");
|
||||
let git_ops = GitOps::new(&self.config.git_clone_base_path);
|
||||
let creds = RepoCredentials {
|
||||
ssh_key_path: Some(self.config.ssh_key_path.clone()),
|
||||
auth_token: repo.auth_token.clone(),
|
||||
auth_username: repo.auth_username.clone(),
|
||||
};
|
||||
let git_ops = GitOps::new(&self.config.git_clone_base_path, creds);
|
||||
let repo_path = git_ops.clone_or_fetch(&repo.git_url, &repo.name)?;
|
||||
|
||||
if !GitOps::has_new_commits(&repo_path, repo.last_scanned_commit.as_deref())? {
|
||||
@@ -182,6 +190,35 @@ impl PipelineOrchestrator {
|
||||
Err(e) => tracing::warn!("[{repo_id}] OAuth pattern scan failed: {e}"),
|
||||
}
|
||||
|
||||
// Stage 4a: Secret Detection (Gitleaks)
|
||||
tracing::info!("[{repo_id}] Stage 4a: Secret Detection");
|
||||
self.update_phase(scan_run_id, "secret_detection").await;
|
||||
let gitleaks = GitleaksScanner;
|
||||
match gitleaks.scan(&repo_path, &repo_id).await {
|
||||
Ok(output) => all_findings.extend(output.findings),
|
||||
Err(e) => tracing::warn!("[{repo_id}] Gitleaks failed: {e}"),
|
||||
}
|
||||
|
||||
// Stage 4b: Lint Scanning
|
||||
tracing::info!("[{repo_id}] Stage 4b: Lint Scanning");
|
||||
self.update_phase(scan_run_id, "lint_scanning").await;
|
||||
let lint = LintScanner;
|
||||
match lint.scan(&repo_path, &repo_id).await {
|
||||
Ok(output) => all_findings.extend(output.findings),
|
||||
Err(e) => tracing::warn!("[{repo_id}] Lint scanning failed: {e}"),
|
||||
}
|
||||
|
||||
// Stage 4c: LLM Code Review (only on incremental scans)
|
||||
if let Some(old_sha) = &repo.last_scanned_commit {
|
||||
tracing::info!("[{repo_id}] Stage 4c: LLM Code Review");
|
||||
self.update_phase(scan_run_id, "code_review").await;
|
||||
let reviewer = CodeReviewScanner::new(self.llm.clone());
|
||||
let review_output = reviewer
|
||||
.review_diff(&repo_path, &repo_id, old_sha, ¤t_sha)
|
||||
.await;
|
||||
all_findings.extend(review_output.findings);
|
||||
}
|
||||
|
||||
// Stage 4.5: Graph Building
|
||||
tracing::info!("[{repo_id}] Stage 4.5: Graph Building");
|
||||
self.update_phase(scan_run_id, "graph_building").await;
|
||||
|
||||
Reference in New Issue
Block a user