feat: findings refinement, new scanners, and deployment tooling (#6)

2026-03-09 12:53:12 +00:00
parent 32e5fc21e7
commit 46bf9de549
40 changed files with 2048 additions and 118 deletions
--- a/compliance-agent/src/api/handlers/chat.rs
+++ b/compliance-agent/src/api/handlers/chat.rs
@@ -187,7 +187,13 @@ pub async fn build_embeddings(
            }
        };

-        let git_ops = crate::pipeline::git::GitOps::new(&agent_clone.config.git_clone_base_path);
+        let creds = crate::pipeline::git::RepoCredentials {
+            ssh_key_path: Some(agent_clone.config.ssh_key_path.clone()),
+            auth_token: repo.auth_token.clone(),
+            auth_username: repo.auth_username.clone(),
+        };
+        let git_ops =
+            crate::pipeline::git::GitOps::new(&agent_clone.config.git_clone_base_path, creds);
        let repo_path = match git_ops.clone_or_fetch(&repo.git_url, &repo.name) {
            Ok(p) => p,
            Err(e) => {
--- a/compliance-agent/src/api/handlers/graph.rs
+++ b/compliance-agent/src/api/handlers/graph.rs
@@ -291,7 +291,13 @@ pub async fn trigger_build(
            }
        };

-        let git_ops = crate::pipeline::git::GitOps::new(&agent_clone.config.git_clone_base_path);
+        let creds = crate::pipeline::git::RepoCredentials {
+            ssh_key_path: Some(agent_clone.config.ssh_key_path.clone()),
+            auth_token: repo.auth_token.clone(),
+            auth_username: repo.auth_username.clone(),
+        };
+        let git_ops =
+            crate::pipeline::git::GitOps::new(&agent_clone.config.git_clone_base_path, creds);
        let repo_path = match git_ops.clone_or_fetch(&repo.git_url, &repo.name) {
            Ok(p) => p,
            Err(e) => {
--- a/compliance-agent/src/api/handlers/mod.rs
+++ b/compliance-agent/src/api/handlers/mod.rs
@@ -41,6 +41,12 @@ pub struct FindingsFilter {
    pub scan_type: Option<String>,
    #[serde(default)]
    pub status: Option<String>,
+    #[serde(default)]
+    pub q: Option<String>,
+    #[serde(default)]
+    pub sort_by: Option<String>,
+    #[serde(default)]
+    pub sort_order: Option<String>,
    #[serde(default = "default_page")]
    pub page: u64,
    #[serde(default = "default_limit")]
@@ -76,6 +82,8 @@ pub struct AddRepositoryRequest {
    pub git_url: String,
    #[serde(default = "default_branch")]
    pub default_branch: String,
+    pub auth_token: Option<String>,
+    pub auth_username: Option<String>,
    pub tracker_type: Option<TrackerType>,
    pub tracker_owner: Option<String>,
    pub tracker_repo: Option<String>,
@@ -91,6 +99,17 @@ pub struct UpdateStatusRequest {
    pub status: String,
 }

+/// Request body for `bulk_update_finding_status`: one status applied to many findings.
+#[derive(Deserialize)]
+pub struct BulkUpdateStatusRequest {
+    /// Finding ids as strings; the handler parses each one as a MongoDB `ObjectId`.
+    pub ids: Vec<String>,
+    /// Value written to the `status` field of every matched finding.
+    pub status: String,
+}
+
+/// Request body for `update_finding_feedback`.
+#[derive(Deserialize)]
+pub struct UpdateFeedbackRequest {
+    /// Text stored in the finding's `developer_feedback` field.
+    pub feedback: String,
+}
+
 #[derive(Deserialize)]
 pub struct SbomFilter {
    #[serde(default)]
@@ -267,9 +286,25 @@ pub async fn list_repositories(
 pub async fn add_repository(
    Extension(agent): AgentExt,
    Json(req): Json<AddRepositoryRequest>,
-) -> Result<Json<ApiResponse<TrackedRepository>>, StatusCode> {
+) -> Result<Json<ApiResponse<TrackedRepository>>, (StatusCode, String)> {
+    // Validate repository access before saving
+    let creds = crate::pipeline::git::RepoCredentials {
+        ssh_key_path: Some(agent.config.ssh_key_path.clone()),
+        auth_token: req.auth_token.clone(),
+        auth_username: req.auth_username.clone(),
+    };
+
+    if let Err(e) = crate::pipeline::git::GitOps::test_access(&req.git_url, &creds) {
+        return Err((
+            StatusCode::BAD_REQUEST,
+            format!("Cannot access repository: {e}"),
+        ));
+    }
+
    let mut repo = TrackedRepository::new(req.name, req.git_url);
    repo.default_branch = req.default_branch;
+    repo.auth_token = req.auth_token;
+    repo.auth_username = req.auth_username;
    repo.tracker_type = req.tracker_type;
    repo.tracker_owner = req.tracker_owner;
    repo.tracker_repo = req.tracker_repo;
@@ -280,7 +315,12 @@ pub async fn add_repository(
        .repositories()
        .insert_one(&repo)
        .await
-        .map_err(|_| StatusCode::CONFLICT)?;
+        .map_err(|_| {
+            (
+                StatusCode::CONFLICT,
+                "Repository already exists".to_string(),
+            )
+        })?;

    Ok(Json(ApiResponse {
        data: repo,
@@ -289,6 +329,14 @@ pub async fn add_repository(
    }))
 }

+/// Returns the public half of the agent's SSH key pair as `{"public_key": "..."}`.
+///
+/// The public key is read from the configured private-key path with `.pub` appended.
+/// Any failure to read that file is reported as `404 Not Found`.
+pub async fn get_ssh_public_key(
+    Extension(agent): AgentExt,
+) -> Result<Json<serde_json::Value>, StatusCode> {
+    let mut public_path = agent.config.ssh_key_path.clone();
+    public_path.push_str(".pub");
+
+    match std::fs::read_to_string(&public_path) {
+        Ok(contents) => Ok(Json(serde_json::json!({ "public_key": contents.trim() }))),
+        Err(_) => Err(StatusCode::NOT_FOUND),
+    }
+}
+
 pub async fn trigger_scan(
    Extension(agent): AgentExt,
    Path(id): Path<String>,
@@ -367,6 +415,29 @@ pub async fn list_findings(
    if let Some(status) = &filter.status {
        query.insert("status", status);
    }
+    // Text search across title, description, file_path, rule_id
+    if let Some(q) = &filter.q {
+        if !q.is_empty() {
+            let regex = doc! { "$regex": q, "$options": "i" };
+            query.insert(
+                "$or",
+                mongodb::bson::bson!([
+                    { "title": regex.clone() },
+                    { "description": regex.clone() },
+                    { "file_path": regex.clone() },
+                    { "rule_id": regex },
+                ]),
+            );
+        }
+    }
+
+    // Dynamic sort
+    let sort_field = filter.sort_by.as_deref().unwrap_or("created_at");
+    let sort_dir: i32 = match filter.sort_order.as_deref() {
+        Some("asc") => 1,
+        _ => -1,
+    };
+    let sort_doc = doc! { sort_field: sort_dir };

    let skip = (filter.page.saturating_sub(1)) * filter.limit as u64;
    let total = db
@@ -378,7 +449,7 @@ pub async fn list_findings(
    let findings = match db
        .findings()
        .find(query)
-        .sort(doc! { "created_at": -1 })
+        .sort(sort_doc)
        .skip(skip)
        .limit(filter.limit)
        .await
@@ -434,6 +505,55 @@ pub async fn update_finding_status(
    Ok(Json(serde_json::json!({ "status": "updated" })))
 }

+/// Applies one status to many findings in a single database update.
+///
+/// Ids that do not parse as an `ObjectId` are skipped without an error; the request is
+/// rejected with `400 Bad Request` only when no id parses at all. A database failure is
+/// reported as `500`. On success the response carries the driver's `modified_count`.
+pub async fn bulk_update_finding_status(
+    Extension(agent): AgentExt,
+    Json(req): Json<BulkUpdateStatusRequest>,
+) -> Result<Json<serde_json::Value>, StatusCode> {
+    let mut oids: Vec<mongodb::bson::oid::ObjectId> = Vec::new();
+    for raw_id in &req.ids {
+        if let Ok(oid) = mongodb::bson::oid::ObjectId::parse_str(raw_id) {
+            oids.push(oid);
+        }
+    }
+
+    if oids.is_empty() {
+        return Err(StatusCode::BAD_REQUEST);
+    }
+
+    let filter = doc! { "_id": { "$in": oids } };
+    let update = doc! {
+        "$set": {
+            "status": &req.status,
+            "updated_at": mongodb::bson::DateTime::now()
+        }
+    };
+
+    match agent.db.findings().update_many(filter, update).await {
+        Ok(outcome) => Ok(Json(
+            serde_json::json!({ "status": "updated", "modified_count": outcome.modified_count }),
+        )),
+        Err(_) => Err(StatusCode::INTERNAL_SERVER_ERROR),
+    }
+}
+
+/// Stores developer feedback on a single finding.
+///
+/// Responds with `400 Bad Request` when `id` is not a valid `ObjectId` and with `500` when
+/// the database update fails. The update result is not inspected, so an id that matches no
+/// document still yields `{"status": "updated"}`.
+pub async fn update_finding_feedback(
+    Extension(agent): AgentExt,
+    Path(id): Path<String>,
+    Json(req): Json<UpdateFeedbackRequest>,
+) -> Result<Json<serde_json::Value>, StatusCode> {
+    let oid = match mongodb::bson::oid::ObjectId::parse_str(&id) {
+        Ok(parsed) => parsed,
+        Err(_) => return Err(StatusCode::BAD_REQUEST),
+    };
+
+    let filter = doc! { "_id": oid };
+    let update = doc! {
+        "$set": {
+            "developer_feedback": &req.feedback,
+            "updated_at": mongodb::bson::DateTime::now()
+        }
+    };
+
+    if agent.db.findings().update_one(filter, update).await.is_err() {
+        return Err(StatusCode::INTERNAL_SERVER_ERROR);
+    }
+
+    Ok(Json(serde_json::json!({ "status": "updated" })))
+}
+
 pub async fn list_sbom(
    Extension(agent): AgentExt,
    Query(filter): Query<SbomFilter>,
--- a/compliance-agent/src/api/routes.rs
+++ b/compliance-agent/src/api/routes.rs
@@ -7,6 +7,10 @@ pub fn build_router() -> Router {
    Router::new()
        .route("/api/v1/health", get(handlers::health))
        .route("/api/v1/stats/overview", get(handlers::stats_overview))
+        .route(
+            "/api/v1/settings/ssh-public-key",
+            get(handlers::get_ssh_public_key),
+        )
        .route("/api/v1/repositories", get(handlers::list_repositories))
        .route("/api/v1/repositories", post(handlers::add_repository))
        .route(
@@ -23,6 +27,14 @@ pub fn build_router() -> Router {
            "/api/v1/findings/{id}/status",
            patch(handlers::update_finding_status),
        )
+        .route(
+            "/api/v1/findings/bulk-status",
+            patch(handlers::bulk_update_finding_status),
+        )
+        .route(
+            "/api/v1/findings/{id}/feedback",
+            patch(handlers::update_finding_feedback),
+        )
        .route("/api/v1/sbom", get(handlers::list_sbom))
        .route("/api/v1/sbom/export", get(handlers::export_sbom))
        .route("/api/v1/sbom/licenses", get(handlers::license_summary))
--- a/compliance-agent/src/config.rs
+++ b/compliance-agent/src/config.rs
@@ -45,6 +45,8 @@ pub fn load_config() -> Result<AgentConfig, AgentError> {
            .unwrap_or_else(|| "0 0 0 * * *".to_string()),
        git_clone_base_path: env_var_opt("GIT_CLONE_BASE_PATH")
            .unwrap_or_else(|| "/tmp/compliance-scanner/repos".to_string()),
+        ssh_key_path: env_var_opt("SSH_KEY_PATH")
+            .unwrap_or_else(|| "/data/compliance-scanner/ssh/id_ed25519".to_string()),
        keycloak_url: env_var_opt("KEYCLOAK_URL"),
        keycloak_realm: env_var_opt("KEYCLOAK_REALM"),
    })
--- a/compliance-agent/src/llm/mod.rs
+++ b/compliance-agent/src/llm/mod.rs
@@ -5,6 +5,7 @@ pub mod descriptions;
 pub mod fixes;
 #[allow(dead_code)]
 pub mod pr_review;
+pub mod review_prompts;
 pub mod triage;

 pub use client::LlmClient;
--- a/compliance-agent/src/llm/review_prompts.rs
+++ b/compliance-agent/src/llm/review_prompts.rs
@@ -0,0 +1,77 @@
+// System prompts for multi-pass LLM code review.
+// Each pass focuses on a different aspect to avoid overloading a single prompt.
+
+/// System prompt for the `logic` review pass.
+///
+/// Restricts the model to logic and correctness defects and asks for a JSON array of issues
+/// (`title`, `description`, `severity`, `file`, `line`, `suggestion`), or `[]` when clean.
+pub const LOGIC_REVIEW_PROMPT: &str = r#"You are a senior software engineer reviewing code changes. Focus ONLY on logic and correctness issues.
+
+Look for:
+- Off-by-one errors, wrong comparisons, missing edge cases
+- Incorrect control flow (unreachable code, missing returns, wrong loop conditions)
+- Race conditions or concurrency bugs
+- Resource leaks (unclosed handles, missing cleanup)
+- Wrong variable used (copy-paste errors)
+- Incorrect error handling (swallowed errors, wrong error type)
+
+Ignore: style, naming, formatting, documentation, minor improvements.
+
+For each issue found, respond with a JSON array:
+[{"title": "...", "description": "...", "severity": "high|medium|low", "file": "...", "line": N, "suggestion": "..."}]
+
+If no issues found, respond with: []"#;
+
+/// System prompt for the `security` review pass.
+///
+/// Restricts the model to security vulnerabilities. The requested JSON shape matches the
+/// other passes but adds a `cwe` field and uses the severities `critical|high|medium`.
+pub const SECURITY_REVIEW_PROMPT: &str = r#"You are a security engineer reviewing code changes. Focus ONLY on security vulnerabilities.
+
+Look for:
+- Injection vulnerabilities (SQL, command, XSS, template injection)
+- Authentication/authorization bypasses
+- Sensitive data exposure (logging secrets, hardcoded credentials)
+- Insecure cryptography (weak algorithms, predictable randomness)
+- Path traversal, SSRF, open redirects
+- Unsafe deserialization
+- Missing input validation at trust boundaries
+
+Ignore: code style, performance, general quality.
+
+For each issue found, respond with a JSON array:
+[{"title": "...", "description": "...", "severity": "critical|high|medium", "file": "...", "line": N, "cwe": "CWE-XXX", "suggestion": "..."}]
+
+If no issues found, respond with: []"#;
+
+/// System prompt for the `convention` review pass.
+///
+/// Asks only for high-confidence deviations from the conventions visible in the code, reported
+/// as a JSON array with severities `medium|low`, or `[]` when nothing qualifies.
+pub const CONVENTION_REVIEW_PROMPT: &str = r#"You are a code reviewer checking adherence to project conventions. Focus ONLY on patterns that indicate likely bugs or maintenance problems.
+
+Look for:
+- Inconsistent error handling patterns within the same module
+- Public API that doesn't follow the project's established patterns
+- Missing or incorrect type annotations that could cause runtime issues
+- Anti-patterns specific to the language (e.g. unwrap in Rust library code, any in TypeScript)
+
+Do NOT report: minor style preferences, documentation gaps, formatting.
+Only report issues with HIGH confidence that they deviate from the visible codebase conventions.
+
+For each issue found, respond with a JSON array:
+[{"title": "...", "description": "...", "severity": "medium|low", "file": "...", "line": N, "suggestion": "..."}]
+
+If no issues found, respond with: []"#;
+
+/// System prompt for the `complexity` review pass.
+///
+/// Asks only for complexity problems that are a high risk for future bugs, reported as a JSON
+/// array with severities `medium|low`, or `[]` when nothing qualifies.
+pub const COMPLEXITY_REVIEW_PROMPT: &str = r#"You are reviewing code changes for excessive complexity that could lead to bugs.
+
+Look for:
+- Functions over 50 lines that should be decomposed
+- Deeply nested control flow (4+ levels)
+- Complex boolean expressions that are hard to reason about
+- Functions with 5+ parameters
+- Code duplication within the changed files
+
+Only report complexity issues that are HIGH risk for future bugs. Ignore acceptable complexity in configuration, CLI argument parsing, or generated code.
+
+For each issue found, respond with a JSON array:
+[{"title": "...", "description": "...", "severity": "medium|low", "file": "...", "line": N, "suggestion": "..."}]
+
+If no issues found, respond with: []"#;
+
+/// All review types with their prompts, as `(pass name, system prompt)` pairs.
+///
+/// The order of the entries is the order a caller sees when iterating the slice.
+pub const REVIEW_PASSES: &[(&str, &str)] = &[
+    ("logic", LOGIC_REVIEW_PROMPT),
+    ("security", SECURITY_REVIEW_PROMPT),
+    ("convention", CONVENTION_REVIEW_PROMPT),
+    ("complexity", COMPLEXITY_REVIEW_PROMPT),
+];
--- a/compliance-agent/src/llm/triage.rs
+++ b/compliance-agent/src/llm/triage.rs
@@ -5,13 +5,22 @@ use compliance_core::models::{Finding, FindingStatus};
 use crate::llm::LlmClient;
 use crate::pipeline::orchestrator::GraphContext;

-const TRIAGE_SYSTEM_PROMPT: &str = r#"You are a security finding triage expert. Analyze the following security finding and determine:
-1. Is this a true positive? (yes/no)
-2. Confidence score (0-10, where 10 is highest confidence this is a real issue)
-3. Brief remediation suggestion (1-2 sentences)
+const TRIAGE_SYSTEM_PROMPT: &str = r#"You are a security finding triage expert. Analyze the following security finding with its code context and determine the appropriate action.
+
+Actions:
+- "confirm": The finding is a true positive at the reported severity. Keep as-is.
+- "downgrade": The finding is real but over-reported. Lower severity recommended.
+- "upgrade": The finding is under-reported. Higher severity recommended.
+- "dismiss": The finding is a false positive. Should be removed.
+
+Consider:
+- Is the code in a test, example, or generated file? (lower confidence for test code)
+- Does the surrounding code context confirm or refute the finding?
+- Is the finding actionable by a developer?
+- Would a real attacker be able to exploit this?

 Respond in JSON format:
-{"true_positive": true/false, "confidence": N, "remediation": "..."}"#;
+{"action": "confirm|downgrade|upgrade|dismiss", "confidence": 0-10, "rationale": "brief explanation", "remediation": "optional fix suggestion"}"#;

 pub async fn triage_findings(
    llm: &Arc<LlmClient>,
@@ -21,8 +30,10 @@ pub async fn triage_findings(
    let mut passed = 0;

    for finding in findings.iter_mut() {
+        let file_classification = classify_file_path(finding.file_path.as_deref());
+
        let mut user_prompt = format!(
-            "Scanner: {}\nRule: {}\nSeverity: {}\nTitle: {}\nDescription: {}\nFile: {}\nLine: {}\nCode: {}",
+            "Scanner: {}\nRule: {}\nSeverity: {}\nTitle: {}\nDescription: {}\nFile: {}\nLine: {}\nCode: {}\nFile classification: {}",
            finding.scanner,
            finding.rule_id.as_deref().unwrap_or("N/A"),
            finding.severity,
@@ -31,8 +42,16 @@ pub async fn triage_findings(
            finding.file_path.as_deref().unwrap_or("N/A"),
            finding.line_number.map(|n| n.to_string()).unwrap_or_else(|| "N/A".to_string()),
            finding.code_snippet.as_deref().unwrap_or("N/A"),
+            file_classification,
        );

+        // Enrich with surrounding code context if possible
+        if let Some(context) = read_surrounding_context(finding) {
+            user_prompt.push_str(&format!(
+                "\n\n--- Surrounding Code (50 lines) ---\n{context}"
+            ));
+        }
+
        // Enrich with graph context if available
        if let Some(ctx) = graph_context {
            if let Some(impact) = ctx
@@ -69,32 +88,55 @@ pub async fn triage_findings(
            .await
        {
            Ok(response) => {
-                // Strip markdown code fences if present (e.g. ```json ... ```)
                let cleaned = response.trim();
                let cleaned = if cleaned.starts_with("```") {
-                    let inner = cleaned
+                    cleaned
                        .trim_start_matches("```json")
                        .trim_start_matches("```")
                        .trim_end_matches("```")
-                        .trim();
-                    inner
+                        .trim()
                } else {
                    cleaned
                };
                if let Ok(result) = serde_json::from_str::<TriageResult>(cleaned) {
-                    finding.confidence = Some(result.confidence);
+                    // Apply file-path confidence adjustment
+                    let adjusted_confidence =
+                        adjust_confidence(result.confidence, &file_classification);
+                    finding.confidence = Some(adjusted_confidence);
+                    finding.triage_action = Some(result.action.clone());
+                    finding.triage_rationale = Some(result.rationale);
+
                    if let Some(remediation) = result.remediation {
                        finding.remediation = Some(remediation);
                    }

-                    if result.confidence >= 3.0 {
-                        finding.status = FindingStatus::Triaged;
-                        passed += 1;
-                    } else {
-                        finding.status = FindingStatus::FalsePositive;
+                    match result.action.as_str() {
+                        "dismiss" => {
+                            finding.status = FindingStatus::FalsePositive;
+                        }
+                        "downgrade" => {
+                            // Downgrade severity by one level
+                            finding.severity = downgrade_severity(&finding.severity);
+                            finding.status = FindingStatus::Triaged;
+                            passed += 1;
+                        }
+                        "upgrade" => {
+                            finding.severity = upgrade_severity(&finding.severity);
+                            finding.status = FindingStatus::Triaged;
+                            passed += 1;
+                        }
+                        _ => {
+                            // "confirm" or unknown — keep as-is
+                            if adjusted_confidence >= 3.0 {
+                                finding.status = FindingStatus::Triaged;
+                                passed += 1;
+                            } else {
+                                finding.status = FindingStatus::FalsePositive;
+                            }
+                        }
                    }
                } else {
-                    // If LLM response doesn't parse, keep the finding
+                    // Parse failure — keep the finding
                    finding.status = FindingStatus::Triaged;
                    passed += 1;
                    tracing::warn!(
@@ -117,12 +159,122 @@ pub async fn triage_findings(
    passed
 }

+/// Read up to ~50 lines of code surrounding the finding's location.
+///
+/// Each returned line is prefixed with its 1-based line number. Returns `None` when the
+/// finding has no file path or line number, when the file cannot be read, or when no line of
+/// the file falls inside the window (for example a line number far past the end of the file,
+/// which previously panicked on an inverted slice range).
+fn read_surrounding_context(finding: &Finding) -> Option<String> {
+    // Number of lines requested on each side of the reported line.
+    const RADIUS: usize = 25;
+
+    let file_path = finding.file_path.as_deref()?;
+    let line = finding.line_number? as usize;
+
+    // NOTE(review): the path is opened exactly as stored on the finding, so the file is only
+    // found if that path resolves from the process's working directory — confirm with callers.
+    let content = std::fs::read_to_string(file_path).ok()?;
+    let lines: Vec<&str> = content.lines().collect();
+
+    // Clamp both bounds to the file length: a stale or bogus line number must not panic.
+    let end = line.saturating_add(RADIUS).min(lines.len());
+    let start = line.saturating_sub(RADIUS).min(end);
+    if start == end {
+        return None;
+    }
+
+    Some(
+        lines[start..end]
+            .iter()
+            .enumerate()
+            .map(|(offset, text)| format!("{:>4} | {}", start + offset + 1, text))
+            .collect::<Vec<_>>()
+            .join("\n"),
+    )
+}
+
+/// Classify a file path to inform triage confidence adjustment.
+///
+/// Returns one of `"unknown"` (no path), `"test"`, `"example"`, `"generated"`, `"vendored"`
+/// or `"production"` (no rule matched). Matching is case-insensitive and the categories are
+/// checked in that order, so a path matching several categories gets the first one.
+///
+/// Paths without a leading `/` are matched as if they had one, so repo-relative paths such as
+/// `tests/foo.rs` or `vendor/lib.go` are recognised as well as absolute ones.
+fn classify_file_path(path: Option<&str>) -> String {
+    // Checked top to bottom; the first category with a matching fragment wins.
+    const RULES: &[(&str, &[&str])] = &[
+        (
+            "test",
+            &[
+                "/test/",
+                "/tests/",
+                "_test.",
+                ".test.",
+                ".spec.",
+                "/fixtures/",
+                "/testdata/",
+            ],
+        ),
+        ("example", &["/example", "/examples/", "/demo/", "/sample"]),
+        (
+            "generated",
+            &["/generated/", "/gen/", ".generated.", ".pb.go", "_generated.rs"],
+        ),
+        ("vendored", &["/vendor/", "/node_modules/", "/third_party/"]),
+    ];
+
+    let lowered = match path {
+        Some(p) => p.to_lowercase(),
+        None => return "unknown".to_string(),
+    };
+
+    // Directory fragments start with '/', so give a relative path a leading separator;
+    // otherwise its first component could never match.
+    let normalized = if lowered.starts_with('/') {
+        lowered
+    } else {
+        format!("/{lowered}")
+    };
+
+    for (label, fragments) in RULES {
+        if fragments.iter().any(|fragment| normalized.contains(*fragment)) {
+            return (*label).to_string();
+        }
+    }
+
+    "production".to_string()
+}
+
+/// Scale a raw confidence score by a factor chosen from the file classification.
+///
+/// Classifications without an entry in the table (including "production" and "unknown")
+/// leave the score unchanged.
+fn adjust_confidence(raw_confidence: f64, classification: &str) -> f64 {
+    const FACTORS: [(&str, f64); 4] = [
+        ("test", 0.5),
+        ("example", 0.6),
+        ("generated", 0.3),
+        ("vendored", 0.4),
+    ];
+
+    let factor = FACTORS
+        .iter()
+        .find(|(label, _)| *label == classification)
+        .map_or(1.0, |(_, value)| *value);
+
+    raw_confidence * factor
+}
+
+/// Returns the severity one step below `severity`; `Info` is the floor and stays `Info`.
+fn downgrade_severity(
+    severity: &compliance_core::models::Severity,
+) -> compliance_core::models::Severity {
+    use compliance_core::models::Severity::{Critical, High, Info, Low, Medium};
+    match severity {
+        Critical => High,
+        High => Medium,
+        Medium => Low,
+        Low | Info => Info,
+    }
+}
+
+/// Returns the severity one step above `severity`; `Critical` is the ceiling and stays
+/// `Critical`.
+fn upgrade_severity(
+    severity: &compliance_core::models::Severity,
+) -> compliance_core::models::Severity {
+    use compliance_core::models::Severity::{Critical, High, Info, Low, Medium};
+    match severity {
+        Info => Low,
+        Low => Medium,
+        Medium => High,
+        High | Critical => Critical,
+    }
+}
+
 #[derive(serde::Deserialize)]
 struct TriageResult {
-    #[serde(default)]
-    #[allow(dead_code)]
-    true_positive: bool,
+    #[serde(default = "default_action")]
+    action: String,
    #[serde(default)]
    confidence: f64,
+    #[serde(default)]
+    rationale: String,
    remediation: Option<String>,
 }
+
+/// Serde default for `TriageResult::action`: a response without an action means "confirm".
+fn default_action() -> String {
+    String::from("confirm")
+}
--- a/compliance-agent/src/main.rs
+++ b/compliance-agent/src/main.rs
@@ -7,6 +7,7 @@ mod llm;
 mod pipeline;
 mod rag;
 mod scheduler;
+mod ssh;
 #[allow(dead_code)]
 mod trackers;
 mod webhooks;
@@ -20,6 +21,12 @@ async fn main() -> Result<(), Box<dyn std::error::Error>> {
    tracing::info!("Loading configuration...");
    let config = config::load_config()?;

+    // Ensure SSH key pair exists for cloning private repos
+    match ssh::ensure_ssh_key(&config.ssh_key_path) {
+        Ok(pubkey) => tracing::info!("SSH public key: {}", pubkey.trim()),
+        Err(e) => tracing::warn!("SSH key generation skipped: {e}"),
+    }
+
    tracing::info!("Connecting to MongoDB...");
    let db = database::Database::connect(&config.mongodb_uri, &config.mongodb_database).await?;
    db.ensure_indexes().await?;
--- a/compliance-agent/src/pipeline/code_review.rs
+++ b/compliance-agent/src/pipeline/code_review.rs
@@ -0,0 +1,186 @@
+use std::path::Path;
+use std::sync::Arc;
+
+use compliance_core::models::{Finding, ScanType, Severity};
+use compliance_core::traits::ScanOutput;
+
+use crate::llm::review_prompts::REVIEW_PASSES;
+use crate::llm::LlmClient;
+use crate::pipeline::dedup;
+use crate::pipeline::git::{DiffFile, GitOps};
+
+/// Scanner that produces findings by running LLM review passes over a commit diff.
+pub struct CodeReviewScanner {
+    /// Client used to send the review prompts.
+    llm: Arc<LlmClient>,
+}
+
+impl CodeReviewScanner {
+    /// Creates a scanner that sends its review prompts through `llm`.
+    pub fn new(llm: Arc<LlmClient>) -> Self {
+        Self { llm }
+    }
+
+    /// Run multi-pass LLM code review on the diff between old and new commits.
+    ///
+    /// Every pass in `REVIEW_PASSES` is run against every chunk of the diff, one request at a
+    /// time. A failure to extract the diff, or an empty diff, yields `ScanOutput::default()`.
+    /// A failed LLM request is logged and skipped, so the remaining requests still run.
+    pub async fn review_diff(
+        &self,
+        repo_path: &Path,
+        repo_id: &str,
+        old_sha: &str,
+        new_sha: &str,
+    ) -> ScanOutput {
+        let diff_files = match GitOps::get_diff_content(repo_path, old_sha, new_sha) {
+            Ok(files) => files,
+            Err(e) => {
+                tracing::warn!("Failed to extract diff for code review: {e}");
+                return ScanOutput::default();
+            }
+        };
+
+        if diff_files.is_empty() {
+            return ScanOutput::default();
+        }
+
+        // Chunk diff files into groups to avoid exceeding context limits
+        let chunks = chunk_diff_files(&diff_files, 8000);
+
+        // The user prompt depends only on the chunk, so render each one once up front
+        // instead of rebuilding the same string for every review pass.
+        let user_prompts: Vec<String> = chunks
+            .iter()
+            .map(|chunk| {
+                let rendered: Vec<String> = chunk
+                    .iter()
+                    .map(|f| format!("--- {} ---\n{}", f.path, f.hunks))
+                    .collect();
+                format!(
+                    "Review the following code changes:\n\n{}",
+                    rendered.join("\n\n")
+                )
+            })
+            .collect();
+
+        let mut all_findings = Vec::new();
+
+        for (pass_name, system_prompt) in REVIEW_PASSES {
+            for (chunk, user_prompt) in chunks.iter().zip(&user_prompts) {
+                match self.llm.chat(system_prompt, user_prompt, Some(0.1)).await {
+                    Ok(response) => {
+                        let parsed = parse_review_response(&response, pass_name, repo_id, chunk);
+                        all_findings.extend(parsed);
+                    }
+                    Err(e) => {
+                        tracing::warn!("Code review pass '{pass_name}' failed: {e}");
+                    }
+                }
+            }
+        }
+
+        ScanOutput {
+            findings: all_findings,
+            sbom_entries: Vec::new(),
+        }
+    }
+}
+
+/// Split `files` into consecutive groups whose combined hunk text stays within `max_chars`.
+///
+/// Size is measured as `hunks.len()`, a rough stand-in for a token budget. File order is
+/// preserved. A single file larger than `max_chars` is never split: it forms a group of its
+/// own that exceeds the budget.
+fn chunk_diff_files(files: &[DiffFile], max_chars: usize) -> Vec<Vec<&DiffFile>> {
+    let mut groups: Vec<Vec<&DiffFile>> = Vec::new();
+    let mut pending: Vec<&DiffFile> = Vec::new();
+    let mut pending_chars: usize = 0;
+
+    for file in files {
+        let size = file.hunks.len();
+        let would_overflow = pending_chars + size > max_chars;
+
+        // Close the current group first, unless it is empty (an oversized first file).
+        if would_overflow && !pending.is_empty() {
+            groups.push(std::mem::take(&mut pending));
+            pending_chars = 0;
+        }
+
+        pending_chars += size;
+        pending.push(file);
+    }
+
+    if !pending.is_empty() {
+        groups.push(pending);
+    }
+
+    groups
+}
+
+/// Turn one LLM review response into findings for `repo_id`.
+///
+/// The response is expected to be a JSON array of issues, optionally wrapped in a Markdown
+/// code fence. A response that does not parse is logged at debug level and produces no
+/// findings. Issues naming a file that is not part of `chunk` are dropped.
+///
+/// An issue without a usable line (the model omitted it, so it deserialized to `0`) is stored
+/// with `line_number = None` instead of a fake line 0.
+fn parse_review_response(
+    response: &str,
+    pass_name: &str,
+    repo_id: &str,
+    chunk: &[&DiffFile],
+) -> Vec<Finding> {
+    // Models often wrap JSON in a Markdown code fence; peel it off before parsing.
+    let cleaned = response.trim();
+    let cleaned = if cleaned.starts_with("```") {
+        cleaned
+            .trim_start_matches("```json")
+            .trim_start_matches("```")
+            .trim_end_matches("```")
+            .trim()
+    } else {
+        cleaned
+    };
+
+    let issues: Vec<ReviewIssue> = match serde_json::from_str(cleaned) {
+        Ok(v) => v,
+        Err(_) => {
+            // A bare `[]` always parses, so anything reaching this arm is genuinely malformed.
+            tracing::debug!("Failed to parse {pass_name} review response: {cleaned}");
+            return Vec::new();
+        }
+    };
+
+    issues
+        .into_iter()
+        .filter(|issue| {
+            // Verify the file exists in the diff chunk
+            chunk.iter().any(|f| f.path == issue.file)
+        })
+        .map(|issue| {
+            let severity = match issue.severity.as_str() {
+                "critical" => Severity::Critical,
+                "high" => Severity::High,
+                "medium" => Severity::Medium,
+                "low" => Severity::Low,
+                _ => Severity::Info,
+            };
+
+            let fingerprint = dedup::compute_fingerprint(&[
+                repo_id,
+                "code-review",
+                pass_name,
+                &issue.file,
+                &issue.line.to_string(),
+                &issue.title,
+            ]);
+
+            let description = if let Some(suggestion) = &issue.suggestion {
+                format!("{}\n\nSuggested fix: {}", issue.description, suggestion)
+            } else {
+                issue.description.clone()
+            };
+
+            let mut finding = Finding::new(
+                repo_id.to_string(),
+                fingerprint,
+                format!("code-review/{pass_name}"),
+                ScanType::CodeReview,
+                issue.title,
+                description,
+                severity,
+            );
+            finding.rule_id = Some(format!("review/{pass_name}"));
+            finding.file_path = Some(issue.file);
+            // `line` falls back to 0 when the model omits it; 0 is not a real line number,
+            // so record "no line" rather than a sentinel.
+            finding.line_number = if issue.line > 0 {
+                Some(issue.line)
+            } else {
+                None
+            };
+            finding.cwe = issue.cwe;
+            finding.suggested_fix = issue.suggestion;
+            finding
+        })
+        .collect()
+}
+
+/// One issue as returned by the model in a review response (deserialized from JSON).
+#[derive(serde::Deserialize)]
+struct ReviewIssue {
+    /// Short headline; becomes the finding title.
+    title: String,
+    /// Explanation of the problem; becomes the finding description.
+    description: String,
+    /// Severity label: "critical", "high", "medium" and "low" are recognised, anything else
+    /// is mapped to `Severity::Info`.
+    severity: String,
+    /// Path of the affected file; issues whose path is not in the reviewed chunk are dropped.
+    file: String,
+    /// Line number of the issue; `0` when the model omits it.
+    #[serde(default)]
+    line: u32,
+    /// CWE identifier, when the model provides one.
+    #[serde(default)]
+    cwe: Option<String>,
+    /// Suggested fix, when the model provides one.
+    #[serde(default)]
+    suggestion: Option<String>,
+}
--- a/compliance-agent/src/pipeline/cve.rs
+++ b/compliance-agent/src/pipeline/cve.rs
@@ -64,6 +64,8 @@ impl CveScanner {
    }

    async fn query_osv_batch(&self, entries: &[SbomEntry]) -> Result<Vec<Vec<OsvVuln>>, CoreError> {
+        const OSV_BATCH_SIZE: usize = 500;
+
        let queries: Vec<_> = entries
            .iter()
            .filter_map(|e| {
@@ -79,32 +81,34 @@ impl CveScanner {
            return Ok(Vec::new());
        }

-        let body = serde_json::json!({ "queries": queries });
+        let mut all_vulns: Vec<Vec<OsvVuln>> = Vec::with_capacity(queries.len());

-        let resp = self
-            .http
-            .post("https://api.osv.dev/v1/querybatch")
-            .json(&body)
-            .send()
-            .await
-            .map_err(|e| CoreError::Http(format!("OSV.dev request failed: {e}")))?;
+        for chunk in queries.chunks(OSV_BATCH_SIZE) {
+            let body = serde_json::json!({ "queries": chunk });

-        if !resp.status().is_success() {
-            let status = resp.status();
-            let body = resp.text().await.unwrap_or_default();
-            tracing::warn!("OSV.dev returned {status}: {body}");
-            return Ok(Vec::new());
-        }
+            let resp = self
+                .http
+                .post("https://api.osv.dev/v1/querybatch")
+                .json(&body)
+                .send()
+                .await
+                .map_err(|e| CoreError::Http(format!("OSV.dev request failed: {e}")))?;

-        let result: OsvBatchResponse = resp
-            .json()
-            .await
-            .map_err(|e| CoreError::Http(format!("Failed to parse OSV.dev response: {e}")))?;
+            if !resp.status().is_success() {
+                let status = resp.status();
+                let body = resp.text().await.unwrap_or_default();
+                tracing::warn!("OSV.dev returned {status}: {body}");
+                // Push empty results for this chunk so indices stay aligned
+                all_vulns.extend(std::iter::repeat_with(Vec::new).take(chunk.len()));
+                continue;
+            }

-        let vulns = result
-            .results
-            .into_iter()
-            .map(|r| {
+            let result: OsvBatchResponse = resp
+                .json()
+                .await
+                .map_err(|e| CoreError::Http(format!("Failed to parse OSV.dev response: {e}")))?;
+
+            let chunk_vulns = result.results.into_iter().map(|r| {
                r.vulns
                    .unwrap_or_default()
                    .into_iter()
@@ -116,10 +120,12 @@ impl CveScanner {
                        }),
                    })
                    .collect()
-            })
-            .collect();
+            });

-        Ok(vulns)
+            all_vulns.extend(chunk_vulns);
+        }
+
+        Ok(all_vulns)
    }

    async fn query_nvd(&self, cve_id: &str) -> Result<Option<f64>, CoreError> {
--- a/compliance-agent/src/pipeline/git.rs
+++ b/compliance-agent/src/pipeline/git.rs
@@ -1,17 +1,80 @@
 use std::path::{Path, PathBuf};

-use git2::{FetchOptions, Repository};
+use git2::{Cred, FetchOptions, RemoteCallbacks, Repository};

 use crate::error::AgentError;

+/// Credentials for accessing a private repository.
+///
+/// `Debug` is implemented by hand (not derived) so that formatting this value with
+/// `{:?}` never writes the auth token into logs or error messages.
+#[derive(Clone, Default)]
+pub struct RepoCredentials {
+    /// Path to the SSH private key (for SSH URLs)
+    pub ssh_key_path: Option<String>,
+    /// Auth token / password (for HTTPS URLs)
+    pub auth_token: Option<String>,
+    /// Username for HTTPS auth (defaults to "x-access-token")
+    pub auth_username: Option<String>,
+}
+
+impl std::fmt::Debug for RepoCredentials {
+    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+        // Keep the Some/None shape visible but never the secret itself.
+        let token = self.auth_token.as_ref().map(|_| "<redacted>");
+        f.debug_struct("RepoCredentials")
+            .field("ssh_key_path", &self.ssh_key_path)
+            .field("auth_token", &token)
+            .field("auth_username", &self.auth_username)
+            .finish()
+    }
+}
+
+impl RepoCredentials {
+    /// Builds libgit2 remote callbacks that answer credential requests from these settings.
+    ///
+    /// The SSH key is offered when the remote accepts SSH keys and the key file exists;
+    /// the token is offered (with `auth_username`, or "x-access-token" when unset) when
+    /// the remote accepts plaintext user/password. Each credential is offered at most
+    /// once: libgit2 invokes the callback again after a rejection, and answering with
+    /// the same credential every time would never terminate.
+    pub(crate) fn make_callbacks(&self) -> RemoteCallbacks<'_> {
+        let mut callbacks = RemoteCallbacks::new();
+        let ssh_key = self.ssh_key_path.clone();
+        let token = self.auth_token.clone();
+        let username = self.auth_username.clone();
+        let mut ssh_key_offered = false;
+        let mut token_offered = false;
+
+        callbacks.credentials(move |_url, username_from_url, allowed_types| {
+            // SSH key authentication
+            if !ssh_key_offered && allowed_types.contains(git2::CredentialType::SSH_KEY) {
+                if let Some(ref key_path) = ssh_key {
+                    let key = Path::new(key_path);
+                    if key.exists() {
+                        ssh_key_offered = true;
+                        let user = username_from_url.unwrap_or("git");
+                        return Cred::ssh_key(user, None, key, None);
+                    }
+                }
+            }
+
+            // HTTPS userpass authentication
+            if !token_offered && allowed_types.contains(git2::CredentialType::USER_PASS_PLAINTEXT)
+            {
+                if let Some(ref tok) = token {
+                    token_offered = true;
+                    let user = username.as_deref().unwrap_or("x-access-token");
+                    return Cred::userpass_plaintext(user, tok);
+                }
+            }
+
+            // Being asked again after offering something means the remote rejected it.
+            if ssh_key_offered || token_offered {
+                return Err(git2::Error::from_str(
+                    "configured repository credentials were rejected by the remote",
+                ));
+            }
+
+            Cred::default()
+        });
+
+        callbacks
+    }
+
+    /// Returns fetch options, with the credential callbacks attached only when
+    /// [`has_credentials`](Self::has_credentials) reports something usable.
+    fn fetch_options(&self) -> FetchOptions<'_> {
+        let mut fetch_opts = FetchOptions::new();
+        if self.has_credentials() {
+            fetch_opts.remote_callbacks(self.make_callbacks());
+        }
+        fetch_opts
+    }
+
+    /// True when an auth token is set or the configured SSH key file exists on disk.
+    fn has_credentials(&self) -> bool {
+        self.ssh_key_path
+            .as_ref()
+            .map(|p| Path::new(p).exists())
+            .unwrap_or(false)
+            || self.auth_token.is_some()
+    }
+}
+
+/// Wrapper around `git2` used to clone and fetch repositories.
 pub struct GitOps {
+    /// Base directory passed to [`GitOps::new`].
     base_path: PathBuf,
+    /// Credentials used to build the fetch options for clone and fetch.
+    credentials: RepoCredentials,
 }

 impl GitOps {
+    /// Creates a `GitOps` that stores `base_path` as a `PathBuf` together with the
+    /// credentials to use for remote access.
-    pub fn new(base_path: &str) -> Self {
+    pub fn new(base_path: &str, credentials: RepoCredentials) -> Self {
         Self {
             base_path: PathBuf::from(base_path),
+            credentials,
         }
     }

@@ -22,17 +85,25 @@ impl GitOps {
            self.fetch(&repo_path)?;
        } else {
            std::fs::create_dir_all(&repo_path)?;
-            Repository::clone(git_url, &repo_path)?;
+            self.clone_repo(git_url, &repo_path)?;
            tracing::info!("Cloned {git_url} to {}", repo_path.display());
        }

        Ok(repo_path)
    }

+    /// Clones `git_url` into `repo_path`, using fetch options derived from the stored
+    /// credentials. The opened repository handle is discarded.
+    fn clone_repo(&self, git_url: &str, repo_path: &Path) -> Result<(), AgentError> {
+        git2::build::RepoBuilder::new()
+            .fetch_options(self.credentials.fetch_options())
+            .clone(git_url, repo_path)?;
+        Ok(())
+    }
+
    fn fetch(&self, repo_path: &Path) -> Result<(), AgentError> {
        let repo = Repository::open(repo_path)?;
        let mut remote = repo.find_remote("origin")?;
-        let mut fetch_opts = FetchOptions::new();
+        let mut fetch_opts = self.credentials.fetch_options();
        remote.fetch(&[] as &[&str], Some(&mut fetch_opts), None)?;

        // Fast-forward to origin/HEAD
@@ -48,6 +119,15 @@ impl GitOps {
        Ok(())
    }

+    /// Checks that a remote repository is reachable with the given credentials by
+    /// opening and then closing a fetch connection (used during add validation).
+    pub fn test_access(git_url: &str, credentials: &RepoCredentials) -> Result<(), AgentError> {
+        let mut probe = git2::Remote::create_detached(git_url)?;
+        probe.connect_auth(
+            git2::Direction::Fetch,
+            Some(credentials.make_callbacks()),
+            None,
+        )?;
+        Ok(probe.disconnect()?)
+    }
+
    pub fn get_head_sha(repo_path: &Path) -> Result<String, AgentError> {
        let repo = Repository::open(repo_path)?;
        let head = repo.head()?;
@@ -63,6 +143,62 @@ impl GitOps {
        }
    }

+    /// Extract structured diff content between two commits.
+    ///
+    /// Diffs the tree of `old_sha` against the tree of `new_sha` in the repository at
+    /// `repo_path` and returns one [`DiffFile`] per changed path, holding that file's
+    /// patch text. Added, removed and context lines keep their `+` / `-` / space
+    /// prefix; other lines (file and hunk headers) are stored as emitted.
+    ///
+    /// Files whose patch contains a binary marker line, files with no patch text, and
+    /// files whose patch is 50,000 bytes or larger are left out.
+    ///
+    /// # Errors
+    /// Fails when the repository cannot be opened, either SHA is malformed or does not
+    /// name a commit, or libgit2 cannot produce the diff.
+    pub fn get_diff_content(
+        repo_path: &Path,
+        old_sha: &str,
+        new_sha: &str,
+    ) -> Result<Vec<DiffFile>, AgentError> {
+        let repo = Repository::open(repo_path)?;
+        let old_commit = repo.find_commit(git2::Oid::from_str(old_sha)?)?;
+        let new_commit = repo.find_commit(git2::Oid::from_str(new_sha)?)?;
+
+        let old_tree = old_commit.tree()?;
+        let new_tree = new_commit.tree()?;
+
+        let diff = repo.diff_tree_to_tree(Some(&old_tree), Some(&new_tree), None)?;
+
+        // Each entry pairs the collected patch with a "saw a binary marker" flag.
+        let mut entries: Vec<(DiffFile, bool)> = Vec::new();
+
+        diff.print(git2::DiffFormat::Patch, |delta, _hunk, line| {
+            let file_path = delta
+                .new_file()
+                .path()
+                .map(|p| p.to_string_lossy().to_string())
+                .unwrap_or_default();
+
+            // Lines of one file arrive together, so the most recent entry is almost
+            // always the match; only fall back to a scan when it is not.
+            let known = match entries.last() {
+                Some((last, _)) if last.path == file_path => Some(entries.len() - 1),
+                _ => entries.iter().position(|(f, _)| f.path == file_path),
+            };
+            let idx = match known {
+                Some(pos) => pos,
+                None => {
+                    entries.push((
+                        DiffFile {
+                            path: file_path,
+                            hunks: String::new(),
+                        },
+                        false,
+                    ));
+                    entries.len() - 1
+                }
+            };
+            let (diff_file, is_binary) = &mut entries[idx];
+
+            // 'B' is libgit2's origin for the "Binary files ... differ" line.
+            if line.origin() == 'B' {
+                *is_binary = true;
+                return true;
+            }
+
+            let prefix = match line.origin() {
+                '+' => "+",
+                '-' => "-",
+                ' ' => " ",
+                _ => "",
+            };
+
+            diff_file.hunks.push_str(prefix);
+            // Lossy decoding keeps a line with stray non-UTF-8 bytes instead of blanking it.
+            diff_file
+                .hunks
+                .push_str(&String::from_utf8_lossy(line.content()));
+
+            true
+        })?;
+
+        // Filter out binary files and very large diffs
+        let diff_files = entries
+            .into_iter()
+            .filter(|(f, is_binary)| !*is_binary && !f.hunks.is_empty() && f.hunks.len() < 50_000)
+            .map(|(f, _)| f)
+            .collect();
+
+        Ok(diff_files)
+    }
+
    #[allow(dead_code)]
    pub fn get_changed_files(
        repo_path: &Path,
@@ -94,3 +230,10 @@ impl GitOps {
        Ok(files)
    }
 }
+
+/// A file changed between two commits with its diff content
+#[derive(Debug, Clone)]
+pub struct DiffFile {
+    /// Path of the file on the new side of the diff (lossily converted to UTF-8).
+    pub path: String,
+    /// Patch text collected for this file, one diff line after another.
+    pub hunks: String,
+}
--- a/compliance-agent/src/pipeline/gitleaks.rs
+++ b/compliance-agent/src/pipeline/gitleaks.rs
@@ -0,0 +1,130 @@
+use std::path::Path;
+
+use compliance_core::models::{Finding, ScanType, Severity};
+use compliance_core::traits::{ScanOutput, Scanner};
+use compliance_core::CoreError;
+
+use crate::pipeline::dedup;
+
+/// Secret-detection scanner that shells out to the `gitleaks` CLI.
+pub struct GitleaksScanner;
+
+impl Scanner for GitleaksScanner {
+    fn name(&self) -> &str {
+        "gitleaks"
+    }
+
+    fn scan_type(&self) -> ScanType {
+        ScanType::SecretDetection
+    }
+
+    /// Runs `gitleaks detect` inside `repo_path` and turns each reported leak into a
+    /// [`Finding`] for `repo_id`.
+    ///
+    /// Results in allowlisted paths (see `is_allowlisted`) are dropped. Severity is
+    /// derived from the rule id: `private-key` is critical, `token` / `password` /
+    /// `secret` / `api-key` are high, anything else is medium.
+    ///
+    /// A non-zero exit status or an unparsable report is logged as a warning and
+    /// yields whatever could be parsed (possibly nothing) rather than failing the scan.
+    ///
+    /// # Errors
+    /// Returns `CoreError::Scanner` when the `gitleaks` process cannot be run.
+    async fn scan(&self, repo_path: &Path, repo_id: &str) -> Result<ScanOutput, CoreError> {
+        let output = tokio::process::Command::new("gitleaks")
+            .args([
+                "detect",
+                "--source",
+                ".",
+                "--report-format",
+                "json",
+                "--report-path",
+                "/dev/stdout",
+                "--no-banner",
+                "--exit-code",
+                "0",
+            ])
+            .current_dir(repo_path)
+            .output()
+            .await
+            .map_err(|e| CoreError::Scanner {
+                scanner: "gitleaks".to_string(),
+                source: Box::new(e),
+            })?;
+
+        // Leaks exit with 0 because of `--exit-code 0`, so a failure here is a tool error.
+        if !output.status.success() {
+            tracing::warn!(
+                "gitleaks exited with {}: {}",
+                output.status,
+                String::from_utf8_lossy(&output.stderr).trim()
+            );
+        }
+
+        if output.stdout.is_empty() {
+            return Ok(ScanOutput::default());
+        }
+
+        // Only the parse error is logged: the raw report may contain secrets.
+        let results: Vec<GitleaksResult> = match serde_json::from_slice(&output.stdout) {
+            Ok(parsed) => parsed,
+            Err(e) => {
+                tracing::warn!("Failed to parse gitleaks report: {e}");
+                Vec::new()
+            }
+        };
+
+        let findings = results
+            .into_iter()
+            .filter(|r| !is_allowlisted(&r.file))
+            .map(|r| {
+                let severity = match r.rule_id.as_str() {
+                    s if s.contains("private-key") => Severity::Critical,
+                    s if s.contains("token") || s.contains("password") || s.contains("secret") => {
+                        Severity::High
+                    }
+                    s if s.contains("api-key") => Severity::High,
+                    _ => Severity::Medium,
+                };
+
+                let fingerprint = dedup::compute_fingerprint(&[
+                    repo_id,
+                    &r.rule_id,
+                    &r.file,
+                    &r.start_line.to_string(),
+                ]);
+
+                let title = format!("Secret detected: {}", r.description);
+                // NOTE(review): the description and `code_snippet` below carry the matched
+                // secret text verbatim — confirm that storing it is intended.
+                let description = format!(
+                    "Potential secret ({}) found in {}:{}. Match: {}",
+                    r.rule_id,
+                    r.file,
+                    r.start_line,
+                    r.r#match.chars().take(80).collect::<String>(),
+                );
+
+                let mut finding = Finding::new(
+                    repo_id.to_string(),
+                    fingerprint,
+                    "gitleaks".to_string(),
+                    ScanType::SecretDetection,
+                    title,
+                    description,
+                    severity,
+                );
+                finding.rule_id = Some(r.rule_id);
+                finding.file_path = Some(r.file);
+                finding.line_number = Some(r.start_line);
+                finding.code_snippet = Some(r.r#match);
+                finding
+            })
+            .collect();
+
+        Ok(ScanOutput {
+            findings,
+            sbom_entries: Vec::new(),
+        })
+    }
+}
+
+/// Skip files that commonly contain example/placeholder secrets.
+///
+/// A path is allowlisted (compared case-insensitively) when any of these hold:
+/// - it contains `mock` anywhere,
+/// - it ends with an example-env or test-file suffix (`.env.example`, `_test.go`, ...),
+/// - any of its directory components is `test`, `tests`, `fixtures` or `testdata`.
+///
+/// Directories are matched per component so that repo-relative paths starting with
+/// the directory (`tests/key.pem`) are recognised, not only nested ones (`src/tests/...`).
+fn is_allowlisted(file_path: &str) -> bool {
+    const TEST_DIRS: [&str; 4] = ["test", "tests", "fixtures", "testdata"];
+    const SUFFIXES: [&str; 8] = [
+        ".env.example",
+        ".env.sample",
+        ".env.template",
+        "_test.go",
+        ".test.ts",
+        ".test.js",
+        ".spec.ts",
+        ".spec.js",
+    ];
+
+    let lower = file_path.to_lowercase();
+    if lower.contains("mock") || SUFFIXES.iter().any(|suffix| lower.ends_with(*suffix)) {
+        return true;
+    }
+
+    // Everything before the final `/` is a directory; the last piece is the file name.
+    let mut components: Vec<&str> = lower.split('/').collect();
+    components.pop();
+    components.iter().any(|dir| TEST_DIRS.contains(dir))
+}
+
+/// One entry of the gitleaks JSON report. Keys are PascalCase in the report
+/// (`Description`, `File`, `StartLine`), with `RuleID` and `Match` named explicitly.
+#[derive(serde::Deserialize)]
+#[serde(rename_all = "PascalCase")]
+struct GitleaksResult {
+    /// Text used in the finding title.
+    description: String,
+    /// Rule identifier; also drives the severity mapping.
+    #[serde(rename = "RuleID")]
+    rule_id: String,
+    /// Path of the file the match was reported in.
+    file: String,
+    /// Line on which the match starts.
+    start_line: u32,
+    /// The matched text itself (`match` is a Rust keyword, hence the raw identifier).
+    #[serde(rename = "Match")]
+    r#match: String,
+}
--- a/compliance-agent/src/pipeline/lint.rs
+++ b/compliance-agent/src/pipeline/lint.rs
@@ -0,0 +1,364 @@
+use std::path::Path;
+use std::time::Duration;
+
+use compliance_core::models::{Finding, ScanType, Severity};
+use compliance_core::traits::{ScanOutput, Scanner};
+use compliance_core::CoreError;
+use tokio::process::Command;
+
+use crate::pipeline::dedup;
+
+/// Timeout for each individual lint command: how long `run_with_timeout` waits for a
+/// linter process before reporting a timeout error.
+const LINT_TIMEOUT: Duration = Duration::from_secs(120);
+
+/// Scanner that runs whichever linters match the project files found in the repository.
+pub struct LintScanner;
+
+impl Scanner for LintScanner {
+    fn name(&self) -> &str {
+        "lint"
+    }
+
+    fn scan_type(&self) -> ScanType {
+        ScanType::Lint
+    }
+
+    /// Runs Clippy, ESLint and Ruff for the project types detected under `repo_path`
+    /// and merges their findings. A failing linter is logged and skipped, so this
+    /// never returns an error itself.
+    async fn scan(&self, repo_path: &Path, repo_id: &str) -> Result<ScanOutput, CoreError> {
+        let mut collected = Vec::new();
+
+        // Rust projects -> Clippy
+        if has_rust_project(repo_path) {
+            let outcome = run_clippy(repo_path, repo_id).await;
+            match outcome {
+                Err(e) => tracing::warn!("Clippy failed: {e}"),
+                Ok(found) => collected.extend(found),
+            }
+        }
+
+        // JavaScript projects with a local ESLint -> ESLint
+        if has_js_project(repo_path) {
+            let outcome = run_eslint(repo_path, repo_id).await;
+            match outcome {
+                Err(e) => tracing::warn!("ESLint failed: {e}"),
+                Ok(found) => collected.extend(found),
+            }
+        }
+
+        // Python projects -> Ruff
+        if has_python_project(repo_path) {
+            let outcome = run_ruff(repo_path, repo_id).await;
+            match outcome {
+                Err(e) => tracing::warn!("Ruff failed: {e}"),
+                Ok(found) => collected.extend(found),
+            }
+        }
+
+        let output = ScanOutput {
+            findings: collected,
+            sbom_entries: Vec::new(),
+        };
+        Ok(output)
+    }
+}
+
+/// True when a `Cargo.toml` exists directly under `repo_path`.
+fn has_rust_project(repo_path: &Path) -> bool {
+    let manifest = repo_path.join("Cargo.toml");
+    manifest.exists()
+}
+
+/// True when `repo_path` holds a `package.json` and a project-local ESLint binary.
+fn has_js_project(repo_path: &Path) -> bool {
+    if !repo_path.join("package.json").exists() {
+        return false;
+    }
+    // ESLint is only run when it is actually installed in the project.
+    repo_path.join("node_modules/.bin/eslint").exists()
+}
+
+/// True when any of `pyproject.toml`, `setup.py` or `requirements.txt` exists
+/// directly under `repo_path`.
+fn has_python_project(repo_path: &Path) -> bool {
+    ["pyproject.toml", "setup.py", "requirements.txt"]
+        .iter()
+        .any(|marker| repo_path.join(marker).exists())
+}
+
+/// Run a command with a timeout, returning its output or an error.
+///
+/// Waits for `child` to exit while collecting its output, for at most `LINT_TIMEOUT`.
+/// Both a failure while waiting and an elapsed timeout are returned as
+/// `CoreError::Scanner` tagged with `scanner_name`; the timeout case wraps an
+/// `io::Error` of kind `TimedOut`.
+async fn run_with_timeout(
+    child: tokio::process::Child,
+    scanner_name: &str,
+) -> Result<std::process::Output, CoreError> {
+    let result = tokio::time::timeout(LINT_TIMEOUT, child.wait_with_output()).await;
+    match result {
+        Ok(Ok(output)) => Ok(output),
+        Ok(Err(e)) => Err(CoreError::Scanner {
+            scanner: scanner_name.to_string(),
+            source: Box::new(e),
+        }),
+        Err(_) => {
+            // The elapsed timeout dropped the wait future and the child handle with it.
+            // NOTE(review): tokio only kills a dropped child when its `Command` was
+            // configured with `kill_on_drop(true)`; otherwise the process keeps
+            // running — confirm that callers set it.
+            Err(CoreError::Scanner {
+                scanner: scanner_name.to_string(),
+                source: Box::new(std::io::Error::new(
+                    std::io::ErrorKind::TimedOut,
+                    format!("{scanner_name} timed out after {}s", LINT_TIMEOUT.as_secs()),
+                )),
+            })
+        }
+    }
+}
+
+// ── Clippy ──────────────────────────────────────────────
+
+/// Runs `cargo clippy` in `repo_path` and converts its JSON diagnostics into findings.
+///
+/// Only `compiler-message` records at `warning` or `error` level that carry a lint
+/// code are kept ("aborting due to ..." summaries are skipped). Errors become
+/// `Severity::High`, warnings `Severity::Low`. File path and line number are taken
+/// from the primary span and left unset when there is none.
+///
+/// # Errors
+/// Returns `CoreError::Scanner` when cargo cannot be spawned, waiting for it fails,
+/// or it exceeds the lint timeout.
+async fn run_clippy(repo_path: &Path, repo_id: &str) -> Result<Vec<Finding>, CoreError> {
+    let child = Command::new("cargo")
+        .args([
+            "clippy",
+            "--message-format=json",
+            "--quiet",
+            "--",
+            "-W",
+            "clippy::all",
+        ])
+        .current_dir(repo_path)
+        .stdout(std::process::Stdio::piped())
+        .stderr(std::process::Stdio::piped())
+        // Without this tokio leaves the process running once a timeout drops the handle.
+        .kill_on_drop(true)
+        .spawn()
+        .map_err(|e| CoreError::Scanner {
+            scanner: "clippy".to_string(),
+            source: Box::new(e),
+        })?;
+
+    let output = run_with_timeout(child, "clippy").await?;
+    let stdout = String::from_utf8_lossy(&output.stdout);
+    let mut findings = Vec::new();
+
+    // Cargo emits one JSON object per line; anything that is not JSON is ignored.
+    for line in stdout.lines() {
+        let msg: serde_json::Value = match serde_json::from_str(line) {
+            Ok(v) => v,
+            Err(_) => continue,
+        };
+
+        if msg.get("reason").and_then(|v| v.as_str()) != Some("compiler-message") {
+            continue;
+        }
+
+        let message = match msg.get("message") {
+            Some(m) => m,
+            None => continue,
+        };
+
+        let level = message.get("level").and_then(|v| v.as_str()).unwrap_or("");
+
+        if level != "warning" && level != "error" {
+            continue;
+        }
+
+        let text = message
+            .get("message")
+            .and_then(|v| v.as_str())
+            .unwrap_or("")
+            .to_string();
+
+        let code = message
+            .get("code")
+            .and_then(|v| v.get("code"))
+            .and_then(|v| v.as_str())
+            .unwrap_or("")
+            .to_string();
+
+        if text.starts_with("aborting due to") || code.is_empty() {
+            continue;
+        }
+
+        let (file_path, line_number) = extract_primary_span(message);
+
+        let severity = if level == "error" {
+            Severity::High
+        } else {
+            Severity::Low
+        };
+
+        let fingerprint = dedup::compute_fingerprint(&[
+            repo_id,
+            "clippy",
+            &code,
+            &file_path,
+            &line_number.to_string(),
+        ]);
+
+        let mut finding = Finding::new(
+            repo_id.to_string(),
+            fingerprint,
+            "clippy".to_string(),
+            ScanType::Lint,
+            format!("[clippy] {text}"),
+            text,
+            severity,
+        );
+        finding.rule_id = Some(code);
+        // An empty path / zero line means "no primary span"; leave those fields unset.
+        if !file_path.is_empty() {
+            finding.file_path = Some(file_path);
+        }
+        if line_number > 0 {
+            finding.line_number = Some(line_number);
+        }
+        findings.push(finding);
+    }
+
+    Ok(findings)
+}
+
+/// Returns `(file_name, line_start)` of the first span marked `is_primary` in a
+/// compiler message.
+///
+/// Yields `(String::new(), 0)` when the message has no `spans` array or no primary
+/// span. A missing file name becomes an empty string; a missing line number, or one
+/// that does not fit in `u32`, becomes `0`.
+fn extract_primary_span(message: &serde_json::Value) -> (String, u32) {
+    let primary = message
+        .get("spans")
+        .and_then(|v| v.as_array())
+        .and_then(|spans| {
+            spans
+                .iter()
+                .find(|span| span.get("is_primary").and_then(|v| v.as_bool()) == Some(true))
+        });
+
+    match primary {
+        Some(span) => {
+            let file = span
+                .get("file_name")
+                .and_then(|v| v.as_str())
+                .unwrap_or("")
+                .to_string();
+            // Checked conversion: an out-of-range value maps to 0 instead of wrapping.
+            let line = span
+                .get("line_start")
+                .and_then(|v| v.as_u64())
+                .and_then(|n| u32::try_from(n).ok())
+                .unwrap_or(0);
+            (file, line)
+        }
+        None => (String::new(), 0),
+    }
+}
+
+// ── ESLint ──────────────────────────────────────────────
+
+/// Runs the project-local ESLint in `repo_path` and converts its JSON report into findings.
+///
+/// ESLint severity `2` maps to `Severity::Medium`, everything else to `Severity::Low`.
+/// Empty output yields no findings; a report that cannot be parsed is logged as a
+/// warning and also yields no findings.
+///
+/// # Errors
+/// Returns `CoreError::Scanner` when ESLint cannot be spawned, waiting for it fails,
+/// or it exceeds the lint timeout.
+async fn run_eslint(repo_path: &Path, repo_id: &str) -> Result<Vec<Finding>, CoreError> {
+    // Use the project-local eslint binary directly, not npx (which can hang downloading)
+    let eslint_bin = repo_path.join("node_modules/.bin/eslint");
+    let child = Command::new(eslint_bin)
+        .args([".", "--format", "json", "--no-error-on-unmatched-pattern"])
+        .current_dir(repo_path)
+        .stdout(std::process::Stdio::piped())
+        .stderr(std::process::Stdio::piped())
+        // Without this tokio leaves the process running once a timeout drops the handle.
+        .kill_on_drop(true)
+        .spawn()
+        .map_err(|e| CoreError::Scanner {
+            scanner: "eslint".to_string(),
+            source: Box::new(e),
+        })?;
+
+    let output = run_with_timeout(child, "eslint").await?;
+
+    if output.stdout.is_empty() {
+        return Ok(Vec::new());
+    }
+
+    let results: Vec<EslintFileResult> = match serde_json::from_slice(&output.stdout) {
+        Ok(parsed) => parsed,
+        Err(e) => {
+            // Surface the failure instead of silently reporting "no findings".
+            tracing::warn!("Failed to parse ESLint output: {e}");
+            Vec::new()
+        }
+    };
+
+    let mut findings = Vec::new();
+    for file_result in results {
+        for msg in file_result.messages {
+            let severity = match msg.severity {
+                2 => Severity::Medium,
+                _ => Severity::Low,
+            };
+
+            // Messages without a rule get an empty rule id.
+            let rule_id = msg.rule_id.unwrap_or_default();
+            let fingerprint = dedup::compute_fingerprint(&[
+                repo_id,
+                "eslint",
+                &rule_id,
+                &file_result.file_path,
+                &msg.line.to_string(),
+            ]);
+
+            let mut finding = Finding::new(
+                repo_id.to_string(),
+                fingerprint,
+                "eslint".to_string(),
+                ScanType::Lint,
+                format!("[eslint] {}", msg.message),
+                msg.message,
+                severity,
+            );
+            finding.rule_id = Some(rule_id);
+            finding.file_path = Some(file_result.file_path.clone());
+            finding.line_number = Some(msg.line);
+            findings.push(finding);
+        }
+    }
+
+    Ok(findings)
+}
+
+/// Per-file entry of the ESLint JSON report.
+#[derive(serde::Deserialize)]
+struct EslintFileResult {
+    /// Path of the linted file, read from the `filePath` key.
+    #[serde(rename = "filePath")]
+    file_path: String,
+    /// Lint messages reported for this file.
+    messages: Vec<EslintMessage>,
+}
+
+/// A single message inside an ESLint per-file result.
+#[derive(serde::Deserialize)]
+struct EslintMessage {
+    /// Rule that produced the message, read from `ruleId`; may be null.
+    #[serde(rename = "ruleId")]
+    rule_id: Option<String>,
+    /// Numeric ESLint severity.
+    severity: u8,
+    /// Human-readable message text.
+    message: String,
+    /// Line the message refers to. Defaults to 0 when the key is absent, so one
+    /// location-less message does not make the whole report fail to deserialize.
+    #[serde(default)]
+    line: u32,
+}
+
+// ── Ruff ────────────────────────────────────────────────
+
+/// Runs `ruff check` in `repo_path` and converts its JSON report into findings.
+///
+/// Rule codes starting with `E` or `F`, and diagnostics without a rule code, map to
+/// `Severity::Medium`; all other codes map to `Severity::Low`. Empty output yields no
+/// findings; a report that cannot be parsed is logged as a warning and also yields
+/// no findings.
+///
+/// # Errors
+/// Returns `CoreError::Scanner` when ruff cannot be spawned, waiting for it fails,
+/// or it exceeds the lint timeout.
+async fn run_ruff(repo_path: &Path, repo_id: &str) -> Result<Vec<Finding>, CoreError> {
+    let child = Command::new("ruff")
+        .args(["check", ".", "--output-format", "json", "--exit-zero"])
+        .current_dir(repo_path)
+        .stdout(std::process::Stdio::piped())
+        .stderr(std::process::Stdio::piped())
+        // Without this tokio leaves the process running once a timeout drops the handle.
+        .kill_on_drop(true)
+        .spawn()
+        .map_err(|e| CoreError::Scanner {
+            scanner: "ruff".to_string(),
+            source: Box::new(e),
+        })?;
+
+    let output = run_with_timeout(child, "ruff").await?;
+
+    if output.stdout.is_empty() {
+        return Ok(Vec::new());
+    }
+
+    let results: Vec<RuffResult> = match serde_json::from_slice(&output.stdout) {
+        Ok(parsed) => parsed,
+        Err(e) => {
+            // Surface the failure instead of silently reporting "no findings".
+            tracing::warn!("Failed to parse Ruff output: {e}");
+            Vec::new()
+        }
+    };
+
+    let findings = results
+        .into_iter()
+        .map(|r| {
+            let severity = match r.code.as_deref() {
+                Some(code) if code.starts_with('E') || code.starts_with('F') => Severity::Medium,
+                Some(_) => Severity::Low,
+                // No rule code: treated like an error-class diagnostic.
+                None => Severity::Medium,
+            };
+
+            // Stand-in label so code-less diagnostics still get a stable fingerprint/title.
+            let code_label = r.code.as_deref().unwrap_or("syntax-error");
+
+            let fingerprint = dedup::compute_fingerprint(&[
+                repo_id,
+                "ruff",
+                code_label,
+                &r.filename,
+                &r.location.row.to_string(),
+            ]);
+            let title = format!("[ruff] {}: {}", code_label, r.message);
+
+            let mut finding = Finding::new(
+                repo_id.to_string(),
+                fingerprint,
+                "ruff".to_string(),
+                ScanType::Lint,
+                title,
+                r.message,
+                severity,
+            );
+            finding.rule_id = r.code;
+            finding.file_path = Some(r.filename);
+            finding.line_number = Some(r.location.row);
+            finding
+        })
+        .collect();
+
+    Ok(findings)
+}
+
+/// One diagnostic from Ruff's JSON report.
+#[derive(serde::Deserialize)]
+struct RuffResult {
+    /// Rule code; optional so that a null or missing `code` does not make the whole
+    /// report fail to deserialize.
+    #[serde(default)]
+    code: Option<String>,
+    /// Human-readable diagnostic text.
+    message: String,
+    /// Path of the file the diagnostic refers to.
+    filename: String,
+    /// Where in the file the diagnostic starts.
+    location: RuffLocation,
+}
+
+/// Start position of a Ruff diagnostic; only the row is used.
+#[derive(serde::Deserialize)]
+struct RuffLocation {
+    row: u32,
+}
--- a/compliance-agent/src/pipeline/mod.rs
+++ b/compliance-agent/src/pipeline/mod.rs
@@ -1,6 +1,9 @@
+pub mod code_review;
 pub mod cve;
 pub mod dedup;
 pub mod git;
+pub mod gitleaks;
+pub mod lint;
 pub mod orchestrator;
 pub mod patterns;
 pub mod sbom;
--- a/compliance-agent/src/pipeline/orchestrator.rs
+++ b/compliance-agent/src/pipeline/orchestrator.rs
@@ -9,8 +9,11 @@ use compliance_core::AgentConfig;
 use crate::database::Database;
 use crate::error::AgentError;
 use crate::llm::LlmClient;
+use crate::pipeline::code_review::CodeReviewScanner;
 use crate::pipeline::cve::CveScanner;
-use crate::pipeline::git::GitOps;
+use crate::pipeline::git::{GitOps, RepoCredentials};
+use crate::pipeline::gitleaks::GitleaksScanner;
+use crate::pipeline::lint::LintScanner;
 use crate::pipeline::patterns::{GdprPatternScanner, OAuthPatternScanner};
 use crate::pipeline::sbom::SbomScanner;
 use crate::pipeline::semgrep::SemgrepScanner;
@@ -114,7 +117,12 @@ impl PipelineOrchestrator {

        // Stage 0: Change detection
        tracing::info!("[{repo_id}] Stage 0: Change detection");
-        let git_ops = GitOps::new(&self.config.git_clone_base_path);
+        let creds = RepoCredentials {
+            ssh_key_path: Some(self.config.ssh_key_path.clone()),
+            auth_token: repo.auth_token.clone(),
+            auth_username: repo.auth_username.clone(),
+        };
+        let git_ops = GitOps::new(&self.config.git_clone_base_path, creds);
        let repo_path = git_ops.clone_or_fetch(&repo.git_url, &repo.name)?;

        if !GitOps::has_new_commits(&repo_path, repo.last_scanned_commit.as_deref())? {
@@ -182,6 +190,35 @@ impl PipelineOrchestrator {
            Err(e) => tracing::warn!("[{repo_id}] OAuth pattern scan failed: {e}"),
        }

+        // Stage 4a: Secret Detection (Gitleaks)
+        tracing::info!("[{repo_id}] Stage 4a: Secret Detection");
+        self.update_phase(scan_run_id, "secret_detection").await;
+        let gitleaks = GitleaksScanner;
+        match gitleaks.scan(&repo_path, &repo_id).await {
+            Ok(output) => all_findings.extend(output.findings),
+            Err(e) => tracing::warn!("[{repo_id}] Gitleaks failed: {e}"),
+        }
+
+        // Stage 4b: Lint Scanning
+        tracing::info!("[{repo_id}] Stage 4b: Lint Scanning");
+        self.update_phase(scan_run_id, "lint_scanning").await;
+        let lint = LintScanner;
+        match lint.scan(&repo_path, &repo_id).await {
+            Ok(output) => all_findings.extend(output.findings),
+            Err(e) => tracing::warn!("[{repo_id}] Lint scanning failed: {e}"),
+        }
+
+        // Stage 4c: LLM Code Review (only on incremental scans)
+        if let Some(old_sha) = &repo.last_scanned_commit {
+            tracing::info!("[{repo_id}] Stage 4c: LLM Code Review");
+            self.update_phase(scan_run_id, "code_review").await;
+            let reviewer = CodeReviewScanner::new(self.llm.clone());
+            let review_output = reviewer
+                .review_diff(&repo_path, &repo_id, old_sha, &current_sha)
+                .await;
+            all_findings.extend(review_output.findings);
+        }
+
        // Stage 4.5: Graph Building
        tracing::info!("[{repo_id}] Stage 4.5: Graph Building");
        self.update_phase(scan_run_id, "graph_building").await;
--- a/compliance-agent/src/ssh.rs
+++ b/compliance-agent/src/ssh.rs
@@ -0,0 +1,53 @@
+use std::path::Path;
+
+use crate::error::AgentError;
+
+/// Ensure the SSH key pair exists at the given path, generating it if missing.
+/// Returns the public key contents.
+///
+/// The public key is expected at `<key_path>.pub`, which is where `ssh-keygen -f`
+/// writes it. When both files exist the public key is read and returned as is.
+/// Otherwise the parent directory is created and an ed25519 key pair without a
+/// passphrase is generated with `ssh-keygen`; on Unix the private key is then set
+/// to mode `0600`.
+///
+/// # Errors
+/// Returns `AgentError::Config` when `ssh-keygen` cannot be run or exits with a
+/// failure, or when the public key cannot be read. Directory-creation and
+/// permission errors are propagated through `?`.
+pub fn ensure_ssh_key(key_path: &str) -> Result<String, AgentError> {
+    let private_path = Path::new(key_path);
+    // Append ".pub" instead of using `with_extension`, which would *replace* an
+    // existing extension ("agent.key" -> "agent.pub") and miss the generated file.
+    let public_path = std::path::PathBuf::from(format!("{key_path}.pub"));
+
+    if private_path.exists() && public_path.exists() {
+        return std::fs::read_to_string(&public_path)
+            .map_err(|e| AgentError::Config(format!("Failed to read SSH public key: {e}")));
+    }
+
+    // Create parent directory
+    if let Some(parent) = private_path.parent() {
+        std::fs::create_dir_all(parent)?;
+    }
+
+    // Generate ed25519 key pair using ssh-keygen
+    let output = std::process::Command::new("ssh-keygen")
+        .args([
+            "-t",
+            "ed25519",
+            "-f",
+            key_path,
+            "-N",
+            "", // no passphrase
+            "-C",
+            "compliance-scanner-agent",
+        ])
+        .output()
+        .map_err(|e| AgentError::Config(format!("Failed to run ssh-keygen: {e}")))?;
+
+    if !output.status.success() {
+        let stderr = String::from_utf8_lossy(&output.stderr);
+        return Err(AgentError::Config(format!("ssh-keygen failed: {stderr}")));
+    }
+
+    // Set correct permissions
+    #[cfg(unix)]
+    {
+        use std::os::unix::fs::PermissionsExt;
+        std::fs::set_permissions(private_path, std::fs::Permissions::from_mode(0o600))?;
+    }
+
+    let public_key = std::fs::read_to_string(&public_path)
+        .map_err(|e| AgentError::Config(format!("Failed to read generated SSH public key: {e}")))?;
+
+    tracing::info!("Generated new SSH key pair at {key_path}");
+    Ok(public_key)
+}