Files
compliance-scanner-agent/compliance-agent/src/pipeline/code_review.rs
Sharang Parnerkar 23ba52276b
Some checks failed
CI / Deploy Agent (push) Has been skipped
CI / Deploy Dashboard (push) Has been skipped
CI / Deploy Docs (push) Has been skipped
CI / Deploy MCP (push) Has been skipped
CI / Deploy Agent (pull_request) Has been skipped
CI / Deploy Dashboard (pull_request) Has been skipped
CI / Deploy Docs (pull_request) Has been skipped
CI / Format (push) Failing after 3s
CI / Clippy (push) Failing after 2m44s
CI / Security Audit (push) Has been skipped
CI / Tests (push) Has been skipped
CI / Format (pull_request) Failing after 3s
CI / Clippy (pull_request) Failing after 2m51s
CI / Security Audit (pull_request) Has been skipped
CI / Tests (pull_request) Has been skipped
CI / Detect Changes (push) Has been skipped
CI / Detect Changes (pull_request) Has been skipped
CI / Deploy MCP (pull_request) Has been skipped
feat: add new scanners, enhanced triage, findings refinement, and deployment tooling
- Add gitleaks secret detection, lint scanning (clippy/eslint/ruff), and LLM code review scanners
- Enhance LLM triage with multi-action support (confirm/downgrade/upgrade/dismiss),
  surrounding code context, and file-path classification confidence adjustment
- Add text search, column sorting, and bulk status update to findings dashboard
- Fix finding detail page status refresh and add developer feedback field
- Fix BSON DateTime deserialization across all models with shared serde helpers
- Add scan progress spinner with polling to repositories page
- Batch OSV.dev queries to avoid "Too many queries" errors
- Add gitleaks, semgrep, and ruff to Dockerfile.agent for deployment

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-03-09 11:05:31 +01:00

187 lines
5.4 KiB
Rust

use std::path::Path;
use std::sync::Arc;
use compliance_core::models::{Finding, ScanType, Severity};
use compliance_core::traits::ScanOutput;
use crate::llm::review_prompts::REVIEW_PASSES;
use crate::llm::LlmClient;
use crate::pipeline::dedup;
use crate::pipeline::git::{DiffFile, GitOps};
pub struct CodeReviewScanner {
llm: Arc<LlmClient>,
}
impl CodeReviewScanner {
pub fn new(llm: Arc<LlmClient>) -> Self {
Self { llm }
}
/// Run multi-pass LLM code review on the diff between old and new commits.
pub async fn review_diff(
&self,
repo_path: &Path,
repo_id: &str,
old_sha: &str,
new_sha: &str,
) -> ScanOutput {
let diff_files = match GitOps::get_diff_content(repo_path, old_sha, new_sha) {
Ok(files) => files,
Err(e) => {
tracing::warn!("Failed to extract diff for code review: {e}");
return ScanOutput::default();
}
};
if diff_files.is_empty() {
return ScanOutput::default();
}
let mut all_findings = Vec::new();
// Chunk diff files into groups to avoid exceeding context limits
let chunks = chunk_diff_files(&diff_files, 8000);
for (pass_name, system_prompt) in REVIEW_PASSES {
for chunk in &chunks {
let user_prompt = format!(
"Review the following code changes:\n\n{}",
chunk
.iter()
.map(|f| format!("--- {} ---\n{}", f.path, f.hunks))
.collect::<Vec<_>>()
.join("\n\n")
);
match self.llm.chat(system_prompt, &user_prompt, Some(0.1)).await {
Ok(response) => {
let parsed = parse_review_response(&response, pass_name, repo_id, chunk);
all_findings.extend(parsed);
}
Err(e) => {
tracing::warn!("Code review pass '{pass_name}' failed: {e}");
}
}
}
}
ScanOutput {
findings: all_findings,
sbom_entries: Vec::new(),
}
}
}
/// Partitions `files`, in order, into consecutive groups whose combined hunk length stays
/// within `max_chars`.
///
/// The size of a file is taken as `hunks.len()`, which serves as a rough stand-in for its
/// token cost. A group is closed as soon as adding the next file would push it over the
/// budget. A file that is larger than the budget on its own is never split or dropped; it
/// simply ends up alone in an oversized group. No group is ever empty, and an empty input
/// yields no groups.
fn chunk_diff_files(files: &[DiffFile], max_chars: usize) -> Vec<Vec<&DiffFile>> {
    let mut groups: Vec<Vec<&DiffFile>> = Vec::new();
    // Combined hunk length of the group currently being filled (the last one in `groups`).
    let mut used = 0;

    for entry in files {
        let cost = entry.hunks.len();
        match groups.last_mut() {
            // There is an open group and the file still fits into it.
            Some(open) if used + cost <= max_chars => {
                open.push(entry);
                used += cost;
            }
            // Either this is the very first file or the open group is full: start a new one.
            _ => {
                groups.push(vec![entry]);
                used = cost;
            }
        }
    }

    groups
}
fn parse_review_response(
response: &str,
pass_name: &str,
repo_id: &str,
chunk: &[&DiffFile],
) -> Vec<Finding> {
let cleaned = response.trim();
let cleaned = if cleaned.starts_with("```") {
cleaned
.trim_start_matches("```json")
.trim_start_matches("```")
.trim_end_matches("```")
.trim()
} else {
cleaned
};
let issues: Vec<ReviewIssue> = match serde_json::from_str(cleaned) {
Ok(v) => v,
Err(_) => {
if cleaned != "[]" {
tracing::debug!("Failed to parse {pass_name} review response: {cleaned}");
}
return Vec::new();
}
};
issues
.into_iter()
.filter(|issue| {
// Verify the file exists in the diff chunk
chunk.iter().any(|f| f.path == issue.file)
})
.map(|issue| {
let severity = match issue.severity.as_str() {
"critical" => Severity::Critical,
"high" => Severity::High,
"medium" => Severity::Medium,
"low" => Severity::Low,
_ => Severity::Info,
};
let fingerprint = dedup::compute_fingerprint(&[
repo_id,
"code-review",
pass_name,
&issue.file,
&issue.line.to_string(),
&issue.title,
]);
let description = if let Some(suggestion) = &issue.suggestion {
format!("{}\n\nSuggested fix: {}", issue.description, suggestion)
} else {
issue.description.clone()
};
let mut finding = Finding::new(
repo_id.to_string(),
fingerprint,
format!("code-review/{pass_name}"),
ScanType::CodeReview,
issue.title,
description,
severity,
);
finding.rule_id = Some(format!("review/{pass_name}"));
finding.file_path = Some(issue.file);
finding.line_number = Some(issue.line);
finding.cwe = issue.cwe;
finding.suggested_fix = issue.suggestion;
finding
})
.collect()
}
/// One issue as reported by the LLM inside the JSON array of a review reply.
#[derive(Debug, serde::Deserialize)]
struct ReviewIssue {
    /// Short headline; becomes the title of the resulting finding.
    title: String,
    /// Explanation of the issue; becomes the body of the finding description.
    description: String,
    /// Severity label as written by the model; translated in `parse_review_response`.
    severity: String,
    /// Path of the affected file; compared against the paths of the reviewed diff files.
    file: String,
    /// Line the issue refers to. `0` stands for "not reported": it is used both when the key
    /// is missing and when the model sends an explicit `null`, so that a single null does not
    /// make the whole reply fail to deserialize.
    #[serde(default, deserialize_with = "line_or_zero")]
    line: u32,
    /// Optional CWE identifier, copied onto the finding unchanged.
    #[serde(default)]
    cwe: Option<String>,
    /// Optional fix proposal; appended to the description and stored as the suggested fix.
    #[serde(default)]
    suggestion: Option<String>,
}

/// Deserializes a line number, treating JSON `null` like a missing value (`0`).
fn line_or_zero<'de, D>(deserializer: D) -> Result<u32, D::Error>
where
    D: serde::Deserializer<'de>,
{
    <Option<u32> as serde::Deserialize<'de>>::deserialize(deserializer)
        .map(Option::unwrap_or_default)
}