Some checks failed
CI / Deploy Agent (push) Has been skipped
CI / Deploy Dashboard (push) Has been skipped
CI / Deploy Docs (push) Has been skipped
CI / Deploy MCP (push) Has been skipped
CI / Deploy Agent (pull_request) Has been skipped
CI / Deploy Dashboard (pull_request) Has been skipped
CI / Deploy Docs (pull_request) Has been skipped
CI / Format (push) Failing after 3s
CI / Clippy (push) Failing after 2m44s
CI / Security Audit (push) Has been skipped
CI / Tests (push) Has been skipped
CI / Format (pull_request) Failing after 3s
CI / Clippy (pull_request) Failing after 2m51s
CI / Security Audit (pull_request) Has been skipped
CI / Tests (pull_request) Has been skipped
CI / Detect Changes (push) Has been skipped
CI / Detect Changes (pull_request) Has been skipped
CI / Deploy MCP (pull_request) Has been skipped
- Add gitleaks secret detection, lint scanning (clippy/eslint/ruff), and LLM code review scanners
- Enhance LLM triage with multi-action support (confirm/downgrade/upgrade/dismiss), surrounding code context, and file-path classification confidence adjustment
- Add text search, column sorting, and bulk status update to findings dashboard
- Fix finding detail page status refresh and add developer feedback field
- Fix BSON DateTime deserialization across all models with shared serde helpers
- Add scan progress spinner with polling to repositories page
- Batch OSV.dev queries to avoid "Too many queries" errors
- Add gitleaks, semgrep, and ruff to Dockerfile.agent for deployment

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
187 lines
5.4 KiB
Rust
187 lines
5.4 KiB
Rust
use std::path::Path;
|
|
use std::sync::Arc;
|
|
|
|
use compliance_core::models::{Finding, ScanType, Severity};
|
|
use compliance_core::traits::ScanOutput;
|
|
|
|
use crate::llm::review_prompts::REVIEW_PASSES;
|
|
use crate::llm::LlmClient;
|
|
use crate::pipeline::dedup;
|
|
use crate::pipeline::git::{DiffFile, GitOps};
|
|
|
|
pub struct CodeReviewScanner {
|
|
llm: Arc<LlmClient>,
|
|
}
|
|
|
|
impl CodeReviewScanner {
|
|
pub fn new(llm: Arc<LlmClient>) -> Self {
|
|
Self { llm }
|
|
}
|
|
|
|
/// Run multi-pass LLM code review on the diff between old and new commits.
|
|
pub async fn review_diff(
|
|
&self,
|
|
repo_path: &Path,
|
|
repo_id: &str,
|
|
old_sha: &str,
|
|
new_sha: &str,
|
|
) -> ScanOutput {
|
|
let diff_files = match GitOps::get_diff_content(repo_path, old_sha, new_sha) {
|
|
Ok(files) => files,
|
|
Err(e) => {
|
|
tracing::warn!("Failed to extract diff for code review: {e}");
|
|
return ScanOutput::default();
|
|
}
|
|
};
|
|
|
|
if diff_files.is_empty() {
|
|
return ScanOutput::default();
|
|
}
|
|
|
|
let mut all_findings = Vec::new();
|
|
|
|
// Chunk diff files into groups to avoid exceeding context limits
|
|
let chunks = chunk_diff_files(&diff_files, 8000);
|
|
|
|
for (pass_name, system_prompt) in REVIEW_PASSES {
|
|
for chunk in &chunks {
|
|
let user_prompt = format!(
|
|
"Review the following code changes:\n\n{}",
|
|
chunk
|
|
.iter()
|
|
.map(|f| format!("--- {} ---\n{}", f.path, f.hunks))
|
|
.collect::<Vec<_>>()
|
|
.join("\n\n")
|
|
);
|
|
|
|
match self.llm.chat(system_prompt, &user_prompt, Some(0.1)).await {
|
|
Ok(response) => {
|
|
let parsed = parse_review_response(&response, pass_name, repo_id, chunk);
|
|
all_findings.extend(parsed);
|
|
}
|
|
Err(e) => {
|
|
tracing::warn!("Code review pass '{pass_name}' failed: {e}");
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
ScanOutput {
|
|
findings: all_findings,
|
|
sbom_entries: Vec::new(),
|
|
}
|
|
}
|
|
}
|
|
|
|
/// Group diff files into chunks that fit within a token budget (rough char estimate)
|
|
fn chunk_diff_files(files: &[DiffFile], max_chars: usize) -> Vec<Vec<&DiffFile>> {
|
|
let mut chunks: Vec<Vec<&DiffFile>> = Vec::new();
|
|
let mut current_chunk: Vec<&DiffFile> = Vec::new();
|
|
let mut current_size = 0;
|
|
|
|
for file in files {
|
|
if current_size + file.hunks.len() > max_chars && !current_chunk.is_empty() {
|
|
chunks.push(std::mem::take(&mut current_chunk));
|
|
current_size = 0;
|
|
}
|
|
current_chunk.push(file);
|
|
current_size += file.hunks.len();
|
|
}
|
|
|
|
if !current_chunk.is_empty() {
|
|
chunks.push(current_chunk);
|
|
}
|
|
|
|
chunks
|
|
}
|
|
|
|
fn parse_review_response(
|
|
response: &str,
|
|
pass_name: &str,
|
|
repo_id: &str,
|
|
chunk: &[&DiffFile],
|
|
) -> Vec<Finding> {
|
|
let cleaned = response.trim();
|
|
let cleaned = if cleaned.starts_with("```") {
|
|
cleaned
|
|
.trim_start_matches("```json")
|
|
.trim_start_matches("```")
|
|
.trim_end_matches("```")
|
|
.trim()
|
|
} else {
|
|
cleaned
|
|
};
|
|
|
|
let issues: Vec<ReviewIssue> = match serde_json::from_str(cleaned) {
|
|
Ok(v) => v,
|
|
Err(_) => {
|
|
if cleaned != "[]" {
|
|
tracing::debug!("Failed to parse {pass_name} review response: {cleaned}");
|
|
}
|
|
return Vec::new();
|
|
}
|
|
};
|
|
|
|
issues
|
|
.into_iter()
|
|
.filter(|issue| {
|
|
// Verify the file exists in the diff chunk
|
|
chunk.iter().any(|f| f.path == issue.file)
|
|
})
|
|
.map(|issue| {
|
|
let severity = match issue.severity.as_str() {
|
|
"critical" => Severity::Critical,
|
|
"high" => Severity::High,
|
|
"medium" => Severity::Medium,
|
|
"low" => Severity::Low,
|
|
_ => Severity::Info,
|
|
};
|
|
|
|
let fingerprint = dedup::compute_fingerprint(&[
|
|
repo_id,
|
|
"code-review",
|
|
pass_name,
|
|
&issue.file,
|
|
&issue.line.to_string(),
|
|
&issue.title,
|
|
]);
|
|
|
|
let description = if let Some(suggestion) = &issue.suggestion {
|
|
format!("{}\n\nSuggested fix: {}", issue.description, suggestion)
|
|
} else {
|
|
issue.description.clone()
|
|
};
|
|
|
|
let mut finding = Finding::new(
|
|
repo_id.to_string(),
|
|
fingerprint,
|
|
format!("code-review/{pass_name}"),
|
|
ScanType::CodeReview,
|
|
issue.title,
|
|
description,
|
|
severity,
|
|
);
|
|
finding.rule_id = Some(format!("review/{pass_name}"));
|
|
finding.file_path = Some(issue.file);
|
|
finding.line_number = Some(issue.line);
|
|
finding.cwe = issue.cwe;
|
|
finding.suggested_fix = issue.suggestion;
|
|
finding
|
|
})
|
|
.collect()
|
|
}
|
|
|
|
#[derive(serde::Deserialize)]
|
|
struct ReviewIssue {
|
|
title: String,
|
|
description: String,
|
|
severity: String,
|
|
file: String,
|
|
#[serde(default)]
|
|
line: u32,
|
|
#[serde(default)]
|
|
cwe: Option<String>,
|
|
#[serde(default)]
|
|
suggestion: Option<String>,
|
|
}
|