feat: findings refinement, new scanners, and deployment tooling (#6)
Some checks failed
CI / Format (push) Successful in 3s
CI / Clippy (push) Successful in 4m3s
CI / Security Audit (push) Successful in 1m38s
CI / Tests (push) Successful in 4m44s
CI / Detect Changes (push) Successful in 2s
CI / Deploy Agent (push) Successful in 2s
CI / Deploy Dashboard (push) Successful in 2s
CI / Deploy Docs (push) Has been skipped
CI / Deploy MCP (push) Failing after 2s
Some checks failed
CI / Format (push) Successful in 3s
CI / Clippy (push) Successful in 4m3s
CI / Security Audit (push) Successful in 1m38s
CI / Tests (push) Successful in 4m44s
CI / Detect Changes (push) Successful in 2s
CI / Deploy Agent (push) Successful in 2s
CI / Deploy Dashboard (push) Successful in 2s
CI / Deploy Docs (push) Has been skipped
CI / Deploy MCP (push) Failing after 2s
This commit was merged in pull request #6.
This commit is contained in:
186
compliance-agent/src/pipeline/code_review.rs
Normal file
186
compliance-agent/src/pipeline/code_review.rs
Normal file
@@ -0,0 +1,186 @@
|
||||
use std::path::Path;
|
||||
use std::sync::Arc;
|
||||
|
||||
use compliance_core::models::{Finding, ScanType, Severity};
|
||||
use compliance_core::traits::ScanOutput;
|
||||
|
||||
use crate::llm::review_prompts::REVIEW_PASSES;
|
||||
use crate::llm::LlmClient;
|
||||
use crate::pipeline::dedup;
|
||||
use crate::pipeline::git::{DiffFile, GitOps};
|
||||
|
||||
pub struct CodeReviewScanner {
|
||||
llm: Arc<LlmClient>,
|
||||
}
|
||||
|
||||
impl CodeReviewScanner {
|
||||
pub fn new(llm: Arc<LlmClient>) -> Self {
|
||||
Self { llm }
|
||||
}
|
||||
|
||||
/// Run multi-pass LLM code review on the diff between old and new commits.
|
||||
pub async fn review_diff(
|
||||
&self,
|
||||
repo_path: &Path,
|
||||
repo_id: &str,
|
||||
old_sha: &str,
|
||||
new_sha: &str,
|
||||
) -> ScanOutput {
|
||||
let diff_files = match GitOps::get_diff_content(repo_path, old_sha, new_sha) {
|
||||
Ok(files) => files,
|
||||
Err(e) => {
|
||||
tracing::warn!("Failed to extract diff for code review: {e}");
|
||||
return ScanOutput::default();
|
||||
}
|
||||
};
|
||||
|
||||
if diff_files.is_empty() {
|
||||
return ScanOutput::default();
|
||||
}
|
||||
|
||||
let mut all_findings = Vec::new();
|
||||
|
||||
// Chunk diff files into groups to avoid exceeding context limits
|
||||
let chunks = chunk_diff_files(&diff_files, 8000);
|
||||
|
||||
for (pass_name, system_prompt) in REVIEW_PASSES {
|
||||
for chunk in &chunks {
|
||||
let user_prompt = format!(
|
||||
"Review the following code changes:\n\n{}",
|
||||
chunk
|
||||
.iter()
|
||||
.map(|f| format!("--- {} ---\n{}", f.path, f.hunks))
|
||||
.collect::<Vec<_>>()
|
||||
.join("\n\n")
|
||||
);
|
||||
|
||||
match self.llm.chat(system_prompt, &user_prompt, Some(0.1)).await {
|
||||
Ok(response) => {
|
||||
let parsed = parse_review_response(&response, pass_name, repo_id, chunk);
|
||||
all_findings.extend(parsed);
|
||||
}
|
||||
Err(e) => {
|
||||
tracing::warn!("Code review pass '{pass_name}' failed: {e}");
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
ScanOutput {
|
||||
findings: all_findings,
|
||||
sbom_entries: Vec::new(),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Group diff files into chunks that fit within a token budget (rough char estimate)
|
||||
fn chunk_diff_files(files: &[DiffFile], max_chars: usize) -> Vec<Vec<&DiffFile>> {
|
||||
let mut chunks: Vec<Vec<&DiffFile>> = Vec::new();
|
||||
let mut current_chunk: Vec<&DiffFile> = Vec::new();
|
||||
let mut current_size = 0;
|
||||
|
||||
for file in files {
|
||||
if current_size + file.hunks.len() > max_chars && !current_chunk.is_empty() {
|
||||
chunks.push(std::mem::take(&mut current_chunk));
|
||||
current_size = 0;
|
||||
}
|
||||
current_chunk.push(file);
|
||||
current_size += file.hunks.len();
|
||||
}
|
||||
|
||||
if !current_chunk.is_empty() {
|
||||
chunks.push(current_chunk);
|
||||
}
|
||||
|
||||
chunks
|
||||
}
|
||||
|
||||
fn parse_review_response(
|
||||
response: &str,
|
||||
pass_name: &str,
|
||||
repo_id: &str,
|
||||
chunk: &[&DiffFile],
|
||||
) -> Vec<Finding> {
|
||||
let cleaned = response.trim();
|
||||
let cleaned = if cleaned.starts_with("```") {
|
||||
cleaned
|
||||
.trim_start_matches("```json")
|
||||
.trim_start_matches("```")
|
||||
.trim_end_matches("```")
|
||||
.trim()
|
||||
} else {
|
||||
cleaned
|
||||
};
|
||||
|
||||
let issues: Vec<ReviewIssue> = match serde_json::from_str(cleaned) {
|
||||
Ok(v) => v,
|
||||
Err(_) => {
|
||||
if cleaned != "[]" {
|
||||
tracing::debug!("Failed to parse {pass_name} review response: {cleaned}");
|
||||
}
|
||||
return Vec::new();
|
||||
}
|
||||
};
|
||||
|
||||
issues
|
||||
.into_iter()
|
||||
.filter(|issue| {
|
||||
// Verify the file exists in the diff chunk
|
||||
chunk.iter().any(|f| f.path == issue.file)
|
||||
})
|
||||
.map(|issue| {
|
||||
let severity = match issue.severity.as_str() {
|
||||
"critical" => Severity::Critical,
|
||||
"high" => Severity::High,
|
||||
"medium" => Severity::Medium,
|
||||
"low" => Severity::Low,
|
||||
_ => Severity::Info,
|
||||
};
|
||||
|
||||
let fingerprint = dedup::compute_fingerprint(&[
|
||||
repo_id,
|
||||
"code-review",
|
||||
pass_name,
|
||||
&issue.file,
|
||||
&issue.line.to_string(),
|
||||
&issue.title,
|
||||
]);
|
||||
|
||||
let description = if let Some(suggestion) = &issue.suggestion {
|
||||
format!("{}\n\nSuggested fix: {}", issue.description, suggestion)
|
||||
} else {
|
||||
issue.description.clone()
|
||||
};
|
||||
|
||||
let mut finding = Finding::new(
|
||||
repo_id.to_string(),
|
||||
fingerprint,
|
||||
format!("code-review/{pass_name}"),
|
||||
ScanType::CodeReview,
|
||||
issue.title,
|
||||
description,
|
||||
severity,
|
||||
);
|
||||
finding.rule_id = Some(format!("review/{pass_name}"));
|
||||
finding.file_path = Some(issue.file);
|
||||
finding.line_number = Some(issue.line);
|
||||
finding.cwe = issue.cwe;
|
||||
finding.suggested_fix = issue.suggestion;
|
||||
finding
|
||||
})
|
||||
.collect()
|
||||
}
|
||||
|
||||
#[derive(serde::Deserialize)]
|
||||
struct ReviewIssue {
|
||||
title: String,
|
||||
description: String,
|
||||
severity: String,
|
||||
file: String,
|
||||
#[serde(default)]
|
||||
line: u32,
|
||||
#[serde(default)]
|
||||
cwe: Option<String>,
|
||||
#[serde(default)]
|
||||
suggestion: Option<String>,
|
||||
}
|
||||
Reference in New Issue
Block a user