184 lines
6.4 KiB
Rust
184 lines
6.4 KiB
Rust
use compliance_core::models::*;
|
|
|
|
use super::dedup::compute_fingerprint;
|
|
use super::orchestrator::PipelineOrchestrator;
|
|
use crate::error::AgentError;
|
|
use crate::pipeline::code_review::CodeReviewScanner;
|
|
use crate::pipeline::git::GitOps;
|
|
use crate::pipeline::semgrep::SemgrepScanner;
|
|
|
|
use compliance_core::traits::Scanner;
|
|
|
|
impl PipelineOrchestrator {
|
|
/// Run an incremental scan on a PR diff and post review comments.
|
|
#[tracing::instrument(skip_all, fields(repo_id = %repo_id, pr_number))]
|
|
pub async fn run_pr_review(
|
|
&self,
|
|
repo: &TrackedRepository,
|
|
repo_id: &str,
|
|
pr_number: u64,
|
|
base_sha: &str,
|
|
head_sha: &str,
|
|
) -> Result<(), AgentError> {
|
|
let tracker = match self.build_tracker(repo) {
|
|
Some(t) => t,
|
|
None => {
|
|
tracing::warn!("[{repo_id}] No tracker configured, cannot post PR review");
|
|
return Ok(());
|
|
}
|
|
};
|
|
let owner = repo.tracker_owner.as_deref().unwrap_or("");
|
|
let tracker_repo_name = repo.tracker_repo.as_deref().unwrap_or("");
|
|
if owner.is_empty() || tracker_repo_name.is_empty() {
|
|
tracing::warn!("[{repo_id}] tracker_owner or tracker_repo not set");
|
|
return Ok(());
|
|
}
|
|
|
|
// Clone/fetch the repo
|
|
let creds = GitOps::make_repo_credentials(&self.config, repo);
|
|
let git_ops = GitOps::new(&self.config.git_clone_base_path, creds);
|
|
let repo_path = git_ops.clone_or_fetch(&repo.git_url, &repo.name)?;
|
|
|
|
// Get diff between base and head
|
|
let diff_files = GitOps::get_diff_content(&repo_path, base_sha, head_sha)?;
|
|
if diff_files.is_empty() {
|
|
tracing::info!("[{repo_id}] PR #{pr_number}: no diff files, skipping review");
|
|
return Ok(());
|
|
}
|
|
|
|
// Run semgrep on the full repo but we'll filter findings to changed files
|
|
let changed_paths: std::collections::HashSet<String> =
|
|
diff_files.iter().map(|f| f.path.clone()).collect();
|
|
|
|
let mut pr_findings: Vec<Finding> = Vec::new();
|
|
|
|
// SAST scan (semgrep)
|
|
match SemgrepScanner.scan(&repo_path, repo_id).await {
|
|
Ok(output) => {
|
|
for f in output.findings {
|
|
if let Some(fp) = &f.file_path {
|
|
if changed_paths.contains(fp.as_str()) {
|
|
pr_findings.push(f);
|
|
}
|
|
}
|
|
}
|
|
}
|
|
Err(e) => tracing::warn!("[{repo_id}] PR semgrep failed: {e}"),
|
|
}
|
|
|
|
// LLM code review on the diff
|
|
let reviewer = CodeReviewScanner::new(self.llm.clone());
|
|
let review_output = reviewer
|
|
.review_diff(&repo_path, repo_id, base_sha, head_sha)
|
|
.await;
|
|
pr_findings.extend(review_output.findings);
|
|
|
|
if pr_findings.is_empty() {
|
|
// Post a clean review
|
|
if let Err(e) = tracker
|
|
.create_pr_review(
|
|
owner,
|
|
tracker_repo_name,
|
|
pr_number,
|
|
"Compliance scan: no issues found in this PR.",
|
|
Vec::new(),
|
|
)
|
|
.await
|
|
{
|
|
tracing::warn!("[{repo_id}] Failed to post clean PR review: {e}");
|
|
}
|
|
return Ok(());
|
|
}
|
|
|
|
// Dedup findings by fingerprint to avoid duplicate comments
|
|
let mut seen_fps = std::collections::HashSet::new();
|
|
let mut unique_findings: Vec<&Finding> = Vec::new();
|
|
for finding in &pr_findings {
|
|
let fp = compute_fingerprint(&[
|
|
repo_id,
|
|
&pr_number.to_string(),
|
|
finding.file_path.as_deref().unwrap_or(""),
|
|
&finding.line_number.unwrap_or(0).to_string(),
|
|
&finding.title,
|
|
]);
|
|
if seen_fps.insert(fp) {
|
|
unique_findings.push(finding);
|
|
}
|
|
}
|
|
|
|
let pr_findings = unique_findings;
|
|
|
|
// Build review comments from findings
|
|
let mut review_comments = Vec::new();
|
|
for finding in &pr_findings {
|
|
if let (Some(path), Some(line)) = (&finding.file_path, finding.line_number) {
|
|
let fp = compute_fingerprint(&[
|
|
repo_id,
|
|
&pr_number.to_string(),
|
|
path,
|
|
&line.to_string(),
|
|
&finding.title,
|
|
]);
|
|
let comment_body = format!(
|
|
"**[{}] {}**\n\n{}\n\n*Scanner: {} | {}*\n\n<!-- compliance-fp:{fp} -->",
|
|
finding.severity,
|
|
finding.title,
|
|
finding.description,
|
|
finding.scanner,
|
|
finding
|
|
.cwe
|
|
.as_deref()
|
|
.map(|c| format!("CWE: {c}"))
|
|
.unwrap_or_default(),
|
|
);
|
|
review_comments.push(compliance_core::traits::issue_tracker::ReviewComment {
|
|
path: path.clone(),
|
|
line,
|
|
body: comment_body,
|
|
});
|
|
}
|
|
}
|
|
|
|
let summary = format!(
|
|
"Compliance scan found **{}** issue(s) in this PR:\n\n{}",
|
|
pr_findings.len(),
|
|
pr_findings
|
|
.iter()
|
|
.map(|f| format!("- **[{}]** {}: {}", f.severity, f.scanner, f.title))
|
|
.collect::<Vec<_>>()
|
|
.join("\n"),
|
|
);
|
|
|
|
if review_comments.is_empty() {
|
|
// All findings were on files/lines we can't comment on inline
|
|
if let Err(e) = tracker
|
|
.create_pr_review(owner, tracker_repo_name, pr_number, &summary, Vec::new())
|
|
.await
|
|
{
|
|
tracing::warn!("[{repo_id}] Failed to post PR review summary: {e}");
|
|
}
|
|
return Ok(());
|
|
}
|
|
|
|
if let Err(e) = tracker
|
|
.create_pr_review(
|
|
owner,
|
|
tracker_repo_name,
|
|
pr_number,
|
|
&summary,
|
|
review_comments,
|
|
)
|
|
.await
|
|
{
|
|
tracing::warn!("[{repo_id}] Failed to post PR review: {e}");
|
|
} else {
|
|
tracing::info!(
|
|
"[{repo_id}] Posted PR review on #{pr_number} with {} findings",
|
|
pr_findings.len()
|
|
);
|
|
}
|
|
|
|
Ok(())
|
|
}
|
|
}
|