Files
compliance-scanner-agent/compliance-agent/src/pipeline/pr_review.rs
Sharang Parnerkar 5da33ef882
All checks were successful
CI / Check (pull_request) Successful in 10m17s
CI / Detect Changes (pull_request) Has been skipped
CI / Deploy Agent (pull_request) Has been skipped
CI / Deploy Dashboard (pull_request) Has been skipped
CI / Deploy Docs (pull_request) Has been skipped
CI / Deploy MCP (pull_request) Has been skipped
feat: deduplicate DAST findings, PR comments, and pentest reports
Two-phase DAST dedup: exact fingerprint match (title+endpoint+method)
and CWE-based related finding merge (e.g., HSTS reported as both
security_header_missing and tls_misconfiguration). Applied at insertion
time in the pentest orchestrator and at report export.

PR review comments now include fingerprints and skip duplicates within
the same review run.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
2026-03-29 22:15:48 +02:00

184 lines
6.4 KiB
Rust

use compliance_core::models::*;
use super::dedup::compute_fingerprint;
use super::orchestrator::PipelineOrchestrator;
use crate::error::AgentError;
use crate::pipeline::code_review::CodeReviewScanner;
use crate::pipeline::git::GitOps;
use crate::pipeline::semgrep::SemgrepScanner;
use compliance_core::traits::Scanner;
impl PipelineOrchestrator {
/// Run an incremental scan on a PR diff and post review comments.
#[tracing::instrument(skip_all, fields(repo_id = %repo_id, pr_number))]
pub async fn run_pr_review(
&self,
repo: &TrackedRepository,
repo_id: &str,
pr_number: u64,
base_sha: &str,
head_sha: &str,
) -> Result<(), AgentError> {
let tracker = match self.build_tracker(repo) {
Some(t) => t,
None => {
tracing::warn!("[{repo_id}] No tracker configured, cannot post PR review");
return Ok(());
}
};
let owner = repo.tracker_owner.as_deref().unwrap_or("");
let tracker_repo_name = repo.tracker_repo.as_deref().unwrap_or("");
if owner.is_empty() || tracker_repo_name.is_empty() {
tracing::warn!("[{repo_id}] tracker_owner or tracker_repo not set");
return Ok(());
}
// Clone/fetch the repo
let creds = GitOps::make_repo_credentials(&self.config, repo);
let git_ops = GitOps::new(&self.config.git_clone_base_path, creds);
let repo_path = git_ops.clone_or_fetch(&repo.git_url, &repo.name)?;
// Get diff between base and head
let diff_files = GitOps::get_diff_content(&repo_path, base_sha, head_sha)?;
if diff_files.is_empty() {
tracing::info!("[{repo_id}] PR #{pr_number}: no diff files, skipping review");
return Ok(());
}
// Run semgrep on the full repo but we'll filter findings to changed files
let changed_paths: std::collections::HashSet<String> =
diff_files.iter().map(|f| f.path.clone()).collect();
let mut pr_findings: Vec<Finding> = Vec::new();
// SAST scan (semgrep)
match SemgrepScanner.scan(&repo_path, repo_id).await {
Ok(output) => {
for f in output.findings {
if let Some(fp) = &f.file_path {
if changed_paths.contains(fp.as_str()) {
pr_findings.push(f);
}
}
}
}
Err(e) => tracing::warn!("[{repo_id}] PR semgrep failed: {e}"),
}
// LLM code review on the diff
let reviewer = CodeReviewScanner::new(self.llm.clone());
let review_output = reviewer
.review_diff(&repo_path, repo_id, base_sha, head_sha)
.await;
pr_findings.extend(review_output.findings);
if pr_findings.is_empty() {
// Post a clean review
if let Err(e) = tracker
.create_pr_review(
owner,
tracker_repo_name,
pr_number,
"Compliance scan: no issues found in this PR.",
Vec::new(),
)
.await
{
tracing::warn!("[{repo_id}] Failed to post clean PR review: {e}");
}
return Ok(());
}
// Dedup findings by fingerprint to avoid duplicate comments
let mut seen_fps = std::collections::HashSet::new();
let mut unique_findings: Vec<&Finding> = Vec::new();
for finding in &pr_findings {
let fp = compute_fingerprint(&[
repo_id,
&pr_number.to_string(),
finding.file_path.as_deref().unwrap_or(""),
&finding.line_number.unwrap_or(0).to_string(),
&finding.title,
]);
if seen_fps.insert(fp) {
unique_findings.push(finding);
}
}
let pr_findings = unique_findings;
// Build review comments from findings
let mut review_comments = Vec::new();
for finding in &pr_findings {
if let (Some(path), Some(line)) = (&finding.file_path, finding.line_number) {
let fp = compute_fingerprint(&[
repo_id,
&pr_number.to_string(),
path,
&line.to_string(),
&finding.title,
]);
let comment_body = format!(
"**[{}] {}**\n\n{}\n\n*Scanner: {} | {}*\n\n<!-- compliance-fp:{fp} -->",
finding.severity,
finding.title,
finding.description,
finding.scanner,
finding
.cwe
.as_deref()
.map(|c| format!("CWE: {c}"))
.unwrap_or_default(),
);
review_comments.push(compliance_core::traits::issue_tracker::ReviewComment {
path: path.clone(),
line,
body: comment_body,
});
}
}
let summary = format!(
"Compliance scan found **{}** issue(s) in this PR:\n\n{}",
pr_findings.len(),
pr_findings
.iter()
.map(|f| format!("- **[{}]** {}: {}", f.severity, f.scanner, f.title))
.collect::<Vec<_>>()
.join("\n"),
);
if review_comments.is_empty() {
// All findings were on files/lines we can't comment on inline
if let Err(e) = tracker
.create_pr_review(owner, tracker_repo_name, pr_number, &summary, Vec::new())
.await
{
tracing::warn!("[{repo_id}] Failed to post PR review summary: {e}");
}
return Ok(());
}
if let Err(e) = tracker
.create_pr_review(
owner,
tracker_repo_name,
pr_number,
&summary,
review_comments,
)
.await
{
tracing::warn!("[{repo_id}] Failed to post PR review: {e}");
} else {
tracing::info!(
"[{repo_id}] Posted PR review on #{pr_number} with {} findings",
pr_findings.len()
);
}
Ok(())
}
}