use compliance_core::models::*; use super::dedup::compute_fingerprint; use super::orchestrator::PipelineOrchestrator; use crate::error::AgentError; use crate::pipeline::code_review::CodeReviewScanner; use crate::pipeline::git::GitOps; use crate::pipeline::semgrep::SemgrepScanner; use compliance_core::traits::Scanner; impl PipelineOrchestrator { /// Run an incremental scan on a PR diff and post review comments. #[tracing::instrument(skip_all, fields(repo_id = %repo_id, pr_number))] pub async fn run_pr_review( &self, repo: &TrackedRepository, repo_id: &str, pr_number: u64, base_sha: &str, head_sha: &str, ) -> Result<(), AgentError> { let tracker = match self.build_tracker(repo) { Some(t) => t, None => { tracing::warn!("[{repo_id}] No tracker configured, cannot post PR review"); return Ok(()); } }; let owner = repo.tracker_owner.as_deref().unwrap_or(""); let tracker_repo_name = repo.tracker_repo.as_deref().unwrap_or(""); if owner.is_empty() || tracker_repo_name.is_empty() { tracing::warn!("[{repo_id}] tracker_owner or tracker_repo not set"); return Ok(()); } // Clone/fetch the repo let creds = GitOps::make_repo_credentials(&self.config, repo); let git_ops = GitOps::new(&self.config.git_clone_base_path, creds); let repo_path = git_ops.clone_or_fetch(&repo.git_url, &repo.name)?; // Get diff between base and head let diff_files = GitOps::get_diff_content(&repo_path, base_sha, head_sha)?; if diff_files.is_empty() { tracing::info!("[{repo_id}] PR #{pr_number}: no diff files, skipping review"); return Ok(()); } // Run semgrep on the full repo but we'll filter findings to changed files let changed_paths: std::collections::HashSet = diff_files.iter().map(|f| f.path.clone()).collect(); let mut pr_findings: Vec = Vec::new(); // SAST scan (semgrep) match SemgrepScanner.scan(&repo_path, repo_id).await { Ok(output) => { for f in output.findings { if let Some(fp) = &f.file_path { if changed_paths.contains(fp.as_str()) { pr_findings.push(f); } } } } Err(e) => tracing::warn!("[{repo_id}] PR semgrep failed: {e}"), } // LLM code review on the diff let reviewer = CodeReviewScanner::new(self.llm.clone()); let review_output = reviewer .review_diff(&repo_path, repo_id, base_sha, head_sha) .await; pr_findings.extend(review_output.findings); if pr_findings.is_empty() { // Post a clean review if let Err(e) = tracker .create_pr_review( owner, tracker_repo_name, pr_number, "Compliance scan: no issues found in this PR.", Vec::new(), ) .await { tracing::warn!("[{repo_id}] Failed to post clean PR review: {e}"); } return Ok(()); } // Dedup findings by fingerprint to avoid duplicate comments let mut seen_fps = std::collections::HashSet::new(); let mut unique_findings: Vec<&Finding> = Vec::new(); for finding in &pr_findings { let fp = compute_fingerprint(&[ repo_id, &pr_number.to_string(), finding.file_path.as_deref().unwrap_or(""), &finding.line_number.unwrap_or(0).to_string(), &finding.title, ]); if seen_fps.insert(fp) { unique_findings.push(finding); } } let pr_findings = unique_findings; // Build review comments from findings let mut review_comments = Vec::new(); for finding in &pr_findings { if let (Some(path), Some(line)) = (&finding.file_path, finding.line_number) { let fp = compute_fingerprint(&[ repo_id, &pr_number.to_string(), path, &line.to_string(), &finding.title, ]); let comment_body = format!( "**[{}] {}**\n\n{}\n\n*Scanner: {} | {}*\n\n", finding.severity, finding.title, finding.description, finding.scanner, finding .cwe .as_deref() .map(|c| format!("CWE: {c}")) .unwrap_or_default(), ); review_comments.push(compliance_core::traits::issue_tracker::ReviewComment { path: path.clone(), line, body: comment_body, }); } } let summary = format!( "Compliance scan found **{}** issue(s) in this PR:\n\n{}", pr_findings.len(), pr_findings .iter() .map(|f| format!("- **[{}]** {}: {}", f.severity, f.scanner, f.title)) .collect::>() .join("\n"), ); if review_comments.is_empty() { // All findings were on files/lines we can't comment on inline if let Err(e) = tracker .create_pr_review(owner, tracker_repo_name, pr_number, &summary, Vec::new()) .await { tracing::warn!("[{repo_id}] Failed to post PR review summary: {e}"); } return Ok(()); } if let Err(e) = tracker .create_pr_review( owner, tracker_repo_name, pr_number, &summary, review_comments, ) .await { tracing::warn!("[{repo_id}] Failed to post PR review: {e}"); } else { tracing::info!( "[{repo_id}] Posted PR review on #{pr_number} with {} findings", pr_findings.len() ); } Ok(()) } }