feat: deduplicate code review findings across LLM passes
All checks were successful
CI / Check (pull_request) Successful in 12m46s
CI / Detect Changes (pull_request) Has been skipped
CI / Deploy Agent (pull_request) Has been skipped
CI / Deploy Dashboard (pull_request) Has been skipped
CI / Deploy Docs (pull_request) Has been skipped
CI / Deploy MCP (pull_request) Has been skipped
All checks were successful
CI / Check (pull_request) Successful in 12m46s
CI / Detect Changes (pull_request) Has been skipped
CI / Deploy Agent (pull_request) Has been skipped
CI / Deploy Dashboard (pull_request) Has been skipped
CI / Deploy Docs (pull_request) Has been skipped
CI / Deploy MCP (pull_request) Has been skipped
Group findings by file, line proximity, and normalized title keywords, keeping the highest-severity finding from each group and merging CWE info. Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -66,8 +66,10 @@ impl CodeReviewScanner {
|
||||
}
|
||||
}
|
||||
|
||||
let deduped = dedup_cross_pass(all_findings);
|
||||
|
||||
ScanOutput {
|
||||
findings: all_findings,
|
||||
findings: deduped,
|
||||
sbom_entries: Vec::new(),
|
||||
}
|
||||
}
|
||||
@@ -184,3 +186,51 @@ struct ReviewIssue {
|
||||
#[serde(default)]
|
||||
suggestion: Option<String>,
|
||||
}
|
||||
|
||||
/// Deduplicate findings across review passes.
|
||||
///
|
||||
/// Multiple passes often flag the same issue (e.g. SQL injection reported by
|
||||
/// logic, security, and convention passes). We group by file + nearby line +
|
||||
/// normalized title keywords and keep the highest-severity finding.
|
||||
fn dedup_cross_pass(findings: Vec<Finding>) -> Vec<Finding> {
|
||||
use std::collections::HashMap;
|
||||
|
||||
// Build a dedup key: (file, line bucket, normalized title words)
|
||||
fn dedup_key(f: &Finding) -> String {
|
||||
let file = f.file_path.as_deref().unwrap_or("");
|
||||
// Group lines within 3 of each other
|
||||
let line_bucket = f.line_number.unwrap_or(0) / 4;
|
||||
// Normalize: lowercase, keep only alphanumeric, sort words for order-independence
|
||||
let title_lower = f.title.to_lowercase();
|
||||
let mut words: Vec<&str> = title_lower
|
||||
.split(|c: char| !c.is_alphanumeric())
|
||||
.filter(|w| w.len() > 2)
|
||||
.collect();
|
||||
words.sort();
|
||||
format!("{file}:{line_bucket}:{}", words.join(","))
|
||||
}
|
||||
|
||||
let mut groups: HashMap<String, Finding> = HashMap::new();
|
||||
|
||||
for finding in findings {
|
||||
let key = dedup_key(&finding);
|
||||
groups
|
||||
.entry(key)
|
||||
.and_modify(|existing| {
|
||||
// Keep the higher severity; on tie, keep the one with more detail
|
||||
if finding.severity > existing.severity
|
||||
|| (finding.severity == existing.severity
|
||||
&& finding.description.len() > existing.description.len())
|
||||
{
|
||||
*existing = finding.clone();
|
||||
}
|
||||
// Merge CWE if the existing one is missing it
|
||||
if existing.cwe.is_none() {
|
||||
existing.cwe = finding.cwe.clone();
|
||||
}
|
||||
})
|
||||
.or_insert(finding);
|
||||
}
|
||||
|
||||
groups.into_values().collect()
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user