feat: deduplicate code review findings across LLM passes
All checks were successful
CI / Check (pull_request) Successful in 12m46s
CI / Detect Changes (pull_request) Has been skipped
CI / Deploy Agent (pull_request) Has been skipped
CI / Deploy Dashboard (pull_request) Has been skipped
CI / Deploy Docs (pull_request) Has been skipped
CI / Deploy MCP (pull_request) Has been skipped
All checks were successful
CI / Check (pull_request) Successful in 12m46s
CI / Detect Changes (pull_request) Has been skipped
CI / Deploy Agent (pull_request) Has been skipped
CI / Deploy Dashboard (pull_request) Has been skipped
CI / Deploy Docs (pull_request) Has been skipped
CI / Deploy MCP (pull_request) Has been skipped
Group findings by file, line proximity, and normalized title keywords, keeping the highest-severity finding from each group and merging CWE info.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -66,8 +66,10 @@ impl CodeReviewScanner {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
let deduped = dedup_cross_pass(all_findings);
|
||||||
|
|
||||||
ScanOutput {
|
ScanOutput {
|
||||||
findings: all_findings,
|
findings: deduped,
|
||||||
sbom_entries: Vec::new(),
|
sbom_entries: Vec::new(),
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@@ -184,3 +186,51 @@ struct ReviewIssue {
|
|||||||
#[serde(default)]
|
#[serde(default)]
|
||||||
suggestion: Option<String>,
|
suggestion: Option<String>,
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Deduplicate findings across review passes.
|
||||||
|
///
|
||||||
|
/// Multiple passes often flag the same issue (e.g. SQL injection reported by
|
||||||
|
/// logic, security, and convention passes). We group by file + nearby line +
|
||||||
|
/// normalized title keywords and keep the highest-severity finding.
|
||||||
|
fn dedup_cross_pass(findings: Vec<Finding>) -> Vec<Finding> {
|
||||||
|
use std::collections::HashMap;
|
||||||
|
|
||||||
|
// Build a dedup key: (file, line bucket, normalized title words)
|
||||||
|
fn dedup_key(f: &Finding) -> String {
|
||||||
|
let file = f.file_path.as_deref().unwrap_or("");
|
||||||
|
// Group lines within 3 of each other
|
||||||
|
let line_bucket = f.line_number.unwrap_or(0) / 4;
|
||||||
|
// Normalize: lowercase, keep only alphanumeric, sort words for order-independence
|
||||||
|
let title_lower = f.title.to_lowercase();
|
||||||
|
let mut words: Vec<&str> = title_lower
|
||||||
|
.split(|c: char| !c.is_alphanumeric())
|
||||||
|
.filter(|w| w.len() > 2)
|
||||||
|
.collect();
|
||||||
|
words.sort();
|
||||||
|
format!("{file}:{line_bucket}:{}", words.join(","))
|
||||||
|
}
|
||||||
|
|
||||||
|
let mut groups: HashMap<String, Finding> = HashMap::new();
|
||||||
|
|
||||||
|
for finding in findings {
|
||||||
|
let key = dedup_key(&finding);
|
||||||
|
groups
|
||||||
|
.entry(key)
|
||||||
|
.and_modify(|existing| {
|
||||||
|
// Keep the higher severity; on tie, keep the one with more detail
|
||||||
|
if finding.severity > existing.severity
|
||||||
|
|| (finding.severity == existing.severity
|
||||||
|
&& finding.description.len() > existing.description.len())
|
||||||
|
{
|
||||||
|
*existing = finding.clone();
|
||||||
|
}
|
||||||
|
// Merge CWE if the existing one is missing it
|
||||||
|
if existing.cwe.is_none() {
|
||||||
|
existing.cwe = finding.cwe.clone();
|
||||||
|
}
|
||||||
|
})
|
||||||
|
.or_insert(finding);
|
||||||
|
}
|
||||||
|
|
||||||
|
groups.into_values().collect()
|
||||||
|
}
|
||||||
|
|||||||
Reference in New Issue
Block a user