feat: Document-centric scan results + DSI deduplication
DSI Dedup (consent-tester):
- Only H1/H2 headings count as documents (not H3/H4 sub-sections)
- Sub-sections (Cookies, Betroffenenrechte, Social Media) are part of the parent document's full text, not separate documents
- Reduces IHK result from 30 to ~11 real documents

Backend (agent_scan_routes):
- ScanFinding gets a doc_title field linking each finding to its document
- doc_title is set when creating DSI findings, for document attribution

Frontend (ScanResult.tsx):
- 3 sections: Services table, Document cards, General findings
- Documents: expandable cards with completeness bar (green/yellow/red)
- Findings grouped under their parent document
- Each card shows: title, word count, findings count, % completeness
- Findings without doc_title go to the "Allgemeine Findings" section

Email Summary (agent_scan_helpers):
- Findings listed under their parent document
- General findings in a separate section
- No more flat mixed list

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -73,25 +73,41 @@ def build_scan_summary(
|
||||
f"Findings: {n_findings} ({high} mit hoher Prioritaet)",
|
||||
])
|
||||
|
||||
# DSI Documents section
|
||||
# DSI Documents section — grouped with their findings
|
||||
if discovered_docs:
|
||||
parts.extend([
|
||||
"",
|
||||
f"Rechtliche Dokumente gefunden: {len(discovered_docs)}",
|
||||
])
|
||||
parts.extend(["", f"Rechtliche Dokumente ({len(discovered_docs)})"])
|
||||
|
||||
# Group findings by doc_title
|
||||
doc_findings_map: dict[str, list] = {}
|
||||
general_findings: list = []
|
||||
for f in findings:
|
||||
dt = f.doc_title if hasattr(f, 'doc_title') else ""
|
||||
if dt:
|
||||
doc_findings_map.setdefault(dt, []).append(f)
|
||||
else:
|
||||
general_findings.append(f)
|
||||
|
||||
for doc in discovered_docs:
|
||||
title = doc.title if hasattr(doc, 'title') else "?"
|
||||
pct = doc.completeness_pct if hasattr(doc, 'completeness_pct') else 0
|
||||
fc = doc.findings_count if hasattr(doc, 'findings_count') else 0
|
||||
wc = doc.word_count if hasattr(doc, 'word_count') else 0
|
||||
status = "OK" if pct >= 80 else "LUECKENHAFT" if pct >= 50 else "MANGELHAFT"
|
||||
dt = doc.doc_type if hasattr(doc, 'doc_type') else "unknown"
|
||||
title = doc.title if hasattr(doc, 'title') else "?"
|
||||
parts.append(
|
||||
f" [{status}] {title} ({dt}, {wc} Woerter, "
|
||||
f"{pct}% vollstaendig, {fc} Maengel)"
|
||||
)
|
||||
parts.append(f" [{status}] {title} ({pct}%, {wc} Woerter)")
|
||||
for f in doc_findings_map.get(title, []):
|
||||
sev = f.severity if hasattr(f, 'severity') else "?"
|
||||
txt = f.text if hasattr(f, 'text') else str(f)
|
||||
marker = "!!" if sev == "HIGH" else "!" if sev == "MEDIUM" else "i"
|
||||
parts.append(f" {marker} {txt}")
|
||||
|
||||
if findings:
|
||||
# General findings (no doc association)
|
||||
if general_findings:
|
||||
parts.extend(["", "Allgemeine Findings"])
|
||||
for f in general_findings[:20]:
|
||||
sev = f.severity if hasattr(f, 'severity') else "?"
|
||||
txt = f.text if hasattr(f, 'text') else str(f)
|
||||
marker = "!!" if sev == "HIGH" else "!" if sev == "MEDIUM" else "i"
|
||||
parts.append(f" [{marker}] {txt}")
|
||||
elif findings:
|
||||
parts.append("")
|
||||
for f in findings[:20]:
|
||||
sev = f.severity if hasattr(f, 'severity') else "?"
|
||||
|
||||
@@ -79,6 +79,7 @@ class ScanFinding(BaseModel):
|
||||
severity: str
|
||||
text: str
|
||||
correction: str = ""
|
||||
doc_title: str = ""
|
||||
text_reference: TextReferenceModel | None = None
|
||||
|
||||
|
||||
@@ -264,6 +265,7 @@ async def _execute_scan(req: ScanRequest, scan_id: str = "") -> ScanResponse:
|
||||
if "SCORE" not in df.get("code", ""):
|
||||
dsi_findings.append(ScanFinding(
|
||||
code=df["code"], severity=df["severity"], text=df["text"],
|
||||
doc_title=doc["title"],
|
||||
))
|
||||
except Exception as e:
|
||||
logger.warning("DSI discovery failed: %s %s", type(e).__name__, e)
|
||||
|
||||
Reference in New Issue
Block a user