feat: Document-centric scan results + DSI deduplication

DSI Dedup (consent-tester):
- Only H1/H2 headings count as documents (not H3/H4 sub-sections)
- Sub-sections (Cookies, Betroffenenrechte, Social Media) are part of
  parent document's full text, not separate documents
- Reduces IHK result from 30 to ~11 real documents

Backend (agent_scan_routes):
- ScanFinding gets doc_title field linking each finding to its document
- doc_title set when creating DSI findings for document attribution

Frontend (ScanResult.tsx):
- 3 sections: Services table, Document cards, General findings
- Documents: expandable cards with completeness bar (green/yellow/red)
- Findings grouped under their parent document
- Each card shows: title, word count, findings count, % completeness
- Findings without doc_title go to "Allgemeine Findings" section

Email Summary (agent_scan_helpers):
- Findings listed under their parent document
- General findings in separate section
- No more flat mixed list

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
Benjamin Admin
2026-05-05 09:56:29 +02:00
parent d816cf8d3a
commit 7c7513525e
4 changed files with 210 additions and 91 deletions
@@ -19,6 +19,17 @@ interface ScanFinding {
severity: string
text: string
correction: string
doc_title: string
}
interface DiscoveredDocument {
title: string
url: string
doc_type: string
language: string
word_count: number
completeness_pct: number
findings_count: number
}
interface ScanData {
@@ -26,6 +37,7 @@ interface ScanData {
pages_list: string[]
services: ServiceInfo[]
findings: ScanFinding[]
discovered_documents?: DiscoveredDocument[]
ai_detected: boolean
chatbot_detected: boolean
chatbot_provider: string
@@ -34,24 +46,38 @@ interface ScanData {
}
const STATUS_ICON: Record<string, { icon: string; color: string }> = {
ok: { icon: '', color: 'text-green-600' },
undocumented: { icon: '', color: 'text-red-600' },
ok: { icon: '\u2713', color: 'text-green-600' },
undocumented: { icon: '\u2717', color: 'text-red-600' },
outdated: { icon: '~', color: 'text-yellow-600' },
}
const SEV_STYLE: Record<string, { bg: string; text: string }> = {
HIGH: { bg: 'bg-red-50 border-red-200', text: 'text-red-800' },
MEDIUM: { bg: 'bg-yellow-50 border-yellow-200', text: 'text-yellow-800' },
LOW: { bg: 'bg-blue-50 border-blue-200', text: 'text-blue-800' },
const SEV_STYLE: Record<string, { bg: string; text: string; dot: string }> = {
HIGH: { bg: 'bg-red-50 border-red-200', text: 'text-red-800', dot: 'bg-red-500' },
MEDIUM: { bg: 'bg-yellow-50 border-yellow-200', text: 'text-yellow-800', dot: 'bg-yellow-500' },
LOW: { bg: 'bg-blue-50 border-blue-200', text: 'text-blue-800', dot: 'bg-blue-500' },
CRITICAL: { bg: 'bg-red-100 border-red-300', text: 'text-red-900', dot: 'bg-red-700' },
}
export function ScanResult({ data }: { data: ScanData }) {
const [expandedCorrection, setExpandedCorrection] = useState<string | null>(null)
const [expandedDoc, setExpandedDoc] = useState<string | null>(null)
const undocCount = data.services.filter(s => s.status === 'undocumented').length
const okCount = data.services.filter(s => s.status === 'ok').length
const outdatedCount = data.services.filter(s => s.status === 'outdated').length
const highCount = data.findings.filter(f => f.severity === 'HIGH').length
const highCount = data.findings.filter(f => f.severity === 'HIGH' || f.severity === 'CRITICAL').length
const docs = data.discovered_documents || []
// Group findings by doc_title
const docFindings: Record<string, ScanFinding[]> = {}
const generalFindings: ScanFinding[] = []
for (const f of data.findings) {
if (f.doc_title) {
if (!docFindings[f.doc_title]) docFindings[f.doc_title] = []
docFindings[f.doc_title].push(f)
} else {
generalFindings.push(f)
}
}
return (
<div className="space-y-5">
@@ -59,7 +85,7 @@ export function ScanResult({ data }: { data: ScanData }) {
<div className="grid grid-cols-4 gap-3">
<div className="bg-gray-50 rounded-lg p-3 text-center">
<p className="text-2xl font-bold text-gray-900">{data.pages_scanned}</p>
<p className="text-xs text-gray-500">Seiten gescannt</p>
<p className="text-xs text-gray-500">Seiten</p>
</div>
<div className="bg-green-50 rounded-lg p-3 text-center">
<p className="text-2xl font-bold text-green-700">{okCount}</p>
@@ -69,9 +95,9 @@ export function ScanResult({ data }: { data: ScanData }) {
<p className="text-2xl font-bold text-red-700">{undocCount}</p>
<p className="text-xs text-gray-500">Nicht in DSE</p>
</div>
<div className="bg-yellow-50 rounded-lg p-3 text-center">
<p className="text-2xl font-bold text-yellow-700">{outdatedCount}</p>
<p className="text-xs text-gray-500">Veraltet</p>
<div className="bg-purple-50 rounded-lg p-3 text-center">
<p className="text-2xl font-bold text-purple-700">{docs.length}</p>
<p className="text-xs text-gray-500">Dokumente</p>
</div>
</div>
@@ -79,14 +105,14 @@ export function ScanResult({ data }: { data: ScanData }) {
{data.pages_list?.length > 0 && (
<details className="text-sm">
<summary className="text-gray-600 cursor-pointer hover:text-gray-800">
{data.pages_scanned} Seiten gescannt Details anzeigen
{data.pages_scanned} Seiten gescannt
</summary>
<ul className="mt-2 space-y-1 ml-4">
{data.pages_list.map((p, i) => {
const isMissing = data.missing_pages[p]
return (
<li key={i} className={`text-xs ${isMissing ? 'text-red-600' : 'text-gray-500'}`}>
{isMissing ? '' : ''} {p} {isMissing ? `(HTTP ${data.missing_pages[p]})` : ''}
{isMissing ? '\u2717' : '\u2713'} {p}
</li>
)
})}
@@ -94,61 +120,127 @@ export function ScanResult({ data }: { data: ScanData }) {
</details>
)}
{/* AI / Chatbot Detection */}
<div className="flex gap-3">
<span className={`px-3 py-1 rounded-full text-xs font-medium ${data.ai_detected ? 'bg-purple-100 text-purple-800' : 'bg-gray-100 text-gray-600'}`}>
{data.ai_detected ? 'KI erkannt' : 'Keine KI erkannt'}
</span>
<span className={`px-3 py-1 rounded-full text-xs font-medium ${data.chatbot_detected ? 'bg-blue-100 text-blue-800' : 'bg-gray-100 text-gray-600'}`}>
{data.chatbot_detected ? `Chatbot: ${data.chatbot_provider}` : 'Kein Chatbot'}
</span>
</div>
{/* Services Table */}
<div>
<h4 className="text-sm font-medium text-gray-700 mb-2">Dienstleister-Abgleich (SOLL/IST)</h4>
<div className="border rounded-lg overflow-hidden">
<table className="w-full text-sm">
<thead className="bg-gray-50">
<tr>
<th className="px-3 py-2 text-left text-xs font-medium text-gray-500">Status</th>
<th className="px-3 py-2 text-left text-xs font-medium text-gray-500">Dienst</th>
<th className="px-3 py-2 text-left text-xs font-medium text-gray-500">Land</th>
<th className="px-3 py-2 text-left text-xs font-medium text-gray-500">EU</th>
<th className="px-3 py-2 text-left text-xs font-medium text-gray-500">In DSE</th>
</tr>
</thead>
<tbody className="divide-y divide-gray-100">
{data.services.map((s, i) => {
const st = STATUS_ICON[s.status] || STATUS_ICON.ok
return (
<tr key={i} className={s.status === 'undocumented' ? 'bg-red-50' : ''}>
<td className={`px-3 py-2 font-bold ${st.color}`}>{st.icon}</td>
<td className="px-3 py-2">
<span className="font-medium text-gray-900">{s.name}</span>
<span className="text-gray-400 text-xs ml-2">{s.category}</span>
</td>
<td className="px-3 py-2 text-gray-600">{s.country}</td>
<td className="px-3 py-2">{s.eu_adequate ? '' : '✗'}</td>
<td className="px-3 py-2">{s.in_dse ? 'Ja' : <span className="text-red-600 font-medium">Nein</span>}</td>
</tr>
)
})}
</tbody>
</table>
{data.services.length > 0 && (
<div>
<h4 className="text-sm font-medium text-gray-700 mb-2">Dienstleister (SOLL/IST)</h4>
<div className="border rounded-lg overflow-hidden">
<table className="w-full text-sm">
<thead className="bg-gray-50">
<tr>
<th className="px-3 py-2 text-left text-xs font-medium text-gray-500">Status</th>
<th className="px-3 py-2 text-left text-xs font-medium text-gray-500">Dienst</th>
<th className="px-3 py-2 text-left text-xs font-medium text-gray-500">Land</th>
<th className="px-3 py-2 text-left text-xs font-medium text-gray-500">In DSE</th>
</tr>
</thead>
<tbody className="divide-y divide-gray-100">
{data.services.map((s, i) => {
const st = STATUS_ICON[s.status] || STATUS_ICON.ok
return (
<tr key={i} className={s.status === 'undocumented' ? 'bg-red-50' : ''}>
<td className={`px-3 py-2 font-bold ${st.color}`}>{st.icon}</td>
<td className="px-3 py-2">
<span className="font-medium text-gray-900">{s.name}</span>
<span className="text-gray-400 text-xs ml-2">{s.provider}</span>
</td>
<td className="px-3 py-2 text-gray-600">{s.country}</td>
<td className="px-3 py-2">{s.in_dse ? '\u2713' : <span className="text-red-600 font-medium">Nein</span>}</td>
</tr>
)
})}
</tbody>
</table>
</div>
</div>
</div>
)}
{/* Findings */}
{data.findings.length > 0 && (
{/* === Document-Centric View === */}
{docs.length > 0 && (
<div>
<h4 className="text-sm font-medium text-gray-700 mb-2">
Findings ({data.findings.length}, davon {highCount} kritisch)
Rechtliche Dokumente ({docs.length})
</h4>
<div className="space-y-2">
{data.findings.map((f, i) => {
{docs.map((doc, i) => {
const isExpanded = expandedDoc === doc.title
const findings = docFindings[doc.title] || []
const pct = doc.completeness_pct
const barColor = pct >= 80 ? 'bg-green-500' : pct >= 50 ? 'bg-yellow-500' : 'bg-red-500'
const statusLabel = pct >= 80 ? 'OK' : pct >= 50 ? 'Lueckenhaft' : 'Mangelhaft'
const statusColor = pct >= 80 ? 'text-green-700 bg-green-50' : pct >= 50 ? 'text-yellow-700 bg-yellow-50' : 'text-red-700 bg-red-50'
return (
<div key={i} className="border border-gray-200 rounded-lg overflow-hidden">
<button
onClick={() => setExpandedDoc(isExpanded ? null : doc.title)}
className="w-full flex items-center justify-between px-4 py-3 bg-gray-50/50 hover:bg-gray-50 text-left"
>
<div className="flex items-center gap-3 flex-1 min-w-0">
<svg className={`w-4 h-4 text-gray-400 transition-transform shrink-0 ${isExpanded ? 'rotate-90' : ''}`}
fill="none" stroke="currentColor" viewBox="0 0 24 24">
<path strokeLinecap="round" strokeLinejoin="round" strokeWidth={2} d="M9 5l7 7-7 7" />
</svg>
<div className="min-w-0 flex-1">
<div className="text-sm font-medium text-gray-900 truncate">{doc.title}</div>
<div className="text-xs text-gray-500">
{doc.word_count} Woerter
{findings.length > 0 && <span className="text-red-600 ml-2">{findings.length} Maengel</span>}
</div>
</div>
</div>
<div className="flex items-center gap-3 shrink-0 ml-3">
{/* Completeness bar */}
<div className="w-20 h-2 bg-gray-200 rounded-full overflow-hidden">
<div className={`h-full rounded-full ${barColor}`} style={{ width: `${pct}%` }} />
</div>
<span className={`text-xs font-medium px-2 py-0.5 rounded ${statusColor}`}>
{pct}%
</span>
</div>
</button>
{isExpanded && (
<div className="px-4 py-3 border-t border-gray-100 space-y-2">
{findings.length > 0 ? (
findings.map((f, fi) => {
const sev = SEV_STYLE[f.severity] || SEV_STYLE.MEDIUM
return (
<div key={fi} className="flex items-start gap-2 text-sm">
<span className={`w-2 h-2 rounded-full mt-1.5 shrink-0 ${sev.dot}`} />
<span className="text-gray-700">{f.text}</span>
</div>
)
})
) : (
<p className="text-sm text-green-600">Alle Pflichtangaben vorhanden.</p>
)}
{doc.url && (
<a href={doc.url} target="_blank" rel="noopener noreferrer"
className="text-xs text-purple-600 hover:underline mt-2 inline-block">
Dokument oeffnen
</a>
)}
</div>
)}
</div>
)
})}
</div>
</div>
)}
{/* General Findings (not associated with a specific document) */}
{generalFindings.length > 0 && (
<div>
<h4 className="text-sm font-medium text-gray-700 mb-2">
Allgemeine Findings ({generalFindings.length})
</h4>
<div className="space-y-2">
{generalFindings.map((f, i) => {
const sev = SEV_STYLE[f.severity] || SEV_STYLE.MEDIUM
const isExpanded = expandedCorrection === f.code
const corrKey = `gen-${i}`
const isExp = expandedCorrection === corrKey
return (
<div key={i} className={`border rounded-lg p-3 ${sev.bg}`}>
<div className="flex items-start gap-2">
@@ -159,20 +251,15 @@ export function ScanResult({ data }: { data: ScanData }) {
</div>
{f.correction && (
<div className="mt-2">
<button
onClick={() => setExpandedCorrection(isExpanded ? null : f.code)}
className="text-xs text-purple-600 hover:text-purple-800 font-medium"
>
{isExpanded ? '▼ Korrekturvorschlag ausblenden' : '▶ Korrekturvorschlag anzeigen'}
<button onClick={() => setExpandedCorrection(isExp ? null : corrKey)}
className="text-xs text-purple-600 hover:text-purple-800 font-medium">
{isExp ? 'Korrektur ausblenden' : 'Korrekturvorschlag'}
</button>
{isExpanded && (
{isExp && (
<div className="mt-2 bg-white border border-gray-200 rounded-lg p-3 relative">
<pre className="text-xs text-gray-700 whitespace-pre-wrap font-sans">{f.correction}</pre>
<button
onClick={() => navigator.clipboard.writeText(f.correction)}
className="absolute top-2 right-2 text-xs bg-gray-100 hover:bg-gray-200 px-2 py-1 rounded"
title="Kopieren"
>
<button onClick={() => navigator.clipboard.writeText(f.correction)}
className="absolute top-2 right-2 text-xs bg-gray-100 hover:bg-gray-200 px-2 py-1 rounded">
Kopieren
</button>
</div>
@@ -185,6 +272,14 @@ export function ScanResult({ data }: { data: ScanData }) {
</div>
</div>
)}
{/* Email Status */}
{data.email_status && (
<div className="text-xs text-gray-500 flex items-center gap-2">
<span className={`w-2 h-2 rounded-full ${data.email_status === 'sent' ? 'bg-green-400' : 'bg-gray-300'}`} />
E-Mail: {data.email_status === 'sent' ? 'Gesendet' : data.email_status}
</div>
)}
</div>
)
}