feat: Document-centric scan results + DSI deduplication
DSI Dedup (consent-tester):
- Only H1/H2 headings count as documents (not H3/H4 sub-sections)
- Sub-sections (Cookies, Betroffenenrechte, Social Media) are part of the parent document's full text, not separate documents
- Reduces the IHK result from 30 to ~11 real documents

Backend (agent_scan_routes):
- ScanFinding gets a doc_title field linking each finding to its document
- doc_title is set when creating DSI findings for document attribution

Frontend (ScanResult.tsx):
- 3 sections: Services table, Document cards, General findings
- Documents: expandable cards with completeness bar (green/yellow/red)
- Findings grouped under their parent document
- Each card shows: title, word count, findings count, % completeness
- Findings without doc_title go to the "Allgemeine Findings" section

Email Summary (agent_scan_helpers):
- Findings listed under their parent document
- General findings in a separate section
- No more flat mixed list

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -19,6 +19,17 @@ interface ScanFinding {
|
|||||||
severity: string
|
severity: string
|
||||||
text: string
|
text: string
|
||||||
correction: string
|
correction: string
|
||||||
|
doc_title: string
|
||||||
|
}
|
||||||
|
|
||||||
|
interface DiscoveredDocument {
|
||||||
|
title: string
|
||||||
|
url: string
|
||||||
|
doc_type: string
|
||||||
|
language: string
|
||||||
|
word_count: number
|
||||||
|
completeness_pct: number
|
||||||
|
findings_count: number
|
||||||
}
|
}
|
||||||
|
|
||||||
interface ScanData {
|
interface ScanData {
|
||||||
@@ -26,6 +37,7 @@ interface ScanData {
|
|||||||
pages_list: string[]
|
pages_list: string[]
|
||||||
services: ServiceInfo[]
|
services: ServiceInfo[]
|
||||||
findings: ScanFinding[]
|
findings: ScanFinding[]
|
||||||
|
discovered_documents?: DiscoveredDocument[]
|
||||||
ai_detected: boolean
|
ai_detected: boolean
|
||||||
chatbot_detected: boolean
|
chatbot_detected: boolean
|
||||||
chatbot_provider: string
|
chatbot_provider: string
|
||||||
@@ -34,24 +46,38 @@ interface ScanData {
|
|||||||
}
|
}
|
||||||
|
|
||||||
const STATUS_ICON: Record<string, { icon: string; color: string }> = {
|
const STATUS_ICON: Record<string, { icon: string; color: string }> = {
|
||||||
ok: { icon: '✓', color: 'text-green-600' },
|
ok: { icon: '\u2713', color: 'text-green-600' },
|
||||||
undocumented: { icon: '✗', color: 'text-red-600' },
|
undocumented: { icon: '\u2717', color: 'text-red-600' },
|
||||||
outdated: { icon: '~', color: 'text-yellow-600' },
|
outdated: { icon: '~', color: 'text-yellow-600' },
|
||||||
}
|
}
|
||||||
|
|
||||||
const SEV_STYLE: Record<string, { bg: string; text: string }> = {
|
const SEV_STYLE: Record<string, { bg: string; text: string; dot: string }> = {
|
||||||
HIGH: { bg: 'bg-red-50 border-red-200', text: 'text-red-800' },
|
HIGH: { bg: 'bg-red-50 border-red-200', text: 'text-red-800', dot: 'bg-red-500' },
|
||||||
MEDIUM: { bg: 'bg-yellow-50 border-yellow-200', text: 'text-yellow-800' },
|
MEDIUM: { bg: 'bg-yellow-50 border-yellow-200', text: 'text-yellow-800', dot: 'bg-yellow-500' },
|
||||||
LOW: { bg: 'bg-blue-50 border-blue-200', text: 'text-blue-800' },
|
LOW: { bg: 'bg-blue-50 border-blue-200', text: 'text-blue-800', dot: 'bg-blue-500' },
|
||||||
|
CRITICAL: { bg: 'bg-red-100 border-red-300', text: 'text-red-900', dot: 'bg-red-700' },
|
||||||
}
|
}
|
||||||
|
|
||||||
export function ScanResult({ data }: { data: ScanData }) {
|
export function ScanResult({ data }: { data: ScanData }) {
|
||||||
const [expandedCorrection, setExpandedCorrection] = useState<string | null>(null)
|
const [expandedCorrection, setExpandedCorrection] = useState<string | null>(null)
|
||||||
|
const [expandedDoc, setExpandedDoc] = useState<string | null>(null)
|
||||||
|
|
||||||
const undocCount = data.services.filter(s => s.status === 'undocumented').length
|
const undocCount = data.services.filter(s => s.status === 'undocumented').length
|
||||||
const okCount = data.services.filter(s => s.status === 'ok').length
|
const okCount = data.services.filter(s => s.status === 'ok').length
|
||||||
const outdatedCount = data.services.filter(s => s.status === 'outdated').length
|
const highCount = data.findings.filter(f => f.severity === 'HIGH' || f.severity === 'CRITICAL').length
|
||||||
const highCount = data.findings.filter(f => f.severity === 'HIGH').length
|
const docs = data.discovered_documents || []
|
||||||
|
|
||||||
|
// Group findings by doc_title
|
||||||
|
const docFindings: Record<string, ScanFinding[]> = {}
|
||||||
|
const generalFindings: ScanFinding[] = []
|
||||||
|
for (const f of data.findings) {
|
||||||
|
if (f.doc_title) {
|
||||||
|
if (!docFindings[f.doc_title]) docFindings[f.doc_title] = []
|
||||||
|
docFindings[f.doc_title].push(f)
|
||||||
|
} else {
|
||||||
|
generalFindings.push(f)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
return (
|
return (
|
||||||
<div className="space-y-5">
|
<div className="space-y-5">
|
||||||
@@ -59,7 +85,7 @@ export function ScanResult({ data }: { data: ScanData }) {
|
|||||||
<div className="grid grid-cols-4 gap-3">
|
<div className="grid grid-cols-4 gap-3">
|
||||||
<div className="bg-gray-50 rounded-lg p-3 text-center">
|
<div className="bg-gray-50 rounded-lg p-3 text-center">
|
||||||
<p className="text-2xl font-bold text-gray-900">{data.pages_scanned}</p>
|
<p className="text-2xl font-bold text-gray-900">{data.pages_scanned}</p>
|
||||||
<p className="text-xs text-gray-500">Seiten gescannt</p>
|
<p className="text-xs text-gray-500">Seiten</p>
|
||||||
</div>
|
</div>
|
||||||
<div className="bg-green-50 rounded-lg p-3 text-center">
|
<div className="bg-green-50 rounded-lg p-3 text-center">
|
||||||
<p className="text-2xl font-bold text-green-700">{okCount}</p>
|
<p className="text-2xl font-bold text-green-700">{okCount}</p>
|
||||||
@@ -69,9 +95,9 @@ export function ScanResult({ data }: { data: ScanData }) {
|
|||||||
<p className="text-2xl font-bold text-red-700">{undocCount}</p>
|
<p className="text-2xl font-bold text-red-700">{undocCount}</p>
|
||||||
<p className="text-xs text-gray-500">Nicht in DSE</p>
|
<p className="text-xs text-gray-500">Nicht in DSE</p>
|
||||||
</div>
|
</div>
|
||||||
<div className="bg-yellow-50 rounded-lg p-3 text-center">
|
<div className="bg-purple-50 rounded-lg p-3 text-center">
|
||||||
<p className="text-2xl font-bold text-yellow-700">{outdatedCount}</p>
|
<p className="text-2xl font-bold text-purple-700">{docs.length}</p>
|
||||||
<p className="text-xs text-gray-500">Veraltet</p>
|
<p className="text-xs text-gray-500">Dokumente</p>
|
||||||
</div>
|
</div>
|
||||||
</div>
|
</div>
|
||||||
|
|
||||||
@@ -79,14 +105,14 @@ export function ScanResult({ data }: { data: ScanData }) {
|
|||||||
{data.pages_list?.length > 0 && (
|
{data.pages_list?.length > 0 && (
|
||||||
<details className="text-sm">
|
<details className="text-sm">
|
||||||
<summary className="text-gray-600 cursor-pointer hover:text-gray-800">
|
<summary className="text-gray-600 cursor-pointer hover:text-gray-800">
|
||||||
{data.pages_scanned} Seiten gescannt — Details anzeigen
|
{data.pages_scanned} Seiten gescannt
|
||||||
</summary>
|
</summary>
|
||||||
<ul className="mt-2 space-y-1 ml-4">
|
<ul className="mt-2 space-y-1 ml-4">
|
||||||
{data.pages_list.map((p, i) => {
|
{data.pages_list.map((p, i) => {
|
||||||
const isMissing = data.missing_pages[p]
|
const isMissing = data.missing_pages[p]
|
||||||
return (
|
return (
|
||||||
<li key={i} className={`text-xs ${isMissing ? 'text-red-600' : 'text-gray-500'}`}>
|
<li key={i} className={`text-xs ${isMissing ? 'text-red-600' : 'text-gray-500'}`}>
|
||||||
{isMissing ? '✗' : '✓'} {p} {isMissing ? `(HTTP ${data.missing_pages[p]})` : ''}
|
{isMissing ? '\u2717' : '\u2713'} {p}
|
||||||
</li>
|
</li>
|
||||||
)
|
)
|
||||||
})}
|
})}
|
||||||
@@ -94,61 +120,127 @@ export function ScanResult({ data }: { data: ScanData }) {
|
|||||||
</details>
|
</details>
|
||||||
)}
|
)}
|
||||||
|
|
||||||
{/* AI / Chatbot Detection */}
|
|
||||||
<div className="flex gap-3">
|
|
||||||
<span className={`px-3 py-1 rounded-full text-xs font-medium ${data.ai_detected ? 'bg-purple-100 text-purple-800' : 'bg-gray-100 text-gray-600'}`}>
|
|
||||||
{data.ai_detected ? 'KI erkannt' : 'Keine KI erkannt'}
|
|
||||||
</span>
|
|
||||||
<span className={`px-3 py-1 rounded-full text-xs font-medium ${data.chatbot_detected ? 'bg-blue-100 text-blue-800' : 'bg-gray-100 text-gray-600'}`}>
|
|
||||||
{data.chatbot_detected ? `Chatbot: ${data.chatbot_provider}` : 'Kein Chatbot'}
|
|
||||||
</span>
|
|
||||||
</div>
|
|
||||||
|
|
||||||
{/* Services Table */}
|
{/* Services Table */}
|
||||||
<div>
|
{data.services.length > 0 && (
|
||||||
<h4 className="text-sm font-medium text-gray-700 mb-2">Dienstleister-Abgleich (SOLL/IST)</h4>
|
<div>
|
||||||
<div className="border rounded-lg overflow-hidden">
|
<h4 className="text-sm font-medium text-gray-700 mb-2">Dienstleister (SOLL/IST)</h4>
|
||||||
<table className="w-full text-sm">
|
<div className="border rounded-lg overflow-hidden">
|
||||||
<thead className="bg-gray-50">
|
<table className="w-full text-sm">
|
||||||
<tr>
|
<thead className="bg-gray-50">
|
||||||
<th className="px-3 py-2 text-left text-xs font-medium text-gray-500">Status</th>
|
<tr>
|
||||||
<th className="px-3 py-2 text-left text-xs font-medium text-gray-500">Dienst</th>
|
<th className="px-3 py-2 text-left text-xs font-medium text-gray-500">Status</th>
|
||||||
<th className="px-3 py-2 text-left text-xs font-medium text-gray-500">Land</th>
|
<th className="px-3 py-2 text-left text-xs font-medium text-gray-500">Dienst</th>
|
||||||
<th className="px-3 py-2 text-left text-xs font-medium text-gray-500">EU</th>
|
<th className="px-3 py-2 text-left text-xs font-medium text-gray-500">Land</th>
|
||||||
<th className="px-3 py-2 text-left text-xs font-medium text-gray-500">In DSE</th>
|
<th className="px-3 py-2 text-left text-xs font-medium text-gray-500">In DSE</th>
|
||||||
</tr>
|
</tr>
|
||||||
</thead>
|
</thead>
|
||||||
<tbody className="divide-y divide-gray-100">
|
<tbody className="divide-y divide-gray-100">
|
||||||
{data.services.map((s, i) => {
|
{data.services.map((s, i) => {
|
||||||
const st = STATUS_ICON[s.status] || STATUS_ICON.ok
|
const st = STATUS_ICON[s.status] || STATUS_ICON.ok
|
||||||
return (
|
return (
|
||||||
<tr key={i} className={s.status === 'undocumented' ? 'bg-red-50' : ''}>
|
<tr key={i} className={s.status === 'undocumented' ? 'bg-red-50' : ''}>
|
||||||
<td className={`px-3 py-2 font-bold ${st.color}`}>{st.icon}</td>
|
<td className={`px-3 py-2 font-bold ${st.color}`}>{st.icon}</td>
|
||||||
<td className="px-3 py-2">
|
<td className="px-3 py-2">
|
||||||
<span className="font-medium text-gray-900">{s.name}</span>
|
<span className="font-medium text-gray-900">{s.name}</span>
|
||||||
<span className="text-gray-400 text-xs ml-2">{s.category}</span>
|
<span className="text-gray-400 text-xs ml-2">{s.provider}</span>
|
||||||
</td>
|
</td>
|
||||||
<td className="px-3 py-2 text-gray-600">{s.country}</td>
|
<td className="px-3 py-2 text-gray-600">{s.country}</td>
|
||||||
<td className="px-3 py-2">{s.eu_adequate ? '✓' : '✗'}</td>
|
<td className="px-3 py-2">{s.in_dse ? '\u2713' : <span className="text-red-600 font-medium">Nein</span>}</td>
|
||||||
<td className="px-3 py-2">{s.in_dse ? 'Ja' : <span className="text-red-600 font-medium">Nein</span>}</td>
|
</tr>
|
||||||
</tr>
|
)
|
||||||
)
|
})}
|
||||||
})}
|
</tbody>
|
||||||
</tbody>
|
</table>
|
||||||
</table>
|
</div>
|
||||||
</div>
|
</div>
|
||||||
</div>
|
)}
|
||||||
|
|
||||||
{/* Findings */}
|
{/* === Document-Centric View === */}
|
||||||
{data.findings.length > 0 && (
|
{docs.length > 0 && (
|
||||||
<div>
|
<div>
|
||||||
<h4 className="text-sm font-medium text-gray-700 mb-2">
|
<h4 className="text-sm font-medium text-gray-700 mb-2">
|
||||||
Findings ({data.findings.length}, davon {highCount} kritisch)
|
Rechtliche Dokumente ({docs.length})
|
||||||
</h4>
|
</h4>
|
||||||
<div className="space-y-2">
|
<div className="space-y-2">
|
||||||
{data.findings.map((f, i) => {
|
{docs.map((doc, i) => {
|
||||||
|
const isExpanded = expandedDoc === doc.title
|
||||||
|
const findings = docFindings[doc.title] || []
|
||||||
|
const pct = doc.completeness_pct
|
||||||
|
const barColor = pct >= 80 ? 'bg-green-500' : pct >= 50 ? 'bg-yellow-500' : 'bg-red-500'
|
||||||
|
const statusLabel = pct >= 80 ? 'OK' : pct >= 50 ? 'Lueckenhaft' : 'Mangelhaft'
|
||||||
|
const statusColor = pct >= 80 ? 'text-green-700 bg-green-50' : pct >= 50 ? 'text-yellow-700 bg-yellow-50' : 'text-red-700 bg-red-50'
|
||||||
|
|
||||||
|
return (
|
||||||
|
<div key={i} className="border border-gray-200 rounded-lg overflow-hidden">
|
||||||
|
<button
|
||||||
|
onClick={() => setExpandedDoc(isExpanded ? null : doc.title)}
|
||||||
|
className="w-full flex items-center justify-between px-4 py-3 bg-gray-50/50 hover:bg-gray-50 text-left"
|
||||||
|
>
|
||||||
|
<div className="flex items-center gap-3 flex-1 min-w-0">
|
||||||
|
<svg className={`w-4 h-4 text-gray-400 transition-transform shrink-0 ${isExpanded ? 'rotate-90' : ''}`}
|
||||||
|
fill="none" stroke="currentColor" viewBox="0 0 24 24">
|
||||||
|
<path strokeLinecap="round" strokeLinejoin="round" strokeWidth={2} d="M9 5l7 7-7 7" />
|
||||||
|
</svg>
|
||||||
|
<div className="min-w-0 flex-1">
|
||||||
|
<div className="text-sm font-medium text-gray-900 truncate">{doc.title}</div>
|
||||||
|
<div className="text-xs text-gray-500">
|
||||||
|
{doc.word_count} Woerter
|
||||||
|
{findings.length > 0 && <span className="text-red-600 ml-2">{findings.length} Maengel</span>}
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
<div className="flex items-center gap-3 shrink-0 ml-3">
|
||||||
|
{/* Completeness bar */}
|
||||||
|
<div className="w-20 h-2 bg-gray-200 rounded-full overflow-hidden">
|
||||||
|
<div className={`h-full rounded-full ${barColor}`} style={{ width: `${pct}%` }} />
|
||||||
|
</div>
|
||||||
|
<span className={`text-xs font-medium px-2 py-0.5 rounded ${statusColor}`}>
|
||||||
|
{pct}%
|
||||||
|
</span>
|
||||||
|
</div>
|
||||||
|
</button>
|
||||||
|
|
||||||
|
{isExpanded && (
|
||||||
|
<div className="px-4 py-3 border-t border-gray-100 space-y-2">
|
||||||
|
{findings.length > 0 ? (
|
||||||
|
findings.map((f, fi) => {
|
||||||
|
const sev = SEV_STYLE[f.severity] || SEV_STYLE.MEDIUM
|
||||||
|
return (
|
||||||
|
<div key={fi} className="flex items-start gap-2 text-sm">
|
||||||
|
<span className={`w-2 h-2 rounded-full mt-1.5 shrink-0 ${sev.dot}`} />
|
||||||
|
<span className="text-gray-700">{f.text}</span>
|
||||||
|
</div>
|
||||||
|
)
|
||||||
|
})
|
||||||
|
) : (
|
||||||
|
<p className="text-sm text-green-600">Alle Pflichtangaben vorhanden.</p>
|
||||||
|
)}
|
||||||
|
{doc.url && (
|
||||||
|
<a href={doc.url} target="_blank" rel="noopener noreferrer"
|
||||||
|
className="text-xs text-purple-600 hover:underline mt-2 inline-block">
|
||||||
|
Dokument oeffnen
|
||||||
|
</a>
|
||||||
|
)}
|
||||||
|
</div>
|
||||||
|
)}
|
||||||
|
</div>
|
||||||
|
)
|
||||||
|
})}
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
)}
|
||||||
|
|
||||||
|
{/* General Findings (not associated with a specific document) */}
|
||||||
|
{generalFindings.length > 0 && (
|
||||||
|
<div>
|
||||||
|
<h4 className="text-sm font-medium text-gray-700 mb-2">
|
||||||
|
Allgemeine Findings ({generalFindings.length})
|
||||||
|
</h4>
|
||||||
|
<div className="space-y-2">
|
||||||
|
{generalFindings.map((f, i) => {
|
||||||
const sev = SEV_STYLE[f.severity] || SEV_STYLE.MEDIUM
|
const sev = SEV_STYLE[f.severity] || SEV_STYLE.MEDIUM
|
||||||
const isExpanded = expandedCorrection === f.code
|
const corrKey = `gen-${i}`
|
||||||
|
const isExp = expandedCorrection === corrKey
|
||||||
return (
|
return (
|
||||||
<div key={i} className={`border rounded-lg p-3 ${sev.bg}`}>
|
<div key={i} className={`border rounded-lg p-3 ${sev.bg}`}>
|
||||||
<div className="flex items-start gap-2">
|
<div className="flex items-start gap-2">
|
||||||
@@ -159,20 +251,15 @@ export function ScanResult({ data }: { data: ScanData }) {
|
|||||||
</div>
|
</div>
|
||||||
{f.correction && (
|
{f.correction && (
|
||||||
<div className="mt-2">
|
<div className="mt-2">
|
||||||
<button
|
<button onClick={() => setExpandedCorrection(isExp ? null : corrKey)}
|
||||||
onClick={() => setExpandedCorrection(isExpanded ? null : f.code)}
|
className="text-xs text-purple-600 hover:text-purple-800 font-medium">
|
||||||
className="text-xs text-purple-600 hover:text-purple-800 font-medium"
|
{isExp ? 'Korrektur ausblenden' : 'Korrekturvorschlag'}
|
||||||
>
|
|
||||||
{isExpanded ? '▼ Korrekturvorschlag ausblenden' : '▶ Korrekturvorschlag anzeigen'}
|
|
||||||
</button>
|
</button>
|
||||||
{isExpanded && (
|
{isExp && (
|
||||||
<div className="mt-2 bg-white border border-gray-200 rounded-lg p-3 relative">
|
<div className="mt-2 bg-white border border-gray-200 rounded-lg p-3 relative">
|
||||||
<pre className="text-xs text-gray-700 whitespace-pre-wrap font-sans">{f.correction}</pre>
|
<pre className="text-xs text-gray-700 whitespace-pre-wrap font-sans">{f.correction}</pre>
|
||||||
<button
|
<button onClick={() => navigator.clipboard.writeText(f.correction)}
|
||||||
onClick={() => navigator.clipboard.writeText(f.correction)}
|
className="absolute top-2 right-2 text-xs bg-gray-100 hover:bg-gray-200 px-2 py-1 rounded">
|
||||||
className="absolute top-2 right-2 text-xs bg-gray-100 hover:bg-gray-200 px-2 py-1 rounded"
|
|
||||||
title="Kopieren"
|
|
||||||
>
|
|
||||||
Kopieren
|
Kopieren
|
||||||
</button>
|
</button>
|
||||||
</div>
|
</div>
|
||||||
@@ -185,6 +272,14 @@ export function ScanResult({ data }: { data: ScanData }) {
|
|||||||
</div>
|
</div>
|
||||||
</div>
|
</div>
|
||||||
)}
|
)}
|
||||||
|
|
||||||
|
{/* Email Status */}
|
||||||
|
{data.email_status && (
|
||||||
|
<div className="text-xs text-gray-500 flex items-center gap-2">
|
||||||
|
<span className={`w-2 h-2 rounded-full ${data.email_status === 'sent' ? 'bg-green-400' : 'bg-gray-300'}`} />
|
||||||
|
E-Mail: {data.email_status === 'sent' ? 'Gesendet' : data.email_status}
|
||||||
|
</div>
|
||||||
|
)}
|
||||||
</div>
|
</div>
|
||||||
)
|
)
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -73,25 +73,41 @@ def build_scan_summary(
|
|||||||
f"Findings: {n_findings} ({high} mit hoher Prioritaet)",
|
f"Findings: {n_findings} ({high} mit hoher Prioritaet)",
|
||||||
])
|
])
|
||||||
|
|
||||||
# DSI Documents section
|
# DSI Documents section — grouped with their findings
|
||||||
if discovered_docs:
|
if discovered_docs:
|
||||||
parts.extend([
|
parts.extend(["", f"Rechtliche Dokumente ({len(discovered_docs)})"])
|
||||||
"",
|
|
||||||
f"Rechtliche Dokumente gefunden: {len(discovered_docs)}",
|
# Group findings by doc_title
|
||||||
])
|
doc_findings_map: dict[str, list] = {}
|
||||||
|
general_findings: list = []
|
||||||
|
for f in findings:
|
||||||
|
dt = f.doc_title if hasattr(f, 'doc_title') else ""
|
||||||
|
if dt:
|
||||||
|
doc_findings_map.setdefault(dt, []).append(f)
|
||||||
|
else:
|
||||||
|
general_findings.append(f)
|
||||||
|
|
||||||
for doc in discovered_docs:
|
for doc in discovered_docs:
|
||||||
|
title = doc.title if hasattr(doc, 'title') else "?"
|
||||||
pct = doc.completeness_pct if hasattr(doc, 'completeness_pct') else 0
|
pct = doc.completeness_pct if hasattr(doc, 'completeness_pct') else 0
|
||||||
fc = doc.findings_count if hasattr(doc, 'findings_count') else 0
|
|
||||||
wc = doc.word_count if hasattr(doc, 'word_count') else 0
|
wc = doc.word_count if hasattr(doc, 'word_count') else 0
|
||||||
status = "OK" if pct >= 80 else "LUECKENHAFT" if pct >= 50 else "MANGELHAFT"
|
status = "OK" if pct >= 80 else "LUECKENHAFT" if pct >= 50 else "MANGELHAFT"
|
||||||
dt = doc.doc_type if hasattr(doc, 'doc_type') else "unknown"
|
parts.append(f" [{status}] {title} ({pct}%, {wc} Woerter)")
|
||||||
title = doc.title if hasattr(doc, 'title') else "?"
|
for f in doc_findings_map.get(title, []):
|
||||||
parts.append(
|
sev = f.severity if hasattr(f, 'severity') else "?"
|
||||||
f" [{status}] {title} ({dt}, {wc} Woerter, "
|
txt = f.text if hasattr(f, 'text') else str(f)
|
||||||
f"{pct}% vollstaendig, {fc} Maengel)"
|
marker = "!!" if sev == "HIGH" else "!" if sev == "MEDIUM" else "i"
|
||||||
)
|
parts.append(f" {marker} {txt}")
|
||||||
|
|
||||||
if findings:
|
# General findings (no doc association)
|
||||||
|
if general_findings:
|
||||||
|
parts.extend(["", "Allgemeine Findings"])
|
||||||
|
for f in general_findings[:20]:
|
||||||
|
sev = f.severity if hasattr(f, 'severity') else "?"
|
||||||
|
txt = f.text if hasattr(f, 'text') else str(f)
|
||||||
|
marker = "!!" if sev == "HIGH" else "!" if sev == "MEDIUM" else "i"
|
||||||
|
parts.append(f" [{marker}] {txt}")
|
||||||
|
elif findings:
|
||||||
parts.append("")
|
parts.append("")
|
||||||
for f in findings[:20]:
|
for f in findings[:20]:
|
||||||
sev = f.severity if hasattr(f, 'severity') else "?"
|
sev = f.severity if hasattr(f, 'severity') else "?"
|
||||||
|
|||||||
@@ -79,6 +79,7 @@ class ScanFinding(BaseModel):
|
|||||||
severity: str
|
severity: str
|
||||||
text: str
|
text: str
|
||||||
correction: str = ""
|
correction: str = ""
|
||||||
|
doc_title: str = ""
|
||||||
text_reference: TextReferenceModel | None = None
|
text_reference: TextReferenceModel | None = None
|
||||||
|
|
||||||
|
|
||||||
@@ -264,6 +265,7 @@ async def _execute_scan(req: ScanRequest, scan_id: str = "") -> ScanResponse:
|
|||||||
if "SCORE" not in df.get("code", ""):
|
if "SCORE" not in df.get("code", ""):
|
||||||
dsi_findings.append(ScanFinding(
|
dsi_findings.append(ScanFinding(
|
||||||
code=df["code"], severity=df["severity"], text=df["text"],
|
code=df["code"], severity=df["severity"], text=df["text"],
|
||||||
|
doc_title=doc["title"],
|
||||||
))
|
))
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
logger.warning("DSI discovery failed: %s %s", type(e).__name__, e)
|
logger.warning("DSI discovery failed: %s %s", type(e).__name__, e)
|
||||||
|
|||||||
@@ -444,13 +444,18 @@ async def _expand_all_interactive(page: Page) -> None:
|
|||||||
|
|
||||||
|
|
||||||
async def _find_inline_dsi_sections(page: Page) -> list[dict]:
|
async def _find_inline_dsi_sections(page: Page) -> list[dict]:
|
||||||
"""Find DSI content already visible on the page (e.g. expanded accordions)."""
|
"""Find DSI content already visible on the page (e.g. expanded accordions).
|
||||||
|
|
||||||
|
Only counts top-level documents (H1/H2 with DSI keywords).
|
||||||
|
Sub-sections (H3/H4 like 'Cookies', 'Betroffenenrechte') are NOT counted
|
||||||
|
as separate documents — their text is part of the parent document.
|
||||||
|
"""
|
||||||
try:
|
try:
|
||||||
sections = await page.evaluate("""
|
sections = await page.evaluate("""
|
||||||
() => {
|
() => {
|
||||||
const results = [];
|
const results = [];
|
||||||
// Find headings that match DSI keywords
|
// Only H1 and H2 count as document-level headings
|
||||||
const headings = document.querySelectorAll('h1, h2, h3, h4, h5');
|
const headings = document.querySelectorAll('h1, h2');
|
||||||
const dsiKeywords = [
|
const dsiKeywords = [
|
||||||
'datenschutz', 'privacy', 'données', 'privacidad', 'protezione',
|
'datenschutz', 'privacy', 'données', 'privacidad', 'protezione',
|
||||||
'gegevensbescherming', 'ochrona danych', 'tietosuoja', 'integritet',
|
'gegevensbescherming', 'ochrona danych', 'tietosuoja', 'integritet',
|
||||||
@@ -461,12 +466,13 @@ async def _find_inline_dsi_sections(page: Page) -> list[dict]:
|
|||||||
const textLower = text.toLowerCase();
|
const textLower = text.toLowerCase();
|
||||||
if (!dsiKeywords.some(kw => textLower.includes(kw))) continue;
|
if (!dsiKeywords.some(kw => textLower.includes(kw))) continue;
|
||||||
|
|
||||||
// Get the section content following this heading
|
// Get ALL content until the next H1/H2 (include sub-sections H3-H5)
|
||||||
let content = '';
|
let content = '';
|
||||||
let el = h.nextElementSibling;
|
let el = h.nextElementSibling;
|
||||||
let count = 0;
|
let count = 0;
|
||||||
while (el && count < 50) {
|
while (el && count < 200) {
|
||||||
if (el.tagName.match(/^H[1-5]$/)) break;
|
// Stop at next H1 or H2 (next top-level document)
|
||||||
|
if (el.tagName === 'H1' || el.tagName === 'H2') break;
|
||||||
content += (el.textContent || '').trim() + '\\n';
|
content += (el.textContent || '').trim() + '\\n';
|
||||||
el = el.nextElementSibling;
|
el = el.nextElementSibling;
|
||||||
count++;
|
count++;
|
||||||
|
|||||||
Reference in New Issue
Block a user