diff --git a/admin-compliance/app/sdk/agent/_components/ScanResult.tsx b/admin-compliance/app/sdk/agent/_components/ScanResult.tsx index e19d01f..36308fc 100644 --- a/admin-compliance/app/sdk/agent/_components/ScanResult.tsx +++ b/admin-compliance/app/sdk/agent/_components/ScanResult.tsx @@ -19,6 +19,17 @@ interface ScanFinding { severity: string text: string correction: string + doc_title: string +} + +interface DiscoveredDocument { + title: string + url: string + doc_type: string + language: string + word_count: number + completeness_pct: number + findings_count: number } interface ScanData { @@ -26,6 +37,7 @@ interface ScanData { pages_list: string[] services: ServiceInfo[] findings: ScanFinding[] + discovered_documents?: DiscoveredDocument[] ai_detected: boolean chatbot_detected: boolean chatbot_provider: string @@ -34,24 +46,38 @@ interface ScanData { } const STATUS_ICON: Record = { - ok: { icon: '✓', color: 'text-green-600' }, - undocumented: { icon: '✗', color: 'text-red-600' }, + ok: { icon: '\u2713', color: 'text-green-600' }, + undocumented: { icon: '\u2717', color: 'text-red-600' }, outdated: { icon: '~', color: 'text-yellow-600' }, } -const SEV_STYLE: Record = { - HIGH: { bg: 'bg-red-50 border-red-200', text: 'text-red-800' }, - MEDIUM: { bg: 'bg-yellow-50 border-yellow-200', text: 'text-yellow-800' }, - LOW: { bg: 'bg-blue-50 border-blue-200', text: 'text-blue-800' }, +const SEV_STYLE: Record = { + HIGH: { bg: 'bg-red-50 border-red-200', text: 'text-red-800', dot: 'bg-red-500' }, + MEDIUM: { bg: 'bg-yellow-50 border-yellow-200', text: 'text-yellow-800', dot: 'bg-yellow-500' }, + LOW: { bg: 'bg-blue-50 border-blue-200', text: 'text-blue-800', dot: 'bg-blue-500' }, + CRITICAL: { bg: 'bg-red-100 border-red-300', text: 'text-red-900', dot: 'bg-red-700' }, } export function ScanResult({ data }: { data: ScanData }) { const [expandedCorrection, setExpandedCorrection] = useState(null) + const [expandedDoc, setExpandedDoc] = useState(null) const undocCount = data.services.filter(s => s.status === 'undocumented').length const okCount = data.services.filter(s => s.status === 'ok').length - const outdatedCount = data.services.filter(s => s.status === 'outdated').length - const highCount = data.findings.filter(f => f.severity === 'HIGH').length + const highCount = data.findings.filter(f => f.severity === 'HIGH' || f.severity === 'CRITICAL').length + const docs = data.discovered_documents || [] + + // Group findings by doc_title + const docFindings: Record = {} + const generalFindings: ScanFinding[] = [] + for (const f of data.findings) { + if (f.doc_title) { + if (!docFindings[f.doc_title]) docFindings[f.doc_title] = [] + docFindings[f.doc_title].push(f) + } else { + generalFindings.push(f) + } + } return (
@@ -59,7 +85,7 @@ export function ScanResult({ data }: { data: ScanData }) {

{data.pages_scanned}

-

Seiten gescannt

+

Seiten

{okCount}

@@ -69,9 +95,9 @@ export function ScanResult({ data }: { data: ScanData }) {

{undocCount}

Nicht in DSE

-
-

{outdatedCount}

-

Veraltet

+
+

{docs.length}

+

Dokumente

@@ -79,14 +105,14 @@ export function ScanResult({ data }: { data: ScanData }) { {data.pages_list?.length > 0 && (
- {data.pages_scanned} Seiten gescannt — Details anzeigen + {data.pages_scanned} Seiten gescannt
    {data.pages_list.map((p, i) => { const isMissing = data.missing_pages[p] return (
  • - {isMissing ? '✗' : '✓'} {p} {isMissing ? `(HTTP ${data.missing_pages[p]})` : ''} + {isMissing ? '\u2717' : '\u2713'} {p}
  • ) })} @@ -94,61 +120,127 @@ export function ScanResult({ data }: { data: ScanData }) {
)} - {/* AI / Chatbot Detection */} -
- - {data.ai_detected ? 'KI erkannt' : 'Keine KI erkannt'} - - - {data.chatbot_detected ? `Chatbot: ${data.chatbot_provider}` : 'Kein Chatbot'} - -
- {/* Services Table */} -
-

Dienstleister-Abgleich (SOLL/IST)

-
- - - - - - - - - - - - {data.services.map((s, i) => { - const st = STATUS_ICON[s.status] || STATUS_ICON.ok - return ( - - - - - - - - ) - })} - -
StatusDienstLandEUIn DSE
{st.icon} - {s.name} - {s.category} - {s.country}{s.eu_adequate ? '✓' : '✗'}{s.in_dse ? 'Ja' : Nein}
+ {data.services.length > 0 && ( +
+

Dienstleister (SOLL/IST)

+
+ + + + + + + + + + + {data.services.map((s, i) => { + const st = STATUS_ICON[s.status] || STATUS_ICON.ok + return ( + + + + + + + ) + })} + +
StatusDienstLandIn DSE
{st.icon} + {s.name} + {s.provider} + {s.country}{s.in_dse ? '\u2713' : Nein}
+
-
+ )} - {/* Findings */} - {data.findings.length > 0 && ( + {/* === Document-Centric View === */} + {docs.length > 0 && (

- Findings ({data.findings.length}, davon {highCount} kritisch) + Rechtliche Dokumente ({docs.length})

- {data.findings.map((f, i) => { + {docs.map((doc, i) => { + const isExpanded = expandedDoc === doc.title + const findings = docFindings[doc.title] || [] + const pct = doc.completeness_pct + const barColor = pct >= 80 ? 'bg-green-500' : pct >= 50 ? 'bg-yellow-500' : 'bg-red-500' + const statusLabel = pct >= 80 ? 'OK' : pct >= 50 ? 'Lueckenhaft' : 'Mangelhaft' + const statusColor = pct >= 80 ? 'text-green-700 bg-green-50' : pct >= 50 ? 'text-yellow-700 bg-yellow-50' : 'text-red-700 bg-red-50' + + return ( +
+ + + {isExpanded && ( +
+ {findings.length > 0 ? ( + findings.map((f, fi) => { + const sev = SEV_STYLE[f.severity] || SEV_STYLE.MEDIUM + return ( +
+ + {f.text} +
+ ) + }) + ) : ( +

Alle Pflichtangaben vorhanden.

+ )} + {doc.url && ( + + Dokument oeffnen + + )} +
+ )} +
+ ) + })} +
+
+ )} + + {/* General Findings (not associated with a specific document) */} + {generalFindings.length > 0 && ( +
+

+ Allgemeine Findings ({generalFindings.length}) +

+
+ {generalFindings.map((f, i) => { const sev = SEV_STYLE[f.severity] || SEV_STYLE.MEDIUM - const isExpanded = expandedCorrection === f.code + const corrKey = `gen-${i}` + const isExp = expandedCorrection === corrKey return (
@@ -159,20 +251,15 @@ export function ScanResult({ data }: { data: ScanData }) {
{f.correction && (
- - {isExpanded && ( + {isExp && (
{f.correction}
-
@@ -185,6 +272,14 @@ export function ScanResult({ data }: { data: ScanData }) {
)} + + {/* Email Status */} + {data.email_status && ( +
+ + E-Mail: {data.email_status === 'sent' ? 'Gesendet' : data.email_status} +
+ )}
) } diff --git a/backend-compliance/compliance/api/agent_scan_helpers.py b/backend-compliance/compliance/api/agent_scan_helpers.py index 98d534f..82bca19 100644 --- a/backend-compliance/compliance/api/agent_scan_helpers.py +++ b/backend-compliance/compliance/api/agent_scan_helpers.py @@ -73,25 +73,41 @@ def build_scan_summary( f"Findings: {n_findings} ({high} mit hoher Prioritaet)", ]) - # DSI Documents section + # DSI Documents section — grouped with their findings if discovered_docs: - parts.extend([ - "", - f"Rechtliche Dokumente gefunden: {len(discovered_docs)}", - ]) + parts.extend(["", f"Rechtliche Dokumente ({len(discovered_docs)})"]) + + # Group findings by doc_title + doc_findings_map: dict[str, list] = {} + general_findings: list = [] + for f in findings: + dt = f.doc_title if hasattr(f, 'doc_title') else "" + if dt: + doc_findings_map.setdefault(dt, []).append(f) + else: + general_findings.append(f) + for doc in discovered_docs: + title = doc.title if hasattr(doc, 'title') else "?" pct = doc.completeness_pct if hasattr(doc, 'completeness_pct') else 0 - fc = doc.findings_count if hasattr(doc, 'findings_count') else 0 wc = doc.word_count if hasattr(doc, 'word_count') else 0 status = "OK" if pct >= 80 else "LUECKENHAFT" if pct >= 50 else "MANGELHAFT" - dt = doc.doc_type if hasattr(doc, 'doc_type') else "unknown" - title = doc.title if hasattr(doc, 'title') else "?" - parts.append( - f" [{status}] {title} ({dt}, {wc} Woerter, " - f"{pct}% vollstaendig, {fc} Maengel)" - ) + parts.append(f" [{status}] {title} ({pct}%, {wc} Woerter)") + for f in doc_findings_map.get(title, []): + sev = f.severity if hasattr(f, 'severity') else "?" + txt = f.text if hasattr(f, 'text') else str(f) + marker = "!!" if sev == "HIGH" else "!" if sev == "MEDIUM" else "i" + parts.append(f" {marker} {txt}") - if findings: + # General findings (no doc association) + if general_findings: + parts.extend(["", "Allgemeine Findings"]) + for f in general_findings[:20]: + sev = f.severity if hasattr(f, 'severity') else "?" + txt = f.text if hasattr(f, 'text') else str(f) + marker = "!!" if sev == "HIGH" else "!" if sev == "MEDIUM" else "i" + parts.append(f" [{marker}] {txt}") + elif findings: parts.append("") for f in findings[:20]: sev = f.severity if hasattr(f, 'severity') else "?" diff --git a/backend-compliance/compliance/api/agent_scan_routes.py b/backend-compliance/compliance/api/agent_scan_routes.py index 23952fa..510c992 100644 --- a/backend-compliance/compliance/api/agent_scan_routes.py +++ b/backend-compliance/compliance/api/agent_scan_routes.py @@ -79,6 +79,7 @@ class ScanFinding(BaseModel): severity: str text: str correction: str = "" + doc_title: str = "" text_reference: TextReferenceModel | None = None @@ -264,6 +265,7 @@ async def _execute_scan(req: ScanRequest, scan_id: str = "") -> ScanResponse: if "SCORE" not in df.get("code", ""): dsi_findings.append(ScanFinding( code=df["code"], severity=df["severity"], text=df["text"], + doc_title=doc["title"], )) except Exception as e: logger.warning("DSI discovery failed: %s %s", type(e).__name__, e) diff --git a/consent-tester/services/dsi_discovery.py b/consent-tester/services/dsi_discovery.py index d86e3d4..625f232 100644 --- a/consent-tester/services/dsi_discovery.py +++ b/consent-tester/services/dsi_discovery.py @@ -444,13 +444,18 @@ async def _expand_all_interactive(page: Page) -> None: async def _find_inline_dsi_sections(page: Page) -> list[dict]: - """Find DSI content already visible on the page (e.g. expanded accordions).""" + """Find DSI content already visible on the page (e.g. expanded accordions). + + Only counts top-level documents (H1/H2 with DSI keywords). + Sub-sections (H3/H4 like 'Cookies', 'Betroffenenrechte') are NOT counted + as separate documents — their text is part of the parent document. + """ try: sections = await page.evaluate(""" () => { const results = []; - // Find headings that match DSI keywords - const headings = document.querySelectorAll('h1, h2, h3, h4, h5'); + // Only H1 and H2 count as document-level headings + const headings = document.querySelectorAll('h1, h2'); const dsiKeywords = [ 'datenschutz', 'privacy', 'données', 'privacidad', 'protezione', 'gegevensbescherming', 'ochrona danych', 'tietosuoja', 'integritet', @@ -461,12 +466,13 @@ async def _find_inline_dsi_sections(page: Page) -> list[dict]: const textLower = text.toLowerCase(); if (!dsiKeywords.some(kw => textLower.includes(kw))) continue; - // Get the section content following this heading + // Get ALL content until the next H1/H2 (include sub-sections H3-H5) let content = ''; let el = h.nextElementSibling; let count = 0; - while (el && count < 50) { - if (el.tagName.match(/^H[1-5]$/)) break; + while (el && count < 200) { + // Stop at next H1 or H2 (next top-level document) + if (el.tagName === 'H1' || el.tagName === 'H2') break; content += (el.textContent || '').trim() + '\\n'; el = el.nextElementSibling; count++;