diff --git a/backend-compliance/compliance/api/agent_compliance_check_routes.py b/backend-compliance/compliance/api/agent_compliance_check_routes.py
index d135eca2..3976b46d 100644
--- a/backend-compliance/compliance/api/agent_compliance_check_routes.py
+++ b/backend-compliance/compliance/api/agent_compliance_check_routes.py
@@ -317,11 +317,13 @@ async def _run_compliance_check(check_id: str, req: ComplianceCheckRequest):
else:
r.scenario = "import"
- # Step 5: Build report
+ # Step 5: Build report with management summary
_update(check_id, "Report wird erstellt...")
+ from .agent_doc_check_report import build_management_summary
+ summary_html = build_management_summary(results)
report_html = build_html_report(results, None)
profile_html = _build_profile_html(profile)
- full_html = profile_html + report_html
+ full_html = summary_html + profile_html + report_html
# Step 6: Send email
doc_count = len([r for r in results if not r.error])
diff --git a/backend-compliance/compliance/api/agent_doc_check_report.py b/backend-compliance/compliance/api/agent_doc_check_report.py
index 6f4186d1..6508cfd4 100644
--- a/backend-compliance/compliance/api/agent_doc_check_report.py
+++ b/backend-compliance/compliance/api/agent_doc_check_report.py
@@ -40,6 +40,121 @@ def _hint_box(hint: str) -> str:
)
+def build_management_summary(results: list[DocCheckResult]) -> str:
+ """Build a plain-language management summary for the CEO/GF.
+
+ No legal jargon — concrete actions that can be delegated to staff,
+ lawyers, or the DPO.
+
+ Results with ``error`` set are excluded from every count. The remaining
+ results are bucketed by ``completeness_pct``: exactly 100 is complete,
+ strictly between 0 and 100 needs corrections, exactly 0 is critical.
+
+ Action items are derived via ``_check_to_action`` from level-1 checks
+ that are neither passed nor skipped and whose severity is not "INFO";
+ at most 3 checks per document are considered and at most 10 actions are
+ rendered in total.
+
+ Args:
+ results: Per-document check results.
+
+ Returns:
+ The collected markup fragments joined with newlines. When no
+ error-free result exists, only the heading plus a
+ "Keine Dokumente geprueft." notice is returned.
+ """
+ # Bucket the error-free results by completeness.
+ ok = [r for r in results if r.completeness_pct == 100 and not r.error]
+ fixable = [r for r in results if 0 < r.completeness_pct < 100 and not r.error]
+ critical = [r for r in results if r.completeness_pct == 0 and not r.error]
+ errors = [r for r in results if r.error]
+
+ html = [
+ '
',
+ '
'
+ 'Zusammenfassung fuer die Geschaeftsfuehrung
',
+ ]
+
+ # Overall status
+ # total counts only documents that were actually checked (no error).
+ total = len(results) - len(errors)
+ if total == 0:
+ html.append('
Keine Dokumente geprueft.
')
+ # NOTE(review): this return happens before the final html.append(...)
+ # at the end of the function, so whatever that last fragment
+ # contributes is missing on this path — confirm it is not a required
+ # closing tag.
+ return "\n".join(html)
+
+ if len(ok) == total:
+ html.append(
+ ''
+ 'Alle Dokumente sind vollstaendig. Keine dringenden Massnahmen noetig.
'
+ )
+ else:
+ html.append(
+ f''
+ f'{len(ok)} von {total} Dokumenten sind vollstaendig. '
+ f'{len(fixable)} brauchen Korrekturen'
+ f'{f", {len(critical)} fehlen oder sind unbrauchbar" if critical else ""}.
'
+ )
+
+ # Concrete actions
+ # Only incomplete, error-free documents contribute action items.
+ actions: list[str] = []
+ for r in results:
+ if r.error or r.completeness_pct == 100:
+ continue
+ failed_checks = [
+ c for c in r.checks
+ if c.level == 1 and not c.passed and not c.skipped
+ and c.severity != "INFO"
+ ]
+ for c in failed_checks[:3]: # Max 3 per document
+ action = _check_to_action(r.label, c.label, c.hint)
+ if action:
+ actions.append(action)
+
+ if actions:
+ html.append(
+ ''
+ 'Konkrete Aufgaben:
'
+ ''
+ )
+ for a in actions[:10]: # Max 10 actions
+ html.append(f'- {a}
')
+ html.append('
')
+
+ html.append('')
+ return "\n".join(html)
+
+
+def _check_to_action(doc_label: str, check_label: str, hint: str) -> str:
+ """Convert a failed check into a plain-language action item."""
+ # Map technical check labels to business-language actions
+ label_lower = check_label.lower()
+
+ if "datenschutzbeauftragter" in label_lower or "dsb" in label_lower:
+ return (f"{doc_label}: Ihren Datenschutzbeauftragten "
+ f"mit Kontaktdaten erwaehnen. Pflicht ab 20 Mitarbeitern.")
+
+ if "beschwerderecht" in label_lower or "art. 77" in label_lower:
+ return (f"{doc_label}: Hinweis auf das Beschwerderecht "
+ f"bei der Aufsichtsbehoerde ergaenzen (Name + Kontakt der Behoerde).")
+
+ if "betroffenenrechte" in label_lower:
+ return (f"{doc_label}: Alle Betroffenenrechte "
+ f"(Auskunft, Berichtigung, Loeschung, etc.) einzeln auffuehren.")
+
+ if "verantwortlicher" in label_lower:
+ return (f"{doc_label}: Vollstaendige Firmenbezeichnung "
+ f"mit Rechtsform, Adresse, E-Mail und Telefon eintragen.")
+
+ if "interessenabwaegung" in label_lower:
+ return (f"{doc_label}: Bei 'berechtigtem Interesse' "
+ f"die Abwaegung dokumentieren. Aufgabe fuer den DSB/Rechtsanwalt.")
+
+ if "widerrufsbelehrung" in label_lower or "widerruf" in label_lower:
+ return (f"{doc_label}: Gesetzliche Widerrufsbelehrung "
+ f"mit 14-Tage-Frist und Musterformular bereitstellen.")
+
+ if "loeschkonzept" in label_lower:
+ return (f"{doc_label}: Loeschfristen und -prozess "
+ f"dokumentieren. Aufgabe fuer den DSB.")
+
+ if "profiling" in label_lower or "art. 22" in label_lower:
+ return (f"{doc_label}: Hinweis ergaenzen ob "
+ f"automatisierte Entscheidungen stattfinden oder nicht.")
+
+ if "nicht im eingereichten text" in label_lower:
+ return (f"{doc_label}: Das eingereichte Dokument "
+ f"enthaelt nicht den erwarteten Inhalt. Bitte korrekte URL pruefen.")
+
+ # Generic fallback
+ if hint and len(hint) < 150:
+ return f"{doc_label}: {hint[:120]}"
+
+ return f"{doc_label}: '{check_label}' muss ergaenzt werden."
+
+
def build_html_report(
results: list[DocCheckResult],
cookie_result: dict | None,
diff --git a/zeroclaw/scripts/batch_gt_test.py b/zeroclaw/scripts/batch_gt_test.py
new file mode 100644
index 00000000..2b00753a
--- /dev/null
+++ b/zeroclaw/scripts/batch_gt_test.py
@@ -0,0 +1,235 @@
+#!/usr/bin/env python3
+"""
+Batch Ground Truth Test — run compliance check on all 10 GT websites.
+
+Usage:
+ python3 batch_gt_test.py [--backend-url URL] [--sites 1,3,5|all]
+
+Calls the compliance-check API for each website's DSI + Impressum URLs,
+polls for results, and prints a comparison table.
+"""
+
+import argparse
+import json
+import sys
+import time
+
+import httpx
+
+# The 10 ground-truth (GT) websites with their known document URLs.
+# Each entry has a display "name" and a "documents" list of
+# {"doc_type", "url"} dicts; run_check() sends the "documents" list unchanged
+# as the request payload. Order matters: the --sites option selects entries
+# by their 1-based position in this list.
+GT_WEBSITES = [
+ {
+ "name": "SafetyKon",
+ "documents": [
+ {"doc_type": "dse", "url": "https://safetykon.de/datenschutz"},
+ {"doc_type": "impressum", "url": "https://safetykon.de/impressum"},
+ ],
+ },
+ {
+ "name": "IHK Konstanz",
+ "documents": [
+ {"doc_type": "dse", "url": "https://www.ihk.de/konstanz/servicemarken/ueber-uns/downloads/datenschutzinformationen-zum-internetangebot-4163288"},
+ {"doc_type": "impressum", "url": "https://www.ihk.de/konstanz/impressum"},
+ ],
+ },
+ {
+ "name": "Stadt Koeln",
+ "documents": [
+ {"doc_type": "dse", "url": "https://www.stadt-koeln.de/datenschutz"},
+ {"doc_type": "impressum", "url": "https://www.stadt-koeln.de/impressum"},
+ ],
+ },
+ {
+ "name": "BMW",
+ "documents": [
+ {"doc_type": "dse", "url": "https://www.bmw.de/de/footer/metanavigation/datenschutz.html"},
+ {"doc_type": "impressum", "url": "https://www.bmw.de/de/footer/metanavigation/impressum.html"},
+ ],
+ },
+ {
+ "name": "Sparkasse Bodensee",
+ "documents": [
+ {"doc_type": "dse", "url": "https://www.sparkasse-bodensee.de/de/home/toolbar/datenschutz.html"},
+ {"doc_type": "impressum", "url": "https://www.sparkasse-bodensee.de/de/home/toolbar/impressum.html"},
+ ],
+ },
+ {
+ "name": "Spiegel",
+ "documents": [
+ {"doc_type": "dse", "url": "https://www.spiegel.de/datenschutz-spiegel"},
+ {"doc_type": "impressum", "url": "https://www.spiegel.de/impressum"},
+ {"doc_type": "nutzungsbedingungen", "url": "https://www.spiegel.de/nutzungsbedingungen"},
+ ],
+ },
+ {
+ "name": "TUEV Sued",
+ "documents": [
+ {"doc_type": "dse", "url": "https://www.tuvsud.com/de-de/datenschutzerklaerung"},
+ {"doc_type": "impressum", "url": "https://www.tuvsud.com/de-de/impressum"},
+ ],
+ },
+ {
+ "name": "ETO Gruppe",
+ "documents": [
+ {"doc_type": "dse", "url": "https://www.etogruppe.com/datenschutz.html"},
+ {"doc_type": "impressum", "url": "https://www.etogruppe.com/impressum.html"},
+ ],
+ },
+ {
+ "name": "Caritas",
+ "documents": [
+ {"doc_type": "dse", "url": "https://www.caritas.de/datenschutz"},
+ {"doc_type": "impressum", "url": "https://www.caritas.de/impressum"},
+ ],
+ },
+ {
+ "name": "BfDI",
+ "documents": [
+ {"doc_type": "dse", "url": "https://www.bfdi.bund.de/DE/Meta/Datenschutz/datenschutz_node.html"},
+ {"doc_type": "impressum", "url": "https://www.bfdi.bund.de/DE/Meta/Impressum/impressum_node.html"},
+ ],
+ },
+]
+
+
def run_check(backend_url: str, website: dict) -> dict:
    """Submit one compliance check and poll until it finishes.

    Args:
        backend_url: Base URL of the compliance backend; the API path is
            appended to it directly.
        website: Entry with a ``"documents"`` list, which is sent unchanged
            as the request payload.

    Returns:
        The ``"result"`` object of the completed check (an empty dict when
        the backend reports completion without a result), or
        ``{"error": message}`` when the check could not be started, failed,
        timed out, or the backend could not be reached. Transport errors and
        undecodable responses are reported this way instead of being raised,
        so one unreachable site cannot abort a whole batch run.
    """
    endpoint = f"{backend_url}/api/compliance/agent/compliance-check"
    try:
        # NOTE(review): verify=False disables TLS certificate verification —
        # presumably for a self-signed local backend; do not point this
        # script at hosts you do not control.
        with httpx.Client(timeout=30.0, verify=False) as client:
            # Start check
            resp = client.post(
                endpoint,
                json={
                    "documents": website["documents"],
                    "use_agent": False,
                },
            )
            if resp.status_code != 200:
                return {"error": f"Start failed: {resp.status_code}"}

            check_id = resp.json().get("check_id")
            if not check_id:
                return {"error": "No check_id"}

            # 300 polls x 3 s sleep = 15 min of waiting (request time comes
            # on top of that).
            for _ in range(300):
                time.sleep(3)
                poll = client.get(f"{endpoint}/{check_id}")
                if poll.status_code != 200:
                    # Treat a non-200 poll as transient and keep polling.
                    continue
                data = poll.json()
                status = data.get("status")
                if status == "completed":
                    # A null result would break the callers' `"error" in
                    # result` test, so normalise it to an empty dict.
                    return data.get("result") or {}
                if status == "failed":
                    return {"error": data.get("error", "Check failed")}
    except (httpx.HTTPError, ValueError) as exc:
        # httpx.HTTPError covers connection/timeout failures; ValueError
        # covers response bodies that are not valid JSON.
        return {"error": f"Request failed: {exc}"}

    return {"error": "Timeout (15 min)"}
+
+
def print_results(all_results: list[tuple[str, dict]]):
    """Print a per-site comparison table followed by a per-document breakdown.

    Args:
        all_results: ``(site name, result)`` pairs. A result containing an
            ``"error"`` key is rendered as an ERROR row in the table and is
            left out of the detail section.
    """
    print()
    print("=" * 100)
    print(f"{'Website':20s} {'Profil':12s} {'DSI L1':10s} {'DSI W':7s} "
          f"{'Imp L1':10s} {'Dienste':8s} {'Docs':5s} {'Status':12s}")
    print("-" * 100)

    for name, result in all_results:
        if "error" in result:
            print(f"{name:20s} {'ERROR':12s} {str(result['error'])[:60]}")
            continue

        # `or {}` / `or []` also cover keys that are present but null.
        profile = result.get("business_profile") or {}
        btype = profile.get("business_type", "?").upper()
        industry = profile.get("industry", "?")
        profile_col = f"{btype}/{industry}"
        services = len(profile.get("detected_services", []))

        docs = result.get("results") or []
        dsi = next((d for d in docs if d.get("doc_type") == "dse"), {})
        imp = next((d for d in docs if d.get("doc_type") == "impressum"), {})

        dsi_l1 = f"{dsi.get('completeness_pct', 0)}%"
        dsi_w = str(dsi.get("word_count", 0))
        imp_l1 = f"{imp.get('completeness_pct', 0)}%"

        ok_count = sum(1 for d in docs if d.get("completeness_pct", 0) == 100)
        total = len(docs)
        status = "OK" if dsi.get("completeness_pct", 0) == 100 else "LUECKEN"

        # `total` is an int: it needs an integer format code. The previous
        # string code ("3s") raised ValueError on every successful row.
        print(f"{name:20s} {profile_col:12s} {dsi_l1:10s} {dsi_w:7s} "
              f"{imp_l1:10s} {services:8d} {ok_count}/{total:<3d} "
              f"{status}")

    print("=" * 100)

    # Detail: all doc results
    print()
    for name, result in all_results:
        if "error" in result:
            continue
        print(f"--- {name} ---")
        for doc in result.get("results") or []:
            pct = doc.get("completeness_pct", 0)
            dtype = doc.get("doc_type", "?")
            label = doc.get("label", dtype)
            wc = doc.get("word_count", 0)
            scenario = doc.get("scenario", "")
            # Checks without an explicit level count as level 1.
            level1 = [c for c in doc.get("checks", []) if c.get("level", 1) == 1]
            l1_pass = sum(1 for c in level1 if c.get("passed"))
            failed = [
                c.get("label", "?")
                for c in level1
                if not c.get("passed")
                and not c.get("skipped")
                and c.get("severity") != "INFO"
            ]
            print(f" {label:30s} {l1_pass}/{len(level1)} L1 ({pct}%) {wc}w {scenario}")
            for failed_label in failed[:5]:  # at most 5 failed checks per document
                print(f" ✗ {failed_label[:70]}")
        print()
+
+
def main():
    """Run the batch check for the selected sites and save the raw results.

    Parses ``--backend-url`` and ``--sites``, calls ``run_check`` for each
    selected entry of ``GT_WEBSITES`` in order, prints a progress line per
    site, prints the comparison table via ``print_results`` and writes all
    raw results to a timestamped ``batch_results_*.json`` file in the current
    working directory.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("--backend-url", default="https://localhost:8002",
                        help="Backend compliance URL")
    parser.add_argument("--sites", default="all",
                        help=f"Comma-separated site indices (1-{len(GT_WEBSITES)}) or 'all'")
    args = parser.parse_args()

    sites = GT_WEBSITES
    if args.sites != "all":
        try:
            indices = [int(token) - 1 for token in args.sites.split(",")]
        except ValueError:
            # Report malformed input as a usage error, not a traceback.
            # parser.error() exits, so `indices` is always bound below.
            parser.error(
                f"--sites must be 'all' or comma-separated integers, got {args.sites!r}"
            )
        # Out-of-range indices are dropped silently.
        sites = [GT_WEBSITES[i] for i in indices if 0 <= i < len(GT_WEBSITES)]

    print(f"Running compliance check on {len(sites)} websites...")
    print(f"Backend: {args.backend_url}")
    print()

    all_results = []
    for position, website in enumerate(sites, start=1):
        name = website["name"]
        print(f"[{position}/{len(sites)}] {name}...", end=" ", flush=True)
        t0 = time.time()
        result = run_check(args.backend_url, website)
        elapsed = time.time() - t0
        if "error" in result:
            print(f"ERROR ({elapsed:.0f}s): {str(result['error'])[:60]}")
        else:
            docs = result.get("results") or []
            ok = sum(1 for d in docs if d.get("completeness_pct", 0) == 100)
            print(f"OK ({elapsed:.0f}s) — {len(docs)} docs, {ok} vollstaendig")
        all_results.append((name, result))

    print_results(all_results)

    # Save raw results
    out_file = f"batch_results_{time.strftime('%Y%m%d_%H%M%S')}.json"
    # Explicit encoding keeps the file independent of the platform's locale.
    with open(out_file, "w", encoding="utf-8") as f:
        json.dump(dict(all_results), f, indent=2, default=str)
    print(f"\nRaw results saved to {out_file}")
+
+
+if __name__ == "__main__":
+ main()