feat: management summary for GF + batch GT test script
1. Management Summary (agent_doc_check_report.py):
- Plain-language action items for Geschaeftsfuehrer
- Maps technical checks to business actions ("Ihren DSB erwaehnen",
"Beschwerderecht ergaenzen", "Loeschfristen dokumentieren")
- Shows at top of compliance check email before detail report
- Max 10 actions, max 3 per document
2. Batch GT Test (zeroclaw/scripts/batch_gt_test.py):
- Runs all 10 GT websites through compliance-check API
- Prints comparison table with L1 scores, word counts, services
- Saves raw JSON results for analysis
- Usage: python3 batch_gt_test.py --sites 1,6 --backend-url URL
Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -0,0 +1,235 @@
|
||||
#!/usr/bin/env python3
|
||||
"""
|
||||
Batch Ground Truth Test — run compliance check on all 10 GT websites.
|
||||
|
||||
Usage:
|
||||
python3 batch_gt_test.py [--sites 1,6] [--backend-url URL]
|
||||
|
||||
Calls the compliance-check API for each website's DSI + Impressum URLs,
|
||||
polls for results, and prints a comparison table.
|
||||
"""
|
||||
|
||||
import argparse
|
||||
import json
|
||||
import sys
|
||||
import time
|
||||
|
||||
import httpx
|
||||
|
||||
# 10 GT websites with their known document URLs
|
||||
# Ground-truth sites as (name, ((doc_type, url), ...)) rows. They are expanded
# below into the {"name": ..., "documents": [{"doc_type", "url"}, ...]} shape.
_GT_SITE_ROWS = (
    ("SafetyKon", (
        ("dse", "https://safetykon.de/datenschutz"),
        ("impressum", "https://safetykon.de/impressum"),
    )),
    ("IHK Konstanz", (
        ("dse", "https://www.ihk.de/konstanz/servicemarken/ueber-uns/downloads/datenschutzinformationen-zum-internetangebot-4163288"),
        ("impressum", "https://www.ihk.de/konstanz/impressum"),
    )),
    ("Stadt Koeln", (
        ("dse", "https://www.stadt-koeln.de/datenschutz"),
        ("impressum", "https://www.stadt-koeln.de/impressum"),
    )),
    ("BMW", (
        ("dse", "https://www.bmw.de/de/footer/metanavigation/datenschutz.html"),
        ("impressum", "https://www.bmw.de/de/footer/metanavigation/impressum.html"),
    )),
    ("Sparkasse Bodensee", (
        ("dse", "https://www.sparkasse-bodensee.de/de/home/toolbar/datenschutz.html"),
        ("impressum", "https://www.sparkasse-bodensee.de/de/home/toolbar/impressum.html"),
    )),
    ("Spiegel", (
        ("dse", "https://www.spiegel.de/datenschutz-spiegel"),
        ("impressum", "https://www.spiegel.de/impressum"),
        ("nutzungsbedingungen", "https://www.spiegel.de/nutzungsbedingungen"),
    )),
    ("TUEV Sued", (
        ("dse", "https://www.tuvsud.com/de-de/datenschutzerklaerung"),
        ("impressum", "https://www.tuvsud.com/de-de/impressum"),
    )),
    ("ETO Gruppe", (
        ("dse", "https://www.etogruppe.com/datenschutz.html"),
        ("impressum", "https://www.etogruppe.com/impressum.html"),
    )),
    ("Caritas", (
        ("dse", "https://www.caritas.de/datenschutz"),
        ("impressum", "https://www.caritas.de/impressum"),
    )),
    ("BfDI", (
        ("dse", "https://www.bfdi.bund.de/DE/Meta/Datenschutz/datenschutz_node.html"),
        ("impressum", "https://www.bfdi.bund.de/DE/Meta/Impressum/impressum_node.html"),
    )),
)

GT_WEBSITES = [
    {
        "name": site_name,
        "documents": [
            {"doc_type": doc_type, "url": url} for doc_type, url in doc_rows
        ],
    }
    for site_name, doc_rows in _GT_SITE_ROWS
]
|
||||
|
||||
|
||||
def run_check(backend_url: str, website: dict) -> dict:
    """Submit a compliance check for one website and poll until it finishes.

    Args:
        backend_url: Base URL of the compliance backend; the API path is
            appended to it.
        website: Entry whose "documents" list is posted as the check input.

    Returns:
        The "result" object of the completed check (``{}`` if it is missing
        or null), or ``{"error": <message>}`` when the check could not be
        started, reported failure, or did not complete within the polling
        budget. Network errors never propagate to the caller.
    """
    poll_interval_s = 3
    max_polls = 300  # 300 polls * 3 s sleep = 15 min of waiting
    endpoint = f"{backend_url}/api/compliance/agent/compliance-check"

    # NOTE(review): verify=False disables TLS certificate verification —
    # presumably because the default backend is a local HTTPS endpoint with a
    # self-signed certificate; confirm before targeting a remote host.
    with httpx.Client(timeout=30.0, verify=False) as client:
        # Start check
        try:
            resp = client.post(
                endpoint,
                json={
                    "documents": website["documents"],
                    "use_agent": False,
                },
            )
        except httpx.HTTPError as exc:
            # Unreachable backend / timeout: report instead of aborting the batch.
            return {"error": f"Start failed: {type(exc).__name__}: {exc}"}

        if resp.status_code != 200:
            return {"error": f"Start failed: {resp.status_code}"}

        try:
            start_payload = resp.json()
        except ValueError:
            start_payload = None
        check_id = start_payload.get("check_id") if isinstance(start_payload, dict) else None
        if not check_id:
            return {"error": "No check_id"}

        # Poll (max 15 min)
        for _ in range(max_polls):
            time.sleep(poll_interval_s)
            try:
                poll = client.get(f"{endpoint}/{check_id}")
            except httpx.HTTPError:
                # Treat network hiccups like a non-200 answer: try again.
                continue
            if poll.status_code != 200:
                continue
            try:
                data = poll.json()
            except ValueError:
                continue
            if not isinstance(data, dict):
                continue

            status = data.get("status")
            if status == "completed":
                # A null "result" would break callers that test `"error" in result`.
                return data.get("result") or {}
            if status == "failed":
                # Always return a string so callers can safely truncate it.
                return {"error": str(data.get("error") or "Check failed")}

    return {"error": "Timeout (15 min)"}
|
||||
|
||||
|
||||
def print_results(all_results: list[tuple[str, dict]]):
    """Print a per-site comparison table followed by per-document details.

    Args:
        all_results: ``(site name, result)`` pairs. A result that contains an
            "error" key is printed as an ERROR row and left out of the detail
            section; any other result is read for its "business_profile" and
            "results" entries. Missing or null entries fall back to
            placeholders instead of raising.
    """
    rule = "=" * 100

    print()
    print(rule)
    print(f"{'Website':20s} {'Profil':12s} {'DSI L1':10s} {'DSI W':7s} "
          f"{'Imp L1':10s} {'Dienste':8s} {'Docs':5s} {'Status':12s}")
    print("-" * 100)

    for name, result in all_results:
        if "error" in result:
            # str() so a non-string error payload can still be truncated.
            print(f"{name:20s} {'ERROR':12s} {str(result['error'])[:60]}")
            continue

        profile = result.get("business_profile") or {}
        btype = str(profile.get("business_type") or "?").upper()
        industry = str(profile.get("industry") or "?")
        services = len(profile.get("detected_services") or [])

        docs = result.get("results") or []
        dsi = next((d for d in docs if d.get("doc_type") == "dse"), {})
        imp = next((d for d in docs if d.get("doc_type") == "impressum"), {})

        dsi_l1 = f"{dsi.get('completeness_pct', 0)}%"
        dsi_w = str(dsi.get("word_count", 0))
        imp_l1 = f"{imp.get('completeness_pct', 0)}%"

        ok_count = sum(1 for d in docs if d.get("completeness_pct", 0) == 100)
        # Render "ok/total" as text first: applying the "s" format code
        # directly to the integer total raises ValueError.
        docs_cell = f"{ok_count}/{len(docs)}"
        status = "OK" if dsi.get("completeness_pct", 0) == 100 else "LUECKEN"

        print(f"{name:20s} {btype + '/' + industry:12s} {dsi_l1:10s} {dsi_w:7s} "
              f"{imp_l1:10s} {services:8d} {docs_cell:5s} {status}")

    print(rule)

    # Detail: all doc results
    print()
    for name, result in all_results:
        if "error" in result:
            continue
        print(f"--- {name} ---")
        for d in result.get("results") or []:
            pct = d.get("completeness_pct", 0)
            label = str(d.get("label") or d.get("doc_type", "?"))
            wc = d.get("word_count", 0)
            scenario = d.get("scenario", "")

            # Checks without an explicit "level" count as level 1.
            l1_checks = [c for c in (d.get("checks") or []) if c.get("level", 1) == 1]
            l1_pass = sum(1 for c in l1_checks if c.get("passed"))
            failed = [
                str(c.get("label", "?"))
                for c in l1_checks
                if not c.get("passed")
                and not c.get("skipped")
                and c.get("severity") != "INFO"
            ]

            print(f" {label:30s} {l1_pass}/{len(l1_checks)} L1 ({pct}%) {wc}w {scenario}")
            # Show at most five failed checks per document.
            for failed_label in failed[:5]:
                print(f" ✗ {failed_label[:70]}")
        print()
|
||||
|
||||
|
||||
def main():
    """Run the batch test from the command line.

    Parses ``--backend-url`` and ``--sites``, runs ``run_check`` for every
    selected entry of ``GT_WEBSITES`` while printing one progress line per
    site, prints the comparison table, and writes all raw results to a
    timestamped ``batch_results_*.json`` file in the current directory.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("--backend-url", default="https://localhost:8002",
                        help="Backend compliance URL")
    parser.add_argument("--sites", default="all",
                        help="Comma-separated site indices (1-10) or 'all'")
    args = parser.parse_args()

    sites = GT_WEBSITES
    if args.sites != "all":
        tokens = [tok for tok in args.sites.split(",") if tok.strip()]
        try:
            # CLI indices are 1-based; convert to list positions.
            indices = [int(tok) - 1 for tok in tokens]
        except ValueError:
            # parser.error() prints the usage text and exits with status 2.
            parser.error(
                f"--sites must be 'all' or comma-separated integers, got {args.sites!r}"
            )
        ignored = [i + 1 for i in indices if not 0 <= i < len(GT_WEBSITES)]
        if ignored:
            print(
                f"Ignoring out-of-range site indices: {ignored} "
                f"(valid: 1-{len(GT_WEBSITES)})",
                file=sys.stderr,
            )
        sites = [GT_WEBSITES[i] for i in indices if 0 <= i < len(GT_WEBSITES)]

    print(f"Running compliance check on {len(sites)} websites...")
    print(f"Backend: {args.backend_url}")
    print()

    all_results = []
    for position, website in enumerate(sites, start=1):
        name = website["name"]
        print(f"[{position}/{len(sites)}] {name}...", end=" ", flush=True)
        t0 = time.time()
        try:
            result = run_check(args.backend_url, website)
        except httpx.HTTPError as exc:
            # Keep going: one unreachable site must not discard the results
            # already collected for the others.
            result = {"error": f"{type(exc).__name__}: {exc}"}
        elapsed = time.time() - t0
        if "error" in result:
            print(f"ERROR ({elapsed:.0f}s): {str(result['error'])[:60]}")
        else:
            docs = result.get("results") or []
            ok = sum(1 for d in docs if d.get("completeness_pct", 0) == 100)
            print(f"OK ({elapsed:.0f}s) — {len(docs)} docs, {ok} vollstaendig")
        all_results.append((name, result))

    print_results(all_results)

    # Save raw results
    out_file = f"batch_results_{time.strftime('%Y%m%d_%H%M%S')}.json"
    with open(out_file, "w", encoding="utf-8") as fh:
        # default=str stringifies any value json cannot serialize natively.
        json.dump(dict(all_results), fh, indent=2, default=str)
    print(f"\nRaw results saved to {out_file}")
|
||||
|
||||
|
||||
# Run the batch only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()
|
||||
Reference in New Issue
Block a user