feat(consent+report): P56-P67 Mercedes-Audit-Cycle (Anti-Audit, Phase G Vendors, Cookie-Behavior-Validator + 5 Mail-Polish-Items) [migration-approved]

P56 Anti-Auditing-Detection als constructive Compliance-Finding (Audit-API- Empfehlung statt Anklage, weil Mercedes berechtigt Bots blockiert) P57 Phase G vendor_details Union mit cmp_vendors -> 42 Anbieter sichtbar P58 Anti-Audit-Detection robuster (Script-Domain-Check + Settings-spezifisch) P59 Cookie-Behavior-Validator (4 Layer, 3-Tier-Severity: MEDIUM=Kategorie- Mismatch / HIGH=Zweck-Mismatch / CRITICAL=beide=Vorsatz-Indiz) + Open Cookie Database (CC0) als Library-Seed (2264 Cookies) P59b Cookie-Behavior in Banner-Check verdrahtet + Mail-Block (BUGFIX: SessionLocal selbst oeffnen, db war im Background-Task nicht im Scope) Mail-Polish nach Mercedes-Review: P63 Banner-Footer-Links auch im wb7-link/role=link erkennen (Shadow-DOM- Walker label-based statt nur <a href>) P64 Re-Access-Severity: MEDIUM statt HIGH, wenn Footer "Einstellungen" oder Mercedes-typisch existiert; OEM-Footer-Detection (wb7-footer) P65 Text-Truncation: Word-Boundary statt Zeichen-Cut (kein "einfa"-Bruch mehr in Sofortmassnahmen) P66 GF-Aktionen: Service-Zweck vs Cookie-Zweck explizit erklaert (haeufige Verwechslung Marketing/GF: "Akamai-Beschreibung" != Cookie- Zweck pro DSK-OH 2024) P67 Stirring-Finding mit "Verlust-Framing"-Erklaerung + Alt-vs-Neutral- Beispiel, statt nur EDPB-Fachbegriff Compliance-Advisor FAQ (admin agent-core/soul): + CNIL/EDPB Top-Bussgelder (Google 100M, Meta 60M, Amazon 35M) + Deutsche Praezedenz (LG Muenchen Google Fonts, EuGH Planet49, BGH I ZR 7/16) + 4 Risiko-Pfade (Bussgeld/Abmahnung/Sammelklage/NOYB) + Berechnungs-Methodik Document-Generator Templates: AGB-DE (142), Impressum (140), Widerrufs- formular-Anlage (143), DSR-Process-Dedup (139), Cookie-Library (144). Architektur: doc_action_mappings.py + banner_dom_walkers.py + cookie_behavior_validator.py + vendor_detail_extractor.py rausgezogen, um die 500-LOC-Caps in agent_doc_check_report.py und banner_text_checker.py einzuhalten. Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-05-21 06:28:25 +02:00
parent badb356740
commit 57c0f940a2
38 changed files with 3656 additions and 116 deletions
@@ -0,0 +1,103 @@
+#!/usr/bin/env python3
+"""Diagnose helper: for each failing template + missing check,
+show the patterns and the closest substring in the rendered template.
+Helps decide whether to fix the Template content or the regex pattern."""
+from __future__ import annotations
+
+import json
+import os
+import re
+import sys
+from typing import Optional
+
+import psycopg2
+from psycopg2.extras import RealDictCursor
+
+sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
+
+from compliance.services.doc_checks.runner import _CHECKLIST_MAP  # noqa: E402
+
+# Re-use the same rendering as the audit script
+sys.path.insert(0, os.path.dirname(os.path.abspath(__file__)))
+from audit_template_completeness import (  # noqa: E402
+    TEMPLATE_TO_DOCTYPE, DEMO_PLACEHOLDERS,
+    render_placeholders, strip_handlebars_blocks,
+)
+
+
+def keyword_hits(text: str, keywords: list[str], window: int = 80) -> list[str]:
+    """Return up to three context snippets around keyword occurrences.
+
+    Keywords are matched literally and case-insensitively, in the order
+    given. Each snippet spans about ``window // 2`` characters on either
+    side of the match, has newlines flattened to spaces and is wrapped in
+    ellipsis markers.
+
+    Args:
+        text: Text to search.
+        keywords: Literal search terms; empty strings are ignored.
+        window: Approximate total amount of context around each match.
+
+    Returns:
+        At most three snippets formatted as ``"… <context> …"``.
+    """
+    max_hits = 3
+    half = window // 2
+    hits: list[str] = []
+    for kw in keywords:
+        if not kw:
+            # An empty pattern matches at every position and only yields noise.
+            continue
+        # Search the original text directly: offsets taken from
+        # ``text.lower()`` can drift, because lower-casing may change the
+        # string length (e.g. "İ" becomes two code points).
+        for m in re.finditer(re.escape(kw), text, re.IGNORECASE):
+            start = max(0, m.start() - half)
+            end = min(len(text), m.end() + half)
+            snippet = text[start:end].replace("\n", " ").strip()
+            hits.append(f"… {snippet} …")
+            if len(hits) >= max_hits:
+                return hits
+    return hits
+
+
+def diagnose_template(tpl_id: str, json_path: str = "/tmp/template_audit_report.json"):
+    """Print a diagnosis for every failed L1 check of one audited template.
+
+    Looks up the template's entry in the JSON audit report, loads the
+    template content from the database, renders it with the audit script's
+    helpers and, for each missing check, prints the check's patterns plus
+    the closest keyword matches found in the rendered text.
+
+    Args:
+        tpl_id: Value of ``template_id`` to look up in the audit report.
+        json_path: Path of the JSON audit report.
+    """
+    with open(json_path, encoding="utf-8") as f:
+        audit = json.load(f)
+    entry = next((a for a in audit if a["template_id"] == tpl_id), None)
+    if not entry or not entry.get("doc_type"):
+        print("Not found or no doc_type")
+        return
+    print(f"\n=== {entry['template_type']} ({entry['language']}) — {entry['title']} ===")
+    print(f"doc_type: {entry['doc_type']} | L1: {entry['l1_passed']}/{entry['l1_total']}")
+    print(f"Missing: {len(entry['l1_missing'])}")
+
+    # Load template content
+    dsn = os.environ.get("DATABASE_URL") or os.environ.get("COMPLIANCE_DATABASE_URL")
+    if not dsn:
+        print("ERROR: DATABASE_URL not set", file=sys.stderr)
+        return
+    conn = psycopg2.connect(dsn)
+    try:
+        cur = conn.cursor(cursor_factory=RealDictCursor)
+        cur.execute("SELECT content FROM compliance.compliance_legal_templates WHERE id=%s", (tpl_id,))
+        row = cur.fetchone()
+    finally:
+        # One connection is opened per diagnosed template; always release it,
+        # including on the early return below.
+        conn.close()
+    if not row:
+        print("Template not in DB")
+        return
+    rendered = render_placeholders(strip_handlebars_blocks(row["content"] or ""))
+
+    # Look up checklist
+    checklist, _label = _CHECKLIST_MAP.get(entry["doc_type"], ([], ""))
+    by_id = {c["id"]: c for c in checklist}
+
+    for miss in entry["l1_missing"]:
+        chk = by_id.get(miss["id"])
+        print(f"\n  ✗ {miss['label']} (id={miss['id']})")
+        if not chk:
+            print("    Pattern: (not found in checklist)")
+            continue
+        patterns = chk.get("patterns", [])
+        print(f"    Patterns ({len(patterns)}):")
+        for p in patterns[:5]:
+            print(f"      {p}")
+        # Heuristic keywords: the first three literal words (4+ letters) of
+        # each pattern, de-duplicated in order and capped at eight.
+        keywords = []
+        for p in patterns:
+            words = re.findall(r"[a-zÀ-ž]{4,}", p, re.IGNORECASE)
+            keywords.extend(words[:3])
+        keywords = list(dict.fromkeys(keywords))[:8]
+        if keywords:
+            print(f"    Searched keywords: {keywords}")
+        hits = keyword_hits(rendered, keywords)
+        if hits:
+            print("    Closest template snippets:")
+            for h in hits[:3]:
+                print(f"      • {h[:160]}")
+        else:
+            print("    No keyword hits — likely genuinely missing content.")
+
+
+if __name__ == "__main__":
+    # Usage: <script> [<template_id> | all] [<audit_report.json>]
+    cli_args = sys.argv[1:]
+    json_path = cli_args[1] if len(cli_args) > 1 else "/tmp/template_audit_report.json"
+    target = cli_args[0] if cli_args else "all"
+    if target == "all":
+        # Diagnose every audited template that has at least one missing check.
+        with open(json_path) as f:
+            audit = json.load(f)
+        for a in audit:
+            if not (a.get("doc_type") and a.get("l1_missing")):
+                continue
+            diagnose_template(a["template_id"], json_path)
+    else:
+        diagnose_template(target, json_path)
@@ -0,0 +1,290 @@
+#!/usr/bin/env python3
+"""
+P39 — Template-Audit: prueft alle Legal-Templates aus der DB gegen
+unsere eigenen Pflichtangaben-Checks (doc_checks/*).
+
+Verwendet check_document_completeness — die gleiche Funktion die auch
+externe Sites pruefen wuerde. Reports als Markdown.
+
+Run inside the bp-compliance-backend container:
+    docker exec bp-compliance-backend python /app/scripts/audit_template_completeness.py
+"""
+
+from __future__ import annotations
+
+import json
+import os
+import re
+import sys
+from collections import defaultdict
+from datetime import datetime, timezone
+from typing import Iterable
+
+import psycopg2
+from psycopg2.extras import RealDictCursor
+
+# Add compliance package to path if running outside container
+sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
+
+from compliance.services.doc_checks.runner import check_document_completeness  # noqa: E402
+
+# Maps a template's ``document_type`` (DB column) to the ``doc_type`` key
+# that is passed to check_document_completeness. Only template types
+# listed here are audited; audit_template() reports every other type as
+# skipped with reason "no_checklist_mapping".
+TEMPLATE_TO_DOCTYPE = {
+    "privacy_policy":         "dse",
+    "data_protection_policy": "dse",
+    "applicant_dsi":          "dse",
+    "employee_dsi":           "dse",
+    "social_media_dsi":       "dse",
+    "video_conference_dsi":   "dse",
+    "informationspflichten":  "dse",
+    "cookie_policy":          "cookie",
+    "agb":                    "agb",
+    "widerruf":               "widerruf",
+    "dpa":                    "avv",
+    "dsfa":                   "dsfa",
+    "tom_documentation":      "tom_annex",
+    "loeschkonzept":          "loeschkonzept",
+}
+
+# Demo values substituted for common ``{{placeholder}}`` markers, so that a
+# rendered template contains plausible concrete text instead of generic
+# {{X}} markers (which would fail regex-based mandatory-field checks).
+# Keys are looked up by render_placeholders() after lower-casing the
+# placeholder name and replacing hyphens with underscores.
+DEMO_PLACEHOLDERS: dict[str, str] = {
+    "company_name":         "Demo GmbH",
+    "company_legal_name":   "Demo GmbH",
+    "company_address":      "Musterstraße 1, 12345 Berlin",
+    "company_city":         "Berlin",
+    "company_postal":       "12345",
+    "company_country":      "Deutschland",
+    "company_email":        "datenschutz@demo.de",
+    "company_phone":        "+49 30 12345678",
+    "dpo_name":             "Max Mustermann",
+    "dpo_email":            "dsb@demo.de",
+    "dpo_phone":            "+49 30 87654321",
+    "managing_director":    "Erika Mustermann",
+    "register_court":       "Amtsgericht Berlin",
+    "register_number":      "HRB 123456",
+    "vat_id":               "DE123456789",
+    "supervisory_authority": "Berliner Beauftragte für Datenschutz",
+    "supervisory_address":  "Friedrichstr. 219, 10969 Berlin",
+    "retention_period":     "10 Jahre nach Vertragsende",
+    "third_country":        "USA",
+    "transfer_mechanism":   "EU-Standardvertragsklauseln",
+    "date":                 "2026-05-20",
+    "version":              "1.0",
+}
+
+
+def render_placeholders(content: str) -> str:
+    """Substitute ``{{key}}`` placeholders with demo values.
+
+    The placeholder name is trimmed and lower-cased, and hyphens are
+    treated as underscores for the lookup in DEMO_PLACEHOLDERS. A
+    placeholder without a demo value is rendered as ``[key]`` so the
+    surrounding sentence keeps a readable hint.
+    """
+    def _substitute(match: re.Match) -> str:
+        name = match.group(1).strip().lower()
+        demo_value = DEMO_PLACEHOLDERS.get(name.replace("-", "_"))
+        if demo_value is None:
+            # Unknown placeholder: keep a bracketed hint instead of dropping it.
+            return f"[{name}]"
+        return demo_value
+
+    # Anything between double braces that does not itself contain a brace.
+    placeholder_pattern = r"\{\{\s*([^{}]+?)\s*\}\}"
+    return re.sub(placeholder_pattern, _substitute, content)
+
+
+def strip_handlebars_blocks(content: str) -> str:
+    """Remove conditional-block markers while keeping the text they wrap.
+
+    Deletes ``{{#IF ...}}``, ``{{/IF}}``, ``{{#UNLESS ...}}``,
+    ``{{/UNLESS}}`` and ``{{else}}`` (case-sensitive). Both branches of a
+    conditional stay in the output, because the audit only asks whether
+    mandatory text appears anywhere, not which branch is active.
+    """
+    marker_patterns = (
+        r"\{\{#IF[^}]*\}\}",
+        r"\{\{/IF\}\}",
+        r"\{\{#UNLESS[^}]*\}\}",
+        r"\{\{/UNLESS\}\}",
+        r"\{\{else\}\}",
+    )
+    # Patterns are applied one after another, in exactly this order.
+    for marker in marker_patterns:
+        content = re.sub(marker, "", content)
+    return content
+
+
+def fetch_templates(conn) -> list[dict]:
+    """Return all published legal templates, ordered by type and language.
+
+    Args:
+        conn: Open psycopg2 connection; it is left open for the caller.
+
+    Returns:
+        One mapping per row with the keys ``id``, ``document_type``,
+        ``language``, ``title`` and ``content``.
+    """
+    query = """
+        SELECT id, document_type, language, title, content
+        FROM compliance.compliance_legal_templates
+        WHERE status = 'published'
+        ORDER BY document_type, language
+    """
+    # The cursor context manager closes the cursor once the rows are read.
+    with conn.cursor(cursor_factory=RealDictCursor) as cur:
+        cur.execute(query)
+        return list(cur.fetchall())
+
+
+def audit_template(tpl: dict) -> dict:
+    """Audit a single template row against its level-1 checklist.
+
+    Args:
+        tpl: Template row with the keys ``id``, ``document_type``,
+            ``language``, ``title`` and ``content``.
+
+    Returns:
+        A result dict. Templates whose type has no entry in
+        TEMPLATE_TO_DOCTYPE get ``doc_type=None`` and
+        ``skipped_reason="no_checklist_mapping"``. Audited templates get
+        ``l1_total``, ``l1_passed``, ``l1_missing`` (id, label and a hint
+        cut to 200 characters) and ``word_count``.
+    """
+    result = {
+        "template_id": tpl["id"],
+        "template_type": tpl["document_type"],
+        "language": tpl["language"],
+        "title": tpl["title"],
+    }
+    doc_type = TEMPLATE_TO_DOCTYPE.get(tpl["document_type"])
+    if not doc_type:
+        result.update({
+            "doc_type": None,
+            "skipped_reason": "no_checklist_mapping",
+            "l1_total": 0, "l1_passed": 0, "l1_missing": [],
+        })
+        return result
+
+    # Drop conditional markers first, then fill placeholders with demo values.
+    rendered = render_placeholders(strip_handlebars_blocks(tpl["content"] or ""))
+    findings = check_document_completeness(
+        text=rendered,
+        doc_type=doc_type,
+        doc_title=tpl["title"] or tpl["document_type"],
+        doc_url=f"template://{tpl['id']}",
+    )
+    # Use the first finding that carries a non-empty 'all_checks' list.
+    all_checks: list[dict] = next(
+        (f["all_checks"] for f in findings if f.get("all_checks")), []
+    )
+    # Checks without an explicit level count as level 1.
+    l1_checks = [c for c in all_checks if c.get("level", 1) == 1]
+    l1_missing = [c for c in l1_checks if not c.get("passed") and not c.get("skipped")]
+    result.update({
+        "doc_type": doc_type,
+        "l1_total": len(l1_checks),
+        "l1_passed": sum(1 for c in l1_checks if c.get("passed") and not c.get("skipped")),
+        "l1_missing": [
+            # ``or ""`` also covers a hint that is present but None.
+            {"id": c.get("id"), "label": c.get("label"), "hint": (c.get("hint") or "")[:200]}
+            for c in l1_missing
+        ],
+        "word_count": len(rendered.split()),
+    })
+    return result
+
+
+def render_markdown_report(results: Iterable[dict]) -> str:
+    """Render audit results as a Markdown report.
+
+    The report has a header with counts, a summary table with one row per
+    audited template (grouped by template type), a detail section for
+    templates with missing mandatory fields and a list of templates that
+    were skipped because they have no ``doc_type``.
+
+    Args:
+        results: Result dicts as produced by audit_template().
+
+    Returns:
+        The complete report as a single string.
+    """
+    def cell(value) -> str:
+        # A literal pipe or newline would break the Markdown table row.
+        return str(value).replace("|", "\\|").replace("\n", " ")
+
+    results = list(results)
+    audited = [r for r in results if r.get("doc_type")]
+    skipped = [r for r in results if not r.get("doc_type")]
+    by_type = defaultdict(list)
+    for r in audited:
+        by_type[r["template_type"]].append(r)
+
+    lines = [
+        "# Template-Audit (P39)",
+        "",
+        f"**Datum:** {datetime.now(timezone.utc).isoformat()}",
+        "**Methode:** check_document_completeness gegen jede Vorlage",
+        "",
+        f"- Templates gesamt: {len(results)}",
+        f"- Auditierbar (mit Checklist-Mapping): {len(audited)}",
+        f"- Uebersprungen (kein doc_type-Mapping): {len(skipped)}",
+        "",
+    ]
+
+    # Summary table by template_type
+    lines.append("## Zusammenfassung pro Template-Typ")
+    lines.append("")
+    lines.append("| Template-Type | Sprache | L1-Score | Fehlende Pflichtangaben |")
+    lines.append("|---|---|---|---|")
+    for tpl_type in sorted(by_type):
+        for r in by_type[tpl_type]:
+            ratio = f"{r['l1_passed']}/{r['l1_total']}" if r["l1_total"] else "—"
+            missing = r["l1_missing"]
+            # German verb agreement: "1 fehlt", otherwise "<n> fehlen".
+            verb = "fehlt" if len(missing) == 1 else "fehlen"
+            summary = f"{len(missing)} {verb}"
+            if missing:
+                summary += ": " + ", ".join(cell(c["label"]) for c in missing)
+            lines.append(
+                f"| `{cell(tpl_type)}` | {cell(r['language'])} | {ratio} | {summary} |"
+            )
+    lines.append("")
+
+    # Per-template details — only those with failures
+    failed = [r for r in audited if r["l1_missing"]]
+    lines.append(f"## Details: {len(failed)} Templates mit fehlenden Pflichtangaben")
+    lines.append("")
+    for r in failed:
+        lines.append(f"### {r['template_type']} ({r['language']}) — {r['title']}")
+        lines.append("")
+        lines.append(f"- Doc-Type: `{r['doc_type']}`")
+        lines.append(f"- Wortzahl: {r['word_count']}")
+        lines.append(f"- L1-Score: {r['l1_passed']}/{r['l1_total']}")
+        lines.append(f"- Fehlend ({len(r['l1_missing'])}):")
+        for c in r["l1_missing"]:
+            lines.append(f"  - **{c['label']}** (`{c['id']}`)")
+            if c.get("hint"):
+                lines.append(f"    - Hinweis: {c['hint']}")
+        lines.append("")
+
+    # Templates without checklist
+    if skipped:
+        lines.append("## Templates ohne automatische Pflichtangaben-Pruefung")
+        lines.append("")
+        lines.append("Diese Templates haben kein Doc-Check-Mapping — sie werden "
+                     "nicht automatisch geprueft. Bei Bedarf manuell oder via LLM "
+                     "zu pruefen.")
+        lines.append("")
+        for r in sorted(skipped, key=lambda x: x["template_type"]):
+            lines.append(f"- `{r['template_type']}` ({r['language']}): {r['title']}")
+        lines.append("")
+
+    return "\n".join(lines)
+
+
+def main() -> int:
+    """Audit all published templates and write Markdown and JSON reports.
+
+    The Markdown report goes to ``$AUDIT_OUTPUT`` (default
+    ``/tmp/template_audit_report.md``); the raw results are written next to
+    it as JSON. A short summary is printed to stdout.
+
+    Returns:
+        1 when no database URL is configured, or when ``--strict`` is on
+        the command line and at least one audited template is missing
+        mandatory fields; otherwise 0.
+    """
+    dsn = os.environ.get("DATABASE_URL") or os.environ.get("COMPLIANCE_DATABASE_URL")
+    if not dsn:
+        print("ERROR: DATABASE_URL not set", file=sys.stderr)
+        return 1
+    conn = psycopg2.connect(dsn)
+    try:
+        templates = fetch_templates(conn)
+    finally:
+        # All rows are in memory now; the connection is not needed again.
+        conn.close()
+    print(f"Auditing {len(templates)} templates...", file=sys.stderr)
+
+    results = []
+    for tpl in templates:
+        try:
+            results.append(audit_template(tpl))
+        except Exception as e:
+            # Best effort: one broken template must not abort the whole audit.
+            # It is reported as skipped with the error as reason.
+            print(f"  ! {tpl['document_type']}/{tpl['language']}: {e}", file=sys.stderr)
+            results.append({
+                "template_id": tpl["id"],
+                "template_type": tpl["document_type"],
+                "language": tpl["language"],
+                "title": tpl["title"],
+                "doc_type": None,
+                "skipped_reason": f"error: {e}",
+                "l1_total": 0, "l1_passed": 0, "l1_missing": [],
+            })
+
+    report_md = render_markdown_report(results)
+    out_path = os.environ.get(
+        "AUDIT_OUTPUT",
+        "/tmp/template_audit_report.md",
+    )
+    with open(out_path, "w", encoding="utf-8") as f:
+        f.write(report_md)
+    # Also dump raw JSON for further analysis. Swap only a trailing ".md";
+    # for any other name append ".json", so the JSON file can never
+    # overwrite the Markdown report.
+    root, ext = os.path.splitext(out_path)
+    json_path = (root if ext.lower() == ".md" else out_path) + ".json"
+    with open(json_path, "w", encoding="utf-8") as f:
+        json.dump(results, f, indent=2, default=str)
+    print(f"Report:    {out_path}", file=sys.stderr)
+    print(f"Raw JSON:  {json_path}", file=sys.stderr)
+    # Short summary to stdout
+    audited = [r for r in results if r.get("doc_type")]
+    failed = [r for r in audited if r["l1_missing"]]
+    print("\n== Audit Summary ==")
+    print(f"Total templates:      {len(results)}")
+    print(f"Auditable:            {len(audited)}")
+    print(f"With failures:        {len(failed)}")
+    print(f"Skipped (no mapping): {len(results) - len(audited)}")
+    # P42: CI mode — exit non-zero when any auditable template fails L1
+    if "--strict" in sys.argv and failed:
+        print(f"\nFAIL: {len(failed)} template(s) missing mandatory fields:",
+              file=sys.stderr)
+        for r in failed:
+            missing = ", ".join(c["label"] for c in r["l1_missing"])
+            print(f"  - {r['template_type']} [{r['language']}]: {missing}",
+                  file=sys.stderr)
+        return 1
+    return 0
+
+
+# Script entry point: main()'s return value becomes the process exit status.
+if __name__ == "__main__":
+    sys.exit(main())
@@ -0,0 +1,192 @@
+#!/usr/bin/env python3
+"""
+P39 Phase B — Fix actual content gaps in legal templates.
+
+For each template with a genuine content gap (identified by P39 audit),
+insert the missing mandatory section. Targeted edits — does NOT
+overwrite the full template content.
+
+Templates fixed:
+  - data_protection_policy: add "Verantwortlicher" section (Art. 13(1)(a))
+  - applicant_dsi:          add "Drittlandtransfer" section (Art. 13(1)(f))
+  - employee_dsi:           add "Drittlandtransfer" section (Art. 13(1)(f))
+  - cookie_policy:          add concrete cookie table example
+  - dsfa:                   add LfDI / Aufsichtsbehoerden-Referenz
+  - widerruf:               add §312k BGB Online-Kuendigungsbutton clause
+
+Run inside container:
+    docker exec bp-compliance-backend python /app/scripts/fix_template_content.py
+    (dry-run by default; pass --apply to UPDATE the DB)
+"""
+
+from __future__ import annotations
+
+import os
+import sys
+
+import psycopg2
+from psycopg2.extras import RealDictCursor
+
+
+# One entry per targeted template fix. Keys, as read by main() and apply_fix():
+#   document_type / language: select the published template rows to patch.
+#   already_done_marker:      if this text is already in the content, the
+#                             fix is reported as "already-fixed" (idempotence).
+#   anchor:                   text before which insert_block is placed; when
+#                             it is None or not found, the block is appended
+#                             at the end of the content.
+#   insert_block:             Markdown text to insert.
+
+FIXES = [
+    {
+        "document_type": "data_protection_policy",
+        "language": "de",
+        "already_done_marker": "## 1. Verantwortlicher",
+        "anchor": None,  # NOTE(review): intent was "at top (after first heading)", but apply_fix() appends at the end when anchor is None
+        "insert_block": """## 1. Verantwortlicher
+
+Verantwortlich fuer die in dieser Richtlinie beschriebene Verarbeitung personenbezogener Daten im Sinne der DSGVO ist:
+
+**{{company_legal_name}}**
+{{company_address}}
+{{company_postal}} {{company_city}}, {{company_country}}
+
+E-Mail: {{company_email}}
+Telefon: {{company_phone}}
+
+Datenschutzbeauftragte/r: {{dpo_name}} ({{dpo_email}})
+
+""",
+    },
+    {
+        "document_type": "applicant_dsi",
+        "language": "de",
+        "already_done_marker": "## 7. Drittlandtransfer",
+        # NOTE(review): the block is placed before the existing "## 7." heading
+        # without renumbering, so the template may end up with two sections
+        # numbered 7 — confirm this is acceptable.
+        "anchor": "## 7.",  # generic; we insert before whatever 7 is
+        "insert_block": """## 7. Drittlandtransfer (Art. 13(1)(f) DSGVO)
+
+Eine Uebermittlung Ihrer Bewerberdaten in Laender ausserhalb der Europaeischen Union oder des Europaeischen Wirtschaftsraums (Drittland) findet **nicht** statt. Saemtliche Verarbeitung erfolgt ausschliesslich auf Servern innerhalb der EU.
+
+Sollten in Ausnahmefaellen Drittlandtransfers erforderlich werden (z.B. Konzern-Verbund mit US-Schwestergesellschaft), erfolgen diese ausschliesslich auf Basis von EU-Standardvertragsklauseln (Art. 46(2)(c) DSGVO) oder eines Angemessenheitsbeschlusses der EU-Kommission (Art. 45 DSGVO).
+
+""",
+    },
+    {
+        "document_type": "employee_dsi",
+        "language": "de",
+        "already_done_marker": "## 7. Drittlandtransfer",
+        "anchor": "## 7.",  # same placement as for applicant_dsi: before the existing "## 7." heading
+        "insert_block": """## 7. Drittlandtransfer (Art. 13(1)(f) DSGVO)
+
+Eine Uebermittlung Ihrer Beschaeftigtendaten in Laender ausserhalb der Europaeischen Union oder des Europaeischen Wirtschaftsraums (Drittland) findet grundsaetzlich **nicht** statt. Eine Ausnahme bilden Cloud-Dienste, die ggf. auf US-Server zugreifen — in diesem Fall erfolgt die Uebermittlung auf Basis von EU-Standardvertragsklauseln (Art. 46(2)(c) DSGVO) oder unter dem EU-US Data Privacy Framework (Angemessenheitsbeschluss vom 10.07.2023, Art. 45 DSGVO).
+
+Empfaengerland und Schutzmechanismus pro genutztem Dienst: siehe Verarbeitungsverzeichnis (VVT).
+
+""",
+    },
+    {
+        "document_type": "cookie_policy",
+        "language": "de",
+        "already_done_marker": "### 4.1 Konkrete Cookie-Tabelle",
+        "anchor": None,  # no anchor: apply_fix() always appends the block at the end
+        "insert_block": """### 4.1 Konkrete Cookie-Tabelle (Beispiel)
+
+| Name | Anbieter | Zweck | Speicherdauer | Typ |
+|---|---|---|---|---|
+| `__session` | {{company_legal_name}} | Sitzungs-Authentifizierung | Sitzungsende | First-Party, technisch notwendig |
+| `cookie_consent` | {{company_legal_name}} | Speicherung der Cookie-Einwilligung | 12 Monate | First-Party, technisch notwendig |
+| `_ga` | Google Ireland Ltd. | Webanalyse (Google Analytics) | 2 Jahre | Third-Party, Statistik — Einwilligung erforderlich |
+| `_fbp` | Meta Platforms Ireland Ltd. | Marketing / Conversion-Tracking | 90 Tage | Third-Party, Marketing — Einwilligung erforderlich |
+
+> Hinweis: Die obenstehende Tabelle ist beispielhaft. Die tatsaechlich von Ihrer Website gesetzten Cookies pflegen Sie im Backend Ihres Consent-Tools (z.B. Cookiebot, Usercentrics, Borlabs). Die DSK-Orientierungshilfe Telemedien 2024 fordert je Cookie: Name, Anbieter, Zweck, Speicherdauer, Typ (First-/Third-Party).
+
+""",
+    },
+    {
+        "document_type": "dsfa",
+        "language": "de",
+        "already_done_marker": "### 0.2 Beruecksichtigung Landesaufsichtsbehoerden",
+        "anchor": None,  # no anchor: appended at the end of the content
+        "insert_block": """### 0.2 Beruecksichtigung Landesaufsichtsbehoerden (LfDI) und DSK-Liste
+
+Diese DSFA beruecksichtigt:
+
+- **DSK-Positivliste** nach Art. 35(4) DSGVO: Die Datenschutzkonferenz (DSK) hat eine Liste von Verarbeitungen veroeffentlicht, die zwingend eine DSFA erfordern. Pruefen Sie, ob Ihre Verarbeitung dort gelistet ist.
+- **Landesbeauftragte fuer Datenschutz (LfDI)**: Jedes Bundesland (BfDI, BlnBDI, LfDI BW, LfDI BY, etc.) veroeffentlicht eigene Orientierungshilfen und Branchen-Stellungnahmen. Zustaendige Behoerde: {{supervisory_authority}}.
+- **EDPB Guidelines** (insbesondere WP248 — Kriterien fuer DSFA-Erforderlichkeit, Art. 29-Datenschutzgruppe).
+- **Branchenspezifische Aufsichtsempfehlungen** (z.B. Telemedien: DSK-OH 2024, Gesundheit: BfDI-Empfehlungen).
+
+""",
+    },
+    {
+        "document_type": "widerruf",
+        "language": "de",
+        "already_done_marker": "## §312k BGB",
+        "anchor": None,  # no anchor: appended at the end of the content
+        # NOTE(review): the legal statements in this block (case-law reference,
+        # §312k Abs. 6 consequence) should be confirmed by legal review.
+        "insert_block": """## §312k BGB — Online-Kuendigungsbutton (bei Dauerschuldverhaeltnissen)
+
+Bietet der Unternehmer Vertraege ueber **Dauerschuldverhaeltnisse** (Abonnements, Mitgliedschaften, SaaS-Subscriptions) auf seiner Website an, muss er nach §312k BGB einen Kuendigungsbutton bereitstellen.
+
+**Anforderungen** (BGH-Rechtsprechung 2023):
+
+- Der Button muss deutlich beschriftet sein mit "Vertraege hier kuendigen" oder gleichwertig.
+- Direkt nach Klick muss eine Bestaetigungsseite folgen mit Angaben zu Vertragsart, Vertragspartnern und Kuendigungstermin.
+- Nach Bestaetigung muss eine Bestaetigung der Kuendigung per E-Mail oder dauerhaft auf einem Datentraeger zur Verfuegung gestellt werden.
+
+**Verstoss**: Eine Kuendigung kann auch ohne den Button per E-Mail/Brief jederzeit erfolgen — fehlt der Button, kann der Vertrag zudem von der zustaendigen Verbraucherzentrale abgemahnt werden (§312k Abs. 6 BGB).
+
+**Ausnahme**: §312k gilt nur fuer Verbraucherkunden (B2C). Bei reinen B2B-Vertraegen besteht keine Pflicht.
+
+""",
+    },
+]
+
+
+def apply_fix(content: str, fix: dict) -> tuple[str, str]:
+    """Insert a fix's block into template content unless already present.
+
+    Args:
+        content: Current template content; ``None`` is treated as empty.
+        fix: Mapping with the keys ``already_done_marker``, ``anchor`` and
+            ``insert_block``.
+
+    Returns:
+        ``(new_content, status)``. Status is ``"already-fixed"`` when the
+        marker is already present (content returned as is), otherwise
+        ``"inserted"``. The block goes directly before the first line that
+        starts with the anchor; without such a line (or without an anchor)
+        it is appended at the end.
+    """
+    content = content or ""
+    if fix["already_done_marker"] in content:
+        return content, "already-fixed"
+    block = fix["insert_block"]
+    anchor = fix["anchor"]
+    insert_at = -1
+    if anchor:
+        # Only accept the anchor at the start of a line. A plain substring
+        # match would also hit e.g. "### 7." for anchor "## 7." and splice
+        # the block into the middle of that heading.
+        if content.startswith(anchor):
+            insert_at = 0
+        else:
+            newline_pos = content.find("\n" + anchor)
+            if newline_pos != -1:
+                insert_at = newline_pos + 1
+    if insert_at >= 0:
+        # Insert BEFORE the anchor line
+        return content[:insert_at] + block + content[insert_at:], "inserted"
+    # Append at end
+    return content.rstrip() + "\n\n" + block, "inserted"
+
+
+def main(apply: bool) -> int:
+    """Run every entry of FIXES against the matching published templates.
+
+    Args:
+        apply: When true, changed content is written back with UPDATE and
+            committed; otherwise the run only reports what would change.
+
+    Returns:
+        1 when no database URL is configured, otherwise 0.
+    """
+    dsn = os.environ.get("DATABASE_URL") or os.environ.get("COMPLIANCE_DATABASE_URL")
+    if not dsn:
+        print("ERROR: DATABASE_URL not set", file=sys.stderr)
+        return 1
+    conn = psycopg2.connect(dsn)
+    summary = []
+    try:
+        cur = conn.cursor(cursor_factory=RealDictCursor)
+        for fix in FIXES:
+            cur.execute(
+                "SELECT id, content FROM compliance.compliance_legal_templates "
+                "WHERE document_type=%s AND language=%s AND status='published'",
+                (fix["document_type"], fix["language"]),
+            )
+            rows = cur.fetchall()
+            if not rows:
+                summary.append((fix["document_type"], fix["language"], "not-found", 0))
+                continue
+            for row in rows:
+                # NULL content is treated as empty text.
+                original = row["content"] or ""
+                new_content, status = apply_fix(original, fix)
+                if status == "inserted" and apply:
+                    cur.execute(
+                        "UPDATE compliance.compliance_legal_templates "
+                        "SET content=%s, updated_at=now() WHERE id=%s",
+                        (new_content, row["id"]),
+                    )
+                summary.append((fix["document_type"], fix["language"], status,
+                                len(new_content) - len(original)))
+        if apply:
+            # Single commit: either all fixes are stored or none.
+            conn.commit()
+    finally:
+        conn.close()
+    print(f"\n== Template Content Fixes ({'APPLIED' if apply else 'DRY-RUN'}) ==")
+    for doc_type, lang, status, delta in summary:
+        marker = "✓" if status == "inserted" else ("·" if status == "already-fixed" else "✗")
+        print(f"  {marker} {doc_type:30s} [{lang}] {status:14s} (+{delta} chars)")
+    return 0
+
+
+# Script entry point: dry-run unless "--apply" is on the command line;
+# main()'s return value becomes the process exit status.
+if __name__ == "__main__":
+    apply = "--apply" in sys.argv
+    sys.exit(main(apply))
@@ -0,0 +1,115 @@
+#!/usr/bin/env python3
+"""P59 Phase 2 — Seed compliance.cookie_library from Open Cookie Database (CC0).
+
+Open Cookie Database: jkwakman/Open-Cookie-Database (CC0-1.0 Public Domain).
+~700 categorised cookies maintained by Cybot/Cookiebot community."""
+from __future__ import annotations
+
+import csv
+import io
+import os
+import sys
+import urllib.request
+
+import psycopg2
+
+# Raw CSV export of the Open Cookie Database that main() downloads.
+OCD_URL = (
+    "https://raw.githubusercontent.com/jkwakman/Open-Cookie-Database/master/"
+    "open-cookie-database.csv"
+)
+
+# Maps the lower-cased CSV "Category" value to the value stored in
+# cookie_library.actual_category; main() stores "unknown" for any category
+# that is not listed here.
+CATEGORY_MAP = {
+    "strictly necessary": "essential",
+    "functional": "functional",
+    "performance": "statistics",
+    "analytics": "statistics",
+    "targeting": "marketing",
+    "marketing": "marketing",
+    "advertisement": "marketing",
+    "social media": "social_media",
+    "unclassified": "unknown",
+}
+
+
+def parse_max_age(retention: str) -> int | None:
+    """Approximate a retention string such as '2 years' or '30 Tage' in seconds.
+
+    Recognises English and German unit stems for years, months, weeks,
+    days, hours, minutes and seconds. A year counts as 365 days and a
+    month as 30 days.
+
+    Args:
+        retention: Free-text retention period.
+
+    Returns:
+        0 for any text containing "session", the approximate number of
+        seconds for the first ``<number> <unit>`` pair found, or ``None``
+        when the text is empty or has no recognisable pair.
+    """
+    if not retention:
+        return None
+    text = retention.lower().strip()
+    if "session" in text:
+        return 0
+    import re  # local import: this script has no module-level ``re`` import
+    seconds_per_unit = {
+        "jahr": 31536000, "year": 31536000,
+        "monat": 2592000, "month": 2592000,
+        "woche": 604800, "week": 604800,
+        "tag": 86400, "day": 86400,
+        "stund": 3600, "hour": 3600,
+        "minute": 60,
+        "sekunde": 1, "second": 1,
+    }
+    # Stems match as prefixes, so plural forms ("years", "tage") are covered.
+    unit_alternation = "|".join(seconds_per_unit)
+    m = re.search(rf"(\d+)\s*({unit_alternation})", text)
+    if not m:
+        return None
+    return int(m.group(1)) * seconds_per_unit[m.group(2)]
+
+
+def main() -> int:
+    """Download the Open Cookie Database CSV and seed compliance.cookie_library.
+
+    Rows without a cookie name are skipped. Inserts use
+    ``ON CONFLICT DO NOTHING``; the printed "Inserted" figure is the sum of
+    the cursor's row counts. A per-category row count of the table is
+    printed at the end.
+
+    Returns:
+        1 when DATABASE_URL is missing, 2 when the download fails,
+        otherwise 0.
+    """
+    dsn = os.environ.get("DATABASE_URL")
+    if not dsn:
+        print("DATABASE_URL missing", file=sys.stderr)
+        return 1
+    print(f"Fetching {OCD_URL} ...", file=sys.stderr)
+    try:
+        with urllib.request.urlopen(OCD_URL, timeout=30) as response:
+            # "utf-8-sig" drops a leading BOM, which would otherwise become
+            # part of the first CSV header name.
+            body = response.read().decode("utf-8-sig", errors="replace")
+    except Exception as e:
+        # Top-level boundary: report any download problem and exit with a code.
+        print(f"Fetch failed: {e}", file=sys.stderr)
+        return 2
+    rows = list(csv.DictReader(io.StringIO(body)))
+    print(f"Parsed {len(rows)} rows", file=sys.stderr)
+
+    conn = psycopg2.connect(dsn)
+    try:
+        cur = conn.cursor()
+        inserted = 0
+        skipped = 0
+        for entry in rows:
+            name = (entry.get("Cookie / Data Key name") or "").strip()
+            domain = (entry.get("Domain") or "").strip()
+            if not name:
+                skipped += 1
+                continue
+            category_raw = (entry.get("Category") or "").strip().lower()
+            actual_category = CATEGORY_MAP.get(category_raw, "unknown")
+            vendor = (entry.get("Platform") or entry.get("Data Controller") or "Unknown").strip()
+            purpose = (entry.get("Description") or "").strip()[:1000]
+            privacy_url = (entry.get("User Privacy & GDPR Rights Portals") or "").strip()
+            max_age = parse_max_age(entry.get("Retention period") or "")
+            # Entries without a domain are stored with the catch-all pattern "*".
+            domain_pattern = domain or "*"
+            cur.execute(
+                """
+            INSERT INTO compliance.cookie_library
+                (cookie_name, domain_pattern, vendor_name,
+                 vendor_privacy_url, actual_category, purpose_en,
+                 typical_max_age_seconds, source_name, source_url,
+                 source_license, confidence)
+            VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s)
+            ON CONFLICT DO NOTHING
+            """,
+                (name, domain_pattern, vendor[:200], privacy_url or None,
+                 actual_category, purpose or None, max_age,
+                 "Open Cookie Database", OCD_URL, "CC0-1.0", 0.75),
+            )
+            inserted += cur.rowcount
+        conn.commit()
+        print(f"\nInserted {inserted}, skipped {skipped}")
+        cur.execute("SELECT actual_category, COUNT(*) "
+                    "FROM compliance.cookie_library GROUP BY actual_category "
+                    "ORDER BY 2 DESC")
+        for row in cur.fetchall():
+            print(f"  {row[0]:15s}: {row[1]}")
+    finally:
+        # Closing without a commit discards any uncommitted inserts.
+        conn.close()
+    return 0
+
+
+# Script entry point: main()'s return value becomes the process exit status.
+if __name__ == "__main__":
+    sys.exit(main())