diff --git a/.claude/rules/loc-exceptions.txt b/.claude/rules/loc-exceptions.txt
index 401ce785..48ffb95a 100644
--- a/.claude/rules/loc-exceptions.txt
+++ b/.claude/rules/loc-exceptions.txt
@@ -122,9 +122,9 @@ consent-sdk/src/mobile/ios/ConsentManager.swift
 consent-tester/services/dsi_discovery.py
 
 # --- backend-compliance: unified compliance check orchestrator ---
-# Sequential 7-step pipeline (text resolve, profile detect, check documents,
-# banner scan, cross-check, profile extract, report). Phase 5 split target.
-backend-compliance/compliance/api/agent_compliance_check_routes.py
+# 2026-06-06: REMOVED — file split into agent_check/ subpackage
+# (19 files, main module now 347 LOC). Phase 5 target completed.
+# [guardrail-change]
 
 # --- docs-src: binary office files (not source code) ---
 # (Also excluded by extension in scripts/check-loc.sh — kept here for legibility.)
diff --git a/backend-compliance/compliance/api/agent_check/__init__.py b/backend-compliance/compliance/api/agent_check/__init__.py
new file mode 100644
index 00000000..b15c5367
--- /dev/null
+++ b/backend-compliance/compliance/api/agent_check/__init__.py
@@ -0,0 +1,10 @@
+"""
+Subpackage for the compliance-check route — extracted to keep
+`agent_compliance_check_routes.py` under the 500-line guardrail.
+
+The route module still owns the public HTTP endpoints and re-exports
+all helpers from this subpackage, so external callers
+(`saving_scan_routes`, `agent_migration_routes`, tests) continue to
+import them from `compliance.api.agent_compliance_check_routes`
+unchanged.
+"""
diff --git a/backend-compliance/compliance/api/agent_check/_b1_wiring.py b/backend-compliance/compliance/api/agent_check/_b1_wiring.py
new file mode 100644
index 00000000..599a893d
--- /dev/null
+++ b/backend-compliance/compliance/api/agent_check/_b1_wiring.py
@@ -0,0 +1,105 @@
+"""B1 wiring — Mobile Consent-Reachability check + HTML block.
+
+Fetches the homepage of the first submitted URL, runs the static
+`evaluate_reachability` analysis on the footer, and renders the
+result as an HTML block for the audit mail.
+
+Only renders a block when the check FAILS — a passing site doesn't
+need a block. The block is severity-colored and lists the specific
+notes that triggered the finding (missing reopen anchor, new-tab
+break, browser-deflection language).
+"""
+
+from __future__ import annotations
+
+import html
+import logging
+
+import httpx
+
+from compliance.services.consent_reachability_check import (
+    evaluate_reachability,
+)
+
+from ._helpers import _update
+
+logger = logging.getLogger(__name__)
+
+
+async def run_b1(state: dict) -> None:
+    """Run the reachability check + render HTML. Mutates state in place."""
+    req = state["req"]
+    check_id = state["check_id"]
+    homepage_url = ""  # stays empty when no document carries an absolute URL
+    for d in req.documents:
+        if d.url:
+            from urllib.parse import urlparse  # function-local import, re-executed per iteration
+            p = urlparse(d.url)
+            if p.scheme and p.netloc:
+                homepage_url = f"{p.scheme}://{p.netloc}/"  # site root of the first usable URL
+                break
+    if not homepage_url:
+        return  # nothing to fetch — state is left untouched
+
+    _update(check_id, "Mobile Consent-Reachability prüfen...", 95)
+    try:
+        async with httpx.AsyncClient(
+            timeout=20.0, follow_redirects=True,  # the User-Agent below identifies as iPhone Safari
+            headers={"User-Agent": "Mozilla/5.0 (iPhone; CPU iPhone OS 17_5 "
+                     "like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) "
+                     "Version/17.5 Mobile/15E148 Safari/604.1"},
+        ) as c:
+            r = await c.get(homepage_url)
+            if r.status_code != 200:
+                logger.info("B1: homepage fetch %s → HTTP %d", homepage_url, r.status_code)
+                return  # non-200 after redirects: skip the check, record no finding
+            page_html = r.text
+    except Exception as e:  # best-effort: any fetch error only skips this check
+        logger.warning("B1: homepage fetch failed: %s", e)
+        return
+
+    finding = evaluate_reachability(page_html, homepage_url)
+    state["reachability_finding"] = finding
+    state["reachability_html"] = _render_block(finding)  # "" when the check passed
+    logger.info(
+        "B1 Reachability: passed=%s severity=%s reason=%s",
+        finding["passed"], finding.get("severity"),
+        finding.get("severity_reason"),
+    )
+
+
+def _render_block(finding: dict) -> str:
+    """Render the reachability finding as an audit-mail HTML block."""
+    if finding["passed"]:
+        return ""  # passing sites get no block at all
+    sev = (finding.get("severity") or "").upper()  # NOTE(review): interpolated unescaped below — assumes a fixed severity vocabulary
+    color = "#dc2626" if sev == "HIGH" else "#f59e0b"  # red for HIGH, amber for everything else
+    notes_html = "".join(
+        f"<li>{html.escape(n)}</li>" for n in finding.get("notes") or []
+    )
+    anchor = finding.get("reopen_anchor") or {}
+    anchor_html = ""
+    if anchor:  # only shown when a footer link was reported
+        anchor_html = (
+            "<p style='margin:8px 0 0;font-size:13px;color:#475569;'>"
+            "Gefundener Footer-Link: "
+            f"<code>{html.escape((anchor.get('text') or '')[:80])}</code> "
+            f"→ <code>{html.escape((anchor.get('href') or '')[:120])}</code> "
+            f"(target_class: {html.escape(anchor.get('target_class') or '—')})"
+            "</p>"
+        )
+    return (
+        f"<div style='margin:24px 0;padding:16px;border-left:4px solid {color};"
+        "background:#fef2f2;border-radius:4px;'>"
+        f"<h2 style='margin:0 0 8px;color:{color};font-size:16px;'>"
+        "COOKIE-CONSENT-UX-001 — Mobile Consent-Reachability</h2>"
+        f"<p style='margin:0 0 8px;font-size:14px;'><strong>Severity:</strong> "
+        f"{sev} ({html.escape(finding.get('severity_reason') or '')})</p>"
+        "<p style='margin:0 0 4px;font-size:14px;'>"
+        "Art. 7 Abs. 3 DSGVO: Widerruf muss so einfach wie Erteilung sein. "
+        "Auf Mobile-Safari konnten wir folgendes Problem feststellen:</p>"
+        f"<ul style='margin:8px 0 0 20px;font-size:14px;color:#7f1d1d;'>"
+        f"{notes_html}</ul>"
+        f"{anchor_html}"
+        "</div>"
+    )
diff --git a/backend-compliance/compliance/api/agent_check/_b3_wiring.py b/backend-compliance/compliance/api/agent_check/_b3_wiring.py
new file mode 100644
index 00000000..8f6e1a9d
--- /dev/null
+++ b/backend-compliance/compliance/api/agent_check/_b3_wiring.py
@@ -0,0 +1,189 @@
+"""B3 wiring — Cross-doc retention consistency check + HTML block.
+
+Combines three sources of retention truth per cookie:
+
+  - DSI text (state["doc_texts"]["dse"] or "cookie")
+  - cookie-table `duration` from cmp_vendors[i]["cookies"][j]
+  - actual cookie expiry from banner_result["cookies_detailed"][k]
+
+and produces per-cookie findings + a TH-RETENTION theme summary. Only
+renders an HTML block when there are findings to show; the block is
+sorted by severity (HIGH first) and shows the top-10 mismatches.
+"""
+
+from __future__ import annotations
+
+import html
+import logging
+import time
+
+from compliance.services.retention_comparator import (
+    build_retention_theme_summary,
+    compare_retention,
+    extract_retention_claims,
+)
+
+logger = logging.getLogger(__name__)
+
+
+def _actual_max_age_seconds(cookie: dict) -> float | None:
+    """Return the cookie's remaining lifetime in seconds, or None.
+
+    An explicit positive `max_age` wins. Otherwise a positive `expires`
+    (Unix timestamp, as Playwright reports it) is converted to the time
+    left from now. Session cookies (-1 / 0) and expired ones give None.
+    """
+    explicit = cookie.get("max_age")
+    if isinstance(explicit, (int, float)) and explicit > 0:
+        return float(explicit)
+    expires_at = cookie.get("expires")
+    if not isinstance(expires_at, (int, float)) or expires_at <= 0:
+        return None
+    remaining = expires_at - time.time()
+    # A timestamp that already lies in the past has no lifetime left.
+    return float(remaining) if remaining > 0 else None
+
+
+def run_b3(state: dict) -> None:
+    """Cross-doc retention check + render HTML. Mutates state in place."""
+    doc_texts = state["doc_texts"]
+    cmp_vendors = state["cmp_vendors"]
+    banner_result = state["banner_result"]
+
+    dsi_text = doc_texts.get("dse") or doc_texts.get("cookie") or ""  # "dse" text first, "cookie" text as fallback
+    if not dsi_text:
+        return  # no policy text to compare against — state stays untouched
+
+    cookie_records: list[dict] = []  # one record per named cookie in the vendor tables
+    cookie_names: list[str] = []
+    vendor_names: list[str] = []
+    for v in cmp_vendors or []:
+        vname = (v.get("name") or "").strip()
+        if vname:
+            vendor_names.append(vname)
+        for c in (v.get("cookies") or []):
+            cname = (c.get("name") or "").strip()
+            if not cname:
+                continue
+            duration = (c.get("duration") or c.get("persistence")  # first non-empty of the three keys wins
+                        or c.get("expiry") or "")
+            cookie_names.append(cname)
+            cookie_records.append({
+                "name": cname,
+                "vendor": vname,
+                "table_duration": duration,
+                "actual_max_age": None,  # filled below when the banner scan saw this cookie
+            })
+
+    if not cookie_records:
+        return
+
+    # Match actual max_age from banner_result.cookies_detailed
+    if banner_result:
+        cookies_detailed = banner_result.get("cookies_detailed") or []
+        by_name: dict[str, dict] = {}
+        for c in cookies_detailed:
+            n = (c.get("name") or "").lower()
+            if n:
+                by_name[n] = c  # case-insensitive; a later duplicate name overwrites an earlier one
+        for rec in cookie_records:
+            nm = rec["name"].lower()
+            if nm in by_name:
+                rec["actual_max_age"] = _actual_max_age_seconds(by_name[nm])
+
+    claims = extract_retention_claims(dsi_text, cookie_names, vendor_names)
+
+    findings: list[dict] = []
+    for rec in cookie_records:
+        finding = compare_retention(
+            cookie_name=rec["name"],
+            table_duration=rec["table_duration"],
+            actual_max_age_seconds=rec["actual_max_age"],
+            dsi_claims=claims,
+            vendor_name=rec["vendor"] or None,  # an empty vendor name is passed as None
+        )
+        findings.append(finding)
+
+    summary = build_retention_theme_summary(findings)
+    state["retention_findings"] = findings
+    state["retention_theme_summary"] = summary
+    state["retention_html"] = _render_block(summary, findings)  # "" when no mismatch needs showing
+    logger.info(
+        "B3 Retention: %d findings, %d passed, %d failed, %d incomplete",
+        summary["total"], summary["passed"], summary["failed"],
+        summary["incomplete"],
+    )
+
+
+def _fmt_days(d: float | None) -> str:
+    """Format a day count compactly: hours, days, months (30 d) or years."""
+    if d is None:
+        return "—"
+    if d < 30:
+        # Anything below one day is shown in whole hours.
+        return f"{int(d * 24)}h" if d < 1 else f"{int(d)}d"
+    if d < 365:
+        return f"{int(d / 30)}mo"
+    return f"{d / 365:.1f}y"
+
+
+def _render_block(summary: dict, findings: list[dict]) -> str:  # audit-mail HTML block; "" when nothing failed
+    if summary["total"] == 0:
+        return ""
+    failed_findings = [f for f in findings if not f.get("matches")  # real mismatches only; "incomplete" ones are left out
+                       and f.get("severity_reason") != "incomplete"]
+    if not failed_findings:
+        return ""  # all OK, no block needed
+    # Sort by severity (HIGH first) then diff_days desc
+    sev_rank = {"HIGH": 0, "MEDIUM": 1, "LOW": 2}
+    failed_findings.sort(key=lambda f: (
+        sev_rank.get((f.get("severity") or "").upper(), 9),  # unknown severities sort last
+        -(f.get("diff_days") or 0),
+    ))
+    rows = []
+    for f in failed_findings[:10]:  # only the top 10 mismatches are rendered
+        sev = (f.get("severity") or "").upper()  # NOTE(review): interpolated unescaped below — assumes a fixed severity vocabulary
+        color = ("#dc2626" if sev == "HIGH"
+                 else "#f59e0b" if sev == "MEDIUM" else "#64748b")
+        rows.append(
+            "<tr>"
+            f"<td style='padding:6px 10px;border-bottom:1px solid #e5e7eb;'>"
+            f"<code>{html.escape(f.get('cookie_name') or '—')}</code></td>"
+            f"<td style='padding:6px 10px;border-bottom:1px solid #e5e7eb;'>"
+            f"{html.escape((f.get('vendor_name') or '—'))}</td>"
+            f"<td style='padding:6px 10px;border-bottom:1px solid #e5e7eb;'>"
+            f"DSI: {_fmt_days(f.get('dsi_days'))} • "
+            f"Tabelle: {_fmt_days(f.get('table_days'))} • "
+            f"Realität: {_fmt_days(f.get('actual_days'))}</td>"
+            f"<td style='padding:6px 10px;border-bottom:1px solid #e5e7eb;"
+            f"color:{color};font-weight:600;'>"
+            f"{sev} ({html.escape(f.get('mismatch_type') or '—')})</td>"
+            "</tr>"
+        )
+    total = summary["total"]
+    passed = summary["passed"]
+    failed = summary["failed"]
+    incomplete = summary["incomplete"]
+    return (
+        "<div style='margin:24px 0;padding:16px;border-left:4px solid #dc2626;"  # border colour is fixed, not per-severity
+        "background:#fefce8;border-radius:4px;'>"
+        "<h2 style='margin:0 0 8px;color:#854d0e;font-size:16px;'>"
+        "TH-RETENTION — Speicherdauer-Konsistenz (DSI ↔ Cookie-Tabelle ↔ Realität)"
+        "</h2>"
+        "<p style='margin:0 0 8px;font-size:14px;color:#3f3f46;'>"
+        f"<strong>{total}</strong> Cookies verglichen: "
+        f"<strong style='color:#15803d;'>{passed} ✓</strong> / "
+        f"<strong style='color:#dc2626;'>{failed} ✗</strong> / "
+        f"<strong style='color:#64748b;'>{incomplete} ?</strong></p>"
+        "<table style='width:100%;border-collapse:collapse;font-size:13px;"
+        "margin-top:8px;background:#fff;'>"
+        "<thead><tr style='background:#f1f5f9;'>"
+        "<th style='text-align:left;padding:6px 10px;'>Cookie</th>"
+        "<th style='text-align:left;padding:6px 10px;'>Vendor</th>"
+        "<th style='text-align:left;padding:6px 10px;'>Werte</th>"
+        "<th style='text-align:left;padding:6px 10px;'>Mismatch</th>"
+        "</tr></thead>"
+        f"<tbody>{''.join(rows)}</tbody>"
+        "</table>"
+        "</div>"
+    )
diff --git a/backend-compliance/compliance/api/agent_check/_constants.py b/backend-compliance/compliance/api/agent_check/_constants.py
new file mode 100644
index 00000000..628f45de
--- /dev/null
+++ b/backend-compliance/compliance/api/agent_check/_constants.py
@@ -0,0 +1,93 @@
+"""Module-level constants + shared job state for the compliance-check
+route.
+
+`_compliance_check_jobs` is the SINGLE source of truth for in-flight
+job progress. Other modules MUST import the same object — never
+re-declare it — otherwise progress updates land in a detached dict.
+"""
+
+from __future__ import annotations
+
+# Internal hostname of the consent-tester container.
+CONSENT_TESTER_URL = "http://bp-compliance-consent-tester:8094"
+
+# In-memory job registry. Keyed by check_id. Values:
+#   {"status": "running"|"completed"|"failed"|"skipped_tdm",
+#    "progress": str, "progress_pct": int, "result": dict, ...}
+# Read/written by:
+#   - agent_compliance_check_routes (start/status/_run/_update)
+#   - saving_scan_routes (start)
+#   - agent_migration_routes (status mirror)
+_compliance_check_jobs: dict[str, dict] = {}
+
+
+# Canonical doc types in the same order the frontend
+# ComplianceCheckTab renders them. The route pads `results` to always
+# include an entry for each — missing rows are flagged as 'Nicht
+# eingereicht' or 'Auf der Website nicht gefunden'.
+#
+# DSB-Kontakt is NOT canonical: per GDPR practice the DSB is named
+# inside the DSI/datenschutz document (email or contact block), not as
+# a separate page. We check 'DSB benannt' as a sub-check of the DSE.
+_ALL_DOC_TYPES = [
+    "dse", "impressum", "social_media", "cookie",
+    "agb", "nutzungsbedingungen", "widerruf",
+]
+
+
+# Human-readable labels per doc_type. Used in the report + emails.
+_DOC_TYPE_LABELS = {
+    "dse": "Datenschutzerklaerung",
+    "datenschutz": "Datenschutzerklaerung",
+    "privacy": "Datenschutzerklaerung",
+    "impressum": "Impressum",
+    "agb": "AGB",
+    "widerruf": "Widerrufsbelehrung",
+    "cookie": "Cookie-Richtlinie",
+    "avv": "Auftragsverarbeitung",
+    "loeschkonzept": "Loeschkonzept",
+    "dsfa": "Datenschutz-Folgenabschaetzung",
+    "social_media": "Social Media Datenschutz",
+    "nutzungsbedingungen": "Nutzungsbedingungen",
+    "dsb": "DSB-Kontakt",
+    # P74: Legal notice / "Rechtliche Hinweise" (IP, forward-looking, risk)
+    "legal_notice": "Rechtliche Hinweise",
+    # P96: Digital Services Act mandatory information (Art. 12+17 DSA)
+    "dsa": "DSA-Pflichtangaben",
+    # P97: Third-party licence notices (OSS compliance)
+    "lizenzhinweise": "Lizenzhinweise Dritter",
+}
+
+
+# Title/URL keywords → canonical doc_type. Order matters: most-specific first.
+_DISCOVERY_RULES: list[tuple[str, tuple[str, ...]]] = [
+    ("cookie",            ("cookie", "kuche", "biscuit", "cookies-")),
+    ("widerruf",          ("widerruf", "rueckgabe", "rückgabe", "cancellation",
+                           "right-of-withdrawal", "ruecktritts", "rücktritts")),
+    ("social_media",      ("social-media", "soziale-medien", "social_media",
+                           "social-media-policy")),
+    # P23: 'terms-and-conditions' can mean general terms and conditions (AGB)
+    # OR terms of use. The discovery function is meant to classify more
+    # precisely later by title + content. This is only a URL hint:
+    ("agb",               ("/agb", "geschaeftsbedingungen", "geschäftsbedingungen",
+                           "general-terms")),
+    ("nutzungsbedingungen", ("nutzungsbedingung", "nutzungsbedingungen",
+                              "terms-of-use", "terms-and-conditions",
+                              "nutzungsordnung", "terms-of-service",
+                              "allgemeine-nutzungsbedingungen")),
+    ("dsb",               ("datenschutzbeauftragt", "data-protection-officer",
+                           "dpo-contact", "/dsb")),
+    ("impressum",         ("impressum", "imprint", "legal-notice", "site-notice",
+                           "anbieterkennzeichnung", "legal-disclaimer-pool")),
+    ("dse",               ("data-privacy", "datenschutz", "data-protection",
+                           "privacy-policy", "privacy-notice", "dsgvo",
+                           "data_privacy", "datenschutzinformation")),
+]
+
+
+# Compound TLDs that count as 2 labels when extracting the second-level
+# domain (e.g. shop.example.co.uk → 'example', not 'co').
+_COMPOUND_TLDS = {
+    "co.uk", "co.jp", "co.nz", "co.kr", "co.za", "co.in",
+    "com.au", "com.br", "com.mx", "com.tr", "com.sg",
+}
diff --git a/backend-compliance/compliance/api/agent_check/_discovery.py b/backend-compliance/compliance/api/agent_check/_discovery.py
new file mode 100644
index 00000000..e7e4d392
--- /dev/null
+++ b/backend-compliance/compliance/api/agent_check/_discovery.py
@@ -0,0 +1,230 @@
+"""Auto-discovery of missing canonical doc-types.
+
+For each canonical type the user did NOT submit, try to find it on the
+homepage of the URLs they DID submit. Also follow same-owner subdomains
+mentioned in the submitted text (BMW Group → bmwgroup.com etc.).
+
+Discovered docs are classified by `_classify_discovered_doc` and merged
+back into `doc_entries`; entries that stayed empty get
+`discovery_attempted=True` so the padding step can differentiate
+"Nicht eingereicht" from "Auf der Website nicht gefunden".
+"""
+
+from __future__ import annotations
+
+import logging
+import re
+from urllib.parse import urlparse
+
+import httpx
+
+from ._constants import _ALL_DOC_TYPES, CONSENT_TESTER_URL
+from ._helpers import _classify_discovered_doc, _update
+
+logger = logging.getLogger(__name__)
+
+
+async def _autodiscover_missing(
+    check_id: str,
+    doc_entries: list[dict],
+    doc_texts: dict[str, str],
+    url_text_cache: dict[str, str],
+) -> None:
+    """Try to find each canonical doc_type the user did not submit on the
+    site they DID submit (plus up to two same-owner domains from the texts).
+
+    Mutates `doc_entries` (text/url/word_count, `auto_discovered=True`) and
+    `doc_texts` in place; `url_text_cache` is not used here. When a crawl is
+    possible, every missing entry gets `discovery_attempted=True` so the
+    report can tell 'Nicht eingereicht' from 'Auf der Website nicht gefunden'.
+    """
+    # VW fix: only doc types with a substantial text yield count as
+    # 'submitted'. If the user entered a URL that returns 404 (VW
+    # cookie-richtlinie.html), or the crawler extracted fewer than 200
+    # characters (SPA shell), treat the type as 'missing' so the
+    # discovery pass tries alternative URLs.
+    _MIN_USEFUL_CHARS = 200
+    submitted_types = {
+        e["doc_type"] for e in doc_entries
+        if len((e.get("text") or "").strip()) >= _MIN_USEFUL_CHARS
+    }
+    # Collect the failed URL submissions. NOTE(review): in this function they
+    # only feed the log line and `rejected_url`; nothing skips them on retry.
+    failed_urls: set[str] = {
+        (e.get("url") or "").strip()
+        for e in doc_entries
+        if (e.get("url") or "").strip()
+        and len((e.get("text") or "").strip()) < _MIN_USEFUL_CHARS
+    }
+    if failed_urls:
+        logger.info(
+            "VW-Fix: %d eingegebene URLs lieferten <%d Zeichen — Discovery "
+            "soll Alternativen probieren: %s",
+            len(failed_urls), _MIN_USEFUL_CHARS,
+            ", ".join(list(failed_urls)[:3]),
+        )
+    # Map alias types to canonical
+    submitted_canon = {
+        "dse" if t in ("datenschutz", "privacy") else t for t in submitted_types
+    }
+    # Missing = canonical types the user did NOT submit
+    missing = set(_ALL_DOC_TYPES) - submitted_canon
+    if not missing:
+        return
+
+    # Pick the most common base (scheme://netloc) from submitted URLs.
+    bases: dict[str, int] = {}
+    for e in doc_entries:
+        u = (e.get("url") or "").strip()
+        if u and "://" in u:
+            p = urlparse(u)
+            base = f"{p.scheme}://{p.netloc}"
+            bases[base] = bases.get(base, 0) + 1
+    if not bases:
+        # No submitted URL at all — nothing to crawl from. Add empty
+        # placeholders (with discovery_attempted=False) so the padding
+        # step renders them as 'Nicht eingereicht' (not 'Nicht gefunden').
+        for dt in missing:
+            doc_entries.append({
+                "doc_type": dt, "url": "", "text": "", "word_count": 0,
+                "auto_discovered": False, "discovery_attempted": False,
+            })
+        return
+
+    # Build crawl plan: primary base + related domains mentioned in the
+    # submitted texts that share the owner's SLD (BMW: bmwgroup.com next to
+    # bmw.de). Drop a "www." PREFIX only — str.lstrip would eat characters.
+    primary_base = max(bases, key=bases.get) + "/"
+    crawl_bases: list[str] = [primary_base]
+    primary_netloc = re.sub(r"^www\.", "", urlparse(primary_base).netloc.lower())
+    owner_token = primary_netloc.split(".")[0]  # 'bmw'
+
+    if owner_token and len(owner_token) >= 3:
+        domain_re = re.compile(
+            r"https?://([a-z0-9][a-z0-9\-]*\.)*" + re.escape(owner_token)
+            + r"[a-z0-9\-]*\.[a-z]{2,}",
+            re.IGNORECASE,
+        )
+        seen_bases = {primary_base}
+        for entry in doc_entries:
+            text = entry.get("text") or ""
+            for m in domain_re.finditer(text):
+                p = urlparse(m.group(0))
+                base = f"{p.scheme}://{p.netloc}/"
+                base_netloc = re.sub(r"^www\.", "", p.netloc.lower())
+                if base_netloc == primary_netloc:
+                    continue
+                if base in seen_bases:
+                    continue
+                seen_bases.add(base)
+                crawl_bases.append(base)
+                if len(crawl_bases) >= 3:
+                    break
+            if len(crawl_bases) >= 3:
+                break
+
+    _update(
+        check_id,
+        f"Suche fehlende Dokumente auf {', '.join(urlparse(b).netloc for b in crawl_bases)}...",
+        18,
+    )
+
+    discovered: list[dict] = []
+    disc_payloads: list[dict] = []
+    disc_cookie_texts: list[str] = []
+    for base in crawl_bases:
+        try:
+            async with httpx.AsyncClient(timeout=300.0) as client:  # P90: 180s -> 300s
+                resp = await client.post(
+                    f"{CONSENT_TESTER_URL}/dsi-discovery",
+                    json={"url": base, "max_documents": 15},
+                    timeout=300.0,  # P90: 180s -> 300s
+                )
+                if resp.status_code != 200:
+                    logger.warning("auto-discovery: HTTP %d for %s",
+                                   resp.status_code, base)
+                    continue
+                body = resp.json()
+                discovered.extend(body.get("documents", []) or [])
+                disc_payloads.extend(body.get("cmp_payloads") or [])
+                cmp_text = body.get("cmp_cookie_text") or ""
+                if cmp_text:
+                    disc_cookie_texts.append(cmp_text)
+                logger.info("auto-discovery on %s: %d docs, %d CMP payloads, "
+                            "cmp_cookie_text=%d words", base,
+                            len(body.get("documents", []) or []),
+                            len(body.get("cmp_payloads") or []),
+                            len(cmp_text.split()))
+        except Exception as e:
+            # P90: verbose exception for diagnosis
+            logger.warning("auto-discovery failed for %s: %s (%s)",
+                           base, str(e) or "(empty)", type(e).__name__)
+
+    # Classify each discovered doc into a canonical doc_type
+    by_type: dict[str, dict] = {}
+    for d in discovered:
+        title = (d.get("title") or "").lower()
+        url = (d.get("url") or "").lower()
+        wc = d.get("word_count") or 0
+        if wc < 100:
+            continue
+        canon = _classify_discovered_doc(title, url)
+        if canon and canon in missing and canon not in by_type:
+            by_type[canon] = d
+
+    # Append/Update entry for every missing canonical type. Auto-discovered
+    # ones get the text/URL filled; unmatched ones stay empty so the
+    # padding step renders them as 'Auf der Website nicht gefunden'.
+    # VW fix: if an empty entry already exists (URL set, but the fetch
+    # returned no/tiny text), update it in place instead of duplicating.
+    filled = 0
+    for dt in missing:
+        existing = next((e for e in doc_entries
+                         if e.get("doc_type") == dt), None)
+        new_entry: dict = existing if existing else {
+            "doc_type": dt, "url": "", "text": "", "word_count": 0,
+            "auto_discovered": False, "discovery_attempted": True,
+            "cmp_payloads": [],
+        }
+        new_entry["discovery_attempted"] = True
+        d = by_type.get(dt)
+        if d:
+            full = d.get("full_text") or d.get("text_preview") or ""
+            # For cookie: prefer the CMP-reconstructed text when it's
+            # substantially richer than the auto-discovered DOM extraction.
+            # BMW homepage CMP yields ~1800 words of authoritative policy;
+            # DOM extraction typically yields ~600 words of site chrome.
+            if dt == "cookie" and disc_cookie_texts:
+                cmp_merged = "\n\n".join(disc_cookie_texts)
+                if len(cmp_merged.split()) > len(full.split()):
+                    logger.info(
+                        "cookie: using CMP-reconstructed text (%d words) "
+                        "instead of DOM (%d words)",
+                        len(cmp_merged.split()), len(full.split()),
+                    )
+                    full = cmp_merged
+            if len(full.split()) >= 100:
+                new_entry["text"] = full
+                # Keep the original URL as "rejected_url" so the audit can
+                # show 'X was 404, we found Y'.
+                if existing and (existing.get("url") or "").strip() in failed_urls:
+                    new_entry["rejected_url"] = existing.get("url")
+                new_entry["url"] = d.get("url", "")
+                new_entry["word_count"] = len(full.split())
+                new_entry["auto_discovered"] = True
+                if dt == "cookie" and disc_payloads:
+                    new_entry["cmp_payloads"] = disc_payloads
+                doc_texts[dt] = full
+                filled += 1
+                logger.info(  # log every crawled base: the source base per doc is not tracked
+                    "auto-discovered %s on %s: %s (%d words)%s",
+                    dt, ", ".join(crawl_bases), d.get("url", "")[:80], new_entry["word_count"],
+                    " [REPLACED failed URL]" if existing else "",
+                )
+        if not existing:
+            doc_entries.append(new_entry)
+
+    logger.info(
+        "auto-discovery: filled %d/%d missing types from %s",
+        filled, len(missing), ", ".join(crawl_bases),
+    )
diff --git a/backend-compliance/compliance/api/agent_check/_fetch.py b/backend-compliance/compliance/api/agent_check/_fetch.py
new file mode 100644
index 00000000..52c96e7e
--- /dev/null
+++ b/backend-compliance/compliance/api/agent_check/_fetch.py
@@ -0,0 +1,142 @@
+"""URL → text fetch helper for the compliance-check pipeline.
+
+Tries the consent-tester service first (Playwright, full JS render +
+CMP capture). On any failure or empty result, falls back to a direct
+HTTP GET with an identifiable User-Agent and per-domain rate limiting.
+
+For cookie/dse/social_media doc types we cap discovery to 1 sub-page
+(the policy itself is authoritative). For Impressum/AGB/Widerruf and
+similar enterprise-split pages we follow up to 3 sub-pages.
+"""
+
+from __future__ import annotations
+
+import logging
+import re as _re
+
+import httpx
+
+from ._constants import CONSENT_TESTER_URL
+
+logger = logging.getLogger(__name__)
+
+
+async def _fetch_text(url: str, doc_type: str = "") -> tuple[str, list[dict]]:
+    """Fetch text from URL via consent-tester, with HTTP fallback.
+
+    Returns (text, cmp_payloads). cmp_payloads is the raw CMP JSON captured
+    during navigation (ePaaS, OneTrust, …) — empty when no CMP fired or
+    HTTP fallback was used. Backend turns payloads into structured vendor
+    records for the VVT table in the email.
+    """
+    # 1. Consent-tester (Playwright-based, full JS rendering).
+    # max_documents depends on doc_type:
+    #   - cookie/dse/social_media: self-extract (often + CMP capture) is
+    #     authoritative, sub-pages dilute the policy text. max=1.
+    #   - impressum/agb/widerruf/nutzungsbedingungen/dsb: BMW & similar
+    #     enterprise sites split this across 3-4 short sub-pages
+    #     (Versicherungsvermittler, Aufsicht, Berufsrecht). max=3 follows
+    #     them. The 15s networkidle bail (dsi_helpers) keeps timing safe.
+    short_extract_types = {"cookie", "dse", "datenschutz", "privacy", "social_media"}
+    max_docs = 1 if (doc_type or "") in short_extract_types else 3
+    try:
+        # P90: 120s is not enough for the BMW Impressum (auto-discovery
+        # follows 3 sub-docs). 240s leaves headroom. Mercedes currently
+        # also often fails at 120s due to Akamai latency.
+        async with httpx.AsyncClient(timeout=240.0) as client:
+            resp = await client.post(
+                f"{CONSENT_TESTER_URL}/dsi-discovery",
+                json={"url": url, "max_documents": max_docs},
+                timeout=240.0,
+            )
+            if resp.status_code == 200:
+                payload = resp.json()
+                docs = payload.get("documents", [])
+                cmp_payloads = payload.get("cmp_payloads") or []
+                cmp_cookie_text = payload.get("cmp_cookie_text") or ""
+                merged = ""  # must be bound for the CMP-only return below when `docs` is empty
+                # D — if the consent-tester extracted HTML tables from the
+                # DOM, inject them into cmp_payloads as "html_table"
+                # entries so the backend can process them via
+                # cookies_table_parser.
+                for doc in (docs or []):
+                    for tbl in (doc.get("tables") or []):
+                        if not tbl or len(tbl) < 3:
+                            continue
+                        cmp_payloads.append({
+                            "kind": "html_table",
+                            "url":  doc.get("url", ""),
+                            "rows": tbl,
+                        })
+                if docs:
+                    texts = []
+                    for doc in docs:
+                        t = doc.get("full_text", "") or doc.get("text_preview", "") or ""
+                        if t and len(t) > 50:
+                            texts.append(t)
+                    merged = "\n\n".join(texts)
+                    # For cookie/dse/social_media: when CMP reconstruction is
+                    # substantially richer than DOM extraction, use it. This
+                    # fixes the BMW case where DOM yields ~600 words of
+                    # navigation but the ePaaS payload reconstructs to ~1800
+                    # words of actual cookie policy.
+                    if (doc_type in short_extract_types
+                            and cmp_cookie_text
+                            and len(cmp_cookie_text.split()) > len(merged.split())):
+                        logger.info(
+                            "Preferring CMP-reconstructed text for %s on %s "
+                            "(%d words CMP vs %d words DOM)",
+                            doc_type, url,
+                            len(cmp_cookie_text.split()),
+                            len(merged.split()),
+                        )
+                        merged = cmp_cookie_text
+                    if merged and len(merged.split()) > 100:
+                        if len(texts) > 1:
+                            logger.info("Merged %d docs from %s (%d words)",
+                                        len(texts), url, len(merged.split()))
+                        return merged, cmp_payloads
+                # P90 bug fix: even when the text is too short for the
+                # 100-word threshold, do NOT discard the captured CMP
+                # payloads. BMW bug: DSE yields a 10-word SPA shell but the
+                # ePaaS JSON (393KB) was captured. The backend needs it for
+                # extract_vendors_from_payloads (VVT table).
+                if cmp_payloads:
+                    logger.info(
+                        "P90: keeping %d CMP payloads for %s despite "
+                        "short text (%d words) — HTTP fallback runs in parallel",
+                        len(cmp_payloads), url,
+                        len((merged or cmp_cookie_text).split()),
+                    )
+                    return (merged or cmp_cookie_text), cmp_payloads
+    except Exception as e:
+        # P90: verbose exception for diagnosis (was empty before)
+        logger.warning("Consent-tester fetch failed for %s: %s (%s)",
+                       url, str(e) or "(empty)", type(e).__name__)
+
+    # 2. Fallback: direct HTTP fetch (works for SSR pages like BMW).
+    # P7: identifiable UA + per-domain rate limit.
+    try:
+        from compliance.services.compliance_user_agent import (
+            default_request_headers, DomainRateLimiter,
+        )
+        async with httpx.AsyncClient(
+            timeout=30.0, follow_redirects=True,
+            headers=default_request_headers(),
+        ) as client:
+            async with DomainRateLimiter(url):
+                resp = await client.get(url)
+            if resp.status_code == 200 and "text/html" in resp.headers.get("content-type", ""):
+                html = resp.text
+                # Drop script/style blocks and tags, collapse whitespace (entities stay encoded)
+                text = _re.sub(r"<script[^>]*>.*?</script>", " ", html, flags=_re.DOTALL | _re.IGNORECASE)
+                text = _re.sub(r"<style[^>]*>.*?</style>", " ", text, flags=_re.DOTALL | _re.IGNORECASE)
+                text = _re.sub(r"<[^>]+>", " ", text)
+                text = _re.sub(r"\s+", " ", text).strip()
+                if len(text.split()) > 100:
+                    logger.info("HTTP fallback for %s: %d words", url, len(text.split()))
+                    return text, []
+    except Exception as e:
+        logger.warning("HTTP fallback failed for %s: %s", url, e)
+
+    return "", []
diff --git a/backend-compliance/compliance/api/agent_check/_helpers.py b/backend-compliance/compliance/api/agent_check/_helpers.py
new file mode 100644
index 00000000..4c8d5d28
--- /dev/null
+++ b/backend-compliance/compliance/api/agent_check/_helpers.py
@@ -0,0 +1,228 @@
+"""Pure helpers for the compliance-check route — no I/O, no async.
+
+Grouped here because each is small and they share the same constants
+imports. Splitting further would not improve readability.
+"""
+
+from __future__ import annotations
+
+import logging
+from urllib.parse import urlparse
+
+from ._constants import (
+    _ALL_DOC_TYPES,
+    _COMPOUND_TLDS,
+    _DISCOVERY_RULES,
+    _DOC_TYPE_LABELS,
+    _compliance_check_jobs,
+)
+
+logger = logging.getLogger(__name__)
+
+
+def _update(check_id: str, msg: str, pct: int | None = None) -> None:
+    """Set the job's progress message and, when given, pct clamped to 0-100."""
+    job = _compliance_check_jobs[check_id]
+    job["progress"] = msg
+    if pct is not None:
+        job["progress_pct"] = max(0, min(100, int(pct)))
+
+
+def _doc_type_label(doc_type: str) -> str:
+    return _DOC_TYPE_LABELS.get(doc_type, doc_type.upper())  # fallback: upper-cased key
+
+
+def _classify_discovered_doc(title: str, url: str) -> str | None:
+    """Return the first canonical type whose keywords occur in title/URL, else None."""
+    haystack = f"{title} {url}"  # matched as-is: the substring test below is case-sensitive
+    for canon, keywords in _DISCOVERY_RULES:
+        if any(kw in haystack for kw in keywords):
+            return canon
+    return None
+
+
+def _extract_domain(doc_entries: list[dict]) -> str | None:
+    """Return the lowercased host (no www., port or userinfo) of the first URL."""
+    for entry in doc_entries:
+        url = entry.get("url", "")
+        if url and "://" in url:
+            host = urlparse(url).hostname or ""  # hostname: lowercased, no port/userinfo
+            if host.startswith("www."):
+                host = host[4:]
+            return host or None
+    return None
+
+
+def _company_name_from_url(doc_entries: list[dict]) -> str | None:
+    """Derive a display company name from the entered URLs.
+
+    Heuristic: take the second-level domain (e.g. "bmw" from "www.bmw.de"),
+    uppercase short acronyms (<=4 chars, no hyphens), title-case the rest.
+
+    Examples:
+      www.bmw.de              -> BMW
+      mercedes-benz.de        -> Mercedes-Benz
+      shop.example.co.uk      -> Example
+      juris.de                -> Juris
+    """
+    for entry in doc_entries:
+        url = entry.get("url", "")
+        if not url or "://" not in url:
+            continue
+        host = urlparse(url).hostname or ""  # lowercased; port/userinfo stripped
+        if host.startswith("www."):
+            host = host[4:]
+        parts = host.split(".")
+        if len(parts) < 2:
+            continue
+        # Handle compound TLDs (.co.uk etc.); a port can no longer hide them
+        if len(parts) >= 3 and ".".join(parts[-2:]) in _COMPOUND_TLDS:
+            sld = parts[-3]
+        else:
+            sld = parts[-2]
+        if not sld:
+            continue
+        if len(sld) <= 4 and "-" not in sld:
+            return sld.upper()
+        return "-".join(p.capitalize() for p in sld.split("-"))
+    return None
+
+
+def _get_skip_types(profile) -> dict[str, str]:
+    """Doc_types to skip entirely with a per-type reason message.
+
+    Currently used mainly for the OEM configurator pattern (BMW/Audi/
+    Mercedes): when the site does no direct sales, AGB/Widerruf/
+    Nutzungsbedingungen are not mandatory on the website — they are
+    handed out by the authorised dealer.
+    """
+    if getattr(profile, "no_direct_sales", False):
+        msg = (
+            "Nicht anwendbar — die Webseite schliesst keinen Direkt-"
+            "Kaufvertrag (OEM-Konfigurator-Pattern, Vertrag laeuft "
+            "ueber Vertragshaendler). AGB/Widerruf werden beim "
+            "Haendler ausgehaendigt."
+        )
+        return {
+            "agb": msg,
+            "widerruf": msg,
+            "nutzungsbedingungen": msg,
+        }
+    return {}
+
+
+def _apply_profile_filter(result, profile, doc_type: str):
+    """Adjust hint/skipped/severity of result.checks in place; returns result.
+
+    Covers ODR/OS-link, Widerruf for non-B2C profiles and Kammer checks.
+    """
+    for check in result.checks:
+        cid = check.id.lower()
+
+        # ODR/OS-Link: relevant ONLY for B2C online shops. The check's
+        # default hint is written for B2B (it explains why it's not
+        # relevant) — for B2C we must replace it with action-oriented
+        # guidance, otherwise the report contradicts itself.
+        if "odr" in cid or "os-link" in cid or "streitbeilegung" in check.label.lower():
+            if profile.needs_odr:
+                if not check.passed:
+                    check.hint = (
+                        "Als B2C-Anbieter muessen Sie nach Art. 14 EU-VO 524/2013 "
+                        "auf die OS-Plattform (https://ec.europa.eu/consumers/odr) "
+                        "verlinken — klickbarer Link, nicht nur Text. Zusaetzlich "
+                        "§36 VSBG: angeben, ob Sie an Verbraucher-"
+                        "Streitbeilegungsverfahren teilnehmen (oder nicht)."
+                    )
+            else:
+                check.skipped = True
+                check.hint = "Nicht relevant (kein B2C Online-Shop)"
+
+        # Widerruf: every check of the document is downgraded to INFO
+        if doc_type == "widerruf" and profile.business_type not in ("b2c", "unknown"):
+            check.severity = "INFO"
+            if not check.passed:
+                check.hint = (
+                    "Als B2B-Unternehmen benoetigen Sie keine Widerrufsbelehrung "
+                    "(§355 BGB gilt nur fuer Verbrauchervertraege). "
+                    "Empfehlung: Entfernen Sie die Widerrufsbelehrung von "
+                    "Ihrer Website, da sie Verwirrung stiften kann."
+                )
+
+        # Regulated profession: Kammer checks are skipped for everyone else
+        if "kammer" in cid or "berufsordnung" in check.label.lower():
+            if not profile.is_regulated_profession:
+                check.skipped = True
+                check.hint = "Nicht relevant (kein regulierter Beruf)"
+
+    return result
+
+
+def _pad_results_with_missing(
+    results: list,
+    discovery_attempted: set[str] | None = None,
+) -> list:
+    """Ensure every canonical doc_type has an entry in the results list.
+
+    Doc_types the user did not submit AND auto-discovery did not find get
+    a placeholder DocCheckResult. The error message distinguishes:
+      - 'Auf der Website nicht gefunden' (discovery was attempted)
+      - 'Nicht eingereicht' (no submitted URLs to crawl from)
+
+    Preserves the canonical ordering from _ALL_DOC_TYPES so the report
+    layout is stable; non-canonical results are appended afterwards.
+    """
+    from ..agent_doc_check_routes import DocCheckResult
+    attempted = discovery_attempted or set()
+
+    by_type: dict[str, object] = {}
+    for r in results:
+        canon = "dse" if r.doc_type in ("datenschutz", "privacy") else r.doc_type
+        by_type[canon] = r  # a later result of the same type replaces an earlier one
+
+    ordered: list = []
+    for dt in _ALL_DOC_TYPES:
+        if dt in by_type:
+            ordered.append(by_type[dt])
+            continue
+        if dt in attempted:
+            msg = ("Auf der Website nicht gefunden — bitte URL des "
+                   "Dokuments manuell eintragen, falls vorhanden")
+        else:
+            msg = "Nicht eingereicht — Quelle nicht angegeben"
+        ordered.append(DocCheckResult(
+            label=_doc_type_label(dt),
+            url="",
+            doc_type=dt,
+            word_count=0,
+            completeness_pct=0,
+            correctness_pct=0,
+            checks=[],
+            findings_count=0,
+            error=msg,
+            scenario="missing",
+        ))
+
+    extras = [r for r in results
+              if (r.doc_type if r.doc_type not in ("datenschutz", "privacy") else "dse")
+              not in _ALL_DOC_TYPES]
+    ordered.extend(extras)  # types outside _ALL_DOC_TYPES keep their input order
+    return ordered
+
+
+def _result_to_dict(r) -> dict:
+    """Convert a DocCheckResult and its checks to a JSON-serializable dict."""
+    fields = ("id", "label", "passed", "severity", "matched_text",
+              "level", "parent", "skipped", "hint")
+    return {
+        "label": r.label, "url": r.url, "doc_type": r.doc_type,
+        "word_count": r.word_count, "completeness_pct": r.completeness_pct,
+        "correctness_pct": r.correctness_pct,
+        "checks": [{f: getattr(c, f) for f in fields} for c in r.checks],
+        "findings_count": r.findings_count, "error": r.error,
+        "scenario": getattr(r, "scenario", ""),  # empty string when the attribute is absent
+    }
+
+
+def _build_profile_html(profile) -> str:
+    from ..agent_doc_check_report import build_profile_html  # function-scope import — presumably avoids an import cycle; confirm
+    return build_profile_html(profile)
diff --git a/backend-compliance/compliance/api/agent_check/_orchestrator.py b/backend-compliance/compliance/api/agent_check/_orchestrator.py
new file mode 100644
index 00000000..3fcbb4f1
--- /dev/null
+++ b/backend-compliance/compliance/api/agent_check/_orchestrator.py
@@ -0,0 +1,69 @@
+"""Thin orchestrator — runs the 6 phases of the compliance check.
+
+The original `_run_compliance_check` was a 1620-line monolith. It is
+now decomposed into six phases (A=resolve, B=profile+check,
+C=banner+extract, D=report-build [D1 raw vendors, D2 finalize,
+D3-top/mid/bot blocks], E=email, F=persist), each in its own module.
+
+State flows through a single mutable `dict` (see `_state.new_state`).
+This intentionally trades type safety for additive flexibility: the
+report-building phase routinely adds new optional keys for each new
+HTML block, and a typed dataclass would freeze the schema before the
+new blocks could land.
+"""
+
+from __future__ import annotations
+
+import logging
+
+from ._b1_wiring import run_b1
+from ._b3_wiring import run_b3
+from ._constants import _compliance_check_jobs
+from ._phase_a_resolve import run_phase_a
+from ._phase_b_profile_check import run_phase_b
+from ._phase_c_banner import run_phase_c
+from ._phase_d1_vendors_raw import run_phase_d1
+from ._phase_d2_vendors_finalize import run_phase_d2
+from ._phase_d3_blocks_bot import run_phase_d3_bot
+from ._phase_d3_blocks_mid import run_phase_d3_mid
+from ._phase_d3_blocks_top import run_phase_d3_top
+from ._phase_e_email import run_phase_e
+from ._phase_f_persist import run_phase_f
+from ._state import new_state
+
+logger = logging.getLogger(__name__)
+
+
+async def run_compliance_check(check_id: str, req) -> None:
+    """Background task: run phases A-F in order; on any error mark the job failed."""
+    state = new_state(check_id, req)
+    try:
+        # Phase A: TDM gate + Step 1 (resolve / discover / split / dedup)
+        continue_run = await run_phase_a(state)
+        if not continue_run:
+            return  # TDM denied — job already marked skipped_tdm
+        # Phase B: Step 2 (profile detect) + Step 3 (per-doc checks)
+        await run_phase_b(state)
+        # Phase C: Step 3b-d (banner + cross-check + TCF) + Step 4
+        await run_phase_c(state)
+        # Phase D-1/D-2: Step 5 vendor extraction + finalize
+        await run_phase_d1(state)
+        await run_phase_d2(state)
+        # B1 + B3: cross-cutting checks that need the finalized vendor
+        # list + DSI text. Render their own HTML blocks consumed by
+        # phase D-3 bot's full_html composition.
+        await run_b1(state)
+        run_b3(state)
+        # Phase D-3 top/mid/bot: Step 5 HTML blocks
+        await run_phase_d3_top(state)
+        await run_phase_d3_mid(state)
+        await run_phase_d3_bot(state)
+        # Phase E: Step 6 send mail (with A1 ZIP attachment)
+        run_phase_e(state)
+        # Phase F: Step 7 persist + audit log + unified findings
+        run_phase_f(state)
+    except Exception as e:
+        logger.error("Compliance check %s failed: %s",
+                     check_id, e, exc_info=True)
+        _compliance_check_jobs[check_id]["status"] = "failed"
+        _compliance_check_jobs[check_id]["error"] = str(e)[:500]  # stored message capped at 500 chars
diff --git a/backend-compliance/compliance/api/agent_check/_phase_a_resolve.py b/backend-compliance/compliance/api/agent_check/_phase_a_resolve.py
new file mode 100644
index 00000000..b6bfa679
--- /dev/null
+++ b/backend-compliance/compliance/api/agent_check/_phase_a_resolve.py
@@ -0,0 +1,232 @@
+"""Phase A — TDM gate + text resolution + section split + dedup.
+
+Covers (in the original `_run_compliance_check`):
+  - TDM-reservation pre-check (§ 44b UrhG)
+  - Step 1   Resolve texts (URL fetch / pasted text / auto-reclassify)
+  - Step 1a  Auto-discovery of missing canonical doc_types
+  - Step 1b  Section splitting (shared URL → multiple doc_types,
+             DSI → Cookie/Social-Media auto-fill)
+  - Step 1c  Cross-document keyword search
+  - P15      Dedup of doc_types referencing the same source document
+
+Returns True to continue, False if the run was aborted (TDM denied).
+"""
+
+from __future__ import annotations
+
+import logging
+
+from ._constants import _compliance_check_jobs
+from ._discovery import _autodiscover_missing
+from ._fetch import _fetch_text
+from ._helpers import _update
+
+logger = logging.getLogger(__name__)
+
+
+async def run_phase_a(state: dict) -> bool:
+    """Resolve + dedup document texts into state; False means the TDM gate aborted."""
+    check_id = state["check_id"]
+    req = state["req"]
+
+    # Reset anchor-locator cache per run (avoid cross-run leak)
+    try:
+        from compliance.services.doc_anchor_locator import reset_cache
+        reset_cache()
+    except Exception as e:
+        logger.debug("doc_anchor_locator reset_cache skipped: %s", e)
+
+    # P7: TDM reservation check of the base domain (§ 44b UrhG).
+    # On reserved/denied: end the run immediately, no crawl.
+    try:
+        from compliance.services.tdm_reservation_check import (
+            check_tdm_reservation, is_crawl_allowed,
+        )
+        first_url = next(
+            (d.url for d in req.documents if d.url), "",
+        )
+        if first_url:
+            tdm = await check_tdm_reservation(first_url)
+            _compliance_check_jobs[check_id]["tdm"] = tdm
+            # P12: with tdm_override + a reason (>= 10 chars) we do NOT abort,
+            # we only document it. An override without a reason is ignored.
+            override_active = (
+                req.tdm_override
+                and len((req.tdm_override_reason or "").strip()) >= 10
+            )
+            if not is_crawl_allowed(tdm) and not override_active:
+                _compliance_check_jobs[check_id]["status"] = "skipped_tdm"
+                _compliance_check_jobs[check_id]["error"] = (
+                    f"TDM-Vorbehalt fuer {tdm.get('domain')} erkannt "
+                    f"(status={tdm.get('status')}) — Crawl nach § 44b "
+                    f"UrhG nicht zulaessig. Signals: "
+                    f"{[s.get('src') for s in tdm.get('signals', [])]}"
+                )
+                _compliance_check_jobs[check_id]["progress_pct"] = 100
+                logger.info("TDM-skip check_id=%s domain=%s status=%s",
+                            check_id, tdm.get("domain"), tdm.get("status"))
+                return False
+            if override_active and not is_crawl_allowed(tdm):
+                _compliance_check_jobs[check_id]["tdm_override"] = {
+                    "reason": req.tdm_override_reason.strip()[:500],
+                    "original_status": tdm.get("status"),
+                }
+                logger.warning(
+                    "TDM-Override aktiv: check_id=%s domain=%s "
+                    "status=%s reason=%r",
+                    check_id, tdm.get("domain"), tdm.get("status"),
+                    req.tdm_override_reason.strip()[:80],
+                )
+    except Exception as e:
+        logger.warning("TDM-check failed (proceeding): %s", e)
+
+    # Step 1: Resolve texts (fetch from URL if needed) — 0-30%
+    _update(check_id, "Texte werden geladen...", 1)
+    doc_texts: dict[str, str] = {}
+    doc_entries: list[dict] = []
+
+    # Cache fetched URLs to detect duplicates
+    url_text_cache: dict[str, str] = {}
+
+    n_docs = max(1, len(req.documents))
+    # Vendors parsed from user-pasted tables (no LLM needed) — handed on
+    # via state["pasted_table_vendors"] for the later cmp_vendors merge.
+    pasted_table_vendors: list[dict] = []
+    for i, doc in enumerate(req.documents):
+        pct = int(1 + (i / n_docs) * 29)
+        _update(check_id, f"Texte laden {i+1}/{n_docs}: {doc.doc_type}...", pct)
+        text = (doc.text or "").strip()
+        input_source = "url"
+        cmp_payloads: list[dict] = []
+        if text:
+            input_source = "text"
+            if doc.url:
+                input_source = "text+url"  # user filled in both
+                logger.info(
+                    "doc_type=%s: User hat URL UND Text geliefert — "
+                    "Text gewinnt, URL wird als Quellen-Referenz behalten",
+                    doc.doc_type,
+                )
+        elif doc.url:
+            url_key = doc.url.strip().rstrip("/").lower()
+            if url_key in url_text_cache:
+                text = url_text_cache[url_key]
+            else:
+                text, cmp_payloads = await _fetch_text(doc.url, doc_type=doc.doc_type)
+                if text:
+                    url_text_cache[url_key] = text
+
+        # Auto-reclassify check: if the user pasted text into the wrong
+        # doc-type field (e.g. Impressum text into DSE), detect that
+        # and re-tag the entry where the classifier says so.
+        actual_doc_type = doc.doc_type
+        reclassify_hint: dict | None = None
+        if input_source.startswith("text") and len(text) >= 500:
+            try:
+                from compliance.services.doc_type_classifier import (
+                    detect_mismatch,
+                )
+                reclassify_hint = detect_mismatch(doc.doc_type, text)
+                if reclassify_hint and reclassify_hint["action"] == "reclassify":
+                    actual_doc_type = reclassify_hint["detected"]
+                    logger.info(
+                        "doc_type AUTO-RECLASSIFY: deklariert=%s "
+                        "erkannt=%s (score %d vs %d) — uebernehme erkannten Typ",
+                        doc.doc_type, actual_doc_type,
+                        reclassify_hint["detected_score"],
+                        reclassify_hint["declared_score"],
+                    )
+            except Exception as e:
+                logger.warning("doc_type_classifier failed: %s", e)
+
+        # Cookie table: if the user pasted a table, parse it deterministically
+        # (no LLM needed) and derive the vendors right away.
+        if input_source.startswith("text") and actual_doc_type == "cookie":
+            try:
+                from compliance.services.cookies_table_parser import (
+                    parse_cookie_table,
+                )
+                tab_vendors = parse_cookie_table(text)
+                if tab_vendors:
+                    pasted_table_vendors.extend(tab_vendors)
+                    logger.info(
+                        "Cookie-Tabelle erkannt im pasted Text — "
+                        "%d Vendors / %d Cookies deterministisch geparst",
+                        len(tab_vendors),
+                        sum(len(v.get("cookies", [])) for v in tab_vendors),
+                    )
+            except Exception as e:
+                logger.warning("cookies_table_parser failed: %s", e)
+
+        if text:
+            doc_texts[actual_doc_type] = text
+        doc_entries.append({
+            "doc_type":         actual_doc_type,
+            "declared_doc_type": doc.doc_type,
+            "url":              doc.url,
+            "text":             text,
+            "word_count":       len(text.split()) if text else 0,
+            "auto_discovered":  False,
+            "discovery_attempted": False,
+            "cmp_payloads":     cmp_payloads,
+            "input_source":     input_source,
+            "reclassify_hint":  reclassify_hint,
+        })
+
+    # Step 1a-bis: AUTO-DISCOVERY
+    await _autodiscover_missing(
+        check_id, doc_entries, doc_texts, url_text_cache,
+    )
+
+    # Step 1b: Section splitting — two cases:
+    # 1. Same URL used for multiple doc_types → split by heading
+    # 2. DSI text contains Cookie/Social-Media sections → auto-fill empty rows
+    from compliance.services.section_splitter import (
+        split_shared_texts, auto_fill_from_dsi, cross_search_documents,
+    )
+    split_shared_texts(doc_entries, url_text_cache)
+    auto_fill_from_dsi(doc_entries)
+
+    # Step 1c: Cross-document search — find doc_types in wrong documents (30-35%)
+    _update(check_id, "Dokumente werden uebergreifend durchsucht...", 32)
+    placement_findings = cross_search_documents(doc_entries)
+
+    # Refresh doc_texts after all splitting/searching
+    for entry in doc_entries:
+        if entry.get("text"):
+            doc_texts[entry["doc_type"]] = entry["text"]
+
+    # P15: Dedupe — when several doc_types reference THE SAME document
+    # (e.g. Safetykon: user enters /datenschutz for dse + cookie + widerruf),
+    # keep only the primary doc_type. The others: emptied + marked dup_of.
+    # Priority: dse > impressum > cookie > widerruf > agb > nutzungsbedingungen
+    _DOC_PRIORITY = ["dse", "impressum", "cookie", "widerruf", "agb",
+                     "nutzungsbedingungen", "social_media", "dsb"]
+    seen_text_hash: dict[int, str] = {}
+    for dt in _DOC_PRIORITY:
+        entry = next((e for e in doc_entries if e.get("doc_type") == dt
+                      and e.get("text")), None)
+        if not entry:
+            continue
+        text_hash = hash((entry.get("text") or "").strip()[:1000])  # first 1000 chars only
+        if text_hash in seen_text_hash:
+            primary = seen_text_hash[text_hash]
+            logger.info(
+                "P15 dedup: doc_type=%s referenziert dasselbe Dokument "
+                "wie %s (URL=%s) -> als Duplikat markiert.",
+                dt, primary, (entry.get("url") or "")[:60],  # url may be stored as None
+            )
+            entry["text"] = ""
+            entry["word_count"] = 0
+            entry["url"] = ""
+            entry["dup_of"] = primary
+            doc_texts.pop(dt, None)
+        else:
+            seen_text_hash[text_hash] = dt
+
+    state["doc_texts"] = doc_texts
+    state["doc_entries"] = doc_entries
+    state["url_text_cache"] = url_text_cache
+    state["pasted_table_vendors"] = pasted_table_vendors
+    state["placement_findings"] = placement_findings
+    return True
diff --git a/backend-compliance/compliance/api/agent_check/_phase_b_profile_check.py b/backend-compliance/compliance/api/agent_check/_phase_b_profile_check.py
new file mode 100644
index 00000000..b19c5ed9
--- /dev/null
+++ b/backend-compliance/compliance/api/agent_check/_phase_b_profile_check.py
@@ -0,0 +1,183 @@
+"""Phase B — Business-profile detection + per-document checks.
+
+Covers (in the original `_run_compliance_check`):
+  - Step 2  Detect business profile (with optional homepage merge for
+            P16 keywords)
+  - Step 3  Run regex + MC + LLM checks on each submitted document
+            (`_check_single`), applying skip rules + profile filter
+            + placement findings
+"""
+
+from __future__ import annotations
+
+import logging
+import os
+import re as _re
+from dataclasses import asdict
+
+import httpx
+
+from ._helpers import (
+    _apply_profile_filter,
+    _doc_type_label,
+    _get_skip_types,
+    _update,
+)
+from ._single_check import _check_single
+
+logger = logging.getLogger(__name__)
+
+
+async def run_phase_b(state: dict) -> None:
+    """Detect business profile + check each document. Mutates state in place."""
+    check_id = state["check_id"]
+    req = state["req"]
+    doc_texts = state["doc_texts"]
+    doc_entries = state["doc_entries"]
+    placement_findings = state["placement_findings"]
+
+    # Step 2: Detect business profile (35-40%)
+    from compliance.services.business_profiler import detect_business_profile
+    _update(check_id, "Geschaeftsmodell wird erkannt...", 37)
+    # P16: include homepage text in profile detection (no_direct_sales /
+    # B2B indicators such as "CE-Zertifizierung" / "Schulungen" often appear
+    # only in the homepage menu, not in the mandatory legal texts).
+    profile_input = dict(doc_texts)
+    try:
+        base_url = ""
+        for e in doc_entries:
+            if e.get("url"):
+                from urllib.parse import urlparse
+                p = urlparse(e["url"])
+                if p.scheme and p.netloc:
+                    base_url = f"{p.scheme}://{p.netloc}/"
+                    break
+        if base_url:
+            async with httpx.AsyncClient(
+                timeout=8.0, follow_redirects=True,
+                headers={"User-Agent": "Mozilla/5.0 (X11; Linux x86_64) "
+                         "AppleWebKit/537.36 HeadlessChrome/120.0.0.0"},
+            ) as _hc:  # NOTE(review): hard-coded browser-like UA, no rate limiter here — intended?
+                _hr = await _hc.get(base_url)
+                if _hr.status_code == 200 and "text/html" in _hr.headers.get(
+                        "content-type", ""):
+                    _html = _hr.text[:60000]
+                    _html = _re.sub(r"<script[^>]*>.*?</script>", " ",
+                                    _html, flags=_re.DOTALL | _re.IGNORECASE)
+                    _html = _re.sub(r"<style[^>]*>.*?</style>", " ",
+                                    _html, flags=_re.DOTALL | _re.IGNORECASE)
+                    _html = _re.sub(r"<[^>]+>", " ", _html)
+                    _html = _re.sub(r"\s+", " ", _html).strip()
+                    if len(_html.split()) > 30:
+                        profile_input["__homepage"] = _html[:20000]
+                        logger.info("P16 homepage merged for profile: %d words",
+                                    len(_html.split()))
+    except Exception as e:
+        logger.debug("homepage fetch for profile failed: %s", e)
+    profile = await detect_business_profile(profile_input)
+    profile_dict = asdict(profile)
+
+    # Step 3: Check each document
+    from ..agent_doc_check_routes import CheckItem, DocCheckResult
+    results: list[DocCheckResult] = []
+    total_findings = 0
+    use_agent_flag = req.use_agent or os.getenv(
+        "COMPLIANCE_USE_AGENT", "false",
+    ).lower() == "true"
+
+    # Filter out doc_types that don't apply to this business profile
+    skip_types = _get_skip_types(profile)
+
+    # Derive business_scope hints for the MC filter (O1 — Doc-type Scope-Flag).
+    # MCs that explicitly require a feature (e.g. 'biometric_processing',
+    # 'ai_decision_making', 'child_targeting') get dropped when the
+    # detected profile doesn't declare it.
+    business_scope: set[str] = set()
+    for svc in (getattr(profile, "detected_services", []) or []):
+        business_scope.add(str(svc).lower())
+    if (getattr(profile, "business_type", "") or "").lower() == "b2c":
+        business_scope.add("b2c")
+    if getattr(profile, "has_online_shop", False):
+        business_scope.add("ecommerce")
+    if getattr(profile, "is_regulated_profession", False):
+        business_scope.add("regulated_profession")
+
+    # Document checks: 40-80%
+    n_entries = max(1, len(doc_entries))
+    for i, entry in enumerate(doc_entries):
+        text = entry["text"]
+        doc_type = entry["doc_type"]
+        label = _doc_type_label(doc_type)
+        url = entry["url"]
+
+        if doc_type in skip_types:
+            results.append(DocCheckResult(
+                label=label, url=url, doc_type=doc_type,
+                error=skip_types[doc_type],
+            ))
+            continue
+
+        pct = int(40 + (i / n_entries) * 40)
+        _update(check_id, f"Pruefen {i+1}/{n_entries}: {label}...", pct)
+
+        if not text or len(text) < 50:
+            # P15: duplicate doc that was deduped against a primary doc
+            if entry.get("dup_of"):
+                results.append(DocCheckResult(
+                    label=label, url="", doc_type=doc_type,
+                    error=f"Nicht separat vorhanden — wird im Dokument "
+                          f"'{_doc_type_label(entry['dup_of'])}' "
+                          f"mit-geprueft.",
+                ))
+                continue
+            # P24: the DPO (DSB) contact is a mandatory item of the DSE
+            # (Art. 13(1)(b) GDPR) — a missing separate DSB document is
+            # NOT an error; per this note the DSE check covers the DSB.
+            if doc_type == "dsb" and not (entry.get("url") or "").strip():
+                results.append(DocCheckResult(
+                    label=label, url="", doc_type=doc_type,
+                    error="Nicht separat vorhanden — DSB-Kontaktdaten "
+                          "werden in der Datenschutzerklaerung als "
+                          "Pflichtangabe nach Art. 13(1)(b) DSGVO geprueft.",
+                ))
+                continue
+            # Empty entry — either from auto-discovery padding (no URL
+            # to fetch) or from a fetch that returned nothing. If there
+            # was a URL we keep the error so the user knows the fetch
+            # failed; otherwise let the padding step label it
+            # 'Nicht eingereicht' / 'Auf der Website nicht gefunden'.
+            if (entry.get("url") or "").strip():
+                results.append(DocCheckResult(
+                    label=label, url=url, doc_type=doc_type,
+                    error="Kein Text vorhanden oder zu kurz",
+                ))
+            continue
+
+        result = await _check_single(
+            text, doc_type, label, url,
+            entry["word_count"], use_agent_flag,
+            business_scope=business_scope,
+            business_profile={"no_direct_sales": getattr(profile, "no_direct_sales", False)},
+        )
+
+        # Apply profile context filter
+        result = _apply_profile_filter(result, profile, doc_type)
+
+        # Add placement findings — but only if the regex checks confirm
+        # the text doesn't match. If completeness >= 50%, the text IS the
+        # right doc_type despite missing cross-search keywords.
+        if result.completeness_pct < 50:
+            for pf in placement_findings:
+                if pf.get("doc_type") == doc_type:
+                    result.checks.insert(0, CheckItem(**{
+                        k: v for k, v in pf.items() if k != "doc_type"
+                    }))
+
+        results.append(result)
+        total_findings += result.findings_count
+
+    state["profile"] = profile
+    state["profile_dict"] = profile_dict
+    state["business_scope"] = business_scope
+    state["results"] = results
+    state["total_findings"] = total_findings
diff --git a/backend-compliance/compliance/api/agent_check/_phase_c_banner.py b/backend-compliance/compliance/api/agent_check/_phase_c_banner.py
new file mode 100644
index 00000000..00ec3384
--- /dev/null
+++ b/backend-compliance/compliance/api/agent_check/_phase_c_banner.py
@@ -0,0 +1,129 @@
+"""Phase C — Banner scan + Cookie/DSE cross-check + TCF check + profile extract.
+
+Covers (in the original `_run_compliance_check`):
+  - Step 3b  Cookie-banner scan via consent-tester /scan (homepage,
+             3-phase consent test)
+  - Step 3c  Cross-check banner findings vs. cookie-policy text
+  - Step 3d  TCF vendor vs. DSI cross-check + VVT entries
+  - Step 4   Extract profile hints from documents
+  - Step 4b  Determine scenario per document (skip / regenerate / fix /
+             import)
+  - Step 4c  Pad missing canonical doc_types so the report always shows
+             every checklist row
+"""
+
+from __future__ import annotations
+
+import logging
+
+import httpx
+
+from ._constants import CONSENT_TESTER_URL
+from ._helpers import _pad_results_with_missing, _update
+
+logger = logging.getLogger(__name__)
+
+
+async def run_phase_c(state: dict) -> None:
+    """Run banner scan, cross-checks, profile extraction and result padding.
+
+    Reads ``check_id``, ``req``, ``doc_texts``, ``doc_entries``, ``results``
+    and ``profile_dict`` from *state* and stores ``banner_result``,
+    ``banner_url``, ``tcf_vendors``, ``vvt_entries``, ``extracted_profile``
+    and the padded ``results`` back into it.
+    """
+    check_id = state["check_id"]
+    req = state["req"]
+    doc_texts = state["doc_texts"]
+    doc_entries = state["doc_entries"]
+    results = state["results"]
+    profile_dict = state["profile_dict"]
+
+    # Step 3b: Banner-Check (automatic, uses first URL or homepage)
+    banner_result = None
+    banner_url = req.documents[0].url if req.documents and req.documents[0].url else ""
+    # Use the homepage (strip path); a URL without scheme or host is dropped
+    if banner_url:
+        from urllib.parse import urlparse
+        parsed = urlparse(banner_url)
+        banner_url = f"{parsed.scheme}://{parsed.netloc}" if parsed.scheme and parsed.netloc else ""
+    if banner_url:
+        _update(check_id, "Cookie-Banner wird geprueft...", 82)
+        try:
+            async with httpx.AsyncClient(timeout=900.0) as client:  # P50: +10min for vendor-detail-phase
+                resp = await client.post(
+                    f"{CONSENT_TESTER_URL}/scan",
+                    json={"url": banner_url, "timeout_per_phase": 10},
+                )
+                if resp.status_code == 200:
+                    banner_result = resp.json()
+        except Exception as e:
+            # str(e) may be empty, whereas the exception object is normally truthy.
+            logger.warning("Banner check failed: %s (%s)",
+                           str(e) or "<empty>", type(e).__name__)
+
+    # Step 3c: Cross-check Banner vs Cookie-Richtlinie (88-90%)
+    if banner_result and "cookie" in doc_texts:
+        from compliance.services.banner_cookie_cross_check import (
+            cross_check_banner_vs_cookie,
+        )
+        from ..agent_doc_check_routes import CheckItem
+        _update(check_id, "Banner vs. Cookie-Richtlinie abgleichen...", 89)
+        cross_findings = cross_check_banner_vs_cookie(banner_result, doc_texts["cookie"])
+        if cross_findings:
+            for r in results:
+                if r.doc_type == "cookie":
+                    for cf in cross_findings:
+                        r.checks.append(CheckItem(**cf))
+                    l2 = [c for c in r.checks if c.level == 2 and not c.skipped]
+                    l2p = sum(1 for c in l2 if c.passed)
+                    r.correctness_pct = round(l2p / len(l2) * 100) if l2 else 0
+
+    # Step 3d: TCF Vendor cross-check against DSI
+    tcf_vendors = banner_result.get("tcf_vendors", []) if banner_result else []
+    vvt_entries: list[dict] = []
+    if tcf_vendors and "dse" in doc_texts:
+        _update(check_id, f"{len(tcf_vendors)} TCF-Verarbeiter vs. DSI abgleichen...", 91)
+        from compliance.services.banner_cookie_cross_check import (
+            cross_check_vendors_vs_dsi,
+        )
+        from compliance.services.vendor_vvt_mapper import map_vendors_to_vvt
+        from ..agent_doc_check_routes import CheckItem
+        vendor_findings = cross_check_vendors_vs_dsi(tcf_vendors, doc_texts["dse"])
+        if vendor_findings:
+            for r in results:
+                if r.doc_type == "dse":
+                    for vf in vendor_findings:
+                        r.checks.append(CheckItem(**vf))
+        vvt_entries = map_vendors_to_vvt(tcf_vendors)
+
+    # Step 4: Extract profile hints from documents (92-95%)
+    _update(check_id, "Profil wird aus Dokumenten extrahiert...", 93)
+    from compliance.services.profile_extractor import extract_profile_from_documents
+    extracted_profile = extract_profile_from_documents(doc_texts, profile_dict)
+
+    # Step 4b: Determine scenario per document
+    for r in results:
+        if r.error:
+            r.scenario = "skip"
+        elif r.completeness_pct < 30:
+            r.scenario = "regenerate"
+        elif r.completeness_pct < 95:
+            r.scenario = "fix"
+        else:
+            r.scenario = "import"
+
+    # Step 4c: Always render all 8 canonical doc types. Missing types
+    # are differentiated:
+    #   - Discovery was tried but found nothing -> 'Auf der Website
+    #     nicht gefunden' (suggest user provides URL manually)
+    #   - No submitted URLs at all -> 'Nicht eingereicht'
+    attempted = {e["doc_type"] for e in doc_entries if e.get("discovery_attempted")}
+    results = _pad_results_with_missing(results, discovery_attempted=attempted)
+
+    state["banner_result"] = banner_result
+    state["banner_url"] = banner_url
+    state["tcf_vendors"] = tcf_vendors
+    state["vvt_entries"] = vvt_entries
+    state["extracted_profile"] = extracted_profile
+    state["results"] = results
diff --git a/backend-compliance/compliance/api/agent_check/_phase_d1_vendors_raw.py b/backend-compliance/compliance/api/agent_check/_phase_d1_vendors_raw.py
new file mode 100644
index 00000000..ca75e24d
--- /dev/null
+++ b/backend-compliance/compliance/api/agent_check/_phase_d1_vendors_raw.py
@@ -0,0 +1,315 @@
+"""Phase D-1 — Vendor-extraction raw stages.
+
+Covers (in the original Step 5 of `_run_compliance_check`):
+  - Aggregate cmp_payloads from all doc_entries + banner_result (P30/P48)
+  - Fallback: use DSE text when cookie was deduped (P17-D)
+  - Extract structured vendor records from CMP payloads
+  - LLM-cascade fallback when structured extract yields < 5 vendors (P52)
+  - Phase-G vendor-details append (P57)
+  - HTML-table DOM parse (Stage D)
+  - Crawled cookie-table parse (Stage B)
+  - Tesseract OCR over evidence slices (Stage C) — also captures the
+    cookie_evidence_slices used by A1 e-mail attachment
+"""
+
+from __future__ import annotations
+
+import logging
+
+from ._helpers import _company_name_from_url, _update
+
+logger = logging.getLogger(__name__)
+
+
+async def run_phase_d1(state: dict) -> None:
+    """Collect raw vendor records from every available source.
+
+    Reads ``check_id``, ``doc_entries``, ``doc_texts``, ``banner_result``
+    and ``pasted_table_vendors`` from *state*; writes ``cmp_vendors``,
+    ``cookie_payloads``, ``cookie_text``, ``cookie_evidence_slices`` and
+    ``cookie_evidence_meta`` back. The stages run inside a catch-all
+    ``try``: an exception there is logged and partial results are stored.
+    """
+    check_id = state["check_id"]
+    doc_entries = state["doc_entries"]
+    doc_texts = state["doc_texts"]
+    banner_result = state["banner_result"]
+    pasted_table_vendors = state["pasted_table_vendors"]
+
+    cmp_vendors: list[dict] = []
+    cookie_payloads: list[dict] = []
+    cookie_text = ""
+    cookie_evidence_slices: list[dict] | None = None
+    cookie_evidence_meta: dict | None = None
+
+    try:
+        from compliance.services.vendor_extractor import extract_vendors_from_payloads
+
+        # P30: aggregate cmp_payloads from ALL doc_entries — sites
+        # like Mercedes load Usercentrics only on the homepage, so the
+        # JSON gets captured during DSE/Impressum discovery, not in the
+        # cookies.html fetch. Dedup by URL since the same payload is
+        # captured on every page load.
+        seen_cmp_urls: set[str] = set()
+        for e in doc_entries:
+            for p in (e.get("cmp_payloads") or []):
+                p_url = p.get("url") or ""
+                if p_url and p_url in seen_cmp_urls:
+                    continue
+                seen_cmp_urls.add(p_url)
+                cookie_payloads.append(p)
+            if e.get("doc_type") == "cookie" and e.get("text"):
+                cookie_text = e["text"]
+        # P48: also pull cmp_payloads from the Banner-Scan (homepage 3-phase
+        # consent test). Mercedes' Usercentrics-JSON is captured there even
+        # when not in DSI-Discovery of static legal pages.
+        if banner_result:
+            for p in (banner_result.get("cmp_payloads") or []):
+                p_url = p.get("url") or ""
+                if p_url and p_url in seen_cmp_urls:
+                    continue
+                seen_cmp_urls.add(p_url)
+                cookie_payloads.append(p)
+            if cookie_payloads:
+                logger.info("P48: %d CMP-payloads available for vendor-extract "
+                            "(after Banner-Scan merge)", len(cookie_payloads))
+        # P17-D: fallback when the cookie doc was deduped via P15 — use the
+        # DSE text if it contains cookie terms so the LLM vendor extract can
+        # still run.
+        if not cookie_text and not cookie_payloads:
+            dse_t = doc_texts.get("dse", "")
+            if dse_t and any(w in dse_t.lower() for w in
+                              ("cookie", "tracking", "google analytics", "consent")):
+                cookie_text = dse_t
+                logger.info("P17-D: vendor-extract Fallback auf DSE (Cookie deduped)")
+        owner_name = _company_name_from_url(doc_entries) or ""
+        if cookie_payloads:
+            cmp_vendors = extract_vendors_from_payloads(
+                cookie_payloads, owner_name=owner_name,
+            )
+
+        # P52: run the LLM fallback not only for 0 vendors but also when the
+        # structured sources yielded < 5 vendors and the cookie text is
+        # substantial (at least 500 whitespace-separated words).
+        if (len(cmp_vendors) < 5
+                and cookie_text and len(cookie_text.split()) >= 500):
+            from compliance.services.vendor_llm_extractor import extract_vendors_via_llm
+            from compliance.services.vendor_classifier import classify
+            _update(check_id, "Vendor-Liste per LLM extrahieren...", 94)
+            llm_vendors = await extract_vendors_via_llm(cookie_text)
+            existing_names = {(v.get("name") or "").strip().lower() for v in cmp_vendors}
+            added_llm = 0
+            for v in llm_vendors:
+                nm = (v.get("name") or "").strip()
+                if not nm or nm.lower() in existing_names:
+                    continue
+                v["recipient_type"] = classify(
+                    vendor_name=nm,
+                    category=v.get("category", ""),
+                    owner_name=owner_name,
+                )
+                v.setdefault("source", "llm_cascade")
+                cmp_vendors.append(v)
+                existing_names.add(nm.lower())
+                added_llm += 1
+            if added_llm:
+                logger.info("P52 LLM-Cascade: +%d Vendors (total: %d)",
+                            added_llm, len(cmp_vendors))
+
+        # P57: Phase G vendor_details as an additional vendor source.
+        if banner_result:
+            vd_list = banner_result.get("vendor_details") or []
+            vd_list = [v for v in vd_list if v.get("name") != "__TDM_OPTOUT__"]
+            existing_names = {(v.get("name") or "").strip().lower() for v in cmp_vendors}
+            added = 0
+            for d in vd_list:
+                n = (d.get("name") or "").strip()
+                if not n or n.lower() in existing_names:
+                    continue
+                if n.lower() in ("technisch erforderlich", "analyse und statistik",
+                                 "marketing", "alles auswählen",
+                                 "alles auswaehlen"):
+                    continue
+                from compliance.services.vendor_classifier import classify
+                cmp_vendors.append({
+                    "name": n,
+                    "country": "",
+                    "purpose": (d.get("description") or "")[:500],
+                    "category": "",
+                    "opt_out_url": d.get("opt_out_url", ""),
+                    "privacy_policy_url": d.get("privacy_url", ""),
+                    "persistence": d.get("retention", ""),
+                    "cookies": d.get("cookies", []),
+                    "processing_company": d.get("processing_company", ""),
+                    "address": d.get("address", ""),
+                    "purposes": d.get("purposes", []),
+                    "technologies": d.get("technologies", []),
+                    "recipient_type": classify(
+                        vendor_name=n, category="", owner_name=owner_name,
+                    ),
+                })
+                existing_names.add(n.lower())
+                added += 1
+            if added:
+                logger.info("P57: added %d new vendors from Phase G (total: %d)",
+                            added, len(cmp_vendors))
+
+        # D — HTML tables from the DOM
+        for pl in (cookie_payloads or []):
+            if pl.get("kind") != "html_table":
+                continue
+            rows = pl.get("rows") or []
+            if len(rows) < 3:
+                continue
+            try:
+                from compliance.services.cookies_table_parser import (
+                    parse_cookie_table as _parse_ct_d,
+                )
+                table_text = "\n".join(rows)
+                d_vendors = _parse_ct_d(table_text)
+                if d_vendors:
+                    existing_d = {(v.get("name") or "").strip().lower()
+                                  for v in cmp_vendors}
+                    added_d = 0
+                    for v in d_vendors:
+                        nm = (v.get("name") or "").strip()
+                        if not nm or nm.lower() in existing_d:
+                            continue
+                        v.setdefault("source", "html_table_dom")
+                        cmp_vendors.append(v)
+                        existing_d.add(nm.lower())
+                        added_d += 1
+                    if added_d:
+                        logger.info("D HTML-Table-DOM-Parse: +%d Vendors aus "
+                                    "%d-Zeilen-Tabelle (total: %d)",
+                                    added_d, len(rows), len(cmp_vendors))
+            except Exception as e:
+                logger.warning("html_table parse failed: %s", e)
+
+        # B — also run cookies_table_parser on the crawled cookie text
+        if cookie_text and len(cookie_text) >= 500:
+            try:
+                from compliance.services.cookies_table_parser import (
+                    parse_cookie_table as _parse_ct,
+                    parse_flat_cookie_text as _parse_flat,
+                )
+                crawled_table_vendors = _parse_ct(cookie_text)
+                if not crawled_table_vendors:
+                    crawled_table_vendors = _parse_flat(cookie_text)
+                if crawled_table_vendors:
+                    existing = {(v.get("name") or "").strip().lower()
+                                for v in cmp_vendors}
+                    added_c = 0
+                    for v in crawled_table_vendors:
+                        nm = (v.get("name") or "").strip()
+                        if not nm or nm.lower() in existing:
+                            continue
+                        v.setdefault("source", "table_crawled")
+                        cmp_vendors.append(v)
+                        existing.add(nm.lower())
+                        added_c += 1
+                    if added_c:
+                        logger.info("B Crawled-Tabellen-Parse: +%d Vendors "
+                                    "(total: %d)", added_c, len(cmp_vendors))
+            except Exception as e:
+                logger.warning("crawled-table-parse failed: %s", e)
+
+        # C — screenshot + Tesseract OCR (also the source for the A1 ZIP attachment)
+        cookie_url_for_shot = next(
+            (_e["url"] for _e in doc_entries
+             if _e.get("doc_type") == "cookie" and _e.get("url")), "",
+        )
+        if cookie_url_for_shot:
+            try:
+                from compliance.services.cookie_screenshot_ocr import (
+                    capture_cookie_evidence_slices,
+                    cookies_to_vendor_records,
+                    ocr_slices_extract_cookies,
+                )
+                from compliance.services.cookies_table_parser import _guess_vendor as _gv
+                _update(check_id,
+                        "Cookie-Richtlinie wird fotografiert "
+                        "(lueckenlose Beweiskette)...", 92)
+                ev = await capture_cookie_evidence_slices(
+                    cookie_url_for_shot, check_id=check_id,
+                    viewport_h=1024, overlap_px=200, max_slices=40,
+                )
+                if ev.get("slices"):
+                    cookie_evidence_slices = ev["slices"]
+                    cookie_evidence_meta = {
+                        "total_height_px": ev.get("total_height_px"),
+                        "width_px": ev.get("width_px"),
+                        "accepted_banner": ev.get("accepted_banner"),
+                        "expanded": ev.get("expanded"),
+                        "url": ev.get("url"),
+                        "slice_count": len(ev["slices"]),
+                    }
+                    _update(check_id, "Tesseract OCR über alle Slices...", 93)
+                    ocr_cookies, ocr_stats = ocr_slices_extract_cookies(ev["slices"])
+                    if ocr_cookies:
+                        ocr_vendors = cookies_to_vendor_records(
+                            ocr_cookies, guess_vendor_fn=_gv,
+                        )
+                        existing = {(v.get("name") or "").strip().lower()
+                                    for v in cmp_vendors}
+                        added_v = 0
+                        for v in ocr_vendors:
+                            nm = (v.get("name") or "").strip()
+                            if not nm:
+                                continue
+                            if nm.lower() in existing:
+                                # Known vendor: merge new cookies into the first match.
+                                for ex in cmp_vendors:
+                                    if (ex.get("name") or "").strip().lower() == nm.lower():
+                                        ex_names = {
+                                            (c.get("name") or "").lower()
+                                            for c in (ex.get("cookies") or [])
+                                        }
+                                        for c in (v.get("cookies") or []):
+                                            c_name = (c.get("name") or "").lower()
+                                            if c_name not in ex_names:
+                                                ex.setdefault("cookies", []).append(c)
+                                                ex_names.add(c_name)
+                                        cur_src = ex.get("source") or ""
+                                        if "tesseract_ocr" not in cur_src:
+                                            ex["source"] = (cur_src + ";tesseract_ocr").strip(";")
+                                        break
+                                continue
+                            cmp_vendors.append(v)
+                            existing.add(nm.lower())
+                            added_v += 1
+                        logger.info(
+                            "C Tesseract-OCR: +%d Vendors / %d Cookies "
+                            "(über %d Slices, total: %d)",
+                            added_v, len(ocr_cookies),
+                            ocr_stats.get("slices", 0), len(cmp_vendors),
+                        )
+            except Exception as e:
+                logger.warning("Tesseract-OCR pipeline failed: %s (%s)",
+                               str(e) or "(no msg)", type(e).__name__)
+
+        # User-pasted cookie table (deterministic, no LLM).
+        # NOTE(review): the original comment said this table ALWAYS takes
+        # precedence because it is 100% accurate, yet an already collected
+        # vendor with the same name wins here — confirm this is intended.
+        if pasted_table_vendors:
+            existing = {(v.get("name") or "").strip().lower()
+                        for v in cmp_vendors}
+            added_p = 0
+            for v in pasted_table_vendors:
+                nm = (v.get("name") or "").strip()
+                if not nm or nm.lower() in existing:
+                    continue
+                cmp_vendors.append(v)
+                existing.add(nm.lower())
+                added_p += 1
+            if added_p:
+                logger.info("Pasted-Tabellen-Merge: +%d Vendors (total: %d)",
+                            added_p, len(cmp_vendors))
+    except Exception as e:
+        logger.warning("VVT vendor extraction skipped: %s", e)
+
+    state["cmp_vendors"] = cmp_vendors
+    state["cookie_payloads"] = cookie_payloads
+    state["cookie_text"] = cookie_text
+    state["cookie_evidence_slices"] = cookie_evidence_slices
+    state["cookie_evidence_meta"] = cookie_evidence_meta
diff --git a/backend-compliance/compliance/api/agent_check/_phase_d2_vendors_finalize.py b/backend-compliance/compliance/api/agent_check/_phase_d2_vendors_finalize.py
new file mode 100644
index 00000000..344b89a8
--- /dev/null
+++ b/backend-compliance/compliance/api/agent_check/_phase_d2_vendors_finalize.py
@@ -0,0 +1,250 @@
+"""Phase D-2 — Vendor finalize: enrich + normalize + library fallback.
+
+Covers (in the original Step 5 of `_run_compliance_check`):
+  - Cookie-Library-Fallback (P52 Lite) — when < 20 vendors but many
+    after-accept cookies, resolve via library
+  - Vendor-Normalizer (Google-Familie dedup, garbage filter)
+  - Detail-modal enrichment from Phase G (P50) + TDM-opt-out sentinel
+  - Cookie-Behavior-Validator (P59b) — 3-Tier severity findings
+  - Implicit cookies detection (P61) — GTM brings GA/GCL/DoubleClick
+  - validate_vendor_urls + score_vendors + cookie-function classify
+  - Vendor-Redundanz (O4) + EU-Alternativen + Cost/Savings
+"""
+
+from __future__ import annotations
+
+import logging
+import re
+
+logger = logging.getLogger(__name__)
+
+
+async def run_phase_d2(state: dict) -> None:
+    """Finalize the vendor list: fallback, normalize, enrich, validate.
+
+    Reads ``cmp_vendors``, ``cookie_text``, ``banner_result``,
+    ``banner_url``, ``profile`` and ``business_scope`` from *state*;
+    writes ``cmp_vendors``, ``tdm_opt_out_notice``,
+    ``cookie_behavior_findings`` and ``redundancy_report`` back. May also
+    add findings to ``banner_result`` in place.
+    """
+    cmp_vendors = state["cmp_vendors"]
+    cookie_text = state.get("cookie_text", "")
+    banner_result = state["banner_result"]
+    banner_url = state["banner_url"]
+    profile = state["profile"]
+    business_scope = state["business_scope"]
+
+    tdm_opt_out_notice = ""
+    cookie_behavior_findings: list[dict] = []
+    redundancy_report = None
+
+    try:
+        from compliance.services.cookie_link_validator import (
+            score_vendors, validate_vendor_urls,
+        )
+
+        # Cookie-Library-Fallback (P52 Lite): if there are still few
+        # vendors but many after_accept cookies, resolve them via the library.
+        # Lesson from VW: 6 coarse LLM vendors are NOT enough — the library
+        # yields 30+ more from the cookie names + cookie-doc patterns.
+        # Threshold: always try when < 20 vendors.
+        if banner_result and len(cmp_vendors) < 20:
+            try:
+                from compliance.services.cookie_to_vendor_fallback import (
+                    fallback_vendors_for_run,
+                )
+                from database import SessionLocal as _SLfb
+                _fb_db = _SLfb()
+                try:
+                    extra = fallback_vendors_for_run(
+                        _fb_db, banner_result, len(cmp_vendors),
+                        cookie_doc_text=cookie_text,
+                    )
+                    if extra:
+                        before = len(cmp_vendors)
+                        existing_names = {(v.get("name") or "").strip().lower()
+                                          for v in cmp_vendors}
+                        for v in extra:
+                            nm = (v.get("name") or "").strip().lower()
+                            if not nm or nm in existing_names:
+                                continue
+                            cmp_vendors.append(v)
+                            existing_names.add(nm)
+                        logger.info("Cookie-Library-Fallback: cmp_vendors %d -> %d",
+                                    before, len(cmp_vendors))
+                finally:
+                    _fb_db.close()
+            except Exception as e:
+                logger.warning("Cookie-Library-Fallback skipped: %s", e)
+
+        # Vendor-Normalizer: dedup (Google family etc.) + garbage filter
+        try:
+            from compliance.services.vendor_normalizer import normalize_vendors as _norm_v
+            cmp_vendors = _norm_v(cmp_vendors)
+        except Exception as e:
+            logger.warning("vendor_normalizer skipped: %s", e)
+
+        # P50: enrich vendors with per-vendor detail-modal-extracts
+        if cmp_vendors and banner_result:
+            vendor_details = banner_result.get("vendor_details") or []
+            # P50f: filter out TDM-opt-out sentinel
+            tdm_sentinel = next((v for v in vendor_details
+                                 if v.get("name") == "__TDM_OPTOUT__"), None)
+            if tdm_sentinel:
+                tdm_opt_out_notice = tdm_sentinel.get("description", "")
+                logger.info("P50f: TDM opt-out — skipped detail-enrichment for vendors")
+                vendor_details = [v for v in vendor_details
+                                  if v.get("name") != "__TDM_OPTOUT__"]
+            if vendor_details:
+                details_by_name = {}
+                for d in vendor_details:
+                    n = (d.get("name") or "").strip().lower()
+                    if n:
+                        details_by_name[n] = d
+                enriched = 0
+                for v in cmp_vendors:
+                    key = (v.get("name") or "").strip().lower()
+                    d = details_by_name.get(key)
+                    if not d and key:  # "" is a substring of every name
+                        for dn, dv in details_by_name.items():
+                            if key in dn or dn in key:
+                                d = dv
+                                break
+                    if not d:
+                        continue
+                    if not v.get("country") and (d.get("processing_company") or d.get("address")):
+                        addr = d.get("address") or ""
+                        # Grouped alternatives: \b must anchor every branch.
+                        if re.search(r"\b(deutschland|germany|berlin|m(?:ue|ü)nchen|hamburg|stuttgart)\b", addr, re.I):
+                            v["country"] = "DE"
+                        elif re.search(r"\b(?:ireland|irland|dublin)\b", addr, re.I):
+                            v["country"] = "IE"
+                        elif re.search(r"\b(?:usa|united states|california|new york|delaware)\b", addr, re.I):
+                            v["country"] = "US"
+                    if not v.get("purpose"):
+                        v["purpose"] = (d.get("description") or "")[:500]
+                    if not v.get("opt_out_url"):
+                        v["opt_out_url"] = d.get("opt_out_url", "")
+                    if not v.get("privacy_policy_url"):
+                        v["privacy_policy_url"] = d.get("privacy_url", "")
+                    if not v.get("cookies"):
+                        v["cookies"] = d.get("cookies", [])
+                    v["purposes"] = d.get("purposes", [])
+                    v["technologies"] = d.get("technologies", [])
+                    if not v.get("persistence"):
+                        v["persistence"] = d.get("retention", "")
+                    v["processing_company"] = d.get("processing_company", "")
+                    v["address"] = d.get("address", "")
+                    enriched += 1
+                logger.info("P50: enriched %d/%d vendors with detail-modal data",
+                            enriched, len(cmp_vendors))
+
+        # P59b: Cookie-Behavior-Validator
+        if banner_result:
+            cookies_detailed = banner_result.get("cookies_detailed") or []
+            if cookies_detailed:
+                cb_session = None
+                try:
+                    from database import SessionLocal
+                    from compliance.services.cookie_behavior_validator import (
+                        validate_cookie_behavior,
+                    )
+                    from urllib.parse import urlparse
+                    fp_domain = ""
+                    if banner_url:
+                        fp_domain = urlparse(banner_url).netloc.replace("www.", "")
+                    cb_session = SessionLocal()
+                    cookie_behavior_findings = validate_cookie_behavior(
+                        cb_session, cookies_detailed,
+                        network_requests=[],  # TODO Layer B in P59d
+                        first_party_domain=fp_domain,
+                    )
+                    if cookie_behavior_findings:
+                        sevs = {f["severity"] for f in cookie_behavior_findings}
+                        logger.info(
+                            "P59b: Cookie-Behavior-Check %d findings (severities: %s) "
+                            "ueber %d Cookies",
+                            len(cookie_behavior_findings),
+                            sorted(sevs), len(cookies_detailed),
+                        )
+                        banner_result["cookie_behavior_findings"] = cookie_behavior_findings
+                    else:
+                        logger.info(
+                            "P59b: Cookie-Behavior-Check 0 findings ueber %d Cookies "
+                            "(library miss / clean)", len(cookies_detailed),
+                        )
+                except Exception as cb_err:
+                    logger.warning("P59b Cookie-Behavior-Check failed: %s", cb_err)
+                finally:
+                    if cb_session is not None:
+                        try:
+                            cb_session.close()
+                        except Exception:
+                            pass
+
+        # P61: implicitly added ("smuggled-in") cookies
+        if banner_result and cmp_vendors:
+            try:
+                from compliance.services.vendor_package_cookies import (
+                    detect_implicit_cookies,
+                )
+                declared = [v.get("name", "") for v in cmp_vendors if v.get("name")]
+                actual_cookies: list[str] = []
+                for phase_data in (banner_result.get("phases") or {}).values():
+                    if isinstance(phase_data, dict):
+                        for ck in (phase_data.get("cookies") or []):
+                            if isinstance(ck, dict) and ck.get("name"):
+                                actual_cookies.append(ck["name"])
+                implicit_findings = detect_implicit_cookies(
+                    declared, actual_cookies_set=actual_cookies or None,
+                )
+                if implicit_findings:
+                    banner_result["implicit_vendor_findings"] = implicit_findings
+                    logger.info(
+                        "P61: %d implicit vendor-package items detected "
+                        "(%d cookies + %d vendors)",
+                        len(implicit_findings),
+                        sum(1 for f in implicit_findings if f["implicit"]["type"] == "cookie"),
+                        sum(1 for f in implicit_findings if f["implicit"]["type"] == "vendor"),
+                    )
+            except Exception as p61_err:
+                logger.warning("P61 implicit-vendor detection failed: %s", p61_err)
+
+        if cmp_vendors:
+            logger.info("VVT: %d vendors extracted, validating links", len(cmp_vendors))
+            cmp_vendors = await validate_vendor_urls(cmp_vendors)
+            cmp_vendors = score_vendors(cmp_vendors)
+            try:
+                from compliance.services.cookie_function_classifier import (
+                    annotate_vendor_cookies,
+                )
+                cmp_vendors = [annotate_vendor_cookies(v) for v in cmp_vendors]
+            except Exception as e:
+                logger.warning("Cookie function classification skipped: %s", e)
+    except Exception as e:
+        logger.warning("VVT vendor finalize skipped: %s", e)
+
+    # Vendor redundancy + EU alternatives + cost/savings (O4)
+    try:
+        from compliance.services.vendor_cost_estimator import infer_company_tier
+        from compliance.services.vendor_redundancy import analyze as analyze_redundancy
+        if cmp_vendors:
+            bp_dict = {"type": getattr(profile, "business_type", ""),
+                       "features": list(business_scope)}
+            ctier = infer_company_tier(bp_dict)
+            redundancy_report = analyze_redundancy(cmp_vendors, company_tier=ctier)
+            logger.info(
+                "Redundanz: %d Kategorien mit Mehrfach-Anbietern, "
+                "Spar-Schaetzung %s pro Jahr (company_tier=%s)",
+                redundancy_report["summary"]["redundancy_count"],
+                redundancy_report["summary"]["estimated_saving_pct"],
+                ctier,
+            )
+    except Exception as e:
+        logger.warning("Vendor redundancy analysis skipped: %s", e)
+
+    state["cmp_vendors"] = cmp_vendors
+    state["tdm_opt_out_notice"] = tdm_opt_out_notice
+    state["cookie_behavior_findings"] = cookie_behavior_findings
+    state["redundancy_report"] = redundancy_report
diff --git a/backend-compliance/compliance/api/agent_check/_phase_d3_blocks_bot.py b/backend-compliance/compliance/api/agent_check/_phase_d3_blocks_bot.py
new file mode 100644
index 00000000..49f6ac6d
--- /dev/null
+++ b/backend-compliance/compliance/api/agent_check/_phase_d3_blocks_bot.py
@@ -0,0 +1,220 @@
+"""Phase D-3-Bot — Bottom HTML blocks + final composition.
+
+Covers (in the original Step 5):
+  - P71 JC-vs-AVV decision tree (only when the DSE is ambiguous)
+  - P6/P53/P55 industry context + site history
+  - P106 internal-checks block
+  - P85 banner screenshot
+  - A audit-quality checks (banner-detect failure, thin vendor extraction)
+  - P82 GF one-pager
+  - Doc-input warnings (user pasted text into the wrong field)
+  - P86 industry benchmark
+  - P84 diff mode (since-last-run delta)
+  - Final HTML composition
+
+NOTE: in the original code `audit_quality_findings` was used by
+build_gf_one_pager_html BEFORE it was initialised — a silent
+UnboundLocalError caught by the surrounding try/except, so the
+gf_one_pager block effectively never rendered. Here we run
+audit-quality FIRST so the data is actually available.
+"""
+
+from __future__ import annotations
+
+import logging
+
+logger = logging.getLogger(__name__)
+
+
+async def run_phase_d3_bot(state: dict) -> None:
+    """Render bottom blocks, assemble full_html; mutates state in place."""
+    check_id = state["check_id"]
+    req = state["req"]
+    doc_entries = state["doc_entries"]
+    doc_texts = state["doc_texts"]
+    banner_result = state["banner_result"]
+    cmp_vendors = state["cmp_vendors"]
+    mc_split = state["mc_split"]
+    scorecard = state["scorecard"]
+    prev_scorecard = state.get("prev_scorecard")
+    mismatches = state.get("mismatches") or []
+    site_name_for_exec = state.get("site_name_for_exec", "")
+    domain_for_exec = state.get("domain_for_exec")
+    html_blocks = state["html_blocks"]
+
+    # P71: JC-vs-AVV decision tree, built from the DSE text
+    jc_decision_html = ""
+    try:
+        from compliance.services.jc_avv_decision import (
+            build_jc_avv_decision_html,
+        )
+        jc_decision_html = build_jc_avv_decision_html(doc_texts.get("dse"))
+    except Exception as e:
+        logger.warning("P71 jc_avv_decision skipped: %s", e)
+
+    # P6/P53/P55 — industry context + site history
+    industry_ctx_html = ""
+    try:
+        from compliance.services.industry_library import (
+            build_industry_context_block_html, load_site_profile,
+        )
+        from database import SessionLocal as _SLib
+        _ind_db = _SLib()
+        try:
+            ind = (req.scan_context or {}).get("industry") if req.scan_context else None
+            site_prof = load_site_profile(_ind_db, domain_for_exec or "")
+            industry_ctx_html = build_industry_context_block_html(ind, site_prof)
+        finally:
+            _ind_db.close()
+    except Exception as e:
+        logger.warning("industry context skipped: %s", e)
+
+    # P106 — internal-checks block (rendered only when mc_split lists any)
+    internal_checks_html = ""
+    try:
+        from compliance.services.mc_audit_type import (
+            build_internal_checks_block_html,
+        )
+        ic = (mc_split or {}).get("internal_checks") or []
+        if ic:
+            internal_checks_html = build_internal_checks_block_html(ic)
+            logger.info("P106: %d interne Checks (statt FAIL) im Block",
+                        len(ic))
+    except Exception as e:
+        logger.warning("P106 internal_checks_html skipped: %s", e)
+
+    # P85 — banner screenshot
+    banner_shot_html = ""
+    try:
+        from compliance.services.banner_screenshot_block import (
+            build_banner_screenshot_html,
+        )
+        banner_shot_html = build_banner_screenshot_html(banner_result)
+    except Exception as e:
+        logger.warning("P85 banner-screenshot skipped: %s", e)
+
+    # A — Audit-Quality-Checks (run BEFORE gf_one_pager so the data is
+    # available — original code had this inverted, causing
+    # UnboundLocalError silently caught).
+    audit_quality_html = ""
+    audit_quality_findings: list[dict] = []
+    try:
+        from compliance.services.audit_quality_checks import (
+            build_audit_quality_block_html, run_all as run_audit_quality,
+        )
+        cookie_text_for_aq = doc_texts.get("cookie") or ""
+        audit_quality_findings = run_audit_quality(
+            banner_result, cookie_text_for_aq, cmp_vendors, doc_entries,
+        )
+        if audit_quality_findings:
+            audit_quality_html = build_audit_quality_block_html(audit_quality_findings)
+            logger.info("audit-quality: %d Vorbehalte erkannt",
+                        len(audit_quality_findings))
+    except Exception as e:
+        logger.warning("audit-quality-checks failed: %s", e)
+
+    # P82: GF-1-Pager (now has the audit_quality_findings filled)
+    gf_one_pager_html = ""
+    try:
+        from compliance.services.gf_one_pager import build_gf_one_pager_html
+        gf_one_pager_html = build_gf_one_pager_html(
+            site_name=site_name_for_exec,
+            scorecard=scorecard,
+            previous_scorecard=prev_scorecard,
+            banner_result=banner_result,
+            library_mismatch_findings=mismatches,
+            scan_context=req.scan_context,
+            audit_quality_findings=audit_quality_findings,
+        )
+    except Exception as e:
+        logger.warning("P82 GF-1-pager skipped: %s", e)
+
+    # Doc-input warnings — when the user pasted text into the wrong field
+    input_warn_html = ""
+    try:
+        from compliance.services.doc_input_warnings import (
+            build_warnings_block_html, collect_warnings,
+        )
+        warns = collect_warnings(doc_entries)
+        if warns:
+            input_warn_html = build_warnings_block_html(warns)
+            logger.info("doc-input-warnings: %d Mismatches gefunden", len(warns))
+    except Exception as e:
+        logger.warning("doc-input-warnings skipped: %s", e)
+
+    # P86: industry benchmark (needs both an industry and a current score)
+    bench_html = ""
+    try:
+        from compliance.services.industry_benchmark import (
+            _extract_score, build_benchmark_html, compute_benchmark,
+        )
+        from database import SessionLocal as _SLb
+        industry = (req.scan_context or {}).get("industry") if req.scan_context else None
+        curr_score = _extract_score(banner_result)
+        if industry and curr_score is not None:
+            _b_db = _SLb()
+            try:
+                bench = compute_benchmark(
+                    _b_db, industry, curr_score, check_id,
+                )
+                if bench:
+                    bench_html = build_benchmark_html(bench)
+            finally:
+                _b_db.close()
+    except Exception as e:
+        logger.warning("P86 industry-benchmark skipped: %s", e)
+
+    # P84: diff mode (since-last-run delta)
+    diff_html = ""
+    try:
+        from compliance.services.run_diff import (
+            build_diff_block_html, compute_diff,
+        )
+        from database import SessionLocal as _SL
+        _diff_db = _SL()
+        try:
+            diff = compute_diff(
+                _diff_db, check_id, domain_for_exec or "",
+                banner_result, scorecard,
+            )
+            if diff:
+                diff_html = build_diff_block_html(diff)
+        finally:
+            _diff_db.close()
+    except Exception as e:
+        logger.warning("P84 diff-mode skipped: %s", e)
+
+    # B1 / B3 cross-cutting findings (own renderers, may be empty).
+    reachability_html = state.get("reachability_html", "")
+    retention_html = state.get("retention_html", "")
+
+    # Block order — sales-optimized.
+    # B1 (Reachability) sits next to critical because it's an Art.7-Abs.3
+    # finding. B3 (Retention) sits next to cookie_audit because both
+    # are 3-source comparisons of cookie metadata.
+    full_html = (
+        gf_one_pager_html + audit_quality_html + input_warn_html
+        + bench_html + diff_html
+        + html_blocks["critical_html"] + reachability_html
+        + html_blocks["scope_disclaimer_html"]
+        + html_blocks["exec_summary_html"]
+        + html_blocks["cookie_arch_html"] + html_blocks["summary_html"]
+        + html_blocks["scanned_html"] + html_blocks["profile_html"]
+        + html_blocks["scorecard_html"] + internal_checks_html
+        + html_blocks["redundancy_html"]
+        + industry_ctx_html
+        + banner_shot_html
+        + html_blocks["providers_html"] + html_blocks["banner_deep_html"]
+        + html_blocks["cookie_audit_html"] + retention_html
+        + html_blocks["tcf_authority_html"]
+        + html_blocks["entropy_html"]
+        + html_blocks["network_trace_html"]
+        + html_blocks["library_mismatch_html"]
+        + html_blocks["consistency_html"] + html_blocks["signals_html"]
+        + html_blocks["solutions_html"]
+        + jc_decision_html
+        + html_blocks["vvt_html"] + html_blocks["report_html"]
+    )
+
+    state["audit_quality_findings"] = audit_quality_findings
+    state["full_html"] = full_html
diff --git a/backend-compliance/compliance/api/agent_check/_phase_d3_blocks_mid.py b/backend-compliance/compliance/api/agent_check/_phase_d3_blocks_mid.py
new file mode 100644
index 00000000..b080a5c5
--- /dev/null
+++ b/backend-compliance/compliance/api/agent_check/_phase_d3_blocks_mid.py
@@ -0,0 +1,221 @@
+"""Phase D-3-Mid — Mid HTML blocks (P62/P103/P104/P105/audit/mismatch/signals).
+
+Covers (in the original Step 5):
+  - P62 scope disclaimer
+  - P103 cookie-value entropy + P104 network tracing
+  - P105 IAB TCF authority cross-reference
+  - Cookie compliance audit (three-source comparison, central USP)
+  - P102 cookie classification check (library mismatch)
+  - P35/P77/P78 document text signals
+  - P92/P94 banner consistency
+  - P73 MC solution generator (LLM suggestions per HIGH fail)
+"""
+
+from __future__ import annotations
+
+import logging
+
+logger = logging.getLogger(__name__)
+
+
+async def run_phase_d3_mid(state: dict) -> None:
+    """Build the mid HTML blocks (each best-effort); mutates state in place."""
+    doc_entries = state["doc_entries"]
+    doc_texts = state["doc_texts"]
+    banner_result = state["banner_result"]
+    cmp_vendors = state["cmp_vendors"]
+    fails_by_doc = state["fails_by_doc"]
+    html_blocks = state["html_blocks"]
+
+    # P62: scope disclaimer (marketing-manager disclaimer)
+    scope_disclaimer_html = ""
+    try:
+        from ..scope_disclaimer import build_scope_disclaimer_html
+        scope_disclaimer_html = build_scope_disclaimer_html()
+    except Exception as e:
+        logger.warning("Scope-disclaimer block skipped: %s", e)
+
+    # P103 + P104 — cookie-value entropy + network tracing (one shared try)
+    entropy_html = ""
+    network_trace_html = ""
+    try:
+        from compliance.services.cookie_network_tracer import (
+            build_network_trace_block_html,
+            trace_cookie_network,
+        )
+        from compliance.services.cookie_value_entropy import (
+            build_entropy_block_html,
+            check_cookies_for_entropy_mismatch,
+        )
+        cookies_detailed = (banner_result or {}).get("cookies_detailed") or []
+        entropy_findings = check_cookies_for_entropy_mismatch(cookies_detailed)
+        if entropy_findings:
+            entropy_html = build_entropy_block_html(entropy_findings)
+            logger.info("P103 Entropy: %d Findings", len(entropy_findings))
+        primary_url = next(  # first submitted entry that carries a URL
+            (e_["url"] for e_ in doc_entries if e_.get("url")),
+            "",
+        )
+        net_findings = trace_cookie_network(cookies_detailed, primary_url)
+        if net_findings:
+            network_trace_html = build_network_trace_block_html(net_findings)
+            logger.info("P104 Network-Trace: %d Findings", len(net_findings))
+    except Exception as e:
+        logger.warning("P103/P104 entropy/network-trace skipped: %s", e)
+
+    # P105 — IAB TCF authority cross-reference
+    tcf_authority_html = ""
+    try:
+        from compliance.services.tcf_vendor_authority import (
+            build_tcf_authority_block_html, cross_reference_with_tcf,
+        )
+        from database import SessionLocal as _SLtcf
+        _tcf_db = _SLtcf()
+        try:
+            tcf_findings = cross_reference_with_tcf(_tcf_db, cmp_vendors)
+            if tcf_findings:
+                tcf_authority_html = build_tcf_authority_block_html(tcf_findings)
+                logger.info(
+                    "TCF-Authority: %d Vendor-Discrepancies gefunden",
+                    len(tcf_findings),
+                )
+        finally:
+            _tcf_db.close()
+    except Exception as e:
+        logger.warning("TCF-Authority-Check skipped: %s", e)
+
+    # Cookie compliance audit (three-source comparison — central USP)
+    cookie_audit: dict = {}
+    cookie_audit_html = ""
+    try:
+        from compliance.services.cookie_compliance_audit import (
+            audit_cookie_compliance, build_cookie_audit_block_html,
+        )
+        from database import SessionLocal as _SLca
+        _ca_db = _SLca()
+        try:
+            cookie_audit = audit_cookie_compliance(
+                _ca_db, doc_texts.get("cookie") or doc_texts.get("dse"),
+                banner_result,
+            )
+            if cookie_audit and (cookie_audit.get("declared_count") or
+                                  cookie_audit.get("browser_count")):
+                cookie_audit_html = build_cookie_audit_block_html(cookie_audit)
+                logger.info(
+                    "Cookie-Audit: %d deklariert, %d im Browser, "
+                    "%d undokumentiert, %d compliant",
+                    cookie_audit.get("declared_count") or 0,  # %d rejects None
+                    cookie_audit.get("browser_count") or 0,  # %d rejects None
+                    len(cookie_audit.get("undeclared_in_browser") or []),
+                    len(cookie_audit.get("compliant") or []),
+                )
+        finally:
+            _ca_db.close()
+    except Exception as e:
+        logger.warning("cookie-compliance-audit skipped: %s", e)
+
+    # P102: cookie classification check (library mismatch)
+    library_mismatch_html = ""
+    mismatches: list[dict] = []
+    try:
+        from compliance.services.cookie_library_mismatch import (
+            build_mismatch_block_html, detect_mismatches,
+        )
+        from database import SessionLocal
+        cookie_doc_for_check = doc_texts.get("cookie") or doc_texts.get("dse") or ""
+        all_cookies_seen: list[str] = []
+        if banner_result:
+            for ph in (banner_result.get("phases") or {}).values():
+                if isinstance(ph, dict):
+                    for ck in (ph.get("cookies") or []):
+                        if isinstance(ck, str):
+                            all_cookies_seen.append(ck)
+                        elif isinstance(ck, dict) and ck.get("name"):
+                            all_cookies_seen.append(ck["name"])
+        if all_cookies_seen and cookie_doc_for_check:
+            _mm_db = SessionLocal()
+            try:
+                mismatches = detect_mismatches(
+                    _mm_db, all_cookies_seen, cookie_doc_for_check,
+                )
+                if mismatches:
+                    library_mismatch_html = build_mismatch_block_html(mismatches)
+                    logger.info(
+                        "P102: %d Cookie-Mismatches gefunden", len(mismatches),
+                    )
+            finally:
+                _mm_db.close()
+    except Exception as e:
+        logger.warning("P102 mismatch detection failed: %s", e)
+
+    # P35 + P77 + P78: document text-signal checks
+    signals_html = ""
+    try:
+        from compliance.services.doc_text_signals import (
+            build_signals_block_html, run_all as run_signal_checks,
+        )
+        cookie_doc_missing = not bool(doc_texts.get("cookie"))
+        sig_findings = run_signal_checks(
+            banner_result, doc_texts, cookie_doc_missing,
+        )
+        if sig_findings:
+            signals_html = build_signals_block_html(sig_findings)
+    except Exception as e:
+        logger.warning("P35/P77/P78 signals-check failed: %s", e)
+
+    # P92 + P94: banner consistency
+    consistency_html = ""
+    try:
+        from compliance.services.banner_consistency_checks import (
+            build_consistency_block_html, run_all as run_consistency_checks,
+        )
+        cookie_doc_for_check = (doc_texts.get("cookie")
+                                or doc_texts.get("dse") or "")
+        cons_findings = run_consistency_checks(
+            banner_result or {}, cookie_doc_for_check, cmp_vendors,
+            doc_texts=doc_texts,
+        )
+        if cons_findings:
+            consistency_html = build_consistency_block_html(cons_findings)
+            logger.info("P92/P94: %d Konsistenz-Findings", len(cons_findings))
+    except Exception as e:
+        logger.warning("P92/P94 consistency-check failed: %s", e)
+
+    # P73: MC solution generator — LLM suggestions per HIGH fail
+    solutions_html = ""
+    try:
+        from compliance.services.mc_solution_generator import (
+            build_solutions_block_html, generate_solutions_for_fails,
+        )
+        all_solutions: list[dict] = []
+        for dt, fails in fails_by_doc.items():
+            if not fails:
+                continue
+            doc_txt = doc_texts.get(dt) or doc_texts.get("dse") or ""
+            if not doc_txt or len(doc_txt) < 500:
+                continue
+            sols = await generate_solutions_for_fails(
+                fails, doc_txt, dt, limit=3,
+            )
+            all_solutions.extend(sols)
+            if len(all_solutions) >= 8:
+                break
+        if all_solutions:
+            solutions_html = build_solutions_block_html(all_solutions[:8])
+            logger.info("P73: %d MC-Solutions generiert", len(all_solutions))
+    except Exception as e:
+        logger.warning("P73 MC-Solution-Generator skipped: %s", e)
+
+    html_blocks.update({
+        "scope_disclaimer_html": scope_disclaimer_html,
+        "entropy_html": entropy_html,
+        "network_trace_html": network_trace_html,
+        "tcf_authority_html": tcf_authority_html,
+        "cookie_audit_html": cookie_audit_html,
+        "library_mismatch_html": library_mismatch_html,
+        "signals_html": signals_html,
+        "consistency_html": consistency_html,
+        "solutions_html": solutions_html,
+    })
+    state["cookie_audit"] = cookie_audit
+    state["mismatches"] = mismatches
diff --git a/backend-compliance/compliance/api/agent_check/_phase_d3_blocks_top.py b/backend-compliance/compliance/api/agent_check/_phase_d3_blocks_top.py
new file mode 100644
index 00000000..bf4e5840
--- /dev/null
+++ b/backend-compliance/compliance/api/agent_check/_phase_d3_blocks_top.py
@@ -0,0 +1,198 @@
+"""Phase D-3-Top — Top-of-mail HTML blocks.
+
+Covers (in the original Step 5 of `_run_compliance_check`):
+  - Summary / Scanned-URLs / Provider-list / Banner-deep / VVT HTML
+  - MC-scorecard aggregation (all_mc_checks + scorecard) + trend lookup
+  - P106 mc_audit_type split (internal_checks vs. verifiable_fails)
+  - Profile HTML / Redundancy HTML
+  - P1 Executive Summary
+  - P18 Critical Findings block
+  - P10 Cookie-Policy-Architecture detection
+"""
+
+from __future__ import annotations
+
+import logging
+
+from ._helpers import _build_profile_html, _company_name_from_url, _extract_domain
+
+logger = logging.getLogger(__name__)
+
+
+async def run_phase_d3_top(state: dict) -> None:
+    """Build the top-of-mail HTML blocks and scorecard; mutates state in place."""
+    req = state["req"]
+    results = state["results"]
+    doc_entries = state["doc_entries"]
+    doc_texts = state["doc_texts"]
+    banner_result = state["banner_result"]
+    vvt_entries = state["vvt_entries"]
+    cmp_vendors = state["cmp_vendors"]
+    profile = state["profile"]
+    redundancy_report = state.get("redundancy_report")
+
+    from ..agent_doc_check_banner import build_banner_deep_html
+    from ..agent_doc_check_critical import build_critical_findings_html
+    from ..agent_doc_check_exec_summary import build_exec_summary_html
+    from ..agent_doc_check_extras import build_vvt_table_html
+    from ..agent_doc_check_redundancy import build_redundancy_html
+    from ..agent_doc_check_report import (
+        build_html_report,
+        build_management_summary,
+        build_provider_list_html,
+        build_scanned_urls_html,
+    )
+    from ..agent_doc_check_scorecard import build_scorecard_html
+    from compliance.services.mc_scorecard import build_scorecard
+
+    summary_html = build_management_summary(results)
+    scanned_html = build_scanned_urls_html(doc_entries)
+    providers_html = build_provider_list_html(banner_result, vvt_entries)
+    # P18: deep block with phases + quality score + per-category tracker
+    banner_deep_html = build_banner_deep_html(banner_result)
+    vvt_html = build_vvt_table_html(cmp_vendors)
+
+    # MC scorecard aggregated across ALL docs (DSGVO/TDDDG/BGB/...)
+    all_mc_checks: list[dict] = []
+    fails_by_doc: dict[str, list[dict]] = {}
+    for r in results:
+        for c in r.checks:
+            if c.id.startswith("mc-"):
+                rec = {
+                    "id": c.id, "label": c.label, "passed": c.passed,
+                    "severity": c.severity, "skipped": c.skipped,
+                    "regulation": c.regulation,
+                    "hint": getattr(c, "hint", "") or "",
+                }
+                all_mc_checks.append(rec)
+                if (not c.passed and not c.skipped
+                        and (c.severity or "").upper() in ("CRITICAL", "HIGH")):
+                    fails_by_doc.setdefault(r.doc_type, []).append(rec)
+    # P106 — audit-type classification per MC
+    mc_split: dict = {"internal_checks": [], "verifiable_fails": all_mc_checks}
+    try:
+        from compliance.services.mc_audit_type import (
+            annotate_mc_results, split_by_audit_type,
+        )
+        annotate_mc_results(all_mc_checks)
+        mc_split = split_by_audit_type(all_mc_checks)
+        fails_by_doc = {}  # NOTE(review): regrouped under "dse" only — confirm
+        for r in mc_split.get("verifiable_fails") or []:
+            fails_by_doc.setdefault("dse", []).append(r)
+    except Exception as e:
+        logger.warning("P106 mc_audit_type skipped: %s", e)
+    scorecard = build_scorecard(all_mc_checks) if all_mc_checks else {}
+
+    # Trend: load previous scorecard for the same tenant + domain
+    prev_scorecard: dict | None = None
+    if scorecard:
+        try:
+            from compliance.services.compliance_audit_log import (
+                list_runs_for_tenant,
+            )
+            tenant_id_for_trend = req.recipient or ""
+            base_domain_for_trend = _extract_domain(doc_entries) or ""
+            prev_runs = list_runs_for_tenant(
+                tenant_id_for_trend,
+                base_domain=base_domain_for_trend,
+                limit=1,
+            )
+            if prev_runs:
+                prev_scorecard = prev_runs[0].get("scorecard")
+        except Exception as e:
+            logger.debug("trend lookup skipped: %s", e)
+    scorecard_html = (
+        build_scorecard_html(scorecard, previous_scorecard=prev_scorecard)
+        if scorecard else ""
+    )
+
+    report_html = build_html_report(results, None, doc_texts)
+    profile_html = _build_profile_html(profile)
+
+    # O4: vendor redundancy / EU alternatives + cost-savings block
+    redundancy_html = build_redundancy_html(redundancy_report)
+
+    # P1: executive summary
+    url_company_for_exec = _company_name_from_url(doc_entries)
+    domain_for_exec = _extract_domain(doc_entries)
+    site_name_for_exec = url_company_for_exec or domain_for_exec or ""
+    exec_summary_html = build_exec_summary_html(
+        scorecard=scorecard,
+        previous_scorecard=prev_scorecard,
+        cmp_vendors=cmp_vendors,
+        redundancy_report=redundancy_report,
+        site_name=site_name_for_exec,
+    )
+
+    # P18: critical-findings block
+    critical_html = ""
+    try:
+        critical_html = build_critical_findings_html(
+            banner_result=banner_result,
+            scorecard=scorecard,
+            results=results,
+        )
+    except Exception as e:
+        logger.warning("Critical-findings block skipped: %s", e)
+
+    # P10: cookie-policy architecture detection (recognize the BMW pattern)
+    cookie_arch_html = ""
+    try:
+        from compliance.services.cookie_policy_architecture import (
+            build_architecture_html,
+            detect_architecture,
+        )
+        cookie_doc_url = ""
+        cookie_doc_text = doc_texts.get("cookie", "")
+        cookie_cmp_payloads: list[dict] = []
+        for e in doc_entries:
+            if (e.get("doc_type") or "").lower() in ("cookie", "cookie_policy"):
+                cookie_doc_url = e.get("url", "")
+                cookie_cmp_payloads = e.get("cmp_payloads") or []
+                break
+        # P17-A: fallback when the cookie doc was deduped via P15
+        if not cookie_doc_text:
+            dse_text = doc_texts.get("dse", "")
+            if dse_text and any(w in dse_text.lower() for w in
+                                 ("cookie", "tracking", "google analytics",
+                                  "consent")):
+                cookie_doc_text = dse_text
+                dse_entry = next((e for e in doc_entries
+                                  if e.get("doc_type") == "dse"), {})
+                cookie_doc_url = dse_entry.get("url", "")
+                cookie_cmp_payloads = dse_entry.get("cmp_payloads") or []
+                logger.info("P17-A: cookie-arch fallback auf DSE")
+        if cookie_doc_text:
+            arch = detect_architecture(
+                doc_url=cookie_doc_url,
+                doc_text=cookie_doc_text,
+                cmp_payloads=cookie_cmp_payloads,
+                homepage_cmp_payloads=state.get("cookie_payloads") or [],
+            )
+            cookie_arch_html = build_architecture_html(arch)
+            logger.info("cookie-arch: layer=%s versioned=%s risk=%s",
+                        arch["layer_separation"], arch["versioned"],
+                        arch["risk_label"])
+    except Exception as e:
+        logger.warning("cookie-architecture detection failed: %s", e)
+
+    state["scorecard"] = scorecard
+    state["prev_scorecard"] = prev_scorecard
+    state["mc_split"] = mc_split
+    state["fails_by_doc"] = fails_by_doc
+    state["site_name_for_exec"] = site_name_for_exec
+    state["domain_for_exec"] = domain_for_exec
+    state["html_blocks"] = {
+        "summary_html": summary_html,
+        "scanned_html": scanned_html,
+        "providers_html": providers_html,
+        "banner_deep_html": banner_deep_html,
+        "vvt_html": vvt_html,
+        "scorecard_html": scorecard_html,
+        "report_html": report_html,
+        "profile_html": profile_html,
+        "redundancy_html": redundancy_html,
+        "exec_summary_html": exec_summary_html,
+        "critical_html": critical_html,
+        "cookie_arch_html": cookie_arch_html,
+    }
diff --git a/backend-compliance/compliance/api/agent_check/_phase_e_email.py b/backend-compliance/compliance/api/agent_check/_phase_e_email.py
new file mode 100644
index 00000000..466d239a
--- /dev/null
+++ b/backend-compliance/compliance/api/agent_check/_phase_e_email.py
@@ -0,0 +1,75 @@
+"""Phase E — Send compliance-check email, with A1 ZIP-Anhang.
+
+Original Step 6 of `_run_compliance_check`, extended with the A1
+attachment: when the Tesseract pipeline captured evidence slices,
+bundle them into evidence-{check_id}.zip (manifest.json +
+audit_metadata.json + slice_NNN.png) and attach to the e-mail. The
+attachment makes the evidence chain portable so a DSB / lawyer can
+hand it to an external auditor or supervisory authority.
+"""
+
+from __future__ import annotations
+
+import logging
+
+from compliance.services.smtp_sender import send_email
+
+from ._helpers import _company_name_from_url, _extract_domain, _update
+
+logger = logging.getLogger(__name__)
+
+
+def run_phase_e(state: dict) -> None:
+    """Send the report mail (optional evidence ZIP); store outcome in state."""
+    check_id = state["check_id"]
+    req = state["req"]
+    results = state["results"]
+    doc_entries = state["doc_entries"]
+    full_html = state["full_html"]
+    slices = state.get("cookie_evidence_slices")
+    slice_meta = state.get("cookie_evidence_meta")
+
+    # The site label comes from the submitted URL first: the company
+    # name in extracted_profile is often noisy (e.g. picks up juris.de
+    # from legal references), while a domain-derived name is more
+    # predictable for the GF email subject.
+    doc_count = sum(1 for r in results if not r.error)
+    url_company = _company_name_from_url(doc_entries)
+    domain = _extract_domain(doc_entries)
+    site_name = url_company or domain
+    if not site_name:
+        site_name = "Unbekannt"
+    _update(check_id, "E-Mail wird versendet...", 98)
+
+    # A1: pack the cookie-evidence slices into one ZIP attachment; a
+    # failed build is only logged and the mail is sent without it.
+    attachments = None
+    if slices:
+        try:
+            from compliance.services.evidence_zip_builder import (
+                build_evidence_zip,
+            )
+            zip_payload = build_evidence_zip(
+                slices=slices, meta=slice_meta, check_id=check_id,
+            )
+            attachments = [{
+                "filename": f"evidence-{check_id[:8]}.zip",
+                "data": zip_payload,
+                "mime": "application/zip",
+            }]
+        except Exception as e:
+            logger.warning("A1 evidence-zip build failed: %s", e)
+
+    email_result = send_email(
+        recipient=req.recipient,
+        subject=f"[COMPLIANCE-CHECK] {site_name} — {doc_count} Dokumente geprueft",
+        body_html=full_html,
+        attachments=attachments,
+    )
+
+    state.update({
+        "email_result": email_result,
+        "site_name": site_name,
+        "domain": domain,
+        "doc_count": doc_count,
+    })
diff --git a/backend-compliance/compliance/api/agent_check/_phase_f_persist.py b/backend-compliance/compliance/api/agent_check/_phase_f_persist.py
new file mode 100644
index 00000000..413a9ef6
--- /dev/null
+++ b/backend-compliance/compliance/api/agent_check/_phase_f_persist.py
@@ -0,0 +1,166 @@
+"""Phase F — Build response + persist snapshot/audit-log/unified-findings.
+
+Covers (in the original `_run_compliance_check`):
+  - Step 7   Build response dict, mark job as completed
+  - P80      Persist raw scan data so we can replay the audit pipeline
+             without re-crawling (7min → 5sec test cycle)
+  - SQLite audit log (/audit endpoints for admin tab A5 + trend view A6)
+  - P5       Unified findings (MC + Pflichtangaben + Vendor + Redundanz
+             in one searchable table behind /agent/findings/<id>)
+"""
+
+from __future__ import annotations
+
+import logging
+from datetime import datetime, timezone
+
+from ._constants import _compliance_check_jobs
+from ._helpers import _result_to_dict
+
+logger = logging.getLogger(__name__)
+
+
+def run_phase_f(state: dict) -> None:
+    """Build response + persist. Mutates state in place."""
+    check_id = state["check_id"]
+    req = state["req"]
+    results = state["results"]
+    profile = state["profile"]
+    profile_dict = state["profile_dict"]
+    extracted_profile = state["extracted_profile"]
+    banner_result = state["banner_result"]
+    tcf_vendors = state["tcf_vendors"]
+    vvt_entries = state["vvt_entries"]
+    cmp_vendors = state["cmp_vendors"]
+    cookie_audit = state["cookie_audit"]
+    total_findings = state["total_findings"]
+    email_result = state["email_result"]
+    doc_entries = state["doc_entries"]
+    doc_texts = state["doc_texts"]
+    redundancy_report = state.get("redundancy_report")
+    scorecard = state["scorecard"]
+    site_name = state.get("site_name", "")
+    domain = state.get("domain", "")
+    doc_count = state.get("doc_count", 0)
+
+    response = {
+        "check_id": check_id,
+        "results": [_result_to_dict(r) for r in results],
+        "business_profile": profile_dict,
+        "extracted_profile": extracted_profile,
+        # P18: pass the full consent-tester output through instead of just 4
+        # fields. phases (before/after-accept/reject), banner_checks.violations
+        # and category_tests are used by the renderer + critical-findings block.
+        "banner_result": ({
+            "detected": banner_result.get("banner_detected", False),
+            "provider": banner_result.get("banner_provider", ""),
+            "violations": len((banner_result.get("banner_checks") or {})
+                              .get("violations", [])),
+            "tcf_vendor_count": len(tcf_vendors),
+            "completeness_pct": banner_result.get("completeness_pct"),
+            "correctness_pct": banner_result.get("correctness_pct"),
+            "phases": banner_result.get("phases", {}),
+            "banner_checks": banner_result.get("banner_checks", {}),
+            "category_tests": banner_result.get("category_tests", []),
+            "structured_checks": banner_result.get("structured_checks", []),
+            "summary": banner_result.get("summary", {}),
+        } if banner_result else None),
+        "tcf_vendors": vvt_entries if tcf_vendors else [],
+        "cmp_vendors": cmp_vendors,
+        "cookie_audit": cookie_audit if cookie_audit else None,
+        "total_documents": len(results),
+        "total_findings": total_findings,
+        "email_status": email_result.get("status", "failed"),
+        "checked_at": datetime.now(timezone.utc).isoformat(),
+    }
+
+    _compliance_check_jobs[check_id]["status"] = "completed"
+    _compliance_check_jobs[check_id]["result"] = response
+    _compliance_check_jobs[check_id]["progress"] = "Fertig"
+    _compliance_check_jobs[check_id]["progress_pct"] = 100
+
+    # P80: persist raw scan data so we can replay audit pipeline
+    # without re-crawling (7min -> 5sec test cycle).
+    try:
+        from database import SessionLocal
+        from compliance.services.check_snapshot import save_snapshot
+        snap_db = SessionLocal()
+        try:
+            save_snapshot(
+                snap_db,
+                check_id=check_id,
+                doc_entries=doc_entries,
+                banner_result=banner_result,
+                profile=profile,
+                cmp_vendors=cmp_vendors,
+                scan_context=req.scan_context,  # P79
+                site_label=site_name,
+                notes=f"recipient={req.recipient}",
+            )
+        finally:
+            snap_db.close()
+    except Exception as snap_err:
+        logger.warning("P80 snapshot save skipped: %s", snap_err)
+
+    # Persist to sidecar SQLite audit log — enables /audit endpoints
+    # (A5 admin tab) and trend view (A6). Best-effort; failures here
+    # do not affect the user-facing response.
+    try:
+        from compliance.services.compliance_audit_log import record_check_run
+        from compliance.services.mc_scorecard import full_audit_records
+        audit_rows: list[dict] = []
+        for r in results:
+            doc_mc = [c for c in r.checks if c.id.startswith("mc-")]
+            audit_rows.extend(full_audit_records(
+                [{"id": c.id, "label": c.label, "passed": c.passed,
+                  "severity": c.severity, "skipped": c.skipped,
+                  "regulation": c.regulation, "matched_text": c.matched_text,
+                  "hint": c.hint, "level": c.level}
+                 for c in doc_mc],
+                check_id=check_id,
+                doc_type=r.doc_type,
+            ))
+        record_check_run(
+            check_id=check_id,
+            tenant_id=req.recipient or "",
+            site_name=site_name,
+            base_domain=domain or "",
+            doc_count=doc_count,
+            scorecard=scorecard,
+            vvt_summary={
+                "total": len(cmp_vendors),
+                "internal": sum(1 for v in cmp_vendors
+                                if (v.get("recipient_type") or "").upper()
+                                in ("INTERNAL", "GROUP_COMPANY")),
+                "external": sum(1 for v in cmp_vendors
+                                if (v.get("recipient_type") or "").upper()
+                                in ("PROCESSOR", "CONTROLLER")),
+            },
+            mc_records=audit_rows,
+        )
+        from compliance.services.compliance_audit_log import record_check_payload
+        record_check_payload(
+            check_id=check_id,
+            vendors=cmp_vendors,
+            profile=extracted_profile,
+            banner=banner_result,
+        )
+        # Unified findings (P5): bundle MC + Pflichtangaben + Vendor +
+        # Redundanz in one searchable table behind /agent/findings/<id>.
+        try:
+            from compliance.services.unified_findings_collector import collect
+            from compliance.services.unified_findings_store import record_findings
+            unified = collect(
+                check_id=check_id,
+                results=results,
+                cmp_vendors=cmp_vendors,
+                redundancy_report=redundancy_report,
+                doc_texts=doc_texts,
+            )
+            record_findings(check_id, unified)
+        except Exception as e:
+            logger.warning("Unified findings collect failed: %s", e)
+    except Exception as e:
+        logger.warning("Audit persistence skipped: %s", e)
+
+    state["response"] = response
diff --git a/backend-compliance/compliance/api/agent_check/_schemas.py b/backend-compliance/compliance/api/agent_check/_schemas.py
new file mode 100644
index 00000000..d4625533
--- /dev/null
+++ b/backend-compliance/compliance/api/agent_check/_schemas.py
@@ -0,0 +1,44 @@
+"""Pydantic request/response schemas for the compliance-check route."""
+
+from __future__ import annotations
+
+from pydantic import BaseModel
+
+
+class ExtractTextRequest(BaseModel):
+    url: str
+
+
+class DocumentInput(BaseModel):
+    doc_type: str  # dse, agb, impressum, cookie, widerruf, avv, loeschkonzept, etc.
+    url: str = ""
+    text: str = ""  # text has priority over URL
+
+
+class ComplianceCheckRequest(BaseModel):
+    documents: list[DocumentInput]
+    use_agent: bool = False
+    recipient: str = "dsb@breakpilot.local"
+    # P12: override for a TDM reservation when the customer has documented
+    # permission. tdm_override_reason is required when tdm_override=True
+    # (e.g. "Auftragsbeziehung Safetykon GmbH, Email Hr. X 18.05.2026").
+    tdm_override: bool = False
+    tdm_override_reason: str = ""
+    # P79: 8-field pre-scan wizard (industry, B2B/B2C, direct sales,
+    # legal form, corporate group, headcount, special-category data,
+    # third country). Persisted in the snapshot; filters MC evaluation (P72).
+    scan_context: dict | None = None
+
+
+class ComplianceCheckStartResponse(BaseModel):
+    check_id: str
+    status: str = "running"
+
+
+class ComplianceCheckStatusResponse(BaseModel):
+    check_id: str
+    status: str
+    progress: str = ""
+    progress_pct: int = 0
+    result: dict | None = None
+    error: str = ""
diff --git a/backend-compliance/compliance/api/agent_check/_single_check.py b/backend-compliance/compliance/api/agent_check/_single_check.py
new file mode 100644
index 00000000..48ced787
--- /dev/null
+++ b/backend-compliance/compliance/api/agent_check/_single_check.py
@@ -0,0 +1,118 @@
+"""Per-document regex + MC + LLM checks for the compliance-check route.
+
+Each document goes through:
+  1. regex completeness/correctness checklist
+  2. Master Control evaluation (all MCs for this doc_type)
+  3. LLM verification of failed regex checks (overturns where evidence
+     was missed by the regex)
+  4. Cookie-only: opt-out + privacy-policy URL health-check
+"""
+
+from __future__ import annotations
+
+import logging
+
+logger = logging.getLogger(__name__)
+
+
+async def _check_single(
+    text: str, doc_type: str, label: str, url: str,
+    word_count: int, use_agent: bool,
+    business_scope: set[str] | None = None,
+    business_profile: dict | None = None,
+):
+    """Run regex + MC checks on a single document."""
+    from compliance.services.doc_checks.runner import check_document_completeness
+    from compliance.services.rag_document_checker import check_document_with_controls
+    from ..agent_doc_check_routes import CheckItem, DocCheckResult
+
+    # Regex checklist
+    findings = check_document_completeness(text, doc_type, label, url,
+                                           business_profile=business_profile)
+
+    all_checks: list[CheckItem] = []
+    completeness = 0
+    correctness = 0
+
+    for f in findings:
+        if "SCORE" in f.get("code", ""):
+            for c in f.get("all_checks", []):
+                all_checks.append(CheckItem(
+                    id=c["id"], label=c["label"], passed=c["passed"],
+                    severity=c["severity"], matched_text=c.get("matched_text", ""),
+                    level=c.get("level", 1), parent=c.get("parent"),
+                    skipped=c.get("skipped", False), hint=c.get("hint", ""),
+                ))
+            completeness = f.get("completeness_pct", 0)
+            correctness = f.get("correctness_pct", 0)
+
+    # Master Control checks (all MCs for this doc_type, no top-N cap)
+    try:
+        # max_controls=0 -> evaluate ALL MCs for this doc_type (DB has
+        # 1874 across 8 types; regex matching is cheap and dominates
+        # well under 1s per doc). Caps remain on the LLM-enrich step
+        # (top-10 FAILs) so cost stays bounded.
+        mc_results = await check_document_with_controls(
+            text, doc_type, label, max_controls=0, use_agent=use_agent,
+            business_scope=business_scope,
+        )
+        if mc_results:
+            for mc in mc_results:
+                all_checks.append(CheckItem(**mc))
+            l2 = [c for c in all_checks if c.level == 2 and not c.skipped]
+            l2_passed = sum(1 for c in l2 if c.passed)
+            correctness = round(l2_passed / len(l2) * 100) if l2 else 0
+    except Exception as e:
+        logger.warning("MC check skipped for %s: %s", label, e)
+
+    # LLM verification of failed, non-skipped checks that carry a hint
+    failed = [c for c in all_checks if not c.passed and not c.skipped and c.hint]
+    if failed:
+        try:
+            from compliance.services.doc_checks.llm_verify import verify_failed_checks
+            overturns = await verify_failed_checks(
+                text,
+                [{"id": c.id, "label": c.label, "hint": c.hint} for c in failed],
+                label,
+            )
+            for c in all_checks:
+                if c.id in overturns and overturns[c.id]["overturned"]:
+                    c.passed = True
+                    c.matched_text = f"[LLM] {overturns[c.id]['evidence']}"
+            l2_active = [c for c in all_checks if c.level == 2 and not c.skipped]
+            l2_passed = sum(1 for c in l2_active if c.passed)
+            if l2_active:
+                correctness = round(l2_passed / len(l2_active) * 100)
+        except Exception as e:
+            logger.warning("LLM verification skipped: %s", e)
+
+    # Cookie-policy only: actively HTTP-probe the Opt-Out + Privacy-Policy
+    # URLs the document advertises. Broken links make individual provider
+    # entries non-compliant under Art. 7(3) DSGVO.
+    if doc_type == "cookie":
+        try:
+            from compliance.services.cookie_link_validator import (
+                extract_links, validate_links, build_check_items,
+            )
+            links = extract_links(text)
+            if links:
+                logger.info("Cookie-link validator: %d urls extracted from %s",
+                            len(links), label)
+                validated = await validate_links(links)
+                for item in build_check_items(validated):
+                    all_checks.append(CheckItem(**item))
+                # Re-compute correctness with the new L2 items
+                l2_active = [c for c in all_checks if c.level == 2 and not c.skipped]
+                l2_passed = sum(1 for c in l2_active if c.passed)
+                if l2_active:
+                    correctness = round(l2_passed / len(l2_active) * 100)
+        except Exception as e:
+            logger.warning("Cookie-link validation skipped for %s: %s", label, e)
+
+    non_score = [f for f in findings if "SCORE" not in f.get("code", "")]
+    return DocCheckResult(
+        label=label, url=url, doc_type=doc_type,
+        word_count=word_count or len(text.split()),
+        completeness_pct=completeness, correctness_pct=correctness,
+        checks=all_checks, findings_count=len(non_score),
+    )
diff --git a/backend-compliance/compliance/api/agent_check/_state.py b/backend-compliance/compliance/api/agent_check/_state.py
new file mode 100644
index 00000000..76beb3fb
--- /dev/null
+++ b/backend-compliance/compliance/api/agent_check/_state.py
@@ -0,0 +1,58 @@
+"""Shared state for the compliance-check pipeline.
+
+The 7-step pipeline accumulates ~60 named values that flow across
+phases (doc_entries, profile, results, banner_result, cmp_vendors,
+scorecard, HTML blocks, …). Rather than threading 60 parameters
+through each function, we pass one mutable state dict.
+
+Phases read what they need with `state[key]` and write their outputs
+with `state[key] = value`. This is intentionally untyped: enforcing
+strict typing would require freezing the schema before all phases
+landed, and the report-building phase routinely adds new optional
+keys (P1, P10, P50, P59b, P82, P103, P104, P106, …).
+
+`new_state(check_id, req)` initialises the dict with the few
+keys that must exist from the start.
+"""
+
+from __future__ import annotations
+
+
+def new_state(check_id: str, req) -> dict:
+    """Create a fresh state dict for a check run.
+
+    Pre-populates a few keys that downstream phases assume exist
+    (e.g. `cmp_vendors` defaulting to `[]`).
+    """
+    return {
+        "check_id": check_id,
+        "req": req,
+        # Phase-1 outputs
+        "doc_texts": {},
+        "doc_entries": [],
+        "url_text_cache": {},
+        "pasted_table_vendors": [],
+        "placement_findings": [],
+        # Phase-2/3/4 outputs
+        "profile": None,
+        "profile_dict": {},
+        "results": [],
+        "total_findings": 0,
+        "business_scope": set(),
+        "banner_result": None,
+        "banner_url": "",
+        "tcf_vendors": [],
+        "vvt_entries": [],
+        "extracted_profile": {},
+        # Phase-5 outputs
+        "cmp_vendors": [],
+        "cookie_audit": {},
+        "cookie_evidence_slices": None,
+        "cookie_evidence_meta": None,
+        "scorecard": {},
+        "full_html": "",
+        "audit_quality_findings": [],
+        # Phase-6/7 outputs
+        "email_result": {"status": "skipped"},
+        "site_name": "",
+    }
diff --git a/backend-compliance/compliance/api/agent_compliance_check_routes.py b/backend-compliance/compliance/api/agent_compliance_check_routes.py
index 36ac607d..324326cc 100644
--- a/backend-compliance/compliance/api/agent_compliance_check_routes.py
+++ b/backend-compliance/compliance/api/agent_compliance_check_routes.py
@@ -4,72 +4,70 @@ Unified Compliance Check Routes — check all documents in one request.
 POST /compliance/agent/extract-text — extract text from a URL
 POST /compliance/agent/compliance-check — unified check for all documents
 GET  /compliance/agent/compliance-check/{check_id} — poll status
+
+Phase 5 split (2026-06-06): the original 2700-line monolith is now
+decomposed into the `agent_check/` subpackage:
+  - _orchestrator.py — thin run_compliance_check pipeline
+  - _phase_a_resolve.py — TDM + Step 1 (resolve / discover / split)
+  - _phase_b_profile_check.py — Step 2 + Step 3 (profile + doc checks)
+  - _phase_c_banner.py — Step 3b-d (banner + cross-check + TCF) + Step 4
+  - _phase_d1_vendors_raw.py / _phase_d2_vendors_finalize.py — Step 5
+    vendor extraction + finalize
+  - _phase_d3_blocks_top.py / mid / bot — Step 5 HTML blocks
+  - _phase_e_email.py — Step 6 (with A1 ZIP-Anhang)
+  - _phase_f_persist.py — Step 7 (snapshot + audit log + unified findings)
+  - _helpers.py / _constants.py / _state.py / _schemas.py — shared
+
+External callers (saving_scan_routes, agent_migration_routes, tests)
+keep importing helpers from THIS module — everything is re-exported.
 """
 
+from __future__ import annotations
+
 import asyncio
 import logging
-import os
-import re
 import uuid as _uuid
-from dataclasses import asdict
-from datetime import datetime, timezone
 
 import httpx
 from fastapi import APIRouter
-from pydantic import BaseModel
 
-from compliance.services.smtp_sender import send_email
+# ── Re-exports: external callers import these from THIS module ──────
+from .agent_check._constants import (  # noqa: F401
+    CONSENT_TESTER_URL,
+    _ALL_DOC_TYPES,
+    _COMPOUND_TLDS,
+    _DISCOVERY_RULES,
+    _DOC_TYPE_LABELS,
+    _compliance_check_jobs,
+)
+from .agent_check._discovery import _autodiscover_missing  # noqa: F401
+from .agent_check._fetch import _fetch_text  # noqa: F401
+from .agent_check._helpers import (  # noqa: F401
+    _apply_profile_filter,
+    _build_profile_html,
+    _classify_discovered_doc,
+    _company_name_from_url,
+    _doc_type_label,
+    _extract_domain,
+    _get_skip_types,
+    _pad_results_with_missing,
+    _result_to_dict,
+    _update,
+)
+from .agent_check._orchestrator import run_compliance_check as _run_compliance_check  # noqa: F401
+from .agent_check._schemas import (
+    ComplianceCheckRequest,
+    ComplianceCheckStartResponse,
+    ComplianceCheckStatusResponse,
+    DocumentInput,
+    ExtractTextRequest,
+)
+from .agent_check._single_check import _check_single  # noqa: F401
 
 logger = logging.getLogger(__name__)
 
 router = APIRouter(prefix="/compliance/agent", tags=["agent"])
 
-CONSENT_TESTER_URL = "http://bp-compliance-consent-tester:8094"
-
-# In-memory job store (same pattern as doc-check)
-_compliance_check_jobs: dict[str, dict] = {}
-
-
-# ── Models ───────────────────────────────────────────────────────────
-
-class ExtractTextRequest(BaseModel):
-    url: str
-
-
-class DocumentInput(BaseModel):
-    doc_type: str  # dse, agb, impressum, cookie, widerruf, avv, loeschkonzept, etc.
-    url: str = ""
-    text: str = ""  # text has priority over URL
-
-
-class ComplianceCheckRequest(BaseModel):
-    documents: list[DocumentInput]
-    use_agent: bool = False
-    recipient: str = "dsb@breakpilot.local"
-    # P12: Override fuer TDM-Vorbehalt bei dokumentierter Kunden-Erlaubnis.
-    # Pflichtfeld tdm_override_reason wenn tdm_override=True
-    # (z.B. "Auftragsbeziehung Safetykon GmbH, Email Hr. X 18.05.2026").
-    tdm_override: bool = False
-    tdm_override_reason: str = ""
-    # P79: 8-Feld Pre-Scan-Wizard (Branche, B2B/B2C, Direkt-Vertrieb,
-    # Rechtsform, Konzern, MA, Besondere Daten, Drittland). Wird im
-    # Snapshot persistiert und filtert die MC-Auswertung (P72).
-    scan_context: dict | None = None
-
-
-class ComplianceCheckStartResponse(BaseModel):
-    check_id: str
-    status: str = "running"
-
-
-class ComplianceCheckStatusResponse(BaseModel):
-    check_id: str
-    status: str
-    progress: str = ""
-    progress_pct: int = 0
-    result: dict | None = None
-    error: str = ""
-
 
 # ── Extract text endpoint ────────────────────────────────────────────
 
@@ -214,15 +212,12 @@ async def benchmark(
     anonymized: bool = False,
     limit: int = 50,
 ):
-    """P107 — Branchen-Benchmark-Cockpit Endpoint.
-    industry: 'automotive' / 'banking' / etc (optional)
-    sites: comma-separated site_label list (optional)
-    anonymized: bool — wenn true, Hersteller-Namen → 'OEM 1/2/3'
-    """
+    """P107 — Branchen-Benchmark-Cockpit Endpoint."""
     from database import SessionLocal
     from compliance.services.benchmark_extractor import (
-        load_snapshots_for_benchmark, anonymize_kpis,
+        anonymize_kpis,
         build_benchmark_summary,
+        load_snapshots_for_benchmark,
     )
     site_list = [s.strip() for s in sites.split(",") if s.strip()] if sites else None
     db = SessionLocal()
@@ -245,9 +240,7 @@ async def benchmark(
 
 @router.post("/admin/tcf-ingest")
 async def tcf_ingest():
-    """P105 — IAB TCF Vendor-Liste ingestieren / refreshen.
-    Idempotent: holt aktuelle GVL und upserted in compliance.cookie_library
-    mit source='iab_tcf_v2'. Aufruf ein paar Mal pro Jahr ausreichend."""
+    """P105 — IAB TCF Vendor-Liste ingestieren / refreshen."""
     from database import SessionLocal
     from compliance.services.tcf_vendor_authority import (
         fetch_and_ingest_tcf_vendors,
@@ -306,2344 +299,6 @@ async def replay_snapshot(
         db.close()
 
 
-async def _run_compliance_check(check_id: str, req: ComplianceCheckRequest):
-    """Background task: check all documents with business-profile context."""
-    try:
-        from compliance.services.business_profiler import detect_business_profile
-        from compliance.services.doc_checks.runner import check_document_completeness
-        from compliance.services.rag_document_checker import check_document_with_controls
-        from .agent_doc_check_routes import CheckItem, DocCheckResult
-        from .agent_doc_check_report import build_html_report
-
-        # Reset anchor-locator cache per run (avoid cross-run leak)
-        try:
-            from compliance.services.doc_anchor_locator import reset_cache
-            reset_cache()
-        except Exception:
-            pass
-
-        # P7: TDM-Reservation-Check der Base-Domain (§ 44b UrhG).
-        # Bei reserved/denied: Run sofort beenden, kein Crawl.
-        try:
-            from compliance.services.tdm_reservation_check import (
-                check_tdm_reservation, is_crawl_allowed,
-            )
-            first_url = next(
-                (d.url for d in req.documents if d.url), "",
-            )
-            if first_url:
-                tdm = await check_tdm_reservation(first_url)
-                _compliance_check_jobs[check_id]["tdm"] = tdm
-                # P12: Bei tdm_override + Reason wird NICHT abgebrochen,
-                # sondern nur dokumentiert. Override ohne Reason wird ignoriert.
-                override_active = (
-                    req.tdm_override
-                    and len((req.tdm_override_reason or "").strip()) >= 10
-                )
-                if not is_crawl_allowed(tdm) and not override_active:
-                    _compliance_check_jobs[check_id]["status"] = "skipped_tdm"
-                    _compliance_check_jobs[check_id]["error"] = (
-                        f"TDM-Vorbehalt fuer {tdm.get('domain')} erkannt "
-                        f"(status={tdm.get('status')}) — Crawl nach § 44b "
-                        f"UrhG nicht zulaessig. Signals: "
-                        f"{[s.get('src') for s in tdm.get('signals', [])]}"
-                    )
-                    _compliance_check_jobs[check_id]["progress_pct"] = 100
-                    logger.info("TDM-skip check_id=%s domain=%s status=%s",
-                                check_id, tdm.get("domain"), tdm.get("status"))
-                    return
-                if override_active and not is_crawl_allowed(tdm):
-                    _compliance_check_jobs[check_id]["tdm_override"] = {
-                        "reason": req.tdm_override_reason.strip()[:500],
-                        "original_status": tdm.get("status"),
-                    }
-                    logger.warning(
-                        "TDM-Override aktiv: check_id=%s domain=%s "
-                        "status=%s reason=%r",
-                        check_id, tdm.get("domain"), tdm.get("status"),
-                        req.tdm_override_reason.strip()[:80],
-                    )
-        except Exception as e:
-            logger.warning("TDM-check failed (proceeding): %s", e)
-
-        # Step 1: Resolve texts (fetch from URL if needed) — 0-30%
-        _update(check_id, "Texte werden geladen...", 1)
-        doc_texts: dict[str, str] = {}
-        doc_entries: list[dict] = []
-
-        # Cache fetched URLs to detect duplicates
-        url_text_cache: dict[str, str] = {}
-
-        n_docs = max(1, len(req.documents))
-        # User-pasted-Tabellen-Vendors (kein LLM noetig) — werden weiter
-        # unten in cmp_vendors gemerged.
-        pasted_table_vendors: list[dict] = []
-        for i, doc in enumerate(req.documents):
-            pct = int(1 + (i / n_docs) * 29)
-            _update(check_id, f"Texte laden {i+1}/{n_docs}: {doc.doc_type}...", pct)
-            text = (doc.text or "").strip()
-            input_source = "url"
-            cmp_payloads: list[dict] = []
-            if text:
-                input_source = "text"
-                if doc.url:
-                    input_source = "text+url"  # User hat beide gefuellt
-                    logger.info(
-                        "doc_type=%s: User hat URL UND Text geliefert — "
-                        "Text gewinnt, URL wird als Quellen-Referenz behalten",
-                        doc.doc_type,
-                    )
-            elif doc.url:
-                url_key = doc.url.strip().rstrip("/").lower()
-                if url_key in url_text_cache:
-                    text = url_text_cache[url_key]
-                else:
-                    text, cmp_payloads = await _fetch_text(doc.url, doc_type=doc.doc_type)
-                    if text:
-                        url_text_cache[url_key] = text
-
-            # Auto-Reclassify-Check: wenn der user Text in das falsche
-            # Doc-Type-Feld kopiert hat (z.B. Impressum-Text in DSE),
-            # erkennen und ggf. umtaggen.
-            actual_doc_type = doc.doc_type
-            reclassify_hint: dict | None = None
-            if input_source.startswith("text") and len(text) >= 500:
-                try:
-                    from compliance.services.doc_type_classifier import (
-                        detect_mismatch,
-                    )
-                    reclassify_hint = detect_mismatch(doc.doc_type, text)
-                    if reclassify_hint and reclassify_hint["action"] == "reclassify":
-                        actual_doc_type = reclassify_hint["detected"]
-                        logger.info(
-                            "doc_type AUTO-RECLASSIFY: deklariert=%s "
-                            "erkannt=%s (score %d vs %d) — uebernehme erkannten Typ",
-                            doc.doc_type, actual_doc_type,
-                            reclassify_hint["detected_score"],
-                            reclassify_hint["declared_score"],
-                        )
-                except Exception as e:
-                    logger.warning("doc_type_classifier failed: %s", e)
-
-            # Cookie-Tabelle: wenn User Tabelle reinkopiert hat, deterministisch
-            # parsen (kein LLM noetig) und Vendors gleich ableiten.
-            if input_source.startswith("text") and actual_doc_type == "cookie":
-                try:
-                    from compliance.services.cookies_table_parser import (
-                        parse_cookie_table,
-                    )
-                    tab_vendors = parse_cookie_table(text)
-                    if tab_vendors:
-                        pasted_table_vendors.extend(tab_vendors)
-                        logger.info(
-                            "Cookie-Tabelle erkannt im pasted Text — "
-                            "%d Vendors / %d Cookies deterministisch geparst",
-                            len(tab_vendors),
-                            sum(len(v.get("cookies", [])) for v in tab_vendors),
-                        )
-                except Exception as e:
-                    logger.warning("cookies_table_parser failed: %s", e)
-
-            if text:
-                doc_texts[actual_doc_type] = text
-            doc_entries.append({
-                "doc_type":         actual_doc_type,
-                "declared_doc_type": doc.doc_type,
-                "url":              doc.url,
-                "text":             text,
-                "word_count":       len(text.split()) if text else 0,
-                "auto_discovered":  False,
-                "discovery_attempted": False,
-                "cmp_payloads":     cmp_payloads,
-                "input_source":     input_source,
-                "reclassify_hint":  reclassify_hint,
-            })
-
-        # Step 1a-bis: AUTO-DISCOVERY. For each canonical doc_type the user
-        # did NOT submit a URL/text for, try to find it on the homepage of
-        # the submitted URLs. This bridges the gap between "user knows the
-        # exact URL" (rare) and "user pasted the homepage" (common).
-        await _autodiscover_missing(
-            check_id, doc_entries, doc_texts, url_text_cache,
-        )
-
-        # Step 1b: Section splitting — two cases:
-        # 1. Same URL used for multiple doc_types → split by heading
-        # 2. DSI text contains Cookie/Social-Media sections → auto-fill empty rows
-        from compliance.services.section_splitter import (
-            split_shared_texts, auto_fill_from_dsi, cross_search_documents,
-        )
-        split_shared_texts(doc_entries, url_text_cache)
-        auto_fill_from_dsi(doc_entries)
-
-        # Step 1c: Cross-document search — find doc_types in wrong documents (30-35%)
-        _update(check_id, "Dokumente werden uebergreifend durchsucht...", 32)
-        placement_findings = cross_search_documents(doc_entries)
-
-        # Refresh doc_texts after all splitting/searching
-        for entry in doc_entries:
-            if entry.get("text"):
-                doc_texts[entry["doc_type"]] = entry["text"]
-
-        # P15: Dedupe — wenn mehrere Doc-Types DASSELBE Dokument referenzieren
-        # (z.B. Safetykon: User gibt /datenschutz fuer dse + cookie + widerruf),
-        # behalten wir nur den primaeren Doc-Type. Andere: leeren + note.
-        # Priorität: dse > impressum > cookie > widerruf > agb > nutzungsbedingungen
-        _DOC_PRIORITY = ["dse", "impressum", "cookie", "widerruf", "agb",
-                         "nutzungsbedingungen", "social_media", "dsb"]
-        seen_text_hash: dict[int, str] = {}
-        for dt in _DOC_PRIORITY:
-            entry = next((e for e in doc_entries if e.get("doc_type") == dt
-                          and e.get("text")), None)
-            if not entry:
-                continue
-            text_hash = hash((entry.get("text") or "").strip()[:1000])
-            if text_hash in seen_text_hash:
-                primary = seen_text_hash[text_hash]
-                logger.info(
-                    "P15 dedup: doc_type=%s referenziert dasselbe Dokument "
-                    "wie %s (URL=%s) -> als Duplikat markiert.",
-                    dt, primary, entry.get("url", "")[:60],
-                )
-                entry["text"] = ""
-                entry["word_count"] = 0
-                entry["url"] = ""
-                entry["dup_of"] = primary
-                doc_texts.pop(dt, None)
-            else:
-                seen_text_hash[text_hash] = dt
-
-        # Step 2: Detect business profile (35-40%)
-        _update(check_id, "Geschaeftsmodell wird erkannt...", 37)
-        # P16: Homepage-Text mit fuer Profile-Detection (no_direct_sales
-        # B2B-Indikatoren wie "CE-Zertifizierung" / "Schulungen" stehen oft
-        # nur im Homepage-Menue, nicht im Pflichttext).
-        profile_input = dict(doc_texts)
-        try:
-            base_url = ""
-            for e in doc_entries:
-                if e.get("url"):
-                    from urllib.parse import urlparse
-                    p = urlparse(e["url"])
-                    if p.scheme and p.netloc:
-                        base_url = f"{p.scheme}://{p.netloc}/"
-                        break
-            if base_url:
-                import re as _re
-                async with httpx.AsyncClient(
-                    timeout=8.0, follow_redirects=True,
-                    headers={"User-Agent": "Mozilla/5.0 (X11; Linux x86_64) "
-                             "AppleWebKit/537.36 HeadlessChrome/120.0.0.0"},
-                ) as _hc:
-                    _hr = await _hc.get(base_url)
-                    if _hr.status_code == 200 and "text/html" in _hr.headers.get(
-                            "content-type", ""):
-                        _html = _hr.text[:60000]
-                        _html = _re.sub(r"<script[^>]*>.*?</script>", " ",
-                                        _html, flags=_re.DOTALL | _re.IGNORECASE)
-                        _html = _re.sub(r"<style[^>]*>.*?</style>", " ",
-                                        _html, flags=_re.DOTALL | _re.IGNORECASE)
-                        _html = _re.sub(r"<[^>]+>", " ", _html)
-                        _html = _re.sub(r"\s+", " ", _html).strip()
-                        if len(_html.split()) > 30:
-                            profile_input["__homepage"] = _html[:20000]
-                            logger.info("P16 homepage merged for profile: %d words",
-                                        len(_html.split()))
-        except Exception as e:
-            logger.debug("homepage fetch for profile failed: %s", e)
-        profile = await detect_business_profile(profile_input)
-        profile_dict = asdict(profile)
-
-        # Step 3: Check each document
-        results: list[DocCheckResult] = []
-        total_findings = 0
-        use_agent_flag = req.use_agent or os.getenv(
-            "COMPLIANCE_USE_AGENT", "false"
-        ).lower() == "true"
-
-        # Filter out doc_types that don't apply to this business profile
-        skip_types = _get_skip_types(profile)
-
-        # Derive business_scope hints for the MC filter (O1 — Doc-type Scope-Flag).
-        # MCs that explicitly require a feature (e.g. 'biometric_processing',
-        # 'ai_decision_making', 'child_targeting') get dropped when the
-        # detected profile doesn't declare it.
-        business_scope: set[str] = set()
-        for svc in (getattr(profile, "detected_services", []) or []):
-            business_scope.add(str(svc).lower())
-        if (getattr(profile, "business_type", "") or "").lower() == "b2c":
-            business_scope.add("b2c")
-        if getattr(profile, "has_online_shop", False):
-            business_scope.add("ecommerce")
-        if getattr(profile, "is_regulated_profession", False):
-            business_scope.add("regulated_profession")
-
-        # Document checks: 40-80%
-        n_entries = max(1, len(doc_entries))
-        for i, entry in enumerate(doc_entries):
-            text = entry["text"]
-            doc_type = entry["doc_type"]
-            label = _doc_type_label(doc_type)
-            url = entry["url"]
-
-            if doc_type in skip_types:
-                results.append(DocCheckResult(
-                    label=label, url=url, doc_type=doc_type,
-                    error=skip_types[doc_type],
-                ))
-                continue
-
-            pct = int(40 + (i / n_entries) * 40)
-            _update(check_id, f"Pruefen {i+1}/{n_entries}: {label}...", pct)
-
-            if not text or len(text) < 50:
-                # P15: duplicate doc that was deduped against a primary doc
-                if entry.get("dup_of"):
-                    results.append(DocCheckResult(
-                        label=label, url="", doc_type=doc_type,
-                        error=f"Nicht separat vorhanden — wird im Dokument "
-                              f"'{_doc_type_label(entry['dup_of'])}' "
-                              f"mit-geprueft.",
-                    ))
-                    continue
-                # P24: DSB-Kontakt ist Pflichtangabe in der DSE (Art. 13(1)(b)
-                # DSGVO) — wenn kein separates DSB-Dokument vorliegt, ist das
-                # KEIN Fehler. DSB-Pruefung passiert ohnehin in der DSE.
-                if doc_type == "dsb" and not (entry.get("url") or "").strip():
-                    results.append(DocCheckResult(
-                        label=label, url="", doc_type=doc_type,
-                        error="Nicht separat vorhanden — DSB-Kontaktdaten "
-                              "werden in der Datenschutzerklaerung als "
-                              "Pflichtangabe nach Art. 13(1)(b) DSGVO geprueft.",
-                    ))
-                    continue
-                # Empty entry — either from auto-discovery padding (no URL
-                # to fetch) or from a fetch that returned nothing. If there
-                # was a URL we keep the error so the user knows the fetch
-                # failed; otherwise let the padding step label it
-                # 'Nicht eingereicht' / 'Auf der Website nicht gefunden'.
-                if (entry.get("url") or "").strip():
-                    results.append(DocCheckResult(
-                        label=label, url=url, doc_type=doc_type,
-                        error="Kein Text vorhanden oder zu kurz",
-                    ))
-                continue
-
-            result = await _check_single(
-                text, doc_type, label, url,
-                entry["word_count"], use_agent_flag,
-                business_scope=business_scope,
-                business_profile={"no_direct_sales": getattr(profile, "no_direct_sales", False)},
-            )
-
-            # Apply profile context filter
-            result = _apply_profile_filter(result, profile, doc_type)
-
-            # Add placement findings — but only if the regex checks confirm
-            # the text doesn't match. If completeness >= 50%, the text IS the
-            # right doc_type despite missing cross-search keywords.
-            if result.completeness_pct < 50:
-                for pf in placement_findings:
-                    if pf.get("doc_type") == doc_type:
-                        result.checks.insert(0, CheckItem(**{
-                            k: v for k, v in pf.items() if k != "doc_type"
-                        }))
-
-            results.append(result)
-            total_findings += result.findings_count
-
-        # Step 3b: Banner-Check (automatic, uses first URL or homepage)
-        banner_result = None
-        banner_url = req.documents[0].url if req.documents and req.documents[0].url else ""
-        # Use the homepage (strip path) for banner check
-        if banner_url:
-            from urllib.parse import urlparse
-            parsed = urlparse(banner_url)
-            banner_url = f"{parsed.scheme}://{parsed.netloc}"
-        if banner_url:
-            _update(check_id, "Cookie-Banner wird geprueft...", 82)
-            try:
-                async with httpx.AsyncClient(timeout=900.0) as client:  # P50: +10min for vendor-detail-phase
-                    resp = await client.post(
-                        f"{CONSENT_TESTER_URL}/scan",
-                        json={"url": banner_url, "timeout_per_phase": 10},
-                    )
-                    if resp.status_code == 200:
-                        banner_result = resp.json()
-            except Exception as e:
-                logger.warning(
-                    "Banner check failed: %s (%s)", e or "<empty>", type(e).__name__
-                )
-
-        # Step 3c: Cross-check Banner vs Cookie-Richtlinie (88-90%)
-        if banner_result and "cookie" in doc_texts:
-            _update(check_id, "Banner vs. Cookie-Richtlinie abgleichen...", 89)
-            cross_findings = _cross_check_banner_vs_cookie(
-                banner_result, doc_texts["cookie"],
-            )
-            if cross_findings:
-                for r in results:
-                    if r.doc_type == "cookie":
-                        for cf in cross_findings:
-                            r.checks.append(CheckItem(**cf))
-                        l2 = [c for c in r.checks if c.level == 2 and not c.skipped]
-                        l2p = sum(1 for c in l2 if c.passed)
-                        r.correctness_pct = round(l2p / len(l2) * 100) if l2 else 0
-
-        # Step 3d: TCF Vendor cross-check against DSI
-        tcf_vendors = banner_result.get("tcf_vendors", []) if banner_result else []
-        vvt_entries: list[dict] = []
-        if tcf_vendors and "dse" in doc_texts:
-            _update(check_id, f"{len(tcf_vendors)} TCF-Verarbeiter vs. DSI abgleichen...", 91)
-            from compliance.services.banner_cookie_cross_check import cross_check_vendors_vs_dsi
-            from compliance.services.vendor_vvt_mapper import map_vendors_to_vvt
-            vendor_findings = cross_check_vendors_vs_dsi(tcf_vendors, doc_texts["dse"])
-            if vendor_findings:
-                for r in results:
-                    if r.doc_type == "dse":
-                        for vf in vendor_findings:
-                            r.checks.append(CheckItem(**vf))
-            vvt_entries = map_vendors_to_vvt(tcf_vendors)
-
-        # Step 4: Extract profile hints from documents (92-95%)
-        _update(check_id, "Profil wird aus Dokumenten extrahiert...", 93)
-        from compliance.services.profile_extractor import extract_profile_from_documents
-        extracted_profile = extract_profile_from_documents(doc_texts, profile_dict)
-
-        # Step 4b: Determine scenario per document
-        for r in results:
-            if r.error:
-                r.scenario = "skip"
-            elif r.completeness_pct < 30:
-                r.scenario = "regenerate"
-            elif r.completeness_pct < 95:
-                r.scenario = "fix"
-            else:
-                r.scenario = "import"
-
-        # Step 4c: Always render all 8 canonical doc types. Missing types
-        # are differentiated:
-        #   - Discovery was tried but found nothing -> 'Auf der Website
-        #     nicht gefunden' (suggest user provides URL manually)
-        #   - No submitted URLs at all -> 'Nicht eingereicht'
-        attempted = {
-            e["doc_type"] for e in doc_entries if e.get("discovery_attempted")
-        }
-        results = _pad_results_with_missing(results, discovery_attempted=attempted)
-
-        # Step 5: Build report with management summary (95-98%)
-        _update(check_id, "Report wird erstellt...", 96)
-        from .agent_doc_check_report import (
-            build_management_summary,
-            build_scanned_urls_html,
-            build_provider_list_html,
-        )
-        from .agent_doc_check_extras import build_vvt_table_html
-
-        # Extract structured vendor records from any CMP payloads captured
-        # for the cookie doc (BMW ePaaS, OneTrust, etc.), validate their
-        # opt-out + privacy URLs concurrently, score each entry.
-        cmp_vendors: list[dict] = []
-        try:
-            from compliance.services.vendor_extractor import (
-                extract_vendors_from_payloads,
-            )
-            from compliance.services.cookie_link_validator import (
-                validate_vendor_urls, score_vendors,
-            )
-            cookie_payloads = []
-            cookie_text = ""
-            # P30: aggregate cmp_payloads from ALL doc_entries — sites
-            # like Mercedes load Usercentrics only on the homepage, so
-            # the JSON gets captured during DSE/Impressum discovery, not
-            # in the cookies.html fetch. Dedup by URL since the same
-            # payload is captured on every page load.
-            seen_cmp_urls: set[str] = set()
-            for e in doc_entries:
-                for p in (e.get("cmp_payloads") or []):
-                    p_url = p.get("url") or ""
-                    if p_url and p_url in seen_cmp_urls:
-                        continue
-                    seen_cmp_urls.add(p_url)
-                    cookie_payloads.append(p)
-                if e.get("doc_type") == "cookie" and e.get("text"):
-                    cookie_text = e["text"]
-            # P48: also pull cmp_payloads from the Banner-Scan (homepage
-            # 3-phase consent test). Mercedes' Usercentrics-JSON is
-            # captured there even when not in DSI-Discovery of static
-            # legal pages.
-            if banner_result:
-                for p in (banner_result.get("cmp_payloads") or []):
-                    p_url = p.get("url") or ""
-                    if p_url and p_url in seen_cmp_urls:
-                        continue
-                    seen_cmp_urls.add(p_url)
-                    cookie_payloads.append(p)
-                if cookie_payloads:
-                    logger.info("P48: %d CMP-payloads available for vendor-extract (after Banner-Scan merge)",
-                                len(cookie_payloads))
-            # P17-D: Fallback wenn cookie via P15 deduped wurde — nutze DSE-Text
-            # sofern Cookie-Begriffe drin sind, damit LLM-Vendor-Extract trotzdem
-            # greifen kann.
-            if not cookie_text and not cookie_payloads:
-                dse_t = doc_texts.get("dse", "")
-                if dse_t and any(w in dse_t.lower() for w in
-                                  ("cookie", "tracking", "google analytics", "consent")):
-                    cookie_text = dse_t
-                    logger.info("P17-D: vendor-extract Fallback auf DSE (Cookie deduped)")
-            # Site-owner derived from the submitted URLs — drives the
-            # INTERNAL/GROUP_COMPANY classification of vendor records.
-            owner_name = _company_name_from_url(doc_entries) or ""
-            if cookie_payloads:
-                cmp_vendors = extract_vendors_from_payloads(
-                    cookie_payloads, owner_name=owner_name,
-                )
-            # P52: LLM-Fallback nicht nur wenn 0 Vendors, sondern auch
-            # wenn die strukturierten Quellen < 5 Vendors lieferten und
-            # der Cookie-Text substantiell ist. So holt sich VW-typische
-            # Setups (Generic CMP, 28 Cookies aber 0 cmp_payloads) noch
-            # ihre echten Vendors aus dem Text.
-            if (len(cmp_vendors) < 5
-                    and cookie_text and len(cookie_text.split()) >= 500):
-                from compliance.services.vendor_llm_extractor import (
-                    extract_vendors_via_llm,
-                )
-                from compliance.services.vendor_classifier import classify
-                _update(check_id, "Vendor-Liste per LLM extrahieren...", 94)
-                llm_vendors = await extract_vendors_via_llm(cookie_text)
-                # P52: classify die LLM-Vendors und MERGE mit existing
-                # statt zu ueberschreiben.
-                existing_names = {(v.get("name") or "").strip().lower()
-                                  for v in cmp_vendors}
-                added_llm = 0
-                for v in llm_vendors:
-                    nm = (v.get("name") or "").strip()
-                    if not nm or nm.lower() in existing_names:
-                        continue
-                    v["recipient_type"] = classify(
-                        vendor_name=nm,
-                        category=v.get("category", ""),
-                        owner_name=owner_name,
-                    )
-                    v.setdefault("source", "llm_cascade")
-                    cmp_vendors.append(v)
-                    existing_names.add(nm.lower())
-                    added_llm += 1
-                if added_llm:
-                    logger.info(
-                        "P52 LLM-Cascade: +%d Vendors (total: %d)",
-                        added_llm, len(cmp_vendors),
-                    )
-            # P57: Phase G vendor_details als zusätzliche Vendor-Quelle.
-            # Wenn extract_vendors_from_payloads weniger findet als
-            # Phase G's Info-Click-Through (z.B. Mercedes-Settings nicht
-            # erkannt als usercentrics-kind), die Phase-G-Namen als
-            # eigenständige Vendors hinzufügen.
-            if banner_result:
-                vd_list = banner_result.get("vendor_details") or []
-                vd_list = [v for v in vd_list if v.get("name") != "__TDM_OPTOUT__"]
-                existing_names = {(v.get("name") or "").strip().lower()
-                                  for v in cmp_vendors}
-                added = 0
-                for d in vd_list:
-                    n = (d.get("name") or "").strip()
-                    if not n or n.lower() in existing_names:
-                        continue
-                    # Skip generic category-labels (Mercedes-Kategorien)
-                    if n.lower() in ("technisch erforderlich", "analyse und statistik",
-                                     "marketing", "alles auswählen",
-                                     "alles auswaehlen"):
-                        continue
-                    from compliance.services.vendor_classifier import classify
-                    cmp_vendors.append({
-                        "name": n,
-                        "country": "",
-                        "purpose": d.get("description", "")[:500],
-                        "category": "",
-                        "opt_out_url": d.get("opt_out_url", ""),
-                        "privacy_policy_url": d.get("privacy_url", ""),
-                        "persistence": d.get("retention", ""),
-                        "cookies": d.get("cookies", []),
-                        "processing_company": d.get("processing_company", ""),
-                        "address": d.get("address", ""),
-                        "purposes": d.get("purposes", []),
-                        "technologies": d.get("technologies", []),
-                        "recipient_type": classify(
-                            vendor_name=n, category="", owner_name=owner_name,
-                        ),
-                    })
-                    existing_names.add(n.lower())
-                    added += 1
-                if added:
-                    logger.info("P57: added %d new vendors from Phase G (total: %d)",
-                                added, len(cmp_vendors))
-
-            # D — HTML-Tabellen die der consent-tester aus dem DOM
-            # extrahiert hat: direkt deterministisch parsen (hoechste
-            # Genauigkeit, keine LLM-Halluzinationen).
-            for pl in (cookie_payloads or []):
-                if pl.get("kind") != "html_table":
-                    continue
-                rows = pl.get("rows") or []
-                if len(rows) < 3:
-                    continue
-                try:
-                    from compliance.services.cookies_table_parser import (
-                        parse_cookie_table as _parse_ct_d,
-                    )
-                    table_text = "\n".join(rows)
-                    d_vendors = _parse_ct_d(table_text)
-                    if d_vendors:
-                        existing_d = {(v.get("name") or "").strip().lower()
-                                      for v in cmp_vendors}
-                        added_d = 0
-                        for v in d_vendors:
-                            nm = (v.get("name") or "").strip()
-                            if not nm or nm.lower() in existing_d:
-                                continue
-                            v.setdefault("source", "html_table_dom")
-                            cmp_vendors.append(v)
-                            existing_d.add(nm.lower())
-                            added_d += 1
-                        if added_d:
-                            logger.info(
-                                "D HTML-Table-DOM-Parse: +%d Vendors aus "
-                                "%d-Zeilen-Tabelle (total: %d)",
-                                added_d, len(rows), len(cmp_vendors),
-                            )
-                except Exception as e:
-                    logger.warning("html_table parse failed: %s", e)
-
-            # B — cookies_table_parser auch auf gecrawltem Cookie-Text.
-            # Erst Standard-Parse (Tab/Pipe-getrennt). Wenn der nichts
-            # findet (kein Separator), Flat-Pattern-Parse fuer Sites wie
-            # VW die ihre Tabelle als flachen Text liefern.
-            if cookie_text and len(cookie_text) >= 500:
-                try:
-                    from compliance.services.cookies_table_parser import (
-                        parse_cookie_table as _parse_ct,
-                        parse_flat_cookie_text as _parse_flat,
-                    )
-                    crawled_table_vendors = _parse_ct(cookie_text)
-                    if not crawled_table_vendors:
-                        crawled_table_vendors = _parse_flat(cookie_text)
-                    if crawled_table_vendors:
-                        existing = {(v.get("name") or "").strip().lower()
-                                    for v in cmp_vendors}
-                        added_c = 0
-                        for v in crawled_table_vendors:
-                            nm = (v.get("name") or "").strip()
-                            if not nm or nm.lower() in existing:
-                                continue
-                            v.setdefault("source", "table_crawled")
-                            cmp_vendors.append(v)
-                            existing.add(nm.lower())
-                            added_c += 1
-                        if added_c:
-                            logger.info(
-                                "B Crawled-Tabellen-Parse: +%d Vendors "
-                                "(total: %d)",
-                                added_c, len(cmp_vendors),
-                            )
-                except Exception as e:
-                    logger.warning("crawled-table-parse failed: %s", e)
-
-            # C — Screenshot + Tesseract-OCR der Cookie-Richtlinie.
-            # Overlapping scrolling screenshots (jede Slice ueberlappt die
-            # vorherige um overlap_px Pixel) → lueckenlose Beweiskette.
-            # Pro Slice Tesseract OCR + parse_ocr_cookie_table; Dedup nach
-            # Cookie-Name über alle Slices. Site-unabhaengig, deterministisch.
-            cookie_url_for_shot = ""
-            for _e in doc_entries:
-                if _e.get("doc_type") == "cookie" and _e.get("url"):
-                    cookie_url_for_shot = _e["url"]; break
-            cookie_evidence_slices: list[dict] | None = None
-            cookie_evidence_meta: dict | None = None
-            if cookie_url_for_shot:
-                try:
-                    from compliance.services.cookie_screenshot_ocr import (
-                        capture_cookie_evidence_slices,
-                        ocr_slices_extract_cookies,
-                        cookies_to_vendor_records,
-                    )
-                    from compliance.services.cookies_table_parser import (
-                        _guess_vendor as _gv,
-                    )
-                    _update(check_id,
-                            "Cookie-Richtlinie wird fotografiert (lueckenlose Beweiskette)...",
-                            92)
-                    ev = await capture_cookie_evidence_slices(
-                        cookie_url_for_shot, check_id=check_id,
-                        viewport_h=1024, overlap_px=200, max_slices=40,
-                    )
-                    if ev.get("slices"):
-                        cookie_evidence_slices = ev["slices"]  # ZIP-Anhang
-                        cookie_evidence_meta = {
-                            "total_height_px": ev.get("total_height_px"),
-                            "width_px": ev.get("width_px"),
-                            "accepted_banner": ev.get("accepted_banner"),
-                            "expanded": ev.get("expanded"),
-                            "url": ev.get("url"),
-                            "slice_count": len(ev["slices"]),
-                        }
-                        _update(check_id,
-                                "Tesseract OCR über alle Slices...", 93)
-                        ocr_cookies, ocr_stats = ocr_slices_extract_cookies(
-                            ev["slices"],
-                        )
-                        if ocr_cookies:
-                            ocr_vendors = cookies_to_vendor_records(
-                                ocr_cookies, guess_vendor_fn=_gv,
-                            )
-                            existing = {
-                                (v.get("name") or "").strip().lower()
-                                for v in cmp_vendors
-                            }
-                            added_v = 0
-                            for v in ocr_vendors:
-                                nm = (v.get("name") or "").strip()
-                                if not nm:
-                                    continue
-                                if nm.lower() in existing:
-                                    for ex in cmp_vendors:
-                                        if (ex.get("name") or "").strip().lower() == nm.lower():
-                                            ex_names = {
-                                                (c.get("name") or "").lower()
-                                                for c in (ex.get("cookies") or [])
-                                            }
-                                            for c in (v.get("cookies") or []):
-                                                if c["name"].lower() not in ex_names:
-                                                    ex.setdefault("cookies", []).append(c)
-                                                    ex_names.add(c["name"].lower())
-                                            cur_src = ex.get("source", "")
-                                            if "tesseract_ocr" not in cur_src:
-                                                ex["source"] = (cur_src + ";tesseract_ocr").strip(";")
-                                            break
-                                    continue
-                                cmp_vendors.append(v)
-                                existing.add(nm.lower())
-                                added_v += 1
-                            logger.info(
-                                "C Tesseract-OCR: +%d Vendors / %d Cookies "
-                                "(über %d Slices, total: %d)",
-                                added_v, len(ocr_cookies),
-                                ocr_stats.get("slices", 0), len(cmp_vendors),
-                            )
-                except Exception as e:
-                    logger.warning(
-                        "Tesseract-OCR pipeline failed: %s (%s)",
-                        str(e) or "(no msg)", type(e).__name__,
-                    )
-
-            # User-pasted Cookie-Tabelle (deterministisch, kein LLM):
-            # die hat IMMER Vorrang weil 100% genau.
-            if pasted_table_vendors:
-                existing = {(v.get("name") or "").strip().lower()
-                            for v in cmp_vendors}
-                added_p = 0
-                for v in pasted_table_vendors:
-                    nm = (v.get("name") or "").strip()
-                    if not nm or nm.lower() in existing:
-                        continue
-                    cmp_vendors.append(v)
-                    existing.add(nm.lower())
-                    added_p += 1
-                if added_p:
-                    logger.info(
-                        "Pasted-Tabellen-Merge: +%d Vendors (total: %d)",
-                        added_p, len(cmp_vendors),
-                    )
-
-            # Cookie-Library-Fallback (P52 Lite): wenn weiterhin wenige
-            # Vendors aber viele after_accept-Cookies, aus Library auflösen.
-            # VW-Lehre: 6 LLM-Grob-Vendors reichen NICHT — die Library
-            # holt 30+ weitere aus den Cookie-Namen + Cookie-Doc-Pattern.
-            # Schwelle: immer probieren wenn < 20 Vendors.
-            if banner_result and len(cmp_vendors) < 20:
-                try:
-                    from compliance.services.cookie_to_vendor_fallback import (
-                        fallback_vendors_for_run,
-                    )
-                    from database import SessionLocal as _SLfb
-                    _fb_db = _SLfb()
-                    try:
-                        extra = fallback_vendors_for_run(
-                            _fb_db, banner_result, len(cmp_vendors),
-                            cookie_doc_text=cookie_text,
-                        )
-                        if extra:
-                            existing_names = {(v.get("name") or "").strip().lower()
-                                              for v in cmp_vendors}
-                            for v in extra:
-                                if v["name"].lower() in existing_names:
-                                    continue
-                                cmp_vendors.append(v)
-                            logger.info(
-                                "Cookie-Library-Fallback: cmp_vendors %d -> %d",
-                                len(cmp_vendors) - len(extra), len(cmp_vendors),
-                            )
-                    finally:
-                        _fb_db.close()
-                except Exception as e:
-                    logger.warning("Cookie-Library-Fallback skipped: %s", e)
-
-            # Vendor-Normalizer: Dedup (Google-Familie etc) + Garbage-Filter
-            try:
-                from compliance.services.vendor_normalizer import (
-                    normalize_vendors as _norm_v,
-                )
-                cmp_vendors = _norm_v(cmp_vendors)
-            except Exception as e:
-                logger.warning("vendor_normalizer skipped: %s", e)
-
-            # P50: enrich vendors with per-vendor detail-modal-extracts
-            # (description, opt-out URL, privacy URL, cookies). Detail
-            # comes from Phase G Info-button-click-through in /scan.
-            tdm_opt_out_notice = ""
-            if cmp_vendors and banner_result:
-                vendor_details = banner_result.get("vendor_details") or []
-                # P50f: filter out TDM-opt-out sentinel
-                tdm_sentinel = next((v for v in vendor_details
-                                     if v.get("name") == "__TDM_OPTOUT__"), None)
-                if tdm_sentinel:
-                    tdm_opt_out_notice = tdm_sentinel.get("description", "")
-                    logger.info("P50f: TDM opt-out — skipped detail-enrichment for vendors")
-                    vendor_details = [v for v in vendor_details
-                                      if v.get("name") != "__TDM_OPTOUT__"]
-                if vendor_details:
-                    details_by_name = {}
-                    for d in vendor_details:
-                        n = (d.get("name") or "").strip().lower()
-                        if n:
-                            details_by_name[n] = d
-                    enriched = 0
-                    for v in cmp_vendors:
-                        key = (v.get("name") or "").strip().lower()
-                        # Substring fallback for fuzzy matches (e.g.
-                        # "Google Analytics" detail-name may differ slightly)
-                        d = details_by_name.get(key)
-                        if not d:
-                            for dn, dv in details_by_name.items():
-                                if key in dn or dn in key:
-                                    d = dv
-                                    break
-                        if not d:
-                            continue
-                        if not v.get("country") and (d.get("processing_company") or d.get("address")):
-                            # Heuristic country extract from address (DE/EU keywords)
-                            addr = d.get("address", "")
-                            if re.search(r"\b(deutschland|germany|berlin|m(?:ue|ü)nchen|hamburg|stuttgart)\b", addr, re.I):
-                                v["country"] = "DE"
-                            elif re.search(r"\bireland|irland|dublin\b", addr, re.I):
-                                v["country"] = "IE"
-                            elif re.search(r"\busa|united states|california|new york|delaware\b", addr, re.I):
-                                v["country"] = "US"
-                        if not v.get("purpose"):
-                            v["purpose"] = d.get("description", "")[:500]
-                        if not v.get("opt_out_url"):
-                            v["opt_out_url"] = d.get("opt_out_url", "")
-                        if not v.get("privacy_policy_url"):
-                            v["privacy_policy_url"] = d.get("privacy_url", "")
-                        if not v.get("cookies"):
-                            v["cookies"] = d.get("cookies", [])
-                        v["purposes"] = d.get("purposes", [])
-                        v["technologies"] = d.get("technologies", [])
-                        if not v.get("persistence"):
-                            v["persistence"] = d.get("retention", "")
-                        v["processing_company"] = d.get("processing_company", "")
-                        v["address"] = d.get("address", "")
-                        enriched += 1
-                    logger.info("P50: enriched %d/%d vendors with detail-modal data",
-                                enriched, len(cmp_vendors))
-            # P59b: Cookie-Behavior-Validator — pruefe alle gesetzten Cookies
-            # gegen unsere Library, generiere 3-Tier-Severity-Findings.
-            # Background-Task hat keinen DB-Dependency-Inject -> SessionLocal
-            # selber oeffnen + sauber schliessen.
-            cookie_behavior_findings: list[dict] = []
-            if banner_result:
-                cookies_detailed = banner_result.get("cookies_detailed") or []
-                if cookies_detailed:
-                    cb_session = None
-                    try:
-                        from database import SessionLocal
-                        from compliance.services.cookie_behavior_validator import (
-                            validate_cookie_behavior,
-                        )
-                        from urllib.parse import urlparse
-                        fp_domain = ""
-                        if banner_url:
-                            fp_domain = urlparse(banner_url).netloc.replace("www.", "")
-                        cb_session = SessionLocal()
-                        cookie_behavior_findings = validate_cookie_behavior(
-                            cb_session, cookies_detailed,
-                            network_requests=[],  # TODO Layer B in P59d
-                            first_party_domain=fp_domain,
-                        )
-                        if cookie_behavior_findings:
-                            sevs = {f["severity"] for f in cookie_behavior_findings}
-                            logger.info(
-                                "P59b: Cookie-Behavior-Check %d findings "
-                                "(severities: %s) ueber %d Cookies",
-                                len(cookie_behavior_findings),
-                                sorted(sevs),
-                                len(cookies_detailed),
-                            )
-                            banner_result["cookie_behavior_findings"] = (
-                                cookie_behavior_findings
-                            )
-                        else:
-                            logger.info(
-                                "P59b: Cookie-Behavior-Check 0 findings "
-                                "ueber %d Cookies (library miss / clean)",
-                                len(cookies_detailed),
-                            )
-                    except Exception as cb_err:
-                        logger.warning("P59b Cookie-Behavior-Check failed: %s", cb_err)
-                    finally:
-                        if cb_session is not None:
-                            try:
-                                cb_session.close()
-                            except Exception:
-                                pass
-
-            # P61: "Untergeschobene Cookies" — wenn z.B. Google Tag Manager
-            # deklariert ist, kommen GA + GCL_AU + DoubleClick automatisch mit.
-            # Findings landen im banner_result fuer Mail-Render.
-            if banner_result and cmp_vendors:
-                try:
-                    from compliance.services.vendor_package_cookies import (
-                        detect_implicit_cookies,
-                    )
-                    declared = [v.get("name", "") for v in cmp_vendors if v.get("name")]
-                    actual_cookies: list[str] = []
-                    for phase_data in (banner_result.get("phases") or {}).values():
-                        if isinstance(phase_data, dict):
-                            for ck in (phase_data.get("cookies") or []):
-                                if isinstance(ck, dict) and ck.get("name"):
-                                    actual_cookies.append(ck["name"])
-                    implicit_findings = detect_implicit_cookies(
-                        declared, actual_cookies_set=actual_cookies or None,
-                    )
-                    if implicit_findings:
-                        banner_result["implicit_vendor_findings"] = implicit_findings
-                        logger.info(
-                            "P61: %d implicit vendor-package items detected "
-                            "(%d cookies + %d vendors)",
-                            len(implicit_findings),
-                            sum(1 for f in implicit_findings if f["implicit"]["type"] == "cookie"),
-                            sum(1 for f in implicit_findings if f["implicit"]["type"] == "vendor"),
-                        )
-                except Exception as p61_err:
-                    logger.warning("P61 implicit-vendor detection failed: %s", p61_err)
-
-            if cmp_vendors:
-                logger.info("VVT: %d vendors extracted, validating links",
-                            len(cmp_vendors))
-                cmp_vendors = await validate_vendor_urls(cmp_vendors)
-                cmp_vendors = score_vendors(cmp_vendors)
-                # Enrich each vendor with per-cookie functional roles
-                try:
-                    from compliance.services.cookie_function_classifier import (
-                        annotate_vendor_cookies,
-                    )
-                    cmp_vendors = [annotate_vendor_cookies(v) for v in cmp_vendors]
-                except Exception as e:
-                    logger.warning("Cookie function classification skipped: %s", e)
-        except Exception as e:
-            logger.warning("VVT vendor extraction skipped: %s", e)
-
-        # Vendor-Redundanz + EU-Alternativen + Cost/Savings (O4)
-        redundancy_report = None
-        try:
-            from compliance.services.vendor_redundancy import analyze as analyze_redundancy
-            from compliance.services.vendor_cost_estimator import infer_company_tier
-            if cmp_vendors:
-                # Company-Tier aus business_profile ableiten — beeinflusst die
-                # Cost-Range so dass z.B. fuer DAX-Konzerne nicht starter-Preise
-                # die untere Schranke duruecken.
-                bp_dict = {
-                    "type": getattr(profile, "business_type", ""),
-                    "features": list(business_scope),
-                }
-                ctier = infer_company_tier(bp_dict)
-                redundancy_report = analyze_redundancy(cmp_vendors, company_tier=ctier)
-                logger.info(
-                    "Redundanz: %d Kategorien mit Mehrfach-Anbietern, "
-                    "Spar-Schaetzung %s pro Jahr (company_tier=%s)",
-                    redundancy_report["summary"]["redundancy_count"],
-                    redundancy_report["summary"]["estimated_saving_pct"],
-                    ctier,
-                )
-        except Exception as e:
-            logger.warning("Vendor redundancy analysis skipped: %s", e)
-
-        summary_html = build_management_summary(results)
-        scanned_html = build_scanned_urls_html(doc_entries)
-        providers_html = build_provider_list_html(banner_result, vvt_entries)
-        # P18: Deep-Block mit Phases + Quality-Score + Per-Category-Tracker
-        from .agent_doc_check_banner import build_banner_deep_html
-        banner_deep_html = build_banner_deep_html(banner_result)
-        vvt_html = build_vvt_table_html(cmp_vendors)
-
-        # MC scorecard aggregated across ALL docs in this run (DSGVO/TDDDG/
-        # BGB/...). Sits at the top so the GF sees the regulation-by-
-        # regulation view before drilling into per-doc details.
-        from compliance.services.mc_scorecard import build_scorecard
-        from .agent_doc_check_scorecard import build_scorecard_html
-        all_mc_checks: list[dict] = []
-        # P73: pro-doc Fails sammeln um Solution-Generator pro Doc-Type
-        # mit dem korrekten doc_text aufzurufen.
-        fails_by_doc: dict[str, list[dict]] = {}
-        for r in results:
-            for c in r.checks:
-                if c.id.startswith("mc-"):
-                    rec = {
-                        "id": c.id, "label": c.label, "passed": c.passed,
-                        "severity": c.severity, "skipped": c.skipped,
-                        "regulation": c.regulation,
-                        "hint": getattr(c, "hint", "") or "",
-                    }
-                    all_mc_checks.append(rec)
-                    if (not c.passed and not c.skipped
-                            and (c.severity or "").upper() in ("CRITICAL", "HIGH")):
-                        fails_by_doc.setdefault(r.doc_type, []).append(rec)
-        # P106 — Audit-Type-Klassifizierung pro MC. Interne Prozess-/
-        # Doku-Checks werden NICHT als FAIL gewertet sondern als CHECK
-        # (manuelle Pruefung beim DSB notwendig).
-        try:
-            from compliance.services.mc_audit_type import (
-                annotate_mc_results, split_by_audit_type,
-            )
-            annotate_mc_results(all_mc_checks)
-            mc_split = split_by_audit_type(all_mc_checks)
-            # Fails-by-doc neu aufbauen: nur noch echte verifiable Fails
-            fails_by_doc = {}
-            for r in mc_split.get("verifiable_fails") or []:
-                fails_by_doc.setdefault("dse", []).append(r)
-        except Exception as e:
-            logger.warning("P106 mc_audit_type skipped: %s", e)
-            mc_split = {"internal_checks": [], "verifiable_fails": all_mc_checks}
-        scorecard = build_scorecard(all_mc_checks) if all_mc_checks else {}
-        # Trend: load previous scorecard for the same tenant + domain so the
-        # email can show delta indicators (A6).
-        prev_scorecard: dict | None = None
-        if scorecard:
-            try:
-                from compliance.services.compliance_audit_log import (
-                    list_runs_for_tenant,
-                )
-                tenant_id_for_trend = req.recipient or ""
-                base_domain_for_trend = _extract_domain(doc_entries) or ""
-                prev_runs = list_runs_for_tenant(
-                    tenant_id_for_trend,
-                    base_domain=base_domain_for_trend,
-                    limit=1,
-                )
-                if prev_runs:
-                    prev_scorecard = prev_runs[0].get("scorecard")
-            except Exception as e:
-                logger.debug("trend lookup skipped: %s", e)
-        scorecard_html = (
-            build_scorecard_html(scorecard, previous_scorecard=prev_scorecard)
-            if scorecard else ""
-        )
-
-        report_html = build_html_report(results, None, doc_texts)
-        profile_html = _build_profile_html(profile)
-
-        # O4: Vendor-Redundanz / EU-Alternativen + Cost-Savings-Block
-        from .agent_doc_check_redundancy import build_redundancy_html
-        redundancy_html = build_redundancy_html(redundancy_report)
-
-        # P1: Executive-Summary GANZ oben — CFO/GF sieht 4 KPIs + 2 CTAs.
-        from .agent_doc_check_exec_summary import build_exec_summary_html
-        # Site-Name fuer Header bestimmen (gleiche Logik wie Email-Subject)
-        url_company_for_exec = _company_name_from_url(doc_entries)
-        domain_for_exec = _extract_domain(doc_entries)
-        site_name_for_exec = url_company_for_exec or domain_for_exec or ""
-        exec_summary_html = build_exec_summary_html(
-            scorecard=scorecard,
-            previous_scorecard=prev_scorecard,
-            cmp_vendors=cmp_vendors,
-            redundancy_report=redundancy_report,
-            site_name=site_name_for_exec,
-        )
-
-        # P18: Critical-Findings-Block (rot oben, mit Sofortmassnahmen +
-        # Quellen + Bussgeld-Praezedenz). Wird nur gerendert wenn echte
-        # kritische Verstoesse vorliegen.
-        critical_html = ""
-        try:
-            from .agent_doc_check_critical import build_critical_findings_html
-            critical_html = build_critical_findings_html(
-                banner_result=banner_result,
-                scorecard=scorecard,
-                results=results,
-            )
-        except Exception as e:
-            logger.warning("Critical-findings block skipped: %s", e)
-
-        # P10: Cookie-Policy-Architecture-Detection (BMW-Pattern erkennen)
-        cookie_arch_html = ""
-        try:
-            from compliance.services.cookie_policy_architecture import (
-                detect_architecture, build_architecture_html,
-            )
-            cookie_doc_url = ""
-            cookie_doc_text = doc_texts.get("cookie", "")
-            cookie_cmp_payloads: list[dict] = []
-            for e in doc_entries:
-                if (e.get("doc_type") or "").lower() in ("cookie", "cookie_policy"):
-                    cookie_doc_url = e.get("url", "")
-                    cookie_cmp_payloads = e.get("cmp_payloads") or []
-                    break
-            # P17-A: Fallback wenn Cookie-Doc via P15 deduped wurde — nutze
-            # den DSE-Text wenn er Cookie-Schluesselwoerter enthaelt.
-            if not cookie_doc_text:
-                dse_text = doc_texts.get("dse", "")
-                if dse_text and any(w in dse_text.lower() for w in
-                                     ("cookie", "tracking", "google analytics",
-                                      "consent")):
-                    cookie_doc_text = dse_text
-                    dse_entry = next((e for e in doc_entries
-                                      if e.get("doc_type") == "dse"), {})
-                    cookie_doc_url = dse_entry.get("url", "")
-                    cookie_cmp_payloads = dse_entry.get("cmp_payloads") or []
-                    logger.info("P17-A: cookie-arch fallback auf DSE (Cookie-Doc deduped)")
-            if cookie_doc_text:
-                arch = detect_architecture(
-                    doc_url=cookie_doc_url,
-                    doc_text=cookie_doc_text,
-                    cmp_payloads=cookie_cmp_payloads,
-                    homepage_cmp_payloads=cmp_payloads or [],
-                )
-                cookie_arch_html = build_architecture_html(arch)
-                logger.info("cookie-arch: layer=%s versioned=%s risk=%s",
-                            arch["layer_separation"], arch["versioned"], arch["risk_label"])
-        except Exception as e:
-            logger.warning("cookie-architecture detection failed: %s", e)
-
-        # Reihenfolge — Sales-optimiert:
-        #   1) Exec-Summary (KPIs + Saving + CTAs)
-        #   2) summary_html (Konkrete Aufgaben fuer die Geschaeftsfuehrung)
-        #   3) scanned_urls (Quellen-Transparenz)
-        #   4) profile_html (Erkanntes Geschaeftsmodell)
-        #   5) scorecard_html (MC-Scorecard)
-        #   6) redundancy_html (Optimierungspotenzial — direkt nach Compliance-Score)
-        #   7) providers_html + vvt_html (Vendor-Liste)
-        #   8) report_html (Doc-Pruefung Details)
-        # P62: Marketing-Manager-Disclaimer — was wir sehen vs nicht sehen
-        scope_disclaimer_html = ""
-        try:
-            from .scope_disclaimer import build_scope_disclaimer_html
-            scope_disclaimer_html = build_scope_disclaimer_html()
-        except Exception as e:
-            logger.warning("Scope-disclaimer block skipped: %s", e)
-
-        # P103 + P104 — Cookie-Value-Entropy + Network-Tracing (Stufe 3 + 4)
-        entropy_html = ""
-        network_trace_html = ""
-        try:
-            from compliance.services.cookie_value_entropy import (
-                check_cookies_for_entropy_mismatch, build_entropy_block_html,
-            )
-            from compliance.services.cookie_network_tracer import (
-                trace_cookie_network, build_network_trace_block_html,
-            )
-            cookies_detailed = (banner_result or {}).get("cookies_detailed") or []
-            entropy_findings = check_cookies_for_entropy_mismatch(cookies_detailed)
-            if entropy_findings:
-                entropy_html = build_entropy_block_html(entropy_findings)
-                logger.info("P103 Entropy: %d Findings", len(entropy_findings))
-            primary_url = ""
-            for e_ in doc_entries:
-                if e_.get("url"):
-                    primary_url = e_["url"]; break
-            net_findings = trace_cookie_network(cookies_detailed, primary_url)
-            if net_findings:
-                network_trace_html = build_network_trace_block_html(net_findings)
-                logger.info("P104 Network-Trace: %d Findings", len(net_findings))
-        except Exception as e:
-            logger.warning("P103/P104 entropy/network-trace skipped: %s", e)
-
-        # P105 — IAB TCF Authority-Cross-Reference (Stufe 5)
-        tcf_authority_html = ""
-        try:
-            from compliance.services.tcf_vendor_authority import (
-                cross_reference_with_tcf, build_tcf_authority_block_html,
-            )
-            from database import SessionLocal as _SLtcf
-            _tcf_db = _SLtcf()
-            try:
-                tcf_findings = cross_reference_with_tcf(_tcf_db, cmp_vendors)
-                if tcf_findings:
-                    tcf_authority_html = build_tcf_authority_block_html(tcf_findings)
-                    logger.info(
-                        "TCF-Authority: %d Vendor-Discrepancies gefunden",
-                        len(tcf_findings),
-                    )
-            finally:
-                _tcf_db.close()
-        except Exception as e:
-            logger.warning("TCF-Authority-Check skipped: %s", e)
-
-        # COOKIE-COMPLIANCE-AUDIT (3-Quellen-Vergleich) — das ist der
-        # zentrale USP: deklariert in Richtlinie vs tatsaechlich im
-        # Browser geladen vs Library-Match.
-        cookie_audit = {}
-        cookie_audit_html = ""
-        try:
-            from compliance.services.cookie_compliance_audit import (
-                audit_cookie_compliance, build_cookie_audit_block_html,
-            )
-            from database import SessionLocal as _SLca
-            _ca_db = _SLca()
-            try:
-                cookie_audit = audit_cookie_compliance(
-                    _ca_db, doc_texts.get("cookie") or doc_texts.get("dse"),
-                    banner_result,
-                )
-                if cookie_audit and (cookie_audit.get("declared_count") or
-                                      cookie_audit.get("browser_count")):
-                    cookie_audit_html = build_cookie_audit_block_html(cookie_audit)
-                    logger.info(
-                        "Cookie-Audit: %d deklariert, %d im Browser, "
-                        "%d undokumentiert, %d compliant",
-                        cookie_audit.get("declared_count"),
-                        cookie_audit.get("browser_count"),
-                        len(cookie_audit.get("undeclared_in_browser") or []),
-                        len(cookie_audit.get("compliant") or []),
-                    )
-            finally:
-                _ca_db.close()
-        except Exception as e:
-            logger.warning("cookie-compliance-audit skipped: %s", e)
-
-        # P102: Cookie-Klassifikations-Pruefung (deklariert vs Library)
-        library_mismatch_html = ""
-        mismatches: list[dict] = []
-        try:
-            from compliance.services.cookie_library_mismatch import (
-                detect_mismatches, build_mismatch_block_html,
-            )
-            from database import SessionLocal
-            cookie_doc_for_check = doc_texts.get("cookie") or doc_texts.get("dse") or ""
-            all_cookies_seen: list[str] = []
-            if banner_result:
-                for ph in (banner_result.get("phases") or {}).values():
-                    if isinstance(ph, dict):
-                        for ck in (ph.get("cookies") or []):
-                            if isinstance(ck, str):
-                                all_cookies_seen.append(ck)
-                            elif isinstance(ck, dict) and ck.get("name"):
-                                all_cookies_seen.append(ck["name"])
-            if all_cookies_seen and cookie_doc_for_check:
-                _mm_db = SessionLocal()
-                try:
-                    mismatches = detect_mismatches(
-                        _mm_db, all_cookies_seen, cookie_doc_for_check,
-                    )
-                    if mismatches:
-                        library_mismatch_html = build_mismatch_block_html(mismatches)
-                        logger.info(
-                            "P102: %d Cookie-Mismatches gefunden", len(mismatches)
-                        )
-                finally:
-                    _mm_db.close()
-        except Exception as e:
-            logger.warning("P102 mismatch detection failed: %s", e)
-
-        # P35 + P77 + P78: Textsignal-Checks (Save-Label, Cookies-in-DSE,
-        # JC-Klausel im DSE)
-        signals_html = ""
-        try:
-            from compliance.services.doc_text_signals import (
-                run_all as run_signal_checks,
-                build_signals_block_html,
-            )
-            cookie_doc_missing = not bool(doc_texts.get("cookie"))
-            sig_findings = run_signal_checks(
-                banner_result, doc_texts, cookie_doc_missing,
-            )
-            if sig_findings:
-                signals_html = build_signals_block_html(sig_findings)
-        except Exception as e:
-            logger.warning("P35/P77/P78 signals-check failed: %s", e)
-
-        # P92 + P94: Banner-Konsistenz (CMP-Tool kaputt / Banner-vs-Doc-Diff)
-        consistency_html = ""
-        try:
-            from compliance.services.banner_consistency_checks import (
-                run_all as run_consistency_checks,
-                build_consistency_block_html,
-            )
-            cookie_doc_for_check = (doc_texts.get("cookie")
-                                    or doc_texts.get("dse") or "")
-            cons_findings = run_consistency_checks(
-                banner_result or {}, cookie_doc_for_check, cmp_vendors,
-                doc_texts=doc_texts,
-            )
-            if cons_findings:
-                consistency_html = build_consistency_block_html(cons_findings)
-                logger.info("P92/P94: %d Konsistenz-Findings", len(cons_findings))
-        except Exception as e:
-            logger.warning("P92/P94 consistency-check failed: %s", e)
-
-        # P73: MC-Solution-Generator — LLM-Vorschlaege pro HIGH-Fail.
-        # Max 5 Solutions pro Doc-Type um Latenz < 60s zu halten.
-        solutions_html = ""
-        try:
-            from compliance.services.mc_solution_generator import (
-                generate_solutions_for_fails, build_solutions_block_html,
-            )
-            all_solutions: list[dict] = []
-            for dt, fails in fails_by_doc.items():
-                if not fails:
-                    continue
-                doc_txt = doc_texts.get(dt) or doc_texts.get("dse") or ""
-                if not doc_txt or len(doc_txt) < 500:
-                    continue
-                sols = await generate_solutions_for_fails(
-                    fails, doc_txt, dt, limit=3,
-                )
-                all_solutions.extend(sols)
-                if len(all_solutions) >= 8:
-                    break  # global cap
-            if all_solutions:
-                solutions_html = build_solutions_block_html(all_solutions[:8])
-                logger.info("P73: %d MC-Solutions generiert", len(all_solutions))
-        except Exception as e:
-            logger.warning("P73 MC-Solution-Generator skipped: %s", e)
-
-        # P71: JC-vs-AVV Entscheidungsbaum (nur wenn DSE ambig)
-        jc_decision_html = ""
-        try:
-            from compliance.services.jc_avv_decision import (
-                build_jc_avv_decision_html,
-            )
-            jc_decision_html = build_jc_avv_decision_html(doc_texts.get("dse"))
-        except Exception as e:
-            logger.warning("P71 jc_avv_decision skipped: %s", e)
-
-        # P6/P53/P55 — Branchen-Kontext + Site-History
-        industry_ctx_html = ""
-        try:
-            from compliance.services.industry_library import (
-                build_industry_context_block_html, load_site_profile,
-            )
-            from database import SessionLocal as _SLib
-            _ind_db = _SLib()
-            try:
-                ind = (req.scan_context or {}).get("industry") if req.scan_context else None
-                site_prof = load_site_profile(_ind_db, domain_for_exec or "")
-                industry_ctx_html = build_industry_context_block_html(ind, site_prof)
-            finally:
-                _ind_db.close()
-        except Exception as e:
-            logger.warning("industry context skipped: %s", e)
-
-        # P106 — Internal-Checks-Block (interne Prozesse / Doku-Pflichten)
-        internal_checks_html = ""
-        try:
-            from compliance.services.mc_audit_type import (
-                build_internal_checks_block_html,
-            )
-            ic = (mc_split or {}).get("internal_checks") or []
-            if ic:
-                internal_checks_html = build_internal_checks_block_html(ic)
-                logger.info(
-                    "P106: %d interne Checks (statt FAIL) im Block",
-                    len(ic),
-                )
-        except Exception as e:
-            logger.warning("P106 internal_checks_html skipped: %s", e)
-
-        # P85 — Banner-Screenshot fuer visuellen Beweis (zwischen
-        # GF-1-Pager und Detail-Bloecken)
-        banner_shot_html = ""
-        try:
-            from compliance.services.banner_screenshot_block import (
-                build_banner_screenshot_html,
-            )
-            banner_shot_html = build_banner_screenshot_html(banner_result)
-        except Exception as e:
-            logger.warning("P85 banner-screenshot skipped: %s", e)
-
-        # P82: GF-1-Pager ganz oben in der Mail — 5-Bullet-Zusammenfassung
-        # damit die GF nicht 124k Char lesen muss.
-        gf_one_pager_html = ""
-        try:
-            from compliance.services.gf_one_pager import build_gf_one_pager_html
-            gf_one_pager_html = build_gf_one_pager_html(
-                site_name=site_name_for_exec,
-                scorecard=scorecard,
-                previous_scorecard=prev_scorecard,
-                banner_result=banner_result,
-                library_mismatch_findings=mismatches,
-                scan_context=req.scan_context,
-                audit_quality_findings=audit_quality_findings,
-            )
-        except Exception as e:
-            logger.warning("P82 GF-1-pager skipped: %s", e)
-
-        # A — Audit-Quality-Checks: Banner-Detect-Failure, Vendor-Extract
-        # auffaellig duenn, URL-Fetch fehlgeschlagen → IMMER prominent zeigen.
-        audit_quality_html = ""
-        audit_quality_findings: list[dict] = []
-        try:
-            from compliance.services.audit_quality_checks import (
-                run_all as run_audit_quality, build_audit_quality_block_html,
-            )
-            cookie_text_for_aq = doc_texts.get("cookie") or ""
-            audit_quality_findings = run_audit_quality(
-                banner_result, cookie_text_for_aq, cmp_vendors, doc_entries,
-            )
-            if audit_quality_findings:
-                audit_quality_html = build_audit_quality_block_html(audit_quality_findings)
-                logger.info(
-                    "audit-quality: %d Vorbehalte erkannt",
-                    len(audit_quality_findings),
-                )
-        except Exception as e:
-            logger.warning("audit-quality-checks failed: %s", e)
-
-        # Doc-Input-Warnings — wenn User Text ins falsche Feld gepastet hat
-        input_warn_html = ""
-        try:
-            from compliance.services.doc_input_warnings import (
-                collect_warnings, build_warnings_block_html,
-            )
-            warns = collect_warnings(doc_entries)
-            if warns:
-                input_warn_html = build_warnings_block_html(warns)
-                logger.info("doc-input-warnings: %d Mismatches gefunden", len(warns))
-        except Exception as e:
-            logger.warning("doc-input-warnings skipped: %s", e)
-
-        # P86: Branchen-Benchmark (nur wenn scan_context.industry gesetzt)
-        bench_html = ""
-        try:
-            from database import SessionLocal as _SLb
-            from compliance.services.industry_benchmark import (
-                compute_benchmark, build_benchmark_html, _extract_score,
-            )
-            industry = (req.scan_context or {}).get("industry") if req.scan_context else None
-            curr_score = _extract_score(banner_result)
-            if industry and curr_score is not None:
-                _b_db = _SLb()
-                try:
-                    bench = compute_benchmark(
-                        _b_db, industry, curr_score, check_id,
-                    )
-                    if bench:
-                        bench_html = build_benchmark_html(bench)
-                finally:
-                    _b_db.close()
-        except Exception as e:
-            logger.warning("P86 industry-benchmark skipped: %s", e)
-
-        # P84: Diff-Mode — "Seit letztem Lauf X Findings weg, Y neue".
-        diff_html = ""
-        try:
-            from database import SessionLocal as _SL
-            from compliance.services.run_diff import (
-                compute_diff, build_diff_block_html,
-            )
-            _diff_db = _SL()
-            try:
-                diff = compute_diff(
-                    _diff_db, check_id, domain_for_exec or "",
-                    banner_result, scorecard,
-                )
-                if diff:
-                    diff_html = build_diff_block_html(diff)
-            finally:
-                _diff_db.close()
-        except Exception as e:
-            logger.warning("P84 diff-mode skipped: %s", e)
-
-        full_html = (
-            gf_one_pager_html + audit_quality_html + input_warn_html
-            + bench_html + diff_html
-            + critical_html + scope_disclaimer_html + exec_summary_html
-            + cookie_arch_html + summary_html + scanned_html + profile_html
-            + scorecard_html + internal_checks_html + redundancy_html
-            + industry_ctx_html
-            + banner_shot_html
-            + providers_html + banner_deep_html
-            + cookie_audit_html
-            + tcf_authority_html
-            + entropy_html
-            + network_trace_html
-            + library_mismatch_html
-            + consistency_html + signals_html + solutions_html
-            + jc_decision_html
-            + vvt_html + report_html
-        )
-
-        # Step 6: Send email — derive site name primarily from entered URL.
-        # The extracted_profile.companyName is often noisy (e.g. picks up
-        # juris.de from legal references). Domain-derived name is more
-        # predictable for the GF email subject.
-        doc_count = len([r for r in results if not r.error])
-        url_company = _company_name_from_url(doc_entries)
-        domain = _extract_domain(doc_entries)
-        site_name = url_company or domain or "Unbekannt"
-        _update(check_id, "E-Mail wird versendet...", 98)
-        email_result = send_email(
-            recipient=req.recipient,
-            subject=f"[COMPLIANCE-CHECK] {site_name} — {doc_count} Dokumente geprueft",
-            body_html=full_html,
-        )
-
-        # Step 7: Store result
-        response = {
-            "check_id": check_id,
-            "results": [_result_to_dict(r) for r in results],
-            "business_profile": profile_dict,
-            "extracted_profile": extracted_profile,
-            # P18: vollen consent-tester-Output durchreichen statt nur 4 Felder.
-            # phases (before/after-accept/reject) + banner_checks.violations +
-            # category_tests werden vom Renderer + Critical-Findings-Block genutzt.
-            "banner_result": ({
-                "detected": banner_result.get("banner_detected", False),
-                "provider": banner_result.get("banner_provider", ""),
-                "violations": len((banner_result.get("banner_checks") or {})
-                                  .get("violations", [])),
-                "tcf_vendor_count": len(tcf_vendors),
-                "completeness_pct": banner_result.get("completeness_pct"),
-                "correctness_pct": banner_result.get("correctness_pct"),
-                "phases": banner_result.get("phases", {}),
-                "banner_checks": banner_result.get("banner_checks", {}),
-                "category_tests": banner_result.get("category_tests", []),
-                "structured_checks": banner_result.get("structured_checks", []),
-                "summary": banner_result.get("summary", {}),
-            } if banner_result else None),
-            "tcf_vendors": vvt_entries if tcf_vendors else [],
-            "cmp_vendors": cmp_vendors,
-            "cookie_audit": cookie_audit if cookie_audit else None,
-            "total_documents": len(results),
-            "total_findings": total_findings,
-            "email_status": email_result.get("status", "failed"),
-            "checked_at": datetime.now(timezone.utc).isoformat(),
-        }
-
-        _compliance_check_jobs[check_id]["status"] = "completed"
-        _compliance_check_jobs[check_id]["result"] = response
-        _compliance_check_jobs[check_id]["progress"] = "Fertig"
-        _compliance_check_jobs[check_id]["progress_pct"] = 100
-
-        # P80: persist raw scan data so we can replay audit pipeline
-        # without re-crawling (7min -> 5sec test cycle).
-        try:
-            from database import SessionLocal
-            from compliance.services.check_snapshot import save_snapshot
-            snap_db = SessionLocal()
-            try:
-                save_snapshot(
-                    snap_db,
-                    check_id=check_id,
-                    doc_entries=doc_entries,
-                    banner_result=banner_result,
-                    profile=profile,
-                    cmp_vendors=cmp_vendors,
-                    scan_context=req.scan_context,  # P79
-                    site_label=site_name,
-                    notes=f"recipient={req.recipient}",
-                )
-            finally:
-                snap_db.close()
-        except Exception as snap_err:
-            logger.warning("P80 snapshot save skipped: %s", snap_err)
-
-        # Persist to sidecar SQLite audit log — enables /audit endpoints
-        # (A5 admin tab) and trend view (A6). Best-effort; failures here
-        # do not affect the user-facing response.
-        try:
-            from compliance.services.compliance_audit_log import record_check_run
-            from compliance.services.mc_scorecard import full_audit_records
-            audit_rows: list[dict] = []
-            for r in results:
-                doc_mc = [c for c in r.checks if c.id.startswith("mc-")]
-                audit_rows.extend(full_audit_records(
-                    [{"id": c.id, "label": c.label, "passed": c.passed,
-                      "severity": c.severity, "skipped": c.skipped,
-                      "regulation": c.regulation, "matched_text": c.matched_text,
-                      "hint": c.hint, "level": c.level}
-                     for c in doc_mc],
-                    check_id=check_id,
-                    doc_type=r.doc_type,
-                ))
-            record_check_run(
-                check_id=check_id,
-                tenant_id=req.recipient or "",
-                site_name=site_name,
-                base_domain=domain or "",
-                doc_count=doc_count,
-                scorecard=scorecard,
-                vvt_summary={
-                    "total": len(cmp_vendors),
-                    "internal": sum(1 for v in cmp_vendors
-                                    if (v.get("recipient_type") or "").upper()
-                                    in ("INTERNAL", "GROUP_COMPANY")),
-                    "external": sum(1 for v in cmp_vendors
-                                    if (v.get("recipient_type") or "").upper()
-                                    in ("PROCESSOR", "CONTROLLER")),
-                },
-                mc_records=audit_rows,
-            )
-            from compliance.services.compliance_audit_log import record_check_payload
-            record_check_payload(
-                check_id=check_id,
-                vendors=cmp_vendors,
-                profile=extracted_profile,
-                banner=banner_result,
-            )
-            # Unified findings (P5): bundle MC + Pflichtangaben + Vendor +
-            # Redundanz in one searchable table behind /agent/findings/<id>.
-            try:
-                from compliance.services.unified_findings_collector import collect
-                from compliance.services.unified_findings_store import record_findings
-                unified = collect(
-                    check_id=check_id,
-                    results=results,
-                    cmp_vendors=cmp_vendors,
-                    redundancy_report=redundancy_report,
-                    doc_texts=doc_texts,
-                )
-                record_findings(check_id, unified)
-            except Exception as e:
-                logger.warning("Unified findings collect failed: %s", e)
-        except Exception as e:
-            logger.warning("Audit persistence skipped: %s", e)
-
-    except Exception as e:
-        logger.error("Compliance check %s failed: %s", check_id, e, exc_info=True)
-        _compliance_check_jobs[check_id]["status"] = "failed"
-        _compliance_check_jobs[check_id]["error"] = str(e)[:500]
-
-
-def _update(check_id: str, msg: str, pct: int | None = None):
-    job = _compliance_check_jobs[check_id]
-    job["progress"] = msg
-    if pct is not None:
-        job["progress_pct"] = max(0, min(100, int(pct)))
-
-
-async def _fetch_text(url: str, doc_type: str = "") -> tuple[str, list[dict]]:
-    """Fetch text from URL via consent-tester, with HTTP fallback.
-
-    Returns (text, cmp_payloads). cmp_payloads is the raw CMP JSON captured
-    during navigation (ePaaS, OneTrust, …) — empty when no CMP fired or
-    HTTP fallback was used. Backend turns payloads into structured vendor
-    records for the VVT table in the email.
-    """
-    # 1. Consent-tester (Playwright-based, full JS rendering).
-    # max_documents depends on doc_type:
-    #   - cookie/dse/social_media: self-extract (often + CMP capture) is
-    #     authoritative, sub-pages dilute the policy text. max=1.
-    #   - impressum/agb/widerruf/nutzungsbedingungen/dsb: BMW & similar
-    #     enterprise sites split this across 3-4 short sub-pages
-    #     (Versicherungsvermittler, Aufsicht, Berufsrecht). max=3 follows
-    #     them. The 15s networkidle bail (dsi_helpers) keeps timing safe.
-    short_extract_types = {"cookie", "dse", "datenschutz", "privacy", "social_media"}
-    max_docs = 1 if (doc_type or "") in short_extract_types else 3
-    try:
-        # P90: 120s reicht nicht fuer BMW-Impressum (Auto-Discovery folgt
-        # 3 Sub-Docs). 240s gibt Spielraum. Mercedes faellt aktuell mit
-        # 120s auch oft an Akamai-Latenz.
-        async with httpx.AsyncClient(timeout=240.0) as client:
-            resp = await client.post(
-                f"{CONSENT_TESTER_URL}/dsi-discovery",
-                json={"url": url, "max_documents": max_docs},
-                timeout=240.0,
-            )
-            if resp.status_code == 200:
-                payload = resp.json()
-                docs = payload.get("documents", [])
-                cmp_payloads = payload.get("cmp_payloads") or []
-                cmp_cookie_text = payload.get("cmp_cookie_text") or ""
-                # D — wenn der consent-tester HTML-Tabellen aus dem DOM
-                # extrahiert hat, in die cmp_payloads als "generic_table"
-                # einschleusen damit das Backend sie via cookies_table_parser
-                # verarbeiten kann.
-                for doc in (docs or []):
-                    for tbl in (doc.get("tables") or []):
-                        if not tbl or len(tbl) < 3:
-                            continue
-                        cmp_payloads.append({
-                            "kind": "html_table",
-                            "url":  doc.get("url", ""),
-                            "rows": tbl,
-                        })
-                if docs:
-                    texts = []
-                    for doc in docs:
-                        t = doc.get("full_text", "") or doc.get("text_preview", "") or ""
-                        if t and len(t) > 50:
-                            texts.append(t)
-                    merged = "\n\n".join(texts)
-                    # For cookie/dse/social_media: when CMP reconstruction is
-                    # substantially richer than DOM extraction, use it. This
-                    # fixes the BMW case where DOM yields ~600 words of
-                    # navigation but the ePaaS payload reconstructs to ~1800
-                    # words of actual cookie policy.
-                    if (doc_type in short_extract_types
-                            and cmp_cookie_text
-                            and len(cmp_cookie_text.split()) > len(merged.split())):
-                        logger.info(
-                            "Preferring CMP-reconstructed text for %s on %s "
-                            "(%d words CMP vs %d words DOM)",
-                            doc_type, url,
-                            len(cmp_cookie_text.split()),
-                            len(merged.split()),
-                        )
-                        merged = cmp_cookie_text
-                    if merged and len(merged.split()) > 100:
-                        if len(texts) > 1:
-                            logger.info("Merged %d docs from %s (%d words)",
-                                        len(texts), url, len(merged.split()))
-                        return merged, cmp_payloads
-                # P90-Bug-Fix: auch wenn DSE-Text zu kurz fuer 100-Wort-
-                # Schwelle ist, die captured CMP-Payloads NICHT verwerfen.
-                # BMW-Bug: DSE liefert 10 Wort SPA-Shell, aber ePaaS-JSON
-                # (393KB) wurde captured. Backend braucht die fuer
-                # extract_vendors_from_payloads (VVT-Tabelle).
-                if cmp_payloads:
-                    logger.info(
-                        "P90: keeping %d CMP payloads for %s despite "
-                        "short text (%d words) — HTTP fallback runs in parallel",
-                        len(cmp_payloads), url,
-                        len((merged or cmp_cookie_text).split()),
-                    )
-                    fallback_text = merged or cmp_cookie_text or ""
-                    return fallback_text, cmp_payloads
-    except Exception as e:
-        # P90: verbose exception fuer Diagnose (war vorher empty)
-        logger.warning("Consent-tester fetch failed for %s: %s (%s)",
-                       url, str(e) or "(empty)", type(e).__name__)
-
-    # 2. Fallback: direct HTTP fetch (works for SSR pages like BMW).
-    # P7: kenntlicher UA + per-Domain Rate-Limit.
-    try:
-        import re as _re
-        from compliance.services.compliance_user_agent import (
-            default_request_headers, DomainRateLimiter,
-        )
-        async with httpx.AsyncClient(
-            timeout=30.0, follow_redirects=True,
-            headers=default_request_headers(),
-        ) as client:
-            async with DomainRateLimiter(url):
-                resp = await client.get(url)
-            if resp.status_code == 200 and "text/html" in resp.headers.get("content-type", ""):
-                html = resp.text
-                # Strip HTML tags, decode entities
-                text = _re.sub(r"<script[^>]*>.*?</script>", " ", html, flags=_re.DOTALL | _re.IGNORECASE)
-                text = _re.sub(r"<style[^>]*>.*?</style>", " ", text, flags=_re.DOTALL | _re.IGNORECASE)
-                text = _re.sub(r"<[^>]+>", " ", text)
-                text = _re.sub(r"\s+", " ", text).strip()
-                if len(text.split()) > 100:
-                    logger.info("HTTP fallback for %s: %d words", url, len(text.split()))
-                    return text, []
-    except Exception as e:
-        logger.warning("HTTP fallback failed for %s: %s", url, e)
-
-    return "", []
-
-
-async def _autodiscover_missing(
-    check_id: str,
-    doc_entries: list[dict],
-    doc_texts: dict[str, str],
-    url_text_cache: dict[str, str],
-) -> None:
-    """For each canonical doc_type the user did not submit, try to find
-    the corresponding document on the homepage of the site they DID submit.
-
-    Modifies doc_entries in place: fills text/url/word_count and sets
-    `auto_discovered=True`. Marks `discovery_attempted=True` on every
-    missing entry (even when nothing was found) so the report can
-    distinguish 'Nicht eingereicht' from 'Auf der Website nicht gefunden'.
-    """
-    from urllib.parse import urlparse
-
-    # VW-Fix: nur Doc-Types mit substantieller Text-Ausbeute zaehlen
-    # als 'submitted'. Wenn der User eine URL eingegeben hat aber die
-    # 404 liefert (VW cookie-richtlinie.html), oder der Crawler weniger
-    # als 200 Zeichen extrahiert (SPA-Shell), als 'missing' behandeln
-    # damit der Discovery-Pass alternative URLs probiert.
-    _MIN_USEFUL_CHARS = 200
-    submitted_types = {
-        e["doc_type"] for e in doc_entries
-        if len((e.get("text") or "").strip()) >= _MIN_USEFUL_CHARS
-    }
-    # Markiere die fehlgeschlagenen URL-Submissions damit der Discovery
-    # ihre URL nicht erneut probiert (waere sinnlos).
-    failed_urls: set[str] = {
-        (e.get("url") or "").strip()
-        for e in doc_entries
-        if (e.get("url") or "").strip()
-        and len((e.get("text") or "").strip()) < _MIN_USEFUL_CHARS
-    }
-    if failed_urls:
-        logger.info(
-            "VW-Fix: %d eingegebene URLs lieferten <%d Zeichen — Discovery "
-            "soll Alternativen probieren: %s",
-            len(failed_urls), _MIN_USEFUL_CHARS,
-            ", ".join(list(failed_urls)[:3]),
-        )
-    # Map alias types to canonical
-    submitted_canon = {
-        "dse" if t in ("datenschutz", "privacy") else t for t in submitted_types
-    }
-    # Missing = canonical types the user did NOT submit
-    missing = set(_ALL_DOC_TYPES) - submitted_canon
-    if not missing:
-        return
-
-    # Pick the most common base (scheme://netloc) from submitted URLs.
-    bases: dict[str, int] = {}
-    for e in doc_entries:
-        u = (e.get("url") or "").strip()
-        if u and "://" in u:
-            p = urlparse(u)
-            base = f"{p.scheme}://{p.netloc}"
-            bases[base] = bases.get(base, 0) + 1
-    if not bases:
-        # No submitted URL at all — nothing to crawl from. Add empty
-        # placeholders (with discovery_attempted=False) so the padding
-        # step renders them as 'Nicht eingereicht' (not 'Nicht gefunden').
-        for dt in missing:
-            doc_entries.append({
-                "doc_type": dt, "url": "", "text": "", "word_count": 0,
-                "auto_discovered": False, "discovery_attempted": False,
-            })
-        return
-
-    # Build crawl plan: primary base + any related domains mentioned in
-    # the submitted texts that share the owner's SLD. Example: BMW Group
-    # text mentions bmwgroup.com and bmwgroup.jobs in addition to bmw.de.
-    primary_base = max(bases, key=bases.get) + "/"
-    crawl_bases: list[str] = [primary_base]
-    primary_netloc = urlparse(primary_base).netloc.lower().lstrip("www.")
-    owner_token = primary_netloc.split(".")[0]  # 'bmw'
-
-    if owner_token and len(owner_token) >= 3:
-        domain_re = re.compile(
-            r"https?://([a-z0-9][a-z0-9\-]*\.)*" + re.escape(owner_token)
-            + r"[a-z0-9\-]*\.[a-z]{2,}",
-            re.IGNORECASE,
-        )
-        seen_bases = {primary_base}
-        for entry in doc_entries:
-            text = entry.get("text") or ""
-            for m in domain_re.finditer(text):
-                p = urlparse(m.group(0))
-                base = f"{p.scheme}://{p.netloc}/"
-                base_netloc = p.netloc.lower().lstrip("www.")
-                if base_netloc == primary_netloc:
-                    continue
-                if base in seen_bases:
-                    continue
-                seen_bases.add(base)
-                crawl_bases.append(base)
-                if len(crawl_bases) >= 3:
-                    break
-            if len(crawl_bases) >= 3:
-                break
-
-    _update(
-        check_id,
-        f"Suche fehlende Dokumente auf {', '.join(urlparse(b).netloc for b in crawl_bases)}...",
-        18,
-    )
-
-    discovered: list[dict] = []
-    disc_payloads: list[dict] = []
-    disc_cookie_texts: list[str] = []
-    for base in crawl_bases:
-        try:
-            async with httpx.AsyncClient(timeout=300.0) as client:  # P90: 180s -> 300s
-                resp = await client.post(
-                    f"{CONSENT_TESTER_URL}/dsi-discovery",
-                    json={"url": base, "max_documents": 15},
-                    timeout=300.0,  # P90: 180s -> 300s
-                )
-                if resp.status_code != 200:
-                    logger.warning("auto-discovery: HTTP %d for %s",
-                                   resp.status_code, base)
-                    continue
-                body = resp.json()
-                discovered.extend(body.get("documents", []) or [])
-                disc_payloads.extend(body.get("cmp_payloads") or [])
-                cmp_text = body.get("cmp_cookie_text") or ""
-                if cmp_text:
-                    disc_cookie_texts.append(cmp_text)
-                logger.info("auto-discovery on %s: %d docs, %d CMP payloads, "
-                            "cmp_cookie_text=%d words", base,
-                            len(body.get("documents", []) or []),
-                            len(body.get("cmp_payloads") or []),
-                            len(cmp_text.split()))
-        except Exception as e:
-            # P90: verbose exception fuer Diagnose
-            logger.warning("auto-discovery failed for %s: %s (%s)",
-                           base, str(e) or "(empty)", type(e).__name__)
-
-    # Classify each discovered doc into a canonical doc_type
-    by_type: dict[str, dict] = {}
-    for d in discovered:
-        title = (d.get("title") or "").lower()
-        url = (d.get("url") or "").lower()
-        wc = d.get("word_count") or 0
-        if wc < 100:
-            continue
-        canon = _classify_discovered_doc(title, url)
-        if canon and canon in missing and canon not in by_type:
-            by_type[canon] = d
-
-    # Append/Update entry for every missing canonical type. Auto-discovered
-    # ones get the text/URL filled; ungratched ones stay empty so the
-    # padding step renders them as 'Auf der Website nicht gefunden'.
-    # VW-Fix: wenn schon ein leerer entry existiert (URL gesetzt, aber
-    # fetch hat 0/Mini-Text geliefert), in-place updaten statt duplizieren.
-    filled = 0
-    for dt in missing:
-        existing = next((e for e in doc_entries
-                         if e.get("doc_type") == dt), None)
-        new_entry: dict = existing if existing else {
-            "doc_type": dt, "url": "", "text": "", "word_count": 0,
-            "auto_discovered": False, "discovery_attempted": True,
-            "cmp_payloads": [],
-        }
-        new_entry["discovery_attempted"] = True
-        d = by_type.get(dt)
-        if d:
-            full = d.get("full_text") or d.get("text_preview") or ""
-            # For cookie: prefer the CMP-reconstructed text when it's
-            # substantially richer than the auto-discovered DOM extraction.
-            # BMW homepage CMP yields ~1800 words of authoritative policy;
-            # DOM extraction typically yields ~600 words of site chrome.
-            if dt == "cookie" and disc_cookie_texts:
-                cmp_merged = "\n\n".join(disc_cookie_texts)
-                if len(cmp_merged.split()) > len(full.split()):
-                    logger.info(
-                        "cookie: using CMP-reconstructed text (%d words) "
-                        "instead of DOM (%d words)",
-                        len(cmp_merged.split()), len(full.split()),
-                    )
-                    full = cmp_merged
-            if len(full.split()) >= 100:
-                new_entry["text"] = full
-                # Behalte die original URL als "rejected_url" damit Audit
-                # zeigt 'X war 404, wir haben Y gefunden'.
-                if existing and (existing.get("url") or "").strip() in failed_urls:
-                    new_entry["rejected_url"] = existing.get("url")
-                new_entry["url"] = d.get("url", "")
-                new_entry["word_count"] = len(full.split())
-                new_entry["auto_discovered"] = True
-                if dt == "cookie" and disc_payloads:
-                    new_entry["cmp_payloads"] = disc_payloads
-                doc_texts[dt] = full
-                filled += 1
-                logger.info(
-                    "auto-discovered %s on %s: %s (%d words)%s",
-                    dt, base, d.get("url", "")[:80], new_entry["word_count"],
-                    " [REPLACED failed URL]" if existing else "",
-                )
-        if not existing:
-            doc_entries.append(new_entry)
-
-    logger.info(
-        "auto-discovery: filled %d/%d missing types from %s",
-        filled, len(missing), base,
-    )
-
-
-# Title/URL keywords → canonical doc_type. Order matters: most-specific first.
-_DISCOVERY_RULES: list[tuple[str, tuple[str, ...]]] = [
-    ("cookie",            ("cookie", "kuche", "biscuit", "cookies-")),
-    ("widerruf",          ("widerruf", "rueckgabe", "rückgabe", "cancellation",
-                           "right-of-withdrawal", "ruecktritts", "rücktritts")),
-    ("social_media",      ("social-media", "soziale-medien", "social_media",
-                           "social-media-policy")),
-    # P23: 'terms-and-conditions' kann Allgemeine Geschaeftsbedingungen ODER
-    # Nutzungsbedingungen meinen. Discovery-Funktion klassifiziert spaeter
-    # praeziser per Titel + Inhalt. Hier nur Url-Hint:
-    ("agb",               ("/agb", "geschaeftsbedingungen", "geschäftsbedingungen",
-                           "general-terms")),
-    ("nutzungsbedingungen", ("nutzungsbedingung", "nutzungsbedingungen",
-                              "terms-of-use", "terms-and-conditions",
-                              "nutzungsordnung", "terms-of-service",
-                              "allgemeine-nutzungsbedingungen")),
-    ("dsb",               ("datenschutzbeauftragt", "data-protection-officer",
-                           "dpo-contact", "/dsb")),
-    ("impressum",         ("impressum", "imprint", "legal-notice", "site-notice",
-                           "anbieterkennzeichnung", "legal-disclaimer-pool")),
-    ("dse",               ("data-privacy", "datenschutz", "data-protection",
-                           "privacy-policy", "privacy-notice", "dsgvo",
-                           "data_privacy", "datenschutzinformation")),
-]
-
-
-def _classify_discovered_doc(title: str, url: str) -> str | None:
-    """Map a discovered doc (by its title + URL) to one of our 8 canonical types."""
-    haystack = f"{title} {url}"
-    for canon, keywords in _DISCOVERY_RULES:
-        if any(kw in haystack for kw in keywords):
-            return canon
-    return None
-
-
-async def _check_single(
-    text: str, doc_type: str, label: str, url: str,
-    word_count: int, use_agent: bool,
-    business_scope: set[str] | None = None,
-    business_profile: dict | None = None,
-):
-    """Run regex + MC checks on a single document."""
-    from compliance.services.doc_checks.runner import check_document_completeness
-    from compliance.services.rag_document_checker import check_document_with_controls
-    from .agent_doc_check_routes import CheckItem, DocCheckResult
-
-    # Regex checklist
-    findings = check_document_completeness(text, doc_type, label, url,
-                                           business_profile=business_profile)
-
-    all_checks: list[CheckItem] = []
-    completeness = 0
-    correctness = 0
-
-    for f in findings:
-        if "SCORE" in f.get("code", ""):
-            for c in f.get("all_checks", []):
-                all_checks.append(CheckItem(
-                    id=c["id"], label=c["label"], passed=c["passed"],
-                    severity=c["severity"], matched_text=c.get("matched_text", ""),
-                    level=c.get("level", 1), parent=c.get("parent"),
-                    skipped=c.get("skipped", False), hint=c.get("hint", ""),
-                ))
-            completeness = f.get("completeness_pct", 0)
-            correctness = f.get("correctness_pct", 0)
-
-    # Master Control checks (top 20 by severity to avoid noise)
-    try:
-        # max_controls=0 -> evaluate ALL MCs for this doc_type (DB has
-        # 1874 across 8 types; regex matching is cheap and dominates
-        # well under 1s per doc). Caps remain on the LLM-enrich step
-        # (top-10 FAILs) so cost stays bounded.
-        mc_results = await check_document_with_controls(
-            text, doc_type, label, max_controls=0, use_agent=use_agent,
-            business_scope=business_scope,
-        )
-        if mc_results:
-            for mc in mc_results:
-                all_checks.append(CheckItem(**mc))
-            l2 = [c for c in all_checks if c.level == 2 and not c.skipped]
-            l2_passed = sum(1 for c in l2 if c.passed)
-            correctness = round(l2_passed / len(l2) * 100) if l2 else 0
-    except Exception as e:
-        logger.warning("MC check skipped for %s: %s", label, e)
-
-    # LLM verification of regex fails
-    failed = [c for c in all_checks if not c.passed and not c.skipped and c.hint]
-    if failed:
-        try:
-            from compliance.services.doc_checks.llm_verify import verify_failed_checks
-            overturns = await verify_failed_checks(
-                text,
-                [{"id": c.id, "label": c.label, "hint": c.hint} for c in failed],
-                label,
-            )
-            for c in all_checks:
-                if c.id in overturns and overturns[c.id]["overturned"]:
-                    c.passed = True
-                    c.matched_text = f"[LLM] {overturns[c.id]['evidence']}"
-            l2_active = [c for c in all_checks if c.level == 2 and not c.skipped]
-            l2_passed = sum(1 for c in l2_active if c.passed)
-            if l2_active:
-                correctness = round(l2_passed / len(l2_active) * 100)
-        except Exception as e:
-            logger.warning("LLM verification skipped: %s", e)
-
-    # Cookie-policy only: actively HTTP-probe the Opt-Out + Privacy-Policy
-    # URLs the document advertises. Broken links make individual provider
-    # entries non-compliant under Art. 7(3) DSGVO.
-    if doc_type == "cookie":
-        try:
-            from compliance.services.cookie_link_validator import (
-                extract_links, validate_links, build_check_items,
-            )
-            links = extract_links(text)
-            if links:
-                logger.info("Cookie-link validator: %d urls extracted from %s",
-                            len(links), label)
-                validated = await validate_links(links)
-                for item in build_check_items(validated):
-                    all_checks.append(CheckItem(**item))
-                # Re-compute correctness with the new L2 items
-                l2_active = [c for c in all_checks if c.level == 2 and not c.skipped]
-                l2_passed = sum(1 for c in l2_active if c.passed)
-                if l2_active:
-                    correctness = round(l2_passed / len(l2_active) * 100)
-        except Exception as e:
-            logger.warning("Cookie-link validation skipped for %s: %s", label, e)
-
-    non_score = [f for f in findings if "SCORE" not in f.get("code", "")]
-    return DocCheckResult(
-        label=label, url=url, doc_type=doc_type,
-        word_count=word_count or len(text.split()),
-        completeness_pct=completeness, correctness_pct=correctness,
-        checks=all_checks, findings_count=len(non_score),
-    )
-
-
-def _pad_results_with_missing(
-    results: list,
-    discovery_attempted: set[str] | None = None,
-) -> list:
-    """Ensure every canonical doc_type has an entry in the results list.
-
-    Doc_types the user did not submit AND auto-discovery did not find get
-    a placeholder DocCheckResult. The error message distinguishes:
-      - 'Auf der Website nicht gefunden' (discovery was attempted)
-      - 'Nicht eingereicht' (no submitted URLs to crawl from)
-
-    Preserves the canonical ordering from _ALL_DOC_TYPES so the report
-    layout is stable.
-    """
-    from .agent_doc_check_routes import DocCheckResult
-    attempted = discovery_attempted or set()
-
-    by_type: dict[str, object] = {}
-    for r in results:
-        canon = "dse" if r.doc_type in ("datenschutz", "privacy") else r.doc_type
-        by_type[canon] = r
-
-    ordered: list = []
-    for dt in _ALL_DOC_TYPES:
-        if dt in by_type:
-            ordered.append(by_type[dt])
-            continue
-        if dt in attempted:
-            msg = ("Auf der Website nicht gefunden — bitte URL des "
-                   "Dokuments manuell eintragen, falls vorhanden")
-        else:
-            msg = "Nicht eingereicht — Quelle nicht angegeben"
-        ordered.append(DocCheckResult(
-            label=_doc_type_label(dt),
-            url="",
-            doc_type=dt,
-            word_count=0,
-            completeness_pct=0,
-            correctness_pct=0,
-            checks=[],
-            findings_count=0,
-            error=msg,
-            scenario="missing",
-        ))
-
-    extras = [r for r in results
-              if (r.doc_type if r.doc_type not in ("datenschutz", "privacy") else "dse")
-              not in _ALL_DOC_TYPES]
-    ordered.extend(extras)
-    return ordered
-
-
-_COMPOUND_TLDS = {
-    "co.uk", "co.jp", "co.nz", "co.kr", "co.za", "co.in",
-    "com.au", "com.br", "com.mx", "com.tr", "com.sg",
-}
-
-
-def _extract_domain(doc_entries: list[dict]) -> str | None:
-    """Extract base domain (without www) from first URL."""
-    for entry in doc_entries:
-        url = entry.get("url", "")
-        if url and "://" in url:
-            from urllib.parse import urlparse
-            host = urlparse(url).netloc.lower()
-            if host.startswith("www."):
-                host = host[4:]
-            return host or None
-    return None
-
-
-def _company_name_from_url(doc_entries: list[dict]) -> str | None:
-    """Derive a display company name from the entered URLs.
-
-    Heuristic: take the second-level domain (e.g. "bmw" from "www.bmw.de"),
-    uppercase short acronyms (<=4 chars, no hyphens), title-case the rest.
-
-    Examples:
-      www.bmw.de              -> BMW
-      mercedes-benz.de        -> Mercedes-Benz
-      shop.example.co.uk      -> Example
-      juris.de                -> Juris
-    """
-    from urllib.parse import urlparse
-
-    for entry in doc_entries:
-        url = entry.get("url", "")
-        if not url or "://" not in url:
-            continue
-        host = urlparse(url).netloc.lower()
-        if host.startswith("www."):
-            host = host[4:]
-        parts = host.split(".")
-        if len(parts) < 2:
-            continue
-        # Handle compound TLDs (.co.uk etc.)
-        if len(parts) >= 3 and ".".join(parts[-2:]) in _COMPOUND_TLDS:
-            sld = parts[-3]
-        else:
-            sld = parts[-2]
-        if not sld:
-            continue
-        if len(sld) <= 4 and "-" not in sld:
-            return sld.upper()
-        return "-".join(p.capitalize() for p in sld.split("-"))
-    return None
-
-
-def _get_skip_types(profile) -> dict[str, str]:
-    """Doc_types to skip entirely with a per-type reason message.
-
-    Heute primaer fuer OEM-Konfigurator-Pattern (BMW/Audi/Mercedes):
-    wenn die Site kein Direkt-Vertrieb macht, sind AGB/Widerruf/
-    Nutzungsbedingungen nicht Pflicht auf der Website — sie werden
-    beim Vertragshaendler ausgehaendigt.
-    """
-    if getattr(profile, "no_direct_sales", False):
-        msg = (
-            "Nicht anwendbar — die Webseite schliesst keinen Direkt-"
-            "Kaufvertrag (OEM-Konfigurator-Pattern, Vertrag laeuft "
-            "ueber Vertragshaendler). AGB/Widerruf werden beim "
-            "Haendler ausgehaendigt."
-        )
-        return {
-            "agb": msg,
-            "widerruf": msg,
-            "nutzungsbedingungen": msg,
-        }
-    return {}
-
-
-def _apply_profile_filter(result, profile, doc_type: str):
-    """Adjust INFO-level checks based on business profile context.
-
-    For example: ODR check only relevant for B2C online shops.
-    """
-    from .agent_doc_check_routes import CheckItem
-
-    for check in result.checks:
-        cid = check.id.lower()
-
-        # ODR/OS-Link: relevant ONLY for B2C online shops. The check's
-        # default hint is written for B2B (it explains why it's not
-        # relevant) — for B2C we must replace it with action-oriented
-        # guidance, otherwise the report contradicts itself.
-        if "odr" in cid or "os-link" in cid or "streitbeilegung" in check.label.lower():
-            if profile.needs_odr:
-                if not check.passed:
-                    check.hint = (
-                        "Als B2C-Anbieter muessen Sie nach Art. 14 EU-VO 524/2013 "
-                        "auf die OS-Plattform (https://ec.europa.eu/consumers/odr) "
-                        "verlinken — klickbarer Link, nicht nur Text. Zusaetzlich "
-                        "§36 VSBG: angeben, ob Sie an Verbraucher-"
-                        "Streitbeilegungsverfahren teilnehmen (oder nicht)."
-                    )
-            else:
-                check.skipped = True
-                check.hint = "Nicht relevant (kein B2C Online-Shop)"
-
-        # Widerruf: Flag entire document as unnecessary for B2B
-        if doc_type == "widerruf" and profile.business_type not in ("b2c", "unknown"):
-            check.severity = "INFO"
-            if not check.passed:
-                check.hint = (
-                    "Als B2B-Unternehmen benoetigen Sie keine Widerrufsbelehrung "
-                    "(§355 BGB gilt nur fuer Verbrauchervertraege). "
-                    "Empfehlung: Entfernen Sie die Widerrufsbelehrung von "
-                    "Ihrer Website, da sie Verwirrung stiften kann."
-                )
-
-        # Regulated profession: check for Kammer info
-        if "kammer" in cid or "berufsordnung" in check.label.lower():
-            if not profile.is_regulated_profession:
-                check.skipped = True
-                check.hint = "Nicht relevant (kein regulierter Beruf)"
-
-    return result
-
-
-# ── Helpers ──────────────────────────────────────────────────────────
-
-_DOC_TYPE_LABELS = {
-    "dse": "Datenschutzerklaerung",
-    "datenschutz": "Datenschutzerklaerung",
-    "privacy": "Datenschutzerklaerung",
-    "impressum": "Impressum",
-    "agb": "AGB",
-    "widerruf": "Widerrufsbelehrung",
-    "cookie": "Cookie-Richtlinie",
-    "avv": "Auftragsverarbeitung",
-    "loeschkonzept": "Loeschkonzept",
-    "dsfa": "Datenschutz-Folgenabschaetzung",
-    "social_media": "Social Media Datenschutz",
-    "nutzungsbedingungen": "Nutzungsbedingungen",
-    "dsb": "DSB-Kontakt",
-    # P74: Legal-Notice / Rechtliche Hinweise (IP, Forward-Looking, Risiko)
-    "legal_notice": "Rechtliche Hinweise",
-    # P96: Digital Services Act-Pflichtangaben (Art. 12+17 DSA)
-    "dsa": "DSA-Pflichtangaben",
-    # P97: Lizenzhinweise Dritter (OSS-Compliance)
-    "lizenzhinweise": "Lizenzhinweise Dritter",
-}
-
-# Canonical doc types in the same order as the frontend ComplianceCheckTab.
-# The route pads `results` to always contain an entry for each — even if
-# the user did not submit a URL — so the email + frontend always show
-# the complete checklist (missing rows marked as 'Nicht eingereicht').
-#
-# DSB-Kontakt is intentionally NOT canonical: per GDPR practice the DSB is
-# named *inside* the DSI/datenschutz document (email or contact block), not
-# as a separate page. We check 'DSB benannt' as a sub-check of the DSE
-# instead. If a tenant insists on a separate DSB document, they can still
-# submit one — it just won't appear as a missing checklist row.
-_ALL_DOC_TYPES = [
-    "dse", "impressum", "social_media", "cookie",
-    "agb", "nutzungsbedingungen", "widerruf",
-]
-
-
-def _doc_type_label(doc_type: str) -> str:
-    return _DOC_TYPE_LABELS.get(doc_type, doc_type.upper())
-
-
-def _result_to_dict(r) -> dict:
-    """Convert DocCheckResult to JSON-serializable dict."""
-    fields = ("id", "label", "passed", "severity", "matched_text",
-              "level", "parent", "skipped", "hint")
-    return {
-        "label": r.label, "url": r.url, "doc_type": r.doc_type,
-        "word_count": r.word_count, "completeness_pct": r.completeness_pct,
-        "correctness_pct": r.correctness_pct,
-        "checks": [{f: getattr(c, f) for f in fields} for c in r.checks],
-        "findings_count": r.findings_count, "error": r.error,
-        "scenario": getattr(r, "scenario", ""),
-    }
-
-
-def _build_profile_html(profile) -> str:
-    from .agent_doc_check_report import build_profile_html
-    return build_profile_html(profile)
-
-
-# Cross-check extracted to compliance.services.banner_cookie_cross_check
-from compliance.services.banner_cookie_cross_check import cross_check_banner_vs_cookie as _cross_check_banner_vs_cookie
-
-
 # ── Admin: audit drill-down (A5) + trend view (A6) ──────────────────
 
 @router.get("/audit/{check_id}")
diff --git a/backend-compliance/compliance/services/consent_reachability_check.py b/backend-compliance/compliance/services/consent_reachability_check.py
new file mode 100644
index 00000000..d47d61fc
--- /dev/null
+++ b/backend-compliance/compliance/services/consent_reachability_check.py
@@ -0,0 +1,278 @@
+"""
+B1 — Cookie-Consent-UX-001: Mobile Reachability of Consent Settings.
+
+DSGVO Art. 7 Abs. 3 requires that withdrawing consent must be as
+easy as giving it. EDPB Cookie Banner Taskforce Report (2023) and
+DSK OH Digitale Dienste v1.2 (2024) both demand a permanent, directly
+reachable way to change cookie preferences — typically a Footer link
+labelled "Cookie-Einstellungen" that re-opens the CMP in place.
+
+Common anti-patterns we want to flag:
+  - Footer points to a Cookie-Policy *page* in a new tab, no CMP
+  - Footer only offers "more info" but no "manage settings"
+  - Only mention is a verbal reference to browser settings inside the
+    privacy-policy text
+  - Mobile footer hides the link in a multi-level accordion
+
+This module does the STATIC HTML analysis. The dynamic part (mobile
+viewport rendering, tap-target measurement, click-behaviour
+verification) is performed by consent-tester via Playwright and feeds
+back into `evaluate_combined` in a later phase.
+
+Pure module — no DB, no network. Tests live in
+tests/test_consent_reachability_check.py.
+"""
+
+from __future__ import annotations
+
+import logging
+import re
+from html.parser import HTMLParser
+from urllib.parse import urljoin, urlparse
+
+logger = logging.getLogger(__name__)
+
+# Phrases that suggest "open the consent manager" rather than "show
+# more info / open a policy page".
+_REOPEN_PHRASES = (
+    "cookie-einstellungen", "cookie einstellungen",
+    "cookie-präferenzen", "cookie praeferenzen", "cookie-praferenzen",
+    "cookie-einwilligung", "einwilligung verwalten",
+    "consent manager", "consent settings", "consent-einstellungen",
+    "datenschutz-einstellungen", "datenschutzeinstellungen",
+    "cookies verwalten", "manage cookies", "manage preferences",
+    "privacy settings", "privacy preferences",
+    "tracking-einstellungen",
+)
+
+# Weaker — these usually point at a policy page, not the CMP itself.
+_INFO_ONLY_PHRASES = (
+    "cookie-richtlinie", "cookie richtlinie", "cookie-policy",
+    "cookie policy", "cookies (information)",
+    "datenschutz", "datenschutzerklärung", "privacy policy",
+    "weitere informationen", "more information",
+)
+
+# Phrases that try to shift the burden to the user's browser —
+# Bundesländer-Datenschutzbeauftragte explicitly call this insufficient.
+_BROWSER_DEFLECTION_PHRASES = (
+    "browser-einstellungen", "browsereinstellungen",
+    "einstellungen ihres browsers", "browser settings",
+    "in ihrem browser", "über ihren browser",
+)
+
+
+class _AnchorCollector(HTMLParser):
+    """Collects <a> and <button> elements with text + attrs.
+
+    Tracks footer scope via a depth counter so only anchors nested
+    inside <footer> — or inside any element with role="contentinfo"
+    or an id/class containing "footer" — are collected.
+    """
+
+    def __init__(self) -> None:
+        super().__init__(convert_charrefs=True)
+        self._footer_depth = 0
+        self._current: dict | None = None
+        self._text_chunks: list[str] = []
+        self.anchors: list[dict] = []
+
+    def _is_footer_open(self, tag: str, attrs: dict) -> bool:
+        if tag == "footer":
+            return True
+        if attrs.get("role", "").lower() == "contentinfo":
+            return True
+        ident = (attrs.get("id", "") + " " + attrs.get("class", "")).lower()
+        return "footer" in ident or "site-footer" in ident
+
+    def handle_starttag(self, tag, attrs):
+        a = {k.lower(): (v or "") for k, v in attrs}
+        if self._is_footer_open(tag, a):
+            self._footer_depth += 1
+            return
+        if self._footer_depth > 0 and tag in ("a", "button"):
+            self._current = {
+                "tag": tag,
+                "href": a.get("href", ""),
+                "target": a.get("target", ""),
+                "aria_label": a.get("aria-label", ""),
+                "data_cmp": a.get("data-cmp", ""),
+                "onclick": a.get("onclick", ""),
+                "id": a.get("id", ""),
+                "class": a.get("class", ""),
+            }
+            self._text_chunks = []
+
+    def handle_endtag(self, tag):
+        if tag == "footer" and self._footer_depth > 0:
+            self._footer_depth -= 1
+        elif self._current and tag == self._current["tag"]:
+            txt = " ".join(self._text_chunks).strip()
+            self._current["text"] = re.sub(r"\s+", " ", txt)[:200]
+            self.anchors.append(self._current)
+            self._current = None
+            self._text_chunks = []
+
+    def handle_data(self, data):
+        if self._current is not None:
+            self._text_chunks.append(data)
+
+
+def find_consent_anchors_in_footer(html: str) -> list[dict]:
+    """Parse <a> / <button> elements in <footer> and tag those that
+    look related to cookie/consent management.
+
+    Each returned dict:
+      { tag, href, target, text, aria_label, data_cmp, onclick, id,
+        class, intent }
+    where intent ∈ {"reopen_cmp", "info_only", "browser_deflect"};
+    anchors that match no phrase list ("unrelated") are dropped.
+    """
+    if not html:
+        return []
+    parser = _AnchorCollector()
+    try:
+        parser.feed(html)
+    except Exception as e:  # malformed HTML — recover silently
+        logger.warning("footer parser failed: %s", e)
+        return []
+    out: list[dict] = []
+    for a in parser.anchors:
+        label = " ".join([
+            a.get("text", ""), a.get("aria_label", ""),
+            a.get("data_cmp", ""), a.get("onclick", ""),
+        ]).lower()
+        intent = "unrelated"
+        if any(p in label for p in _REOPEN_PHRASES):
+            intent = "reopen_cmp"
+        elif any(p in label for p in _BROWSER_DEFLECTION_PHRASES):
+            intent = "browser_deflect"
+        elif any(p in label for p in _INFO_ONLY_PHRASES):
+            intent = "info_only"
+        if intent != "unrelated":
+            a["intent"] = intent
+            out.append(a)
+    return out
+
+
+def classify_anchor_target(
+    anchor: dict, base_url: str,
+) -> str:
+    """Decide whether the anchor would open the CMP in place or
+    navigate elsewhere.
+
+    Returns:
+      "same_page_cmp"  — onclick / data-cmp attr, or empty / hash-only href
+      "same_origin"    — relative link or same-origin page (still a
+                         navigation away from the live banner)
+      "external"       — link to a different origin
+      "new_tab"        — target="_blank"
+      "javascript"     — javascript: link, probably a CMP trigger
+    """
+    href = (anchor.get("href") or "").strip()
+    target = (anchor.get("target") or "").strip().lower()
+    onclick = anchor.get("onclick", "") or ""
+    data_cmp = anchor.get("data_cmp", "") or ""
+
+    if data_cmp or onclick:
+        return "same_page_cmp"
+    if href.startswith("javascript:"):
+        return "javascript"
+    if target == "_blank":
+        return "new_tab"
+    if not href or href.startswith("#"):
+        return "same_page_cmp"
+
+    base_host = urlparse(base_url).netloc.lower() if base_url else ""
+    try:
+        target_host = urlparse(urljoin(base_url or "/", href)).netloc.lower()
+    except Exception:
+        target_host = ""
+    if not target_host or target_host == base_host:
+        return "same_origin"
+    return "external"
+
+
+def evaluate_reachability(
+    footer_html: str,
+    base_url: str = "",
+) -> dict:
+    """Run static reachability analysis on a footer HTML fragment.
+
+    Returns a finding dict for the COOKIE-CONSENT-UX-001 check.
+    """
+    anchors = find_consent_anchors_in_footer(footer_html)
+    has_reopen_anchor = False
+    reopen_anchor: dict | None = None
+    info_only_count = 0
+    browser_deflect_count = 0
+    for a in anchors:
+        intent = a.get("intent")
+        if intent == "reopen_cmp":
+            has_reopen_anchor = True
+            target_class = classify_anchor_target(a, base_url)
+            a["target_class"] = target_class
+            if reopen_anchor is None:
+                reopen_anchor = a
+        elif intent == "info_only":
+            info_only_count += 1
+        elif intent == "browser_deflect":
+            browser_deflect_count += 1
+
+    result: dict = {
+        "check_id": "COOKIE-CONSENT-UX-001",
+        "anchors_total": len(anchors),
+        "has_reopen_anchor": has_reopen_anchor,
+        "info_only_count": info_only_count,
+        "browser_deflect_count": browser_deflect_count,
+        "reopen_anchor": reopen_anchor,
+        "passed": True,
+        "severity": None,
+        "severity_reason": None,
+        "evidence_phrases": [],
+        "notes": [],
+    }
+
+    # Hard fail: no reopen anchor at all → withdrawal not as easy as
+    # opt-in (Art. 7 Abs. 3 DSGVO).
+    if not has_reopen_anchor:
+        result["passed"] = False
+        result["severity"] = "HIGH"
+        result["severity_reason"] = "missing"
+        result["notes"].append(
+            "no consent-manager link in footer; withdrawal path "
+            "missing or only indirect",
+        )
+
+    # Soft fail: anchor exists but opens in a new tab (DSK OH calls this
+    # an avoidable hurdle) or points to an external host. MEDIUM rather
+    # than HIGH because withdrawal is technically still possible.
+    if has_reopen_anchor and reopen_anchor is not None:
+        cls = reopen_anchor.get("target_class")
+        if cls == "new_tab":
+            result["passed"] = False
+            result["severity"] = "MEDIUM"
+            result["severity_reason"] = "misclassified"
+            result["notes"].append(
+                "consent-manager link opens in new tab — context-break",
+            )
+        elif cls == "external":
+            result["passed"] = False
+            result["severity"] = "MEDIUM"
+            result["severity_reason"] = "misclassified"
+            result["notes"].append(
+                "consent-manager link points to external host",
+            )
+
+    # Extra signal: only browser-deflection phrases and zero reopen
+    # anchor — this is the worst variant the LfDI BaWü explicitly
+    # flagged.
+    if (not has_reopen_anchor and browser_deflect_count > 0):
+        result["severity"] = "HIGH"
+        result["severity_reason"] = "factually_wrong"
+        result["notes"].append(
+            "withdrawal route only via browser-settings — not gleich "
+            "einfach wie Erteilung",
+        )
+
+    return result
diff --git a/backend-compliance/compliance/services/evidence_zip_builder.py b/backend-compliance/compliance/services/evidence_zip_builder.py
new file mode 100644
index 00000000..a960d256
--- /dev/null
+++ b/backend-compliance/compliance/services/evidence_zip_builder.py
@@ -0,0 +1,119 @@
+"""
+Evidence ZIP Builder — bundles cookie-evidence slices into one ZIP
+suitable as email attachment for the audit trail.
+
+Why: capture_cookie_evidence_slices() produces N PNG slices per check
+with timestamps + per-slice SHA256. Without an attachment to the
+compliance report, the evidence chain stops at the backend. The ZIP
+makes the slices portable so a DSB / lawyer can hand them to an
+auditor or supervisory authority.
+
+ZIP layout:
+  evidence.zip
+  ├── manifest.json          # per-slice metadata
+  ├── audit_metadata.json    # run-level (check_id, url, build_sha, ...)
+  └── slice_001.png ...      # binary PNG per slice
+"""
+
+from __future__ import annotations
+
+import base64
+import io
+import json
+import logging
+import os
+import zipfile
+from datetime import datetime, timezone
+
+logger = logging.getLogger(__name__)
+
+
+def build_evidence_zip(
+    slices: list[dict],
+    meta: dict | None = None,
+    check_id: str = "",
+) -> bytes:
+    """Build a ZIP archive with all slices + a manifest.
+
+    Args:
+      slices: list of dicts from capture_cookie_evidence_slices():
+        each {"idx", "ts", "top_y", "bot_y", "sha256", "png_b64",
+              "png_size"}
+      meta: run-level dict from the same call:
+        {"total_height_px", "width_px", "accepted_banner",
+         "expanded", "url", "captured_at", "slice_count"}
+      check_id: the compliance-check job id
+
+    Returns:
+      raw ZIP bytes (suitable as email attachment payload)
+    """
+    buf = io.BytesIO()
+    manifest_slices: list[dict] = []
+
+    with zipfile.ZipFile(buf, "w", zipfile.ZIP_DEFLATED) as zf:
+        for s in slices or []:
+            idx = int(s.get("idx", 0))
+            fname = f"slice_{idx + 1:03d}.png"
+            try:
+                png = base64.b64decode(s.get("png_b64", ""))
+            except Exception as e:
+                logger.warning(
+                    "evidence_zip: skip slice %s, b64 decode failed: %s",
+                    idx, e,
+                )
+                continue
+            zf.writestr(fname, png)
+            manifest_slices.append({
+                "filename": fname,
+                "slice_idx": idx,
+                "captured_at": s.get("ts", ""),
+                "top_y_px": s.get("top_y"),
+                "bot_y_px": s.get("bot_y"),
+                "sha256_short": s.get("sha256", ""),
+                "png_size_bytes": s.get("png_size", len(png)),
+            })
+
+        manifest = {
+            "schema_version": "1.0",
+            "check_id": check_id,
+            "slices": manifest_slices,
+            "slice_count": len(manifest_slices),
+        }
+        zf.writestr(
+            "manifest.json",
+            json.dumps(manifest, indent=2, ensure_ascii=False),
+        )
+
+        audit_meta = {
+            "schema_version": "1.0",
+            "check_id": check_id,
+            "build_sha": os.environ.get("BUILD_SHA", "unknown"),
+            "generated_at": datetime.now(timezone.utc).isoformat(),
+            "source_url": (meta or {}).get("url", ""),
+            "captured_at": (meta or {}).get("captured_at", ""),
+            "accepted_banner": (meta or {}).get("accepted_banner"),
+            "expanded": (meta or {}).get("expanded"),
+            "total_height_px": (meta or {}).get("total_height_px"),
+            "width_px": (meta or {}).get("width_px"),
+            "slice_count": (meta or {}).get(
+                "slice_count", len(manifest_slices),
+            ),
+            "note": (
+                "Each slice_NNN.png is an overlapping screenshot fragment "
+                "of the cookie policy page captured at captured_at. "
+                "sha256_short is the first 16 hex chars of the SHA-256 of "
+                "the raw PNG bytes — use it to verify the slice was not "
+                "modified after capture."
+            ),
+        }
+        zf.writestr(
+            "audit_metadata.json",
+            json.dumps(audit_meta, indent=2, ensure_ascii=False),
+        )
+
+    data = buf.getvalue()
+    logger.info(
+        "evidence_zip built: %d slices, %d bytes, check_id=%s",
+        len(manifest_slices), len(data), check_id,
+    )
+    return data
diff --git a/backend-compliance/compliance/services/retention_comparator.py b/backend-compliance/compliance/services/retention_comparator.py
new file mode 100644
index 00000000..f1856a25
--- /dev/null
+++ b/backend-compliance/compliance/services/retention_comparator.py
@@ -0,0 +1,362 @@
+"""
+B3 — Cross-Doc Retention Consistency Comparator.
+
+Compares three sources of truth for cookie storage duration:
+
+  1. DSI claim — sentence(s) in the privacy policy mentioning retention
+     ("Die Speicherdauer beträgt 14 Monate", "_ga: 14 Monate", ...).
+  2. Cookie-table — the `duration` field parsed from the cookie policy
+     table (parse_flat_cookie_text / OCR / vendor-extract).
+  3. Actual cookie — `Max-Age` / `Expires` from the real Set-Cookie
+     header captured by the consent-tester.
+
+Output is a per-cookie finding usable by the audit report:
+  - matches=True  → all three sources agree (within tolerance)
+  - matches=False → mismatch with explicit type + severity_reason
+
+Severity hierarchy (see project_audit_report_architecture.md):
+  HIGH/factually_wrong : DSI claim is shorter than reality
+                         → user is told "X" but tracked for longer
+  HIGH/factually_wrong : table duration is shorter than reality
+                         → cookie table understates what is set
+  MEDIUM/misclassified : DSI is shorter than table (internal docs disagree)
+  LOW/incomplete       : only one source has data
+
+The module is pure (no DB, no network) and meant to be called from the
+report pipeline after cookies+DSI+HAR have already been collected.
+"""
+
+from __future__ import annotations
+
+import logging
+import re
+from dataclasses import dataclass
+
+logger = logging.getLogger(__name__)
+
+# 5% tolerance — Safari ITP, leap years, server clocks etc.
+_MATCH_TOLERANCE_PCT = 5
+
+# Multipliers in DAYS for the German + English unit vocabulary used in
+# our cookie tables and policies.
+_UNIT_DAYS: dict[str, float] = {
+    "sekunden": 1 / 86400, "sekunde": 1 / 86400, "sec": 1 / 86400, "s": 1 / 86400,
+    "minuten": 1 / 1440, "minute": 1 / 1440, "min": 1 / 1440,
+    "stunden": 1 / 24, "stunde": 1 / 24, "h": 1 / 24,
+    "tage": 1, "tag": 1, "d": 1, "day": 1, "days": 1,
+    "wochen": 7, "woche": 7, "week": 7, "weeks": 7,
+    "monate": 30, "monat": 30, "month": 30, "months": 30,
+    "jahre": 365, "jahr": 365, "year": 365, "years": 365,
+}
+
+# Phrases that mean "session" — cookie deleted when browser closes.
+_SESSION_TOKENS = {
+    "session", "sitzung", "sitzungsdauer", "browsersitzung",
+    "browser session", "browsing session", "tab",
+}
+
+# Phrases that mean "persistent without explicit cap".
+_NO_EXPIRY_TOKENS = {
+    "unbegrenzt", "unbestimmt", "kein ablaufdatum",
+    "no expiry", "persistent", "permanent",
+}
+
+
+@dataclass
+class RetentionClaim:
+    """One retention statement found in the DSI text."""
+    sentence: str
+    days: float | None  # None for session/unknown
+    is_session: bool
+    is_persistent: bool
+    context_terms: list[str]  # cookie names / provider names mentioned nearby
+
+
+def parse_duration_to_days(text: str) -> tuple[float | None, str]:
+    """Convert a duration phrase to days.
+
+    Returns (days, kind) where kind ∈
+      {"days", "session", "persistent", "unknown"}.
+    For "session" / "persistent" days is None — comparisons must
+    handle these as special cases, not as 0 or infinity.
+    """
+    if text is None:
+        return None, "unknown"
+    s = text.strip().lower()
+    if not s:
+        return None, "unknown"
+
+    for tok in _SESSION_TOKENS:
+        if tok in s:
+            return None, "session"
+    for tok in _NO_EXPIRY_TOKENS:
+        if tok in s:
+            return None, "persistent"
+
+    # "14 Monate", "1 Jahr", "24h", "30 Tage", "365 Tage", "30d"
+    m = re.search(
+        r"(?P<num>\d+(?:[.,]\d+)?)\s*(?P<unit>"
+        r"sekunden?|sec|s|minuten?|min|stunden?|h|"
+        r"tage?|d(?:ays?)?|wochen?|weeks?|"
+        r"monate?|months?|jahre?|years?)\b",
+        s,
+    )
+    if not m:
+        return None, "unknown"
+    num = float(m.group("num").replace(",", "."))
+    unit = m.group("unit")
+    mult = _UNIT_DAYS.get(unit)
+    if mult is None:
+        return None, "unknown"
+    return num * mult, "days"
+
+
+def max_age_to_days(max_age_seconds: int | float | None) -> float | None:
+    """Convert a Set-Cookie Max-Age (in seconds) to days."""
+    if max_age_seconds is None:
+        return None
+    try:
+        return float(max_age_seconds) / 86400.0
+    except (TypeError, ValueError):
+        return None
+
+
+# Sentence splitter that respects German legal text style (lots of
+# semicolons + parentheses but few capitalised abbreviations).
+_SENTENCE_SPLIT = re.compile(r"(?<=[.!?])\s+(?=[A-ZÄÖÜ])")
+
+# Quick anchor terms for retention sentences.
+_RETENTION_ANCHORS = (
+    "speicherdauer", "speicherfrist", "speicher",
+    "aufbewahrungsdauer", "aufbewahrungsfrist",
+    "löschfrist", "löschung",
+    "gespeichert für", "wird gespeichert", "wird für",
+    "retention", "expires", "expiration", "lifetime",
+    "gültigkeit", "laufzeit",
+)
+
+
+def _looks_like_retention(sentence: str) -> bool:
+    s = sentence.lower()
+    if not any(a in s for a in _RETENTION_ANCHORS):
+        return False
+    # Need a unit token nearby — otherwise it's metadata not duration.
+    return bool(re.search(
+        r"\b\d[\d.,]*\s*("
+        r"sekunden?|minuten?|stunden?|tage?|wochen?|"
+        r"monate?|jahre?|sec|min|h|d|"
+        r"weeks?|months?|years?|days?)\b",
+        s,
+    ))
+
+
+def extract_retention_claims(
+    dsi_text: str,
+    cookie_names: list[str] | None = None,
+    vendor_names: list[str] | None = None,
+) -> list[RetentionClaim]:
+    """Find sentences in the DSI that state a retention period.
+
+    cookie_names / vendor_names attach themselves to a sentence when
+    they are mentioned in it; the comparator uses this to prefer the
+    most specific claim available for a given cookie.
+    """
+    if not dsi_text:
+        return []
+    cookie_names = cookie_names or []
+    vendor_names = vendor_names or []
+    # No normalisation here — keep the sentence in its original case so
+    # it can be cited verbatim in the audit report.
+    sentences = _SENTENCE_SPLIT.split(dsi_text)
+    claims: list[RetentionClaim] = []
+    for raw in sentences:
+        s = raw.strip()
+        if not s:
+            continue
+        if not _looks_like_retention(s):
+            continue
+        days, kind = parse_duration_to_days(s)
+        lower = s.lower()
+        contexts: list[str] = []
+        for n in cookie_names:
+            if n and n.lower() in lower:
+                contexts.append(n)
+        for v in vendor_names:
+            if v and v.lower() in lower:
+                contexts.append(v)
+        claims.append(RetentionClaim(
+            sentence=s[:400],
+            days=days,
+            is_session=(kind == "session"),
+            is_persistent=(kind == "persistent"),
+            context_terms=contexts,
+        ))
+    return claims
+
+
+def _best_dsi_claim(
+    claims: list[RetentionClaim],
+    cookie_name: str,
+    vendor_name: str | None,
+) -> RetentionClaim | None:
+    """Select the DSI claim that best fits one cookie.
+
+    Preference order: first claim listing the cookie name, then first
+    claim listing the vendor, then first claim without context terms.
+    If none of those exist the first claim is used; no claims -> None.
+    """
+    if not claims:
+        return None
+    # Falsy names never match (mirrors the guards on both lookups).
+    wanted = [name for name in (cookie_name, vendor_name) if name]
+    for name in wanted:
+        for claim in claims:
+            if name in claim.context_terms:
+                return claim
+    untagged = [claim for claim in claims if not claim.context_terms]
+    return untagged[0] if untagged else claims[0]
+
+
+def _within_tolerance(a: float, b: float) -> bool:
+    # Relative check: gap <= _MATCH_TOLERANCE_PCT percent of the larger magnitude.
+    limit = max(abs(a), abs(b)) * (_MATCH_TOLERANCE_PCT / 100.0)
+    # Two zeros always agree (short-circuits before the relative comparison).
+    return (a == 0 and b == 0) or abs(a - b) <= limit
+
+
+def compare_retention(
+    cookie_name: str,
+    table_duration: str | None,
+    actual_max_age_seconds: int | float | None,
+    dsi_claims: list[RetentionClaim] | None = None,
+    vendor_name: str | None = None,
+) -> dict:
+    """Compare one cookie's table, live (max-age) and DSI retention in days.
+
+    Returns a finding dict for the TH-RETENTION aggregator; its schema is
+    stable, so extend it additively only. The first failing check wins:
+    dsi_under_actual, table_under_actual, dsi_vs_table, actual_under_table.
+    """
+    table_days, table_kind = parse_duration_to_days(table_duration or "")
+    actual_days = max_age_to_days(actual_max_age_seconds)
+    dsi_claim = _best_dsi_claim(
+        dsi_claims or [], cookie_name, vendor_name,
+    )
+    dsi_days = dsi_claim.days if dsi_claim else None
+
+    out: dict = {
+        "cookie_name": cookie_name,
+        "vendor_name": vendor_name,
+        "table_duration_raw": table_duration,
+        "table_days": table_days,
+        "table_kind": table_kind,  # recorded only; the checks below use day counts
+        "actual_max_age_seconds": actual_max_age_seconds,
+        "actual_days": actual_days,
+        "dsi_days": dsi_days,
+        "dsi_sentence": dsi_claim.sentence if dsi_claim else None,
+        "dsi_context_terms": dsi_claim.context_terms if dsi_claim else [],
+        "matches": True,  # stays True unless one of the checks below fails
+        "mismatch_type": None,
+        "severity_reason": None,
+        "severity": None,
+        "diff_days": None,
+        "notes": [],
+    }
+
+    sources = [v for v in (table_days, actual_days, dsi_days) if v is not None]
+    if len(sources) <= 1:  # zero or one day value: nothing to compare
+        out["severity_reason"] = "incomplete"
+        out["severity"] = "LOW"
+        out["notes"].append("only_one_source_has_data")  # also added when no source has data
+        return out
+
+    # Highest-severity check first: DSI claim is shorter than the cookie
+    # actually lives — user was misled.
+    if dsi_days is not None and actual_days is not None:
+        if not _within_tolerance(dsi_days, actual_days):
+            if dsi_days < actual_days:  # a DSI claim longer than reality is not flagged here
+                out["matches"] = False
+                out["mismatch_type"] = "dsi_under_actual"
+                out["severity_reason"] = "factually_wrong"
+                out["severity"] = "HIGH"
+                out["diff_days"] = actual_days - dsi_days
+
+    # Cookie table understates reality — second highest.
+    if (out["matches"] and table_days is not None
+            and actual_days is not None):
+        if not _within_tolerance(table_days, actual_days):
+            if table_days < actual_days:
+                out["matches"] = False
+                out["mismatch_type"] = "table_under_actual"
+                out["severity_reason"] = "factually_wrong"
+                out["severity"] = "HIGH"
+                out["diff_days"] = actual_days - table_days
+
+    # Internal disagreement DSI vs. table (less severe — both are
+    # documentation, neither contradicts the live cookie).
+    if (out["matches"] and dsi_days is not None and table_days is not None):
+        if not _within_tolerance(dsi_days, table_days):
+            out["matches"] = False
+            out["mismatch_type"] = "dsi_vs_table"
+            out["severity_reason"] = "misclassified"
+            out["severity"] = "MEDIUM"
+            out["diff_days"] = abs(dsi_days - table_days)
+
+    # Catch over-declaration too — table says "2 years" but cookie
+    # lives 7 days (Safari ITP). Less severe but worth flagging.
+    if (out["matches"] and table_days is not None
+            and actual_days is not None):
+        if (not _within_tolerance(table_days, actual_days)
+                and table_days > actual_days):
+            out["matches"] = False
+            out["mismatch_type"] = "actual_under_table"
+            out["severity_reason"] = "incomplete"  # same reason string as the early return, but matches is False
+            out["severity"] = "LOW"
+            out["notes"].append("possible_safari_itp_cap")
+            out["diff_days"] = table_days - actual_days
+
+    return out
+
+
+def build_retention_theme_summary(findings: list[dict]) -> dict:
+    """Aggregate per-cookie findings into the TH-RETENTION theme block.
+
+    Buckets are exclusive: ``failed`` (``matches`` falsy), ``incomplete``
+    (no mismatch, severity_reason "incomplete"), ``passed`` (the rest), so
+    a mismatch tagged "incomplete" (e.g. "actual_under_table") is failed.
+    ``top_fails``: up to ten "factually_wrong" mismatches, largest
+    ``diff_days`` first. ``pct``: rounded passed share, 0 when empty.
+    """
+    total = len(findings)
+    passed = failed = incomplete = 0
+    by_severity: dict[str, int] = {}
+    by_type: dict[str, int] = {}
+    for f in findings:
+        if not f.get("matches"):
+            failed += 1
+        elif f.get("severity_reason") == "incomplete":
+            incomplete += 1
+        else:
+            passed += 1
+        sev = f.get("severity")
+        if sev:
+            by_severity[sev] = by_severity.get(sev, 0) + 1
+        mt = f.get("mismatch_type")
+        if mt:
+            by_type[mt] = by_type.get(mt, 0) + 1
+    return {
+        "theme_id": "TH-RETENTION",
+        "total": total,
+        "passed": passed,
+        "failed": failed,
+        "incomplete": incomplete,
+        "pct": int(round(100 * passed / total)) if total else 0,
+        "by_severity": by_severity,
+        "by_mismatch_type": by_type,
+        "top_fails": sorted(
+            (f for f in findings
+             if not f.get("matches")
+             and f.get("severity_reason") == "factually_wrong"),
+            key=lambda f: -(f.get("diff_days") or 0),
+        )[:10],
+    }
diff --git a/backend-compliance/compliance/services/smtp_sender.py b/backend-compliance/compliance/services/smtp_sender.py
index 973c096a..d9af5860 100644
--- a/backend-compliance/compliance/services/smtp_sender.py
+++ b/backend-compliance/compliance/services/smtp_sender.py
@@ -8,9 +8,13 @@ Uses standard smtplib. Configuration via environment variables:
   SMTP_FROM_ADDR (default: compliance@breakpilot.local)
 """
 
+from __future__ import annotations
+
 import logging
 import os
 import smtplib
+from email import encoders
+from email.mime.base import MIMEBase
 from email.mime.multipart import MIMEMultipart
 from email.mime.text import MIMEText
 
@@ -28,22 +32,54 @@ def send_email(
     body_html: str,
     from_addr: str | None = None,
     from_name: str | None = None,
+    attachments: list[dict] | None = None,
 ) -> dict:
-    """Send an email via SMTP. Returns dict with status and message_id."""
+    """Send an email via SMTP. Returns dict with status and message_id.
+
+    attachments: optional list of dicts:
+      [{"filename": "evidence.zip", "data": <bytes>,
+        "mime": "application/zip"}, ...]
+    """
     sender_addr = from_addr or SMTP_FROM_ADDR
     sender_name = from_name or SMTP_FROM_NAME
 
-    msg = MIMEMultipart("alternative")
+    if attachments:
+        msg = MIMEMultipart("mixed")
+        body = MIMEMultipart("alternative")
+        body.attach(MIMEText(body_html, "html", "utf-8"))
+        msg.attach(body)
+        for att in attachments:
+            mime = att.get("mime", "application/octet-stream")
+            maintype, _, subtype = mime.partition("/")
+            part = MIMEBase(maintype or "application", subtype or "octet-stream")
+            part.set_payload(att.get("data", b""))
+            encoders.encode_base64(part)
+            fname = att.get("filename", "attachment.bin")
+            part.add_header(
+                "Content-Disposition",
+                f'attachment; filename="{fname}"',
+            )
+            msg.attach(part)
+    else:
+        msg = MIMEMultipart("alternative")
+        msg.attach(MIMEText(body_html, "html", "utf-8"))
+
     msg["From"] = f"{sender_name} <{sender_addr}>"
     msg["To"] = recipient
     msg["Subject"] = subject
-    msg.attach(MIMEText(body_html, "html", "utf-8"))
 
     try:
-        with smtplib.SMTP(SMTP_HOST, SMTP_PORT, timeout=10) as server:
+        with smtplib.SMTP(SMTP_HOST, SMTP_PORT, timeout=30) as server:
             server.sendmail(sender_addr, [recipient], msg.as_string())
-        logger.info("Email sent to %s: %s", recipient, subject)
-        return {"status": "sent", "recipient": recipient, "subject": subject}
+        att_count = len(attachments or [])
+        logger.info(
+            "Email sent to %s: %s (attachments=%d)",
+            recipient, subject, att_count,
+        )
+        return {
+            "status": "sent", "recipient": recipient, "subject": subject,
+            "attachments": att_count,
+        }
     except Exception as e:
         logger.error("Failed to send email to %s: %s", recipient, e)
         return {"status": "failed", "recipient": recipient, "error": str(e)}
diff --git a/backend-compliance/tests/test_consent_reachability_check.py b/backend-compliance/tests/test_consent_reachability_check.py
new file mode 100644
index 00000000..b4a69db6
--- /dev/null
+++ b/backend-compliance/tests/test_consent_reachability_check.py
@@ -0,0 +1,153 @@
+"""Tests for B1 static consent-reachability analysis."""
+
+from compliance.services.consent_reachability_check import (
+    classify_anchor_target,
+    evaluate_reachability,
+    find_consent_anchors_in_footer,
+)
+
+
+def _wrap(footer_inner: str) -> str:
+    # Minimal page whose <footer> holds *footer_inner* verbatim.
+    head, tail = "<html><body>", "</body></html>"
+    main = "<main>some content</main>"
+    footer = f"<footer>{footer_inner}</footer>"
+    # Order matters: <main> comes before the footer under test.
+    return head + main + footer + tail
+
+
+class TestFindConsentAnchors:  # footer anchor discovery and "intent" labelling
+    def test_finds_reopen_link_german(self):
+        html = _wrap('<a href="#" onclick="UC_UI.showSecondLayer()">'
+                     'Cookie-Einstellungen</a>')
+        anchors = find_consent_anchors_in_footer(html)
+        assert len(anchors) == 1
+        assert anchors[0]["intent"] == "reopen_cmp"
+
+    def test_finds_reopen_button(self):
+        html = _wrap('<button data-cmp="show">Cookies verwalten</button>')
+        anchors = find_consent_anchors_in_footer(html)
+        assert anchors[0]["intent"] == "reopen_cmp"  # a <button> counts, not only <a>
+
+    def test_info_only_link_to_policy(self):
+        html = _wrap('<a href="/cookie-richtlinie">Cookie-Richtlinie</a>')
+        anchors = find_consent_anchors_in_footer(html)
+        assert len(anchors) == 1
+        assert anchors[0]["intent"] == "info_only"
+
+    def test_browser_deflection_link(self):
+        html = _wrap('<a href="/cookies">Browser-Einstellungen</a>')
+        anchors = find_consent_anchors_in_footer(html)
+        assert anchors[0]["intent"] == "browser_deflect"
+
+    def test_ignores_anchors_outside_footer(self):
+        html = ('<html><body>'
+                '<a href="#">Cookie-Einstellungen</a>'
+                '<footer><a href="/impressum">Impressum</a></footer>'
+                '</body></html>')
+        assert find_consent_anchors_in_footer(html) == []  # body link and Impressum link both yield nothing
+
+    def test_role_contentinfo_treated_as_footer(self):
+        html = ('<html><body>'
+                '<div role="contentinfo">'
+                '<a href="#" data-cmp="open">Cookie-Einstellungen</a>'
+                '</div></body></html>')
+        anchors = find_consent_anchors_in_footer(html)
+        assert len(anchors) == 1
+
+    def test_class_with_footer_treated_as_footer(self):
+        html = ('<html><body>'
+                '<div class="site-footer">'
+                '<a href="#" data-cmp="open">Cookies verwalten</a>'
+                '</div></body></html>')
+        anchors = find_consent_anchors_in_footer(html)
+        assert len(anchors) == 1
+
+    def test_empty_html(self):
+        assert find_consent_anchors_in_footer("") == []
+
+    def test_malformed_html(self):
+        # broken markup shouldn't crash
+        anchors = find_consent_anchors_in_footer("<footer><a>foo")
+        # may or may not yield results; must not raise
+        assert isinstance(anchors, list)
+
+
+class TestClassifyAnchorTarget:  # anchor attribute dict + page URL -> target label
+    def test_onclick_classifies_as_cmp(self):
+        a = {"href": "#", "onclick": "showCmp()"}
+        assert classify_anchor_target(a, "https://x.de/") == "same_page_cmp"
+
+    def test_data_cmp_classifies_as_cmp(self):
+        a = {"href": "#", "data_cmp": "show"}  # key is "data_cmp" (underscore) in the anchor dict
+        assert classify_anchor_target(a, "https://x.de/") == "same_page_cmp"
+
+    def test_javascript_link(self):
+        a = {"href": "javascript:void(0)"}
+        assert classify_anchor_target(a, "https://x.de/") == "javascript"
+
+    def test_new_tab(self):
+        a = {"href": "/cookie", "target": "_blank"}
+        assert classify_anchor_target(a, "https://x.de/") == "new_tab"  # not "same_origin" despite the relative href
+
+    def test_hash_only(self):
+        a = {"href": "#cookies"}
+        assert classify_anchor_target(a, "https://x.de/") == "same_page_cmp"  # fragment-only link, no onclick/data_cmp
+
+    def test_same_origin_relative(self):
+        a = {"href": "/cookie-richtlinie"}
+        assert classify_anchor_target(a, "https://x.de/") == "same_origin"
+
+    def test_external_origin(self):
+        a = {"href": "https://other.de/policy"}
+        assert classify_anchor_target(a, "https://x.de/") == "external"
+
+
+class TestEvaluateReachability:  # passed / severity / severity_reason per footer variant
+    def test_pass_when_reopen_in_same_page(self):
+        html = _wrap('<a href="#" data-cmp="open">Cookie-Einstellungen</a>')
+        r = evaluate_reachability(html, "https://x.de/")
+        assert r["check_id"] == "COOKIE-CONSENT-UX-001"
+        assert r["passed"] is True
+        assert r["severity"] is None  # a pass carries no severity
+        assert r["has_reopen_anchor"] is True
+
+    def test_fail_missing_when_no_reopen(self):
+        html = _wrap('<a href="/cookie-richtlinie">Cookie-Richtlinie</a>')
+        r = evaluate_reachability(html, "https://x.de/")
+        assert r["passed"] is False
+        assert r["severity"] == "HIGH"
+        assert r["severity_reason"] == "missing"
+
+    def test_medium_when_reopen_opens_new_tab(self):
+        # The Elli case: footer link points at cookie policy in a new
+        # tab, no in-place CMP open.
+        html = _wrap(
+            '<a href="/cookie-einstellungen" target="_blank">'
+            'Cookie-Einstellungen</a>'
+        )
+        r = evaluate_reachability(html, "https://x.de/")
+        assert r["passed"] is False
+        assert r["severity"] == "MEDIUM"
+        assert r["severity_reason"] == "misclassified"
+
+    def test_high_when_only_browser_deflection(self):
+        html = _wrap('<a href="/cookies">Browser-Einstellungen</a>')
+        r = evaluate_reachability(html, "https://x.de/")
+        assert r["passed"] is False
+        assert r["severity"] == "HIGH"
+        assert r["severity_reason"] == "factually_wrong"
+
+    def test_empty_footer_is_fail(self):
+        r = evaluate_reachability(_wrap(""), "https://x.de/")  # footer present but empty
+        assert r["passed"] is False
+        assert r["severity"] == "HIGH"
+
+    def test_reopen_external_origin_is_medium(self):
+        html = _wrap(
+            '<a href="https://privacy.other.com/manage">'
+            'Cookie-Einstellungen</a>'
+        )
+        r = evaluate_reachability(html, "https://x.de/")
+        assert r["passed"] is False
+        assert r["severity"] == "MEDIUM"
diff --git a/backend-compliance/tests/test_retention_comparator.py b/backend-compliance/tests/test_retention_comparator.py
new file mode 100644
index 00000000..e4807e5d
--- /dev/null
+++ b/backend-compliance/tests/test_retention_comparator.py
@@ -0,0 +1,259 @@
+"""Tests for B3 cross-doc retention comparator."""
+
+from compliance.services.retention_comparator import (
+    RetentionClaim,
+    build_retention_theme_summary,
+    compare_retention,
+    extract_retention_claims,
+    max_age_to_days,
+    parse_duration_to_days,
+)
+
+
+class TestParseDurationToDays:  # parse_duration_to_days returns a (days, kind) pair
+    def test_months(self):
+        d, k = parse_duration_to_days("14 Monate")
+        assert k == "days"
+        assert d == 14 * 30  # a month is counted as 30 days
+
+    def test_jahre(self):
+        d, k = parse_duration_to_days("2 Jahre")
+        assert k == "days"
+        assert d == 2 * 365  # a year is counted as 365 days
+
+    def test_hours_short(self):
+        d, k = parse_duration_to_days("24h")
+        assert k == "days"
+        assert d == 1.0  # 24 hours == one day
+
+    def test_days(self):
+        d, k = parse_duration_to_days("30 Tage")
+        assert k == "days"
+        assert d == 30
+
+    def test_minutes(self):
+        d, k = parse_duration_to_days("1 Minute")
+        assert k == "days"
+        assert abs(d - 1 / 1440) < 1e-9  # 1440 minutes per day
+
+    def test_session(self):
+        d, k = parse_duration_to_days("Sitzungsdauer")
+        assert k == "session"
+        assert d is None
+
+    def test_session_token(self):
+        d, k = parse_duration_to_days("Session")  # only the kind is checked here
+        assert k == "session"
+
+    def test_persistent(self):
+        d, k = parse_duration_to_days("unbegrenzt")  # only the kind is checked here
+        assert k == "persistent"
+
+    def test_empty(self):
+        d, k = parse_duration_to_days("")
+        assert k == "unknown"
+        assert d is None
+
+    def test_none(self):
+        d, k = parse_duration_to_days(None)
+        assert k == "unknown"
+        assert d is None
+
+    def test_decimal_comma(self):
+        d, k = parse_duration_to_days("1,5 Jahre")  # German decimal comma
+        assert k == "days"
+        assert d == 1.5 * 365
+
+
+class TestMaxAgeToDays:  # max_age_to_days: seconds -> days, None when unusable
+    def test_one_year(self):
+        assert abs(max_age_to_days(365 * 86400) - 365) < 1e-9  # 86400 seconds per day
+
+    def test_session_none(self):
+        assert max_age_to_days(None) is None
+
+    def test_bad_input(self):
+        assert max_age_to_days("bad") is None  # non-numeric input yields None, no exception
+
+
+class TestExtractRetentionClaims:  # sentence-level claim extraction from DSI text
+    def test_finds_global_claim(self):
+        dsi = (
+            "Wir verarbeiten Ihre Daten gemäß Art. 6 DSGVO. "
+            "Die Speicherdauer der Daten beträgt grundsätzlich 6 Monate. "
+            "Danach werden die Daten gelöscht."
+        )
+        claims = extract_retention_claims(dsi)
+        assert len(claims) == 1  # only the middle sentence qualifies
+        assert claims[0].days == 6 * 30
+
+    def test_finds_cookie_specific(self):
+        dsi = (
+            "Wir nutzen Google Analytics. "
+            "Das Cookie _ga wird für 14 Monate gespeichert. "
+            "Weitere Hinweise finden Sie unten."
+        )
+        claims = extract_retention_claims(
+            dsi, cookie_names=["_ga"], vendor_names=["Google Analytics"],
+        )
+        assert len(claims) >= 1
+        ga_claim = next(c for c in claims if "_ga" in c.context_terms)  # raises StopIteration if untagged
+        assert ga_claim.days == 14 * 30
+
+    def test_ignores_non_retention_sentence(self):
+        dsi = "Wir sind 14 Monate am Markt. Das ist keine Speicherdauer."
+        # "14 Monate" present but no retention anchor — skip.
+        assert extract_retention_claims(dsi) == []
+
+    def test_empty_text(self):
+        assert extract_retention_claims("") == []
+
+
+class TestCompareRetention:  # one test per mismatch_type plus tolerance and claim selection
+    def test_match_all_three(self):
+        dsi_claims = [RetentionClaim(
+            sentence="Speicherdauer 14 Monate.",
+            days=14 * 30, is_session=False, is_persistent=False,
+            context_terms=[],
+        )]
+        out = compare_retention(
+            cookie_name="_ga",
+            table_duration="14 Monate",
+            actual_max_age_seconds=14 * 30 * 86400,
+            dsi_claims=dsi_claims,
+        )
+        assert out["matches"] is True
+        assert out["severity"] is None
+
+    def test_dsi_under_actual_is_HIGH(self):
+        # DSI claims 6 months, real cookie lives 14 months.
+        dsi_claims = [RetentionClaim(
+            sentence="Speicherdauer 6 Monate.",
+            days=6 * 30, is_session=False, is_persistent=False,
+            context_terms=[],
+        )]
+        out = compare_retention(
+            cookie_name="_ga",
+            table_duration="14 Monate",
+            actual_max_age_seconds=14 * 30 * 86400,
+            dsi_claims=dsi_claims,
+        )
+        assert out["matches"] is False
+        assert out["mismatch_type"] == "dsi_under_actual"
+        assert out["severity_reason"] == "factually_wrong"
+        assert out["severity"] == "HIGH"
+        assert out["diff_days"] == 14 * 30 - 6 * 30
+
+    def test_table_under_actual_is_HIGH(self):
+        # Table says 7 days, real cookie lives 365 days.
+        out = compare_retention(  # no DSI claims passed: table vs. live only
+            cookie_name="_fbp",
+            table_duration="7 Tage",
+            actual_max_age_seconds=365 * 86400,
+        )
+        assert out["matches"] is False
+        assert out["mismatch_type"] == "table_under_actual"
+        assert out["severity"] == "HIGH"
+
+    def test_dsi_vs_table_is_MEDIUM(self):
+        # DSI says 6 months, table says 14 months, no actual.
+        dsi_claims = [RetentionClaim(
+            sentence="Speicherdauer 6 Monate.",
+            days=6 * 30, is_session=False, is_persistent=False,
+            context_terms=[],
+        )]
+        out = compare_retention(
+            cookie_name="_ga",
+            table_duration="14 Monate",
+            actual_max_age_seconds=None,
+            dsi_claims=dsi_claims,
+        )
+        assert out["matches"] is False
+        assert out["mismatch_type"] == "dsi_vs_table"
+        assert out["severity"] == "MEDIUM"
+
+    def test_actual_under_table_is_LOW_safari_itp_hint(self):
+        # Table says 2 years, real cookie lives 7 days (Safari ITP).
+        out = compare_retention(
+            cookie_name="_ga",
+            table_duration="2 Jahre",
+            actual_max_age_seconds=7 * 86400,
+        )
+        assert out["matches"] is False
+        assert out["mismatch_type"] == "actual_under_table"
+        assert out["severity"] == "LOW"
+        assert "possible_safari_itp_cap" in out["notes"]
+
+    def test_only_one_source_is_incomplete(self):
+        out = compare_retention(  # table value only: no live max-age, no DSI claim
+            cookie_name="_ga",
+            table_duration="14 Monate",
+            actual_max_age_seconds=None,
+            dsi_claims=[],
+        )
+        assert out["severity_reason"] == "incomplete"
+        assert out["severity"] == "LOW"
+
+    def test_tolerance_5pct(self):
+        # 14 Monate (420d) vs 410d — within 5% tolerance, match.
+        out = compare_retention(
+            cookie_name="_ga",
+            table_duration="14 Monate",
+            actual_max_age_seconds=410 * 86400,
+        )
+        assert out["matches"] is True
+
+    def test_cookie_specific_dsi_beats_generic(self):
+        dsi_claims = [  # generic claim listed first on purpose
+            RetentionClaim(
+                sentence="Speicherdauer grundsätzlich 6 Monate.",
+                days=6 * 30, is_session=False, is_persistent=False,
+                context_terms=[],
+            ),
+            RetentionClaim(
+                sentence="_ga: Speicherdauer 14 Monate.",
+                days=14 * 30, is_session=False, is_persistent=False,
+                context_terms=["_ga"],
+            ),
+        ]
+        out = compare_retention(
+            cookie_name="_ga",
+            table_duration="14 Monate",
+            actual_max_age_seconds=14 * 30 * 86400,
+            dsi_claims=dsi_claims,
+        )
+        # The cookie-specific claim should win → all three match.
+        assert out["matches"] is True
+        assert out["dsi_days"] == 14 * 30
+
+
+class TestBuildRetentionThemeSummary:  # aggregation of compare_retention findings
+    def _claim(self, sentence, days):  # generic claim (no context terms) with a fixed day count
+        return RetentionClaim(
+            sentence=sentence, days=days,
+            is_session=False, is_persistent=False, context_terms=[],
+        )
+
+    def test_aggregate(self):
+        findings = [
+            compare_retention(  # _a: table, live and DSI all 420 days -> passed
+                "_a", "14 Monate", 14 * 30 * 86400,
+                [self._claim("14 Monate", 14 * 30)],
+            ),
+            compare_retention(  # _b: DSI and table 180 days vs. live 420 days -> failed
+                "_b", "6 Monate", 14 * 30 * 86400,
+                [self._claim("6 Monate", 6 * 30)],
+            ),
+            compare_retention(  # _c: table value only -> incomplete
+                "_c", "14 Monate", None, [],
+            ),
+        ]
+        s = build_retention_theme_summary(findings)
+        assert s["theme_id"] == "TH-RETENTION"
+        assert s["total"] == 3
+        assert s["passed"] == 1
+        assert s["incomplete"] == 1
+        assert s["failed"] == 1
+        assert s["by_severity"].get("HIGH") == 1
+        assert s["by_mismatch_type"].get("dsi_under_actual") == 1
+        assert len(s["top_fails"]) == 1