diff --git a/backend-compliance/compliance/api/agent_check/_b16_wiring.py b/backend-compliance/compliance/api/agent_check/_b16_wiring.py
new file mode 100644
index 00000000..b11ef600
--- /dev/null
+++ b/backend-compliance/compliance/api/agent_check/_b16_wiring.py
@@ -0,0 +1,66 @@
+"""B16 wiring — Footer-Label-vs-URL-Slug-Drift-Detector.
+
+Hängt sich an `state["extra_findings"]` an und rendert einen V2-Block
+(`url_slug_drift_html`).
+"""
+
+from __future__ import annotations
+
+import html
+import logging
+
+from compliance.services.url_slug_drift_check import check_url_slug_drift
+
+logger = logging.getLogger(__name__)
+
+
+def run_b16(state: dict) -> None:
+    """Run the B16 URL-slug-drift check and record its results on ``state``.
+
+    Findings returned by ``check_url_slug_drift`` are appended to
+    ``state["extra_findings"]`` and their rendered block is stored under
+    ``state["url_slug_drift_html"]``. When the check returns nothing,
+    ``state`` is left untouched.
+    """
+    drift_findings = check_url_slug_drift(state)
+    if drift_findings:
+        collected = state.get("extra_findings") or []
+        collected.extend(drift_findings)
+        state["extra_findings"] = collected
+        state["url_slug_drift_html"] = _render(drift_findings)
+        logger.info("B16 url-slug-drift: %d finding(s)", len(drift_findings))
+
+
+def _render(findings: list[dict]) -> str:
+ """Render the given findings into a single HTML string.
+
+ One card string is built per finding; the cards are joined and wrapped
+ in a block titled "Standard-Slug-Brüche (SEO / Bookmarks)". The fields
+ ``check_id``, ``title``, ``norm``, ``evidence``, ``action`` and the
+ joined ``alt_slugs_404`` list are passed through ``html.escape``;
+ missing or falsy fields are rendered as empty strings.
+
+ NOTE(review): the string literals below look truncated in this view
+ (the surrounding markup seems to have been stripped) — confirm the
+ exact HTML against the real file before editing.
+ """
+ cards = []
+ for f in findings:
+ # Severity is upper-cased and later interpolated as-is (it is not
+ # passed through html.escape).
+ sev = (f.get("severity") or "").upper()
+ # Colour hex chosen by severity: one value for LOW, another otherwise.
+ # NOTE(review): no use of `color` is visible in this view — presumably
+ # it belongs to the stripped markup; confirm.
+ color = "#64748b" if sev == "LOW" else "#f59e0b"
+ alts = f.get("alt_slugs_404") or []
+ # The "404-Slugs" line is only emitted when the finding lists any.
+ alts_html = ""
+ if alts:
+ alts_html = (
+ "
"
+ f"404-Slugs: {html.escape(', '.join(alts))}
"
+ )
+ cards.append(
+ f""
+ f"
"
+ f"{sev} · {html.escape(f.get('check_id') or '')}
"
+ f"
"
+ f"{html.escape(f.get('title') or '')}
"
+ f"
"
+ f"{html.escape(f.get('norm') or '')}
"
+ f"{alts_html}"
+ f"
"
+ f"{html.escape(f.get('evidence') or '')}
"
+ f"
"
+ f"→ Empfehlung: "
+ f"{html.escape(f.get('action') or '')}
"
+ "
"
+ )
+ return (
+ ""
+ "
"
+ "🔗 Standard-Slug-Brüche (SEO / Bookmarks)"
+ "
"
+ + "".join(cards) +
+ ""
+ )
diff --git a/backend-compliance/compliance/api/agent_check/_orchestrator.py b/backend-compliance/compliance/api/agent_check/_orchestrator.py
index de219e12..235492d2 100644
--- a/backend-compliance/compliance/api/agent_check/_orchestrator.py
+++ b/backend-compliance/compliance/api/agent_check/_orchestrator.py
@@ -26,6 +26,7 @@ from ._b12_wiring import run_b12
from ._b13_wiring import run_b13
from ._b14_wiring import run_b14
from ._b15_wiring import run_b15
+from ._b16_wiring import run_b16
from ._constants import _compliance_check_jobs
from ._phase_a_resolve import run_phase_a
from ._phase_b_profile_check import run_phase_b
@@ -76,6 +77,7 @@ async def run_compliance_check(check_id: str, req) -> None:
run_b13(state) # Widerrufsbelehrung-Reachability (B2C-Pflicht)
run_b14(state) # Widersprüchliche Speicherdauer im selben Doc
run_b15(state) # AI-Act Rechtsgrundlage (LLM-Vendor auf lit. f)
+ run_b16(state) # Footer-Label-vs-URL-Slug-Drift
# Phase D-3 top/mid/bot: Step 5 HTML blocks
await run_phase_d3_top(state)
await run_phase_d3_mid(state)
diff --git a/backend-compliance/compliance/services/mail_render_v2/_compose.py b/backend-compliance/compliance/services/mail_render_v2/_compose.py
index 083ae470..ba7baafb 100644
--- a/backend-compliance/compliance/services/mail_render_v2/_compose.py
+++ b/backend-compliance/compliance/services/mail_render_v2/_compose.py
@@ -54,6 +54,8 @@ def compose_v2(state: dict) -> str:
state.get("retention_conflict_html", ""),
# B15 AI-Act Rechtsgrundlage (LLM-Vendor auf lit. f)
state.get("ai_legal_basis_html", ""),
+ # B16 Footer-Label-vs-URL-Slug-Drift (SEO / Bookmarks)
+ state.get("url_slug_drift_html", ""),
# Browser-Matrix (Stage 1.c)
state.get("browser_matrix_html", ""),
# All legacy build_*_html() wrapped in V2 sections — preserves
diff --git a/backend-compliance/compliance/services/url_slug_drift_check.py b/backend-compliance/compliance/services/url_slug_drift_check.py
new file mode 100644
index 00000000..1435e5a9
--- /dev/null
+++ b/backend-compliance/compliance/services/url_slug_drift_check.py
@@ -0,0 +1,206 @@
+"""B16 — Footer-Label-vs-URL-Slug-Drift-Detector.
+
+Erkennt: gängige Footer-Labels (z.B. "Cookie-Richtlinie", "AGB",
+"Datenschutzerklärung") existieren als Bookmark-/SEO-Erwartung,
+aber auf der Site antwortet der entsprechende Standard-Slug mit 404.
+Real wird das Doc unter einem abweichenden Slug ausgeliefert.
+
+GT-Anker: Elli URL-STRUCTURE-001:
+ Footer-Label "Cookie-Richtlinie" → /cookie-richtlinie 404
+ Footer-Label "AGB" → /agb 404
+ Real: /de/cookies, /de/nutzungsbedingungen.
+
+Heuristik:
+ 1. Aus den discovered URLs den Base-Host extrahieren.
+ 2. Pro doc_type eine kleine Liste kanonischer Standard-Slugs prüfen
+ (HEAD oder GET), je 2 s Timeout.
+ 3. Wenn discovered Slug bekannt ist, ABER mindestens ein
+ gleichwertiger Standard-Slug 404/410 ergibt → ein LOW Finding pro
+ doc_type (mit der Liste aller betroffenen alt-Slugs).
+
+Severity: LOW (SEO/Bookmark-Bruch, kein juristisches Hardfail).
+"""
+
+from __future__ import annotations
+
+import logging
+import os
+from concurrent.futures import ThreadPoolExecutor
+from urllib.parse import urlparse
+
+import httpx
+
+logger = logging.getLogger(__name__)
+
+
+# Canonical DE/EN standard slugs per doc_type (without a leading "/").
+# Order: the most common German slug first, then synonyms, then EN.
+# The keys are the doc_type values this check knows about; entries with
+# any other doc_type are ignored by check_url_slug_drift.
+_CANONICAL_SLUGS: dict[str, tuple[str, ...]] = {
+ "dse": (
+ "datenschutz", "datenschutzerklaerung", "datenschutzerklärung",
+ "privacy", "privacy-policy",
+ ),
+ "impressum": (
+ "impressum", "imprint", "legal-notice",
+ ),
+ "cookie": (
+ "cookie-richtlinie", "cookies", "cookie-policy",
+ ),
+ "agb": (
+ "agb", "allgemeine-geschaeftsbedingungen",
+ "geschaeftsbedingungen", "terms-and-conditions",
+ ),
+ "nutzungsbedingungen": (
+ "nutzungsbedingungen", "terms-of-use", "terms-of-service",
+ ),
+ "widerruf": (
+ "widerrufsbelehrung", "widerruf", "cancellation",
+ ),
+}
+
+
+# Configuration switch (default: probing ON; can be switched off per run).
+# The environment variable is read once, when this module is imported (or
+# reloaded); changing URL_SLUG_PROBE_DISABLED afterwards does not update
+# this constant. Accepted "off" values, case-insensitive: 1, true, yes, on.
+_DISABLED = os.environ.get("URL_SLUG_PROBE_DISABLED", "").lower() in (
+ "1", "true", "yes", "on",
+)
+
+
+def _strip_path_slug(url: str) -> str:
+    """Return the lower-cased last path segment of ``url``.
+
+    Leading and trailing slashes are ignored. An empty string is returned
+    for an empty URL, a URL without a path, or a URL that cannot be parsed.
+    """
+    if not url:
+        return ""
+    try:
+        trimmed = (urlparse(url).path or "").strip("/")
+    except Exception:
+        # Best-effort helper: an unparsable URL simply has no slug.
+        return ""
+    # rpartition yields the text after the final "/", or the whole string
+    # when there is no "/" (an empty path therefore stays empty).
+    return trimmed.rpartition("/")[2].lower()
+
+
+def _origin_and_prefix(url: str) -> tuple[str, str]:
+    """Return ``(origin, locale_prefix)`` for ``url``.
+
+    The result lets callers rebuild alternative URLs at the same scope as
+    the discovered one.
+
+    Example: 'https://www.elli.eco/de/cookies' → ('https://www.elli.eco', '/de')
+
+    The first path segment counts as a locale prefix only when
+      * it is followed by at least one more segment (otherwise it is the
+        document slug itself, e.g. '/terms'), and
+      * it is shaped like a locale: two ASCII letters ('de'), optionally
+        followed by '-' or '_' and two more ASCII letters ('de-de', 'en_US').
+
+    Returns ``("", "")`` when the URL cannot be parsed or lacks a scheme or
+    host, so callers can skip it with a plain ``if not origin`` check.
+    """
+    try:
+        parsed = urlparse(url)
+        if not parsed.scheme or not parsed.netloc:
+            # Without scheme and host no absolute probe URL can be built.
+            return "", ""
+        origin = f"{parsed.scheme}://{parsed.netloc}"
+        segments = [s for s in (parsed.path or "/").split("/") if s]
+        if len(segments) < 2:
+            return origin, ""
+        head = segments[0]
+        lang, sep, region = head[:2], head[2:3], head[3:]
+        lang_ok = len(lang) == 2 and lang.isascii() and lang.isalpha()
+        region_ok = (
+            sep in ("-", "_")
+            and len(region) == 2
+            and region.isascii()
+            and region.isalpha()
+        )
+        if lang_ok and (len(head) == 2 or region_ok):
+            return origin, f"/{head}"
+        return origin, ""
+    except Exception:
+        # Best-effort helper: anything unparsable yields "no origin".
+        return "", ""
+
+def _head_status(url: str, timeout_s: float = 2.0) -> int:
+    """Return the HTTP status code for ``url``, or 0 on any error.
+
+    A HEAD request is sent first, without following redirects. If the
+    server answers 405, the same URL is requested again with GET and that
+    status is returned instead.
+    """
+    status = 0
+    try:
+        with httpx.Client(timeout=timeout_s, follow_redirects=False) as client:
+            response = client.head(url)
+            if response.status_code == 405:
+                # Some servers reject HEAD — retry with GET.
+                response = client.get(url)
+            status = response.status_code
+    except Exception:
+        status = 0
+    return status
+
+
+def check_url_slug_drift(state: dict) -> list[dict]:
+    """Probe canonical alternative slugs for each discovered legal document.
+
+    An entry of ``state["doc_entries"]`` counts as discovered when its
+    ``doc_type`` is a key of ``_CANONICAL_SLUGS``, it has a URL with a
+    non-empty last path segment, and its stripped text is at least 400
+    characters long. For each discovered doc_type the remaining canonical
+    slugs are requested at the same origin (and locale prefix).
+
+    Returns one LOW finding per doc_type that has at least one alternative
+    slug answering 404 or 410; all such slugs are listed under
+    ``alt_slugs_404``. Returns an empty list when probing is disabled via
+    ``_DISABLED``, when nothing was discovered, or when no alternative
+    answers 404/410.
+    """
+    if _DISABLED:
+        return []
+
+    # {doc_type: (discovered_url, discovered_slug)}; a later entry of the
+    # same doc_type replaces an earlier one.
+    discovered: dict[str, tuple[str, str]] = {}
+    for entry in state.get("doc_entries") or []:
+        dt = (entry.get("doc_type") or "").lower()
+        if dt not in _CANONICAL_SLUGS:
+            continue
+        url = (entry.get("url") or "").strip()
+        text_len = len((entry.get("text") or "").strip())
+        if not url or text_len < 400:
+            continue
+        slug = _strip_path_slug(url)
+        if slug:
+            discovered[dt] = (url, slug)
+    if not discovered:
+        return []
+
+    # Probe plan: every canonical slug except the one already discovered.
+    probes: list[tuple[str, str, str]] = []  # (doc_type, alt_slug, url)
+    for dt, (url, slug) in discovered.items():
+        origin, prefix = _origin_and_prefix(url)
+        if not origin:
+            continue
+        probes.extend(
+            (dt, alt, f"{origin}{prefix}/{alt}")
+            for alt in _CANONICAL_SLUGS[dt]
+            if alt.lower() != slug
+        )
+
+    # Cap to keep network noise bounded.
+    probes = probes[:18]
+    if not probes:
+        return []
+
+    # map() returns the statuses in the same order as the probes.
+    with ThreadPoolExecutor(max_workers=6) as pool:
+        statuses = list(pool.map(_head_status, [u for _, _, u in probes]))
+
+    # Group the broken alternatives by doc_type so that ONE finding per
+    # document carries the whole list of 404/410 slugs.
+    per_dt: dict[str, list[tuple[str, str]]] = {}
+    for (dt, alt, probe_url), status in zip(probes, statuses):
+        if status in (404, 410):
+            per_dt.setdefault(dt, []).append((alt, probe_url))
+
+    findings: list[dict] = []
+    for dt, alts in per_dt.items():
+        discovered_url = discovered[dt][0]
+        # The texts mention at most three examples; the title and
+        # alt_slugs_404 cover all of them.
+        broken_urls = ", ".join(u for _, u in alts[:3])
+        broken_slugs = ", ".join(s for s, _ in alts[:3])
+        findings.append({
+            "check_id": "URL-SLUG-DRIFT-001",
+            "severity": "LOW",
+            "severity_reason": "seo_bookmark_break",
+            "doc_type": dt,
+            "title": (
+                f"Externe Bookmarks / SEO-Erwartung für {dt} brechen "
+                f"({len(alts)} Standard-Slug(s) 404)"
+            ),
+            "norm": (
+                "Kein juristischer Pflichttatbestand — Best-Practice "
+                "(SEO, externe Verlinkungen, Footer-Label-Konsistenz)"
+            ),
+            "evidence": (
+                f"Doc ist erreichbar unter '{discovered_url}'. "
+                f"Standard-Slug(s) {broken_slugs} liefern 404/410 "
+                f"({broken_urls})."
+            ),
+            "action": (
+                f"Redirects einrichten von {broken_slugs} nach "
+                f"'{discovered_url}' — damit externe Bookmarks, "
+                "alte Footer-Labels und Google-Treffer nicht brechen."
+            ),
+            "alt_slugs_404": [s for s, _ in alts],
+        })
+    if findings:
+        logger.info("B16 url-slug-drift: %d finding(s)", len(findings))
+    return findings
diff --git a/backend-compliance/tests/test_url_slug_drift_check.py b/backend-compliance/tests/test_url_slug_drift_check.py
new file mode 100644
index 00000000..55d4d4af
--- /dev/null
+++ b/backend-compliance/tests/test_url_slug_drift_check.py
@@ -0,0 +1,139 @@
+"""Tests for B16 URL-Slug-Drift-Detector (GT URL-STRUCTURE-001)."""
+
+from unittest.mock import patch
+
+from compliance.services.url_slug_drift_check import (
+ _origin_and_prefix,
+ _strip_path_slug,
+ check_url_slug_drift,
+)
+
+
+class TestStripPathSlug:
+    """``_strip_path_slug`` yields the last path segment of a URL."""
+
+    def test_simple(self):
+        slug = _strip_path_slug("https://x.de/cookies")
+        assert slug == "cookies"
+
+    def test_with_lang_prefix(self):
+        slug = _strip_path_slug("https://x.de/de/cookies")
+        assert slug == "cookies"
+
+    def test_trailing_slash(self):
+        slug = _strip_path_slug("https://x.de/cookies/")
+        assert slug == "cookies"
+
+    def test_empty(self):
+        slug = _strip_path_slug("")
+        assert slug == ""
+
+
+class TestOriginAndPrefix:
+    """``_origin_and_prefix`` splits a URL into origin and locale prefix."""
+
+    def test_lang_prefix(self):
+        origin, prefix = _origin_and_prefix("https://www.elli.eco/de/cookies")
+        assert (origin, prefix) == ("https://www.elli.eco", "/de")
+
+    def test_no_lang_prefix(self):
+        origin, prefix = _origin_and_prefix("https://x.de/cookies")
+        assert (origin, prefix) == ("https://x.de", "")
+
+    def test_locale_prefix(self):
+        origin, prefix = _origin_and_prefix("https://x.de/de-de/cookies")
+        assert (origin, prefix) == ("https://x.de", "/de-de")
+
+
+class TestCheckURLSlugDrift:
+    """Behaviour of ``check_url_slug_drift`` with the HTTP probe mocked."""
+
+    def test_no_docs_no_findings(self):
+        assert check_url_slug_drift({"doc_entries": []}) == []
+
+    def test_disabled_via_env_no_findings(self, monkeypatch):
+        import importlib
+
+        from compliance.services import url_slug_drift_check
+
+        monkeypatch.setenv("URL_SLUG_PROBE_DISABLED", "1")
+        # The module-level _DISABLED flag is computed at import time, so
+        # the module has to be reloaded for the env var to take effect.
+        importlib.reload(url_slug_drift_check)
+        try:
+            # Probes are mocked to 404 so that a broken kill-switch fails
+            # deterministically instead of causing real network traffic.
+            with patch(
+                "compliance.services.url_slug_drift_check._head_status",
+                return_value=404,
+            ):
+                result = url_slug_drift_check.check_url_slug_drift({
+                    "doc_entries": [{
+                        "doc_type": "cookie",
+                        "url": "https://x.de/de/cookies",
+                        "text": "x" * 500,
+                    }]
+                })
+            assert result == []
+        finally:
+            # Restore the flag even when the assertion fails; otherwise
+            # later tests would run against a module that is still disabled.
+            monkeypatch.delenv("URL_SLUG_PROBE_DISABLED", raising=False)
+            importlib.reload(url_slug_drift_check)
+
+    def test_all_alternatives_200_no_finding(self):
+        with patch(
+            "compliance.services.url_slug_drift_check._head_status",
+            return_value=200,
+        ):
+            result = check_url_slug_drift({
+                "doc_entries": [{
+                    "doc_type": "cookie",
+                    "url": "https://x.de/de/cookies",
+                    "text": "x" * 500,
+                }]
+            })
+        assert result == []
+
+    def test_alternative_404_emits_finding(self):
+        with patch(
+            "compliance.services.url_slug_drift_check._head_status",
+            return_value=404,
+        ):
+            result = check_url_slug_drift({
+                "doc_entries": [{
+                    "doc_type": "cookie",
+                    "url": "https://x.de/de/cookies",
+                    "text": "x" * 500,
+                }]
+            })
+        assert len(result) == 1
+        f = result[0]
+        assert f["check_id"] == "URL-SLUG-DRIFT-001"
+        assert f["severity"] == "LOW"
+        assert f["doc_type"] == "cookie"
+        assert "cookie-richtlinie" in f["alt_slugs_404"]
+
+    def test_short_text_skipped(self):
+        # text < 400 chars → not counted as reachable doc
+        with patch(
+            "compliance.services.url_slug_drift_check._head_status",
+            return_value=404,
+        ):
+            result = check_url_slug_drift({
+                "doc_entries": [{
+                    "doc_type": "cookie",
+                    "url": "https://x.de/de/cookies",
+                    "text": "x" * 50,
+                }]
+            })
+        assert result == []
+
+    def test_elli_pattern_cookie_and_agb_both_emit(self):
+        # Simulate Elli: cookie under /de/cookies, but cookie-richtlinie 404.
+        # agb-doc resolves at /de/nutzungsbedingungen with /agb 404.
+        # Note: nutzungsbedingungen is its own doc_type — Elli's "AGB"
+        # label thus has no canonical doc on the site.
+        state = {
+            "doc_entries": [
+                {"doc_type": "cookie",
+                 "url": "https://x.de/de/cookies",
+                 "text": "x" * 500},
+                {"doc_type": "nutzungsbedingungen",
+                 "url": "https://x.de/de/nutzungsbedingungen",
+                 "text": "x" * 500},
+            ]
+        }
+        with patch(
+            "compliance.services.url_slug_drift_check._head_status",
+            return_value=404,
+        ):
+            result = check_url_slug_drift(state)
+        # cookie has 2 canonical alts (cookie-richtlinie, cookie-policy);
+        # nutzungsbedingungen has 2 alts (terms-of-use, terms-of-service).
+        # Both should emit findings since all alts return 404.
+        doc_types = {f["doc_type"] for f in result}
+        assert "cookie" in doc_types
+        assert "nutzungsbedingungen" in doc_types