feat(b16): Footer-Label-vs-URL-Slug-Drift-Check (GT URL-STRUCTURE-001)
Erkennt: gängige Footer-Labels / Bookmark- + SEO-Erwartungs-Slugs
(z.B. "Cookie-Richtlinie", "AGB", "Datenschutzerklärung") liefern
404, während das Doc tatsächlich unter einem abweichenden Slug
ausgeliefert wird.
GT-Anker (Elli URL-STRUCTURE-001):
Footer-Label "Cookie-Richtlinie" → /cookie-richtlinie 404
Real: /de/cookies
→ externe Bookmarks und Google-Treffer brechen.
Heuristik:
- Aus auto-discovered URLs Origin + Sprach-Prefix extrahieren
(z.B. /de, /de-de)
- Pro doc_type 2-5 kanonische Standard-Slugs probieren (parallel
via ThreadPoolExecutor, 2s Timeout, HEAD → GET fallback bei 405)
- Wenn alternative Slug 404/410 → LOW Finding pro doc_type
- Probe-Cap auf 18 Requests gesamt (Network-Noise-Schutz)
- Abschaltbar via URL_SLUG_PROBE_DISABLED=1
Severity: LOW (Best-Practice, kein juristisches Hardfail).
Tests: 13/13 grün (Strip-Helper 4 + Origin-Helper 3 + Check-Pfade 6
inkl. mocked _head_status).
Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -0,0 +1,66 @@
|
|||||||
|
"""B16 wiring — Footer-Label-vs-URL-Slug-Drift-Detector.
|
||||||
|
|
||||||
|
Hängt sich an `state["extra_findings"]` an und rendert einen V2-Block
|
||||||
|
(`url_slug_drift_html`).
|
||||||
|
"""
|
||||||
|
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import html
|
||||||
|
import logging
|
||||||
|
|
||||||
|
from compliance.services.url_slug_drift_check import check_url_slug_drift
|
||||||
|
|
||||||
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
|
||||||
|
def run_b16(state: dict) -> None:
    """Run the B16 URL-slug-drift check and publish its results on ``state``.

    New findings are appended to ``state["extra_findings"]`` and the rendered
    HTML block is stored under ``state["url_slug_drift_html"]``. When the
    check yields nothing, ``state`` is left untouched.
    """
    drift_findings = check_url_slug_drift(state)
    if drift_findings:
        # Reuse the list already on state; start a fresh one if the key is
        # missing or holds a falsy value.
        collected = state.get("extra_findings") or []
        collected.extend(drift_findings)
        state["extra_findings"] = collected
        state["url_slug_drift_html"] = _render(drift_findings)
        logger.info("B16 url-slug-drift: %d finding(s)", len(drift_findings))
|
||||||
|
|
||||||
|
|
||||||
|
def _render(findings: list[dict]) -> str:
    """Render slug-drift findings as one HTML section with a card per finding.

    Each finding dict is read via ``.get`` for the keys ``severity``,
    ``check_id``, ``title``, ``norm``, ``evidence``, ``action`` and
    ``alt_slugs_404``; missing or falsy values render as empty strings.
    Every value taken from a finding is HTML-escaped before interpolation.

    Returns the complete section markup (heading plus all cards). An empty
    ``findings`` list yields the section with the heading only.
    """
    cards = []
    for f in findings:
        sev = (f.get("severity") or "").upper()
        # LOW is drawn in neutral slate, anything else in amber.
        color = "#64748b" if sev == "LOW" else "#f59e0b"
        alts = f.get("alt_slugs_404") or []
        alts_html = ""
        if alts:
            # str() guards against non-string entries, which would make
            # str.join raise a TypeError.
            joined_alts = ", ".join(str(a) for a in alts)
            alts_html = (
                "<div style='font-size:12px;color:#475569;margin-top:6px;'>"
                f"<em>404-Slugs: {html.escape(joined_alts)}</em></div>"
            )
        cards.append(
            f"<div style='margin:12px 0;padding:14px;background:#fff;"
            f"border-left:3px solid {color};border-radius:4px;'>"
            f"<div style='font-weight:600;color:{color};font-size:14px;'>"
            # The severity is escaped like every other finding field.
            f"{html.escape(sev)} · {html.escape(f.get('check_id') or '')}</div>"
            f"<div style='font-size:14px;margin-top:4px;'>"
            f"<strong>{html.escape(f.get('title') or '')}</strong></div>"
            f"<div style='font-size:12px;color:#64748b;margin-top:2px;'>"
            f"{html.escape(f.get('norm') or '')}</div>"
            f"{alts_html}"
            f"<div style='font-size:12px;color:#475569;margin-top:6px;'>"
            f"<em>{html.escape(f.get('evidence') or '')}</em></div>"
            f"<div style='font-size:13px;margin-top:8px;background:#dcfce7;"
            f"padding:8px 10px;border-radius:4px;'>"
            f"<strong>→ Empfehlung:</strong> "
            f"{html.escape(f.get('action') or '')}</div>"
            "</div>"
        )
    return (
        "<div style='margin:24px 0;padding:16px;border-left:4px solid #64748b;"
        "background:#f8fafc;border-radius:4px;'>"
        "<h2 style='margin:0 0 8px;color:#475569;font-size:16px;'>"
        "🔗 Standard-Slug-Brüche (SEO / Bookmarks)"
        "</h2>"
        + "".join(cards) +
        "</div>"
    )
|
||||||
@@ -26,6 +26,7 @@ from ._b12_wiring import run_b12
|
|||||||
from ._b13_wiring import run_b13
|
from ._b13_wiring import run_b13
|
||||||
from ._b14_wiring import run_b14
|
from ._b14_wiring import run_b14
|
||||||
from ._b15_wiring import run_b15
|
from ._b15_wiring import run_b15
|
||||||
|
from ._b16_wiring import run_b16
|
||||||
from ._constants import _compliance_check_jobs
|
from ._constants import _compliance_check_jobs
|
||||||
from ._phase_a_resolve import run_phase_a
|
from ._phase_a_resolve import run_phase_a
|
||||||
from ._phase_b_profile_check import run_phase_b
|
from ._phase_b_profile_check import run_phase_b
|
||||||
@@ -76,6 +77,7 @@ async def run_compliance_check(check_id: str, req) -> None:
|
|||||||
run_b13(state) # Widerrufsbelehrung-Reachability (B2C-Pflicht)
|
run_b13(state) # Widerrufsbelehrung-Reachability (B2C-Pflicht)
|
||||||
run_b14(state) # Widersprüchliche Speicherdauer im selben Doc
|
run_b14(state) # Widersprüchliche Speicherdauer im selben Doc
|
||||||
run_b15(state) # AI-Act Rechtsgrundlage (LLM-Vendor auf lit. f)
|
run_b15(state) # AI-Act Rechtsgrundlage (LLM-Vendor auf lit. f)
|
||||||
|
run_b16(state) # Footer-Label-vs-URL-Slug-Drift
|
||||||
# Phase D-3 top/mid/bot: Step 5 HTML blocks
|
# Phase D-3 top/mid/bot: Step 5 HTML blocks
|
||||||
await run_phase_d3_top(state)
|
await run_phase_d3_top(state)
|
||||||
await run_phase_d3_mid(state)
|
await run_phase_d3_mid(state)
|
||||||
|
|||||||
@@ -54,6 +54,8 @@ def compose_v2(state: dict) -> str:
|
|||||||
state.get("retention_conflict_html", ""),
|
state.get("retention_conflict_html", ""),
|
||||||
# B15 AI-Act Rechtsgrundlage (LLM-Vendor auf lit. f)
|
# B15 AI-Act Rechtsgrundlage (LLM-Vendor auf lit. f)
|
||||||
state.get("ai_legal_basis_html", ""),
|
state.get("ai_legal_basis_html", ""),
|
||||||
|
# B16 Footer-Label-vs-URL-Slug-Drift (SEO / Bookmarks)
|
||||||
|
state.get("url_slug_drift_html", ""),
|
||||||
# Browser-Matrix (Stage 1.c)
|
# Browser-Matrix (Stage 1.c)
|
||||||
state.get("browser_matrix_html", ""),
|
state.get("browser_matrix_html", ""),
|
||||||
# All legacy build_*_html() wrapped in V2 sections — preserves
|
# All legacy build_*_html() wrapped in V2 sections — preserves
|
||||||
|
|||||||
@@ -0,0 +1,206 @@
|
|||||||
|
"""B16 — Footer-Label-vs-URL-Slug-Drift-Detector.
|
||||||
|
|
||||||
|
Erkennt: gängige Footer-Labels (z.B. "Cookie-Richtlinie", "AGB",
|
||||||
|
"Datenschutzerklärung") existieren als Bookmark-/SEO-Erwartung,
|
||||||
|
aber auf der Site antwortet der entsprechende Standard-Slug mit 404.
|
||||||
|
Real wird das Doc unter einem abweichenden Slug ausgeliefert.
|
||||||
|
|
||||||
|
GT-Anker: Elli URL-STRUCTURE-001:
|
||||||
|
Footer-Label "Cookie-Richtlinie" → /cookie-richtlinie 404
|
||||||
|
Footer-Label "AGB" → /agb 404
|
||||||
|
Real: /de/cookies, /de/nutzungsbedingungen.
|
||||||
|
|
||||||
|
Heuristik:
|
||||||
|
1. Aus den discovered URLs den Base-Host extrahieren.
|
||||||
|
2. Pro doc_type eine kleine Liste kanonischer Standard-Slugs prüfen
|
||||||
|
(HEAD oder GET), je 2 s Timeout.
|
||||||
|
3. Wenn discovered Slug bekannt ist, ABER mindestens ein
gleichwertiger Standard-Slug 404/410 ergibt → ein LOW Finding pro
doc_type (mit der Liste der betroffenen alt-Slugs).
|
||||||
|
|
||||||
|
Severity: LOW (SEO/Bookmark-Bruch, kein juristisches Hardfail).
|
||||||
|
"""
|
||||||
|
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import logging
|
||||||
|
import os
|
||||||
|
from concurrent.futures import ThreadPoolExecutor
|
||||||
|
from urllib.parse import urlparse
|
||||||
|
|
||||||
|
import httpx
|
||||||
|
|
||||||
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
|
||||||
|
# Canonical DE/EN standard slugs per doc_type (no leading slash).
# Order within each tuple: the most common German slug first, then
# synonyms, then the English variants.
_CANONICAL_SLUGS: dict[str, tuple[str, ...]] = {
    "dse": (
        "datenschutz",
        "datenschutzerklaerung",
        "datenschutzerklärung",
        "privacy",
        "privacy-policy",
    ),
    "impressum": (
        "impressum",
        "imprint",
        "legal-notice",
    ),
    "cookie": (
        "cookie-richtlinie",
        "cookies",
        "cookie-policy",
    ),
    "agb": (
        "agb",
        "allgemeine-geschaeftsbedingungen",
        "geschaeftsbedingungen",
        "terms-and-conditions",
    ),
    "nutzungsbedingungen": (
        "nutzungsbedingungen",
        "terms-of-use",
        "terms-of-service",
    ),
    "widerruf": (
        "widerrufsbelehrung",
        "widerruf",
        "cancellation",
    ),
}


# Kill switch (default: probing enabled). The environment variable is read
# once, when this module is imported; changing it later requires a reload.
_DISABLED = os.getenv("URL_SLUG_PROBE_DISABLED", "").lower() in (
    "1",
    "true",
    "yes",
    "on",
)
|
||||||
|
|
||||||
|
|
||||||
|
def _strip_path_slug(url: str) -> str:
    """Return the last path segment of ``url``, percent-decoded and lower-cased.

    Leading and trailing slashes are ignored. Returns ``""`` for an empty
    URL, a URL without a path, or a URL that cannot be parsed.

    Percent-decoding matters because the canonical slug list contains a
    non-ASCII entry ("datenschutzerklärung"), which typically appears in URLs
    as "datenschutzerkl%C3%A4rung".
    """
    # Function-scope import keeps this block self-contained.
    from urllib.parse import unquote

    if not url:
        return ""
    try:
        path = (urlparse(url).path or "").strip("/")
    except (ValueError, TypeError, AttributeError):
        # Malformed URL or non-string input: treat as "no slug".
        return ""
    if not path:
        return ""
    # Split first, decode afterwards, so an encoded slash (%2F) cannot change
    # which segment counts as the last one.
    return unquote(path.rsplit("/", 1)[-1]).lower()
|
||||||
|
|
||||||
|
|
||||||
|
def _origin_and_prefix(url: str) -> tuple[str, str]:
    """Return ``(origin, language_prefix)`` for ``url``.

    The pair lets callers rebuild alternative URLs at the same scope as the
    given one. ``language_prefix`` is ``"/<segment>"`` when the first path
    segment looks like a language or locale code, otherwise ``""``.

    Returns ``("", "")`` when the URL has no scheme or host, or cannot be
    parsed, so callers can skip it with a simple truthiness check on origin.

    Example: 'https://www.elli.eco/de/cookies' → ('https://www.elli.eco', '/de')
    """
    try:
        parsed = urlparse(url)
        if not parsed.scheme or not parsed.netloc:
            # Without both parts the "origin" would be a bare "://".
            return "", ""
        origin = f"{parsed.scheme}://{parsed.netloc}"
        segments = [s for s in (parsed.path or "/").split("/") if s]
        first = segments[0] if segments else ""
        # Heuristic: two ASCII letters ("de", "en") or a five-character
        # locale with a separator in the middle ("de-de", "en_us"). Other
        # five-character segments such as "legal" are ordinary path parts.
        is_language = len(first) == 2 and first.isascii() and first.isalpha()
        is_locale = (
            len(first) == 5
            and first[2] in "-_"
            and first.isascii()
            and first[:2].isalpha()
            and first[3:].isalpha()
        )
        if is_language or is_locale:
            return origin, f"/{first}"
        return origin, ""
    except (ValueError, TypeError, AttributeError):
        # Malformed URL or non-string input.
        return "", ""
|
||||||
|
|
||||||
|
|
||||||
|
def _head_status(url: str, timeout_s: float = 2.0) -> int:
    """Probe ``url`` and return its HTTP status code, or 0 if the probe fails.

    A HEAD request is sent first; if the server answers 405, the same URL is
    requested again with GET. Redirects are not followed, so a redirecting
    URL reports its 3xx status. Any exception raised while probing results
    in 0.
    """
    try:
        with httpx.Client(timeout=timeout_s, follow_redirects=False) as client:
            response = client.head(url)
            if response.status_code != 405:
                return response.status_code
            # Some servers reject HEAD with 405 — fall back to GET.
            return client.get(url).status_code
    except Exception:
        return 0
|
||||||
|
|
||||||
|
|
||||||
|
def check_url_slug_drift(state: dict) -> list[dict]:
    """Probe canonical alternative slugs for each discovered document.

    Reads ``state["doc_entries"]``. An entry is considered when its
    ``doc_type`` is a key of ``_CANONICAL_SLUGS``, it has a URL with a
    non-empty last path segment, and its stripped ``text`` has at least 400
    characters. For every such doc_type the canonical slugs other than the
    discovered one are probed at the same origin and language prefix.

    Returns one LOW finding per doc_type for which at least one probe
    answered 404 or 410; the finding lists all such slugs under
    ``alt_slugs_404``. Returns ``[]`` when the check is disabled, when no
    entry qualifies, or when nothing is broken.
    """
    if _DISABLED:
        return []

    # doc_type -> (url, last path segment); a later entry of the same
    # doc_type replaces an earlier one.
    found: dict[str, tuple[str, str]] = {}
    for entry in state.get("doc_entries") or []:
        doc_type = (entry.get("doc_type") or "").lower()
        if doc_type not in _CANONICAL_SLUGS:
            continue
        doc_url = (entry.get("url") or "").strip()
        body_len = len((entry.get("text") or "").strip())
        if not doc_url or body_len < 400:
            continue
        last_segment = _strip_path_slug(doc_url)
        if last_segment:
            found[doc_type] = (doc_url, last_segment)

    if not found:
        return []

    # Probe plan: (doc_type, alt_slug, probe_url) for every canonical slug
    # that differs from the discovered one.
    plan: list[tuple[str, str, str]] = []
    for doc_type, (doc_url, last_segment) in found.items():
        origin, prefix = _origin_and_prefix(doc_url)
        if not origin:
            continue
        plan.extend(
            (doc_type, candidate, f"{origin}{prefix}/{candidate}")
            for candidate in _CANONICAL_SLUGS[doc_type]
            if candidate.lower() != last_segment
        )

    # Cap to keep network noise bounded.
    plan = plan[:18]
    if not plan:
        return []

    with ThreadPoolExecutor(max_workers=6) as pool:
        # map() preserves input order, so statuses line up with plan.
        statuses = list(pool.map(_head_status, [probe_url for _, _, probe_url in plan]))

    # Group broken alternatives by doc_type so each doc yields ONE finding.
    broken: dict[str, list[tuple[str, str]]] = {}
    for (doc_type, candidate, probe_url), status in zip(plan, statuses):
        if status in (404, 410):
            broken.setdefault(doc_type, []).append((candidate, probe_url))

    findings: list[dict] = []
    for doc_type, dead in broken.items():
        discovered_url = found[doc_type][0]
        # Evidence and action text mention at most the first three.
        shown = dead[:3]
        broken_urls = ", ".join(probe_url for _, probe_url in shown)
        broken_slugs = ", ".join(candidate for candidate, _ in shown)
        findings.append({
            "check_id": "URL-SLUG-DRIFT-001",
            "severity": "LOW",
            "severity_reason": "seo_bookmark_break",
            "doc_type": doc_type,
            "title": (
                f"Externe Bookmarks / SEO-Erwartung für {doc_type} brechen "
                f"({len(dead)} Standard-Slug(s) 404)"
            ),
            "norm": (
                "Kein juristischer Pflichttatbestand — Best-Practice "
                "(SEO, externe Verlinkungen, Footer-Label-Konsistenz)"
            ),
            "evidence": (
                f"Doc ist erreichbar unter '{discovered_url}'. "
                f"Standard-Slug(s) {broken_slugs} liefern 404/410 "
                f"({broken_urls})."
            ),
            "action": (
                f"Redirects einrichten von {broken_slugs} nach "
                f"'{discovered_url}' — damit externe Bookmarks, "
                "alte Footer-Labels und Google-Treffer nicht brechen."
            ),
            "alt_slugs_404": [candidate for candidate, _ in dead],
        })

    if findings:
        logger.info("B16 url-slug-drift: %d finding(s)", len(findings))
    return findings
|
||||||
@@ -0,0 +1,139 @@
|
|||||||
|
"""Tests for B16 URL-Slug-Drift-Detector (GT URL-STRUCTURE-001)."""
|
||||||
|
|
||||||
|
from unittest.mock import patch
|
||||||
|
|
||||||
|
from compliance.services.url_slug_drift_check import (
|
||||||
|
_origin_and_prefix,
|
||||||
|
_strip_path_slug,
|
||||||
|
check_url_slug_drift,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
class TestStripPathSlug:
    """Unit tests for ``_strip_path_slug``."""

    def test_simple(self):
        slug = _strip_path_slug("https://x.de/cookies")
        assert slug == "cookies"

    def test_with_lang_prefix(self):
        # Only the last segment counts; the language prefix is dropped.
        slug = _strip_path_slug("https://x.de/de/cookies")
        assert slug == "cookies"

    def test_trailing_slash(self):
        slug = _strip_path_slug("https://x.de/cookies/")
        assert slug == "cookies"

    def test_empty(self):
        slug = _strip_path_slug("")
        assert slug == ""
|
||||||
|
|
||||||
|
|
||||||
|
class TestOriginAndPrefix:
    """Unit tests for ``_origin_and_prefix``."""

    def test_lang_prefix(self):
        origin, prefix = _origin_and_prefix("https://www.elli.eco/de/cookies")
        assert (origin, prefix) == ("https://www.elli.eco", "/de")

    def test_no_lang_prefix(self):
        origin, prefix = _origin_and_prefix("https://x.de/cookies")
        assert (origin, prefix) == ("https://x.de", "")

    def test_locale_prefix(self):
        origin, prefix = _origin_and_prefix("https://x.de/de-de/cookies")
        assert (origin, prefix) == ("https://x.de", "/de-de")
|
||||||
|
|
||||||
|
|
||||||
|
class TestCheckURLSlugDrift:
    """Tests for ``check_url_slug_drift`` with ``_head_status`` mocked out."""

    # Patch target for the network probe; no test here may hit the network.
    _HEAD_STATUS = "compliance.services.url_slug_drift_check._head_status"

    @staticmethod
    def _cookie_state(text_len: int = 500) -> dict:
        """Build a state with a single cookie doc at /de/cookies."""
        return {
            "doc_entries": [{
                "doc_type": "cookie",
                "url": "https://x.de/de/cookies",
                "text": "x" * text_len,
            }]
        }

    def test_no_docs_no_findings(self):
        assert check_url_slug_drift({"doc_entries": []}) == []

    def test_disabled_via_env_no_findings(self, monkeypatch):
        import importlib

        from compliance.services import url_slug_drift_check

        monkeypatch.setenv("URL_SLUG_PROBE_DISABLED", "1")
        try:
            # _DISABLED is computed at import time, so reload to pick up
            # the environment variable.
            importlib.reload(url_slug_drift_check)
            # With every probe answering 404 an enabled check WOULD emit a
            # finding, so an empty result proves the kill switch works
            # (and no real request is sent if it does not).
            with patch(self._HEAD_STATUS, return_value=404):
                result = url_slug_drift_check.check_url_slug_drift(
                    self._cookie_state()
                )
            assert result == []
        finally:
            # Always restore the module, even when the assertion fails;
            # otherwise later tests would run against a disabled check.
            monkeypatch.delenv("URL_SLUG_PROBE_DISABLED", raising=False)
            importlib.reload(url_slug_drift_check)

    def test_all_alternatives_200_no_finding(self):
        with patch(self._HEAD_STATUS, return_value=200):
            result = check_url_slug_drift(self._cookie_state())
        assert result == []

    def test_alternative_404_emits_finding(self):
        with patch(self._HEAD_STATUS, return_value=404):
            result = check_url_slug_drift(self._cookie_state())
        assert len(result) == 1
        f = result[0]
        assert f["check_id"] == "URL-SLUG-DRIFT-001"
        assert f["severity"] == "LOW"
        assert f["doc_type"] == "cookie"
        assert "cookie-richtlinie" in f["alt_slugs_404"]

    def test_short_text_skipped(self):
        # text < 400 chars → not counted as reachable doc
        with patch(self._HEAD_STATUS, return_value=404):
            result = check_url_slug_drift(self._cookie_state(text_len=50))
        assert result == []

    def test_elli_pattern_cookie_and_agb_both_emit(self):
        # Simulate Elli: the cookie doc lives at /de/cookies and the terms
        # doc at /de/nutzungsbedingungen. "nutzungsbedingungen" is its own
        # doc_type, so Elli's "AGB" label has no canonical doc on the site.
        state = {
            "doc_entries": [
                {"doc_type": "cookie",
                 "url": "https://x.de/de/cookies",
                 "text": "x" * 500},
                {"doc_type": "nutzungsbedingungen",
                 "url": "https://x.de/de/nutzungsbedingungen",
                 "text": "x" * 500},
            ]
        }
        with patch(self._HEAD_STATUS, return_value=404):
            result = check_url_slug_drift(state)
        # Each doc_type has two canonical alternatives besides the
        # discovered slug; all of them 404, so each emits one finding.
        by_type = {f["doc_type"]: f for f in result}
        assert len(result) == 2
        assert by_type["cookie"]["alt_slugs_404"] == [
            "cookie-richtlinie", "cookie-policy",
        ]
        assert by_type["nutzungsbedingungen"]["alt_slugs_404"] == [
            "terms-of-use", "terms-of-service",
        ]
|
||||||
Reference in New Issue
Block a user