feat(b16): Footer-Label-vs-URL-Slug-Drift-Check (GT URL-STRUCTURE-001)
Erkennt: gängige Footer-Labels / Bookmark- + SEO-Erwartungs-Slugs
(z.B. "Cookie-Richtlinie", "AGB", "Datenschutzerklärung") liefern
404, während das Doc tatsächlich unter einem abweichenden Slug
ausgeliefert wird.
GT-Anker (Elli URL-STRUCTURE-001):
Footer-Label "Cookie-Richtlinie" → /cookie-richtlinie 404
Real: /de/cookies
→ externe Bookmarks und Google-Treffer brechen.
Heuristik:
- Aus auto-discovered URLs Origin + Sprach-Prefix extrahieren
(z.B. /de, /de-de)
- Pro doc_type 2-4 kanonische Standard-Slugs probieren (parallel
via ThreadPoolExecutor, 2s Timeout, HEAD → GET fallback bei 405)
- Wenn alternative Slug 404/410 → LOW Finding pro doc_type
- Probe-Cap auf 18 Requests gesamt (Network-Noise-Schutz)
- Abschaltbar via URL_SLUG_PROBE_DISABLED=1
Severity: LOW (Best-Practice, kein juristisches Hardfail).
Tests: 13/13 grün (Strip-Helper 4 + Origin-Helper 3 + Check-Pfade 6
inkl. mocked _head_status).
Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -0,0 +1,66 @@
|
||||
"""B16 wiring — Footer-Label-vs-URL-Slug-Drift-Detector.
|
||||
|
||||
Hängt sich an `state["extra_findings"]` an und rendert einen V2-Block
|
||||
(`url_slug_drift_html`).
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import html
|
||||
import logging
|
||||
|
||||
from compliance.services.url_slug_drift_check import check_url_slug_drift
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
def run_b16(state: dict) -> None:
|
||||
new = check_url_slug_drift(state)
|
||||
if not new:
|
||||
return
|
||||
extras = state.get("extra_findings") or []
|
||||
extras.extend(new)
|
||||
state["extra_findings"] = extras
|
||||
state["url_slug_drift_html"] = _render(new)
|
||||
logger.info("B16 url-slug-drift: %d finding(s)", len(new))
|
||||
|
||||
|
||||
def _render(findings: list[dict]) -> str:
|
||||
cards = []
|
||||
for f in findings:
|
||||
sev = (f.get("severity") or "").upper()
|
||||
color = "#64748b" if sev == "LOW" else "#f59e0b"
|
||||
alts = f.get("alt_slugs_404") or []
|
||||
alts_html = ""
|
||||
if alts:
|
||||
alts_html = (
|
||||
"<div style='font-size:12px;color:#475569;margin-top:6px;'>"
|
||||
f"<em>404-Slugs: {html.escape(', '.join(alts))}</em></div>"
|
||||
)
|
||||
cards.append(
|
||||
f"<div style='margin:12px 0;padding:14px;background:#fff;"
|
||||
f"border-left:3px solid {color};border-radius:4px;'>"
|
||||
f"<div style='font-weight:600;color:{color};font-size:14px;'>"
|
||||
f"{sev} · {html.escape(f.get('check_id') or '')}</div>"
|
||||
f"<div style='font-size:14px;margin-top:4px;'>"
|
||||
f"<strong>{html.escape(f.get('title') or '')}</strong></div>"
|
||||
f"<div style='font-size:12px;color:#64748b;margin-top:2px;'>"
|
||||
f"{html.escape(f.get('norm') or '')}</div>"
|
||||
f"{alts_html}"
|
||||
f"<div style='font-size:12px;color:#475569;margin-top:6px;'>"
|
||||
f"<em>{html.escape(f.get('evidence') or '')}</em></div>"
|
||||
f"<div style='font-size:13px;margin-top:8px;background:#dcfce7;"
|
||||
f"padding:8px 10px;border-radius:4px;'>"
|
||||
f"<strong>→ Empfehlung:</strong> "
|
||||
f"{html.escape(f.get('action') or '')}</div>"
|
||||
"</div>"
|
||||
)
|
||||
return (
|
||||
"<div style='margin:24px 0;padding:16px;border-left:4px solid #64748b;"
|
||||
"background:#f8fafc;border-radius:4px;'>"
|
||||
"<h2 style='margin:0 0 8px;color:#475569;font-size:16px;'>"
|
||||
"🔗 Standard-Slug-Brüche (SEO / Bookmarks)"
|
||||
"</h2>"
|
||||
+ "".join(cards) +
|
||||
"</div>"
|
||||
)
|
||||
@@ -26,6 +26,7 @@ from ._b12_wiring import run_b12
|
||||
from ._b13_wiring import run_b13
|
||||
from ._b14_wiring import run_b14
|
||||
from ._b15_wiring import run_b15
|
||||
from ._b16_wiring import run_b16
|
||||
from ._constants import _compliance_check_jobs
|
||||
from ._phase_a_resolve import run_phase_a
|
||||
from ._phase_b_profile_check import run_phase_b
|
||||
@@ -76,6 +77,7 @@ async def run_compliance_check(check_id: str, req) -> None:
|
||||
run_b13(state) # Widerrufsbelehrung-Reachability (B2C-Pflicht)
|
||||
run_b14(state) # Widersprüchliche Speicherdauer im selben Doc
|
||||
run_b15(state) # AI-Act Rechtsgrundlage (LLM-Vendor auf lit. f)
|
||||
run_b16(state) # Footer-Label-vs-URL-Slug-Drift
|
||||
# Phase D-3 top/mid/bot: Step 5 HTML blocks
|
||||
await run_phase_d3_top(state)
|
||||
await run_phase_d3_mid(state)
|
||||
|
||||
Reference in New Issue
Block a user