65e8bb9d42
Erkennt: gängige Footer-Labels / Bookmark- + SEO-Erwartungs-Slugs
(z.B. "Cookie-Richtlinie", "AGB", "Datenschutzerklärung") liefern
404, während das Doc tatsächlich unter einem abweichenden Slug
ausgeliefert wird.
GT-Anker (Elli URL-STRUCTURE-001):
Footer-Label "Cookie-Richtlinie" → /cookie-richtlinie 404
Real: /de/cookies
→ externe Bookmarks und Google-Treffer brechen.
Heuristik:
- Aus auto-discovered URLs Origin + Sprach-Prefix extrahieren
(z.B. /de, /de-de)
- Pro doc_type 2-4 kanonische Standard-Slugs probieren (parallel
via ThreadPoolExecutor, 2s Timeout, HEAD → GET fallback bei 405)
- Wenn ein alternativer Slug 404/410 liefert → LOW-Finding pro doc_type
- Probe-Cap auf 18 Requests gesamt (Network-Noise-Schutz)
- Abschaltbar via URL_SLUG_PROBE_DISABLED=1
Severity: LOW (Best-Practice, kein juristisches Hardfail).
Tests: 13/13 grün (Strip-Helper 4 + Origin-Helper 3 + Check-Pfade 6
inkl. mocked _head_status).
Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
77 lines
2.6 KiB
Python
77 lines
2.6 KiB
Python
"""Mail-V2 compose — single entrypoint that returns the full HTML.

Call `compose_v2(state)` from the email-dispatch phase when
`MAIL_RENDER_V2=true`. The legacy compose remains the default so both
renderers can be A/B-compared in Mailpit.
"""
|
|
|
|
from __future__ import annotations
|
|
|
|
import os
|
|
|
|
from ._blocks import (
|
|
render_attachments,
|
|
render_caveats,
|
|
render_header,
|
|
render_per_doc,
|
|
render_per_theme,
|
|
render_sofortmassnahmen,
|
|
render_toc,
|
|
)
|
|
from ._blocks_findings import (
|
|
render_critical,
|
|
render_internal_reminders,
|
|
render_manual_review,
|
|
)
|
|
from ._legacy_wrappers import render_all_legacy
|
|
from ._style import page_close, page_open
|
|
|
|
|
|
def compose_v2(state: dict) -> str:
    """Assemble the complete V2 audit-mail HTML document from ``state``.

    The page is built as an ordered sequence of fragments: the page opener,
    the V2 section renderers, pre-rendered HTML snippets read from ``state``,
    the wrapped legacy sections, caveats, attachments and the page closer.
    Falsy fragments (e.g. empty strings) are skipped before everything is
    concatenated.

    Args:
        state: Mapping handed to every renderer; also read directly for
            ``site_name``, ``check_id`` and the ``*_html`` snippet keys.

    Returns:
        The concatenated HTML string.
    """
    site_label = state.get("site_name") or "—"

    # V2-native sections, in display order.
    leading_renderers = (
        render_header,
        render_toc,
        render_critical,
        render_manual_review,
        render_internal_reminders,
        render_sofortmassnahmen,
        render_per_doc,
        render_per_theme,
    )

    # HTML snippets looked up in ``state``; a missing key contributes nothing.
    snippet_keys = (
        # B4 — cross-doc vendor consistency (Elli Vertex↔Iadvize pattern)
        "vendor_consistency_html",
        # B5 — AI-Act Art. 50 transparency obligation
        "ai_act_html",
        # B6/B7/B8/B9/B10 — DPO + staleness + CMP + multi-entity + transfer
        "extra_findings_html",
        # B12 — chatbot cookie classification
        "chatbot_cookie_html",
        # B13 — withdrawal-notice reachability (mandatory for B2C)
        "widerruf_reach_html",
        # B14 — conflicting retention periods within the same doc
        "retention_conflict_html",
        # B15 — AI-Act legal basis (LLM vendor relying on lit. f)
        "ai_legal_basis_html",
        # B16 — footer-label vs. URL-slug drift (SEO / bookmarks)
        "url_slug_drift_html",
        # Browser matrix (stage 1.c)
        "browser_matrix_html",
    )

    # render_all_legacy wraps all legacy build_*_html() output in V2 sections,
    # preserving every information block from the old renderer (exec summary,
    # banner screenshot, VVT, redundancy, solutions, diff, etc.).
    trailing_renderers = (
        render_all_legacy,
        render_caveats,
        render_attachments,
    )

    fragments = [page_open(site_label)]
    for renderer in leading_renderers:
        fragments.append(renderer(state))
    for key in snippet_keys:
        fragments.append(state.get(key, ""))
    for renderer in trailing_renderers:
        fragments.append(renderer(state))

    check_id = state.get("check_id", "")
    build_sha = os.environ.get("BUILD_SHA", "unknown")
    fragments.append(page_close(check_id, build_sha))

    return "".join(filter(None, fragments))
|
|
|
|
|
|
def is_v2_enabled() -> bool:
    """Report whether the V2 mail renderer is switched on via the environment.

    Reads ``MAIL_RENDER_V2`` (default ``"false"``) and returns True when the
    value, ignoring case and surrounding whitespace, is one of ``true``,
    ``1``, ``yes`` or ``on``. Any other value, or an unset variable, yields
    False.
    """
    # Strip before comparing: values coming from .env files or shell exports
    # can carry stray spaces/newlines that would otherwise silently disable
    # the flag.
    flag = os.environ.get("MAIL_RENDER_V2", "false").strip().lower()
    return flag in ("true", "1", "yes", "on")
|