feat: Mail-Restrukturierung + B22 Cross-Domain-Doc-Detector
CI / validate-canonical-controls (push) Successful in 11s
CI / loc-budget (push) Successful in 13s
CI / go-lint (push) Has been skipped
CI / test-go (push) Has been skipped
CI / iace-gt-coverage (push) Has been skipped
CI / detect-changes (push) Successful in 7s
CI / branch-name (push) Has been skipped
CI / guardrail-integrity (push) Has been skipped
CI / secret-scan (push) Has been skipped
CI / dep-audit (push) Has been skipped
CI / sbom-scan (push) Has been skipped
CI / build-sha-integrity (push) Failing after 4s
CI / python-lint (push) Has been skipped
CI / nodejs-build (push) Has been skipped
CI / nodejs-lint (push) Has been skipped
CI / test-python-backend (push) Successful in 30s
CI / test-python-document-crawler (push) Has been skipped
CI / test-python-dsms-gateway (push) Has been skipped
CI / validate-canonical-controls (push) Successful in 11s
CI / loc-budget (push) Successful in 13s
CI / go-lint (push) Has been skipped
CI / test-go (push) Has been skipped
CI / iace-gt-coverage (push) Has been skipped
CI / detect-changes (push) Successful in 7s
CI / branch-name (push) Has been skipped
CI / guardrail-integrity (push) Has been skipped
CI / secret-scan (push) Has been skipped
CI / dep-audit (push) Has been skipped
CI / sbom-scan (push) Has been skipped
CI / build-sha-integrity (push) Failing after 4s
CI / python-lint (push) Has been skipped
CI / nodejs-build (push) Has been skipped
CI / nodejs-lint (push) Has been skipped
CI / test-python-backend (push) Successful in 30s
CI / test-python-document-crawler (push) Has been skipped
CI / test-python-dsms-gateway (push) Has been skipped
User-Feedback BMW v5: "740 Cookies verschwunden auf 31, Übersicht
verloren". Drei Anpassungen:
Mail-Restrukturierung (_executive_summary.py + _compose.py):
- render_executive_summary(): Top-of-mail TL;DR mit
Compliance-Score (gross + farbig), Top-3-Findings nach
Severity, Cookie-Statistik (deklariert/Browser/Drittland),
Severity-Verteilungs-Chips.
- collapsible(): wrapt jeden Block in <details>/<summary>.
Mailpit + alle modernen Mail-Clients rendern das nativ.
- _compose.py: alle 18+ B-Blöcke + per_doc + per_theme +
legacy_html in Akkordeons. NUR Critical-Findings + Sofort-
massnahmen sind immer offen — Reviewer sieht ~15 Zeilen
Übersicht und klappt selektiv auf.
- Cookie-Inventar (742) hat jetzt eigene Sektion ganz oben
(Akkordeon "🍪 Cookie-Inventar"), Vendor-Karten parallel.
B22 Cross-Domain-Legal-Doc-Detector (cross_domain_doc_check.py):
Real-Beispiel User-Feedback: Elli's AGB liegt auf docs.logpay.de
statt elli.eco. Detektor erkennt SLD-Mismatch:
- HIGH bei agb / widerruf (vertragsrelevant)
- MEDIUM bei dse / nutzungsbedingungen
- INFO bei cookie / impressum (Best-Practice)
Norm: DSGVO Art. 28 (AVV-Pflicht für Hosting) + Art. 13 Abs. 1
lit. e (Empfänger) + § 312i BGB (Cool-URLs).
9/9 Tests grün inkl. Elli/LogPay Pattern.
Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -0,0 +1,59 @@
|
||||
"""B22 wiring — Cross-Domain-Legal-Doc-Detector."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import html
|
||||
import logging
|
||||
|
||||
from compliance.services.cross_domain_doc_check import check_cross_domain_docs
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
def run_b22(state: dict) -> None:
    """Run the B22 cross-domain document check and publish results on ``state``.

    When the detector returns findings they are appended to
    ``state["extra_findings"]`` and their rendered HTML is stored under
    ``state["cross_domain_doc_html"]``. Without findings ``state`` is not
    modified.
    """
    detected = check_cross_domain_docs(state)
    if detected:
        # An existing non-empty list is extended in place; otherwise a new
        # list is started and stored.
        collected = state.get("extra_findings") or []
        collected.extend(detected)
        state["extra_findings"] = collected
        state["cross_domain_doc_html"] = _render(detected)
        logger.info("B22 cross-domain: %d finding(s)", len(detected))
|
||||
|
||||
|
||||
def _render(findings: list[dict]) -> str:
    """Build the HTML section listing all cross-domain document findings.

    Each finding becomes one card whose accent colour follows its upper-cased
    ``severity`` (HIGH red, MEDIUM amber, anything else slate). The fields
    ``check_id``, ``title``, ``norm``, ``evidence`` and ``recommended_action``
    are HTML-escaped; missing or falsy values render as empty strings. The
    cards are wrapped in one box whose heading shows the number of findings.
    """
    accent_by_level = {"HIGH": "#dc2626", "MEDIUM": "#f59e0b"}

    def text(item: dict, key: str) -> str:
        # Missing/None values collapse to "" before escaping.
        return html.escape(item.get(key) or "")

    def card(item: dict) -> str:
        level = (item.get("severity") or "").upper()
        accent = accent_by_level.get(level, "#64748b")
        return "".join([
            "<div style='margin:12px 0;padding:14px;background:#fff;",
            f"border-left:3px solid {accent};border-radius:4px;'>",
            f"<div style='font-weight:600;color:{accent};font-size:14px;'>",
            f"{level} · {text(item, 'check_id')}</div>",
            "<div style='font-size:14px;margin-top:4px;'>",
            f"<strong>{text(item, 'title')}</strong></div>",
            "<div style='font-size:12px;color:#64748b;margin-top:2px;'>",
            f"{text(item, 'norm')}</div>",
            "<div style='font-size:12px;color:#475569;margin-top:6px;'>",
            f"<em>{text(item, 'evidence')}</em></div>",
            "<div style='font-size:13px;margin-top:8px;background:#dcfce7;",
            "padding:8px 10px;border-radius:4px;'>",
            "<strong>→ Empfehlung:</strong> ",
            f"{text(item, 'recommended_action')}</div>",
            "</div>",
        ])

    card_markup = "".join(card(item) for item in findings)
    opening = (
        "<div style='margin:24px 0;padding:16px;border-left:4px solid #dc2626;"
        "background:#fef2f2;border-radius:4px;'>"
    )
    heading = (
        "<h2 style='margin:0 0 8px;color:#7f1d1d;font-size:16px;'>"
        f"🌐 Vertragsdoc auf Fremd-Domain ({len(findings)} Fall(e))"
        "</h2>"
    )
    intro = (
        "<p style='margin:0 0 8px;font-size:12px;color:#475569;'>"
        "Vertragsrelevante Dokumente liegen auf einer anderen Second-Level-"
        "Domain als die Site. AVV-Pflicht + URL-Stabilitäts-Risiko."
        "</p>"
    )
    return opening + heading + intro + card_markup + "</div>"
|
||||
@@ -31,6 +31,7 @@ from ._b17_wiring import run_b17
|
||||
from ._b18_wiring import run_b18
|
||||
from ._b19_wiring import run_b19
|
||||
from ._b20_wiring import run_b20
|
||||
from ._b22_wiring import run_b22
|
||||
from ._constants import _compliance_check_jobs
|
||||
from ._phase_a_resolve import run_phase_a
|
||||
from ._phase_b_profile_check import run_phase_b
|
||||
@@ -96,6 +97,7 @@ async def run_compliance_check(check_id: str, req) -> None:
|
||||
await run_b18(state) # Impressum-Specialist-Agent (Pattern+LLM)
|
||||
run_b19(state) # Cookie-Coherence (Salesforce-as-essential)
|
||||
await run_b20(state) # Legacy-URL-Discovery (Sitemap+Wayback)
|
||||
run_b22(state) # Cross-Domain-Legal-Doc-Hosting (Elli/LogPay)
|
||||
# Phase D-3 top/mid/bot: Step 5 HTML blocks
|
||||
await run_phase_d3_top(state)
|
||||
await run_phase_d3_mid(state)
|
||||
|
||||
@@ -0,0 +1,151 @@
|
||||
"""B22 — Cross-Domain-Legal-Doc-Detector.
|
||||
|
||||
Erkennt: vertragsrelevante Dokumente (AGB, DSE, Widerrufsbelehrung,
|
||||
Nutzungsbedingungen) liegen auf einer anderen Second-Level-Domain als
|
||||
die Site selbst. Beispiel Elli/LogPay: AGB von Elli (elli.eco) liegt
|
||||
auf docs.logpay.de.
|
||||
|
||||
Norm-Argument:
|
||||
- DSGVO Art. 28: das Hosten von Vertragsdokumenten durch einen
|
||||
Dritten ist Auftragsverarbeitung — AVV-Pflicht.
|
||||
- DSGVO Art. 13 Abs. 1 lit. e: Empfänger / Auftragsverarbeiter
|
||||
müssen in der DSE benannt sein.
|
||||
- Vertragsrechtlich: AGB-Verbindlichkeit wackelig wenn der
|
||||
Dokumenten-Host wechselt — was passiert wenn der externe Host
|
||||
den Pfad ändert (Cool-URLs-Problem § 312i BGB).
|
||||
|
||||
Severity:
|
||||
- HIGH bei AGB / Widerrufsbelehrung (vertragsrelevant)
|
||||
- MEDIUM bei DSE / Nutzungsbedingungen
|
||||
- INFO bei Cookie-Policy / Impressum (eher Best-Practice)
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import logging
|
||||
from urllib.parse import urlparse
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
# Two-label suffixes under which the registrable name is the third label
# from the right (consulted by _sld).
_COMPOUND_TLDS = {
    *(f"co.{cc}" for cc in ("uk", "jp", "nz", "kr", "za", "in")),
    *(f"com.{cc}" for cc in ("au", "br", "mx", "tr", "sg")),
}

# Severity of a cross-domain finding, keyed by lower-cased doc_type.
_SEVERITY_BY_DOC = {
    **dict.fromkeys(("agb", "widerruf"), "HIGH"),
    **dict.fromkeys(("dse", "nutzungsbedingungen"), "MEDIUM"),
    **dict.fromkeys(("cookie", "impressum", "social_media"), "INFO"),
}
|
||||
|
||||
|
||||
def _sld(host: str) -> str:
    """Return the second-level domain label of ``host``.

    The host is lower-cased and one leading ``www.`` label is dropped. The
    result is the label directly left of the TLD, or left of a two-label
    suffix listed in ``_COMPOUND_TLDS``. Empty input yields ``""``; a host
    without a dot is returned as is (lower-cased).
    """
    if not host:
        return ""
    host = host.lower()
    # str.lstrip("www.") removes any leading run of 'w' and '.' characters,
    # which mangles hosts such as "web.de" or "www.wwf.de". Drop only the
    # literal prefix instead.
    if host.startswith("www."):
        host = host[len("www."):]
    parts = host.split(".")
    if len(parts) < 2:
        return host
    if len(parts) >= 3 and ".".join(parts[-2:]) in _COMPOUND_TLDS:
        return parts[-3]
    return parts[-2]
|
||||
|
||||
|
||||
def _site_origin_sld(state: dict) -> str:
|
||||
"""Find the primary site SLD by counting most common host in
|
||||
submitted URLs."""
|
||||
counter: dict[str, int] = {}
|
||||
for e in (state.get("doc_entries") or []):
|
||||
url = (e.get("url") or "").strip()
|
||||
if not url or "://" not in url:
|
||||
continue
|
||||
# Skip auto-discovered docs (they may already be cross-domain
|
||||
# by design — we want the USER's stated origin).
|
||||
if e.get("auto_discovered"):
|
||||
continue
|
||||
try:
|
||||
host = urlparse(url).netloc
|
||||
sld = _sld(host)
|
||||
if sld:
|
||||
counter[sld] = counter.get(sld, 0) + 1
|
||||
except Exception:
|
||||
continue
|
||||
if not counter:
|
||||
# Fallback: use any URL
|
||||
for e in (state.get("doc_entries") or []):
|
||||
url = (e.get("url") or "").strip()
|
||||
if url and "://" in url:
|
||||
return _sld(urlparse(url).netloc)
|
||||
return ""
|
||||
return max(counter, key=counter.get)
|
||||
|
||||
|
||||
def check_cross_domain_docs(state: dict) -> list[dict]:
    """Emit one finding per document whose URL is on a different SLD than the site.

    The site SLD comes from ``_site_origin_sld(state)``; if it is empty no
    findings are produced. Each entry of ``state["doc_entries"]`` with an
    absolute URL is compared by SLD. Severity is looked up in
    ``_SEVERITY_BY_DOC`` by the lower-cased ``doc_type`` and defaults to
    ``"MEDIUM"`` for unknown types.

    Returns a list of finding dicts with the keys ``check_id``, ``severity``,
    ``severity_reason``, ``doc_type``, ``site_sld``, ``host_sld``, ``url``,
    ``title``, ``norm``, ``evidence`` and ``recommended_action``.
    """
    primary = _site_origin_sld(state)
    if not primary:
        return []

    # Human-readable labels per doc_type; unknown types fall back to the
    # upper-cased type name. Built once instead of once per entry.
    doc_labels = {
        "agb": "Allgemeine Geschäftsbedingungen",
        "widerruf": "Widerrufsbelehrung",
        "dse": "Datenschutzerklärung",
        "nutzungsbedingungen": "Nutzungsbedingungen",
        "cookie": "Cookie-Richtlinie",
        "impressum": "Impressum",
        "social_media": "Social-Media-Hinweise",
    }

    findings: list[dict] = []
    for e in (state.get("doc_entries") or []):
        url = (e.get("url") or "").strip()
        doc_type = (e.get("doc_type") or "").lower()
        if not url or "://" not in url:
            continue
        try:
            host = urlparse(url).netloc
            url_sld = _sld(host)
        except ValueError:
            # urlparse rejects the URL (e.g. malformed IPv6 literal).
            continue
        if not url_sld or url_sld == primary:
            continue
        # Cross-domain hosting detected.
        severity = _SEVERITY_BY_DOC.get(doc_type, "MEDIUM")
        doc_label = doc_labels.get(doc_type, doc_type.upper())
        findings.append({
            "check_id": "CROSS-DOMAIN-DOC-001",
            "severity": severity,
            "severity_reason": "third_party_hosted",
            "doc_type": doc_type,
            "site_sld": primary,
            "host_sld": url_sld,
            "url": url,
            "title": (
                f"{doc_label} liegt auf Drittanbieter-Domain "
                f"({host}) statt {primary}"
            ),
            "norm": (
                "DSGVO Art. 28 (AVV) + Art. 13 Abs. 1 lit. e (Empfänger) + "
                "§ 312i BGB (Cool-URLs / Vertragspflicht)"
            ),
            "evidence": (
                f"Site-Origin: {primary} · "
                f"Dokument gehostet auf: {host} · "
                f"URL: {url[:120]}"
            ),
            # The segment naming the external host must be an f-string;
            # without the prefix the mail shows the literal text "{host}".
            "recommended_action": (
                f"Entweder das Dokument auf eigene Domain ({primary}) "
                f"migrieren ODER (a) den externen Host {host} als "
                "Auftragsverarbeiter in der DSE benennen, (b) AVV "
                "abschließen, (c) sicherstellen dass URL-Stabilität "
                "vertraglich garantiert ist (§ 312i BGB Cool-URL-Pflicht)."
            ),
        })
    if findings:
        logger.info("B22 cross-domain: %d finding(s)", len(findings))
    return findings
|
||||
@@ -27,59 +27,85 @@ from ._vendor_cards import (
|
||||
render_info_box_rechtsrahmen,
|
||||
render_vendor_cards,
|
||||
)
|
||||
from ._executive_summary import collapsible, render_executive_summary
|
||||
from ._legacy_wrappers import render_all_legacy
|
||||
from ._style import page_close, page_open
|
||||
|
||||
|
||||
def compose_v2(state: dict) -> str:
|
||||
"""Build the full audit-mail HTML in the V2 layout."""
|
||||
"""Build the full audit-mail HTML in the V2 layout.
|
||||
|
||||
Struktur:
|
||||
1. Header (Site-Name + Datum)
|
||||
2. Executive Summary (Compliance-Score + Top-3 + Cookie-Stats)
|
||||
3. Critical Findings (immer offen, max 5)
|
||||
4. Alle anderen Sektionen als <details>-Akkordeons (kollabiert)
|
||||
5. Caveats + Attachments + Page-Close
|
||||
"""
|
||||
site = state.get("site_name") or "—"
|
||||
parts = [
|
||||
page_open(site),
|
||||
render_header(state),
|
||||
render_info_box_rechtsrahmen(),
|
||||
render_toc(state),
|
||||
render_vendor_cards(
|
||||
state.get("cmp_vendors") or [],
|
||||
state.get("cookie_coherence_findings") or [],
|
||||
),
|
||||
render_executive_summary(state),
|
||||
# IMMER OFFEN: kritische Findings + Sofortmaßnahmen
|
||||
render_critical(state),
|
||||
render_manual_review(state),
|
||||
render_internal_reminders(state),
|
||||
render_sofortmassnahmen(state),
|
||||
render_per_doc(state),
|
||||
render_per_theme(state),
|
||||
# B4 — Cross-Doc Vendor-Consistency (Elli Vertex↔Iadvize pattern)
|
||||
state.get("vendor_consistency_html", ""),
|
||||
# B5 — AI-Act Art. 50 Transparenzpflicht
|
||||
state.get("ai_act_html", ""),
|
||||
# B6/B7/B8/B9/B10 — DPO + Staleness + CMP + MultiEntity + Transfer
|
||||
state.get("extra_findings_html", ""),
|
||||
# B12 Chatbot-Cookie-Klassifikation
|
||||
state.get("chatbot_cookie_html", ""),
|
||||
# B13 Widerrufsbelehrung-Reachability (B2C-Pflicht)
|
||||
state.get("widerruf_reach_html", ""),
|
||||
# B14 Widersprüchliche Speicherdauer im selben Doc
|
||||
state.get("retention_conflict_html", ""),
|
||||
# B15 AI-Act Rechtsgrundlage (LLM-Vendor auf lit. f)
|
||||
state.get("ai_legal_basis_html", ""),
|
||||
# B16 Footer-Label-vs-URL-Slug-Drift (SEO / Bookmarks)
|
||||
state.get("url_slug_drift_html", ""),
|
||||
# B17 Audit-Walk-Video (Beweis-Aufzeichnung)
|
||||
state.get("audit_walk_html", ""),
|
||||
# B18 Impressum-Specialist-Agent (Pattern + LLM)
|
||||
state.get("impressum_agent_html", ""),
|
||||
# B19 Cookie-Coherence-Check (Salesforce-as-essential etc.)
|
||||
state.get("cookie_coherence_html", ""),
|
||||
# B20 Legacy-URL-Discovery + Multi-Version-DSE-Vergleich
|
||||
state.get("multi_version_dse_html", ""),
|
||||
state.get("legacy_url_html", ""),
|
||||
# Browser-Matrix (Stage 1.c)
|
||||
state.get("browser_matrix_html", ""),
|
||||
# All legacy build_*_html() wrapped in V2 sections — preserves
|
||||
# every information block from the old renderer (Exec Summary,
|
||||
# Banner-Screenshot, VVT, Redundancy, Solutions, Diff, etc.)
|
||||
render_all_legacy(state),
|
||||
|
||||
# AKKORDEON-Sektionen (kollabiert, Reviewer öffnet selektiv)
|
||||
collapsible("🍪 Cookie-Inventar (alle deklarierten + im Browser)",
|
||||
state.get("cookie_inventory_html", "")
|
||||
+ _render_per_theme_inventory_only(state)),
|
||||
collapsible("🏷️ Vendor-Übersicht (aggregiert nach Anbieter)",
|
||||
render_vendor_cards(
|
||||
state.get("cmp_vendors") or [],
|
||||
state.get("cookie_coherence_findings") or [],
|
||||
)),
|
||||
collapsible("🍪 Cookie-Kohärenz (Salesforce-Pattern, Pseudo-Zwecke)",
|
||||
state.get("cookie_coherence_html", "")),
|
||||
collapsible("💬 Chatbot-Cookie-Klassifikation",
|
||||
state.get("chatbot_cookie_html", "")),
|
||||
collapsible("📜 Widerrufsbelehrung-Reachability (B2C)",
|
||||
state.get("widerruf_reach_html", "")),
|
||||
collapsible("⏱️ Widersprüchliche Speicherdauer",
|
||||
state.get("retention_conflict_html", "")),
|
||||
collapsible("🤖 AI-Act Rechtsgrundlage (LLM-Vendor)",
|
||||
state.get("ai_legal_basis_html", "")),
|
||||
collapsible("🔗 URL-Slug-Drift (SEO / Bookmarks)",
|
||||
state.get("url_slug_drift_html", "")),
|
||||
collapsible("🎥 Audit-Walk-Video (Beweis-Aufzeichnung)",
|
||||
state.get("audit_walk_html", "")),
|
||||
collapsible("🤖 Impressum-Agent (Pattern + LLM)",
|
||||
state.get("impressum_agent_html", "")),
|
||||
collapsible("📑 Mehrere DSE-Versionen erkannt",
|
||||
state.get("multi_version_dse_html", "")),
|
||||
collapsible("🗂️ Legacy-URL-Inventar",
|
||||
state.get("legacy_url_html", "")),
|
||||
collapsible("🌐 Vertragsdoc auf Fremd-Domain (Cross-Domain)",
|
||||
state.get("cross_domain_doc_html", "")),
|
||||
collapsible("🔍 Cross-Doc Vendor-Konsistenz",
|
||||
state.get("vendor_consistency_html", "")),
|
||||
collapsible("⚖️ AI-Act Art. 50 Transparenzpflicht",
|
||||
state.get("ai_act_html", "")),
|
||||
collapsible("📌 Cross-Doc-Befunde (DPO, Staleness, CMP, Transfer)",
|
||||
state.get("extra_findings_html", "")),
|
||||
collapsible("🌐 Browser-Matrix (per-Browser-Verhalten)",
|
||||
state.get("browser_matrix_html", "")),
|
||||
collapsible("📋 Manuell zu prüfen",
|
||||
render_manual_review(state)),
|
||||
collapsible("🔧 Interne Erinnerungen",
|
||||
render_internal_reminders(state)),
|
||||
collapsible("📄 Per-Dokument-Befunde",
|
||||
render_per_doc(state)),
|
||||
collapsible("🧩 Per-Thema-Übersicht (Sub-Sektionen)",
|
||||
render_per_theme(state)),
|
||||
collapsible("📚 Rechtsrahmen-Info (Art. 13 DSGVO, § 25 TDDDG, …)",
|
||||
render_info_box_rechtsrahmen()),
|
||||
collapsible("📑 Inhaltsverzeichnis (alt)",
|
||||
render_toc(state)),
|
||||
collapsible("🗃️ Vollständige Legacy-Blöcke (Banner-Screenshot, "
|
||||
"VVT, Redundancy, Solutions, Diff)",
|
||||
render_all_legacy(state)),
|
||||
|
||||
render_caveats(state),
|
||||
render_attachments(state),
|
||||
page_close(state.get("check_id", ""),
|
||||
@@ -88,6 +114,17 @@ def compose_v2(state: dict) -> str:
|
||||
return "".join(p for p in parts if p)
|
||||
|
||||
|
||||
def _render_per_theme_inventory_only(state: dict) -> str:
    """Render only the cookie-inventory theme table.

    Delegates to ``render_theme_cookie_inventory`` from the sibling
    ``_blocks`` module, imported lazily. This block is best-effort: any
    failure (import or rendering) is logged with its traceback and ``""`` is
    returned, so the rest of the mail is still composed.
    """
    try:
        from ._blocks import render_theme_cookie_inventory

        return render_theme_cookie_inventory(state)
    except Exception:
        # Keep the mail rendering, but leave a trace instead of dropping the
        # inventory silently.
        import logging

        logging.getLogger(__name__).warning(
            "cookie inventory block could not be rendered", exc_info=True,
        )
        return ""
|
||||
|
||||
|
||||
def is_v2_enabled() -> bool:
|
||||
return os.environ.get("MAIL_RENDER_V2", "false").lower() in (
|
||||
"true", "1", "yes", "on",
|
||||
|
||||
@@ -0,0 +1,175 @@
|
||||
"""Executive Summary für die V2-Audit-Mail.
|
||||
|
||||
Sitzt ganz oben in der Mail. Reviewer sieht in ≤ 15 Zeilen:
|
||||
- Compliance-Score (gross + farbig)
|
||||
- Top-3-Findings nach Severity
|
||||
- Cookie-Statistik (deklariert / Browser / Drittland)
|
||||
- Saving-Indikation (1 Zahl)
|
||||
- Verteilung der Findings-Typen
|
||||
|
||||
Alles danach (B-Blocks, Per-Doc, Per-Theme) wird in
|
||||
`<details><summary>` kollabiert ausgespielt.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from collections import Counter
|
||||
from html import escape as h
|
||||
|
||||
|
||||
def _scorecard_html(state: dict) -> str:
    """Render the compliance-score tile of the executive summary.

    Reads ``state["scorecard"]["compliance_pct"]`` and falls back to
    ``completeness_pct`` (default 0) when it is ``None``. The value is
    truncated to an int and shown with a colour and label by band:
    >= 80 green "GUT", >= 50 amber "VERBESSERUNGSBEDARF", else red "KRITISCH".
    """
    card = state.get("scorecard") or {}
    raw = card.get("compliance_pct")
    if raw is None:
        raw = card.get("completeness_pct", 0)
    pct = 0 if raw is None else int(raw)

    if pct >= 80:
        tint, verdict = "#15803d", "GUT"
    elif pct >= 50:
        tint, verdict = "#f59e0b", "VERBESSERUNGSBEDARF"
    else:
        tint, verdict = "#dc2626", "KRITISCH"

    return "".join([
        "<div style='text-align:center;padding:14px 20px;",
        "background:#fff;border-radius:6px;min-width:140px;'>",
        "<div style='font-size:11px;color:#64748b;text-transform:uppercase;",
        "letter-spacing:1px;'>Compliance-Score</div>",
        f"<div style='font-size:42px;font-weight:700;color:{tint};",
        f"line-height:1;margin:6px 0;'>{pct}%</div>",
        f"<div style='font-size:11px;color:{tint};font-weight:600;'>",
        f"{verdict}</div></div>",
    ])
|
||||
|
||||
|
||||
def _findings_top_severity_html(state: dict, top_n: int = 3) -> str:
    """Render the first ``top_n`` HIGH/MEDIUM findings as compact rows.

    Findings come from ``state["extra_findings"]``. HIGH entries are listed
    before MEDIUM ones and input order is kept within each level. Titles are
    cut to 120 and norms to 80 characters before HTML-escaping. When no
    HIGH/MEDIUM finding exists a short placeholder paragraph is returned.
    """
    by_level: dict[str, list] = {"HIGH": [], "MEDIUM": []}
    for item in state.get("extra_findings") or []:
        level = (item.get("severity") or "").upper()
        if level in by_level:
            by_level[level].append(item)
    ranked = by_level["HIGH"] + by_level["MEDIUM"]

    if not ranked:
        return (
            "<p style='font-size:13px;color:#64748b;margin:0;'>"
            "<em>Keine HIGH/MEDIUM Findings — siehe Detail-Sektionen "
            "für Hinweise.</em></p>"
        )

    def row(item: dict) -> str:
        level = (item.get("severity") or "").upper()
        tint = "#dc2626" if level == "HIGH" else "#f59e0b"
        headline = (item.get("title") or "")[:120]
        legal_ref = (item.get("norm") or "")[:80]
        return "".join([
            "<div style='display:flex;gap:10px;padding:8px 10px;",
            f"background:#fff;border-left:3px solid {tint};",
            "margin-bottom:6px;border-radius:3px;'>",
            f"<div style='font-size:11px;font-weight:700;color:{tint};",
            f"min-width:60px;'>{level}</div>",
            "<div><div style='font-size:13px;font-weight:600;color:#1e293b;'>",
            f"{h(headline)}</div>",
            "<div style='font-size:11px;color:#64748b;margin-top:2px;'>",
            f"{h(legal_ref)}</div></div></div>",
        ])

    return "".join(map(row, ranked[:top_n]))
|
||||
|
||||
|
||||
# ISO 3166-1 alpha-2 codes NOT counted as third country: the 27 EU member
# states, the EEA states IS/LI/NO, and CH (already exempted by the original
# list).
_NON_THIRD_COUNTRY_CODES = frozenset({
    "AT", "BE", "BG", "HR", "CY", "CZ", "DK", "EE", "FI", "FR", "DE", "GR",
    "HU", "IE", "IT", "LV", "LT", "LU", "MT", "NL", "PL", "PT", "RO", "SK",
    "SI", "ES", "SE",
    "IS", "LI", "NO",
    "CH",
})


def _cookie_stats_html(state: dict) -> str:
    """Render the three cookie statistic tiles of the executive summary.

    - declared: total number of cookies listed across ``state["cmp_vendors"]``
    - in browser: length of ``state["banner_result"]["cookies_detailed"]``
    - third country: number of vendors whose ``country`` code is outside
      ``_NON_THIRD_COUNTRY_CODES``; vendors without a country are not counted

    The third-country figure is shown in red when it is greater than zero.
    """
    cmp_vendors = state.get("cmp_vendors") or []
    declared = sum(len(v.get("cookies") or []) for v in cmp_vendors)
    banner = state.get("banner_result") or {}
    in_browser = len(banner.get("cookies_detailed") or [])

    # The previous inline list missed many EU members (e.g. LU, GR, HU), so
    # vendors located there were reported as third-country vendors.
    third_country = 0
    for vendor in cmp_vendors:
        code = (vendor.get("country") or "").strip().upper()
        if code and code not in _NON_THIRD_COUNTRY_CODES:
            third_country += 1

    def tile(value: int, value_color: str, caption: str) -> str:
        return (
            "<div style='flex:1;min-width:80px;text-align:center;padding:8px;"
            "background:#fff;border-radius:4px;'>"
            f"<div style='font-size:22px;font-weight:700;color:{value_color};'>"
            f"{value}</div>"
            "<div style='font-size:10px;color:#64748b;text-transform:uppercase;'>"
            f"{caption}</div></div>"
        )

    return (
        "<div style='display:flex;gap:8px;flex-wrap:wrap;'>"
        + tile(declared, "#1e293b", "Cookies deklariert")
        + tile(in_browser, "#1e293b", "Im Browser gesetzt")
        + tile(
            third_country,
            "#dc2626" if third_country > 0 else "#64748b",
            "Vendoren Drittland",
        )
        + "</div>"
    )
|
||||
|
||||
|
||||
def _findings_distribution_html(state: dict) -> str:
    """Render "<n> <SEVERITY>" chips for the findings in ``state["extra_findings"]``.

    Severities are compared upper-cased. Only HIGH, MEDIUM, LOW and INFO are
    shown, in that order, and only when their count is non-zero. Chips are
    joined with " · ". Returns ``""`` when there is nothing to show.
    """
    chip_color = {
        "HIGH": "#dc2626",
        "MEDIUM": "#f59e0b",
        "LOW": "#64748b",
        "INFO": "#94a3b8",
    }
    tally = dict.fromkeys(chip_color, 0)
    for item in state.get("extra_findings") or []:
        level = (item.get("severity") or "").upper()
        if level in tally:
            tally[level] += 1
    return " · ".join(
        f"<span style='font-size:11px;color:{chip_color[level]};"
        f"font-weight:600;'>{count} {level}</span>"
        for level, count in tally.items()
        if count
    )
|
||||
|
||||
|
||||
def render_executive_summary(state: dict) -> str:
    """Assemble the top-of-mail summary box.

    Layout: heading, then a row with the score tile next to the severity
    chips and top findings, then the cookie statistic tiles, then a hint
    that details follow in the accordions below.
    """
    score_tile = _scorecard_html(state)
    severity_chips = _findings_distribution_html(state)
    top_findings = _findings_top_severity_html(state, top_n=3)
    cookie_tiles = _cookie_stats_html(state)

    segments = [
        "<div style='margin:0 0 16px;padding:18px 20px;",
        "background:linear-gradient(135deg,#f1f5f9,#e2e8f0);",
        "border-radius:8px;border-left:5px solid #0f766e;'>",
        "<h2 style='margin:0 0 12px;color:#1e293b;font-size:18px;'>",
        "📊 Executive Summary",
        "</h2>",
        "<div style='display:flex;gap:14px;align-items:stretch;",
        "margin-bottom:14px;flex-wrap:wrap;'>",
        score_tile,
        "<div style='flex:1;min-width:240px;'>",
        "<div style='font-size:11px;color:#64748b;font-weight:600;",
        "text-transform:uppercase;margin-bottom:6px;letter-spacing:1px;'>",
        f"Top Befunde · {severity_chips}",
        "</div>",
        top_findings,
        "</div>",
        "</div>",
        cookie_tiles,
        "<p style='margin:12px 0 0;font-size:11px;color:#64748b;",
        "text-align:right;'>",
        "<em>Details siehe Akkordeons unten — alle Sektionen ",
        "klappbar.</em></p>",
        "</div>",
    ]
    return "".join(segments)
|
||||
|
||||
|
||||
def collapsible(title: str, body: str, *, open_default: bool = False) -> str:
    """Wrap ``body`` in a ``<details>``/``<summary>`` accordion.

    ``title`` is HTML-escaped and used as the summary; ``body`` is inserted
    unchanged. An empty ``body`` produces ``""`` so that no empty accordion
    is emitted. With ``open_default=True`` the accordion starts expanded.
    """
    if body:
        details_tag = "<details open" if open_default else "<details"
        return "".join([
            details_tag,
            " style='margin:12px 0;border:1px solid #e2e8f0;",
            "border-radius:6px;padding:0;background:#fff;'>",
            "<summary style='cursor:pointer;padding:12px 16px;",
            "font-weight:600;color:#1e293b;font-size:14px;",
            "background:#f8fafc;border-radius:6px 6px 0 0;'>",
            h(title),
            "</summary>",
            "<div style='padding:8px 16px 16px;'>",
            body,
            "</div>",
            "</details>",
        ])
    return ""
|
||||
@@ -0,0 +1,88 @@
|
||||
"""Tests for B22 Cross-Domain-Legal-Doc-Detector."""
|
||||
|
||||
from compliance.services.cross_domain_doc_check import (
|
||||
_site_origin_sld,
|
||||
_sld,
|
||||
check_cross_domain_docs,
|
||||
)
|
||||
|
||||
|
||||
class TestSld:
    """Unit tests for the ``_sld`` helper."""

    def test_simple(self):
        """A leading ``www.`` label is ignored."""
        result = _sld("www.bmw.de")
        assert result == "bmw"

    def test_compound_tld(self):
        """Below a compound TLD the third label from the right is the SLD."""
        result = _sld("docs.example.co.uk")
        assert result == "example"

    def test_no_www(self):
        """A bare two-label host yields its first label."""
        result = _sld("elli.eco")
        assert result == "elli"
|
||||
|
||||
|
||||
class TestPrimaryDetection:
    """Unit tests for ``_site_origin_sld``."""

    def test_majority_wins(self):
        """The SLD used by most submitted URLs is taken as the site origin."""
        urls = (
            "https://elli.eco/de/impressum",
            "https://elli.eco/de/datenschutz",
            "https://docs.logpay.de/_docs/agb.pdf",
        )
        state = {"doc_entries": [{"url": u} for u in urls]}
        assert _site_origin_sld(state) == "elli"

    def test_auto_discovered_excluded(self):
        """Discovery results do not influence primary detection."""
        submitted = {"url": "https://elli.eco/de/impressum",
                     "auto_discovered": False}
        discovered = {"url": "https://discovered.tld/foo",
                      "auto_discovered": True}
        state = {"doc_entries": [submitted, discovered]}
        assert _site_origin_sld(state) == "elli"
|
||||
|
||||
|
||||
class TestCheck:
    """End-to-end tests for ``check_cross_domain_docs``."""

    def test_elli_logpay_pattern(self):
        """AGB hosted on logpay.de while the site is elli.eco: one HIGH finding."""
        entries = [
            {"doc_type": "dse", "url": "https://www.elli.eco/de/datenschutz"},
            {"doc_type": "impressum",
             "url": "https://www.elli.eco/de/impressum"},
            {"doc_type": "agb",
             "url": "https://docs.logpay.de/_docs/de/"
                    "allgemeine_geschaeftsbedingungen_de_EM.pdf"},
        ]
        findings = check_cross_domain_docs({"doc_entries": entries})
        assert len(findings) == 1
        finding = findings[0]
        assert finding["check_id"] == "CROSS-DOMAIN-DOC-001"
        assert finding["severity"] == "HIGH"  # AGB is HIGH
        assert finding["doc_type"] == "agb"
        assert finding["site_sld"] == "elli"
        assert finding["host_sld"] == "logpay"

    def test_same_subdomain_no_finding(self):
        """docs.bmw.de shares its SLD with www.bmw.de, so nothing is reported."""
        entries = [
            {"doc_type": "dse",
             "url": "https://www.bmw.de/de/datenschutz.html"},
            {"doc_type": "agb",
             "url": "https://docs.bmw.de/agb.pdf"},
        ]
        assert check_cross_domain_docs({"doc_entries": entries}) == []

    def test_no_primary_no_finding(self):
        """Without any URL there is no primary SLD and no finding."""
        assert check_cross_domain_docs({"doc_entries": []}) == []

    def test_severity_per_doc_type(self):
        """Severity follows the doc type of the cross-domain document."""
        entries = [
            {"doc_type": "agb", "url": "https://acme.de/x"},
            {"doc_type": "dse",
             "url": "https://docs.thirdparty.com/agb"},
            {"doc_type": "impressum",
             "url": "https://www.other.com/impressum"},
        ]
        findings = check_cross_domain_docs({"doc_entries": entries})
        severity_of = {item["doc_type"]: item["severity"] for item in findings}
        # agb is on the primary (acme.de): no finding.
        # dse on thirdparty.com: MEDIUM.
        # impressum on other.com: INFO.
        assert severity_of.get("dse") == "MEDIUM"
        assert severity_of.get("impressum") == "INFO"
|
||||
Reference in New Issue
Block a user