Files
breakpilot-compliance/backend-compliance/compliance/api/agent_doc_check_critical.py
T
Benjamin Admin 57c0f940a2
CI / detect-changes (push) Successful in 11s
CI / branch-name (push) Has been skipped
CI / nodejs-build (push) Successful in 2m19s
CI / test-go (push) Has been skipped
CI / test-python-document-crawler (push) Has been skipped
CI / test-python-dsms-gateway (push) Has been skipped
CI / guardrail-integrity (push) Has been skipped
CI / secret-scan (push) Has been skipped
CI / dep-audit (push) Has been skipped
CI / sbom-scan (push) Has been skipped
CI / validate-canonical-controls (push) Successful in 16s
CI / loc-budget (push) Failing after 15s
CI / go-lint (push) Has been skipped
CI / python-lint (push) Has been skipped
CI / nodejs-lint (push) Has been skipped
CI / iace-gt-coverage (push) Has been skipped
CI / test-python-backend (push) Successful in 37s
feat(consent+report): P56-P67 Mercedes-Audit-Cycle (Anti-Audit, Phase G Vendors, Cookie-Behavior-Validator + 5 Mail-Polish-Items) [migration-approved]
P56  Anti-Auditing-Detection als constructive Compliance-Finding (Audit-API-
     Empfehlung statt Anklage, weil Mercedes berechtigt Bots blockiert)
P57  Phase G vendor_details Union mit cmp_vendors -> 42 Anbieter sichtbar
P58  Anti-Audit-Detection robuster (Script-Domain-Check + Settings-spezifisch)
P59  Cookie-Behavior-Validator (4 Layer, 3-Tier-Severity: MEDIUM=Kategorie-
     Mismatch / HIGH=Zweck-Mismatch / CRITICAL=beide=Vorsatz-Indiz)
     + Open Cookie Database (CC0) als Library-Seed (2264 Cookies)
P59b Cookie-Behavior in Banner-Check verdrahtet + Mail-Block (BUGFIX:
     SessionLocal selbst oeffnen, db war im Background-Task nicht im Scope)

Mail-Polish nach Mercedes-Review:
P63  Banner-Footer-Links auch im wb7-link/role=link erkennen (Shadow-DOM-
     Walker label-based statt nur <a href>)
P64  Re-Access-Severity: MEDIUM statt HIGH, wenn Footer "Einstellungen" oder
     Mercedes-typisch existiert; OEM-Footer-Detection (wb7-footer)
P65  Text-Truncation: Word-Boundary statt Zeichen-Cut (kein "einfa"-Bruch
     mehr in Sofortmassnahmen)
P66  GF-Aktionen: Service-Zweck vs Cookie-Zweck explizit erklaert
     (haeufige Verwechslung Marketing/GF: "Akamai-Beschreibung" != Cookie-
     Zweck pro DSK-OH 2024)
P67  Stirring-Finding mit "Verlust-Framing"-Erklaerung + Alt-vs-Neutral-
     Beispiel, statt nur EDPB-Fachbegriff

Compliance-Advisor FAQ (admin agent-core/soul):
  + CNIL/EDPB Top-Bussgelder (Google 100M, Meta 60M, Amazon 35M)
  + Deutsche Praezedenz (LG Muenchen Google Fonts, EuGH Planet49, BGH I ZR 7/16)
  + 4 Risiko-Pfade (Bussgeld/Abmahnung/Sammelklage/NOYB) + Berechnungs-Methodik

Document-Generator Templates: AGB-DE (142), Impressum (140), Widerrufs-
formular-Anlage (143), DSR-Process-Dedup (139), Cookie-Library (144).

Architektur: doc_action_mappings.py + banner_dom_walkers.py +
cookie_behavior_validator.py + vendor_detail_extractor.py rausgezogen,
um die 500-LOC-Caps in agent_doc_check_report.py und
banner_text_checker.py einzuhalten.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-05-21 06:28:25 +02:00

229 lines
10 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
"""
P18 — Critical-Findings-Block fuer die Executive-Summary.
Analysiert die echten Daten (banner_checks, phases, scorecard, results) und
rendert einen ROTEN Sofortmassnahmen-Block GANZ OBEN in der Email — mit
Quellenangaben (DSK, EDPB, EuGH, Behoerden-Buessgeld-Faelle) und konkreten
Sofortmassnahmen.
Regel: Block wird nur gerendert wenn echte kritische Verstoesse vorliegen.
Bei sauberen Sites bleibt er weg.
"""
from __future__ import annotations
def _truncate_words(text: str, max_chars: int) -> str:
"""P65: Truncate at word boundary, never mid-word."""
if not text or len(text) <= max_chars:
return text
cut = text[:max_chars]
last_space = cut.rfind(" ")
if last_space > max_chars // 2:
cut = cut[:last_space]
return cut.rstrip(",;:.") + ""
# Bekannte Buessgeld-Praezedenzfaelle als Quellen-Hint
_BUSSGELD_REFS = {
"no_provider_per_category": "CNIL France 2023 — TikTok 5 Mio EUR (fehlende Vendor-Transparenz)",
"dse_unvollstaendig": "BayLDA 2024 — diverse Mittelstand-Faelle, 5k50k EUR",
"cookie_doc_missing": "LfDI BW 2023 — fehlende Cookie-Erklaerung, 30k EUR",
"dark_pattern_reject": "EDPB Guidelines 3/2022 + DSK 2024 — Bussgeldrahmen Art. 83 DSGVO",
"schrems_ii": "EuGH C-311/18 (Schrems II) — Bussgeldrahmen bis 4% Konzern-Umsatz",
"impressum_im_banner": "LG Rostock 3 O 22/19 — Impressum-Pflicht ueberlagernder Banner",
}
def _detect_critical_issues(
banner_result: dict | None,
scorecard: dict | None,
results: list,
) -> list[dict]:
"""Erkenne kritische Verstoesse aus den vorliegenden Daten."""
issues: list[dict] = []
br = banner_result or {}
sc = scorecard or {}
# 1) Banner-Violations (HIGH/CRITICAL) aus consent-tester
for v in (br.get("banner_checks") or {}).get("violations", []):
sev = (v.get("severity") or "").upper()
if sev in ("CRITICAL", "HIGH"):
issues.append({
"key": "banner_violation",
"title": _truncate_words(v.get("text", ""), 260),
"severity": sev,
"action": _action_for_banner_violation(v),
"source": v.get("legal_ref", ""),
"bussgeld": _BUSSGELD_REFS.get("impressum_im_banner")
if "impressum" in (v.get("text") or "").lower()
else _BUSSGELD_REFS.get("dark_pattern_reject"),
})
# 2) Category-Tests: Banner zeigt keine Provider-Details pro Kategorie.
# Bevorzugt das echte Signal aus dem Click-Through-Test (P19):
# provider_details_visible. Fallback: leere tracking_services.
cat_tests = br.get("category_tests") or []
cats_without_details = [
c for c in cat_tests
if c.get("category") != "necessary"
and (c.get("provider_details_visible") is False
or (c.get("provider_details_visible") is None
and not c.get("tracking_services")))
]
if cats_without_details and len(cat_tests) >= 2:
cats = ", ".join(c.get("category_label", c.get("category", "?"))
for c in cats_without_details)
issues.append({
"key": "no_provider_per_category",
"title": f"Cookie-Banner: Kategorien ({cats}) zeigen keine "
f"Provider-/Cookie-Details",
"severity": "HIGH",
"action": ("Pro Banner-Kategorie eine Liste der eingebundenen "
"Anbieter + Cookie-Details (Name, Zweck, Speicherdauer, "
"Drittlandtransfer) sichtbar machen — am besten als "
"ausklappbares Detail-Panel. Sonst ist die "
"Einwilligung nicht 'informiert' nach Art. 7 DSGVO "
"und gilt als unwirksam."),
"source": "Art. 7 Abs. 1 DSGVO, EDPB Guidelines 2/2023, DSK 2024",
"bussgeld": _BUSSGELD_REFS["no_provider_per_category"],
})
# 3) DSGVO/TDDDG-Score < 30%: DSE rechtswidrig
pct = int((sc.get("totals") or {}).get("pct", 100))
if pct and pct < 30:
issues.append({
"key": "dse_unvollstaendig",
"title": f"Datenschutzerklaerung erfuellt nur {pct}% der Pflichten",
"severity": "HIGH",
"action": ("Vollstaendig nach Art. 13 DSGVO ueberarbeiten: "
"Verantwortlicher, Zwecke, Rechtsgrundlage, "
"Speicherdauer, Drittland-Transfers, alle Betroffenen-"
"rechte, konkrete Aufsichtsbehoerde."),
"source": "Art. 13 DSGVO + Art. 14 (alternativ), DSK-OH Telemedien 2024",
"bussgeld": _BUSSGELD_REFS["dse_unvollstaendig"],
})
# 4) Cookie-Richtlinie fehlt komplett (nicht erreichbar)
cookie_missing = any(
(r.doc_type == "cookie" if hasattr(r, "doc_type") else
r.get("doc_type") == "cookie")
and ((r.error if hasattr(r, "error") else r.get("error", "")) or "")
.startswith("Auf der Website nicht gefunden")
for r in (results or [])
)
cookie_deduped = any(
(r.doc_type == "cookie" if hasattr(r, "doc_type") else
r.get("doc_type") == "cookie")
and "Nicht separat vorhanden" in
((r.error if hasattr(r, "error") else r.get("error", "")) or "")
for r in (results or [])
)
if cookie_missing or cookie_deduped:
issues.append({
"key": "cookie_doc_missing",
"title": ("Keine eigenstaendige Cookie-Richtlinie"
if cookie_deduped
else "Cookie-Richtlinie nicht auffindbar"),
"severity": "HIGH",
"action": ("Separate Cookie-Richtlinie-Seite erstellen mit "
"tabellarischer Auflistung aller Cookies (Name, "
"Anbieter, Zweck, Speicherdauer, Drittlandtransfer). "
"Direkt aus dem Banner verlinken."),
"source": "Art. 13 DSGVO, §25 TDDDG, DSK-OH Telemedien 2024",
"bussgeld": _BUSSGELD_REFS["cookie_doc_missing"],
})
# 5) Schrems-II-Risiko: Google/Meta/Microsoft im Banner, aber keine SCC/DPF
# Detection: pre-/post-consent-cookies in den phases enthalten US-Tracker
phases = br.get("phases") or {}
has_us_tracker = False
for ph in phases.values():
if not isinstance(ph, dict):
continue
for t in (ph.get("tracking_services") or []):
if isinstance(t, dict):
name = (t.get("name", "") or "").lower()
else:
name = str(t).lower()
if any(w in name for w in ("google", "meta", "facebook",
"microsoft", "linkedin", "tiktok")):
has_us_tracker = True
break
if has_us_tracker:
issues.append({
"key": "schrems_ii",
"title": "US-Tracker geladen — Schrems-II-Risiko",
"severity": "HIGH",
"action": ("Pro Drittland-Anbieter dokumentieren: SCC (Art. 46 "
"DSGVO) ODER DPF-Zertifizierung pruefen + in der "
"Datenschutzerklaerung explizit benennen."),
"source": "Art. 44 ff. DSGVO, EuGH C-311/18 (Schrems II)",
"bussgeld": _BUSSGELD_REFS["schrems_ii"],
})
return issues
def _action_for_banner_violation(v: dict) -> str:
text = (v.get("text") or "").lower()
if "impressum" in text:
return ("Impressum-Link direkt im Banner ergaenzen — bei "
"ueberlagerndem Banner Pflicht nach §5 TMG.")
if "ablehnen" in text or "dark pattern" in text:
return ("'Ablehnen'-Button visuell gleichwertig zu 'Akzeptieren' "
"gestalten (gleiche Groesse, Farbe, Position).")
if "widerruf" in text or "cookie-einstellungen" in text:
return ("Floating-Icon oder Footer-Link 'Cookie-Einstellungen' "
"permanent einblenden — Widerruf so einfach wie Erteilung.")
return ("Banner-Verstoss beheben gemaess der genannten Rechtsgrundlage.")
def build_critical_findings_html(
banner_result: dict | None,
scorecard: dict | None,
results: list,
) -> str:
"""Render der Critical-Findings-Box. Leerer String wenn keine Issues."""
issues = _detect_critical_issues(banner_result, scorecard, results)
if not issues:
return ""
items = []
for i in issues:
items.append(
f'<div style="margin-bottom:10px;padding:10px 12px;'
f'background:rgba(255,255,255,0.06);border-radius:4px;'
f'border-left:3px solid #fca5a5">'
f'<div style="font-size:13px;font-weight:700;color:#fff;'
f'margin-bottom:4px">'
f'<span style="display:inline-block;background:#dc2626;color:#fff;'
f'padding:1px 6px;border-radius:3px;font-size:9px;'
f'margin-right:6px">{i["severity"]}</span>{i["title"]}</div>'
f'<div style="font-size:11px;color:#fecaca;margin-top:4px">'
f'<strong>Sofortmassnahme:</strong> {i["action"]}</div>'
f'<div style="font-size:10px;color:#fca5a5;margin-top:4px;'
f'font-style:italic">Rechtsgrundlage: {i.get("source","")}'
+ (f' &middot; Praezedenz: {i["bussgeld"]}'
if i.get("bussgeld") else "") +
f'</div></div>'
)
n = len(issues)
return (
'<div style="font-family:-apple-system,BlinkMacSystemFont,sans-serif;'
'max-width:700px;margin:0 auto 18px;padding:18px 22px;'
'background:#7f1d1d;border-radius:10px;color:white">'
'<div style="font-size:12px;color:#fecaca;text-transform:uppercase;'
'letter-spacing:1.5px;margin-bottom:6px;font-weight:700">'
'🚨 Sofortmassnahmen erforderlich</div>'
f'<h2 style="margin:0 0 10px;font-size:18px;color:white">'
f'{n} kritische Compliance-Risiken mit Bussgeldpotenzial</h2>'
'<p style="margin:0 0 12px;font-size:12px;color:#fecaca">'
'Die folgenden Verstoesse sind durch Tool-Analyse belegt und '
'erfordern Sofortmassnahmen. Bussgeldrahmen nach Art. 83 DSGVO: '
'<strong>bis 4% des weltweiten Jahresumsatzes</strong>.</p>'
+ "".join(items) +
'</div>'
)