diff --git a/backend-compliance/compliance/api/agent_compliance_check_routes.py b/backend-compliance/compliance/api/agent_compliance_check_routes.py index 4fbd9225..754c0fb9 100644 --- a/backend-compliance/compliance/api/agent_compliance_check_routes.py +++ b/backend-compliance/compliance/api/agent_compliance_check_routes.py @@ -1081,6 +1081,24 @@ async def _run_compliance_check(check_id: str, req: ComplianceCheckRequest): except Exception as e: logger.warning("P102 mismatch detection failed: %s", e) + # P92 + P94: Banner-Konsistenz (CMP-Tool kaputt / Banner-vs-Doc-Diff) + consistency_html = "" + try: + from compliance.services.banner_consistency_checks import ( + run_all as run_consistency_checks, + build_consistency_block_html, + ) + cookie_doc_for_check = (doc_texts.get("cookie") + or doc_texts.get("dse") or "") + cons_findings = run_consistency_checks( + banner_result or {}, cookie_doc_for_check, + ) + if cons_findings: + consistency_html = build_consistency_block_html(cons_findings) + logger.info("P92/P94: %d Konsistenz-Findings", len(cons_findings)) + except Exception as e: + logger.warning("P92/P94 consistency-check failed: %s", e) + # P82: GF-1-Pager ganz oben in der Mail — 5-Bullet-Zusammenfassung # damit die GF nicht 124k Char lesen muss. gf_one_pager_html = "" @@ -1103,6 +1121,7 @@ async def _run_compliance_check(check_id: str, req: ComplianceCheckRequest): + cookie_arch_html + summary_html + scanned_html + profile_html + scorecard_html + redundancy_html + providers_html + banner_deep_html + library_mismatch_html + + consistency_html + vvt_html + report_html ) diff --git a/backend-compliance/compliance/services/banner_consistency_checks.py b/backend-compliance/compliance/services/banner_consistency_checks.py new file mode 100644 index 00000000..4eec49bf --- /dev/null +++ b/backend-compliance/compliance/services/banner_consistency_checks.py @@ -0,0 +1,226 @@ +""" +P92 + P94 — Banner-Konsistenz-Checks (Post-hoc auf banner_result). 
import logging
import re

logger = logging.getLogger(__name__)

# Lower-case substrings that indicate a "customize / settings" control in the
# text of the initial banner.
_ANPASSEN_KEYS = (
    "anpassen", "einstellungen", "customize", "preferences",
    "settings", "individuelle", "auswahl", "manage",
)

# Substrings of well-known cookie names; a token containing one of them is
# accepted as a cookie name without further checks.
_KNOWN_COOKIE_MARKERS = (
    "_ga", "_gid", "_gcl", "_fbp", "uc_", "ot_",
    "cookieconsent", "sessionid", "csrf", "ajs_", "amp_",
    "datadome", "incap_", "_pk_", "wp-", "yt-",
)

# Keywords that make an otherwise unknown identifier look like a cookie name.
_GENERIC_COOKIE_KEYWORDS = ("cookie", "consent", "track", "session")

_TOGGLE_RE = re.compile(r"checkbox|toggle|switch|aria-checked", re.I)
# Minimum length is 3 characters so that a standalone "_ga" is picked up.
_TOKEN_RE = re.compile(r"\b([A-Za-z_][A-Za-z0-9_\-\.]{2,40})\b")
_GENERIC_NAME_RE = re.compile(r"^[a-z][a-z0-9_]{3,30}$")
# An underscore, a digit or a camelCase hump separates technical identifiers
# from ordinary prose words such as "Cookies" or "tracking".
_IDENTIFIER_HINT_RE = re.compile(r"[_0-9]|[a-z][A-Z]")


def _phases(banner_result: dict) -> dict:
    """Return the ``phases`` mapping of *banner_result*.

    Returns an empty dict when *banner_result* is not a dict or when its
    ``phases`` entry is missing or not a dict.
    """
    if not isinstance(banner_result, dict):
        return {}
    phases = banner_result.get("phases")
    return phases if isinstance(phases, dict) else {}


def _pick_phase(phases: dict, *names: str) -> dict | None:
    """Return the first non-empty dict stored under one of *names*, else None."""
    for name in names:
        candidate = phases.get(name)
        if candidate and isinstance(candidate, dict):
            return candidate
    return None


def _as_text(value) -> str:
    """Return *value* if it is a string, otherwise an empty string."""
    return value if isinstance(value, str) else ""


def check_cmp_tool_availability(banner_result: dict) -> dict | None:
    """P92: report a settings tool that was opened but is broken or empty.

    Args:
        banner_result: Scan result; its ``phases`` mapping is read for the
            settings phase (``settings`` or ``after_settings_click``) and the
            initial phase (``initial`` or ``before_accept``).

    Returns:
        A finding dict with the keys ``severity`` (always ``"HIGH"``),
        ``code``, ``label``, ``detail`` and ``legal_basis``, or ``None`` when
        there is no settings phase, the initial banner text mentions no
        customize/settings control, or no failure signal was found.
    """
    phases = _phases(banner_result)
    settings_ph = _pick_phase(phases, "settings", "after_settings_click")
    if settings_ph is None:
        return None

    initial_ph = _pick_phase(phases, "initial", "before_accept") or {}
    initial_text = _as_text(initial_ph.get("banner_text")).lower()
    if not any(key in initial_text for key in _ANPASSEN_KEYS):
        # No customize/settings control in the initial banner: that case
        # belongs to check P100, so it is not reported a second time here.
        return None

    error = settings_ph.get("error") or settings_ph.get("status_error")
    settings_text = _as_text(settings_ph.get("banner_text")).strip()
    has_categories = bool(
        settings_ph.get("categories")
        or settings_ph.get("category_tests")
        or settings_ph.get("structured_checks")
    )
    has_toggles = bool(
        _TOGGLE_RE.search(_as_text(settings_ph.get("banner_html")))
    )
    timed_out = bool(settings_ph.get("timeout"))

    failure_signals: list[str] = []
    if error:
        failure_signals.append(f'Fehler: {str(error)[:120]}')
    if timed_out:
        failure_signals.append('Zeitueberschreitung beim Laden')
    if len(settings_text) < 80 and not has_categories:
        failure_signals.append(
            f'Settings-Bereich nur {len(settings_text)} Zeichen, '
            'keine Kategorien sichtbar'
        )
    if not has_toggles and not has_categories:
        failure_signals.append(
            'Keine Checkboxen / Toggles im Settings-Bereich'
        )

    if not failure_signals:
        return None

    return {
        "severity": "HIGH",
        "code": "cmp_tool_unavailable",
        "label": 'Cookie-Einstellungen ueber "Anpassen" formal vorhanden, '
                 'Tool laed aber nicht oder ist leer',
        "detail": " | ".join(failure_signals),
        "legal_basis": "Art. 7 (3) DSGVO + EDPB 03/2022 — die Moeglichkeit "
                       "zur granularen Auswahl muss tatsaechlich funktionieren.",
    }


def _normalize_cookie_names(items) -> set[str]:
    """Return the lower-cased, stripped cookie names found in *items*.

    Each item may be a string or a dict; for a dict the first non-empty
    string among its ``name``, ``cookie`` and ``id`` values is used, so a
    numeric id no longer raises. A bare string is treated as a single name
    rather than iterated character by character. Other item types, empty
    names and names longer than 120 characters are skipped.
    """
    out: set[str] = set()
    if not items:
        return out
    if isinstance(items, str):
        items = (items,)
    try:
        iterator = iter(items)
    except TypeError:
        return out
    for it in iterator:
        if isinstance(it, str):
            name = it
        elif isinstance(it, dict):
            values = (it.get(key) for key in ("name", "cookie", "id"))
            name = next((v for v in values if v and isinstance(v, str)), "")
        else:
            continue
        name = name.strip()
        if name and len(name) <= 120:
            out.add(name.lower())
    return out


def _extract_doc_cookie_names(text: str) -> set[str]:
    """Heuristically collect lower-cased cookie names from policy text."""
    found: set[str] = set()
    for token in set(_TOKEN_RE.findall(text)):
        lowered = token.lower()
        if any(marker in lowered for marker in _KNOWN_COOKIE_MARKERS):
            found.add(lowered)
        elif (
            _GENERIC_NAME_RE.match(lowered)
            and any(word in lowered for word in _GENERIC_COOKIE_KEYWORDS)
            # Without this, plain prose ("cookies", "tracking", "session")
            # would be counted as cookie names and inflate the comparison.
            and _IDENTIFIER_HINT_RE.search(token)
        ):
            found.add(lowered)
    return found


def check_init_banner_vs_cookie_doc(
    banner_result: dict,
    cookie_doc_text: str | None,
) -> dict | None:
    """P94: compare the banner's cookie list with the cookie policy text.

    Args:
        banner_result: Scan result; the ``cookies`` entries of the settings
            phase (``settings`` or ``after_settings_click``) and the initial
            phase (``initial`` or ``before_accept``) are merged.
        cookie_doc_text: Text of the cookie policy document.

    Returns:
        A finding dict with the keys ``severity`` (``"MEDIUM"``, or
        ``"HIGH"`` when at least 15 names appear only in the document and at
        least 5 only in the banner), ``code``, ``label``, ``detail`` and
        ``legal_basis``. Returns ``None`` when the text is not a string or is
        shorter than 500 characters, when fewer than 5 cookie names are found
        in the text, when the banner lists no cookies, or when fewer than 5
        names are document-only and fewer than 3 are banner-only.
    """
    if not isinstance(cookie_doc_text, str) or len(cookie_doc_text) < 500:
        return None

    phases = _phases(banner_result)
    # Same phase aliases as in check_cmp_tool_availability.
    settings_ph = _pick_phase(phases, "settings", "after_settings_click") or {}
    initial_ph = _pick_phase(phases, "initial", "before_accept") or {}
    banner_cookies = (
        _normalize_cookie_names(settings_ph.get("cookies"))
        | _normalize_cookie_names(initial_ph.get("cookies"))
    )

    doc_cookies = _extract_doc_cookie_names(cookie_doc_text)

    if len(doc_cookies) < 5 or not banner_cookies:
        return None  # Too little data for a meaningful statement.

    only_in_doc = doc_cookies - banner_cookies
    only_in_banner = banner_cookies - doc_cookies

    if len(only_in_doc) < 5 and len(only_in_banner) < 3:
        return None  # Tolerable deviation.

    # HIGH when both sides deviate massively; otherwise MEDIUM.
    severity = "MEDIUM"
    if len(only_in_doc) >= 15 and len(only_in_banner) >= 5:
        severity = "HIGH"

    return {
        "severity": severity,
        "code": "banner_cookie_doc_mismatch",
        "label": (
            f"Cookie-Liste im Banner-Einstellungen ({len(banner_cookies)}) "
            f"weicht von Cookie-Richtlinie ({len(doc_cookies)}) ab"
        ),
        "detail": (
            f"Nur im Cookie-Dokument: {len(only_in_doc)} Cookies (Beispiele: "
            f"{', '.join(sorted(only_in_doc)[:5])}). "
            f"Nur im Banner: {len(only_in_banner)} Cookies. "
            "Empfehlung: eine der beiden Quellen als Single-Source-of-Truth "
            "definieren und die andere automatisch generieren."
        ),
        "legal_basis": (
            "Art. 13(1)(c) DSGVO + Art. 12 DSGVO — Informationen ueber die "
            "Verarbeitung muessen vollstaendig und konsistent sein."
        ),
    }


def run_all(banner_result: dict, cookie_doc_text: str | None = None) -> list[dict]:
    """Run P92 and P94 and return their findings (P92 first).

    Each check is best-effort: an exception in one check is logged as a
    warning and does not prevent the other check from running.
    """
    findings: list[dict] = []
    try:
        f1 = check_cmp_tool_availability(banner_result)
        if f1:
            findings.append(f1)
    except Exception as e:
        logger.warning("P92 cmp_tool_availability failed: %s", e)
    try:
        f2 = check_init_banner_vs_cookie_doc(banner_result, cookie_doc_text)
        if f2:
            findings.append(f2)
    except Exception as e:
        logger.warning("P94 init_vs_cookie_doc failed: %s", e)
    return findings