""" Banner ↔ Cookie-Richtlinie Cross-Check. Compares what the cookie banner actually does (Playwright scan) with what the cookie policy text promises. Finds mismatches: - Services used but not documented - Tracking before consent despite claiming consent-only - False "no tracking" claims contradicted by actual cookies """ import logging import re logger = logging.getLogger(__name__) def cross_check_banner_vs_cookie( banner_result: dict, cookie_text: str, ) -> list[dict]: """Cross-check: Does the banner behavior match the cookie policy text? Returns list of CheckItem-compatible dicts for mismatches. """ findings: list[dict] = [] cookie_lower = cookie_text.lower() phases = banner_result.get("phases", {}) before = phases.get("before_consent", {}) after_accept = phases.get("after_accept", {}) # 1. Services found by banner but not mentioned in cookie policy tracking_before = before.get("tracking_services", []) all_tracking = set(tracking_before) for s in after_accept.get("tracking_services", []): if isinstance(s, str): all_tracking.add(s) for service in all_tracking: service_lower = service.lower() mentioned = any(kw in cookie_lower for kw in [ service_lower, service_lower.replace(" ", ""), service_lower.split()[0] if " " in service_lower else service_lower, ]) if not mentioned: findings.append({ "id": f"cross-{service_lower.replace(' ', '_')[:20]}", "label": f"Dienst '{service}' fehlt in Cookie-Richtlinie", "passed": False, "severity": "HIGH", "level": 2, "parent": None, "skipped": False, "matched_text": "", "hint": ( f"Der Banner-Scan hat '{service}' auf der Website erkannt, " f"aber die Cookie-Richtlinie erwaehnt diesen Dienst nicht. " f"Gemaess Art. 13(1)(e) DSGVO muessen alle Empfaenger/" f"Dienste in der DSI benannt werden." ), "source": "cross_check", }) # 2. Tracking before consent but cookie policy says "nur mit Einwilligung" if tracking_before: consent_words = re.findall( r"(?:einwilligung|consent|zustimmung).*?" r"(?:erforderlich|notwendig|vorher|vorab)", cookie_lower, ) if consent_words: findings.append({ "id": "cross-tracking-before-consent", "label": "Tracking vor Consent trotz Einwilligungspflicht", "passed": False, "severity": "CRITICAL", "level": 2, "parent": None, "skipped": False, "matched_text": ", ".join(tracking_before[:3]), "hint": ( f"Die Cookie-Richtlinie spricht von Einwilligungspflicht, " f"aber der Banner-Scan hat {len(tracking_before)} Tracking-" f"Dienst(e) VOR Consent erkannt: {', '.join(tracking_before[:3])}. " f"Dies ist ein Verstoss gegen §25 Abs. 1 TDDDG." ), "source": "cross_check", }) # 3. Banner sets tracking cookies but policy claims "no tracking" accept_cookies = after_accept.get("cookies", []) tracking_prefixes = ("_ga", "_gid", "_fbp", "_fbc", "IDE", "_gcl", "fr", "_pin", "_tt_", "li_sugr", "_hj") tracking_cookies = [c for c in accept_cookies if any(c.startswith(p) for p in tracking_prefixes)] if tracking_cookies: no_tracking_claim = any(kw in cookie_lower for kw in [ "keine tracking", "kein tracking", "keine marketing", "keine werbe", "no tracking", ]) if no_tracking_claim: findings.append({ "id": "cross-false-no-tracking", "label": "Cookie-Richtlinie behauptet 'kein Tracking' — " "Banner widerspricht", "passed": False, "severity": "CRITICAL", "level": 2, "parent": None, "skipped": False, "matched_text": ", ".join(tracking_cookies[:5]), "hint": ( f"Die Cookie-Richtlinie behauptet kein Tracking, aber " f"nach Akzeptieren wurden Tracking-Cookies gesetzt: " f"{', '.join(tracking_cookies[:5])}. " f"Transparenzverstoss nach Art. 5(1)(a) DSGVO." ), "source": "cross_check", }) # 4. Banner detected but no cookie policy provided banner_detected = banner_result.get("banner_detected", False) if banner_detected and len(cookie_lower.strip()) < 100: findings.append({ "id": "cross-banner-no-policy", "label": "Cookie-Banner vorhanden aber keine Cookie-Richtlinie", "passed": False, "severity": "HIGH", "level": 2, "parent": None, "skipped": False, "matched_text": "", "hint": ( "Die Website zeigt ein Cookie-Banner, aber es wurde keine " "Cookie-Richtlinie eingereicht. Eine separate Cookie-Richtlinie " "oder ein Cookie-Abschnitt in der DSI ist nach §25 TDDDG " "erforderlich wenn nicht-essentielle Cookies verwendet werden." ), "source": "cross_check", }) logger.info("Cross-check: %d findings (%d services, %d tracking before)", len(findings), len(all_tracking), len(tracking_before)) return findings def cross_check_vendors_vs_dsi( vendors: list[dict], dsi_text: str, ) -> list[dict]: """Cross-check: Are all TCF vendors documented in the DSI? Checks per vendor: 1. Is the vendor mentioned by name? 2. Is third-country transfer documented (if non-EU)? 3. Is storage duration mentioned? Returns list of CheckItem-compatible dicts. """ findings: list[dict] = [] dsi_lower = dsi_text.lower() for v in vendors: name = v.get("name", "") name_lower = name.lower() if not name_lower: continue # Check if vendor is mentioned in DSI mentioned = any(kw in dsi_lower for kw in [ name_lower, name_lower.replace(" ", ""), name_lower.split()[0] if " " in name_lower else name_lower, ]) if not mentioned: findings.append({ "id": f"vendor-{v.get('vendor_id', name_lower[:20])}", "label": f"Verarbeiter '{name}' fehlt in DSI", "passed": False, "severity": "HIGH", "level": 2, "parent": None, "skipped": False, "matched_text": "", "hint": ( f"Der Cookie-Banner listet '{name}' als Verarbeiter " f"({v.get('zweck_kurz', 'unbekannt')}), aber die DSI " f"erwaehnt diesen Dienst nicht. Art. 13(1)(e) DSGVO " f"verlangt die Benennung aller Empfaenger." ), "source": "vendor_cross_check", }) # Check third-country transfer documentation if v.get("drittland") and mentioned: country = v.get("land", "Drittland") transfer_mentioned = any(kw in dsi_lower for kw in [ name_lower + ".*" + "usa", name_lower + ".*" + "drittland", "scc", "standardvertragsklausel", "data privacy framework", "angemessenheitsbeschluss", ]) if not transfer_mentioned: findings.append({ "id": f"vendor-transfer-{v.get('vendor_id', '')}", "label": f"Drittlandtransfer fuer '{name}' nicht dokumentiert", "passed": False, "severity": "MEDIUM", "level": 2, "parent": None, "skipped": False, "matched_text": "", "hint": ( f"'{name}' verarbeitet Daten in {country} (ausserhalb EWR). " f"Die DSI muss den Transfermechanismus benennen: " f"SCC (Art. 46(2)(c)) oder DPF (Angemessenheitsbeschluss)." ), "source": "vendor_cross_check", }) logger.info("Vendor cross-check: %d findings for %d vendors", len(findings), len(vendors)) return findings