""" Banner text legal checks — extracted from consent_scanner.py. 11 checks for cookie banner legal compliance: 1. Impressum link accessible (§5 TMG) 2. DSE link in banner (Art. 13 DSGVO) 3. Wrong DSE consent wording (Art. 13 DSGVO) 4. Reject button visible (§25 TDDDG) 5. Pre-ticked checkboxes (Planet49) 6. Dark pattern button size (EDPB 05/2020) 7. Cookie wall (Phase B check) 8. Re-access to settings (Art. 7(3) DSGVO) 9. Third-party DSE link (Art. 13 DSGVO) 10. Dark-pattern language (EDPB 05/2020) 11. Non-modal dismiss = consent (Planet49) """ import logging from services.script_analyzer import Violation from services.banner_advanced_checks import run_advanced_checks from services.banner_dom_walkers import ( SHADOW_BANNER_WALKER_JS, FOOTER_LABELS_WALKER_JS, ) logger = logging.getLogger(__name__) async def check_banner_text(page) -> dict: """Check cookie banner text for legal issues. 1. Impressum link must be accessible even with banner overlay (§5 TMG) 2. DSE link must be accessible from banner 3. "Zustimmung zur Datenschutzerklärung" is WRONG — DSE is an information obligation (Art. 13 DSGVO), not something users "agree" to """ violations = [] has_impressum = False has_dse = False try: # Get banner text and links banner_text = "" banner_links = [] # Try common banner container selectors for selector in [ "#CybotCookiebotDialog", "#onetrust-banner-sdk", "#didomi-host", "#usercentrics-root", ".cky-consent-container", "#cmpbox", '[class*="cookie-banner"]', '[class*="consent-banner"]', '[class*="cookie-notice"]', '[role="dialog"]', ]: try: el = page.locator(selector).first if await el.count() > 0: banner_text = (await el.text_content() or "").strip() # Get links inside banner links = await el.locator("a[href]").all() for link in links: href = await link.get_attribute("href") or "" text = (await link.text_content() or "").strip() banner_links.append({"href": href.lower(), "text": text.lower()}) if banner_text: break except Exception: continue # P28a + P63: Shadow-DOM Web Component CMPs (Mercedes cmm-cookie-banner, # BMW cookie-consent-banner). Walker pierces shadow tree + extracts # label-based legal links (wb7-link/button/role=link). See # banner_dom_walkers.SHADOW_BANNER_WALKER_JS. if not banner_text or not banner_links: try: shadow_data = await page.evaluate(SHADOW_BANNER_WALKER_JS) if shadow_data and isinstance(shadow_data, dict): if shadow_data.get("text"): banner_text = (banner_text + " " + shadow_data["text"]).strip() if shadow_data.get("links"): banner_links.extend(shadow_data["links"]) except Exception: pass if not banner_text: return {"violations": violations, "has_impressum": False, "has_dse": False} banner_lower = banner_text.lower() # Check 1: Impressum link in or accessible through banner has_impressum = any( "impressum" in l["href"] or "impressum" in l["text"] or "imprint" in l["href"] or "legal notice" in l["text"] for l in banner_links ) # Also check if impressum is visible behind/around banner if not has_impressum: try: imp_visible = await page.locator('a[href*="impressum"], a[href*="imprint"]').first if await imp_visible.count() > 0 and await imp_visible.is_visible(): has_impressum = True except Exception: pass if not has_impressum: violations.append(Violation( service="Cookie-Banner", severity="HIGH", text="Impressum nicht aus dem Cookie-Banner erreichbar. " "Bei ueberlagerndem Banner muss ein Impressum-Link im Banner vorhanden sein (§5 TMG).", legal_ref="§5 TMG, LG Rostock Az. 3 O 22/19", )) # Check 2: DSE link in banner has_dse = any( "datenschutz" in l["href"] or "datenschutz" in l["text"] or "privacy" in l["href"] or "privacy" in l["text"] or "dsgvo" in l["href"] for l in banner_links ) if not has_dse: violations.append(Violation( service="Cookie-Banner", severity="MEDIUM", text="Kein Link zur Datenschutzerklaerung im Cookie-Banner. " "Nutzer sollten vor der Einwilligung die DSE einsehen koennen.", legal_ref="Art. 13 DSGVO, ErwGr. 42 DSGVO (informierte Einwilligung)", )) # Check 3: Wrong wording — "Zustimmung zur Datenschutzerklärung" wrong_dse_consent_patterns = [ "stimme der datenschutz", "stimme den datenschutz", "akzeptiere die datenschutz", "akzeptiere die privacy", "agree to the privacy policy", "accept the privacy", "datenschutzerklaerung zustimmen", "datenschutzrichtlinie akzeptieren", "datenschutzrichtlinie zustimmen", "i agree to the privacy", "i accept the privacy", ] for pattern in wrong_dse_consent_patterns: if pattern in banner_lower: violations.append(Violation( service="Cookie-Banner", severity="HIGH", text=f"Falsche Formulierung im Banner: 'Zustimmung zur Datenschutzerklaerung'. " f"Die DSE ist eine Informationspflicht (Art. 13 DSGVO) — man kann sie " f"nur zur Kenntnis nehmen, nicht 'zustimmen'. " f"Korrekt: 'Ich habe die Datenschutzinformationen zur Kenntnis genommen'.", legal_ref="Art. 13 DSGVO, ErwGr. 42 (informierte Einwilligung ≠ Zustimmung zur DSE)", )) break # P28b Check 4: Reject mechanism present + explicit-labeled? # HIGH = no reject mechanism at all # MEDIUM = reject available but not labeled "Ablehnen"/"Reject" # (e.g. only "Nur technisch Notwendige" — semantically # a reject but EDPB 5/2020 + DSK-OH 2024 prefer explicit # labeling so users recognize it as the reject option) # P93: EDPB 5/2020 schreibt kein bestimmtes Wort vor — Reject- # Mechanismus muss gleichwertig zur Annahme sein. BMW nutzt # "Cookies verbieten", andere Sites "Tracking ablehnen" o.ae. — # alle rechtlich gleichwertig. explicit_reject_texts = [ "ablehnen", "reject", "alle ablehnen", "decline", "alles ablehnen", "cookies verbieten", "cookies blockieren", "tracking ablehnen", "tracking verbieten", "zurueckweisen", "block all", "deny all", "alle verweigern", "verweigern", ] implicit_reject_texts = ["nur notwendige", "nur technisch", "nur essenzielle", "nur essentielle", "notwendige akzeptieren", "essential only", "only necessary", "nur erforderliche"] has_explicit_reject = any(t in banner_lower for t in explicit_reject_texts) has_implicit_reject = any(t in banner_lower for t in implicit_reject_texts) if not has_explicit_reject and not has_implicit_reject: violations.append(Violation( service="Cookie-Banner", severity="HIGH", text="Kein 'Ablehnen'-Mechanismus im Banner erkannt. " "Die Ablehnung muss ebenso einfach sein wie die Zustimmung.", legal_ref="§25 Abs. 1 TDDDG, EDPB Guidelines 05/2020 (Consent)", )) elif not has_explicit_reject and has_implicit_reject: violations.append(Violation( service="Cookie-Banner", severity="MEDIUM", text="Reject-Moeglichkeit vorhanden ('Nur technisch Notwendige' o.ae.), " "aber nicht als 'Ablehnen' beschriftet. Nutzer erkennen 'Ablehnen' " "schneller als sprachlich umschriebene Varianten. " "Empfehlung: zusaetzlich 'Ablehnen' als Button-Label.", legal_ref="EDPB 5/2020 (Consent) + DSK-OH 2024 (Telemedien)", )) # P100: Granular-Wahl-Pruefung — "Anpassen"/"Einstellungen"-Button # im Initial-Banner. Wenn er FEHLT (VW-Pattern), ist die granulare # Cookie-Wahl erst nach Akzeptanz/Ablehnung moeglich — faktische # Manipulation Richtung "Alle akzeptieren". EDPB 5/2020 §82. granular_button_texts = [ "anpassen", "einstellungen", "cookie-einstellungen", "cookies verwalten", "manage cookies", "customize", "weitere optionen", "more options", "settings", "individuell", "detaillierte einstellungen", "praeferenzen", "preferences", ] has_granular_button = any(t in banner_lower for t in granular_button_texts) if not has_granular_button: violations.append(Violation( service="Cookie-Banner", severity="HIGH", text="Granulare Cookie-Auswahl im Initial-Banner nicht " "moeglich (kein 'Anpassen'/'Einstellungen'-Button). " "Nutzer koennen nur 'Alle akzeptieren' oder 'Nur " "technisch notwendige' waehlen — Detailwahl pro " "Kategorie erst nach Akzeptanz/Ablehnung. Das ist " "faktische Manipulation Richtung Pauschal-Akzeptanz.", legal_ref="EDPB Guidelines 5/2020 §82 (granular consent), " "§25 Abs. 1 TDDDG, Art. 4(11) DSGVO (informierte " "Einwilligung)", )) # Check 5: Pre-ticked checkboxes (EuGH Planet49) try: pre_checked = await page.evaluate(""" () => { const banner = document.querySelector( '#CybotCookiebotDialog, #onetrust-banner-sdk, #didomi-host, ' + '#usercentrics-root, .cky-consent-container, #cmpbox, ' + '[class*="cookie-banner"], [class*="consent-banner"], [role="dialog"]' ); if (!banner) return []; const checked = banner.querySelectorAll( 'input[type="checkbox"]:checked:not([disabled])' ); return [...checked] .filter(cb => { const label = cb.closest('label')?.textContent || cb.getAttribute('aria-label') || ''; const isNecessary = /notwendig|necessary|essential|erforderlich/i.test(label); return !isNecessary; }) .map(cb => cb.closest('label')?.textContent?.trim() || cb.id || 'unknown'); } """) if pre_checked: violations.append(Violation( service="Cookie-Banner", severity="HIGH", text=f"Vorausgewaehlte Checkboxen im Banner: {', '.join(pre_checked[:3])}. " f"Einwilligung muss durch aktive Handlung erfolgen — vorausgefuellte " f"Checkboxen sind ungueltig.", legal_ref="Art. 4(11) DSGVO, EuGH C-673/17 (Planet49)", )) except Exception: pass # Check 6: Dark Pattern — button size/prominence comparison try: button_info = await page.evaluate(""" () => { const banner = document.querySelector( '#CybotCookiebotDialog, #onetrust-banner-sdk, #didomi-host, ' + '#usercentrics-root, .cky-consent-container, #cmpbox, ' + '[class*="cookie-banner"], [class*="consent-banner"], [role="dialog"]' ); if (!banner) return null; const buttons = [...banner.querySelectorAll('button, a[role="button"], [class*="btn"]')]; return buttons.slice(0, 6).map(b => { const style = window.getComputedStyle(b); const rect = b.getBoundingClientRect(); return { text: b.textContent?.trim()?.substring(0, 40) || '', width: rect.width, height: rect.height, area: rect.width * rect.height, bgColor: style.backgroundColor, fontSize: parseFloat(style.fontSize), visible: rect.width > 0 && rect.height > 0, }; }); } """) if button_info and len(button_info) >= 2: accept_btn = None reject_btn = None accept_kw = ["akzeptieren", "accept", "zustimmen", "agree", "einverstanden", "ok"] reject_kw = ["ablehnen", "reject", "notwendige", "decline", "nein", "technisch", "essenzielle", "essential", "erforderliche"] for btn in button_info: text_lower = btn["text"].lower() if any(k in text_lower for k in accept_kw): accept_btn = btn elif any(k in text_lower for k in reject_kw): reject_btn = btn if accept_btn and reject_btn: area_ratio = accept_btn["area"] / max(reject_btn["area"], 1) if area_ratio > 2.5: violations.append(Violation( service="Cookie-Banner", severity="MEDIUM", text=f"Dark Pattern: 'Akzeptieren'-Button ist {area_ratio:.1f}x groesser als " f"'Ablehnen'-Button. Beide Optionen muessen gleichwertig dargestellt werden.", legal_ref="EDPB Guidelines 05/2020, §25 TDDDG, DSK Orientierungshilfe Telemedien", )) size_ratio = accept_btn["fontSize"] / max(reject_btn["fontSize"], 1) if size_ratio > 1.5: violations.append(Violation( service="Cookie-Banner", severity="MEDIUM", text=f"Dark Pattern: Schriftgroesse 'Akzeptieren' ({accept_btn['fontSize']:.0f}px) " f"vs. 'Ablehnen' ({reject_btn['fontSize']:.0f}px). " f"Unterschiedliche Schriftgroessen sind ein Dark Pattern.", legal_ref="EDPB Guidelines 05/2020 (gleichwertige Darstellung)", )) except Exception: pass # Check 7: Cookie Wall — does rejecting block the site? # (This is checked in Phase B — if after reject the page is not navigable) # P29 Check 8: Re-access to cookie settings (Art. 7(3) DSGVO). # Three quality tiers: # OK = persistent floating cookie icon OR explicit-labeled # footer link ("Cookie-Einstellungen", "Cookie-Richtlinie", # "Cookies verwalten", etc.) # MEDIUM = re-access only via ambiguous label (e.g. "Einstellungen" # alone — could mean theme/language) OR only via # cookies.html doc link (not a settings dialog) # HIGH = no re-access mechanism found at all try: has_floating_icon = False floating_selectors = [ ".cky-btn-revisit", "#ot-sdk-btn", "#ot-sdk-btn-floating", "[class*='ot-floating']", "[class*='cookie-floating']", "[id*='cookiebot-renew']", "[class*='cmp-floating']", "[id*='cmplz-cookiebanner-status']", ".uc-cookie-settings-trigger", "[class*='consent-floating']", "[data-testid*='cookie-revisit']", ] for sel in floating_selectors: try: if await page.locator(sel).count() > 0: has_floating_icon = True break except Exception: continue # Footer label inspection — distinguish explicit vs ambiguous # P64: OEM design-systems (Mercedes wb7-footer, BMW b-footer) don't # use