"""
Banner advanced legal checks (12-20).

12. Click count: reject vs. accept (CNIL enforcement)
13. Color contrast dark pattern (EDPB 3/2022)
14. Google Consent Mode default values
15. Consent before cookies (pre-banner tracking)
16. Registration-consent coupling (Art. 7(4) DSGVO)
17. Banner language vs. page language (all EU languages)
18. Consent cookie expiry > 13 months (CNIL)
19. Nudging/scrolling to find reject (EDPB 3/2022)
20. Emotional/manipulative language (EDPB 3/2022 Stirring)

Integration: ``check_banner_text`` in ``services/banner_text_checker.py``
imports ``run_advanced_checks`` from this module, awaits
``run_advanced_checks(page, banner_text)`` and extends its own violation list
with the result (logging a warning if the call raises).
"""

import logging
import re
import time
import unicodedata

from services.script_analyzer import Violation

logger = logging.getLogger(__name__)

# All EU/EEA official languages for Check 17
EU_LANGUAGES = {
    "bg": {"name": "Bulgarian", "cookie_words": ["бисквитки", "съгласие"]},
    "cs": {"name": "Czech", "cookie_words": ["cookies", "souhlas", "soubory cookie"]},
    "da": {"name": "Danish", "cookie_words": ["cookies", "samtykke", "cookiepolitik"]},
    "de": {"name": "German", "cookie_words": ["cookies", "einwilligung", "datenschutz", "zustimm"]},
    "el": {"name": "Greek", "cookie_words": ["cookies", "συναίνεση", "απορρήτου"]},
    "en": {"name": "English", "cookie_words": ["cookies", "consent", "privacy", "accept"]},
    "es": {"name": "Spanish", "cookie_words": ["cookies", "consentimiento", "privacidad", "aceptar"]},
    "et": {"name": "Estonian", "cookie_words": ["küpsised", "nõusolek", "privaatsus"]},
    "fi": {"name": "Finnish", "cookie_words": ["evästeet", "suostumus", "tietosuoja"]},
    "fr": {"name": "French", "cookie_words": ["cookies", "consentement", "confidentialité", "accepter"]},
    "ga": {"name": "Irish", "cookie_words": ["fianáin", "toiliú", "príobháideachais"]},
    "hr": {"name": "Croatian", "cookie_words": ["kolačići", "pristanak", "privatnost"]},
    "hu": {"name": "Hungarian", "cookie_words": ["sütik", "hozzájárulás", "adatvédel"]},
    "is": {"name": "Icelandic", "cookie_words": ["vafrakökur", "samþykki", "persónuvernd"]},
    "it": {"name": "Italian", "cookie_words": ["cookie", "consenso", "privacy", "accett"]},
    "lt": {"name": "Lithuanian", "cookie_words": ["slapukai", "sutikimas", "privatumas"]},
    "lv": {"name": "Latvian", "cookie_words": ["sīkdatnes", "piekrišana", "privātums"]},
    "mt": {"name": "Maltese", "cookie_words": ["cookies", "kunsens", "privatezza"]},
    "nl": {"name": "Dutch", "cookie_words": ["cookies", "toestemming", "privacy", "accepter"]},
    "no": {"name": "Norwegian", "cookie_words": ["informasjonskapsler", "samtykke", "personvern"]},
    "pl": {"name": "Polish", "cookie_words": ["ciasteczka", "zgoda", "prywatność", "akceptuj"]},
    "pt": {"name": "Portuguese", "cookie_words": ["cookies", "consentimento", "privacidade", "aceitar"]},
    "ro": {"name": "Romanian", "cookie_words": ["cookie-uri", "consimțământ", "confidențialitate"]},
    "sk": {"name": "Slovak", "cookie_words": ["cookies", "súhlas", "súkromie"]},
    "sl": {"name": "Slovenian", "cookie_words": ["piškotki", "soglasje", "zasebnost"]},
    "sv": {"name": "Swedish", "cookie_words": ["kakor", "samtycke", "integritet", "godkänn"]},
}

# CSS selector for the consent banner root, shared by checks 12, 13 and 19.
_BANNER_SELECTOR = (
    "#CybotCookiebotDialog, #onetrust-banner-sdk, #didomi-host, "
    "#usercentrics-root, .cky-consent-container, #cmpbox, "
    '[class*="cookie-banner"], [class*="consent-banner"], [role="dialog"]'
)

# Only this many leading characters of the page HTML are scanned for meta tags.
_HEAD_SCAN_CHARS = 2000
_META_TAG_RE = re.compile(r"<meta\b[^>]*>")
# The lookbehind keeps attributes such as ``data-content=`` from matching.
_META_CONTENT_LANG_RE = re.compile(r"""(?<![\w-])content\s*=\s*["']\s*([a-z]{2})""")

# Computed background values that make a button look like plain banner surface.
_TRANSPARENT_BACKGROUNDS = ("transparent", "rgba(0, 0, 0, 0)")

# Check 18: 13 months ≈ 395 days.
_MAX_CONSENT_COOKIE_DAYS = 395
_SECONDS_PER_DAY = 86400
_CONSENT_COOKIE_MARKERS = (
    "consent", "cookie", "optanon", "didomi", "uc_settings",
    "cky-consent", "borlabs", "cmplz",
)

# Check 20: the patterns below are written in ASCII transliteration, so the
# banner text is folded the same way before matching.
_GERMAN_TRANSLITERATION = str.maketrans({"ä": "ae", "ö": "oe", "ü": "ue", "ß": "ss"})
_STIRRING_PATTERNS = (
    # German
    ("erleben sie das volle potenzial", "Stirring: 'Erleben Sie das volle Potenzial'"),
    ("bestmoegliches erlebnis", "Stirring: 'bestmoegliches Erlebnis'"),
    ("optimale nutzung", "Stirring: 'optimale Nutzung'"),
    ("eingeschraenkte funktionen", "Stirring: 'eingeschraenkte Funktionen' bei Ablehnung"),
    ("eingeschraenkt weiter", "Stirring: 'eingeschraenkt weiter'"),
    ("ohne cookies eingeschraenkt", "Stirring: 'ohne Cookies eingeschraenkt'"),
    ("volle funktionalitaet", "Stirring: 'volle Funktionalitaet'"),
    ("nur mit cookies moeglich", "Stirring: 'nur mit Cookies moeglich'"),
    # English
    ("best possible experience", "Stirring: 'best possible experience'"),
    ("full experience", "Stirring: 'full experience'"),
    ("limited functionality", "Stirring: 'limited functionality' if rejected"),
    ("enhanced experience", "Stirring: 'enhanced experience'"),
    ("may not work properly", "Stirring: 'may not work properly'"),
    ("some features may not", "Stirring: 'some features may not'"),
    # French
    ("meilleure experience", "Stirring: 'meilleure experience'"),
    ("experience optimale", "Stirring: 'experience optimale'"),
)

# ── JavaScript snippets evaluated inside the page ──────────────

# Check 12. Called with the banner selector as argument.
_CLICK_COUNT_JS = """
(selector) => {
    const banner = document.querySelector(selector);
    if (!banner) return null;
    const buttons = [...banner.querySelectorAll('button, a[role="button"], [class*="btn"]')];
    const acceptKw = ['akzeptieren','accept','zustimmen','agree','einverstanden','alle akzeptieren','accept all'];
    const rejectKw = ['ablehnen','reject','notwendige','decline','nur notwendige','reject all','alle ablehnen'];
    const settingsKw = ['einstellungen','settings','anpassen','customize','details','mehr'];
    let acceptClicks = 0, rejectClicks = 0;
    for (const b of buttons) {
        const t = (b.textContent || '').trim().toLowerCase();
        if (acceptKw.some(k => t.includes(k))) { acceptClicks = 1; }
        if (rejectKw.some(k => t.includes(k))) { rejectClicks = 1; }
    }
    // If no direct reject but settings button exists → 2 clicks to reject
    if (rejectClicks === 0) {
        for (const b of buttons) {
            const t = (b.textContent || '').trim().toLowerCase();
            if (settingsKw.some(k => t.includes(k))) { rejectClicks = 2; break; }
        }
    }
    return { acceptClicks, rejectClicks };
}
"""

# Check 13. Called with the banner selector as argument.
_BUTTON_COLOR_JS = """
(selector) => {
    const banner = document.querySelector(selector);
    if (!banner) return null;
    const bannerBg = window.getComputedStyle(banner).backgroundColor;
    const buttons = [...banner.querySelectorAll('button, a[role="button"], [class*="btn"]')];
    const acceptKw = ['akzeptieren','accept','zustimmen','agree','einverstanden'];
    const rejectKw = ['ablehnen','reject','notwendige','decline','nein'];
    let acceptColor = null, rejectColor = null;
    for (const b of buttons) {
        const t = (b.textContent || '').trim().toLowerCase();
        const style = window.getComputedStyle(b);
        if (acceptKw.some(k => t.includes(k))) {
            acceptColor = { bg: style.backgroundColor, color: style.color, border: style.borderColor };
        }
        if (rejectKw.some(k => t.includes(k))) {
            rejectColor = { bg: style.backgroundColor, color: style.color, border: style.borderColor };
        }
    }
    return { bannerBg, acceptColor, rejectColor };
}
"""

# Check 14.
# NOTE(review): the "granted" patterns scan the whole inline script, so a
# consent "update" call in the same script as a denied default would also
# match — confirm whether that is intended.
_CONSENT_MODE_JS = """
() => {
    const scripts = document.querySelectorAll('script');
    let foundDefault = false;
    let grantedBeforeConsent = false;
    for (const s of scripts) {
        const text = s.textContent || '';
        if (text.includes('consent') && text.includes('default')) {
            foundDefault = true;
            // Check for analytics_storage or ad_storage granted as default
            const grantedMatch = text.match(/analytics_storage['"\\s:]*['"]granted/);
            const adGranted = text.match(/ad_storage['"\\s:]*['"]granted/);
            if (grantedMatch || adGranted) {
                grantedBeforeConsent = true;
            }
        }
    }
    return { foundDefault, grantedBeforeConsent };
}
"""

# Check 15. 'IDE' and 'fr' are complete cookie names and must match exactly;
# as prefixes they would also flag unrelated cookies such as 'frontend_lang'.
_TRACKING_COOKIES_JS = """
() => {
    const names = document.cookie.split(';').map(c => c.trim().split('=')[0]);
    const exactNames = ['IDE', 'fr'];
    const prefixes = ['_ga', '_gid', '_fbp', '_fbc', '_gcl', '_pin',
        '_tt_', 'li_sugr', '_hj', 'mp_', 'ajs_', '_clck', '_clsk'];
    return names.filter(name =>
        exactNames.includes(name) || prefixes.some(p => name.startsWith(p))
    );
}
"""

# Check 16.
_CONSENT_COUPLING_JS = """
() => {
    const forms = document.querySelectorAll('form');
    const results = [];
    for (const form of forms) {
        const buttons = form.querySelectorAll('button[type="submit"], input[type="submit"]');
        for (const btn of buttons) {
            const text = (btn.textContent || btn.value || '').toLowerCase();
            const isLoginRegister = /anmelden|registrieren|login|sign.?up|register|einloggen/.test(text);
            if (!isLoginRegister) continue;
            // Check surrounding text for consent coupling
            const formText = form.textContent.toLowerCase();
            const hasCoupling = /klicken.*stimmen.*zu|clicking.*agree|accept.*terms|akzeptieren.*bedingungen/.test(formText);
            const hasCheckbox = form.querySelectorAll('input[type="checkbox"]').length;
            if (hasCoupling && hasCheckbox === 0) {
                results.push({ button: text.trim().substring(0, 30), hasCoupling: true });
            }
        }
    }
    return results;
}
"""

# Check 18.
_CONSENT_COOKIE_NAMES_JS = """
() => {
    const consentNames = ['CookieConsent', 'cookieconsent_status', 'cc_cookie',
        'eupubconsent', 'eupubconsent-v2', 'OptanonConsent', 'OptanonAlertBoxClosed',
        'didomi_token', 'uc_settings', 'cky-consent', 'bp_consent',
        'cmplz_consent_status', 'borlabs-cookie', 'cookie_notice_accepted'];
    const all = document.cookie.split(';').map(c => c.trim().split('=')[0]);
    return all.filter(name => consentNames.some(cn =>
        name.toLowerCase().includes(cn.toLowerCase())
    ));
}
"""

# Check 19. Called with the banner selector as argument.
_REJECT_VISIBILITY_JS = """
(selector) => {
    const banner = document.querySelector(selector);
    if (!banner) return null;
    const bannerRect = banner.getBoundingClientRect();
    const buttons = [...banner.querySelectorAll('button, a[role="button"], [class*="btn"]')];
    const rejectKw = ['ablehnen','reject','notwendige','decline','nein','alle ablehnen'];
    for (const b of buttons) {
        const t = (b.textContent || '').trim().toLowerCase();
        if (rejectKw.some(k => t.includes(k))) {
            const btnRect = b.getBoundingClientRect();
            // Button is below visible banner area
            const isBelow = btnRect.top > bannerRect.bottom;
            const isHidden = btnRect.height === 0 || btnRect.width === 0;
            return { found: true, isBelow, isHidden, btnTop: btnRect.top, bannerBottom: bannerRect.bottom };
        }
    }
    return { found: false };
}
"""


def _detect_language(text: str) -> str | None:
    """Detect the language of *text* using cookie-specific keywords.

    Each language in ``EU_LANGUAGES`` scores one point per keyword found as a
    substring of the lower-cased text.

    Returns:
        The code of the single best-scoring language, or ``None`` when no
        keyword matches or when several languages share the top score.
        Keywords such as "cookies" and "privacy" appear in many languages, so
        a tie carries no evidence and must not be resolved by dict order.
    """
    text_lower = (text or "").lower()
    scores: dict[str, int] = {}
    for lang, info in EU_LANGUAGES.items():
        score = sum(1 for word in info["cookie_words"] if word in text_lower)
        if score > 0:
            scores[lang] = score
    if not scores:
        return None
    best = max(scores.values())
    leaders = [lang for lang, score in scores.items() if score == best]
    return leaders[0] if len(leaders) == 1 else None


def _detect_page_language(page_html: str, page_lang_attr: str) -> str | None:
    """Detect the page language from ``<html lang>`` or a content-language meta tag.

    Args:
        page_html: Page markup; only its first ``_HEAD_SCAN_CHARS`` characters
            are scanned, and only when *page_lang_attr* is blank.
        page_lang_attr: Value of the ``lang`` attribute of the root element.

    Returns:
        A lower-case two-character language code, or ``None`` if neither
        source yields one.
    """
    lang_attr = (page_lang_attr or "").strip()
    if lang_attr:
        return lang_attr[:2].lower()
    # Fallback: only <meta> tags that mention content-language are considered,
    # so e.g. a viewport tag's content="width=..." is never read as a language.
    head = (page_html or "")[:_HEAD_SCAN_CHARS].lower()
    for tag in _META_TAG_RE.findall(head):
        if "content-language" not in tag:
            continue
        match = _META_CONTENT_LANG_RE.search(tag)
        if match:
            return match.group(1)
    return None


def _blends_into_banner(button_bg: str, banner_bg: str) -> bool:
    """Return True if a button background is transparent or equals the banner's."""
    return button_bg == banner_bg or button_bg in _TRANSPARENT_BACKGROUNDS


def _fold_for_matching(text: str) -> str:
    """Lower-case *text*, transliterate German umlauts/ß and strip other accents."""
    lowered = unicodedata.normalize("NFC", text or "").lower()
    transliterated = lowered.translate(_GERMAN_TRANSLITERATION)
    decomposed = unicodedata.normalize("NFKD", transliterated)
    return "".join(ch for ch in decomposed if not unicodedata.combining(ch))


async def _check_click_parity(page) -> list[Violation]:
    """Check 12: rejecting must not take more clicks than accepting."""
    click_info = await page.evaluate(_CLICK_COUNT_JS, _BANNER_SELECTOR)
    if not click_info:
        return []
    accept_clicks = click_info["acceptClicks"]
    reject_clicks = click_info["rejectClicks"]
    # Without a detected accept button there is nothing to compare against;
    # reporting "accept needs 0 clicks" would be a false finding.
    if accept_clicks <= 0 or reject_clicks <= accept_clicks:
        return []
    return [Violation(
        service="Cookie-Banner",
        severity="HIGH",
        text=f"Ablehnung erfordert {reject_clicks} Klick(s), "
             f"Zustimmung nur {accept_clicks}. "
             f"Beides muss mit gleicher Anzahl Klicks erreichbar sein. "
             f"Die CNIL hat hierfuer bereits Bussgelder verhaengt (Google: 150 Mio. EUR).",
        legal_ref="§25 TDDDG, CNIL Deliberation SAN-2021-024, EDPB Guidelines 05/2020",
    )]


async def _check_button_contrast(page) -> list[Violation]:
    """Check 13: the reject button must not be visually hidden next to accept."""
    color_info = await page.evaluate(_BUTTON_COLOR_JS, _BANNER_SELECTOR)
    if not color_info:
        return []
    accept_color = color_info.get("acceptColor")
    reject_color = color_info.get("rejectColor")
    if not accept_color or not reject_color:
        return []
    banner_bg = color_info["bannerBg"]
    if not _blends_into_banner(reject_color["bg"], banner_bg):
        return []
    # Both buttons equally plain is equal treatment, not a dark pattern.
    if _blends_into_banner(accept_color["bg"], banner_bg):
        return []
    return [Violation(
        service="Cookie-Banner",
        severity="MEDIUM",
        text="Dark Pattern: 'Ablehnen'-Button hat gleiche Hintergrundfarbe wie der Banner "
             "oder ist transparent — optisch kaum sichtbar. Beide Optionen muessen "
             "visuell gleichwertig dargestellt werden.",
        legal_ref="EDPB Guidelines 3/2022 (Deceptive Design Patterns), §25 TDDDG",
    )]


async def _check_consent_mode_defaults(page) -> list[Violation]:
    """Check 14: Google Consent Mode defaults must not be 'granted'."""
    gcm_info = await page.evaluate(_CONSENT_MODE_JS)
    if not gcm_info or not gcm_info.get("grantedBeforeConsent"):
        return []
    return [Violation(
        service="Google Consent Mode",
        severity="CRITICAL",
        text="Google Consent Mode: analytics_storage oder ad_storage ist als "
             "Default auf 'granted' gesetzt BEVOR der Nutzer zugestimmt hat. "
             "Default muss 'denied' sein bis explizite Einwilligung vorliegt.",
        legal_ref="§25 TDDDG, Art. 5(3) ePrivacy-RL, Google Consent Mode v2 Docs",
    )]


async def _check_pre_consent_cookies(page) -> list[Violation]:
    """Check 15: no tracking cookies may exist before the consent interaction."""
    pre_consent_tracking = await page.evaluate(_TRACKING_COOKIES_JS)
    if not pre_consent_tracking:
        return []
    return [Violation(
        service="Cookie-Banner",
        severity="CRITICAL",
        text=f"Tracking-Cookies vor Consent gesetzt: {', '.join(pre_consent_tracking[:5])}. "
             f"Nicht-essentielle Cookies duerfen erst NACH expliziter Einwilligung gesetzt werden.",
        legal_ref="§25 Abs. 1 TDDDG, Art. 5(3) ePrivacy-RL, EuGH C-673/17 Planet49",
    )]


async def _check_consent_coupling(page) -> list[Violation]:
    """Check 16: login/registration buttons must not double as consent."""
    coupling_info = await page.evaluate(_CONSENT_COUPLING_JS)
    if not coupling_info:
        return []
    return [Violation(
        service="Registration/Login",
        severity="HIGH",
        text=f"Koppelungsverbot: '{coupling_info[0]['button']}'-Button erteilt gleichzeitig "
             f"Datenschutz-Einwilligung ohne separate Checkbox. Einwilligung darf nicht "
             f"an Registrierung/Login gekoppelt werden.",
        legal_ref="Art. 7(4) DSGVO (Koppelungsverbot), ErwGr. 43, EuGH Planet49",
    )]


async def _check_banner_language(page, banner_text: str) -> list[Violation]:
    """Check 17: the banner language must match the page language."""
    lang_attr = await page.evaluate("() => document.documentElement.lang || ''")
    # The markup is fetched only when the lang attribute gives no answer.
    page_html = "" if (lang_attr or "").strip() else await page.content()
    page_lang_code = _detect_page_language(page_html, lang_attr or "")
    banner_lang = _detect_language(banner_text)
    if not page_lang_code or not banner_lang or page_lang_code == banner_lang:
        return []
    page_lang_name = EU_LANGUAGES.get(page_lang_code, {}).get("name", page_lang_code)
    banner_lang_name = EU_LANGUAGES.get(banner_lang, {}).get("name", banner_lang)
    return [Violation(
        service="Cookie-Banner",
        severity="MEDIUM",
        text=f"Banner-Sprache ({banner_lang_name}) stimmt nicht mit Seitensprache "
             f"({page_lang_name}) ueberein. Der Cookie-Banner muss in der Sprache "
             f"der Website verfasst sein, damit Nutzer eine informierte Entscheidung "
             f"treffen koennen.",
        legal_ref="Art. 12(1) DSGVO (klare und einfache Sprache), ErwGr. 39",
    )]


async def _check_consent_cookie_expiry(page) -> list[Violation]:
    """Check 18: consent cookies must not live longer than 13 months."""
    consent_cookies = await page.evaluate(_CONSENT_COOKIE_NAMES_JS)
    if not consent_cookies:
        return []
    now = time.time()
    # document.cookie exposes no expiry, so it is read from the browser context.
    for cookie in await page.context.cookies():
        name = cookie.get("name", "")
        lowered = name.lower()
        if not any(marker in lowered for marker in _CONSENT_COOKIE_MARKERS):
            continue
        expires = cookie.get("expires", 0) or 0
        if expires <= 0:
            continue
        days_until_expiry = (expires - now) / _SECONDS_PER_DAY
        if days_until_expiry > _MAX_CONSENT_COOKIE_DAYS:
            # One finding is enough.
            return [Violation(
                service="Cookie-Banner",
                severity="MEDIUM",
                text=f"Consent-Cookie '{name}' laeuft erst in {int(days_until_expiry)} Tagen ab. "
                     f"Die CNIL empfiehlt maximal 13 Monate (395 Tage). "
                     f"Danach muss erneut um Einwilligung gebeten werden.",
                legal_ref="CNIL Leitlinien (max. 13 Monate), EDPB Guidelines 05/2020",
            )]
    return []


async def _check_reject_visibility(page) -> list[Violation]:
    """Check 19: the reject button must be visible without scrolling the banner."""
    nudge_info = await page.evaluate(_REJECT_VISIBILITY_JS, _BANNER_SELECTOR)
    if not nudge_info or not nudge_info.get("found"):
        return []
    if not (nudge_info.get("isBelow") or nudge_info.get("isHidden")):
        return []
    return [Violation(
        service="Cookie-Banner",
        severity="HIGH",
        text="Nudging: Der 'Ablehnen'-Button ist nicht im sichtbaren Bereich des Banners "
             "— Nutzer muessen scrollen um ihn zu finden. Der Ablehn-Button muss ohne "
             "Scrollen sichtbar sein.",
        legal_ref="EDPB Guidelines 3/2022 (Deceptive Design: Hindering), §25 TDDDG",
    )]


def _check_stirring_language(banner_text: str) -> list[Violation]:
    """Check 20: flag emotional/manipulative wording (at most one finding)."""
    folded = _fold_for_matching(banner_text)
    for pattern, label in _STIRRING_PATTERNS:
        if pattern in folded:
            return [Violation(
                service="Cookie-Banner",
                severity="LOW",
                text=f"Emotionale Sprache im Banner: {label}. "
                     f"Solche Formulierungen koennen als 'Stirring' (emotionale Manipulation) "
                     f"gewertet werden und die Freiwilligkeit der Einwilligung beeintraechtigen.",
                legal_ref="EDPB Guidelines 3/2022 (Deceptive Design: Stirring), Art. 7(4) DSGVO",
            )]
    return []


async def run_advanced_checks(page, banner_text: str) -> list[Violation]:
    """Run checks 12-20 on the banner.

    Args:
        page: Browser page object; the checks use ``page.evaluate``,
            ``page.content`` and ``page.context.cookies``.
        banner_text: Visible text of the consent banner.

    Returns:
        Violations in check order (12 → 20). Each check is best-effort: if it
        raises, the failure is logged at debug level and the remaining checks
        still run.
    """
    violations: list[Violation] = []
    page_checks = (
        (12, lambda: _check_click_parity(page)),
        (13, lambda: _check_button_contrast(page)),
        (14, lambda: _check_consent_mode_defaults(page)),
        (15, lambda: _check_pre_consent_cookies(page)),
        (16, lambda: _check_consent_coupling(page)),
        (17, lambda: _check_banner_language(page, banner_text)),
        (18, lambda: _check_consent_cookie_expiry(page)),
        (19, lambda: _check_reject_visibility(page)),
    )
    for number, start_check in page_checks:
        try:
            violations.extend(await start_check())
        except Exception:
            # Deliberately best-effort, but leave a trace instead of hiding it.
            logger.debug("Advanced banner check %d failed", number, exc_info=True)

    violations.extend(_check_stirring_language(banner_text))
    return violations