diff --git a/consent-tester/main.py b/consent-tester/main.py index 5a7c2ad..5e2593f 100644 --- a/consent-tester/main.py +++ b/consent-tester/main.py @@ -42,6 +42,7 @@ class ScanResponse(BaseModel): summary: dict scanned_at: str category_tests: list = [] + banner_checks: dict = {} @app.get("/health") @@ -81,11 +82,17 @@ async def scan_consent(req: ScanRequest): }, summary={ "critical": sum(1 for v in result.reject_violations if v.severity == "CRITICAL"), - "high": len(result.before_violations), + "high": len(result.before_violations) + sum(1 for v in result.banner_text_violations if v.severity == "HIGH"), "undocumented": len(result.accept_undocumented), - "total_violations": len(result.before_violations) + len(result.reject_violations), + "total_violations": len(result.before_violations) + len(result.reject_violations) + len(result.banner_text_violations), "category_violations": sum(len(ct.violations) for ct in result.category_tests), "categories_tested": len(result.category_tests), + "banner_text_issues": len(result.banner_text_violations), + }, + banner_checks={ + "has_impressum_link": result.banner_has_impressum_link, + "has_dse_link": result.banner_has_dse_link, + "violations": [v.__dict__ for v in result.banner_text_violations], }, scanned_at=datetime.now(timezone.utc).isoformat(), category_tests=[{ diff --git a/consent-tester/services/consent_scanner.py b/consent-tester/services/consent_scanner.py index a11cd90..538a0bf 100644 --- a/consent-tester/services/consent_scanner.py +++ b/consent-tester/services/consent_scanner.py @@ -46,6 +46,10 @@ class ConsentTestResult: accept_undocumented: list[str] = field(default_factory=list) # Phase D-F: Per-category tests category_tests: list = field(default_factory=list) # list[CategoryTestResult] + # Banner text checks + banner_text_violations: list[Violation] = field(default_factory=list) + banner_has_impressum_link: bool = False + banner_has_dse_link: bool = False async def run_consent_test(url: str, wait_secs: int = 10) 
# Banner container selectors, tried in priority order: known CMP roots first,
# generic class/role fallbacks last.
_BANNER_SELECTORS = (
    "#CybotCookiebotDialog", "#onetrust-banner-sdk", "#didomi-host",
    "#usercentrics-root", ".cky-consent-container", "#cmpbox",
    '[class*="cookie-banner"]', '[class*="consent-banner"]',
    '[class*="cookie-notice"]', '[role="dialog"]',
)

# Selectors that indicate a persistent "cookie settings" entry point.
_SETTINGS_SELECTORS = (
    '[class*="cookie-settings"]', '[class*="privacy-settings"]',
    'a[href*="cookie"]', 'a[href*="datenschutz-einstellungen"]',
    '[class*="consent-settings"]', '#ot-sdk-btn',
    '.cky-btn-revisit', '#CybotCookiebotDialogBodyButtonDetails',
    '[data-testid="uc-footer-link"]',
)

_FOOTER_SETTINGS_KEYWORDS = (
    "cookie-einstellungen", "cookie settings",
    "datenschutz-einstellungen", "privacy settings",
)

# The keyword lists below are written in ASCII transliteration, so page text
# is transliterated the same way before matching.
_UMLAUT_MAP = str.maketrans({"ä": "ae", "ö": "oe", "ü": "ue", "ß": "ss"})

# Phrases that ask the user to "agree to" the privacy policy.
_WRONG_DSE_CONSENT_PATTERNS = (
    "stimme der datenschutz",
    "stimme den datenschutz",
    "akzeptiere die datenschutz",
    "akzeptiere die privacy",
    "agree to the privacy policy",
    "accept the privacy",
    "datenschutzerklaerung zustimmen",
    "datenschutzrichtlinie akzeptieren",
    "datenschutzrichtlinie zustimmen",
    "i agree to the privacy",
    "i accept the privacy",
)

_REJECT_TEXTS = ("ablehnen", "reject", "nur notwendige", "alle ablehnen", "decline")
_ACCEPT_KEYWORDS = ("akzeptieren", "accept", "zustimmen", "agree", "einverstanden")
_REJECT_KEYWORDS = ("ablehnen", "reject", "notwendige", "decline", "nein")

# Both scripts receive the selector of the banner element that the Python side
# matched, so the DOM checks inspect the same element as the text checks.
_PRE_CHECKED_JS = """
(selector) => {
    const banner = document.querySelector(selector);
    if (!banner) return [];
    const checked = banner.querySelectorAll(
        'input[type="checkbox"]:checked:not([disabled])'
    );
    return [...checked]
        .filter(cb => {
            const label = cb.closest('label')?.textContent || cb.getAttribute('aria-label') || '';
            const isNecessary = /notwendig|necessary|essential|erforderlich/i.test(label);
            return !isNecessary;
        })
        .map(cb => cb.closest('label')?.textContent?.trim() || cb.id || 'unknown');
}
"""

_BUTTONS_JS = """
(selector) => {
    const banner = document.querySelector(selector);
    if (!banner) return null;
    const buttons = [...banner.querySelectorAll('button, a[role="button"], [class*="btn"]')];
    return buttons.slice(0, 6).map(b => {
        const style = window.getComputedStyle(b);
        const rect = b.getBoundingClientRect();
        return {
            text: b.textContent?.trim()?.substring(0, 40) || '',
            width: rect.width,
            height: rect.height,
            area: rect.width * rect.height,
            bgColor: style.backgroundColor,
            fontSize: parseFloat(style.fontSize),
            visible: rect.width > 0 && rect.height > 0,
        };
    });
}
"""


def _banner_violation(severity: str, text: str, legal_ref: str):
    """Build a Violation attributed to the cookie banner."""
    return Violation(
        service="Cookie-Banner",
        severity=severity,
        text=text,
        legal_ref=legal_ref,
    )


async def _read_banner(page):
    """Return (selector, text, links) for the first banner container with text.

    Links are collected only from the element whose text is used, so anchors
    from an unrelated earlier match cannot leak into the link checks.
    Returns (None, "", []) when no container yields any text.
    """
    for selector in _BANNER_SELECTORS:
        try:
            el = page.locator(selector).first
            if await el.count() == 0:
                continue
            text = (await el.text_content() or "").strip()
            if not text:
                continue
            links = []
            for link in await el.locator("a[href]").all():
                href = await link.get_attribute("href") or ""
                label = (await link.text_content() or "").strip()
                links.append({"href": href.lower(), "text": label.lower()})
            return selector, text, links
        except Exception as exc:
            # Best effort: a broken selector must not abort the whole scan.
            logger.debug("Banner selector %s failed: %s", selector, exc)
            continue
    return None, "", []


def _classify_button(label: str):
    """Classify a button label as "accept", "reject" or None."""
    import re

    text = label.lower().translate(_UMLAUT_MAP)
    # Reject keywords win: "Nur notwendige Cookies akzeptieren" is a reject
    # option even though it contains an accept keyword.
    if any(kw in text for kw in _REJECT_KEYWORDS):
        return "reject"
    # "ok" must be a whole word, otherwise every label containing "cookie"
    # would be classified as an accept button.
    if any(kw in text for kw in _ACCEPT_KEYWORDS) or re.search(r"\bok(ay)?\b", text):
        return "accept"
    return None


async def _button_prominence_violations(page, selector: str) -> list:
    """Compare the rendered size of the accept and reject buttons (dark patterns)."""
    buttons = await page.evaluate(_BUTTONS_JS, selector)
    if not buttons or len(buttons) < 2:
        return []

    accept_btn = None
    reject_btn = None
    for btn in buttons:
        # Hidden (zero-area) buttons would distort the ratios below.
        if not btn.get("visible"):
            continue
        role = _classify_button(btn.get("text") or "")
        if role == "accept" and accept_btn is None:
            accept_btn = btn
        elif role == "reject" and reject_btn is None:
            reject_btn = btn

    if not (accept_btn and reject_btn):
        return []

    found = []
    area_ratio = accept_btn["area"] / max(reject_btn["area"], 1)
    if area_ratio > 2.5:
        found.append(_banner_violation(
            "MEDIUM",
            f"Dark Pattern: 'Akzeptieren'-Button ist {area_ratio:.1f}x groesser als "
            f"'Ablehnen'-Button. Beide Optionen muessen gleichwertig dargestellt werden.",
            "EDPB Guidelines 05/2020, §25 TDDDG, DSK Orientierungshilfe Telemedien",
        ))
    size_ratio = accept_btn["fontSize"] / max(reject_btn["fontSize"], 1)
    if size_ratio > 1.5:
        found.append(_banner_violation(
            "MEDIUM",
            f"Dark Pattern: Schriftgroesse 'Akzeptieren' ({accept_btn['fontSize']:.0f}px) "
            f"vs. 'Ablehnen' ({reject_btn['fontSize']:.0f}px). "
            f"Unterschiedliche Schriftgroessen sind ein Dark Pattern.",
            "EDPB Guidelines 05/2020 (gleichwertige Darstellung)",
        ))
    return found


async def _settings_reachable(page) -> bool:
    """Return True if the page exposes a way to reopen the cookie settings."""
    for sel in _SETTINGS_SELECTORS:
        try:
            if await page.locator(sel).count() > 0:
                return True
        except Exception:
            continue

    # Fall back to scanning the footer text for a settings link label.
    footer_text = ""
    try:
        footer = page.locator("footer").first
        if await footer.count() > 0:
            footer_text = (await footer.text_content() or "").lower()
    except Exception as exc:
        logger.debug("Footer lookup failed: %s", exc)
    return any(kw in footer_text for kw in _FOOTER_SETTINGS_KEYWORDS)


async def _check_banner_text(page) -> dict:
    """Check the cookie banner on ``page`` for legal issues.

    Checks performed:

    1. An Impressum link is present in the banner or visible on the page
       (§5 TMG).
    2. A privacy policy (DSE) link is present in the banner.
    3. The banner does not ask for "consent to the privacy policy". The DSE
       is an information obligation (Art. 13 DSGVO), not something users
       agree to.
    4. The banner text mentions a reject option.
    5. No non-essential checkbox is pre-ticked (EuGH Planet49).
    6. Accept and reject buttons are of comparable size and font size.
    8. Cookie settings can be reopened later (Art. 7(3) DSGVO).

    Check 7 (cookie wall) is handled elsewhere, see the comment in the body.

    Args:
        page: Playwright page with the banner rendered.

    Returns:
        A dict with the keys ``"violations"`` (list of Violation),
        ``"has_impressum"`` (bool) and ``"has_dse"`` (bool). If no banner
        text is found, the list is empty and both flags are False. An
        unexpected failure is logged, and whatever was collected up to that
        point is returned.
    """
    violations = []
    has_impressum = False
    has_dse = False

    try:
        selector, banner_text, banner_links = await _read_banner(page)
        if not banner_text:
            return {"violations": violations, "has_impressum": False, "has_dse": False}

        # Transliterate umlauts so the ASCII keyword lists match real text.
        banner_lower = banner_text.lower().translate(_UMLAUT_MAP)

        # Check 1: Impressum link in or accessible through banner
        has_impressum = any(
            "impressum" in link["href"] or "impressum" in link["text"]
            or "imprint" in link["href"] or "legal notice" in link["text"]
            for link in banner_links
        )
        # Also accept an Impressum link that is visible elsewhere on the page.
        if not has_impressum:
            try:
                # `.first` is a plain property returning a Locator; it must
                # not be awaited.
                imp_link = page.locator('a[href*="impressum"], a[href*="imprint"]').first
                if await imp_link.count() > 0 and await imp_link.is_visible():
                    has_impressum = True
            except Exception as exc:
                logger.debug("Impressum visibility check failed: %s", exc)

        if not has_impressum:
            violations.append(_banner_violation(
                "HIGH",
                "Impressum nicht aus dem Cookie-Banner erreichbar. "
                "Bei ueberlagerndem Banner muss ein Impressum-Link im Banner vorhanden sein (§5 TMG).",
                "§5 TMG, LG Rostock Az. 3 O 22/19",
            ))

        # Check 2: DSE link in banner
        has_dse = any(
            "datenschutz" in link["href"] or "datenschutz" in link["text"]
            or "privacy" in link["href"] or "privacy" in link["text"]
            or "dsgvo" in link["href"]
            for link in banner_links
        )
        if not has_dse:
            violations.append(_banner_violation(
                "MEDIUM",
                "Kein Link zur Datenschutzerklaerung im Cookie-Banner. "
                "Nutzer sollten vor der Einwilligung die DSE einsehen koennen.",
                "Art. 13 DSGVO, ErwGr. 42 DSGVO (informierte Einwilligung)",
            ))

        # Check 3: Wrong wording, i.e. "consent to the privacy policy".
        # Reported at most once, however many patterns match.
        if any(pattern in banner_lower for pattern in _WRONG_DSE_CONSENT_PATTERNS):
            violations.append(_banner_violation(
                "HIGH",
                "Falsche Formulierung im Banner: 'Zustimmung zur Datenschutzerklaerung'. "
                "Die DSE ist eine Informationspflicht (Art. 13 DSGVO) — man kann sie "
                "nur zur Kenntnis nehmen, nicht 'zustimmen'. "
                "Korrekt: 'Ich habe die Datenschutzinformationen zur Kenntnis genommen'.",
                "Art. 13 DSGVO, ErwGr. 42 (informierte Einwilligung ≠ Zustimmung zur DSE)",
            ))

        # Check 4: Reject option mentioned in the banner text. This is a
        # text heuristic and does not verify that a button is rendered.
        if not any(text in banner_lower for text in _REJECT_TEXTS):
            violations.append(_banner_violation(
                "HIGH",
                "Kein sichtbarer 'Ablehnen'-Button im Banner erkannt. "
                "Die Ablehnung muss ebenso einfach sein wie die Zustimmung.",
                "§25 Abs. 1 TDDDG, EDPB Guidelines 05/2020 (Consent)",
            ))

        # Check 5: Pre-ticked checkboxes (EuGH Planet49)
        try:
            pre_checked = await page.evaluate(_PRE_CHECKED_JS, selector)
            if pre_checked:
                violations.append(_banner_violation(
                    "HIGH",
                    f"Vorausgewaehlte Checkboxen im Banner: {', '.join(pre_checked[:3])}. "
                    f"Einwilligung muss durch aktive Handlung erfolgen — vorausgefuellte "
                    f"Checkboxen sind ungueltig.",
                    "Art. 4(11) DSGVO, EuGH C-673/17 (Planet49)",
                ))
        except Exception as exc:
            logger.debug("Pre-checked checkbox check failed: %s", exc)

        # Check 6: Dark pattern, button size/prominence comparison
        try:
            violations.extend(await _button_prominence_violations(page, selector))
        except Exception as exc:
            logger.debug("Button prominence check failed: %s", exc)

        # Check 7: Cookie Wall — does rejecting block the site?
        # (This is checked in Phase B — if after reject the page is not navigable)

        # Check 8: Re-access to settings (Art. 7(3): revocation as easy as consent)
        try:
            if not await _settings_reachable(page):
                violations.append(_banner_violation(
                    "MEDIUM",
                    "Kein erneuter Zugang zu Cookie-Einstellungen gefunden. "
                    "Der Widerruf der Einwilligung muss ebenso einfach sein wie "
                    "die Erteilung (Art. 7 Abs. 3 DSGVO).",
                    "Art. 7 Abs. 3 DSGVO (Widerruf so einfach wie Einwilligung)",
                ))
        except Exception as exc:
            logger.debug("Settings re-access check failed: %s", exc)

    except Exception as e:
        logger.warning("Banner text check failed: %s", e)

    return {"violations": violations, "has_impressum": has_impressum, "has_dse": has_dse}