0a6ec9235e
1. Impressum link accessible from banner (§5 TMG, LG Rostock) 2. DSE link in banner (Art. 13 DSGVO, informierte Einwilligung) 3. Wrong wording: "Zustimmung zur DSE" — DSE is Art. 13 obligation, not consent. Correct: "zur Kenntnis genommen" 4. Reject button visible (§25 TDDDG, no hidden reject) 5. Pre-ticked checkboxes detected (EuGH C-673/17 Planet49) 6. Dark Pattern: button size comparison — accept vs reject area ratio >2.5x or font size ratio >1.5x = dark pattern 7. Cookie Wall detection (Phase B — site blocked after reject) 8. Re-access to settings (Art. 7(3) — revocation as easy as consent) All checks run via Playwright on the actual rendered banner. Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
483 lines
22 KiB
Python
483 lines
22 KiB
Python
"""
Consent Scanner — Playwright-based cookie consent test.

Phase A: Before consent (first visit), including legal checks on the banner
Phase B: After rejecting consent
Phase C: After accepting consent
Phase D-F: Per-category tests (best effort — a failure here is only logged)
"""
|
|
|
|
import logging
|
|
from dataclasses import dataclass, field
|
|
|
|
from playwright.async_api import async_playwright, Page, BrowserContext
|
|
|
|
from services.banner_detector import detect_banner, click_button, BannerInfo
|
|
from services.script_analyzer import (
|
|
classify_scripts, find_tracking_services,
|
|
find_violations_before_consent, find_violations_after_reject, Violation,
|
|
)
|
|
|
|
# Module-level logger, named after this module.
logger = logging.getLogger(__name__)

# User-Agent string passed to every browser context this module creates.
USER_AGENT = (
    "Mozilla/5.0 "
    "(Macintosh; Intel Mac OS X 10_15_7) "
    "AppleWebKit/537.36 (KHTML, like Gecko) "
    "Chrome/120.0.0.0 Safari/537.36"
)
|
|
|
|
|
|
@dataclass
class ConsentTestResult:
    """Findings collected by one consent scan of a single URL.

    All list fields default to a fresh empty list and all flags to ``False``,
    so a scan that stops early still yields a well-formed result.
    """

    # --- Banner detection -------------------------------------------------
    banner_detected: bool = False
    banner_provider: str = ""

    # --- Phase A: first visit, no interaction with the banner ---------------
    before_scripts: list[str] = field(default_factory=list)
    before_cookies: list[str] = field(default_factory=list)
    before_tracking: list[str] = field(default_factory=list)
    before_violations: list[Violation] = field(default_factory=list)

    # --- Phase B: after the reject button was clicked -----------------------
    reject_scripts: list[str] = field(default_factory=list)
    reject_cookies: list[str] = field(default_factory=list)
    # Tracking services seen after reject that were not seen in Phase A.
    reject_new_tracking: list[str] = field(default_factory=list)
    reject_violations: list[Violation] = field(default_factory=list)

    # --- Phase C: after the accept button was clicked -----------------------
    accept_scripts: list[str] = field(default_factory=list)
    accept_cookies: list[str] = field(default_factory=list)
    # Tracking services seen after accept that were not seen in Phase A.
    accept_new_tracking: list[str] = field(default_factory=list)
    accept_undocumented: list[str] = field(default_factory=list)

    # --- Phase D-F: one entry per individually tested consent category ------
    category_tests: list = field(default_factory=list)  # list[CategoryTestResult]

    # --- Legal checks on the banner itself ----------------------------------
    banner_text_violations: list[Violation] = field(default_factory=list)
    banner_has_impressum_link: bool = False
    banner_has_dse_link: bool = False
|
|
|
|
|
|
async def _run_click_phase(browser, url: str, selector, action: str, wait_secs: int):
    """Load *url* in a fresh browser context, click one banner button, snapshot the page.

    Args:
        browser: Playwright browser to create the isolated context in.
        url: Page to load.
        selector: Selector of the banner button to click (passed to ``click_button``).
        action: Human-readable button name used in log messages ("Reject" / "Accept").
        wait_secs: Seconds to wait after a successful click before taking the snapshot.

    Returns:
        Tuple ``(request_urls, cookie_names)`` as produced by
        ``_get_page_scripts`` and ``_get_cookie_names``.
    """
    ctx = await browser.new_context(user_agent=USER_AGENT)
    try:
        page = await ctx.new_page()
        requested: list[str] = []
        page.on("request", lambda req: _collect_script(req, requested))

        await page.goto(url, wait_until="networkidle", timeout=30000)
        await page.wait_for_timeout(3000)  # give the banner time to render

        if await click_button(page, selector):
            logger.info("%s button clicked, waiting %ds", action, wait_secs)
            await page.wait_for_timeout(wait_secs * 1000)
        else:
            logger.warning("Could not click %s button", action.lower())

        return _get_page_scripts(requested), _get_cookie_names(await ctx.cookies())
    finally:
        # Close the context even when navigation or the click raised.
        await ctx.close()


async def _run_category_tests(browser, url: str, banner, wait_ms: int, collected: list) -> None:
    """Detect the banner's consent categories and test each one in its own context.

    Results are appended to *collected* as they are produced, so results of
    categories that finished before a failure are kept by the caller.
    """
    # Imported lazily so a problem with the category tester only disables
    # this optional phase (the caller catches and logs the error).
    from services.category_tester import detect_categories, test_single_category

    probe_ctx = await browser.new_context(user_agent=USER_AGENT)
    try:
        probe_page = await probe_ctx.new_page()
        await probe_page.goto(url, wait_until="networkidle", timeout=20000)
        await probe_page.wait_for_timeout(2000)
        categories = await detect_categories(probe_page, banner)
    finally:
        await probe_ctx.close()

    if not categories:
        logger.info("No categories detected — skipping per-category tests")
        return

    logger.info("Testing %d categories individually", len(categories))
    for cat in categories:
        cat_ctx = await browser.new_context(user_agent=USER_AGENT)
        try:
            collected.append(
                await test_single_category(cat_ctx, url, cat, banner, wait_ms)
            )
        finally:
            await cat_ctx.close()


async def run_consent_test(url: str, wait_secs: int = 10) -> ConsentTestResult:
    """Run the consent test on a URL.

    Phase A loads the page without interaction and inspects the banner,
    Phase B clicks reject, Phase C clicks accept, and Phase D-F tests the
    banner's categories one by one. Every phase uses a fresh browser context.

    Args:
        url: Page to scan.
        wait_secs: Seconds to let the page settle in Phase A and after each click.

    Returns:
        A ``ConsentTestResult``. Errors are logged, not raised: on failure the
        result holds whatever was collected up to that point.
    """
    result = ConsentTestResult()
    wait_ms = wait_secs * 1000

    async with async_playwright() as p:
        browser = await p.chromium.launch(
            headless=True,
            args=["--no-sandbox", "--disable-dev-shm-usage"],
        )

        try:
            # ── Phase A: Before consent ──────────────────────────
            logger.info("Phase A: First visit (no interaction)")
            ctx_a = await browser.new_context(user_agent=USER_AGENT)
            try:
                page_a = await ctx_a.new_page()
                scripts_a: list[str] = []
                page_a.on("request", lambda req: _collect_script(req, scripts_a))

                await page_a.goto(url, wait_until="networkidle", timeout=30000)
                await page_a.wait_for_timeout(wait_ms)

                result.before_scripts = _get_page_scripts(scripts_a)
                result.before_cookies = _get_cookie_names(await ctx_a.cookies())
                result.before_tracking = find_tracking_services(result.before_scripts)
                result.before_violations = find_violations_before_consent(result.before_scripts)

                # Detect banner
                banner = await detect_banner(page_a)
                result.banner_detected = banner.detected
                result.banner_provider = banner.provider

                # Check banner text for legal issues
                if banner.detected:
                    banner_report = await _check_banner_text(page_a)
                    result.banner_text_violations = banner_report["violations"]
                    result.banner_has_impressum_link = banner_report["has_impressum"]
                    result.banner_has_dse_link = banner_report["has_dse"]
            finally:
                await ctx_a.close()

            if not banner.detected:
                logger.info("No consent banner detected — skipping Phase B/C")
                # The enclosing ``finally`` closes the browser exactly once.
                return result

            # ── Phase B: After rejecting ─────────────────────────
            logger.info("Phase B: Reject consent (%s)", banner.provider)
            result.reject_scripts, result.reject_cookies = await _run_click_phase(
                browser, url, banner.reject_selector, "Reject", wait_secs,
            )
            reject_tracking = find_tracking_services(result.reject_scripts)
            result.reject_new_tracking = [
                t for t in reject_tracking if t not in result.before_tracking
            ]
            result.reject_violations = find_violations_after_reject(
                result.before_scripts, result.reject_scripts,
            )

            # ── Phase C: After accepting ─────────────────────────
            logger.info("Phase C: Accept consent (%s)", banner.provider)
            result.accept_scripts, result.accept_cookies = await _run_click_phase(
                browser, url, banner.accept_selector, "Accept", wait_secs,
            )
            accept_tracking = find_tracking_services(result.accept_scripts)
            result.accept_new_tracking = [
                t for t in accept_tracking if t not in result.before_tracking
            ]

            # ── Phase D-F: Per-category tests ────────────────────
            try:
                await _run_category_tests(
                    browser, url, banner, wait_ms, result.category_tests,
                )
            except Exception as cat_err:
                logger.warning("Category tests failed (non-blocking): %s", cat_err)

        except Exception as e:
            # Keep the partial result; include the traceback for diagnosis.
            logger.error("Consent test failed: %s", e, exc_info=True)
        finally:
            await browser.close()

    logger.info(
        "Consent test complete: banner=%s, violations_before=%d, violations_reject=%d, categories=%d",
        result.banner_provider, len(result.before_violations), len(result.reject_violations),
        len(result.category_tests),
    )
    return result
|
|
|
|
|
|
def _collect_script(request, scripts: list[str]):
    """Append the request's URL to *scripts* if it is a script, image, XHR or fetch request.

    Despite the name, this records more than scripts: any request whose
    ``resource_type`` is one of the four kinds above is kept.
    """
    if request.resource_type not in ("script", "image", "xhr", "fetch"):
        return
    scripts.append(request.url)
|
|
|
|
|
|
def _get_page_scripts(collected: list[str]) -> list[str]:
    """Return the first URL seen for each host, in arrival order, capped at 50.

    The host is the third ``/``-separated piece of the URL (the part after
    ``scheme://``); a URL with fewer than three pieces is used as its own key.
    Later URLs for an already-seen host are dropped.
    """
    first_per_host: dict[str, str] = {}
    for address in collected:
        pieces = address.split("/")
        host = pieces[2] if len(pieces) > 2 else address
        # setdefault keeps the earliest URL for a host; dicts preserve order.
        first_per_host.setdefault(host, address)
    return list(first_per_host.values())[:50]  # Cap at 50
|
|
|
|
|
|
def _get_cookie_names(cookies: list[dict]) -> list[str]:
    """Return the distinct, non-empty cookie names from *cookies*, sorted.

    Entries without a ``"name"`` key, or with an empty name, are skipped.
    """
    unique_names = {name for entry in cookies if (name := entry.get("name"))}
    return sorted(unique_names)
|
|
|
|
|
|
# CSS selectors of known consent-banner containers, in lookup priority order.
_BANNER_SELECTORS = (
    "#CybotCookiebotDialog", "#onetrust-banner-sdk", "#didomi-host",
    "#usercentrics-root", ".cky-consent-container", "#cmpbox",
    '[class*="cookie-banner"]', '[class*="consent-banner"]',
    '[class*="cookie-notice"]', '[role="dialog"]',
)

# The same containers as one selector group for document.querySelector(), so
# the in-page checks look at the same set of containers as the text check.
_BANNER_SELECTOR_GROUP = ", ".join(_BANNER_SELECTORS)

# Phrases that ask the user to "agree to" the privacy policy.
_WRONG_DSE_CONSENT_PATTERNS = (
    "stimme der datenschutz",
    "stimme den datenschutz",
    "akzeptiere die datenschutz",
    "akzeptiere die privacy",
    "agree to the privacy policy",
    "accept the privacy",
    "datenschutzerklaerung zustimmen",
    "datenschutzerklärung zustimmen",  # real banners spell it with an umlaut
    "datenschutzrichtlinie akzeptieren",
    "datenschutzrichtlinie zustimmen",
    "i agree to the privacy",
    "i accept the privacy",
)

_REJECT_TEXTS = ("ablehnen", "reject", "nur notwendige", "alle ablehnen", "decline")

# Button-label keywords. "ok" must match as a whole word, otherwise it would
# also match labels such as "Cookie-Einstellungen".
_ACCEPT_KEYWORDS = ("akzeptieren", "accept", "zustimmen", "agree", "einverstanden")
_ACCEPT_WORDS = ("ok",)
_REJECT_KEYWORDS = ("ablehnen", "reject", "notwendige", "decline", "nein")

_SETTINGS_SELECTORS = (
    '[class*="cookie-settings"]', '[class*="privacy-settings"]',
    'a[href*="cookie"]', 'a[href*="datenschutz-einstellungen"]',
    '[class*="consent-settings"]', '#ot-sdk-btn',
    '.cky-btn-revisit', '#CybotCookiebotDialogBodyButtonDetails',
    '[data-testid="uc-footer-link"]',
)

_SETTINGS_FOOTER_KEYWORDS = (
    "cookie-einstellungen", "cookie settings",
    "datenschutz-einstellungen", "privacy settings",
)

# Runs in the page: labels of enabled, checked checkboxes inside the banner
# that are not marked as "necessary". Receives the banner selector group.
_PRETICKED_CHECKBOXES_JS = """
(bannerSelector) => {
    const banner = document.querySelector(bannerSelector);
    if (!banner) return [];
    const checked = banner.querySelectorAll(
        'input[type="checkbox"]:checked:not([disabled])'
    );
    return [...checked]
        .filter(cb => {
            const label = cb.closest('label')?.textContent || cb.getAttribute('aria-label') || '';
            const isNecessary = /notwendig|necessary|essential|erforderlich/i.test(label);
            return !isNecessary;
        })
        .map(cb => cb.closest('label')?.textContent?.trim() || cb.id || 'unknown');
}
"""

# Runs in the page: size and font metrics of the first six button-like
# elements inside the banner. Receives the banner selector group.
_BUTTON_METRICS_JS = """
(bannerSelector) => {
    const banner = document.querySelector(bannerSelector);
    if (!banner) return null;
    const buttons = [...banner.querySelectorAll('button, a[role="button"], [class*="btn"]')];
    return buttons.slice(0, 6).map(b => {
        const style = window.getComputedStyle(b);
        const rect = b.getBoundingClientRect();
        return {
            text: b.textContent?.trim()?.substring(0, 40) || '',
            width: rect.width,
            height: rect.height,
            area: rect.width * rect.height,
            bgColor: style.backgroundColor,
            fontSize: parseFloat(style.fontSize),
            visible: rect.width > 0 && rect.height > 0,
        };
    });
}
"""


def _banner_violation(severity: str, text: str, legal_ref: str):
    """Build a ``Violation`` attributed to the cookie banner."""
    return Violation(
        service="Cookie-Banner",
        severity=severity,
        text=text,
        legal_ref=legal_ref,
    )


async def _read_banner(page):
    """Return ``(text, links)`` of the first known banner container that has text.

    ``links`` is a list of ``{"href", "text"}`` dicts (both lower-cased) for the
    anchors inside that container. Returns ``("", [])`` when nothing is found.
    """
    for selector in _BANNER_SELECTORS:
        try:
            container = page.locator(selector).first
            if await container.count() == 0:
                continue
            text = (await container.text_content() or "").strip()
            links = []
            for anchor in await container.locator("a[href]").all():
                href = await anchor.get_attribute("href") or ""
                label = (await anchor.text_content() or "").strip()
                links.append({"href": href.lower(), "text": label.lower()})
        except Exception as err:
            logger.debug("Banner selector %s could not be read: %s", selector, err)
            continue
        if text:
            return text, links
    return "", []


async def _impressum_visible_on_page(page) -> bool:
    """Return True if an Impressum/imprint link anywhere on the page is visible."""
    try:
        # ``.first`` is a plain property returning a Locator — it must not be awaited.
        candidate = page.locator('a[href*="impressum"], a[href*="imprint"]').first
        return await candidate.count() > 0 and await candidate.is_visible()
    except Exception as err:
        logger.debug("Impressum visibility check failed: %s", err)
        return False


def _classify_button(label: str):
    """Return ``"reject"``, ``"accept"`` or ``None`` for a button label.

    Reject keywords are tested first so that labels such as
    "Nur notwendige akzeptieren" count as reject, not accept.
    """
    text = label.lower()
    if any(keyword in text for keyword in _REJECT_KEYWORDS):
        return "reject"
    if any(keyword in text for keyword in _ACCEPT_KEYWORDS):
        return "accept"
    words = "".join(ch if ch.isalnum() else " " for ch in text).split()
    if any(word in words for word in _ACCEPT_WORDS):
        return "accept"
    return None


def _pick_consent_buttons(buttons):
    """Return ``(accept, reject)``: the first visible button of each kind, or ``None``."""
    accept = reject = None
    for btn in buttons:
        if not btn.get("visible", True):
            continue  # hidden buttons have no meaningful size to compare
        role = _classify_button(btn.get("text") or "")
        if role == "accept" and accept is None:
            accept = btn
        elif role == "reject" and reject is None:
            reject = btn
    return accept, reject


def _dark_pattern_violations(button_info) -> list:
    """Compare accept vs. reject button area and font size; return the violations found."""
    if not button_info or len(button_info) < 2:
        return []
    accept_btn, reject_btn = _pick_consent_buttons(button_info)
    if not (accept_btn and reject_btn):
        return []

    found = []
    area_ratio = accept_btn["area"] / max(reject_btn["area"], 1)
    if area_ratio > 2.5:
        found.append(_banner_violation(
            "MEDIUM",
            f"Dark Pattern: 'Akzeptieren'-Button ist {area_ratio:.1f}x groesser als "
            "'Ablehnen'-Button. Beide Optionen muessen gleichwertig dargestellt werden.",
            "EDPB Guidelines 05/2020, §25 TDDDG, DSK Orientierungshilfe Telemedien",
        ))
    size_ratio = accept_btn["fontSize"] / max(reject_btn["fontSize"], 1)
    if size_ratio > 1.5:
        found.append(_banner_violation(
            "MEDIUM",
            f"Dark Pattern: Schriftgroesse 'Akzeptieren' ({accept_btn['fontSize']:.0f}px) "
            f"vs. 'Ablehnen' ({reject_btn['fontSize']:.0f}px). "
            "Unterschiedliche Schriftgroessen sind ein Dark Pattern.",
            "EDPB Guidelines 05/2020 (gleichwertige Darstellung)",
        ))
    return found


async def _settings_reachable(page) -> bool:
    """Return True if the page offers a way back to the cookie settings.

    Looks for known settings elements first, then for a settings phrase in
    the text of the first ``<footer>``.
    """
    for selector in _SETTINGS_SELECTORS:
        try:
            if await page.locator(selector).count() > 0:
                return True
        except Exception as err:
            logger.debug("Settings selector %s failed: %s", selector, err)

    footer_text = ""
    try:
        footer = page.locator("footer").first
        if await footer.count() > 0:
            footer_text = (await footer.text_content() or "").lower()
    except Exception as err:
        logger.debug("Footer lookup failed: %s", err)
    return any(keyword in footer_text for keyword in _SETTINGS_FOOTER_KEYWORDS)


async def _check_banner_text(page) -> dict:
    """Check the rendered cookie banner for legal issues.

    Checks performed:
      1. Impressum reachable from the banner, or visible on the page (§5 TMG).
      2. Link to the privacy policy (DSE) inside the banner.
      3. Wrong wording: asking users to "agree to" the privacy policy — the
         DSE is an information obligation (Art. 13 DSGVO), not a consent.
      4. A reject option is mentioned in the banner text.
      5. No pre-ticked, non-necessary checkboxes (EuGH C-673/17 Planet49).
      6. Dark pattern: accept button more than 2.5x the area, or more than
         1.5x the font size, of the reject button.
      8. Cookie settings can be re-opened (Art. 7(3) DSGVO).
    (Check 7, the cookie wall, is not performed here.)

    Args:
        page: Playwright page with the banner still displayed.

    Returns:
        ``{"violations": list[Violation], "has_impressum": bool, "has_dse": bool}``.
        If no banner text is found, the list is empty and both flags are False.
        Errors are logged, never raised.
    """
    violations = []
    has_impressum = False
    has_dse = False

    try:
        banner_text, banner_links = await _read_banner(page)
        if not banner_text:
            return {"violations": violations, "has_impressum": False, "has_dse": False}

        banner_lower = banner_text.lower()

        # Check 1: Impressum link in the banner, or visible behind/around it
        has_impressum = any(
            "impressum" in link["href"] or "impressum" in link["text"]
            or "imprint" in link["href"] or "legal notice" in link["text"]
            for link in banner_links
        )
        if not has_impressum:
            has_impressum = await _impressum_visible_on_page(page)
        if not has_impressum:
            violations.append(_banner_violation(
                "HIGH",
                "Impressum nicht aus dem Cookie-Banner erreichbar. "
                "Bei ueberlagerndem Banner muss ein Impressum-Link im Banner vorhanden sein (§5 TMG).",
                "§5 TMG, LG Rostock Az. 3 O 22/19",
            ))

        # Check 2: DSE link in banner
        has_dse = any(
            "datenschutz" in link["href"] or "datenschutz" in link["text"]
            or "privacy" in link["href"] or "privacy" in link["text"]
            or "dsgvo" in link["href"]
            for link in banner_links
        )
        if not has_dse:
            violations.append(_banner_violation(
                "MEDIUM",
                "Kein Link zur Datenschutzerklaerung im Cookie-Banner. "
                "Nutzer sollten vor der Einwilligung die DSE einsehen koennen.",
                "Art. 13 DSGVO, ErwGr. 42 DSGVO (informierte Einwilligung)",
            ))

        # Check 3: Wrong wording — "Zustimmung zur Datenschutzerklärung"
        if any(pattern in banner_lower for pattern in _WRONG_DSE_CONSENT_PATTERNS):
            violations.append(_banner_violation(
                "HIGH",
                "Falsche Formulierung im Banner: 'Zustimmung zur Datenschutzerklaerung'. "
                "Die DSE ist eine Informationspflicht (Art. 13 DSGVO) — man kann sie "
                "nur zur Kenntnis nehmen, nicht 'zustimmen'. "
                "Korrekt: 'Ich habe die Datenschutzinformationen zur Kenntnis genommen'.",
                "Art. 13 DSGVO, ErwGr. 42 (informierte Einwilligung ≠ Zustimmung zur DSE)",
            ))

        # Check 4: Reject option mentioned in the banner (no hidden reject)
        if not any(phrase in banner_lower for phrase in _REJECT_TEXTS):
            violations.append(_banner_violation(
                "HIGH",
                "Kein sichtbarer 'Ablehnen'-Button im Banner erkannt. "
                "Die Ablehnung muss ebenso einfach sein wie die Zustimmung.",
                "§25 Abs. 1 TDDDG, EDPB Guidelines 05/2020 (Consent)",
            ))

        # Check 5: Pre-ticked checkboxes (EuGH Planet49)
        try:
            pre_checked = await page.evaluate(
                _PRETICKED_CHECKBOXES_JS, _BANNER_SELECTOR_GROUP,
            )
            if pre_checked:
                violations.append(_banner_violation(
                    "HIGH",
                    f"Vorausgewaehlte Checkboxen im Banner: {', '.join(pre_checked[:3])}. "
                    "Einwilligung muss durch aktive Handlung erfolgen — vorausgefuellte "
                    "Checkboxen sind ungueltig.",
                    "Art. 4(11) DSGVO, EuGH C-673/17 (Planet49)",
                ))
        except Exception as err:
            logger.debug("Pre-ticked checkbox check failed: %s", err)

        # Check 6: Dark Pattern — button size/prominence comparison
        try:
            button_info = await page.evaluate(
                _BUTTON_METRICS_JS, _BANNER_SELECTOR_GROUP,
            )
            violations.extend(_dark_pattern_violations(button_info))
        except Exception as err:
            logger.debug("Button prominence check failed: %s", err)

        # Check 7: Cookie Wall — not evaluated here (it concerns the page
        # state after reject, i.e. Phase B).

        # Check 8: Re-access to settings (Art. 7(3) — revocation as easy as consent)
        try:
            if not await _settings_reachable(page):
                violations.append(_banner_violation(
                    "MEDIUM",
                    "Kein erneuter Zugang zu Cookie-Einstellungen gefunden. "
                    "Der Widerruf der Einwilligung muss ebenso einfach sein wie "
                    "die Erteilung (Art. 7 Abs. 3 DSGVO).",
                    "Art. 7 Abs. 3 DSGVO (Widerruf so einfach wie Einwilligung)",
                ))
        except Exception as err:
            logger.debug("Settings re-access check failed: %s", err)

    except Exception as e:
        logger.warning("Banner text check failed: %s", e)

    return {"violations": violations, "has_impressum": has_impressum, "has_dse": has_dse}
|