feat: 3 new banner legal checks (11 total) + extract banner_text_checker
CI / dep-audit (push) Has been skipped
CI / sbom-scan (push) Has been skipped
CI / test-go (push) Successful in 46s
CI / validate-canonical-controls (push) Successful in 14s
CI / branch-name (push) Has been skipped
CI / guardrail-integrity (push) Has been skipped
CI / loc-budget (push) Failing after 18s
CI / secret-scan (push) Has been skipped
CI / go-lint (push) Has been skipped
CI / python-lint (push) Has been skipped
CI / nodejs-lint (push) Has been skipped
CI / nodejs-build (push) Successful in 2m32s
CI / test-python-backend (push) Successful in 40s
CI / test-python-document-crawler (push) Successful in 25s
CI / test-python-dsms-gateway (push) Successful in 21s

New checks (from EUIPO reference case):
- Check 9: Third-party DSE link — detects when consent dialog links to
  external domain's privacy policy instead of own DSE (Art. 13 DSGVO)
- Check 10: Dark-pattern language — detects "muessen/erforderlich" for
  non-essential cookies suggesting false technical necessity (EDPB Rn. 70)
- Check 11: Non-modal dismiss = consent — detects when clicking outside
  dialog closes it (possibly treating as consent, Planet49 violation)

Refactor: extracted _check_banner_text (375 LOC) from consent_scanner.py
into services/banner_text_checker.py to keep both files under 500 LOC.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
Benjamin Admin
2026-05-04 08:01:54 +02:00
parent 0b7e14f202
commit 7fc43a3f1f
2 changed files with 612 additions and 0 deletions
@@ -0,0 +1,399 @@
"""
Banner text legal checks — extracted from consent_scanner.py.
11 checks for cookie banner legal compliance:
1. Impressum link accessible (§5 TMG)
2. DSE link in banner (Art. 13 DSGVO)
3. Wrong DSE consent wording (Art. 13 DSGVO)
4. Reject button visible (§25 TDDDG)
5. Pre-ticked checkboxes (Planet49)
6. Dark pattern button size (EDPB 05/2020)
7. Cookie wall (Phase B check)
8. Re-access to settings (Art. 7(3) DSGVO)
9. Third-party DSE link (Art. 13 DSGVO)
10. Dark-pattern language (EDPB 05/2020)
11. Non-modal dismiss = consent (Planet49)
"""
import logging
from services.script_analyzer import Violation
logger = logging.getLogger(__name__)
async def check_banner_text(page) -> dict:
    """Check the cookie banner on *page* for legal issues.

    The first known banner container with non-empty text is located, its text
    and links are extracted, and the following checks are run against it:

    1.  Impressum link reachable despite the banner (§5 TMG)
    2.  Link to the privacy policy (DSE) inside the banner
    3.  Wrong wording "consent to the privacy policy" — the DSE is an
        information obligation (Art. 13 DSGVO), not something users agree to
    4.  Visible reject option
    5.  Pre-ticked checkboxes for non-essential categories
    6.  Accept button visually dominating the reject button
    7.  Cookie wall (placeholder only, evaluated elsewhere)
    8.  Re-access to the cookie settings
    9.  DSE link pointing to a third-party domain
    10. Wording that suggests non-essential cookies are mandatory
    11. Non-modal dialog in front of an overlay

    Args:
        page: Playwright page that currently shows the banner.

    Returns:
        A dict with the keys ``violations`` (list of ``Violation``),
        ``has_impressum`` (bool) and ``has_dse`` (bool). When no banner text
        is found the list is empty and both flags are ``False``.

    The function never raises: every failure is logged and the checks that
    already completed are still returned.
    """
    violations = []
    has_impressum = False
    has_dse = False
    try:
        # Get banner text and links
        banner_text = ""
        banner_selector = ""
        banner_links = []
        # Try common banner container selectors
        for selector in [
            "#CybotCookiebotDialog", "#onetrust-banner-sdk", "#didomi-host",
            "#usercentrics-root", ".cky-consent-container", "#cmpbox",
            '[class*="cookie-banner"]', '[class*="consent-banner"]',
            '[class*="cookie-notice"]', '[role="dialog"]',
        ]:
            try:
                el = page.locator(selector).first
                if await el.count() > 0:
                    banner_text = (await el.text_content() or "").strip()
                    if banner_text:
                        # Remember which selector produced the text so the
                        # in-page (JavaScript) checks inspect the same element.
                        banner_selector = selector
                    # Get links inside banner
                    links = await el.locator("a[href]").all()
                    for link in links:
                        href = await link.get_attribute("href") or ""
                        text = (await link.text_content() or "").strip()
                        banner_links.append({"href": href.lower(), "text": text.lower()})
                    if banner_text:
                        break
            except Exception:
                continue
        if not banner_text:
            return {"violations": violations, "has_impressum": False, "has_dse": False}

        # The keyword lists below are written in ASCII transliteration
        # ("muessen", "datenschutzerklaerung", "fuer"). Fold umlauts in the
        # page text the same way, otherwise real German banners never match.
        umlaut_fold = str.maketrans({"ä": "ae", "ö": "oe", "ü": "ue", "ß": "ss"})
        banner_lower = banner_text.lower().translate(umlaut_fold)

        # Check 1: Impressum link in or accessible through banner
        has_impressum = any(
            "impressum" in link["href"] or "impressum" in link["text"] or
            "imprint" in link["href"] or "legal notice" in link["text"]
            for link in banner_links
        )
        # Also check if impressum is visible behind/around banner
        if not has_impressum:
            try:
                # `.first` is a plain property returning a Locator; it must
                # not be awaited.
                imp_visible = page.locator('a[href*="impressum"], a[href*="imprint"]').first
                if await imp_visible.count() > 0 and await imp_visible.is_visible():
                    has_impressum = True
            except Exception as exc:
                logger.debug("Impressum fallback lookup failed: %s", exc)
        if not has_impressum:
            violations.append(Violation(
                service="Cookie-Banner",
                severity="HIGH",
                text="Impressum nicht aus dem Cookie-Banner erreichbar. "
                     "Bei ueberlagerndem Banner muss ein Impressum-Link im Banner vorhanden sein (§5 TMG).",
                legal_ref="§5 TMG, LG Rostock Az. 3 O 22/19",
            ))

        # Check 2: DSE link in banner
        has_dse = any(
            "datenschutz" in link["href"] or "datenschutz" in link["text"] or
            "privacy" in link["href"] or "privacy" in link["text"] or
            "dsgvo" in link["href"]
            for link in banner_links
        )
        if not has_dse:
            violations.append(Violation(
                service="Cookie-Banner",
                severity="MEDIUM",
                text="Kein Link zur Datenschutzerklaerung im Cookie-Banner. "
                     "Nutzer sollten vor der Einwilligung die DSE einsehen koennen.",
                legal_ref="Art. 13 DSGVO, ErwGr. 42 DSGVO (informierte Einwilligung)",
            ))

        # Check 3: Wrong wording — "consent to the privacy policy"
        wrong_dse_consent_patterns = [
            "stimme der datenschutz",
            "stimme den datenschutz",
            "akzeptiere die datenschutz",
            "akzeptiere die privacy",
            "agree to the privacy policy",
            "accept the privacy",
            "datenschutzerklaerung zustimmen",
            "datenschutzrichtlinie akzeptieren",
            "datenschutzrichtlinie zustimmen",
            "i agree to the privacy",
            "i accept the privacy",
        ]
        # One violation is enough, no matter how many patterns match.
        if any(pattern in banner_lower for pattern in wrong_dse_consent_patterns):
            violations.append(Violation(
                service="Cookie-Banner",
                severity="HIGH",
                text=f"Falsche Formulierung im Banner: 'Zustimmung zur Datenschutzerklaerung'. "
                     f"Die DSE ist eine Informationspflicht (Art. 13 DSGVO) — man kann sie "
                     f"nur zur Kenntnis nehmen, nicht 'zustimmen'. "
                     f"Korrekt: 'Ich habe die Datenschutzinformationen zur Kenntnis genommen'.",
                legal_ref="Art. 13 DSGVO, ErwGr. 42 (informierte Einwilligung ≠ Zustimmung zur DSE)",
            ))

        # Check 4: Reject button visible (no hidden reject)
        reject_texts = ["ablehnen", "reject", "nur notwendige", "alle ablehnen", "decline"]
        has_visible_reject = any(t in banner_lower for t in reject_texts)
        if not has_visible_reject:
            violations.append(Violation(
                service="Cookie-Banner",
                severity="HIGH",
                text="Kein sichtbarer 'Ablehnen'-Button im Banner erkannt. "
                     "Die Ablehnung muss ebenso einfach sein wie die Zustimmung.",
                legal_ref="§25 Abs. 1 TDDDG, EDPB Guidelines 05/2020 (Consent)",
            ))

        # Check 5: Pre-ticked checkboxes (EuGH Planet49)
        try:
            pre_checked = await page.evaluate("""
                (bannerSelector) => {
                    const banner = document.querySelector(bannerSelector);
                    if (!banner) return [];
                    const checked = banner.querySelectorAll(
                        'input[type="checkbox"]:checked:not([disabled])'
                    );
                    return [...checked]
                        .filter(cb => {
                            const label = cb.closest('label')?.textContent || cb.getAttribute('aria-label') || '';
                            const isNecessary = /notwendig|necessary|essential|erforderlich/i.test(label);
                            return !isNecessary;
                        })
                        .map(cb => cb.closest('label')?.textContent?.trim() || cb.id || 'unknown');
                }
            """, banner_selector)
            if pre_checked:
                violations.append(Violation(
                    service="Cookie-Banner",
                    severity="HIGH",
                    text=f"Vorausgewaehlte Checkboxen im Banner: {', '.join(pre_checked[:3])}. "
                         f"Einwilligung muss durch aktive Handlung erfolgen — vorausgefuellte "
                         f"Checkboxen sind ungueltig.",
                    legal_ref="Art. 4(11) DSGVO, EuGH C-673/17 (Planet49)",
                ))
        except Exception as exc:
            logger.debug("Banner check 5 (pre-ticked checkboxes) skipped: %s", exc)

        # Check 6: Dark Pattern — button size/prominence comparison
        try:
            button_info = await page.evaluate("""
                (bannerSelector) => {
                    const banner = document.querySelector(bannerSelector);
                    if (!banner) return null;
                    const buttons = [...banner.querySelectorAll('button, a[role="button"], [class*="btn"]')];
                    return buttons.slice(0, 6).map(b => {
                        const style = window.getComputedStyle(b);
                        const rect = b.getBoundingClientRect();
                        return {
                            text: b.textContent?.trim()?.substring(0, 40) || '',
                            width: rect.width,
                            height: rect.height,
                            area: rect.width * rect.height,
                            bgColor: style.backgroundColor,
                            fontSize: parseFloat(style.fontSize),
                            visible: rect.width > 0 && rect.height > 0,
                        };
                    });
                }
            """, banner_selector)
            if button_info and len(button_info) >= 2:
                accept_btn = None
                reject_btn = None
                accept_kw = ["akzeptieren", "accept", "zustimmen", "agree", "einverstanden"]
                reject_kw = ["ablehnen", "reject", "notwendige", "decline", "nein"]
                for btn in button_info:
                    text_lower = btn["text"].lower()
                    # "ok" only counts as a whole word: as a substring it also
                    # matches "cookie", which turned labels such as
                    # "Cookies ablehnen" into accept buttons.
                    label_words = (
                        text_lower.replace(",", " ").replace(".", " ").replace("!", " ").split()
                    )
                    # Reject keywords win, so "Nur notwendige Cookies
                    # akzeptieren" is treated as the reject option.
                    if any(k in text_lower for k in reject_kw):
                        reject_btn = btn
                    elif (any(k in text_lower for k in accept_kw)
                          or "ok" in label_words or "okay" in label_words):
                        accept_btn = btn
                if accept_btn and reject_btn:
                    # max(..., 1) guards against division by zero for
                    # zero-sized (hidden) reject buttons.
                    area_ratio = accept_btn["area"] / max(reject_btn["area"], 1)
                    if area_ratio > 2.5:
                        violations.append(Violation(
                            service="Cookie-Banner",
                            severity="MEDIUM",
                            text=f"Dark Pattern: 'Akzeptieren'-Button ist {area_ratio:.1f}x groesser als "
                                 f"'Ablehnen'-Button. Beide Optionen muessen gleichwertig dargestellt werden.",
                            legal_ref="EDPB Guidelines 05/2020, §25 TDDDG, DSK Orientierungshilfe Telemedien",
                        ))
                    size_ratio = accept_btn["fontSize"] / max(reject_btn["fontSize"], 1)
                    if size_ratio > 1.5:
                        violations.append(Violation(
                            service="Cookie-Banner",
                            severity="MEDIUM",
                            text=f"Dark Pattern: Schriftgroesse 'Akzeptieren' ({accept_btn['fontSize']:.0f}px) "
                                 f"vs. 'Ablehnen' ({reject_btn['fontSize']:.0f}px). "
                                 f"Unterschiedliche Schriftgroessen sind ein Dark Pattern.",
                            legal_ref="EDPB Guidelines 05/2020 (gleichwertige Darstellung)",
                        ))
        except Exception as exc:
            logger.debug("Banner check 6 (button prominence) skipped: %s", exc)

        # Check 7: Cookie Wall — does rejecting block the site?
        # (This is checked in Phase B — if after reject the page is not navigable)

        # Check 8: Re-access to settings (Art. 7(3) — revocation as easy as consent)
        try:
            settings_accessible = False
            settings_selectors = [
                '[class*="cookie-settings"]', '[class*="privacy-settings"]',
                'a[href*="cookie"]', 'a[href*="datenschutz-einstellungen"]',
                '[class*="consent-settings"]', '#ot-sdk-btn',
                '.cky-btn-revisit', '#CybotCookiebotDialogBodyButtonDetails',
                '[data-testid="uc-footer-link"]',
            ]
            for sel in settings_selectors:
                try:
                    if await page.locator(sel).count() > 0:
                        settings_accessible = True
                        break
                except Exception:
                    continue
            # Also check footer for cookie settings link
            if not settings_accessible:
                footer_text = ""
                try:
                    footer = page.locator("footer").first
                    if await footer.count() > 0:
                        footer_text = (await footer.text_content() or "").lower()
                except Exception as exc:
                    logger.debug("Footer lookup for cookie settings failed: %s", exc)
                if any(kw in footer_text for kw in ["cookie-einstellungen", "cookie settings",
                                                    "datenschutz-einstellungen", "privacy settings"]):
                    settings_accessible = True
            if not settings_accessible:
                violations.append(Violation(
                    service="Cookie-Banner",
                    severity="MEDIUM",
                    text="Kein erneuter Zugang zu Cookie-Einstellungen gefunden. "
                         "Der Widerruf der Einwilligung muss ebenso einfach sein wie "
                         "die Erteilung (Art. 7 Abs. 3 DSGVO).",
                    legal_ref="Art. 7 Abs. 3 DSGVO (Widerruf so einfach wie Einwilligung)",
                ))
        except Exception as exc:
            logger.debug("Banner check 8 (settings re-access) skipped: %s", exc)

        # Check 9: Third-party DSE link — consent links to external domain DSE
        try:
            page_domain = page.url.split("/")[2].replace("www.", "")
            for link in banner_links:
                href = link["href"]
                # Relative links always stay on the page's own domain.
                if not href.startswith("http"):
                    continue
                link_domain = href.split("/")[2].replace("www.", "") if len(href.split("/")) > 2 else ""
                if not link_domain:
                    continue
                is_dse_link = any(kw in link["text"] for kw in [
                    "datenschutz", "privacy", "dsgvo", "data protection",
                ])
                if is_dse_link and link_domain != page_domain:
                    violations.append(Violation(
                        service="Cookie-Banner",
                        severity="HIGH",
                        text=f"Consent verweist auf Datenschutzerklaerung von {link_domain} "
                             f"statt auf eigene DSE. Der Verantwortliche muss eine eigene "
                             f"Datenschutzerklaerung bereitstellen (Art. 13 DSGVO). "
                             f"Ein Verweis auf die DSE eines Drittanbieters/Auftragsverarbeiters "
                             f"reicht nicht aus.",
                        legal_ref="Art. 13 DSGVO (Informationspflichten), Art. 26 DSGVO (gemeinsame Verantwortlichkeit)",
                    ))
                    break
        except Exception as exc:
            logger.debug("Banner check 9 (third-party DSE link) skipped: %s", exc)

        # Check 10: Dark-Pattern language — "muessen/erforderlich" for non-essential
        dark_pattern_phrases = [
            ("muessen heruntergeladen werden", "heruntergeladen"),
            ("muessen akzeptiert werden", "akzeptiert"),
            ("muessen gesetzt werden", "gesetzt"),
            ("cookies sind erforderlich", "erforderlich"),
            ("cookies are required", "required"),
            ("must be downloaded", "downloaded"),
            ("must be accepted", "accepted"),
            ("sind zwingend notwendig", "zwingend"),
            ("unbedingt erforderlich", "unbedingt"),
        ]
        # If the banner talks about essential cookies, "required" wording is
        # assumed to refer to them and is not reported.
        context_essential = any(kw in banner_lower for kw in [
            "technisch notwendig", "essential", "strictly necessary",
            "unbedingt erforderlich fuer den betrieb",
        ])
        if not context_essential:
            for phrase, _keyword in dark_pattern_phrases:
                if phrase in banner_lower:
                    violations.append(Violation(
                        service="Cookie-Banner",
                        severity="MEDIUM",
                        text=f"Dark-Pattern-Sprache: '{phrase}' suggeriert technische "
                             f"Notwendigkeit fuer nicht-essentielle Cookies. Nutzer koennten "
                             f"den Eindruck gewinnen, eine Zustimmung sei alternativlos.",
                        legal_ref="EDPB Guidelines 05/2020 Rn. 70, Art. 7(4) DSGVO (freiwillige Einwilligung)",
                    ))
                    break

        # Check 11: Modal dismiss = consent (click outside closes + sets consent)
        try:
            dialog_state = await page.evaluate("""
                (bannerSelector) => {
                    const dialog = document.querySelector(bannerSelector);
                    if (!dialog) return { hasOverlay: false, overlayCloses: false };
                    // Check for overlay/backdrop elements
                    const overlays = document.querySelectorAll(
                        '.overlay, .backdrop, .modal-backdrop, '
                        + '[class*="overlay"], [class*="backdrop"], '
                        + '[class*="dimmer"], .cdk-overlay-backdrop'
                    );
                    // getEventListeners only exists in the DevTools console.
                    // Probe it with typeof: a bare reference throws a
                    // ReferenceError in page context.
                    const canInspect = typeof getEventListeners === 'function';
                    let overlayHasClick = false;
                    for (const ov of overlays) {
                        const listeners = canInspect ? getEventListeners(ov) : {};
                        if (listeners.click && listeners.click.length > 0) {
                            overlayHasClick = true;
                        }
                    }
                    // Alternative: check if dialog is non-modal (no inert on background)
                    const isModal = dialog.getAttribute('aria-modal') === 'true' ||
                        dialog.hasAttribute('open');
                    return {
                        hasOverlay: overlays.length > 0,
                        overlayHasClick: overlayHasClick,
                        isModal: isModal,
                        dialogRole: dialog.getAttribute('role'),
                    };
                }
            """, banner_selector)
            if dialog_state and dialog_state.get("hasOverlay") and not dialog_state.get("isModal"):
                violations.append(Violation(
                    service="Cookie-Banner",
                    severity="HIGH",
                    text="Consent-Dialog ist nicht modal — Klick auf den Hintergrund kann "
                         "das Fenster schliessen und als Einwilligung gewertet werden. "
                         "Ein versehentlicher Klick ist keine aktive Einwilligung. "
                         "Der Dialog muss modal sein (nur explizite Buttons als Optionen).",
                    legal_ref="EuGH C-673/17 Planet49 (aktive Handlung), Art. 7(1) DSGVO (Nachweispflicht), "
                              "EDPB Guidelines 05/2020 Rn. 77 (silence/inactivity ≠ consent)",
                ))
        except Exception as exc:
            logger.debug("Banner check 11 (non-modal dismiss) skipped: %s", exc)
    except Exception as e:
        logger.warning("Banner text check failed: %s", e)
    return {"violations": violations, "has_impressum": has_impressum, "has_dse": has_dse}
+213
View File
@@ -0,0 +1,213 @@
"""
Consent Scanner — Playwright-based 3-phase cookie consent test.
Phase A: Before consent (first visit)
Phase B: After rejecting consent
Phase C: After accepting consent
"""
import logging
from dataclasses import dataclass, field
from playwright.async_api import async_playwright, Page, BrowserContext
from services.banner_detector import detect_banner, click_button, BannerInfo
from services.script_analyzer import (
classify_scripts, find_tracking_services,
find_violations_before_consent, find_violations_after_reject, Violation,
)
from services.banner_text_checker import check_banner_text as _check_banner_text
# Module-level logger named after this module.
logger = logging.getLogger(__name__)
# Desktop Chrome-on-macOS user agent string; run_consent_test passes it to
# every browser context it creates.
USER_AGENT = (
    "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) "
    "AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36"
)
@dataclass
class ConsentTestResult:
    """Collected findings of one run_consent_test call.

    All fields start empty/False and are filled phase by phase; phases that
    are skipped or fail leave their fields at the defaults.
    """
    # Outcome of banner detection on the first visit.
    banner_detected: bool = False
    banner_provider: str = ""
    # Phase A: Before consent
    # Request URLs seen without any interaction (one per host, capped).
    before_scripts: list[str] = field(default_factory=list)
    # Sorted, de-duplicated cookie names present before any interaction.
    before_cookies: list[str] = field(default_factory=list)
    # Output of find_tracking_services for before_scripts.
    before_tracking: list[str] = field(default_factory=list)
    # Output of find_violations_before_consent for before_scripts.
    before_violations: list[Violation] = field(default_factory=list)
    # Phase B: After reject
    reject_scripts: list[str] = field(default_factory=list)
    reject_cookies: list[str] = field(default_factory=list)
    # Tracking services found after reject that were not in before_tracking.
    reject_new_tracking: list[str] = field(default_factory=list)
    reject_violations: list[Violation] = field(default_factory=list)
    # Phase C: After accept
    accept_scripts: list[str] = field(default_factory=list)
    accept_cookies: list[str] = field(default_factory=list)
    # Tracking services found after accept that were not in before_tracking.
    accept_new_tracking: list[str] = field(default_factory=list)
    # NOTE(review): not assigned by run_consent_test in this module —
    # presumably filled by a caller; confirm.
    accept_undocumented: list[str] = field(default_factory=list)
    # Phase D-F: Per-category tests
    # One entry per detected category, as returned by test_single_category.
    category_tests: list = field(default_factory=list)  # list[CategoryTestResult]
    # Banner text checks
    # Values taken from the dict returned by check_banner_text.
    banner_text_violations: list[Violation] = field(default_factory=list)
    banner_has_impressum_link: bool = False
    banner_has_dse_link: bool = False
async def run_consent_test(url: str, wait_secs: int = 10) -> ConsentTestResult:
    """Run the multi-phase consent test on *url* with headless Chromium.

    Every phase uses a fresh browser context so no cookies carry over:

    * Phase A — load the page without interaction, record requests, cookies
      and tracking services, detect the banner and run the banner text checks.
    * Phase B — reload, click the banner's reject button, record again.
    * Phase C — reload, click the banner's accept button, record again.
    * Phase D-F — detect consent categories and test each one on its own
      (best effort; failures are logged and do not abort the run).

    Args:
        url: Page to test.
        wait_secs: Seconds to wait after the first load and after each
            button click before the recorded requests are evaluated.

    Returns:
        A ConsentTestResult. If no banner is detected only the Phase A fields
        are filled. Errors are logged rather than raised, so the result may
        be partial.
    """
    result = ConsentTestResult()
    wait_ms = wait_secs * 1000
    async with async_playwright() as p:
        browser = await p.chromium.launch(
            headless=True,
            args=["--no-sandbox", "--disable-dev-shm-usage"],
        )
        try:
            # ── Phase A: Before consent ──────────────────────────
            logger.info("Phase A: First visit (no interaction)")
            ctx_a = await browser.new_context(user_agent=USER_AGENT)
            page_a = await ctx_a.new_page()
            scripts_a = []
            page_a.on("request", lambda req: _collect_script(req, scripts_a))
            await page_a.goto(url, wait_until="networkidle", timeout=30000)
            await page_a.wait_for_timeout(wait_ms)
            result.before_scripts = _get_page_scripts(scripts_a)
            result.before_cookies = _get_cookie_names(await ctx_a.cookies())
            result.before_tracking = find_tracking_services(result.before_scripts)
            result.before_violations = find_violations_before_consent(result.before_scripts)
            # Detect banner
            banner = await detect_banner(page_a)
            result.banner_detected = banner.detected
            result.banner_provider = banner.provider
            # Check banner text for legal issues
            if banner.detected:
                banner_violations = await _check_banner_text(page_a)
                result.banner_text_violations = banner_violations["violations"]
                result.banner_has_impressum_link = banner_violations["has_impressum"]
                result.banner_has_dse_link = banner_violations["has_dse"]
            await ctx_a.close()
            if not banner.detected:
                logger.info("No consent banner detected — skipping Phase B/C")
                # The browser is closed by the `finally` clause below.
                return result
            # ── Phase B: After rejecting ─────────────────────────
            logger.info("Phase B: Reject consent (%s)", banner.provider)
            ctx_b = await browser.new_context(user_agent=USER_AGENT)
            page_b = await ctx_b.new_page()
            scripts_b = []
            page_b.on("request", lambda req: _collect_script(req, scripts_b))
            await page_b.goto(url, wait_until="networkidle", timeout=30000)
            await page_b.wait_for_timeout(3000)
            clicked = await click_button(page_b, banner.reject_selector)
            if clicked:
                logger.info("Reject button clicked, waiting %ds", wait_secs)
                await page_b.wait_for_timeout(wait_ms)
            else:
                logger.warning("Could not click reject button")
            result.reject_scripts = _get_page_scripts(scripts_b)
            result.reject_cookies = _get_cookie_names(await ctx_b.cookies())
            reject_tracking = find_tracking_services(result.reject_scripts)
            result.reject_new_tracking = [t for t in reject_tracking if t not in result.before_tracking]
            result.reject_violations = find_violations_after_reject(
                result.before_scripts, result.reject_scripts,
            )
            await ctx_b.close()
            # ── Phase C: After accepting ─────────────────────────
            logger.info("Phase C: Accept consent (%s)", banner.provider)
            ctx_c = await browser.new_context(user_agent=USER_AGENT)
            page_c = await ctx_c.new_page()
            scripts_c = []
            page_c.on("request", lambda req: _collect_script(req, scripts_c))
            await page_c.goto(url, wait_until="networkidle", timeout=30000)
            await page_c.wait_for_timeout(3000)
            clicked = await click_button(page_c, banner.accept_selector)
            if clicked:
                logger.info("Accept button clicked, waiting %ds", wait_secs)
                await page_c.wait_for_timeout(wait_ms)
            else:
                logger.warning("Could not click accept button")
            result.accept_scripts = _get_page_scripts(scripts_c)
            result.accept_cookies = _get_cookie_names(await ctx_c.cookies())
            accept_tracking = find_tracking_services(result.accept_scripts)
            result.accept_new_tracking = [t for t in accept_tracking if t not in result.before_tracking]
            await ctx_c.close()
            # ── Phase D-F: Per-category tests ────────────────────────
            try:
                from services.category_tester import detect_categories, test_single_category
                ctx_cat = await browser.new_context(user_agent=USER_AGENT)
                try:
                    page_cat = await ctx_cat.new_page()
                    await page_cat.goto(url, wait_until="networkidle", timeout=20000)
                    await page_cat.wait_for_timeout(2000)
                    categories = await detect_categories(page_cat, banner)
                    await page_cat.close()
                    if categories:
                        logger.info("Testing %d categories individually", len(categories))
                        for cat in categories:
                            cat_ctx = await browser.new_context(user_agent=USER_AGENT)
                            try:
                                cat_result = await test_single_category(cat_ctx, url, cat, banner, wait_ms)
                                result.category_tests.append(cat_result)
                            finally:
                                # Release the context even when this category
                                # test raises.
                                await cat_ctx.close()
                    else:
                        logger.info("No categories detected — skipping per-category tests")
                finally:
                    await ctx_cat.close()
            except Exception as cat_err:
                logger.warning("Category tests failed (non-blocking): %s", cat_err)
        except Exception as e:
            logger.error("Consent test failed: %s", e)
        finally:
            await browser.close()
    logger.info(
        "Consent test complete: banner=%s, violations_before=%d, violations_reject=%d, categories=%d",
        result.banner_provider, len(result.before_violations), len(result.reject_violations),
        len(result.category_tests),
    )
    return result
def _collect_script(request, scripts: list[str]):
"""Collect script request URLs."""
if request.resource_type in ("script", "image", "xhr", "fetch"):
scripts.append(request.url)
def _get_page_scripts(collected: list[str]) -> list[str]:
"""Deduplicate and filter script URLs."""
seen = set()
result = []
for url in collected:
domain = url.split("/")[2] if "/" in url and len(url.split("/")) > 2 else url
if domain not in seen:
seen.add(domain)
result.append(url)
return result[:50] # Cap at 50
def _get_cookie_names(cookies: list[dict]) -> list[str]:
"""Extract cookie names from Playwright cookie list."""
return sorted(set(c.get("name", "") for c in cookies if c.get("name")))