fix: restore all missing consent-tester service modules
banner_detector.py, script_analyzer.py, category_tester.py, authenticated_scanner.py were only on the feature branch — needed for consent-tester to start. Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -0,0 +1,230 @@
|
|||||||
|
"""
|
||||||
|
Authenticated Scanner — tests post-login functionality.
|
||||||
|
|
||||||
|
Checks §312k BGB (cancellation), Art. 17 (deletion), Art. 20 (export),
|
||||||
|
Art. 7(3) (consent withdrawal), Art. 15 (data access).
|
||||||
|
|
||||||
|
Credentials are NEVER stored, logged, or transmitted beyond the browser context.
|
||||||
|
"""
|
||||||
|
|
||||||
|
import logging
|
||||||
|
from dataclasses import dataclass, field
|
||||||
|
|
||||||
|
from playwright.async_api import async_playwright, Page
|
||||||
|
|
||||||
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
# Desktop Chrome-on-macOS user-agent string; passed as `user_agent` when the
# browser context for the authenticated test is created.
USER_AGENT = (
    "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) "
    "AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36"
)
|
||||||
|
|
||||||
|
|
||||||
|
@dataclass
class CheckResult:
    """Outcome of one pattern search on the logged-in page."""

    # True as soon as any search pattern matched.
    found: bool = False
    # Selector that matched; only filled by the link/button/href search pass.
    selector: str = ""
    # Matched element text (cut to 100 chars), the matched pattern itself, or
    # "In Navigation: <pattern>", depending on which search pass hit.
    text: str = ""
    # NOTE(review): never assigned by the code in this module.
    clicks_needed: int = 0
    # NOTE(review): never assigned by the code in this module.
    screenshot: bytes = b""
|
||||||
|
|
||||||
|
|
||||||
|
@dataclass
class AuthTestResult:
    """Aggregated result of an authenticated-area scan."""

    # True once the login step reported success.
    authenticated: bool = False
    # Login failure text, or the message of any exception raised during the run.
    login_error: str = ""
    # One CheckResult per check; each starts as an empty (not found) result.
    cancel_subscription: CheckResult = field(default_factory=CheckResult)  # §312k BGB
    delete_account: CheckResult = field(default_factory=CheckResult)  # Art. 17
    export_data: CheckResult = field(default_factory=CheckResult)  # Art. 20
    consent_settings: CheckResult = field(default_factory=CheckResult)  # Art. 7(3)
    profile_visible: CheckResult = field(default_factory=CheckResult)  # Art. 15
|
||||||
|
|
||||||
|
|
||||||
|
# Search patterns for each check (DE + EN).
# Each list is handed to _check_patterns(), which does substring-style lookups
# in page text, link/button text, href attributes and navigation containers.
# Umlaut patterns are partly duplicated in their ASCII transliteration.

# Contract cancellation (§312k BGB).
CANCEL_PATTERNS = [
    "kündigen", "kuendigen", "vertrag beenden", "abo beenden",
    "mitgliedschaft kündigen", "cancel subscription", "unsubscribe",
    "cancel membership", "vertrag kündigen",
]

# Account deletion (Art. 17).
DELETE_PATTERNS = [
    "konto löschen", "konto loeschen", "account löschen", "delete account",
    "account deaktivieren", "profil löschen", "remove account",
]

# Data export / portability (Art. 20).
EXPORT_PATTERNS = [
    "daten exportieren", "daten herunterladen", "export data", "download data",
    "meine daten", "datenauskunft", "data download", "daten anfordern",
]

# Consent withdrawal / privacy settings (Art. 7(3)).
# NOTE(review): "einstellungen" is a very generic word ("settings") and will
# probably match on most logged-in pages — confirm this is intended.
CONSENT_PATTERNS = [
    "einwilligung", "einstellungen", "datenschutz-einstellungen",
    "consent", "privacy settings", "cookie-einstellungen",
    "werbeeinstellungen", "marketing preferences",
]

# Profile / personal data visibility (Art. 15).
PROFILE_PATTERNS = [
    "profil", "mein konto", "kontodaten", "persönliche daten",
    "profile", "my account", "account settings", "personal data",
]
|
||||||
|
|
||||||
|
|
||||||
|
async def run_authenticated_test(
    url: str,
    username: str,
    password: str,
    username_selector: str = "",
    password_selector: str = "",
    submit_selector: str = "",
) -> AuthTestResult:
    """Log in at *url* and check the page for the data-subject-rights features.

    Args:
        url: Page that contains the login form.
        username: Login name typed into the username field.
        password: Password typed into the password field.
        username_selector: Optional selector of the username input; auto-detected
            by ``_try_login`` when empty.
        password_selector: Optional selector of the password input; auto-detected
            when empty.
        submit_selector: Optional selector of the submit control; auto-detected
            when empty (Enter is pressed if none is found).

    Returns:
        An ``AuthTestResult``. ``authenticated`` is False and ``login_error`` is
        set when the login did not succeed. Any exception raised while
        navigating or checking is logged and its message is stored in
        ``login_error`` as well.

    The browser context is always closed before returning, which discards the
    cookies and session created by the login.
    """
    result = AuthTestResult()

    async with async_playwright() as p:
        browser = await p.chromium.launch(
            headless=True,
            args=["--no-sandbox", "--disable-dev-shm-usage"],
        )
        try:
            context = await browser.new_context(user_agent=USER_AGENT)
            page = await context.new_page()

            try:
                # Step 1: Login
                await page.goto(url, wait_until="networkidle", timeout=30000)
                await page.wait_for_timeout(2000)

                login_ok = await _try_login(
                    page, username, password,
                    username_selector, password_selector, submit_selector,
                )

                if not login_ok:
                    result.login_error = "Login fehlgeschlagen — Formular nicht gefunden oder Credentials falsch"
                    # Cleanup happens exactly once in the finally blocks below.
                    return result

                result.authenticated = True
                # Give the post-login page time to settle before searching it.
                await page.wait_for_timeout(3000)

                # Step 2: Check cancellation (§312k BGB)
                result.cancel_subscription = await _check_patterns(page, CANCEL_PATTERNS, "cancel")
                logger.info("Cancel check: found=%s", result.cancel_subscription.found)

                # Step 3: Check delete account (Art. 17 DSGVO)
                result.delete_account = await _check_patterns(page, DELETE_PATTERNS, "delete")

                # Step 4: Check data export (Art. 20 DSGVO)
                result.export_data = await _check_patterns(page, EXPORT_PATTERNS, "export")

                # Step 5: Check consent settings (Art. 7(3) DSGVO)
                result.consent_settings = await _check_patterns(page, CONSENT_PATTERNS, "consent")

                # Step 6: Check profile visibility (Art. 15 DSGVO)
                result.profile_visible = await _check_patterns(page, PROFILE_PATTERNS, "profile")

            except Exception as e:
                logger.error("Authenticated test failed: %s", e)
                result.login_error = str(e)
            finally:
                # CRITICAL: Destroy context — wipes all credentials, cookies, session
                await context.close()
        finally:
            # Runs even if closing the context failed, so the browser process
            # is never left behind.
            await browser.close()

    return result
|
||||||
|
|
||||||
|
|
||||||
|
async def _first_present_selector(page: Page, candidates: list[str]) -> str:
    """Return the first selector in *candidates* that matches an element, else ""."""
    for candidate in candidates:
        if await page.locator(candidate).count() > 0:
            return candidate
    return ""


async def _try_login(
    page: Page, username: str, password: str,
    user_sel: str, pass_sel: str, submit_sel: str,
) -> bool:
    """Fill and submit the login form on *page*.

    Empty selectors are auto-detected from a list of common input/button
    selectors. If no submit control is found, Enter is pressed in the password
    field instead.

    Returns:
        True when no password input is present any more five seconds after
        submitting; False when the form could not be located, a password input
        is still present, or any step raised.
    """
    try:
        # Auto-detect selectors if not provided
        if not user_sel:
            user_sel = await _first_present_selector(page, [
                'input[type="email"]', 'input[name="email"]', 'input[name="username"]',
                'input[name="login"]', 'input[id="email"]', 'input[id="username"]',
            ])
        if not pass_sel:
            pass_sel = await _first_present_selector(page, [
                'input[type="password"]', 'input[name="password"]', 'input[id="password"]',
            ])
        if not submit_sel:
            submit_sel = await _first_present_selector(page, [
                'button[type="submit"]', 'input[type="submit"]',
                'button:has-text("Anmelden")', 'button:has-text("Login")',
                'button:has-text("Sign in")', 'button:has-text("Einloggen")',
            ])

        if not user_sel or not pass_sel:
            return False

        await page.fill(user_sel, username)
        await page.fill(pass_sel, password)

        if submit_sel:
            await page.click(submit_sel)
        else:
            await page.press(pass_sel, "Enter")

        await page.wait_for_timeout(5000)

        # Login counts as successful when the password field is gone.
        # (A URL change is not evaluated here.)
        still_on_login = await page.locator('input[type="password"]').count() > 0
        return not still_on_login

    except Exception as e:
        # Defensive: the module promises that credentials are never logged, so
        # scrub them from the exception text in case it echoes a filled value.
        message = str(e)
        for secret in (password, username):
            if secret:
                message = message.replace(secret, "***")
        logger.warning("Login attempt failed: %s", message)
        return False
|
||||||
|
|
||||||
|
|
||||||
|
async def _check_patterns(page: Page, patterns: list[str], check_name: str) -> CheckResult:
    """Search the current page for any of *patterns* and report the first hit.

    Three passes are tried in order; the first match wins:
      1. text lookup via ``page.get_by_text`` (non-exact),
      2. links, buttons and ``href`` attributes containing the pattern,
      3. text of common navigation containers.

    Args:
        page: The page to inspect.
        patterns: Text fragments to look for.
        check_name: Short label of the check, used only in debug log output.

    Returns:
        A ``CheckResult`` with ``found=True`` and ``text`` (plus ``selector`` in
        pass 2) filled on a hit, otherwise an empty result. Lookup errors are
        logged at debug level and treated as "no match".
    """
    result = CheckResult()

    # Pass 1: check current page text
    for pattern in patterns:
        try:
            locator = page.get_by_text(pattern, exact=False)
            if await locator.count() > 0:
                text = await locator.first.text_content()
                result.found = True
                result.text = (text or "").strip()[:100]
                return result
        except Exception as e:
            logger.debug("%s check: text lookup for %r failed: %s", check_name, pattern, e)

    # Pass 2: check links/buttons; hrefs are probed with spaces turned into dashes
    for pattern in patterns:
        selectors = [
            f'a:has-text("{pattern}")',
            f'button:has-text("{pattern}")',
            f'[href*="{pattern.replace(" ", "-")}"]',
        ]
        for sel in selectors:
            # Guard each selector on its own so one failing lookup does not
            # skip the remaining selectors for this pattern.
            try:
                if await page.locator(sel).count() > 0:
                    result.found = True
                    result.selector = sel
                    result.text = pattern
                    return result
            except Exception as e:
                logger.debug("%s check: selector %r failed: %s", check_name, sel, e)

    # Pass 3: check navigation menus (common locations for account management)
    for nav_sel in ['nav', '[role="navigation"]', '.sidebar', '.account-menu', '#account']:
        try:
            nav = page.locator(nav_sel)
            if await nav.count() == 0:
                continue
            nav_text = (await nav.first.text_content() or "").lower()
        except Exception as e:
            logger.debug("%s check: navigation lookup %r failed: %s", check_name, nav_sel, e)
            continue

        for pattern in patterns:
            if pattern.lower() in nav_text:
                result.found = True
                result.text = f"In Navigation: {pattern}"
                return result

    return result
|
||||||
@@ -0,0 +1,149 @@
|
|||||||
|
"""
|
||||||
|
Banner Detector — identifies Consent Management Platforms and their buttons.
|
||||||
|
|
||||||
|
Supports 10+ CMPs with specific selectors + generic fallback.
|
||||||
|
"""
|
||||||
|
|
||||||
|
from dataclasses import dataclass
|
||||||
|
|
||||||
|
from playwright.async_api import Page, Locator
|
||||||
|
|
||||||
|
|
||||||
|
@dataclass
class BannerInfo:
    """Result of consent-banner detection."""

    # True when a known CMP or a generic accept text was found on the page.
    detected: bool
    # CMP name from CMP_SELECTORS, "Generic" for the text fallback, "" if none.
    provider: str
    # Selector of the accept-all button ("" when no banner was detected).
    accept_selector: str
    # Selector of the reject button ("" when none was found).
    reject_selector: str
|
||||||
|
|
||||||
|
|
||||||
|
# CMP-specific selectors (ordered by market share).
# Each entry has:
#   name   — provider name reported in BannerInfo.provider
#   detect — selector whose presence identifies the CMP
#   accept — selector of the accept-all button
#   reject — selector of the reject button
# detect_banner() returns the first entry whose "detect" selector matches, so
# the list order is also the matching priority.
CMP_SELECTORS = [
    {
        "name": "Didomi",
        "detect": "#didomi-host, [class*='didomi']",
        "accept": "#didomi-notice-agree-button",
        "reject": "#didomi-notice-disagree-button, .didomi-components-button--secondary",
    },
    {
        "name": "OneTrust",
        "detect": "#onetrust-banner-sdk, [class*='onetrust']",
        "accept": "#onetrust-accept-btn-handler",
        "reject": "#onetrust-reject-all-handler, .onetrust-close-btn-handler",
    },
    {
        "name": "Cookiebot",
        "detect": "#CybotCookiebotDialog, [class*='CybotCookiebot']",
        "accept": "#CybotCookiebotDialogBodyLevelButtonLevelOptinAllowAll",
        "reject": "#CybotCookiebotDialogBodyButtonDecline",
    },
    {
        "name": "Usercentrics",
        "detect": "#usercentrics-root, [data-testid='uc-banner']",
        "accept": "[data-testid='uc-accept-all-button']",
        "reject": "[data-testid='uc-deny-all-button']",
    },
    {
        "name": "CookieYes",
        "detect": ".cky-consent-container, [class*='cky-']",
        "accept": ".cky-btn-accept",
        # NOTE(review): the second alternative looks like a "customize" button
        # rather than a reject button — confirm it is a wanted fallback.
        "reject": ".cky-btn-reject, .cky-btn-customize",
    },
    {
        "name": "Quantcast",
        "detect": ".qc-cmp2-container, [class*='qc-cmp']",
        # Positional: first button is treated as accept, last as reject.
        "accept": "[class*='qc-cmp2-summary-buttons'] button:first-child",
        "reject": "[class*='qc-cmp2-summary-buttons'] button:last-child",
    },
    {
        "name": "Borlabs",
        "detect": "#BorlabsCookieBox, [class*='BorlabsCookie']",
        "accept": "#BorlabsCookieBox .cookie-accept, [data-cookie-accept]",
        "reject": "#BorlabsCookieBox .cookie-refuse, [data-cookie-refuse]",
    },
    {
        "name": "Consentmanager",
        "detect": "#cmpbox, [class*='cmpbox']",
        "accept": ".cmpboxbtn.cmpboxbtnyes",
        "reject": ".cmpboxbtn.cmpboxbtnno",
    },
    {
        "name": "Klaro",
        "detect": ".klaro, [class*='klaro']",
        "accept": ".klaro .cm-btn-accept",
        "reject": ".klaro .cm-btn-decline",
    },
    {
        "name": "TarteAuCitron",
        "detect": "#tarteaucitronRoot, [class*='tarteaucitron']",
        "accept": "#tarteaucitronPersonalize2",
        "reject": "#tarteaucitronAllDenied2",
    },
]
|
||||||
|
|
||||||
|
# Generic fallback patterns (text-based).
# Used by detect_banner() only when no CMP-specific selector matched. The
# lists are tried in order and the first text found on the page wins.

# Button labels that mean "accept all".
GENERIC_ACCEPT_TEXTS = [
    "Alle akzeptieren", "Alles akzeptieren", "Alle Cookies akzeptieren",
    "Accept all", "Accept All Cookies", "Akzeptieren", "Zustimmen",
    "Einverstanden", "Ich stimme zu", "Ja, einverstanden",
]

# Button labels that mean "reject" / "only necessary".
# NOTE(review): short words such as "Nein" or "Reject" are matched as
# non-exact text and may hit unrelated page content — confirm acceptable.
GENERIC_REJECT_TEXTS = [
    "Nur notwendige", "Nur essentielle", "Ablehnen", "Alle ablehnen",
    "Reject", "Reject all", "Nur erforderliche", "Nur technisch notwendige",
    "Decline", "Nein", "Nicht einverstanden",
]
|
||||||
|
|
||||||
|
|
||||||
|
async def _clickable_selector_for_text(page: Page, text: str) -> str:
    """Return a ``:has-text`` selector for *text* that matches a clickable element.

    Buttons are preferred, then links, then ``role="button"`` elements. When
    none of them matches, the plain button selector is returned.
    """
    candidates = [
        f'button:has-text("{text}")',
        f'a:has-text("{text}")',
        f'[role="button"]:has-text("{text}")',
    ]
    for candidate in candidates:
        try:
            if await page.locator(candidate).count() > 0:
                return candidate
        except Exception:
            continue
    return candidates[0]


async def detect_banner(page: Page) -> BannerInfo:
    """Detect which CMP is used on *page* and return its button selectors.

    CMP-specific ``detect`` selectors from ``CMP_SELECTORS`` are tried first, in
    list order. If none matches, the page is searched for one of the generic
    accept texts; on a hit the provider is reported as "Generic" and the reject
    selector is derived from the first generic reject text found ("" if none).

    Returns:
        A ``BannerInfo``; ``detected`` is False and all strings are empty when
        neither a known CMP nor a generic accept text was found.
    """
    # Try CMP-specific selectors first
    for cmp in CMP_SELECTORS:
        try:
            if await page.locator(cmp["detect"]).count() > 0:
                return BannerInfo(
                    detected=True,
                    provider=cmp["name"],
                    accept_selector=cmp["accept"],
                    reject_selector=cmp["reject"],
                )
        except Exception:
            continue

    # Generic fallback — search for buttons by text
    for text in GENERIC_ACCEPT_TEXTS:
        try:
            if await page.get_by_text(text, exact=False).count() == 0:
                continue

            # The text may sit on a link or a role="button" element instead of
            # a <button>, so pick a selector that really matches something.
            accept = await _clickable_selector_for_text(page, text)

            # Try to find a reject button as well
            reject = ""
            for rtext in GENERIC_REJECT_TEXTS:
                if await page.get_by_text(rtext, exact=False).count() > 0:
                    reject = await _clickable_selector_for_text(page, rtext)
                    break

            return BannerInfo(
                detected=True,
                provider="Generic",
                accept_selector=accept,
                reject_selector=reject,
            )
        except Exception:
            continue

    return BannerInfo(detected=False, provider="", accept_selector="", reject_selector="")
|
||||||
|
|
||||||
|
|
||||||
|
async def click_button(page: Page, selector: str, timeout: int = 5000) -> bool:
    """Click the first element matching *selector* once it is visible.

    Args:
        page: Page that hosts the consent dialog.
        selector: Selector of the button; an empty string is a no-op.
        timeout: Maximum time in milliseconds to wait for visibility.

    Returns:
        True if the click was performed, False for an empty selector or when
        waiting/clicking raised (best effort, errors are not propagated).
    """
    clicked = False
    if selector:
        try:
            target = page.locator(selector).first
            await target.wait_for(state="visible", timeout=timeout)
            await target.click()
            clicked = True
        except Exception:
            clicked = False
    return clicked
|
||||||
@@ -0,0 +1,278 @@
|
|||||||
|
"""
|
||||||
|
Category Tester — tests individual cookie consent categories.
|
||||||
|
|
||||||
|
Tests each category in isolation: only "Statistics" on, only "Marketing" on, etc.
|
||||||
|
Detects miscategorization: e.g., Facebook Pixel loading when only Statistics is enabled.
|
||||||
|
"""
|
||||||
|
|
||||||
|
import logging
|
||||||
|
from dataclasses import dataclass, field
|
||||||
|
|
||||||
|
from playwright.async_api import Page, BrowserContext
|
||||||
|
|
||||||
|
from services.banner_detector import BannerInfo, click_button
|
||||||
|
from services.script_analyzer import find_tracking_services, Violation
|
||||||
|
|
||||||
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
# Which tracking service belongs to which consent category.
# Keys are service names as returned by find_tracking_services(); values are
# category names. A detected service that is missing here is never reported as
# miscategorized, because test_single_category() only flags mapped services.
SERVICE_CATEGORY_MAP: dict[str, str] = {
    # Statistics / Analytics
    "Google Analytics": "statistics",
    "Matomo": "statistics",
    "Plausible Analytics": "statistics",
    "Hotjar": "statistics",
    "Microsoft Clarity": "statistics",
    "etracker": "statistics",
    "Heap Analytics": "statistics",
    "Amplitude": "statistics",
    "Mixpanel": "statistics",
    "PostHog": "statistics",
    "Mouseflow": "statistics",
    "Crazy Egg": "statistics",
    "Lucky Orange": "statistics",
    "FullStory": "statistics",
    # Marketing / Advertising
    "Meta/Facebook Pixel": "marketing",
    "Google Ads": "marketing",
    "Google Ads/DoubleClick": "marketing",
    "TikTok Pixel": "marketing",
    "LinkedIn Insight": "marketing",
    "Pinterest Tag": "marketing",
    "Criteo": "marketing",
    "Taboola": "marketing",
    "Outbrain": "marketing",
    "Amazon Ads": "marketing",
    "Bing/Microsoft Ads": "marketing",
    "Salesforce Pardot": "marketing",
    # Functional
    "Intercom": "functional",
    "Zendesk": "functional",
    "Tidio Chat": "functional",
    "Crisp Chat": "functional",
    "LiveChat": "functional",
    "Freshdesk/Freshchat": "functional",
    "HelpScout Beacon": "functional",
}

# Human-readable (German) labels for the category names; used for the category
# label of CMP-specific categories and inside violation texts.
CATEGORY_LABELS = {
    "statistics": "Statistik / Analytics",
    "marketing": "Marketing / Werbung",
    "functional": "Funktional / Komfort",
    "social_media": "Social Media",
}
|
||||||
|
|
||||||
|
# CMP-specific category selectors.
# Per provider (key = BannerInfo.provider):
#   settings_button — opens the detailed settings panel
#   save_button     — stores the current selection ONLY; it must never point
#                     at an accept-all button, otherwise a "single category"
#                     test would silently grant consent for every category
#   categories      — category name -> selector of its toggle
# Category names use the same vocabulary as SERVICE_CATEGORY_MAP and
# CATEGORY_LABELS ("statistics", "marketing", "functional", ...), so that the
# miscategorization check compares like with like.
CMP_CATEGORY_CONFIG: dict[str, dict] = {
    "Cookiebot": {
        "settings_button": "#CybotCookiebotDialogBodyButtonDetails",
        "save_button": "#CybotCookiebotDialogBodyLevelButtonLevelOptinAllowallSelection",
        "categories": {
            "statistics": "#CybotCookiebotDialogBodyLevelButtonStatistics",
            "marketing": "#CybotCookiebotDialogBodyLevelButtonMarketing",
            # Cookiebot calls this toggle "Preferences"; it is reported under
            # the shared "functional" category name.
            "functional": "#CybotCookiebotDialogBodyLevelButtonPreferences",
        },
    },
    "OneTrust": {
        "settings_button": "#onetrust-pc-btn-handler, .ot-sdk-show-settings",
        "save_button": ".save-preference-btn-handler",
        "categories": {
            "statistics": ".ot-switch[data-ot-category='C0002'] input, #ot-group-id-C0002",
            "marketing": ".ot-switch[data-ot-category='C0004'] input, #ot-group-id-C0004",
            "functional": ".ot-switch[data-ot-category='C0003'] input, #ot-group-id-C0003",
        },
    },
    "Usercentrics": {
        "settings_button": "[data-testid='uc-more-information-button'], button:has-text('Mehr Informationen')",
        "save_button": "[data-testid='uc-save-button']",
        "categories": {
            "statistics": "[data-testid='uc-category-statistics'] input",
            "marketing": "[data-testid='uc-category-marketing'] input",
            "functional": "[data-testid='uc-category-functional'] input",
        },
    },
    "Didomi": {
        "settings_button": "#didomi-notice-learn-more-button, .didomi-learn-more-button",
        "save_button": ".didomi-components-button--primary:has-text('Auswahl speichern')",
        "categories": {
            "statistics": "[data-purpose='analytics_purposes'] input, [data-purpose='measure'] input",
            "marketing": "[data-purpose='advertising_purposes'] input, [data-purpose='ads'] input",
        },
    },
}
|
||||||
|
|
||||||
|
# Generic category keywords for fallback detection.
# A toggle label is lower-cased and assigned to the first category (in dict
# order) that has a keyword contained in it. German keywords are listed both
# with umlaut and in ASCII transliteration because real labels use either.
CATEGORY_KEYWORDS = {
    "statistics": ["statistik", "analytics", "analyse", "statistics", "messung", "reichweite"],
    "marketing": ["marketing", "werbung", "advertising", "targeting", "remarketing", "anzeigen"],
    "functional": [
        "funktional", "functional", "preferences", "praeferenz", "präferenz",
        "komfort", "einstellungen",
    ],
    "social_media": ["social media", "soziale medien", "social", "teilen"],
}
|
||||||
|
|
||||||
|
|
||||||
|
@dataclass
class CategoryInfo:
    """One consent category offered by the banner."""

    # Category name, e.g. "statistics" or "marketing".
    name: str
    # Display label: a CATEGORY_LABELS entry or the toggle's own text.
    label: str
    # Selector of the toggle/checkbox that switches the category.
    selector: str
|
||||||
|
|
||||||
|
|
||||||
|
@dataclass
class CategoryTestResult:
    """What was observed while only one category was enabled."""

    # Name and display label of the category under test.
    category: str
    category_label: str
    # Request URLs recorded during the test (deduplicated by _dedup_scripts).
    scripts_loaded: list[str] = field(default_factory=list)
    # Names of the cookies present in the browser context after the test.
    cookies_set: list[str] = field(default_factory=list)
    # Consent-requiring services recognised among the recorded requests.
    tracking_services: list[str] = field(default_factory=list)
    # One dict per service that loaded although it belongs to another category
    # (keys: service, severity, text, expected_category, actual_category).
    violations: list[dict] = field(default_factory=list)
|
||||||
|
|
||||||
|
|
||||||
|
async def detect_categories(page: Page, banner: BannerInfo) -> list[CategoryInfo]:
    """Detect the cookie categories the CMP on *page* offers.

    For providers listed in ``CMP_CATEGORY_CONFIG`` the settings panel is
    opened (side effect on the page) and each configured category toggle is
    probed. If that yields nothing, a generic scan looks for checkbox/switch
    elements whose label contains one of ``CATEGORY_KEYWORDS``.

    Args:
        page: Page with the consent banner visible.
        banner: Detection result; only ``banner.provider`` is used.

    Returns:
        The detected categories (possibly empty). Errors of the generic scan
        are logged and do not propagate.
    """
    categories = []
    provider = banner.provider

    # CMP-specific detection
    config = CMP_CATEGORY_CONFIG.get(provider)
    if config:
        # Open settings panel first
        if config.get("settings_button"):
            await click_button(page, config["settings_button"], timeout=3000)
            await page.wait_for_timeout(1000)

        for cat_name, selector in config.get("categories", {}).items():
            try:
                # Probe the whole selector list so that the fallback
                # alternatives after the comma are honoured as well.
                if await page.locator(selector).count() > 0:
                    categories.append(CategoryInfo(
                        name=cat_name,
                        label=CATEGORY_LABELS.get(cat_name, cat_name),
                        selector=selector,
                    ))
            except Exception as e:
                logger.debug("Category probe failed for %s/%s: %s", provider, cat_name, e)
                continue

    # Generic fallback: search for toggle/checkbox elements with category keywords
    if not categories:
        try:
            # CSS.escape keeps ids with special characters usable as selectors.
            toggles = await page.evaluate("""
                () => {
                    const elements = document.querySelectorAll(
                        'input[type="checkbox"], [role="switch"], [class*="toggle"], [class*="switch"]'
                    );
                    return [...elements].map(el => ({
                        text: (el.closest('label')?.textContent || el.getAttribute('aria-label') || '').trim(),
                        id: el.id || '',
                        selector: el.id ? '#' + CSS.escape(el.id) : '',
                    })).filter(e => e.text.length > 0);
                }
            """)

            for toggle in (toggles or []):
                text_lower = toggle["text"].lower()
                for cat_name, keywords in CATEGORY_KEYWORDS.items():
                    if any(kw in text_lower for kw in keywords):
                        # Escape backslashes and quotes so the label fragment
                        # cannot break out of the attribute-selector string.
                        fragment = toggle["text"][:20].replace("\\", "\\\\").replace('"', '\\"')
                        sel = toggle["selector"] or f'[aria-label*="{fragment}"]'
                        categories.append(CategoryInfo(
                            name=cat_name,
                            label=toggle["text"][:50],
                            selector=sel,
                        ))
                        # First matching category wins for this toggle.
                        break
        except Exception as e:
            logger.warning("Generic category detection failed: %s", e)

    logger.info("Detected %d categories for %s", len(categories), provider)
    return categories
|
||||||
|
|
||||||
|
|
||||||
|
async def test_single_category(
    context: BrowserContext,
    url: str,
    category: CategoryInfo,
    banner: BannerInfo,
    wait_ms: int = 5000,
) -> CategoryTestResult:
    """Test a single category in isolation: enable only this one, disable others.

    A new page is opened in *context*, the CMP settings are opened, every
    configured category is switched off, *category* is switched on and the
    selection is saved. After *wait_ms* milliseconds the recorded requests and
    cookies are evaluated.

    Args:
        context: Browser context to open the page in.
        url: Page to load.
        category: The category to enable.
        banner: Detection result; ``banner.provider`` selects the CMP config.
        wait_ms: Time to wait after saving before collecting results.

    Returns:
        A ``CategoryTestResult``. Services whose category in
        ``SERVICE_CATEGORY_MAP`` differs from *category* are listed in
        ``violations``. Errors are logged; a partially filled result is
        returned in that case.

    NOTE(review): cookies are read from the whole context, so cookies left by
    earlier pages in the same context are included — confirm that callers pass
    a fresh context per category.
    """
    result = CategoryTestResult(
        category=category.name,
        category_label=category.label,
    )

    page = None
    try:
        page = await context.new_page()
        scripts: list[str] = []
        page.on("request", lambda req: _collect(req, scripts))

        await page.goto(url, wait_until="networkidle", timeout=20000)
        await page.wait_for_timeout(2000)

        config = CMP_CATEGORY_CONFIG.get(banner.provider)

        if config:
            # Open settings
            if config.get("settings_button"):
                await click_button(page, config["settings_button"], timeout=3000)
                await page.wait_for_timeout(1000)

            # Disable ALL categories first
            for cat_sel in config.get("categories", {}).values():
                try:
                    # Whole selector list, so fallback alternatives are used too.
                    el = page.locator(cat_sel).first
                    if await el.is_checked():
                        await el.click()
                except Exception:
                    continue

            # Enable ONLY the target category
            try:
                el = page.locator(category.selector).first
                if not await el.is_checked():
                    await el.click()
            except Exception:
                logger.warning("Could not toggle category %s", category.name)

            # Save selection
            if config.get("save_button"):
                await click_button(page, config["save_button"], timeout=3000)
        else:
            logger.warning(
                "No category config for provider %s; consent state was not changed",
                banner.provider,
            )

        await page.wait_for_timeout(wait_ms)

        # Collect results
        result.scripts_loaded = _dedup_scripts(scripts)
        result.cookies_set = [c.get("name", "") for c in await context.cookies()]
        # Classify the complete request list: the deduplicated list keeps only
        # one URL per host and at most 30 entries, which can hide a tracker.
        result.tracking_services = find_tracking_services(scripts)

        # Find violations: services that don't belong to this category
        for service in result.tracking_services:
            expected_cat = SERVICE_CATEGORY_MAP.get(service)
            if expected_cat and expected_cat != category.name:
                result.violations.append({
                    "service": service,
                    "severity": "HIGH",
                    "text": f"{service} laedt bei '{category.label}' — gehoert aber zu '{CATEGORY_LABELS.get(expected_cat, expected_cat)}'",
                    "expected_category": expected_cat,
                    "actual_category": category.name,
                })

    except Exception as e:
        logger.error("Category test failed for %s: %s", category.name, e)
    finally:
        # Close the page on every path so failed runs do not pile up open pages.
        if page is not None:
            try:
                await page.close()
            except Exception as close_err:
                logger.warning("Could not close page for %s: %s", category.name, close_err)

    return result
|
||||||
|
|
||||||
|
|
||||||
|
def _collect(request, scripts: list[str]):
    """Append the URL of *request* to *scripts* if its resource type is relevant.

    Only script, image, xhr and fetch requests are recorded; everything else
    is ignored.
    """
    tracked_types = ("script", "image", "xhr", "fetch")
    if request.resource_type not in tracked_types:
        return
    scripts.append(request.url)
|
||||||
|
|
||||||
|
|
||||||
|
def _dedup_scripts(scripts: list[str]) -> list[str]:
    """Keep the first URL seen per host and cap the result at 30 entries.

    The host is the third "/"-separated piece of the URL (the part after
    ``scheme://``); URLs with fewer pieces are keyed by the whole string.
    Order of first appearance is preserved.
    """
    first_url_by_host: dict[str, str] = {}
    for url in scripts:
        pieces = url.split("/")
        host = pieces[2] if len(pieces) > 2 else url
        # setdefault keeps the earliest URL for a host and ignores later ones.
        first_url_by_host.setdefault(host, url)
    return list(first_url_by_host.values())[:30]
|
||||||
@@ -0,0 +1,157 @@
|
|||||||
|
"""
|
||||||
|
Script Analyzer — classifies detected scripts and cookies against known services.
|
||||||
|
"""
|
||||||
|
|
||||||
|
import re
|
||||||
|
from dataclasses import dataclass
|
||||||
|
|
||||||
|
# Known third-party services.
# Key: regular expression searched (case-insensitively) in a request URL.
# Value: service name, whether loading it requires prior consent, and the
# legal reference quoted in violation texts.
# Dict order matters: the consumers stop at the first pattern that matches.
SERVICE_PATTERNS: dict[str, dict] = {
    # The property-id alternatives are kept case-sensitive and word-bounded:
    # under IGNORECASE a bare "G-\w{5}" / "UA-\d" would also match ordinary
    # URLs such as ".../img-large.jpg" or ".../aqua-1.png".
    r"google.?analytics|gtag|(?-i:\bUA-\d+-\d+\b|\bG-[A-Z0-9]{5,}\b)": {
        "name": "Google Analytics", "requires_consent": True,
        "legal_ref": "§25 TDDDG, Art. 44-49 DSGVO",
    },
    r"googletagmanager|gtm\.js": {
        "name": "Google Tag Manager", "requires_consent": True,
        "legal_ref": "§25 TDDDG",
    },
    r"facebook\.net|fbevents|fbq": {
        "name": "Meta/Facebook Pixel", "requires_consent": True,
        "legal_ref": "§25 TDDDG, Art. 44-49 DSGVO",
    },
    r"hotjar\.com|_hjSettings": {
        "name": "Hotjar", "requires_consent": True,
        "legal_ref": "§25 TDDDG (Session Recording)",
    },
    r"clarity\.ms": {
        "name": "Microsoft Clarity", "requires_consent": True,
        "legal_ref": "§25 TDDDG (Session Replay)",
    },
    r"tiktok\.com/i18n|analytics\.tiktok": {
        "name": "TikTok Pixel", "requires_consent": True,
        "legal_ref": "§25 TDDDG, Drittlandtransfer China",
    },
    r"linkedin\.com/insight|snap\.licdn": {
        "name": "LinkedIn Insight", "requires_consent": True,
        "legal_ref": "§25 TDDDG, Art. 44-49 DSGVO",
    },
    r"pinterest\.com/ct|pinimg\.com/ct": {
        "name": "Pinterest Tag", "requires_consent": True,
        "legal_ref": "§25 TDDDG",
    },
    r"criteo\.com|criteo\.net": {
        "name": "Criteo", "requires_consent": True,
        "legal_ref": "§25 TDDDG",
    },
    r"doubleclick\.net|googlesyndication": {
        "name": "Google Ads/DoubleClick", "requires_consent": True,
        "legal_ref": "§25 TDDDG, Art. 44-49 DSGVO",
    },
    r"fonts\.googleapis\.com|fonts\.gstatic": {
        "name": "Google Fonts", "requires_consent": True,
        "legal_ref": "LG Muenchen I, Az. 3 O 17493/20",
    },
    r"recaptcha|grecaptcha": {
        "name": "Google reCAPTCHA", "requires_consent": True,
        "legal_ref": "§25 TDDDG",
    },
    r"youtube\.com/embed|ytimg": {
        "name": "YouTube", "requires_consent": True,
        "legal_ref": "§25 TDDDG, Art. 44-49 DSGVO",
    },
    r"maps\.googleapis|maps\.google": {
        "name": "Google Maps", "requires_consent": True,
        "legal_ref": "§25 TDDDG",
    },
    r"intercom\.io|intercomcdn": {
        "name": "Intercom", "requires_consent": True,
        "legal_ref": "Art. 44-49 DSGVO",
    },
    r"zendesk\.com|zdassets": {
        "name": "Zendesk", "requires_consent": True,
        "legal_ref": "Art. 44-49 DSGVO",
    },
    r"sentry\.io|sentry-cdn": {
        "name": "Sentry", "requires_consent": False,
        "legal_ref": "Berechtigtes Interesse (Error Tracking)",
    },
    r"cdn\.cloudflare\.com": {
        "name": "Cloudflare CDN", "requires_consent": False,
        "legal_ref": "Berechtigtes Interesse (CDN)",
    },
    r"didomi|cookiebot|onetrust|usercentrics|consentmanager": {
        "name": "Consent Management", "requires_consent": False,
        "legal_ref": "Notwendig (CMP)",
    },
}
|
||||||
|
|
||||||
|
|
||||||
|
@dataclass
class Violation:
    """A consent violation found for one service."""

    # Service name as listed in SERVICE_PATTERNS.
    service: str
    severity: str  # "HIGH", "CRITICAL"
    # Human-readable (German) description of the finding.
    text: str
    # Legal reference taken from the matching SERVICE_PATTERNS entry.
    legal_ref: str
|
||||||
|
|
||||||
|
|
||||||
|
def classify_scripts(scripts: list[str]) -> list[str]:
    """Map script URLs to the names of the known services they belong to.

    Each URL contributes at most one service: the first ``SERVICE_PATTERNS``
    entry (in dict order) whose pattern is found in it, case-insensitively.

    Returns:
        The distinct service names, sorted alphabetically.
    """
    recognised: set[str] = set()
    for url in scripts:
        first_hit = next(
            (
                meta["name"]
                for regex, meta in SERVICE_PATTERNS.items()
                if re.search(regex, url, re.IGNORECASE)
            ),
            None,
        )
        if first_hit is not None:
            recognised.add(first_hit)
    return sorted(recognised)
|
||||||
|
|
||||||
|
|
||||||
|
def find_tracking_services(scripts: list[str]) -> list[str]:
    """Return the names of consent-requiring services found in *scripts*.

    For each URL the first ``SERVICE_PATTERNS`` entry that both matches
    (case-insensitively) and has ``requires_consent`` set is taken; matching
    entries that need no consent are skipped over.

    Returns:
        The distinct service names, sorted alphabetically.
    """
    needs_consent: set[str] = set()
    for url in scripts:
        first_hit = next(
            (
                meta["name"]
                for regex, meta in SERVICE_PATTERNS.items()
                if re.search(regex, url, re.IGNORECASE) and meta["requires_consent"]
            ),
            None,
        )
        if first_hit is not None:
            needs_consent.add(first_hit)
    return sorted(needs_consent)
|
||||||
|
|
||||||
|
|
||||||
|
def find_violations_before_consent(scripts: list[str]) -> list[Violation]:
    """Report consent-requiring services that loaded before any consent (HIGH).

    Each URL is attributed to the first matching consent-requiring entry of
    ``SERVICE_PATTERNS``; every service is reported once, in order of first
    appearance.
    """
    by_service: dict[str, Violation] = {}
    for url in scripts:
        for regex, meta in SERVICE_PATTERNS.items():
            if not (re.search(regex, url, re.IGNORECASE) and meta["requires_consent"]):
                continue
            service = meta["name"]
            if service not in by_service:
                by_service[service] = Violation(
                    service=service,
                    severity="HIGH",
                    text=f"{service} laedt OHNE vorherige Einwilligung",
                    legal_ref=meta["legal_ref"],
                )
            # Only the first matching entry counts for a URL.
            break
    return list(by_service.values())
|
||||||
|
|
||||||
|
|
||||||
|
def find_violations_after_reject(
    before_scripts: list[str], after_scripts: list[str],
) -> list[Violation]:
    """Report services that loaded before consent and still load after rejection.

    Args:
        before_scripts: URLs recorded before any consent interaction.
        after_scripts: URLs recorded after the user rejected.

    Returns:
        One CRITICAL ``Violation`` per consent-requiring service present in
        both lists, ordered by service name. Services that appear only after
        the rejection are not reported.
    """
    still_loading = find_tracking_services(after_scripts)
    loaded_before = find_tracking_services(before_scripts)

    # Legal reference of the first SERVICE_PATTERNS entry for each service name.
    legal_ref_by_name: dict[str, str] = {}
    for meta in SERVICE_PATTERNS.values():
        legal_ref_by_name.setdefault(meta["name"], meta["legal_ref"])

    return [
        Violation(
            service=service,
            severity="CRITICAL",
            text=f"{service} laedt TROTZ Ablehnung — moegliches Dark Pattern",
            legal_ref=legal_ref_by_name[service] + ", Art. 5(3) ePrivacy",
        )
        for service in still_loading
        # Was already loading before AND still loads after reject = CRITICAL
        if service in loaded_before and service in legal_ref_by_name
    ]
|
||||||
Reference in New Issue
Block a user