# Patch header of the change that introduced this module (kept for provenance):
#   diff --git a/consent-tester/services/authenticated_scanner.py b/consent-tester/services/authenticated_scanner.py
#   new file mode 100644
#   index 0000000..58d8fe7
#   --- /dev/null
#   +++ b/consent-tester/services/authenticated_scanner.py
#   @@ -0,0 +1,230 @@
"""
Authenticated Scanner — tests post-login functionality.

Checks §312k BGB (cancellation), Art. 17 (deletion), Art. 20 (export),
Art. 7(3) (consent withdrawal), Art. 15 (data access).

Credentials are NEVER stored, logged, or transmitted beyond the browser context.
"""

import logging
from dataclasses import dataclass, field

from playwright.async_api import async_playwright, Page

logger = logging.getLogger(__name__)

# Desktop Chrome user-agent string passed to the browser context.
USER_AGENT = (
    "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) "
    "AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36"
)


@dataclass
class CheckResult:
    """Outcome of one post-login check (e.g. "is there a cancel option?")."""

    # True once any search pattern was located on the page.
    found: bool = False
    # Selector that matched; only filled by the link/button search.
    selector: str = ""
    # Matched text (or the pattern itself), trimmed by the scanner.
    text: str = ""
    # NOTE(review): never written by the scanner code in this module — confirm
    # whether another consumer is expected to fill it.
    clicks_needed: int = 0
    # NOTE(review): never written by the scanner code in this module either.
    screenshot: bytes = b""


@dataclass
class AuthTestResult:
    """Aggregate result of one authenticated scan: login state plus one
    ``CheckResult`` per legal requirement."""

    # True when the login step was judged successful.
    authenticated: bool = False
    # Human-readable reason when login (or a later step) failed.
    login_error: str = ""
    # §312k BGB — contract cancellation.
    cancel_subscription: CheckResult = field(default_factory=CheckResult)
    # Art. 17 — account deletion.
    delete_account: CheckResult = field(default_factory=CheckResult)
    # Art. 20 — data export.
    export_data: CheckResult = field(default_factory=CheckResult)
    # Art. 7(3) — consent withdrawal.
    consent_settings: CheckResult = field(default_factory=CheckResult)
    # Art. 15 — data access / profile.
    profile_visible: CheckResult = field(default_factory=CheckResult)


# Search patterns for each check (DE + EN)
CANCEL_PATTERNS = [
    "kündigen",
    "kuendigen",
    "vertrag beenden",
    "abo beenden",
    "mitgliedschaft kündigen",
    "cancel subscription",
    "unsubscribe",
    "cancel membership",
    "vertrag kündigen",
]

DELETE_PATTERNS = [
    "konto löschen",
    "konto loeschen",
    "account löschen",
    "delete account",
    "account deaktivieren",
    "profil löschen",
    "remove account",
]

EXPORT_PATTERNS = [
    "daten exportieren",
    "daten herunterladen",
    "export data",
    "download data",
    "meine daten",
    "datenauskunft",
    "data download",
    "daten anfordern",
]

# Phrases that point at consent / privacy-preference controls (DE + EN).
CONSENT_PATTERNS = [
    "einwilligung", "einstellungen", "datenschutz-einstellungen",
    "consent", "privacy settings", "cookie-einstellungen",
    "werbeeinstellungen", "marketing preferences",
]

# Phrases that point at a profile / account-data view (DE + EN).
PROFILE_PATTERNS = [
    "profil", "mein konto", "kontodaten", "persönliche daten",
    "profile", "my account", "account settings", "personal data",
]


async def run_authenticated_test(
    url: str,
    username: str,
    password: str,
    username_selector: str = "",
    password_selector: str = "",
    submit_selector: str = "",
) -> AuthTestResult:
    """Log in at *url* and look for the legally required account functions.

    Args:
        url: Page that shows the login form.
        username: Login name typed into the form.
        password: Password typed into the form.
        username_selector: Optional selector of the user field; auto-detected
            when empty.
        password_selector: Optional selector of the password field;
            auto-detected when empty.
        submit_selector: Optional selector of the submit control;
            auto-detected when empty (Enter is pressed if none is found).

    Returns:
        An ``AuthTestResult``. ``authenticated`` stays False and
        ``login_error`` is set when the login did not succeed; errors raised
        while navigating or checking are also reported via ``login_error``.

    The browser context and the browser are closed exactly once on every exit
    path, which discards cookies and session state held by the context.
    """
    result = AuthTestResult()

    async with async_playwright() as p:
        browser = await p.chromium.launch(
            headless=True,
            args=["--no-sandbox", "--disable-dev-shm-usage"],
        )
        try:
            context = await browser.new_context(user_agent=USER_AGENT)
            page = await context.new_page()
            try:
                # Step 1: Login
                await page.goto(url, wait_until="networkidle", timeout=30000)
                await page.wait_for_timeout(2000)

                login_ok = await _try_login(
                    page, username, password,
                    username_selector, password_selector, submit_selector,
                )

                if not login_ok:
                    result.login_error = "Login fehlgeschlagen — Formular nicht gefunden oder Credentials falsch"
                    # Cleanup happens in the ``finally`` blocks below; closing
                    # here as well would close everything twice.
                    return result

                result.authenticated = True
                await page.wait_for_timeout(3000)

                # Step 2: Check cancellation (§312k BGB)
                result.cancel_subscription = await _check_patterns(page, CANCEL_PATTERNS, "cancel")
                logger.info("Cancel check: found=%s", result.cancel_subscription.found)

                # Step 3: Check delete account (Art. 17 DSGVO)
                result.delete_account = await _check_patterns(page, DELETE_PATTERNS, "delete")

                # Step 4: Check data export (Art. 20 DSGVO)
                result.export_data = await _check_patterns(page, EXPORT_PATTERNS, "export")

                # Step 5: Check consent settings (Art. 7(3) DSGVO)
                result.consent_settings = await _check_patterns(page, CONSENT_PATTERNS, "consent")

                # Step 6: Check profile visibility (Art. 15 DSGVO)
                result.profile_visible = await _check_patterns(page, PROFILE_PATTERNS, "profile")

            except Exception as e:
                logger.error("Authenticated test failed: %s", e)
                result.login_error = str(e)
            finally:
                # CRITICAL: Destroy context — wipes cookies and session state.
                await context.close()
        finally:
            # Also reached when creating the context or page failed.
            await browser.close()

    return result


async def _first_present(page: Page, candidates: list[str]) -> str:
    """Return the first selector in *candidates* that matches an element, or ""."""
    for sel in candidates:
        if await page.locator(sel).count() > 0:
            return sel
    return ""


async def _try_login(
    page: Page, username: str, password: str,
    user_sel: str, pass_sel: str, submit_sel: str,
) -> bool:
    """Fill and submit the login form on *page*.

    Empty selectors are auto-detected from a list of common candidates.

    Returns:
        True when no password field is present after submitting (used as the
        sign that the login form is gone); False when the form could not be
        found, the password field is still there, or any step raised.
    """
    try:
        # Auto-detect selectors if not provided
        if not user_sel:
            user_sel = await _first_present(page, [
                'input[type="email"]', 'input[name="email"]', 'input[name="username"]',
                'input[name="login"]', 'input[id="email"]', 'input[id="username"]',
            ])
        if not pass_sel:
            pass_sel = await _first_present(page, [
                'input[type="password"]', 'input[name="password"]', 'input[id="password"]',
            ])
        if not submit_sel:
            submit_sel = await _first_present(page, [
                'button[type="submit"]', 'input[type="submit"]',
                'button:has-text("Anmelden")', 'button:has-text("Login")',
                'button:has-text("Sign in")', 'button:has-text("Einloggen")',
            ])

        if not user_sel or not pass_sel:
            return False

        await page.fill(user_sel, username)
        await page.fill(pass_sel, password)

        if submit_sel:
            await page.click(submit_sel)
        else:
            await page.press(pass_sel, "Enter")

        await page.wait_for_timeout(5000)

        # Check if login succeeded (login form disappeared)
        still_on_login = await page.locator('input[type="password"]').count() > 0
        return not still_on_login

    except Exception as e:
        # Log only the exception type. Playwright error messages can embed a
        # call log of the failing action; whether that log echoes the value
        # passed to fill() is version-dependent (TODO confirm), so the message
        # is kept out of the logs to honor the "never logged" promise.
        logger.warning("Login attempt failed: %s", type(e).__name__)
        return False


async def _check_patterns(page: Page, patterns: list[str], check_name: str) -> CheckResult:
    """Search the current page for any of *patterns*.

    Three passes, first hit wins: visible-text lookup, then links/buttons/hrefs,
    then the text of common navigation containers.

    Args:
        page: Page to inspect (expected to be in the logged-in state).
        patterns: Phrases to look for.
        check_name: Short label of the check, used in debug log messages.

    Returns:
        A ``CheckResult``; ``found`` is False when no pass matched. Lookup
        errors are logged at debug level and treated as "no match".
    """
    result = CheckResult()

    # Pass 1: any element whose text contains the pattern
    for pattern in patterns:
        try:
            locator = page.get_by_text(pattern, exact=False)
            if await locator.count() > 0:
                text = await locator.first.text_content()
                result.found = True
                result.text = (text or "").strip()[:100]
                return result
        except Exception as e:
            logger.debug("%s check: text lookup for %r failed: %s", check_name, pattern, e)

    # Pass 2: links/buttons with that text, or hrefs containing the slug
    for pattern in patterns:
        candidates = [
            f'a:has-text("{pattern}")',
            f'button:has-text("{pattern}")',
            f'[href*="{pattern.replace(" ", "-")}"]',
        ]
        for sel in candidates:
            try:
                if await page.locator(sel).count() > 0:
                    result.found = True
                    result.selector = sel
                    result.text = pattern
                    return result
            except Exception as e:
                logger.debug("%s check: selector %r failed: %s", check_name, sel, e)

    # Pass 3: navigation menus (common locations for account management)
    for nav_sel in ['nav', '[role="navigation"]', '.sidebar', '.account-menu', '#account']:
        try:
            nav = page.locator(nav_sel)
            if await nav.count() == 0:
                continue
            nav_text = (await nav.first.text_content() or "").lower()
            for pattern in patterns:
                if pattern.lower() in nav_text:
                    result.found = True
                    result.text = f"In Navigation: {pattern}"
                    return result
        except Exception as e:
            logger.debug("%s check: navigation lookup %r failed: %s", check_name, nav_sel, e)

    return result


# ---------------------------------------------------------------------------
# Patch header and opening lines of the next module's docstring, preserved
# verbatim as comments:
#
# diff --git a/consent-tester/services/banner_detector.py b/consent-tester/services/banner_detector.py
# new file mode 100644
# index 0000000..396c5dd
# --- /dev/null
# +++ b/consent-tester/services/banner_detector.py
# @@ -0,0 +1,149 @@
# """
# Banner Detector — identifies Consent Management Platforms and their buttons.
#
# Supports 10+ CMPs with specific selectors + generic fallback.
# """   (closing delimiter of the banner_detector module docstring, kept as a
#        comment so this section parses on its own)

from dataclasses import dataclass

from playwright.async_api import Page, Locator


@dataclass
class BannerInfo:
    """Detected consent banner: provider name plus accept/reject selectors."""

    detected: bool
    provider: str
    accept_selector: str
    reject_selector: str


# CMP-specific selectors (ordered by market share)
# NOTE(review): click_button() clicks the first DOM match of a selector list,
# not the first listed alternative — confirm that fallbacks such as
# ".cky-btn-customize" or ".onetrust-close-btn-handler" cannot precede the
# real reject button in the banner markup.
CMP_SELECTORS = [
    {
        "name": "Didomi",
        "detect": "#didomi-host, [class*='didomi']",
        "accept": "#didomi-notice-agree-button",
        "reject": "#didomi-notice-disagree-button, .didomi-components-button--secondary",
    },
    {
        "name": "OneTrust",
        "detect": "#onetrust-banner-sdk, [class*='onetrust']",
        "accept": "#onetrust-accept-btn-handler",
        "reject": "#onetrust-reject-all-handler, .onetrust-close-btn-handler",
    },
    {
        "name": "Cookiebot",
        "detect": "#CybotCookiebotDialog, [class*='CybotCookiebot']",
        "accept": "#CybotCookiebotDialogBodyLevelButtonLevelOptinAllowAll",
        "reject": "#CybotCookiebotDialogBodyButtonDecline",
    },
    {
        "name": "Usercentrics",
        "detect": "#usercentrics-root, [data-testid='uc-banner']",
        "accept": "[data-testid='uc-accept-all-button']",
        "reject": "[data-testid='uc-deny-all-button']",
    },
    {
        "name": "CookieYes",
        "detect": ".cky-consent-container, [class*='cky-']",
        "accept": ".cky-btn-accept",
        "reject": ".cky-btn-reject, .cky-btn-customize",
    },
    {
        "name": "Quantcast",
        "detect": ".qc-cmp2-container, [class*='qc-cmp']",
        "accept": "[class*='qc-cmp2-summary-buttons'] button:first-child",
        "reject": "[class*='qc-cmp2-summary-buttons'] button:last-child",
    },
    {
        "name": "Borlabs",
        "detect": "#BorlabsCookieBox, [class*='BorlabsCookie']",
        "accept": "#BorlabsCookieBox .cookie-accept, [data-cookie-accept]",
        "reject": "#BorlabsCookieBox .cookie-refuse, [data-cookie-refuse]",
    },
    {
        "name": "Consentmanager",
        "detect": "#cmpbox, [class*='cmpbox']",
        "accept": ".cmpboxbtn.cmpboxbtnyes",
        "reject": ".cmpboxbtn.cmpboxbtnno",
    },
    {
        "name": "Klaro",
        "detect": ".klaro, [class*='klaro']",
        "accept": ".klaro .cm-btn-accept",
        "reject": ".klaro .cm-btn-decline",
    },
    {
        "name": "TarteAuCitron",
        "detect": "#tarteaucitronRoot, [class*='tarteaucitron']",
        "accept": "#tarteaucitronPersonalize2",
        "reject": "#tarteaucitronAllDenied2",
    },
]

# Generic fallback patterns (text-based)
GENERIC_ACCEPT_TEXTS = [
    "Alle akzeptieren", "Alles akzeptieren", "Alle Cookies akzeptieren",
    "Accept all", "Accept All Cookies", "Akzeptieren", "Zustimmen",
    "Einverstanden", "Ich stimme zu", "Ja, einverstanden",
]

GENERIC_REJECT_TEXTS = [
    "Nur notwendige", "Nur essentielle", "Ablehnen", "Alle ablehnen",
    "Reject", "Reject all", "Nur erforderliche", "Nur technisch notwendige",
    "Decline", "Nein", "Nicht einverstanden",
]


async def _clickable_selector(page: Page, text: str) -> str:
    """Return a selector for a clickable element containing *text*.

    Tries ``button``, ``a`` and ``[role="button"]`` in that order and returns
    the first that matches; falls back to the ``button`` form when none does.
    """
    for tag in ("button", "a", '[role="button"]'):
        candidate = f'{tag}:has-text("{text}")'
        if await page.locator(candidate).count() > 0:
            return candidate
    return f'button:has-text("{text}")'


async def detect_banner(page: Page) -> BannerInfo:
    """Detect which CMP is used and return button selectors.

    Known CMPs are probed first via ``CMP_SELECTORS``. If none matches, the
    page is searched for generic accept texts; the first hit yields a
    "Generic" banner whose reject selector is "" when no reject text is found.

    Returns:
        A ``BannerInfo``; ``detected`` is False when nothing matched.
    """
    # Try CMP-specific selectors first
    for cmp in CMP_SELECTORS:
        try:
            if await page.locator(cmp["detect"]).count() > 0:
                return BannerInfo(
                    detected=True,
                    provider=cmp["name"],
                    accept_selector=cmp["accept"],
                    reject_selector=cmp["reject"],
                )
        except Exception:
            continue

    # Generic fallback — search for buttons by text
    for text in GENERIC_ACCEPT_TEXTS:
        try:
            if await page.get_by_text(text, exact=False).count() == 0:
                continue
            # The text was found somewhere on the page; pick a selector that
            # matches an element that can be clicked, not necessarily <button>.
            accept = await _clickable_selector(page, text)
            reject = ""
            for rtext in GENERIC_REJECT_TEXTS:
                if await page.get_by_text(rtext, exact=False).count() > 0:
                    reject = await _clickable_selector(page, rtext)
                    break
            return BannerInfo(
                detected=True,
                provider="Generic",
                accept_selector=accept,
                reject_selector=reject,
            )
        except Exception:
            continue

    return BannerInfo(detected=False, provider="", accept_selector="", reject_selector="")


async def click_button(page: Page, selector: str, timeout: int = 5000) -> bool:
    """Try to click a consent button. Returns True if clicked successfully.

    Args:
        page: Page holding the banner.
        selector: Selector of the button; "" returns False immediately.
        timeout: Milliseconds to wait for the element to become visible.

    Any error (element missing, not visible in time, click failed) is treated
    as "not clicked" — this is a deliberate best-effort helper.
    """
    if not selector:
        return False
    try:
        locator = page.locator(selector).first
        await locator.wait_for(state="visible", timeout=timeout)
        await locator.click()
        return True
    except Exception:
        return False


# ---------------------------------------------------------------------------
# Patch header of the next module, preserved verbatim as comments:
#
# diff --git a/consent-tester/services/category_tester.py b/consent-tester/services/category_tester.py
# new file mode 100644
# index 0000000..ddac093
# --- /dev/null
# +++ b/consent-tester/services/category_tester.py
# @@ -0,0 +1,278 @@
"""
Category Tester — tests individual cookie consent categories.

Tests each category in isolation: only "Statistics" on, only "Marketing" on, etc.
Detects miscategorization: e.g., Facebook Pixel loading when only Statistics is enabled.
"""

import logging
from dataclasses import dataclass, field

from playwright.async_api import Page, BrowserContext

from services.banner_detector import BannerInfo, click_button
from services.script_analyzer import find_tracking_services, Violation

logger = logging.getLogger(__name__)

# Which tracking service belongs to which consent category
SERVICE_CATEGORY_MAP: dict[str, str] = {
    # Statistics / Analytics
    "Google Analytics": "statistics",
    "Matomo": "statistics",
    "Plausible Analytics": "statistics",
    "Hotjar": "statistics",
    "Microsoft Clarity": "statistics",
    "etracker": "statistics",
    "Heap Analytics": "statistics",
    "Amplitude": "statistics",
    "Mixpanel": "statistics",
    "PostHog": "statistics",
    "Mouseflow": "statistics",
    "Crazy Egg": "statistics",
    "Lucky Orange": "statistics",
    "FullStory": "statistics",
    # Marketing / Advertising
    "Meta/Facebook Pixel": "marketing",
    "Google Ads": "marketing",
    "Google Ads/DoubleClick": "marketing",
    "TikTok Pixel": "marketing",
    "LinkedIn Insight": "marketing",
    "Pinterest Tag": "marketing",
    "Criteo": "marketing",
    "Taboola": "marketing",
    "Outbrain": "marketing",
    "Amazon Ads": "marketing",
    "Bing/Microsoft Ads": "marketing",
    "Salesforce Pardot": "marketing",
    # Functional
    "Intercom": "functional",
    "Zendesk": "functional",
    "Tidio Chat": "functional",
    "Crisp Chat": "functional",
    "LiveChat": "functional",
    "Freshdesk/Freshchat": "functional",
    "HelpScout Beacon": "functional",
}

CATEGORY_LABELS = {
    "statistics": "Statistik / Analytics",
    "marketing": "Marketing / Werbung",
    "functional": "Funktional / Komfort",
    "social_media": "Social Media",
}

# CMP category names that mean the same as a SERVICE_CATEGORY_MAP category.
# Cookiebot calls its functional bucket "preferences"; without this mapping
# every functional service would be reported as miscategorized there.
_CATEGORY_ALIASES = {"preferences": "functional"}

# CMP-specific category selectors
CMP_CATEGORY_CONFIG: dict[str, dict] = {
    "Cookiebot": {
        "settings_button": "#CybotCookiebotDialogBodyButtonDetails",
        "save_button": "#CybotCookiebotDialogBodyLevelButtonLevelOptinAllowallSelection",
        "categories": {
            "statistics": "#CybotCookiebotDialogBodyLevelButtonStatistics",
            "marketing": "#CybotCookiebotDialogBodyLevelButtonMarketing",
            "preferences": "#CybotCookiebotDialogBodyLevelButtonPreferences",
        },
    },
    "OneTrust": {
        "settings_button": "#onetrust-pc-btn-handler, .ot-sdk-show-settings",
        "save_button": ".save-preference-btn-handler, #onetrust-accept-btn-handler",
        "categories": {
            "statistics": ".ot-switch[data-ot-category='C0002'] input, #ot-group-id-C0002",
            "marketing": ".ot-switch[data-ot-category='C0004'] input, #ot-group-id-C0004",
            "functional": ".ot-switch[data-ot-category='C0003'] input, #ot-group-id-C0003",
        },
    },
    "Usercentrics": {
        "settings_button": "[data-testid='uc-more-information-button'], button:has-text('Mehr Informationen')",
        "save_button": "[data-testid='uc-save-button']",
        "categories": {
            "statistics": "[data-testid='uc-category-statistics'] input",
            "marketing": "[data-testid='uc-category-marketing'] input",
            "functional": "[data-testid='uc-category-functional'] input",
        },
    },
    "Didomi": {
        "settings_button": "#didomi-notice-learn-more-button, .didomi-learn-more-button",
        "save_button": ".didomi-components-button--primary:has-text('Auswahl speichern'), #didomi-notice-agree-button",
        "categories": {
            "statistics": "[data-purpose='analytics_purposes'] input, [data-purpose='measure'] input",
            "marketing": "[data-purpose='advertising_purposes'] input, [data-purpose='ads'] input",
        },
    },
}

# Generic category keywords for fallback detection
CATEGORY_KEYWORDS = {
    "statistics": ["statistik", "analytics", "analyse", "statistics", "messung", "reichweite"],
    "marketing": ["marketing", "werbung", "advertising", "targeting", "remarketing", "anzeigen"],
    "functional": ["funktional", "functional", "preferences", "praeferenz", "komfort", "einstellungen"],
    "social_media": ["social media", "soziale medien", "social", "teilen"],
}


@dataclass
class CategoryInfo:
    """One consent category offered by the banner and the selector of its toggle."""

    name: str
    label: str
    selector: str


@dataclass
class CategoryTestResult:
    """What was observed while only one consent category was enabled."""

    category: str
    category_label: str
    scripts_loaded: list[str] = field(default_factory=list)
    cookies_set: list[str] = field(default_factory=list)
    tracking_services: list[str] = field(default_factory=list)
    violations: list[dict] = field(default_factory=list)


def _css_string(value: str) -> str:
    """Escape *value* for use inside a double-quoted CSS string."""
    return value.replace("\\", "\\\\").replace('"', '\\"').replace("\n", "\\a ")


async def detect_categories(page: Page, banner: BannerInfo) -> list[CategoryInfo]:
    """Detect available cookie categories in the CMP.

    For providers listed in ``CMP_CATEGORY_CONFIG`` the settings panel is
    opened and each configured category selector is probed. If that yields
    nothing, toggles/checkboxes on the page are matched against
    ``CATEGORY_KEYWORDS`` by their label text.

    Returns:
        The detected categories; empty when none were found.
    """
    categories = []
    provider = banner.provider

    # CMP-specific detection
    config = CMP_CATEGORY_CONFIG.get(provider)
    if config:
        # Open settings panel first
        if config.get("settings_button"):
            await click_button(page, config["settings_button"], timeout=3000)
            await page.wait_for_timeout(1000)

        for cat_name, selector in config.get("categories", {}).items():
            try:
                # Only the first alternative of the selector list is probed.
                if await page.locator(selector.split(",")[0].strip()).count() > 0:
                    categories.append(CategoryInfo(
                        name=cat_name,
                        label=CATEGORY_LABELS.get(cat_name, cat_name),
                        selector=selector,
                    ))
            except Exception:
                continue

    # Generic fallback: search for toggle/checkbox elements with category keywords
    if not categories:
        try:
            # CSS.escape keeps ids with characters such as ".", ":" or a
            # leading digit usable as "#id" selectors.
            toggles = await page.evaluate("""
                () => {
                    const elements = document.querySelectorAll(
                        'input[type="checkbox"], [role="switch"], [class*="toggle"], [class*="switch"]'
                    );
                    return [...elements].map(el => ({
                        text: (el.closest('label')?.textContent || el.getAttribute('aria-label') || '').trim(),
                        id: el.id || '',
                        selector: el.id ? '#' + CSS.escape(el.id) : '',
                    })).filter(e => e.text.length > 0);
                }
            """)

            for toggle in (toggles or []):
                text_lower = toggle["text"].lower()
                for cat_name, keywords in CATEGORY_KEYWORDS.items():
                    if any(kw in text_lower for kw in keywords):
                        # Page text is untrusted: escape it before embedding
                        # it in an attribute selector.
                        sel = toggle["selector"] or f'[aria-label*="{_css_string(toggle["text"][:20])}"]'
                        categories.append(CategoryInfo(
                            name=cat_name,
                            label=toggle["text"][:50],
                            selector=sel,
                        ))
                        break
        except Exception as e:
            logger.warning("Generic category detection failed: %s", e)

    logger.info("Detected %d categories for %s", len(categories), provider)
    return categories


async def test_single_category(
    context: BrowserContext,
    url: str,
    category: CategoryInfo,
    banner: BannerInfo,
    wait_ms: int = 5000,
) -> CategoryTestResult:
    """Test a single category in isolation: enable only this one, disable others.

    Args:
        context: Browser context in which a fresh page is opened.
        url: Page to load.
        category: The category to enable.
        banner: Detected banner; its provider selects the CMP configuration.
        wait_ms: Milliseconds to wait after saving before collecting results.

    Returns:
        A ``CategoryTestResult``. On any error the partially filled result is
        returned and the error is logged. The page is always closed.

    NOTE(review): toggling only happens for providers in
    ``CMP_CATEGORY_CONFIG``; for any other provider the page is merely loaded
    and observed — confirm this is intended.
    NOTE(review): ``context.cookies()`` returns every cookie in the context;
    if callers reuse one context for several categories, cookies of earlier
    runs are included — confirm callers pass a fresh context.
    """
    result = CategoryTestResult(
        category=category.name,
        category_label=category.label,
    )

    page = None
    try:
        page = await context.new_page()
        scripts: list[str] = []
        page.on("request", lambda req: _collect(req, scripts))

        await page.goto(url, wait_until="networkidle", timeout=20000)
        await page.wait_for_timeout(2000)

        config = CMP_CATEGORY_CONFIG.get(banner.provider)

        if config:
            # Open settings
            if config.get("settings_button"):
                await click_button(page, config["settings_button"], timeout=3000)
                await page.wait_for_timeout(1000)

            # Disable ALL categories first
            for cat_sel in config.get("categories", {}).values():
                try:
                    el = page.locator(cat_sel.split(",")[0].strip()).first
                    if await el.is_checked():
                        await el.click()
                except Exception:
                    continue

            # Enable ONLY the target category
            try:
                el = page.locator(category.selector.split(",")[0].strip()).first
                if not await el.is_checked():
                    await el.click()
            except Exception:
                logger.warning("Could not toggle category %s", category.name)

            # Save selection
            if config.get("save_button"):
                await click_button(page, config["save_button"], timeout=3000)

        await page.wait_for_timeout(wait_ms)

        # Collect results
        requested = list(scripts)
        result.scripts_loaded = _dedup_scripts(requested)
        result.cookies_set = [c.get("name", "") for c in await context.cookies()]
        # Classify every request, not the per-host de-duplicated and capped
        # list: trackers sharing a host or arriving late would be missed.
        result.tracking_services = find_tracking_services(requested)

        # Find violations: services that don't belong to this category
        tested_cat = _CATEGORY_ALIASES.get(category.name, category.name)
        for service in result.tracking_services:
            expected_cat = SERVICE_CATEGORY_MAP.get(service)
            if expected_cat and expected_cat != tested_cat:
                result.violations.append({
                    "service": service,
                    "severity": "HIGH",
                    "text": f"{service} laedt bei '{category.label}' — gehoert aber zu '{CATEGORY_LABELS.get(expected_cat, expected_cat)}'",
                    "expected_category": expected_cat,
                    "actual_category": category.name,
                })

    except Exception as e:
        logger.error("Category test failed for %s: %s", category.name, e)
    finally:
        # Close the page on every path so failed runs do not leak pages.
        if page is not None:
            try:
                await page.close()
            except Exception as e:
                logger.debug("Closing page for %s failed: %s", category.name, e)

    return result


def _collect(request, scripts: list[str]):
    """Request listener: record URLs of script, image, xhr and fetch requests."""
    if request.resource_type in ("script", "image", "xhr", "fetch"):
        scripts.append(request.url)


def _dedup_scripts(scripts: list[str]) -> list[str]:
    """Keep the first URL per host (third "/"-separated part) and cap at 30."""
    seen = set()
    result = []
    for url in scripts:
        parts = url.split("/")
        # "scheme://host/..." -> parts[2] is the host; fall back to the URL.
        domain = parts[2] if len(parts) > 2 else url
        if domain not in seen:
            seen.add(domain)
            result.append(url)
    return result[:30]


# ---------------------------------------------------------------------------
# Patch header and opening lines of the next module's docstring, preserved
# verbatim as comments:
#
# diff --git a/consent-tester/services/script_analyzer.py b/consent-tester/services/script_analyzer.py
# new file mode 100644
# index 0000000..4079362
# --- /dev/null
# +++ b/consent-tester/services/script_analyzer.py
# @@ -0,0 +1,157 @@
# """
# Script Analyzer — classifies detected scripts and cookies against known services.
# """   (closing delimiter of the script_analyzer module docstring, kept as a
#        comment so this section parses on its own)

import re
from dataclasses import dataclass

# Regex (matched case-insensitively) -> service metadata. Order matters: the
# first matching entry decides how a URL is classified.
SERVICE_PATTERNS: dict[str, dict] = {
    # The measurement-ID alternatives are case-sensitive and word-bounded.
    # Matched case-insensitively, "G-\w{5}" / "UA-\d" hit ordinary URLs such
    # as ".../landing-pages/..." or ".../img-logo1.png".
    r"google.?analytics|gtag|(?-i:\bUA-\d{4,}-\d+\b|\bG-[A-Z0-9]{6,}\b)": {
        "name": "Google Analytics", "requires_consent": True,
        "legal_ref": "§25 TDDDG, Art. 44-49 DSGVO",
    },
    r"googletagmanager|gtm\.js": {
        "name": "Google Tag Manager", "requires_consent": True,
        "legal_ref": "§25 TDDDG",
    },
    r"facebook\.net|fbevents|fbq": {
        "name": "Meta/Facebook Pixel", "requires_consent": True,
        "legal_ref": "§25 TDDDG, Art. 44-49 DSGVO",
    },
    r"hotjar\.com|_hjSettings": {
        "name": "Hotjar", "requires_consent": True,
        "legal_ref": "§25 TDDDG (Session Recording)",
    },
    r"clarity\.ms": {
        "name": "Microsoft Clarity", "requires_consent": True,
        "legal_ref": "§25 TDDDG (Session Replay)",
    },
    r"tiktok\.com/i18n|analytics\.tiktok": {
        "name": "TikTok Pixel", "requires_consent": True,
        "legal_ref": "§25 TDDDG, Drittlandtransfer China",
    },
    r"linkedin\.com/insight|snap\.licdn": {
        "name": "LinkedIn Insight", "requires_consent": True,
        "legal_ref": "§25 TDDDG, Art. 44-49 DSGVO",
    },
    r"pinterest\.com/ct|pinimg\.com/ct": {
        "name": "Pinterest Tag", "requires_consent": True,
        "legal_ref": "§25 TDDDG",
    },
    r"criteo\.com|criteo\.net": {
        "name": "Criteo", "requires_consent": True,
        "legal_ref": "§25 TDDDG",
    },
    r"doubleclick\.net|googlesyndication": {
        "name": "Google Ads/DoubleClick", "requires_consent": True,
        "legal_ref": "§25 TDDDG, Art. 44-49 DSGVO",
    },
    r"fonts\.googleapis\.com|fonts\.gstatic": {
        "name": "Google Fonts", "requires_consent": True,
        "legal_ref": "LG Muenchen I, Az. 3 O 17493/20",
    },
    r"recaptcha|grecaptcha": {
        "name": "Google reCAPTCHA", "requires_consent": True,
        "legal_ref": "§25 TDDDG",
    },
    r"youtube\.com/embed|ytimg": {
        "name": "YouTube", "requires_consent": True,
        "legal_ref": "§25 TDDDG, Art. 44-49 DSGVO",
    },
    r"maps\.googleapis|maps\.google": {
        "name": "Google Maps", "requires_consent": True,
        "legal_ref": "§25 TDDDG",
    },
    r"intercom\.io|intercomcdn": {
        "name": "Intercom", "requires_consent": True,
        "legal_ref": "Art. 44-49 DSGVO",
    },
    r"zendesk\.com|zdassets": {
        "name": "Zendesk", "requires_consent": True,
        "legal_ref": "Art. 44-49 DSGVO",
    },
    r"sentry\.io|sentry-cdn": {
        "name": "Sentry", "requires_consent": False,
        "legal_ref": "Berechtigtes Interesse (Error Tracking)",
    },
    r"cdn\.cloudflare\.com": {
        "name": "Cloudflare CDN", "requires_consent": False,
        "legal_ref": "Berechtigtes Interesse (CDN)",
    },
    r"didomi|cookiebot|onetrust|usercentrics|consentmanager": {
        "name": "Consent Management", "requires_consent": False,
        "legal_ref": "Notwendig (CMP)",
    },
}


@dataclass
class Violation:
    """A consent violation attributed to one known service."""

    service: str
    severity: str  # "HIGH", "CRITICAL"
    text: str
    legal_ref: str


def _first_match(script: str, *, consent_only: bool) -> "dict | None":
    """Return the metadata of the first pattern matching *script*.

    With ``consent_only=True`` entries that do not require consent are
    skipped, so a later consent-requiring pattern can still match.
    """
    for pattern, meta in SERVICE_PATTERNS.items():
        if consent_only and not meta["requires_consent"]:
            continue
        if re.search(pattern, script, re.IGNORECASE):
            return meta
    return None


def classify_scripts(scripts: list[str]) -> list[str]:
    """Classify script URLs into known service names.

    Returns:
        Sorted, de-duplicated names of all recognized services (whether or
        not they require consent). Unrecognized URLs are ignored.
    """
    services = set()
    for script in scripts:
        meta = _first_match(script, consent_only=False)
        if meta is not None:
            services.add(meta["name"])
    return sorted(services)


def find_tracking_services(scripts: list[str]) -> list[str]:
    """Find services that require consent.

    Returns:
        Sorted, de-duplicated names of consent-requiring services found in
        *scripts*.
    """
    tracking = set()
    for script in scripts:
        meta = _first_match(script, consent_only=True)
        if meta is not None:
            tracking.add(meta["name"])
    return sorted(tracking)


def find_violations_before_consent(scripts: list[str]) -> list[Violation]:
    """Find tracking scripts that load without consent (HIGH).

    Returns:
        One ``Violation`` per consent-requiring service, in order of first
        appearance in *scripts*.
    """
    violations = []
    seen = set()
    for script in scripts:
        meta = _first_match(script, consent_only=True)
        if meta is None or meta["name"] in seen:
            continue
        name = meta["name"]
        seen.add(name)
        violations.append(Violation(
            service=name, severity="HIGH",
            text=f"{name} laedt OHNE vorherige Einwilligung",
            legal_ref=meta["legal_ref"],
        ))
    return violations


def find_violations_after_reject(
    before_scripts: list[str], after_scripts: list[str],
) -> list[Violation]:
    """Find tracking scripts that still load after rejection (CRITICAL).

    Only services seen both before the consent decision and after the
    rejection are reported.

    Returns:
        One ``Violation`` per such service, sorted by service name.
    """
    # Service name -> legal reference; the first table entry wins per name.
    legal_refs: dict[str, str] = {}
    for meta in SERVICE_PATTERNS.values():
        legal_refs.setdefault(meta["name"], meta["legal_ref"])

    before_tracking = set(find_tracking_services(before_scripts))

    violations = []
    for service in find_tracking_services(after_scripts):
        if service not in before_tracking:
            continue
        # Was already loading before AND still loads after reject = CRITICAL
        violations.append(Violation(
            service=service, severity="CRITICAL",
            text=f"{service} laedt TROTZ Ablehnung — moegliches Dark Pattern",
            legal_ref=legal_refs[service] + ", Art. 5(3) ePrivacy",
        ))

    return violations