""" Category Tester — tests individual cookie consent categories. Tests each category in isolation: only "Statistics" on, only "Marketing" on, etc. Detects miscategorization: e.g., Facebook Pixel loading when only Statistics is enabled. """ import logging from dataclasses import dataclass, field from playwright.async_api import Page, BrowserContext from services.banner_detector import BannerInfo, click_button from services.script_analyzer import find_tracking_services, Violation logger = logging.getLogger(__name__) # Which tracking service belongs to which consent category SERVICE_CATEGORY_MAP: dict[str, str] = { # Statistics / Analytics "Google Analytics": "statistics", "Matomo": "statistics", "Plausible Analytics": "statistics", "Hotjar": "statistics", "Microsoft Clarity": "statistics", "etracker": "statistics", "Heap Analytics": "statistics", "Amplitude": "statistics", "Mixpanel": "statistics", "PostHog": "statistics", "Mouseflow": "statistics", "Crazy Egg": "statistics", "Lucky Orange": "statistics", "FullStory": "statistics", # Marketing / Advertising "Meta/Facebook Pixel": "marketing", "Google Ads": "marketing", "Google Ads/DoubleClick": "marketing", "TikTok Pixel": "marketing", "LinkedIn Insight": "marketing", "Pinterest Tag": "marketing", "Criteo": "marketing", "Taboola": "marketing", "Outbrain": "marketing", "Amazon Ads": "marketing", "Bing/Microsoft Ads": "marketing", "Salesforce Pardot": "marketing", # Functional "Intercom": "functional", "Zendesk": "functional", "Tidio Chat": "functional", "Crisp Chat": "functional", "LiveChat": "functional", "Freshdesk/Freshchat": "functional", "HelpScout Beacon": "functional", } CATEGORY_LABELS = { "statistics": "Statistik / Analytics", "marketing": "Marketing / Werbung", "functional": "Funktional / Komfort", "social_media": "Social Media", } # CMP-specific category selectors CMP_CATEGORY_CONFIG: dict[str, dict] = { "Cookiebot": { "settings_button": "#CybotCookiebotDialogBodyButtonDetails", "save_button": "#CybotCookiebotDialogBodyLevelButtonLevelOptinAllowallSelection", "categories": { "statistics": "#CybotCookiebotDialogBodyLevelButtonStatistics", "marketing": "#CybotCookiebotDialogBodyLevelButtonMarketing", "preferences": "#CybotCookiebotDialogBodyLevelButtonPreferences", }, }, "OneTrust": { "settings_button": "#onetrust-pc-btn-handler, .ot-sdk-show-settings", "save_button": ".save-preference-btn-handler, #onetrust-accept-btn-handler", "categories": { "statistics": ".ot-switch[data-ot-category='C0002'] input, #ot-group-id-C0002", "marketing": ".ot-switch[data-ot-category='C0004'] input, #ot-group-id-C0004", "functional": ".ot-switch[data-ot-category='C0003'] input, #ot-group-id-C0003", }, }, "Usercentrics": { "settings_button": "[data-testid='uc-more-information-button'], button:has-text('Mehr Informationen')", "save_button": "[data-testid='uc-save-button']", "categories": { "statistics": "[data-testid='uc-category-statistics'] input", "marketing": "[data-testid='uc-category-marketing'] input", "functional": "[data-testid='uc-category-functional'] input", }, }, "Didomi": { "settings_button": "#didomi-notice-learn-more-button, .didomi-learn-more-button", "save_button": ".didomi-components-button--primary:has-text('Auswahl speichern'), #didomi-notice-agree-button", "categories": { "statistics": "[data-purpose='analytics_purposes'] input, [data-purpose='measure'] input", "marketing": "[data-purpose='advertising_purposes'] input, [data-purpose='ads'] input", }, }, # P19: TYPO3 dp-cookieconsent (Dirk Persky) — basiert auf osano cookieconsent. # Banner zeigt Checkboxes direkt; KEIN Settings-Modal, KEINE Provider-Details. # Detection: Checkbox-IDs dp--cookie-*. Provider-/Cookie-Liste fehlt # systematisch -> explizites Finding. "dp-cookieconsent": { "settings_button": None, "save_button": "a.cc-allow:not(.cc-allow-all), button:has-text('Speichern')", "categories": { "statistics": "#dp--cookie-statistics", "marketing": "#dp--cookie-marketing", }, }, "Cookie Consent (Insites)": { # alias — banner_detector benennt dp-cookieconsent so "settings_button": None, "save_button": "a.cc-allow:not(.cc-allow-all), button:has-text('Speichern')", "categories": { "statistics": "#dp--cookie-statistics, input[id*='statistic' i]", "marketing": "#dp--cookie-marketing, input[id*='marketing' i]", }, }, } # Selektoren um zu prueffen ob ein Banner Provider-/Cookie-Details # nach Kategorie-Selektion ZEIGT (Per-Category-Vendor-Listing). _PROVIDER_DETAIL_SELECTORS = ( "[class*='cookie-list' i]", "[class*='cookielist' i]", "[class*='vendor-list' i]", "[class*='vendor_list' i]", "[class*='provider-list' i]", "[class*='cookie-detail' i]", "[class*='vendor-detail' i]", "[class*='cookie-item' i]", "[class*='vendor-item' i]", "table[class*='cookie' i]", "table[class*='vendor' i]", "ul[class*='cookie' i] li", ) async def _provider_details_visible(page, category_label: str) -> bool: """True wenn im Banner sichtbare Provider-/Cookie-Details existieren. Heuristik: irgendein Element matched die Detail-Selektoren UND ist visible. Bei Banner wie dp-cookieconsent (kein Listing) immer False -> Finding. """ try: return await page.evaluate( """(selectors) => { for (const sel of selectors) { const els = document.querySelectorAll(sel); for (const el of els) { const r = el.getBoundingClientRect(); if (r.width > 30 && r.height > 10) return true; } } return false; }""", list(_PROVIDER_DETAIL_SELECTORS), ) except Exception: return False # Generic category keywords for fallback detection CATEGORY_KEYWORDS = { "statistics": ["statistik", "analytics", "analyse", "statistics", "messung", "reichweite"], "marketing": ["marketing", "werbung", "advertising", "targeting", "remarketing", "anzeigen"], "functional": ["funktional", "functional", "preferences", "praeferenz", "komfort", "einstellungen"], "social_media": ["social media", "soziale medien", "social", "teilen"], } @dataclass class CategoryInfo: name: str label: str selector: str @dataclass class CategoryTestResult: category: str category_label: str scripts_loaded: list[str] = field(default_factory=list) cookies_set: list[str] = field(default_factory=list) tracking_services: list[str] = field(default_factory=list) violations: list[dict] = field(default_factory=list) # P19: Per-Category-Transparenz im Banner provider_details_visible: bool = False async def detect_categories(page: Page, banner: BannerInfo) -> list[CategoryInfo]: """Detect available cookie categories in the CMP.""" categories = [] provider = banner.provider # CMP-specific detection config = CMP_CATEGORY_CONFIG.get(provider) if config: # Open settings panel first if config.get("settings_button"): await click_button(page, config["settings_button"], timeout=3000) await page.wait_for_timeout(1000) for cat_name, selector in config.get("categories", {}).items(): try: if await page.locator(selector.split(",")[0].strip()).count() > 0: categories.append(CategoryInfo( name=cat_name, label=CATEGORY_LABELS.get(cat_name, cat_name), selector=selector, )) except Exception: continue # P22: Shadow-DOM-Fallback fuer Web-Component-CMPs (Mercedes cmm-cookie-banner). # Sucht Checkboxes/Switches rekursiv durch alle shadowRoots. if not categories: try: shadow_cats = await page.evaluate(""" () => { const out = []; function walk(root, depth) { if (depth > 6) return; for (const el of root.querySelectorAll('*')) { if (el.shadowRoot) { const sr = el.shadowRoot; const inputs = sr.querySelectorAll('input[type=checkbox], [role=switch], [role=checkbox]'); for (const i of inputs) { const lbl = (i.closest('label')?.textContent || i.getAttribute('aria-label') || '').trim(); if (lbl.length > 0) { out.push({label: lbl.slice(0,60), host: el.tagName.toLowerCase()}); } } walk(sr, depth + 1); } } } walk(document, 0); return out; } """) for sc in (shadow_cats or []): text_lower = sc["label"].lower() for cat_name, keywords in CATEGORY_KEYWORDS.items(): if any(kw in text_lower for kw in keywords): # Marker selector — toggling per shadow:cat: categories.append(CategoryInfo( name=cat_name, label=sc["label"][:50], selector=f"shadow-toggle:{sc['label'][:50]}", )) break if categories: logger.info("P22: %d shadow-DOM categories detected", len(categories)) except Exception as e: logger.warning("Shadow-DOM category detection failed: %s", e) # Generic fallback: search for toggle/checkbox elements with category keywords if not categories: try: toggles = await page.evaluate(""" () => { const elements = document.querySelectorAll( 'input[type="checkbox"], [role="switch"], [class*="toggle"], [class*="switch"]' ); return [...elements].map(el => ({ text: (el.closest('label')?.textContent || el.getAttribute('aria-label') || '').trim(), id: el.id || '', selector: el.id ? '#' + el.id : '', })).filter(e => e.text.length > 0); } """) for toggle in (toggles or []): text_lower = toggle["text"].lower() for cat_name, keywords in CATEGORY_KEYWORDS.items(): if any(kw in text_lower for kw in keywords): sel = toggle["selector"] or f'[aria-label*="{toggle["text"][:20]}"]' categories.append(CategoryInfo( name=cat_name, label=toggle["text"][:50], selector=sel, )) break except Exception as e: logger.warning("Generic category detection failed: %s", e) logger.info("Detected %d categories for %s", len(categories), provider) return categories async def test_single_category( context: BrowserContext, url: str, category: CategoryInfo, banner: BannerInfo, wait_ms: int = 5000, ) -> CategoryTestResult: """Test a single category in isolation: enable only this one, disable others.""" result = CategoryTestResult( category=category.name, category_label=category.label, ) try: page = await context.new_page() scripts: list[str] = [] page.on("request", lambda req: _collect(req, scripts)) try: await page.goto(url, wait_until="networkidle", timeout=20000) except Exception: await page.goto(url, wait_until="load", timeout=20000) await page.wait_for_timeout(2000) # P22: Shadow-DOM-Toggle fuer Web-Component-CMPs (Mercedes etc.) if category.selector.startswith("shadow-toggle:"): label_pat = category.selector[len("shadow-toggle:"):] try: await page.evaluate("""(pat) => { const lbl = pat.toLowerCase(); function walk(root) { for (const el of root.querySelectorAll('*')) { if (el.shadowRoot) { const inputs = el.shadowRoot.querySelectorAll( 'input[type=checkbox], [role=switch], [role=checkbox]'); for (const i of inputs) { const t = (i.closest('label')?.textContent || i.getAttribute('aria-label') || '').toLowerCase(); if (t.includes(lbl) && !i.checked) { i.click(); return true; } } if (walk(el.shadowRoot)) return true; } } return false; } walk(document); }""", label_pat) await page.wait_for_timeout(500) # Save via accept-text "Speichern" / "Save" inside shadow await page.evaluate("""() => { const SAVE = /speichern|save|bestaetigen|confirm/i; function walk(root) { for (const el of root.querySelectorAll('*')) { if (el.shadowRoot) { for (const b of el.shadowRoot.querySelectorAll('button, [role=button]')) { if (SAVE.test(b.textContent || '')) { b.click(); return true; } } if (walk(el.shadowRoot)) return true; } } return false; } walk(document); }""") await page.wait_for_timeout(wait_ms) except Exception as e: logger.warning("Shadow-toggle for %s failed: %s", category.name, e) config = CMP_CATEGORY_CONFIG.get(banner.provider) if config: # Open settings if config.get("settings_button"): await click_button(page, config["settings_button"], timeout=3000) await page.wait_for_timeout(1000) # Disable ALL categories first for cat_sel in config.get("categories", {}).values(): try: el = page.locator(cat_sel.split(",")[0].strip()).first if await el.is_checked(): await el.click() except Exception: continue # Enable ONLY the target category try: el = page.locator(category.selector.split(",")[0].strip()).first if not await el.is_checked(): await el.click() except Exception: logger.warning("Could not toggle category %s", category.name) # Save selection if config.get("save_button"): await click_button(page, config["save_button"], timeout=3000) await page.wait_for_timeout(wait_ms) # Collect results result.scripts_loaded = _dedup_scripts(scripts) result.cookies_set = [c.get("name", "") for c in await context.cookies()] result.tracking_services = find_tracking_services(result.scripts_loaded) # P19: pruefe ob das Banner Provider-/Cookie-Details fuer diese # Kategorie sichtbar macht — bei dp-cookieconsent (Safetykon) immer # False -> kritischer Verstoss (Art. 7 DSGVO: keine informierte # Einwilligung ohne Detail-Listing pro Kategorie). result.provider_details_visible = await _provider_details_visible( page, category.label, ) if not result.provider_details_visible: result.violations.append({ "service": "Cookie-Banner", "severity": "HIGH", "text": (f"Kategorie '{category.label}' zeigt keine " f"Provider-/Cookie-Details im Banner — Nutzer " f"kann nicht informiert einwilligen " f"(Art. 7 Abs. 1 DSGVO)."), "legal_ref": "Art. 7 Abs. 1 DSGVO, EDPB Guidelines 2/2023, " "DSK-OH Telemedien 2024", "expected_category": category.name, "actual_category": category.name, }) # Find violations: services that don't belong to this category for service in result.tracking_services: expected_cat = SERVICE_CATEGORY_MAP.get(service) if expected_cat and expected_cat != category.name: result.violations.append({ "service": service, "severity": "HIGH", "text": f"{service} laedt bei '{category.label}' — gehoert aber zu '{CATEGORY_LABELS.get(expected_cat, expected_cat)}'", "expected_category": expected_cat, "actual_category": category.name, }) await page.close() except Exception as e: logger.error("Category test failed for %s: %s", category.name, e) return result def _collect(request, scripts: list[str]): if request.resource_type in ("script", "image", "xhr", "fetch"): scripts.append(request.url) def _dedup_scripts(scripts: list[str]) -> list[str]: seen = set() result = [] for url in scripts: domain = url.split("/")[2] if len(url.split("/")) > 2 else url if domain not in seen: seen.add(domain) result.append(url) return result[:30]