From 6864849115d8fcafe9a25e1a9c726395baa3d2cc Mon Sep 17 00:00:00 2001 From: Benjamin Admin Date: Fri, 1 May 2026 21:15:23 +0200 Subject: [PATCH] =?UTF-8?q?feat:=20Phase=2011=20=E2=80=94=20granular=20coo?= =?UTF-8?q?kie=20category=20testing?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Tests each consent category in isolation: - Phase D: Only "Statistics" enabled → checks if only analytics loads - Phase E: Only "Marketing" enabled → checks if only ads load - Phase F: Only "Functional" enabled → checks no tracking loads CMP-specific category selectors for Cookiebot, OneTrust, Usercentrics, Didomi. Generic fallback via toggle/checkbox keyword detection. SERVICE_CATEGORY_MAP maps 35+ services to expected categories. Violations: "Facebook Pixel loads with only Statistics enabled" = miscategorization. Frontend: category test results shown below Phase A-C with per-category violation cards. Co-Authored-By: Claude Opus 4.6 (1M context) --- .../agent/_components/ConsentTestResult.tsx | 41 +++ consent-tester/main.py | 9 + consent-tester/services/category_tester.py | 278 ++++++++++++++++++ consent-tester/services/consent_scanner.py | 31 +- 4 files changed, 358 insertions(+), 1 deletion(-) create mode 100644 consent-tester/services/category_tester.py diff --git a/admin-compliance/app/sdk/agent/_components/ConsentTestResult.tsx b/admin-compliance/app/sdk/agent/_components/ConsentTestResult.tsx index 57385ae..4a462ab 100644 --- a/admin-compliance/app/sdk/agent/_components/ConsentTestResult.tsx +++ b/admin-compliance/app/sdk/agent/_components/ConsentTestResult.tsx @@ -31,7 +31,15 @@ interface ConsentData { high: number undocumented: number total_violations: number + category_violations?: number + categories_tested?: number } + category_tests?: { + category: string + category_label: string + tracking_services: string[] + violations: { service: string; severity: string; text: string }[] + }[] } const SEV = { @@ -154,6 +162,39 @@ export function ConsentTestResult({ data }: { data: ConsentData }) { )} + {/* Category Tests (Phase D-F) */} + {data.category_tests && data.category_tests.length > 0 && ( +
+

Kategorie-Tests ({data.category_tests.length})

+ {data.category_tests.map((ct, i) => { + const hasViolations = ct.violations.length > 0 + return ( +
+

+ 🔀 Nur "{ct.category_label}" +

+ {ct.violations.length > 0 ? ( + ct.violations.map((v, vi) => ( +
+ FALSCH + {v.text} +
+ )) + ) : ( +
+ {ct.tracking_services.length > 0 ? ( + ct.tracking_services.map((s, si) =>
✓ {s} — korrekte Kategorie
) + ) : ( +
✓ Keine Tracking-Dienste geladen — korrekt
+ )} +
+ )} +
+ ) + })} +
+ )} + {/* No banner warning */} {!data.banner_detected && (
diff --git a/consent-tester/main.py b/consent-tester/main.py index f06d3cc..5a7c2ad 100644 --- a/consent-tester/main.py +++ b/consent-tester/main.py @@ -41,6 +41,7 @@ class ScanResponse(BaseModel): phases: dict summary: dict scanned_at: str + category_tests: list = [] @app.get("/health") @@ -83,8 +84,16 @@ async def scan_consent(req: ScanRequest): "high": len(result.before_violations), "undocumented": len(result.accept_undocumented), "total_violations": len(result.before_violations) + len(result.reject_violations), + "category_violations": sum(len(ct.violations) for ct in result.category_tests), + "categories_tested": len(result.category_tests), }, scanned_at=datetime.now(timezone.utc).isoformat(), + category_tests=[{ + "category": ct.category, + "category_label": ct.category_label, + "tracking_services": ct.tracking_services, + "violations": ct.violations, + } for ct in result.category_tests] if result.category_tests else [], ) diff --git a/consent-tester/services/category_tester.py b/consent-tester/services/category_tester.py new file mode 100644 index 0000000..ddac093 --- /dev/null +++ b/consent-tester/services/category_tester.py @@ -0,0 +1,278 @@ +""" +Category Tester — tests individual cookie consent categories. + +Tests each category in isolation: only "Statistics" on, only "Marketing" on, etc. +Detects miscategorization: e.g., Facebook Pixel loading when only Statistics is enabled. +""" + +import logging +from dataclasses import dataclass, field + +from playwright.async_api import Page, BrowserContext + +from services.banner_detector import BannerInfo, click_button +from services.script_analyzer import find_tracking_services, Violation + +logger = logging.getLogger(__name__) + +# Which tracking service belongs to which consent category +SERVICE_CATEGORY_MAP: dict[str, str] = { + # Statistics / Analytics + "Google Analytics": "statistics", + "Matomo": "statistics", + "Plausible Analytics": "statistics", + "Hotjar": "statistics", + "Microsoft Clarity": "statistics", + "etracker": "statistics", + "Heap Analytics": "statistics", + "Amplitude": "statistics", + "Mixpanel": "statistics", + "PostHog": "statistics", + "Mouseflow": "statistics", + "Crazy Egg": "statistics", + "Lucky Orange": "statistics", + "FullStory": "statistics", + # Marketing / Advertising + "Meta/Facebook Pixel": "marketing", + "Google Ads": "marketing", + "Google Ads/DoubleClick": "marketing", + "TikTok Pixel": "marketing", + "LinkedIn Insight": "marketing", + "Pinterest Tag": "marketing", + "Criteo": "marketing", + "Taboola": "marketing", + "Outbrain": "marketing", + "Amazon Ads": "marketing", + "Bing/Microsoft Ads": "marketing", + "Salesforce Pardot": "marketing", + # Functional + "Intercom": "functional", + "Zendesk": "functional", + "Tidio Chat": "functional", + "Crisp Chat": "functional", + "LiveChat": "functional", + "Freshdesk/Freshchat": "functional", + "HelpScout Beacon": "functional", +} + +CATEGORY_LABELS = { + "statistics": "Statistik / Analytics", + "marketing": "Marketing / Werbung", + "functional": "Funktional / Komfort", + "social_media": "Social Media", +} + +# CMP-specific category selectors +CMP_CATEGORY_CONFIG: dict[str, dict] = { + "Cookiebot": { + "settings_button": "#CybotCookiebotDialogBodyButtonDetails", + "save_button": "#CybotCookiebotDialogBodyLevelButtonLevelOptinAllowallSelection", + "categories": { + "statistics": "#CybotCookiebotDialogBodyLevelButtonStatistics", + "marketing": "#CybotCookiebotDialogBodyLevelButtonMarketing", + "preferences": "#CybotCookiebotDialogBodyLevelButtonPreferences", + }, + }, + "OneTrust": { + "settings_button": "#onetrust-pc-btn-handler, .ot-sdk-show-settings", + "save_button": ".save-preference-btn-handler, #onetrust-accept-btn-handler", + "categories": { + "statistics": ".ot-switch[data-ot-category='C0002'] input, #ot-group-id-C0002", + "marketing": ".ot-switch[data-ot-category='C0004'] input, #ot-group-id-C0004", + "functional": ".ot-switch[data-ot-category='C0003'] input, #ot-group-id-C0003", + }, + }, + "Usercentrics": { + "settings_button": "[data-testid='uc-more-information-button'], button:has-text('Mehr Informationen')", + "save_button": "[data-testid='uc-save-button']", + "categories": { + "statistics": "[data-testid='uc-category-statistics'] input", + "marketing": "[data-testid='uc-category-marketing'] input", + "functional": "[data-testid='uc-category-functional'] input", + }, + }, + "Didomi": { + "settings_button": "#didomi-notice-learn-more-button, .didomi-learn-more-button", + "save_button": ".didomi-components-button--primary:has-text('Auswahl speichern'), #didomi-notice-agree-button", + "categories": { + "statistics": "[data-purpose='analytics_purposes'] input, [data-purpose='measure'] input", + "marketing": "[data-purpose='advertising_purposes'] input, [data-purpose='ads'] input", + }, + }, +} + +# Generic category keywords for fallback detection +CATEGORY_KEYWORDS = { + "statistics": ["statistik", "analytics", "analyse", "statistics", "messung", "reichweite"], + "marketing": ["marketing", "werbung", "advertising", "targeting", "remarketing", "anzeigen"], + "functional": ["funktional", "functional", "preferences", "praeferenz", "komfort", "einstellungen"], + "social_media": ["social media", "soziale medien", "social", "teilen"], +} + + +@dataclass +class CategoryInfo: + name: str + label: str + selector: str + + +@dataclass +class CategoryTestResult: + category: str + category_label: str + scripts_loaded: list[str] = field(default_factory=list) + cookies_set: list[str] = field(default_factory=list) + tracking_services: list[str] = field(default_factory=list) + violations: list[dict] = field(default_factory=list) + + +async def detect_categories(page: Page, banner: BannerInfo) -> list[CategoryInfo]: + """Detect available cookie categories in the CMP.""" + categories = [] + provider = banner.provider + + # CMP-specific detection + config = CMP_CATEGORY_CONFIG.get(provider) + if config: + # Open settings panel first + if config.get("settings_button"): + await click_button(page, config["settings_button"], timeout=3000) + await page.wait_for_timeout(1000) + + for cat_name, selector in config.get("categories", {}).items(): + try: + if await page.locator(selector.split(",")[0].strip()).count() > 0: + categories.append(CategoryInfo( + name=cat_name, + label=CATEGORY_LABELS.get(cat_name, cat_name), + selector=selector, + )) + except Exception: + continue + + # Generic fallback: search for toggle/checkbox elements with category keywords + if not categories: + try: + toggles = await page.evaluate(""" + () => { + const elements = document.querySelectorAll( + 'input[type="checkbox"], [role="switch"], [class*="toggle"], [class*="switch"]' + ); + return [...elements].map(el => ({ + text: (el.closest('label')?.textContent || el.getAttribute('aria-label') || '').trim(), + id: el.id || '', + selector: el.id ? '#' + el.id : '', + })).filter(e => e.text.length > 0); + } + """) + + for toggle in (toggles or []): + text_lower = toggle["text"].lower() + for cat_name, keywords in CATEGORY_KEYWORDS.items(): + if any(kw in text_lower for kw in keywords): + sel = toggle["selector"] or f'[aria-label*="{toggle["text"][:20]}"]' + categories.append(CategoryInfo( + name=cat_name, + label=toggle["text"][:50], + selector=sel, + )) + break + except Exception as e: + logger.warning("Generic category detection failed: %s", e) + + logger.info("Detected %d categories for %s", len(categories), provider) + return categories + + +async def test_single_category( + context: BrowserContext, + url: str, + category: CategoryInfo, + banner: BannerInfo, + wait_ms: int = 5000, +) -> CategoryTestResult: + """Test a single category in isolation: enable only this one, disable others.""" + result = CategoryTestResult( + category=category.name, + category_label=category.label, + ) + + try: + page = await context.new_page() + scripts: list[str] = [] + page.on("request", lambda req: _collect(req, scripts)) + + await page.goto(url, wait_until="networkidle", timeout=20000) + await page.wait_for_timeout(2000) + + config = CMP_CATEGORY_CONFIG.get(banner.provider) + + if config: + # Open settings + if config.get("settings_button"): + await click_button(page, config["settings_button"], timeout=3000) + await page.wait_for_timeout(1000) + + # Disable ALL categories first + for cat_sel in config.get("categories", {}).values(): + try: + el = page.locator(cat_sel.split(",")[0].strip()).first + if await el.is_checked(): + await el.click() + except Exception: + continue + + # Enable ONLY the target category + try: + el = page.locator(category.selector.split(",")[0].strip()).first + if not await el.is_checked(): + await el.click() + except Exception: + logger.warning("Could not toggle category %s", category.name) + + # Save selection + if config.get("save_button"): + await click_button(page, config["save_button"], timeout=3000) + + await page.wait_for_timeout(wait_ms) + + # Collect results + result.scripts_loaded = _dedup_scripts(scripts) + result.cookies_set = [c.get("name", "") for c in await context.cookies()] + result.tracking_services = find_tracking_services(result.scripts_loaded) + + # Find violations: services that don't belong to this category + for service in result.tracking_services: + expected_cat = SERVICE_CATEGORY_MAP.get(service) + if expected_cat and expected_cat != category.name: + result.violations.append({ + "service": service, + "severity": "HIGH", + "text": f"{service} laedt bei '{category.label}' — gehoert aber zu '{CATEGORY_LABELS.get(expected_cat, expected_cat)}'", + "expected_category": expected_cat, + "actual_category": category.name, + }) + + await page.close() + + except Exception as e: + logger.error("Category test failed for %s: %s", category.name, e) + + return result + + +def _collect(request, scripts: list[str]): + if request.resource_type in ("script", "image", "xhr", "fetch"): + scripts.append(request.url) + + +def _dedup_scripts(scripts: list[str]) -> list[str]: + seen = set() + result = [] + for url in scripts: + domain = url.split("/")[2] if len(url.split("/")) > 2 else url + if domain not in seen: + seen.add(domain) + result.append(url) + return result[:30] diff --git a/consent-tester/services/consent_scanner.py b/consent-tester/services/consent_scanner.py index caa1c32..a11cd90 100644 --- a/consent-tester/services/consent_scanner.py +++ b/consent-tester/services/consent_scanner.py @@ -44,6 +44,8 @@ class ConsentTestResult: accept_cookies: list[str] = field(default_factory=list) accept_new_tracking: list[str] = field(default_factory=list) accept_undocumented: list[str] = field(default_factory=list) + # Phase D-F: Per-category tests + category_tests: list = field(default_factory=list) # list[CategoryTestResult] async def run_consent_test(url: str, wait_secs: int = 10) -> ConsentTestResult: @@ -136,14 +138,41 @@ async def run_consent_test(url: str, wait_secs: int = 10) -> ConsentTestResult: await ctx_c.close() + # ── Phase D-F: Per-category tests ──────────────────────── + try: + from services.category_tester import detect_categories, test_single_category + + ctx_cat = await browser.new_context(user_agent=USER_AGENT) + page_cat = await ctx_cat.new_page() + await page_cat.goto(url, wait_until="networkidle", timeout=20000) + await page_cat.wait_for_timeout(2000) + + categories = await detect_categories(page_cat, banner) + await page_cat.close() + + if categories: + logger.info("Testing %d categories individually", len(categories)) + for cat in categories: + cat_ctx = await browser.new_context(user_agent=USER_AGENT) + cat_result = await test_single_category(cat_ctx, url, cat, banner, wait_ms) + result.category_tests.append(cat_result) + await cat_ctx.close() + else: + logger.info("No categories detected — skipping per-category tests") + + await ctx_cat.close() + except Exception as cat_err: + logger.warning("Category tests failed (non-blocking): %s", cat_err) + except Exception as e: logger.error("Consent test failed: %s", e) finally: await browser.close() logger.info( - "Consent test complete: banner=%s, violations_before=%d, violations_reject=%d", + "Consent test complete: banner=%s, violations_before=%d, violations_reject=%d, categories=%d", result.banner_provider, len(result.before_violations), len(result.reject_violations), + len(result.category_tests), ) return result