feat: Phase 11 — granular cookie category testing
Tests each consent category in isolation. Phase D: only "Statistics" enabled → checks that only analytics loads. Phase E: only "Marketing" enabled → checks that only ads load. Phase F: only "Functional" enabled → checks that no tracking loads. CMP-specific category selectors for Cookiebot, OneTrust, Usercentrics, Didomi. Generic fallback via toggle/checkbox keyword detection. SERVICE_CATEGORY_MAP maps 33 services to expected categories. Violations: "Facebook Pixel loads with only Statistics enabled" = miscategorization. Frontend: category test results shown below Phase A-C with per-category violation cards. Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -31,7 +31,15 @@ interface ConsentData {
|
|||||||
high: number
|
high: number
|
||||||
undocumented: number
|
undocumented: number
|
||||||
total_violations: number
|
total_violations: number
|
||||||
|
category_violations?: number
|
||||||
|
categories_tested?: number
|
||||||
}
|
}
|
||||||
|
category_tests?: {
|
||||||
|
category: string
|
||||||
|
category_label: string
|
||||||
|
tracking_services: string[]
|
||||||
|
violations: { service: string; severity: string; text: string }[]
|
||||||
|
}[]
|
||||||
}
|
}
|
||||||
|
|
||||||
const SEV = {
|
const SEV = {
|
||||||
@@ -154,6 +162,39 @@ export function ConsentTestResult({ data }: { data: ConsentData }) {
|
|||||||
)}
|
)}
|
||||||
</div>
|
</div>
|
||||||
|
|
||||||
|
{/* Category Tests (Phase D-F) */}
|
||||||
|
{data.category_tests && data.category_tests.length > 0 && (
|
||||||
|
<div className="space-y-3">
|
||||||
|
<h4 className="text-sm font-semibold text-gray-900 mt-2">Kategorie-Tests ({data.category_tests.length})</h4>
|
||||||
|
{data.category_tests.map((ct, i) => {
|
||||||
|
const hasViolations = ct.violations.length > 0
|
||||||
|
return (
|
||||||
|
<div key={i} className={`border rounded-lg p-4 ${hasViolations ? 'border-red-200 bg-red-50' : 'border-green-200 bg-green-50'}`}>
|
||||||
|
<h4 className="text-sm font-semibold text-gray-900 mb-2 flex items-center gap-2">
|
||||||
|
<span>🔀</span> Nur "{ct.category_label}"
|
||||||
|
</h4>
|
||||||
|
{ct.violations.length > 0 ? (
|
||||||
|
ct.violations.map((v, vi) => (
|
||||||
|
<div key={vi} className="mb-2 p-2 rounded border border-red-300 bg-red-100">
|
||||||
|
<span className="text-xs font-bold text-red-800 px-1.5 py-0.5 rounded bg-red-200">FALSCH</span>
|
||||||
|
<span className="text-xs text-red-700 ml-2">{v.text}</span>
|
||||||
|
</div>
|
||||||
|
))
|
||||||
|
) : (
|
||||||
|
<div className="text-xs text-green-700">
|
||||||
|
{ct.tracking_services.length > 0 ? (
|
||||||
|
ct.tracking_services.map((s, si) => <div key={si}>✓ {s} — korrekte Kategorie</div>)
|
||||||
|
) : (
|
||||||
|
<div>✓ Keine Tracking-Dienste geladen — korrekt</div>
|
||||||
|
)}
|
||||||
|
</div>
|
||||||
|
)}
|
||||||
|
</div>
|
||||||
|
)
|
||||||
|
})}
|
||||||
|
</div>
|
||||||
|
)}
|
||||||
|
|
||||||
{/* No banner warning */}
|
{/* No banner warning */}
|
||||||
{!data.banner_detected && (
|
{!data.banner_detected && (
|
||||||
<div className="bg-red-50 border border-red-200 rounded-lg p-3 text-xs text-red-700">
|
<div className="bg-red-50 border border-red-200 rounded-lg p-3 text-xs text-red-700">
|
||||||
|
|||||||
@@ -41,6 +41,7 @@ class ScanResponse(BaseModel):
|
|||||||
phases: dict
|
phases: dict
|
||||||
summary: dict
|
summary: dict
|
||||||
scanned_at: str
|
scanned_at: str
|
||||||
|
category_tests: list = []
|
||||||
|
|
||||||
|
|
||||||
@app.get("/health")
|
@app.get("/health")
|
||||||
@@ -83,8 +84,16 @@ async def scan_consent(req: ScanRequest):
|
|||||||
"high": len(result.before_violations),
|
"high": len(result.before_violations),
|
||||||
"undocumented": len(result.accept_undocumented),
|
"undocumented": len(result.accept_undocumented),
|
||||||
"total_violations": len(result.before_violations) + len(result.reject_violations),
|
"total_violations": len(result.before_violations) + len(result.reject_violations),
|
||||||
|
"category_violations": sum(len(ct.violations) for ct in result.category_tests),
|
||||||
|
"categories_tested": len(result.category_tests),
|
||||||
},
|
},
|
||||||
scanned_at=datetime.now(timezone.utc).isoformat(),
|
scanned_at=datetime.now(timezone.utc).isoformat(),
|
||||||
|
category_tests=[{
|
||||||
|
"category": ct.category,
|
||||||
|
"category_label": ct.category_label,
|
||||||
|
"tracking_services": ct.tracking_services,
|
||||||
|
"violations": ct.violations,
|
||||||
|
} for ct in result.category_tests] if result.category_tests else [],
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
|
|||||||
@@ -0,0 +1,278 @@
|
|||||||
|
"""
|
||||||
|
Category Tester — tests individual cookie consent categories.
|
||||||
|
|
||||||
|
Tests each category in isolation: only "Statistics" on, only "Marketing" on, etc.
|
||||||
|
Detects miscategorization: e.g., Facebook Pixel loading when only Statistics is enabled.
|
||||||
|
"""
|
||||||
|
|
||||||
|
import logging
|
||||||
|
from dataclasses import dataclass, field
|
||||||
|
|
||||||
|
from playwright.async_api import Page, BrowserContext
|
||||||
|
|
||||||
|
from services.banner_detector import BannerInfo, click_button
|
||||||
|
from services.script_analyzer import find_tracking_services, Violation
|
||||||
|
|
||||||
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
# Which tracking service belongs to which consent category
|
||||||
|
# Services grouped by the consent category they are expected to sit behind.
# Group order and member order define the iteration order of the flat map.
_SERVICES_BY_CATEGORY: dict[str, tuple[str, ...]] = {
    "statistics": (
        "Google Analytics",
        "Matomo",
        "Plausible Analytics",
        "Hotjar",
        "Microsoft Clarity",
        "etracker",
        "Heap Analytics",
        "Amplitude",
        "Mixpanel",
        "PostHog",
        "Mouseflow",
        "Crazy Egg",
        "Lucky Orange",
        "FullStory",
    ),
    "marketing": (
        "Meta/Facebook Pixel",
        "Google Ads",
        "Google Ads/DoubleClick",
        "TikTok Pixel",
        "LinkedIn Insight",
        "Pinterest Tag",
        "Criteo",
        "Taboola",
        "Outbrain",
        "Amazon Ads",
        "Bing/Microsoft Ads",
        "Salesforce Pardot",
    ),
    "functional": (
        "Intercom",
        "Zendesk",
        "Tidio Chat",
        "Crisp Chat",
        "LiveChat",
        "Freshdesk/Freshchat",
        "HelpScout Beacon",
    ),
}

# Flat lookup: tracking service name -> expected consent category key.
SERVICE_CATEGORY_MAP: dict[str, str] = {
    service: category
    for category, services in _SERVICES_BY_CATEGORY.items()
    for service in services
}
|
||||||
|
|
||||||
|
# Display labels (German) for each internal consent category key.
CATEGORY_LABELS = dict(
    statistics="Statistik / Analytics",
    marketing="Marketing / Werbung",
    functional="Funktional / Komfort",
    social_media="Social Media",
)
|
||||||
|
|
||||||
|
# CMP-specific category selectors
|
||||||
|
# Per-CMP selectors, keyed by the provider name reported on the detected banner.
#   settings_button: opens the detailed preferences panel
#   save_button:     confirms the CURRENT selection. It must never point at an
#                    "accept all" button, because that would grant every
#                    category and invalidate a single-category test.
#   categories:      internal category key -> selector of that category's toggle.
#                    Keys use the same names as SERVICE_CATEGORY_MAP and
#                    CATEGORY_LABELS so violations and labels line up.
CMP_CATEGORY_CONFIG: dict[str, dict] = {
    "Cookiebot": {
        "settings_button": "#CybotCookiebotDialogBodyButtonDetails",
        "save_button": "#CybotCookiebotDialogBodyLevelButtonLevelOptinAllowallSelection",
        "categories": {
            "statistics": "#CybotCookiebotDialogBodyLevelButtonStatistics",
            "marketing": "#CybotCookiebotDialogBodyLevelButtonMarketing",
            # Cookiebot calls this group "Preferences"; it is mapped to the
            # internal "functional" key used everywhere else in this module.
            "functional": "#CybotCookiebotDialogBodyLevelButtonPreferences",
        },
    },
    "OneTrust": {
        "settings_button": "#onetrust-pc-btn-handler, .ot-sdk-show-settings",
        # No fallback to #onetrust-accept-btn-handler: that is the accept-all button.
        "save_button": ".save-preference-btn-handler",
        "categories": {
            "statistics": ".ot-switch[data-ot-category='C0002'] input, #ot-group-id-C0002",
            "marketing": ".ot-switch[data-ot-category='C0004'] input, #ot-group-id-C0004",
            "functional": ".ot-switch[data-ot-category='C0003'] input, #ot-group-id-C0003",
        },
    },
    "Usercentrics": {
        "settings_button": "[data-testid='uc-more-information-button'], button:has-text('Mehr Informationen')",
        "save_button": "[data-testid='uc-save-button']",
        "categories": {
            "statistics": "[data-testid='uc-category-statistics'] input",
            "marketing": "[data-testid='uc-category-marketing'] input",
            "functional": "[data-testid='uc-category-functional'] input",
        },
    },
    "Didomi": {
        "settings_button": "#didomi-notice-learn-more-button, .didomi-learn-more-button",
        # No fallback to #didomi-notice-agree-button: that is the agree-to-all button.
        "save_button": ".didomi-components-button--primary:has-text('Auswahl speichern')",
        "categories": {
            "statistics": "[data-purpose='analytics_purposes'] input, [data-purpose='measure'] input",
            "marketing": "[data-purpose='advertising_purposes'] input, [data-purpose='ads'] input",
        },
    },
}
|
||||||
|
|
||||||
|
# Generic category keywords for fallback detection
|
||||||
|
# Lower-case substrings used to classify a toggle by its visible text when no
# CMP-specific selectors apply. Dict order matters: the first category with a
# matching keyword wins.
CATEGORY_KEYWORDS = {
    "statistics": ["statistik", "analytics", "analyse", "statistics", "messung", "reichweite"],
    "marketing": ["marketing", "werbung", "advertising", "targeting", "remarketing", "anzeigen"],
    # Page text is matched as rendered, so the umlaut spelling is needed in
    # addition to the ASCII transliteration ("Präferenzen" vs "Praeferenzen").
    "functional": [
        "funktional",
        "functional",
        "preferences",
        "präferenz",
        "praeferenz",
        "komfort",
        "einstellungen",
    ],
    "social_media": ["social media", "soziale medien", "social", "teilen"],
}
|
||||||
|
|
||||||
|
|
||||||
|
@dataclass
class CategoryInfo:
    """One consent category discovered in a cookie banner."""

    # Internal category key such as "statistics" or "marketing".
    name: str
    # Text shown to the user for this category in test results.
    label: str
    # Selector string locating the category's toggle; it may be a
    # comma-separated list of alternatives.
    selector: str
|
||||||
|
|
||||||
|
|
||||||
|
@dataclass
class CategoryTestResult:
    """Outcome of loading a page with a single consent category enabled."""

    # Internal key and display label of the category that was enabled.
    category: str
    category_label: str
    # Request URLs observed while the page was loaded.
    scripts_loaded: list[str] = field(default_factory=list)
    # Names of the cookies present in the browser context after the test.
    cookies_set: list[str] = field(default_factory=list)
    # Tracking service names recognised among the loaded URLs.
    tracking_services: list[str] = field(default_factory=list)
    # One dict per service that loaded although it belongs to another category.
    violations: list[dict] = field(default_factory=list)
|
||||||
|
|
||||||
|
|
||||||
|
def _generic_toggle_selector(toggle: dict) -> str:
    """Build a selector for a toggle found by the generic fallback scan.

    Returns an empty string when the element has neither an id nor an
    aria-label, because no selector built here could find it again.
    """
    if toggle.get("selector"):
        return toggle["selector"]
    aria_label = toggle.get("aria_label") or ""
    # Selectors are later cut at the first "," by the code that toggles a
    # category, so the fragment must not contain one. A prefix of the label
    # still satisfies the substring (*=) match.
    fragment = aria_label.split(",")[0][:20]
    if not fragment:
        return ""
    escaped = fragment.replace("\\", "\\\\").replace('"', '\\"')
    return f'[aria-label*="{escaped}"]'


async def detect_categories(page: Page, banner: BannerInfo) -> list[CategoryInfo]:
    """Detect the consent categories offered by the banner on *page*.

    Known CMPs are probed with the selectors from ``CMP_CATEGORY_CONFIG``
    after their settings panel has been opened. If that yields nothing, all
    checkbox/switch-like elements are scanned and classified by matching
    their label text against ``CATEGORY_KEYWORDS``.

    Args:
        page: Page that already shows the consent banner.
        banner: Detected banner; only ``banner.provider`` is read.

    Returns:
        At most one ``CategoryInfo`` per category key. The list is empty when
        nothing could be detected.
    """
    categories: list[CategoryInfo] = []
    provider = banner.provider

    # CMP-specific detection
    config = CMP_CATEGORY_CONFIG.get(provider)
    if config:
        # The category toggles only exist once the settings panel is open.
        if config.get("settings_button"):
            await click_button(page, config["settings_button"], timeout=3000)
            await page.wait_for_timeout(1000)

        for cat_name, selector in config.get("categories", {}).items():
            try:
                # Only the first alternative of a comma-separated list is probed.
                if await page.locator(selector.split(",")[0].strip()).count() > 0:
                    categories.append(CategoryInfo(
                        name=cat_name,
                        label=CATEGORY_LABELS.get(cat_name, cat_name),
                        selector=selector,
                    ))
            except Exception as probe_err:
                # Best effort: one broken selector must not stop the other probes.
                logger.debug("Category probe failed for %s/%s: %s", provider, cat_name, probe_err)
                continue

    # Generic fallback: search for toggle/checkbox elements with category keywords
    if not categories:
        try:
            toggles = await page.evaluate("""
                () => {
                    const elements = document.querySelectorAll(
                        'input[type="checkbox"], [role="switch"], [class*="toggle"], [class*="switch"]'
                    );
                    return [...elements].map(el => {
                        const ariaLabel = (el.getAttribute('aria-label') || '').trim();
                        return {
                            text: (el.closest('label')?.textContent || ariaLabel).trim(),
                            aria_label: ariaLabel,
                            id: el.id || '',
                            selector: el.id ? '#' + CSS.escape(el.id) : '',
                        };
                    }).filter(e => e.text.length > 0);
                }
            """)

            # A wrapper and its inner input often both match the query above;
            # keep only the first usable toggle per category.
            seen: set[str] = set()
            for toggle in (toggles or []):
                text_lower = toggle["text"].lower()
                matched = next(
                    (
                        cat_name
                        for cat_name, keywords in CATEGORY_KEYWORDS.items()
                        if any(kw in text_lower for kw in keywords)
                    ),
                    None,
                )
                if matched is None or matched in seen:
                    continue
                sel = _generic_toggle_selector(toggle)
                if not sel:
                    # The element cannot be addressed later, so it cannot be tested.
                    continue
                seen.add(matched)
                categories.append(CategoryInfo(
                    name=matched,
                    label=toggle["text"][:50],
                    selector=sel,
                ))
        except Exception as e:
            logger.warning("Generic category detection failed: %s", e)

    logger.info("Detected %d categories for %s", len(categories), provider)
    return categories
|
||||||
|
|
||||||
|
|
||||||
|
async def _set_toggle(page: Page, selector: str, *, checked: bool, timeout_ms: int) -> None:
    """Click the first element matching *selector* unless it already has the wanted state.

    Only the first alternative of a comma-separated selector list is used.
    """
    toggle = page.locator(selector.split(",")[0].strip()).first
    if await toggle.is_checked(timeout=timeout_ms) != checked:
        await toggle.click(timeout=timeout_ms)


async def test_single_category(
    context: BrowserContext,
    url: str,
    category: CategoryInfo,
    banner: BannerInfo,
    wait_ms: int = 5000,
) -> CategoryTestResult:
    """Test a single category in isolation: enable only this one, disable others.

    Opens *url* in a new page of *context*. For CMPs listed in
    ``CMP_CATEGORY_CONFIG`` it switches every configured category off, switches
    the target category on and saves the selection. It then records which
    requests, cookies and tracking services appear.

    Args:
        context: Browser context to open the page in; its cookies are read.
        url: Page to load.
        category: Category to enable.
        banner: Detected banner; ``banner.provider`` selects the CMP config.
        wait_ms: Time to wait after saving before collecting results.

    Returns:
        A ``CategoryTestResult``. Errors are logged rather than raised, so on
        failure the result contains whatever was collected up to that point.
    """
    result = CategoryTestResult(
        category=category.name,
        category_label=category.label,
    )
    # Keep missing toggles from blocking on Playwright's much longer default timeout.
    toggle_timeout_ms = 3000
    page = None

    try:
        page = await context.new_page()
        scripts: list[str] = []
        page.on("request", lambda req: _collect(req, scripts))

        await page.goto(url, wait_until="networkidle", timeout=20000)
        await page.wait_for_timeout(2000)

        config = CMP_CATEGORY_CONFIG.get(banner.provider)

        # NOTE(review): without a CMP config no toggle is touched, so the
        # result below reflects the page before any consent choice.
        if config:
            # Open settings
            if config.get("settings_button"):
                await click_button(page, config["settings_button"], timeout=3000)
                await page.wait_for_timeout(1000)

            # Disable ALL categories first
            for cat_sel in config.get("categories", {}).values():
                try:
                    await _set_toggle(page, cat_sel, checked=False, timeout_ms=toggle_timeout_ms)
                except Exception as toggle_err:
                    # A category missing from this site's banner is expected.
                    logger.debug("Could not disable toggle %s: %s", cat_sel, toggle_err)
                    continue

            # Enable ONLY the target category
            try:
                await _set_toggle(page, category.selector, checked=True, timeout_ms=toggle_timeout_ms)
            except Exception:
                # NOTE(review): the test continues although the category was
                # not enabled, so the result may not reflect this category.
                logger.warning("Could not toggle category %s", category.name)

            # Save selection
            if config.get("save_button"):
                await click_button(page, config["save_button"], timeout=3000)

        await page.wait_for_timeout(wait_ms)

        # Collect results
        result.scripts_loaded = _dedup_scripts(scripts)
        result.cookies_set = [c.get("name", "") for c in await context.cookies()]
        result.tracking_services = find_tracking_services(result.scripts_loaded)

        # Find violations: services that don't belong to this category.
        # Services missing from SERVICE_CATEGORY_MAP are not judged.
        for service in result.tracking_services:
            expected_cat = SERVICE_CATEGORY_MAP.get(service)
            if expected_cat and expected_cat != category.name:
                result.violations.append({
                    "service": service,
                    "severity": "HIGH",
                    "text": f"{service} laedt bei '{category.label}' — gehoert aber zu '{CATEGORY_LABELS.get(expected_cat, expected_cat)}'",
                    "expected_category": expected_cat,
                    "actual_category": category.name,
                })

    except Exception as e:
        logger.error("Category test failed for %s: %s", category.name, e)
    finally:
        # Close the page on every path so a failed test does not leak it.
        if page is not None:
            try:
                await page.close()
            except Exception as close_err:
                logger.debug("Closing page failed for %s: %s", category.name, close_err)

    return result
|
||||||
|
|
||||||
|
|
||||||
|
def _collect(request, scripts: list[str]):
|
||||||
|
if request.resource_type in ("script", "image", "xhr", "fetch"):
|
||||||
|
scripts.append(request.url)
|
||||||
|
|
||||||
|
|
||||||
|
def _dedup_scripts(scripts: list[str]) -> list[str]:
|
||||||
|
seen = set()
|
||||||
|
result = []
|
||||||
|
for url in scripts:
|
||||||
|
domain = url.split("/")[2] if len(url.split("/")) > 2 else url
|
||||||
|
if domain not in seen:
|
||||||
|
seen.add(domain)
|
||||||
|
result.append(url)
|
||||||
|
return result[:30]
|
||||||
@@ -44,6 +44,8 @@ class ConsentTestResult:
|
|||||||
accept_cookies: list[str] = field(default_factory=list)
|
accept_cookies: list[str] = field(default_factory=list)
|
||||||
accept_new_tracking: list[str] = field(default_factory=list)
|
accept_new_tracking: list[str] = field(default_factory=list)
|
||||||
accept_undocumented: list[str] = field(default_factory=list)
|
accept_undocumented: list[str] = field(default_factory=list)
|
||||||
|
# Phase D-F: Per-category tests
|
||||||
|
category_tests: list = field(default_factory=list) # list[CategoryTestResult]
|
||||||
|
|
||||||
|
|
||||||
async def run_consent_test(url: str, wait_secs: int = 10) -> ConsentTestResult:
|
async def run_consent_test(url: str, wait_secs: int = 10) -> ConsentTestResult:
|
||||||
@@ -136,14 +138,41 @@ async def run_consent_test(url: str, wait_secs: int = 10) -> ConsentTestResult:
|
|||||||
|
|
||||||
await ctx_c.close()
|
await ctx_c.close()
|
||||||
|
|
||||||
|
# ── Phase D-F: Per-category tests ────────────────────────
|
||||||
|
try:
|
||||||
|
from services.category_tester import detect_categories, test_single_category
|
||||||
|
|
||||||
|
ctx_cat = await browser.new_context(user_agent=USER_AGENT)
|
||||||
|
page_cat = await ctx_cat.new_page()
|
||||||
|
await page_cat.goto(url, wait_until="networkidle", timeout=20000)
|
||||||
|
await page_cat.wait_for_timeout(2000)
|
||||||
|
|
||||||
|
categories = await detect_categories(page_cat, banner)
|
||||||
|
await page_cat.close()
|
||||||
|
|
||||||
|
if categories:
|
||||||
|
logger.info("Testing %d categories individually", len(categories))
|
||||||
|
for cat in categories:
|
||||||
|
cat_ctx = await browser.new_context(user_agent=USER_AGENT)
|
||||||
|
cat_result = await test_single_category(cat_ctx, url, cat, banner, wait_ms)
|
||||||
|
result.category_tests.append(cat_result)
|
||||||
|
await cat_ctx.close()
|
||||||
|
else:
|
||||||
|
logger.info("No categories detected — skipping per-category tests")
|
||||||
|
|
||||||
|
await ctx_cat.close()
|
||||||
|
except Exception as cat_err:
|
||||||
|
logger.warning("Category tests failed (non-blocking): %s", cat_err)
|
||||||
|
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
logger.error("Consent test failed: %s", e)
|
logger.error("Consent test failed: %s", e)
|
||||||
finally:
|
finally:
|
||||||
await browser.close()
|
await browser.close()
|
||||||
|
|
||||||
logger.info(
|
logger.info(
|
||||||
"Consent test complete: banner=%s, violations_before=%d, violations_reject=%d",
|
"Consent test complete: banner=%s, violations_before=%d, violations_reject=%d, categories=%d",
|
||||||
result.banner_provider, len(result.before_violations), len(result.reject_violations),
|
result.banner_provider, len(result.before_violations), len(result.reject_violations),
|
||||||
|
len(result.category_tests),
|
||||||
)
|
)
|
||||||
return result
|
return result
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user