fix: restore all missing consent-tester service modules
banner_detector.py, script_analyzer.py, category_tester.py, authenticated_scanner.py were only on the feature branch — needed for consent-tester to start. Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -0,0 +1,230 @@
|
||||
"""
|
||||
Authenticated Scanner — tests post-login functionality.
|
||||
|
||||
Checks §312k BGB (cancellation), Art. 17 (deletion), Art. 20 (export),
|
||||
Art. 7(3) (consent withdrawal), Art. 15 (data access).
|
||||
|
||||
Credentials are NEVER stored, logged, or transmitted beyond the browser context.
|
||||
"""
|
||||
|
||||
import logging
|
||||
from dataclasses import dataclass, field
|
||||
|
||||
from playwright.async_api import async_playwright, Page
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
# User-agent string handed to the Playwright browser context.
USER_AGENT = " ".join((
    "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7)",
    "AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36",
))
|
||||
|
||||
|
||||
@dataclass
class CheckResult:
    """Outcome of one pattern-based check on the logged-in page.

    Every field defaults to its "nothing found" value, so a fresh instance
    represents a check that did not match.
    """

    # True as soon as one pattern matched.
    found: bool = False
    # Selector that produced the hit; stays "" when the hit came from a
    # plain text search.
    selector: str = ""
    # Matched element text, the matched pattern, or a navigation note.
    text: str = ""
    # Not populated by the checks visible in this module.
    clicks_needed: int = 0
    # Not populated by the checks visible in this module.
    screenshot: bytes = b""
|
||||
|
||||
|
||||
@dataclass
class AuthTestResult:
    """Aggregated result of one authenticated-area test run.

    Each check field gets its own fresh ``CheckResult`` so instances never
    share mutable state.
    """

    # True once the login step was judged successful.
    authenticated: bool = False
    # Human-readable failure reason; "" when nothing went wrong.
    login_error: str = ""
    # Cancellation option (§312k BGB).
    cancel_subscription: CheckResult = field(default_factory=CheckResult)
    # Account deletion (Art. 17 DSGVO).
    delete_account: CheckResult = field(default_factory=CheckResult)
    # Data export (Art. 20 DSGVO).
    export_data: CheckResult = field(default_factory=CheckResult)
    # Consent settings / withdrawal (Art. 7(3) DSGVO).
    consent_settings: CheckResult = field(default_factory=CheckResult)
    # Profile / personal data visibility (Art. 15 DSGVO).
    profile_visible: CheckResult = field(default_factory=CheckResult)
|
||||
|
||||
|
||||
# Text patterns searched for by each check (German and English).
# Order matters: the first pattern that matches determines the reported hit.

# Contract / subscription cancellation.
CANCEL_PATTERNS = [
    "kündigen",
    "kuendigen",
    "vertrag beenden",
    "abo beenden",
    "mitgliedschaft kündigen",
    "cancel subscription",
    "unsubscribe",
    "cancel membership",
    "vertrag kündigen",
]

# Account deletion or deactivation.
DELETE_PATTERNS = [
    "konto löschen",
    "konto loeschen",
    "account löschen",
    "delete account",
    "account deaktivieren",
    "profil löschen",
    "remove account",
]

# Data export / download.
EXPORT_PATTERNS = [
    "daten exportieren",
    "daten herunterladen",
    "export data",
    "download data",
    "meine daten",
    "datenauskunft",
    "data download",
    "daten anfordern",
]

# Consent and privacy settings.
# NOTE(review): "einstellungen" is a very generic term and may match
# unrelated settings links — confirm this breadth is intended.
CONSENT_PATTERNS = [
    "einwilligung",
    "einstellungen",
    "datenschutz-einstellungen",
    "consent",
    "privacy settings",
    "cookie-einstellungen",
    "werbeeinstellungen",
    "marketing preferences",
]

# Profile / account data pages.
PROFILE_PATTERNS = [
    "profil",
    "mein konto",
    "kontodaten",
    "persönliche daten",
    "profile",
    "my account",
    "account settings",
    "personal data",
]
|
||||
|
||||
|
||||
async def run_authenticated_test(
    url: str,
    username: str,
    password: str,
    username_selector: str = "",
    password_selector: str = "",
    submit_selector: str = "",
) -> AuthTestResult:
    """Log in at *url* and probe the authenticated area for user-rights controls.

    Launches a headless Chromium, attempts the login via ``_try_login`` and,
    when that succeeds, runs the five pattern checks (cancel, delete, export,
    consent, profile) against the page that is open after login.

    Args:
        url: Page that contains the login form.
        username: Value typed into the user/e-mail field.
        password: Value typed into the password field.
        username_selector: Optional selector for the user field; auto-detected
            when empty.
        password_selector: Optional selector for the password field;
            auto-detected when empty.
        submit_selector: Optional selector for the submit control;
            auto-detected when empty.

    Returns:
        An ``AuthTestResult``. On a failed login or any exception raised while
        testing, ``login_error`` carries the reason and the checks keep their
        default (not found) values.
    """
    result = AuthTestResult()

    async with async_playwright() as p:
        browser = await p.chromium.launch(
            headless=True,
            args=["--no-sandbox", "--disable-dev-shm-usage"],
        )
        context = await browser.new_context(user_agent=USER_AGENT)
        page = await context.new_page()

        try:
            # Step 1: Login
            await page.goto(url, wait_until="networkidle", timeout=30000)
            await page.wait_for_timeout(2000)

            login_ok = await _try_login(
                page, username, password,
                username_selector, password_selector, submit_selector,
            )

            if not login_ok:
                result.login_error = "Login fehlgeschlagen — Formular nicht gefunden oder Credentials falsch"
                # Teardown happens exactly once, in the finally clause below.
                return result

            result.authenticated = True
            # Give the post-login page time to settle before probing it.
            await page.wait_for_timeout(3000)

            # Step 2: Check cancellation (§312k BGB)
            result.cancel_subscription = await _check_patterns(page, CANCEL_PATTERNS, "cancel")
            logger.info("Cancel check: found=%s", result.cancel_subscription.found)

            # Step 3: Check delete account (Art. 17 DSGVO)
            result.delete_account = await _check_patterns(page, DELETE_PATTERNS, "delete")

            # Step 4: Check data export (Art. 20 DSGVO)
            result.export_data = await _check_patterns(page, EXPORT_PATTERNS, "export")

            # Step 5: Check consent settings (Art. 7(3) DSGVO)
            result.consent_settings = await _check_patterns(page, CONSENT_PATTERNS, "consent")

            # Step 6: Check profile visibility (Art. 15 DSGVO)
            result.profile_visible = await _check_patterns(page, PROFILE_PATTERNS, "profile")

        except Exception as e:
            # NOTE(review): the exception text is logged and returned to the
            # caller — confirm it can never contain the submitted credentials.
            logger.error("Authenticated test failed: %s", e)
            result.login_error = str(e)
        finally:
            # CRITICAL: close the context (cookies, session) and the browser.
            # Nested so the browser is closed even if closing the context fails.
            try:
                await context.close()
            finally:
                await browser.close()

    return result
|
||||
|
||||
|
||||
async def _try_login(
    page: Page, username: str, password: str,
    user_sel: str, pass_sel: str, submit_sel: str,
) -> bool:
    """Fill and submit the login form on *page*.

    Selectors that are passed in empty are auto-detected by probing a fixed
    list of common candidates and taking the first one present on the page.

    Returns:
        ``True`` when no ``input[type="password"]`` is left on the page five
        seconds after submitting; ``False`` when the user or password field
        could not be located, the password field is still present, or any
        step raised.
    """

    async def _first_present(candidates: tuple[str, ...]) -> str:
        # First candidate selector that matches at least one element, else "".
        for candidate in candidates:
            if await page.locator(candidate).count() > 0:
                return candidate
        return ""

    try:
        # Auto-detect whichever selectors the caller left empty.
        user_sel = user_sel or await _first_present((
            'input[type="email"]', 'input[name="email"]', 'input[name="username"]',
            'input[name="login"]', 'input[id="email"]', 'input[id="username"]',
        ))
        pass_sel = pass_sel or await _first_present((
            'input[type="password"]', 'input[name="password"]', 'input[id="password"]',
        ))
        submit_sel = submit_sel or await _first_present((
            'button[type="submit"]', 'input[type="submit"]',
            'button:has-text("Anmelden")', 'button:has-text("Login")',
            'button:has-text("Sign in")', 'button:has-text("Einloggen")',
        ))

        # Without both input fields there is nothing to submit.
        if not (user_sel and pass_sel):
            return False

        await page.fill(user_sel, username)
        await page.fill(pass_sel, password)

        # Fall back to pressing Enter when no submit control was found.
        if not submit_sel:
            await page.press(pass_sel, "Enter")
        else:
            await page.click(submit_sel)

        await page.wait_for_timeout(5000)

        # Success heuristic: the password field is gone from the page.
        remaining_password_fields = await page.locator('input[type="password"]').count()
        return remaining_password_fields == 0

    except Exception as e:
        logger.warning("Login attempt failed: %s", e)
        return False
|
||||
|
||||
|
||||
async def _check_patterns(page: Page, patterns: list[str], check_name: str) -> CheckResult:
    """Look for any of *patterns* on the current page.

    Three stages run in order and the first hit wins:

    1. visible text anywhere on the page (substring match),
    2. links, buttons and ``href`` attributes containing the pattern,
    3. the text of common navigation containers.

    Errors raised while probing are swallowed so that one failing probe does
    not abort the check. ``check_name`` is accepted but not used here.

    Returns:
        A ``CheckResult`` with ``found=True`` and details of the first hit,
        or a default ``CheckResult`` when nothing matched.
    """
    outcome = CheckResult()

    # Stage 1: plain text search over the page.
    for needle in patterns:
        try:
            hits = page.get_by_text(needle, exact=False)
            if await hits.count() == 0:
                continue
            raw_text = await hits.first.text_content()
        except Exception:
            continue
        outcome.found = True
        outcome.text = (raw_text or "").strip()[:100]
        return outcome

    # Stage 2: links, buttons and hrefs (spaces become dashes for the href probe).
    for needle in patterns:
        try:
            probes = (
                f'a:has-text("{needle}")',
                f'button:has-text("{needle}")',
                f'[href*="{needle.replace(" ", "-")}"]',
            )
            for probe in probes:
                if await page.locator(probe).count() > 0:
                    outcome.found = True
                    outcome.selector = probe
                    outcome.text = needle
                    return outcome
        except Exception:
            continue

    # Stage 3: navigation menus (common locations for account management).
    for region in ('nav', '[role="navigation"]', '.sidebar', '.account-menu', '#account'):
        try:
            container = page.locator(region)
            if await container.count() == 0:
                continue
            haystack = (await container.first.text_content() or "").lower()
            hit = next((needle for needle in patterns if needle.lower() in haystack), None)
            if hit is not None:
                outcome.found = True
                outcome.text = f"In Navigation: {hit}"
                return outcome
        except Exception:
            continue

    return outcome
|
||||
@@ -0,0 +1,149 @@
|
||||
"""
|
||||
Banner Detector — identifies Consent Management Platforms and their buttons.
|
||||
|
||||
Supports 10+ CMPs with specific selectors + generic fallback.
|
||||
"""
|
||||
|
||||
from dataclasses import dataclass
|
||||
|
||||
from playwright.async_api import Page, Locator
|
||||
|
||||
|
||||
@dataclass
class BannerInfo:
    """Result of consent-banner detection."""

    # True when a banner was recognised.
    detected: bool
    # CMP name, "Generic" for the text-based fallback, "" when not detected.
    provider: str
    # Selector for the accept button; "" when unknown.
    accept_selector: str
    # Selector for the reject button; "" when unknown.
    reject_selector: str
|
||||
|
||||
|
||||
# CMP-specific selectors (ordered by market share, per the original note).
# detect_banner walks this list top to bottom and the first "detect" hit wins.
# NOTE(review): some "reject" entries list a second alternative that does not
# look like a reject action (e.g. ".cky-btn-customize",
# ".onetrust-close-btn-handler", ".didomi-components-button--secondary").
# click_button clicks the first match in document order — confirm intended.
CMP_SELECTORS = [
    {"name": name, "detect": detect, "accept": accept, "reject": reject}
    for name, detect, accept, reject in (
        (
            "Didomi",
            "#didomi-host, [class*='didomi']",
            "#didomi-notice-agree-button",
            "#didomi-notice-disagree-button, .didomi-components-button--secondary",
        ),
        (
            "OneTrust",
            "#onetrust-banner-sdk, [class*='onetrust']",
            "#onetrust-accept-btn-handler",
            "#onetrust-reject-all-handler, .onetrust-close-btn-handler",
        ),
        (
            "Cookiebot",
            "#CybotCookiebotDialog, [class*='CybotCookiebot']",
            "#CybotCookiebotDialogBodyLevelButtonLevelOptinAllowAll",
            "#CybotCookiebotDialogBodyButtonDecline",
        ),
        (
            "Usercentrics",
            "#usercentrics-root, [data-testid='uc-banner']",
            "[data-testid='uc-accept-all-button']",
            "[data-testid='uc-deny-all-button']",
        ),
        (
            "CookieYes",
            ".cky-consent-container, [class*='cky-']",
            ".cky-btn-accept",
            ".cky-btn-reject, .cky-btn-customize",
        ),
        (
            "Quantcast",
            ".qc-cmp2-container, [class*='qc-cmp']",
            "[class*='qc-cmp2-summary-buttons'] button:first-child",
            "[class*='qc-cmp2-summary-buttons'] button:last-child",
        ),
        (
            "Borlabs",
            "#BorlabsCookieBox, [class*='BorlabsCookie']",
            "#BorlabsCookieBox .cookie-accept, [data-cookie-accept]",
            "#BorlabsCookieBox .cookie-refuse, [data-cookie-refuse]",
        ),
        (
            "Consentmanager",
            "#cmpbox, [class*='cmpbox']",
            ".cmpboxbtn.cmpboxbtnyes",
            ".cmpboxbtn.cmpboxbtnno",
        ),
        (
            "Klaro",
            ".klaro, [class*='klaro']",
            ".klaro .cm-btn-accept",
            ".klaro .cm-btn-decline",
        ),
        (
            "TarteAuCitron",
            "#tarteaucitronRoot, [class*='tarteaucitron']",
            "#tarteaucitronPersonalize2",
            "#tarteaucitronAllDenied2",
        ),
    )
]
|
||||
|
||||
# Generic fallback patterns (text-based), used when no known CMP is detected.
# Lists are probed in order; the first text found on the page is used.
GENERIC_ACCEPT_TEXTS = [
    "Alle akzeptieren",
    "Alles akzeptieren",
    "Alle Cookies akzeptieren",
    "Accept all",
    "Accept All Cookies",
    "Akzeptieren",
    "Zustimmen",
    "Einverstanden",
    "Ich stimme zu",
    "Ja, einverstanden",
]

GENERIC_REJECT_TEXTS = [
    "Nur notwendige",
    "Nur essentielle",
    "Ablehnen",
    "Alle ablehnen",
    "Reject",
    "Reject all",
    "Nur erforderliche",
    "Nur technisch notwendige",
    "Decline",
    "Nein",
    "Nicht einverstanden",
]
|
||||
|
||||
|
||||
async def detect_banner(page: Page) -> BannerInfo:
    """Detect which CMP is used on *page* and return its button selectors.

    Known CMPs from ``CMP_SELECTORS`` are probed first, in list order. If none
    matches, the page is searched for generic accept/reject wording.

    In the generic fallback the returned selectors point at an element that
    actually exists when possible: a ``<button>``, then a ``role="button"``
    element, then a link containing the text. If none of these exists, the
    ``button:has-text(...)`` selector is returned anyway, so detection itself
    is unaffected by the element type.

    Returns:
        A ``BannerInfo``; ``detected`` is ``False`` and all strings are empty
        when no banner was recognised.
    """

    async def _clickable_selector(label: str) -> str:
        # Prefer real buttons, then ARIA buttons, then links.
        candidates = (
            f'button:has-text("{label}")',
            f'[role="button"]:has-text("{label}")',
            f'a:has-text("{label}")',
        )
        for candidate in candidates:
            if await page.locator(candidate).count() > 0:
                return candidate
        # Nothing clickable matched: keep the button selector as the default.
        return candidates[0]

    # Try CMP-specific selectors first.
    for cmp in CMP_SELECTORS:
        try:
            if await page.locator(cmp["detect"]).count() > 0:
                return BannerInfo(
                    detected=True,
                    provider=cmp["name"],
                    accept_selector=cmp["accept"],
                    reject_selector=cmp["reject"],
                )
        except Exception:
            # A selector that fails on this page must not stop the scan.
            continue

    # Generic fallback — search for accept wording anywhere on the page.
    for text in GENERIC_ACCEPT_TEXTS:
        try:
            if await page.get_by_text(text, exact=False).count() == 0:
                continue
            accept = await _clickable_selector(text)

            # Try to find a matching reject control as well.
            reject = ""
            for rtext in GENERIC_REJECT_TEXTS:
                if await page.get_by_text(rtext, exact=False).count() > 0:
                    reject = await _clickable_selector(rtext)
                    break

            return BannerInfo(
                detected=True,
                provider="Generic",
                accept_selector=accept,
                reject_selector=reject,
            )
        except Exception:
            continue

    return BannerInfo(detected=False, provider="", accept_selector="", reject_selector="")
|
||||
|
||||
|
||||
async def click_button(page: Page, selector: str, timeout: int = 5000) -> bool:
    """Click the first element matching *selector* once it is visible.

    Args:
        page: Page to act on.
        selector: Selector of the button; an empty value is a no-op.
        timeout: Milliseconds to wait for the element to become visible.

    Returns:
        ``True`` when the click went through; ``False`` for an empty selector
        or when waiting or clicking raised.
    """
    if selector:
        try:
            target = page.locator(selector).first
            await target.wait_for(state="visible", timeout=timeout)
            await target.click()
        except Exception:
            return False
        return True
    return False
|
||||
@@ -0,0 +1,278 @@
|
||||
"""
|
||||
Category Tester — tests individual cookie consent categories.
|
||||
|
||||
Tests each category in isolation: only "Statistics" on, only "Marketing" on, etc.
|
||||
Detects miscategorization: e.g., Facebook Pixel loading when only Statistics is enabled.
|
||||
"""
|
||||
|
||||
import logging
|
||||
from dataclasses import dataclass, field
|
||||
|
||||
from playwright.async_api import Page, BrowserContext
|
||||
|
||||
from services.banner_detector import BannerInfo, click_button
|
||||
from services.script_analyzer import find_tracking_services, Violation
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
# Maps a tracking service name to the consent category it belongs to.
# Built per category; dict.fromkeys keeps the listed order.
SERVICE_CATEGORY_MAP: dict[str, str] = {
    # Statistics / Analytics
    **dict.fromkeys(
        (
            "Google Analytics",
            "Matomo",
            "Plausible Analytics",
            "Hotjar",
            "Microsoft Clarity",
            "etracker",
            "Heap Analytics",
            "Amplitude",
            "Mixpanel",
            "PostHog",
            "Mouseflow",
            "Crazy Egg",
            "Lucky Orange",
            "FullStory",
        ),
        "statistics",
    ),
    # Marketing / Advertising
    **dict.fromkeys(
        (
            "Meta/Facebook Pixel",
            "Google Ads",
            "Google Ads/DoubleClick",
            "TikTok Pixel",
            "LinkedIn Insight",
            "Pinterest Tag",
            "Criteo",
            "Taboola",
            "Outbrain",
            "Amazon Ads",
            "Bing/Microsoft Ads",
            "Salesforce Pardot",
        ),
        "marketing",
    ),
    # Functional
    **dict.fromkeys(
        (
            "Intercom",
            "Zendesk",
            "Tidio Chat",
            "Crisp Chat",
            "LiveChat",
            "Freshdesk/Freshchat",
            "HelpScout Beacon",
        ),
        "functional",
    ),
}
|
||||
|
||||
# Display labels (German) for the internal category names.
CATEGORY_LABELS = dict(
    statistics="Statistik / Analytics",
    marketing="Marketing / Werbung",
    functional="Funktional / Komfort",
    social_media="Social Media",
)
|
||||
|
||||
# CMP-specific category selectors, keyed by the provider name that banner
# detection reports. Each entry names the button that opens the settings
# panel, the button that saves the selection, and one toggle selector per
# category.
# NOTE(review): the OneTrust and Didomi "save_button" values fall back to ids
# that banner_detector lists as the accept button
# (#onetrust-accept-btn-handler, #didomi-notice-agree-button). If that
# element is matched first, saving would accept everything — confirm.
CMP_CATEGORY_CONFIG: dict[str, dict] = dict(
    Cookiebot=dict(
        settings_button="#CybotCookiebotDialogBodyButtonDetails",
        save_button="#CybotCookiebotDialogBodyLevelButtonLevelOptinAllowallSelection",
        categories=dict(
            statistics="#CybotCookiebotDialogBodyLevelButtonStatistics",
            marketing="#CybotCookiebotDialogBodyLevelButtonMarketing",
            preferences="#CybotCookiebotDialogBodyLevelButtonPreferences",
        ),
    ),
    OneTrust=dict(
        settings_button="#onetrust-pc-btn-handler, .ot-sdk-show-settings",
        save_button=".save-preference-btn-handler, #onetrust-accept-btn-handler",
        categories=dict(
            statistics=".ot-switch[data-ot-category='C0002'] input, #ot-group-id-C0002",
            marketing=".ot-switch[data-ot-category='C0004'] input, #ot-group-id-C0004",
            functional=".ot-switch[data-ot-category='C0003'] input, #ot-group-id-C0003",
        ),
    ),
    Usercentrics=dict(
        settings_button="[data-testid='uc-more-information-button'], button:has-text('Mehr Informationen')",
        save_button="[data-testid='uc-save-button']",
        categories=dict(
            statistics="[data-testid='uc-category-statistics'] input",
            marketing="[data-testid='uc-category-marketing'] input",
            functional="[data-testid='uc-category-functional'] input",
        ),
    ),
    Didomi=dict(
        settings_button="#didomi-notice-learn-more-button, .didomi-learn-more-button",
        save_button=".didomi-components-button--primary:has-text('Auswahl speichern'), #didomi-notice-agree-button",
        categories=dict(
            statistics="[data-purpose='analytics_purposes'] input, [data-purpose='measure'] input",
            marketing="[data-purpose='advertising_purposes'] input, [data-purpose='ads'] input",
        ),
    ),
)
|
||||
|
||||
# Generic category keywords for fallback detection. Toggle labels are
# lower-cased and tested for these substrings; the first category whose
# keyword matches wins, so the order of the entries matters.
CATEGORY_KEYWORDS = dict(
    statistics=["statistik", "analytics", "analyse", "statistics", "messung", "reichweite"],
    marketing=["marketing", "werbung", "advertising", "targeting", "remarketing", "anzeigen"],
    functional=["funktional", "functional", "preferences", "praeferenz", "komfort", "einstellungen"],
    social_media=["social media", "soziale medien", "social", "teilen"],
)
|
||||
|
||||
|
||||
@dataclass
class CategoryInfo:
    """One consent category found in a CMP settings panel."""

    # Internal category key, e.g. "statistics" or "marketing".
    name: str
    # Human-readable label shown in reports.
    label: str
    # Selector of the toggle that switches this category.
    selector: str
|
||||
|
||||
|
||||
@dataclass
class CategoryTestResult:
    """What was observed while a single category was enabled in isolation."""

    # Internal key of the tested category.
    category: str
    # Display label of the tested category.
    category_label: str
    # Request URLs recorded during the test (deduplicated).
    scripts_loaded: list[str] = field(default_factory=list)
    # Names of the cookies present in the browser context afterwards.
    cookies_set: list[str] = field(default_factory=list)
    # Consent-requiring services recognised among the requests.
    tracking_services: list[str] = field(default_factory=list)
    # One dict per service that loaded under the wrong category.
    violations: list[dict] = field(default_factory=list)
|
||||
|
||||
|
||||
async def detect_categories(page: Page, banner: BannerInfo) -> list[CategoryInfo]:
    """Detect the cookie categories the CMP on *page* offers.

    For providers listed in ``CMP_CATEGORY_CONFIG`` the settings panel is
    opened and each configured category toggle is probed. If that yields
    nothing (or the provider is unknown), a generic scan collects checkbox
    and switch-like elements and maps their label text to a category via
    ``CATEGORY_KEYWORDS``.

    Selectors built from page content are escaped, so ids or labels that
    contain quotes, backslashes or other special characters still produce
    valid CSS.

    Returns:
        The detected categories; empty when none were found.
    """

    def _css_string(value: str) -> str:
        # Escape a value for use inside a double-quoted CSS string.
        return value.replace("\\", "\\\\").replace('"', '\\"').replace("\n", "\\a ")

    categories: list[CategoryInfo] = []
    provider = banner.provider

    # CMP-specific detection
    config = CMP_CATEGORY_CONFIG.get(provider)
    if config:
        # Open settings panel first
        if config.get("settings_button"):
            await click_button(page, config["settings_button"], timeout=3000)
            await page.wait_for_timeout(1000)

        for cat_name, selector in config.get("categories", {}).items():
            try:
                # Only the first alternative of a comma-separated selector
                # list is probed.
                if await page.locator(selector.split(",")[0].strip()).count() > 0:
                    categories.append(CategoryInfo(
                        name=cat_name,
                        label=CATEGORY_LABELS.get(cat_name, cat_name),
                        selector=selector,
                    ))
            except Exception:
                continue

    # Generic fallback: search for toggle/checkbox elements with category keywords
    if not categories:
        try:
            # CSS.escape keeps "#id" selectors valid for ids that contain
            # characters with special meaning in CSS.
            toggles = await page.evaluate("""
                () => {
                    const elements = document.querySelectorAll(
                        'input[type="checkbox"], [role="switch"], [class*="toggle"], [class*="switch"]'
                    );
                    return [...elements].map(el => ({
                        text: (el.closest('label')?.textContent || el.getAttribute('aria-label') || '').trim(),
                        id: el.id || '',
                        selector: el.id ? '#' + CSS.escape(el.id) : '',
                    })).filter(e => e.text.length > 0);
                }
            """)

            for toggle in (toggles or []):
                text_lower = toggle["text"].lower()
                for cat_name, keywords in CATEGORY_KEYWORDS.items():
                    if any(kw in text_lower for kw in keywords):
                        # Without an id, fall back to an aria-label substring
                        # match on the first 20 characters of the label.
                        sel = toggle["selector"] or f'[aria-label*="{_css_string(toggle["text"][:20])}"]'
                        categories.append(CategoryInfo(
                            name=cat_name,
                            label=toggle["text"][:50],
                            selector=sel,
                        ))
                        # First matching category wins for this toggle.
                        break
        except Exception as e:
            logger.warning("Generic category detection failed: %s", e)

    logger.info("Detected %d categories for %s", len(categories), provider)
    return categories
|
||||
|
||||
|
||||
async def test_single_category(
    context: BrowserContext,
    url: str,
    category: CategoryInfo,
    banner: BannerInfo,
    wait_ms: int = 5000,
) -> CategoryTestResult:
    """Test one consent category in isolation: enable only it, disable the rest.

    Opens a new page in *context*, records outgoing requests, sets the CMP
    toggles (only for providers present in ``CMP_CATEGORY_CONFIG``), saves the
    selection, waits ``wait_ms`` and then reports what loaded.

    Tracking services are detected on the complete request log.
    ``scripts_loaded`` still holds the shortened per-host list for reporting.
    The page is always closed, including when a step fails.

    Args:
        context: Browser context to open the page in.
        url: Page to load.
        category: Category to enable.
        banner: Detection result; its ``provider`` selects the CMP config.
        wait_ms: Milliseconds to wait after saving before collecting results.

    Returns:
        A ``CategoryTestResult``. If the test fails part-way, the fields
        filled so far are returned and the error is logged.
    """
    result = CategoryTestResult(
        category=category.name,
        category_label=category.label,
    )

    page = None
    try:
        page = await context.new_page()
        scripts: list[str] = []
        page.on("request", lambda req: _collect(req, scripts))

        await page.goto(url, wait_until="networkidle", timeout=20000)
        await page.wait_for_timeout(2000)

        config = CMP_CATEGORY_CONFIG.get(banner.provider)

        if config:
            # Open settings
            if config.get("settings_button"):
                await click_button(page, config["settings_button"], timeout=3000)
                await page.wait_for_timeout(1000)

            # Disable ALL categories first
            for cat_sel in config.get("categories", {}).values():
                try:
                    el = page.locator(cat_sel.split(",")[0].strip()).first
                    if await el.is_checked():
                        await el.click()
                except Exception:
                    continue

            # Enable ONLY the target category
            try:
                el = page.locator(category.selector.split(",")[0].strip()).first
                if not await el.is_checked():
                    await el.click()
            except Exception:
                logger.warning("Could not toggle category %s", category.name)

            # Save selection
            if config.get("save_button"):
                await click_button(page, config["save_button"], timeout=3000)

        await page.wait_for_timeout(wait_ms)

        # Collect results
        result.scripts_loaded = _dedup_scripts(scripts)
        # NOTE(review): this lists every cookie in the context. If the caller
        # reuses one context across categories, cookies from earlier runs are
        # included — confirm the caller passes a fresh context.
        result.cookies_set = [c.get("name", "") for c in await context.cookies()]
        # Classify the full request log: the deduplicated list keeps one URL
        # per host and is capped, which can hide trackers that are only
        # recognisable by path or that load late (after consent was saved).
        result.tracking_services = find_tracking_services(scripts)

        # Find violations: services that don't belong to this category
        for service in result.tracking_services:
            expected_cat = SERVICE_CATEGORY_MAP.get(service)
            if expected_cat and expected_cat != category.name:
                result.violations.append({
                    "service": service,
                    "severity": "HIGH",
                    "text": f"{service} laedt bei '{category.label}' — gehoert aber zu '{CATEGORY_LABELS.get(expected_cat, expected_cat)}'",
                    "expected_category": expected_cat,
                    "actual_category": category.name,
                })

    except Exception as e:
        logger.error("Category test failed for %s: %s", category.name, e)
    finally:
        # Close the page on every path so failed runs do not leak pages.
        if page is not None:
            try:
                await page.close()
            except Exception as e:
                logger.warning("Could not close page for %s: %s", category.name, e)

    return result
|
||||
|
||||
|
||||
def _collect(request, scripts: list[str]):
|
||||
if request.resource_type in ("script", "image", "xhr", "fetch"):
|
||||
scripts.append(request.url)
|
||||
|
||||
|
||||
def _dedup_scripts(scripts: list[str]) -> list[str]:
|
||||
seen = set()
|
||||
result = []
|
||||
for url in scripts:
|
||||
domain = url.split("/")[2] if len(url.split("/")) > 2 else url
|
||||
if domain not in seen:
|
||||
seen.add(domain)
|
||||
result.append(url)
|
||||
return result[:30]
|
||||
@@ -0,0 +1,157 @@
|
||||
"""
|
||||
Script Analyzer — classifies detected scripts and cookies against known services.
|
||||
"""
|
||||
|
||||
import re
|
||||
from dataclasses import dataclass
|
||||
|
||||
# Maps a regular expression (searched case-insensitively in request URLs) to
# the service it identifies. Entries are tried in order and the first match
# decides, so more specific patterns must come before broader ones.
#
# The two Google Analytics id alternatives are case-sensitive and anchored
# ((?-i:...) switches IGNORECASE off for that group). Without this, ordinary
# URL fragments such as "img-large" or "aqua-1" were reported as Google
# Analytics.
SERVICE_PATTERNS: dict[str, dict] = {
    r"google.?analytics|gtag|(?-i:\bUA-\d{4,}-\d+)|(?-i:\bG-[A-Z0-9]{6,}\b)": {
        "name": "Google Analytics", "requires_consent": True,
        "legal_ref": "§25 TDDDG, Art. 44-49 DSGVO",
    },
    r"googletagmanager|gtm\.js": {
        "name": "Google Tag Manager", "requires_consent": True,
        "legal_ref": "§25 TDDDG",
    },
    r"facebook\.net|fbevents|fbq": {
        "name": "Meta/Facebook Pixel", "requires_consent": True,
        "legal_ref": "§25 TDDDG, Art. 44-49 DSGVO",
    },
    r"hotjar\.com|_hjSettings": {
        "name": "Hotjar", "requires_consent": True,
        "legal_ref": "§25 TDDDG (Session Recording)",
    },
    r"clarity\.ms": {
        "name": "Microsoft Clarity", "requires_consent": True,
        "legal_ref": "§25 TDDDG (Session Replay)",
    },
    r"tiktok\.com/i18n|analytics\.tiktok": {
        "name": "TikTok Pixel", "requires_consent": True,
        "legal_ref": "§25 TDDDG, Drittlandtransfer China",
    },
    r"linkedin\.com/insight|snap\.licdn": {
        "name": "LinkedIn Insight", "requires_consent": True,
        "legal_ref": "§25 TDDDG, Art. 44-49 DSGVO",
    },
    r"pinterest\.com/ct|pinimg\.com/ct": {
        "name": "Pinterest Tag", "requires_consent": True,
        "legal_ref": "§25 TDDDG",
    },
    r"criteo\.com|criteo\.net": {
        "name": "Criteo", "requires_consent": True,
        "legal_ref": "§25 TDDDG",
    },
    r"doubleclick\.net|googlesyndication": {
        "name": "Google Ads/DoubleClick", "requires_consent": True,
        "legal_ref": "§25 TDDDG, Art. 44-49 DSGVO",
    },
    r"fonts\.googleapis\.com|fonts\.gstatic": {
        "name": "Google Fonts", "requires_consent": True,
        "legal_ref": "LG Muenchen I, Az. 3 O 17493/20",
    },
    r"recaptcha|grecaptcha": {
        "name": "Google reCAPTCHA", "requires_consent": True,
        "legal_ref": "§25 TDDDG",
    },
    r"youtube\.com/embed|ytimg": {
        "name": "YouTube", "requires_consent": True,
        "legal_ref": "§25 TDDDG, Art. 44-49 DSGVO",
    },
    r"maps\.googleapis|maps\.google": {
        "name": "Google Maps", "requires_consent": True,
        "legal_ref": "§25 TDDDG",
    },
    r"intercom\.io|intercomcdn": {
        "name": "Intercom", "requires_consent": True,
        "legal_ref": "Art. 44-49 DSGVO",
    },
    r"zendesk\.com|zdassets": {
        "name": "Zendesk", "requires_consent": True,
        "legal_ref": "Art. 44-49 DSGVO",
    },
    r"sentry\.io|sentry-cdn": {
        "name": "Sentry", "requires_consent": False,
        "legal_ref": "Berechtigtes Interesse (Error Tracking)",
    },
    r"cdn\.cloudflare\.com": {
        "name": "Cloudflare CDN", "requires_consent": False,
        "legal_ref": "Berechtigtes Interesse (CDN)",
    },
    r"didomi|cookiebot|onetrust|usercentrics|consentmanager": {
        "name": "Consent Management", "requires_consent": False,
        "legal_ref": "Notwendig (CMP)",
    },
}
|
||||
|
||||
|
||||
@dataclass
class Violation:
    """A consent violation attributed to one service."""

    # Name of the offending service.
    service: str
    # Severity level: "HIGH" or "CRITICAL".
    severity: str
    # Human-readable description of the finding.
    text: str
    # Legal provision(s) the finding is based on.
    legal_ref: str
|
||||
|
||||
|
||||
def classify_scripts(scripts: list[str]) -> list[str]:
    """Return the sorted, unique names of known services among *scripts*.

    Each URL is attributed to the first ``SERVICE_PATTERNS`` entry whose
    regex matches (case-insensitive). URLs matching no entry are ignored.
    """
    names = set()
    for url in scripts:
        label = next(
            (
                meta["name"]
                for regex, meta in SERVICE_PATTERNS.items()
                if re.search(regex, url, re.IGNORECASE)
            ),
            None,
        )
        if label is not None:
            names.add(label)
    return sorted(names)
|
||||
|
||||
|
||||
def find_tracking_services(scripts: list[str]) -> list[str]:
    """Return the sorted, unique names of consent-requiring services in *scripts*.

    Each URL is attributed to the first ``SERVICE_PATTERNS`` entry that both
    matches (case-insensitive) and is flagged ``requires_consent``.
    """
    # Only consent-bound entries can ever contribute a name.
    consent_bound = [
        (regex, meta["name"])
        for regex, meta in SERVICE_PATTERNS.items()
        if meta["requires_consent"]
    ]
    found = set()
    for url in scripts:
        for regex, name in consent_bound:
            if re.search(regex, url, re.IGNORECASE):
                found.add(name)
                break
    return sorted(found)
|
||||
|
||||
|
||||
def find_violations_before_consent(scripts: list[str]) -> list[Violation]:
    """Report consent-requiring services that loaded without consent (HIGH).

    One ``Violation`` is produced per service, in the order the services are
    first seen in *scripts*.
    """
    # Insertion-ordered dict doubles as the "already reported" set.
    reported: dict[str, Violation] = {}
    for url in scripts:
        for regex, meta in SERVICE_PATTERNS.items():
            if not (re.search(regex, url, re.IGNORECASE) and meta["requires_consent"]):
                continue
            service = meta["name"]
            if service not in reported:
                reported[service] = Violation(
                    service=service, severity="HIGH",
                    text=f"{service} laedt OHNE vorherige Einwilligung",
                    legal_ref=meta["legal_ref"],
                )
            # First consent-bound match decides for this URL.
            break
    return list(reported.values())
|
||||
|
||||
|
||||
def find_violations_after_reject(
    before_scripts: list[str], after_scripts: list[str],
) -> list[Violation]:
    """Report services that still load after the user rejected (CRITICAL).

    A service is reported only when it appears both in *before_scripts* and
    in *after_scripts*. Results follow the sorted order of the services found
    after rejection.
    """
    still_loading = find_tracking_services(after_scripts)
    loaded_before = set(find_tracking_services(before_scripts))

    # First legal reference listed for each service name.
    legal_refs: dict[str, str] = {}
    for meta in SERVICE_PATTERNS.values():
        legal_refs.setdefault(meta["name"], meta["legal_ref"])

    return [
        Violation(
            service=service, severity="CRITICAL",
            text=f"{service} laedt TROTZ Ablehnung — moegliches Dark Pattern",
            legal_ref=legal_refs[service] + ", Art. 5(3) ePrivacy",
        )
        for service in still_loading
        if service in loaded_before and service in legal_refs
    ]
|
||||
Reference in New Issue
Block a user