fix: restore all missing consent-tester service modules

banner_detector.py, script_analyzer.py, category_tester.py, authenticated_scanner.py
were only on the feature branch — needed for consent-tester to start.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
Benjamin Admin
2026-05-05 00:14:26 +02:00
parent 3fade26d89
commit f3e44cf59f
4 changed files with 814 additions and 0 deletions
@@ -0,0 +1,230 @@
"""
Authenticated Scanner — tests post-login functionality.
Checks §312k BGB (cancellation), Art. 17 (deletion), Art. 20 (export),
Art. 7(3) (consent withdrawal), Art. 15 (data access).
Credentials are NEVER stored, logged, or transmitted beyond the browser context.
"""
import logging
from dataclasses import dataclass, field
from playwright.async_api import async_playwright, Page
logger = logging.getLogger(__name__)
# Desktop Chrome-on-macOS user agent handed to the Playwright browser context.
USER_AGENT = (
    "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) "
    "AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36"
)
@dataclass
class CheckResult:
    """Outcome of searching the logged-in page for one capability."""

    # True once any search pattern matched.
    found: bool = False
    # Selector that produced the hit (set only by the link/button probe).
    selector: str = ""
    # Matched element text, the matched pattern, or an "In Navigation: ..." note.
    text: str = ""
    # Not populated by the code visible in this module.
    clicks_needed: int = 0
    # Not populated by the code visible in this module.
    screenshot: bytes = b""
@dataclass
class AuthTestResult:
    """Aggregated result of one authenticated scan."""

    # Whether the login attempt was judged successful.
    authenticated: bool = False
    # Human-readable failure reason; empty when nothing went wrong.
    login_error: str = ""
    # Cancellation option (§312k BGB).
    cancel_subscription: CheckResult = field(default_factory=CheckResult)
    # Account deletion option (Art. 17 DSGVO).
    delete_account: CheckResult = field(default_factory=CheckResult)
    # Data export option (Art. 20 DSGVO).
    export_data: CheckResult = field(default_factory=CheckResult)
    # Consent settings / withdrawal (Art. 7(3) DSGVO).
    consent_settings: CheckResult = field(default_factory=CheckResult)
    # Profile / personal data visibility (Art. 15 DSGVO).
    profile_visible: CheckResult = field(default_factory=CheckResult)
# Search phrases (German + English) for each check.
# List order is significant: the first phrase that matches is the one reported.
CANCEL_PATTERNS = [
    "kündigen",
    "kuendigen",
    "vertrag beenden",
    "abo beenden",
    "mitgliedschaft kündigen",
    "cancel subscription",
    "unsubscribe",
    "cancel membership",
    "vertrag kündigen",
]
DELETE_PATTERNS = [
    "konto löschen",
    "konto loeschen",
    "account löschen",
    "delete account",
    "account deaktivieren",
    "profil löschen",
    "remove account",
]
EXPORT_PATTERNS = [
    "daten exportieren",
    "daten herunterladen",
    "export data",
    "download data",
    "meine daten",
    "datenauskunft",
    "data download",
    "daten anfordern",
]
CONSENT_PATTERNS = [
    "einwilligung",
    "einstellungen",
    "datenschutz-einstellungen",
    "consent",
    "privacy settings",
    "cookie-einstellungen",
    "werbeeinstellungen",
    "marketing preferences",
]
PROFILE_PATTERNS = [
    "profil",
    "mein konto",
    "kontodaten",
    "persönliche daten",
    "profile",
    "my account",
    "account settings",
    "personal data",
]
async def run_authenticated_test(
    url: str,
    username: str,
    password: str,
    username_selector: str = "",
    password_selector: str = "",
    submit_selector: str = "",
) -> AuthTestResult:
    """Log in at *url* and probe the resulting page for data-subject controls.

    The three ``*_selector`` arguments are optional; empty values let
    ``_try_login`` auto-detect the form fields.

    Returns an ``AuthTestResult``. On a failed login ``authenticated`` stays
    False and ``login_error`` is set. An exception raised during navigation or
    the checks is logged and stored in ``login_error`` instead of propagating.

    The browser context and browser are each closed exactly once on every
    exit path, including the early return after a failed login and failures
    while creating the context or page.
    """
    result = AuthTestResult()
    async with async_playwright() as p:
        browser = await p.chromium.launch(
            headless=True,
            args=["--no-sandbox", "--disable-dev-shm-usage"],
        )
        try:
            context = await browser.new_context(user_agent=USER_AGENT)
            try:
                page = await context.new_page()
                try:
                    # Step 1: Login
                    await page.goto(url, wait_until="networkidle", timeout=30000)
                    await page.wait_for_timeout(2000)
                    login_ok = await _try_login(
                        page, username, password,
                        username_selector, password_selector, submit_selector,
                    )
                    if not login_ok:
                        result.login_error = "Login fehlgeschlagen — Formular nicht gefunden oder Credentials falsch"
                        # Cleanup is left to the enclosing finally clauses.
                        return result
                    result.authenticated = True
                    await page.wait_for_timeout(3000)
                    # Step 2: Check cancellation (§312k BGB)
                    result.cancel_subscription = await _check_patterns(page, CANCEL_PATTERNS, "cancel")
                    logger.info("Cancel check: found=%s", result.cancel_subscription.found)
                    # Step 3: Check delete account (Art. 17 DSGVO)
                    result.delete_account = await _check_patterns(page, DELETE_PATTERNS, "delete")
                    # Step 4: Check data export (Art. 20 DSGVO)
                    result.export_data = await _check_patterns(page, EXPORT_PATTERNS, "export")
                    # Step 5: Check consent settings (Art. 7(3) DSGVO)
                    result.consent_settings = await _check_patterns(page, CONSENT_PATTERNS, "consent")
                    # Step 6: Check profile visibility (Art. 15 DSGVO)
                    result.profile_visible = await _check_patterns(page, PROFILE_PATTERNS, "profile")
                except Exception as e:
                    logger.error("Authenticated test failed: %s", e)
                    result.login_error = str(e)
            finally:
                # CRITICAL: Destroy context — wipes all credentials, cookies, session
                await context.close()
        finally:
            # Runs even when closing the context itself fails.
            await browser.close()
    return result
async def _try_login(
    page: Page, username: str, password: str,
    user_sel: str, pass_sel: str, submit_sel: str,
) -> bool:
    """Fill in and submit the login form on *page*.

    Empty selectors are auto-detected from a fixed list of common candidates.
    Returns True when no password input is present after submitting, and
    False when the form could not be located or any step raised.
    """

    async def first_present(candidates: tuple) -> str:
        """Return the first candidate selector that matches something, else ""."""
        for candidate in candidates:
            if await page.locator(candidate).count() > 0:
                return candidate
        return ""

    try:
        # Auto-detect whichever selectors the caller left empty.
        user_sel = user_sel or await first_present((
            'input[type="email"]', 'input[name="email"]', 'input[name="username"]',
            'input[name="login"]', 'input[id="email"]', 'input[id="username"]',
        ))
        pass_sel = pass_sel or await first_present((
            'input[type="password"]', 'input[name="password"]', 'input[id="password"]',
        ))
        submit_sel = submit_sel or await first_present((
            'button[type="submit"]', 'input[type="submit"]',
            'button:has-text("Anmelden")', 'button:has-text("Login")',
            'button:has-text("Sign in")', 'button:has-text("Einloggen")',
        ))

        if not (user_sel and pass_sel):
            return False

        await page.fill(user_sel, username)
        await page.fill(pass_sel, password)
        if submit_sel:
            await page.click(submit_sel)
        else:
            # No submit control found: submit via the keyboard instead.
            await page.press(pass_sel, "Enter")
        await page.wait_for_timeout(5000)

        # Heuristic: a password field still on the page means we are still
        # looking at the login form.
        password_fields = await page.locator('input[type="password"]').count()
        return password_fields <= 0
    except Exception as e:
        logger.warning("Login attempt failed: %s", e)
        return False
async def _check_patterns(page: Page, patterns: list[str], check_name: str) -> CheckResult:
    """Look for any of *patterns* on the current page.

    Three passes run in order and the first hit wins:
    1. any element containing the phrase,
    2. links/buttons containing the phrase, or hrefs containing its
       dash-joined form,
    3. the text of common navigation containers.

    Errors from individual probes are swallowed (best effort). *check_name*
    is accepted but not used by this function.
    """
    outcome = CheckResult()

    # Pass 1: free text anywhere on the page.
    for needle in patterns:
        try:
            hits = page.get_by_text(needle, exact=False)
            if await hits.count() <= 0:
                continue
            raw_text = await hits.first.text_content()
            outcome.found = True
            outcome.text = (raw_text or "").strip()[:100]
            return outcome
        except Exception:
            continue

    # Pass 2: links, buttons and href fragments.
    for needle in patterns:
        try:
            candidates = (
                f'a:has-text("{needle}")',
                f'button:has-text("{needle}")',
                f'[href*="{needle.replace(" ", "-")}"]',
            )
            for css in candidates:
                if await page.locator(css).count() > 0:
                    outcome.found = True
                    outcome.selector = css
                    outcome.text = needle
                    return outcome
        except Exception:
            # A failing probe abandons the remaining selectors for this phrase.
            continue

    # Pass 3: navigation menus (common locations for account management).
    for region in ('nav', '[role="navigation"]', '.sidebar', '.account-menu', '#account'):
        try:
            container = page.locator(region)
            if await container.count() > 0:
                haystack = (await container.first.text_content() or "").lower()
                hit = next((n for n in patterns if n.lower() in haystack), None)
                if hit is not None:
                    outcome.found = True
                    outcome.text = f"In Navigation: {hit}"
                    return outcome
        except Exception:
            continue

    return outcome
+149
View File
@@ -0,0 +1,149 @@
"""
Banner Detector — identifies Consent Management Platforms and their buttons.
Supports 10+ CMPs with specific selectors + generic fallback.
"""
from dataclasses import dataclass
from playwright.async_api import Page, Locator
@dataclass
class BannerInfo:
    """Result of consent-banner detection."""

    # Whether any banner was recognised.
    detected: bool
    # CMP name from CMP_SELECTORS, "Generic" for the text fallback, "" if none.
    provider: str
    # Selector for the accept button ("" when no banner was detected).
    accept_selector: str
    # Selector for the reject button (may be "" when none was found).
    reject_selector: str
# CMP-specific selectors (ordered by market share).
# Each row is (name, detect, accept, reject); detection walks the list in
# order and stops at the first CMP whose "detect" selector matches.
CMP_SELECTORS = [
    {"name": name, "detect": detect, "accept": accept, "reject": reject}
    for name, detect, accept, reject in (
        (
            "Didomi",
            "#didomi-host, [class*='didomi']",
            "#didomi-notice-agree-button",
            "#didomi-notice-disagree-button, .didomi-components-button--secondary",
        ),
        (
            "OneTrust",
            "#onetrust-banner-sdk, [class*='onetrust']",
            "#onetrust-accept-btn-handler",
            "#onetrust-reject-all-handler, .onetrust-close-btn-handler",
        ),
        (
            "Cookiebot",
            "#CybotCookiebotDialog, [class*='CybotCookiebot']",
            "#CybotCookiebotDialogBodyLevelButtonLevelOptinAllowAll",
            "#CybotCookiebotDialogBodyButtonDecline",
        ),
        (
            "Usercentrics",
            "#usercentrics-root, [data-testid='uc-banner']",
            "[data-testid='uc-accept-all-button']",
            "[data-testid='uc-deny-all-button']",
        ),
        (
            "CookieYes",
            ".cky-consent-container, [class*='cky-']",
            ".cky-btn-accept",
            ".cky-btn-reject, .cky-btn-customize",
        ),
        (
            "Quantcast",
            ".qc-cmp2-container, [class*='qc-cmp']",
            "[class*='qc-cmp2-summary-buttons'] button:first-child",
            "[class*='qc-cmp2-summary-buttons'] button:last-child",
        ),
        (
            "Borlabs",
            "#BorlabsCookieBox, [class*='BorlabsCookie']",
            "#BorlabsCookieBox .cookie-accept, [data-cookie-accept]",
            "#BorlabsCookieBox .cookie-refuse, [data-cookie-refuse]",
        ),
        (
            "Consentmanager",
            "#cmpbox, [class*='cmpbox']",
            ".cmpboxbtn.cmpboxbtnyes",
            ".cmpboxbtn.cmpboxbtnno",
        ),
        (
            "Klaro",
            ".klaro, [class*='klaro']",
            ".klaro .cm-btn-accept",
            ".klaro .cm-btn-decline",
        ),
        (
            "TarteAuCitron",
            "#tarteaucitronRoot, [class*='tarteaucitron']",
            "#tarteaucitronPersonalize2",
            "#tarteaucitronAllDenied2",
        ),
    )
]
# Generic fallback patterns (text-based), tried in list order when no known
# CMP is recognised.
GENERIC_ACCEPT_TEXTS = [
    "Alle akzeptieren",
    "Alles akzeptieren",
    "Alle Cookies akzeptieren",
    "Accept all",
    "Accept All Cookies",
    "Akzeptieren",
    "Zustimmen",
    "Einverstanden",
    "Ich stimme zu",
    "Ja, einverstanden",
]
GENERIC_REJECT_TEXTS = [
    "Nur notwendige",
    "Nur essentielle",
    "Ablehnen",
    "Alle ablehnen",
    "Reject",
    "Reject all",
    "Nur erforderliche",
    "Nur technisch notwendige",
    "Decline",
    "Nein",
    "Nicht einverstanden",
]
async def _clickable_selector_for(page: Page, text: str) -> str:
    """Return a selector for a button-like element containing *text*, else ""."""
    for candidate in (
        f'button:has-text("{text}")',
        f'[role="button"]:has-text("{text}")',
    ):
        if await page.locator(candidate).count() > 0:
            return candidate
    return ""


async def detect_banner(page: Page) -> BannerInfo:
    """Detect which CMP is used and return button selectors.

    Known CMPs are recognised by their "detect" selector, in CMP_SELECTORS
    order. Otherwise a text-based fallback looks for a button-like element
    carrying one of the generic accept labels.

    The fallback probes with the same selector it returns, so a reported
    selector is known to match an element at detection time. Plain text that
    merely mentions an accept phrase is no longer reported as a banner.
    Probe errors are swallowed and the next candidate is tried.
    """
    # Try CMP-specific selectors first
    for cmp in CMP_SELECTORS:
        try:
            if await page.locator(cmp["detect"]).count() > 0:
                return BannerInfo(
                    detected=True,
                    provider=cmp["name"],
                    accept_selector=cmp["accept"],
                    reject_selector=cmp["reject"],
                )
        except Exception:
            continue

    # Generic fallback — search for button-like elements by text
    for text in GENERIC_ACCEPT_TEXTS:
        try:
            accept = await _clickable_selector_for(page, text)
            if not accept:
                continue
            # Try to find a matching reject control; "" when there is none.
            reject = ""
            for rtext in GENERIC_REJECT_TEXTS:
                reject = await _clickable_selector_for(page, rtext)
                if reject:
                    break
            return BannerInfo(
                detected=True,
                provider="Generic",
                accept_selector=accept,
                reject_selector=reject,
            )
        except Exception:
            continue

    return BannerInfo(detected=False, provider="", accept_selector="", reject_selector="")
async def click_button(page: Page, selector: str, timeout: int = 5000) -> bool:
    """Click the first element matching *selector* once it becomes visible.

    Returns True after a successful click. Returns False for an empty
    selector or when waiting/clicking raises (e.g. the *timeout* in
    milliseconds elapses).
    """
    if selector:
        try:
            target = page.locator(selector).first
            await target.wait_for(state="visible", timeout=timeout)
            await target.click()
        except Exception:
            return False
        else:
            return True
    return False
+278
View File
@@ -0,0 +1,278 @@
"""
Category Tester — tests individual cookie consent categories.
Tests each category in isolation: only "Statistics" on, only "Marketing" on, etc.
Detects miscategorization: e.g., Facebook Pixel loading when only Statistics is enabled.
"""
import logging
from dataclasses import dataclass, field
from playwright.async_api import Page, BrowserContext
from services.banner_detector import BannerInfo, click_button
from services.script_analyzer import find_tracking_services, Violation
logger = logging.getLogger(__name__)
# Which tracking service belongs to which consent category.
# Services missing from this map are never reported as miscategorised.
SERVICE_CATEGORY_MAP: dict[str, str] = {
    # Statistics / Analytics
    **dict.fromkeys(
        (
            "Google Analytics",
            "Matomo",
            "Plausible Analytics",
            "Hotjar",
            "Microsoft Clarity",
            "etracker",
            "Heap Analytics",
            "Amplitude",
            "Mixpanel",
            "PostHog",
            "Mouseflow",
            "Crazy Egg",
            "Lucky Orange",
            "FullStory",
        ),
        "statistics",
    ),
    # Marketing / Advertising
    **dict.fromkeys(
        (
            "Meta/Facebook Pixel",
            "Google Ads",
            "Google Ads/DoubleClick",
            "TikTok Pixel",
            "LinkedIn Insight",
            "Pinterest Tag",
            "Criteo",
            "Taboola",
            "Outbrain",
            "Amazon Ads",
            "Bing/Microsoft Ads",
            "Salesforce Pardot",
        ),
        "marketing",
    ),
    # Functional
    **dict.fromkeys(
        (
            "Intercom",
            "Zendesk",
            "Tidio Chat",
            "Crisp Chat",
            "LiveChat",
            "Freshdesk/Freshchat",
            "HelpScout Beacon",
        ),
        "functional",
    ),
}
# German display labels for the internal category identifiers.
CATEGORY_LABELS = {
    "statistics": "Statistik / Analytics",
    "marketing": "Marketing / Werbung",
    "functional": "Funktional / Komfort",
    "social_media": "Social Media",
}
# CMP-specific category selectors.
# Per CMP: the button that opens the settings panel, the button that saves
# the selection, and one toggle selector per internal category identifier.
# Category keys use the internal identifiers (statistics / marketing /
# functional / social_media) so that they line up with the service-to-category
# mapping. Cookiebot's "Preferences" toggle is therefore filed under
# "functional", as the generic keyword table already treats "preferences".
CMP_CATEGORY_CONFIG: dict[str, dict] = {
    "Cookiebot": {
        "settings_button": "#CybotCookiebotDialogBodyButtonDetails",
        "save_button": "#CybotCookiebotDialogBodyLevelButtonLevelOptinAllowallSelection",
        "categories": {
            "statistics": "#CybotCookiebotDialogBodyLevelButtonStatistics",
            "marketing": "#CybotCookiebotDialogBodyLevelButtonMarketing",
            "functional": "#CybotCookiebotDialogBodyLevelButtonPreferences",
        },
    },
    "OneTrust": {
        "settings_button": "#onetrust-pc-btn-handler, .ot-sdk-show-settings",
        "save_button": ".save-preference-btn-handler, #onetrust-accept-btn-handler",
        "categories": {
            "statistics": ".ot-switch[data-ot-category='C0002'] input, #ot-group-id-C0002",
            "marketing": ".ot-switch[data-ot-category='C0004'] input, #ot-group-id-C0004",
            "functional": ".ot-switch[data-ot-category='C0003'] input, #ot-group-id-C0003",
        },
    },
    "Usercentrics": {
        "settings_button": "[data-testid='uc-more-information-button'], button:has-text('Mehr Informationen')",
        "save_button": "[data-testid='uc-save-button']",
        "categories": {
            "statistics": "[data-testid='uc-category-statistics'] input",
            "marketing": "[data-testid='uc-category-marketing'] input",
            "functional": "[data-testid='uc-category-functional'] input",
        },
    },
    "Didomi": {
        "settings_button": "#didomi-notice-learn-more-button, .didomi-learn-more-button",
        "save_button": ".didomi-components-button--primary:has-text('Auswahl speichern'), #didomi-notice-agree-button",
        "categories": {
            "statistics": "[data-purpose='analytics_purposes'] input, [data-purpose='measure'] input",
            "marketing": "[data-purpose='advertising_purposes'] input, [data-purpose='ads'] input",
        },
    },
}
# Generic category keywords for fallback detection.
# Keywords are matched as lower-case substrings of a toggle's label; the
# first category (in dict order) with a matching keyword wins.
CATEGORY_KEYWORDS = {
    "statistics": [
        "statistik", "analytics", "analyse",
        "statistics", "messung", "reichweite",
    ],
    "marketing": [
        "marketing", "werbung", "advertising",
        "targeting", "remarketing", "anzeigen",
    ],
    "functional": [
        "funktional", "functional", "preferences",
        "praeferenz", "komfort", "einstellungen",
    ],
    "social_media": [
        "social media", "soziale medien", "social", "teilen",
    ],
}
@dataclass
class CategoryInfo:
    """One consent category found in a banner's settings panel."""

    # Internal category identifier, e.g. "statistics".
    name: str
    # Display label: a CATEGORY_LABELS entry or the toggle's own text.
    label: str
    # Selector of the toggle that switches this category.
    selector: str
@dataclass
class CategoryTestResult:
    """What was observed while only one consent category was enabled."""

    # Internal identifier of the category under test.
    category: str
    # Display label of the category under test.
    category_label: str
    # Request URLs captured during the test.
    scripts_loaded: list[str] = field(default_factory=list)
    # Names of the cookies present after the test.
    cookies_set: list[str] = field(default_factory=list)
    # Consent-requiring services recognised among the requests.
    tracking_services: list[str] = field(default_factory=list)
    # One dict per service that belongs to a different category.
    violations: list[dict] = field(default_factory=list)
async def detect_categories(page: Page, banner: BannerInfo) -> list[CategoryInfo]:
    """Detect available cookie categories in the CMP.

    For a provider listed in CMP_CATEGORY_CONFIG the settings panel is opened
    and each configured toggle is probed; only the first comma-separated
    alternative of a toggle selector is checked. If that yields nothing, a
    generic scan matches checkbox/switch labels against CATEGORY_KEYWORDS.
    """
    provider = banner.provider
    found: list[CategoryInfo] = []

    # CMP-specific detection
    cmp_config = CMP_CATEGORY_CONFIG.get(provider)
    if cmp_config:
        # The toggles only exist once the settings panel is open.
        settings_button = cmp_config.get("settings_button")
        if settings_button:
            await click_button(page, settings_button, timeout=3000)
            await page.wait_for_timeout(1000)

        for cat_name, selector in cmp_config.get("categories", {}).items():
            try:
                primary = selector.split(",")[0].strip()
                if await page.locator(primary).count() > 0:
                    found.append(CategoryInfo(
                        name=cat_name,
                        label=CATEGORY_LABELS.get(cat_name, cat_name),
                        selector=selector,
                    ))
            except Exception:
                continue

    # Generic fallback: search for toggle/checkbox elements with category keywords
    if not found:
        try:
            # The script text is kept verbatim, including its flush-left layout.
            toggles = await page.evaluate("""
() => {
const elements = document.querySelectorAll(
'input[type="checkbox"], [role="switch"], [class*="toggle"], [class*="switch"]'
);
return [...elements].map(el => ({
text: (el.closest('label')?.textContent || el.getAttribute('aria-label') || '').trim(),
id: el.id || '',
selector: el.id ? '#' + el.id : '',
})).filter(e => e.text.length > 0);
}
""")
            for toggle in (toggles or []):
                label_text = toggle["text"]
                lowered = label_text.lower()
                matched = next(
                    (
                        name
                        for name, keywords in CATEGORY_KEYWORDS.items()
                        if any(kw in lowered for kw in keywords)
                    ),
                    None,
                )
                if matched is None:
                    continue
                # Prefer the element id; otherwise fall back to an aria-label
                # substring built from the first 20 characters of the label.
                found.append(CategoryInfo(
                    name=matched,
                    label=label_text[:50],
                    selector=toggle["selector"] or f'[aria-label*="{label_text[:20]}"]',
                ))
        except Exception as e:
            logger.warning("Generic category detection failed: %s", e)

    logger.info("Detected %d categories for %s", len(found), provider)
    return found
async def test_single_category(
    context: BrowserContext,
    url: str,
    category: CategoryInfo,
    banner: BannerInfo,
    wait_ms: int = 5000,
) -> CategoryTestResult:
    """Test a single category in isolation: enable only this one, disable others.

    Opens a fresh page in *context*, loads *url*, sets the consent toggles for
    a provider listed in CMP_CATEGORY_CONFIG, saves, waits *wait_ms*
    milliseconds and records requests, cookies and recognised tracking
    services. Services mapped to a different category than the one under test
    are reported as violations.

    Never raises: failures are logged and whatever was collected so far is
    returned. The page is closed on every exit path.
    """
    result = CategoryTestResult(
        category=category.name,
        category_label=category.label,
    )
    page = None
    try:
        page = await context.new_page()
        scripts: list[str] = []
        page.on("request", lambda req: _collect(req, scripts))
        await page.goto(url, wait_until="networkidle", timeout=20000)
        await page.wait_for_timeout(2000)

        # NOTE(review): for providers without an entry in CMP_CATEGORY_CONFIG
        # no toggle is changed, yet the results below are still evaluated.
        config = CMP_CATEGORY_CONFIG.get(banner.provider)
        if config:
            # Open settings
            if config.get("settings_button"):
                await click_button(page, config["settings_button"], timeout=3000)
                await page.wait_for_timeout(1000)
            # Disable ALL categories first
            for cat_sel in config.get("categories", {}).values():
                try:
                    el = page.locator(cat_sel.split(",")[0].strip()).first
                    if await el.is_checked():
                        await el.click()
                except Exception:
                    continue
            # Enable ONLY the target category
            try:
                el = page.locator(category.selector.split(",")[0].strip()).first
                if not await el.is_checked():
                    await el.click()
            except Exception:
                logger.warning("Could not toggle category %s", category.name)
            # Save selection
            if config.get("save_button"):
                await click_button(page, config["save_button"], timeout=3000)
            await page.wait_for_timeout(wait_ms)

        # Collect results. The cookie list covers the whole browser context,
        # not just this page.
        result.scripts_loaded = _dedup_scripts(scripts)
        result.cookies_set = [c.get("name", "") for c in await context.cookies()]
        # Classify from every captured request. The stored list above keeps
        # only one URL per host and is capped, which would hide path-based
        # matches and late-loading hosts.
        result.tracking_services = find_tracking_services(scripts)

        # Find violations: services that don't belong to this category
        for service in result.tracking_services:
            expected_cat = SERVICE_CATEGORY_MAP.get(service)
            if expected_cat and expected_cat != category.name:
                result.violations.append({
                    "service": service,
                    "severity": "HIGH",
                    "text": f"{service} laedt bei '{category.label}' — gehoert aber zu '{CATEGORY_LABELS.get(expected_cat, expected_cat)}'",
                    "expected_category": expected_cat,
                    "actual_category": category.name,
                })
    except Exception as e:
        logger.error("Category test failed for %s: %s", category.name, e)
    finally:
        # Close the page even when the test failed half-way, so repeated
        # category runs do not pile up open pages in the shared context.
        if page is not None:
            try:
                await page.close()
            except Exception as e:
                logger.warning("Could not close page for %s: %s", category.name, e)
    return result
def _collect(request, scripts: list[str]):
if request.resource_type in ("script", "image", "xhr", "fetch"):
scripts.append(request.url)
def _dedup_scripts(scripts: list[str]) -> list[str]:
seen = set()
result = []
for url in scripts:
domain = url.split("/")[2] if len(url.split("/")) > 2 else url
if domain not in seen:
seen.add(domain)
result.append(url)
return result[:30]
+157
View File
@@ -0,0 +1,157 @@
"""
Script Analyzer — classifies detected scripts and cookies against known services.
"""
import re
from dataclasses import dataclass
# Regex (searched case-insensitively) -> service metadata.
# Dict order matters: lookups stop at the first pattern that qualifies.
SERVICE_PATTERNS: dict[str, dict] = {
    # The measurement-ID alternatives (UA-…, G-…) must not start in the middle
    # of a word. Without the lookbehind, case-insensitive matching hits
    # ordinary asset names such as "bg-image.png", "img-banner" or "aqua-1".
    r"google.?analytics|gtag|(?<![\w-])(?:UA-\d|G-\w{5})": {
        "name": "Google Analytics", "requires_consent": True,
        "legal_ref": "§25 TDDDG, Art. 44-49 DSGVO",
    },
    r"googletagmanager|gtm\.js": {
        "name": "Google Tag Manager", "requires_consent": True,
        "legal_ref": "§25 TDDDG",
    },
    r"facebook\.net|fbevents|fbq": {
        "name": "Meta/Facebook Pixel", "requires_consent": True,
        "legal_ref": "§25 TDDDG, Art. 44-49 DSGVO",
    },
    r"hotjar\.com|_hjSettings": {
        "name": "Hotjar", "requires_consent": True,
        "legal_ref": "§25 TDDDG (Session Recording)",
    },
    r"clarity\.ms": {
        "name": "Microsoft Clarity", "requires_consent": True,
        "legal_ref": "§25 TDDDG (Session Replay)",
    },
    r"tiktok\.com/i18n|analytics\.tiktok": {
        "name": "TikTok Pixel", "requires_consent": True,
        "legal_ref": "§25 TDDDG, Drittlandtransfer China",
    },
    r"linkedin\.com/insight|snap\.licdn": {
        "name": "LinkedIn Insight", "requires_consent": True,
        "legal_ref": "§25 TDDDG, Art. 44-49 DSGVO",
    },
    r"pinterest\.com/ct|pinimg\.com/ct": {
        "name": "Pinterest Tag", "requires_consent": True,
        "legal_ref": "§25 TDDDG",
    },
    r"criteo\.com|criteo\.net": {
        "name": "Criteo", "requires_consent": True,
        "legal_ref": "§25 TDDDG",
    },
    r"doubleclick\.net|googlesyndication": {
        "name": "Google Ads/DoubleClick", "requires_consent": True,
        "legal_ref": "§25 TDDDG, Art. 44-49 DSGVO",
    },
    r"fonts\.googleapis\.com|fonts\.gstatic": {
        "name": "Google Fonts", "requires_consent": True,
        "legal_ref": "LG Muenchen I, Az. 3 O 17493/20",
    },
    r"recaptcha|grecaptcha": {
        "name": "Google reCAPTCHA", "requires_consent": True,
        "legal_ref": "§25 TDDDG",
    },
    r"youtube\.com/embed|ytimg": {
        "name": "YouTube", "requires_consent": True,
        "legal_ref": "§25 TDDDG, Art. 44-49 DSGVO",
    },
    r"maps\.googleapis|maps\.google": {
        "name": "Google Maps", "requires_consent": True,
        "legal_ref": "§25 TDDDG",
    },
    r"intercom\.io|intercomcdn": {
        "name": "Intercom", "requires_consent": True,
        "legal_ref": "Art. 44-49 DSGVO",
    },
    r"zendesk\.com|zdassets": {
        "name": "Zendesk", "requires_consent": True,
        "legal_ref": "Art. 44-49 DSGVO",
    },
    r"sentry\.io|sentry-cdn": {
        "name": "Sentry", "requires_consent": False,
        "legal_ref": "Berechtigtes Interesse (Error Tracking)",
    },
    r"cdn\.cloudflare\.com": {
        "name": "Cloudflare CDN", "requires_consent": False,
        "legal_ref": "Berechtigtes Interesse (CDN)",
    },
    r"didomi|cookiebot|onetrust|usercentrics|consentmanager": {
        "name": "Consent Management", "requires_consent": False,
        "legal_ref": "Notwendig (CMP)",
    },
}
@dataclass
class Violation:
    """A single consent finding for one service."""

    # Service name as listed in SERVICE_PATTERNS.
    service: str
    # "HIGH" or "CRITICAL".
    severity: str
    # Human-readable description of the finding.
    text: str
    # Legal basis cited for the finding.
    legal_ref: str
def classify_scripts(scripts: list[str]) -> list[str]:
    """Return the sorted, de-duplicated names of known services among *scripts*.

    Each script is attributed to the first SERVICE_PATTERNS entry whose regex
    matches (case-insensitively), whether or not it requires consent.
    """
    recognised = set()
    for url in scripts:
        first_hit = next(
            (
                meta
                for pattern, meta in SERVICE_PATTERNS.items()
                if re.search(pattern, url, re.IGNORECASE)
            ),
            None,
        )
        if first_hit is not None:
            recognised.add(first_hit["name"])
    return sorted(recognised)
def find_tracking_services(scripts: list[str]) -> list[str]:
    """Return the sorted, de-duplicated names of consent-requiring services.

    Each script is attributed to the first SERVICE_PATTERNS entry that both
    matches (case-insensitively) and requires consent; matching entries that
    do not require consent are skipped rather than ending the search.
    """
    names = set()
    for url in scripts:
        service = next(
            (
                meta["name"]
                for pattern, meta in SERVICE_PATTERNS.items()
                if re.search(pattern, url, re.IGNORECASE) and meta["requires_consent"]
            ),
            None,
        )
        if service is not None:
            names.add(service)
    return sorted(names)
def find_violations_before_consent(scripts: list[str]) -> list[Violation]:
    """Report consent-requiring services among *scripts* as HIGH violations.

    One violation is produced per service, in order of first appearance.
    """
    flagged: dict[str, Violation] = {}
    for url in scripts:
        for pattern, meta in SERVICE_PATTERNS.items():
            if not (re.search(pattern, url, re.IGNORECASE) and meta["requires_consent"]):
                continue
            service = meta["name"]
            if service not in flagged:
                flagged[service] = Violation(
                    service=service, severity="HIGH",
                    text=f"{service} laedt OHNE vorherige Einwilligung",
                    legal_ref=meta["legal_ref"],
                )
            # Only the first qualifying pattern counts for a script.
            break
    return list(flagged.values())
def find_violations_after_reject(
    before_scripts: list[str], after_scripts: list[str],
) -> list[Violation]:
    """Report services that load both before and after rejection as CRITICAL.

    Only services present in both script lists are flagged; a service that
    appears solely in *after_scripts* is not reported by this function.
    Results follow the sorted order of the service names.
    """
    loaded_after = find_tracking_services(after_scripts)
    loaded_before = find_tracking_services(before_scripts)

    findings = []
    for service in loaded_after:
        if service not in loaded_before:
            continue
        # Was already loading before AND still loads after reject = CRITICAL
        entry = next(
            (meta for meta in SERVICE_PATTERNS.values() if meta["name"] == service),
            None,
        )
        if entry is None:
            continue
        findings.append(Violation(
            service=service, severity="CRITICAL",
            text=f"{service} laedt TROTZ Ablehnung — moegliches Dark Pattern",
            legal_ref=entry["legal_ref"] + ", Art. 5(3) ePrivacy",
        ))
    return findings