"""
Script Analyzer — classifies detected scripts and cookies against known services.
"""

import re
from dataclasses import dataclass
from typing import Optional

# Maps a regex (searched case-insensitively against each script string) to the
# service it identifies. Insertion order matters: the first matching pattern
# wins, so more specific services must come before more generic ones.
#
# The Google Analytics ID alternatives are anchored with ``\b``. Without the
# anchor, ``G-\w{5}`` matches any "...g-xxxxx" fragment (e.g.
# "loading-spinner.js") and ``UA-\d`` matches e.g. "aqua-2.js", producing
# false consent violations.
SERVICE_PATTERNS: dict[str, dict] = {
    r"google.?analytics|gtag|\bUA-\d|\bG-\w{5}": {
        "name": "Google Analytics",
        "requires_consent": True,
        "legal_ref": "§25 TDDDG, Art. 44-49 DSGVO",
    },
    r"googletagmanager|gtm\.js": {
        "name": "Google Tag Manager",
        "requires_consent": True,
        "legal_ref": "§25 TDDDG",
    },
    r"facebook\.net|fbevents|fbq": {
        "name": "Meta/Facebook Pixel",
        "requires_consent": True,
        "legal_ref": "§25 TDDDG, Art. 44-49 DSGVO",
    },
    r"hotjar\.com|_hjSettings": {
        "name": "Hotjar",
        "requires_consent": True,
        "legal_ref": "§25 TDDDG (Session Recording)",
    },
    r"clarity\.ms": {
        "name": "Microsoft Clarity",
        "requires_consent": True,
        "legal_ref": "§25 TDDDG (Session Replay)",
    },
    r"tiktok\.com/i18n|analytics\.tiktok": {
        "name": "TikTok Pixel",
        "requires_consent": True,
        "legal_ref": "§25 TDDDG, Drittlandtransfer China",
    },
    r"linkedin\.com/insight|snap\.licdn": {
        "name": "LinkedIn Insight",
        "requires_consent": True,
        "legal_ref": "§25 TDDDG, Art. 44-49 DSGVO",
    },
    r"pinterest\.com/ct|pinimg\.com/ct": {
        "name": "Pinterest Tag",
        "requires_consent": True,
        "legal_ref": "§25 TDDDG",
    },
    r"criteo\.com|criteo\.net": {
        "name": "Criteo",
        "requires_consent": True,
        "legal_ref": "§25 TDDDG",
    },
    r"doubleclick\.net|googlesyndication": {
        "name": "Google Ads/DoubleClick",
        "requires_consent": True,
        "legal_ref": "§25 TDDDG, Art. 44-49 DSGVO",
    },
    r"fonts\.googleapis\.com|fonts\.gstatic": {
        "name": "Google Fonts",
        "requires_consent": True,
        "legal_ref": "LG Muenchen I, Az. 3 O 17493/20",
    },
    r"recaptcha|grecaptcha": {
        "name": "Google reCAPTCHA",
        "requires_consent": True,
        "legal_ref": "§25 TDDDG",
    },
    r"youtube\.com/embed|ytimg": {
        "name": "YouTube",
        "requires_consent": True,
        "legal_ref": "§25 TDDDG, Art. 44-49 DSGVO",
    },
    r"maps\.googleapis|maps\.google": {
        "name": "Google Maps",
        "requires_consent": True,
        "legal_ref": "§25 TDDDG",
    },
    r"intercom\.io|intercomcdn": {
        "name": "Intercom",
        "requires_consent": True,
        "legal_ref": "Art. 44-49 DSGVO",
    },
    r"zendesk\.com|zdassets": {
        "name": "Zendesk",
        "requires_consent": True,
        "legal_ref": "Art. 44-49 DSGVO",
    },
    r"sentry\.io|sentry-cdn": {
        "name": "Sentry",
        "requires_consent": False,
        "legal_ref": "Berechtigtes Interesse (Error Tracking)",
    },
    r"cdn\.cloudflare\.com": {
        "name": "Cloudflare CDN",
        "requires_consent": False,
        "legal_ref": "Berechtigtes Interesse (CDN)",
    },
    r"didomi|cookiebot|onetrust|usercentrics|consentmanager": {
        "name": "Consent Management",
        "requires_consent": False,
        "legal_ref": "Notwendig (CMP)",
    },
}


@dataclass
class Violation:
    """A single consent finding for one detected service."""

    service: str
    severity: str  # "HIGH", "CRITICAL"
    text: str
    legal_ref: str


def _first_match(script: str, *, consent_only: bool = False) -> Optional[dict]:
    """Return the metadata of the first pattern in SERVICE_PATTERNS matching *script*.

    Patterns are tried in dictionary order with ``re.IGNORECASE``. When
    *consent_only* is true, entries whose ``requires_consent`` is false are
    skipped. Returns ``None`` if nothing matches.
    """
    # SERVICE_PATTERNS is read on every call so runtime additions are honoured;
    # the ``re`` module caches the compiled patterns internally.
    for pattern, meta in SERVICE_PATTERNS.items():
        if consent_only and not meta["requires_consent"]:
            continue
        if re.search(pattern, script, re.IGNORECASE):
            return meta
    return None


def classify_scripts(scripts: list[str]) -> list[str]:
    """Classify script URLs into known service names.

    Each script is attributed to at most one service (the first matching
    pattern). Returns the distinct service names in sorted order; scripts
    matching no pattern are ignored.
    """
    services = set()
    for script in scripts:
        meta = _first_match(script)
        if meta is not None:
            services.add(meta["name"])
    return sorted(services)


def find_tracking_services(scripts: list[str]) -> list[str]:
    """Find services that require consent.

    Returns the distinct, sorted names of consent-requiring services
    detected among *scripts*.
    """
    tracking = set()
    for script in scripts:
        meta = _first_match(script, consent_only=True)
        if meta is not None:
            tracking.add(meta["name"])
    return sorted(tracking)


def find_violations_before_consent(scripts: list[str]) -> list[Violation]:
    """Find tracking scripts that load without consent (HIGH).

    Returns one ``Violation`` per consent-requiring service, in order of
    first appearance in *scripts*.
    """
    violations = []
    seen = set()
    for script in scripts:
        meta = _first_match(script, consent_only=True)
        if meta is None:
            continue
        name = meta["name"]
        if name in seen:
            # Report each service once, even if several of its scripts load.
            continue
        seen.add(name)
        violations.append(Violation(
            service=name,
            severity="HIGH",
            text=f"{name} laedt OHNE vorherige Einwilligung",
            legal_ref=meta["legal_ref"],
        ))
    return violations


def find_violations_after_reject(
    before_scripts: list[str],
    after_scripts: list[str],
) -> list[Violation]:
    """Find tracking scripts that still load after rejection (CRITICAL).

    A service is reported only if it was detected both in *before_scripts*
    and in *after_scripts*. Violations are returned sorted by service name.
    """
    before_tracking = set(find_tracking_services(before_scripts))

    # First entry wins for a given name, mirroring a first-match scan of
    # SERVICE_PATTERNS.
    meta_by_name: dict[str, dict] = {}
    for meta in SERVICE_PATTERNS.values():
        meta_by_name.setdefault(meta["name"], meta)

    violations = []
    for service in find_tracking_services(after_scripts):
        if service not in before_tracking:
            continue
        # Was already loading before AND still loads after reject = CRITICAL
        meta = meta_by_name[service]
        violations.append(Violation(
            service=service,
            severity="CRITICAL",
            text=f"{service} laedt TROTZ Ablehnung — moegliches Dark Pattern",
            legal_ref=meta["legal_ref"] + ", Art. 5(3) ePrivacy",
        ))
    return violations