f3e44cf59f
banner_detector.py, script_analyzer.py, category_tester.py, authenticated_scanner.py were only on the feature branch — needed for consent-tester to start. Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
158 lines
5.5 KiB
Python
158 lines
5.5 KiB
Python
"""
Script Analyzer — classifies detected scripts and cookies against known services.
"""
|
|
|
|
import re
|
|
from dataclasses import dataclass
|
|
|
|
# Lookup table: regex -> metadata of the third-party service it identifies.
# The functions in this module search every key with re.IGNORECASE.
#   name             -- display name of the service
#   requires_consent -- whether the service is treated as needing prior consent
#   legal_ref        -- legal reference copied into Violation.legal_ref
# Order matters: the scanners walk the table in insertion order and stop at
# the first hit, so more specific entries must stay ahead of broader ones.
SERVICE_PATTERNS: dict[str, dict] = {
    # The bare-ID alternatives are pinned to the real ID shapes
    # (UA-<digits>-<digits>, G-<uppercase alphanumerics>) and matched
    # case-sensitively. The looser "UA-\d" / "G-\w{5}" forms, searched with
    # re.IGNORECASE, also hit ordinary file names such as
    # "lazy-loading-images.js" or "aqua-1".
    r"google.?analytics|gtag|(?-i:\bUA-\d{4,}-\d+\b)|(?-i:\bG-[A-Z0-9]{6,}\b)": {
        "name": "Google Analytics", "requires_consent": True,
        "legal_ref": "§25 TDDDG, Art. 44-49 DSGVO",
    },
    r"googletagmanager|gtm\.js": {
        "name": "Google Tag Manager", "requires_consent": True,
        "legal_ref": "§25 TDDDG",
    },
    r"facebook\.net|fbevents|fbq": {
        "name": "Meta/Facebook Pixel", "requires_consent": True,
        "legal_ref": "§25 TDDDG, Art. 44-49 DSGVO",
    },
    r"hotjar\.com|_hjSettings": {
        "name": "Hotjar", "requires_consent": True,
        "legal_ref": "§25 TDDDG (Session Recording)",
    },
    r"clarity\.ms": {
        "name": "Microsoft Clarity", "requires_consent": True,
        "legal_ref": "§25 TDDDG (Session Replay)",
    },
    r"tiktok\.com/i18n|analytics\.tiktok": {
        "name": "TikTok Pixel", "requires_consent": True,
        "legal_ref": "§25 TDDDG, Drittlandtransfer China",
    },
    r"linkedin\.com/insight|snap\.licdn": {
        "name": "LinkedIn Insight", "requires_consent": True,
        "legal_ref": "§25 TDDDG, Art. 44-49 DSGVO",
    },
    r"pinterest\.com/ct|pinimg\.com/ct": {
        "name": "Pinterest Tag", "requires_consent": True,
        "legal_ref": "§25 TDDDG",
    },
    r"criteo\.com|criteo\.net": {
        "name": "Criteo", "requires_consent": True,
        "legal_ref": "§25 TDDDG",
    },
    r"doubleclick\.net|googlesyndication": {
        "name": "Google Ads/DoubleClick", "requires_consent": True,
        "legal_ref": "§25 TDDDG, Art. 44-49 DSGVO",
    },
    r"fonts\.googleapis\.com|fonts\.gstatic": {
        "name": "Google Fonts", "requires_consent": True,
        "legal_ref": "LG Muenchen I, Az. 3 O 17493/20",
    },
    r"recaptcha|grecaptcha": {
        "name": "Google reCAPTCHA", "requires_consent": True,
        "legal_ref": "§25 TDDDG",
    },
    r"youtube\.com/embed|ytimg": {
        "name": "YouTube", "requires_consent": True,
        "legal_ref": "§25 TDDDG, Art. 44-49 DSGVO",
    },
    r"maps\.googleapis|maps\.google": {
        "name": "Google Maps", "requires_consent": True,
        "legal_ref": "§25 TDDDG",
    },
    r"intercom\.io|intercomcdn": {
        "name": "Intercom", "requires_consent": True,
        "legal_ref": "Art. 44-49 DSGVO",
    },
    r"zendesk\.com|zdassets": {
        "name": "Zendesk", "requires_consent": True,
        "legal_ref": "Art. 44-49 DSGVO",
    },
    # --- Entries below are treated as not requiring consent. ---
    r"sentry\.io|sentry-cdn": {
        "name": "Sentry", "requires_consent": False,
        "legal_ref": "Berechtigtes Interesse (Error Tracking)",
    },
    # NOTE(review): this pattern does not cover "cdnjs.cloudflare.com" —
    # confirm whether that host is meant to be classified here as well.
    r"cdn\.cloudflare\.com": {
        "name": "Cloudflare CDN", "requires_consent": False,
        "legal_ref": "Berechtigtes Interesse (CDN)",
    },
    r"didomi|cookiebot|onetrust|usercentrics|consentmanager": {
        "name": "Consent Management", "requires_consent": False,
        "legal_ref": "Notwendig (CMP)",
    },
}
|
|
|
|
|
|
@dataclass
class Violation:
    """One consent finding for a single detected service."""

    # Display name of the offending service (the "name" of a pattern entry).
    service: str
    # Severity label; the functions in this module emit "HIGH" or "CRITICAL".
    severity: str
    # Human-readable description of the finding.
    text: str
    # Legal reference cited for the finding.
    legal_ref: str
|
|
|
|
|
|
def classify_scripts(
    scripts: list[str],
    patterns: "dict[str, dict] | None" = None,
) -> list[str]:
    """Classify script URLs into known service names.

    Each script is attributed to at most one service: the first entry of the
    pattern table (in insertion order) whose regex matches it,
    case-insensitively. Scripts that match nothing are ignored.

    Args:
        scripts: Script sources to inspect.
        patterns: Optional pattern table shaped like ``SERVICE_PATTERNS``
            (regex -> metadata dict with a ``"name"`` key). Defaults to
            ``SERVICE_PATTERNS``.

    Returns:
        Sorted list of the distinct service names that matched.
    """
    table = SERVICE_PATTERNS if patterns is None else patterns
    services = set()
    for script in scripts:
        for pattern, meta in table.items():
            if re.search(pattern, script, re.IGNORECASE):
                services.add(meta["name"])
                # First match wins; later entries are not consulted.
                break
    return sorted(services)
|
|
|
|
|
|
def find_tracking_services(
    scripts: list[str],
    patterns: "dict[str, dict] | None" = None,
) -> list[str]:
    """Find services that require consent.

    A script is attributed to the first consent-requiring entry of the pattern
    table whose regex matches it (case-insensitive). Entries whose
    ``"requires_consent"`` is falsy are skipped rather than treated as a
    match, so a script that also matches an exempt pattern is still reported.

    Args:
        scripts: Script sources to inspect.
        patterns: Optional pattern table shaped like ``SERVICE_PATTERNS``
            (regex -> metadata dict with ``"name"`` and
            ``"requires_consent"`` keys). Defaults to ``SERVICE_PATTERNS``.

    Returns:
        Sorted list of the distinct consent-requiring service names found.
    """
    table = SERVICE_PATTERNS if patterns is None else patterns
    found = set()
    for script in scripts:
        for pattern, meta in table.items():
            # Cheap flag check first: exempt entries never need the regex.
            if meta["requires_consent"] and re.search(pattern, script, re.IGNORECASE):
                found.add(meta["name"])
                break
    return sorted(found)
|
|
|
|
|
|
def find_violations_before_consent(
    scripts: list[str],
    patterns: "dict[str, dict] | None" = None,
) -> list[Violation]:
    """Find tracking scripts that load without consent (HIGH).

    Every script is checked against the consent-requiring entries of the
    pattern table (case-insensitive, insertion order, first hit wins). Each
    service is reported once, in the order it is first encountered.

    Args:
        scripts: Script sources observed before any consent was given.
        patterns: Optional pattern table shaped like ``SERVICE_PATTERNS``
            (regex -> metadata dict with ``"name"``, ``"requires_consent"``
            and ``"legal_ref"`` keys). Defaults to ``SERVICE_PATTERNS``.

    Returns:
        One ``Violation`` with severity ``"HIGH"`` per distinct service.
    """
    table = SERVICE_PATTERNS if patterns is None else patterns
    violations = []
    reported = set()
    for script in scripts:
        for pattern, meta in table.items():
            if not (meta["requires_consent"] and re.search(pattern, script, re.IGNORECASE)):
                continue
            name = meta["name"]
            if name not in reported:
                reported.add(name)
                violations.append(Violation(
                    service=name, severity="HIGH",
                    text=f"{name} laedt OHNE vorherige Einwilligung",
                    legal_ref=meta["legal_ref"],
                ))
            # The script is attributed to this service; skip remaining entries.
            break
    return violations
|
|
|
|
|
|
def find_violations_after_reject(
    before_scripts: list[str], after_scripts: list[str],
) -> list[Violation]:
    """Find tracking scripts that still load after rejection (CRITICAL).

    A service is reported when ``find_tracking_services`` detects it in both
    script lists, i.e. it was already loading before the user decided and is
    still loading after the rejection.

    Args:
        before_scripts: Script sources observed before the rejection.
        after_scripts: Script sources observed after the rejection.

    Returns:
        One ``Violation`` with severity ``"CRITICAL"`` per persisting service,
        ordered by service name. The legal reference is the service's own
        reference with ", Art. 5(3) ePrivacy" appended.
    """
    still_loaded = find_tracking_services(after_scripts)
    loaded_before = set(find_tracking_services(before_scripts))

    # NOTE(review): a tracker that appears only in after_scripts is not
    # reported by this function — confirm that this is intended.
    violations = []
    for service in still_loaded:
        if service not in loaded_before:
            continue
        # Look up the metadata of the first table entry carrying this name.
        meta = next(
            (entry for entry in SERVICE_PATTERNS.values() if entry["name"] == service),
            None,
        )
        if meta is None:
            continue
        violations.append(Violation(
            service=service, severity="CRITICAL",
            text=f"{service} laedt TROTZ Ablehnung — moegliches Dark Pattern",
            legal_ref=meta["legal_ref"] + ", Art. 5(3) ePrivacy",
        ))
    return violations
|