fix: restore all missing consent-tester service modules

banner_detector.py, script_analyzer.py, category_tester.py, authenticated_scanner.py
were only on the feature branch — needed for consent-tester to start.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
Benjamin Admin
2026-05-05 00:14:26 +02:00
parent 3fade26d89
commit f3e44cf59f
4 changed files with 814 additions and 0 deletions
+157
View File
@@ -0,0 +1,157 @@
"""
Script Analyzer — classifies detected scripts and cookies against known services.
"""
import re
from dataclasses import dataclass
# Registry of known third-party services.
#
# Each key is a regular expression; the functions in this module apply it with
# ``re.search(pattern, script, re.IGNORECASE)``.  Each value describes the
# service:
#   "name"             -- display name used in results and violation texts
#   "requires_consent" -- True when the service counts as tracking
#   "legal_ref"        -- legal citation attached to reported violations
#
# Order matters: callers stop at the first pattern that matches a script, so
# more specific services must stay ahead of broader ones.
SERVICE_PATTERNS: dict[str, dict] = {
    # The ID alternatives are anchored and kept case-sensitive via the scoped
    # ``(?-i:...)`` group.  The previous unanchored ``UA-\d|G-\w{5}`` matched
    # ordinary strings under IGNORECASE (e.g. "img-loader.js", "g-recaptcha")
    # and therefore reported unrelated scripts as Google Analytics.
    r"google.?analytics|gtag|(?-i:\bUA-\d{4,}-\d+|\bG-[A-Z0-9]{6,}\b)": {
        "name": "Google Analytics",
        "requires_consent": True,
        "legal_ref": "§25 TDDDG, Art. 44-49 DSGVO",
    },
    r"googletagmanager|gtm\.js": {
        "name": "Google Tag Manager",
        "requires_consent": True,
        "legal_ref": "§25 TDDDG",
    },
    r"facebook\.net|fbevents|fbq": {
        "name": "Meta/Facebook Pixel",
        "requires_consent": True,
        "legal_ref": "§25 TDDDG, Art. 44-49 DSGVO",
    },
    r"hotjar\.com|_hjSettings": {
        "name": "Hotjar",
        "requires_consent": True,
        "legal_ref": "§25 TDDDG (Session Recording)",
    },
    r"clarity\.ms": {
        "name": "Microsoft Clarity",
        "requires_consent": True,
        "legal_ref": "§25 TDDDG (Session Replay)",
    },
    r"tiktok\.com/i18n|analytics\.tiktok": {
        "name": "TikTok Pixel",
        "requires_consent": True,
        "legal_ref": "§25 TDDDG, Drittlandtransfer China",
    },
    r"linkedin\.com/insight|snap\.licdn": {
        "name": "LinkedIn Insight",
        "requires_consent": True,
        "legal_ref": "§25 TDDDG, Art. 44-49 DSGVO",
    },
    r"pinterest\.com/ct|pinimg\.com/ct": {
        "name": "Pinterest Tag",
        "requires_consent": True,
        "legal_ref": "§25 TDDDG",
    },
    r"criteo\.com|criteo\.net": {
        "name": "Criteo",
        "requires_consent": True,
        "legal_ref": "§25 TDDDG",
    },
    r"doubleclick\.net|googlesyndication": {
        "name": "Google Ads/DoubleClick",
        "requires_consent": True,
        "legal_ref": "§25 TDDDG, Art. 44-49 DSGVO",
    },
    r"fonts\.googleapis\.com|fonts\.gstatic": {
        "name": "Google Fonts",
        "requires_consent": True,
        "legal_ref": "LG Muenchen I, Az. 3 O 17493/20",
    },
    r"recaptcha|grecaptcha": {
        "name": "Google reCAPTCHA",
        "requires_consent": True,
        "legal_ref": "§25 TDDDG",
    },
    r"youtube\.com/embed|ytimg": {
        "name": "YouTube",
        "requires_consent": True,
        "legal_ref": "§25 TDDDG, Art. 44-49 DSGVO",
    },
    r"maps\.googleapis|maps\.google": {
        "name": "Google Maps",
        "requires_consent": True,
        "legal_ref": "§25 TDDDG",
    },
    r"intercom\.io|intercomcdn": {
        "name": "Intercom",
        "requires_consent": True,
        "legal_ref": "Art. 44-49 DSGVO",
    },
    r"zendesk\.com|zdassets": {
        "name": "Zendesk",
        "requires_consent": True,
        "legal_ref": "Art. 44-49 DSGVO",
    },
    # Entries below are recognised but not treated as tracking.
    r"sentry\.io|sentry-cdn": {
        "name": "Sentry",
        "requires_consent": False,
        "legal_ref": "Berechtigtes Interesse (Error Tracking)",
    },
    r"cdn\.cloudflare\.com": {
        "name": "Cloudflare CDN",
        "requires_consent": False,
        "legal_ref": "Berechtigtes Interesse (CDN)",
    },
    r"didomi|cookiebot|onetrust|usercentrics|consentmanager": {
        "name": "Consent Management",
        "requires_consent": False,
        "legal_ref": "Notwendig (CMP)",
    },
}
@dataclass
class Violation:
    """A single consent finding reported for one detected service."""

    # Display name of the offending service (the "name" of a registry entry).
    service: str
    # Severity label; this module emits "HIGH" and "CRITICAL".
    severity: str
    # Human-readable description of the finding.
    text: str
    # Legal citation supporting the finding.
    legal_ref: str
def classify_scripts(scripts: list[str]) -> list[str]:
    """Return the sorted, de-duplicated names of known services in *scripts*.

    Every script is attributed to at most one service: the first entry of
    ``SERVICE_PATTERNS`` (in dict order) whose regex is found in the script,
    searched case-insensitively. Scripts that match nothing are ignored.
    """
    detected: set[str] = set()
    for url in scripts:
        # ``next`` stops at the first matching pattern, mirroring a
        # first-hit-wins scan of the registry.
        first_hit = next(
            (
                info["name"]
                for regex, info in SERVICE_PATTERNS.items()
                if re.search(regex, url, re.IGNORECASE)
            ),
            None,
        )
        if first_hit is not None:
            detected.add(first_hit)
    return sorted(detected)
def find_tracking_services(scripts: list[str]) -> list[str]:
    """Return the sorted, unique names of consent-requiring services in *scripts*.

    For each script the registry is scanned in order and the first entry that
    both matches (case-insensitive search) and has ``requires_consent`` set is
    recorded. Matching entries that do not require consent are skipped, and
    the scan continues with the remaining patterns.
    """
    needs_consent: set[str] = set()
    for url in scripts:
        for regex, info in SERVICE_PATTERNS.items():
            if not re.search(regex, url, re.IGNORECASE):
                continue
            if info["requires_consent"]:
                needs_consent.add(info["name"])
                # One tracking service per script is enough.
                break
    return sorted(needs_consent)
def find_violations_before_consent(scripts: list[str]) -> list[Violation]:
    """Report consent-requiring services found in *scripts* as HIGH violations.

    Each service is reported once, in the order it is first encountered.
    Per script, only the first registry entry that matches and requires
    consent is considered.
    """
    # Dicts keep insertion order, so this both de-duplicates by service name
    # and preserves first-seen ordering.
    reported: dict[str, Violation] = {}
    for url in scripts:
        for regex, info in SERVICE_PATTERNS.items():
            is_tracker = re.search(regex, url, re.IGNORECASE) and info["requires_consent"]
            if not is_tracker:
                continue
            service = info["name"]
            if service not in reported:
                reported[service] = Violation(
                    service=service,
                    severity="HIGH",
                    text=f"{service} laedt OHNE vorherige Einwilligung",
                    legal_ref=info["legal_ref"],
                )
            # Stop scanning patterns for this script after its first tracker.
            break
    return list(reported.values())
def find_violations_after_reject(
    before_scripts: list[str], after_scripts: list[str],
) -> list[Violation]:
    """Report trackers that load both before and after rejection as CRITICAL.

    A service is reported only when it is a consent-requiring service in
    *after_scripts* and was also one in *before_scripts*; services that appear
    only after the rejection are not reported here. Results follow the sorted
    order of the service names.
    """
    still_loading = find_tracking_services(after_scripts)
    loaded_before = set(find_tracking_services(before_scripts))

    critical: list[Violation] = []
    for service in still_loading:
        if service not in loaded_before:
            continue
        # Look up the first registry entry carrying this service name to get
        # its legal reference.
        info = next(
            (entry for entry in SERVICE_PATTERNS.values() if entry["name"] == service),
            None,
        )
        if info is None:
            continue
        critical.append(
            Violation(
                service=service,
                severity="CRITICAL",
                text=f"{service} laedt TROTZ Ablehnung — moegliches Dark Pattern",
                legal_ref=info["legal_ref"] + ", Art. 5(3) ePrivacy",
            )
        )
    return critical