fix: restore all missing consent-tester service modules

banner_detector.py, script_analyzer.py, category_tester.py, authenticated_scanner.py were only on the feature branch — needed for consent-tester to start. Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
2026-05-05 00:14:26 +02:00
parent 3fade26d89
commit f3e44cf59f
4 changed files with 814 additions and 0 deletions
@@ -0,0 +1,157 @@
+"""
+Script Analyzer — classifies detected scripts and cookies against known services.
+"""
+
+import re
+from dataclasses import dataclass
+
+# Maps a regular expression (searched case-insensitively against each script
+# URL/snippet by the functions in this module) to metadata about the service:
+#   name             -- display name used in results and violation texts
+#   requires_consent -- whether the service counts as consent-requiring tracking
+#   legal_ref        -- legal reference attached to reported violations
+# Order matters: the matching loops stop at the first pattern that hits.
+SERVICE_PATTERNS: dict[str, dict] = {
+    # The property-ID alternatives are word-bounded and case-sensitive
+    # ((?-i:...) switches IGNORECASE off locally). Without that, ordinary
+    # asset names such as "lazy-loading-images.js" ("g-image") or
+    # "aqua-1.css" ("ua-1") were misclassified as Google Analytics.
+    r"google.?analytics|gtag|\b(?-i:UA-\d{4,}|G-[A-Z0-9]{6,})\b": {
+        "name": "Google Analytics", "requires_consent": True,
+        "legal_ref": "§25 TDDDG, Art. 44-49 DSGVO",
+    },
+    r"googletagmanager|gtm\.js": {
+        "name": "Google Tag Manager", "requires_consent": True,
+        "legal_ref": "§25 TDDDG",
+    },
+    r"facebook\.net|fbevents|fbq": {
+        "name": "Meta/Facebook Pixel", "requires_consent": True,
+        "legal_ref": "§25 TDDDG, Art. 44-49 DSGVO",
+    },
+    r"hotjar\.com|_hjSettings": {
+        "name": "Hotjar", "requires_consent": True,
+        "legal_ref": "§25 TDDDG (Session Recording)",
+    },
+    r"clarity\.ms": {
+        "name": "Microsoft Clarity", "requires_consent": True,
+        "legal_ref": "§25 TDDDG (Session Replay)",
+    },
+    r"tiktok\.com/i18n|analytics\.tiktok": {
+        "name": "TikTok Pixel", "requires_consent": True,
+        "legal_ref": "§25 TDDDG, Drittlandtransfer China",
+    },
+    r"linkedin\.com/insight|snap\.licdn": {
+        "name": "LinkedIn Insight", "requires_consent": True,
+        "legal_ref": "§25 TDDDG, Art. 44-49 DSGVO",
+    },
+    r"pinterest\.com/ct|pinimg\.com/ct": {
+        "name": "Pinterest Tag", "requires_consent": True,
+        "legal_ref": "§25 TDDDG",
+    },
+    r"criteo\.com|criteo\.net": {
+        "name": "Criteo", "requires_consent": True,
+        "legal_ref": "§25 TDDDG",
+    },
+    r"doubleclick\.net|googlesyndication": {
+        "name": "Google Ads/DoubleClick", "requires_consent": True,
+        "legal_ref": "§25 TDDDG, Art. 44-49 DSGVO",
+    },
+    r"fonts\.googleapis\.com|fonts\.gstatic": {
+        "name": "Google Fonts", "requires_consent": True,
+        "legal_ref": "LG Muenchen I, Az. 3 O 17493/20",
+    },
+    r"recaptcha|grecaptcha": {
+        "name": "Google reCAPTCHA", "requires_consent": True,
+        "legal_ref": "§25 TDDDG",
+    },
+    r"youtube\.com/embed|ytimg": {
+        "name": "YouTube", "requires_consent": True,
+        "legal_ref": "§25 TDDDG, Art. 44-49 DSGVO",
+    },
+    r"maps\.googleapis|maps\.google": {
+        "name": "Google Maps", "requires_consent": True,
+        "legal_ref": "§25 TDDDG",
+    },
+    r"intercom\.io|intercomcdn": {
+        "name": "Intercom", "requires_consent": True,
+        "legal_ref": "Art. 44-49 DSGVO",
+    },
+    r"zendesk\.com|zdassets": {
+        "name": "Zendesk", "requires_consent": True,
+        "legal_ref": "Art. 44-49 DSGVO",
+    },
+    r"sentry\.io|sentry-cdn": {
+        "name": "Sentry", "requires_consent": False,
+        "legal_ref": "Berechtigtes Interesse (Error Tracking)",
+    },
+    r"cdn\.cloudflare\.com": {
+        "name": "Cloudflare CDN", "requires_consent": False,
+        "legal_ref": "Berechtigtes Interesse (CDN)",
+    },
+    r"didomi|cookiebot|onetrust|usercentrics|consentmanager": {
+        "name": "Consent Management", "requires_consent": False,
+        "legal_ref": "Notwendig (CMP)",
+    },
+}
+
+
+@dataclass
+class Violation:
+    """One consent finding reported for a single detected service."""
+
+    # Display name of the offending service (the "name" of a SERVICE_PATTERNS entry).
+    service: str
+    # Severity label; this module emits "HIGH" and "CRITICAL".
+    severity: str  # "HIGH", "CRITICAL"
+    # Human-readable description of the finding.
+    text: str
+    # Legal reference(s) that the finding is based on.
+    legal_ref: str
+
+
+def classify_scripts(scripts: list[str]) -> list[str]:
+    """Return the sorted, de-duplicated names of known services found in *scripts*.
+
+    Each script is attributed to at most one service: the first entry of
+    SERVICE_PATTERNS (in dict order) whose pattern matches case-insensitively.
+    Scripts that match no pattern are ignored.
+    """
+    found = set()
+    for url in scripts:
+        first_hit = next(
+            (
+                info["name"]
+                for regex, info in SERVICE_PATTERNS.items()
+                if re.search(regex, url, re.IGNORECASE)
+            ),
+            None,
+        )
+        if first_hit is not None:
+            found.add(first_hit)
+    ordered = list(found)
+    ordered.sort()
+    return ordered
+
+
+def find_tracking_services(scripts: list[str]) -> list[str]:
+    """Return the sorted, unique names of consent-requiring services in *scripts*.
+
+    For every script the patterns are tried in SERVICE_PATTERNS order; the
+    first one that both matches (case-insensitively) and is flagged
+    ``requires_consent`` decides the service for that script. Matches on
+    entries that do not require consent are skipped, not treated as final.
+    """
+    names = set()
+    for url in scripts:
+        for regex, info in SERVICE_PATTERNS.items():
+            if not (re.search(regex, url, re.IGNORECASE) and info["requires_consent"]):
+                continue
+            names.add(info["name"])
+            break
+    return sorted(names)
+
+
+def find_violations_before_consent(scripts: list[str]) -> list[Violation]:
+    """Build one HIGH violation per consent-requiring service seen in *scripts*.
+
+    Scripts are scanned in order; for each, the first SERVICE_PATTERNS entry
+    that matches (case-insensitively) and requires consent is used. A service
+    is reported only once, in order of first appearance.
+    """
+    # Dicts keep insertion order, so this doubles as the "already seen" set
+    # and as the ordered result list.
+    by_service = {}
+    for url in scripts:
+        for regex, info in SERVICE_PATTERNS.items():
+            if not (re.search(regex, url, re.IGNORECASE) and info["requires_consent"]):
+                continue
+            label = info["name"]
+            if label not in by_service:
+                by_service[label] = Violation(
+                    service=label,
+                    severity="HIGH",
+                    text=f"{label} laedt OHNE vorherige Einwilligung",
+                    legal_ref=info["legal_ref"],
+                )
+            # The first consent-requiring match settles this script.
+            break
+    return list(by_service.values())
+
+
+def find_violations_after_reject(
+    before_scripts: list[str], after_scripts: list[str],
+) -> list[Violation]:
+    """Build CRITICAL violations for tracking that persists after a rejection.
+
+    A service is reported when find_tracking_services() detects it in both
+    *before_scripts* and *after_scripts*. Services detected only in
+    *after_scripts* are not reported here. Results follow the sorted order
+    of the services found in *after_scripts*.
+    """
+    loaded_after = find_tracking_services(after_scripts)
+    loaded_before = find_tracking_services(before_scripts)
+
+    # Legal reference of the first SERVICE_PATTERNS entry for each service name.
+    legal_by_name = {}
+    for info in SERVICE_PATTERNS.values():
+        legal_by_name.setdefault(info["name"], info["legal_ref"])
+
+    return [
+        Violation(
+            service=name,
+            severity="CRITICAL",
+            text=f"{name} laedt TROTZ Ablehnung — moegliches Dark Pattern",
+            legal_ref=legal_by_name[name] + ", Art. 5(3) ePrivacy",
+        )
+        for name in loaded_after
+        if name in loaded_before and name in legal_by_name
+    ]