Files
breakpilot-compliance/consent-tester/checks/banner_runner.py
T
Benjamin Admin d2dc0c9fe4 feat: Deep consent verification — DataLayer, Storage, GCM, TCF
5 verification layers added to the 3-phase banner test:

1. DataLayer/GTM Interception: Proxy on window.dataLayer captures
   all push() events. Distinguishes safe lifecycle events (gtm.js,
   gtm.dom) from tracking events (page_view, conversion, purchase).
   Flags tracking events before consent as violations.

2. localStorage/sessionStorage Monitoring: Intercepts setItem() to
   detect tracking keys (_ga, _fbp, amplitude, mixpanel, etc.)
   written before consent.

3. Google Consent Mode v2 Runtime Verification: Reads actual GCM
   state (analytics_storage, ad_storage) per phase. Verifies
   default=denied before consent, stays denied after reject,
   switches to granted after accept.

4. TCF v2.2 State: Reads __tcfapi('getTCData') if available.
   Verifies consent purpose states match user choice.

5. Cookie Attribute Analysis: Domain (1st vs 3rd party), expires
   (>13 months), secure flag for tracking cookies.

10 new L2 checks with expert hints (EDPB, CNIL, §25 TDDDG).
All interceptor calls wrapped in try/except for graceful fallback.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
2026-05-10 08:58:44 +02:00

297 lines
12 KiB
Python

"""
Banner Runner — maps scan results to the L1/L2 check hierarchy.
Takes the raw ScanResponse dict and produces a structured_checks list
compatible with ChecklistView (same format as document checks).
"""
import re

from checks.banner_checks import BANNER_CHECKLIST
def _evaluate_check(
    key: str,
    violation_codes: dict,
    pass_codes: dict,
    banner_detected: bool,
    has_banner_results: bool,
) -> tuple[bool, str]:
    """Decide pass/fail for one check_key and return (passed, matched_text)."""
    # An explicit violation always wins over any pass signal.
    if key in violation_codes:
        return False, violation_codes[key]
    if key in pass_codes:
        return True, pass_codes.get(key, "")

    # Neither a violation nor an explicit pass was reported for this key.
    if key == "banner_detected":
        return banner_detected, ""
    if key in _ABSENCE_IS_PASS:
        # Phase-based checks: no violation found means the check passed.
        return True, ""
    if banner_detected:
        # Banner UI check without a result: count it as passed only when the
        # scanner actually produced a banner_checks violations list.
        return has_banner_results, ""
    return False, ""


def map_scan_to_checks(scan_result: dict) -> dict:
    """Map a /scan response to the L1/L2 banner check hierarchy.

    Args:
        scan_result: Raw scan response dict. This function itself reads the
            ``banner_detected`` and ``banner_checks`` keys; the violation and
            pass collectors read the remaining ones.

    Returns:
        dict with:
        - structured_checks: list of CheckItem dicts, in BANNER_CHECKLIST order
        - completeness_pct: L1 pass rate (0-100)
        - correctness_pct: L2 pass rate (0-100) over the non-skipped L2 checks
    """
    # Collect all violation codes from every source
    violation_codes = _collect_violation_codes(scan_result)
    # Collect pass codes — some checks produce boolean signals, not violations
    pass_codes = _collect_pass_codes(scan_result)

    # Loop invariants: computed once instead of once per checklist entry.
    banner_detected = bool(scan_result.get("banner_detected", False))
    # `or {}` also covers a response that carries an explicit null here.
    banner_section = scan_result.get("banner_checks") or {}
    has_banner_results = banner_section.get("violations") is not None

    checks: list[dict] = []
    # id -> first item with that id, so parent lookups are O(1).
    checks_by_id: dict = {}

    for defn in BANNER_CHECKLIST:
        level = defn["level"]
        parent = defn.get("parent")

        passed, matched_text = _evaluate_check(
            defn["check_key"],
            violation_codes,
            pass_codes,
            banner_detected,
            has_banner_results,
        )

        # L2 checks are skipped if their parent L1 failed. A parent that has
        # not been emitted yet (or does not exist) never causes a skip.
        skipped = False
        if level == 2 and parent:
            parent_check = checks_by_id.get(parent)
            if parent_check is not None and not parent_check["passed"]:
                skipped = True

        item = {
            "id": defn["id"],
            "label": defn["label"],
            "passed": passed and not skipped,
            "severity": defn["severity"],
            "level": level,
            "parent": parent,
            "skipped": skipped,
            "hint": defn.get("hint", ""),
            # NOTE(review): matched_text is only emitted for checks that
            # passed, so the violation text looked up above is never
            # returned — confirm this is what ChecklistView expects.
            "matched_text": matched_text if passed else "",
        }
        checks.append(item)
        checks_by_id.setdefault(item["id"], item)

    # Compute percentages
    l1_checks = [c for c in checks if c["level"] == 1]
    l1_passed = sum(1 for c in l1_checks if c["passed"])
    completeness_pct = round(l1_passed / len(l1_checks) * 100) if l1_checks else 0

    # Skipped L2 checks are excluded from the correctness denominator.
    l2_active = [c for c in checks if c["level"] == 2 and not c["skipped"]]
    l2_passed = sum(1 for c in l2_active if c["passed"])
    correctness_pct = round(l2_passed / len(l2_active) * 100) if l2_active else 0

    return {
        "structured_checks": checks,
        "completeness_pct": completeness_pct,
        "correctness_pct": correctness_pct,
    }
_TEXT_TO_CODE: list[tuple[str, str]] = [
("impressum", "impressum_link"),
("erneuter zugang", "re_access_settings"),
("cookie-einstellung", "re_access_settings"),
("widerruf der einwilligung", "re_access_settings"),
("vorausgewaehlte", "pre_ticked_checkboxes"),
("vorausgew", "pre_ticked_checkboxes"),
("akzeptieren.*groesser", "dark_pattern_button_size"),
("akzeptieren.*gr\u00f6\u00dfer", "dark_pattern_button_size"),
("hintergrundfarbe", "color_contrast_dark_pattern"),
("optisch kaum sichtbar", "color_contrast_dark_pattern"),
("dark pattern", "color_contrast_dark_pattern"),
("cookie wall", "cookie_wall"),
("ablehnen.*button", "reject_button_visible"),
("kein sichtbarer", "reject_button_visible"),
("zustimmung zur datenschutz", "wrong_dse_consent"),
("consent mode", "google_consent_mode_defaults"),
("tracking.*vor consent", "cookies_before_consent"),
("tracking-cookie", "cookies_before_consent"),
("nicht modal", "non_modal_dismiss"),
("hintergrund.*schliessen", "non_modal_dismiss"),
("klick.*asymmetri", "click_count_asymmetry"),
("ablehnung.*klick", "click_count_asymmetry"),
("koppelungsverbot", "registration_consent_coupling"),
("registrierung", "registration_consent_coupling"),
("sprache.*stimmt nicht", "banner_language_mismatch"),
("banner-sprache", "banner_language_mismatch"),
("consent-cookie.*laeuft", "consent_cookie_expiry_13m"),
("consent-cookie.*l\u00e4uft", "consent_cookie_expiry_13m"),
("13 monate", "consent_cookie_expiry_13m"),
("nudging", "nudging_reject_hidden"),
("scrollen", "nudging_reject_hidden"),
("emotionale sprache", "stirring_emotional_language"),
("stirring", "stirring_emotional_language"),
("drittanbieter.*dse", "third_party_dse_link"),
("ohne vorherige einwilligung", "tracking_before_consent"),
("trotz ablehnung", "tracking_after_reject"),
("datalayer.*vor consent", "datalayer_events_before"),
("datalayer.*vor einwilligung", "datalayer_events_before"),
("localstorage.*tracking", "localstorage_tracking_before"),
("storage.*tracking.*vor", "localstorage_tracking_before"),
("consent mode.*runtime.*denied", "gcm_runtime_denied"),
("gcm.*nicht denied", "gcm_runtime_denied"),
("datalayer.*nach ablehnung", "datalayer_events_after_reject"),
("consent mode.*bleibt", "gcm_stays_denied"),
("gcm.*nach reject", "gcm_stays_denied"),
("storage.*nach ablehnung", "storage_cleared_after_reject"),
("tracking-cookie.*vor consent", "cookie_domain_check"),
("cookie.*geschrieben.*vor", "cookie_domain_check"),
("cookie.*13 monate", "cookie_expires_check"),
("cookie.*ablauf.*ueber", "cookie_expires_check"),
("tcf.*consent", "tcf_consent_valid"),
("__tcfapi", "tcf_consent_valid"),
("sendbeacon.*tracking", "response_blocked_before"),
("beacon.*vor consent", "response_blocked_before"),
]
def _text_to_code(text: str) -> str:
"""Infer a check_key from violation text content."""
t = text.lower()
for pattern, code in _TEXT_TO_CODE:
if pattern in t:
return code
return ""
def _collect_violation_codes(scan: dict) -> dict[str, str]:
"""Collect check_key → violation text from all sources."""
codes: dict[str, str] = {}
# Banner text violations — match by code field OR by text content
banner_checks = scan.get("banner_checks", {})
for v in banner_checks.get("violations", []):
code = v.get("code", "") or _text_to_code(v.get("text", ""))
if code:
codes[code] = v.get("text", "")[:120]
# Phase A violations (before consent)
phase_a = scan.get("phases", {}).get("before_consent", {})
for v in phase_a.get("violations", []):
code = v.get("code", "") or _text_to_code(v.get("text", ""))
if code:
codes[code] = v.get("text", "")[:120]
# Phase B violations (after reject)
phase_b = scan.get("phases", {}).get("after_reject", {})
for v in phase_b.get("violations", []):
code = v.get("code", "") or _text_to_code(v.get("text", ""))
if code:
codes[code] = v.get("text", "")[:120]
# Tracking services in phase A → tracking_before_consent
tracking_a = phase_a.get("tracking_services", [])
if tracking_a and "tracking_before_consent" not in codes:
codes["tracking_before_consent"] = ", ".join(tracking_a[:5])
# Cookies before consent → cookies_before_consent
cookies_a = phase_a.get("cookies", [])
tracking_cookies = [c for c in cookies_a if _is_tracking_cookie(c)]
if tracking_cookies and "cookies_before_consent" not in codes:
codes["cookies_before_consent"] = ", ".join(tracking_cookies[:5])
# New tracking after reject → tracking_after_reject
new_tracking_b = phase_b.get("new_tracking", [])
if new_tracking_b and "tracking_after_reject" not in codes:
codes["tracking_after_reject"] = ", ".join(new_tracking_b[:5])
# Deep verification violations (from consent interceptor)
deep = scan.get("deep_verification", {})
for phase_key in ("before_consent", "after_reject"):
for v in deep.get(phase_key, {}).get("violations", []):
raw_code = v.get("code", "")
if not raw_code:
continue
# Map interceptor codes to banner check_keys
check_key = _INTERCEPTOR_CODE_MAP.get(raw_code, raw_code)
codes[check_key] = v.get("text", "")[:120]
return codes
def _collect_pass_codes(scan: dict) -> dict[str, str]:
"""Collect explicit pass signals from scan results."""
passes: dict[str, str] = {}
# Banner detected
if scan.get("banner_detected"):
passes["banner_detected"] = scan.get("banner_provider", "detected")
# Provider named
provider = scan.get("banner_provider", "")
if provider:
passes["banner_provider_named"] = provider
# Impressum link
bc = scan.get("banner_checks", {})
if bc.get("has_impressum_link"):
passes["impressum_link"] = "Impressum-Link gefunden"
if bc.get("has_dse_link"):
passes["dse_link"] = "DSE-Link gefunden"
return passes
# Map consent_interceptor violation codes → banner check_keys
_INTERCEPTOR_CODE_MAP: dict[str, str] = {
"DL_TRACK_BEFORE_CONSENT": "datalayer_events_before",
"STORAGE_TRACK_BEFORE_CONSENT": "localstorage_tracking_before",
"GCM_NOT_DENIED_BEFORE_CONSENT": "gcm_runtime_denied",
"DL_TRACK_AFTER_REJECT": "datalayer_events_after_reject",
"GCM_NOT_DENIED_AFTER_REJECT": "gcm_stays_denied",
"STORAGE_TRACK_AFTER_REJECT": "storage_cleared_after_reject",
}
# Checks where absence of a violation means PASS (not "untested")
# These are phase-based checks: if no tracking was detected, that's good.
_ABSENCE_IS_PASS = {
"tracking_before_consent",
"cookies_before_consent",
"tracking_after_reject",
"google_consent_mode_defaults",
"banner_language_mismatch",
"cookie_wall",
# Deep verification checks (absence = no violation found = PASS)
"datalayer_events_before",
"localstorage_tracking_before",
"gcm_runtime_denied",
"datalayer_events_after_reject",
"gcm_stays_denied",
"storage_cleared_after_reject",
"cookie_domain_check",
"cookie_expires_check",
"tcf_consent_valid",
"response_blocked_before",
}
_TRACKING_COOKIE_PREFIXES = (
"_ga", "_gid", "_fbp", "_fbc", "IDE", "_gcl", "fr", "_pin",
"_tt_", "li_sugr", "_hj", "mp_", "ajs_", "_clck", "_clsk",
)
def _is_tracking_cookie(name: str) -> bool:
"""Check if a cookie name is a known tracking cookie."""
return any(name.startswith(p) for p in _TRACKING_COOKIE_PREFIXES)