""" Banner Runner — maps scan results to the L1/L2 check hierarchy. Takes the raw ScanResponse dict and produces a structured_checks list compatible with ChecklistView (same format as document checks). """ from checks.banner_checks import BANNER_CHECKLIST def map_scan_to_checks(scan_result: dict) -> dict: """Map a /scan response to the L1/L2 banner check hierarchy. Returns dict with: - structured_checks: list of CheckItem dicts - completeness_pct: L1 pass rate (0-100) - correctness_pct: L2 pass rate (0-100) """ # Collect all violation codes from every source violation_codes = _collect_violation_codes(scan_result) # Collect pass codes — some checks produce boolean signals, not violations pass_codes = _collect_pass_codes(scan_result) # Build structured checks checks: list[dict] = [] l1_checks: list[dict] = [] l2_checks: list[dict] = [] for defn in BANNER_CHECKLIST: key = defn["check_key"] level = defn["level"] parent = defn.get("parent") # Determine pass/fail is_violation_key = key in violation_codes is_pass_key = key in pass_codes # For checks whose check_key appears in violations → failed # For checks whose check_key appears only in passes → passed # For checks where neither: # - Phase-based checks (tracking/cookies) → PASS (absence = good) # - Banner UI checks → PASS only if banner was detected and # the scanner actually ran the relevant check if is_violation_key: passed = False matched_text = violation_codes[key] elif is_pass_key: passed = True matched_text = pass_codes.get(key, "") else: banner_detected = scan_result.get("banner_detected", False) if key == "banner_detected": passed = banner_detected elif key in _ABSENCE_IS_PASS: # For these checks, no violation = passed (e.g. no tracking cookies) passed = True elif banner_detected: # Banner was detected but this specific check produced no result. # If the scanner ran banner_checks → assume checked and passed. # If banner_checks is empty → scanner couldn't test → not passed. 
has_banner_results = bool(scan_result.get("banner_checks", {}).get("violations") is not None) passed = has_banner_results else: passed = False matched_text = "" # L2 checks are skipped if their parent L1 failed skipped = False if level == 2 and parent: parent_check = next( (c for c in checks if c["id"] == parent), None ) if parent_check and not parent_check["passed"]: skipped = True item = { "id": defn["id"], "label": defn["label"], "passed": passed and not skipped, "severity": defn["severity"], "level": level, "parent": parent, "skipped": skipped, "hint": defn.get("hint", ""), "matched_text": matched_text if passed else "", } checks.append(item) if level == 1: l1_checks.append(item) elif level == 2: l2_checks.append(item) # Compute percentages l1_total = len(l1_checks) l1_passed = sum(1 for c in l1_checks if c["passed"]) completeness_pct = round(l1_passed / l1_total * 100) if l1_total else 0 l2_active = [c for c in l2_checks if not c["skipped"]] l2_passed = sum(1 for c in l2_active if c["passed"]) correctness_pct = round(l2_passed / len(l2_active) * 100) if l2_active else 0 return { "structured_checks": checks, "completeness_pct": completeness_pct, "correctness_pct": correctness_pct, } _TEXT_TO_CODE: list[tuple[str, str]] = [ ("impressum", "impressum_link"), ("erneuter zugang", "re_access_settings"), ("cookie-einstellung", "re_access_settings"), ("widerruf der einwilligung", "re_access_settings"), ("vorausgewaehlte", "pre_ticked_checkboxes"), ("vorausgew", "pre_ticked_checkboxes"), ("akzeptieren.*groesser", "dark_pattern_button_size"), ("akzeptieren.*gr\u00f6\u00dfer", "dark_pattern_button_size"), ("hintergrundfarbe", "color_contrast_dark_pattern"), ("optisch kaum sichtbar", "color_contrast_dark_pattern"), ("dark pattern", "color_contrast_dark_pattern"), ("cookie wall", "cookie_wall"), ("ablehnen.*button", "reject_button_visible"), ("kein sichtbarer", "reject_button_visible"), ("zustimmung zur datenschutz", "wrong_dse_consent"), ("consent mode", 
"google_consent_mode_defaults"), ("tracking.*vor consent", "cookies_before_consent"), ("tracking-cookie", "cookies_before_consent"), ("nicht modal", "non_modal_dismiss"), ("hintergrund.*schliessen", "non_modal_dismiss"), ("klick.*asymmetri", "click_count_asymmetry"), ("ablehnung.*klick", "click_count_asymmetry"), ("koppelungsverbot", "registration_consent_coupling"), ("registrierung", "registration_consent_coupling"), ("sprache.*stimmt nicht", "banner_language_mismatch"), ("banner-sprache", "banner_language_mismatch"), ("consent-cookie.*laeuft", "consent_cookie_expiry_13m"), ("consent-cookie.*l\u00e4uft", "consent_cookie_expiry_13m"), ("13 monate", "consent_cookie_expiry_13m"), ("nudging", "nudging_reject_hidden"), ("scrollen", "nudging_reject_hidden"), ("emotionale sprache", "stirring_emotional_language"), ("stirring", "stirring_emotional_language"), ("drittanbieter.*dse", "third_party_dse_link"), ("ohne vorherige einwilligung", "tracking_before_consent"), ("trotz ablehnung", "tracking_after_reject"), ] def _text_to_code(text: str) -> str: """Infer a check_key from violation text content.""" t = text.lower() for pattern, code in _TEXT_TO_CODE: if pattern in t: return code return "" def _collect_violation_codes(scan: dict) -> dict[str, str]: """Collect check_key → violation text from all sources.""" codes: dict[str, str] = {} # Banner text violations — match by code field OR by text content banner_checks = scan.get("banner_checks", {}) for v in banner_checks.get("violations", []): code = v.get("code", "") or _text_to_code(v.get("text", "")) if code: codes[code] = v.get("text", "")[:120] # Phase A violations (before consent) phase_a = scan.get("phases", {}).get("before_consent", {}) for v in phase_a.get("violations", []): code = v.get("code", "") or _text_to_code(v.get("text", "")) if code: codes[code] = v.get("text", "")[:120] # Phase B violations (after reject) phase_b = scan.get("phases", {}).get("after_reject", {}) for v in phase_b.get("violations", []): code = 
v.get("code", "") or _text_to_code(v.get("text", "")) if code: codes[code] = v.get("text", "")[:120] # Tracking services in phase A → tracking_before_consent tracking_a = phase_a.get("tracking_services", []) if tracking_a and "tracking_before_consent" not in codes: codes["tracking_before_consent"] = ", ".join(tracking_a[:5]) # Cookies before consent → cookies_before_consent cookies_a = phase_a.get("cookies", []) tracking_cookies = [c for c in cookies_a if _is_tracking_cookie(c)] if tracking_cookies and "cookies_before_consent" not in codes: codes["cookies_before_consent"] = ", ".join(tracking_cookies[:5]) # New tracking after reject → tracking_after_reject new_tracking_b = phase_b.get("new_tracking", []) if new_tracking_b and "tracking_after_reject" not in codes: codes["tracking_after_reject"] = ", ".join(new_tracking_b[:5]) return codes def _collect_pass_codes(scan: dict) -> dict[str, str]: """Collect explicit pass signals from scan results.""" passes: dict[str, str] = {} # Banner detected if scan.get("banner_detected"): passes["banner_detected"] = scan.get("banner_provider", "detected") # Provider named provider = scan.get("banner_provider", "") if provider: passes["banner_provider_named"] = provider # Impressum link bc = scan.get("banner_checks", {}) if bc.get("has_impressum_link"): passes["impressum_link"] = "Impressum-Link gefunden" if bc.get("has_dse_link"): passes["dse_link"] = "DSE-Link gefunden" return passes # Checks where absence of a violation means PASS (not "untested") # These are phase-based checks: if no tracking was detected, that's good. 
_ABSENCE_IS_PASS = {
    "tracking_before_consent",
    "cookies_before_consent",
    "tracking_after_reject",
    "google_consent_mode_defaults",
    "banner_language_mismatch",
    "cookie_wall",
}

# Name prefixes of known tracking cookies; a cookie whose name starts with
# any of these is treated as a tracking cookie.
_TRACKING_COOKIE_PREFIXES = (
    "_ga",
    "_gid",
    "_fbp",
    "_fbc",
    "_gcl",
    "_pin",
    "_tt_",
    "li_sugr",
    "_hj",
    "mp_",
    "ajs_",
    "_clck",
    "_clsk",
)

# Tracking cookies identified by their complete name. "IDE" (DoubleClick) and
# "fr" (Facebook) are too short to be safe prefixes: as prefixes they would
# also flag unrelated cookies such as "frontend_session" or "IDENTITY".
_TRACKING_COOKIE_EXACT_NAMES = frozenset({"IDE", "fr"})


def _is_tracking_cookie(name: str) -> bool:
    """Check if a cookie name is a known tracking cookie.

    Args:
        name: Cookie name as reported by the scanner (case-sensitive).

    Returns:
        True if *name* equals one of _TRACKING_COOKIE_EXACT_NAMES or starts
        with one of _TRACKING_COOKIE_PREFIXES, otherwise False.
    """
    if name in _TRACKING_COOKIE_EXACT_NAMES:
        return True
    # str.startswith accepts a tuple of candidate prefixes.
    return name.startswith(_TRACKING_COOKIE_PREFIXES)