diff --git a/consent-tester/checks/banner_runner.py b/consent-tester/checks/banner_runner.py index fe0ef67..8166f03 100644 --- a/consent-tester/checks/banner_runner.py +++ b/consent-tester/checks/banner_runner.py @@ -97,28 +97,77 @@ def map_scan_to_checks(scan_result: dict) -> dict: } +_TEXT_TO_CODE: list[tuple[str, str]] = [ + ("impressum", "impressum_link"), + ("erneuter zugang", "re_access_settings"), + ("cookie-einstellung", "re_access_settings"), + ("widerruf der einwilligung", "re_access_settings"), + ("vorausgewaehlte", "pre_ticked_checkboxes"), + ("vorausgew", "pre_ticked_checkboxes"), + ("akzeptieren.*groesser", "dark_pattern_button_size"), + ("akzeptieren.*gr\u00f6\u00dfer", "dark_pattern_button_size"), + ("hintergrundfarbe", "color_contrast_dark_pattern"), + ("optisch kaum sichtbar", "color_contrast_dark_pattern"), + ("dark pattern", "color_contrast_dark_pattern"), + ("cookie wall", "cookie_wall"), + ("ablehnen.*button", "reject_button_visible"), + ("kein sichtbarer", "reject_button_visible"), + ("zustimmung zur datenschutz", "wrong_dse_consent"), + ("consent mode", "google_consent_mode_defaults"), + ("tracking.*vor consent", "cookies_before_consent"), + ("tracking-cookie", "cookies_before_consent"), + ("nicht modal", "non_modal_dismiss"), + ("hintergrund.*schliessen", "non_modal_dismiss"), + ("klick.*asymmetri", "click_count_asymmetry"), + ("ablehnung.*klick", "click_count_asymmetry"), + ("koppelungsverbot", "registration_consent_coupling"), + ("registrierung", "registration_consent_coupling"), + ("sprache.*stimmt nicht", "banner_language_mismatch"), + ("banner-sprache", "banner_language_mismatch"), + ("consent-cookie.*laeuft", "consent_cookie_expiry_13m"), + ("consent-cookie.*l\u00e4uft", "consent_cookie_expiry_13m"), + ("13 monate", "consent_cookie_expiry_13m"), + ("nudging", "nudging_reject_hidden"), + ("scrollen", "nudging_reject_hidden"), + ("emotionale sprache", "stirring_emotional_language"), + ("stirring", "stirring_emotional_language"), + ("drittanbieter.*dse", "third_party_dse_link"), + ("ohne vorherige einwilligung", "tracking_before_consent"), + ("trotz ablehnung", "tracking_after_reject"), +] + + +def _text_to_code(text: str) -> str: + """Infer a check_key from violation text content.""" + t = text.lower() + for pattern, code in _TEXT_TO_CODE: + if pattern in t: + return code + return "" + + def _collect_violation_codes(scan: dict) -> dict[str, str]: """Collect check_key → violation text from all sources.""" codes: dict[str, str] = {} - # Banner text violations + # Banner text violations — match by code field OR by text content banner_checks = scan.get("banner_checks", {}) for v in banner_checks.get("violations", []): - code = v.get("code", "") + code = v.get("code", "") or _text_to_code(v.get("text", "")) if code: codes[code] = v.get("text", "")[:120] # Phase A violations (before consent) phase_a = scan.get("phases", {}).get("before_consent", {}) for v in phase_a.get("violations", []): - code = v.get("code", "") + code = v.get("code", "") or _text_to_code(v.get("text", "")) if code: codes[code] = v.get("text", "")[:120] # Phase B violations (after reject) phase_b = scan.get("phases", {}).get("after_reject", {}) for v in phase_b.get("violations", []): - code = v.get("code", "") + code = v.get("code", "") or _text_to_code(v.get("text", "")) if code: codes[code] = v.get("text", "")[:120]