"""Consent-Historie-/Widerruf-Erkennung (Borlabs-Stil) während des Scans. Erkennt, ob die Site ihre Einwilligung versioniert speichert (Borlabs hält die zugestimmte Version + Zeitstempel → Nutzer kann nachvollziehen, welcher Version er wann zugestimmt hat) und ob ein dauerhaftes Widerruf-/„Cookie-Einstellungen"- Widget angeboten wird. Reine Klassifikation (`classify_provider`) ist ohne Browser unit-testbar; `detect_consent_history` kapselt das Playwright-IO. """ from __future__ import annotations from typing import Any, Optional # Signatur-Fragmente in Storage-Keys/Cookie-Namen → CMP-Anbieter. _PROVIDERS = [ ("Borlabs", ["borlabs-cookie", "borlabscookie", "borlabs"]), ("Usercentrics", ["uc_settings", "uc_user_interaction", "usercentrics"]), ("OneTrust", ["optanonconsent", "optanonalertbox", "onetrust"]), ("Cookiebot", ["cookieconsent", "cookiebot"]), ("Complianz", ["cmplz_", "complianz"]), ("Cookie-Script", ["cookiescriptconsent"]), ] # Wer trägt von Haus aus eine versionierte Consent-Historie (Capability). _HISTORY_CAPABLE = {"Borlabs", "Usercentrics", "OneTrust", "Cookiebot"} # Selektoren für ein dauerhaftes Widerruf-/Einstellungs-Widget. _WITHDRAW_SELECTOR = ( 'a:has-text("Cookie-Einstellungen"), button:has-text("Cookie-Einstellungen"), ' 'a:has-text("Einwilligung"), button:has-text("Einwilligung"), ' 'a:has-text("Cookie Settings"), button:has-text("Cookie Settings"), ' 'a:has-text("Consent"), button:has-text("Consent"), ' '[id*="borlabs-cookie"], [class*="borlabs-cookie"], #BorlabsCookieBox, ' '[class*="cookie-preference"], [class*="cmplz-manage"]' ) def classify_provider(names: list[str]) -> str: """Storage-Keys + Cookie-Namen → CMP-Anbieter ('' wenn unbekannt). Pur.""" blob = " ".join(n.lower() for n in names if n) for provider, sigs in _PROVIDERS: if any(s in blob for s in sigs): return provider return "" def _is_versioned(provider: str, stored_value: Optional[str]) -> bool: """True, wenn der gespeicherte Consent eine Version/Consent-Liste trägt (Indiz für nachvollziehbare Historie).""" if not stored_value: return provider in _HISTORY_CAPABLE # Capability auch ohne Wert low = stored_value.lower() return any(t in low for t in ("version", "consents", "timestamp", "consentid")) async def detect_consent_history(page: Any, banner_provider: str = "") -> dict: """Liest Storage/Cookies + DOM und liefert: {provider, stored, versioned_consent, history_capable, withdraw_ui}. `banner_provider` = bereits aus dem Banner-DOM erkanntes CMP — wird als Fallback genutzt, da Storage-Keys in Phase A (vor Consent) oft noch fehlen.""" keys: list[str] = [] try: keys = await page.evaluate("() => Object.keys(window.localStorage || {})") except Exception: keys = [] cookie_names: list[str] = [] try: cookie_names = [c.get("name", "") for c in await page.context.cookies()] except Exception: cookie_names = [] # Provider zuerst aus Storage/Cookies, sonst aus dem erkannten Banner-CMP. provider = (classify_provider(list(keys) + cookie_names) or classify_provider([banner_provider or ""])) stored_value = None if provider == "Borlabs": try: stored_value = await page.evaluate( "() => localStorage.getItem('borlabs-cookie') || " "localStorage.getItem('BorlabsCookie')") except Exception: stored_value = None versioned = _is_versioned(provider, stored_value) withdraw = False try: withdraw = await page.locator(_WITHDRAW_SELECTOR).count() > 0 except Exception: withdraw = False return { "provider": provider, "stored": bool(provider), "versioned_consent": versioned, "history_capable": versioned or provider in _HISTORY_CAPABLE, "withdraw_ui": withdraw, }