"""Migrate extracted vendor records -> CookieBannerConfig.

Target schema: admin-compliance, einwilligungen/types/cookie-banner.ts.

Input : list[VendorRecord] as produced by vendor_extractor +
        vendor_classifier + cookie_link_validator
Output : dict matching CookieBannerConfig shape, ready for the
         /sdk/cookie-banner module to import.

The mapper also returns `flags[]` — items that need manual review
before going live (broken opt-out URL, missing expiry, etc.).
"""

from __future__ import annotations

import logging
from typing import Any

logger = logging.getLogger(__name__)

# ePaaS / OneTrust / etc. category -> CookieBannerCategory enum.
# CookieCategory has only 4 values, so we project marketing onto
# PERSONALIZATION and use EXTERNAL_MEDIA for embedded-content vendors.
# Keys are lower-case and contain only alphanumeric characters; lookups go
# through _normalize_category_key() so "Strictly Necessary" finds
# "strictlynecessary".
_CATEGORY_MAP = {
    "necessary": "ESSENTIAL",
    "strictlynecessary": "ESSENTIAL",
    "essential": "ESSENTIAL",
    "functional": "ESSENTIAL",  # Cookiebot conflates these
    "statistics": "PERFORMANCE",
    "analytics": "PERFORMANCE",
    "performance": "PERFORMANCE",
    "marketing": "PERSONALIZATION",
    "advertising": "PERSONALIZATION",
    "personalization": "PERSONALIZATION",
}

# Vendor names that indicate embedded external content
_EXTERNAL_MEDIA_HINTS = (
    "youtube",
    "vimeo",
    "twitch",
    "google maps",
    "googlemaps",
    "soundcloud",
    "spotify",
)

# Order in which categories are bucketed and emitted in the banner config.
_CATEGORY_ORDER = ("ESSENTIAL", "PERFORMANCE", "PERSONALIZATION", "EXTERNAL_MEDIA")

_CATEGORY_LABELS = {
    "ESSENTIAL": {
        "de": "Erforderliche Cookies",
        "en": "Essential Cookies",
        "desc_de": "Diese Cookies sind fuer den Betrieb der Website "
                   "unbedingt erforderlich (§25 Abs. 2 TDDDG) und koennen "
                   "nicht deaktiviert werden.",
        "desc_en": "These cookies are strictly necessary for the operation "
                   "of the website and cannot be disabled.",
    },
    "PERFORMANCE": {
        "de": "Analyse & Performance",
        "en": "Analytics & Performance",
        "desc_de": "Analyse-Cookies messen die Nutzung unserer Website, "
                   "um sie kontinuierlich zu verbessern.",
        "desc_en": "Analytics cookies measure how visitors use our site so "
                   "we can improve it.",
    },
    "PERSONALIZATION": {
        "de": "Marketing & Personalisierung",
        "en": "Marketing & Personalization",
        "desc_de": "Diese Cookies dienen der personalisierten "
                   "Ansprache und werbebezogenen Auswertung.",
        "desc_en": "These cookies support personalised content and "
                   "marketing measurement.",
    },
    "EXTERNAL_MEDIA": {
        "de": "Externe Medien",
        "en": "External Media",
        "desc_de": "Eingebettete Inhalte von Drittanbietern (z. B. "
                   "Videos, Karten, Audio) koennen Cookies setzen.",
        "desc_en": "Embedded third-party media (videos, maps, audio) "
                   "may set cookies.",
    },
}


def _normalize_category_key(vendor_category: Any) -> str:
    """Lower-case a CMP category label and drop every non-alphanumeric char."""
    raw = str(vendor_category or "").lower()
    return "".join(ch for ch in raw if ch.isalnum())


def map_category(vendor_category: str, vendor_name: str) -> str:
    """Resolve a CMP category + vendor name to a CookieCategory enum value.

    Args:
        vendor_category: Category label as exported by the CMP. Case, spaces,
            hyphens and underscores are ignored, so "Strictly Necessary" and
            "strictly_necessary" both resolve like "strictlynecessary".
        vendor_name: Vendor display name, matched case-insensitively against
            the external-media hints.

    Returns:
        "EXTERNAL_MEDIA" when the vendor name contains one of the
        external-media hints (this wins over the CMP category); otherwise the
        mapped category, falling back to "PERSONALIZATION" for unknown or
        empty categories.
    """
    name_l = str(vendor_name or "").lower()
    if any(hint in name_l for hint in _EXTERNAL_MEDIA_HINTS):
        return "EXTERNAL_MEDIA"
    return _CATEGORY_MAP.get(
        _normalize_category_key(vendor_category), "PERSONALIZATION"
    )


def _build_categories(by_cat: dict[str, list[dict]]) -> list[dict]:
    """Turn the per-category cookie buckets into banner category entries."""
    categories: list[dict] = []
    for cat_id in _CATEGORY_ORDER:
        cookies = by_cat[cat_id]
        # ESSENTIAL is always emitted, even when empty; every other category
        # only appears when it actually holds cookies.
        if not cookies and cat_id != "ESSENTIAL":
            continue
        meta = _CATEGORY_LABELS[cat_id]
        categories.append({
            "id": cat_id,
            "name": {"de": meta["de"], "en": meta["en"]},
            "description": {"de": meta["desc_de"], "en": meta["desc_en"]},
            "isRequired": cat_id == "ESSENTIAL",
            "defaultEnabled": cat_id == "ESSENTIAL",
            "dataPointIds": [],
            "cookies": cookies,
        })
    return categories


def build_banner_config(
    vendors: list[dict[str, Any]],
    site_name: str = "",
    privacy_policy_url: str = "",
    language: str = "de",
) -> dict[str, Any]:
    """Produce a CookieBannerConfig + flags from the extracted vendor list.

    `vendors` is the list emitted by
    vendor_extractor.extract_vendors_from_payloads (+ score_vendors for the
    compliance_flags). Vendors are bucketed by canonical CookieCategory and
    one cookie entry is built per named cookie.

    Args:
        vendors: Vendor dicts. Keys read here: "name", "category", "purpose",
            "cookies" (list of dicts with "name", "purpose", "expiry",
            "is_third_party"), "opt_out_url", "opt_out_ok", "opt_out_status".
            ``None`` is treated as an empty list.
        site_name: Name interpolated into the banner description; a generic
            phrase is used when empty.
        privacy_policy_url: Link target for the privacy policy; defaults to
            "/datenschutz" (de) and "/privacy" (en) when empty.
        language: Language key under which each cookie's purpose is stored.

    Returns:
        A dict with three keys: "config" (the banner configuration), "flags"
        (review items with "level", "vendor", "issue", "message") and
        "summary" (counters over vendors, cookies, categories and flags).
    """
    vendor_list = list(vendors or [])
    by_cat: dict[str, list[dict]] = {cat_id: [] for cat_id in _CATEGORY_ORDER}
    flags: list[dict] = []
    cookies_total = 0
    vendors_with_no_cookies = 0

    for v in vendor_list:
        cat = map_category(v.get("category", ""), v.get("name", ""))
        provider = v.get("name") or "Unbekannt"
        cookies = v.get("cookies") or []

        if not cookies:
            vendors_with_no_cookies += 1
            flags.append({
                "level": "WARNING",
                "vendor": provider,
                "issue": "no_cookies_listed",
                "message": (
                    f"Anbieter '{provider}' wurde erfasst, "
                    "aber keine Cookies sind dokumentiert. Vor "
                    "Veroeffentlichung manuell ergaenzen."
                ),
            })

        # No `continue` above on purpose: this loop is a no-op for vendors
        # without cookies, and the opt-out check below must still run.
        for c in cookies:
            cname = (c.get("name") or "").strip()
            if not cname:
                # Unnamed cookies cannot be listed in the banner; skip them.
                continue
            cookies_total += 1
            by_cat[cat].append({
                "name": cname,
                "provider": provider,
                "purpose": {language: c.get("purpose") or v.get("purpose") or ""},
                "expiry": c.get("expiry") or "",
                "type": "THIRD_PARTY" if c.get("is_third_party") else "FIRST_PARTY",
            })
            if not c.get("expiry"):
                flags.append({
                    "level": "INFO",
                    "vendor": provider,
                    "issue": "cookie_no_expiry",
                    "message": (
                        f"Cookie '{cname}' bei '{provider}' ohne "
                        "Speicherdauer — fuer DSK-Konformitaet ergaenzen."
                    ),
                })

        # Vendor-level link validation flags. Only an explicit False counts as
        # broken; a missing/None result means the link was not validated.
        if v.get("opt_out_url") and v.get("opt_out_ok") is False:
            flags.append({
                "level": "ERROR",
                "vendor": provider,
                "issue": "broken_opt_out",
                "message": (
                    f"Opt-Out-Link von '{provider}' antwortet mit "
                    f"HTTP {v.get('opt_out_status')} — "
                    "Art. 7(3) DSGVO erfordert funktionierenden Widerruf."
                ),
            })

    categories = _build_categories(by_cat)

    config = {
        "id": "",  # filled by tenant on apply
        "tenantId": "",
        "categories": categories,
        "styling": {
            "position": "BOTTOM",
            "theme": "LIGHT",
            "primaryColor": "#2563eb",
            "borderRadius": 8,
        },
        "texts": {
            "title": {"de": "Wir verwenden Cookies", "en": "We use cookies"},
            "description": {
                "de": (f"Auf {site_name or 'unserer Website'} setzen wir "
                       "Cookies und aehnliche Technologien ein, um die "
                       "Nutzererfahrung zu verbessern. Sie koennen Ihre "
                       "Auswahl jederzeit anpassen."),
                "en": (f"On {site_name or 'this website'} we use cookies "
                       "and similar technologies. You can change your "
                       "selection at any time."),
            },
            "acceptAll": {"de": "Alle akzeptieren", "en": "Accept all"},
            "rejectAll": {"de": "Alle ablehnen", "en": "Reject all"},
            "customize": {"de": "Auswahl anpassen", "en": "Customize"},
            "save": {"de": "Auswahl speichern", "en": "Save preferences"},
            "privacyPolicyLink": {
                "de": privacy_policy_url or "/datenschutz",
                "en": privacy_policy_url or "/privacy",
            },
        },
    }

    summary = {
        "vendors_total": len(vendor_list),
        "vendors_with_no_cookies": vendors_with_no_cookies,
        "cookies_total": cookies_total,
        "categories": {cat_id: len(entries) for cat_id, entries in by_cat.items()},
        "flags_error": sum(1 for f in flags if f["level"] == "ERROR"),
        "flags_warning": sum(1 for f in flags if f["level"] == "WARNING"),
        "flags_info": sum(1 for f in flags if f["level"] == "INFO"),
    }

    logger.info(
        "Banner migration prepared: %d vendors -> %d cookies in %d "
        "categories. Flags: %d ERROR, %d WARNING, %d INFO.",
        summary["vendors_total"],
        summary["cookies_total"],
        len(categories),
        summary["flags_error"],
        summary["flags_warning"],
        summary["flags_info"],
    )

    return {
        "config": config,
        "flags": flags,
        "summary": summary,
    }