""" Cookie-zu-Vendor-Fallback (P52 Lite). Wenn weder cmp_payloads noch vendor_llm_extract Vendors lieferten, matchen wir die im after_accept gesehenen Cookies gegen die compliance.cookie_library und bauen Vendor-Records aus den Library- Eintraegen (cookie_name → vendor_name, actual_category). Typisches Szenario: VW nutzt ein Custom-CMP (cookiemgmt-Wrapper), kein bekanntes IAB-Tool. cmp_payloads = leer, aber after_accept.cookies hat 28 Eintraege. Diese 28 Cookies sind in der Library = ~15-20 Vendors. """ from __future__ import annotations import logging from typing import Iterable from sqlalchemy import text from sqlalchemy.orm import Session logger = logging.getLogger(__name__) def _collect_cookie_names(banner_result: dict | None) -> set[str]: names: set[str] = set() if not isinstance(banner_result, dict): return names for ph in (banner_result.get("phases") or {}).values(): if not isinstance(ph, dict): continue for ck in (ph.get("cookies") or []): if isinstance(ck, str): names.add(ck.strip()) elif isinstance(ck, dict): n = (ck.get("name") or "").strip() if n: names.add(n) return {n for n in names if n and len(n) <= 120} def lookup_vendors_from_library( db: Session, cookie_names: Iterable[str], ) -> list[dict]: """Resolves cookie names to vendor records via cookie_library.""" names = [n for n in cookie_names if n] if not names: return [] rows = db.execute(text( """ SELECT cookie_name, actual_category, vendor_name FROM compliance.cookie_library WHERE LOWER(cookie_name) = ANY(:lc) """ ), {"lc": [n.lower() for n in names]}).fetchall() by_vendor: dict[str, dict] = {} for cname, cat, vendor in rows: if not vendor: continue entry = by_vendor.setdefault(vendor, { "name": vendor, "country": "", "purpose": "", "category": cat or "", "opt_out_url": "", "privacy_policy_url": "", "persistence": "", "cookies": [], "source": "library_fallback", }) entry["cookies"].append({ "name": cname, "purpose": "", "expiry": "", "is_third_party": True, }) return list(by_vendor.values()) def fallback_vendors_for_run( db: Session, banner_result: dict | None, existing_vendor_count: int, ) -> list[dict]: """Returns extra vendor records to merge with the run's cmp_vendors. Only fires when existing_vendor_count is suspiciously low (< 3) AND we have enough cookies to look up (>= 5). Otherwise skip. """ if existing_vendor_count >= 3: return [] names = _collect_cookie_names(banner_result) if len(names) < 5: return [] vendors = lookup_vendors_from_library(db, names) if vendors: logger.info( "Cookie-Library-Fallback: %d Vendors aus %d Cookies (vorher %d)", len(vendors), len(names), existing_vendor_count, ) return vendors