""" P105 — IAB TCF Vendor-Liste als externe Authority. Die IAB TCF v2.2 Global Vendor List (https://vendor-list.consensu.org/v3/ vendor-list.json) ist die DSGVO-Authoritaet fuer Werbe-Vendoren: jeder gelistete Vendor hat verbindliche IAB-Purposes: Purpose 1 — Speichern + Zugriff (essential) Purpose 2 — Auswahl Werbung (functional/marketing) Purpose 3 — Personalisierte Werbeprofile (marketing) Purpose 4 — Personalisierte Werbung (marketing) Purpose 5 — Personalisierte Inhaltsprofile (marketing/personalization) Purpose 6 — Personalisierte Inhalte (marketing/personalization) Purpose 7 — Werbe-Performance-Messung (statistics) Purpose 8 — Inhalts-Performance-Messung (statistics) Purpose 9 — Marktforschung (statistics) Purpose 10 — Produkt-Verbesserung (statistics) Wenn ein Vendor in der TCF-Liste mit Purpose 3/4 registriert ist und die Site ihn als "Funktional" deklariert → eindeutiger Verstoss (eine externe Authority widerspricht der Deklaration). Ingest-Mode: idempotenter Fetch + Upsert in compliance.tcf_vendors_v2. Lookup-Mode: by_vendor_name + by_cookie_owner. """ from __future__ import annotations import logging from typing import Iterable import httpx from sqlalchemy import text as sa_text from sqlalchemy.orm import Session logger = logging.getLogger(__name__) _TCF_URL = "https://vendor-list.consensu.org/v3/vendor-list.json" # IAB-Purpose → BreakPilot-Kategorie _PURPOSE_TO_CATEGORY = { 1: "essential", 2: "marketing", 3: "marketing", 4: "marketing", 5: "personalization", 6: "personalization", 7: "statistics", 8: "statistics", 9: "statistics", 10: "statistics", 11: "marketing", } def _category_for_purposes(purposes: Iterable[int]) -> str: """Aggregiert Purposes zu der STRENGSTEN Kategorie (Marketing > stats > personalization > essential). Wenn ein Vendor sowohl essential als auch marketing nutzt, ist die rechtlich verbindliche Kategorie Marketing (Einwilligungspflicht).""" cats = {_PURPOSE_TO_CATEGORY.get(p, "marketing") for p in purposes} if "marketing" in cats: return "marketing" if "statistics" in cats: return "statistics" if "personalization" in cats: return "personalization" return "essential" async def fetch_and_ingest_tcf_vendors(db: Session) -> dict: """Idempotenter Ingest. Schema-Migration vermeiden — nutzt nur bestehende cookie_library-Tabelle und kennzeichnet TCF-Source via vendor_name='[TCF] '.""" async with httpx.AsyncClient(timeout=60.0) as client: resp = await client.get(_TCF_URL) resp.raise_for_status() data = resp.json() vendors = data.get("vendors") or {} if not vendors: return {"error": "no vendors in TCF response", "n_vendors": 0} # Erst alte TCF-Eintraege weg (kein UNIQUE-Index auf cookie_name, # daher kein ON CONFLICT moeglich → idempotent via DELETE+INSERT). db.execute(sa_text( "DELETE FROM compliance.cookie_library WHERE source_name='iab_tcf_v2'" )) db.commit() inserted = 0 skipped = 0 for vid, v in vendors.items(): name = (v.get("name") or "").strip() if not name: continue purposes = v.get("purposes") or [] leg_purposes = v.get("legIntPurposes") or [] all_purposes = list(set(purposes) | set(leg_purposes)) category = _category_for_purposes(all_purposes) privacy_url = (v.get("policyUrl") or "").strip()[:500] or None # Cookie-Names die der Vendor laut TCF setzt sind nicht in der # GVL — wir kennzeichnen nur den Vendor-Eintrag mit ID + Purposes. marker = f"_tcf_v{vid}" try: db.execute(sa_text( """ INSERT INTO compliance.cookie_library (cookie_name, domain_pattern, vendor_name, vendor_privacy_url, actual_category, purpose_en, source_name, source_url, confidence) VALUES (:n, :dp, :v, :pu, :cat, :purp, 'iab_tcf_v2', 'https://vendor-list.consensu.org/v3/vendor-list.json', 0.99) """ ), {"n": marker, "dp": "*", "v": f"[TCF-{vid}] {name}", "pu": privacy_url, "cat": category, "purp": f"IAB TCF v2 Purposes: {sorted(all_purposes)}"}) db.commit() # Per-Vendor-Commit damit ein Fehler nicht # die naechsten Eintraege blockt. inserted += 1 except Exception as e: logger.warning("TCF vendor %s insert failed: %s", vid, e) skipped += 1 db.rollback() # frische Transaktion fuer den naechsten Insert return {"n_vendors_in_gvl": len(vendors), "inserted": inserted, "skipped": skipped} def lookup_tcf_authority( db: Session, vendor_name: str | None, ) -> dict | None: """Liefert TCF-Authority-Daten fuer einen Vendor-Namen, wenn er in der TCF-Liste registriert ist. Returns {tcf_id, name, category} oder None. Fuzzy-Match: 'Google' matched '[TCF-755] Google Advertising Products'. """ if not vendor_name: return None nl = vendor_name.lower().strip() try: rows = db.execute(sa_text( """ SELECT cookie_name, actual_category, vendor_name FROM compliance.cookie_library WHERE source = 'iab_tcf_v2' AND LOWER(vendor_name) LIKE :pat LIMIT 5 """ ), {"pat": f"%{nl}%"}).fetchall() for r in rows: tcf_name = r[2] # '[TCF-755] Google ...' if tcf_name and "]" in tcf_name: tcf_id = tcf_name.split("]")[0].lstrip("[TCF-") clean = tcf_name.split("]", 1)[1].strip() return {"tcf_id": tcf_id, "name": clean, "category": r[1]} except Exception as e: logger.warning("TCF lookup failed: %s", e) return None def cross_reference_with_tcf( db: Session, declared_vendors: list[dict], ) -> list[dict]: """Liefert pro Vendor mit Discrepancy ein Finding-dict. Eingang: list[{name, category}] aus cmp_vendors. Ausgang: list[{vendor, declared_category, tcf_category, severity}] """ out: list[dict] = [] for v in (declared_vendors or []): if not isinstance(v, dict): continue name = (v.get("name") or "").strip() declared_cat = (v.get("category") or "").lower().strip() if not name or not declared_cat: continue tcf = lookup_tcf_authority(db, name) if not tcf: continue if tcf["category"] == declared_cat: continue # Marketing/Statistics vs Functional/Essential ist die kritische # Diskrepanz. functional + personalization sind weicher. severity = "HIGH" if (tcf["category"] == "marketing" and declared_cat in ("essential", "functional", "necessary")) else "MEDIUM" out.append({ "vendor": name, "tcf_id": tcf["tcf_id"], "tcf_name": tcf["name"], "declared_category": declared_cat, "tcf_category": tcf["category"], "severity": severity, }) return out def build_tcf_authority_block_html(findings: list[dict]) -> str: if not findings: return "" items: list[str] = [] for f in findings[:30]: sev_color = "#dc2626" if f["severity"] == "HIGH" else "#d97706" items.append( f'
  • ' f'{f["vendor"]} ' f'— deklariert als ' f'{f["declared_category"]}, ' f'IAB TCF v2 (Vendor-ID ' f'{f["tcf_id"]}) listet als ' f'' f'{f["tcf_category"]}' f'
  • ' ) return ( '
    ' '
    ' 'IAB TCF v2 Authority-Check — Vendor-Kategorie-Diskrepanz
    ' f'

    ' f'{len(findings)} Vendor{"en" if len(findings) != 1 else ""} ' 'mit Kategorie-Widerspruch zur offiziellen IAB-Liste

    ' '

    ' 'Die IAB Transparency & Consent Framework v2 Global Vendor List ' 'ist die rechtliche Authoritaet fuer die Klassifizierung von ' 'Werbe-Vendoren in der EU. Wenn ein Vendor dort als "Marketing" ' 'gefuehrt ist, kann die Site ihn nicht als "Funktional" einstufen ' '— das ist eine externe, durchgesetzte Klassifikation.

    ' '' '

    Quelle: ' 'https://vendor-list.consensu.org/v3/vendor-list.json — ' 'die TCF-Liste ist verbindlich fuer alle CMP-Tools die IAB-TCF v2 ' 'implementieren (Cookiebot, OneTrust, Usercentrics, Sourcepoint, …).

    ' '
    ' )