diff --git a/backend-compliance/compliance/api/agent_compliance_check_routes.py b/backend-compliance/compliance/api/agent_compliance_check_routes.py
index ec905ef5..47db13a6 100644
--- a/backend-compliance/compliance/api/agent_compliance_check_routes.py
+++ b/backend-compliance/compliance/api/agent_compliance_check_routes.py
@@ -207,6 +207,22 @@ async def get_snapshot(snapshot_id: str):
db.close()
@router.post("/admin/tcf-ingest")
async def tcf_ingest():
    """P105 — ingest or refresh the IAB TCF vendor list.

    Opens a dedicated database session, hands it to
    ``fetch_and_ingest_tcf_vendors`` and returns that function's result
    unchanged. The ingest is an upsert into ``compliance.cookie_library``
    (rows with ``source='iab_tcf_v2'``), so repeated calls are safe; a few
    calls per year are sufficient.
    """
    # Imported lazily, exactly like the other handlers in this module.
    from database import SessionLocal
    from compliance.services.tcf_vendor_authority import (
        fetch_and_ingest_tcf_vendors,
    )

    session = SessionLocal()
    try:
        summary = await fetch_and_ingest_tcf_vendors(session)
    finally:
        # Always release the session, even if the fetch or upsert failed.
        session.close()
    return summary
+
+
@router.get("/snapshots/{snapshot_id}/pdf")
async def export_snapshot_pdf(snapshot_id: str):
"""P88 — PDF-Export der Audit-Mail. Liefert application/pdf."""
@@ -1285,6 +1301,53 @@ async def _run_compliance_check(check_id: str, req: ComplianceCheckRequest):
except Exception as e:
logger.warning("Scope-disclaimer block skipped: %s", e)
+ # P103 + P104 — Cookie-Value-Entropy + Network-Tracing (Stufe 3 + 4)
+ entropy_html = ""
+ network_trace_html = ""
+ try:
+ from compliance.services.cookie_value_entropy import (
+ check_cookies_for_entropy_mismatch, build_entropy_block_html,
+ )
+ from compliance.services.cookie_network_tracer import (
+ trace_cookie_network, build_network_trace_block_html,
+ )
+ cookies_detailed = (banner_result or {}).get("cookies_detailed") or []
+ entropy_findings = check_cookies_for_entropy_mismatch(cookies_detailed)
+ if entropy_findings:
+ entropy_html = build_entropy_block_html(entropy_findings)
+ logger.info("P103 Entropy: %d Findings", len(entropy_findings))
+ primary_url = ""
+ for e_ in doc_entries:
+ if e_.get("url"):
+ primary_url = e_["url"]; break
+ net_findings = trace_cookie_network(cookies_detailed, primary_url)
+ if net_findings:
+ network_trace_html = build_network_trace_block_html(net_findings)
+ logger.info("P104 Network-Trace: %d Findings", len(net_findings))
+ except Exception as e:
+ logger.warning("P103/P104 entropy/network-trace skipped: %s", e)
+
+ # P105 — IAB TCF Authority-Cross-Reference (Stufe 5)
+ tcf_authority_html = ""
+ try:
+ from compliance.services.tcf_vendor_authority import (
+ cross_reference_with_tcf, build_tcf_authority_block_html,
+ )
+ from database import SessionLocal as _SLtcf
+ _tcf_db = _SLtcf()
+ try:
+ tcf_findings = cross_reference_with_tcf(_tcf_db, cmp_vendors)
+ if tcf_findings:
+ tcf_authority_html = build_tcf_authority_block_html(tcf_findings)
+ logger.info(
+ "TCF-Authority: %d Vendor-Discrepancies gefunden",
+ len(tcf_findings),
+ )
+ finally:
+ _tcf_db.close()
+ except Exception as e:
+ logger.warning("TCF-Authority-Check skipped: %s", e)
+
# COOKIE-COMPLIANCE-AUDIT (3-Quellen-Vergleich) — das ist der
# zentrale USP: deklariert in Richtlinie vs tatsaechlich im
# Browser geladen vs Library-Match.
@@ -1524,6 +1587,9 @@ async def _run_compliance_check(check_id: str, req: ComplianceCheckRequest):
+ scorecard_html + redundancy_html
+ providers_html + banner_deep_html
+ cookie_audit_html
+ + tcf_authority_html
+ + entropy_html
+ + network_trace_html
+ library_mismatch_html
+ consistency_html + signals_html + solutions_html
+ jc_decision_html
diff --git a/backend-compliance/compliance/services/cookie_network_tracer.py b/backend-compliance/compliance/services/cookie_network_tracer.py
new file mode 100644
index 00000000..13360839
--- /dev/null
+++ b/backend-compliance/compliance/services/cookie_network_tracer.py
@@ -0,0 +1,216 @@
+"""
+P104 — Cookie-Network-Tracing (Stufe 4).
+
+cookies_detailed[i].domain zeigt welche Domain das Cookie via Set-Cookie
+gesetzt hat. Wir vergleichen:
+* Site-Hauptdomain vs Cookie-Domain → First-Party / Third-Party
+* Cookie-Domain vs bekannte Vendoren → wer ist der echte Empfaenger
+* Vendor-Land vs EU/Drittland → Drittland-Transfer-Hinweis
+
+Defeat-Device-Pattern: "Funktional"-Cookie wird aber von doubleclick.net
+gesetzt → das ist physisch ein Third-Party-Tracking-Cookie, kein
+funktionales First-Party-Cookie.
+"""
+
+from __future__ import annotations
+
+import logging
+from urllib.parse import urlparse
+
+logger = logging.getLogger(__name__)
+
+
+# Vendor-Domain → bekannter Vendor + Land
+_DOMAIN_VENDORS: dict[str, tuple[str, str]] = {
+ ".doubleclick.net": ("Google DoubleClick", "US"),
+ ".google.com": ("Google", "US"),
+ ".google-analytics.com": ("Google Analytics", "US"),
+ ".googletagmanager.com": ("Google Tag Manager", "US"),
+ ".googleadservices.com": ("Google Ads", "US"),
+ ".gstatic.com": ("Google CDN", "US"),
+ ".facebook.com": ("Meta / Facebook", "US"),
+ ".facebook.net": ("Meta / Facebook", "US"),
+ ".instagram.com": ("Meta / Instagram", "US"),
+ ".linkedin.com": ("LinkedIn (Microsoft)", "US"),
+ ".pinterest.com": ("Pinterest", "US"),
+ ".pinimg.com": ("Pinterest", "US"),
+ ".tiktok.com": ("TikTok (ByteDance)", "CN"),
+ ".bing.com": ("Microsoft Bing", "US"),
+ ".clarity.ms": ("Microsoft Clarity", "US"),
+ ".criteo.com": ("Criteo", "FR"),
+ ".adnxs.com": ("AppNexus / Xandr", "US"),
+ ".rubiconproject.com": ("Rubicon Project", "US"),
+ ".pubmatic.com": ("PubMatic", "US"),
+ ".adobedtm.com": ("Adobe DTM", "US"),
+ ".adobetarget.com": ("Adobe Target", "US"),
+ ".demdex.net": ("Adobe Experience Cloud", "US"),
+ ".omtrdc.net": ("Adobe Analytics", "US"),
+ ".everesttech.net": ("Adobe Advertising Cloud", "US"),
+ ".2o7.net": ("Adobe Analytics", "US"),
+ ".adform.net": ("AdForm", "DK"),
+ ".trade-desk.com": ("The Trade Desk", "US"),
+ ".tradedesk.com": ("The Trade Desk", "US"),
+ ".adsrvr.org": ("The Trade Desk", "US"),
+ ".hotjar.com": ("Hotjar", "MT"),
+ ".matomo.cloud": ("Matomo", "DE"),
+ ".etracker.com": ("etracker", "DE"),
+ ".etracker.de": ("etracker", "DE"),
+ ".cloudflare.com": ("Cloudflare", "US"),
+ ".cookielaw.org": ("OneTrust", "US"),
+ ".cookiebot.com": ("Cookiebot (Cybot)", "DK"),
+ ".usercentrics.eu": ("Usercentrics", "DE"),
+ ".usercentrics.com": ("Usercentrics", "DE"),
+ ".consensu.org": ("IAB Europe TCF", "BE"),
+ ".datadoghq.eu": ("Datadog", "US"),
+ ".datadoghq.com": ("Datadog", "US"),
+ ".datadome.co": ("DataDome", "FR"),
+ ".incapsula.com": ("Imperva Incapsula", "US"),
+ ".imperva.com": ("Imperva", "US"),
+ ".akamai.net": ("Akamai", "US"),
+ ".akamaiedge.net": ("Akamai", "US"),
+ ".salesforce.com": ("Salesforce", "US"),
+ ".force.com": ("Salesforce", "US"),
+}
+
+_NON_EU_COUNTRIES = {"US", "CN", "RU", "IN", "JP", "BR", "AU"}
+
+
+def _registrable_domain(host: str) -> str:
+ """vw.de von www.vw.de oder bla.vw.de oder vw.de"""
+ h = (host or "").lstrip(".").lower()
+ parts = h.split(".")
+ if len(parts) >= 2:
+ return ".".join(parts[-2:])
+ return h
+
+
+def _lookup_vendor_by_domain(cookie_domain: str) -> tuple[str, str] | None:
+ if not cookie_domain:
+ return None
+ cd = cookie_domain.lower()
+ if not cd.startswith("."):
+ cd = "." + cd
+ for suffix, (vendor, country) in _DOMAIN_VENDORS.items():
+ if cd.endswith(suffix):
+ return (vendor, country)
+ return None
+
+
def trace_cookie_network(
    cookies_detailed: list[dict] | None,
    site_url: str | None = None,
) -> list[dict]:
    """Flag cookies declared as consent-free but set by an external or vendor domain.

    Args:
        cookies_detailed: Cookie dicts; the keys read are ``name``,
            ``domain`` and ``declared_category``. Non-dict entries and
            entries without a name or domain are skipped.
        site_url: URL of the audited site. When it is missing or cannot be
            parsed, the first-/third-party comparison is disabled and only
            known vendor domains can produce findings.

    Returns:
        One dict per suspicious cookie with the keys ``cookie``,
        ``declared``, ``cookie_domain``, ``site_domain``, ``vendor``,
        ``vendor_country``, ``third_country``, ``severity`` and ``label``.
    """
    if not cookies_detailed:
        return []

    site_host = ""
    if site_url:
        try:
            # A URL without "//" would be parsed as a bare path and yield no
            # host at all, silently disabling the third-party check.
            candidate = site_url if "//" in site_url else "//" + site_url
            # .hostname drops port and userinfo; with .netloc a URL such as
            # https://example.com:8443/ never equals the cookie domain and
            # every own cookie is reported as third-party.
            site_host = _registrable_domain(urlparse(candidate).hostname or "")
        except (ValueError, TypeError, AttributeError):
            # Best effort: an unusable URL must not abort the whole trace.
            site_host = ""

    consent_free = ("essential", "functional", "necessary")
    out: list[dict] = []
    for ck in cookies_detailed:
        if not isinstance(ck, dict):
            continue
        name = (ck.get("name") or "").strip()
        domain = (ck.get("domain") or "").strip()
        declared = (ck.get("declared_category") or "").lower().strip()
        if not name or not domain:
            continue

        cookie_reg = _registrable_domain(domain)
        is_third_party = bool(site_host and cookie_reg != site_host)
        vendor_match = _lookup_vendor_by_domain(domain)

        if not vendor_match and not is_third_party:
            continue
        if declared not in consent_free:
            continue

        if is_third_party:
            # Defeat-device pattern: consent-free declaration, but the cookie
            # is physically set by a foreign domain.
            vendor_name = vendor_match[0] if vendor_match else cookie_reg
            country = vendor_match[1] if vendor_match else ""
            third_country = country in _NON_EU_COUNTRIES
            out.append({
                "cookie": name,
                "declared": declared,
                "cookie_domain": domain,
                "site_domain": site_host,
                "vendor": vendor_name,
                "vendor_country": country,
                "third_country": third_country,
                "severity": "HIGH" if vendor_match else "MEDIUM",
                "label": (
                    f"Cookie '{name}' deklariert als '{declared}', "
                    f"wird aber von externer Domain "
                    f"{vendor_name} "
                    f"({domain}) gesetzt"
                    + (f" — Drittland: {country}" if third_country else "")
                ),
            })
        elif vendor_match:
            # Not classified as third-party (same site, or site host
            # unknown), yet the domain belongs to a known tracker vendor,
            # e.g. a tag manager exposed through a CNAME.
            out.append({
                "cookie": name,
                "declared": declared,
                "cookie_domain": domain,
                # Added so both finding shapes expose the same keys.
                "site_domain": site_host,
                "vendor": vendor_match[0],
                "vendor_country": vendor_match[1],
                "third_country": vendor_match[1] in _NON_EU_COUNTRIES,
                "severity": "MEDIUM",
                "label": (
                    f"Cookie '{name}' deklariert als '{declared}', "
                    f"Domain {domain} gehoert aber zu "
                    f"{vendor_match[0]} "
                    f"({vendor_match[1]})"
                ),
            })
    return out
+
+
def build_network_trace_block_html(findings: list[dict]) -> str:
    """Render the network-trace findings as an HTML block for the audit report.

    Args:
        findings: Finding dicts as produced by ``trace_cookie_network``. The
            keys read are ``label``, ``severity``, ``third_country`` and
            ``vendor_country``. At most the first 30 findings are listed;
            the counts in the summary line cover all of them.

    Returns:
        An HTML fragment, or ``""`` when there are no findings.
    """
    if not findings:
        return ""
    from html import escape

    n_third = sum(1 for f in findings if f.get("third_country"))
    items: list[str] = []
    for f in findings[:30]:
        sev_color = "#dc2626" if f["severity"] == "HIGH" else "#d97706"
        country_flag = ""
        if f.get("third_country"):
            country_flag = (
                ' <span style="color:#dc2626;font-weight:600;">'
                f'DRITTLAND {escape(str(f.get("vendor_country", "")))}'
                '</span>'
            )
        # Labels embed cookie names and domains taken from the scanned site,
        # i.e. untrusted text: escape before placing it into markup.
        items.append(
            f'<li style="margin:4px 0;border-left:3px solid {sev_color};'
            f'padding-left:8px;">{escape(str(f["label"]))}{country_flag}</li>'
        )

    plural = "s" if len(findings) != 1 else ""
    third_note = f" — davon {n_third} mit Drittland-Transfer" if n_third else ""
    return (
        '<div style="margin:16px 0;padding:12px;border:1px solid #fcd34d;'
        'background:#fffbeb;">'
        '<h3 style="margin:0 0 8px 0;">'
        'Cookie-Netzwerk-Verhalten (Defeat-Device-Heuristik)</h3>'
        '<p style="margin:0 0 8px 0;font-weight:600;">'
        f'{len(findings)} Cookie{plural} '
        f'mit Vendor-Domain-Diskrepanz'
        f'{third_note}'
        '</p>'
        '<p style="margin:0 0 8px 0;">'
        'Diese Cookies sind als "essential" oder "funktional" deklariert, '
        'werden aber von einer externen Domain gesetzt — typisch fuer '
        'getarnte Tracker. Drittland-Markierungen sind besonders kritisch: '
        'sie loesen Pflichten nach Art. 44-49 DSGVO aus (SCC / Angemessen-'
        'heitsbeschluss / Schrems II Folge-Massnahmen).'
        '</p>'
        '<ul style="margin:0;padding-left:18px;">'
        + "".join(items) +
        '</ul></div>'
    )
diff --git a/backend-compliance/compliance/services/cookie_value_entropy.py b/backend-compliance/compliance/services/cookie_value_entropy.py
new file mode 100644
index 00000000..b3d488bd
--- /dev/null
+++ b/backend-compliance/compliance/services/cookie_value_entropy.py
@@ -0,0 +1,148 @@
+"""
+P103 — Cookie-Value-Entropy-Check (Stufe 3).
+
+Bewertet ob der Cookie-Wert zur deklarierten Kategorie passt:
+* "Funktional" + 2-char-Wert ('1', 'de') → konsistent (Flag)
+* "Funktional" + 64-char-Base64 → INKONSISTENT (Tracking-ID-Pattern)
+* "Marketing" + 32+ char Hash → konsistent
+* "Marketing" + 2-char-Wert → konsistent (Boolean-Opt-Out)
+
+Defeat-Device-Pattern: Site deklariert "Funktional" um Consent zu
+umgehen, aber Wert sieht wie pseudonymisierte Tracking-ID aus.
+"""
+
+from __future__ import annotations
+
+import logging
+import math
+import re
+
+logger = logging.getLogger(__name__)
+
+
+def _shannon_entropy(s: str) -> float:
+ if not s:
+ return 0.0
+ from collections import Counter
+ n = len(s)
+ counts = Counter(s)
+ return -sum((c / n) * math.log2(c / n) for c in counts.values())
+
+
+_BASE64_RE = re.compile(r"^[A-Za-z0-9+/=_-]{20,}$")
+_HEX_RE = re.compile(r"^[a-fA-F0-9]{16,}$")
+_UUID_RE = re.compile(
+ r"^[0-9a-fA-F]{8}-[0-9a-fA-F]{4}-[0-9a-fA-F]{4}-"
+ r"[0-9a-fA-F]{4}-[0-9a-fA-F]{12}$"
+)
+_FLAG_VALUES = {"0", "1", "true", "false", "yes", "no",
+ "de", "en", "de-de", "en-us", "fr-fr",
+ "accept", "deny", "essential", "on", "off"}
+
+
+def _classify_value_shape(value: str) -> str:
+ """Returns one of: 'flag', 'short_id', 'long_token', 'uuid', 'hash',
+ 'json_blob', 'unknown'."""
+ if not value:
+ return "flag"
+ v = value.strip()
+ if v.lower() in _FLAG_VALUES:
+ return "flag"
+ if len(v) <= 4:
+ return "flag"
+ if _UUID_RE.match(v):
+ return "uuid"
+ if _HEX_RE.match(v) and len(v) >= 32:
+ return "hash"
+ if _BASE64_RE.match(v) and len(v) >= 40:
+ return "long_token"
+ if v.startswith("{") or v.startswith("["):
+ return "json_blob"
+ if len(v) >= 16 and _shannon_entropy(v) > 3.5:
+ return "long_token"
+ if len(v) >= 6:
+ return "short_id"
+ return "flag"
+
+
def check_cookies_for_entropy_mismatch(
    cookies_detailed: list[dict] | None,
) -> list[dict]:
    """Report cookies whose value shape contradicts their declared category.

    A cookie is reported when it is declared as essential, functional or
    necessary while its value is classified as a UUID, hash or long token.

    Args:
        cookies_detailed: Cookie dicts; the keys read are ``name``,
            ``value`` and ``declared_category``. Non-dict entries and
            entries without a name or declared category are skipped.

    Returns:
        One finding dict per suspicious cookie (severity ``MEDIUM``).
    """
    consent_free = ("essential", "functional", "necessary")
    identifier_shapes = ("uuid", "hash", "long_token")

    findings: list[dict] = []
    for cookie in cookies_detailed or []:
        if not isinstance(cookie, dict):
            continue
        name = (cookie.get("name") or "").strip()
        value = (cookie.get("value") or "").strip()
        declared = (cookie.get("declared_category") or "").lower().strip()
        if not (name and declared):
            continue

        shape = _classify_value_shape(value)
        if declared not in consent_free or shape not in identifier_shapes:
            continue

        findings.append({
            "cookie": name,
            "declared": declared,
            "value_shape": shape,
            "value_len": len(value),
            "severity": "MEDIUM",
            "label": (
                f"Cookie '{name}' deklariert als '{declared}', "
                f"aber Wert ist ein {shape} ({len(value)} Zeichen) — "
                "typisches Tracking-ID-Pattern"
            ),
            "detail": (
                "Funktionale/notwendige Cookies speichern normalerweise "
                "kurze Flags (1, true, de-DE). Ein langer Hash/UUID-Wert "
                "in einem als 'essential' deklarierten Cookie ist ein "
                "Indikator fuer verstecktes Tracking — vergleichbar mit "
                "einem 'Defeat Device', das auf dem Pruefstand harmlos "
                "aussieht aber im Realbetrieb anderes tut."
            ),
        })
    return findings
+
+
def build_entropy_block_html(findings: list[dict]) -> str:
    """Render the value-entropy findings as an HTML block for the audit report.

    Args:
        findings: Finding dicts as produced by
            ``check_cookies_for_entropy_mismatch``. The keys read are
            ``cookie``, ``declared``, ``value_shape`` and ``value_len``. At
            most the first 25 findings are listed; the count in the summary
            line covers all of them.

    Returns:
        An HTML fragment, or ``""`` when there are no findings.
    """
    if not findings:
        return ""
    from html import escape

    items: list[str] = []
    for f in findings[:25]:
        # Cookie names come from the scanned site and are untrusted text:
        # escape every interpolated value before it enters the markup.
        items.append(
            f'<li style="margin:4px 0;">'
            f'<code>{escape(str(f["cookie"]))}</code> '
            f'(deklariert: '
            f'<em>{escape(str(f["declared"]))}</em>) — Wert-Shape: '
            f'<strong style="color:#d97706;">'
            f'{escape(str(f["value_shape"]))} '
            f'({escape(str(f["value_len"]))} Zeichen)'
            f'</strong></li>'
        )

    plural = "s" if len(findings) != 1 else ""
    return (
        '<div style="margin:16px 0;padding:12px;border:1px solid #fcd34d;'
        'background:#fffbeb;">'
        '<h3 style="margin:0 0 8px 0;">'
        'Cookie-Werte-Plausibilitaet (Defeat-Device-Heuristik)</h3>'
        '<p style="margin:0 0 8px 0;font-weight:600;">'
        f'{len(findings)} Cookie{plural} '
        'mit verdaechtigem Wert-Pattern</p>'
        '<p style="margin:0 0 8px 0;">'
        'Diese Cookies sind als "essential" oder "funktional" deklariert, '
        'ihr tatsaechlicher Wert sieht aber wie eine Tracking-ID aus '
        '(UUID, Hash, langer Base64-Token). Empfehlung: pruefen ob diese '
        'Cookies wirklich nur technisch notwendig sind oder de facto '
        'pseudonymisierte User-Tracker.</p>'
        '<ul style="margin:0;padding-left:18px;">'
        + "".join(items) +
        '</ul></div>'
    )
diff --git a/backend-compliance/compliance/services/tcf_vendor_authority.py b/backend-compliance/compliance/services/tcf_vendor_authority.py
new file mode 100644
index 00000000..7618425d
--- /dev/null
+++ b/backend-compliance/compliance/services/tcf_vendor_authority.py
@@ -0,0 +1,238 @@
+"""
+P105 — IAB TCF Vendor-Liste als externe Authority.
+
+Die IAB TCF v2.2 Global Vendor List (https://vendor-list.consensu.org/v3/
+vendor-list.json) ist die DSGVO-Authoritaet fuer Werbe-Vendoren: jeder
+gelistete Vendor hat verbindliche IAB-Purposes:
+ Purpose 1 — Speichern + Zugriff (essential)
+ Purpose 2 — Auswahl Werbung (functional/marketing)
+ Purpose 3 — Personalisierte Werbeprofile (marketing)
+ Purpose 4 — Personalisierte Werbung (marketing)
+ Purpose 5 — Personalisierte Inhaltsprofile (marketing/personalization)
+ Purpose 6 — Personalisierte Inhalte (marketing/personalization)
+ Purpose 7 — Werbe-Performance-Messung (statistics)
+ Purpose 8 — Inhalts-Performance-Messung (statistics)
+ Purpose 9 — Marktforschung (statistics)
+ Purpose 10 — Produkt-Verbesserung (statistics)
+
+Wenn ein Vendor in der TCF-Liste mit Purpose 3/4 registriert ist und die
+Site ihn als "Funktional" deklariert → eindeutiger Verstoss (eine externe
+Authority widerspricht der Deklaration).
+
+Ingest-Mode: idempotenter Fetch + Upsert in compliance.cookie_library
+(Zeilen mit source='iab_tcf_v2' und cookie_name='_tcf_v<Vendor-ID>').
+Lookup-Mode: Fuzzy-Match ueber vendor_name (lookup_tcf_authority).
+"""
+
+from __future__ import annotations
+
+import logging
+from typing import Iterable
+
+import httpx
+from sqlalchemy import text as sa_text
+from sqlalchemy.orm import Session
+
+logger = logging.getLogger(__name__)
+
+
+_TCF_URL = "https://vendor-list.consensu.org/v3/vendor-list.json"
+
+# IAB-Purpose → BreakPilot-Kategorie
+_PURPOSE_TO_CATEGORY = {
+ 1: "essential",
+ 2: "marketing",
+ 3: "marketing",
+ 4: "marketing",
+ 5: "personalization",
+ 6: "personalization",
+ 7: "statistics",
+ 8: "statistics",
+ 9: "statistics",
+ 10: "statistics",
+ 11: "marketing",
+}
+
+
+def _category_for_purposes(purposes: Iterable[int]) -> str:
+ """Aggregiert Purposes zu der STRENGSTEN Kategorie (Marketing > stats
+ > personalization > essential). Wenn ein Vendor sowohl essential als
+ auch marketing nutzt, ist die rechtlich verbindliche Kategorie
+ Marketing (Einwilligungspflicht)."""
+ cats = {_PURPOSE_TO_CATEGORY.get(p, "marketing") for p in purposes}
+ if "marketing" in cats:
+ return "marketing"
+ if "statistics" in cats:
+ return "statistics"
+ if "personalization" in cats:
+ return "personalization"
+ return "essential"
+
+
async def fetch_and_ingest_tcf_vendors(db: Session) -> dict:
    """Fetch the IAB Global Vendor List and upsert one marker row per vendor.

    No schema migration is needed: rows go into the existing
    ``compliance.cookie_library`` table with ``source='iab_tcf_v2'``, a
    synthetic ``cookie_name`` of ``_tcf_v<vendor id>`` and a ``vendor_name``
    of ``[TCF-<vendor id>] <name>``. Re-running updates category and vendor
    name of existing rows, so the ingest is idempotent.

    Args:
        db: Session used for the upserts; it is committed once at the end.

    Returns:
        ``{"n_vendors_in_gvl", "inserted", "skipped"}`` on success, or
        ``{"error", "n_vendors"}`` when the response contains no vendors.
        ``inserted`` counts inserted and updated rows alike.

    Raises:
        httpx.HTTPError: If the vendor list cannot be fetched.
    """
    async with httpx.AsyncClient(timeout=60.0) as client:
        resp = await client.get(_TCF_URL)
        resp.raise_for_status()
        data = resp.json()

    vendors = data.get("vendors") or {}
    if not vendors:
        return {"error": "no vendors in TCF response", "n_vendors": 0}

    upsert = sa_text(
        """
        INSERT INTO compliance.cookie_library
        (cookie_name, actual_category, vendor_name, source)
        VALUES (:n, :cat, :v, 'iab_tcf_v2')
        ON CONFLICT (cookie_name) DO UPDATE
        SET actual_category = EXCLUDED.actual_category,
        vendor_name = EXCLUDED.vendor_name
        """
    )

    inserted = 0
    skipped = 0
    for vid, v in vendors.items():
        name = (v.get("name") or "").strip()
        if not name:
            continue
        purposes = v.get("purposes") or []
        leg_purposes = v.get("legIntPurposes") or []
        # Consent-based and legitimate-interest purposes count equally.
        category = _category_for_purposes(set(purposes) | set(leg_purposes))

        # The GVL does not list cookie names, so each vendor is stored under
        # a synthetic marker; lookups match on vendor_name instead.
        marker = f"_tcf_v{vid}"
        try:
            # One SAVEPOINT per row: a failing statement otherwise leaves the
            # surrounding transaction aborted, every following upsert fails
            # as well, and the final commit persists nothing.
            with db.begin_nested():
                db.execute(upsert, {"n": marker, "cat": category,
                                    "v": f"[TCF-{vid}] {name}"})
            inserted += 1
        except Exception as e:
            logger.warning("TCF vendor %s insert failed: %s", vid, e)
            skipped += 1
    db.commit()
    return {"n_vendors_in_gvl": len(vendors), "inserted": inserted,
            "skipped": skipped}
+
+
def lookup_tcf_authority(
    db: Session,
    vendor_name: str | None,
) -> dict | None:
    """Look up a vendor name in the ingested IAB TCF vendor rows.

    The match is a case-insensitive substring match against the vendor part
    of the stored label, so ``'Google'`` matches
    ``'[TCF-755] Google Advertising Products'``. When several rows match,
    the shortest label wins, which makes the result reproducible.

    Args:
        db: Session used for the read query.
        vendor_name: Vendor name as declared by the site. ``None``, empty
            and whitespace-only names never match.

    Returns:
        ``{"tcf_id", "name", "category"}`` for the best match, or ``None``
        when nothing matches or the query fails (the failure is logged).
    """
    needle = (vendor_name or "").lower().strip()
    if not needle:
        # An empty needle would turn the pattern into "match every row".
        return None

    # Escape LIKE metacharacters so a literal '%' or '_' in the declared name
    # cannot act as a wildcard. '!' is the escape character, which avoids
    # backslash handling differences.
    escaped = needle.replace("!", "!!").replace("%", "!%").replace("_", "!_")
    # Anchor the needle behind the "[TCF-<id>] " prefix; otherwise names such
    # as "tcf" or "7" would match the prefix of unrelated vendors.
    pattern = f"[tcf-%] %{escaped}%"

    label_prefix = "[TCF-"
    try:
        rows = db.execute(sa_text(
            """
            SELECT cookie_name, actual_category, vendor_name
            FROM compliance.cookie_library
            WHERE source = 'iab_tcf_v2'
            AND LOWER(vendor_name) LIKE :pat ESCAPE '!'
            ORDER BY LENGTH(vendor_name), cookie_name
            LIMIT 5
            """
        ), {"pat": pattern}).fetchall()
        for r in rows:
            tcf_name = r[2]  # '[TCF-755] Google ...'
            if not tcf_name or "]" not in tcf_name:
                continue
            head, _, tail = tcf_name.partition("]")
            # Cut the literal prefix; str.lstrip("[TCF-") would strip a
            # character set rather than the prefix.
            tcf_id = (
                head[len(label_prefix):]
                if head.startswith(label_prefix) else head
            )
            return {"tcf_id": tcf_id, "name": tail.strip(),
                    "category": r[1]}
    except Exception as e:
        logger.warning("TCF lookup failed: %s", e)
    return None
+
+
def cross_reference_with_tcf(
    db: Session,
    declared_vendors: list[dict],
) -> list[dict]:
    """Compare each declared vendor category with its IAB TCF category.

    Args:
        db: Session passed through to ``lookup_tcf_authority``.
        declared_vendors: Dicts with ``name`` and ``category``. Non-dict
            entries and entries missing either value are skipped, as are
            vendors that are not found in the TCF data.

    Returns:
        One dict per discrepancy with the keys ``vendor``, ``tcf_id``,
        ``tcf_name``, ``declared_category``, ``tcf_category`` and
        ``severity``.
    """
    consent_free = ("essential", "functional", "necessary")
    out: list[dict] = []
    for v in (declared_vendors or []):
        if not isinstance(v, dict):
            continue
        name = (v.get("name") or "").strip()
        declared_cat = (v.get("category") or "").lower().strip()
        if not name or not declared_cat:
            continue
        tcf = lookup_tcf_authority(db, name)
        if not tcf:
            continue
        # TCF-derived categories only ever say "essential"; a site declaring
        # "necessary" means the same tier and must not be reported.
        comparable = "essential" if declared_cat == "necessary" else declared_cat
        if tcf["category"] == comparable:
            continue
        # HIGH only when TCF lists marketing while the site claims a
        # consent-free category; every other mismatch is MEDIUM.
        is_critical = (
            tcf["category"] == "marketing" and declared_cat in consent_free
        )
        out.append({
            "vendor": name,
            "tcf_id": tcf["tcf_id"],
            "tcf_name": tcf["name"],
            "declared_category": declared_cat,
            "tcf_category": tcf["category"],
            "severity": "HIGH" if is_critical else "MEDIUM",
        })
    return out
+
+
def build_tcf_authority_block_html(findings: list[dict]) -> str:
    """Render the TCF category discrepancies as an HTML block for the audit report.

    Args:
        findings: Finding dicts as produced by ``cross_reference_with_tcf``.
            The keys read are ``vendor``, ``declared_category``, ``tcf_id``,
            ``tcf_category`` and ``severity``. At most the first 30 findings
            are listed; the count in the summary line covers all of them.

    Returns:
        An HTML fragment, or ``""`` when there are no findings.
    """
    if not findings:
        return ""
    from html import escape

    items: list[str] = []
    for f in findings[:30]:
        sev_color = "#dc2626" if f["severity"] == "HIGH" else "#d97706"
        # Vendor names are declared by the audited site and are untrusted
        # text: escape every interpolated value.
        items.append(
            f'<li style="margin:4px 0;">'
            f'<strong>{escape(str(f["vendor"]))}</strong> '
            f'— deklariert als '
            f'<em>{escape(str(f["declared_category"]))}</em>, '
            f'IAB TCF v2 (Vendor-ID '
            f'{escape(str(f["tcf_id"]))}) listet als '
            f'<strong style="color:{sev_color};">'
            f'{escape(str(f["tcf_category"]))}'
            f'</strong></li>'
        )

    plural = "en" if len(findings) != 1 else ""
    return (
        '<div style="margin:16px 0;padding:12px;border:1px solid #fca5a5;'
        'background:#fef2f2;">'
        '<h3 style="margin:0 0 8px 0;">'
        'IAB TCF v2 Authority-Check — Vendor-Kategorie-Diskrepanz</h3>'
        '<p style="margin:0 0 8px 0;font-weight:600;">'
        f'{len(findings)} Vendor{plural} '
        'mit Kategorie-Widerspruch zur offiziellen IAB-Liste</p>'
        '<p style="margin:0 0 8px 0;">'
        'Die IAB Transparency &amp; Consent Framework v2 Global Vendor List '
        'ist die rechtliche Authoritaet fuer die Klassifizierung von '
        'Werbe-Vendoren in der EU. Wenn ein Vendor dort als "Marketing" '
        'gefuehrt ist, kann die Site ihn nicht als "Funktional" einstufen '
        '— das ist eine externe, durchgesetzte Klassifikation.</p>'
        '<ul style="margin:0;padding-left:18px;">'
        + "".join(items) +
        '</ul>'
        '<p style="margin:8px 0 0 0;font-size:12px;color:#6b7280;">Quelle: '
        'https://vendor-list.consensu.org/v3/vendor-list.json — '
        'die TCF-Liste ist verbindlich fuer alle CMP-Tools die IAB-TCF v2 '
        'implementieren (Cookiebot, OneTrust, Usercentrics, Sourcepoint, …).'
        '</p>'
        '</div>'
    )