feat(tcf-vendors): GVL cache + vendor extraction + VVT mapping
CI / loc-budget (push) Failing after 16s
Build + Deploy / build-admin-compliance (push) Successful in 14s
Build + Deploy / build-backend-compliance (push) Successful in 16s
Build + Deploy / build-ai-sdk (push) Successful in 20s
Build + Deploy / build-developer-portal (push) Successful in 12s
Build + Deploy / build-tts (push) Successful in 15s
Build + Deploy / build-document-crawler (push) Successful in 13s
Build + Deploy / build-dsms-gateway (push) Successful in 13s
Build + Deploy / build-dsms-node (push) Successful in 12s
CI / branch-name (push) Has been skipped
CI / guardrail-integrity (push) Has been skipped
CI / test-python-document-crawler (push) Successful in 26s
CI / secret-scan (push) Has been skipped
CI / go-lint (push) Has been skipped
CI / python-lint (push) Has been skipped
CI / nodejs-lint (push) Has been skipped
CI / nodejs-build (push) Successful in 2m49s
CI / dep-audit (push) Has been skipped
CI / sbom-scan (push) Has been skipped
CI / test-go (push) Successful in 45s
CI / test-python-backend (push) Successful in 38s
CI / test-python-dsms-gateway (push) Successful in 23s
CI / validate-canonical-controls (push) Successful in 15s
Build + Deploy / trigger-orca (push) Successful in 2m23s
CI / loc-budget (push) Failing after 16s
Build + Deploy / build-admin-compliance (push) Successful in 14s
Build + Deploy / build-backend-compliance (push) Successful in 16s
Build + Deploy / build-ai-sdk (push) Successful in 20s
Build + Deploy / build-developer-portal (push) Successful in 12s
Build + Deploy / build-tts (push) Successful in 15s
Build + Deploy / build-document-crawler (push) Successful in 13s
Build + Deploy / build-dsms-gateway (push) Successful in 13s
Build + Deploy / build-dsms-node (push) Successful in 12s
CI / branch-name (push) Has been skipped
CI / guardrail-integrity (push) Has been skipped
CI / test-python-document-crawler (push) Successful in 26s
CI / secret-scan (push) Has been skipped
CI / go-lint (push) Has been skipped
CI / python-lint (push) Has been skipped
CI / nodejs-lint (push) Has been skipped
CI / nodejs-build (push) Successful in 2m49s
CI / dep-audit (push) Has been skipped
CI / sbom-scan (push) Has been skipped
CI / test-go (push) Successful in 45s
CI / test-python-backend (push) Successful in 38s
CI / test-python-dsms-gateway (push) Successful in 23s
CI / validate-canonical-controls (push) Successful in 15s
Build + Deploy / trigger-orca (push) Successful in 2m23s
Phase 1-2 of the closed quality loop:
- GVL cache (consent-tester/services/gvl_cache.py): downloads and caches the IAB Global Vendor List with a 24h TTL; resolves vendor IDs to names, purposes, policy URLs, retention, country
- Vendor extraction (consent_interceptor.py): extract_tcf_vendors() reads __tcfapi after the accept phase and resolves the vendors via the GVL
- Scan response: tcf_vendors field added to the /scan endpoint
- VVT mapper (vendor_vvt_mapper.py): maps TCF vendors to VVT format with purpose labels, Rechtsgrundlage, Drittland detection
- Vendor cross-check (banner_cookie_cross_check.py): checks all TCF vendors against the DSI text — missing vendors, undocumented transfers
- Compliance check integrates Step 3d: TCF vendors vs DSI

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -258,16 +258,29 @@ async def _run_compliance_check(check_id: str, req: ComplianceCheckRequest):
|
||||
banner_result, doc_texts["cookie"],
|
||||
)
|
||||
if cross_findings:
|
||||
# Add cross-check findings to cookie results
|
||||
for r in results:
|
||||
if r.doc_type == "cookie":
|
||||
for cf in cross_findings:
|
||||
r.checks.append(CheckItem(**cf))
|
||||
# Recompute
|
||||
l2 = [c for c in r.checks if c.level == 2 and not c.skipped]
|
||||
l2p = sum(1 for c in l2 if c.passed)
|
||||
r.correctness_pct = round(l2p / len(l2) * 100) if l2 else 0
|
||||
|
||||
# Step 3d: TCF Vendor cross-check against DSI
|
||||
tcf_vendors = banner_result.get("tcf_vendors", []) if banner_result else []
|
||||
vvt_entries: list[dict] = []
|
||||
if tcf_vendors and "dse" in doc_texts:
|
||||
_update(check_id, f"{len(tcf_vendors)} TCF-Verarbeiter vs. DSI abgleichen...")
|
||||
from compliance.services.banner_cookie_cross_check import cross_check_vendors_vs_dsi
|
||||
from compliance.services.vendor_vvt_mapper import map_vendors_to_vvt
|
||||
vendor_findings = cross_check_vendors_vs_dsi(tcf_vendors, doc_texts["dse"])
|
||||
if vendor_findings:
|
||||
for r in results:
|
||||
if r.doc_type == "dse":
|
||||
for vf in vendor_findings:
|
||||
r.checks.append(CheckItem(**vf))
|
||||
vvt_entries = map_vendors_to_vvt(tcf_vendors)
|
||||
|
||||
# Step 4: Extract profile hints from documents
|
||||
_update(check_id, "Profil wird aus Dokumenten extrahiert...")
|
||||
from compliance.services.profile_extractor import extract_profile_from_documents
|
||||
@@ -307,7 +320,9 @@ async def _run_compliance_check(check_id: str, req: ComplianceCheckRequest):
|
||||
"detected": banner_result.get("banner_detected", False) if banner_result else False,
|
||||
"provider": banner_result.get("banner_provider", "") if banner_result else "",
|
||||
"violations": len(banner_result.get("banner_checks", {}).get("violations", [])) if banner_result else 0,
|
||||
"tcf_vendor_count": len(tcf_vendors),
|
||||
} if banner_result else None,
|
||||
"tcf_vendors": vvt_entries if tcf_vendors else [],
|
||||
"total_documents": len(results),
|
||||
"total_findings": total_findings,
|
||||
"email_status": email_result.get("status", "failed"),
|
||||
|
||||
@@ -143,3 +143,83 @@ def cross_check_banner_vs_cookie(
|
||||
logger.info("Cross-check: %d findings (%d services, %d tracking before)",
|
||||
len(findings), len(all_tracking), len(tracking_before))
|
||||
return findings
|
||||
|
||||
|
||||
def cross_check_vendors_vs_dsi(
    vendors: list[dict],
    dsi_text: str,
) -> list[dict]:
    """Cross-check whether every TCF vendor is documented in the DSI.

    Checks per vendor:
      1. Is the vendor mentioned by name (full name, name without spaces,
         or its first word unless that word is a bare article)?
      2. If the vendor is mentioned and sits in a third country: does the
         DSI document the transfer (vendor name followed by "USA" /
         "Drittland" on the same line, or a transfer-mechanism keyword
         anywhere in the text)?

    Storage duration is not checked here.

    Vendors may be given either as VVT entries (keys ``drittland``,
    ``land``, ``zweck_kurz``) or as resolved GVL vendors (keys ``is_eu``,
    ``country``); the VVT keys win when both are present.

    Args:
        vendors: Vendor dicts; entries without a usable ``name`` are skipped.
        dsi_text: Full text of the privacy notice (DSI).

    Returns:
        List of CheckItem-compatible dicts, one per failed check.
    """
    import re  # function-scope import keeps the block self-contained

    # A leading article alone is far too generic to count as a mention
    # (e.g. "the" from "The Trade Desk" occurs in any English text).
    generic_first_words = {"the", "der", "die", "das", "le", "la"}
    transfer_keywords = (
        "scc", "standardvertragsklausel", "data privacy framework",
        "angemessenheitsbeschluss",
    )

    vendor_list = vendors or []
    findings: list[dict] = []
    dsi_lower = (dsi_text or "").lower()

    for v in vendor_list:
        # Tolerate a missing / None / whitespace-only name.
        name = str(v.get("name") or "").strip()
        name_lower = name.lower()
        if not name_lower:
            continue

        # Same id suffix for both finding types; fall back to the name so
        # vendors without an id do not collide on an empty suffix.
        vendor_ref = v.get("vendor_id")
        if vendor_ref is None:
            vendor_ref = name_lower[:20]

        # Check if vendor is mentioned in DSI
        variants = {name_lower, name_lower.replace(" ", "")}
        first_word = name_lower.split()[0]
        if first_word not in generic_first_words:
            variants.add(first_word)
        mentioned = any(variant in dsi_lower for variant in variants)

        if not mentioned:
            findings.append({
                "id": f"vendor-{vendor_ref}",
                "label": f"Verarbeiter '{name}' fehlt in DSI",
                "passed": False,
                "severity": "HIGH",
                "level": 2,
                "parent": None,
                "skipped": False,
                "matched_text": "",
                "hint": (
                    f"Der Cookie-Banner listet '{name}' als Verarbeiter "
                    f"({v.get('zweck_kurz') or 'unbekannt'}), aber die DSI "
                    "erwaehnt diesen Dienst nicht. Art. 13(1)(e) DSGVO "
                    "verlangt die Benennung aller Empfaenger."
                ),
                "source": "vendor_cross_check",
            })
            # The transfer check only makes sense for mentioned vendors.
            continue

        # Check third-country transfer documentation.
        # VVT entries carry "drittland"; raw GVL vendors only carry "is_eu".
        is_third_country = v.get("drittland")
        if is_third_country is None and v.get("is_eu") is False:
            is_third_country = True
        if not is_third_country:
            continue

        country = v.get("land") or v.get("country") or "Drittland"
        # Real regex instead of a literal ".*" substring probe. "." does not
        # match newlines, so name and keyword must share a line; \b keeps
        # "usa" from matching inside words such as "zusammen".
        near_name = re.search(
            re.escape(name_lower) + r".*(?:\busa\b|drittland)",
            dsi_lower,
        )
        transfer_mentioned = near_name is not None or any(
            kw in dsi_lower for kw in transfer_keywords
        )
        if not transfer_mentioned:
            findings.append({
                "id": f"vendor-transfer-{vendor_ref}",
                "label": f"Drittlandtransfer fuer '{name}' nicht dokumentiert",
                "passed": False,
                "severity": "MEDIUM",
                "level": 2,
                "parent": None,
                "skipped": False,
                "matched_text": "",
                "hint": (
                    f"'{name}' verarbeitet Daten in {country} (ausserhalb EWR). "
                    "Die DSI muss den Transfermechanismus benennen: "
                    "SCC (Art. 46(2)(c)) oder DPF (Angemessenheitsbeschluss)."
                ),
                "source": "vendor_cross_check",
            })

    logger.info("Vendor cross-check: %d findings for %d vendors",
                len(findings), len(vendor_list))
    return findings
|
||||
|
||||
@@ -0,0 +1,104 @@
|
||||
"""
|
||||
Vendor VVT Mapper — map TCF vendors to VVT entries.
|
||||
|
||||
Converts resolved TCF vendor data (from GVL) into the format
|
||||
needed for the Verarbeitungsverzeichnis (VVT) and for DSI
|
||||
cross-checking.
|
||||
"""
|
||||
|
||||
import logging
|
||||
|
||||
logger = logging.getLogger(__name__)

# IAB TCF v2.2 purpose definitions (German labels, ASCII-transliterated).
# Keys are the TCF purpose IDs 1-11.
TCF_PURPOSE_LABELS = {
    1: "Speicherung/Zugriff auf Endgeraet",
    2: "Auswahl einfacher Anzeigen",
    3: "Personalisiertes Anzeigenprofil erstellen",
    4: "Personalisierte Anzeigen auswaehlen",
    5: "Personalisiertes Inhaltsprofil erstellen",
    6: "Personalisierte Inhalte auswaehlen",
    7: "Anzeigenleistung messen",
    8: "Inhaltsleistung messen",
    9: "Marktforschung",
    10: "Produkte entwickeln und verbessern",
    # Purpose 11 is "Use limited data to select content". Scanning device
    # characteristics for identification is Special Feature 2, not a purpose.
    11: "Verwendung reduzierter Daten zur Auswahl von Inhalten",
}

# Purpose ID -> banner category.
# NOTE(review): purpose 1 is mapped to "necessary"; confirm this matches how
# the banner categories are meant to treat device storage/access.
PURPOSE_CATEGORY = {
    1: "necessary",
    2: "marketing",
    3: "marketing",
    4: "marketing",
    5: "marketing",
    6: "marketing",
    7: "statistics",
    8: "statistics",
    9: "statistics",
    10: "functional",
    11: "functional",
}

# ISO 3166-1 alpha-2 codes treated as "not a third country":
# the 27 EU member states, the EEA/EWR states IS, LI, NO, and additionally
# CH and GB. CH and GB are not EEA members.
# NOTE(review): presumably included because of adequacy decisions - confirm.
_EU_EWR = {
    # EU member states
    "AT", "BE", "BG", "HR", "CY", "CZ", "DK", "EE", "FI", "FR",
    "DE", "GR", "HU", "IE", "IT", "LV", "LT", "LU", "MT", "NL",
    "PL", "PT", "RO", "SK", "SI", "ES", "SE",
    # Remaining EEA/EWR states
    "IS", "LI", "NO",
    # Not EEA members, but treated like EEA here
    "CH", "GB",
}
|
||||
|
||||
|
||||
def tcf_vendor_to_vvt(vendor: dict) -> dict:
    """Map a resolved TCF vendor to a VVT entry.

    Args:
        vendor: Resolved GVL vendor dict. Keys read here: ``vendor_id``,
            ``name``, ``purposes``, ``country``, ``is_eu``,
            ``retention_days``, ``policy_url``, ``uses_cookies``. All are
            optional.

    Returns:
        VVT-compatible dict with the keys ``vendor_id``, ``name``, ``zweck``,
        ``zweck_kurz``, ``kategorie``, ``rechtsgrundlage``, ``drittland``,
        ``land``, ``transfermechanismus``, ``speicherdauer_tage``,
        ``policy_url`` and ``uses_cookies``. ``drittland`` is ``None`` when
        neither ``is_eu`` nor ``country`` allows a decision.
    """
    # "purposes" may be present but None; treat that like "no purposes".
    purposes = vendor.get("purposes") or []
    country = vendor.get("country")

    # Prefer the explicit flag. Fall back to the country code when the flag
    # is missing OR explicitly None, and compare case-insensitively.
    is_eu = vendor.get("is_eu")
    if is_eu is None and country:
        is_eu = str(country).upper() in _EU_EWR

    # Unknown purpose IDs count as "functional".
    categories = {PURPOSE_CATEGORY.get(p, "functional") for p in purposes}

    # Primary category = the most consent-relevant one present. Picking the
    # alphabetically first category would label a marketing+functional vendor
    # "functional" although its legal basis below is consent.
    category_priority = ("marketing", "statistics", "functional", "necessary")
    kategorie = next(
        (cat for cat in category_priority if cat in categories),
        "functional",
    )

    # Rechtsgrundlage depends on category
    if "marketing" in categories or "statistics" in categories:
        rechtsgrundlage = "Einwilligung (Art. 6(1)(a) DSGVO, §25 Abs. 1 TDDDG)"
    else:
        rechtsgrundlage = "Berechtigtes Interesse (Art. 6(1)(f) DSGVO, §25 Abs. 2 TDDDG)"

    # Tri-state: True / False / None (unknown location).
    drittland = None if is_eu is None else not is_eu

    return {
        "vendor_id": vendor.get("vendor_id"),
        "name": vendor.get("name", ""),
        "zweck": [TCF_PURPOSE_LABELS.get(p, f"Zweck {p}") for p in purposes],
        "zweck_kurz": _summarize_purposes(purposes),
        "kategorie": kategorie,
        "rechtsgrundlage": rechtsgrundlage,
        "drittland": drittland,
        "land": country,
        # Generic placeholder for third-country vendors; the concrete
        # mechanism is not derivable from the vendor data read here.
        "transfermechanismus": "SCC/DPF" if drittland else None,
        "speicherdauer_tage": vendor.get("retention_days"),
        "policy_url": vendor.get("policy_url", ""),
        "uses_cookies": vendor.get("uses_cookies", False),
    }
|
||||
|
||||
|
||||
def map_vendors_to_vvt(vendors: list[dict]) -> list[dict]:
    """Convert each TCF vendor dict into its VVT entry, preserving order."""
    vvt_entries: list[dict] = []
    for vendor in vendors:
        vvt_entries.append(tcf_vendor_to_vvt(vendor))
    return vvt_entries
|
||||
|
||||
|
||||
def _summarize_purposes(purposes: list[int]) -> str:
    """Return a short German summary of the categories behind *purposes*.

    Each purpose ID is mapped to its category (unknown IDs become
    "sonstig"); the distinct categories are sorted by their internal key
    and joined with ", " using their German display names.
    """
    if not purposes:
        return "Keine Zwecke angegeben"

    display_names = {
        "marketing": "Marketing/Werbung",
        "statistics": "Analyse/Messung",
        "functional": "Funktional",
        "necessary": "Technisch notwendig",
        "sonstig": "Sonstige",
    }
    distinct_categories = {
        PURPOSE_CATEGORY.get(purpose_id, "sonstig") for purpose_id in purposes
    }

    parts = []
    for category in sorted(distinct_categories):
        parts.append(display_names.get(category, category))
    return ", ".join(parts)
|
||||
Reference in New Issue
Block a user