From 4183379dc566e8c6231d05f0f1422a78bad8138e Mon Sep 17 00:00:00 2001 From: Benjamin Admin Date: Thu, 21 May 2026 17:11:47 +0200 Subject: [PATCH] feat(audit): P33 3-Spalten-Vendor-Konsistenz (DSE/Cookie-Doc/Banner) check_three_source_vendor_consistency: scannt DSE-, Cookie-Doc- und Banner-Vendor-Liste auf 15 typische Vendor-Signaturen (Google Analytics, Meta Pixel, Hotjar, HubSpot, LinkedIn Insight, ...). Listet Vendors die in mind. einer Quelle stehen, aber nicht in allen sources_with_data. Liefert MEDIUM-Finding mit konkreter 'fehlt in: DSE, Banner-Liste'- Liste pro Vendor. Empfehlung: zentrale Vendor-Liste pflegen + in alle drei Dokumenttypen propagieren. (Art. 13(1)(c)+(e) DSGVO) Co-Authored-By: Claude Opus 4.7 (1M context) --- .../api/agent_compliance_check_routes.py | 1 + .../services/banner_consistency_checks.py | 88 ++++++++++++++++++- .../compliance/services/check_replay.py | 1 + 3 files changed, 89 insertions(+), 1 deletion(-) diff --git a/backend-compliance/compliance/api/agent_compliance_check_routes.py b/backend-compliance/compliance/api/agent_compliance_check_routes.py index c782ce59..03e2f306 100644 --- a/backend-compliance/compliance/api/agent_compliance_check_routes.py +++ b/backend-compliance/compliance/api/agent_compliance_check_routes.py @@ -1160,6 +1160,7 @@ async def _run_compliance_check(check_id: str, req: ComplianceCheckRequest): or doc_texts.get("dse") or "") cons_findings = run_consistency_checks( banner_result or {}, cookie_doc_for_check, cmp_vendors, + doc_texts=doc_texts, ) if cons_findings: consistency_html = build_consistency_block_html(cons_findings) diff --git a/backend-compliance/compliance/services/banner_consistency_checks.py b/backend-compliance/compliance/services/banner_consistency_checks.py index 0275dbc4..be18ba7a 100644 --- a/backend-compliance/compliance/services/banner_consistency_checks.py +++ b/backend-compliance/compliance/services/banner_consistency_checks.py @@ -178,6 +178,85 @@ def check_init_banner_vs_cookie_doc( } +_VENDOR_LIST_SIGNALS = ( + "google analytics", "google ads", "facebook pixel", "meta pixel", + "hotjar", "matomo", "etracker", "salesforce", "hubspot", + "linkedin insight", "twitter conversion", "tiktok pixel", + "criteo", "the trade desk", "doubleclick", +) + + +def _vendors_mentioned_in_text(text: str) -> set[str]: + if not text: + return set() + t = text.lower() + return {v for v in _VENDOR_LIST_SIGNALS if v in t} + + +def check_three_source_vendor_consistency( + doc_texts: dict[str, str] | None, + cmp_vendors: list | None, +) -> dict | None: + """P33 — 3-Spalten-Konsistenz: DSE vs Cookie-Doc vs Banner-Vendors. + + Wenn ein Vendor (z.B. 'Google Analytics') in der DSE und in der + Cookie-Richtlinie genannt wird, aber NICHT in der Banner-Vendor- + Liste auftaucht (oder umgekehrt), ist die Drei-Quellen-Aussage + nicht konsistent. MEDIUM-Finding mit Liste der jeweils fehlenden + Vendors. + """ + if not doc_texts: + return None + dse_v = _vendors_mentioned_in_text(doc_texts.get("dse") or "") + cookie_v = _vendors_mentioned_in_text(doc_texts.get("cookie") or "") + banner_v: set[str] = set() + for v in (cmp_vendors or []): + name = (v.get("name") or "").lower() + for sig in _VENDOR_LIST_SIGNALS: + if sig in name or name in sig: + banner_v.add(sig) + + sources_with_data = sum(1 for s in (dse_v, cookie_v, banner_v) if s) + if sources_with_data < 2: + return None + + # Vendors in mind. einer Quelle aber nicht in allen vorhandenen + universe = dse_v | cookie_v | banner_v + issues: list[str] = [] + for vendor in sorted(universe): + missing_in = [] + if dse_v and vendor not in dse_v: + missing_in.append("DSE") + if cookie_v and vendor not in cookie_v: + missing_in.append("Cookie-Doc") + if banner_v and vendor not in banner_v: + missing_in.append("Banner-Liste") + if missing_in and len(missing_in) < sources_with_data: + issues.append(f'{vendor} (fehlt in: {", ".join(missing_in)})') + + if not issues: + return None + + return { + "severity": "MEDIUM", + "code": "three_source_vendor_inconsistency", + "label": ( + f"{len(issues)} Vendor{'en' if len(issues) != 1 else ''} " + "nicht konsistent zwischen DSE, Cookie-Richtlinie und Banner" + ), + "detail": ( + "Folgende Vendors sind nicht in allen Quellen genannt: " + + "; ".join(issues[:8]) + + (" ..." if len(issues) > 8 else "") + + ". Empfehlung: zentrale Vendor-Liste pflegen und in alle " + "drei Dokumenttypen propagieren." + ), + "legal_basis": "Art. 13(1)(c)+(e) DSGVO + EDPB 5/2020 — die " + "Empfaenger / Drittlandtransfers muessen ueber alle " + "Touch-Points konsistent kommuniziert werden.", + } + + def check_banner_vs_cmp_partner_count( banner_result: dict, cmp_vendors: list | None, @@ -225,7 +304,8 @@ def check_banner_vs_cmp_partner_count( def run_all(banner_result: dict, cookie_doc_text: str | None = None, - cmp_vendors: list | None = None) -> list[dict]: + cmp_vendors: list | None = None, + doc_texts: dict[str, str] | None = None) -> list[dict]: findings: list[dict] = [] try: f1 = check_cmp_tool_availability(banner_result) @@ -245,6 +325,12 @@ def run_all(banner_result: dict, cookie_doc_text: str | None = None, findings.append(f3) except Exception as e: logger.warning("P75 banner_vs_cmp_count failed: %s", e) + try: + f4 = check_three_source_vendor_consistency(doc_texts, cmp_vendors) + if f4: + findings.append(f4) + except Exception as e: + logger.warning("P33 three_source_vendor failed: %s", e) return findings diff --git a/backend-compliance/compliance/services/check_replay.py b/backend-compliance/compliance/services/check_replay.py index 13170cf2..c34c7924 100644 --- a/backend-compliance/compliance/services/check_replay.py +++ b/backend-compliance/compliance/services/check_replay.py @@ -159,6 +159,7 @@ def replay_from_snapshot( cookie_doc_for_check = doc_texts.get("cookie") or doc_texts.get("dse") or "" cons = run_consistency_checks( banner_result or {}, cookie_doc_for_check, cmp_vendors, + doc_texts=doc_texts, ) if cons: cons_html = build_consistency_block_html(cons)