Files
breakpilot-compliance/backend-compliance/compliance/services/vendor_package_cookies.py
T
Benjamin Admin 603381a67f
CI / nodejs-build (push) Has been skipped
CI / test-go (push) Has been skipped
CI / iace-gt-coverage (push) Has been skipped
CI / test-python-backend (push) Successful in 38s
CI / test-python-document-crawler (push) Has been skipped
CI / detect-changes (push) Successful in 12s
CI / branch-name (push) Has been skipped
CI / guardrail-integrity (push) Has been skipped
CI / secret-scan (push) Has been skipped
CI / dep-audit (push) Has been skipped
CI / sbom-scan (push) Has been skipped
CI / validate-canonical-controls (push) Successful in 14s
CI / loc-budget (push) Failing after 15s
CI / go-lint (push) Has been skipped
CI / python-lint (push) Has been skipped
CI / nodejs-lint (push) Has been skipped
CI / test-python-dsms-gateway (push) Has been skipped
feat(audit-mail): P58/P59c/P60b/P61/P62 — Mercedes-Cycle Phase 1 abgeschlossen
P58  Anti-Audit-Detection robuster (script-domain + settings-spezifisch —
     war bereits im Code, jetzt sauber als completed dokumentiert).

P59c DACH-Custom-Cookies in compliance.cookie_library: Borlabs,
     etracker, Matomo/Piwik, Userlike, Cookiebot/Cookieyes/Usercentrics,
     Akamai/Cloudflare/Datadome Bot-Manager + HubSpot. 21 neue Eintraege
     (3 von 24 schon via Open-Cookie-Database vorhanden).
     Script: backend-compliance/scripts/seed_dach_cookies.py.

P60b Vendor-Pattern-Dedupe mit Fuzzy-Match (Jaccard >= 0.7) statt exakter
     Tuple-Equality. Vendors mit teilweise befuellten Feldern (z.B.
     Sitzland eingetragen) fallen nicht mehr aus der globalen Notice —
     Bug: Amazon/Psyma/Qualtrics hatten zuvor wiederholte per-row Actions.

P61  "Untergeschobene Cookies"-Erkennung — wenn ein deklarierter Vendor
     (z.B. Google Tag Manager) automatisch weitere mitbringt (GA + GCL_AU
     + DoubleClick), werden diese als separater Mail-Block (gelb) mit
     COOKIE/VENDOR-Badges + Quellen-Doku ausgewiesen. Neuer Service:
     compliance.services.vendor_package_cookies (8 Primary-Vendors mit
     je 2-4 implicit Cookies/Vendors).

P62  Marketing-Manager-Disclaimer "Was wir sehen / nicht sehen" als
     blauer Box-Block direkt unter dem Critical-Findings-Block. Erklaert
     Grenzen unseres Audits (Server-Side-Tracking, Vendor-interne
     Datenweitergabe, Cross-Page-Banner) und Risiko des Falschvertrauens
     in einen 100%-Score. Neuer Renderer: compliance.api.scope_disclaimer.

Architektur: VVT-Tabellen-Renderer aus agent_doc_check_extras.py (552
LOC -> 242 LOC) in compliance.api.vvt_table_renderer ausgelagert, um den
500-LOC-Hardcap einzuhalten.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-05-21 08:01:27 +02:00

182 lines
8.8 KiB
Python

"""
P61 — "Untergeschobene Cookies"-Erkennung.
Wenn eine Site einen Vendor einbindet (z.B. "Google Tag Manager"), kommen
oft AUTOMATISCH weitere Cookies/Vendors mit, die der Marketing-Manager
nicht aktiv ausgewaehlt hat (DoubleClick-Werbe-IDs ueber GTM, Facebook-
Conversion-API ueber Meta-Pixel, Hotjar-Recordings ueber HubSpot etc.).
Dieses Modul mappt:
Primary-Vendor (eingebunden) -> Implicit-Cookies/Vendors (mitgekommen)
Mit Quellen-Doku aus offiziellen Anbieter-Pages.
"""
from __future__ import annotations
from typing import TypedDict
class ImplicitItem(TypedDict, total=False):
    """One cookie or vendor that is pulled in implicitly by a primary vendor.

    Declared with ``total=False``, so type checkers treat every key as
    optional.
    """

    # Cookie name or vendor display name.
    name: str
    # Kind of item: "cookie" | "vendor".
    type: str
    # Consent category: essential/functional/statistics/marketing.
    category: str
    # Explanation of why this item comes along with the primary vendor.
    why: str
    # Link to the provider's documentation.
    source_url: str
# Implicit items that ship with the Meta (Facebook) pixel. Defined once so the
# "meta pixel" key and its "facebook pixel" alias cannot drift apart.
_META_PIXEL_IMPLICITS: list[ImplicitItem] = [
    {"name": "_fbc", "type": "cookie", "category": "marketing",
     "why": "Facebook Click-ID — wird vom Meta-Pixel beim ersten Besuch "
            "via Werbe-Klick gesetzt.",
     "source_url": "https://developers.facebook.com/docs/marketing-api/conversions-api/parameters/fbp-and-fbc"},
    {"name": "fr", "type": "cookie", "category": "marketing",
     "why": "Facebook Cross-Site-Tracking — wird ueber facebook.com "
            "Subdomain gesetzt, auch ohne aktiven FB-Login.",
     "source_url": "https://www.facebook.com/policies/cookies/"},
    {"name": "Facebook Conversion API", "type": "vendor", "category": "marketing",
     "why": "Server-zu-Server Tracking ergaenzt das Browser-Pixel — wird "
            "oft via 'Erweiterte Matching'-Setting automatisch aktiviert.",
     "source_url": "https://developers.facebook.com/docs/marketing-api/conversions-api/"},
]

# Primary vendor (lowercase key, substring-matched) -> list of items that are
# loaded implicitly along with it.
VENDOR_PACKAGE_COOKIES: dict[str, list[ImplicitItem]] = {
    # Google Tag Manager — typically loads Google Analytics + Ads.
    "google tag manager": [
        {"name": "_ga", "type": "cookie", "category": "statistics",
         "why": "GTM laedt Google Analytics by default mit, sobald ein "
                "GA4-Tag konfiguriert ist.",
         "source_url": "https://support.google.com/tagmanager/answer/9442095"},
        {"name": "_gid", "type": "cookie", "category": "statistics",
         "why": "Google Analytics Session-ID, automatisch mit GA.",
         "source_url": "https://support.google.com/analytics/answer/11397207"},
        {"name": "_gcl_au", "type": "cookie", "category": "marketing",
         "why": "Google Ads Conversion-Linker — kommt mit jedem GTM-Container "
                "der ein Conversion-Tag enthaelt (z.B. Floodlight, Ads).",
         "source_url": "https://support.google.com/google-ads/answer/7521212"},
        {"name": "Google Ads", "type": "vendor", "category": "marketing",
         "why": "GTM ist Google-Infrastruktur — Google sieht alle Requests "
                "ueber GTM (auch wenn nur Analytics konfiguriert ist).",
         "source_url": "https://support.google.com/tagmanager/answer/9323295"},
    ],
    # Google Analytics — often implicitly DoubleClick / ads personalization.
    "google analytics": [
        {"name": "_gcl_au", "type": "cookie", "category": "marketing",
         "why": "GA4 mit aktivierter Google-Signals (Werbeberichte) setzt "
                "Conversion-Linker — auch ohne Ads-Konfiguration.",
         "source_url": "https://support.google.com/analytics/answer/9445345"},
        {"name": "DSID", "type": "cookie", "category": "marketing",
         "why": "DoubleClick-Cookie ueber doubleclick.net — laeuft mit "
                "GA4 + Google-Signals automatisch.",
         "source_url": "https://policies.google.com/technologies/cookies"},
        {"name": "Google Marketing Platform", "type": "vendor", "category": "marketing",
         "why": "Mit Google-Signals fliessen aggregierte Daten in Googles "
                "Werbeprofil-Datenbank.",
         "source_url": "https://policies.google.com/technologies/cookies"},
    ],
    # Meta pixel — typically comes with Facebook Login + Conversion API.
    "meta pixel": _META_PIXEL_IMPLICITS,
    # Alias for the Meta pixel: same implicit items, held as independent
    # copies so that mutating one entry never affects the other.
    "facebook pixel": [item.copy() for item in _META_PIXEL_IMPLICITS],
    # HubSpot — every embed brings tracking + chat + forms.
    "hubspot": [
        {"name": "__hstc", "type": "cookie", "category": "marketing",
         "why": "HubSpot-Analytics-Cookie wird beim ersten HubSpot-Tag "
                "automatisch gesetzt.",
         "source_url": "https://knowledge.hubspot.com/de/privacy-and-consent/what-cookies-does-hubspot-set-in-a-visitor-s-browser"},
        {"name": "hubspotutk", "type": "cookie", "category": "marketing",
         "why": "User-Token zur seitenuebergreifenden Identifikation.",
         "source_url": "https://knowledge.hubspot.com/de/privacy-and-consent/what-cookies-does-hubspot-set-in-a-visitor-s-browser"},
        {"name": "HubSpot Chat (Drift / Conversations)", "type": "vendor",
         "category": "functional",
         "why": "HubSpot CMS aktiviert oft den Chat-Widget by default.",
         "source_url": "https://www.hubspot.com/data-privacy/cookies"},
    ],
    # Akamai (CDN/security) — bot-manager cookies are essential, but Akamai
    # itself has web-performance cookies that may count as statistics.
    "akamai": [
        {"name": "AKA_A2", "type": "cookie", "category": "functional",
         "why": "Akamai Adaptive-Acceleration Performance-Cookie.",
         "source_url": "https://techdocs.akamai.com/"},
    ],
    # Adobe Analytics (Marketing Cloud) — loads Audience Manager cookies.
    "adobe analytics": [
        {"name": "s_cc", "type": "cookie", "category": "statistics",
         "why": "Adobe Analytics Session-Cookie.",
         "source_url": "https://experienceleague.adobe.com/docs/analytics/implementation/vars/config-vars/cookies.html"},
        {"name": "AAM_uuid", "type": "cookie", "category": "marketing",
         "why": "Adobe Audience Manager — kommt mit Adobe Analytics-Tag wenn "
                "Audience-Sharing aktiviert ist.",
         "source_url": "https://experienceleague.adobe.com/docs/audience-manager.html"},
    ],
    # LinkedIn Insight Tag — loads LinkedIn + AdvertiserSync cookies.
    "linkedin insight": [
        {"name": "li_sugr", "type": "cookie", "category": "marketing",
         "why": "LinkedIn-Browser-ID — wird vom Insight-Tag gesetzt.",
         "source_url": "https://www.linkedin.com/legal/l/cookie-table"},
        {"name": "AnalyticsSyncHistory", "type": "cookie", "category": "marketing",
         "why": "LinkedIn-Cross-Domain-Tracking ueber Insight-Tag.",
         "source_url": "https://www.linkedin.com/legal/l/cookie-table"},
    ],
}
def detect_implicit_cookies(
    declared_vendors: list[str],
    actual_cookies_set: list[str] | None = None,
) -> list[dict]:
    """Find cookies/vendors that ride along with the declared vendors.

    Every declared vendor name is matched case-insensitively against the keys
    of ``VENDOR_PACKAGE_COOKIES`` (a key matches when it is a substring of the
    declared name). The implicit items of all matching keys are reported,
    except that:

    * an implicit item of type ``"vendor"`` is dropped when its name is a
      substring of any declared vendor name (it was declared explicitly);
    * an implicit item of type ``"cookie"`` is dropped when
      ``actual_cookies_set`` is given and no observed cookie name contains
      the implicit name (case-insensitive substring match, so ``_ga`` also
      matches ``_ga_ABC123``);
    * the same (declared vendor, item type, item name) combination is
      reported only once, even if the declared name matches several keys or
      the vendor is listed more than once.

    Args:
        declared_vendors: Vendor names declared by the CMP/banner.
        actual_cookies_set: Optional names of the cookies that were really
            set. ``None`` disables the presence filter; an empty list means
            that no cookie was observed, so no cookie item is reported.

    Returns:
        A list of finding dicts with the keys ``primary_vendor`` (declared
        name as passed in), ``implicit`` (the table item) and
        ``present_in_actual`` (always ``True`` for returned findings, because
        items that fail the presence filter are dropped).
    """
    findings: list[dict] = []
    declared_lower = [vendor.lower() for vendor in declared_vendors]
    # Lowercase the observed cookie names once instead of once per item.
    actual_lower = (
        None
        if actual_cookies_set is None
        else {cookie.lower() for cookie in actual_cookies_set}
    )
    # (declared vendor, item type, item name) combinations already handled.
    seen: set[tuple[str, str, str]] = set()

    for primary, plower in zip(declared_vendors, declared_lower):
        for key, items in VENDOR_PACKAGE_COOKIES.items():
            if key not in plower:
                continue
            for impl in items:
                name_lower = impl["name"].lower()
                ident = (plower, impl["type"], name_lower)
                if ident in seen:
                    # Same item reached via another matching key or via a
                    # repeated vendor declaration.
                    continue
                seen.add(ident)

                # Skip implicit vendors the site has declared explicitly.
                if impl["type"] == "vendor" and any(
                    name_lower in declared for declared in declared_lower
                ):
                    continue

                # With observations available, keep only cookies really set.
                present = True
                if actual_lower is not None and impl["type"] == "cookie":
                    present = any(name_lower in cookie for cookie in actual_lower)
                if not present:
                    continue

                findings.append({
                    "primary_vendor": primary,
                    "implicit": impl,
                    "present_in_actual": present,
                })
    return findings