603381a67f
CI / nodejs-build (push) Has been skipped
CI / test-go (push) Has been skipped
CI / iace-gt-coverage (push) Has been skipped
CI / test-python-backend (push) Successful in 38s
CI / test-python-document-crawler (push) Has been skipped
CI / detect-changes (push) Successful in 12s
CI / branch-name (push) Has been skipped
CI / guardrail-integrity (push) Has been skipped
CI / secret-scan (push) Has been skipped
CI / dep-audit (push) Has been skipped
CI / sbom-scan (push) Has been skipped
CI / validate-canonical-controls (push) Successful in 14s
CI / loc-budget (push) Failing after 15s
CI / go-lint (push) Has been skipped
CI / python-lint (push) Has been skipped
CI / nodejs-lint (push) Has been skipped
CI / test-python-dsms-gateway (push) Has been skipped
P58 Anti-Audit-Detection robuster (script-domain + settings-spezifisch —
war bereits im Code, jetzt sauber als completed dokumentiert).
P59c DACH-Custom-Cookies in compliance.cookie_library: Borlabs,
etracker, Matomo/Piwik, Userlike, Cookiebot/Cookieyes/Usercentrics,
Akamai/Cloudflare/Datadome Bot-Manager + HubSpot. 21 neue Eintraege
(3 von 24 schon via Open-Cookie-Database vorhanden).
Script: backend-compliance/scripts/seed_dach_cookies.py.
P60b Vendor-Pattern-Dedupe mit Fuzzy-Match (Jaccard >= 0.7) statt exakter
Tuple-Equality. Vendors mit teilweise befuellten Feldern (z.B.
Sitzland eingetragen) fallen nicht mehr aus der globalen Notice —
Bug: Amazon/Psyma/Qualtrics hatten zuvor wiederholte per-row Actions.
P61 "Untergeschobene Cookies"-Erkennung — wenn ein deklarierter Vendor
(z.B. Google Tag Manager) automatisch weitere mitbringt (GA + GCL_AU
+ DoubleClick), werden diese als separater Mail-Block (gelb) mit
COOKIE/VENDOR-Badges + Quellen-Doku ausgewiesen. Neuer Service:
compliance.services.vendor_package_cookies (8 Primary-Vendors mit
je 1-4 implicit Cookies/Vendors).
P62 Marketing-Manager-Disclaimer "Was wir sehen / nicht sehen" als
blauer Box-Block direkt unter dem Critical-Findings-Block. Erklaert
Grenzen unseres Audits (Server-Side-Tracking, Vendor-interne
Datenweitergabe, Cross-Page-Banner) und Risiko des Falschvertrauens
in einen 100%-Score. Neuer Renderer: compliance.api.scope_disclaimer.
Architektur: VVT-Tabellen-Renderer aus agent_doc_check_extras.py (552
LOC -> 242 LOC) in compliance.api.vvt_table_renderer ausgelagert, um den
500-LOC-Hardcap einzuhalten.
Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
182 lines
8.8 KiB
Python
182 lines
8.8 KiB
Python
"""
|
|
P61 — "Untergeschobene Cookies"-Erkennung.
|
|
|
|
Wenn eine Site einen Vendor einbindet (z.B. "Google Tag Manager"), kommen
|
|
oft AUTOMATISCH weitere Cookies/Vendors mit, die der Marketing-Manager
|
|
nicht aktiv ausgewaehlt hat (DoubleClick-Werbe-IDs ueber GTM, Facebook-
|
|
Conversion-API ueber Meta-Pixel, Hotjar-Recordings ueber HubSpot etc.).
|
|
|
|
Dieses Modul mappt:
|
|
Primary-Vendor (eingebunden) -> Implicit-Cookies/Vendors (mitgekommen)
|
|
|
|
Mit Quellen-Doku aus offiziellen Anbieter-Pages.
|
|
"""
|
|
|
|
from __future__ import annotations
|
|
|
|
from typing import TypedDict
|
|
|
|
|
|
class ImplicitItem(TypedDict, total=False):
    """One cookie or vendor that is pulled in implicitly by a primary vendor.

    Declared with ``total=False``, so type checkers treat every key as
    optional.
    """

    name: str  # cookie name or vendor display name
    type: str  # "cookie" | "vendor"
    category: str  # essential/functional/statistics/marketing
    why: str  # why this item comes along with the primary vendor
    source_url: str  # vendor documentation
|
|
|
|
|
|
# Primary vendor (lowercase, matched as a substring of the declared vendor
# name) -> list of items that are loaded implicitly along with it.
VENDOR_PACKAGE_COOKIES: dict[str, list[ImplicitItem]] = {
    # Google Tag Manager — typically loads Google Analytics + Ads.
    "google tag manager": [
        {
            "name": "_ga",
            "type": "cookie",
            "category": "statistics",
            "why": (
                "GTM laedt Google Analytics by default mit, sobald ein "
                "GA4-Tag konfiguriert ist."
            ),
            "source_url": "https://support.google.com/tagmanager/answer/9442095",
        },
        {
            "name": "_gid",
            "type": "cookie",
            "category": "statistics",
            "why": "Google Analytics Session-ID, automatisch mit GA.",
            "source_url": "https://support.google.com/analytics/answer/11397207",
        },
        {
            "name": "_gcl_au",
            "type": "cookie",
            "category": "marketing",
            "why": (
                "Google Ads Conversion-Linker — kommt mit jedem GTM-Container "
                "der ein Conversion-Tag enthaelt (z.B. Floodlight, Ads)."
            ),
            "source_url": "https://support.google.com/google-ads/answer/7521212",
        },
        {
            "name": "Google Ads",
            "type": "vendor",
            "category": "marketing",
            "why": (
                "GTM ist Google-Infrastruktur — Google sieht alle Requests "
                "ueber GTM (auch wenn nur Analytics konfiguriert ist)."
            ),
            "source_url": "https://support.google.com/tagmanager/answer/9323295",
        },
    ],
    # Google Analytics — often implicitly DoubleClick / ads personalization.
    "google analytics": [
        {
            "name": "_gcl_au",
            "type": "cookie",
            "category": "marketing",
            "why": (
                "GA4 mit aktivierter Google-Signals (Werbeberichte) setzt "
                "Conversion-Linker — auch ohne Ads-Konfiguration."
            ),
            "source_url": "https://support.google.com/analytics/answer/9445345",
        },
        {
            "name": "DSID",
            "type": "cookie",
            "category": "marketing",
            "why": (
                "DoubleClick-Cookie ueber doubleclick.net — laeuft mit "
                "GA4 + Google-Signals automatisch."
            ),
            "source_url": "https://policies.google.com/technologies/cookies",
        },
        {
            "name": "Google Marketing Platform",
            "type": "vendor",
            "category": "marketing",
            "why": (
                "Mit Google-Signals fliessen aggregierte Daten in Googles "
                "Werbeprofil-Datenbank."
            ),
            "source_url": "https://policies.google.com/technologies/cookies",
        },
    ],
    # Meta Pixel — typically comes with Facebook Login + Conversion API.
    "meta pixel": [
        {
            "name": "_fbc",
            "type": "cookie",
            "category": "marketing",
            "why": (
                "Facebook Click-ID — wird vom Meta-Pixel beim ersten Besuch "
                "via Werbe-Klick gesetzt."
            ),
            "source_url": "https://developers.facebook.com/docs/marketing-api/conversions-api/parameters/fbp-and-fbc",
        },
        {
            "name": "fr",
            "type": "cookie",
            "category": "marketing",
            "why": (
                "Facebook Cross-Site-Tracking — wird ueber facebook.com "
                "Subdomain gesetzt, auch ohne aktiven FB-Login."
            ),
            "source_url": "https://www.facebook.com/policies/cookies/",
        },
        {
            "name": "Facebook Conversion API",
            "type": "vendor",
            "category": "marketing",
            "why": (
                "Server-zu-Server Tracking ergaenzt das Browser-Pixel — wird "
                "oft via 'Erweiterte Matching'-Setting automatisch aktiviert."
            ),
            "source_url": "https://developers.facebook.com/docs/marketing-api/conversions-api/",
        },
    ],
    # Alias entry for the older "Facebook Pixel" name; it repeats the two
    # cookies of the Meta Pixel entry (the Conversion API vendor is not
    # repeated here).
    "facebook pixel": [
        {
            "name": "_fbc",
            "type": "cookie",
            "category": "marketing",
            "why": "siehe Meta-Pixel-Eintrag (Aliase).",
            "source_url": "https://www.facebook.com/policies/cookies/",
        },
        {
            "name": "fr",
            "type": "cookie",
            "category": "marketing",
            "why": "siehe Meta-Pixel-Eintrag (Aliase).",
            "source_url": "https://www.facebook.com/policies/cookies/",
        },
    ],
    # HubSpot — every embed brings tracking + chat + forms.
    "hubspot": [
        {
            "name": "__hstc",
            "type": "cookie",
            "category": "marketing",
            "why": (
                "HubSpot-Analytics-Cookie wird beim ersten HubSpot-Tag "
                "automatisch gesetzt."
            ),
            "source_url": "https://knowledge.hubspot.com/de/privacy-and-consent/what-cookies-does-hubspot-set-in-a-visitor-s-browser",
        },
        {
            "name": "hubspotutk",
            "type": "cookie",
            "category": "marketing",
            "why": "User-Token zur seitenuebergreifenden Identifikation.",
            "source_url": "https://knowledge.hubspot.com/de/privacy-and-consent/what-cookies-does-hubspot-set-in-a-visitor-s-browser",
        },
        {
            "name": "HubSpot Chat (Drift / Conversations)",
            "type": "vendor",
            "category": "functional",
            "why": "HubSpot CMS aktiviert oft den Chat-Widget by default.",
            "source_url": "https://www.hubspot.com/data-privacy/cookies",
        },
    ],
    # Akamai (CDN/security) — bot-manager cookies are essential, but Akamai
    # itself has web-performance cookies that may count as statistics.
    "akamai": [
        {
            "name": "AKA_A2",
            "type": "cookie",
            "category": "functional",
            "why": "Akamai Adaptive-Acceleration Performance-Cookie.",
            "source_url": "https://techdocs.akamai.com/",
        },
    ],
    # Adobe Analytics (Marketing Cloud) — loads Audience Manager cookies.
    "adobe analytics": [
        {
            "name": "s_cc",
            "type": "cookie",
            "category": "statistics",
            "why": "Adobe Analytics Session-Cookie.",
            "source_url": "https://experienceleague.adobe.com/docs/analytics/implementation/vars/config-vars/cookies.html",
        },
        {
            "name": "AAM_uuid",
            "type": "cookie",
            "category": "marketing",
            "why": (
                "Adobe Audience Manager — kommt mit Adobe Analytics-Tag wenn "
                "Audience-Sharing aktiviert ist."
            ),
            "source_url": "https://experienceleague.adobe.com/docs/audience-manager.html",
        },
    ],
    # LinkedIn Insight Tag — loads LinkedIn + AdvertiserSync cookies.
    "linkedin insight": [
        {
            "name": "li_sugr",
            "type": "cookie",
            "category": "marketing",
            "why": "LinkedIn-Browser-ID — wird vom Insight-Tag gesetzt.",
            "source_url": "https://www.linkedin.com/legal/l/cookie-table",
        },
        {
            "name": "AnalyticsSyncHistory",
            "type": "cookie",
            "category": "marketing",
            "why": "LinkedIn-Cross-Domain-Tracking ueber Insight-Tag.",
            "source_url": "https://www.linkedin.com/legal/l/cookie-table",
        },
    ],
}
|
|
|
|
|
|
def detect_implicit_cookies(
    declared_vendors: list[str],
    actual_cookies_set: list[str] | None = None,
) -> list[dict]:
    """Find cookies/vendors that piggyback on the declared vendors.

    Every declared vendor name is compared (case-insensitively, as a
    substring match) against the keys of ``VENDOR_PACKAGE_COOKIES``; the
    implicit items of all matching keys become candidate findings.

    Args:
        declared_vendors: Vendor names declared by the CMP/banner. ``None``
            and non-string entries are ignored.
        actual_cookies_set: Optional names of the cookies that were really
            set. When given (even as an empty list), an implicit item of
            type ``"cookie"`` is only reported if its name occurs
            case-insensitively inside one of these names. Items of other
            types are not filtered by this list.

    Returns:
        A list of finding dicts with the keys ``primary_vendor`` (the
        declared name as passed in), ``implicit`` (the table item) and
        ``present_in_actual``. The flag is always ``True`` because
        candidates that fail the cookie check are dropped, not reported.
    """
    declared = [v for v in (declared_vendors or []) if isinstance(v, str)]
    declared_lower = {v.lower() for v in declared}

    # Distinguish "no actuals supplied" (None) from "nothing was set" ([]).
    check_actual = actual_cookies_set is not None
    # Lower-case the actual cookie names once instead of once per candidate.
    actual_lower = {
        c.lower() for c in (actual_cookies_set or []) if isinstance(c, str)
    }

    findings: list[dict] = []
    for primary in declared:
        plower = primary.lower()
        # A declared name can match several table keys (e.g. "Meta Pixel /
        # Facebook Pixel"); report each implicit item once per primary and
        # keep the first, i.e. the non-alias, description.
        seen: set[tuple[str, str]] = set()
        for key, items in VENDOR_PACKAGE_COOKIES.items():
            if key not in plower:
                continue
            for impl in items:
                name_lower = impl["name"].lower()
                ident = (impl["type"], name_lower)
                if ident in seen:
                    continue
                seen.add(ident)

                if impl["type"] == "vendor":
                    # Not "implicit" if the site declared this vendor itself.
                    if any(name_lower in d for d in declared_lower):
                        continue
                elif impl["type"] == "cookie" and check_actual:
                    # NOTE(review): substring matching lets "_ga" match
                    # "_ga_XXXX", but short names such as "fr" also match
                    # unrelated cookies that merely contain them — confirm
                    # whether a stricter match is wanted.
                    if not any(name_lower in c for c in actual_lower):
                        continue

                findings.append({
                    "primary_vendor": primary,
                    "implicit": impl,
                    "present_in_actual": True,
                })
    return findings
|