Files
breakpilot-compliance/backend-compliance/scripts/seed_dach_cookies.py
T
Benjamin Admin 603381a67f
CI / nodejs-build (push) Has been skipped
CI / test-go (push) Has been skipped
CI / iace-gt-coverage (push) Has been skipped
CI / test-python-backend (push) Successful in 38s
CI / test-python-document-crawler (push) Has been skipped
CI / detect-changes (push) Successful in 12s
CI / branch-name (push) Has been skipped
CI / guardrail-integrity (push) Has been skipped
CI / secret-scan (push) Has been skipped
CI / dep-audit (push) Has been skipped
CI / sbom-scan (push) Has been skipped
CI / validate-canonical-controls (push) Successful in 14s
CI / loc-budget (push) Failing after 15s
CI / go-lint (push) Has been skipped
CI / python-lint (push) Has been skipped
CI / nodejs-lint (push) Has been skipped
CI / test-python-dsms-gateway (push) Has been skipped
feat(audit-mail): P58/P59c/P60b/P61/P62 — Mercedes-Cycle Phase 1 abgeschlossen
P58  Anti-Audit-Detection robuster (script-domain + settings-spezifisch —
     war bereits im Code, jetzt sauber als completed dokumentiert).

P59c DACH-Custom-Cookies in compliance.cookie_library: Borlabs,
     etracker, Matomo/Piwik, Userlike, Cookiebot/Cookieyes/Usercentrics,
     Akamai/Cloudflare/Datadome Bot-Manager + HubSpot. 21 neue Eintraege
     (3 von 24 schon via Open-Cookie-Database vorhanden).
     Script: backend-compliance/scripts/seed_dach_cookies.py.

P60b Vendor-Pattern-Dedupe mit Fuzzy-Match (Jaccard >= 0.7) statt exakter
     Tuple-Equality. Vendors mit teilweise befuellten Feldern (z.B.
     Sitzland eingetragen) fallen nicht mehr aus der globalen Notice —
     Bug: Amazon/Psyma/Qualtrics hatten zuvor wiederholte per-row Actions.

P61  "Untergeschobene Cookies"-Erkennung — wenn ein deklarierter Vendor
     (z.B. Google Tag Manager) automatisch weitere mitbringt (GA + GCL_AU
     + DoubleClick), werden diese als separater Mail-Block (gelb) mit
     COOKIE/VENDOR-Badges + Quellen-Doku ausgewiesen. Neuer Service:
     compliance.services.vendor_package_cookies (8 Primary-Vendors mit
     je 2-4 implicit Cookies/Vendors).

P62  Marketing-Manager-Disclaimer "Was wir sehen / nicht sehen" als
     blauer Box-Block direkt unter dem Critical-Findings-Block. Erklaert
     Grenzen unseres Audits (Server-Side-Tracking, Vendor-interne
     Datenweitergabe, Cross-Page-Banner) und Risiko des Falschvertrauens
     in einen 100%-Score. Neuer Renderer: compliance.api.scope_disclaimer.

Architektur: VVT-Tabellen-Renderer aus agent_doc_check_extras.py (552
LOC -> 242 LOC) in compliance.api.vvt_table_renderer ausgelagert, um den
500-LOC-Hardcap einzuhalten.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-05-21 08:01:27 +02:00

168 lines
8.4 KiB
Python

#!/usr/bin/env python3
"""P59c — Seed DACH-specific custom cookies into compliance.cookie_library.

Supplements the Open Cookie Database (~2200 cookies) with cookies typical for
the DACH region that are missing from, or poorly covered by, the OCD:
- Borlabs Cookie (self-hosted CMP, very common among DACH mid-sized companies)
- etracker (analytics alternative to Google Analytics that is common in Germany)
- Matomo / Piwik (self-hosted analytics)
- Userlike (live chat; Userpilot and ChannelEngine are not seeded yet)
- Bot-manager cookies typical for OEM sites (Akamai _abck/bm_sz/bm_mi/ak_bmsc,
  Cloudflare, DataDome; BMW eus5 and VW dxa are not seeded yet)
- CMP cookies (Usercentrics, Cookiebot, Cookieyes) and HubSpot

Source documentation: every entry carries a source_url pointing to the vendor's
documentation (for legal traceability).
License: published as our own collection (CC-BY) — the texts are our own
wording, vendor sources are referenced.
"""
from __future__ import annotations
import os
import sys
import psycopg2
# Seed rows for compliance.cookie_library. Each entry is a 9-tuple that main()
# unpacks positionally, so the field order below must not change:
# (cookie_name, domain_pattern, vendor_name, actual_category, purpose_de,
# typical_max_age_seconds, source_name, source_url, confidence)
# typical_max_age_seconds is a lifetime in seconds (e.g. 31536000 = 365 days,
# 1800 = 30 minutes). purpose_de is German text stored as data — do not translate.
# Every entry in this table uses "*" as its domain_pattern.
DACH_COOKIES = [
# ── Borlabs Cookie CMP ─────────────────────────────────────────
("borlabs-cookie", "*", "Borlabs GmbH", "essential",
"Speichert die Cookie-Einwilligung des Nutzers (CMP-Funktion).",
31536000, "Borlabs Cookie Doku",
"https://de.borlabs.io/kb/borlabs-cookie/", 0.95),
("borlabs_consent", "*", "Borlabs GmbH", "essential",
"Aelterer Borlabs-Consent-Cookie (Legacy v1).",
31536000, "Borlabs Cookie Doku",
"https://de.borlabs.io/kb/borlabs-cookie/", 0.90),
# ── etracker (German analytics alternative) ───────────────────
("et_oi_v3", "*", "etracker GmbH, Hamburg", "statistics",
"etracker Opt-Out-Indikator (zeigt ob Nutzer dem Tracking widersprochen hat).",
63072000, "etracker Doku",
"https://www.etracker.com/docs/integration-setup/datenschutz/", 0.95),
("et_cust_v3", "*", "etracker GmbH, Hamburg", "statistics",
"etracker Customer-ID fuer wiederkehrende Besucher.",
31536000, "etracker Doku",
"https://www.etracker.com/docs/integration-setup/datenschutz/", 0.95),
# ── Matomo / Piwik (self-hosted analytics) ────────────────────
("_pk_id", "*", "Matomo (selbst gehostet)", "statistics",
"Matomo Visitor-ID fuer Wiedererkennung von Besuchern (typisch 13 Monate).",
33696000, "Matomo Doku",
"https://matomo.org/faq/general/faq_146/", 0.95),
("_pk_ses", "*", "Matomo (selbst gehostet)", "statistics",
"Matomo Session-ID fuer aktive Besuchersitzungen (30 Minuten).",
1800, "Matomo Doku",
"https://matomo.org/faq/general/faq_146/", 0.95),
("_pk_ref", "*", "Matomo (selbst gehostet)", "statistics",
"Matomo Referrer-Information (woher kam der Besucher).",
15768000, "Matomo Doku",
"https://matomo.org/faq/general/faq_146/", 0.90),
# ── Userlike (German live chat) ───────────────────────────────
("uslk_e", "*", "Userlike UG, Koeln", "functional",
"Userlike Live-Chat — speichert ob Nutzer mit Chat interagiert hat.",
31536000, "Userlike Privacy",
"https://www.userlike.com/de/privacy-policy", 0.90),
# ── Akamai Bot Manager cookies typical for OEM sites ──────────
# (original author's note: Mercedes, BMW, VW and Audi all use Akamai)
("_abck", "*", "Akamai Technologies", "essential",
"Akamai Bot-Manager Session-Cookie. Erkennt automatisierte Zugriffe (Bots).",
31536000, "Akamai Bot Manager Doku",
"https://techdocs.akamai.com/bot-manager/docs", 0.95),
("bm_sz", "*", "Akamai Technologies", "essential",
"Akamai Bot-Manager — speichert Geraete-Fingerprint fuer Bot-Erkennung.",
14400, "Akamai Bot Manager Doku",
"https://techdocs.akamai.com/bot-manager/docs", 0.95),
("bm_mi", "*", "Akamai Technologies", "essential",
"Akamai Bot-Manager — Mobile-Integrity-Check.",
7200, "Akamai Bot Manager Doku",
"https://techdocs.akamai.com/bot-manager/docs", 0.90),
("ak_bmsc", "*", "Akamai Technologies", "essential",
"Akamai Bot-Manager Score-Cookie.",
7200, "Akamai Bot Manager Doku",
"https://techdocs.akamai.com/bot-manager/docs", 0.90),
# ── Cloudflare Bot Management ─────────────────────────────────
("__cf_bm", "*", "Cloudflare, Inc.", "essential",
"Cloudflare Bot-Management Token — unterscheidet menschliche Zugriffe von Bots.",
1800, "Cloudflare Doku",
"https://developers.cloudflare.com/bots/concepts/bot-score/", 0.95),
("cf_clearance", "*", "Cloudflare, Inc.", "essential",
"Cloudflare Challenge-Cookie nach erfolgreichem Captcha/JS-Challenge.",
1800, "Cloudflare Doku",
"https://developers.cloudflare.com/fundamentals/reference/policies-compliances/cloudflare-cookies/", 0.95),
# ── DataDome bot manager ──────────────────────────────────────
("datadome", "*", "DataDome SAS, Paris", "essential",
"DataDome Bot-Detection Cookie. Wird auf vielen OEM-Sites mit Akamai parallel eingesetzt.",
31536000, "DataDome Doku",
"https://datadome.co/learning-center/", 0.90),
# ── Usercentrics (CMP — widespread in DACH) ───────────────────
("uc_user_interaction", "*", "Usercentrics GmbH", "essential",
"Usercentrics CMP — speichert Banner-Interaktion (Akzeptiert/Abgelehnt).",
31536000, "Usercentrics Doku",
"https://usercentrics.com/de/knowledge-hub/", 0.95),
("uc_settings", "*", "Usercentrics GmbH", "essential",
"Usercentrics CMP — detaillierte Kategorie-Einstellungen pro Vendor.",
31536000, "Usercentrics Doku",
"https://usercentrics.com/de/knowledge-hub/", 0.95),
# ── Cookiebot (CMP) ───────────────────────────────────────────
("CookieConsent", "*", "Cybot A/S (Cookiebot)", "essential",
"Cookiebot CMP — speichert Einwilligungs-Status des Nutzers.",
31536000, "Cookiebot Doku",
"https://www.cookiebot.com/de/datenschutzerklarung/", 0.95),
# ── Cookieyes ─────────────────────────────────────────────────
("cky-consent", "*", "Cookieyes Ltd, UK", "essential",
"Cookieyes CMP — Einwilligungs-Speicherung.",
31536000, "Cookieyes Doku",
"https://www.cookieyes.com/documentation/", 0.90),
# ── HubSpot (widespread in German B2B) ─────────────────────────
("__hstc", "*", "HubSpot, Inc.", "marketing",
"HubSpot Analytics — Hauptcookie zur Besucher-Identifikation.",
34128000, "HubSpot Cookies Doku",
"https://knowledge.hubspot.com/de/privacy-and-consent/what-cookies-does-hubspot-set-in-a-visitor-s-browser", 0.95),
("hubspotutk", "*", "HubSpot, Inc.", "marketing",
"HubSpot User-Token — verfolgt Besucher seitenuebergreifend.",
34128000, "HubSpot Cookies Doku",
"https://knowledge.hubspot.com/de/privacy-and-consent/what-cookies-does-hubspot-set-in-a-visitor-s-browser", 0.95),
]
def main() -> int:
    """Insert the DACH cookie seed rows into ``compliance.cookie_library``.

    Reads the PostgreSQL DSN from the ``DATABASE_URL`` environment variable and
    inserts every tuple of ``DACH_COOKIES`` in a single transaction. Rows that
    hit a conflict are skipped by ``ON CONFLICT DO NOTHING`` and do not count
    as inserted. A summary line with the inserted/total counts is printed to
    stdout after the commit.

    Returns:
        0 once the batch has been committed, 1 if ``DATABASE_URL`` is unset or
        empty (an error message is written to stderr in that case).

    Raises:
        psycopg2.Error: propagated unchanged when connecting or inserting
            fails. The transaction is rolled back and the connection closed
            before the exception leaves this function.
    """
    dsn = os.environ.get("DATABASE_URL")
    if not dsn:
        print("DATABASE_URL missing", file=sys.stderr)
        return 1

    # Built once instead of on every loop iteration; all values are bound as
    # parameters, nothing is interpolated into the statement.
    insert_sql = """
        INSERT INTO compliance.cookie_library
            (cookie_name, domain_pattern, vendor_name,
             actual_category, purpose_de,
             typical_max_age_seconds, source_name, source_url,
             source_license, confidence)
        VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s)
        ON CONFLICT DO NOTHING
    """

    inserted = 0
    conn = psycopg2.connect(dsn)
    try:
        # `with conn` commits on success and rolls back on an exception, but
        # does NOT close the connection; `with conn.cursor()` closes the cursor.
        with conn, conn.cursor() as cur:
            for (name, domain, vendor, cat, purpose_de, max_age,
                 source_name, source_url, confidence) in DACH_COOKIES:
                cur.execute(
                    insert_sql,
                    # vendor is truncated to 200 characters before insert.
                    (name, domain, vendor[:200], cat, purpose_de, max_age,
                     source_name, source_url, "CC-BY-eigene-Sammlung",
                     confidence),
                )
                # rowcount is 0 when the row was skipped due to a conflict.
                inserted += cur.rowcount
    finally:
        # Always release the connection, even when an insert failed.
        conn.close()

    print(f"P59c DACH-Cookies: {inserted}/{len(DACH_COOKIES)} inserted")
    return 0
# Script entry point: run the seeder and use its return value as the process
# exit status (0 = committed, 1 = DATABASE_URL missing).
if __name__ == "__main__":
    raise SystemExit(main())