Files
breakpilot-compliance/backend-compliance/compliance/services/cookie_table_generator.py
T
Benjamin Admin 6ed2505871
Build + Deploy / build-admin-compliance (push) Successful in 2m3s
Build + Deploy / build-backend-compliance (push) Failing after 3m19s
Build + Deploy / build-ai-sdk (push) Successful in 50s
Build + Deploy / build-developer-portal (push) Successful in 1m12s
Build + Deploy / build-tts (push) Successful in 1m44s
Build + Deploy / build-document-crawler (push) Successful in 37s
Build + Deploy / build-dsms-gateway (push) Successful in 22s
Build + Deploy / build-dsms-node (push) Successful in 10s
Build + Deploy / trigger-orca (push) Has been skipped
CI / branch-name (push) Has been skipped
CI / guardrail-integrity (push) Has been skipped
CI / test-go (push) Successful in 41s
CI / test-python-dsms-gateway (push) Successful in 22s
CI / validate-canonical-controls (push) Successful in 13s
CI / loc-budget (push) Failing after 17s
CI / secret-scan (push) Has been skipped
CI / go-lint (push) Has been skipped
CI / python-lint (push) Has been skipped
CI / nodejs-lint (push) Has been skipped
CI / nodejs-build (push) Successful in 2m44s
CI / dep-audit (push) Has been skipped
CI / sbom-scan (push) Has been skipped
CI / test-python-backend (push) Successful in 40s
CI / test-python-document-crawler (push) Successful in 25s
feat: Cookie banner vendors per category + {{COOKIE_TABLE}} generator
- CookieBannerOverlay: shows vendors per category with expandable tables
  (Verarbeiter, Cookies, Dauer, Land) for full transparency
- Demo vendors: 4 necessary, 3 statistics, 3 marketing, 3 functional
- cookie_table_generator.py: renders {{COOKIE_TABLE}} Markdown tables
  from vendor configs (DB) or service registry (fallback)
- SERVICE_COOKIES: 16 known vendor-to-cookie mappings with provider + country

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
2026-05-02 20:07:20 +02:00

294 lines
9.5 KiB
Python

"""
Cookie Table Generator — renders {{COOKIE_TABLE}} placeholder content.
Generates structured Markdown tables grouped by cookie category, showing:
Cookie/Tool | Anbieter | Zweck | Typ | Dauer | Drittland
Data sources:
1. BannerVendorConfigDB per site (if a site config exists for the given tenant_id/site_id)
2. Service Registry (fallback for unconfigured sites)
3. First-party defaults (session, consent cookie)
"""
import logging
from typing import Optional
from sqlalchemy.orm import Session
from compliance.db.banner_models import (
BannerSiteConfigDB,
BannerVendorConfigDB,
)
logger = logging.getLogger(__name__)
# Cookies set by the site itself. Both table generators in this module add
# them to their category before any vendor rows.
# Each tuple is (name, purpose, category, retention); provider and
# third-country status are identical for every first-party cookie.
FIRST_PARTY_COOKIES = [
    {
        "name": cookie_name,
        "provider": "Eigener Server",
        "purpose": cookie_purpose,
        "category": cookie_category,
        "retention": cookie_retention,
        "third_country": "Nein",
    }
    for cookie_name, cookie_purpose, cookie_category, cookie_retention in (
        ("session_id", "Session-Verwaltung", "necessary", "Session"),
        ("bp_consent", "Speicherung der Cookie-Einwilligung", "necessary", "12 Monate"),
        ("bp_lang", "Spracheinstellung", "functional", "12 Monate"),
    )
]
# Known cookie metadata per third-party service, keyed by the service's
# display name. Each tuple below is
# (service name, cookie names/patterns, purpose, provider, third-country note).
SERVICE_COOKIES = {
    service_name: {
        "cookies": cookie_patterns,
        "purpose": service_purpose,
        "provider": service_provider,
        "third_country": transfer_note,
    }
    for service_name, cookie_patterns, service_purpose, service_provider, transfer_note in (
        (
            "Google Analytics",
            ["_ga", "_ga_*", "_gid", "_gat"],
            "Nutzungsanalyse und Reichweitenmessung",
            "Google LLC",
            "USA (DPF)",
        ),
        (
            "Google Tag Manager",
            ["_gcl_au"],
            "Tag-Verwaltung und Conversion-Tracking",
            "Google LLC",
            "USA (DPF)",
        ),
        (
            "Google Ads",
            ["_gcl_aw", "_gcl_dc", "IDE"],
            "Werbe-Conversion-Messung",
            "Google LLC",
            "USA (DPF)",
        ),
        (
            "Facebook Pixel",
            ["_fbp", "_fbc", "fr"],
            "Werbe-Tracking und Remarketing",
            "Meta Platforms Inc.",
            "USA (DPF)",
        ),
        (
            "Hotjar",
            ["_hj*", "_hjid", "_hjSession*"],
            "Heatmaps und Session-Recording",
            "Hotjar Ltd.",
            "Nein (EU — Malta)",
        ),
        (
            "Matomo",
            ["_pk_id.*", "_pk_ses.*"],
            "Datenschutzfreundliche Nutzungsanalyse",
            "InnoCraft Ltd.",
            "Nein (EU/Self-Hosted)",
        ),
        (
            "Stripe",
            ["__stripe_mid", "__stripe_sid"],
            "Zahlungsabwicklung",
            "Stripe Inc.",
            "USA (DPF)",
        ),
        (
            "PayPal",
            ["PYPF", "tsrce"],
            "Zahlungsabwicklung",
            "PayPal (Europe) S.a.r.l.",
            "Nein (EU — Luxemburg)",
        ),
        (
            "Klarna",
            ["klarna_*"],
            "Zahlungsabwicklung (BNPL)",
            "Klarna Bank AB",
            "Nein (EU — Schweden)",
        ),
        (
            "YouTube",
            ["YSC", "VISITOR_INFO1_LIVE", "CONSENT"],
            "Video-Einbettung",
            "Google LLC",
            "USA (DPF)",
        ),
        (
            "Cloudflare",
            ["__cf_bm", "cf_clearance"],
            "CDN und DDoS-Schutz",
            "Cloudflare Inc.",
            "USA (DPF)",
        ),
        (
            "Usercentrics",
            ["uc_*"],
            "Consent-Management",
            "Usercentrics GmbH",
            "Nein (EU — Deutschland)",
        ),
        (
            "Cookiebot",
            ["CookieConsent", "CookieConsentBulkTicket"],
            "Consent-Management",
            "Cybot A/S",
            "Nein (EU — Daenemark)",
        ),
        (
            "HubSpot",
            ["__hstc", "__hssc", "hubspotutk"],
            "CRM und Marketing-Automation",
            "HubSpot Inc.",
            "USA (DPF)",
        ),
        (
            "LinkedIn Insight",
            ["_li_ss", "bcookie", "li_sugr"],
            "B2B-Marketing und Conversion-Tracking",
            "LinkedIn Ireland UC",
            "Nein (EU — Irland)",
        ),
        (
            "Sentry",
            ["sentry-sc"],
            "Fehlererfassung und Monitoring",
            "Sentry (Functional Software Inc.)",
            "USA (DPF)",
        ),
    )
}
# Order in which the category sections appear in the rendered output.
CATEGORY_ORDER = ["necessary", "functional", "statistics", "marketing"]

# German display label for each category key; the labels are listed in the
# same sequence as CATEGORY_ORDER and paired with it positionally.
CATEGORY_LABELS = dict(
    zip(
        CATEGORY_ORDER,
        (
            "Technisch notwendig",
            "Funktional",
            "Analyse / Statistik",
            "Marketing / Tracking",
        ),
    )
)
def _format_retention(days: int) -> str:
if days == 0:
return "Session"
if days <= 1:
return "1 Tag"
if days <= 30:
return f"{days} Tage"
if days <= 365:
months = round(days / 30)
return f"{months} Monat{'e' if months > 1 else ''}"
years = round(days / 365, 1)
if years == int(years):
return f"{int(years)} Jahr{'e' if int(years) > 1 else ''}"
return f"{years} Jahre"
def generate_cookie_table_from_vendors(
    db: Session,
    tenant_id: str,
    site_id: str,
) -> str:
    """Generate {{COOKIE_TABLE}} content from vendor configs in the database.

    The first-party cookies are always included. If a banner site config
    exists for ``tenant_id``/``site_id``, one row is added per active vendor
    of that config; metadata from ``SERVICE_COOKIES`` takes precedence over
    the vendor record when the vendor name is a known service.

    Args:
        db: SQLAlchemy session used for the lookups.
        tenant_id: Tenant identifier as a UUID string.
        site_id: Site identifier of the banner configuration.

    Returns:
        The Markdown produced by ``_render_tables`` for the collected rows.

    Raises:
        ValueError: If ``tenant_id`` is not a valid UUID string.
    """
    import uuid as _uuid

    tid = _uuid.UUID(tenant_id)
    config = (
        db.query(BannerSiteConfigDB)
        .filter(BannerSiteConfigDB.tenant_id == tid, BannerSiteConfigDB.site_id == site_id)
        .first()
    )

    rows_by_category: dict[str, list[dict]] = {cat: [] for cat in CATEGORY_ORDER}

    # First-party cookies
    for fp in FIRST_PARTY_COOKIES:
        cat = fp["category"]
        if cat in rows_by_category:
            rows_by_category[cat].append(fp)

    if not config:
        logger.debug(
            "No banner site config for tenant=%s site=%s; rendering first-party cookies only",
            tenant_id,
            site_id,
        )
        return _render_tables(rows_by_category)

    # Vendor cookies from DB
    vendors = (
        db.query(BannerVendorConfigDB)
        .filter(BannerVendorConfigDB.site_config_id == config.id, BannerVendorConfigDB.is_active)
        .all()
    )
    for v in vendors:
        cat = v.category_key
        known = SERVICE_COOKIES.get(v.vendor_name, {})
        cookies = known.get("cookies", v.cookie_names or [])

        # Only a missing value falls back to 365 days. A stored 0 must reach
        # _format_retention unchanged so it is shown as "Session"; the former
        # `retention_days or 365` silently turned 0 into twelve months.
        retention_days = 365 if v.retention_days is None else v.retention_days

        # Vendors may use a category outside CATEGORY_ORDER; keep their rows
        # under that key rather than discarding them here.
        rows_by_category.setdefault(cat, []).append({
            "name": ", ".join(cookies) if cookies else v.vendor_name,
            # NOTE(review): for unknown vendors the provider column falls back
            # to description_de before vendor_name — confirm this is intended.
            "provider": known.get("provider", v.description_de or v.vendor_name),
            "purpose": known.get("purpose", v.description_de or ""),
            "category": cat,
            "retention": _format_retention(retention_days),
            "third_country": known.get("third_country", ""),
        })

    return _render_tables(rows_by_category)
def generate_cookie_table_from_registry() -> str:
    """Generate {{COOKIE_TABLE}} from service registry (no DB needed).

    Starts with the first-party cookies and then adds one row per entry of
    ``SERVICE_REGISTRY``, skipping entries whose id is ``"cmp"``. Metadata from
    ``SERVICE_COOKIES`` is preferred; registry metadata is the fallback.

    Returns:
        The Markdown produced by ``_render_tables`` for the collected rows.
    """
    from compliance.services.vendor_banner_sync import (
        CATEGORY_MAP, RETENTION_DEFAULTS, SERVICE_RETENTION,
    )
    from compliance.services.service_registry import SERVICE_REGISTRY

    rows_by_category: dict[str, list[dict]] = {cat: [] for cat in CATEGORY_ORDER}
    for fp in FIRST_PARTY_COOKIES:
        # setdefault instead of direct indexing: a first-party cookie with a
        # category outside CATEGORY_ORDER must not raise KeyError.
        rows_by_category.setdefault(fp["category"], []).append(fp)

    # The registry keys are not used for the rows, so two keys pointing at the
    # same service would produce identical rows (presumably the keys are
    # detection patterns — verify against service_registry). Exact duplicates
    # are emitted only once.
    seen_rows: set[tuple] = set()

    for meta in SERVICE_REGISTRY.values():
        service_id = meta.get("id", "")
        if service_id == "cmp":
            continue

        banner_cat = CATEGORY_MAP.get(meta.get("category", "other"), "functional")
        name = meta["name"]
        known = SERVICE_COOKIES.get(name, {})
        registry_provider = meta.get("provider", "")
        retention = SERVICE_RETENTION.get(service_id, RETENTION_DEFAULTS.get(banner_cat, 365))
        cookie_names = known.get("cookies")

        # Without a provider the fallback purpose is just the service name,
        # not "Name ()".
        fallback_purpose = f"{name} ({registry_provider})" if registry_provider else name

        row = {
            "name": ", ".join(cookie_names) if cookie_names else name,
            "provider": known.get("provider", registry_provider),
            "purpose": known.get("purpose", fallback_purpose),
            "category": banner_cat,
            "retention": _format_retention(retention),
            "third_country": known.get("third_country", _infer_third_country(meta)),
        }

        row_key = tuple(str(value) for value in row.values())
        if row_key in seen_rows:
            continue
        seen_rows.add(row_key)
        rows_by_category.setdefault(banner_cat, []).append(row)

    return _render_tables(rows_by_category)
def _infer_third_country(meta: dict) -> str:
country = meta.get("country", "")
eu_adequate = meta.get("eu_adequate", False)
if not country:
return ""
if country == "US":
return "USA (DPF)" if eu_adequate else "USA"
if country in ("DE", "AT", "FR", "NL", "IE", "SE", "DK", "FI", "BE", "IT", "ES", "MT", "LU"):
return f"Nein (EU)"
return country
def _render_tables(rows_by_category: dict[str, list[dict]]) -> str:
    """Render the collected cookie rows as one Markdown table per category.

    Categories listed in ``CATEGORY_ORDER`` come first, in that order. Any
    other category key present in ``rows_by_category`` follows in insertion
    order, so rows stored under a non-standard category are not silently
    dropped. Categories without rows are skipped.

    Args:
        rows_by_category: Maps a category key to its row dicts. Each row
            provides ``name``, ``provider``, ``purpose``, ``category``,
            ``retention`` and ``third_country``.

    Returns:
        Markdown text: per category a ``####`` heading, a blank line, the
        table, and a trailing blank line. Empty string if there are no rows.
    """

    def _cell(value: object) -> str:
        # A raw "|" or a line break inside a value would split the table row,
        # so pipes are escaped and line breaks collapsed to spaces. None
        # becomes an empty cell instead of the text "None".
        text = "" if value is None else str(value)
        return " ".join(text.replace("|", "\\|").splitlines())

    ordered_categories = list(CATEGORY_ORDER)
    ordered_categories.extend(
        cat for cat in rows_by_category if cat not in CATEGORY_ORDER
    )

    parts: list[str] = []
    for cat in ordered_categories:
        rows = rows_by_category.get(cat, [])
        if not rows:
            continue

        label = CATEGORY_LABELS.get(cat, cat)
        # The "\n" inside the heading yields the blank line Markdown needs
        # between a heading and the table that follows it.
        parts.append(f"#### {label}\n")
        parts.append("| Cookie/Tool | Anbieter | Zweck | Typ | Dauer | Drittland |")
        parts.append("|---|---|---|---|---|---|")
        for r in rows:
            type_label = CATEGORY_LABELS.get(r["category"], r["category"])
            parts.append(
                f"| {_cell(r['name'])} | {_cell(r['provider'])} | {_cell(r['purpose'])} | "
                f"{_cell(type_label)} | "
                f"{_cell(r['retention'])} | {_cell(r['third_country'])} |"
            )
        parts.append("")
    return "\n".join(parts)