feat: Cookie-Banner ↔ Backend Integration (DSR, Retention, Consent Proof)

Phase 1: Vendor sync from service registry (82+ services → banner vendors)
Phase 2: Category-based retention (marketing=90d, statistics=790d, not hardcoded 365d)
Phase 3: DSR ↔ Banner email linking (link-email, by-email, Art.17 erasure, Art.15/20 export)
Phase 4: Consent sync (Banner → Einwilligungen bridge)
Phase 6: Consent proof (SHA256 config hash + config_version in audit log, Art. 7(1) DSGVO)

New files:
- banner_dsr_service.py — email linking + DSR integration
- vendor_banner_sync.py — service registry → vendor configs
- migration 106 — linked_email, banner_config_hash, consent_version columns

Tests: 20+ new backend tests + 2 Playwright E2E test suites (API + UI)

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
Benjamin Admin
2026-05-02 19:41:22 +02:00
parent c3f8e19e92
commit 44acd68c96
12 changed files with 1522 additions and 5 deletions
@@ -25,6 +25,7 @@ def consent_to_dict(c: BannerConsentDB) -> dict[str, Any]:
"vendors": c.vendors or [],
"ip_hash": c.ip_hash,
"consent_string": c.consent_string,
"linked_email": c.linked_email,
"expires_at": c.expires_at.isoformat() if c.expires_at else None,
"created_at": c.created_at.isoformat() if c.created_at else None,
"updated_at": c.updated_at.isoformat() if c.updated_at else None,
@@ -45,6 +46,7 @@ def site_config_to_dict(s: BannerSiteConfigDB) -> dict[str, Any]:
"dsb_email": s.dsb_email,
"theme": s.theme or {},
"tcf_enabled": s.tcf_enabled,
"config_version": s.config_version,
"is_active": s.is_active,
"created_at": s.created_at.isoformat() if s.created_at else None,
"updated_at": s.updated_at.isoformat() if s.updated_at else None,
@@ -9,9 +9,12 @@ display), export, and per-site consent statistics.
Admin-side site/category/vendor management lives in
``compliance.services.banner_admin_service.BannerAdminService``.
DSR-facing email linking lives in
``compliance.services.banner_dsr_service.BannerDSRService``.
"""
import hashlib
import json
import uuid
from datetime import datetime, timedelta, timezone
from typing import Any, Optional
@@ -33,6 +36,15 @@ from compliance.services._banner_serializers import (
vendor_to_dict,
)
# Fallback consent lifetime in days, keyed by banner category.
# Stated basis: DSGVO Art. 5(1)(e), CNIL guidelines, EDPB recommendations.
CATEGORY_RETENTION_DAYS = dict(
    necessary=365,   # session + functional: at most 12 months
    statistics=790,  # at most 26 months (Google Analytics default)
    marketing=90,    # at most 90 days for retargeting
    functional=365,  # at most 12 months
)
class BannerConsentService:
"""Business logic for public SDK banner consent endpoints."""
@@ -59,6 +71,8 @@ class BannerConsentService:
device_fingerprint: Optional[str] = None,
categories: Optional[list[str]] = None,
ip_hash: Optional[str] = None,
banner_config_hash: Optional[str] = None,
consent_version: Optional[int] = None,
) -> None:
entry = BannerConsentAuditLogDB(
tenant_id=tenant_id,
@@ -68,9 +82,53 @@ class BannerConsentService:
device_fingerprint=device_fingerprint,
categories=categories or [],
ip_hash=ip_hash,
banner_config_hash=banner_config_hash,
consent_version=consent_version,
)
self.db.add(entry)
def _compute_config_hash(self, tenant_id: uuid.UUID, site_id: str) -> tuple[Optional[str], Optional[int]]:
    """Fingerprint the site's banner texts and links for consent proof (Art. 7(1) DSGVO).

    Loads the site config row for ``(tenant_id, site_id)`` and hashes a
    key-sorted JSON snapshot of its banner title, banner description,
    privacy URL and imprint URL.

    Returns:
        ``(digest, config_version)``, where ``digest`` is the first 32 hex
        characters of the SHA-256 of the snapshot; ``(None, None)`` when no
        site config row is found.
    """
    lookup = self.db.query(BannerSiteConfigDB).filter(
        BannerSiteConfigDB.tenant_id == tenant_id,
        BannerSiteConfigDB.site_id == site_id,
    )
    site_config = lookup.first()
    if not site_config:
        return None, None

    # Only these four fields take part in the fingerprint.
    hashed_fields = ("banner_title", "banner_description", "privacy_url", "imprint_url")
    payload = json.dumps(
        {field: getattr(site_config, field) for field in hashed_fields},
        sort_keys=True,
    )
    digest = hashlib.sha256(payload.encode()).hexdigest()
    # The stored value is the truncated digest, not the full 64 hex chars.
    return digest[:32], site_config.config_version
def _get_max_retention(self, tenant_id: uuid.UUID, site_id: str, categories: list[str]) -> int:
    """Return the consent lifetime in days for the accepted categories.

    Resolution order:

    1. No site config for ``(tenant_id, site_id)``: 365.
    2. The largest truthy ``retention_days`` among the site's active
       vendors whose ``category_key`` is in ``categories``.
    3. Otherwise the largest ``CATEGORY_RETENTION_DAYS`` value for the
       accepted categories (365 for unknown categories or an empty list).

    Vendors with ``retention_days`` of 0 or ``None`` are ignored. If every
    matching vendor is like that, step 3 applies instead of raising
    ``ValueError`` from ``max()`` on an empty sequence.
    """
    fallback_days = 365

    config = (
        self.db.query(BannerSiteConfigDB)
        .filter(BannerSiteConfigDB.tenant_id == tenant_id, BannerSiteConfigDB.site_id == site_id)
        .first()
    )
    if not config:
        return fallback_days

    vendors = (
        self.db.query(BannerVendorConfigDB)
        .filter(
            BannerVendorConfigDB.site_config_id == config.id,
            BannerVendorConfigDB.category_key.in_(categories),
            BannerVendorConfigDB.is_active,
        )
        .all()
    )
    # Materialize first so "vendors exist but none has a usable value"
    # falls through to the category defaults.
    vendor_days = [v.retention_days for v in vendors if v.retention_days]
    if vendor_days:
        return max(vendor_days)

    return max(
        (CATEGORY_RETENTION_DAYS.get(c, fallback_days) for c in categories),
        default=fallback_days,
    )
# ------------------------------------------------------------------
# Consent CRUD (public SDK)
# ------------------------------------------------------------------
@@ -86,11 +144,19 @@ class BannerConsentService:
user_agent: Optional[str],
consent_string: Optional[str],
) -> dict[str, Any]:
"""Upsert a device consent row for (tenant, site, device_fingerprint)."""
"""Upsert a device consent row for (tenant, site, device_fingerprint).
Expiration is derived from the maximum vendor retention for the
accepted categories (Phase 2 — DSGVO Art. 5(1)(e)).
A SHA256 hash of the banner config is stored in the audit log
for consent proof (Phase 6 — Art. 7(1) DSGVO).
"""
tid = uuid.UUID(tenant_id)
ip_hash = self._hash_ip(ip_address)
now = datetime.now(timezone.utc)
expires_at = now + timedelta(days=365)
retention = self._get_max_retention(tid, site_id, categories)
expires_at = now + timedelta(days=retention)
config_hash, config_ver = self._compute_config_hash(tid, site_id)
existing = (
self.db.query(BannerConsentDB)
@@ -113,7 +179,7 @@ class BannerConsentService:
self.db.flush()
self._log(
tid, existing.id, "consent_updated", site_id, device_fingerprint,
categories, ip_hash,
categories, ip_hash, config_hash, config_ver,
)
self.db.commit()
self.db.refresh(existing)
@@ -134,7 +200,7 @@ class BannerConsentService:
self.db.flush()
self._log(
tid, consent.id, "consent_given", site_id, device_fingerprint,
categories, ip_hash,
categories, ip_hash, config_hash, config_ver,
)
self.db.commit()
self.db.refresh(consent)
@@ -0,0 +1,266 @@
# mypy: disable-error-code="arg-type,assignment"
"""
Banner DSR service — bridges device-based banner consents with
user-based DSR (Data Subject Request) processing.
Phase 3: Email linking allows correlating anonymous device fingerprints
with user emails (e.g. after newsletter signup or account creation).
This enables Art. 15 (access), Art. 17 (erasure), and Art. 20
(portability) requests to include/delete banner consent data.
Phase 4: Consent sync bridges banner consents (device-based) with
Einwilligungen (user-based) for unified consent management.
"""
import uuid
from datetime import datetime, timezone
from typing import Any, Optional
from sqlalchemy.orm import Session
from compliance.db.banner_models import (
BannerConsentAuditLogDB,
BannerConsentDB,
)
from compliance.db.einwilligungen_models import (
EinwilligungenConsentDB,
EinwilligungenConsentHistoryDB,
)
from compliance.domain import NotFoundError, ValidationError
from compliance.services._banner_serializers import consent_to_dict
class BannerDSRService:
    """Email linking and DSR integration for banner consents.

    Banner consents are stored per device fingerprint. Once an email is
    linked to a consent row, that row can be looked up, exported or erased
    by email, and its categories can be mirrored into the user-based
    Einwilligungen tables.

    ``link_email``, ``delete_consents_by_email`` and
    ``sync_consent_to_einwilligungen`` commit the session themselves.
    """

    def __init__(self, db: Session) -> None:
        self.db = db

    # ------------------------------------------------------------------
    # Internal helpers
    # ------------------------------------------------------------------

    @staticmethod
    def _normalize_email(email: str) -> str:
        """Return *email* lower-cased with surrounding whitespace removed."""
        return email.lower().strip()

    def _require_email(self, email: str) -> str:
        """Validate *email* and return its normalized form.

        Raises:
            ValidationError: If *email* is empty or contains no ``@``.
        """
        if not email or "@" not in email:
            raise ValidationError("Invalid email address")
        return self._normalize_email(email)

    def _find_device_consent(
        self, tid: uuid.UUID, site_id: str, device_fingerprint: str,
    ) -> Optional[BannerConsentDB]:
        """Return the consent row for (tenant, site, device) or ``None``."""
        return (
            self.db.query(BannerConsentDB)
            .filter(
                BannerConsentDB.tenant_id == tid,
                BannerConsentDB.site_id == site_id,
                BannerConsentDB.device_fingerprint == device_fingerprint,
            )
            .first()
        )

    def _find_consents_by_email(
        self, tid: uuid.UUID, normalized_email: str,
    ) -> list[BannerConsentDB]:
        """Return all consent rows of the tenant linked to *normalized_email*."""
        return (
            self.db.query(BannerConsentDB)
            .filter(
                BannerConsentDB.tenant_id == tid,
                BannerConsentDB.linked_email == normalized_email,
            )
            .all()
        )

    # ------------------------------------------------------------------
    # Phase 3: Email linking
    # ------------------------------------------------------------------

    def link_email(
        self,
        tenant_id: str,
        site_id: str,
        device_fingerprint: str,
        email: str,
    ) -> dict[str, Any]:
        """Link an email address to a device fingerprint's consent.

        Typically called after newsletter signup, account creation, or
        login — any point where the user's email becomes known. An
        ``email_linked`` audit entry is written in the same commit.

        Returns:
            The updated consent, serialized with ``consent_to_dict``.

        Raises:
            ValueError: If *tenant_id* is not a valid UUID string.
            ValidationError: If *email* is empty or contains no ``@``.
            NotFoundError: If the device has no consent row for this site.
        """
        tid = uuid.UUID(tenant_id)
        normalized = self._require_email(email)

        consent = self._find_device_consent(tid, site_id, device_fingerprint)
        if not consent:
            raise NotFoundError("No consent found for this device")

        consent.linked_email = normalized
        consent.updated_at = datetime.now(timezone.utc)

        # Audit the linking
        self.db.add(BannerConsentAuditLogDB(
            tenant_id=tid,
            consent_id=consent.id,
            action="email_linked",
            site_id=site_id,
            device_fingerprint=device_fingerprint,
            categories=consent.categories or [],
        ))

        self.db.commit()
        self.db.refresh(consent)
        return consent_to_dict(consent)

    def get_consents_by_email(
        self, tenant_id: str, email: str,
    ) -> list[dict[str, Any]]:
        """Find all banner consents linked to an email (Art. 15 DSGVO).

        The email is normalized (lower-cased, stripped) before matching.
        """
        tid = uuid.UUID(tenant_id)
        consents = self._find_consents_by_email(tid, self._normalize_email(email))
        return [consent_to_dict(c) for c in consents]

    def delete_consents_by_email(
        self, tenant_id: str, email: str,
    ) -> dict[str, Any]:
        """Delete all banner consents for an email (Art. 17 DSGVO erasure).

        A ``consent_deleted_dsr`` audit entry is added for each consent
        before it is deleted; everything is committed together.

        Returns:
            ``{"deleted": <number of consents removed>, "email": <normalized email>}``.
        """
        tid = uuid.UUID(tenant_id)
        normalized = self._normalize_email(email)
        consents = self._find_consents_by_email(tid, normalized)

        for c in consents:
            self.db.add(BannerConsentAuditLogDB(
                tenant_id=tid,
                consent_id=c.id,
                action="consent_deleted_dsr",
                site_id=c.site_id,
                device_fingerprint=c.device_fingerprint,
                categories=c.categories or [],
            ))
            self.db.delete(c)

        self.db.commit()
        return {"deleted": len(consents), "email": normalized}

    def export_for_dsr(
        self, tenant_id: str, email: str,
    ) -> dict[str, Any]:
        """Export all banner consent data for a DSR (Art. 15/20 DSGVO).

        Returns:
            A dict with the normalized ``email``, the serialized
            ``banner_consents`` linked to it, and their ``audit_trail``
            (newest entry first).
        """
        tid = uuid.UUID(tenant_id)
        normalized = self._normalize_email(email)
        consents = self._find_consents_by_email(tid, normalized)

        consent_ids = [c.id for c in consents]
        audit = []
        if consent_ids:
            audit = (
                self.db.query(BannerConsentAuditLogDB)
                .filter(
                    # The consent ids are already tenant-scoped; filtering the
                    # log by tenant as well keeps the export tenant-isolated
                    # even if a log row carries a mismatched consent_id.
                    BannerConsentAuditLogDB.tenant_id == tid,
                    BannerConsentAuditLogDB.consent_id.in_(consent_ids),
                )
                .order_by(BannerConsentAuditLogDB.created_at.desc())
                .all()
            )

        return {
            "email": normalized,
            "banner_consents": [consent_to_dict(c) for c in consents],
            "audit_trail": [
                {
                    "id": str(a.id),
                    "action": a.action,
                    "site_id": a.site_id,
                    "categories": a.categories or [],
                    "banner_config_hash": a.banner_config_hash,
                    "consent_version": a.consent_version,
                    "created_at": a.created_at.isoformat() if a.created_at else None,
                }
                for a in audit
            ],
        }

    # ------------------------------------------------------------------
    # Phase 4: Consent sync (Banner ↔ Einwilligungen)
    # ------------------------------------------------------------------

    def sync_consent_to_einwilligungen(
        self,
        tenant_id: str,
        device_fingerprint: str,
        email: str,
        site_id: str,
    ) -> dict[str, Any]:
        """Sync banner consent categories to user-based Einwilligungen.

        Called when a user logs in and their email becomes known. For each
        accepted banner category an Einwilligungen consent with
        ``data_point_id = "banner_<category>"`` and ``user_id = <email>`` is
        created, or re-granted if it exists but is not granted. Each change
        also writes a ``granted`` history entry. Consents that are already
        granted are left untouched and not counted.

        Returns:
            ``{"synced": <rows created or re-granted>, "categories": [...], "email": <normalized email>}``.

        Raises:
            ValueError: If *tenant_id* is not a valid UUID string.
            ValidationError: If *email* is empty or contains no ``@``.
            NotFoundError: If the device has no banner consent for this site.
        """
        tid = uuid.UUID(tenant_id)
        # Same validation as link_email: without it an empty email would be
        # written as user_id "" into the Einwilligungen tables.
        normalized = self._require_email(email)

        consent = self._find_device_consent(tid, site_id, device_fingerprint)
        if not consent:
            raise NotFoundError("No banner consent found for this device")

        now = datetime.now(timezone.utc)

        # Link email if not already linked.
        # NOTE(review): when the consent is already linked to a different
        # email, the categories are still synced to *email* below — confirm
        # this is intended (e.g. shared devices).
        if not consent.linked_email:
            consent.linked_email = normalized
            consent.updated_at = now

        synced = 0
        categories = consent.categories or []

        for cat in categories:
            data_point_id = f"banner_{cat}"
            existing = (
                self.db.query(EinwilligungenConsentDB)
                .filter(
                    EinwilligungenConsentDB.tenant_id == tid,
                    EinwilligungenConsentDB.user_id == normalized,
                    EinwilligungenConsentDB.data_point_id == data_point_id,
                )
                .first()
            )

            if existing:
                if existing.granted:
                    continue
                existing.granted = True
                existing.granted_at = now
                existing.revoked_at = None
                existing.source = "banner_sync"
                target = existing
            else:
                target = EinwilligungenConsentDB(
                    tenant_id=tid,
                    user_id=normalized,
                    data_point_id=data_point_id,
                    granted=True,
                    granted_at=now,
                    consent_version="1",
                    source="banner_sync",
                )
                self.db.add(target)
                # Flush so the new row has an id for the history entry.
                self.db.flush()

            self.db.add(EinwilligungenConsentHistoryDB(
                consent_id=target.id,
                tenant_id=tid,
                action="granted",
                source="banner_sync",
            ))
            synced += 1

        self.db.commit()
        return {
            "synced": synced,
            "categories": categories,
            "email": normalized,
        }
@@ -142,9 +142,33 @@ class DSRWorkflowService:
if body.result_data:
dsr.data_export = body.result_data
dsr.updated_at = now
# Phase 3: Auto-delete banner consents on Art. 17 erasure
banner_result = None
if dsr.request_type == "erasure" and dsr.requester_email:
from compliance.services.banner_dsr_service import BannerDSRService
banner_svc = BannerDSRService(self._db)
banner_result = banner_svc.delete_consents_by_email(
tenant_id, dsr.requester_email,
)
# Phase 3: Include banner consents in data export for access/portability
if dsr.request_type in ("access", "portability") and dsr.requester_email:
from compliance.services.banner_dsr_service import BannerDSRService
banner_svc = BannerDSRService(self._db)
export = banner_svc.export_for_dsr(tenant_id, dsr.requester_email)
if export.get("banner_consents"):
existing_export = dsr.data_export or {}
if isinstance(existing_export, dict):
existing_export["banner_consents"] = export
dsr.data_export = existing_export
self._db.commit()
self._db.refresh(dsr)
return _dsr_to_dict(dsr)
result = _dsr_to_dict(dsr)
if banner_result:
result["banner_consents_deleted"] = banner_result["deleted"]
return result
# -- Reject --------------------------------------------------------------
@@ -0,0 +1,127 @@
"""
Vendor-Banner Sync — maps the 82-service registry to banner vendor configs.
Automatically creates vendor entries in the cookie banner with correct
category assignment and legally required retention periods.
"""
import logging
import os
import uuid
logger = logging.getLogger(__name__)
# Service-registry category -> banner category under which it is listed.
# Written grouped by banner category; the resulting flat mapping keeps the
# original key order (the "other" catch-all stays last).
CATEGORY_MAP = {
    service_category: banner_category
    for banner_category, service_categories in (
        ("statistics", ("tracking", "heatmap", "tag_manager")),
        ("marketing", ("marketing", "social", "push", "crm")),
        ("functional", ("chatbot", "support", "video", "testing")),
        ("necessary", ("cdn", "payment", "error_tracking", "accessibility", "cmp")),
        ("functional", ("other",)),
    )
    for service_category in service_categories
}
# Maximum retention per banner category, in days.
# Stated basis: DSGVO Art. 5(1)(e), CNIL guidelines, EDPB recommendations.
RETENTION_DEFAULTS = dict(
    necessary=365,   # session + functional: at most 12 months
    statistics=790,  # at most 26 months (Google Analytics default)
    marketing=90,    # at most 90 days for retargeting
    functional=365,  # at most 12 months
)
# Per-service retention overrides in days, keyed by service id and grouped
# here by value. 0 marks "session only".
SERVICE_RETENTION = {
    **dict.fromkeys(("google_analytics", "matomo"), 790),   # 26 months (GA4 default)
    "hotjar": 365,                                          # 12 months
    **dict.fromkeys(("facebook_pixel", "google_ads"), 90),  # 90 days (Meta default)
    **dict.fromkeys(("stripe", "paypal", "klarna"), 0),     # session only (payment)
}
def get_banner_vendors_from_registry() -> list[dict]:
    """Convert service registry entries to banner vendor configs.

    Each registry entry's ``category`` (default ``"other"``) is mapped to a
    banner category through ``CATEGORY_MAP``; unmapped categories become
    ``"functional"``. Retention is taken from ``SERVICE_RETENTION`` when the
    service id has an override, otherwise from ``RETENTION_DEFAULTS`` for
    the banner category, otherwise 365 days.

    Returns:
        One vendor dict per registry entry, except the entry whose ``id``
        is ``"cmp"``. ``vendor_url`` and ``cookie_names`` are left empty.

    Raises:
        KeyError: If a registry entry has no ``"name"``.
    """
    from compliance.services.service_registry import SERVICE_REGISTRY

    vendors = []
    # Only the metadata values are needed; the registry keys are not used.
    for meta in SERVICE_REGISTRY.values():
        service_id = meta.get("id", "")

        # Skip CMP — consent managers are not vendor entries.
        # NOTE(review): this matches the service *id* "cmp" only. Entries
        # whose *category* is "cmp" are still emitted (as "necessary" via
        # CATEGORY_MAP) — confirm that is intended.
        if service_id == "cmp":
            continue

        banner_category = CATEGORY_MAP.get(meta.get("category", "other"), "functional")
        retention = SERVICE_RETENTION.get(
            service_id, RETENTION_DEFAULTS.get(banner_category, 365),
        )
        # The same text is used for both languages.
        description = f"{meta['name']} ({meta.get('provider', '')})"

        vendors.append({
            "vendor_name": meta["name"],
            "vendor_url": "",  # Would need manual entry
            "category_key": banner_category,
            "description_de": description,
            "description_en": description,
            "cookie_names": [],  # Service-specific, populated later
            "retention_days": retention,
            "is_active": True,
            "country": meta.get("country", ""),
            "eu_adequate": meta.get("eu_adequate", False),
            "requires_consent": meta.get("requires_consent", True),
            "legal_ref": meta.get("legal_ref", ""),
            "service_id": service_id,
        })

    logger.info("Generated %d banner vendors from service registry", len(vendors))
    return vendors
async def sync_vendors_to_site(pool, site_config_id: str, tenant_id: str) -> dict:
    """Sync service registry vendors to a site's banner vendor configs.

    For every vendor from ``get_banner_vendors_from_registry()`` the row in
    ``compliance_banner_vendor_configs`` matching ``(site_config_id,
    vendor_name)`` is updated (category, retention, active flag), or a new
    row is inserted when none exists. All statements run in one transaction,
    so a failure part-way through leaves the site's vendors unchanged.

    Args:
        pool: Connection pool providing ``acquire()``; the connection must
            support ``fetchrow``, ``execute`` and ``transaction()``
            (presumably asyncpg — confirm).
        site_config_id: UUID string of the site config to sync into.
        tenant_id: Currently unused.
            NOTE(review): neither query is scoped by tenant and the INSERT
            sets no tenant column — verify against the table schema.

    Returns:
        ``{"created": int, "updated": int, "total": int}``.

    Raises:
        ValueError: If *site_config_id* is not a valid UUID string.
    """
    # Local import: json is not among this module's top-level imports.
    import json

    vendors = get_banner_vendors_from_registry()
    # Parse once up front so a malformed id fails before any DB work.
    site_uuid = uuid.UUID(site_config_id)
    created = 0
    updated = 0

    async with pool.acquire() as conn:
        async with conn.transaction():
            for v in vendors:
                # Check if vendor already exists for this site
                existing = await conn.fetchrow("""
                    SELECT id FROM compliance_banner_vendor_configs
                    WHERE site_config_id = $1 AND vendor_name = $2
                """, site_uuid, v["vendor_name"])

                if existing:
                    await conn.execute("""
                        UPDATE compliance_banner_vendor_configs
                        SET category_key = $1, retention_days = $2, is_active = $3
                        WHERE id = $4
                    """, v["category_key"], v["retention_days"], v["is_active"], existing["id"])
                    updated += 1
                else:
                    await conn.execute("""
                        INSERT INTO compliance_banner_vendor_configs
                        (site_config_id, vendor_name, category_key, description_de,
                        description_en, cookie_names, retention_days, is_active)
                        VALUES ($1, $2, $3, $4, $5, $6, $7, $8)
                    """, site_uuid, v["vendor_name"], v["category_key"],
                        v["description_de"], v["description_en"],
                        json.dumps(v["cookie_names"]), v["retention_days"], v["is_active"])
                    created += 1

    logger.info("Synced vendors to site %s: %d created, %d updated", site_config_id, created, updated)
    return {"created": created, "updated": updated, "total": len(vendors)}