Files
breakpilot-compliance/backend-compliance/compliance/services/migration_to_banner.py
T
Benjamin Admin df7d83134b feat(agent): migrate compliance-check results to banner + documents (M1-M5)
After a compliance-check run finishes, the user can now apply the
extracted vendor inventory directly to their own:

  - CookieBanner config (admin /sdk/einwilligungen)
  - Cookie-Policy / VVT-Register / Privacy-Policy templates
    (admin /sdk/document-generator)

Backend:
  - migration_to_banner.py: vendor list -> CookieBannerConfig with
    ESSENTIAL/PERFORMANCE/PERSONALIZATION/EXTERNAL_MEDIA buckets +
    review flags (broken opt-out URLs, missing expiry, no cookies listed)
  - migration_to_document.py: vendor list -> pre-fills for 3 doc
    templates, recipient-type aware (INTERNAL/GROUP/PROCESSOR/CONTROLLER)
  - agent_migration_routes.py: GET /banner-preview, /document-preview,
    /summary keyed on check_id
  - compliance_audit_log: new check_payloads table persists cmp_vendors +
    extracted_profile so the preview survives an app restart
  - tests: 9 mapper units + 4 endpoint integration tests

Frontend:
  - MigrationPanel.tsx: modal showing banner-config diff + document
    pre-fills, plus links into the existing editors
  - ComplianceCheckTab.tsx: replaces standalone audit link with the
    panel; net -3 lines, stays at the 500-cap

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-05-17 14:06:28 +02:00

233 lines
8.8 KiB
Python

"""
Migrate extracted vendor records -> CookieBannerConfig (admin-compliance
schema in einwilligungen/types/cookie-banner.ts).
Input : list[VendorRecord] as produced by vendor_extractor +
vendor_classifier + cookie_link_validator
Output : dict matching CookieBannerConfig shape, ready for the
/sdk/cookie-banner module to import.
The mapper also returns `flags[]` — items that need manual review
before going live (broken opt-out URL, missing expiry, etc.).
"""
from __future__ import annotations
import logging
from typing import Any
logger = logging.getLogger(__name__)
# ePaaS / OneTrust / etc. category -> CookieBannerCategory enum.
# CookieCategory has only 4 values, so we project marketing onto
# PERSONALIZATION and use EXTERNAL_MEDIA for embedded-content vendors.
_CATEGORY_MAP = {
"necessary": "ESSENTIAL",
"strictlynecessary": "ESSENTIAL",
"essential": "ESSENTIAL",
"functional": "ESSENTIAL", # Cookiebot conflates these
"statistics": "PERFORMANCE",
"analytics": "PERFORMANCE",
"performance": "PERFORMANCE",
"marketing": "PERSONALIZATION",
"advertising": "PERSONALIZATION",
"personalization": "PERSONALIZATION",
}
# Vendor names that indicate embedded external content
_EXTERNAL_MEDIA_HINTS = (
"youtube", "vimeo", "twitch", "google maps", "googlemaps",
"soundcloud", "spotify",
)
_CATEGORY_LABELS = {
"ESSENTIAL": {
"de": "Erforderliche Cookies",
"en": "Essential Cookies",
"desc_de": "Diese Cookies sind fuer den Betrieb der Website "
"unbedingt erforderlich (§25 Abs. 2 TDDDG) und koennen "
"nicht deaktiviert werden.",
"desc_en": "These cookies are strictly necessary for the operation "
"of the website and cannot be disabled.",
},
"PERFORMANCE": {
"de": "Analyse & Performance",
"en": "Analytics & Performance",
"desc_de": "Analyse-Cookies messen die Nutzung unserer Website, "
"um sie kontinuierlich zu verbessern.",
"desc_en": "Analytics cookies measure how visitors use our site so "
"we can improve it.",
},
"PERSONALIZATION": {
"de": "Marketing & Personalisierung",
"en": "Marketing & Personalization",
"desc_de": "Diese Cookies dienen der personalisierten "
"Ansprache und werbebezogenen Auswertung.",
"desc_en": "These cookies support personalised content and "
"marketing measurement.",
},
"EXTERNAL_MEDIA": {
"de": "Externe Medien",
"en": "External Media",
"desc_de": "Eingebettete Inhalte von Drittanbietern (z. B. "
"Videos, Karten, Audio) koennen Cookies setzen.",
"desc_en": "Embedded third-party media (videos, maps, audio) "
"may set cookies.",
},
}
def map_category(vendor_category: str, vendor_name: str) -> str:
    """Resolve a CMP category + vendor name to a CookieCategory enum value.

    The vendor name takes precedence: if it contains one of the
    ``_EXTERNAL_MEDIA_HINTS`` substrings (case-insensitive) the result is
    ``"EXTERNAL_MEDIA"`` regardless of the category. Otherwise the category
    is looked up in ``_CATEGORY_MAP``.

    Args:
        vendor_category: Category label as reported by the CMP. ``None`` or
            an empty string is treated as unknown.
        vendor_name: Vendor display name. ``None`` is treated as empty.

    Returns:
        One of ``"ESSENTIAL"``, ``"PERFORMANCE"``, ``"PERSONALIZATION"`` or
        ``"EXTERNAL_MEDIA"``. Unknown categories fall back to
        ``"PERSONALIZATION"``.
    """
    name_l = (vendor_name or "").lower()
    if any(hint in name_l for hint in _EXTERNAL_MEDIA_HINTS):
        return "EXTERNAL_MEDIA"

    # The lookup keys are lower-case and purely alphanumeric (e.g.
    # "strictlynecessary"), so drop whitespace and separators before the
    # lookup. Otherwise " analytics ", "Strictly Necessary" or
    # "strictly-necessary" would silently fall through to the default.
    category_key = "".join(
        ch for ch in (vendor_category or "").lower() if ch.isalnum()
    )
    return _CATEGORY_MAP.get(category_key, "PERSONALIZATION")
def build_banner_config(
    vendors: list[dict],
    site_name: str = "",
    privacy_policy_url: str = "",
    language: str = "de",
) -> dict:
    """Produce a CookieBannerConfig plus review flags from a vendor list.

    `vendors` is the list emitted by
    vendor_extractor.extract_vendors_from_payloads (+ score_vendors for the
    compliance_flags). Each vendor is bucketed into a canonical
    CookieCategory via ``map_category`` and one CookieInfo entry is built
    per named cookie; cookies without a name are skipped.

    Args:
        vendors: Vendor dicts. Keys read here: ``name``, ``category``,
            ``purpose``, ``cookies`` (a list of dicts with ``name``,
            ``purpose``, ``expiry``, ``is_third_party``), ``opt_out_url``,
            ``opt_out_ok`` and ``opt_out_status``. ``None`` or an empty
            list yields a config containing only an empty ESSENTIAL
            category.
        site_name: Inserted into the banner description; a generic phrase
            is used when empty.
        privacy_policy_url: Used for both language variants of the privacy
            policy link; defaults to ``/datenschutz`` (de) and ``/privacy``
            (en) when empty.
        language: Key under which each cookie's purpose text is stored.

    Returns:
        A dict with three keys:

        * ``config``  - the CookieBannerConfig-shaped dict (``id`` and
          ``tenantId`` are left empty for the tenant to fill on apply).
        * ``flags``   - items needing manual review, each with ``level``
          (``ERROR``/``WARNING``/``INFO``), ``vendor``, ``issue`` and
          ``message``. Issues emitted: ``no_cookies_listed``,
          ``cookie_no_expiry``, ``broken_opt_out``.
        * ``summary`` - vendor/cookie/category counts and flag counts per
          level.
    """
    category_order = (
        "ESSENTIAL", "PERFORMANCE", "PERSONALIZATION", "EXTERNAL_MEDIA",
    )
    by_cat: dict[str, list[dict]] = {cat_id: [] for cat_id in category_order}
    flags: list[dict] = []
    cookies_total = 0
    vendors_with_no_cookies = 0

    for v in vendors or []:
        cat = map_category(v.get("category", ""), v.get("name", ""))
        provider = v.get("name") or "Unbekannt"
        cookies = v.get("cookies") or []

        if not cookies:
            vendors_with_no_cookies += 1
            flags.append({
                "level": "WARNING",
                "vendor": provider,
                "issue": "no_cookies_listed",
                "message": (
                    f"Anbieter '{provider}' wurde erfasst, "
                    "aber keine Cookies sind dokumentiert. Vor "
                    "Veroeffentlichung manuell ergaenzen."
                ),
            })
            # No `continue` here: the vendor-level opt-out check below must
            # also run for vendors without documented cookies, otherwise a
            # broken opt-out link on such a vendor is never reported.

        for c in cookies:
            cname = (c.get("name") or "").strip()
            if not cname:
                continue
            cookies_total += 1
            expiry = c.get("expiry")
            by_cat[cat].append({
                "name": cname,
                "provider": provider,
                # Cookie-level purpose wins over the vendor-level one.
                "purpose": {
                    language: c.get("purpose") or v.get("purpose") or "",
                },
                "expiry": expiry or "",
                "type": (
                    "THIRD_PARTY" if c.get("is_third_party")
                    else "FIRST_PARTY"
                ),
            })
            if not expiry:
                flags.append({
                    "level": "INFO",
                    "vendor": provider,
                    "issue": "cookie_no_expiry",
                    "message": (
                        f"Cookie '{cname}' bei '{provider}' ohne "
                        "Speicherdauer — fuer DSK-Konformitaet ergaenzen."
                    ),
                })

        # Vendor-level link validation flags. Only an explicit False counts
        # as broken; a missing/None `opt_out_ok` means "not validated".
        if v.get("opt_out_url") and v.get("opt_out_ok") is False:
            flags.append({
                "level": "ERROR",
                "vendor": provider,
                "issue": "broken_opt_out",
                "message": (
                    f"Opt-Out-Link von '{provider}' antwortet mit "
                    f"HTTP {v.get('opt_out_status')}. "
                    "Art. 7(3) DSGVO erfordert funktionierenden Widerruf."
                ),
            })

    # Empty categories are dropped from the banner, except ESSENTIAL which
    # is always present.
    categories: list[dict] = []
    for cat_id in category_order:
        cat_cookies = by_cat[cat_id]
        if not cat_cookies and cat_id != "ESSENTIAL":
            continue
        meta = _CATEGORY_LABELS[cat_id]
        categories.append({
            "id": cat_id,
            "name": {"de": meta["de"], "en": meta["en"]},
            "description": {"de": meta["desc_de"], "en": meta["desc_en"]},
            "isRequired": cat_id == "ESSENTIAL",
            "defaultEnabled": cat_id == "ESSENTIAL",
            "dataPointIds": [],
            "cookies": cat_cookies,
        })

    config = {
        "id": "",  # filled by tenant on apply
        "tenantId": "",
        "categories": categories,
        "styling": {
            "position": "BOTTOM",
            "theme": "LIGHT",
            "primaryColor": "#2563eb",
            "borderRadius": 8,
        },
        "texts": {
            "title": {
                "de": "Wir verwenden Cookies",
                "en": "We use cookies",
            },
            "description": {
                "de": (
                    f"Auf {site_name or 'unserer Website'} setzen wir "
                    "Cookies und aehnliche Technologien ein, um die "
                    "Nutzererfahrung zu verbessern. Sie koennen Ihre "
                    "Auswahl jederzeit anpassen."
                ),
                "en": (
                    f"On {site_name or 'this website'} we use cookies "
                    "and similar technologies. You can change your "
                    "selection at any time."
                ),
            },
            "acceptAll": {"de": "Alle akzeptieren", "en": "Accept all"},
            "rejectAll": {"de": "Alle ablehnen", "en": "Reject all"},
            "customize": {"de": "Auswahl anpassen", "en": "Customize"},
            "save": {"de": "Auswahl speichern", "en": "Save preferences"},
            "privacyPolicyLink": {
                "de": privacy_policy_url or "/datenschutz",
                "en": privacy_policy_url or "/privacy",
            },
        },
    }

    summary = {
        "vendors_total": len(vendors or []),
        "vendors_with_no_cookies": vendors_with_no_cookies,
        "cookies_total": cookies_total,
        "categories": {
            cat_id: len(entries) for cat_id, entries in by_cat.items()
        },
        "flags_error": sum(1 for f in flags if f["level"] == "ERROR"),
        "flags_warning": sum(1 for f in flags if f["level"] == "WARNING"),
        "flags_info": sum(1 for f in flags if f["level"] == "INFO"),
    }

    logger.info(
        "Banner migration prepared: %d vendors -> %d cookies in %d "
        "categories. Flags: %d ERROR, %d WARNING, %d INFO.",
        summary["vendors_total"], summary["cookies_total"],
        len(categories), summary["flags_error"],
        summary["flags_warning"], summary["flags_info"],
    )

    return {
        "config": config,
        "flags": flags,
        "summary": summary,
    }