feat(agent): migrate compliance-check results to banner + documents (M1-M5)

After a compliance-check run finishes, the user can now apply the
extracted vendor inventory directly to their own:

  - CookieBanner config (admin /sdk/einwilligungen)
  - Cookie-Policy / VVT-Register / Privacy-Policy templates
    (admin /sdk/document-generator)

Backend:
  - migration_to_banner.py: vendor list -> CookieBannerConfig with
    ESSENTIAL/PERFORMANCE/PERSONALIZATION/EXTERNAL_MEDIA buckets +
    review flags (broken opt-out URLs, missing expiry, no cookies listed)
  - migration_to_document.py: vendor list -> pre-fills for 3 doc
    templates, recipient-type aware (INTERNAL/GROUP_COMPANY/PROCESSOR/CONTROLLER)
  - agent_migration_routes.py: GET /banner-preview, /document-preview,
    /summary keyed on check_id
  - compliance_audit_log: new check_payloads table persists cmp_vendors +
    extracted_profile so the preview survives an app restart
  - tests: 9 mapper units + 4 endpoint integration tests

Frontend:
  - MigrationPanel.tsx: modal showing banner-config diff + document
    pre-fills, plus links into the existing editors
  - ComplianceCheckTab.tsx: replaces standalone audit link with the
    panel; net -3 lines, stays at the 500-cap

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
Benjamin Admin
2026-05-17 14:06:28 +02:00
parent f4c9cea770
commit df7d83134b
13 changed files with 1207 additions and 15 deletions
@@ -58,9 +58,63 @@ def _ensure_db() -> None:
);
CREATE INDEX IF NOT EXISTS idx_mc_check ON mc_results(check_id);
CREATE INDEX IF NOT EXISTS idx_mc_reg ON mc_results(regulation, passed);
-- Migration-source payloads (cmp_vendors + extracted_profile),
-- kept as JSON blobs so the /migration/* endpoints can rebuild
-- a banner config or document pre-fill after the in-memory
-- _compliance_check_jobs entry is gone.
CREATE TABLE IF NOT EXISTS check_payloads (
check_id TEXT PRIMARY KEY,
vendors TEXT, -- JSON list[dict]
profile TEXT -- JSON dict
);
""")
def record_check_payload(
    check_id: str,
    vendors: list[dict] | None,
    profile: dict | None,
) -> None:
    """Persist cmp_vendors + extracted_profile for later migration use.

    The payload is stored as two JSON blobs in ``check_payloads`` keyed by
    ``check_id``; an existing row for the same id is replaced.

    This is best-effort: any failure (DB error, non-serialisable payload)
    is logged as a warning and swallowed so the caller is never interrupted.

    Args:
        check_id: Identifier of the compliance-check run.
        vendors: Extracted vendor records; ``None`` is stored as ``[]``.
        profile: Extracted profile; ``None`` is stored as ``{}``.
    """
    try:
        _ensure_db()
        conn = sqlite3.connect(DB_PATH)
        try:
            # `with conn` commits on success and rolls back on error, but it
            # does NOT close the connection -- hence the explicit close below.
            with conn:
                conn.execute(
                    "INSERT OR REPLACE INTO check_payloads "
                    "(check_id, vendors, profile) VALUES (?, ?, ?)",
                    (
                        check_id,
                        json.dumps(vendors or [], ensure_ascii=False),
                        json.dumps(profile or {}, ensure_ascii=False),
                    ),
                )
        finally:
            conn.close()
    except Exception as e:
        logger.warning("record_check_payload failed for %s: %s", check_id, e)
def get_check_payload(check_id: str) -> dict | None:
    """Load cmp_vendors + extracted_profile for a previous check.

    Args:
        check_id: Identifier of the compliance-check run.

    Returns:
        ``{"vendors": list, "profile": dict}`` decoded from the stored JSON
        blobs, or ``None`` when no row exists for ``check_id`` or when the
        lookup fails (failures are logged as a warning, never raised).
    """
    try:
        _ensure_db()
        conn = sqlite3.connect(DB_PATH)
        try:
            conn.row_factory = sqlite3.Row
            row = conn.execute(
                "SELECT vendors, profile FROM check_payloads WHERE check_id=?",
                (check_id,),
            ).fetchone()
        finally:
            # The sqlite3 connection context manager never closes the
            # connection, so release it explicitly once the row is fetched.
            conn.close()
        if row is None:
            return None
        return {
            # NULL columns decode to the empty container of the right type.
            "vendors": json.loads(row["vendors"] or "[]"),
            "profile": json.loads(row["profile"] or "{}"),
        }
    except Exception as e:
        logger.warning("get_check_payload failed for %s: %s", check_id, e)
        return None
def record_check_run(
check_id: str,
tenant_id: str,
@@ -0,0 +1,232 @@
"""
Migrate extracted vendor records -> CookieBannerConfig (admin-compliance
schema in einwilligungen/types/cookie-banner.ts).
Input : list[VendorRecord] as produced by vendor_extractor +
vendor_classifier + cookie_link_validator
Output : dict matching CookieBannerConfig shape, ready for the
/sdk/cookie-banner module to import.
The mapper also returns `flags[]` — items that need manual review
before going live (broken opt-out URL, missing expiry, etc.).
"""
from __future__ import annotations
import logging
from typing import Any
logger = logging.getLogger(__name__)
# CMP category name (ePaaS / OneTrust / etc.) -> CookieBannerCategory enum.
# Keys are lower-case; map_category() lower-cases its input before the lookup
# and falls back to PERSONALIZATION for anything not listed here.
# CookieCategory has only 4 values, so we project marketing onto
# PERSONALIZATION and use EXTERNAL_MEDIA for embedded-content vendors
# (EXTERNAL_MEDIA is assigned by vendor name, not through this table).
_CATEGORY_MAP = {
    "necessary": "ESSENTIAL",
    "strictlynecessary": "ESSENTIAL",
    "essential": "ESSENTIAL",
    "functional": "ESSENTIAL",  # Cookiebot conflates these
    "statistics": "PERFORMANCE",
    "analytics": "PERFORMANCE",
    "performance": "PERFORMANCE",
    "marketing": "PERSONALIZATION",
    "advertising": "PERSONALIZATION",
    "personalization": "PERSONALIZATION",
}
# Vendor names that indicate embedded external content.
# map_category() tests each hint as a substring of the lower-cased vendor
# name; a match wins over whatever category the CMP reported.
_EXTERNAL_MEDIA_HINTS = (
    "youtube", "vimeo", "twitch", "google maps", "googlemaps",
    "soundcloud", "spotify",
)
# Display texts per banner category, keyed by the category enum value.
# "de"/"en" hold the category name, "desc_de"/"desc_en" the description;
# build_banner_config() copies them into each category's name/description.
_CATEGORY_LABELS = {
    "ESSENTIAL": {
        "de": "Erforderliche Cookies",
        "en": "Essential Cookies",
        "desc_de": "Diese Cookies sind fuer den Betrieb der Website "
        "unbedingt erforderlich (§25 Abs. 2 TDDDG) und koennen "
        "nicht deaktiviert werden.",
        "desc_en": "These cookies are strictly necessary for the operation "
        "of the website and cannot be disabled.",
    },
    "PERFORMANCE": {
        "de": "Analyse & Performance",
        "en": "Analytics & Performance",
        "desc_de": "Analyse-Cookies messen die Nutzung unserer Website, "
        "um sie kontinuierlich zu verbessern.",
        "desc_en": "Analytics cookies measure how visitors use our site so "
        "we can improve it.",
    },
    "PERSONALIZATION": {
        "de": "Marketing & Personalisierung",
        "en": "Marketing & Personalization",
        "desc_de": "Diese Cookies dienen der personalisierten "
        "Ansprache und werbebezogenen Auswertung.",
        "desc_en": "These cookies support personalised content and "
        "marketing measurement.",
    },
    "EXTERNAL_MEDIA": {
        "de": "Externe Medien",
        "en": "External Media",
        "desc_de": "Eingebettete Inhalte von Drittanbietern (z. B. "
        "Videos, Karten, Audio) koennen Cookies setzen.",
        "desc_en": "Embedded third-party media (videos, maps, audio) "
        "may set cookies.",
    },
}
def map_category(vendor_category: str, vendor_name: str) -> str:
    """Translate a CMP category and vendor name into a CookieCategory value.

    A vendor whose lower-cased name contains one of the external-media hints
    is always classified as ``EXTERNAL_MEDIA``. Otherwise the lower-cased
    category is looked up in the category table, with ``PERSONALIZATION`` as
    the fallback for unknown or missing categories.
    """
    lowered_name = (vendor_name or "").lower()
    for hint in _EXTERNAL_MEDIA_HINTS:
        if hint in lowered_name:
            return "EXTERNAL_MEDIA"

    category_key = (vendor_category or "").lower()
    if category_key in _CATEGORY_MAP:
        return _CATEGORY_MAP[category_key]
    return "PERSONALIZATION"
def build_banner_config(
    vendors: list[dict],
    site_name: str = "",
    privacy_policy_url: str = "",
    language: str = "de",
) -> dict:
    """Produce a CookieBannerConfig + review flags from the extracted vendors.

    Vendors are bucketed by canonical CookieCategory (via ``map_category``)
    and one cookie entry is built per named cookie. Items that need manual
    review are collected as flags:

    * ``no_cookies_listed`` (WARNING) -- vendor without any cookie records
    * ``cookie_no_expiry``  (INFO)    -- cookie without a storage duration
    * ``broken_opt_out``    (ERROR)   -- opt-out URL present but validation
      reported ``opt_out_ok is False``; checked for every vendor, including
      those without cookies

    Args:
        vendors: Vendor records (dicts). ``None`` or empty yields a config
            containing only the (empty) ESSENTIAL category.
        site_name: Inserted into the banner description; a generic phrase is
            used when empty.
        privacy_policy_url: Used for both language variants of the privacy
            link; defaults to ``/datenschutz`` / ``/privacy`` when empty.
        language: Key under which each cookie's purpose text is stored.

    Returns:
        ``{"config": ..., "flags": [...], "summary": {...}}``.
    """
    by_cat: dict[str, list[dict]] = {
        "ESSENTIAL": [], "PERFORMANCE": [],
        "PERSONALIZATION": [], "EXTERNAL_MEDIA": [],
    }
    flags: list[dict] = []
    cookies_total = 0
    vendors_with_no_cookies = 0

    for v in vendors or []:
        cat = map_category(v.get("category", ""), v.get("name", ""))
        provider = v.get("name") or "Unbekannt"
        cookies = v.get("cookies") or []

        if not cookies:
            vendors_with_no_cookies += 1
            flags.append({
                "level": "WARNING",
                "vendor": provider,
                "issue": "no_cookies_listed",
                "message": (
                    f"Anbieter '{provider}' wurde erfasst, "
                    "aber keine Cookies sind dokumentiert. Vor "
                    "Veroeffentlichung manuell ergaenzen."
                ),
            })
            # No `continue` here: the vendor-level opt-out check below must
            # still run for vendors that have no cookies documented.

        for c in cookies:
            cname = (c.get("name") or "").strip()
            if not cname:
                # Unnamed cookie records cannot be shown in the banner.
                continue
            cookies_total += 1
            by_cat[cat].append({
                "name": cname,
                "provider": provider,
                # Cookie-level purpose wins over the vendor-level one.
                "purpose": {language: c.get("purpose") or v.get("purpose") or ""},
                "expiry": c.get("expiry") or "",
                "type": ("THIRD_PARTY"
                         if c.get("is_third_party") else "FIRST_PARTY"),
            })
            if not c.get("expiry"):
                flags.append({
                    "level": "INFO",
                    "vendor": provider,
                    "issue": "cookie_no_expiry",
                    "message": (
                        f"Cookie '{cname}' bei '{provider}' ohne "
                        "Speicherdauer — fuer DSK-Konformitaet ergaenzen."
                    ),
                })

        # Vendor-level link validation flags. `is False` on purpose: a missing
        # / None opt_out_ok means "not validated", not "broken".
        if v.get("opt_out_url") and v.get("opt_out_ok") is False:
            flags.append({
                "level": "ERROR",
                "vendor": provider,
                "issue": "broken_opt_out",
                "message": (
                    f"Opt-Out-Link von '{provider}' antwortet mit "
                    f"HTTP {v.get('opt_out_status')} — "
                    "Art. 7(3) DSGVO erfordert funktionierenden Widerruf."
                ),
            })

    categories: list[dict] = []
    for cat_id in ("ESSENTIAL", "PERFORMANCE", "PERSONALIZATION", "EXTERNAL_MEDIA"):
        cat_cookies = by_cat[cat_id]
        # Empty categories are omitted, except ESSENTIAL which is always shown.
        if not cat_cookies and cat_id != "ESSENTIAL":
            continue
        meta = _CATEGORY_LABELS[cat_id]
        categories.append({
            "id": cat_id,
            "name": {"de": meta["de"], "en": meta["en"]},
            "description": {"de": meta["desc_de"], "en": meta["desc_en"]},
            "isRequired": cat_id == "ESSENTIAL",
            "defaultEnabled": cat_id == "ESSENTIAL",
            "dataPointIds": [],
            "cookies": cat_cookies,
        })

    config = {
        "id": "",  # filled by tenant on apply
        "tenantId": "",
        "categories": categories,
        "styling": {
            "position": "BOTTOM",
            "theme": "LIGHT",
            "primaryColor": "#2563eb",
            "borderRadius": 8,
        },
        "texts": {
            "title": {"de": "Wir verwenden Cookies",
                      "en": "We use cookies"},
            "description": {
                "de": (f"Auf {site_name or 'unserer Website'} setzen wir "
                       "Cookies und aehnliche Technologien ein, um die "
                       "Nutzererfahrung zu verbessern. Sie koennen Ihre "
                       "Auswahl jederzeit anpassen."),
                "en": (f"On {site_name or 'this website'} we use cookies "
                       "and similar technologies. You can change your "
                       "selection at any time."),
            },
            "acceptAll": {"de": "Alle akzeptieren", "en": "Accept all"},
            "rejectAll": {"de": "Alle ablehnen", "en": "Reject all"},
            "customize": {"de": "Auswahl anpassen", "en": "Customize"},
            "save": {"de": "Auswahl speichern", "en": "Save preferences"},
            "privacyPolicyLink": {"de": privacy_policy_url or "/datenschutz",
                                  "en": privacy_policy_url or "/privacy"},
        },
    }

    summary = {
        "vendors_total": len(vendors or []),
        "vendors_with_no_cookies": vendors_with_no_cookies,
        "cookies_total": cookies_total,
        "categories": {cat_id: len(by_cat[cat_id]) for cat_id in by_cat},
        "flags_error": sum(1 for f in flags if f["level"] == "ERROR"),
        "flags_warning": sum(1 for f in flags if f["level"] == "WARNING"),
        "flags_info": sum(1 for f in flags if f["level"] == "INFO"),
    }
    logger.info(
        "Banner migration prepared: %d vendors -> %d cookies in %d "
        "categories. Flags: %d ERROR, %d WARNING, %d INFO.",
        summary["vendors_total"], summary["cookies_total"],
        len(categories), summary["flags_error"],
        summary["flags_warning"], summary["flags_info"],
    )
    return {
        "config": config,
        "flags": flags,
        "summary": summary,
    }
@@ -0,0 +1,260 @@
"""
Migrate extracted vendor records + scorecard -> Document-Generator
pre-fills.
We can pre-fill several templateType candidates:
- cookie_policy : compose a Cookie-Richtlinie text from the vendor
list (one section per category, table per vendor with name, purpose,
expiry, opt-out)
- vvt_register : populate VVT entries (one per vendor, with recipient
category, opt-out URL, etc.)
- privacy_policy: a 'Drittanbieter' section listing vendors as
recipients + transfer mechanism
Output for each: {templateType, placeholderValues, initialContent,
suggested_template_search} that the frontend can drop into
DocumentGeneratorState.
"""
from __future__ import annotations
import logging
logger = logging.getLogger(__name__)
# German heading text per recipient type, used by
# _build_privacy_policy_section() as the sub-heading for each group of
# vendors. Keys are the upper-case recipient-type codes.
_RECIPIENT_TYPE_LABEL = {
    "INTERNAL": "Eigene Verarbeitung",
    "GROUP_COMPANY": "Konzernunternehmen",
    "PROCESSOR": "Auftragsverarbeiter",
    "CONTROLLER": "Joint / unabhaengiger Verantwortlicher",
    "AUTHORITY": "Behoerde",
    "OTHER": "Sonstiger Empfaenger",
}
def build_document_prefills(
    vendors: list[dict],
    extracted_profile: dict | None = None,
    site_name: str = "",
    privacy_policy_url: str = "",
) -> dict:
    """Build the pre-fills for cookie_policy, vvt_register and privacy_policy.

    Company details come from ``extracted_profile["company_profile"]`` when
    present; the company name falls back to ``site_name`` and then to
    "Unbekannt". All three pre-fills receive the same placeholder dict.

    Returns:
        A dict with the keys ``cookie_policy``, ``vvt_register`` and
        ``privacy_policy``, each holding the corresponding builder's result.
    """
    vendor_list = vendors or []
    company = (extracted_profile or {}).get("company_profile", {}) or {}

    raw_name = company.get("companyName") or site_name or "Unbekannt"
    address_fields = ("headquartersStreet", "headquartersZip", "headquartersCity")
    # Missing / empty address components are left out of the joined address.
    address_parts = [company.get(field) for field in address_fields]

    shared_placeholders = {
        "company_name": raw_name.strip(),
        "company_address": ", ".join(part for part in address_parts if part),
        "dpo_email": company.get("dpoEmail") or "",
        "privacy_policy_url": privacy_policy_url,
        "site_name": site_name,
        "vendor_count": str(len(vendor_list)),
    }

    prefills: dict = {}
    prefills["cookie_policy"] = _build_cookie_policy(
        vendor_list, shared_placeholders)
    prefills["vvt_register"] = _build_vvt_register(
        vendor_list, shared_placeholders)
    prefills["privacy_policy"] = _build_privacy_policy_section(
        vendor_list, shared_placeholders)
    return prefills
# ── cookie_policy ───────────────────────────────────────────────────
def _build_cookie_policy(vendors: list[dict], placeholders: dict) -> dict:
by_cat: dict[str, list[dict]] = {}
for v in vendors:
cat = (v.get("category") or "marketing").lower()
by_cat.setdefault(cat, []).append(v)
parts: list[str] = [
"# Cookie-Richtlinie",
"",
f"Diese Cookie-Richtlinie informiert Sie ueber den Einsatz von Cookies "
f"und aehnlichen Technologien auf den Webseiten der "
f"**{placeholders['company_name']}**.",
"",
]
if placeholders["company_address"]:
parts.append(f"Verantwortlich: {placeholders['company_name']}, "
f"{placeholders['company_address']}.")
parts.append("")
if placeholders["dpo_email"]:
parts.append(f"Datenschutzbeauftragte/r erreichbar unter: "
f"{placeholders['dpo_email']}.")
parts.append("")
cat_order = ("necessary", "strictlynecessary", "functional",
"statistics", "performance", "marketing", "advertising",
"personalization")
for cat in cat_order:
rows = by_cat.get(cat) or []
if not rows:
continue
parts.append("")
parts.append(f"## Kategorie: {_human_cat(cat)}")
parts.append("")
parts.append("| Anbieter | Zweck | Speicherdauer | Opt-Out |")
parts.append("|----------|-------|---------------|---------|")
for v in rows:
name = (v.get("name") or "").replace("|", " ")
purpose = (v.get("purpose") or "").replace("|", " ")[:140]
persistence = ", ".join(
c.get("expiry", "") for c in (v.get("cookies") or [])
if c.get("expiry")
)[:60] or ""
opt = v.get("opt_out_url") or ""
parts.append(f"| {name} | {purpose} | {persistence} | {opt} |")
parts.append("")
parts.append("Stand: automatisch generiert durch BreakPilot. "
"Bitte vor Veroeffentlichung pruefen.")
return {
"templateType": "cookie_policy",
"placeholderValues": placeholders,
"initialContent": "\n".join(parts),
"suggested_template_search": "Cookie-Richtlinie DSGVO TDDDG Deutsch",
}
def _human_cat(cat: str) -> str:
return {
"necessary": "Erforderlich (§25 Abs. 2 TDDDG)",
"strictlynecessary": "Erforderlich (§25 Abs. 2 TDDDG)",
"functional": "Funktional",
"statistics": "Statistik / Analyse",
"performance": "Statistik / Analyse",
"marketing": "Marketing & Werbung",
"advertising": "Marketing & Werbung",
"personalization": "Personalisierung",
}.get(cat, cat.capitalize())
# ── vvt_register ────────────────────────────────────────────────────
def _build_vvt_register(vendors: list[dict], placeholders: dict) -> dict:
    """Generate VVT entries as JSON that the VVT module can import directly.

    The entry layout follows VVTActivity in
    admin-compliance/lib/sdk/vvt-types.ts. One activity is produced per
    vendor.

    Args:
        vendors: Vendor records (dicts).
        placeholders: Returned unchanged as ``placeholderValues``.

    Returns:
        Dict with ``templateType``, ``placeholderValues``, an empty
        ``initialContent``, the ``activities`` list and
        ``suggested_template_search``.
    """
    # Countries NOT flagged as third country: EU member states plus IS/LI/NO.
    # CH is also on the list.
    # NOTE(review): CH is not an EEA member -- presumably listed because of
    # its adequacy decision; confirm this is the intended treatment.
    non_third_countries = frozenset((
        "DE", "AT", "BE", "BG", "HR", "CY", "CZ",
        "DK", "EE", "FI", "FR", "GR", "HU", "IE",
        "IT", "LV", "LT", "LU", "MT", "NL", "PL",
        "PT", "RO", "SK", "SI", "ES", "SE", "IS",
        "LI", "NO", "CH",
    ))
    # Recipient types passed through as-is; everything else becomes OTHER.
    known_types = ("INTERNAL", "GROUP_COMPANY", "PROCESSOR", "CONTROLLER")

    activities: list[dict] = []
    for v in vendors:
        # `or ""` (not a .get default) so an explicit None name is handled too.
        name = v.get("name") or ""
        purpose = v.get("purpose")
        rtype = (v.get("recipient_type") or "OTHER").upper()
        recipient_type = rtype if rtype in known_types else "OTHER"
        country = v.get("country") or ""
        # Compare case- and whitespace-insensitively so "de" is not
        # misreported as a third country.
        country_code = country.strip().upper()

        activities.append({
            "name": name or "Unbekannte Verarbeitung",
            "description": purpose or "",
            "purposes": [purpose] if purpose else [],
            "businessFunction": _guess_business_function(v),
            "recipientCategories": [{
                "type": recipient_type,
                "name": name,
                "isThirdCountry": bool(country_code)
                and country_code not in non_third_countries,
                "country": country,
            }],
            "retentionPeriod": {
                "description": _summarise_expiry(v.get("cookies") or []),
            },
            "tomDescription": "Siehe TOM-Anlage (automatisch verlinken).",
            "sourceTemplateId": f"breakpilot-cookie-vendor-{name.lower()[:30]}",
        })

    return {
        "templateType": "vvt_register",
        "placeholderValues": placeholders,
        "initialContent": "",
        "activities": activities,
        "suggested_template_search": "VVT Art. 30 DSGVO Verarbeitungsverzeichnis",
    }
def _guess_business_function(v: dict) -> str:
cat = (v.get("category") or "").lower()
if cat in ("marketing", "advertising", "personalization"):
return "marketing"
if cat in ("statistics", "performance", "analytics"):
return "marketing"
if cat in ("necessary", "strictlynecessary"):
return "it_operations"
return "other"
def _summarise_expiry(cookies: list[dict]) -> str:
exps = sorted({(c.get("expiry") or "").strip()
for c in cookies if c.get("expiry")})
if not exps:
return "Speicherdauer pro Cookie pflegen."
if len(exps) == 1:
return exps[0]
return ", ".join(exps[:5]) + (f" (+{len(exps) - 5} weitere)" if len(exps) > 5 else "")
# ── privacy_policy (Drittanbieter-Block) ───────────────────────────
def _build_privacy_policy_section(vendors: list[dict], placeholders: dict) -> dict:
    """Generate the 'Drittanbieter' section for the privacy_policy template.

    Vendors are grouped by upper-cased ``recipient_type`` and listed under
    one sub-heading per type, in a fixed order. A missing or unrecognised
    recipient type is grouped under OTHER so that every vendor appears.

    Args:
        vendors: Vendor records (dicts).
        placeholders: Returned unchanged as ``placeholderValues``.

    Returns:
        Dict with ``templateType``, ``placeholderValues``, ``initialContent``
        (the Markdown section) and ``suggested_template_search``.
    """
    order = ("INTERNAL", "GROUP_COMPANY", "PROCESSOR", "CONTROLLER",
             "AUTHORITY", "OTHER")

    by_rtype: dict[str, list[dict]] = {}
    for v in vendors:
        rtype = (v.get("recipient_type") or "OTHER").upper()
        if rtype not in order:
            # Only types in `order` are rendered; unknown ones would be lost.
            rtype = "OTHER"
        by_rtype.setdefault(rtype, []).append(v)

    parts: list[str] = [
        "## Empfaenger personenbezogener Daten",
        "",
        "Im Rahmen unseres Webseiten-Betriebs uebermitteln wir "
        "personenbezogene Daten an folgende Kategorien von Empfaengern "
        "(Art. 13 Abs. 1 lit. e DSGVO):",
        "",
    ]
    for rtype in order:
        rows = by_rtype.get(rtype) or []
        if not rows:
            continue
        parts.append(f"### {_RECIPIENT_TYPE_LABEL.get(rtype, rtype)}")
        for v in rows:
            name = v.get("name") or ""
            country = v.get("country") or ""
            country_str = f" ({country})" if country else ""
            line = f"- **{name}**{country_str}"
            purpose = v.get("purpose")
            if purpose:
                # Separator keeps the purpose from running into the name.
                line += f" — {purpose[:140]}"
            if v.get("opt_out_url"):
                line += f" [Opt-Out]({v.get('opt_out_url')})"
            if v.get("privacy_policy_url"):
                line += f" [Datenschutz]({v.get('privacy_policy_url')})"
            parts.append(line)
        parts.append("")

    return {
        "templateType": "privacy_policy",
        "placeholderValues": placeholders,
        "initialContent": "\n".join(parts),
        "suggested_template_search": "Datenschutzerklaerung Art. 13 DSGVO Webseite",
    }