Files
breakpilot-compliance/backend-compliance/compliance/services/migration_to_document.py
T
Benjamin Admin df7d83134b feat(agent): migrate compliance-check results to banner + documents (M1-M5)
After a compliance-check run finishes, the user can now apply the
extracted vendor inventory directly to their own:

  - CookieBanner config (admin /sdk/einwilligungen)
  - Cookie-Policy / VVT-Register / Privacy-Policy templates
    (admin /sdk/document-generator)

Backend:
  - migration_to_banner.py: vendor list -> CookieBannerConfig with
    ESSENTIAL/PERFORMANCE/PERSONALIZATION/EXTERNAL_MEDIA buckets +
    review flags (broken opt-out URLs, missing expiry, no cookies listed)
  - migration_to_document.py: vendor list -> pre-fills for 3 doc
    templates, recipient-type aware (INTERNAL/GROUP/PROCESSOR/CONTROLLER)
  - agent_migration_routes.py: GET /banner-preview, /document-preview,
    /summary keyed on check_id
  - compliance_audit_log: new check_payloads table persists cmp_vendors +
    extracted_profile so the preview survives an app restart
  - tests: 9 mapper units + 4 endpoint integration tests

Frontend:
  - MigrationPanel.tsx: modal showing banner-config diff + document
    pre-fills, plus links into the existing editors
  - ComplianceCheckTab.tsx: replaces standalone audit link with the
    panel; net -3 lines, stays at the 500-cap

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-05-17 14:06:28 +02:00

261 lines
9.8 KiB
Python

"""
Migrate extracted vendor records + scorecard -> Document-Generator
pre-fills.
We can pre-fill several templateType candidates:
- cookie_policy : compose a Cookie-Richtlinie text from the vendor
list (one section per category, table per vendor with name, purpose,
expiry, opt-out)
- vvt_register : populate VVT entries (one per vendor, with recipient
category, opt-out URL, etc.)
- privacy_policy: a 'Drittanbieter' section listing vendors as
recipients + transfer mechanism
Output for each: {templateType, placeholderValues, initialContent,
suggested_template_search} that the frontend can drop into
DocumentGeneratorState.
"""
from __future__ import annotations
import logging
logger = logging.getLogger(__name__)
_RECIPIENT_TYPE_LABEL = {
"INTERNAL": "Eigene Verarbeitung",
"GROUP_COMPANY": "Konzernunternehmen",
"PROCESSOR": "Auftragsverarbeiter",
"CONTROLLER": "Joint / unabhaengiger Verantwortlicher",
"AUTHORITY": "Behoerde",
"OTHER": "Sonstiger Empfaenger",
}
def build_document_prefills(
    vendors: list[dict],
    extracted_profile: dict | None = None,
    site_name: str = "",
    privacy_policy_url: str = "",
) -> dict:
    """Generate pre-fills for cookie_policy + vvt_register + privacy_policy.

    Args:
        vendors: Vendor records from the compliance check. ``None`` or an
            empty list is treated as "no vendors".
        extracted_profile: Optional dict whose ``company_profile`` entry
            supplies ``companyName``, the ``headquarters*`` address parts
            and ``dpoEmail``.
        site_name: Fallback for the company name; also exposed as the
            ``site_name`` placeholder.
        privacy_policy_url: Exposed as the ``privacy_policy_url``
            placeholder.

    Returns:
        A dict with the keys ``cookie_policy``, ``vvt_register`` and
        ``privacy_policy``, each holding the result of the matching
        builder function.
    """
    vendor_list = vendors or []
    profile = (extracted_profile or {}).get("company_profile", {}) or {}

    # Strip every candidate *before* falling back: a whitespace-only
    # companyName is truthy and would otherwise win the `or` chain and
    # then collapse to an empty company name.
    company_name = (
        (profile.get("companyName") or "").strip()
        or (site_name or "").strip()
        or "Unbekannt"
    )

    address_parts = [
        profile.get("headquartersStreet"),
        profile.get("headquartersZip"),
        profile.get("headquartersCity"),
    ]
    address = ", ".join(part for part in address_parts if part)

    placeholders_common = {
        "company_name": company_name,
        "company_address": address,
        "dpo_email": profile.get("dpoEmail") or "",
        "privacy_policy_url": privacy_policy_url,
        "site_name": site_name,
        "vendor_count": str(len(vendor_list)),
    }

    # Each builder gets its own copy so that editing the placeholders of
    # one returned pre-fill cannot leak into the other two.
    return {
        "cookie_policy": _build_cookie_policy(
            vendor_list, dict(placeholders_common),
        ),
        "vvt_register": _build_vvt_register(
            vendor_list, dict(placeholders_common),
        ),
        "privacy_policy": _build_privacy_policy_section(
            vendor_list, dict(placeholders_common),
        ),
    }
# ── cookie_policy ───────────────────────────────────────────────────
def _build_cookie_policy(vendors: list[dict], placeholders: dict) -> dict:
by_cat: dict[str, list[dict]] = {}
for v in vendors:
cat = (v.get("category") or "marketing").lower()
by_cat.setdefault(cat, []).append(v)
parts: list[str] = [
"# Cookie-Richtlinie",
"",
f"Diese Cookie-Richtlinie informiert Sie ueber den Einsatz von Cookies "
f"und aehnlichen Technologien auf den Webseiten der "
f"**{placeholders['company_name']}**.",
"",
]
if placeholders["company_address"]:
parts.append(f"Verantwortlich: {placeholders['company_name']}, "
f"{placeholders['company_address']}.")
parts.append("")
if placeholders["dpo_email"]:
parts.append(f"Datenschutzbeauftragte/r erreichbar unter: "
f"{placeholders['dpo_email']}.")
parts.append("")
cat_order = ("necessary", "strictlynecessary", "functional",
"statistics", "performance", "marketing", "advertising",
"personalization")
for cat in cat_order:
rows = by_cat.get(cat) or []
if not rows:
continue
parts.append("")
parts.append(f"## Kategorie: {_human_cat(cat)}")
parts.append("")
parts.append("| Anbieter | Zweck | Speicherdauer | Opt-Out |")
parts.append("|----------|-------|---------------|---------|")
for v in rows:
name = (v.get("name") or "").replace("|", " ")
purpose = (v.get("purpose") or "").replace("|", " ")[:140]
persistence = ", ".join(
c.get("expiry", "") for c in (v.get("cookies") or [])
if c.get("expiry")
)[:60] or ""
opt = v.get("opt_out_url") or ""
parts.append(f"| {name} | {purpose} | {persistence} | {opt} |")
parts.append("")
parts.append("Stand: automatisch generiert durch BreakPilot. "
"Bitte vor Veroeffentlichung pruefen.")
return {
"templateType": "cookie_policy",
"placeholderValues": placeholders,
"initialContent": "\n".join(parts),
"suggested_template_search": "Cookie-Richtlinie DSGVO TDDDG Deutsch",
}
def _human_cat(cat: str) -> str:
return {
"necessary": "Erforderlich (§25 Abs. 2 TDDDG)",
"strictlynecessary": "Erforderlich (§25 Abs. 2 TDDDG)",
"functional": "Funktional",
"statistics": "Statistik / Analyse",
"performance": "Statistik / Analyse",
"marketing": "Marketing & Werbung",
"advertising": "Marketing & Werbung",
"personalization": "Personalisierung",
}.get(cat, cat.capitalize())
# ── vvt_register ────────────────────────────────────────────────────
def _build_vvt_register(vendors: list[dict], placeholders: dict) -> dict:
"""Generate VVT-Eintraege als JSON, das der vvt-Modul direkt importieren kann.
Schema lehnt sich an admin-compliance/lib/sdk/vvt-types.ts VVTActivity an.
"""
activities: list[dict] = []
for v in vendors:
rtype = (v.get("recipient_type") or "OTHER").upper()
recipient_type = (
"INTERNAL" if rtype == "INTERNAL"
else "GROUP_COMPANY" if rtype == "GROUP_COMPANY"
else "PROCESSOR" if rtype == "PROCESSOR"
else "CONTROLLER" if rtype == "CONTROLLER"
else "OTHER"
)
activities.append({
"name": v.get("name") or "Unbekannte Verarbeitung",
"description": v.get("purpose") or "",
"purposes": [v.get("purpose")] if v.get("purpose") else [],
"businessFunction": _guess_business_function(v),
"recipientCategories": [{
"type": recipient_type,
"name": v.get("name") or "",
"isThirdCountry": bool(v.get("country")) and
v.get("country") not in
("DE", "AT", "BE", "BG", "HR", "CY", "CZ",
"DK", "EE", "FI", "FR", "GR", "HU", "IE",
"IT", "LV", "LT", "LU", "MT", "NL", "PL",
"PT", "RO", "SK", "SI", "ES", "SE", "IS",
"LI", "NO", "CH"),
"country": v.get("country") or "",
}],
"retentionPeriod": {
"description": _summarise_expiry(v.get("cookies") or []),
},
"tomDescription": "Siehe TOM-Anlage (automatisch verlinken).",
"sourceTemplateId": f"breakpilot-cookie-vendor-{v.get('name', '').lower()[:30]}",
})
return {
"templateType": "vvt_register",
"placeholderValues": placeholders,
"initialContent": "",
"activities": activities,
"suggested_template_search": "VVT Art. 30 DSGVO Verarbeitungsverzeichnis",
}
def _guess_business_function(v: dict) -> str:
cat = (v.get("category") or "").lower()
if cat in ("marketing", "advertising", "personalization"):
return "marketing"
if cat in ("statistics", "performance", "analytics"):
return "marketing"
if cat in ("necessary", "strictlynecessary"):
return "it_operations"
return "other"
def _summarise_expiry(cookies: list[dict]) -> str:
exps = sorted({(c.get("expiry") or "").strip()
for c in cookies if c.get("expiry")})
if not exps:
return "Speicherdauer pro Cookie pflegen."
if len(exps) == 1:
return exps[0]
return ", ".join(exps[:5]) + (f" (+{len(exps) - 5} weitere)" if len(exps) > 5 else "")
# ── privacy_policy (Drittanbieter-Block) ───────────────────────────
def _build_privacy_policy_section(vendors: list[dict], placeholders: dict) -> dict:
"""Generate the 'Drittanbieter' section for the privacy_policy template."""
by_rtype: dict[str, list[dict]] = {}
for v in vendors:
rtype = (v.get("recipient_type") or "OTHER").upper()
by_rtype.setdefault(rtype, []).append(v)
parts: list[str] = [
"## Empfaenger personenbezogener Daten",
"",
f"Im Rahmen unseres Webseiten-Betriebs uebermitteln wir "
f"personenbezogene Daten an folgende Kategorien von Empfaengern "
f"(Art. 13 Abs. 1 lit. e DSGVO):",
"",
]
order = ("INTERNAL", "GROUP_COMPANY", "PROCESSOR", "CONTROLLER",
"AUTHORITY", "OTHER")
for rtype in order:
rows = by_rtype.get(rtype) or []
if not rows:
continue
parts.append(f"### {_RECIPIENT_TYPE_LABEL.get(rtype, rtype)}")
for v in rows:
name = v.get("name") or ""
country = v.get("country") or ""
country_str = f" ({country})" if country else ""
line = f"- **{name}**{country_str}"
if v.get("purpose"):
line += f"{v.get('purpose')[:140]}"
if v.get("opt_out_url"):
line += f" [Opt-Out]({v.get('opt_out_url')})"
if v.get("privacy_policy_url"):
line += f" [Datenschutz]({v.get('privacy_policy_url')})"
parts.append(line)
parts.append("")
return {
"templateType": "privacy_policy",
"placeholderValues": placeholders,
"initialContent": "\n".join(parts),
"suggested_template_search": "Datenschutzerklaerung Art. 13 DSGVO Webseite",
}