""" Migrate extracted vendor records + scorecard -> Document-Generator pre-fills. We can pre-fill several templateType candidates: - cookie_policy : compose a Cookie-Richtlinie text from the vendor list (one section per category, table per vendor with name, purpose, expiry, opt-out) - vvt_register : populate VVT entries (one per vendor, with recipient category, opt-out URL, etc.) - privacy_policy: a 'Drittanbieter' section listing vendors as recipients + transfer mechanism Output for each: {templateType, placeholderValues, initialContent, suggested_template_search} that the frontend can drop into DocumentGeneratorState. """ from __future__ import annotations import logging logger = logging.getLogger(__name__) _RECIPIENT_TYPE_LABEL = { "INTERNAL": "Eigene Verarbeitung", "GROUP_COMPANY": "Konzernunternehmen", "PROCESSOR": "Auftragsverarbeiter", "CONTROLLER": "Joint / unabhaengiger Verantwortlicher", "AUTHORITY": "Behoerde", "OTHER": "Sonstiger Empfaenger", } def build_document_prefills( vendors: list[dict], extracted_profile: dict | None = None, site_name: str = "", privacy_policy_url: str = "", ) -> dict: """Generate pre-fills for cookie_policy + vvt_register + privacy_policy.""" profile = (extracted_profile or {}).get("company_profile", {}) or {} company_name = (profile.get("companyName") or site_name or "Unbekannt").strip() address = ", ".join(filter(None, [ profile.get("headquartersStreet"), profile.get("headquartersZip"), profile.get("headquartersCity"), ])) dpo_email = profile.get("dpoEmail") or "" placeholders_common = { "company_name": company_name, "company_address": address, "dpo_email": dpo_email, "privacy_policy_url": privacy_policy_url, "site_name": site_name, "vendor_count": str(len(vendors or [])), } return { "cookie_policy": _build_cookie_policy( vendors or [], placeholders_common, ), "vvt_register": _build_vvt_register( vendors or [], placeholders_common, ), "privacy_policy": _build_privacy_policy_section( vendors or [], placeholders_common, ), } # ── cookie_policy ─────────────────────────────────────────────────── def _build_cookie_policy(vendors: list[dict], placeholders: dict) -> dict: by_cat: dict[str, list[dict]] = {} for v in vendors: cat = (v.get("category") or "marketing").lower() by_cat.setdefault(cat, []).append(v) parts: list[str] = [ "# Cookie-Richtlinie", "", f"Diese Cookie-Richtlinie informiert Sie ueber den Einsatz von Cookies " f"und aehnlichen Technologien auf den Webseiten der " f"**{placeholders['company_name']}**.", "", ] if placeholders["company_address"]: parts.append(f"Verantwortlich: {placeholders['company_name']}, " f"{placeholders['company_address']}.") parts.append("") if placeholders["dpo_email"]: parts.append(f"Datenschutzbeauftragte/r erreichbar unter: " f"{placeholders['dpo_email']}.") parts.append("") cat_order = ("necessary", "strictlynecessary", "functional", "statistics", "performance", "marketing", "advertising", "personalization") for cat in cat_order: rows = by_cat.get(cat) or [] if not rows: continue parts.append("") parts.append(f"## Kategorie: {_human_cat(cat)}") parts.append("") parts.append("| Anbieter | Zweck | Speicherdauer | Opt-Out |") parts.append("|----------|-------|---------------|---------|") for v in rows: name = (v.get("name") or "").replace("|", " ") purpose = (v.get("purpose") or "").replace("|", " ")[:140] persistence = ", ".join( c.get("expiry", "") for c in (v.get("cookies") or []) if c.get("expiry") )[:60] or "—" opt = v.get("opt_out_url") or "—" parts.append(f"| {name} | {purpose} | {persistence} | {opt} |") parts.append("") parts.append("Stand: automatisch generiert durch BreakPilot. " "Bitte vor Veroeffentlichung pruefen.") return { "templateType": "cookie_policy", "placeholderValues": placeholders, "initialContent": "\n".join(parts), "suggested_template_search": "Cookie-Richtlinie DSGVO TDDDG Deutsch", } def _human_cat(cat: str) -> str: return { "necessary": "Erforderlich (§25 Abs. 2 TDDDG)", "strictlynecessary": "Erforderlich (§25 Abs. 2 TDDDG)", "functional": "Funktional", "statistics": "Statistik / Analyse", "performance": "Statistik / Analyse", "marketing": "Marketing & Werbung", "advertising": "Marketing & Werbung", "personalization": "Personalisierung", }.get(cat, cat.capitalize()) # ── vvt_register ──────────────────────────────────────────────────── def _build_vvt_register(vendors: list[dict], placeholders: dict) -> dict: """Generate VVT-Eintraege als JSON, das der vvt-Modul direkt importieren kann. Schema lehnt sich an admin-compliance/lib/sdk/vvt-types.ts VVTActivity an. """ activities: list[dict] = [] for v in vendors: rtype = (v.get("recipient_type") or "OTHER").upper() recipient_type = ( "INTERNAL" if rtype == "INTERNAL" else "GROUP_COMPANY" if rtype == "GROUP_COMPANY" else "PROCESSOR" if rtype == "PROCESSOR" else "CONTROLLER" if rtype == "CONTROLLER" else "OTHER" ) activities.append({ "name": v.get("name") or "Unbekannte Verarbeitung", "description": v.get("purpose") or "", "purposes": [v.get("purpose")] if v.get("purpose") else [], "businessFunction": _guess_business_function(v), "recipientCategories": [{ "type": recipient_type, "name": v.get("name") or "", "isThirdCountry": bool(v.get("country")) and v.get("country") not in ("DE", "AT", "BE", "BG", "HR", "CY", "CZ", "DK", "EE", "FI", "FR", "GR", "HU", "IE", "IT", "LV", "LT", "LU", "MT", "NL", "PL", "PT", "RO", "SK", "SI", "ES", "SE", "IS", "LI", "NO", "CH"), "country": v.get("country") or "", }], "retentionPeriod": { "description": _summarise_expiry(v.get("cookies") or []), }, "tomDescription": "Siehe TOM-Anlage (automatisch verlinken).", "sourceTemplateId": f"breakpilot-cookie-vendor-{v.get('name', '').lower()[:30]}", }) return { "templateType": "vvt_register", "placeholderValues": placeholders, "initialContent": "", "activities": activities, "suggested_template_search": "VVT Art. 30 DSGVO Verarbeitungsverzeichnis", } def _guess_business_function(v: dict) -> str: cat = (v.get("category") or "").lower() if cat in ("marketing", "advertising", "personalization"): return "marketing" if cat in ("statistics", "performance", "analytics"): return "marketing" if cat in ("necessary", "strictlynecessary"): return "it_operations" return "other" def _summarise_expiry(cookies: list[dict]) -> str: exps = sorted({(c.get("expiry") or "").strip() for c in cookies if c.get("expiry")}) if not exps: return "Speicherdauer pro Cookie pflegen." if len(exps) == 1: return exps[0] return ", ".join(exps[:5]) + (f" (+{len(exps) - 5} weitere)" if len(exps) > 5 else "") # ── privacy_policy (Drittanbieter-Block) ─────────────────────────── def _build_privacy_policy_section(vendors: list[dict], placeholders: dict) -> dict: """Generate the 'Drittanbieter' section for the privacy_policy template.""" by_rtype: dict[str, list[dict]] = {} for v in vendors: rtype = (v.get("recipient_type") or "OTHER").upper() by_rtype.setdefault(rtype, []).append(v) parts: list[str] = [ "## Empfaenger personenbezogener Daten", "", f"Im Rahmen unseres Webseiten-Betriebs uebermitteln wir " f"personenbezogene Daten an folgende Kategorien von Empfaengern " f"(Art. 13 Abs. 1 lit. e DSGVO):", "", ] order = ("INTERNAL", "GROUP_COMPANY", "PROCESSOR", "CONTROLLER", "AUTHORITY", "OTHER") for rtype in order: rows = by_rtype.get(rtype) or [] if not rows: continue parts.append(f"### {_RECIPIENT_TYPE_LABEL.get(rtype, rtype)}") for v in rows: name = v.get("name") or "" country = v.get("country") or "" country_str = f" ({country})" if country else "" line = f"- **{name}**{country_str}" if v.get("purpose"): line += f" — {v.get('purpose')[:140]}" if v.get("opt_out_url"): line += f" [Opt-Out]({v.get('opt_out_url')})" if v.get("privacy_policy_url"): line += f" [Datenschutz]({v.get('privacy_policy_url')})" parts.append(line) parts.append("") return { "templateType": "privacy_policy", "placeholderValues": placeholders, "initialContent": "\n".join(parts), "suggested_template_search": "Datenschutzerklaerung Art. 13 DSGVO Webseite", }