From df7d83134bf65946056fb025b78eda3f02cb2bff Mon Sep 17 00:00:00 2001 From: Benjamin Admin Date: Sun, 17 May 2026 14:06:28 +0200 Subject: [PATCH] feat(agent): migrate compliance-check results to banner + documents (M1-M5) After a compliance-check run finishes, the user can now apply the extracted vendor inventory directly to their own: - CookieBanner config (admin /sdk/einwilligungen) - Cookie-Policy / VVT-Register / Privacy-Policy templates (admin /sdk/document-generator) Backend: - migration_to_banner.py: vendor list -> CookieBannerConfig with ESSENTIAL/PERFORMANCE/PERSONALIZATION/EXTERNAL_MEDIA buckets + review flags (broken opt-out URLs, missing expiry, no cookies listed) - migration_to_document.py: vendor list -> pre-fills for 3 doc templates, recipient-type aware (INTERNAL/GROUP/PROCESSOR/CONTROLLER) - agent_migration_routes.py: GET /banner-preview, /document-preview, /summary keyed on check_id - compliance_audit_log: new check_payloads table persists cmp_vendors + extracted_profile so the preview survives an app restart - tests: 9 mapper units + 4 endpoint integration tests Frontend: - MigrationPanel.tsx: modal showing banner-config diff + document pre-fills, plus links into the existing editors - ComplianceCheckTab.tsx: replaces standalone audit link with the panel; net -3 lines, stays at the 500-cap Co-Authored-By: Claude Opus 4.7 (1M context) --- .../[checkId]/banner-preview/route.ts | 25 ++ .../[checkId]/document-preview/route.ts | 24 ++ .../migration/[checkId]/summary/route.ts | 24 ++ .../agent/_components/ComplianceCheckTab.tsx | 24 +- .../sdk/agent/_components/MigrationPanel.tsx | 194 +++++++++++++ .../api/agent_compliance_check_routes.py | 6 + .../compliance/api/agent_migration_routes.py | 123 +++++++++ .../services/compliance_audit_log.py | 54 ++++ .../services/migration_to_banner.py | 232 ++++++++++++++++ .../services/migration_to_document.py | 260 ++++++++++++++++++ backend-compliance/main.py | 2 + .../tests/test_migration_endpoints.py | 116 ++++++++ .../tests/test_migration_mappers.py | 138 ++++++++++ 13 files changed, 1207 insertions(+), 15 deletions(-) create mode 100644 admin-compliance/app/api/sdk/v1/agent/migration/[checkId]/banner-preview/route.ts create mode 100644 admin-compliance/app/api/sdk/v1/agent/migration/[checkId]/document-preview/route.ts create mode 100644 admin-compliance/app/api/sdk/v1/agent/migration/[checkId]/summary/route.ts create mode 100644 admin-compliance/app/sdk/agent/_components/MigrationPanel.tsx create mode 100644 backend-compliance/compliance/api/agent_migration_routes.py create mode 100644 backend-compliance/compliance/services/migration_to_banner.py create mode 100644 backend-compliance/compliance/services/migration_to_document.py create mode 100644 backend-compliance/tests/test_migration_endpoints.py create mode 100644 backend-compliance/tests/test_migration_mappers.py diff --git a/admin-compliance/app/api/sdk/v1/agent/migration/[checkId]/banner-preview/route.ts b/admin-compliance/app/api/sdk/v1/agent/migration/[checkId]/banner-preview/route.ts new file mode 100644 index 00000000..bd34fc02 --- /dev/null +++ b/admin-compliance/app/api/sdk/v1/agent/migration/[checkId]/banner-preview/route.ts @@ -0,0 +1,25 @@ +/** + * Proxy: GET /api/sdk/v1/agent/migration//banner-preview + * -> backend GET /api/compliance/agent/migration//banner-preview + */ +import { NextRequest, NextResponse } from 'next/server' + +const BACKEND_URL = process.env.BACKEND_API_URL || 'http://backend-compliance:8002' + +export async function GET( + request: NextRequest, + { params }: { params: { checkId: string } }, +) { + const qs = request.nextUrl.searchParams.toString() + const url = `${BACKEND_URL}/api/compliance/agent/migration/${params.checkId}/banner-preview${qs ? `?${qs}` : ''}` + try { + const resp = await fetch(url, { signal: AbortSignal.timeout(15000) }) + const data = await resp.json() + return NextResponse.json(data, { status: resp.status }) + } catch { + return NextResponse.json( + { error: 'Banner-Preview fehlgeschlagen' }, + { status: 503 }, + ) + } +} diff --git a/admin-compliance/app/api/sdk/v1/agent/migration/[checkId]/document-preview/route.ts b/admin-compliance/app/api/sdk/v1/agent/migration/[checkId]/document-preview/route.ts new file mode 100644 index 00000000..ea64d5d7 --- /dev/null +++ b/admin-compliance/app/api/sdk/v1/agent/migration/[checkId]/document-preview/route.ts @@ -0,0 +1,24 @@ +/** + * Proxy: GET /api/sdk/v1/agent/migration//document-preview + * -> backend GET /api/compliance/agent/migration//document-preview + */ +import { NextRequest, NextResponse } from 'next/server' + +const BACKEND_URL = process.env.BACKEND_API_URL || 'http://backend-compliance:8002' + +export async function GET( + _request: NextRequest, + { params }: { params: { checkId: string } }, +) { + const url = `${BACKEND_URL}/api/compliance/agent/migration/${params.checkId}/document-preview` + try { + const resp = await fetch(url, { signal: AbortSignal.timeout(15000) }) + const data = await resp.json() + return NextResponse.json(data, { status: resp.status }) + } catch { + return NextResponse.json( + { error: 'Dokument-Preview fehlgeschlagen' }, + { status: 503 }, + ) + } +} diff --git a/admin-compliance/app/api/sdk/v1/agent/migration/[checkId]/summary/route.ts b/admin-compliance/app/api/sdk/v1/agent/migration/[checkId]/summary/route.ts new file mode 100644 index 00000000..df1c6132 --- /dev/null +++ b/admin-compliance/app/api/sdk/v1/agent/migration/[checkId]/summary/route.ts @@ -0,0 +1,24 @@ +/** + * Proxy: GET /api/sdk/v1/agent/migration//summary + * -> backend GET /api/compliance/agent/migration//summary + */ +import { NextRequest, NextResponse } from 'next/server' + +const BACKEND_URL = process.env.BACKEND_API_URL || 'http://backend-compliance:8002' + +export async function GET( + _request: NextRequest, + { params }: { params: { checkId: string } }, +) { + const url = `${BACKEND_URL}/api/compliance/agent/migration/${params.checkId}/summary` + try { + const resp = await fetch(url, { signal: AbortSignal.timeout(15000) }) + const data = await resp.json() + return NextResponse.json(data, { status: resp.status }) + } catch { + return NextResponse.json( + { error: 'Migrations-Summary fehlgeschlagen' }, + { status: 503 }, + ) + } +} diff --git a/admin-compliance/app/sdk/agent/_components/ComplianceCheckTab.tsx b/admin-compliance/app/sdk/agent/_components/ComplianceCheckTab.tsx index 4f67ab23..97c610e1 100644 --- a/admin-compliance/app/sdk/agent/_components/ComplianceCheckTab.tsx +++ b/admin-compliance/app/sdk/agent/_components/ComplianceCheckTab.tsx @@ -3,6 +3,7 @@ import React, { useState, useCallback } from 'react' import { ChecklistView } from './ChecklistView' import { DocumentRow } from './DocumentRow' +import { MigrationPanel } from './MigrationPanel' const DOCUMENT_TYPES = [ { id: 'dse', label: 'DSI (Datenschutzinformation)', required: true }, @@ -455,21 +456,14 @@ export function ComplianceCheckTab() { - {/* Email status + Full-audit link */} -
- {results.email_status && ( -
- - E-Mail: {results.email_status === 'sent' ? 'Gesendet' : results.email_status} -
- )} - {results.check_id && ( - - Voll-Audit oeffnen (alle MCs) → - - )} -
+ {/* Email + Migration + Full-audit */} + {results.email_status && ( +
+ + E-Mail: {results.email_status === 'sent' ? 'Gesendet' : results.email_status} +
+ )} + {results.check_id && } )} diff --git a/admin-compliance/app/sdk/agent/_components/MigrationPanel.tsx b/admin-compliance/app/sdk/agent/_components/MigrationPanel.tsx new file mode 100644 index 00000000..fdea9694 --- /dev/null +++ b/admin-compliance/app/sdk/agent/_components/MigrationPanel.tsx @@ -0,0 +1,194 @@ +'use client' + +import { useState } from 'react' + +interface BannerFlag { + level: 'ERROR' | 'WARNING' | 'INFO' + vendor: string + issue: string + message: string +} + +interface BannerPreview { + config: { categories: { id: string; cookies: { name: string }[] }[] } + flags: BannerFlag[] + summary: { + vendors_total: number + vendors_with_no_cookies: number + cookies_total: number + categories: Record + flags_error: number + flags_warning: number + flags_info: number + } +} + +interface DocumentPreview { + check_id: string + vendor_count: number + templates: Record +} + +type Mode = 'banner' | 'documents' + +export function MigrationPanel({ checkId }: { checkId: string }) { + const [open, setOpen] = useState(false) + const [mode, setMode] = useState('banner') + const [loading, setLoading] = useState(false) + const [error, setError] = useState(null) + const [banner, setBanner] = useState(null) + const [docs, setDocs] = useState(null) + + async function loadPreview(next: Mode) { + setMode(next) + setOpen(true) + setError(null) + setLoading(true) + try { + const path = next === 'banner' + ? `/api/sdk/v1/agent/migration/${checkId}/banner-preview` + : `/api/sdk/v1/agent/migration/${checkId}/document-preview` + const r = await fetch(path) + if (!r.ok) throw new Error(`HTTP ${r.status}`) + const data = await r.json() + if (next === 'banner') setBanner(data) + else setDocs(data) + } catch (e) { + setError(e instanceof Error ? e.message : 'Preview-Ladefehler') + } finally { + setLoading(false) + } + } + + return ( + <> +
+
+ + +
+ + Voll-Audit oeffnen (alle MCs) → + +
+ + {open && ( +
+
+
+

+ {mode === 'banner' ? 'Cookie-Banner Migration' : 'Dokument-Vorbefuellung'} +

+ +
+ + {loading &&
Lade Preview ...
} + {error &&
Fehler: {error}
} + + {!loading && !error && mode === 'banner' && banner && ( + + )} + + {!loading && !error && mode === 'documents' && docs && ( + + )} + +
+ + + Im Editor oeffnen + +
+
+
+ )} + + ) +} + +function BannerPreviewBody({ data }: { data: BannerPreview }) { + const { summary, flags, config } = data + return ( +
+
+ + + n > 0).length} /> +
+
+ + + +
+
+

Kategorien

+
    + {config.categories.map(c => ( +
  • {c.id}: {c.cookies.length} Cookie(s)
  • + ))} +
+
+ {flags.length > 0 && ( +
+

Pruefpunkte

+
    + {flags.map((f, i) => ( +
  • + [{f.level}] {f.vendor}: {f.message} +
  • + ))} +
+
+ )} +
+ ) +} + +function DocumentPreviewBody({ data }: { data: DocumentPreview }) { + return ( +
+
+ {data.vendor_count} Anbieter werden in {Object.keys(data.templates).length} Vorlagen eingespielt. +
+ {Object.entries(data.templates).map(([key, tpl]) => ( +
+
+

{tpl.templateType}

+ {tpl.suggested_template_search && ( + Vorschlag: {tpl.suggested_template_search} + )} +
+
+            {tpl.initialContent.slice(0, 1200)}{tpl.initialContent.length > 1200 ? '\n…' : ''}
+          
+
+ ))} +
+ ) +} + +function Stat({ label, value, tone = 'gray' }: { label: string; value: number; tone?: 'red' | 'amber' | 'gray' }) { + const color = tone === 'red' ? 'text-red-700' : tone === 'amber' ? 'text-amber-700' : 'text-gray-800' + return ( +
+
{value}
+
{label}
+
+ ) +} diff --git a/backend-compliance/compliance/api/agent_compliance_check_routes.py b/backend-compliance/compliance/api/agent_compliance_check_routes.py index 01dd00d7..d42d614c 100644 --- a/backend-compliance/compliance/api/agent_compliance_check_routes.py +++ b/backend-compliance/compliance/api/agent_compliance_check_routes.py @@ -551,6 +551,12 @@ async def _run_compliance_check(check_id: str, req: ComplianceCheckRequest): }, mc_records=audit_rows, ) + from compliance.services.compliance_audit_log import record_check_payload + record_check_payload( + check_id=check_id, + vendors=cmp_vendors, + profile=extracted_profile, + ) except Exception as e: logger.warning("Audit persistence skipped: %s", e) diff --git a/backend-compliance/compliance/api/agent_migration_routes.py b/backend-compliance/compliance/api/agent_migration_routes.py new file mode 100644 index 00000000..64df61a2 --- /dev/null +++ b/backend-compliance/compliance/api/agent_migration_routes.py @@ -0,0 +1,123 @@ +""" +Migration endpoints: Compliance-Check → Customer Banner / Documents. + +After a /compliance/agent/compliance-check run finishes, the user can +migrate the extracted CMP vendor list + extracted profile into: + - their CookieBanner config (admin-compliance /sdk/einwilligungen) + - their Document-Generator (Cookie-Policy / VVT / Privacy-Policy) + +These endpoints are read-only previews — the actual write to a tenant's +SDK state is initiated by the frontend with the existing save endpoints +(/sdk/cookie-banner, /sdk/document-generator). We only return the +ready-to-apply payload + flags for manual review. +""" + +from __future__ import annotations + +import logging +from typing import Any + +from fastapi import APIRouter, HTTPException, Query + +from compliance.services.compliance_audit_log import ( + get_check_payload, get_check_run, +) +from compliance.services.migration_to_banner import build_banner_config +from compliance.services.migration_to_document import build_document_prefills + +logger = logging.getLogger(__name__) + +router = APIRouter(prefix="/compliance/agent/migration", tags=["agent-migration"]) + + +def _load_check_context(check_id: str) -> tuple[list[dict], dict, dict]: + """Return (vendors, profile, run_meta) for a stored check_id.""" + # Prefer the in-memory job cache (richest data, before sidecar trim). + try: + from compliance.api.agent_compliance_check_routes import ( + _compliance_check_jobs, + ) + job = _compliance_check_jobs.get(check_id) + except Exception: + job = None + if job and (result := job.get("result")): + return ( + result.get("cmp_vendors") or [], + result.get("extracted_profile") or {}, + { + "site_name": result.get("business_profile", {}).get("siteName", ""), + "base_domain": result.get("business_profile", {}).get("baseUrl", ""), + }, + ) + + payload = get_check_payload(check_id) + if payload is None: + raise HTTPException(404, f"Unknown check_id '{check_id}'") + run = get_check_run(check_id) or {} + return ( + payload.get("vendors") or [], + payload.get("profile") or {}, + { + "site_name": run.get("site_name", ""), + "base_domain": run.get("base_domain", ""), + }, + ) + + +@router.get("/{check_id}/banner-preview") +async def preview_banner_migration( + check_id: str, + language: str = Query("de", pattern="^(de|en)$"), +) -> dict[str, Any]: + """Build a CookieBannerConfig from a finished compliance-check run. + + Returns: { config, flags, summary } — the frontend renders a diff + against the tenant's current banner and lets the user accept. + """ + vendors, _profile, meta = _load_check_context(check_id) + return build_banner_config( + vendors=vendors, + site_name=meta["site_name"] or meta["base_domain"], + privacy_policy_url="", + language=language, + ) + + +@router.get("/{check_id}/document-preview") +async def preview_document_migration(check_id: str) -> dict[str, Any]: + """Return pre-fills for cookie_policy / vvt_register / privacy_policy.""" + vendors, profile, meta = _load_check_context(check_id) + prefills = build_document_prefills( + vendors=vendors, + extracted_profile={"company_profile": profile} if profile else None, + site_name=meta["site_name"] or meta["base_domain"], + privacy_policy_url="", + ) + return { + "check_id": check_id, + "site_name": meta["site_name"], + "base_domain": meta["base_domain"], + "vendor_count": len(vendors), + "templates": prefills, + } + + +@router.get("/{check_id}/summary") +async def migration_summary(check_id: str) -> dict[str, Any]: + """High-level summary: how many vendors, how many cookies, how many issues.""" + vendors, profile, meta = _load_check_context(check_id) + banner = build_banner_config( + vendors=vendors, + site_name=meta["site_name"] or meta["base_domain"], + ) + return { + "check_id": check_id, + "site_name": meta["site_name"], + "base_domain": meta["base_domain"], + "company_name": (profile or {}).get("companyName", ""), + "vendor_count": len(vendors), + "banner_summary": banner.get("summary"), + "available_templates": [ + "cookie_policy", "vvt_register", "privacy_policy", + ], + } diff --git a/backend-compliance/compliance/services/compliance_audit_log.py b/backend-compliance/compliance/services/compliance_audit_log.py index 9ce74c02..b240039e 100644 --- a/backend-compliance/compliance/services/compliance_audit_log.py +++ b/backend-compliance/compliance/services/compliance_audit_log.py @@ -58,9 +58,63 @@ def _ensure_db() -> None: ); CREATE INDEX IF NOT EXISTS idx_mc_check ON mc_results(check_id); CREATE INDEX IF NOT EXISTS idx_mc_reg ON mc_results(regulation, passed); + + -- Migration-source payloads (cmp_vendors + extracted_profile), + -- kept as JSON blobs so the /migration/* endpoints can rebuild + -- a banner config or document pre-fill after the in-memory + -- _compliance_check_jobs entry is gone. + CREATE TABLE IF NOT EXISTS check_payloads ( + check_id TEXT PRIMARY KEY, + vendors TEXT, -- JSON list[dict] + profile TEXT -- JSON dict + ); """) +def record_check_payload( + check_id: str, + vendors: list[dict] | None, + profile: dict | None, +) -> None: + """Persist cmp_vendors + extracted_profile for later migration use.""" + try: + _ensure_db() + with sqlite3.connect(DB_PATH) as conn: + conn.execute( + "INSERT OR REPLACE INTO check_payloads " + "(check_id, vendors, profile) VALUES (?, ?, ?)", + ( + check_id, + json.dumps(vendors or [], ensure_ascii=False), + json.dumps(profile or {}, ensure_ascii=False), + ), + ) + conn.commit() + except Exception as e: + logger.warning("record_check_payload failed for %s: %s", check_id, e) + + +def get_check_payload(check_id: str) -> dict | None: + """Load cmp_vendors + extracted_profile for a previous check.""" + try: + _ensure_db() + with sqlite3.connect(DB_PATH) as conn: + conn.row_factory = sqlite3.Row + row = conn.execute( + "SELECT vendors, profile FROM check_payloads WHERE check_id=?", + (check_id,), + ).fetchone() + if not row: + return None + return { + "vendors": json.loads(row["vendors"] or "[]"), + "profile": json.loads(row["profile"] or "{}"), + } + except Exception as e: + logger.warning("get_check_payload failed: %s", e) + return None + + def record_check_run( check_id: str, tenant_id: str, diff --git a/backend-compliance/compliance/services/migration_to_banner.py b/backend-compliance/compliance/services/migration_to_banner.py new file mode 100644 index 00000000..132b1694 --- /dev/null +++ b/backend-compliance/compliance/services/migration_to_banner.py @@ -0,0 +1,232 @@ +""" +Migrate extracted vendor records -> CookieBannerConfig (admin-compliance +schema in einwilligungen/types/cookie-banner.ts). + +Input : list[VendorRecord] as produced by vendor_extractor + + vendor_classifier + cookie_link_validator +Output : dict matching CookieBannerConfig shape, ready for the + /sdk/cookie-banner module to import. + +The mapper also returns `flags[]` — items that need manual review +before going live (broken opt-out URL, missing expiry, etc.). +""" + +from __future__ import annotations + +import logging +from typing import Any + +logger = logging.getLogger(__name__) + +# ePaaS / OneTrust / etc. category -> CookieBannerCategory enum. +# CookieCategory has only 4 values, so we project marketing onto +# PERSONALIZATION and use EXTERNAL_MEDIA for embedded-content vendors. +_CATEGORY_MAP = { + "necessary": "ESSENTIAL", + "strictlynecessary": "ESSENTIAL", + "essential": "ESSENTIAL", + "functional": "ESSENTIAL", # Cookiebot conflates these + "statistics": "PERFORMANCE", + "analytics": "PERFORMANCE", + "performance": "PERFORMANCE", + "marketing": "PERSONALIZATION", + "advertising": "PERSONALIZATION", + "personalization": "PERSONALIZATION", +} + +# Vendor names that indicate embedded external content +_EXTERNAL_MEDIA_HINTS = ( + "youtube", "vimeo", "twitch", "google maps", "googlemaps", + "soundcloud", "spotify", +) + +_CATEGORY_LABELS = { + "ESSENTIAL": { + "de": "Erforderliche Cookies", + "en": "Essential Cookies", + "desc_de": "Diese Cookies sind fuer den Betrieb der Website " + "unbedingt erforderlich (§25 Abs. 2 TDDDG) und koennen " + "nicht deaktiviert werden.", + "desc_en": "These cookies are strictly necessary for the operation " + "of the website and cannot be disabled.", + }, + "PERFORMANCE": { + "de": "Analyse & Performance", + "en": "Analytics & Performance", + "desc_de": "Analyse-Cookies messen die Nutzung unserer Website, " + "um sie kontinuierlich zu verbessern.", + "desc_en": "Analytics cookies measure how visitors use our site so " + "we can improve it.", + }, + "PERSONALIZATION": { + "de": "Marketing & Personalisierung", + "en": "Marketing & Personalization", + "desc_de": "Diese Cookies dienen der personalisierten " + "Ansprache und werbebezogenen Auswertung.", + "desc_en": "These cookies support personalised content and " + "marketing measurement.", + }, + "EXTERNAL_MEDIA": { + "de": "Externe Medien", + "en": "External Media", + "desc_de": "Eingebettete Inhalte von Drittanbietern (z. B. " + "Videos, Karten, Audio) koennen Cookies setzen.", + "desc_en": "Embedded third-party media (videos, maps, audio) " + "may set cookies.", + }, +} + + +def map_category(vendor_category: str, vendor_name: str) -> str: + """Resolve a CMP category + vendor name to a CookieCategory enum value.""" + name_l = (vendor_name or "").lower() + if any(h in name_l for h in _EXTERNAL_MEDIA_HINTS): + return "EXTERNAL_MEDIA" + return _CATEGORY_MAP.get((vendor_category or "").lower(), "PERSONALIZATION") + + +def build_banner_config( + vendors: list[dict], + site_name: str = "", + privacy_policy_url: str = "", + language: str = "de", +) -> dict: + """Produce a CookieBannerConfig + flags from the extracted vendor list. + + `vendors` is the list emitted by vendor_extractor.extract_vendors_from_payloads + (+ score_vendors for the compliance_flags). We bucket them by canonical + CookieCategory and build a CookieInfo entry per persistence. + """ + by_cat: dict[str, list[dict]] = { + "ESSENTIAL": [], "PERFORMANCE": [], + "PERSONALIZATION": [], "EXTERNAL_MEDIA": [], + } + flags: list[dict] = [] + cookies_total = 0 + vendors_with_no_cookies = 0 + + for v in vendors or []: + cat = map_category(v.get("category", ""), v.get("name", "")) + provider = v.get("name") or "Unbekannt" + cookies = v.get("cookies") or [] + if not cookies: + vendors_with_no_cookies += 1 + flags.append({ + "level": "WARNING", + "vendor": provider, + "issue": "no_cookies_listed", + "message": ( + f"Anbieter '{provider}' wurde erfasst, " + "aber keine Cookies sind dokumentiert. Vor " + "Veroeffentlichung manuell ergaenzen." + ), + }) + continue + for c in cookies: + cname = (c.get("name") or "").strip() + if not cname: + continue + cookies_total += 1 + entry = { + "name": cname, + "provider": provider, + "purpose": {language: c.get("purpose") or v.get("purpose") or ""}, + "expiry": c.get("expiry") or "", + "type": ("THIRD_PARTY" + if c.get("is_third_party") else "FIRST_PARTY"), + } + by_cat[cat].append(entry) + if not c.get("expiry"): + flags.append({ + "level": "INFO", + "vendor": provider, + "issue": "cookie_no_expiry", + "message": ( + f"Cookie '{cname}' bei '{provider}' ohne " + "Speicherdauer — fuer DSK-Konformitaet ergaenzen." + ), + }) + + # Vendor-level link validation flags + if v.get("opt_out_url") and v.get("opt_out_ok") is False: + flags.append({ + "level": "ERROR", + "vendor": provider, + "issue": "broken_opt_out", + "message": ( + f"Opt-Out-Link von '{provider}' antwortet mit " + f"HTTP {v.get('opt_out_status')} — " + "Art. 7(3) DSGVO erfordert funktionierenden Widerruf." + ), + }) + + categories: list[dict] = [] + for cat_id in ("ESSENTIAL", "PERFORMANCE", "PERSONALIZATION", "EXTERNAL_MEDIA"): + cookies = by_cat[cat_id] + if not cookies and cat_id != "ESSENTIAL": + continue + meta = _CATEGORY_LABELS[cat_id] + categories.append({ + "id": cat_id, + "name": {"de": meta["de"], "en": meta["en"]}, + "description": {"de": meta["desc_de"], "en": meta["desc_en"]}, + "isRequired": cat_id == "ESSENTIAL", + "defaultEnabled": cat_id == "ESSENTIAL", + "dataPointIds": [], + "cookies": cookies, + }) + + config = { + "id": "", # filled by tenant on apply + "tenantId": "", + "categories": categories, + "styling": { + "position": "BOTTOM", + "theme": "LIGHT", + "primaryColor": "#2563eb", + "borderRadius": 8, + }, + "texts": { + "title": {"de": "Wir verwenden Cookies", + "en": "We use cookies"}, + "description": { + "de": (f"Auf {site_name or 'unserer Website'} setzen wir " + "Cookies und aehnliche Technologien ein, um die " + "Nutzererfahrung zu verbessern. Sie koennen Ihre " + "Auswahl jederzeit anpassen."), + "en": (f"On {site_name or 'this website'} we use cookies " + "and similar technologies. You can change your " + "selection at any time."), + }, + "acceptAll": {"de": "Alle akzeptieren", "en": "Accept all"}, + "rejectAll": {"de": "Alle ablehnen", "en": "Reject all"}, + "customize": {"de": "Auswahl anpassen", "en": "Customize"}, + "save": {"de": "Auswahl speichern", "en": "Save preferences"}, + "privacyPolicyLink": {"de": privacy_policy_url or "/datenschutz", + "en": privacy_policy_url or "/privacy"}, + }, + } + + summary = { + "vendors_total": len(vendors or []), + "vendors_with_no_cookies": vendors_with_no_cookies, + "cookies_total": cookies_total, + "categories": {cat_id: len(by_cat[cat_id]) for cat_id in by_cat}, + "flags_error": sum(1 for f in flags if f["level"] == "ERROR"), + "flags_warning": sum(1 for f in flags if f["level"] == "WARNING"), + "flags_info": sum(1 for f in flags if f["level"] == "INFO"), + } + + logger.info( + "Banner migration prepared: %d vendors -> %d cookies in %d " + "categories. Flags: %d ERROR, %d WARNING, %d INFO.", + summary["vendors_total"], summary["cookies_total"], + len(categories), summary["flags_error"], + summary["flags_warning"], summary["flags_info"], + ) + + return { + "config": config, + "flags": flags, + "summary": summary, + } diff --git a/backend-compliance/compliance/services/migration_to_document.py b/backend-compliance/compliance/services/migration_to_document.py new file mode 100644 index 00000000..3b4d4047 --- /dev/null +++ b/backend-compliance/compliance/services/migration_to_document.py @@ -0,0 +1,260 @@ +""" +Migrate extracted vendor records + scorecard -> Document-Generator +pre-fills. + +We can pre-fill several templateType candidates: + + - cookie_policy : compose a Cookie-Richtlinie text from the vendor + list (one section per category, table per vendor with name, purpose, + expiry, opt-out) + - vvt_register : populate VVT entries (one per vendor, with recipient + category, opt-out URL, etc.) + - privacy_policy: a 'Drittanbieter' section listing vendors as + recipients + transfer mechanism + +Output for each: {templateType, placeholderValues, initialContent, +suggested_template_search} that the frontend can drop into +DocumentGeneratorState. +""" + +from __future__ import annotations + +import logging + +logger = logging.getLogger(__name__) + + +_RECIPIENT_TYPE_LABEL = { + "INTERNAL": "Eigene Verarbeitung", + "GROUP_COMPANY": "Konzernunternehmen", + "PROCESSOR": "Auftragsverarbeiter", + "CONTROLLER": "Joint / unabhaengiger Verantwortlicher", + "AUTHORITY": "Behoerde", + "OTHER": "Sonstiger Empfaenger", +} + + +def build_document_prefills( + vendors: list[dict], + extracted_profile: dict | None = None, + site_name: str = "", + privacy_policy_url: str = "", +) -> dict: + """Generate pre-fills for cookie_policy + vvt_register + privacy_policy.""" + profile = (extracted_profile or {}).get("company_profile", {}) or {} + company_name = (profile.get("companyName") or site_name or "Unbekannt").strip() + address = ", ".join(filter(None, [ + profile.get("headquartersStreet"), + profile.get("headquartersZip"), + profile.get("headquartersCity"), + ])) + dpo_email = profile.get("dpoEmail") or "" + + placeholders_common = { + "company_name": company_name, + "company_address": address, + "dpo_email": dpo_email, + "privacy_policy_url": privacy_policy_url, + "site_name": site_name, + "vendor_count": str(len(vendors or [])), + } + + return { + "cookie_policy": _build_cookie_policy( + vendors or [], placeholders_common, + ), + "vvt_register": _build_vvt_register( + vendors or [], placeholders_common, + ), + "privacy_policy": _build_privacy_policy_section( + vendors or [], placeholders_common, + ), + } + + +# ── cookie_policy ─────────────────────────────────────────────────── + +def _build_cookie_policy(vendors: list[dict], placeholders: dict) -> dict: + by_cat: dict[str, list[dict]] = {} + for v in vendors: + cat = (v.get("category") or "marketing").lower() + by_cat.setdefault(cat, []).append(v) + + parts: list[str] = [ + "# Cookie-Richtlinie", + "", + f"Diese Cookie-Richtlinie informiert Sie ueber den Einsatz von Cookies " + f"und aehnlichen Technologien auf den Webseiten der " + f"**{placeholders['company_name']}**.", + "", + ] + if placeholders["company_address"]: + parts.append(f"Verantwortlich: {placeholders['company_name']}, " + f"{placeholders['company_address']}.") + parts.append("") + if placeholders["dpo_email"]: + parts.append(f"Datenschutzbeauftragte/r erreichbar unter: " + f"{placeholders['dpo_email']}.") + parts.append("") + + cat_order = ("necessary", "strictlynecessary", "functional", + "statistics", "performance", "marketing", "advertising", + "personalization") + for cat in cat_order: + rows = by_cat.get(cat) or [] + if not rows: + continue + parts.append("") + parts.append(f"## Kategorie: {_human_cat(cat)}") + parts.append("") + parts.append("| Anbieter | Zweck | Speicherdauer | Opt-Out |") + parts.append("|----------|-------|---------------|---------|") + for v in rows: + name = (v.get("name") or "").replace("|", " ") + purpose = (v.get("purpose") or "").replace("|", " ")[:140] + persistence = ", ".join( + c.get("expiry", "") for c in (v.get("cookies") or []) + if c.get("expiry") + )[:60] or "—" + opt = v.get("opt_out_url") or "—" + parts.append(f"| {name} | {purpose} | {persistence} | {opt} |") + parts.append("") + parts.append("Stand: automatisch generiert durch BreakPilot. " + "Bitte vor Veroeffentlichung pruefen.") + + return { + "templateType": "cookie_policy", + "placeholderValues": placeholders, + "initialContent": "\n".join(parts), + "suggested_template_search": "Cookie-Richtlinie DSGVO TDDDG Deutsch", + } + + +def _human_cat(cat: str) -> str: + return { + "necessary": "Erforderlich (§25 Abs. 2 TDDDG)", + "strictlynecessary": "Erforderlich (§25 Abs. 2 TDDDG)", + "functional": "Funktional", + "statistics": "Statistik / Analyse", + "performance": "Statistik / Analyse", + "marketing": "Marketing & Werbung", + "advertising": "Marketing & Werbung", + "personalization": "Personalisierung", + }.get(cat, cat.capitalize()) + + +# ── vvt_register ──────────────────────────────────────────────────── + +def _build_vvt_register(vendors: list[dict], placeholders: dict) -> dict: + """Generate VVT-Eintraege als JSON, das der vvt-Modul direkt importieren kann. + + Schema lehnt sich an admin-compliance/lib/sdk/vvt-types.ts VVTActivity an. + """ + activities: list[dict] = [] + for v in vendors: + rtype = (v.get("recipient_type") or "OTHER").upper() + recipient_type = ( + "INTERNAL" if rtype == "INTERNAL" + else "GROUP_COMPANY" if rtype == "GROUP_COMPANY" + else "PROCESSOR" if rtype == "PROCESSOR" + else "CONTROLLER" if rtype == "CONTROLLER" + else "OTHER" + ) + activities.append({ + "name": v.get("name") or "Unbekannte Verarbeitung", + "description": v.get("purpose") or "", + "purposes": [v.get("purpose")] if v.get("purpose") else [], + "businessFunction": _guess_business_function(v), + "recipientCategories": [{ + "type": recipient_type, + "name": v.get("name") or "", + "isThirdCountry": bool(v.get("country")) and + v.get("country") not in + ("DE", "AT", "BE", "BG", "HR", "CY", "CZ", + "DK", "EE", "FI", "FR", "GR", "HU", "IE", + "IT", "LV", "LT", "LU", "MT", "NL", "PL", + "PT", "RO", "SK", "SI", "ES", "SE", "IS", + "LI", "NO", "CH"), + "country": v.get("country") or "", + }], + "retentionPeriod": { + "description": _summarise_expiry(v.get("cookies") or []), + }, + "tomDescription": "Siehe TOM-Anlage (automatisch verlinken).", + "sourceTemplateId": f"breakpilot-cookie-vendor-{v.get('name', '').lower()[:30]}", + }) + + return { + "templateType": "vvt_register", + "placeholderValues": placeholders, + "initialContent": "", + "activities": activities, + "suggested_template_search": "VVT Art. 30 DSGVO Verarbeitungsverzeichnis", + } + + +def _guess_business_function(v: dict) -> str: + cat = (v.get("category") or "").lower() + if cat in ("marketing", "advertising", "personalization"): + return "marketing" + if cat in ("statistics", "performance", "analytics"): + return "marketing" + if cat in ("necessary", "strictlynecessary"): + return "it_operations" + return "other" + + +def _summarise_expiry(cookies: list[dict]) -> str: + exps = sorted({(c.get("expiry") or "").strip() + for c in cookies if c.get("expiry")}) + if not exps: + return "Speicherdauer pro Cookie pflegen." + if len(exps) == 1: + return exps[0] + return ", ".join(exps[:5]) + (f" (+{len(exps) - 5} weitere)" if len(exps) > 5 else "") + + +# ── privacy_policy (Drittanbieter-Block) ─────────────────────────── + +def _build_privacy_policy_section(vendors: list[dict], placeholders: dict) -> dict: + """Generate the 'Drittanbieter' section for the privacy_policy template.""" + by_rtype: dict[str, list[dict]] = {} + for v in vendors: + rtype = (v.get("recipient_type") or "OTHER").upper() + by_rtype.setdefault(rtype, []).append(v) + + parts: list[str] = [ + "## Empfaenger personenbezogener Daten", + "", + f"Im Rahmen unseres Webseiten-Betriebs uebermitteln wir " + f"personenbezogene Daten an folgende Kategorien von Empfaengern " + f"(Art. 13 Abs. 1 lit. e DSGVO):", + "", + ] + order = ("INTERNAL", "GROUP_COMPANY", "PROCESSOR", "CONTROLLER", + "AUTHORITY", "OTHER") + for rtype in order: + rows = by_rtype.get(rtype) or [] + if not rows: + continue + parts.append(f"### {_RECIPIENT_TYPE_LABEL.get(rtype, rtype)}") + for v in rows: + name = v.get("name") or "" + country = v.get("country") or "" + country_str = f" ({country})" if country else "" + line = f"- **{name}**{country_str}" + if v.get("purpose"): + line += f" — {v.get('purpose')[:140]}" + if v.get("opt_out_url"): + line += f" [Opt-Out]({v.get('opt_out_url')})" + if v.get("privacy_policy_url"): + line += f" [Datenschutz]({v.get('privacy_policy_url')})" + parts.append(line) + parts.append("") + + return { + "templateType": "privacy_policy", + "placeholderValues": placeholders, + "initialContent": "\n".join(parts), + "suggested_template_search": "Datenschutzerklaerung Art. 13 DSGVO Webseite", + } diff --git a/backend-compliance/main.py b/backend-compliance/main.py index 521d7745..30957460 100644 --- a/backend-compliance/main.py +++ b/backend-compliance/main.py @@ -50,6 +50,7 @@ from compliance.api.agent_recurring_routes import router as agent_recurring_rout from compliance.api.agent_compare_routes import router as agent_compare_router from compliance.api.agent_doc_check_routes import router as agent_doc_check_router from compliance.api.agent_compliance_check_routes import router as agent_compliance_check_router +from compliance.api.agent_migration_routes import router as agent_migration_router from compliance.api.vendor_assessment_routes import router as vendor_assessment_router # Middleware @@ -155,6 +156,7 @@ app.include_router(agent_recurring_router, prefix="/api") app.include_router(agent_compare_router, prefix="/api") app.include_router(agent_doc_check_router, prefix="/api") app.include_router(agent_compliance_check_router, prefix="/api") +app.include_router(agent_migration_router, prefix="/api") # Vendor Contract Assessment app.include_router(vendor_assessment_router, prefix="/api") diff --git a/backend-compliance/tests/test_migration_endpoints.py b/backend-compliance/tests/test_migration_endpoints.py new file mode 100644 index 00000000..7b45c3b3 --- /dev/null +++ b/backend-compliance/tests/test_migration_endpoints.py @@ -0,0 +1,116 @@ +""" +Integration test for the /compliance/agent/migration/* endpoints. + +Simulates a finished compliance-check run by persisting cmp_vendors + +extracted_profile via the sidecar audit log, then exercises the FastAPI +TestClient against banner-preview / document-preview / summary. + +This is the M5 BMW-scenario in miniature: realistic ePaaS-shaped vendor +records (BMW INTERNAL + 2 third-party PROCESSOR) feed through to a +ready-to-apply banner config and pre-filled documents. +""" + +from __future__ import annotations + +import os +import tempfile + +import pytest +from fastapi.testclient import TestClient + + +@pytest.fixture() +def app_client(monkeypatch): + # Isolate the sidecar SQLite so this test never races with /data prod DB + tmp = tempfile.mkdtemp() + monkeypatch.setenv("COMPLIANCE_AUDIT_DB", os.path.join(tmp, "audit.db")) + # Build a minimal app — avoid importing the full main.py which pulls in + # smtp_sender / weasyprint / pydantic-v1 modules not relevant here. + from fastapi import FastAPI + from compliance.api.agent_migration_routes import router + app = FastAPI() + app.include_router(router, prefix="/api") + return TestClient(app) + + +@pytest.fixture() +def seeded_check_id(): + """Persist a fake compliance check so the migration routes have data.""" + from compliance.services.compliance_audit_log import record_check_payload, record_check_run + cid = "bmw-test-check-001" + vendors = [ + { + "name": "BMW AG", "category": "necessary", + "recipient_type": "INTERNAL", + "purpose": "Grundfunktionen + Login", + "cookies": [{"name": "JSESSIONID", "expiry": "Session"}], + }, + { + "name": "Adobe Analytics", "category": "statistics", + "recipient_type": "PROCESSOR", "country": "US", + "purpose": "Reichweitenmessung", + "opt_out_url": "https://adobe.com/opt-out", "opt_out_ok": True, + "privacy_policy_url": "https://adobe.com/privacy", + "cookies": [{"name": "s_cc", "expiry": "1 Tag", + "is_third_party": True}], + }, + { + "name": "YouTube", "category": "marketing", + "recipient_type": "PROCESSOR", "country": "US", + "purpose": "Videos", + "cookies": [{"name": "VISITOR_INFO1_LIVE", "expiry": "6 Monate", + "is_third_party": True}], + }, + ] + record_check_run( + check_id=cid, tenant_id="t1", site_name="bmw.de", + base_domain="bmw.de", doc_count=4, + scorecard={"totals": {"pct": 75, "passed": 30, "failed": 10, + "total": 40, "skipped": 0}}, + ) + record_check_payload(check_id=cid, vendors=vendors, + profile={"companyName": "BMW AG", + "headquartersStreet": "Petuelring 130", + "headquartersZip": "80809", + "headquartersCity": "Muenchen", + "dpoEmail": "datenschutz@bmw.de"}) + return cid + + +def test_banner_preview_returns_valid_config(app_client, seeded_check_id): + r = app_client.get(f"/api/compliance/agent/migration/{seeded_check_id}/banner-preview") + assert r.status_code == 200, r.text + body = r.json() + assert body["summary"]["vendors_total"] == 3 + cat_ids = {c["id"] for c in body["config"]["categories"]} + assert "ESSENTIAL" in cat_ids + assert "EXTERNAL_MEDIA" in cat_ids # YouTube + # BMW AG (INTERNAL, cookies present) should not raise any flags + assert not any(f.get("vendor") == "BMW AG" for f in body["flags"]) + + +def test_document_preview_includes_all_three_templates(app_client, seeded_check_id): + r = app_client.get(f"/api/compliance/agent/migration/{seeded_check_id}/document-preview") + assert r.status_code == 200, r.text + body = r.json() + assert body["vendor_count"] == 3 + assert set(body["templates"].keys()) == { + "cookie_policy", "vvt_register", "privacy_policy", + } + assert "BMW AG" in body["templates"]["cookie_policy"]["initialContent"] + assert "Petuelring" in body["templates"]["cookie_policy"]["initialContent"] + + +def test_summary_returns_overview(app_client, seeded_check_id): + r = app_client.get(f"/api/compliance/agent/migration/{seeded_check_id}/summary") + assert r.status_code == 200, r.text + body = r.json() + assert body["company_name"] == "BMW AG" + assert body["vendor_count"] == 3 + assert body["site_name"] == "bmw.de" + assert "cookie_policy" in body["available_templates"] + + +def test_unknown_check_id_returns_404(app_client): + r = app_client.get("/api/compliance/agent/migration/nope-not-there/banner-preview") + assert r.status_code == 404 diff --git a/backend-compliance/tests/test_migration_mappers.py b/backend-compliance/tests/test_migration_mappers.py new file mode 100644 index 00000000..4d42b786 --- /dev/null +++ b/backend-compliance/tests/test_migration_mappers.py @@ -0,0 +1,138 @@ +""" +Unit tests for vendor → CookieBannerConfig and vendor → Document pre-fill +mappers (M1 + M2 of the customer-banner migration feature). +""" + +from __future__ import annotations + +import pytest + +from compliance.services.migration_to_banner import ( + build_banner_config, map_category, +) +from compliance.services.migration_to_document import build_document_prefills + + +@pytest.fixture() +def sample_vendors() -> list[dict]: + return [ + { + "name": "BMW AG", + "category": "necessary", + "recipient_type": "INTERNAL", + "purpose": "Sicherstellung der Grundfunktionen", + "cookies": [{"name": "JSESSIONID", "expiry": "Session"}], + }, + { + "name": "Google Analytics", + "category": "statistics", + "recipient_type": "PROCESSOR", + "purpose": "Reichweitenmessung", + "country": "US", + "opt_out_url": "https://tools.google.com/dlpage/gaoptout", + "opt_out_ok": True, + "privacy_policy_url": "https://policies.google.com/privacy", + "cookies": [ + {"name": "_ga", "expiry": "2 Jahre", "is_third_party": True}, + {"name": "_gid", "expiry": "1 Tag", "is_third_party": True}, + ], + }, + { + "name": "YouTube", + "category": "marketing", + "recipient_type": "PROCESSOR", + "purpose": "Eingebettete Videos", + "cookies": [], + }, + { + "name": "Broken Pixel", + "category": "marketing", + "recipient_type": "PROCESSOR", + "purpose": "Werbung", + "opt_out_url": "https://example.com/optout", + "opt_out_ok": False, + "opt_out_status": 404, + "cookies": [{"name": "_pix", "expiry": ""}], + }, + ] + + +def test_map_category_youtube_routes_to_external_media(): + assert map_category("marketing", "YouTube Player") == "EXTERNAL_MEDIA" + + +def test_map_category_unknown_falls_back_to_personalization(): + assert map_category("weird-cat", "Some Vendor") == "PERSONALIZATION" + + +def test_map_category_necessary_is_essential(): + assert map_category("necessary", "JSESSIONID") == "ESSENTIAL" + + +def test_build_banner_config_buckets_categories(sample_vendors): + out = build_banner_config(sample_vendors, site_name="bmw.de") + cats = {c["id"]: c for c in out["config"]["categories"]} + assert "ESSENTIAL" in cats + assert "PERFORMANCE" in cats + assert any(c["name"] == "_ga" for c in cats["PERFORMANCE"]["cookies"]) + # YouTube vendor had no cookies → it should not pollute EXTERNAL_MEDIA + # but should produce a WARNING flag for the missing list + assert any(f["vendor"] == "YouTube" and f["issue"] == "no_cookies_listed" + for f in out["flags"]) + + +def test_build_banner_config_flags_broken_opt_out(sample_vendors): + out = build_banner_config(sample_vendors, site_name="bmw.de") + errors = [f for f in out["flags"] if f["level"] == "ERROR"] + assert any(f["issue"] == "broken_opt_out" and f["vendor"] == "Broken Pixel" + for f in errors) + + +def test_build_banner_config_summary_counts(sample_vendors): + out = build_banner_config(sample_vendors, site_name="bmw.de") + s = out["summary"] + assert s["vendors_total"] == 4 + assert s["vendors_with_no_cookies"] == 1 + assert s["cookies_total"] == 4 # JSESSIONID + _ga + _gid + _pix + + +def test_build_document_prefills_emits_all_three_templates(sample_vendors): + out = build_document_prefills( + sample_vendors, + extracted_profile={ + "company_profile": { + "companyName": "BMW AG", + "headquartersStreet": "Petuelring 130", + "headquartersZip": "80809", + "headquartersCity": "Muenchen", + "dpoEmail": "datenschutz@bmw.de", + }, + }, + site_name="bmw.de", + ) + assert set(out.keys()) == {"cookie_policy", "vvt_register", "privacy_policy"} + cp = out["cookie_policy"] + assert cp["templateType"] == "cookie_policy" + assert "BMW AG" in cp["initialContent"] + assert "Google Analytics" in cp["initialContent"] + assert "Petuelring 130" in cp["initialContent"] + + +def test_vvt_register_marks_third_country_for_us_processor(sample_vendors): + out = build_document_prefills(sample_vendors, site_name="bmw.de") + acts = out["vvt_register"]["activities"] + ga = next(a for a in acts if a["name"] == "Google Analytics") + rcat = ga["recipientCategories"][0] + assert rcat["type"] == "PROCESSOR" + assert rcat["country"] == "US" + assert rcat["isThirdCountry"] is True + + +def test_privacy_policy_section_groups_by_recipient_type(sample_vendors): + out = build_document_prefills(sample_vendors, site_name="bmw.de") + body = out["privacy_policy"]["initialContent"] + assert "Eigene Verarbeitung" in body + assert "Auftragsverarbeiter" in body + # BMW AG (INTERNAL) must appear under Eigene, not under Auftragsverarbeiter + internal_block = body.split("### Auftragsverarbeiter")[0] + assert "BMW AG" in internal_block