From d18ef79f18e9d6ce306cc411ae0e692feb2ca6d6 Mon Sep 17 00:00:00 2001 From: Benjamin Admin Date: Thu, 11 Jun 2026 08:18:25 +0200 Subject: [PATCH] feat(cookie): Pro-Cookie-Library-Abgleich (2287er OCD + 35er rich) + Panel MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - analyze_cookies gleicht Cookies gegen BEIDE Libraries ab: compliance.cookie_library (2287, OCD/CC0 — Kategorie/Retention) + 35er rich-DB (technical_necessity/reid/ schrems/eu_alternative). 5 Befund-Typen: tracker_as_necessary, missing_purpose, excessive_lifetime (Art.5), third_country (Art.44), eu_alternative (kommerziell). - Endpoint GET /snapshots/{id}/cookie-check (load_big_library batch + analyze). - Frontend CookieLibraryPanel im Snapshot-Detail. - Fix CookieResultView: Zweck nicht mehr auf 60 Zeichen gekuerzt; Rolle 'unknown' als Strich statt 'Unbekannt'. Tests: 7 backend + frontend vitest gruen. Co-Authored-By: Claude Opus 4.7 --- .../[snapshotId]/cookie-check/route.ts | 33 +++ .../agent/_components/CookieLibraryPanel.tsx | 97 +++++++++ .../agent/_components/CookieResultView.tsx | 18 +- .../__tests__/CookieLibraryPanel.test.tsx | 29 +++ .../sdk/agent/snapshots/[snapshotId]/page.tsx | 6 +- .../api/agent_compliance_check_routes.py | 23 +++ .../services/cookie_library_check.py | 191 ++++++++++++++++++ .../tests/test_cookie_library_check.py | 95 +++++++++ 8 files changed, 486 insertions(+), 6 deletions(-) create mode 100644 admin-compliance/app/api/sdk/v1/agent/snapshots/[snapshotId]/cookie-check/route.ts create mode 100644 admin-compliance/app/sdk/agent/_components/CookieLibraryPanel.tsx create mode 100644 admin-compliance/app/sdk/agent/_components/__tests__/CookieLibraryPanel.test.tsx create mode 100644 backend-compliance/compliance/services/cookie_library_check.py create mode 100644 backend-compliance/compliance/tests/test_cookie_library_check.py diff --git a/admin-compliance/app/api/sdk/v1/agent/snapshots/[snapshotId]/cookie-check/route.ts 
b/admin-compliance/app/api/sdk/v1/agent/snapshots/[snapshotId]/cookie-check/route.ts new file mode 100644 index 00000000..57ce6adb --- /dev/null +++ b/admin-compliance/app/api/sdk/v1/agent/snapshots/[snapshotId]/cookie-check/route.ts @@ -0,0 +1,33 @@ +/** + * Proxy for the per-cookie library check + * GET /api/sdk/v1/agent/snapshots/{snapshotId}/cookie-check + * → backend /api/compliance/agent/snapshots/{snapshotId}/cookie-check + * + * Per-cookie check against the cookie libraries (declared vs. actual). + */ + +import { NextRequest, NextResponse } from 'next/server' + +const BACKEND_URL = + process.env.BACKEND_API_URL || process.env.BACKEND_URL || + 'http://backend-compliance:8002' + +export async function GET( + _request: NextRequest, + { params }: { params: Promise<{ snapshotId: string }> }, +) { + const { snapshotId } = await params + try { + const response = await fetch( // snapshotId is encoded so it always stays a single path segment of the backend URL + `${BACKEND_URL}/api/compliance/agent/snapshots/${encodeURIComponent(snapshotId)}/cookie-check`, + { signal: AbortSignal.timeout(60_000) }, + ) + const data = await response.json() + return NextResponse.json(data, { status: response.status }) + } catch { + return NextResponse.json( + { error: 'Cookie-Library-Abgleich fehlgeschlagen', findings: [] }, + { status: 503 }, + ) + } +} diff --git a/admin-compliance/app/sdk/agent/_components/CookieLibraryPanel.tsx b/admin-compliance/app/sdk/agent/_components/CookieLibraryPanel.tsx new file mode 100644 index 00000000..e9d709b6 --- /dev/null +++ b/admin-compliance/app/sdk/agent/_components/CookieLibraryPanel.tsx @@ -0,0 +1,97 @@ +'use client' + +/** + * CookieLibraryPanel — Pro-Cookie-Abgleich gegen die Knowledge-Library: + * findet als „notwendig" deklarierte Tracker + fehlende Zwecke und zeigt je + * Befund die Abstellmaßnahme. Lädt aus dem Snapshot (kein Re-Crawl). 
+ */ + +import React, { useEffect, useState } from 'react' + +export interface CookieFinding { + vendor: string + cookie: string + type: string + severity: string + declared: string + library_purpose: string + remediation: string +} + +interface CheckData { + summary?: { checked?: number; in_library?: number; findings?: number } + findings?: CookieFinding[] +} + +const SEV_COLOR: Record = { + HIGH: 'bg-red-100 text-red-700', + MEDIUM: 'bg-amber-100 text-amber-700', + LOW: 'bg-blue-100 text-blue-700', +} +const TYPE_LABEL: Record = { + tracker_as_necessary: 'Tracker als „notwendig" deklariert', + missing_purpose: 'Zweck fehlt', + excessive_lifetime: 'Speicherdauer zu lang', + third_country: 'Drittland-Transfer', + eu_alternative: 'EU-Alternative verfügbar', +} + +// Pure, testbar. +export function CookieFindingList({ data }: { data: CheckData }) { + const findings = data.findings || [] + const s = data.summary || {} + return ( +
+
+ Library-Abgleich — {findings.length} Befund{findings.length !== 1 ? 'e' : ''} + + {s.in_library ?? 0}/{s.checked ?? 0} Cookies in der Library erkannt + +
+ {findings.length === 0 ? ( +
+ Keine Abweichungen gegen die Library. +
+ ) : ( +
+ {findings.map((f, i) => ( +
+
+ + {f.severity} + + {f.cookie} + · {f.vendor} + + {TYPE_LABEL[f.type] || f.type} + +
+ {f.library_purpose && ( +
Library-Zweck: {f.library_purpose}
+ )} +
{f.remediation}
+
+ ))} +
+ )} +
+ ) +} + +export function CookieLibraryPanel({ snapshotId }: { snapshotId: string }) { + const [data, setData] = useState(null) + const [loading, setLoading] = useState(true) + + useEffect(() => { + let cancelled = false + fetch(`/api/sdk/v1/agent/snapshots/${snapshotId}/cookie-check`) + .then(r => r.json()) + .then(d => { if (!cancelled) setData(d) }) + .catch(() => { if (!cancelled) setData({ findings: [] }) }) + .finally(() => { if (!cancelled) setLoading(false) }) + return () => { cancelled = true } + }, [snapshotId]) + + if (loading) return
Library-Abgleich läuft…
+ return +} diff --git a/admin-compliance/app/sdk/agent/_components/CookieResultView.tsx b/admin-compliance/app/sdk/agent/_components/CookieResultView.tsx index f2fcbf67..9c673d9c 100644 --- a/admin-compliance/app/sdk/agent/_components/CookieResultView.tsx +++ b/admin-compliance/app/sdk/agent/_components/CookieResultView.tsx @@ -112,11 +112,19 @@ function VendorRow({ v }: { v: SnapshotVendor }) { {cookies.map((c, i) => ( - - {c.name} - {ROLE_LABEL[c.functional_role || 'unknown'] || c.functional_role} - {c.purpose ? c.purpose.slice(0, 60) : kein Zweck} - {c.expiry || '—'} + + {c.name} + + {c.functional_role && c.functional_role !== 'unknown' + ? (ROLE_LABEL[c.functional_role] || c.functional_role) + : } + + + {c.purpose + ? c.purpose + : kein Zweck} + + {c.expiry || '—'} ))} diff --git a/admin-compliance/app/sdk/agent/_components/__tests__/CookieLibraryPanel.test.tsx b/admin-compliance/app/sdk/agent/_components/__tests__/CookieLibraryPanel.test.tsx new file mode 100644 index 00000000..42e6631a --- /dev/null +++ b/admin-compliance/app/sdk/agent/_components/__tests__/CookieLibraryPanel.test.tsx @@ -0,0 +1,29 @@ +import { describe, it, expect } from 'vitest' +import { render, screen } from '@testing-library/react' + +import { CookieFindingList } from '../CookieLibraryPanel' + +describe('CookieFindingList', () => { + it('zeigt Befunde mit Severity, Library-Zweck + Maßnahme', () => { + const data = { + summary: { checked: 10, in_library: 4, findings: 1 }, + findings: [{ + vendor: 'Salesforce', cookie: '_ga', type: 'tracker_as_necessary', + severity: 'HIGH', declared: 'necessary', + library_purpose: 'Besucher eindeutig unterscheiden', + remediation: 'Als einwilligungspflichtig (§ 25 TDDDG) einstufen.', + }], + } + render() + expect(screen.getByText(/1 Befund/)).toBeInTheDocument() + expect(screen.getByText('_ga')).toBeInTheDocument() + expect(screen.getByText('HIGH')).toBeInTheDocument() + expect(screen.getByText(/§ 25 TDDDG/)).toBeInTheDocument() + 
expect(screen.getByText(/4\/10 Cookies/)).toBeInTheDocument() + }) + + it('zeigt grünen Hinweis bei 0 Befunden', () => { + render() + expect(screen.getByText(/Keine Abweichungen/)).toBeInTheDocument() + }) +}) diff --git a/admin-compliance/app/sdk/agent/snapshots/[snapshotId]/page.tsx b/admin-compliance/app/sdk/agent/snapshots/[snapshotId]/page.tsx index f48fdca9..999689ef 100644 --- a/admin-compliance/app/sdk/agent/snapshots/[snapshotId]/page.tsx +++ b/admin-compliance/app/sdk/agent/snapshots/[snapshotId]/page.tsx @@ -9,6 +9,7 @@ import React, { use as useUnwrap, useEffect, useState } from 'react' import Link from 'next/link' +import { CookieLibraryPanel } from '../../_components/CookieLibraryPanel' import { CookieResultView } from '../../_components/CookieResultView' export default function SnapshotDetail( @@ -45,7 +46,10 @@ export default function SnapshotDetail( ) : error || !snap ? (
Snapshot nicht gefunden.
) : hasCookies ? ( - + <> + + + ) : (
Dieser Snapshot enthält keine Cookie-/Vendor-Daten. diff --git a/backend-compliance/compliance/api/agent_compliance_check_routes.py b/backend-compliance/compliance/api/agent_compliance_check_routes.py index 6cdeacf5..19deb426 100644 --- a/backend-compliance/compliance/api/agent_compliance_check_routes.py +++ b/backend-compliance/compliance/api/agent_compliance_check_routes.py @@ -223,6 +223,29 @@ async def get_snapshot(snapshot_id: str): db.close() +@router.get("/snapshots/{snapshot_id}/cookie-check") +async def snapshot_cookie_check(snapshot_id: str): + """Pro-Cookie-Abgleich der Snapshot-Vendors gegen cookie_knowledge_db.""" + from fastapi import HTTPException + from database import SessionLocal + from compliance.services.check_snapshot import load_snapshot + from compliance.services.cookie_library_check import ( + analyze_cookies, load_big_library, + ) + db = SessionLocal() + try: + snap = load_snapshot(db, snapshot_id) + if not snap: + raise HTTPException(status_code=404, detail="snapshot not found") + vendors = snap.get("cmp_vendors") or [] + names = [c.get("name", "") + for v in vendors for c in (v.get("cookies") or [])] + big = load_big_library(db, names) + return analyze_cookies(vendors, big) + finally: + db.close() + + @router.get("/admin/benchmark") async def benchmark( industry: str = "", diff --git a/backend-compliance/compliance/services/cookie_library_check.py b/backend-compliance/compliance/services/cookie_library_check.py new file mode 100644 index 00000000..157d3796 --- /dev/null +++ b/backend-compliance/compliance/services/cookie_library_check.py @@ -0,0 +1,191 @@ +"""Pro-Cookie-Abgleich gegen die Cookie-Knowledge-Library. + +Vergleicht die DEKLARIERTEN Angaben aus dem CMP/Snapshot (Kategorie, Zweck, +Laufzeit) mit dem, was unsere Library (`cookie_knowledge_db`) über den Cookie +weiß — und leitet pro Befund eine Abstellmaßnahme ab. + +Befund-Typen: + tracker_as_necessary — als notwendig deklariert, laut Library kein techn. 
Zweck + missing_purpose — kein Zweck deklariert, Library kennt ihn + excessive_lifetime — deklarierte Speicherdauer >> typische (Art. 5(1)(e)) + third_country — Drittland-Transfer (Schrems II, Art. 44 ff.) [je Vendor] + eu_alternative — EU-Ersatz verfügbar (kommerziell) [je Vendor] +""" + +from __future__ import annotations + +import re + +from sqlalchemy import text + +from compliance.services.cookie_knowledge_db import lookup_cookie + +_TRACKER_CATS = {"marketing", "statistics", "social_media", "targeting"} + + +def load_big_library(db, names: list[str]) -> dict: + """Batch-Lookup der grossen Open-Cookie-Database (compliance.cookie_library, + ~2287 Cookies) fuer die gegebenen Namen. Breite Abdeckung: Kategorie, + Retention, Vendor.""" + uniq = sorted({(n or "").lower() for n in names if n}) + if not uniq: + return {} + rows = db.execute( + text( + "SELECT lower(cookie_name) AS n, actual_category, " + "typical_max_age_seconds, vendor_name, purpose_de, purpose_en, " + "is_pii FROM compliance.cookie_library " + "WHERE lower(cookie_name) = ANY(:names)" + ), + {"names": uniq}, + ).mappings().fetchall() + return {r["n"]: dict(r) for r in rows} + +_NECESSARY_CATS = { + "necessary", "notwendig", "essential", "essenziell", + "funktional", "functional", +} +_EEA = { + "DE", "FR", "IE", "NL", "AT", "BE", "BG", "HR", "CY", "CZ", "DK", "EE", + "FI", "GR", "HU", "IT", "LV", "LT", "LU", "MT", "PL", "PT", "RO", "SK", + "SI", "ES", "SE", "IS", "LI", "NO", +} +_SEV_ORDER = {"HIGH": 0, "MEDIUM": 1, "LOW": 2} + + +def _duration_days(s: str) -> int: + """Grobe Normalisierung einer Laufzeit-Angabe in Tage (0 = Session).""" + s = (s or "").lower() + if not s or "session" in s: + return 0 + m = re.search(r"(\d+)", s) + n = int(m.group(1)) if m else 0 + if "jahr" in s or "year" in s: + return n * 365 + if "monat" in s or "month" in s: + return n * 30 + if "woche" in s or "week" in s: + return n * 7 + if "tag" in s or "day" in s: + return n + if "stunde" in s or "hour" in s: + return 1 + 
return n + + +def analyze_cookies(vendors: list[dict], big_lib: dict | None = None) -> dict: + """Gleiche alle Cookies gegen BEIDE Libraries ab: die 2287er Open-Cookie-DB + (`big_lib`, breite Abdeckung: Kategorie/Retention) + die 35er rich-DB + (`lookup_cookie`, tiefe Rechtsfelder).""" + big_lib = big_lib or {} + findings: list[dict] = [] + checked = 0 + in_library = 0 + seen_third: set[str] = set() + seen_alt: set[str] = set() + + for v in vendors or []: + vcat = (v.get("category") or "").lower() + vcat_label = v.get("category") or "—" + vname = v.get("name") or "?" + for c in v.get("cookies") or []: + checked += 1 + name = c.get("name", "") + rich = lookup_cookie(name) or {} + big = big_lib.get(name.lower(), {}) + if not rich and not big: + continue + in_library += 1 + necessity = rich.get("technical_necessity", "") + actual_cat = (big.get("actual_category") or "").lower() + purpose = (rich.get("exact_purpose") or big.get("purpose_de") + or big.get("purpose_en") or "") + alt = rich.get("eu_alternative_vendor", "") + country = (rich.get("vendor_country") or "").upper() + schrems = rich.get("schrems_ii_status", "") + is_tracker = necessity in ("none", "partial") or actual_cat in _TRACKER_CATS + + # 1) Als notwendig deklariert, laut Library aber Tracker. + if vcat in _NECESSARY_CATS and is_tracker: + rem = ( + f"'{name}' ({vname}) ist als '{vcat_label}' eingestuft, ist laut " + f"Library aber kein rein technischer Cookie" + + (f" ({purpose})" if purpose else "") + + ". Als einwilligungspflichtig nach § 25 Abs. 1 TDDDG einstufen" + ) + if alt: + rem += f"; EU-Alternative: {alt}" + findings.append({ + "vendor": vname, "cookie": name, "type": "tracker_as_necessary", + "severity": "HIGH" if rich.get("reid_risk") == "high" else "MEDIUM", + "declared": vcat_label, "library_purpose": purpose, + "remediation": rem + ".", + }) + # 2) Kein Zweck deklariert, Library kennt ihn. 
+ elif not (c.get("purpose") or "").strip() and purpose: + findings.append({ + "vendor": vname, "cookie": name, "type": "missing_purpose", + "severity": "MEDIUM", "declared": "(kein Zweck angegeben)", + "library_purpose": purpose, + "remediation": f"Zweck für '{name}' ergänzen. Laut Library: {purpose}", + }) + + # 3) Declared storage duration exceeds the typical lifetime by more than 180 days. + decl_days = _duration_days(c.get("expiry", "")) + max_age = big.get("typical_max_age_seconds") + if max_age: + lib_days = int(max_age) // 86400 + typ = f"{lib_days} Tage" + else: + lib_days = _duration_days(rich.get("typical_lifetime", "")) + typ = rich.get("typical_lifetime", "") + if lib_days > 0 and decl_days - lib_days > 180: + findings.append({ + "vendor": vname, "cookie": name, "type": "excessive_lifetime", + "severity": "LOW", + "declared": c.get("expiry", "") or "—", + "library_purpose": f"typisch: {typ}", + "remediation": ( + f"Speicherdauer von '{name}' ({c.get('expiry', '')}) " + f"überschreitet die typische ({typ}) deutlich — Art. 5 Abs. 1 " + f"lit. e DSGVO (Speicherbegrenzung) prüfen." + ), + }) + + # 4) Third-country transfer (reported once per vendor). + if (country and country not in _EEA or schrems) and vname not in seen_third: + seen_third.add(vname) + findings.append({ + "vendor": vname, "cookie": name, "type": "third_country", + "severity": "MEDIUM", + "declared": country or "—", + "library_purpose": schrems or f"Anbieter-Sitz {country}", + "remediation": ( + f"{vname} überträgt in ein Drittland ({country or 'außerhalb EWR'}) — " + f"SCC (Art. 46) oder DPF-Zertifizierung prüfen und in der " + f"Datenschutzerklärung benennen (Art. 44 ff. DSGVO)." + ), + }) + + # 5) EU alternative (reported once per vendor, commercial hint). 
+ if alt and (vname + alt) not in seen_alt: + seen_alt.add(vname + alt) + findings.append({ + "vendor": vname, "cookie": name, "type": "eu_alternative", + "severity": "LOW", "declared": vname, + "library_purpose": f"EU-Ersatz: {alt}", + "remediation": ( + f"EU-Alternative für {vname}: {alt} — gleiche Funktion, kein " + f"Drittland-Transfer, häufig Lizenzkosten-Ersparnis." + ), + }) + + findings.sort(key=lambda f: _SEV_ORDER.get(f["severity"], 3)) + return { + "summary": { + "checked": checked, + "in_library": in_library, + "findings": len(findings), + }, + "findings": findings, + } diff --git a/backend-compliance/compliance/tests/test_cookie_library_check.py b/backend-compliance/compliance/tests/test_cookie_library_check.py new file mode 100644 index 00000000..9b015a01 --- /dev/null +++ b/backend-compliance/compliance/tests/test_cookie_library_check.py @@ -0,0 +1,95 @@ +"""Pro-Cookie-Library-Abgleich: deklariert vs. cookie_knowledge_db.""" + +from __future__ import annotations + +from compliance.services.cookie_library_check import analyze_cookies + + +def test_tracker_declared_necessary_is_high_finding(): + # _ga ist laut Library technical_necessity=none, reid=high. 
+ vendors = [{ + "name": "Salesforce", "category": "necessary", + "cookies": [{"name": "_ga", "purpose": "Funktionsverbesserung"}], + }] + out = analyze_cookies(vendors) + assert out["summary"]["in_library"] == 1 + f = out["findings"][0] + assert f["type"] == "tracker_as_necessary" + assert f["severity"] == "HIGH" + assert "§ 25" in f["remediation"] + assert f["library_purpose"] # exact_purpose aus Library + + +def test_missing_purpose_when_library_knows_it(): + vendors = [{ + "name": "X", "category": "marketing", + "cookies": [{"name": "_ga", "purpose": ""}], + }] + out = analyze_cookies(vendors) + f = out["findings"][0] + assert f["type"] == "missing_purpose" + assert f["severity"] == "MEDIUM" + assert f["library_purpose"] + + +def test_unknown_cookie_no_finding(): + vendors = [{ + "name": "Y", "category": "necessary", + "cookies": [{"name": "completely_unknown_xyz_123", "purpose": ""}], + }] + out = analyze_cookies(vendors) + assert out["summary"]["checked"] == 1 + assert out["summary"]["in_library"] == 0 + assert out["findings"] == [] + + +def _types(out): + return {f["type"] for f in out["findings"]} + + +def test_third_country_and_eu_alternative_for_us_tracker(): + # _ga: US-Vendor + EU-Alternative Matomo in der Library. + out = analyze_cookies([{ + "name": "Google", "category": "marketing", + "cookies": [{"name": "_ga", "purpose": "Statistik", "expiry": "2 Jahre"}], + }]) + t = _types(out) + assert "third_country" in t + assert "eu_alternative" in t + + +def test_third_country_deduped_per_vendor(): + out = analyze_cookies([{ + "name": "Google", "category": "marketing", + "cookies": [ + {"name": "_ga", "purpose": "x", "expiry": "2 Jahre"}, + {"name": "_gid", "purpose": "x", "expiry": "1 Tag"}, + ], + }]) + assert sum(1 for f in out["findings"] if f["type"] == "third_country") == 1 + + +def test_excessive_lifetime(): + # _gid: typische Laufzeit 24 Stunden; deklariert 2 Jahre. 
+ out = analyze_cookies([{ + "name": "Google", "category": "marketing", + "cookies": [{"name": "_gid", "purpose": "x", "expiry": "2 Jahre"}], + }]) + el = [f for f in out["findings"] if f["type"] == "excessive_lifetime"] + assert el and "Art. 5" in el[0]["remediation"] + + +def test_big_library_covers_cookie_not_in_rich_db(): + # Cookie nicht in der 35er rich-DB, aber in der grossen 2287er (big_lib). + big = {"bmw_track_de": { + "actual_category": "marketing", "typical_max_age_seconds": 86400, + "purpose_de": "Reichweiten-Tracking", "vendor_name": "BMW", + }} + out = analyze_cookies([{ + "name": "BMW", "category": "necessary", + "cookies": [{"name": "bmw_track_de", "purpose": "", "expiry": "2 Jahre"}], + }], big) + assert out["summary"]["in_library"] == 1 + t = {f["type"] for f in out["findings"]} + assert "tracker_as_necessary" in t # actual_category=marketing → Tracker + assert "excessive_lifetime" in t # 2 Jahre vs. 1 Tag