feat(cookie): Pro-Cookie-Library-Abgleich (2287er OCD + 35er rich) + Panel

- analyze_cookies gleicht Cookies gegen BEIDE Libraries ab: compliance.cookie_library
  (2287, OCD/CC0 — Kategorie/Retention) + 35er rich-DB (technical_necessity/reid/
  schrems/eu_alternative). 5 Befund-Typen: tracker_as_necessary, missing_purpose,
  excessive_lifetime (Art.5), third_country (Art.44), eu_alternative (kommerziell).
- Endpoint GET /snapshots/{id}/cookie-check (load_big_library batch + analyze).
- Frontend CookieLibraryPanel im Snapshot-Detail.
- Fix CookieResultView: Zweck nicht mehr auf 60 Zeichen gekuerzt; Rolle 'unknown'
  als Strich statt 'Unbekannt'.

Tests: 7 backend + frontend vitest gruen.

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
This commit is contained in:
Benjamin Admin
2026-06-11 08:18:25 +02:00
parent 19786c96f8
commit d18ef79f18
8 changed files with 486 additions and 6 deletions
@@ -0,0 +1,33 @@
/**
* Cookie-Library-Abgleich-Proxy
* GET /api/sdk/v1/agent/snapshots/{snapshotId}/cookie-check
* → backend /api/compliance/agent/snapshots/{snapshotId}/cookie-check
*
* Pro-Cookie-Abgleich gegen die cookie_knowledge_db (deklariert vs. echt).
*/
import { NextRequest, NextResponse } from 'next/server'
// Base URL of the compliance backend. Resolution order: BACKEND_API_URL,
// then BACKEND_URL, then the hard-coded default below.
const BACKEND_URL =
process.env.BACKEND_API_URL || process.env.BACKEND_URL ||
'http://backend-compliance:8002'
/**
 * Proxies the cookie-library check for one snapshot to the compliance backend.
 *
 * The backend's JSON body and HTTP status are passed through unchanged. If the
 * backend cannot be reached, times out after 60 s, or answers with a non-JSON
 * body, a 503 with an empty `findings` list is returned instead.
 *
 * @param _request - Incoming request (unused).
 * @param params - Route params promise carrying the `snapshotId` path segment.
 * @returns JSON response mirroring the backend, or the 503 fallback.
 */
export async function GET(
  _request: NextRequest,
  { params }: { params: Promise<{ snapshotId: string }> },
) {
  const { snapshotId } = await params
  // The id originates from the request path. Encode it so characters such as
  // "/", "?", "#" or ".." cannot change which backend path/query is requested.
  const safeId = encodeURIComponent(snapshotId)
  try {
    const response = await fetch(
      `${BACKEND_URL}/api/compliance/agent/snapshots/${safeId}/cookie-check`,
      { signal: AbortSignal.timeout(60_000) },
    )
    const data: unknown = await response.json()
    return NextResponse.json(data, { status: response.status })
  } catch {
    // Network error, timeout or unparsable body: report "unavailable".
    return NextResponse.json(
      { error: 'Cookie-Library-Abgleich fehlgeschlagen', findings: [] },
      { status: 503 },
    )
  }
}
@@ -0,0 +1,97 @@
'use client'
/**
* CookieLibraryPanel — Pro-Cookie-Abgleich gegen die Knowledge-Library:
* findet als „notwendig" deklarierte Tracker + fehlende Zwecke und zeigt je
* Befund die Abstellmaßnahme. Lädt aus dem Snapshot (kein Re-Crawl).
*/
import React, { useEffect, useState } from 'react'
// One finding row as rendered by CookieFindingList.
export interface CookieFinding {
// Vendor name shown next to the cookie.
vendor: string
// Cookie name.
cookie: string
// Finding type key; looked up in TYPE_LABEL for display.
type: string
// Severity key; looked up in SEV_COLOR for the badge colour.
severity: string
// Declared value the finding refers to (not rendered by this component).
declared: string
// Purpose text from the library; rendered only when non-empty.
library_purpose: string
// Remediation text shown under the finding.
remediation: string
}
// Response shape consumed by the panel; both parts are optional and default to empty.
interface CheckData {
summary?: { checked?: number; in_library?: number; findings?: number }
findings?: CookieFinding[]
}
// Tailwind badge classes per severity; unknown severities fall back to gray at the call site.
const SEV_COLOR: Record<string, string> = {
HIGH: 'bg-red-100 text-red-700',
MEDIUM: 'bg-amber-100 text-amber-700',
LOW: 'bg-blue-100 text-blue-700',
}
// Display labels per finding type; unknown types are shown with their raw key at the call site.
const TYPE_LABEL: Record<string, string> = {
tracker_as_necessary: 'Tracker als „notwendig" deklariert',
missing_purpose: 'Zweck fehlt',
excessive_lifetime: 'Speicherdauer zu lang',
third_country: 'Drittland-Transfer',
eu_alternative: 'EU-Alternative verfügbar',
}
// Pure, testbar.
/**
 * Pure presentational list of library-check findings.
 *
 * Renders a header with the finding count and the "recognised / checked"
 * summary, followed either by a green "no deviations" note or one row per
 * finding (severity badge, cookie, vendor, type label, optional library
 * purpose, remediation).
 */
export function CookieFindingList({ data }: { data: CheckData }) {
  const rows = data.findings || []
  const stats = data.summary || {}
  const total = rows.length
  const pluralSuffix = total === 1 ? '' : 'e'

  let body: React.ReactNode
  if (total === 0) {
    body = (
      <div className="px-4 py-3 text-sm text-green-700">
        Keine Abweichungen gegen die Library.
      </div>
    )
  } else {
    body = (
      <div className="divide-y divide-gray-100 max-h-96 overflow-auto">
        {rows.map((row, idx) => {
          // Unknown severities/types fall back to a neutral badge / the raw key.
          const badgeClasses = SEV_COLOR[row.severity] || 'bg-gray-100 text-gray-600'
          const typeText = TYPE_LABEL[row.type] || row.type
          return (
            <div key={idx} className="px-4 py-2.5 space-y-1">
              <div className="flex items-center gap-2 flex-wrap">
                <span className={`text-[10px] font-semibold px-1.5 py-0.5 rounded ${badgeClasses}`}>
                  {row.severity}
                </span>
                <code className="text-xs text-gray-700">{row.cookie}</code>
                <span className="text-xs text-gray-400">· {row.vendor}</span>
                <span className="text-[10px] text-gray-500 ml-auto">
                  {typeText}
                </span>
              </div>
              {row.library_purpose ? (
                <div className="text-xs text-gray-500">Library-Zweck: {row.library_purpose}</div>
              ) : null}
              <div className="text-xs text-gray-700">{row.remediation}</div>
            </div>
          )
        })}
      </div>
    )
  }

  return (
    <div className="border rounded-lg overflow-hidden">
      <div className="px-4 py-2.5 bg-slate-50 border-b text-sm font-semibold text-gray-800">
        Library-Abgleich {total} Befund{pluralSuffix}
        <span className="ml-2 text-xs font-normal text-gray-400">
          {stats.in_library ?? 0}/{stats.checked ?? 0} Cookies in der Library erkannt
        </span>
      </div>
      {body}
    </div>
  )
}
/**
 * Loads the cookie-library check for a snapshot and renders the findings.
 *
 * Unlike a successful check with zero findings, a failed request (network
 * error, non-2xx status, unparsable body) is shown as an explicit error
 * instead of the green "no deviations" state.
 */
export function CookieLibraryPanel({ snapshotId }: { snapshotId: string }) {
  const [data, setData] = useState<CheckData | null>(null)
  const [loading, setLoading] = useState(true)
  const [failed, setFailed] = useState(false)
  useEffect(() => {
    // Guards against setting state after unmount or after snapshotId changed.
    let cancelled = false
    // Reset so a changed snapshotId does not keep showing the previous result.
    setLoading(true)
    setFailed(false)
    fetch(`/api/sdk/v1/agent/snapshots/${encodeURIComponent(snapshotId)}/cookie-check`)
      .then(r => {
        // The proxy answers 503/404 with a JSON body; treat those as failures
        // rather than as "checked, nothing found".
        if (!r.ok) throw new Error(`cookie-check HTTP ${r.status}`)
        return r.json()
      })
      .then(d => { if (!cancelled) setData(d) })
      .catch(() => {
        if (!cancelled) {
          setData(null)
          setFailed(true)
        }
      })
      .finally(() => { if (!cancelled) setLoading(false) })
    return () => { cancelled = true }
  }, [snapshotId])
  if (loading) return <div className="text-xs text-gray-400">Library-Abgleich läuft</div>
  if (failed) return <div className="text-sm text-red-600">Library-Abgleich fehlgeschlagen.</div>
  return <CookieFindingList data={data || {}} />
}
@@ -112,11 +112,19 @@ function VendorRow({ v }: { v: SnapshotVendor }) {
</thead> </thead>
<tbody> <tbody>
{cookies.map((c, i) => ( {cookies.map((c, i) => (
<tr key={i} className="border-t border-gray-100"> <tr key={i} className="border-t border-gray-100 align-top">
<td className="px-2 py-1 font-mono text-gray-700 break-all">{c.name}</td> <td className="px-2 py-1 font-mono text-gray-700 break-all w-40">{c.name}</td>
<td className="px-2 py-1 text-gray-500">{ROLE_LABEL[c.functional_role || 'unknown'] || c.functional_role}</td> <td className="px-2 py-1 text-gray-500 w-24">
<td className="px-2 py-1 text-gray-500">{c.purpose ? c.purpose.slice(0, 60) : <span className="text-amber-600 italic">kein Zweck</span>}</td> {c.functional_role && c.functional_role !== 'unknown'
<td className="px-2 py-1 text-gray-400">{c.expiry || '—'}</td> ? (ROLE_LABEL[c.functional_role] || c.functional_role)
: <span className="text-gray-300"></span>}
</td>
<td className="px-2 py-1 text-gray-500 break-words">
{c.purpose
? c.purpose
: <span className="text-amber-600 italic">kein Zweck</span>}
</td>
<td className="px-2 py-1 text-gray-400 w-24 whitespace-nowrap">{c.expiry || '—'}</td>
</tr> </tr>
))} ))}
</tbody> </tbody>
@@ -0,0 +1,29 @@
import { describe, it, expect } from 'vitest'
import { render, screen } from '@testing-library/react'
import { CookieFindingList } from '../CookieLibraryPanel'
// Rendering tests for the pure CookieFindingList component.
describe('CookieFindingList', () => {
  it('zeigt Befunde mit Severity, Library-Zweck + Maßnahme', () => {
    const finding = {
      vendor: 'Salesforce',
      cookie: '_ga',
      type: 'tracker_as_necessary',
      severity: 'HIGH',
      declared: 'necessary',
      library_purpose: 'Besucher eindeutig unterscheiden',
      remediation: 'Als einwilligungspflichtig (§ 25 TDDDG) einstufen.',
    }
    const payload = {
      summary: { checked: 10, in_library: 4, findings: 1 },
      findings: [finding],
    }
    render(<CookieFindingList data={payload} />)
    // Header count, cookie name, severity badge, remediation and summary ratio.
    const expected: Array<string | RegExp> = [
      /1 Befund/,
      '_ga',
      'HIGH',
      /§ 25 TDDDG/,
      /4\/10 Cookies/,
    ]
    for (const matcher of expected) {
      expect(screen.getByText(matcher)).toBeInTheDocument()
    }
  })

  it('zeigt grünen Hinweis bei 0 Befunden', () => {
    const emptyResult = { summary: { checked: 5, in_library: 2 }, findings: [] }
    render(<CookieFindingList data={emptyResult} />)
    expect(screen.getByText(/Keine Abweichungen/)).toBeInTheDocument()
  })
})
@@ -9,6 +9,7 @@
import React, { use as useUnwrap, useEffect, useState } from 'react' import React, { use as useUnwrap, useEffect, useState } from 'react'
import Link from 'next/link' import Link from 'next/link'
import { CookieLibraryPanel } from '../../_components/CookieLibraryPanel'
import { CookieResultView } from '../../_components/CookieResultView' import { CookieResultView } from '../../_components/CookieResultView'
export default function SnapshotDetail( export default function SnapshotDetail(
@@ -45,7 +46,10 @@ export default function SnapshotDetail(
) : error || !snap ? ( ) : error || !snap ? (
<div className="text-sm text-red-600">Snapshot nicht gefunden.</div> <div className="text-sm text-red-600">Snapshot nicht gefunden.</div>
) : hasCookies ? ( ) : hasCookies ? (
<>
<CookieLibraryPanel snapshotId={snapshotId} />
<CookieResultView snapshot={snap} /> <CookieResultView snapshot={snap} />
</>
) : ( ) : (
<div className="text-sm text-gray-500"> <div className="text-sm text-gray-500">
Dieser Snapshot enthält keine Cookie-/Vendor-Daten. Dieser Snapshot enthält keine Cookie-/Vendor-Daten.
@@ -223,6 +223,29 @@ async def get_snapshot(snapshot_id: str):
db.close() db.close()
@router.get("/snapshots/{snapshot_id}/cookie-check")
async def snapshot_cookie_check(snapshot_id: str):
    """Run the per-cookie library check for the vendors stored in a snapshot.

    Loads the snapshot, collects every cookie name from its ``cmp_vendors``,
    batch-loads the matching rows of the large cookie library and returns the
    result of ``analyze_cookies``.

    Raises:
        HTTPException: 404 when the snapshot cannot be loaded.
    """
    from fastapi import HTTPException
    from database import SessionLocal
    from compliance.services.check_snapshot import load_snapshot
    from compliance.services.cookie_library_check import (
        analyze_cookies, load_big_library,
    )

    session = SessionLocal()
    try:
        snapshot = load_snapshot(session, snapshot_id)
        if not snapshot:
            raise HTTPException(status_code=404, detail="snapshot not found")
        vendor_list = snapshot.get("cmp_vendors") or []
        # Flatten all cookie names across vendors for one batch lookup.
        cookie_names = []
        for vendor in vendor_list:
            for cookie in vendor.get("cookies") or []:
                cookie_names.append(cookie.get("name", ""))
        library_rows = load_big_library(session, cookie_names)
        return analyze_cookies(vendor_list, library_rows)
    finally:
        # Always release the session, including on the 404 path.
        session.close()
@router.get("/admin/benchmark") @router.get("/admin/benchmark")
async def benchmark( async def benchmark(
industry: str = "", industry: str = "",
@@ -0,0 +1,191 @@
"""Pro-Cookie-Abgleich gegen die Cookie-Knowledge-Library.
Vergleicht die DEKLARIERTEN Angaben aus dem CMP/Snapshot (Kategorie, Zweck,
Laufzeit) mit dem, was unsere Library (`cookie_knowledge_db`) über den Cookie
weiß — und leitet pro Befund eine Abstellmaßnahme ab.
Befund-Typen:
tracker_as_necessary — als notwendig deklariert, laut Library kein techn. Zweck
missing_purpose — kein Zweck deklariert, Library kennt ihn
excessive_lifetime — deklarierte Speicherdauer >> typische (Art. 5(1)(e))
third_country — Drittland-Transfer (Schrems II, Art. 44 ff.) [je Vendor]
eu_alternative — EU-Ersatz verfügbar (kommerziell) [je Vendor]
"""
from __future__ import annotations
import re
from sqlalchemy import text
from compliance.services.cookie_knowledge_db import lookup_cookie
# Library categories (lower-cased) that mark a cookie as a tracker in analyze_cookies.
_TRACKER_CATS = {"marketing", "statistics", "social_media", "targeting"}
def load_big_library(db, names: list[str]) -> dict:
    """Batch-load rows of ``compliance.cookie_library`` for the given cookie names.

    Names are lower-cased and de-duplicated; empty/None entries are ignored.
    Returns a dict keyed by the lower-cased cookie name, each value being the
    row as a plain dict (category, typical max age, vendor, purposes, is_pii).
    Returns an empty dict without touching the database when no usable name
    is given.
    """
    wanted = set()
    for raw in names:
        if raw:
            wanted.add(raw.lower())
    if not wanted:
        return {}

    lookup_names = sorted(wanted)
    query = text(
        "SELECT lower(cookie_name) AS n, actual_category, "
        "typical_max_age_seconds, vendor_name, purpose_de, purpose_en, "
        "is_pii FROM compliance.cookie_library "
        "WHERE lower(cookie_name) = ANY(:names)"
    )
    result = db.execute(query, {"names": lookup_names})

    by_name = {}
    for row in result.mappings().fetchall():
        by_name[row["n"]] = dict(row)
    return by_name
# Declared vendor categories (compared lower-cased) that count as "necessary"
# for the tracker_as_necessary check.
_NECESSARY_CATS = {
"necessary", "notwendig", "essential", "essenziell",
"funktional", "functional",
}
# Country codes treated as inside the EEA; a vendor country outside this set
# triggers the third_country finding.
_EEA = {
"DE", "FR", "IE", "NL", "AT", "BE", "BG", "HR", "CY", "CZ", "DK", "EE",
"FI", "GR", "HU", "IT", "LV", "LT", "LU", "MT", "PL", "PT", "RO", "SK",
"SI", "ES", "SE", "IS", "LI", "NO",
}
# Sort rank per severity (lower sorts first); unknown severities are ranked 3
# by the sort key in analyze_cookies.
_SEV_ORDER = {"HIGH": 0, "MEDIUM": 1, "LOW": 2}
def _duration_days(s: str) -> int:
"""Grobe Normalisierung einer Laufzeit-Angabe in Tage (0 = Session)."""
s = (s or "").lower()
if not s or "session" in s:
return 0
m = re.search(r"(\d+)", s)
n = int(m.group(1)) if m else 0
if "jahr" in s or "year" in s:
return n * 365
if "monat" in s or "month" in s:
return n * 30
if "woche" in s or "week" in s:
return n * 7
if "tag" in s or "day" in s:
return n
if "stunde" in s or "hour" in s:
return 1
return n
def analyze_cookies(vendors: list[dict], big_lib: dict | None = None) -> dict:
    """Compare every declared cookie against both cookie libraries.

    ``big_lib`` is the batch-loaded slice of the large cookie library (keyed by
    lower-cased cookie name; provides category, typical max age and purpose).
    ``lookup_cookie`` provides the smaller, richer record (technical necessity,
    re-identification risk, vendor country, Schrems II status, EU alternative).

    Args:
        vendors: Vendor dicts with ``name``, ``category`` and a ``cookies``
            list of dicts (``name``, ``purpose``, ``expiry``).
        big_lib: Optional mapping as returned by ``load_big_library``.

    Returns:
        ``{"summary": {"checked", "in_library", "findings"}, "findings": [...]}``
        with findings sorted by severity (HIGH, MEDIUM, LOW). Each finding has
        the keys vendor, cookie, type, severity, declared, library_purpose and
        remediation.
    """
    big_lib = big_lib or {}
    findings: list[dict] = []
    checked = 0
    in_library = 0
    seen_third: set[str] = set()
    # Tuple key instead of string concatenation, so ("ab", "c") and ("a", "bc")
    # cannot collide.
    seen_alt: set[tuple[str, str]] = set()

    for v in vendors or []:
        vcat = (v.get("category") or "").lower()
        vcat_label = v.get("category") or ""
        vname = v.get("name") or "?"
        for c in v.get("cookies") or []:
            checked += 1
            # ``name`` may be present but None in stored snapshots; normalise
            # so the ``.lower()`` lookup below cannot raise.
            name = c.get("name") or ""
            rich = lookup_cookie(name) or {}
            big = big_lib.get(name.lower(), {})
            if not rich and not big:
                continue
            in_library += 1

            necessity = rich.get("technical_necessity", "")
            actual_cat = (big.get("actual_category") or "").lower()
            purpose = (rich.get("exact_purpose") or big.get("purpose_de")
                       or big.get("purpose_en") or "")
            alt = rich.get("eu_alternative_vendor", "")
            country = (rich.get("vendor_country") or "").upper()
            schrems = rich.get("schrems_ii_status", "")
            is_tracker = necessity in ("none", "partial") or actual_cat in _TRACKER_CATS

            # 1) Declared as necessary, but a tracker according to the library.
            if vcat in _NECESSARY_CATS and is_tracker:
                rem = (
                    f"'{name}' ({vname}) ist als '{vcat_label}' eingestuft, ist laut "
                    f"Library aber kein rein technischer Cookie"
                    + (f" ({purpose})" if purpose else "")
                    + ". Als einwilligungspflichtig nach § 25 Abs. 1 TDDDG einstufen"
                )
                if alt:
                    rem += f"; EU-Alternative: {alt}"
                findings.append({
                    "vendor": vname, "cookie": name, "type": "tracker_as_necessary",
                    "severity": "HIGH" if rich.get("reid_risk") == "high" else "MEDIUM",
                    "declared": vcat_label, "library_purpose": purpose,
                    "remediation": rem + ".",
                })
            # 2) No purpose declared, but the library knows one.
            elif not (c.get("purpose") or "").strip() and purpose:
                findings.append({
                    "vendor": vname, "cookie": name, "type": "missing_purpose",
                    "severity": "MEDIUM", "declared": "(kein Zweck angegeben)",
                    "library_purpose": purpose,
                    "remediation": f"Zweck für '{name}' ergänzen. Laut Library: {purpose}",
                })

            # 3) Declared lifetime far above the typical lifetime.
            decl_days = _duration_days(c.get("expiry", ""))
            max_age = big.get("typical_max_age_seconds")
            if max_age:
                max_age_seconds = int(max_age)
                if max_age_seconds >= 86400:
                    lib_days = max_age_seconds // 86400
                    typ = f"{lib_days} Tage"
                elif max_age_seconds > 0:
                    # Sub-day typical lifetimes used to floor to 0 days, which
                    # silently disabled this check; treat them as one day.
                    lib_days = 1
                    typ = "unter 1 Tag"
                else:
                    lib_days = 0
                    typ = f"{lib_days} Tage"
            else:
                lib_days = _duration_days(rich.get("typical_lifetime", ""))
                typ = rich.get("typical_lifetime", "")
            if lib_days > 0 and decl_days - lib_days > 180:
                findings.append({
                    "vendor": vname, "cookie": name, "type": "excessive_lifetime",
                    "severity": "LOW",
                    "declared": c.get("expiry", "") or "",
                    "library_purpose": f"typisch: {typ}",
                    "remediation": (
                        f"Speicherdauer von '{name}' ({c.get('expiry', '')}) "
                        f"überschreitet die typische ({typ}) deutlich — Art. 5 Abs. 1 "
                        f"lit. e DSGVO (Speicherbegrenzung) prüfen."
                    ),
                })

            # 4) Third-country transfer (once per vendor).
            # NOTE(review): any non-empty schrems_ii_status triggers this, even
            # for an EEA vendor country — confirm the library leaves the field
            # empty when no transfer concern exists.
            third_country = bool((country and country not in _EEA) or schrems)
            if third_country and vname not in seen_third:
                seen_third.add(vname)
                findings.append({
                    "vendor": vname, "cookie": name, "type": "third_country",
                    "severity": "MEDIUM",
                    "declared": country or "",
                    "library_purpose": schrems or f"Anbieter-Sitz {country}",
                    "remediation": (
                        f"{vname} überträgt in ein Drittland ({country or 'außerhalb EWR'}) — "
                        f"SCC (Art. 46) oder DPF-Zertifizierung prüfen und in der "
                        f"Datenschutzerklärung benennen (Art. 44 ff. DSGVO)."
                    ),
                })

            # 5) EU alternative (once per vendor/alternative pair, commercial hint).
            if alt and (vname, alt) not in seen_alt:
                seen_alt.add((vname, alt))
                findings.append({
                    "vendor": vname, "cookie": name, "type": "eu_alternative",
                    "severity": "LOW", "declared": vname,
                    "library_purpose": f"EU-Ersatz: {alt}",
                    "remediation": (
                        f"EU-Alternative für {vname}: {alt} — gleiche Funktion, kein "
                        f"Drittland-Transfer, häufig Lizenzkosten-Ersparnis."
                    ),
                })

    # Stable sort: insertion order is kept within the same severity.
    findings.sort(key=lambda f: _SEV_ORDER.get(f["severity"], 3))
    return {
        "summary": {
            "checked": checked,
            "in_library": in_library,
            "findings": len(findings),
        },
        "findings": findings,
    }
@@ -0,0 +1,95 @@
"""Pro-Cookie-Library-Abgleich: deklariert vs. cookie_knowledge_db."""
from __future__ import annotations
from compliance.services.cookie_library_check import analyze_cookies
def test_tracker_declared_necessary_is_high_finding():
    """A known tracker declared as 'necessary' yields a HIGH top finding."""
    # Per the original note: the library lists _ga with
    # technical_necessity=none and reid=high.
    cookie = {"name": "_ga", "purpose": "Funktionsverbesserung"}
    vendor = {"name": "Salesforce", "category": "necessary", "cookies": [cookie]}
    result = analyze_cookies([vendor])
    assert result["summary"]["in_library"] == 1
    top = result["findings"][0]
    assert top["type"] == "tracker_as_necessary"
    assert top["severity"] == "HIGH"
    assert "§ 25" in top["remediation"]
    # exact_purpose is expected to come from the library.
    assert top["library_purpose"]
def test_missing_purpose_when_library_knows_it():
    """An empty declared purpose is reported when the library has one."""
    cookie = {"name": "_ga", "purpose": ""}
    vendor = {"name": "X", "category": "marketing", "cookies": [cookie]}
    result = analyze_cookies([vendor])
    first = result["findings"][0]
    assert first["type"] == "missing_purpose"
    assert first["severity"] == "MEDIUM"
    assert first["library_purpose"]
def test_unknown_cookie_no_finding():
    """A cookie unknown to both libraries is counted but produces no finding."""
    cookie = {"name": "completely_unknown_xyz_123", "purpose": ""}
    vendor = {"name": "Y", "category": "necessary", "cookies": [cookie]}
    result = analyze_cookies([vendor])
    summary = result["summary"]
    assert summary["checked"] == 1
    assert summary["in_library"] == 0
    assert result["findings"] == []
def _types(out):
return {f["type"] for f in out["findings"]}
def test_third_country_and_eu_alternative_for_us_tracker():
    """A US tracker yields both a third_country and an eu_alternative finding."""
    # Per the original note: _ga has a US vendor and the EU alternative Matomo
    # in the library.
    cookie = {"name": "_ga", "purpose": "Statistik", "expiry": "2 Jahre"}
    vendor = {"name": "Google", "category": "marketing", "cookies": [cookie]}
    found = _types(analyze_cookies([vendor]))
    assert "third_country" in found
    assert "eu_alternative" in found
def test_third_country_deduped_per_vendor():
    """Two cookies of one vendor produce only a single third_country finding."""
    cookies = [
        {"name": "_ga", "purpose": "x", "expiry": "2 Jahre"},
        {"name": "_gid", "purpose": "x", "expiry": "1 Tag"},
    ]
    vendor = {"name": "Google", "category": "marketing", "cookies": cookies}
    result = analyze_cookies([vendor])
    third_country_hits = [
        item for item in result["findings"] if item["type"] == "third_country"
    ]
    assert len(third_country_hits) == 1
def test_excessive_lifetime():
    """A declared lifetime far above the typical one is flagged with Art. 5."""
    # Per the original note: _gid typically lives 24 hours; declared is 2 years.
    cookie = {"name": "_gid", "purpose": "x", "expiry": "2 Jahre"}
    vendor = {"name": "Google", "category": "marketing", "cookies": [cookie]}
    result = analyze_cookies([vendor])
    lifetime_hits = [
        item for item in result["findings"] if item["type"] == "excessive_lifetime"
    ]
    assert lifetime_hits
    assert "Art. 5" in lifetime_hits[0]["remediation"]
def test_big_library_covers_cookie_not_in_rich_db():
    """A cookie known only to the large library is still analysed."""
    # The cookie is supplied via big_lib only; it is expected to be absent
    # from the small rich library.
    library_row = {
        "actual_category": "marketing",
        "typical_max_age_seconds": 86400,
        "purpose_de": "Reichweiten-Tracking",
        "vendor_name": "BMW",
    }
    big = {"bmw_track_de": library_row}
    cookie = {"name": "bmw_track_de", "purpose": "", "expiry": "2 Jahre"}
    vendor = {"name": "BMW", "category": "necessary", "cookies": [cookie]}
    result = analyze_cookies([vendor], big)
    assert result["summary"]["in_library"] == 1
    found = {item["type"] for item in result["findings"]}
    # actual_category=marketing marks the cookie as a tracker.
    assert "tracker_as_necessary" in found
    # Declared 2 years versus a typical lifetime of 1 day.
    assert "excessive_lifetime" in found