feat(cookie): Pro-Cookie-Library-Abgleich (2287er OCD + 35er rich) + Panel
- analyze_cookies gleicht Cookies gegen BEIDE Libraries ab: compliance.cookie_library
(2287, OCD/CC0 — Kategorie/Retention) + 35er rich-DB (technical_necessity/reid/
schrems/eu_alternative). 5 Befund-Typen: tracker_as_necessary, missing_purpose,
excessive_lifetime (Art.5), third_country (Art.44), eu_alternative (kommerziell).
- Endpoint GET /snapshots/{id}/cookie-check (load_big_library batch + analyze).
- Frontend CookieLibraryPanel im Snapshot-Detail.
- Fix CookieResultView: Zweck nicht mehr auf 60 Zeichen gekuerzt; Rolle 'unknown'
als Strich statt 'Unbekannt'.
Tests: 7 backend + frontend vitest gruen.
Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
This commit is contained in:
@@ -223,6 +223,29 @@ async def get_snapshot(snapshot_id: str):
|
||||
db.close()
|
||||
|
||||
|
||||
@router.get("/snapshots/{snapshot_id}/cookie-check")
async def snapshot_cookie_check(snapshot_id: str):
    """Run the per-cookie library check for one stored snapshot.

    Loads the snapshot, collects the names of every cookie listed under its
    ``cmp_vendors``, batch-loads the matching library rows with
    ``load_big_library`` and returns whatever ``analyze_cookies`` produces.

    Raises:
        HTTPException: 404 when ``load_snapshot`` returns nothing for the id.
    """
    # Imports stay local to the handler, as in the original.
    from fastapi import HTTPException
    from database import SessionLocal
    from compliance.services.check_snapshot import load_snapshot
    from compliance.services.cookie_library_check import (
        analyze_cookies,
        load_big_library,
    )

    session = SessionLocal()
    try:
        snapshot = load_snapshot(session, snapshot_id)
        if not snapshot:
            raise HTTPException(status_code=404, detail="snapshot not found")

        vendor_list = snapshot.get("cmp_vendors") or []
        cookie_names = []
        for vendor in vendor_list:
            for cookie in vendor.get("cookies") or []:
                cookie_names.append(cookie.get("name", ""))

        library_rows = load_big_library(session, cookie_names)
        return analyze_cookies(vendor_list, library_rows)
    finally:
        # Release the session on every path, including the 404.
        session.close()
|
||||
|
||||
|
||||
@router.get("/admin/benchmark")
|
||||
async def benchmark(
|
||||
industry: str = "",
|
||||
|
||||
@@ -0,0 +1,191 @@
|
||||
"""Pro-Cookie-Abgleich gegen die Cookie-Knowledge-Library.
|
||||
|
||||
Vergleicht die DEKLARIERTEN Angaben aus dem CMP/Snapshot (Kategorie, Zweck,
|
||||
Laufzeit) mit dem, was unsere Library (`cookie_knowledge_db`) über den Cookie
|
||||
weiß — und leitet pro Befund eine Abstellmaßnahme ab.
|
||||
|
||||
Befund-Typen:
|
||||
tracker_as_necessary — als notwendig deklariert, laut Library kein techn. Zweck
|
||||
missing_purpose — kein Zweck deklariert, Library kennt ihn
|
||||
excessive_lifetime — deklarierte Speicherdauer >> typische (Art. 5(1)(e))
|
||||
third_country — Drittland-Transfer (Schrems II, Art. 44 ff.) [je Vendor]
|
||||
eu_alternative — EU-Ersatz verfügbar (kommerziell) [je Vendor]
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import re
|
||||
|
||||
from sqlalchemy import text
|
||||
|
||||
from compliance.services.cookie_knowledge_db import lookup_cookie
|
||||
|
||||
# Library categories (`actual_category`) that mark a cookie as a tracker
# rather than a technically necessary cookie.
_TRACKER_CATS = {"marketing", "statistics", "social_media", "targeting"}
|
||||
|
||||
|
||||
def load_big_library(db, names: list[str]) -> dict:
    """Batch-fetch rows from ``compliance.cookie_library`` for the given names.

    Names are lower-cased and de-duplicated before the lookup; falsy entries
    (empty strings, ``None``) are dropped.

    Args:
        db: Object exposing ``execute(statement, params)``; presumably a
            SQLAlchemy session (confirm against the caller).
        names: Cookie names to look up.

    Returns:
        Mapping of lower-cased cookie name to a dict of the selected columns
        (``n``, ``actual_category``, ``typical_max_age_seconds``,
        ``vendor_name``, ``purpose_de``, ``purpose_en``, ``is_pii``). An empty
        dict is returned without querying when no usable name is passed.
    """
    wanted = {(raw or "").lower() for raw in names if raw}
    if not wanted:
        return {}

    query = text(
        "SELECT lower(cookie_name) AS n, actual_category, "
        "typical_max_age_seconds, vendor_name, purpose_de, purpose_en, "
        "is_pii FROM compliance.cookie_library "
        "WHERE lower(cookie_name) = ANY(:names)"
    )
    result = db.execute(query, {"names": sorted(wanted)})

    library = {}
    for row in result.mappings().fetchall():
        # If several rows share a lower-cased name, the last one wins.
        library[row["n"]] = dict(row)
    return library
|
||||
|
||||
_NECESSARY_CATS = {
|
||||
"necessary", "notwendig", "essential", "essenziell",
|
||||
"funktional", "functional",
|
||||
}
|
||||
_EEA = {
|
||||
"DE", "FR", "IE", "NL", "AT", "BE", "BG", "HR", "CY", "CZ", "DK", "EE",
|
||||
"FI", "GR", "HU", "IT", "LV", "LT", "LU", "MT", "PL", "PT", "RO", "SK",
|
||||
"SI", "ES", "SE", "IS", "LI", "NO",
|
||||
}
|
||||
_SEV_ORDER = {"HIGH": 0, "MEDIUM": 1, "LOW": 2}
|
||||
|
||||
|
||||
def _duration_days(s: str) -> int:
|
||||
"""Grobe Normalisierung einer Laufzeit-Angabe in Tage (0 = Session)."""
|
||||
s = (s or "").lower()
|
||||
if not s or "session" in s:
|
||||
return 0
|
||||
m = re.search(r"(\d+)", s)
|
||||
n = int(m.group(1)) if m else 0
|
||||
if "jahr" in s or "year" in s:
|
||||
return n * 365
|
||||
if "monat" in s or "month" in s:
|
||||
return n * 30
|
||||
if "woche" in s or "week" in s:
|
||||
return n * 7
|
||||
if "tag" in s or "day" in s:
|
||||
return n
|
||||
if "stunde" in s or "hour" in s:
|
||||
return 1
|
||||
return n
|
||||
|
||||
|
||||
def analyze_cookies(vendors: list[dict], big_lib: dict | None = None) -> dict:
    """Check every declared cookie against both cookie libraries.

    Each cookie is looked up in ``big_lib`` (rows keyed by lower-cased cookie
    name, as returned by ``load_big_library``) and via ``lookup_cookie`` (the
    smaller, richer database). Cookies found in neither are counted as checked
    but produce no findings.

    Finding types:
        tracker_as_necessary: vendor category is a "necessary" label but the
            library marks the cookie as a tracker.
        missing_purpose: no purpose declared although the library has one.
            Only evaluated when tracker_as_necessary did not fire.
        excessive_lifetime: declared lifetime exceeds the typical one by more
            than 180 days.
        third_country: vendor country outside the EEA, or a Schrems II status
            is recorded. Emitted once per vendor name.
        eu_alternative: an EU replacement is known. Emitted once per
            vendor/alternative pair.

    Args:
        vendors: Vendor dicts with ``name``, ``category`` and ``cookies``
            (each cookie a dict with ``name``, ``purpose``, ``expiry``).
            ``None`` is treated as empty.
        big_lib: Optional mapping from ``load_big_library``.

    Returns:
        ``{"summary": {"checked", "in_library", "findings"}, "findings": [...]}``
        with findings sorted HIGH, MEDIUM, LOW. Order within one severity is
        the order of discovery.
    """
    big_lib = big_lib or {}
    findings: list[dict] = []
    checked = 0
    in_library = 0
    seen_third: set[str] = set()
    # Tuple keys, so that ("ab", "c") and ("a", "bc") cannot collide.
    seen_alt: set[tuple[str, str]] = set()

    for v in vendors or []:
        vcat = (v.get("category") or "").lower()
        vcat_label = v.get("category") or "—"
        vname = v.get("name") or "?"
        for c in v.get("cookies") or []:
            checked += 1
            # `or ""` also covers an explicit None, which would otherwise
            # crash on .lower() below.
            name = c.get("name") or ""
            rich = lookup_cookie(name) or {}
            big = big_lib.get(name.lower(), {})
            if not rich and not big:
                continue
            in_library += 1

            necessity = rich.get("technical_necessity", "")
            actual_cat = (big.get("actual_category") or "").lower()
            purpose = (
                rich.get("exact_purpose")
                or big.get("purpose_de")
                or big.get("purpose_en")
                or ""
            )
            alt = rich.get("eu_alternative_vendor", "")
            country = (rich.get("vendor_country") or "").upper()
            schrems = rich.get("schrems_ii_status", "")
            is_tracker = (
                necessity in ("none", "partial") or actual_cat in _TRACKER_CATS
            )

            # 1) Declared as necessary, but the library says it is a tracker.
            if vcat in _NECESSARY_CATS and is_tracker:
                rem = (
                    f"'{name}' ({vname}) ist als '{vcat_label}' eingestuft, ist laut "
                    f"Library aber kein rein technischer Cookie"
                    + (f" ({purpose})" if purpose else "")
                    + ". Als einwilligungspflichtig nach § 25 Abs. 1 TDDDG einstufen"
                )
                if alt:
                    rem += f"; EU-Alternative: {alt}"
                findings.append({
                    "vendor": vname, "cookie": name, "type": "tracker_as_necessary",
                    "severity": "HIGH" if rich.get("reid_risk") == "high" else "MEDIUM",
                    "declared": vcat_label, "library_purpose": purpose,
                    "remediation": rem + ".",
                })
            # 2) No purpose declared, but the library knows one.
            elif not (c.get("purpose") or "").strip() and purpose:
                findings.append({
                    "vendor": vname, "cookie": name, "type": "missing_purpose",
                    "severity": "MEDIUM", "declared": "(kein Zweck angegeben)",
                    "library_purpose": purpose,
                    "remediation": f"Zweck für '{name}' ergänzen. Laut Library: {purpose}",
                })

            # 3) Declared lifetime far above the typical lifetime.
            expiry = c.get("expiry", "")
            decl_days = _duration_days(expiry)
            max_age = big.get("typical_max_age_seconds")
            if max_age:
                seconds = int(max_age)
                if seconds >= 86400:
                    lib_days = seconds // 86400
                    typ = f"{lib_days} Tage"
                elif seconds > 0:
                    # A sub-day lifetime counts as one day. Flooring it to 0
                    # would switch the check off for the shortest-lived
                    # cookies.
                    lib_days = 1
                    typ = "< 1 Tag"
                else:
                    lib_days = 0
                    typ = ""
            else:
                lib_days = _duration_days(rich.get("typical_lifetime", ""))
                typ = rich.get("typical_lifetime", "")
            if lib_days > 0 and decl_days - lib_days > 180:
                findings.append({
                    "vendor": vname, "cookie": name, "type": "excessive_lifetime",
                    "severity": "LOW",
                    "declared": expiry or "—",
                    "library_purpose": f"typisch: {typ}",
                    "remediation": (
                        f"Speicherdauer von '{name}' ({expiry}) "
                        f"überschreitet die typische ({typ}) deutlich — Art. 5 Abs. 1 "
                        f"lit. e DSGVO (Speicherbegrenzung) prüfen."
                    ),
                })

            # 4) Third-country transfer (once per vendor).
            # NOTE(review): any non-empty schrems_ii_status triggers this,
            # even for an EEA country. Confirm that is intended.
            outside_eea = bool(country) and country not in _EEA
            if (outside_eea or schrems) and vname not in seen_third:
                seen_third.add(vname)
                findings.append({
                    "vendor": vname, "cookie": name, "type": "third_country",
                    "severity": "MEDIUM",
                    "declared": country or "—",
                    "library_purpose": schrems or f"Anbieter-Sitz {country}",
                    "remediation": (
                        f"{vname} überträgt in ein Drittland ({country or 'außerhalb EWR'}) — "
                        f"SCC (Art. 46) oder DPF-Zertifizierung prüfen und in der "
                        f"Datenschutzerklärung benennen (Art. 44 ff. DSGVO)."
                    ),
                })

            # 5) EU alternative (once per vendor/alternative pair, commercial hint).
            if alt and (vname, alt) not in seen_alt:
                seen_alt.add((vname, alt))
                findings.append({
                    "vendor": vname, "cookie": name, "type": "eu_alternative",
                    "severity": "LOW", "declared": vname,
                    "library_purpose": f"EU-Ersatz: {alt}",
                    "remediation": (
                        f"EU-Alternative für {vname}: {alt} — gleiche Funktion, kein "
                        f"Drittland-Transfer, häufig Lizenzkosten-Ersparnis."
                    ),
                })

    # Stable sort: severity first, discovery order within a severity.
    findings.sort(key=lambda f: _SEV_ORDER.get(f["severity"], 3))
    return {
        "summary": {
            "checked": checked,
            "in_library": in_library,
            "findings": len(findings),
        },
        "findings": findings,
    }
|
||||
@@ -0,0 +1,95 @@
|
||||
"""Pro-Cookie-Library-Abgleich: deklariert vs. cookie_knowledge_db."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from compliance.services.cookie_library_check import analyze_cookies
|
||||
|
||||
|
||||
def test_tracker_declared_necessary_is_high_finding():
    """A library-known tracker declared as 'necessary' is reported first, as HIGH."""
    # The rich library is expected to list _ga with technical_necessity=none
    # and reid=high.
    ga_cookie = {"name": "_ga", "purpose": "Funktionsverbesserung"}
    vendor = {"name": "Salesforce", "category": "necessary", "cookies": [ga_cookie]}

    result = analyze_cookies([vendor])

    assert result["summary"]["in_library"] == 1
    top = result["findings"][0]
    assert top["type"] == "tracker_as_necessary"
    assert top["severity"] == "HIGH"
    assert "§ 25" in top["remediation"]
    assert top["library_purpose"]  # exact_purpose taken from the library
|
||||
|
||||
|
||||
def test_missing_purpose_when_library_knows_it():
    """An empty declared purpose is flagged when the library has one."""
    vendor = {
        "name": "X",
        "category": "marketing",
        "cookies": [{"name": "_ga", "purpose": ""}],
    }

    result = analyze_cookies([vendor])

    first = result["findings"][0]
    assert first["type"] == "missing_purpose"
    assert first["severity"] == "MEDIUM"
    assert first["library_purpose"]
|
||||
|
||||
|
||||
def test_unknown_cookie_no_finding():
    """A cookie found in neither library is counted but yields no finding."""
    unknown = {"name": "completely_unknown_xyz_123", "purpose": ""}
    vendor = {"name": "Y", "category": "necessary", "cookies": [unknown]}

    result = analyze_cookies([vendor])

    summary = result["summary"]
    assert summary["checked"] == 1
    assert summary["in_library"] == 0
    assert result["findings"] == []
|
||||
|
||||
|
||||
def _types(out):
|
||||
return {f["type"] for f in out["findings"]}
|
||||
|
||||
|
||||
def test_third_country_and_eu_alternative_for_us_tracker():
    """A US tracker yields both a third-country and an EU-alternative finding."""
    # The rich library is expected to list _ga with a US vendor and Matomo
    # as EU alternative.
    ga_cookie = {"name": "_ga", "purpose": "Statistik", "expiry": "2 Jahre"}
    vendor = {"name": "Google", "category": "marketing", "cookies": [ga_cookie]}

    found = _types(analyze_cookies([vendor]))

    assert "third_country" in found
    assert "eu_alternative" in found
|
||||
|
||||
|
||||
def test_third_country_deduped_per_vendor():
    """Two cookies of one vendor produce exactly one third-country finding."""
    vendor = {
        "name": "Google",
        "category": "marketing",
        "cookies": [
            {"name": "_ga", "purpose": "x", "expiry": "2 Jahre"},
            {"name": "_gid", "purpose": "x", "expiry": "1 Tag"},
        ],
    }

    result = analyze_cookies([vendor])

    third_country = [f for f in result["findings"] if f["type"] == "third_country"]
    assert len(third_country) == 1
|
||||
|
||||
|
||||
def test_excessive_lifetime():
    """A declared lifetime far above the typical one is flagged under Art. 5."""
    # The library is expected to give _gid a typical lifetime of 24 hours;
    # two years are declared here.
    gid_cookie = {"name": "_gid", "purpose": "x", "expiry": "2 Jahre"}
    vendor = {"name": "Google", "category": "marketing", "cookies": [gid_cookie]}

    result = analyze_cookies([vendor])

    excessive = []
    for finding in result["findings"]:
        if finding["type"] == "excessive_lifetime":
            excessive.append(finding)
    assert excessive and "Art. 5" in excessive[0]["remediation"]
|
||||
|
||||
|
||||
def test_big_library_covers_cookie_not_in_rich_db():
    """A cookie known only to the big library still produces findings."""
    # The cookie is absent from the small rich DB; it is supplied through
    # the big_lib argument only.
    library = {
        "bmw_track_de": {
            "actual_category": "marketing",
            "typical_max_age_seconds": 86400,
            "purpose_de": "Reichweiten-Tracking",
            "vendor_name": "BMW",
        },
    }
    vendor = {
        "name": "BMW",
        "category": "necessary",
        "cookies": [{"name": "bmw_track_de", "purpose": "", "expiry": "2 Jahre"}],
    }

    result = analyze_cookies([vendor], library)

    assert result["summary"]["in_library"] == 1
    found = _types(result)
    assert "tracker_as_necessary" in found  # actual_category=marketing -> tracker
    assert "excessive_lifetime" in found  # 2 years vs. 1 day
|
||||
Reference in New Issue
Block a user