diff --git a/admin-compliance/app/sdk/agent/_components/CookieLibraryPanel.tsx b/admin-compliance/app/sdk/agent/_components/CookieLibraryPanel.tsx index b6b4c37b..ad5141a8 100644 --- a/admin-compliance/app/sdk/agent/_components/CookieLibraryPanel.tsx +++ b/admin-compliance/app/sdk/agent/_components/CookieLibraryPanel.tsx @@ -22,6 +22,12 @@ export interface CookieFinding { interface CheckData { summary?: { checked?: number; in_library?: number; findings?: number } findings?: CookieFinding[] + storage_inventory?: { + total?: number + by_type?: Record + real_cookies?: number + other_storage?: number + } } const SEV_COLOR: Record = { @@ -36,14 +42,38 @@ const TYPE_LABEL: Record = { vague_duration: 'Speicherdauer nicht konkret', third_country: 'Drittland-Transfer', eu_alternative: 'EU-Alternative verfügbar', + storage_transparency: 'Speichertyp nicht transparent', +} +const STORAGE_LABEL: Record = { + cookie: 'Cookies', local_storage: 'Local Storage', + session_storage: 'Session Storage', indexeddb: 'IndexedDB', + framework_storage: 'Framework-Storage', } // Pure, testbar. export function CookieFindingList({ data }: { data: CheckData }) { const findings = data.findings || [] const s = data.summary || {} + const inv = data.storage_inventory return (
+ {inv && (inv.total ?? 0) > 0 && ( +
+ Storage-Inventar:{' '} + {inv.total} als „Cookies" gelistet →{' '} + {inv.real_cookies} echte Cookies + {(inv.other_storage ?? 0) > 0 && ( + <> + {inv.other_storage} andere Endgeräte-Speicher + )} + {inv.by_type && ( + + ({Object.entries(inv.by_type) + .map(([k, n]) => `${n} ${STORAGE_LABEL[k] || k}`) + .join(' · ')}) + + )} +
+ )}
Library-Abgleich — {findings.length} Befund{findings.length !== 1 ? 'e' : ''} diff --git a/admin-compliance/app/sdk/agent/_components/__tests__/CookieLibraryPanel.test.tsx b/admin-compliance/app/sdk/agent/_components/__tests__/CookieLibraryPanel.test.tsx index 42e6631a..c0a46559 100644 --- a/admin-compliance/app/sdk/agent/_components/__tests__/CookieLibraryPanel.test.tsx +++ b/admin-compliance/app/sdk/agent/_components/__tests__/CookieLibraryPanel.test.tsx @@ -26,4 +26,16 @@ describe('CookieFindingList', () => { render() expect(screen.getByText(/Keine Abweichungen/)).toBeInTheDocument() }) + + it('zeigt das Storage-Inventar (echte Cookies vs. andere)', () => { + render() + expect(screen.getByText(/Storage-Inventar/)).toBeInTheDocument() + expect(screen.getByText(/60 echte Cookies/)).toBeInTheDocument() + expect(screen.getByText(/40 andere Endgeräte-Speicher/)).toBeInTheDocument() + }) }) diff --git a/backend-compliance/compliance/api/agent_compliance_check_routes.py b/backend-compliance/compliance/api/agent_compliance_check_routes.py index 19deb426..c8e51e90 100644 --- a/backend-compliance/compliance/api/agent_compliance_check_routes.py +++ b/backend-compliance/compliance/api/agent_compliance_check_routes.py @@ -232,6 +232,9 @@ async def snapshot_cookie_check(snapshot_id: str): from compliance.services.cookie_library_check import ( analyze_cookies, load_big_library, ) + from compliance.services.cookie_storage_inventory import ( + build_storage_inventory, storage_transparency_finding, + ) db = SessionLocal() try: snap = load_snapshot(db, snapshot_id) @@ -241,7 +244,14 @@ async def snapshot_cookie_check(snapshot_id: str): names = [c.get("name", "") for v in vendors for c in (v.get("cookies") or [])] big = load_big_library(db, names) - return analyze_cookies(vendors, big) + out = analyze_cookies(vendors, big) + inv = build_storage_inventory(vendors) + tf = storage_transparency_finding(inv) + if tf: + out["findings"].insert(0, tf) + out["summary"]["findings"] = len(out["findings"]) + out["storage_inventory"] = inv + return out finally: db.close() diff --git a/backend-compliance/compliance/services/cookie_storage_inventory.py b/backend-compliance/compliance/services/cookie_storage_inventory.py new file mode 100644 index 00000000..05e16cc7 --- /dev/null +++ b/backend-compliance/compliance/services/cookie_storage_inventory.py @@ -0,0 +1,108 @@ +"""Storage-Inventory — trennt echte Cookies von anderem Endgeräte-Speicher. + +Viele CMPs/Scanner werfen Cookies + Local/Session Storage + IndexedDB + +Framework-Artefakte in EINE „Cookie"-Liste und erfinden Laufzeiten dazu. § 25 +TDDDG ist aber technologieneutral (alle Endgeräte-Speicher gleich). Dieses +Modul klassifiziert heuristisch (Name-Muster + Laufzeit-Text) und liefert das +Inventar + einen Transparenz-Befund. v1 ohne Scanner-Umbau; echte Erfassung +(localStorage/IndexedDB/SW) folgt im consent-tester (v2). +""" + +from __future__ import annotations + +import re + +# Salesforce-Lightning/Aura- + typische Framework-/LocalStorage-Artefakte. +_FRAMEWORK_RE = re.compile( + r"componentdefstorage|globalvalueproviders|__mutex|\blskey|\$vfrc", + re.IGNORECASE, +) +_SESSION_HINT = ("session storage", "sessionstorage") +_INDEXEDDB_HINT = ("indexeddb", "indexed db") +_LOCAL_HINT = ("local storage", "localstorage") +# „kein Ablauf"-Formulierungen → Persistenzspeicher (kein echtes Cookie). +_PERSIST_NOEXPIRY = ( + "bis es durch den nutzer", "deaktiviert wird", "bis zur löschung", + "bis zur loeschung", "vom nutzer gelöscht", "vom nutzer geloescht", +) + +STORAGE_LABELS = { + "cookie": "Cookie", + "local_storage": "Local Storage", + "session_storage": "Session Storage", + "indexeddb": "IndexedDB", + "framework_storage": "Framework-Storage", +} + + +def detect_storage_type(name: str, expiry: str = "") -> str: + """Heuristik: echtes Cookie vs. anderer Endgeräte-Speicher. + + Konservativ — im Zweifel 'cookie'. Ist eine VERMUTUNG (kein Scanner-Beleg). + """ + n = (name or "").strip() + e = (expiry or "").lower() + if _FRAMEWORK_RE.search(n): + return "framework_storage" + if any(h in e for h in _SESSION_HINT): + return "session_storage" + if any(h in e for h in _INDEXEDDB_HINT): + return "indexeddb" + if any(h in e for h in _LOCAL_HINT): + return "local_storage" + if any(h in e for h in _PERSIST_NOEXPIRY): + return "local_storage" + return "cookie" + + +def build_storage_inventory(vendors: list[dict]) -> dict: + """Zählt je Speichertyp + liefert Beispiele für Nicht-Cookies.""" + by_type: dict[str, int] = {} + examples: list[dict] = [] + for v in vendors or []: + vname = v.get("name") or "?" + for c in v.get("cookies") or []: + st = detect_storage_type(c.get("name", ""), c.get("expiry", "")) + by_type[st] = by_type.get(st, 0) + 1 + if st != "cookie" and len(examples) < 10: + examples.append({ + "name": c.get("name", ""), "type": st, "vendor": vname, + }) + total = sum(by_type.values()) + cookies = by_type.get("cookie", 0) + return { + "total": total, + "by_type": by_type, + "real_cookies": cookies, + "other_storage": total - cookies, + "examples": examples, + } + + +def storage_transparency_finding(inv: dict) -> dict | None: + """Ein Summen-Befund, wenn Nicht-Cookies als Cookies gelistet sind.""" + other = inv.get("other_storage", 0) + if other <= 0: + return None + by = inv.get("by_type", {}) + parts = ", ".join( + f"{by[k]} {STORAGE_LABELS.get(k, k)}" for k in by if k != "cookie" + ) + return { + "vendor": "—", + "cookie": f"{other} Objekte", + "type": "storage_transparency", + "severity": "MEDIUM", + "declared": f"{inv['total']} als Cookies gelistet", + "library_purpose": f"vermutlich: {parts}", + "remediation": ( + f"{other} von {inv['total']} als 'Cookie' gelisteten Objekten sind " + f"vermutlich anderer Endgeräte-Speicher ({parts}). § 25 TDDDG ist " + f"technologieneutral — Speichertechnologie + -dauer pro Objekt " + f"transparent darstellen (echtes Cookie vs. Local Storage / Framework)." + ), + "control": { + "control_id": "DATA-2851-A05", + "regulation": "TDDDG", "article": "§ 25 Abs. 1", + }, + } diff --git a/backend-compliance/compliance/tests/test_cookie_storage_inventory.py b/backend-compliance/compliance/tests/test_cookie_storage_inventory.py new file mode 100644 index 00000000..0a2a0c20 --- /dev/null +++ b/backend-compliance/compliance/tests/test_cookie_storage_inventory.py @@ -0,0 +1,46 @@ +"""Storage-Inventory: Cookie vs. anderer Endgeräte-Speicher (§25 TDDDG).""" + +from __future__ import annotations + +from compliance.services.cookie_storage_inventory import ( + build_storage_inventory, + detect_storage_type, + storage_transparency_finding, +) + + +def test_framework_artifacts_not_cookies(): + assert detect_storage_type("ComponentDefStorage__MUTEX_X") == "framework_storage" + assert detect_storage_type("GlobalValueProviders__MUTEX_Y") == "framework_storage" + assert detect_storage_type("LSKey-c$CookieConsentPolicy") == "framework_storage" + + +def test_duration_text_signals_local_storage(): + assert detect_storage_type( + "x", "Wird solange gespeichert, bis es durch den Nutzer in seinem " + "Browser deaktiviert wird.") == "local_storage" + # echte Cookies bleiben cookie: + assert detect_storage_type("_ga", "2 Jahre") == "cookie" + assert detect_storage_type("sess", "Session") == "cookie" + + +def test_inventory_counts_and_transparency_finding(): + vendors = [{"name": "Salesforce", "cookies": [ + {"name": "ComponentDefStorage__MUTEX_X", "expiry": "bis Nutzer deaktiviert"}, + {"name": "_ga", "expiry": "2 Jahre"}, + {"name": "BrowserId1", "expiry": "1 Jahr"}, + ]}] + inv = build_storage_inventory(vendors) + assert inv["total"] == 3 + assert inv["real_cookies"] == 2 # _ga + BrowserId1 + assert inv["other_storage"] == 1 # das Framework-Artefakt + tf = storage_transparency_finding(inv) + assert tf and tf["type"] == "storage_transparency" + assert "§ 25" in tf["control"]["article"] + + +def test_no_finding_when_all_real_cookies(): + inv = build_storage_inventory( + [{"name": "X", "cookies": [{"name": "_ga", "expiry": "2 Jahre"}]}]) + assert inv["other_storage"] == 0 + assert storage_transparency_finding(inv) is None