feat(cookie): ① Storage Inventory + storage_transparency-Finding

Trennt echte Cookies von anderem Endgeraete-Speicher (Local/Session Storage,
IndexedDB, Salesforce-Framework-Artefakte) — § 25 TDDDG ist technologieneutral.
- cookie_storage_inventory: detect_storage_type (Name-Muster ComponentDefStorage/
  __MUTEX/LSKey + Laufzeit-Text) + build_storage_inventory + storage_transparency-
  Summenbefund ('X als Cookie gelistet -> Y echte + Z andere').
- Endpoint cookie-check liefert storage_inventory; Frontend zeigt den Breakdown.

Tests: 4 + Frontend-Vitest gruen. Differenzierungsmerkmal: '740 -> 132 + 608'.

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
This commit is contained in:
Benjamin Admin
2026-06-11 09:05:29 +02:00
parent 577ceae4e6
commit 289988d23e
5 changed files with 207 additions and 1 deletions
@@ -232,6 +232,9 @@ async def snapshot_cookie_check(snapshot_id: str):
from compliance.services.cookie_library_check import (
analyze_cookies, load_big_library,
)
from compliance.services.cookie_storage_inventory import (
build_storage_inventory, storage_transparency_finding,
)
db = SessionLocal()
try:
snap = load_snapshot(db, snapshot_id)
@@ -241,7 +244,14 @@ async def snapshot_cookie_check(snapshot_id: str):
names = [c.get("name", "")
for v in vendors for c in (v.get("cookies") or [])]
big = load_big_library(db, names)
return analyze_cookies(vendors, big)
out = analyze_cookies(vendors, big)
inv = build_storage_inventory(vendors)
tf = storage_transparency_finding(inv)
if tf:
out["findings"].insert(0, tf)
out["summary"]["findings"] = len(out["findings"])
out["storage_inventory"] = inv
return out
finally:
db.close()
@@ -0,0 +1,108 @@
"""Storage-Inventory — trennt echte Cookies von anderem Endgeräte-Speicher.
Viele CMPs/Scanner werfen Cookies + Local/Session Storage + IndexedDB +
Framework-Artefakte in EINE „Cookie“-Liste und erfinden Laufzeiten dazu. § 25
TDDDG ist aber technologieneutral (alle Endgeräte-Speicher gleich). Dieses
Modul klassifiziert heuristisch (Name-Muster + Laufzeit-Text) und liefert das
Inventar + einen Transparenz-Befund. v1 ohne Scanner-Umbau; echte Erfassung
(localStorage/IndexedDB/SW) folgt im consent-tester (v2).
"""
from __future__ import annotations
import re
# Name fragments of Salesforce Lightning/Aura artifacts and other typical
# framework / local-storage entries. Compiled once, case-insensitive.
_FRAMEWORK_RE = re.compile(
    "|".join((
        r"componentdefstorage",
        r"globalvalueproviders",
        r"__mutex",
        r"\blskey",
        r"\$vfrc",
    )),
    flags=re.IGNORECASE,
)

# Lower-case substrings that name a specific storage technology.
_SESSION_HINT = (
    "session storage",
    "sessionstorage",
)
_INDEXEDDB_HINT = (
    "indexeddb",
    "indexed db",
)
_LOCAL_HINT = (
    "local storage",
    "localstorage",
)

# "No expiry" wordings -> persistent storage (not a real cookie).
_PERSIST_NOEXPIRY = (
    "bis es durch den nutzer",
    "deaktiviert wird",
    "bis zur löschung",
    "bis zur loeschung",
    "vom nutzer gelöscht",
    "vom nutzer geloescht",
)

# Human-readable label per storage-type key.
STORAGE_LABELS = dict(
    cookie="Cookie",
    local_storage="Local Storage",
    session_storage="Session Storage",
    indexeddb="IndexedDB",
    framework_storage="Framework-Storage",
)
def detect_storage_type(name: str, expiry: str = "") -> str:
    """Guess whether a listed entry is a real cookie or other device storage.

    The result is a heuristic assumption, not scanner evidence. It is
    conservative: anything that matches no rule is reported as ``"cookie"``.

    Args:
        name: Name of the listed entry. ``None``/empty is treated as "".
        expiry: Free-text duration of the entry. ``None``/empty is treated
            as "". Non-string values (e.g. a numeric lifetime) are converted
            with ``str()`` instead of raising.

    Returns:
        One of ``"framework_storage"``, ``"session_storage"``,
        ``"indexeddb"``, ``"local_storage"`` or ``"cookie"``.
    """
    # str() keeps a non-string value from crashing .strip()/.lower()/re.search;
    # for str and None inputs the result is identical to `(value or "")`.
    entry_name = str(name or "").strip()
    duration = str(expiry or "").lower()

    # The name pattern wins over any duration text.
    if _FRAMEWORK_RE.search(entry_name):
        return "framework_storage"

    # Order matters: the first hint group found in the duration text decides.
    hint_rules = (
        ("session_storage", _SESSION_HINT),
        ("indexeddb", _INDEXEDDB_HINT),
        ("local_storage", _LOCAL_HINT),
        # "No expiry" wording is treated as persistent (local) storage.
        ("local_storage", _PERSIST_NOEXPIRY),
    )
    for storage_type, hints in hint_rules:
        if any(hint in duration for hint in hints):
            return storage_type
    return "cookie"
def build_storage_inventory(vendors: list[dict]) -> dict:
    """Count listed entries per storage type and collect non-cookie samples.

    Args:
        vendors: Vendor dicts; each may carry a ``"name"`` and a ``"cookies"``
            list of entry dicts with ``"name"`` and ``"expiry"``. ``None`` or
            an empty list yields an empty inventory.

    Returns:
        A dict with ``"total"``, ``"by_type"`` (count per storage type),
        ``"real_cookies"``, ``"other_storage"`` and ``"examples"`` (at most
        10 entries that were not classified as ``"cookie"``).
    """
    counts: dict[str, int] = {}
    samples: list[dict] = []

    for vendor in vendors or []:
        vendor_label = vendor.get("name") or "?"
        for entry in vendor.get("cookies") or []:
            entry_name = entry.get("name", "")
            kind = detect_storage_type(entry_name, entry.get("expiry", ""))
            counts.setdefault(kind, 0)
            counts[kind] += 1
            # Only non-cookies are sampled, and only the first ten of them.
            if kind == "cookie" or len(samples) >= 10:
                continue
            samples.append(
                {"name": entry_name, "type": kind, "vendor": vendor_label}
            )

    real_cookie_count = counts.get("cookie", 0)
    grand_total = sum(counts.values())
    return {
        "total": grand_total,
        "by_type": counts,
        "real_cookies": real_cookie_count,
        "other_storage": grand_total - real_cookie_count,
        "examples": samples,
    }
def storage_transparency_finding(inv: dict) -> dict | None:
    """Build one summary finding when non-cookies are listed as cookies.

    Args:
        inv: Inventory dict; reads ``"other_storage"``, ``"by_type"`` and
            ``"total"``.

    Returns:
        ``None`` when ``"other_storage"`` is missing or not positive,
        otherwise a finding dict of type ``"storage_transparency"``.
    """
    non_cookie_count = inv.get("other_storage", 0)
    if not non_cookie_count > 0:
        return None

    # "<count> <label>" for every storage type except real cookies.
    breakdown_items = [
        f"{count} {STORAGE_LABELS.get(kind, kind)}"
        for kind, count in inv.get("by_type", {}).items()
        if kind != "cookie"
    ]
    parts = ", ".join(breakdown_items)
    listed_total = inv["total"]

    remediation_text = (
        f"{non_cookie_count} von {listed_total} als 'Cookie' gelisteten Objekten sind "
        f"vermutlich anderer Endgeräte-Speicher ({parts}). § 25 TDDDG ist "
        f"technologieneutral — Speichertechnologie + -dauer pro Objekt "
        f"transparent darstellen (echtes Cookie vs. Local Storage / Framework)."
    )
    control_ref = {
        "control_id": "DATA-2851-A05",
        "regulation": "TDDDG",
        "article": "§ 25 Abs. 1",
    }
    return {
        "vendor": "",
        "cookie": f"{non_cookie_count} Objekte",
        "type": "storage_transparency",
        "severity": "MEDIUM",
        "declared": f"{listed_total} als Cookies gelistet",
        "library_purpose": f"vermutlich: {parts}",
        "remediation": remediation_text,
        "control": control_ref,
    }
@@ -0,0 +1,46 @@
"""Storage-Inventory: Cookie vs. anderer Endgeräte-Speicher (§25 TDDDG)."""
from __future__ import annotations
from compliance.services.cookie_storage_inventory import (
build_storage_inventory,
detect_storage_type,
storage_transparency_finding,
)
def test_framework_artifacts_not_cookies():
    """Framework artifact names must be classified as framework storage."""
    artifact_names = (
        "ComponentDefStorage__MUTEX_X",
        "GlobalValueProviders__MUTEX_Y",
        "LSKey-c$CookieConsentPolicy",
    )
    for artifact in artifact_names:
        assert detect_storage_type(artifact) == "framework_storage"
def test_duration_text_signals_local_storage():
    """A 'no expiry' duration text maps to local storage; cookies stay cookies."""
    no_expiry_text = (
        "Wird solange gespeichert, bis es durch den Nutzer in seinem "
        "Browser deaktiviert wird."
    )
    assert detect_storage_type("x", no_expiry_text) == "local_storage"

    # Real cookies with ordinary durations remain "cookie".
    real_cookies = (("_ga", "2 Jahre"), ("sess", "Session"))
    for cookie_name, duration in real_cookies:
        assert detect_storage_type(cookie_name, duration) == "cookie"
def test_inventory_counts_and_transparency_finding():
    """One framework artifact among three entries yields counts and a finding."""
    listed_entries = [
        {"name": "ComponentDefStorage__MUTEX_X", "expiry": "bis Nutzer deaktiviert"},
        {"name": "_ga", "expiry": "2 Jahre"},
        {"name": "BrowserId1", "expiry": "1 Jahr"},
    ]
    inventory = build_storage_inventory(
        [{"name": "Salesforce", "cookies": listed_entries}]
    )

    assert inventory["total"] == 3
    assert inventory["real_cookies"] == 2  # _ga + BrowserId1
    assert inventory["other_storage"] == 1  # the framework artifact

    finding = storage_transparency_finding(inventory)
    assert finding
    assert finding["type"] == "storage_transparency"
    assert "§ 25" in finding["control"]["article"]
def test_no_finding_when_all_real_cookies():
    """No transparency finding is produced when every entry is a real cookie."""
    only_real_cookies = [
        {"name": "X", "cookies": [{"name": "_ga", "expiry": "2 Jahre"}]},
    ]
    inventory = build_storage_inventory(only_real_cookies)

    assert inventory["other_storage"] == 0
    assert storage_transparency_finding(inventory) is None