feat(cookie): Deklaration-vs-Bibliothek-Diff-Sicht + Funnel-KPI

Für die Library-getroffene Teilmenge (~32%) pro Cookie die Feld-Abweichungen deklariert→Library (Kategorie/Laufzeit/Zweck) als Diff-Karte, plus ehrlicher Funnel (gesamt → geprüft → abweichend) — nicht-getroffene Cookies sind nicht prüfbar (kein Pass/Fail), passend zur Tonalität. - analyze_cookies: 'expected'-Soll-Wert an tracker_as_necessary/excessive_lifetime/missing_purpose (+ _CAT_LABEL_DE). - neues cookie_declaration_diff.build_declaration_diff: reine Regroup-Aggregation der Findings pro Cookie (single source = analyze_cookies), Hinweis-Typen (third_country/eu_alternative) bewusst ausgeschlossen. - cookie-check exponiert out['declaration_diff']. - CookieDeclarationDiff.tsx oben im Cookie-Tab (vor Panel/ResultView). Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
2026-06-11 21:00:50 +02:00
parent c35977c925
commit 403e3c66d2
7 changed files with 265 additions and 1 deletions
@@ -80,6 +80,11 @@ async def snapshot_cookie_check(snapshot_id: str):
            out["findings"].insert(0, tf)
            out["summary"]["findings"] = len(out["findings"])
        out["storage_inventory"] = inv
+        # Deklaration-vs-Bibliothek-Diff (nur die getroffene Teilmenge) + Funnel.
+        from compliance.services.cookie_declaration_diff import (
+            build_declaration_diff,
+        )
+        out["declaration_diff"] = build_declaration_diff(out)
        # ② Documentation Drift: Cookie-Richtlinie (Text) vs. Browser-Realität.
        docs = snap.get("doc_entries") or []
        cookie_text = next(
@@ -0,0 +1,70 @@
+"""Deklaration-vs-Bibliothek-Diff.
+
+Regroupt die `analyze_cookies`-Findings PRO COOKIE zu Feld-Diffs
+(deklariert → Library) — nur für die Library-getroffene Teilmenge, denn nur
+dort gibt es eine Ground-Truth. Plus ein ehrlicher Funnel (gesamt → geprüft →
+abweichend), damit nie der Eindruck entsteht, ALLE Cookies seien geprüft
+(passt zur BreakPilot-Tonalität: nicht-getroffene Cookies = nicht prüfbar,
+kein Pass, kein Fail).
+
+Single source of truth bleibt `analyze_cookies` (Erkennung); dieses Modul ist
+reine Präsentations-Aggregation und damit isoliert testbar.
+"""
+
+from __future__ import annotations
+
+# Finding type → field label; only types with a real library target ('expected').
+# vague_duration/missing_retention/missing_opt_out have NO library comparison,
+# and third_country/eu_alternative are hints → deliberately NOT in the diff.
+_FIELD = {
+    "tracker_as_necessary": "Kategorie",
+    "excessive_lifetime": "Laufzeit",
+    "missing_purpose": "Zweck",
+}
+_SEV_ORDER = {"HIGH": 0, "MEDIUM": 1, "LOW": 2}  # smaller rank sorts first
+
+
+def build_declaration_diff(analysis: dict) -> dict:
+    """Aus dem `analyze_cookies`-Ergebnis die Diff-Sicht + Funnel bauen."""
+    findings = analysis.get("findings") or []
+    summary = analysis.get("summary") or {}
+
+    rows: dict[tuple, dict] = {}
+    for f in findings:
+        field = _FIELD.get(f.get("type"))
+        if not field:
+            continue
+        key = (f.get("vendor") or "", f.get("cookie") or "")
+        row = rows.get(key)
+        if row is None:
+            row = {
+                "cookie": f.get("cookie") or "",
+                "vendor": f.get("vendor") or "",
+                "diffs": [],
+                "measures": [],
+                "severity": "LOW",
+            }
+            rows[key] = row
+        row["diffs"].append({
+            "field": field,
+            "declared": str(f.get("declared") or "—"),
+            "expected": str(f.get("expected") or f.get("library_purpose") or "—"),
+            "severe": f.get("severity") == "HIGH",
+        })
+        rem = f.get("remediation")
+        if rem and rem not in row["measures"]:
+            row["measures"].append(rem)
+        if _SEV_ORDER.get(f.get("severity"), 3) < _SEV_ORDER.get(row["severity"], 3):
+            row["severity"] = f.get("severity") or "LOW"
+
+    out_rows = sorted(rows.values(), key=lambda r: _SEV_ORDER.get(r["severity"], 3))
+    total = int(summary.get("checked") or 0)        # alle Cookies
+    checked = int(summary.get("in_library") or 0)   # davon mit Library-Treffer
+    return {
+        "coverage": {
+            "total": total,
+            "checked": checked,
+            "discrepant": len(out_rows),
+        },
+        "rows": out_rows,
+    }
@@ -24,6 +24,12 @@ from sqlalchemy import text
 from compliance.services.cookie_knowledge_db import lookup_cookie

 _TRACKER_CATS = {"marketing", "statistics", "social_media", "targeting"}
+# Library category → German label (for the declaration-vs-library diff view).
+_CAT_LABEL_DE = {
+    "marketing": "Marketing", "statistics": "Statistik",
+    "social_media": "Social Media", "targeting": "Targeting",
+    "functional": "Funktional", "necessary": "Notwendig",
+}

 # A — auditfeste Verdrahtung: jeder Befund-Typ → echter Control (control_id aus
 # doc_check_controls) + legal_basis. Die Controls tragen regulation/article noch
@@ -238,7 +244,9 @@ def analyze_cookies(vendors: list[dict], big_lib: dict | None = None) -> dict:
                findings.append({
                    "vendor": vname, "cookie": name, "type": "tracker_as_necessary",
                    "severity": "HIGH" if rich.get("reid_risk") == "high" else "MEDIUM",
-                    "declared": vcat_label, "library_purpose": purpose,
+                    "declared": vcat_label,
+                    "expected": _CAT_LABEL_DE.get(actual_cat, "einwilligungspflichtig"),
+                    "library_purpose": purpose,
                    "remediation": rem + ".",
                })
            # 2) Kein Zweck deklariert, Library kennt ihn.
@@ -246,6 +254,7 @@ def analyze_cookies(vendors: list[dict], big_lib: dict | None = None) -> dict:
                findings.append({
                    "vendor": vname, "cookie": name, "type": "missing_purpose",
                    "severity": "MEDIUM", "declared": "(kein Zweck angegeben)",
+                    "expected": purpose,
                    "library_purpose": purpose,
                    "remediation": f"Zweck für '{name}' ergänzen. Laut Library: {purpose}",
                })
@@ -264,6 +273,7 @@ def analyze_cookies(vendors: list[dict], big_lib: dict | None = None) -> dict:
                    "vendor": vname, "cookie": name, "type": "excessive_lifetime",
                    "severity": "LOW",
                    "declared": c.get("expiry", "") or "—",
+                    "expected": typ,
                    "library_purpose": f"typisch: {typ}",
                    "remediation": (
                        f"Speicherdauer von '{name}' ({c.get('expiry', '')}) "
@@ -0,0 +1,39 @@
+"""Deklaration-vs-Bibliothek-Diff: Regroup pro Cookie + Funnel."""
+
+from __future__ import annotations
+
+from compliance.services.cookie_declaration_diff import build_declaration_diff
+
+
+def test_groups_diffs_per_cookie_with_funnel():
+    analysis = {
+        "summary": {"checked": 100, "in_library": 32, "findings": 4},
+        "findings": [
+            {"vendor": "Google", "cookie": "_ga", "type": "tracker_as_necessary",
+             "severity": "HIGH", "declared": "notwendig", "expected": "Marketing",
+             "remediation": "Als § 25 einstufen."},
+            {"vendor": "Google", "cookie": "_ga", "type": "excessive_lifetime",
+             "severity": "LOW", "declared": "Session", "expected": "730 Tage",
+             "remediation": "Laufzeit prüfen."},
+            {"vendor": "Meta", "cookie": "_fbp", "type": "missing_purpose",
+             "severity": "MEDIUM", "declared": "(kein Zweck)", "expected": "Tracking",
+             "remediation": "Zweck ergänzen."},
+            # Hinweis-Typ ohne Library-Soll → NICHT im Diff.
+            {"vendor": "Google", "cookie": "_ga", "type": "third_country",
+             "severity": "MEDIUM", "declared": "US"},
+        ],
+    }
+    out = build_declaration_diff(analysis)
+    assert out["coverage"] == {"total": 100, "checked": 32, "discrepant": 2}
+    ga = next(r for r in out["rows"] if r["cookie"] == "_ga")
+    assert {d["field"] for d in ga["diffs"]} == {"Kategorie", "Laufzeit"}
+    assert ga["severity"] == "HIGH"                 # höchste der beiden
+    assert ga["diffs"][0]["severe"] is True
+    assert out["rows"][0]["cookie"] == "_ga"        # HIGH zuerst sortiert
+
+
+def test_no_discrepancies_yields_empty_rows():
+    out = build_declaration_diff(
+        {"summary": {"checked": 10, "in_library": 4}, "findings": []})
+    assert out["coverage"] == {"total": 10, "checked": 4, "discrepant": 0}
+    assert out["rows"] == []