diff --git a/admin-compliance/app/sdk/agent/_components/CookieResultView.tsx b/admin-compliance/app/sdk/agent/_components/CookieResultView.tsx index 8eb6512e..15c45ef1 100644 --- a/admin-compliance/app/sdk/agent/_components/CookieResultView.tsx +++ b/admin-compliance/app/sdk/agent/_components/CookieResultView.tsx @@ -140,7 +140,7 @@ function VendorRow({ v, lib }: { v: SnapshotVendor; lib?: LibCategories }) { {v.country} )} - {cookies.length} Cookies + {cookies.length} {v.compliance_score != null ? `${v.compliance_score}%` : '—'} diff --git a/backend-compliance/compliance/api/agent_check/_agent_outputs.py b/backend-compliance/compliance/api/agent_check/_agent_outputs.py index e8db550b..0ce66f7c 100644 --- a/backend-compliance/compliance/api/agent_check/_agent_outputs.py +++ b/backend-compliance/compliance/api/agent_check/_agent_outputs.py @@ -75,7 +75,9 @@ def doc_input_from_snapshot(snap: dict, doc_type: str) -> dict | None: "business_scope": scope, "company_name": (profile.get("company_name") or snap.get("site_label") or ""), "origin_domain": snap.get("site_domain", ""), - "context": {"scan_context": snap.get("scan_context") or {}}, + # skip_llm: Snapshot-Ansicht ist interaktiv → kein ~40s-LLM-Schritt. + "context": {"scan_context": snap.get("scan_context") or {}, + "skip_llm": True}, } diff --git a/backend-compliance/compliance/api/agent_compliance_check_routes.py b/backend-compliance/compliance/api/agent_compliance_check_routes.py index 6cdeacf5..c8b0e039 100644 --- a/backend-compliance/compliance/api/agent_compliance_check_routes.py +++ b/backend-compliance/compliance/api/agent_compliance_check_routes.py @@ -218,6 +218,11 @@ async def get_snapshot(snapshot_id: str): snap = load_snapshot(db, snapshot_id) if not snap: raise HTTPException(status_code=404, detail="snapshot not found") + if snap.get("cmp_vendors"): + from compliance.services.cookie_storage_inventory import ( + dedupe_vendor_cookies, + ) + snap["cmp_vendors"] = dedupe_vendor_cookies(snap["cmp_vendors"]) return snap finally: db.close() diff --git a/backend-compliance/compliance/api/snapshot_check_routes.py b/backend-compliance/compliance/api/snapshot_check_routes.py index be7874f5..c7dc7088 100644 --- a/backend-compliance/compliance/api/snapshot_check_routes.py +++ b/backend-compliance/compliance/api/snapshot_check_routes.py @@ -58,6 +58,7 @@ async def snapshot_cookie_check(snapshot_id: str): ) from compliance.services.cookie_storage_inventory import ( build_storage_inventory, storage_transparency_finding, + dedupe_vendor_cookies, ) from compliance.services.cookie_compliance_audit import ( audit_cookie_compliance, @@ -67,7 +68,8 @@ async def snapshot_cookie_check(snapshot_id: str): snap = load_snapshot(db, snapshot_id) if not snap: raise HTTPException(status_code=404, detail="snapshot not found") - vendors = snap.get("cmp_vendors") or [] + # Consent-Phasen duplizieren Cookies → je Vendor nach Name deduplizieren. + vendors = dedupe_vendor_cookies(snap.get("cmp_vendors") or []) names = [c.get("name", "") for v in vendors for c in (v.get("cookies") or [])] big = load_big_library(db, names) diff --git a/backend-compliance/compliance/services/cookie_storage_inventory.py b/backend-compliance/compliance/services/cookie_storage_inventory.py index 05e16cc7..0abf9d9e 100644 --- a/backend-compliance/compliance/services/cookie_storage_inventory.py +++ b/backend-compliance/compliance/services/cookie_storage_inventory.py @@ -35,6 +35,28 @@ STORAGE_LABELS = { } +def dedupe_vendor_cookies(vendors: list[dict]) -> list[dict]: + """Cookies tauchen je Vendor mehrfach auf (Consent-Phasen before_consent / + after_accept / after_reject derselben Crawl-Session). Dedupliziert je Vendor + nach (lower) Name — behält den ersten. Behebt aufgeblähte Cookie-/Finding- + Zahlen (BMW: 2196 → ~772 eindeutig).""" + out: list[dict] = [] + for v in vendors or []: + seen: set[str] = set() + uniq: list[dict] = [] + for c in (v.get("cookies") or []): + n = (c.get("name") or "").strip().lower() + if n and n in seen: + continue + if n: + seen.add(n) + uniq.append(c) + nv = dict(v) + nv["cookies"] = uniq + out.append(nv) + return out + + def detect_storage_type(name: str, expiry: str = "") -> str: """Heuristik: echtes Cookie vs. anderer Endgeräte-Speicher. diff --git a/backend-compliance/compliance/services/specialist_agents/impressum/agent.py b/backend-compliance/compliance/services/specialist_agents/impressum/agent.py index 5b692014..330884fe 100644 --- a/backend-compliance/compliance/services/specialist_agents/impressum/agent.py +++ b/backend-compliance/compliance/services/specialist_agents/impressum/agent.py @@ -259,7 +259,10 @@ class ImpressumAgent(BaseSpecialistAgent): ) # ── Layer 3: Semantic-Validator nur für HIGH/MEDIUM-Fails ── - await self._semantic_demote(text, findings, coverage) + # In der interaktiven Snapshot-Ansicht (context skip_llm) übersprungen: + # der LLM-Schritt kostet ~40s, das deterministische Ergebnis genügt dort. + if not (agent_input.context or {}).get("skip_llm"): + await self._semantic_demote(text, findings, coverage) # Confidence: harmonic mean der Findings (oder hoch wenn 0) confs = [f.confidence for f in findings if f.confidence] or [0.95] diff --git a/backend-compliance/compliance/tests/test_cookie_storage_inventory.py b/backend-compliance/compliance/tests/test_cookie_storage_inventory.py index 0a2a0c20..34cb41a4 100644 --- a/backend-compliance/compliance/tests/test_cookie_storage_inventory.py +++ b/backend-compliance/compliance/tests/test_cookie_storage_inventory.py @@ -4,11 +4,24 @@ from __future__ import annotations from compliance.services.cookie_storage_inventory import ( build_storage_inventory, + dedupe_vendor_cookies, detect_storage_type, storage_transparency_finding, ) +def test_dedupe_vendor_cookies_removes_consent_phase_dupes(): + vendors = [{"name": "V", "cookies": [ + {"name": "_ga", "expiry": "2 Jahre"}, + {"name": "_ga", "expiry": "2 Jahre"}, # before/after_accept-Dublette + {"name": "_GA", "expiry": "2 Jahre"}, # case-insensitiv + {"name": "sid", "expiry": "Session"}, + ]}] + out = dedupe_vendor_cookies(vendors) + names = [c["name"] for c in out[0]["cookies"]] + assert names == ["_ga", "sid"] # 4 → 2 eindeutig + + def test_framework_artifacts_not_cookies(): assert detect_storage_type("ComponentDefStorage__MUTEX_X") == "framework_storage" assert detect_storage_type("GlobalValueProviders__MUTEX_Y") == "framework_storage"