fix(snapshot): Cookie-Dedup + schneller Impressum-Tab + Tabellen-Zahl
- Cookies werden je Vendor nach Name dedupliziert (Consent-Phasen-Dubletten; BMW 2196 → ~772) — in cookie-check + get_snapshot; behebt aufgeblähte Kachel-/Finding-Zahlen.
- Impressum-Snapshot-Check überspringt den ~40s-LLM-Schritt (context skip_llm) → Tab lädt sofort, statt leer zu bleiben.
- Vendor-Tabelle zeigt nur die Cookie-Zahl (kein 'Cookies'-Wort je Zeile).

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
This commit is contained in:
@@ -140,7 +140,7 @@ function VendorRow({ v, lib }: { v: SnapshotVendor; lib?: LibCategories }) {
|
|||||||
{v.country}
|
{v.country}
|
||||||
</span>
|
</span>
|
||||||
)}
|
)}
|
||||||
<span className="text-gray-500 w-16 text-right">{cookies.length} Cookies</span>
|
<span className="text-gray-500 w-12 text-right" title="Cookies">{cookies.length}</span>
|
||||||
<span className={`w-10 text-right font-semibold ${scoreColor(v.compliance_score)}`}>
|
<span className={`w-10 text-right font-semibold ${scoreColor(v.compliance_score)}`}>
|
||||||
{v.compliance_score != null ? `${v.compliance_score}%` : '—'}
|
{v.compliance_score != null ? `${v.compliance_score}%` : '—'}
|
||||||
</span>
|
</span>
|
||||||
|
|||||||
@@ -75,7 +75,9 @@ def doc_input_from_snapshot(snap: dict, doc_type: str) -> dict | None:
|
|||||||
"business_scope": scope,
|
"business_scope": scope,
|
||||||
"company_name": (profile.get("company_name") or snap.get("site_label") or ""),
|
"company_name": (profile.get("company_name") or snap.get("site_label") or ""),
|
||||||
"origin_domain": snap.get("site_domain", ""),
|
"origin_domain": snap.get("site_domain", ""),
|
||||||
"context": {"scan_context": snap.get("scan_context") or {}},
|
# skip_llm: Snapshot-Ansicht ist interaktiv → kein ~40s-LLM-Schritt.
|
||||||
|
"context": {"scan_context": snap.get("scan_context") or {},
|
||||||
|
"skip_llm": True},
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|||||||
@@ -218,6 +218,11 @@ async def get_snapshot(snapshot_id: str):
|
|||||||
snap = load_snapshot(db, snapshot_id)
|
snap = load_snapshot(db, snapshot_id)
|
||||||
if not snap:
|
if not snap:
|
||||||
raise HTTPException(status_code=404, detail="snapshot not found")
|
raise HTTPException(status_code=404, detail="snapshot not found")
|
||||||
|
if snap.get("cmp_vendors"):
|
||||||
|
from compliance.services.cookie_storage_inventory import (
|
||||||
|
dedupe_vendor_cookies,
|
||||||
|
)
|
||||||
|
snap["cmp_vendors"] = dedupe_vendor_cookies(snap["cmp_vendors"])
|
||||||
return snap
|
return snap
|
||||||
finally:
|
finally:
|
||||||
db.close()
|
db.close()
|
||||||
|
|||||||
@@ -58,6 +58,7 @@ async def snapshot_cookie_check(snapshot_id: str):
|
|||||||
)
|
)
|
||||||
from compliance.services.cookie_storage_inventory import (
|
from compliance.services.cookie_storage_inventory import (
|
||||||
build_storage_inventory, storage_transparency_finding,
|
build_storage_inventory, storage_transparency_finding,
|
||||||
|
dedupe_vendor_cookies,
|
||||||
)
|
)
|
||||||
from compliance.services.cookie_compliance_audit import (
|
from compliance.services.cookie_compliance_audit import (
|
||||||
audit_cookie_compliance,
|
audit_cookie_compliance,
|
||||||
@@ -67,7 +68,8 @@ async def snapshot_cookie_check(snapshot_id: str):
|
|||||||
snap = load_snapshot(db, snapshot_id)
|
snap = load_snapshot(db, snapshot_id)
|
||||||
if not snap:
|
if not snap:
|
||||||
raise HTTPException(status_code=404, detail="snapshot not found")
|
raise HTTPException(status_code=404, detail="snapshot not found")
|
||||||
vendors = snap.get("cmp_vendors") or []
|
# Consent-Phasen duplizieren Cookies → je Vendor nach Name deduplizieren.
|
||||||
|
vendors = dedupe_vendor_cookies(snap.get("cmp_vendors") or [])
|
||||||
names = [c.get("name", "")
|
names = [c.get("name", "")
|
||||||
for v in vendors for c in (v.get("cookies") or [])]
|
for v in vendors for c in (v.get("cookies") or [])]
|
||||||
big = load_big_library(db, names)
|
big = load_big_library(db, names)
|
||||||
|
|||||||
@@ -35,6 +35,28 @@ STORAGE_LABELS = {
|
|||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
def dedupe_vendor_cookies(vendors: list[dict]) -> list[dict]:
    """Return a copy of *vendors* with duplicate cookies removed per vendor.

    The same cookie can be listed several times for one vendor (consent
    phases before_consent / after_accept / after_reject of the same crawl
    session). Within each vendor, cookies are compared by their stripped,
    lower-cased ``name``; the first occurrence is kept and the original order
    is preserved. Cookies without a usable name cannot be compared and are
    all kept. This fixes inflated cookie/finding counts (BMW: 2196 -> ~772
    unique).

    The input is not mutated: every vendor is shallow-copied and receives a
    new ``cookies`` list (the cookie dicts themselves are shared).

    Args:
        vendors: Vendor dicts, each optionally carrying a ``cookies`` list of
            cookie dicts. ``None`` or an empty list yields ``[]``.

    Returns:
        A new list of vendor dicts whose ``cookies`` lists are de-duplicated.
    """
    deduped: list[dict] = []
    for vendor in vendors or []:
        seen: set[str] = set()
        unique: list[dict] = []
        for cookie in vendor.get("cookies") or []:
            raw_name = cookie.get("name")
            # str() keeps a truthy non-string name (e.g. a number) from
            # crashing on .strip(); falsy names count as "unnamed".
            key = str(raw_name).strip().lower() if raw_name else ""
            if key:
                if key in seen:
                    continue
                seen.add(key)
            unique.append(cookie)
        # Shallow copy so the caller's vendor dict keeps its original list.
        vendor_copy = dict(vendor)
        vendor_copy["cookies"] = unique
        deduped.append(vendor_copy)
    return deduped
|
||||||
|
|
||||||
|
|
||||||
def detect_storage_type(name: str, expiry: str = "") -> str:
|
def detect_storage_type(name: str, expiry: str = "") -> str:
|
||||||
"""Heuristik: echtes Cookie vs. anderer Endgeräte-Speicher.
|
"""Heuristik: echtes Cookie vs. anderer Endgeräte-Speicher.
|
||||||
|
|
||||||
|
|||||||
@@ -259,6 +259,9 @@ class ImpressumAgent(BaseSpecialistAgent):
|
|||||||
)
|
)
|
||||||
|
|
||||||
# ── Layer 3: Semantic-Validator nur für HIGH/MEDIUM-Fails ──
|
# ── Layer 3: Semantic-Validator nur für HIGH/MEDIUM-Fails ──
|
||||||
|
# In der interaktiven Snapshot-Ansicht (context skip_llm) übersprungen:
|
||||||
|
# der LLM-Schritt kostet ~40s, das deterministische Ergebnis genügt dort.
|
||||||
|
if not (agent_input.context or {}).get("skip_llm"):
|
||||||
await self._semantic_demote(text, findings, coverage)
|
await self._semantic_demote(text, findings, coverage)
|
||||||
|
|
||||||
# Confidence: harmonic mean der Findings (oder hoch wenn 0)
|
# Confidence: harmonic mean der Findings (oder hoch wenn 0)
|
||||||
|
|||||||
@@ -4,11 +4,24 @@ from __future__ import annotations
|
|||||||
|
|
||||||
from compliance.services.cookie_storage_inventory import (
|
from compliance.services.cookie_storage_inventory import (
|
||||||
build_storage_inventory,
|
build_storage_inventory,
|
||||||
|
dedupe_vendor_cookies,
|
||||||
detect_storage_type,
|
detect_storage_type,
|
||||||
storage_transparency_finding,
|
storage_transparency_finding,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
|
def test_dedupe_vendor_cookies_removes_consent_phase_dupes():
    """Repeated cookie names within one vendor collapse to the first entry."""
    cookies = [
        {"name": "_ga", "expiry": "2 Jahre"},
        {"name": "_ga", "expiry": "2 Jahre"},  # before/after_accept duplicate
        {"name": "_GA", "expiry": "2 Jahre"},  # differs only in letter case
        {"name": "sid", "expiry": "Session"},
    ]
    result = dedupe_vendor_cookies([{"name": "V", "cookies": cookies}])
    remaining = [entry["name"] for entry in result[0]["cookies"]]
    # Four entries go in, two unique names come out.
    assert remaining == ["_ga", "sid"]
|
||||||
|
|
||||||
|
|
||||||
def test_framework_artifacts_not_cookies():
|
def test_framework_artifacts_not_cookies():
|
||||||
assert detect_storage_type("ComponentDefStorage__MUTEX_X") == "framework_storage"
|
assert detect_storage_type("ComponentDefStorage__MUTEX_X") == "framework_storage"
|
||||||
assert detect_storage_type("GlobalValueProviders__MUTEX_Y") == "framework_storage"
|
assert detect_storage_type("GlobalValueProviders__MUTEX_Y") == "framework_storage"
|
||||||
|
|||||||
Reference in New Issue
Block a user