feat(cookie+routing): Storage-Typ-Filter + legal_notice capture-only

#3 Storage-Filter: cookie-check exponiert per-Cookie-Speichertyp
(storage_inventory.per_cookie); CookieResultView bekommt Filter-Chips
(Cookie/Local Storage/Framework …) + eine Speicher-Spalte, Anbieter ohne
passenden Treffer werden ausgeblendet, KPI zeigt gefilterte Zahl.

A-Routing: legal_notice ist jetzt ein kanonischer Doc-Type. Eigene
Discovery-Regel (legal-disclaimer/rechtlicher-hinweis) VOR impressum →
die Disclaimer-Seite wird nicht mehr als Impressum substituiert (Ursache,
dass die Cross-Doc-Reconciliation nie zündete). capture-only: als
doc_entry für B persistiert, aber nicht einzeln gescort (keine 0%-Noise,
da ohne eigene Checkliste). Im Scan-Form als Option auswählbar.

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
This commit is contained in:
Benjamin Admin
2026-06-11 20:45:18 +02:00
parent 0f6cdc93fd
commit 97e39579d5
8 changed files with 137 additions and 11 deletions
@@ -42,6 +42,26 @@ interface Snapshot {
// name_lower → tatsächliche Kategorie laut Library (aus /cookie-check). // name_lower → tatsächliche Kategorie laut Library (aus /cookie-check).
export type LibCategories = Record<string, string> export type LibCategories = Record<string, string>
/**
 * Maps a lower-cased cookie name to its storage type
 * (cookie | local_storage | framework_storage | …).
 */
export type StorageTypes = Record<string, string>

/** Human-readable label per known storage type (filter chips, badges, KPI tile). */
const STORAGE_LABEL: Record<string, string> = {
  cookie: 'Cookie',
  local_storage: 'Local Storage',
  session_storage: 'Session Storage',
  indexeddb: 'IndexedDB',
  framework_storage: 'Framework',
}

/** CSS classes applied to the storage-type badge, per known storage type. */
const STORAGE_COLOR: Record<string, string> = {
  cookie: 'bg-gray-100 text-gray-500',
  local_storage: 'bg-purple-100 text-purple-700',
  session_storage: 'bg-indigo-100 text-indigo-700',
  indexeddb: 'bg-cyan-100 text-cyan-700',
  framework_storage: 'bg-orange-100 text-orange-700',
}

/** Fixed order in which the known storage types are listed. */
const STORAGE_ORDER = ['cookie', 'local_storage', 'session_storage', 'indexeddb', 'framework_storage']

/**
 * Resolves the storage type of a cookie by its name.
 *
 * @param name - Cookie name as reported in the snapshot; matched case-insensitively.
 * @param st - Optional map of lower-cased cookie name to storage type.
 * @returns The mapped storage type, or `'cookie'` when the map is missing,
 *   has no entry for the name, or the entry is empty.
 */
function storageOf(name: string, st?: StorageTypes): string {
  const key = (name || '').toLowerCase()
  // Only consult own properties: a plain `st[key]` would resolve names such as
  // "constructor" or "__proto__" to inherited Object.prototype members, which
  // are truthy non-strings and would bypass the 'cookie' fallback.
  if (st && Object.prototype.hasOwnProperty.call(st, key)) {
    return st[key] || 'cookie'
  }
  return 'cookie'
}
const ROLE_LABEL: Record<string, string> = { const ROLE_LABEL: Record<string, string> = {
unknown: 'Unbekannt', ad_pixel: 'Werbe-Pixel', auth_token: 'Auth-Token', unknown: 'Unbekannt', ad_pixel: 'Werbe-Pixel', auth_token: 'Auth-Token',
@@ -116,9 +136,14 @@ function Tile({ label, value, tone }: { label: string; value: React.ReactNode; t
) )
} }
function VendorRow({ v, lib }: { v: SnapshotVendor; lib?: LibCategories }) { function VendorRow(
{ v, lib, st, sf }:
{ v: SnapshotVendor; lib?: LibCategories; st?: StorageTypes; sf: string },
) {
const [open, setOpen] = useState(false) const [open, setOpen] = useState(false)
const cookies = v.cookies || [] const cookies = sf
? (v.cookies || []).filter(c => storageOf(c.name, st) === sf)
: (v.cookies || [])
const cat = (v.category || '').toLowerCase() const cat = (v.category || '').toLowerCase()
const declaredCanon = canonCat(v.category) const declaredCanon = canonCat(v.category)
const drittland = !!v.country && !EEA.has((v.country || '').toUpperCase()) const drittland = !!v.country && !EEA.has((v.country || '').toUpperCase())
@@ -151,6 +176,7 @@ function VendorRow({ v, lib }: { v: SnapshotVendor; lib?: LibCategories }) {
<thead className="text-gray-400"> <thead className="text-gray-400">
<tr> <tr>
<th className="px-2 py-1 text-left font-normal">Cookie</th> <th className="px-2 py-1 text-left font-normal">Cookie</th>
<th className="px-2 py-1 text-left font-normal">Speicher</th>
<th className="px-2 py-1 text-left font-normal">Rolle</th> <th className="px-2 py-1 text-left font-normal">Rolle</th>
<th className="px-2 py-1 text-left font-normal">Zweck</th> <th className="px-2 py-1 text-left font-normal">Zweck</th>
<th className="px-2 py-1 text-left font-normal">Laufzeit</th> <th className="px-2 py-1 text-left font-normal">Laufzeit</th>
@@ -172,6 +198,16 @@ function VendorRow({ v, lib }: { v: SnapshotVendor; lib?: LibCategories }) {
</span> </span>
)} )}
</td> </td>
<td className="px-2 py-1 w-24">
{(() => {
const t = storageOf(c.name, st)
return t !== 'cookie' ? (
<span className={`px-1 py-0.5 rounded text-[9px] ${STORAGE_COLOR[t]}`}>
{STORAGE_LABEL[t] || t}
</span>
) : <span className="text-gray-300 text-[10px]">Cookie</span>
})()}
</td>
<td className="px-2 py-1 text-gray-500 w-24"> <td className="px-2 py-1 text-gray-500 w-24">
{c.functional_role && c.functional_role !== 'unknown' {c.functional_role && c.functional_role !== 'unknown'
? (ROLE_LABEL[c.functional_role] || c.functional_role) ? (ROLE_LABEL[c.functional_role] || c.functional_role)
@@ -195,11 +231,26 @@ function VendorRow({ v, lib }: { v: SnapshotVendor; lib?: LibCategories }) {
} }
export function CookieResultView( export function CookieResultView(
{ snapshot, cookieCategories }: { snapshot, cookieCategories, storageTypes }:
{ snapshot: Snapshot; cookieCategories?: LibCategories }, { snapshot: Snapshot; cookieCategories?: LibCategories; storageTypes?: StorageTypes },
) { ) {
const vendors = snapshot.cmp_vendors || [] const vendors = snapshot.cmp_vendors || []
const [viewMode, setViewMode] = useState<'role' | 'category'>('role') const [viewMode, setViewMode] = useState<'role' | 'category'>('role')
const [storageFilter, setStorageFilter] = useState('')
// Speichertyp-Verteilung über alle Cookies (für die Filter-Chips + Zähler).
const storagePresent = useMemo(() => {
const counts: Record<string, number> = {}
for (const v of vendors)
for (const c of v.cookies || []) {
const t = storageOf(c.name, storageTypes)
counts[t] = (counts[t] || 0) + 1
}
return counts
}, [vendors, storageTypes])
const matchesSF = (v: SnapshotVendor) =>
!storageFilter || (v.cookies || []).some(c => storageOf(c.name, storageTypes) === storageFilter)
const stats = useMemo(() => { const stats = useMemo(() => {
const cookies = vendors.reduce((n, v) => n + (v.cookies?.length || 0), 0) const cookies = vendors.reduce((n, v) => n + (v.cookies?.length || 0), 0)
@@ -220,7 +271,7 @@ export function CookieResultView(
(a.compliance_score ?? 100) - (b.compliance_score ?? 100) (a.compliance_score ?? 100) - (b.compliance_score ?? 100)
if (viewMode === 'category') { if (viewMode === 'category') {
return CATEGORY_GROUPS return CATEGORY_GROUPS
.map(g => ({ ...g, vendors: vendors.filter(v => canonCat(v.category) === g.key).sort(sortByScore) })) .map(g => ({ ...g, vendors: vendors.filter(v => canonCat(v.category) === g.key).filter(matchesSF).sort(sortByScore) }))
.filter(g => g.vendors.length > 0) .filter(g => g.vendors.length > 0)
} }
return GROUPS return GROUPS
@@ -228,10 +279,11 @@ export function CookieResultView(
...g, ...g,
vendors: vendors vendors: vendors
.filter(v => GROUPS.find(gg => gg.test((v.recipient_type || '').toUpperCase()))?.key === g.key) .filter(v => GROUPS.find(gg => gg.test((v.recipient_type || '').toUpperCase()))?.key === g.key)
.filter(matchesSF)
.sort(sortByScore), .sort(sortByScore),
})) }))
.filter(g => g.vendors.length > 0) .filter(g => g.vendors.length > 0)
}, [vendors, viewMode]) }, [vendors, viewMode, storageFilter, storageTypes])
const toggleBtn = (mode: 'role' | 'category', label: string) => ( const toggleBtn = (mode: 'role' | 'category', label: string) => (
<button <button
@@ -263,12 +315,37 @@ export function CookieResultView(
<div className="grid grid-cols-2 sm:grid-cols-3 lg:grid-cols-5 gap-3"> <div className="grid grid-cols-2 sm:grid-cols-3 lg:grid-cols-5 gap-3">
<Tile label="Anbieter" value={vendors.length} tone="text-gray-800" /> <Tile label="Anbieter" value={vendors.length} tone="text-gray-800" />
<Tile label="Cookies gesamt" value={stats.cookies} tone="text-gray-800" /> <Tile
label={storageFilter ? `${STORAGE_LABEL[storageFilter] || storageFilter} (gefiltert)` : 'Cookies gesamt'}
value={storageFilter ? (storagePresent[storageFilter] || 0) : stats.cookies}
tone="text-gray-800"
/>
<Tile label="Marketing-Anbieter" value={stats.marketing} tone={stats.marketing > 0 ? 'text-red-700' : 'text-gray-800'} /> <Tile label="Marketing-Anbieter" value={stats.marketing} tone={stats.marketing > 0 ? 'text-red-700' : 'text-gray-800'} />
<Tile label="Drittland (außerhalb EWR)" value={stats.drittland} tone={stats.drittland > 0 ? 'text-amber-700' : 'text-gray-800'} /> <Tile label="Drittland (außerhalb EWR)" value={stats.drittland} tone={stats.drittland > 0 ? 'text-amber-700' : 'text-gray-800'} />
<Tile label="Falsch einsortiert (lt. Library)" value={stats.misplaced} tone={stats.misplaced > 0 ? 'text-red-700' : 'text-gray-800'} /> <Tile label="Falsch einsortiert (lt. Library)" value={stats.misplaced} tone={stats.misplaced > 0 ? 'text-red-700' : 'text-gray-800'} />
</div> </div>
{Object.keys(storagePresent).filter(t => t !== 'cookie').length > 0 && (
<div className="flex items-center gap-1 flex-wrap">
<span className="text-[11px] text-gray-500 mr-1">Speichertyp:</span>
<button
onClick={() => setStorageFilter('')}
className={`px-2 py-0.5 rounded text-[11px] ${!storageFilter ? 'bg-blue-600 text-white' : 'bg-gray-100 text-gray-600 hover:bg-gray-200'}`}
>
Alle ({stats.cookies})
</button>
{STORAGE_ORDER.filter(t => storagePresent[t]).map(t => (
<button
key={t}
onClick={() => setStorageFilter(f => f === t ? '' : t)}
className={`px-2 py-0.5 rounded text-[11px] ${storageFilter === t ? 'bg-blue-600 text-white' : 'bg-gray-100 text-gray-600 hover:bg-gray-200'}`}
>
{STORAGE_LABEL[t] || t} ({storagePresent[t]})
</button>
))}
</div>
)}
{viewMode === 'category' && ( {viewMode === 'category' && (
<p className="text-[11px] text-gray-500 -mt-1"> <p className="text-[11px] text-gray-500 -mt-1">
Banner-Kategorie wie im Consent-Tool deklariert. Badge{' '} Banner-Kategorie wie im Consent-Tool deklariert. Badge{' '}
@@ -283,7 +360,7 @@ export function CookieResultView(
{g.label} <span className="text-gray-400 font-normal">({g.vendors.length})</span> {g.label} <span className="text-gray-400 font-normal">({g.vendors.length})</span>
</div> </div>
<div className="divide-y divide-gray-100"> <div className="divide-y divide-gray-100">
{g.vendors.map((v, i) => <VendorRow key={i} v={v} lib={cookieCategories} />)} {g.vendors.map((v, i) => <VendorRow key={i} v={v} lib={cookieCategories} st={storageTypes} sf={storageFilter} />)}
</div> </div>
</div> </div>
))} ))}
@@ -66,4 +66,16 @@ describe('CookieResultView', () => {
fireEvent.click(screen.getByText('Salesforce')) fireEvent.click(screen.getByText('Salesforce'))
expect(screen.getByText(/sollte: Marketing/)).toBeInTheDocument() expect(screen.getByText(/sollte: Marketing/)).toBeInTheDocument()
}) })
it('filtert nach Speichertyp (Framework vs. Cookie)', () => {
// LSKey-c$Policy is framework storage; every other entry is a real cookie.
render(<CookieResultView snapshot={SNAP} storageTypes={{ 'lskey-c$policy': 'framework_storage' }} />)
const chip = screen.getByText(/Framework \(1\)/)
expect(chip).toBeInTheDocument() // chip bar is rendered (a non-cookie entry exists)
fireEvent.click(chip)
// Only Salesforce (which owns the framework object) stays visible.
expect(screen.getByText('Salesforce')).toBeInTheDocument()
expect(screen.queryByText('BMW AG — eShop')).not.toBeInTheDocument()
expect(screen.queryByText('Meta / Facebook')).not.toBeInTheDocument()
})
}) })
@@ -16,6 +16,7 @@ export const DOCUMENT_TYPES = [
{ id: 'widerruf', label: 'Widerrufsbelehrung', required: false }, { id: 'widerruf', label: 'Widerrufsbelehrung', required: false },
{ id: 'dsb', label: 'DSB-Kontakt', required: false }, { id: 'dsb', label: 'DSB-Kontakt', required: false },
{ id: 'news', label: 'Blog/Newsroom (für § 18 MStV)', required: false }, { id: 'news', label: 'Blog/Newsroom (für § 18 MStV)', required: false },
{ id: 'legal_notice', label: 'Rechtlicher Hinweis / Disclaimer', required: false },
] as const ] as const
export type DocTypeId = typeof DOCUMENT_TYPES[number]['id'] export type DocTypeId = typeof DOCUMENT_TYPES[number]['id']
@@ -94,7 +94,7 @@ export default function SnapshotDetail(
{tab === 'cookie' && hasCookies && ( {tab === 'cookie' && hasCookies && (
<div className="space-y-4"> <div className="space-y-4">
<CookieLibraryPanel snapshotId={snapshotId} data={check ?? undefined} /> <CookieLibraryPanel snapshotId={snapshotId} data={check ?? undefined} />
<CookieResultView snapshot={snap} cookieCategories={check?.cookie_categories} /> <CookieResultView snapshot={snap} cookieCategories={check?.cookie_categories} storageTypes={check?.storage_inventory?.per_cookie} />
</div> </div>
)} )}
@@ -31,10 +31,18 @@ _compliance_check_jobs: dict[str, dict] = {}
# a separate page. We check 'DSB benannt' as a sub-check of the DSE. # a separate page. We check 'DSB benannt' as a sub-check of the DSE.
_ALL_DOC_TYPES = [ _ALL_DOC_TYPES = [
"dse", "impressum", "social_media", "cookie", "dse", "impressum", "social_media", "cookie",
"agb", "nutzungsbedingungen", "widerruf", "agb", "nutzungsbedingungen", "widerruf", "legal_notice",
] ]
# Capture-only doc types: captured and persisted as a doc_entry (for the
# cross-document reconciliation B), but NOT scored individually. They have no
# checklist/MCs of their own → _check_single would only produce a misleading
# 0% row. 'legal_notice' (footer "Rechtlicher Hinweis"/disclaimer) often carries
# VSBG/ODR statements that satisfy Impressum obligations → valuable for B.
_CAPTURE_ONLY = {"legal_notice"}
# Human-readable labels per doc_type. Used in the report + emails. # Human-readable labels per doc_type. Used in the report + emails.
_DOC_TYPE_LABELS = { _DOC_TYPE_LABELS = {
"dse": "Datenschutzerklaerung", "dse": "Datenschutzerklaerung",
@@ -77,8 +85,14 @@ _DISCOVERY_RULES: list[tuple[str, tuple[str, ...]]] = [
"allgemeine-nutzungsbedingungen")), "allgemeine-nutzungsbedingungen")),
("dsb", ("datenschutzbeauftragt", "data-protection-officer", ("dsb", ("datenschutzbeauftragt", "data-protection-officer",
"dpo-contact", "/dsb")), "dpo-contact", "/dsb")),
# A: 'legal-disclaimer' (Footer-„Rechtlicher Hinweis") VOR impressum, damit
# die Disclaimer-Seite NICHT mehr als Impressum substituiert wird (war die
# Ursache, dass die Cross-Doc-Reconciliation nie zündete).
("legal_notice", ("legal-disclaimer", "legal-disclaimer-pool",
"rechtlicher-hinweis", "rechtliche-hinweise",
"haftungsausschluss")),
("impressum", ("impressum", "imprint", "legal-notice", "site-notice", ("impressum", ("impressum", "imprint", "legal-notice", "site-notice",
"anbieterkennzeichnung", "legal-disclaimer-pool")), "anbieterkennzeichnung")),
("dse", ("data-privacy", "datenschutz", "data-protection", ("dse", ("data-privacy", "datenschutz", "data-protection",
"privacy-policy", "privacy-notice", "dsgvo", "privacy-policy", "privacy-notice", "dsgvo",
"data_privacy", "datenschutzinformation")), "data_privacy", "datenschutzinformation")),
@@ -17,6 +17,7 @@ from dataclasses import asdict
import httpx import httpx
from ._constants import _CAPTURE_ONLY
from ._helpers import ( from ._helpers import (
_apply_profile_filter, _apply_profile_filter,
_doc_type_label, _doc_type_label,
@@ -117,6 +118,16 @@ async def run_phase_b(state: dict) -> None:
)) ))
continue continue
# A: Capture-only — Text ist via doc_entries schon im Snapshot (für die
# Cross-Doc-Reconciliation B); hier NICHT scoren (keine eigene
# Checkliste → sonst irreführende 0%-Zeile).
if doc_type in _CAPTURE_ONLY:
results.append(DocCheckResult(
label=label, url=url, doc_type=doc_type,
error="Erfasst für Cross-Dokument-Abgleich (nicht einzeln bewertet).",
))
continue
pct = int(40 + (i / n_entries) * 40) pct = int(40 + (i / n_entries) * 40)
_update(check_id, f"Pruefen {i+1}/{n_entries}: {label}...", pct) _update(check_id, f"Pruefen {i+1}/{n_entries}: {label}...", pct)
@@ -81,11 +81,15 @@ def build_storage_inventory(vendors: list[dict]) -> dict:
"""Zählt je Speichertyp + liefert Beispiele für Nicht-Cookies.""" """Zählt je Speichertyp + liefert Beispiele für Nicht-Cookies."""
by_type: dict[str, int] = {} by_type: dict[str, int] = {}
examples: list[dict] = [] examples: list[dict] = []
per_cookie: dict[str, str] = {}
for v in vendors or []: for v in vendors or []:
vname = v.get("name") or "?" vname = v.get("name") or "?"
for c in v.get("cookies") or []: for c in v.get("cookies") or []:
st = detect_storage_type(c.get("name", ""), c.get("expiry", "")) st = detect_storage_type(c.get("name", ""), c.get("expiry", ""))
by_type[st] = by_type.get(st, 0) + 1 by_type[st] = by_type.get(st, 0) + 1
n = (c.get("name") or "").lower()
if n:
per_cookie[n] = st
if st != "cookie" and len(examples) < 10: if st != "cookie" and len(examples) < 10:
examples.append({ examples.append({
"name": c.get("name", ""), "type": st, "vendor": vname, "name": c.get("name", ""), "type": st, "vendor": vname,
@@ -98,6 +102,8 @@ def build_storage_inventory(vendors: list[dict]) -> dict:
"real_cookies": cookies, "real_cookies": cookies,
"other_storage": total - cookies, "other_storage": total - cookies,
"examples": examples, "examples": examples,
# name_lower → Speichertyp (für den Frontend-Filter).
"per_cookie": per_cookie,
} }
@@ -50,6 +50,11 @@ def test_inventory_counts_and_transparency_finding():
tf = storage_transparency_finding(inv) tf = storage_transparency_finding(inv)
assert tf and tf["type"] == "storage_transparency" assert tf and tf["type"] == "storage_transparency"
assert "§ 25" in tf["control"]["article"] assert "§ 25" in tf["control"]["article"]
# per_cookie-Map (für den Frontend-Storage-Filter): name_lower → Typ.
pc = inv["per_cookie"]
assert pc["componentdefstorage__mutex_x"] == "framework_storage"
assert pc["_ga"] == "cookie"
assert pc["browserid1"] == "cookie"
def test_no_finding_when_all_real_cookies(): def test_no_finding_when_all_real_cookies():