feat(b19): Cookie-Coherence — 3-Layer-Lookup + Vendor-Karten + CSV
Adressiert das BMW-Beispiel (740 Cookies, Salesforce als "essential"
mit 1-Jahres-Lifetime, Pseudo-Zwecke wie "Siehe dazugehörige
Datenverarbeitung"). User-Konzept "Regulation als Code".
Step 1 — cookie_library_lookup.py (3 Layer):
1. Override = cookie_knowledge_db.py + extended (74) für
Schrems-II / EuGH / EU-Alternative — BreakPilot-juristische-IP.
2. Truth-Base = compliance.cookie_library (2287 aus Open Cookie
Database, CC0). actual_category als Wahrheit.
3. Auto-Learning = cookie_behavior_audits — Cross-Site-Konsens
wenn ≥3 Sites denselben Cookie melden.
Match: exact > prefix (mit Separator-Check) > wildcard. Kurze
Library-Namen ("c", "ID") brauchen exact-match — verhindert
False-Positive auf "completely_unknown". Trailing-Underscore
in OCD ("guest_uuid_essential_") wird als implicit-wildcard
interpretiert.
Step 2 — cookie_coherence_check.py (B19, 6 Finding-Typen):
- MARKETING_AS_ESSENTIAL (HIGH): KB sagt actual=marketing, Site
deklariert essential/erforderlich → Einwilligung wird umgangen
- LIFETIME_TOO_LONG_FOR_ESSENTIAL (MED): essential + >90d
- PSEUDO_PURPOSE (LOW): "Siehe dazugehörige Datenverarbeitung"
/ <4 Wörter (suppressed wenn Vendor-Purpose substantial ist)
- MISSING_COUNTRY (LOW): vendor_country leer trotz KB-Hit
- UNKNOWN_VENDOR (LOW): nicht in KB → Auto-Learning-Kandidat
- DUPLICATE_VENDOR (MED): selber Vendor in N Kategorien =
Stack-Aufspaltung um Marketing unter "essential" zu schmuggeln
Jedes Finding mit recommended_action ("Cookie X aus 'erforderlich'
raus und in 'Marketing' setzen").
Step 3 — cookie_observation_logger.py:
Loggt nach jedem Audit alle (cookie, site, declared_purpose) in
compliance.cookie_behavior_audits → Basis für Cross-Site-Konsens
in Layer 3.
Step 4 — cookie_csv_exporter.py:
cookies-full-{check_id}.csv mit 21 Spalten (Name, Vendor decl/KB,
Cat decl/KB, Lifetime decl/KB, Country, Opt-Out, 8x FIND_* flags,
recommended_action). UTF-8 BOM für Excel.
ZIP-Attachment: erweitert audit_walk_zip_builder um extra_files=
parameter; phase_e ruft mit cookies-full-...csv auf.
Step 5 — mail_render_v2/_vendor_cards.py:
Statt 740 Cookie-Rows: Aggregation pro Vendor mit Cookie-Count +
Issue-Count + 1-2 Beispiel-Cookies + Issue-Type-Tags. Top 30
Vendoren in der Mail, Rest nur in CSV. Sortiert nach Issue-Score.
Step 6 — render_info_box_rechtsrahmen():
Generic Header-Info-Box mit Art. 13 DSGVO + § 25 TDDDG + Art. 5
+ § 5 UWG + § 30/130 OWiG. Immer angezeigt, kein explicit-
finding-mapping (User-Mündigkeit).
Orchestrator + _compose: run_b19 + render_vendor_cards +
render_info_box_rechtsrahmen ins V2-Layout.
Tests: 28/28 grün (15 lookup + 13 coherence).
Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -0,0 +1,100 @@
|
||||
"""B19 wiring — Cookie-Coherence-Check (Salesforce-as-essential)."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import html
|
||||
import logging
|
||||
from collections import Counter
|
||||
|
||||
from compliance.services.cookie_coherence_check import check_cookie_coherence
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
def run_b19(state: dict) -> None:
    """Run the B19 cookie-coherence step against the shared check state.

    First makes a best-effort call to ``log_observations(state)``; any
    exception from importing or running it is logged as a warning and
    otherwise ignored. Then ``check_cookie_coherence(state)`` is evaluated.
    If it yields findings, they are appended to ``state["extra_findings"]``
    (extending an existing list in place) and additionally stored under
    ``state["cookie_coherence_findings"]`` (raw) and
    ``state["cookie_coherence_html"]`` (rendered via ``_render``). If it
    yields nothing, this function writes nothing to ``state``.
    """
    # Step 3 — auto-learning: log all declared cookies of this site to
    # cookie_behavior_audits (the basis for the cross-site consensus).
    try:
        from compliance.services.cookie_observation_logger import (
            log_observations as record_observations,
        )
        logger.info("B19 observation-logger: %s", record_observations(state))
    except Exception as exc:
        logger.warning("observation-logger skipped: %s", exc)

    coherence_findings = check_cookie_coherence(state)
    if coherence_findings:
        collected = state.get("extra_findings") or []
        collected.extend(coherence_findings)
        state["extra_findings"] = collected
        state["cookie_coherence_html"] = _render(coherence_findings)
        state["cookie_coherence_findings"] = coherence_findings
        logger.info(
            "B19 cookie-coherence: %d finding(s)", len(coherence_findings)
        )
|
||||
|
||||
|
||||
def _render(findings: list[dict]) -> str:
|
||||
# Aggregate per type for the summary chip
|
||||
by_type = Counter(f.get("check_id") for f in findings)
|
||||
severity_color = {
|
||||
"HIGH": "#dc2626", "MEDIUM": "#f59e0b", "LOW": "#64748b",
|
||||
}
|
||||
# Show only the top 12 cards in the mail; rest goes to CSV
|
||||
cards = []
|
||||
for f in findings[:12]:
|
||||
sev = (f.get("severity") or "").upper()
|
||||
color = severity_color.get(sev, "#475569")
|
||||
meta = ""
|
||||
if f.get("cookie_name"):
|
||||
meta += (
|
||||
"<div style='font-size:12px;color:#475569;margin-top:6px;'>"
|
||||
f"<em>Cookie: <code>{html.escape(f['cookie_name'])}</code>"
|
||||
f" · Vendor: {html.escape(f.get('vendor') or '?')}</em>"
|
||||
"</div>"
|
||||
)
|
||||
if f.get("declared_category"):
|
||||
meta += (
|
||||
"<div style='font-size:11px;color:#7f1d1d;margin-top:3px;'>"
|
||||
f"declared: <code>{html.escape(f['declared_category'])}</code>"
|
||||
+ (f" · actual (KB): <code>{html.escape(f['actual_category'])}</code>"
|
||||
if f.get("actual_category") else "")
|
||||
+ "</div>"
|
||||
)
|
||||
cards.append(
|
||||
f"<div style='margin:12px 0;padding:14px;background:#fff;"
|
||||
f"border-left:3px solid {color};border-radius:4px;'>"
|
||||
f"<div style='font-weight:600;color:{color};font-size:14px;'>"
|
||||
f"{sev} · {html.escape(f.get('check_id') or '')}</div>"
|
||||
f"<div style='font-size:14px;margin-top:4px;'>"
|
||||
f"<strong>{html.escape(f.get('title') or '')}</strong></div>"
|
||||
f"<div style='font-size:12px;color:#64748b;margin-top:2px;'>"
|
||||
f"{html.escape(f.get('norm') or '')}</div>"
|
||||
f"{meta}"
|
||||
f"<div style='font-size:12px;color:#475569;margin-top:6px;'>"
|
||||
f"<em>{html.escape(f.get('evidence') or '')}</em></div>"
|
||||
f"<div style='font-size:13px;margin-top:8px;background:#dcfce7;"
|
||||
f"padding:8px 10px;border-radius:4px;'>"
|
||||
f"<strong>→ Abstellung:</strong> "
|
||||
f"{html.escape(f.get('recommended_action') or '')}</div>"
|
||||
"</div>"
|
||||
)
|
||||
type_summary = " · ".join(
|
||||
f"{k.split('-')[-1]}: {v}" for k, v in by_type.most_common()
|
||||
)
|
||||
return (
|
||||
"<div style='margin:24px 0;padding:16px;border-left:4px solid #dc2626;"
|
||||
"background:#fef2f2;border-radius:4px;'>"
|
||||
"<h2 style='margin:0 0 8px;color:#7f1d1d;font-size:16px;'>"
|
||||
f"🍪 Cookie-Kohärenz ({len(findings)} Befunde)"
|
||||
"</h2>"
|
||||
f"<p style='margin:0 0 8px;font-size:12px;color:#475569;'>"
|
||||
f"Vergleich Site-Deklaration vs Open Cookie Database (2287) + "
|
||||
f"BreakPilot-KB.<br><strong>Verteilung:</strong> {type_summary}</p>"
|
||||
+ "".join(cards)
|
||||
+ (f"<p style='font-size:12px;color:#64748b;margin-top:8px;'>"
|
||||
f"<em>… und {len(findings)-12} weitere — vollständige Liste "
|
||||
f"in <code>cookies-full.csv</code> im ZIP-Anhang.</em></p>"
|
||||
if len(findings) > 12 else "")
|
||||
+ "</div>"
|
||||
)
|
||||
@@ -29,6 +29,7 @@ from ._b15_wiring import run_b15
|
||||
from ._b16_wiring import run_b16
|
||||
from ._b17_wiring import run_b17
|
||||
from ._b18_wiring import run_b18
|
||||
from ._b19_wiring import run_b19
|
||||
from ._constants import _compliance_check_jobs
|
||||
from ._phase_a_resolve import run_phase_a
|
||||
from ._phase_b_profile_check import run_phase_b
|
||||
@@ -92,6 +93,7 @@ async def run_compliance_check(check_id: str, req) -> None:
|
||||
run_b16(state) # Footer-Label-vs-URL-Slug-Drift
|
||||
await run_b17(state) # Audit-Walk-Video (Beweis-Aufzeichnung)
|
||||
await run_b18(state) # Impressum-Specialist-Agent (Pattern+LLM)
|
||||
run_b19(state) # Cookie-Coherence (Salesforce-as-essential)
|
||||
# Phase D-3 top/mid/bot: Step 5 HTML blocks
|
||||
await run_phase_d3_top(state)
|
||||
await run_phase_d3_mid(state)
|
||||
|
||||
@@ -62,6 +62,41 @@ def run_phase_e(state: dict) -> None:
|
||||
except Exception as e:
|
||||
logger.warning("A1 evidence-zip build failed: %s", e)
|
||||
|
||||
# B17 audit-walk: bundle video + walk.json + README into a second
|
||||
# ZIP attachment. Reviewer hat den Beweis-Film direkt im Postfach.
|
||||
audit_walk = state.get("audit_walk")
|
||||
if audit_walk and audit_walk.get("walk_id"):
|
||||
try:
|
||||
from compliance.services.audit_walk_zip_builder import (
|
||||
build_audit_walk_zip,
|
||||
)
|
||||
walk_zip = build_audit_walk_zip(
|
||||
audit_walk,
|
||||
extra_files=_build_cookie_csv_extra(state, check_id),
|
||||
)
|
||||
if walk_zip:
|
||||
evidence_attachments.append({
|
||||
"filename": f"audit-walk-{check_id[:8]}.zip",
|
||||
"data": walk_zip,
|
||||
"mime": "application/zip",
|
||||
})
|
||||
except Exception as e:
|
||||
logger.warning("audit-walk-zip build failed: %s", e)
|
||||
|
||||
|
||||
def _build_cookie_csv_extra(state: dict, check_id: str) -> dict[str, bytes]:
|
||||
"""B19 Step 4: cookies-full.csv ins Walk-ZIP. Returns {filename: bytes}."""
|
||||
if not state.get("cmp_vendors"):
|
||||
return {}
|
||||
try:
|
||||
from compliance.services.cookie_csv_exporter import build_cookie_csv
|
||||
csv_bytes = build_cookie_csv(state)
|
||||
if csv_bytes:
|
||||
return {f"cookies-full-{check_id[:8]}.csv": csv_bytes}
|
||||
except Exception as e:
|
||||
logger.warning("cookie-csv build failed: %s", e)
|
||||
return {}
|
||||
|
||||
email_result = send_email(
|
||||
recipient=req.recipient,
|
||||
subject=f"[COMPLIANCE-CHECK] {site_name} — {doc_count} Dokumente geprueft",
|
||||
|
||||
Reference in New Issue
Block a user