327e6a8984
BMW4 zeigte 1037 UNK-Findings — die Mail wurde damit unleserlich. Drei pragmatische Anpassungen: 1. UNK severity: LOW → INFO. Mail-Renderer zeigt jetzt nur HIGH/MEDIUM/LOW; INFO bleibt im API-Payload + CSV. 2. UNK wird NICHT emittiert wenn Vendor=First-Party-Owner (z.B. "BMW AG" auf bmw.de). Heuristik _is_first_party_owner vergleicht Vendor-Name gegen Domain-SLD. 3. auto_learning threshold ≥3 Sites → ≥1 Site. Second-time-Audit einer Site hat ihre eigenen Cookies bereits gelernt → kein UNK mehr. Single-site Auto-Learning ist absichtlich konservativ (Annotation, kein Truth). Effekt: erwartete Reduktion bei BMW von 1037 UNK → ~50-100 (nur unbekannte 3rd-party-Vendoren). Mail wird lesbar, MAE-Findings (Salesforce-as-essential) bleiben prominent sichtbar. Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
108 lines
4.4 KiB
Python
108 lines
4.4 KiB
Python
"""B19 wiring — Cookie-Coherence-Check (Salesforce-as-essential)."""
|
|
|
|
from __future__ import annotations
|
|
|
|
import html
|
|
import logging
|
|
from collections import Counter
|
|
|
|
from compliance.services.cookie_coherence_check import check_cookie_coherence
|
|
|
|
logger = logging.getLogger(__name__)
|
|
|
|
|
|
def run_b19(state: dict) -> None:
    """Run the B19 cookie-coherence check and attach its results to *state*.

    The observation logger is invoked first on a best-effort basis: any
    exception raised while importing or running it is logged as a warning
    and otherwise ignored. Afterwards ``check_cookie_coherence`` is called.
    If it yields findings, they are appended to ``state["extra_findings"]``,
    stored under ``state["cookie_coherence_findings"]`` and rendered to HTML
    under ``state["cookie_coherence_html"]``. Without findings, none of
    these keys is written here.

    Args:
        state: Mutable audit state dict; updated in place.
    """
    # Step 3 — auto-learning: log all cookies declared by this site into
    # cookie_behavior_audits (basis for the cross-site consensus).
    try:
        from compliance.services.cookie_observation_logger import log_observations

        observation_stats = log_observations(state)
        logger.info("B19 observation-logger: %s", observation_stats)
    except Exception as exc:
        logger.warning("observation-logger skipped: %s", exc)

    findings = check_cookie_coherence(state)
    if findings:
        # Reuse an existing list so that it is extended in place.
        merged = state.get("extra_findings") or []
        merged.extend(findings)
        state["extra_findings"] = merged
        state["cookie_coherence_html"] = _render(findings)
        state["cookie_coherence_findings"] = findings
        logger.info("B19 cookie-coherence: %d finding(s)", len(findings))
|
|
|
|
|
|
def _render(findings: list[dict]) -> str:
|
|
# Aggregate per type for the summary chip
|
|
by_type = Counter(f.get("check_id") for f in findings)
|
|
severity_color = {
|
|
"HIGH": "#dc2626", "MEDIUM": "#f59e0b", "LOW": "#64748b",
|
|
}
|
|
# Show only HIGH/MEDIUM/LOW cards in the mail; INFO (UNK auto-
|
|
# learning) bleibt nur in CSV — sonst überfüllt die Mail.
|
|
mail_findings = [
|
|
f for f in findings
|
|
if (f.get("severity") or "").upper() in ("HIGH", "MEDIUM", "LOW")
|
|
]
|
|
cards = []
|
|
for f in mail_findings[:12]:
|
|
sev = (f.get("severity") or "").upper()
|
|
color = severity_color.get(sev, "#475569")
|
|
meta = ""
|
|
if f.get("cookie_name"):
|
|
meta += (
|
|
"<div style='font-size:12px;color:#475569;margin-top:6px;'>"
|
|
f"<em>Cookie: <code>{html.escape(f['cookie_name'])}</code>"
|
|
f" · Vendor: {html.escape(f.get('vendor') or '?')}</em>"
|
|
"</div>"
|
|
)
|
|
if f.get("declared_category"):
|
|
meta += (
|
|
"<div style='font-size:11px;color:#7f1d1d;margin-top:3px;'>"
|
|
f"declared: <code>{html.escape(f['declared_category'])}</code>"
|
|
+ (f" · actual (KB): <code>{html.escape(f['actual_category'])}</code>"
|
|
if f.get("actual_category") else "")
|
|
+ "</div>"
|
|
)
|
|
cards.append(
|
|
f"<div style='margin:12px 0;padding:14px;background:#fff;"
|
|
f"border-left:3px solid {color};border-radius:4px;'>"
|
|
f"<div style='font-weight:600;color:{color};font-size:14px;'>"
|
|
f"{sev} · {html.escape(f.get('check_id') or '')}</div>"
|
|
f"<div style='font-size:14px;margin-top:4px;'>"
|
|
f"<strong>{html.escape(f.get('title') or '')}</strong></div>"
|
|
f"<div style='font-size:12px;color:#64748b;margin-top:2px;'>"
|
|
f"{html.escape(f.get('norm') or '')}</div>"
|
|
f"{meta}"
|
|
f"<div style='font-size:12px;color:#475569;margin-top:6px;'>"
|
|
f"<em>{html.escape(f.get('evidence') or '')}</em></div>"
|
|
f"<div style='font-size:13px;margin-top:8px;background:#dcfce7;"
|
|
f"padding:8px 10px;border-radius:4px;'>"
|
|
f"<strong>→ Abstellung:</strong> "
|
|
f"{html.escape(f.get('recommended_action') or '')}</div>"
|
|
"</div>"
|
|
)
|
|
type_summary = " · ".join(
|
|
f"{k.split('-')[-1]}: {v}" for k, v in by_type.most_common()
|
|
)
|
|
return (
|
|
"<div style='margin:24px 0;padding:16px;border-left:4px solid #dc2626;"
|
|
"background:#fef2f2;border-radius:4px;'>"
|
|
"<h2 style='margin:0 0 8px;color:#7f1d1d;font-size:16px;'>"
|
|
f"🍪 Cookie-Kohärenz ({len(findings)} Befunde)"
|
|
"</h2>"
|
|
f"<p style='margin:0 0 8px;font-size:12px;color:#475569;'>"
|
|
f"Vergleich Site-Deklaration vs Open Cookie Database (2287) + "
|
|
f"BreakPilot-KB.<br><strong>Verteilung:</strong> {type_summary}</p>"
|
|
+ "".join(cards)
|
|
+ (f"<p style='font-size:12px;color:#64748b;margin-top:8px;'>"
|
|
f"<em>… und {len(findings)-len(cards)} weitere "
|
|
f"(inkl. {len(findings) - len(mail_findings)} INFO/UNK) "
|
|
f"— vollständig in <code>cookies-full-*.csv</code> im "
|
|
f"ZIP-Anhang.</em></p>"
|
|
if len(findings) > len(cards) else "")
|
|
+ "</div>"
|
|
)
|