Files
breakpilot-compliance/backend-compliance/compliance/api/agent_check/_b19_wiring.py
T
Benjamin Admin 327e6a8984 fix(b19): UNK-Noise drastisch reduzieren
BMW4 zeigte 1037 UNK-Findings — die Mail wurde damit unleserlich.
Drei pragmatische Anpassungen:

1. UNK severity: LOW → INFO. Mail-Renderer zeigt jetzt nur
   HIGH/MEDIUM/LOW; INFO bleibt im API-Payload + CSV.
2. UNK wird NICHT emittiert wenn Vendor=First-Party-Owner
   (z.B. "BMW AG" auf bmw.de). Heuristik _is_first_party_owner
   vergleicht Vendor-Name gegen Domain-SLD.
3. auto_learning threshold ≥3 Sites → ≥1 Site. Second-time-Audit
   einer Site hat ihre eigenen Cookies bereits gelernt → kein
   UNK mehr. Single-site Auto-Learning ist absichtlich
   konservativ (Annotation, kein Truth).

Effekt: erwartete Reduktion bei BMW von 1037 UNK → ~50-100
(nur unbekannte 3rd-party-Vendoren). Mail wird lesbar, MAE-
Findings (Salesforce-as-essential) bleiben prominent sichtbar.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-06-08 08:20:39 +02:00

108 lines
4.4 KiB
Python

"""B19 wiring — Cookie-Coherence-Check (Salesforce-as-essential)."""
from __future__ import annotations
import html
import logging
from collections import Counter
from compliance.services.cookie_coherence_check import check_cookie_coherence
logger = logging.getLogger(__name__)
def run_b19(state: dict) -> None:
    """Run the B19 cookie-coherence step and record its results on *state*.

    First makes a best-effort call to the observation logger (a failure there
    is logged as a warning and otherwise ignored). Then runs
    ``check_cookie_coherence``; when it yields findings they are appended to
    ``state["extra_findings"]`` and additionally stored under
    ``state["cookie_coherence_findings"]``, with the rendered HTML section
    under ``state["cookie_coherence_html"]``. When there are no findings,
    *state* is left without those keys being written.
    """
    # Step 3 — auto-learning: hand this site's state to the observation
    # logger. Imported lazily inside the try so that an import failure is
    # treated like any other logger failure and does not abort the check.
    try:
        from compliance.services.cookie_observation_logger import (
            log_observations,
        )
        observation_stats = log_observations(state)
        logger.info("B19 observation-logger: %s", observation_stats)
    except Exception as exc:
        logger.warning("observation-logger skipped: %s", exc)

    coherence_findings = check_cookie_coherence(state)
    if coherence_findings:
        # Reuse the existing list object when there is a non-empty one, so
        # it is extended in place; otherwise start a fresh list.
        merged = state.get("extra_findings")
        if not merged:
            merged = []
        merged.extend(coherence_findings)
        state["extra_findings"] = merged

        state["cookie_coherence_html"] = _render(coherence_findings)
        state["cookie_coherence_findings"] = coherence_findings
        logger.info(
            "B19 cookie-coherence: %d finding(s)", len(coherence_findings)
        )
def _render(findings: list[dict]) -> str:
# Aggregate per type for the summary chip
by_type = Counter(f.get("check_id") for f in findings)
severity_color = {
"HIGH": "#dc2626", "MEDIUM": "#f59e0b", "LOW": "#64748b",
}
# Show only HIGH/MEDIUM/LOW cards in the mail; INFO (UNK auto-
# learning) bleibt nur in CSV — sonst überfüllt die Mail.
mail_findings = [
f for f in findings
if (f.get("severity") or "").upper() in ("HIGH", "MEDIUM", "LOW")
]
cards = []
for f in mail_findings[:12]:
sev = (f.get("severity") or "").upper()
color = severity_color.get(sev, "#475569")
meta = ""
if f.get("cookie_name"):
meta += (
"<div style='font-size:12px;color:#475569;margin-top:6px;'>"
f"<em>Cookie: <code>{html.escape(f['cookie_name'])}</code>"
f" · Vendor: {html.escape(f.get('vendor') or '?')}</em>"
"</div>"
)
if f.get("declared_category"):
meta += (
"<div style='font-size:11px;color:#7f1d1d;margin-top:3px;'>"
f"declared: <code>{html.escape(f['declared_category'])}</code>"
+ (f" · actual (KB): <code>{html.escape(f['actual_category'])}</code>"
if f.get("actual_category") else "")
+ "</div>"
)
cards.append(
f"<div style='margin:12px 0;padding:14px;background:#fff;"
f"border-left:3px solid {color};border-radius:4px;'>"
f"<div style='font-weight:600;color:{color};font-size:14px;'>"
f"{sev} · {html.escape(f.get('check_id') or '')}</div>"
f"<div style='font-size:14px;margin-top:4px;'>"
f"<strong>{html.escape(f.get('title') or '')}</strong></div>"
f"<div style='font-size:12px;color:#64748b;margin-top:2px;'>"
f"{html.escape(f.get('norm') or '')}</div>"
f"{meta}"
f"<div style='font-size:12px;color:#475569;margin-top:6px;'>"
f"<em>{html.escape(f.get('evidence') or '')}</em></div>"
f"<div style='font-size:13px;margin-top:8px;background:#dcfce7;"
f"padding:8px 10px;border-radius:4px;'>"
f"<strong>→ Abstellung:</strong> "
f"{html.escape(f.get('recommended_action') or '')}</div>"
"</div>"
)
type_summary = " · ".join(
f"{k.split('-')[-1]}: {v}" for k, v in by_type.most_common()
)
return (
"<div style='margin:24px 0;padding:16px;border-left:4px solid #dc2626;"
"background:#fef2f2;border-radius:4px;'>"
"<h2 style='margin:0 0 8px;color:#7f1d1d;font-size:16px;'>"
f"🍪 Cookie-Kohärenz ({len(findings)} Befunde)"
"</h2>"
f"<p style='margin:0 0 8px;font-size:12px;color:#475569;'>"
f"Vergleich Site-Deklaration vs Open Cookie Database (2287) + "
f"BreakPilot-KB.<br><strong>Verteilung:</strong> {type_summary}</p>"
+ "".join(cards)
+ (f"<p style='font-size:12px;color:#64748b;margin-top:8px;'>"
f"<em>… und {len(findings)-len(cards)} weitere "
f"(inkl. {len(findings) - len(mail_findings)} INFO/UNK) "
f"— vollständig in <code>cookies-full-*.csv</code> im "
f"ZIP-Anhang.</em></p>"
if len(findings) > len(cards) else "")
+ "</div>"
)