Files
breakpilot-compliance/backend-compliance/compliance/services/cookie_knowledge.py
T
Benjamin Admin 6c223c7c9b
CI / detect-changes (push) Successful in 10s
CI / branch-name (push) Has been skipped
CI / guardrail-integrity (push) Has been skipped
CI / secret-scan (push) Has been skipped
CI / dep-audit (push) Has been skipped
CI / sbom-scan (push) Has been skipped
CI / validate-canonical-controls (push) Successful in 14s
CI / loc-budget (push) Failing after 15s
CI / go-lint (push) Has been skipped
CI / python-lint (push) Has been skipped
CI / nodejs-lint (push) Has been skipped
CI / nodejs-build (push) Successful in 2m43s
CI / test-go (push) Has been skipped
CI / iace-gt-coverage (push) Has been skipped
CI / test-python-backend (push) Successful in 37s
CI / test-python-document-crawler (push) Has been skipped
CI / test-python-dsms-gateway (push) Has been skipped
feat(compliance-check): exec-summary + voll-audit + TDM-respect + cookie-KB-extended + saving-scan-funnel
P1 — Exec-Summary oben im Email-Report (4 KPIs + 2 CTAs, dunkler Gradient)
P3 — no_direct_sales-Flag fuer OEM-Konfigurator-Sites; AGB/Widerruf als
     "NICHT ANWENDBAR" (grau) statt "NICHT GEFUNDEN" (rot)
P5 — Voll-Audit Unification: alle Findings (MC + Pflichtangaben + Vendor +
     Redundanz) in /data/compliance_audits.db.unified_findings; neuer
     /api/compliance/agent/findings/<id> Endpoint + FindingsTab im Audit-UI
     mit Filter + CSV-Export
P7 — Crawl-Hardening: TDM-Reservation-Check (robots.txt / ai.txt / Header /
     Meta) vor jedem Run mit 24h-Cache; HeadlessChrome-UA (Firma noch nicht
     gegruendet — Switch via BREAKPILOT_BRANDED_UA env); per-Domain
     Rate-Limit 1 req/s + max 2 concurrent
P2 — Cookie-Knowledge-DB additiv erweitert (35 -> 74 Cookies): Adobe, Meta,
     Microsoft, LinkedIn, TikTok, HubSpot, Marketo, Salesforce, Hotjar,
     FullStory, Mouseflow, Intercom, Drift, Zendesk, Cloudflare, Stripe,
     OneTrust/Cookiebot/Usercentrics, Matomo, Pinterest, Snapchat, X/Twitter,
     YouTube, Vimeo, Klaviyo, Mailchimp, Mixpanel, Segment, Amplitude,
     Optimizely, Datadog; Wire-in in cookie_function_classifier liefert
     compliance_risk-Label (kritisch/hoch/mittel/gering) pro Vendor
A  — k-Anonymitaets-Helper (benchmark_k_anonymity) fuer P6-Vorbereitung
B  — Cross-Tenant-Domain-Assertion im /findings-Endpoint (expected_domain
     Query-Param -> 403 bei Mismatch)
C  — Saving-Scan-Funnel: /api/compliance/agent/saving-scan/start mit
     Validierung + 24h-Rate-Limit pro Domain + Lead-Persistenz in
     saving_scan_leads + Auto-Discovery via _run_compliance_check; 6 Tests
D  — Risk-Badge im Email-Vendor-Row

Rechtliche Leitplanken (Memory feedback_oem_data_legal.md): nur eigene
Knapp-Bewertungen + Source-Pointer, keine 1:1-Kopien fremder CMP-Texte.
TDM-Opt-Out-Respect nach § 44b UrhG. KEINE Schema-Aenderungen — alles in
Sidecar-SQLite.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-05-18 23:48:34 +02:00

107 lines
3.7 KiB
Python

"""
Cookie-Knowledge Facade — vereint die Basis-KB (cookie_knowledge_db) mit
der Erweiterung (cookie_knowledge_extended) hinter einer einzigen API.
Caller sollten von hier importieren statt von einer der beiden Sub-DBs.
from compliance.services.cookie_knowledge import (
lookup_cookie,
enrich_vendor_with_knowledge,
summarize_compliance_risk,
compliance_risk_label,
)
Lookup-Reihenfolge: Extended (kuratiert, juenger) vor Base. Dadurch
koennen wir Eintraege ueberschreiben ohne die Base zu touchen.
"""
from __future__ import annotations
from compliance.services.cookie_knowledge_db import (
CookieKnowledge,
lookup_cookie as _lookup_base,
)
from compliance.services.cookie_knowledge_extended import (
KB_EXT,
lookup_cookie_extended,
)
def lookup_cookie(name: str) -> CookieKnowledge | None:
    """Look up knowledge for a cookie name, preferring the extended KB.

    The extended lookup is tried first. The base lookup is consulted only
    when the extended one returns a falsy result, so an extended entry takes
    precedence over a base entry for the same name.

    Args:
        name: Cookie name to resolve.

    Returns:
        The result of the extended lookup if truthy, otherwise whatever the
        base lookup returns.
    """
    extended_hit = lookup_cookie_extended(name)
    if extended_hit:
        return extended_hit
    return _lookup_base(name)
def enrich_vendor_with_knowledge(vendor: dict) -> dict:
    """Return a copy of ``vendor`` with cookie knowledge and a risk summary.

    Each entry of ``vendor["cookies"]`` is looked up by its ``"name"`` via
    ``lookup_cookie``. Cookies with a hit are replaced by a shallow copy that
    carries the hit under ``"knowledge"``; cookies without a hit are passed
    through as the same object. The input ``vendor`` dict is not modified.

    Args:
        vendor: Vendor dict; a missing or empty ``"cookies"`` entry is
            treated as an empty list.

    Returns:
        A new dict with all keys of ``vendor``, ``"cookies"`` replaced by the
        enriched list, and ``"compliance_risk"`` set to the result of
        ``summarize_compliance_risk`` for the enriched vendor.
    """

    def _attach_knowledge(cookie: dict) -> dict:
        # Only copy the cookie when there is something to attach.
        found = lookup_cookie(cookie.get("name", ""))
        if not found:
            return cookie
        return {**cookie, "knowledge": found}

    raw_cookies = vendor.get("cookies") or []
    result = dict(vendor)
    result["cookies"] = [_attach_knowledge(cookie) for cookie in raw_cookies]
    result["compliance_risk"] = summarize_compliance_risk(result)
    return result
def _mentions_us(country: str) -> bool:
    """Return True if ``country`` contains ``us`` or ``usa`` as a whole word.

    A plain substring test for ``"us"`` also matches names such as
    "Austria", "Australia", "Russia" or "Cyprus", which would wrongly count
    those vendors as US-based. Splitting on non-letters avoids that while
    still matching values like "US", "USA" or "IE/US".
    """
    lowered = country.lower()
    words = "".join(ch if ch.isalpha() else " " for ch in lowered).split()
    return any(word in ("us", "usa") for word in words)


def summarize_compliance_risk(vendor: dict) -> dict:
    """Aggregate re-identification risk and Schrems-II exposure for a vendor.

    For every cookie in ``vendor["cookies"]`` the knowledge entry is taken
    from the cookie's ``"knowledge"`` key, falling back to ``lookup_cookie``
    on the cookie's ``"name"``. Cookies without any knowledge are skipped and
    do not count as classified.

    Args:
        vendor: Vendor dict; a missing or empty ``"cookies"`` entry is
            treated as an empty list.

    Returns:
        A dict with:
            ``reid_risk_distribution``: count per lower-cased ``reid_risk``
                value; ``"high"``, ``"medium"`` and ``"low"`` are always
                present, other values get their own key, and a missing
                value counts as ``"low"``.
            ``high_risk_cookie_count``: the ``"high"`` count.
            ``schrems_ii_affected_cookies``: cookies whose ``vendor_country``
                names the US as a whole word, or whose ``schrems_ii_status``
                contains "schrems" (case-insensitive).
            ``strictly_necessary_cookies``: cookies whose
                ``technical_necessity`` equals ``"full"``.
            ``total_classified``: cookies for which knowledge was found.
            ``label``: result of ``compliance_risk_label`` for these counts.
    """
    risk_counts = {"high": 0, "medium": 0, "low": 0}
    schrems_affected = 0
    strictly_necessary = 0
    classified = 0

    for cookie in vendor.get("cookies") or []:
        knowledge = cookie.get("knowledge") or lookup_cookie(cookie.get("name", ""))
        if not knowledge:
            continue
        classified += 1

        risk = (knowledge.get("reid_risk") or "low").lower()
        risk_counts[risk] = risk_counts.get(risk, 0) + 1

        us_based = _mentions_us(knowledge.get("vendor_country") or "")
        status = (knowledge.get("schrems_ii_status") or "").lower()
        if us_based or "schrems" in status:
            schrems_affected += 1

        if knowledge.get("technical_necessity") == "full":
            strictly_necessary += 1

    summary = {
        "reid_risk_distribution": risk_counts,
        "high_risk_cookie_count": risk_counts["high"],
        "schrems_ii_affected_cookies": schrems_affected,
        "strictly_necessary_cookies": strictly_necessary,
        "total_classified": classified,
    }
    # The label is derived from the counts above, so pass the summary itself
    # instead of rebuilding the same three values in a second dict.
    summary["label"] = compliance_risk_label(summary)
    return summary
def compliance_risk_label(summary: dict) -> str:
    """Map a risk summary to a compact badge label.

    Args:
        summary: Dict with ``total_classified``, ``high_risk_cookie_count``
            and ``schrems_ii_affected_cookies``. Missing or ``None`` counts
            are treated as 0.

    Returns:
        ``"unklar"`` when ``summary`` is empty/``None`` or nothing was
        classified; otherwise the first matching rule of:
            ``"kritisch"``: at least 3 high-risk and at least 2 Schrems-II
                affected cookies.
            ``"hoch"``: at least 2 high-risk cookies, or at least 1
                high-risk together with at least 1 Schrems-II affected.
            ``"mittel"``: at least 1 high-risk or 1 Schrems-II affected.
            ``"gering"``: none of the above.
    """
    if not summary or not summary.get("total_classified"):
        return "unklar"

    # `or 0` also covers keys that are present but explicitly None, which
    # would otherwise raise TypeError in the comparisons below.
    high = summary.get("high_risk_cookie_count") or 0
    schrems = summary.get("schrems_ii_affected_cookies") or 0

    if high >= 3 and schrems >= 2:
        return "kritisch"
    if high >= 2 or (high >= 1 and schrems >= 1):
        return "hoch"
    if high >= 1 or schrems >= 1:
        return "mittel"
    return "gering"
def kb_size() -> dict:
    """Report entry counts of both knowledge bases for the admin/health endpoint.

    Returns:
        A dict with:
            ``base_entries``: number of keys in the base KB.
            ``extended_entries``: number of keys in ``KB_EXT``.
            ``extended_overrides_base``: number of keys present in both.
            ``total_unique``: number of distinct keys across both.
    """
    # The base KB mapping is imported at call time, not at module level.
    from compliance.services.cookie_knowledge_db import KB as _KB_BASE

    base_names = set(_KB_BASE.keys())
    extended_names = set(KB_EXT.keys())
    shared_names = base_names.intersection(extended_names)
    all_names = base_names.union(extended_names)

    return {
        "base_entries": len(base_names),
        "extended_entries": len(extended_names),
        "extended_overrides_base": len(shared_names),
        "total_unique": len(all_names),
    }