ff796fb480
#19 Chatbot-Cookie-Klassifikation: - chat_providers.json KB mit 11 Providern (iAdvize, Intercom, Tidio, Drift, Userlike, Zendesk, LivePerson, HubSpot, Vertex AI, OpenAI, Anthropic Claude). Pro Provider: Cookie-Pattern-Regex, typical_retention_days, tn_functions vs cp_functions, ai_capable. - chatbot_cookie_classification_check.py mit 4 KORRIGIERTEN Checks: CHAT-COOKIE-CLASS-001 (MED) — TN deklariert + Vendor-Purpose erwähnt Targeting/Analytics/A-B-Tests CHAT-COOKIE-CLASS-002 (MED) — Provider hat tn+cp Funktionen, Tabelle nennt nur eine Seite → keine Einwilligungs-Differenzierung CHAT-COOKIE-PURPOSE-001 (LOW) — Zweck zu generisch (Art. 13 DSGVO konkret) CHAT-COOKIE-RETENTION-001 (HIGH) — deklariert <90d, KB-typisch >365d → vermutlich unterdeklariert NEU vs vorigem Plan: kein "eigene Banner-Kategorie Chat/AI"-Check — gesetzlich nicht vorgeschrieben (Vermischung Zweck-Transparenz vs Kategorie-Name). Anwender-Frage berechtigt, Konzept geschärft. - _b12_wiring.py + Orchestrator-Wire + V2-Compose-Slot - Cookie-Inventar mit [Chat]/[Chat+AI]-Tag pro Cookie-Name (KB-Lookup) - Smoke (3 Vendors / 5 Cookies): 9 findings korrekt (3 HIGH RETENTION, 3 MEDIUM CLASS-001, 4 LOW PURPOSE) Cookie-Matrix Scan (Browser-Vergleich gegen safetykon.de): - consent-tester/services/cookie_behavior_per_browser.py: eigener fokussierter Scanner. Pro Browser-Profile: cookies before / after reject / after accept in separaten Kontexten. Sequenzielle Runs statt parallel (Race-Conditions). - routes_cookie_matrix.py POST /scan-cookie-matrix - Live-Test safetykon.de: chromium=1, firefox=0, webkit=1, mobile- safari=1 nach reject — Firefox setzt KEIN Cookie nach Reject! (consent-tester Rebuild brachte playwright install-deps für system-libs) Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
250 lines
10 KiB
Python
250 lines
10 KiB
Python
"""B12 — Chatbot-Cookie-Klassifikations-Check.
|
|
|
|
Detects chatbot cookies via the KB patterns and checks for four typical
mistakes in their GDPR (DSGVO) / TDDDG classification:

  CHAT-COOKIE-CLASS-001      Cookie declared as "technically necessary"
                             although the same table mentions targeting /
                             A/B / analytics functions. Wrong legal
                             basis → MEDIUM
  CHAT-COOKIE-CLASS-002      Chatbot cookie with only ONE classification
                             although the provider offers several
                             functions (tn AND cp) → MEDIUM
  CHAT-COOKIE-PURPOSE-001    Purpose description too generic ("Statistik",
                             "Cookie") — Art. 13 GDPR requires a concrete
                             processing purpose → LOW
  CHAT-COOKIE-RETENTION-001  Declared retention < 90 days, KB-typical
                             retention > 365 days — probably
                             under-declared → HIGH (linked to B3)

KB source: specialist_agents/_kb/chat_providers.json
|
|
"""
|
|
|
|
from __future__ import annotations
|
|
|
|
import json
|
|
import logging
|
|
import os
|
|
import re
|
|
|
|
logger = logging.getLogger(__name__)
|
|
|
|
_KB_PATH = os.path.join(
|
|
os.path.dirname(__file__),
|
|
"specialist_agents", "_kb", "chat_providers.json",
|
|
)
|
|
|
|
|
|
def _load_kb() -> dict:
|
|
try:
|
|
with open(_KB_PATH, encoding="utf-8") as f:
|
|
return json.load(f)
|
|
except Exception as e:
|
|
logger.warning("chatbot KB load failed: %s", e)
|
|
return {"providers": {}}
|
|
|
|
|
|
_KB = _load_kb()
|
|
|
|
|
|
def _detect_provider(cookie_name: str) -> tuple[str, dict] | None:
|
|
"""Match a cookie name against KB patterns. Returns (provider_id, pattern_meta)."""
|
|
if not cookie_name:
|
|
return None
|
|
providers = _KB.get("providers") or {}
|
|
for prov_id, prov in providers.items():
|
|
for pat in prov.get("patterns") or []:
|
|
try:
|
|
if re.match(pat["regex"], cookie_name):
|
|
return prov_id, pat
|
|
except re.error:
|
|
continue
|
|
return None
|
|
|
|
|
|
# Lower-case substrings that mark a purpose text as targeting / analytics
# related; consumed by ``_looks_targeting``.
_TARGETING_HINTS = (
    "targeting",
    "engagement",
    "a/b",
    "ab-test",
    "ab test",
    "analytics",
    "tracking",
    "marketing",
    "lead",
    "scoring",
    "personalisierung",
    "personalization",
    "remarketing",
    "retargeting",
)
|
|
|
|
|
|
# Normalised purpose texts that are considered too generic on their own;
# consumed by ``_is_generic_purpose``.
_GENERIC_PURPOSES = {
    "cookie",
    "statistik",
    "marketing",
    "tracking",
    "analyse",
    "performance",
    "session",
    "essential",
    "essenziell",
    "notwendig",
    "—",
    "?",
    "",
}
|
|
|
|
|
|
def _looks_targeting(text: str) -> bool:
    """Return True when ``text`` contains any targeting/analytics hint.

    The comparison is a case-insensitive substring search against
    ``_TARGETING_HINTS``; empty or missing text never matches.
    """
    if text:
        lowered = text.lower()
        for hint in _TARGETING_HINTS:
            if hint in lowered:
                return True
    return False
|
|
|
|
|
|
def _is_generic_purpose(purpose: str) -> bool:
    """Return True when a purpose text is too generic to be informative.

    A purpose counts as generic when it is empty, when its normalised form
    (lower-cased, whitespace and ``. , ; : ! ?`` collapsed to single spaces)
    is listed in ``_GENERIC_PURPOSES``, or when it has fewer than four words.
    """
    if purpose:
        normalized = re.sub(r"[\s\.,;:!?]+", " ", purpose.lower()).strip()
        word_count = len(normalized.split())
        # Fewer than 4 words is treated as too short to name a concrete purpose.
        return normalized in _GENERIC_PURPOSES or word_count < 4
    return True
|
|
|
|
|
|
def _mentions_any_function(functions, text_lc: str) -> bool:
    """Return True if any KB function name occurs in the lower-cased text.

    Each name is compared case-insensitively in two spellings: hyphens
    replaced by spaces and hyphens removed. Non-string and blank entries are
    ignored, because an empty needle is "contained" in every text.
    """
    for fn in functions or ():
        if not isinstance(fn, str):
            continue
        needle = fn.lower()
        for variant in (needle.replace("-", " "), needle.replace("-", "")):
            if variant.strip() and variant in text_lc:
                return True
    return False


def check_chatbot_cookie_classification(state: dict) -> list[dict]:
    """Emit findings for chatbot-cookie classification problems.

    Iterates ``state["cmp_vendors"]`` and, for every vendor cookie whose name
    matches a KB provider pattern, evaluates four checks:

    * ``CHAT-COOKIE-CLASS-001`` (MEDIUM): the cookie's category/purpose
      declares it technically necessary while the vendor purpose contains a
      targeting/analytics hint.
    * ``CHAT-COOKIE-CLASS-002`` (MEDIUM): the KB lists both ``tn_functions``
      and ``cp_functions`` for the provider, but the vendor purpose mentions
      only one of the two sides.
    * ``CHAT-COOKIE-PURPOSE-001`` (LOW): the cookie purpose is too generic.
    * ``CHAT-COOKIE-RETENTION-001`` (HIGH): declared retention is below
      90 days while the KB-typical retention is at least 250 days.

    Args:
        state: Mapping with an optional ``"cmp_vendors"`` list. Vendors are
            read via ``"purpose"`` and ``"cookies"``; cookies via ``"name"``,
            ``"category"``, ``"purpose"`` and ``"duration"`` /
            ``"persistence"`` / ``"expiry"``.

    Returns:
        A list of finding dicts (``check_id``, ``severity``,
        ``severity_reason``, ``provider``, ``cookie_name``, ``title``,
        ``norm``, ``action`` and, for most checks, ``evidence``). Empty when
        there are no vendors or nothing is flagged.
    """
    cmp_vendors = state.get("cmp_vendors") or []
    if not cmp_vendors:
        return []

    # Still imported lazily at call time, but once per call instead of once
    # per matched cookie inside the inner loop.
    from .retention_comparator import parse_duration_to_days

    tn_words = ("technisch notwendig", "essenziell", "essential",
                "necessary", "strictly necessary")

    findings: list[dict] = []
    for v in cmp_vendors:
        # NOTE(review): the vendor-level "name" and "category" are not
        # consulted by any check; only the cookie's own category feeds
        # CLASS-001 -- confirm that vendor category should not be inherited.
        vendor_purpose = (v.get("purpose") or "").strip()
        purp_lc = vendor_purpose.lower()
        vendor_targeting = _looks_targeting(vendor_purpose)

        for c in (v.get("cookies") or []):
            cname = (c.get("name") or "").strip()
            if not cname:
                continue
            match = _detect_provider(cname)
            if not match:
                continue
            prov_id, pat = match
            prov = _KB["providers"][prov_id]
            # One label for all user-facing texts so a KB entry without
            # "company" shows the provider id instead of the string "None".
            provider = prov.get("company") or prov_id

            c_class = (c.get("category") or "").strip().lower()
            declared_purpose = (c.get("purpose") or "").strip()
            # Falls back to the KB purpose when the table declares none.
            c_purpose = (c.get("purpose") or pat.get("purpose")
                         or "").strip()

            # CLASS-001: declared technically necessary + targeting hint in
            # the vendor purpose.
            declaration = (c_class + " " + c_purpose).lower()
            declared_tn = any(t in declaration for t in tn_words)
            if declared_tn and vendor_targeting:
                findings.append({
                    "check_id": "CHAT-COOKIE-CLASS-001",
                    "severity": "MEDIUM",
                    "severity_reason": "misclassified",
                    "provider": provider,
                    "cookie_name": cname,
                    "title": (
                        f"Chatbot-Cookie '{cname}' ({provider}) "
                        "als technisch notwendig deklariert, Tabellen-Beschreibung "
                        "erwähnt Targeting/Analytics"
                    ),
                    "norm": "DSGVO Art. 6 Abs. 1 lit. a + § 25 TDDDG",
                    "evidence": (
                        f"Vendor-Purpose: '{vendor_purpose[:120]}' — "
                        f"Klassifikation: '{c_class}'"
                    ),
                    "action": (
                        "Rechtsgrundlage korrigieren: bei Targeting/Analytics/"
                        "A-B-Tests ist Einwilligung erforderlich. "
                        "Cookie aus 'technisch notwendig' herausnehmen ODER "
                        "die Tracking-Funktionen vom Chat-Kern trennen."
                    ),
                })

            # CLASS-002: provider has tn AND cp functions, but the vendor
            # purpose names only one side.
            if prov.get("tn_functions") and prov.get("cp_functions"):
                mentions_tn = _mentions_any_function(
                    prov["tn_functions"], purp_lc
                )
                mentions_cp = _mentions_any_function(
                    prov["cp_functions"], purp_lc
                )
                if mentions_tn != mentions_cp:
                    missing_side = "Targeting/Analytics" if mentions_tn else (
                        "Chat-Kontext (technisch notwendig)"
                    )
                    findings.append({
                        "check_id": "CHAT-COOKIE-CLASS-002",
                        "severity": "MEDIUM",
                        "severity_reason": "incomplete",
                        "provider": provider,
                        "cookie_name": cname,
                        "title": (
                            f"Chatbot-Cookie '{cname}' ({provider}) "
                            "ohne Funktions-Differenzierung — fehlende Seite: "
                            f"{missing_side}"
                        ),
                        "norm": "DSGVO Art. 13 Abs. 1 lit. c + d",
                        "action": (
                            f"In der Cookie-Tabelle für '{cname}' sowohl die "
                            "tn-Funktionen (Chat-Kontext) als auch die "
                            "cp-Funktionen (Targeting/Analytics) getrennt "
                            "ausweisen — sonst kann der Nutzer Consent nicht "
                            "informiert geben."
                        ),
                    })

            # PURPOSE-001: purpose too generic.
            if _is_generic_purpose(c_purpose):
                kb_purpose = pat.get("purpose")
                action = "Konkreten Verarbeitungszweck angeben"
                if kb_purpose:
                    # Quote what the table actually declares, not the KB
                    # fallback, so the hint never reads "replace X with X".
                    shown = declared_purpose or "Cookie"
                    action += (
                        f" — z.B. statt '{shown}' "
                        f"'{kb_purpose}' nach KB-Empfehlung."
                    )
                else:
                    # No KB recommendation available: omit it rather than
                    # printing the literal 'None'.
                    action += "."
                findings.append({
                    "check_id": "CHAT-COOKIE-PURPOSE-001",
                    "severity": "LOW",
                    "severity_reason": "incomplete",
                    "provider": provider,
                    "cookie_name": cname,
                    "title": (
                        f"Chatbot-Cookie '{cname}' mit zu generischem Zweck"
                    ),
                    "norm": "DSGVO Art. 13 Abs. 1 lit. c",
                    "evidence": f"Zweck-Text: '{c_purpose}'",
                    "action": action,
                })

            # RETENTION-001: declared < 90 days vs. long KB-typical retention.
            # NOTE(review): the module documentation describes the KB side as
            # "> 365 days", the threshold implemented here is 250 days --
            # confirm which one is intended.
            declared_str = (
                c.get("duration") or c.get("persistence")
                or c.get("expiry") or ""
            )
            declared_days, _kind = parse_duration_to_days(declared_str)
            typical = prov.get("typical_retention_days") or 0
            if (declared_days is not None and typical
                    and declared_days < 90 and typical >= 250):
                declared_int = int(declared_days)
                findings.append({
                    "check_id": "CHAT-COOKIE-RETENTION-001",
                    "severity": "HIGH",
                    "severity_reason": "factually_wrong",
                    "provider": provider,
                    "cookie_name": cname,
                    "title": (
                        f"Chatbot-Cookie '{cname}' Speicherdauer "
                        "vermutlich unterdeklariert"
                    ),
                    "norm": "DSGVO Art. 13 Abs. 2 lit. a",
                    "evidence": (
                        f"Deklariert: {declared_int} Tage — "
                        f"KB-typisch für {provider}: "
                        f"{typical} Tage"
                    ),
                    "action": (
                        "Tatsächliche Cookie-Lifetime im Browser prüfen "
                        f"und mit '{declared_str}' abgleichen. "
                        f"Vermutung: real ~{typical} Tage statt deklariert "
                        f"{declared_int}."
                    ),
                })

    if findings:
        logger.info("B12 chatbot-classification: %d findings", len(findings))
    return findings
|