"""B12 — Chatbot-Cookie-Klassifikations-Check. Erkennt Chatbot-Cookies anhand der KB-Pattern und prüft 4 typische Fehler in der DSGVO/TDDDG-Klassifikation: CHAT-COOKIE-CLASS-001 Cookie als "technisch notwendig" deklariert, obwohl in derselben Tabelle Targeting/A-B/ Analytics-Funktionen erwähnt werden. Falsche Rechtsgrundlage → MEDIUM CHAT-COOKIE-CLASS-002 Chatbot-Cookie mit nur EINER Klassifikation, obwohl der Provider mehrere Funktionen bietet (tn UND cp) → MEDIUM CHAT-COOKIE-PURPOSE-001 Zweck-Beschreibung zu generisch ("Statistik", "Cookie") — Art. 13 DSGVO verlangt konkreten Verarbeitungszweck → LOW CHAT-COOKIE-RETENTION-001 Deklarierte Retention <90 Tage, KB-typische Retention >365 Tage — vermutlich unterdeklariert → HIGH (verlinkt B3) KB-Quelle: specialist_agents/_kb/chat_providers.json """ from __future__ import annotations import json import logging import os import re logger = logging.getLogger(__name__) _KB_PATH = os.path.join( os.path.dirname(__file__), "specialist_agents", "_kb", "chat_providers.json", ) def _load_kb() -> dict: try: with open(_KB_PATH, encoding="utf-8") as f: return json.load(f) except Exception as e: logger.warning("chatbot KB load failed: %s", e) return {"providers": {}} _KB = _load_kb() def _detect_provider(cookie_name: str) -> tuple[str, dict] | None: """Match a cookie name against KB patterns. Returns (provider_id, pattern_meta).""" if not cookie_name: return None providers = _KB.get("providers") or {} for prov_id, prov in providers.items(): for pat in prov.get("patterns") or []: try: if re.match(pat["regex"], cookie_name): return prov_id, pat except re.error: continue return None _TARGETING_HINTS = ( "targeting", "engagement", "a/b", "ab-test", "ab test", "analytics", "tracking", "marketing", "lead", "scoring", "personalisierung", "personalization", "remarketing", "retargeting", ) _GENERIC_PURPOSES = { "cookie", "statistik", "marketing", "tracking", "analyse", "performance", "session", "essential", "essenziell", "notwendig", "—", "?", "", } def _looks_targeting(text: str) -> bool: if not text: return False t = text.lower() return any(k in t for k in _TARGETING_HINTS) def _is_generic_purpose(purpose: str) -> bool: if not purpose: return True cleaned = re.sub(r"[\s\.,;:!?]+", " ", purpose.lower()).strip() if cleaned in _GENERIC_PURPOSES: return True return len(cleaned.split()) < 4 # weniger als 4 Wörter = zu kurz def check_chatbot_cookie_classification(state: dict) -> list[dict]: """Iterate cmp_vendors + cookies, emit findings for chatbot-cookie classification problems.""" cmp_vendors = state.get("cmp_vendors") or [] if not cmp_vendors: return [] findings: list[dict] = [] for v in cmp_vendors: vendor_name = (v.get("name") or "").strip() vendor_purpose = (v.get("purpose") or "").strip() vendor_category = (v.get("category") or "").strip().lower() for c in (v.get("cookies") or []): cname = (c.get("name") or "").strip() if not cname: continue match = _detect_provider(cname) if not match: continue prov_id, pat = match prov = _KB["providers"][prov_id] c_class = (c.get("category") or "").strip().lower() c_purpose = (c.get("purpose") or pat.get("purpose") or "").strip() # CLASS-001: TN deklariert + Targeting-Hint im Vendor-Purpose tn_words = ("technisch notwendig", "essenziell", "essential", "necessary", "strictly necessary") declared_tn = any(t in (c_class + " " + c_purpose).lower() for t in tn_words) if declared_tn and _looks_targeting(vendor_purpose): findings.append({ "check_id": "CHAT-COOKIE-CLASS-001", "severity": "MEDIUM", "severity_reason": "misclassified", "provider": prov.get("company") or prov_id, "cookie_name": cname, "title": ( f"Chatbot-Cookie '{cname}' ({prov.get('company')}) " "als technisch notwendig deklariert, Tabellen-Beschreibung " "erwähnt Targeting/Analytics" ), "norm": "DSGVO Art. 6 Abs. 1 lit. a + § 25 TDDDG", "evidence": ( f"Vendor-Purpose: '{vendor_purpose[:120]}' — " f"Klassifikation: '{c_class}'" ), "action": ( "Rechtsgrundlage korrigieren: bei Targeting/Analytics/" "A-B-Tests ist Einwilligung erforderlich. " "Cookie aus 'technisch notwendig' herausnehmen ODER " "die Tracking-Funktionen vom Chat-Kern trennen." ), }) # CLASS-002: nur EINE Klassifikation obwohl Provider hat tn UND cp has_tn = bool(prov.get("tn_functions")) has_cp = bool(prov.get("cp_functions")) if has_tn and has_cp: # Single-class declaration ohne Aufschlüsselung? # Heuristik: vendor.purpose enthält weder "auch" / "sowie" / # "und" zwischen tn und cp Begriffen purp_lc = vendor_purpose.lower() mentions_tn = any( f.replace("-", " ") in purp_lc or f.replace("-", "") in purp_lc for f in prov["tn_functions"] ) mentions_cp = any( f.replace("-", " ") in purp_lc or f.replace("-", "") in purp_lc for f in prov["cp_functions"] ) if mentions_tn != mentions_cp: # nennt nur eine Seite missing_side = "Targeting/Analytics" if mentions_tn else ( "Chat-Kontext (technisch notwendig)" ) findings.append({ "check_id": "CHAT-COOKIE-CLASS-002", "severity": "MEDIUM", "severity_reason": "incomplete", "provider": prov.get("company") or prov_id, "cookie_name": cname, "title": ( f"Chatbot-Cookie '{cname}' ({prov.get('company')}) " "ohne Funktions-Differenzierung — fehlende Seite: " f"{missing_side}" ), "norm": "DSGVO Art. 13 Abs. 1 lit. c + d", "action": ( f"In der Cookie-Tabelle für '{cname}' sowohl die " "tn-Funktionen (Chat-Kontext) als auch die " "cp-Funktionen (Targeting/Analytics) getrennt " "ausweisen — sonst kann der Nutzer Consent nicht " "informiert geben." ), }) # PURPOSE-001: zu generischer Zweck if _is_generic_purpose(c_purpose): findings.append({ "check_id": "CHAT-COOKIE-PURPOSE-001", "severity": "LOW", "severity_reason": "incomplete", "provider": prov.get("company") or prov_id, "cookie_name": cname, "title": ( f"Chatbot-Cookie '{cname}' mit zu generischem Zweck" ), "norm": "DSGVO Art. 13 Abs. 1 lit. c", "evidence": f"Zweck-Text: '{c_purpose}'", "action": ( f"Konkreten Verarbeitungszweck angeben — z.B. statt " f"'{c_purpose or 'Cookie'}' " f"'{pat.get('purpose')}' nach KB-Empfehlung." ), }) # RETENTION-001: deklariert <90d, KB sagt >365d from .retention_comparator import parse_duration_to_days declared_str = ( c.get("duration") or c.get("persistence") or c.get("expiry") or "" ) declared_days, _kind = parse_duration_to_days(declared_str) typical = prov.get("typical_retention_days") or 0 if declared_days is not None and typical: if declared_days < 90 and typical >= 250: findings.append({ "check_id": "CHAT-COOKIE-RETENTION-001", "severity": "HIGH", "severity_reason": "factually_wrong", "provider": prov.get("company") or prov_id, "cookie_name": cname, "title": ( f"Chatbot-Cookie '{cname}' Speicherdauer " f"vermutlich unterdeklariert" ), "norm": "DSGVO Art. 13 Abs. 2 lit. a", "evidence": ( f"Deklariert: {int(declared_days)} Tage — " f"KB-typisch für {prov.get('company')}: " f"{typical} Tage" ), "action": ( f"Tatsächliche Cookie-Lifetime im Browser prüfen " f"und mit '{declared_str}' abgleichen. " f"Vermutung: real ~{typical} Tage statt deklariert " f"{int(declared_days)}." ), }) if findings: logger.info("B12 chatbot-classification: %d findings", len(findings)) return findings