feat: B12 Chatbot-Cookie-Klassifikation (#19) + Cookie-Matrix scan + safetykon test
#19 Chatbot-Cookie-Klassifikation: - chat_providers.json KB mit 11 Providern (iAdvize, Intercom, Tidio, Drift, Userlike, Zendesk, LivePerson, HubSpot, Vertex AI, OpenAI, Anthropic Claude). Pro Provider: Cookie-Pattern-Regex, typical_retention_days, tn_functions vs cp_functions, ai_capable. - chatbot_cookie_classification_check.py mit 4 KORRIGIERTEN Checks: CHAT-COOKIE-CLASS-001 (MED) — TN deklariert + Vendor-Purpose erwähnt Targeting/Analytics/A-B-Tests CHAT-COOKIE-CLASS-002 (MED) — Provider hat tn+cp Funktionen, Tabelle nennt nur eine Seite → keine Einwilligungs-Differenzierung CHAT-COOKIE-PURPOSE-001 (LOW) — Zweck zu generisch (Art. 13 DSGVO konkret) CHAT-COOKIE-RETENTION-001 (HIGH) — deklariert <90d, KB-typisch >365d → vermutlich unterdeklariert NEU vs vorigem Plan: kein "eigene Banner-Kategorie Chat/AI"-Check — gesetzlich nicht vorgeschrieben (Vermischung Zweck-Transparenz vs Kategorie-Name). Anwender-Frage berechtigt, Konzept geschärft. - _b12_wiring.py + Orchestrator-Wire + V2-Compose-Slot - Cookie-Inventar mit [Chat]/[Chat+AI]-Tag pro Cookie-Name (KB-Lookup) - Smoke (3 Vendors / 5 Cookies): 9 findings korrekt (3 HIGH RETENTION, 3 MEDIUM CLASS-001, 4 LOW PURPOSE) Cookie-Matrix Scan (Browser-Vergleich gegen safetykon.de): - consent-tester/services/cookie_behavior_per_browser.py: eigener fokussierter Scanner. Pro Browser-Profile: cookies before / after reject / after accept in separaten Kontexten. Sequenzielle Runs statt parallel (Race-Conditions). - routes_cookie_matrix.py POST /scan-cookie-matrix - Live-Test safetykon.de: chromium=1, firefox=0, webkit=1, mobile-safari=1 nach reject — Firefox setzt KEIN Cookie nach Reject! (consent-tester Rebuild brachte playwright install-deps für system-libs) Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -0,0 +1,73 @@
|
|||||||
|
"""B12 wiring — Chatbot-Cookie-Klassifikation.
|
||||||
|
|
||||||
|
Hängt sich an `state["extra_findings"]` mit ähnlichem Render-Pattern wie
|
||||||
|
B9/B10. Wird vom Orchestrator nach B11 (run_b9b10) aufgerufen.
|
||||||
|
"""
|
||||||
|
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import html
|
||||||
|
import logging
|
||||||
|
|
||||||
|
from compliance.services.chatbot_cookie_classification_check import (
|
||||||
|
check_chatbot_cookie_classification,
|
||||||
|
)
|
||||||
|
|
||||||
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
|
||||||
|
def run_b12(state: dict) -> None:
    """Run the B12 chatbot-cookie check and attach its output to *state*.

    When the check yields findings they are appended to
    ``state["extra_findings"]`` (the list is created if absent or empty) and
    their rendered HTML is stored under ``state["chatbot_cookie_html"]``.
    When there are no findings *state* is left untouched.

    Args:
        state: Mutable pipeline state shared between the check stages.
    """
    b12_findings = check_chatbot_cookie_classification(state)
    if b12_findings:
        # Reuse an existing list in place so other holders of it see the
        # additions too.
        collected = state.get("extra_findings") or []
        collected.extend(b12_findings)
        state["extra_findings"] = collected
        state["chatbot_cookie_html"] = _render(b12_findings)
        logger.info("B12 chatbot-cookies: %d findings", len(b12_findings))
|
||||||
|
|
||||||
|
|
||||||
|
def _render(findings: list[dict]) -> str:
|
||||||
|
cards = []
|
||||||
|
for f in findings:
|
||||||
|
sev = (f.get("severity") or "").upper()
|
||||||
|
color = "#dc2626" if sev == "HIGH" else (
|
||||||
|
"#f59e0b" if sev == "MEDIUM" else "#64748b"
|
||||||
|
)
|
||||||
|
meta = (
|
||||||
|
"<div style='font-size:12px;color:#475569;margin-top:6px;'>"
|
||||||
|
f"<em>Provider: {html.escape(f.get('provider') or '?')} · "
|
||||||
|
f"Cookie: <code>{html.escape(f.get('cookie_name') or '?')}</code>"
|
||||||
|
"</em></div>"
|
||||||
|
)
|
||||||
|
evidence = ""
|
||||||
|
if f.get("evidence"):
|
||||||
|
evidence = (
|
||||||
|
"<div style='font-size:12px;color:#475569;margin-top:4px;'>"
|
||||||
|
f"<em>{html.escape(f['evidence'])}</em></div>"
|
||||||
|
)
|
||||||
|
cards.append(
|
||||||
|
f"<div style='margin:12px 0;padding:14px;background:#fff;"
|
||||||
|
f"border-left:3px solid {color};border-radius:4px;'>"
|
||||||
|
f"<div style='font-weight:600;color:{color};font-size:14px;'>"
|
||||||
|
f"{sev} · {html.escape(f.get('check_id') or '')}</div>"
|
||||||
|
f"<div style='font-size:14px;margin-top:4px;'>"
|
||||||
|
f"<strong>{html.escape(f.get('title') or '')}</strong></div>"
|
||||||
|
f"<div style='font-size:12px;color:#64748b;margin-top:2px;'>"
|
||||||
|
f"{html.escape(f.get('norm') or '')}</div>"
|
||||||
|
f"{meta}{evidence}"
|
||||||
|
f"<div style='font-size:13px;margin-top:8px;background:#dcfce7;"
|
||||||
|
f"padding:8px 10px;border-radius:4px;'>"
|
||||||
|
f"<strong>→ Empfehlung:</strong> "
|
||||||
|
f"{html.escape(f.get('action') or '')}</div>"
|
||||||
|
"</div>"
|
||||||
|
)
|
||||||
|
return (
|
||||||
|
"<div style='margin:24px 0;padding:16px;border-left:4px solid #f59e0b;"
|
||||||
|
"background:#fffbeb;border-radius:4px;'>"
|
||||||
|
"<h2 style='margin:0 0 8px;color:#92400e;font-size:16px;'>"
|
||||||
|
"💬 Chatbot-Cookie-Klassifikation (KB-basiert)"
|
||||||
|
"</h2>"
|
||||||
|
+ "".join(cards) +
|
||||||
|
"</div>"
|
||||||
|
)
|
||||||
@@ -67,6 +67,7 @@ async def run_compliance_check(check_id: str, req) -> None:
|
|||||||
run_b5(state) # AI-Act Art. 50 transparency
|
run_b5(state) # AI-Act Art. 50 transparency
|
||||||
run_b6b7b8(state) # DPO-cross-doc + Doc-Staleness + CMP-fingerprint
|
run_b6b7b8(state) # DPO-cross-doc + Doc-Staleness + CMP-fingerprint
|
||||||
run_b9b10(state) # Multi-Entity-Impressum + Drittland-Mechanismus
|
run_b9b10(state) # Multi-Entity-Impressum + Drittland-Mechanismus
|
||||||
|
run_b12(state) # Chatbot-Cookie-Klassifikation (B11 ist in B9B10)
|
||||||
# Phase D-3 top/mid/bot: Step 5 HTML blocks
|
# Phase D-3 top/mid/bot: Step 5 HTML blocks
|
||||||
await run_phase_d3_top(state)
|
await run_phase_d3_top(state)
|
||||||
await run_phase_d3_mid(state)
|
await run_phase_d3_mid(state)
|
||||||
|
|||||||
@@ -0,0 +1,249 @@
|
|||||||
|
"""B12 — Chatbot-Cookie-Klassifikations-Check.
|
||||||
|
|
||||||
|
Erkennt Chatbot-Cookies anhand der KB-Pattern und prüft 4 typische
|
||||||
|
Fehler in der DSGVO/TDDDG-Klassifikation:
|
||||||
|
|
||||||
|
CHAT-COOKIE-CLASS-001 Cookie als "technisch notwendig" deklariert,
|
||||||
|
obwohl in derselben Tabelle Targeting/A-B/
|
||||||
|
Analytics-Funktionen erwähnt werden. Falsche
|
||||||
|
Rechtsgrundlage → MEDIUM
|
||||||
|
CHAT-COOKIE-CLASS-002 Chatbot-Cookie mit nur EINER Klassifikation,
|
||||||
|
obwohl der Provider mehrere Funktionen
|
||||||
|
bietet (tn UND cp) → MEDIUM
|
||||||
|
CHAT-COOKIE-PURPOSE-001 Zweck-Beschreibung zu generisch ("Statistik",
|
||||||
|
"Cookie") — Art. 13 DSGVO verlangt konkreten
|
||||||
|
Verarbeitungszweck → LOW
|
||||||
|
CHAT-COOKIE-RETENTION-001 Deklarierte Retention <90 Tage, KB-typische
|
||||||
|
Retention >365 Tage — vermutlich unterdeklariert
|
||||||
|
→ HIGH (verlinkt B3)
|
||||||
|
|
||||||
|
KB-Quelle: specialist_agents/_kb/chat_providers.json
|
||||||
|
"""
|
||||||
|
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import json
|
||||||
|
import logging
|
||||||
|
import os
|
||||||
|
import re
|
||||||
|
|
||||||
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
_KB_PATH = os.path.join(
|
||||||
|
os.path.dirname(__file__),
|
||||||
|
"specialist_agents", "_kb", "chat_providers.json",
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
def _load_kb() -> dict:
|
||||||
|
try:
|
||||||
|
with open(_KB_PATH, encoding="utf-8") as f:
|
||||||
|
return json.load(f)
|
||||||
|
except Exception as e:
|
||||||
|
logger.warning("chatbot KB load failed: %s", e)
|
||||||
|
return {"providers": {}}
|
||||||
|
|
||||||
|
|
||||||
|
_KB = _load_kb()
|
||||||
|
|
||||||
|
|
||||||
|
def _detect_provider(cookie_name: str) -> tuple[str, dict] | None:
    """Match a cookie name against the knowledge-base patterns.

    Providers and their patterns are tried in knowledge-base order and the
    first pattern whose regex matches at the start of *cookie_name* wins.
    A malformed pattern entry (invalid regex, missing ``"regex"`` key,
    non-string regex, or an entry that is not a mapping) is skipped so that
    one bad knowledge-base row cannot abort the whole scan.

    Args:
        cookie_name: Cookie name as declared by the site.

    Returns:
        ``(provider_id, pattern_meta)`` for the first match, or ``None`` when
        the name is empty or nothing matches.
    """
    if not cookie_name:
        return None
    providers = _KB.get("providers") or {}
    for prov_id, prov in providers.items():
        for pat in prov.get("patterns") or []:
            try:
                if re.match(pat["regex"], cookie_name):
                    return prov_id, pat
            except (re.error, KeyError, TypeError):
                # Broken knowledge-base entry: ignore it, keep scanning.
                continue
    return None
|
||||||
|
|
||||||
|
|
||||||
|
_TARGETING_HINTS = (
|
||||||
|
"targeting", "engagement", "a/b", "ab-test", "ab test",
|
||||||
|
"analytics", "tracking", "marketing", "lead", "scoring",
|
||||||
|
"personalisierung", "personalization", "remarketing",
|
||||||
|
"retargeting",
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
_GENERIC_PURPOSES = {
|
||||||
|
"cookie", "statistik", "marketing", "tracking", "analyse",
|
||||||
|
"performance", "session", "essential", "essenziell",
|
||||||
|
"notwendig", "—", "?", "",
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
def _looks_targeting(text: str) -> bool:
|
||||||
|
if not text:
|
||||||
|
return False
|
||||||
|
t = text.lower()
|
||||||
|
return any(k in t for k in _TARGETING_HINTS)
|
||||||
|
|
||||||
|
|
||||||
|
def _is_generic_purpose(purpose: str) -> bool:
|
||||||
|
if not purpose:
|
||||||
|
return True
|
||||||
|
cleaned = re.sub(r"[\s\.,;:!?]+", " ", purpose.lower()).strip()
|
||||||
|
if cleaned in _GENERIC_PURPOSES:
|
||||||
|
return True
|
||||||
|
return len(cleaned.split()) < 4 # weniger als 4 Wörter = zu kurz
|
||||||
|
|
||||||
|
|
||||||
|
def check_chatbot_cookie_classification(state: dict) -> list[dict]:
    """Emit findings for chatbot cookies that are classified or described badly.

    Walks ``state["cmp_vendors"]``. Every cookie whose name matches a
    knowledge-base pattern (see ``_detect_provider``) is checked against four
    rules, each of which may add one finding for that cookie:

    * ``CHAT-COOKIE-CLASS-001`` (MEDIUM): the cookie's category or purpose
      declares it technically necessary while the vendor purpose mentions
      targeting/analytics.
    * ``CHAT-COOKIE-CLASS-002`` (MEDIUM): the provider has both ``tn`` and
      ``cp`` functions in the knowledge base, but the vendor purpose mentions
      only one of the two sides.
    * ``CHAT-COOKIE-PURPOSE-001`` (LOW): the purpose declared by the site is
      generic (see ``_is_generic_purpose``).
    * ``CHAT-COOKIE-RETENTION-001`` (HIGH): declared lifetime below 90 days
      while the knowledge base lists at least 250 typical days.

    Only the purpose the site itself declares is judged and quoted. The
    knowledge-base purpose is used solely as the suggested replacement text;
    it is no longer substituted for a missing declaration, which made the
    evidence quote text the site never published.

    Args:
        state: Pipeline state. ``cmp_vendors`` is read as a list of vendor
            dicts with ``purpose`` and ``cookies``; each cookie dict is read
            for ``name``, ``category``, ``purpose`` and one of ``duration`` /
            ``persistence`` / ``expiry``.

    Returns:
        Finding dicts with ``check_id``, ``severity``, ``severity_reason``,
        ``provider``, ``cookie_name``, ``title``, ``norm``, ``action`` and,
        for all checks except CLASS-002, ``evidence``. Empty when there are
        no vendors or nothing is wrong.
    """
    cmp_vendors = state.get("cmp_vendors") or []
    if not cmp_vendors:
        return []

    # Resolved once per call instead of once per matched cookie; kept
    # function-local as before rather than moved to module level.
    from .retention_comparator import parse_duration_to_days

    tn_words = ("technisch notwendig", "essenziell", "essential",
                "necessary", "strictly necessary")
    # NOTE(review): module docstring and commit message describe the KB
    # threshold as ">365 Tage"; the shipped condition is ">= 250" and is kept
    # unchanged here — confirm which one is intended.
    declared_below_days = 90
    kb_typical_min_days = 250

    def mentions_any(function_slugs, text_lc: str) -> bool:
        # KB function names are hyphenated slugs; accept them spelled with a
        # space or run together in the free-text purpose.
        return any(
            slug.replace("-", " ") in text_lc
            or slug.replace("-", "") in text_lc
            for slug in function_slugs
        )

    findings: list[dict] = []
    for v in cmp_vendors:
        vendor_purpose = (v.get("purpose") or "").strip()
        purp_lc = vendor_purpose.lower()
        vendor_is_targeting = _looks_targeting(vendor_purpose)
        for c in (v.get("cookies") or []):
            cname = (c.get("name") or "").strip()
            if not cname:
                continue
            match = _detect_provider(cname)
            if not match:
                continue
            prov_id, pat = match
            prov = _KB["providers"][prov_id]
            # One label everywhere, so titles never print "None" when the KB
            # entry lacks a company name.
            provider_label = prov.get("company") or prov_id
            c_class = (c.get("category") or "").strip().lower()
            c_purpose = (c.get("purpose") or "").strip()
            kb_purpose = pat.get("purpose") or ""

            # CLASS-001: declared technically necessary + targeting hint in
            # the vendor purpose.
            declared_tn = any(t in (c_class + " " + c_purpose).lower()
                              for t in tn_words)
            if declared_tn and vendor_is_targeting:
                findings.append({
                    "check_id": "CHAT-COOKIE-CLASS-001",
                    "severity": "MEDIUM",
                    "severity_reason": "misclassified",
                    "provider": provider_label,
                    "cookie_name": cname,
                    "title": (
                        f"Chatbot-Cookie '{cname}' ({provider_label}) "
                        "als technisch notwendig deklariert, Tabellen-Beschreibung "
                        "erwähnt Targeting/Analytics"
                    ),
                    "norm": "DSGVO Art. 6 Abs. 1 lit. a + § 25 TDDDG",
                    "evidence": (
                        f"Vendor-Purpose: '{vendor_purpose[:120]}' — "
                        f"Klassifikation: '{c_class}'"
                    ),
                    "action": (
                        "Rechtsgrundlage korrigieren: bei Targeting/Analytics/"
                        "A-B-Tests ist Einwilligung erforderlich. "
                        "Cookie aus 'technisch notwendig' herausnehmen ODER "
                        "die Tracking-Funktionen vom Chat-Kern trennen."
                    ),
                })

            # CLASS-002: provider offers tn AND cp functions, but the vendor
            # purpose names exactly one side.
            if prov.get("tn_functions") and prov.get("cp_functions"):
                mentions_tn = mentions_any(prov["tn_functions"], purp_lc)
                mentions_cp = mentions_any(prov["cp_functions"], purp_lc)
                if mentions_tn != mentions_cp:
                    missing_side = "Targeting/Analytics" if mentions_tn else (
                        "Chat-Kontext (technisch notwendig)"
                    )
                    findings.append({
                        "check_id": "CHAT-COOKIE-CLASS-002",
                        "severity": "MEDIUM",
                        "severity_reason": "incomplete",
                        "provider": provider_label,
                        "cookie_name": cname,
                        "title": (
                            f"Chatbot-Cookie '{cname}' ({provider_label}) "
                            "ohne Funktions-Differenzierung — fehlende Seite: "
                            f"{missing_side}"
                        ),
                        "norm": "DSGVO Art. 13 Abs. 1 lit. c + d",
                        "action": (
                            f"In der Cookie-Tabelle für '{cname}' sowohl die "
                            "tn-Funktionen (Chat-Kontext) als auch die "
                            "cp-Funktionen (Targeting/Analytics) getrennt "
                            "ausweisen — sonst kann der Nutzer Consent nicht "
                            "informiert geben."
                        ),
                    })

            # PURPOSE-001: the site's own purpose text is too generic.
            if _is_generic_purpose(c_purpose):
                action = "Konkreten Verarbeitungszweck angeben"
                if kb_purpose:
                    action += (
                        f" — z.B. statt "
                        f"'{c_purpose or 'Cookie'}' "
                        f"'{kb_purpose}' nach KB-Empfehlung."
                    )
                else:
                    action += "."
                findings.append({
                    "check_id": "CHAT-COOKIE-PURPOSE-001",
                    "severity": "LOW",
                    "severity_reason": "incomplete",
                    "provider": provider_label,
                    "cookie_name": cname,
                    "title": (
                        f"Chatbot-Cookie '{cname}' mit zu generischem Zweck"
                    ),
                    "norm": "DSGVO Art. 13 Abs. 1 lit. c",
                    "evidence": f"Zweck-Text: '{c_purpose}'",
                    "action": action,
                })

            # RETENTION-001: short declared lifetime vs. long KB-typical one.
            declared_str = (
                c.get("duration") or c.get("persistence")
                or c.get("expiry") or ""
            )
            declared_days, _kind = parse_duration_to_days(declared_str)
            typical = prov.get("typical_retention_days") or 0
            if (
                declared_days is not None
                and typical
                and declared_days < declared_below_days
                and typical >= kb_typical_min_days
            ):
                findings.append({
                    "check_id": "CHAT-COOKIE-RETENTION-001",
                    "severity": "HIGH",
                    "severity_reason": "factually_wrong",
                    "provider": provider_label,
                    "cookie_name": cname,
                    "title": (
                        f"Chatbot-Cookie '{cname}' Speicherdauer "
                        f"vermutlich unterdeklariert"
                    ),
                    "norm": "DSGVO Art. 13 Abs. 2 lit. a",
                    "evidence": (
                        f"Deklariert: {int(declared_days)} Tage — "
                        f"KB-typisch für {provider_label}: "
                        f"{typical} Tage"
                    ),
                    "action": (
                        f"Tatsächliche Cookie-Lifetime im Browser prüfen "
                        f"und mit '{declared_str}' abgleichen. "
                        f"Vermutung: real ~{typical} Tage statt deklariert "
                        f"{int(declared_days)}."
                    ),
                })
    if findings:
        logger.info("B12 chatbot-classification: %d findings", len(findings))
    return findings
|
||||||
@@ -46,6 +46,8 @@ def compose_v2(state: dict) -> str:
|
|||||||
state.get("ai_act_html", ""),
|
state.get("ai_act_html", ""),
|
||||||
# B6/B7/B8/B9/B10 — DPO + Staleness + CMP + MultiEntity + Transfer
|
# B6/B7/B8/B9/B10 — DPO + Staleness + CMP + MultiEntity + Transfer
|
||||||
state.get("extra_findings_html", ""),
|
state.get("extra_findings_html", ""),
|
||||||
|
# B12 Chatbot-Cookie-Klassifikation
|
||||||
|
state.get("chatbot_cookie_html", ""),
|
||||||
# Browser-Matrix (Stage 1.c)
|
# Browser-Matrix (Stage 1.c)
|
||||||
state.get("browser_matrix_html", ""),
|
state.get("browser_matrix_html", ""),
|
||||||
# All legacy build_*_html() wrapped in V2 sections — preserves
|
# All legacy build_*_html() wrapped in V2 sections — preserves
|
||||||
|
|||||||
@@ -77,6 +77,22 @@ def _country_third(country: str | None) -> tuple[str, bool, str | None]:
|
|||||||
return (code, True, tag)
|
return (code, True, tag)
|
||||||
|
|
||||||
|
|
||||||
|
def _vendor_type_tag(cookie_name: str) -> str:
|
||||||
|
"""Lookup the cookie in the chatbot-KB and return a [Chat]/[Chat+AI] tag."""
|
||||||
|
try:
|
||||||
|
from ..chatbot_cookie_classification_check import _detect_provider, _KB
|
||||||
|
match = _detect_provider(cookie_name)
|
||||||
|
if not match:
|
||||||
|
return ""
|
||||||
|
prov_id, _pat = match
|
||||||
|
prov = (_KB.get("providers") or {}).get(prov_id) or {}
|
||||||
|
if prov.get("ai_capable"):
|
||||||
|
return ' <span style="display:inline-block;background:#dbeafe;color:#1e40af;font-size:10px;padding:1px 6px;border-radius:999px;margin-left:4px;">Chat+AI</span>'
|
||||||
|
return ' <span style="display:inline-block;background:#f1f5f9;color:#475569;font-size:10px;padding:1px 6px;border-radius:999px;margin-left:4px;">Chat</span>'
|
||||||
|
except Exception:
|
||||||
|
return ""
|
||||||
|
|
||||||
|
|
||||||
def _src_chip(in_dse: bool, in_table: bool, in_browser: bool,
|
def _src_chip(in_dse: bool, in_table: bool, in_browser: bool,
|
||||||
in_ocr: bool) -> str:
|
in_ocr: bool) -> str:
|
||||||
parts: list[str] = []
|
parts: list[str] = []
|
||||||
@@ -248,7 +264,7 @@ def render_inventory_rows(rows: list[dict]) -> list[list[str]]:
|
|||||||
f'font-weight:700;">[{tag}]</span>'
|
f'font-weight:700;">[{tag}]</span>'
|
||||||
)
|
)
|
||||||
out.append([
|
out.append([
|
||||||
f'<code>{h(r["name"])}</code>',
|
f'<code>{h(r["name"])}</code>{_vendor_type_tag(r["name"])}',
|
||||||
h(r["vendor"]) if r["vendor"] else
|
h(r["vendor"]) if r["vendor"] else
|
||||||
'<span style="color:#dc2626;">❌</span>',
|
'<span style="color:#dc2626;">❌</span>',
|
||||||
_x_or(r["category"]),
|
_x_or(r["category"]),
|
||||||
|
|||||||
@@ -0,0 +1,158 @@
|
|||||||
|
{
|
||||||
|
"_schema_version": "1.0",
|
||||||
|
"_last_updated": "2026-06-06",
|
||||||
|
"_notes": "Anonymisierte Cookie-Pattern + Funktions-Klassifizierung pro Chat-Provider. Quelle: Anbieter-Dokumentation + EDPB-Cookie-Sweep + § 25 TDDDG. Kein Roh-Mandantendatum.",
|
||||||
|
"providers": {
|
||||||
|
"iadvize": {
|
||||||
|
"company": "iAdvize SAS",
|
||||||
|
"country": "FR",
|
||||||
|
"type": "Chat & Conversational Platform",
|
||||||
|
"ai_capable": true,
|
||||||
|
"patterns": [
|
||||||
|
{"regex": "^iadvize-\\d+-vuid$", "purpose": "Visitor-ID + Chat-Verlauf-Wiedererkennung", "default_class": "consent_required"},
|
||||||
|
{"regex": "^iadvize-\\d+-consent$", "purpose": "Consent-State für iAdvize", "default_class": "technically_necessary"},
|
||||||
|
{"regex": "^iadvize_test_cookie_top_domain$", "purpose": "Tech-Probe für Root-Domain-Detektion", "default_class": "technically_necessary"}
|
||||||
|
],
|
||||||
|
"typical_retention_days": 390,
|
||||||
|
"tn_functions": ["chat-continuation", "session-context", "logged-in-chat", "consent-state"],
|
||||||
|
"cp_functions": ["visitor-targeting", "engagement-rules", "ab-tests", "chat-analytics"]
|
||||||
|
},
|
||||||
|
"intercom": {
|
||||||
|
"company": "Intercom Inc",
|
||||||
|
"country": "US",
|
||||||
|
"type": "Chat & Customer-Messaging-Platform",
|
||||||
|
"ai_capable": true,
|
||||||
|
"patterns": [
|
||||||
|
{"regex": "^intercom-id-[\\w-]+$", "purpose": "Identifier-Cookie für Wiedererkennung", "default_class": "consent_required"},
|
||||||
|
{"regex": "^intercom-session-[\\w-]+$", "purpose": "Aktuelle Chat-Session", "default_class": "technically_necessary"},
|
||||||
|
{"regex": "^intercom-device-id-[\\w-]+$", "purpose": "Device-Fingerprint", "default_class": "consent_required"}
|
||||||
|
],
|
||||||
|
"typical_retention_days": 270,
|
||||||
|
"tn_functions": ["session-context"],
|
||||||
|
"cp_functions": ["device-tracking", "user-recognition-across-sites", "marketing-attribution"]
|
||||||
|
},
|
||||||
|
"tidio": {
|
||||||
|
"company": "Tidio LLC",
|
||||||
|
"country": "US",
|
||||||
|
"type": "Chat-Widget + Chatbot",
|
||||||
|
"ai_capable": true,
|
||||||
|
"patterns": [
|
||||||
|
{"regex": "^TidioStore_[\\w-]+$", "purpose": "Chat-Konfiguration + Verlauf", "default_class": "consent_required"},
|
||||||
|
{"regex": "^tidio[_-]?identify[_-].*$", "purpose": "Visitor-Identifikation", "default_class": "consent_required"}
|
||||||
|
],
|
||||||
|
"typical_retention_days": 365,
|
||||||
|
"tn_functions": ["chat-continuation"],
|
||||||
|
"cp_functions": ["visitor-tracking", "lead-scoring", "marketing-automation"]
|
||||||
|
},
|
||||||
|
"drift": {
|
||||||
|
"company": "Drift.com Inc",
|
||||||
|
"country": "US",
|
||||||
|
"type": "Conversational-Marketing-Platform",
|
||||||
|
"ai_capable": true,
|
||||||
|
"patterns": [
|
||||||
|
{"regex": "^driftt_aid$", "purpose": "Anonymous Visitor-ID", "default_class": "consent_required"},
|
||||||
|
{"regex": "^driftt_uid$", "purpose": "Logged-in User-ID", "default_class": "technically_necessary"},
|
||||||
|
{"regex": "^drift_eid$", "purpose": "Email-Address-Identifier", "default_class": "consent_required"}
|
||||||
|
],
|
||||||
|
"typical_retention_days": 365,
|
||||||
|
"tn_functions": ["logged-in-chat", "session-context"],
|
||||||
|
"cp_functions": ["lead-generation", "conversational-marketing", "ab-testing"]
|
||||||
|
},
|
||||||
|
"userlike": {
|
||||||
|
"company": "Userlike UG",
|
||||||
|
"country": "DE",
|
||||||
|
"type": "Chat-Widget + Chatbot",
|
||||||
|
"ai_capable": true,
|
||||||
|
"patterns": [
|
||||||
|
{"regex": "^userlike-cookie-banner[\\w-]*$", "purpose": "Consent-State für Userlike", "default_class": "technically_necessary"},
|
||||||
|
{"regex": "^userlike-[\\w-]+-id$", "purpose": "Visitor-Identifier", "default_class": "consent_required"}
|
||||||
|
],
|
||||||
|
"typical_retention_days": 365,
|
||||||
|
"tn_functions": ["chat-continuation", "consent-state"],
|
||||||
|
"cp_functions": ["visitor-tracking"]
|
||||||
|
},
|
||||||
|
"zendesk_chat": {
|
||||||
|
"company": "Zendesk Inc",
|
||||||
|
"country": "US",
|
||||||
|
"type": "Chat & Customer-Support",
|
||||||
|
"ai_capable": true,
|
||||||
|
"patterns": [
|
||||||
|
{"regex": "^__zlcmid$", "purpose": "Live-Chat-Identifier", "default_class": "technically_necessary"},
|
||||||
|
{"regex": "^_zendesk_[\\w-]+$", "purpose": "Session-/Tracking-Cookie", "default_class": "consent_required"}
|
||||||
|
],
|
||||||
|
"typical_retention_days": 365,
|
||||||
|
"tn_functions": ["live-chat-session"],
|
||||||
|
"cp_functions": ["analytics", "marketing-tracking"]
|
||||||
|
},
|
||||||
|
"liveperson": {
|
||||||
|
"company": "LivePerson Inc",
|
||||||
|
"country": "US",
|
||||||
|
"type": "Conversational-AI-Platform",
|
||||||
|
"ai_capable": true,
|
||||||
|
"patterns": [
|
||||||
|
{"regex": "^LP_[\\w-]+$", "purpose": "LivePerson-Visitor-ID", "default_class": "consent_required"},
|
||||||
|
{"regex": "^liveperson-[\\w-]+$", "purpose": "Session/Engagement", "default_class": "consent_required"}
|
||||||
|
],
|
||||||
|
"typical_retention_days": 365,
|
||||||
|
"tn_functions": ["chat-session"],
|
||||||
|
"cp_functions": ["visitor-tracking", "engagement-engine", "ai-chat-analytics"]
|
||||||
|
},
|
||||||
|
"hubspot_chat": {
|
||||||
|
"company": "HubSpot Inc",
|
||||||
|
"country": "US",
|
||||||
|
"type": "Chat + CRM-Integration",
|
||||||
|
"ai_capable": true,
|
||||||
|
"patterns": [
|
||||||
|
{"regex": "^hubspotutk$", "purpose": "HubSpot Visitor-Token", "default_class": "consent_required"},
|
||||||
|
{"regex": "^__hssc$", "purpose": "Session-Tracking", "default_class": "consent_required"},
|
||||||
|
{"regex": "^__hssrc$", "purpose": "Browser-Restart-Detection", "default_class": "consent_required"},
|
||||||
|
{"regex": "^__hstc$", "purpose": "Visitor-Tracking", "default_class": "consent_required"},
|
||||||
|
{"regex": "^messagesUtk$", "purpose": "Chat-Conversation-Token", "default_class": "technically_necessary"}
|
||||||
|
],
|
||||||
|
"typical_retention_days": 390,
|
||||||
|
"tn_functions": ["chat-conversation"],
|
||||||
|
"cp_functions": ["crm-integration", "marketing-attribution", "lead-scoring"]
|
||||||
|
},
|
||||||
|
"vertex_ai_chatbot": {
|
||||||
|
"company": "Google Cloud (Vertex AI)",
|
||||||
|
"country": "US (EU-Hosting möglich)",
|
||||||
|
"type": "AI-Chatbot (LLM-basiert)",
|
||||||
|
"ai_capable": true,
|
||||||
|
"patterns": [
|
||||||
|
{"regex": "^_GRECAPTCHA$", "purpose": "reCAPTCHA-Protection für Vertex-AI-Frontend", "default_class": "technically_necessary"},
|
||||||
|
{"regex": "^GOOGLE_AUTH.*$", "purpose": "Google-Auth-Token (wenn embedded)", "default_class": "technically_necessary"}
|
||||||
|
],
|
||||||
|
"typical_retention_days": 180,
|
||||||
|
"tn_functions": ["bot-protection", "auth-token"],
|
||||||
|
"cp_functions": ["chat-analytics", "improvement-feedback"],
|
||||||
|
"ai_act_disclosure_required": true
|
||||||
|
},
|
||||||
|
"openai_chatbot": {
|
||||||
|
"company": "OpenAI LLC",
|
||||||
|
"country": "US",
|
||||||
|
"type": "AI-Chatbot (GPT-Modelle)",
|
||||||
|
"ai_capable": true,
|
||||||
|
"patterns": [
|
||||||
|
{"regex": "^__cf_bm$", "purpose": "Cloudflare-Bot-Schutz", "default_class": "technically_necessary"},
|
||||||
|
{"regex": "^_cfuvid$", "purpose": "Cloudflare-Visitor-ID", "default_class": "consent_required"}
|
||||||
|
],
|
||||||
|
"typical_retention_days": 365,
|
||||||
|
"tn_functions": ["bot-protection"],
|
||||||
|
"cp_functions": ["visitor-tracking", "ai-conversation-analytics"],
|
||||||
|
"ai_act_disclosure_required": true
|
||||||
|
},
|
||||||
|
"anthropic_claude": {
|
||||||
|
"company": "Anthropic PBC",
|
||||||
|
"country": "US",
|
||||||
|
"type": "AI-Chatbot (Claude-Modelle)",
|
||||||
|
"ai_capable": true,
|
||||||
|
"patterns": [
|
||||||
|
{"regex": "^cf_clearance$", "purpose": "Cloudflare-Anti-Bot", "default_class": "technically_necessary"}
|
||||||
|
],
|
||||||
|
"typical_retention_days": 30,
|
||||||
|
"tn_functions": ["bot-protection"],
|
||||||
|
"cp_functions": ["chat-analytics"],
|
||||||
|
"ai_act_disclosure_required": true
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
@@ -62,8 +62,10 @@ class ScanResponse(BaseModel):
|
|||||||
|
|
||||||
from routes_matrix import router as matrix_router
|
from routes_matrix import router as matrix_router
|
||||||
from routes_mobile import router as mobile_router
|
from routes_mobile import router as mobile_router
|
||||||
|
from routes_cookie_matrix import router as cookie_matrix_router
|
||||||
app.include_router(matrix_router)
|
app.include_router(matrix_router)
|
||||||
app.include_router(mobile_router)
|
app.include_router(mobile_router)
|
||||||
|
app.include_router(cookie_matrix_router)
|
||||||
|
|
||||||
|
|
||||||
@app.get("/health")
|
@app.get("/health")
|
||||||
|
|||||||
@@ -0,0 +1,28 @@
|
|||||||
|
"""POST /scan-cookie-matrix — fokussierter Multi-Browser Cookie-Test."""
|
||||||
|
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import logging
|
||||||
|
from datetime import datetime, timezone
|
||||||
|
|
||||||
|
from fastapi import APIRouter
|
||||||
|
from pydantic import BaseModel
|
||||||
|
|
||||||
|
from services.cookie_behavior_per_browser import run_cookie_matrix
|
||||||
|
|
||||||
|
logger = logging.getLogger(__name__)
|
||||||
|
router = APIRouter()
|
||||||
|
|
||||||
|
|
||||||
|
class CookieMatrixReq(BaseModel):
    """Request body for ``POST /scan-cookie-matrix``."""

    # Page to scan; the endpoint passes it unchanged to run_cookie_matrix.
    url: str
    # Optional browser-profile selection; None is logged as "default" and
    # forwarded as-is.
    # NOTE(review): presumably run_cookie_matrix then picks its own default
    # profiles — confirm in services.cookie_behavior_per_browser.
    browser_profiles: list[str] | None = None
|
||||||
|
|
||||||
|
|
||||||
|
@router.post("/scan-cookie-matrix")
async def scan_cookie_matrix(req: CookieMatrixReq):
    """Run the per-browser cookie scan for ``req.url``.

    Returns whatever ``run_cookie_matrix`` produced, with a ``scanned_at``
    key added that holds the completion time as a UTC ISO-8601 string.
    """
    profiles = req.browser_profiles
    logger.info("Cookie-matrix scan %s profiles=%s",
                req.url, profiles or "default")
    result = await run_cookie_matrix(req.url, profiles)
    # Stamped after the scan, so it marks when the scan finished.
    finished_at = datetime.now(timezone.utc)
    result["scanned_at"] = finished_at.isoformat()
    return result
|
||||||
@@ -0,0 +1,209 @@
|
|||||||
|
"""Cookie behavior per browser — fokussierter Multi-Engine Cookie-Test.
|
||||||
|
|
||||||
|
Stage 1.b ohne consent_scanner-Edit:
|
||||||
|
- Eigener kleiner Playwright-basierter Cookie-Scanner
|
||||||
|
- Pro Browser-Profile: cookies VOR Banner / NACH "Alle ablehnen" /
|
||||||
|
NACH "Alle akzeptieren"
|
||||||
|
- Echte Engine-Diversität: chromium / firefox / webkit /
|
||||||
|
iphone-mobile-safari nutzen jeweils `p.chromium` / `p.firefox` /
|
||||||
|
`p.webkit.launch()`
|
||||||
|
- Output: Cookie-Delta pro Phase pro Browser → Tabelle zeigt ob
|
||||||
|
Banner-Reject in allen Browsern gleich wirkt
|
||||||
|
"""
|
||||||
|
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import logging
|
||||||
|
from typing import Any
|
||||||
|
|
||||||
|
from .browser_profiles import resolve_profiles
|
||||||
|
|
||||||
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
|
||||||
|
_ACCEPT_TEXTS = (
|
||||||
|
"alle akzeptieren", "alles akzeptieren", "akzeptieren",
|
||||||
|
"zustimmen", "agree", "accept all", "accept",
|
||||||
|
"i agree", "ok", "got it",
|
||||||
|
)
|
||||||
|
_REJECT_TEXTS = (
|
||||||
|
"alle ablehnen", "ablehnen", "nur essenzielle",
|
||||||
|
"nur notwendige", "reject all", "decline", "deny",
|
||||||
|
"only necessary", "essential only",
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
async def _try_click(page, texts: tuple[str, ...]) -> bool:
|
||||||
|
"""Try clicking the first visible button/link matching any of the texts."""
|
||||||
|
for txt in texts:
|
||||||
|
try:
|
||||||
|
loc = page.get_by_role("button",
|
||||||
|
name=__import__("re").compile(txt, 2))
|
||||||
|
if await loc.count() > 0:
|
||||||
|
await loc.first.click(timeout=4000)
|
||||||
|
await page.wait_for_timeout(1500)
|
||||||
|
return True
|
||||||
|
except Exception:
|
||||||
|
pass
|
||||||
|
# fallback by text
|
||||||
|
try:
|
||||||
|
loc = page.locator(f"text=/{txt}/i").first
|
||||||
|
if await loc.count() > 0:
|
||||||
|
await loc.click(timeout=4000)
|
||||||
|
await page.wait_for_timeout(1500)
|
||||||
|
return True
|
||||||
|
except Exception:
|
||||||
|
continue
|
||||||
|
return False
|
||||||
|
|
||||||
|
|
||||||
|
def _cookie_summary(cookies: list[dict]) -> dict:
|
||||||
|
"""Compact summary: count + sample names + by-domain."""
|
||||||
|
names = [c.get("name", "") for c in cookies]
|
||||||
|
domains: dict[str, int] = {}
|
||||||
|
for c in cookies:
|
||||||
|
d = c.get("domain", "")
|
||||||
|
domains[d] = domains.get(d, 0) + 1
|
||||||
|
return {
|
||||||
|
"count": len(cookies),
|
||||||
|
"names": names,
|
||||||
|
"by_domain": sorted(domains.items(), key=lambda x: -x[1])[:8],
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
async def _scan_one(p, url: str, profile: dict) -> dict[str, Any]:
    """Scan ``url`` with one browser profile and report its cookies.

    A browser is launched for the profile's engine. In a first context the
    page is loaded, the cookies are recorded ("before"), a reject control
    is clicked if one is found, and the cookies are recorded again
    ("after_reject"). A second, separate context then loads the page again,
    tries an accept control and records the cookies ("after_accept"); every
    step of that second pass is best effort.

    Args:
        p: Playwright-like object exposing ``chromium``, ``firefox``,
            ``webkit`` and ``devices``.
        url: Page to load.
        profile: Profile dict. ``id``, ``engine`` and ``label`` are read
            unconditionally; ``channel``, ``executable_path``, ``locale``,
            ``timezone``, ``device`` and ``viewport`` are optional.

    Returns:
        On success a dict with ``profile_id``, ``label``, ``engine``,
        ``reject_clicked``, ``accept_clicked``, the three cookie summaries
        and the two count deltas relative to "before". If the engine is
        unknown, the launch fails or the first navigation fails, a dict
        with only ``profile_id`` and ``error`` (message capped at 200
        characters for launch/navigation errors).
    """
    engine = profile["engine"]
    launcher_attr = {
        "blink": "chromium",
        "gecko": "firefox",
        "webkit": "webkit",
    }.get(engine)
    if launcher_attr is None:
        return {"profile_id": profile["id"], "error": f"unknown engine {engine}"}
    bt = getattr(p, launcher_attr)

    launch_kw: dict[str, Any] = {"headless": True}
    for option in ("channel", "executable_path"):
        if profile.get(option):
            launch_kw[option] = profile[option]

    try:
        browser = await bt.launch(**launch_kw)
    except Exception as e:
        return {"profile_id": profile["id"], "error": f"launch: {e}"[:200]}

    try:
        ctx_kw: dict[str, Any] = {
            "locale": profile.get("locale", "de-DE"),
            "timezone_id": profile.get("timezone", "Europe/Berlin"),
        }
        if profile.get("device"):
            # A device preset takes precedence over an explicit viewport.
            preset = p.devices.get(profile["device"]) or {}
            ctx_kw.update(preset)
        elif profile.get("viewport"):
            ctx_kw["viewport"] = profile["viewport"]

        context = await browser.new_context(**ctx_kw)
        page = await context.new_page()
        try:
            await page.goto(url, wait_until="domcontentloaded", timeout=30000)
        except Exception as e:
            # The browser is closed by the ``finally`` clause below.
            return {"profile_id": profile["id"],
                    "error": f"goto: {e}"[:200]}
        # Let the page settle before taking the baseline.
        await page.wait_for_timeout(2500)

        before = await context.cookies()

        # Reject branch: runs in the same context as the baseline.
        reject_clicked = await _try_click(page, _REJECT_TEXTS)
        await page.wait_for_timeout(1500)
        after_reject = await context.cookies()

        # Accept branch: separate context, so the reject click above cannot
        # influence it. Every step is best effort; a failure is logged at
        # debug level and the remaining steps are still attempted.
        accept_clicked = False
        after_accept: list[dict] = []
        try:
            context2 = await browser.new_context(**ctx_kw)
            page2 = await context2.new_page()
            try:
                await page2.goto(url, wait_until="domcontentloaded",
                                 timeout=30000)
            except Exception as e:
                logger.debug("accept branch goto failed for %s: %s",
                             profile["id"], e)
            try:
                await page2.wait_for_timeout(2500)
            except Exception as e:
                logger.debug("accept branch settle wait failed for %s: %s",
                             profile["id"], e)
            try:
                accept_clicked = await _try_click(page2, _ACCEPT_TEXTS)
            except Exception as e:
                logger.debug("accept branch click failed for %s: %s",
                             profile["id"], e)
            try:
                await page2.wait_for_timeout(1500)
            except Exception as e:
                logger.debug("accept branch post-click wait failed for %s: %s",
                             profile["id"], e)
            try:
                after_accept = await context2.cookies()
            except Exception as e:
                logger.debug("accept branch cookie read failed for %s: %s",
                             profile["id"], e)
        except Exception as e:
            logger.info("accept branch failed for %s: %s",
                        profile["id"], e)

        return {
            "profile_id": profile["id"],
            "label": profile["label"],
            "engine": engine,
            "reject_clicked": reject_clicked,
            "accept_clicked": accept_clicked,
            "before": _cookie_summary(before),
            "after_reject": _cookie_summary(after_reject),
            "after_accept": _cookie_summary(after_accept),
            "reject_minus_before_count": (
                len(after_reject) - len(before)
            ),
            "accept_minus_before_count": (
                len(after_accept) - len(before)
            ),
        }
    finally:
        # Single cleanup point for every exit path; a failing close must
        # not mask the result or the original exception.
        try:
            await browser.close()
        except Exception:
            pass
|
||||||
|
|
||||||
|
|
||||||
|
async def run_cookie_matrix(
    url: str, requested_profiles: list[str] | None = None,
) -> dict:
    """Run the cookie behaviour scan for ``url`` over a set of profiles.

    The profiles come from ``resolve_profiles(requested_profiles)`` and are
    scanned one after another. A scan that raises is recorded as an error
    entry instead of aborting the run.

    Returns a dict with ``url``, ``profile_count``, the per-profile
    ``results`` and an ``aggregate`` holding the after-reject cookie count
    per successful profile plus ``inconsistent_reject``, which is True when
    the highest and lowest of those counts differ by two or more.
    """
    from playwright.async_api import async_playwright

    profiles = resolve_profiles(requested_profiles)
    results: list[dict] = []
    async with async_playwright() as p:
        # Sequential to avoid resource contention on the Mac Mini
        # (4 browsers in parallel sometimes hits target-closed races).
        for prof in profiles:
            try:
                outcome = await _scan_one(p, url, prof)
            except Exception as e:
                logger.warning("scan_one %s crashed: %s", prof["id"], e)
                outcome = {"profile_id": prof["id"],
                           "error": f"crash: {e}"[:200]}
            results.append(outcome)

    # Cross-browser comparison: only profiles that scanned without error
    # take part.
    after_reject_counts = {}
    for entry in results:
        if "error" in entry:
            continue
        after_reject_counts[entry["profile_id"]] = (
            entry.get("after_reject", {}).get("count", 0)
        )

    if after_reject_counts:
        observed = after_reject_counts.values()
        inconsistent = (max(observed) - min(observed)) >= 2
    else:
        inconsistent = False

    aggregate = {
        "reject_cookie_counts": after_reject_counts,
        "inconsistent_reject": inconsistent,
    }
    return {
        "url": url,
        "profile_count": len(profiles),
        "results": results,
        "aggregate": aggregate,
    }
|
||||||
Reference in New Issue
Block a user