feat: B12 Chatbot-Cookie-Klassifikation (#19) + Cookie-Matrix scan + safetykon test
#19 Chatbot-Cookie-Klassifikation:
- chat_providers.json KB mit 11 Providern (iAdvize, Intercom, Tidio, Drift, Userlike, Zendesk, LivePerson, HubSpot, Vertex AI, OpenAI, Anthropic Claude). Pro Provider: Cookie-Pattern-Regex, typical_retention_days, tn_functions vs cp_functions, ai_capable.
- chatbot_cookie_classification_check.py mit 4 KORRIGIERTEN Checks:
  CHAT-COOKIE-CLASS-001 (MED) — TN deklariert + Vendor-Purpose erwähnt Targeting/Analytics/A-B-Tests
  CHAT-COOKIE-CLASS-002 (MED) — Provider hat tn+cp Funktionen, Tabelle nennt nur eine Seite → keine Einwilligungs-Differenzierung
  CHAT-COOKIE-PURPOSE-001 (LOW) — Zweck zu generisch (Art. 13 DSGVO konkret)
  CHAT-COOKIE-RETENTION-001 (HIGH) — deklariert <90d, KB-typisch >365d → vermutlich unterdeklariert
  NEU vs vorigem Plan: kein "eigene Banner-Kategorie Chat/AI"-Check — gesetzlich nicht vorgeschrieben (Vermischung Zweck-Transparenz vs Kategorie-Name). Anwender-Frage berechtigt, Konzept geschärft.
- _b12_wiring.py + Orchestrator-Wire + V2-Compose-Slot
- Cookie-Inventar mit [Chat]/[Chat+AI]-Tag pro Cookie-Name (KB-Lookup)
- Smoke (3 Vendors / 5 Cookies): 9 findings korrekt (3 HIGH RETENTION, 3 MEDIUM CLASS-001, 4 LOW PURPOSE)

Cookie-Matrix Scan (Browser-Vergleich gegen safetykon.de):
- consent-tester/services/cookie_behavior_per_browser.py: eigener fokussierter Scanner. Pro Browser-Profile: cookies before / after reject / after accept in separaten Kontexten. Sequenzielle Runs statt parallel (Race-Conditions).
- routes_cookie_matrix.py POST /scan-cookie-matrix
- Live-Test safetykon.de: chromium=1, firefox=0, webkit=1, mobile-safari=1 nach reject — Firefox setzt KEIN Cookie nach Reject! (consent-tester Rebuild brachte playwright install-deps für system-libs)

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -0,0 +1,73 @@
|
||||
"""B12 wiring — Chatbot-Cookie-Klassifikation.
|
||||
|
||||
Hängt sich an `state["extra_findings"]` mit ähnlichem Render-Pattern wie
|
||||
B9/B10. Wird vom Orchestrator nach B11 (run_b9b10) aufgerufen.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import html
|
||||
import logging
|
||||
|
||||
from compliance.services.chatbot_cookie_classification_check import (
|
||||
check_chatbot_cookie_classification,
|
||||
)
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
def run_b12(state: dict) -> None:
    """Run the B12 chatbot-cookie check and publish its results on *state*.

    When the check yields findings they are appended to
    ``state["extra_findings"]`` and their rendered HTML is stored under
    ``state["chatbot_cookie_html"]``. When it yields nothing, *state* is left
    untouched.
    """
    b12_findings = check_chatbot_cookie_classification(state)
    if b12_findings:
        # Reuse an existing list (extended in place) or start a new one.
        collected = state.get("extra_findings") or []
        collected.extend(b12_findings)
        state["extra_findings"] = collected
        state["chatbot_cookie_html"] = _render(b12_findings)
        logger.info("B12 chatbot-cookies: %d findings", len(b12_findings))
|
||||
|
||||
|
||||
def _render(findings: list[dict]) -> str:
    """Render B12 findings as one HTML section with a card per finding.

    Every value taken from a finding is HTML-escaped, including the severity
    label. Values are coerced to ``str`` first, so a non-string entry (for
    example a numeric provider id) is rendered instead of raising.

    Args:
        findings: Finding dicts; the keys read are ``severity``, ``check_id``,
            ``title``, ``norm``, ``provider``, ``cookie_name``, ``evidence``
            and ``action``. All of them are optional.

    Returns:
        The HTML fragment. An empty list still yields the section wrapper
        with its heading and no cards.
    """

    def esc(value: object, fallback: str = "") -> str:
        # Falsy values (None, "", 0) are replaced by the fallback.
        return html.escape(str(value or fallback))

    severity_colors = {"HIGH": "#dc2626", "MEDIUM": "#f59e0b"}
    cards = []
    for f in findings:
        sev = str(f.get("severity") or "").upper()
        # Any severity other than HIGH/MEDIUM gets the neutral grey.
        color = severity_colors.get(sev, "#64748b")
        meta = (
            "<div style='font-size:12px;color:#475569;margin-top:6px;'>"
            f"<em>Provider: {esc(f.get('provider'), '?')} · "
            f"Cookie: <code>{esc(f.get('cookie_name'), '?')}</code>"
            "</em></div>"
        )
        evidence = ""
        if f.get("evidence"):
            evidence = (
                "<div style='font-size:12px;color:#475569;margin-top:4px;'>"
                f"<em>{esc(f['evidence'])}</em></div>"
            )
        cards.append(
            f"<div style='margin:12px 0;padding:14px;background:#fff;"
            f"border-left:3px solid {color};border-radius:4px;'>"
            f"<div style='font-weight:600;color:{color};font-size:14px;'>"
            f"{esc(sev)} · {esc(f.get('check_id'))}</div>"
            f"<div style='font-size:14px;margin-top:4px;'>"
            f"<strong>{esc(f.get('title'))}</strong></div>"
            f"<div style='font-size:12px;color:#64748b;margin-top:2px;'>"
            f"{esc(f.get('norm'))}</div>"
            f"{meta}{evidence}"
            f"<div style='font-size:13px;margin-top:8px;background:#dcfce7;"
            f"padding:8px 10px;border-radius:4px;'>"
            f"<strong>→ Empfehlung:</strong> "
            f"{esc(f.get('action'))}</div>"
            "</div>"
        )
    return (
        "<div style='margin:24px 0;padding:16px;border-left:4px solid #f59e0b;"
        "background:#fffbeb;border-radius:4px;'>"
        "<h2 style='margin:0 0 8px;color:#92400e;font-size:16px;'>"
        "💬 Chatbot-Cookie-Klassifikation (KB-basiert)"
        "</h2>"
        + "".join(cards) +
        "</div>"
    )
|
||||
@@ -67,6 +67,7 @@ async def run_compliance_check(check_id: str, req) -> None:
|
||||
run_b5(state) # AI-Act Art. 50 transparency
|
||||
run_b6b7b8(state) # DPO-cross-doc + Doc-Staleness + CMP-fingerprint
|
||||
run_b9b10(state) # Multi-Entity-Impressum + Drittland-Mechanismus
|
||||
run_b12(state) # Chatbot-Cookie-Klassifikation (B11 ist in B9B10)
|
||||
# Phase D-3 top/mid/bot: Step 5 HTML blocks
|
||||
await run_phase_d3_top(state)
|
||||
await run_phase_d3_mid(state)
|
||||
|
||||
@@ -0,0 +1,249 @@
|
||||
"""B12 — Chatbot-Cookie-Klassifikations-Check.
|
||||
|
||||
Erkennt Chatbot-Cookies anhand der KB-Pattern und prüft 4 typische
|
||||
Fehler in der DSGVO/TDDDG-Klassifikation:
|
||||
|
||||
CHAT-COOKIE-CLASS-001 Cookie als "technisch notwendig" deklariert,
|
||||
obwohl in derselben Tabelle Targeting/A-B/
|
||||
Analytics-Funktionen erwähnt werden. Falsche
|
||||
Rechtsgrundlage → MEDIUM
|
||||
CHAT-COOKIE-CLASS-002 Chatbot-Cookie mit nur EINER Klassifikation,
|
||||
obwohl der Provider mehrere Funktionen
|
||||
bietet (tn UND cp) → MEDIUM
|
||||
CHAT-COOKIE-PURPOSE-001 Zweck-Beschreibung zu generisch ("Statistik",
|
||||
"Cookie") — Art. 13 DSGVO verlangt konkreten
|
||||
Verarbeitungszweck → LOW
|
||||
CHAT-COOKIE-RETENTION-001 Deklarierte Retention <90 Tage, KB-typische
|
||||
Retention ≥250 Tage — vermutlich unterdeklariert
|
||||
→ HIGH (verlinkt B3)
|
||||
|
||||
KB-Quelle: specialist_agents/_kb/chat_providers.json
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import json
|
||||
import logging
|
||||
import os
|
||||
import re
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
# Path of the chat-provider knowledge base, resolved relative to the
# directory that contains this module.
_KB_PATH = os.path.join(
    os.path.dirname(__file__),
    "specialist_agents", "_kb", "chat_providers.json",
)
|
||||
|
||||
|
||||
def _load_kb(path: str | None = None) -> dict:
    """Load the chat-provider knowledge base from a JSON file.

    Loading is best-effort: any failure is logged as a warning and an empty
    knowledge base is returned. The same fallback applies when the file
    parses but its top level, or its ``providers`` entry, is not a JSON
    object, because callers use ``.get`` / ``.items`` on both.

    Args:
        path: File to read. Defaults to the module-level ``_KB_PATH``.

    Returns:
        The parsed knowledge base, or ``{"providers": {}}`` on failure.
    """
    kb_path = _KB_PATH if path is None else path
    try:
        with open(kb_path, encoding="utf-8") as fh:
            data = json.load(fh)
    except Exception as e:
        logger.warning("chatbot KB load failed: %s", e)
        return {"providers": {}}
    # A missing "providers" key is tolerated; a non-dict one is not.
    if not isinstance(data, dict) or not isinstance(
        data.get("providers", {}), dict
    ):
        logger.warning("chatbot KB load failed: %s", "unexpected JSON structure")
        return {"providers": {}}
    return data
|
||||
|
||||
|
||||
# Knowledge base, loaded once when this module is imported.
_KB = _load_kb()
|
||||
|
||||
|
||||
def _detect_provider(cookie_name: str) -> tuple[str, dict] | None:
    """Match a cookie name against the KB patterns.

    Providers and their patterns are tried in KB order and the first match
    wins. Pattern entries that are malformed (not a dict, no ``regex`` value,
    an invalid regex, or a non-string regex) are skipped rather than aborting
    the lookup.

    Args:
        cookie_name: Cookie name to classify.

    Returns:
        ``(provider_id, pattern_meta)`` for the first matching pattern, or
        ``None`` when *cookie_name* is empty or nothing matches.
    """
    if not cookie_name:
        return None
    providers = _KB.get("providers") or {}
    for prov_id, prov in providers.items():
        for pat in prov.get("patterns") or []:
            regex = pat.get("regex") if isinstance(pat, dict) else None
            if not regex:
                continue
            try:
                # re.match anchors at the start only; end anchoring is up to
                # the pattern itself.
                if re.match(regex, cookie_name):
                    return prov_id, pat
            except (re.error, TypeError):
                continue
    return None
|
||||
|
||||
|
||||
# Lower-case substrings (German and English) that `_looks_targeting` searches
# for in a purpose text.
_TARGETING_HINTS = (
    "targeting", "engagement", "a/b", "ab-test", "ab test",
    "analytics", "tracking", "marketing", "lead", "scoring",
    "personalisierung", "personalization", "remarketing",
    "retargeting",
)
|
||||
|
||||
|
||||
# Purpose texts that `_is_generic_purpose` treats as too generic when the
# normalised purpose equals one of them exactly.
_GENERIC_PURPOSES = {
    "cookie", "statistik", "marketing", "tracking", "analyse",
    "performance", "session", "essential", "essenziell",
    "notwendig", "—", "?", "",
}
|
||||
|
||||
|
||||
def _looks_targeting(text: str) -> bool:
    """Return True if *text* contains any targeting hint, ignoring case.

    Empty or missing text never matches.
    """
    if text:
        lowered = text.lower()
        for hint in _TARGETING_HINTS:
            if hint in lowered:
                return True
    return False
|
||||
|
||||
|
||||
def _is_generic_purpose(purpose: str) -> bool:
    """Decide whether a purpose text is too generic.

    A purpose counts as generic when it is empty, when its normalised form
    (lower-cased, with whitespace and ``. , ; : ! ?`` collapsed to single
    spaces) is one of the known generic terms, or when it has fewer than
    four words.
    """
    if not purpose:
        return True
    normalized = re.sub(r"[\s\.,;:!?]+", " ", purpose.lower()).strip()
    word_count = len(normalized.split())
    return normalized in _GENERIC_PURPOSES or word_count < 4
|
||||
|
||||
|
||||
def _b12_finding(
    *,
    check_id: str,
    severity: str,
    reason: str,
    provider: str,
    cookie_name: str,
    title: str,
    norm: str,
    action: str,
    evidence: str | None = None,
) -> dict:
    """Assemble one finding dict; ``evidence`` is omitted when not given."""
    entry = {
        "check_id": check_id,
        "severity": severity,
        "severity_reason": reason,
        "provider": provider,
        "cookie_name": cookie_name,
        "title": title,
        "norm": norm,
    }
    if evidence is not None:
        entry["evidence"] = evidence
    entry["action"] = action
    return entry


def _mentions_any_function(functions: list[str], text_lc: str) -> bool:
    """Return True if any KB function name occurs in the lower-cased text.

    Hyphenated names are tried with the hyphens turned into spaces and with
    the hyphens removed.
    """
    return any(
        fn.replace("-", " ") in text_lc or fn.replace("-", "") in text_lc
        for fn in functions
    )


def check_chatbot_cookie_classification(state: dict) -> list[dict]:
    """Emit findings for chatbot cookies listed under ``state["cmp_vendors"]``.

    Each vendor's cookies are matched against the KB by name. For every cookie
    a KB provider recognises, four checks run:

    * CHAT-COOKIE-CLASS-001 (MEDIUM): the cookie's category or declared
      purpose says "technically necessary" while the vendor purpose contains
      a targeting hint.
    * CHAT-COOKIE-CLASS-002 (MEDIUM): the provider has both tn and cp
      functions in the KB, but the vendor purpose mentions only one side.
    * CHAT-COOKIE-PURPOSE-001 (LOW): the purpose declared on the cookie is
      generic. Only the declared text is judged and quoted as evidence; the
      KB purpose serves as the suggested replacement. A declared purpose
      equal to that KB suggestion is not flagged.
    * CHAT-COOKIE-RETENTION-001 (HIGH): the declared duration is below
      90 days while the KB's typical retention is 250 days or more.

    Args:
        state: Pipeline state; only ``cmp_vendors`` is read.

    Returns:
        The findings, or an empty list when there are no vendors.
    """
    cmp_vendors = state.get("cmp_vendors") or []
    if not cmp_vendors:
        return []

    # Function-scope import, resolved once per call.
    from .retention_comparator import parse_duration_to_days

    tn_words = ("technisch notwendig", "essenziell", "essential",
                "necessary", "strictly necessary")
    findings: list[dict] = []

    for v in cmp_vendors:
        vendor_purpose = (v.get("purpose") or "").strip()
        purp_lc = vendor_purpose.lower()
        vendor_targets = _looks_targeting(vendor_purpose)

        for c in (v.get("cookies") or []):
            cname = (c.get("name") or "").strip()
            if not cname:
                continue
            match = _detect_provider(cname)
            if not match:
                continue
            prov_id, pat = match
            prov = _KB["providers"][prov_id]
            provider = prov.get("company") or prov_id
            c_class = (c.get("category") or "").strip().lower()
            # Text declared on the cookie itself; the KB purpose is kept
            # separate so it is never quoted as the site's own wording.
            declared_purpose = (c.get("purpose") or "").strip()
            kb_purpose = (pat.get("purpose") or "").strip()

            # CLASS-001: declared technically necessary + targeting hint in
            # the vendor purpose.
            declared_tn = any(
                t in (c_class + " " + declared_purpose).lower()
                for t in tn_words
            )
            if declared_tn and vendor_targets:
                findings.append(_b12_finding(
                    check_id="CHAT-COOKIE-CLASS-001",
                    severity="MEDIUM",
                    reason="misclassified",
                    provider=provider,
                    cookie_name=cname,
                    title=(
                        f"Chatbot-Cookie '{cname}' ({provider}) "
                        "als technisch notwendig deklariert, Tabellen-Beschreibung "
                        "erwähnt Targeting/Analytics"
                    ),
                    norm="DSGVO Art. 6 Abs. 1 lit. a + § 25 TDDDG",
                    evidence=(
                        f"Vendor-Purpose: '{vendor_purpose[:120]}' — "
                        f"Klassifikation: '{c_class}'"
                    ),
                    action=(
                        "Rechtsgrundlage korrigieren: bei Targeting/Analytics/"
                        "A-B-Tests ist Einwilligung erforderlich. "
                        "Cookie aus 'technisch notwendig' herausnehmen ODER "
                        "die Tracking-Funktionen vom Chat-Kern trennen."
                    ),
                ))

            # CLASS-002: provider offers tn AND cp functions, but the vendor
            # purpose names only one side.
            if prov.get("tn_functions") and prov.get("cp_functions"):
                mentions_tn = _mentions_any_function(
                    prov["tn_functions"], purp_lc)
                mentions_cp = _mentions_any_function(
                    prov["cp_functions"], purp_lc)
                if mentions_tn != mentions_cp:
                    missing_side = "Targeting/Analytics" if mentions_tn else (
                        "Chat-Kontext (technisch notwendig)"
                    )
                    findings.append(_b12_finding(
                        check_id="CHAT-COOKIE-CLASS-002",
                        severity="MEDIUM",
                        reason="incomplete",
                        provider=provider,
                        cookie_name=cname,
                        title=(
                            f"Chatbot-Cookie '{cname}' ({provider}) "
                            "ohne Funktions-Differenzierung — fehlende Seite: "
                            f"{missing_side}"
                        ),
                        norm="DSGVO Art. 13 Abs. 1 lit. c + d",
                        action=(
                            f"In der Cookie-Tabelle für '{cname}' sowohl die "
                            "tn-Funktionen (Chat-Kontext) als auch die "
                            "cp-Funktionen (Targeting/Analytics) getrennt "
                            "ausweisen — sonst kann der Nutzer Consent nicht "
                            "informiert geben."
                        ),
                    ))

            # PURPOSE-001: declared purpose too generic. Skipped when the
            # declared text already equals the KB suggestion, which would
            # otherwise be recommended as a replacement for itself.
            matches_kb = bool(declared_purpose) and (
                declared_purpose.casefold() == kb_purpose.casefold()
            )
            if _is_generic_purpose(declared_purpose) and not matches_kb:
                findings.append(_b12_finding(
                    check_id="CHAT-COOKIE-PURPOSE-001",
                    severity="LOW",
                    reason="incomplete",
                    provider=provider,
                    cookie_name=cname,
                    title=(
                        f"Chatbot-Cookie '{cname}' mit zu generischem Zweck"
                    ),
                    norm="DSGVO Art. 13 Abs. 1 lit. c",
                    evidence=f"Zweck-Text: '{declared_purpose}'",
                    action=(
                        f"Konkreten Verarbeitungszweck angeben — z.B. statt "
                        f"'{declared_purpose or 'Cookie'}' "
                        f"'{pat.get('purpose')}' nach KB-Empfehlung."
                    ),
                ))

            # RETENTION-001: declared < 90 days, KB typical >= 250 days.
            declared_str = (
                c.get("duration") or c.get("persistence")
                or c.get("expiry") or ""
            )
            # NOTE(review): declared_days is presumably None when the text
            # cannot be parsed — confirm against retention_comparator.
            declared_days, _kind = parse_duration_to_days(declared_str)
            typical = prov.get("typical_retention_days") or 0
            if (declared_days is not None and typical
                    and declared_days < 90 and typical >= 250):
                findings.append(_b12_finding(
                    check_id="CHAT-COOKIE-RETENTION-001",
                    severity="HIGH",
                    reason="factually_wrong",
                    provider=provider,
                    cookie_name=cname,
                    title=(
                        f"Chatbot-Cookie '{cname}' Speicherdauer "
                        f"vermutlich unterdeklariert"
                    ),
                    norm="DSGVO Art. 13 Abs. 2 lit. a",
                    evidence=(
                        f"Deklariert: {int(declared_days)} Tage — "
                        f"KB-typisch für {provider}: "
                        f"{typical} Tage"
                    ),
                    action=(
                        f"Tatsächliche Cookie-Lifetime im Browser prüfen "
                        f"und mit '{declared_str}' abgleichen. "
                        f"Vermutung: real ~{typical} Tage statt deklariert "
                        f"{int(declared_days)}."
                    ),
                ))

    if findings:
        logger.info("B12 chatbot-classification: %d findings", len(findings))
    return findings
|
||||
@@ -46,6 +46,8 @@ def compose_v2(state: dict) -> str:
|
||||
state.get("ai_act_html", ""),
|
||||
# B6/B7/B8/B9/B10 — DPO + Staleness + CMP + MultiEntity + Transfer
|
||||
state.get("extra_findings_html", ""),
|
||||
# B12 Chatbot-Cookie-Klassifikation
|
||||
state.get("chatbot_cookie_html", ""),
|
||||
# Browser-Matrix (Stage 1.c)
|
||||
state.get("browser_matrix_html", ""),
|
||||
# All legacy build_*_html() wrapped in V2 sections — preserves
|
||||
|
||||
@@ -77,6 +77,22 @@ def _country_third(country: str | None) -> tuple[str, bool, str | None]:
|
||||
return (code, True, tag)
|
||||
|
||||
|
||||
def _vendor_type_tag(cookie_name: str) -> str:
    """Return an HTML pill for cookies the chatbot KB recognises.

    The pill reads "Chat+AI" when the matched provider is flagged
    ``ai_capable`` and "Chat" otherwise. Cookies without a KB match, and any
    error during the lookup, produce an empty string.
    """
    chat_ai_pill = ' <span style="display:inline-block;background:#dbeafe;color:#1e40af;font-size:10px;padding:1px 6px;border-radius:999px;margin-left:4px;">Chat+AI</span>'
    chat_pill = ' <span style="display:inline-block;background:#f1f5f9;color:#475569;font-size:10px;padding:1px 6px;border-radius:999px;margin-left:4px;">Chat</span>'
    try:
        from ..chatbot_cookie_classification_check import _detect_provider, _KB
        hit = _detect_provider(cookie_name)
        if hit:
            provider_id, _ = hit
            entry = (_KB.get("providers") or {}).get(provider_id) or {}
            return chat_ai_pill if entry.get("ai_capable") else chat_pill
    except Exception:
        # Best-effort decoration: a failed lookup just means no tag.
        pass
    return ""
|
||||
|
||||
|
||||
def _src_chip(in_dse: bool, in_table: bool, in_browser: bool,
|
||||
in_ocr: bool) -> str:
|
||||
parts: list[str] = []
|
||||
@@ -248,7 +264,7 @@ def render_inventory_rows(rows: list[dict]) -> list[list[str]]:
|
||||
f'font-weight:700;">[{tag}]</span>'
|
||||
)
|
||||
out.append([
|
||||
f'<code>{h(r["name"])}</code>',
|
||||
f'<code>{h(r["name"])}</code>{_vendor_type_tag(r["name"])}',
|
||||
h(r["vendor"]) if r["vendor"] else
|
||||
'<span style="color:#dc2626;">❌</span>',
|
||||
_x_or(r["category"]),
|
||||
|
||||
@@ -0,0 +1,158 @@
|
||||
{
|
||||
"_schema_version": "1.0",
|
||||
"_last_updated": "2026-06-06",
|
||||
"_notes": "Anonymisierte Cookie-Pattern + Funktions-Klassifizierung pro Chat-Provider. Quelle: Anbieter-Dokumentation + EDPB-Cookie-Sweep + § 25 TDDDG. Kein Roh-Mandantendatum.",
|
||||
"providers": {
|
||||
"iadvize": {
|
||||
"company": "iAdvize SAS",
|
||||
"country": "FR",
|
||||
"type": "Chat & Conversational Platform",
|
||||
"ai_capable": true,
|
||||
"patterns": [
|
||||
{"regex": "^iadvize-\\d+-vuid$", "purpose": "Visitor-ID + Chat-Verlauf-Wiedererkennung", "default_class": "consent_required"},
|
||||
{"regex": "^iadvize-\\d+-consent$", "purpose": "Consent-State für iAdvize", "default_class": "technically_necessary"},
|
||||
{"regex": "^iadvize_test_cookie_top_domain$", "purpose": "Tech-Probe für Root-Domain-Detektion", "default_class": "technically_necessary"}
|
||||
],
|
||||
"typical_retention_days": 390,
|
||||
"tn_functions": ["chat-continuation", "session-context", "logged-in-chat", "consent-state"],
|
||||
"cp_functions": ["visitor-targeting", "engagement-rules", "ab-tests", "chat-analytics"]
|
||||
},
|
||||
"intercom": {
|
||||
"company": "Intercom Inc",
|
||||
"country": "US",
|
||||
"type": "Chat & Customer-Messaging-Platform",
|
||||
"ai_capable": true,
|
||||
"patterns": [
|
||||
{"regex": "^intercom-id-[\\w-]+$", "purpose": "Identifier-Cookie für Wiedererkennung", "default_class": "consent_required"},
|
||||
{"regex": "^intercom-session-[\\w-]+$", "purpose": "Aktuelle Chat-Session", "default_class": "technically_necessary"},
|
||||
{"regex": "^intercom-device-id-[\\w-]+$", "purpose": "Device-Fingerprint", "default_class": "consent_required"}
|
||||
],
|
||||
"typical_retention_days": 270,
|
||||
"tn_functions": ["session-context"],
|
||||
"cp_functions": ["device-tracking", "user-recognition-across-sites", "marketing-attribution"]
|
||||
},
|
||||
"tidio": {
|
||||
"company": "Tidio LLC",
|
||||
"country": "US",
|
||||
"type": "Chat-Widget + Chatbot",
|
||||
"ai_capable": true,
|
||||
"patterns": [
|
||||
{"regex": "^TidioStore_[\\w-]+$", "purpose": "Chat-Konfiguration + Verlauf", "default_class": "consent_required"},
|
||||
{"regex": "^tidio[_-]?identify[_-].*$", "purpose": "Visitor-Identifikation", "default_class": "consent_required"}
|
||||
],
|
||||
"typical_retention_days": 365,
|
||||
"tn_functions": ["chat-continuation"],
|
||||
"cp_functions": ["visitor-tracking", "lead-scoring", "marketing-automation"]
|
||||
},
|
||||
"drift": {
|
||||
"company": "Drift.com Inc",
|
||||
"country": "US",
|
||||
"type": "Conversational-Marketing-Platform",
|
||||
"ai_capable": true,
|
||||
"patterns": [
|
||||
{"regex": "^driftt_aid$", "purpose": "Anonymous Visitor-ID", "default_class": "consent_required"},
|
||||
{"regex": "^driftt_uid$", "purpose": "Logged-in User-ID", "default_class": "technically_necessary"},
|
||||
{"regex": "^drift_eid$", "purpose": "Email-Address-Identifier", "default_class": "consent_required"}
|
||||
],
|
||||
"typical_retention_days": 365,
|
||||
"tn_functions": ["logged-in-chat", "session-context"],
|
||||
"cp_functions": ["lead-generation", "conversational-marketing", "ab-testing"]
|
||||
},
|
||||
"userlike": {
|
||||
"company": "Userlike UG",
|
||||
"country": "DE",
|
||||
"type": "Chat-Widget + Chatbot",
|
||||
"ai_capable": true,
|
||||
"patterns": [
|
||||
{"regex": "^userlike-cookie-banner[\\w-]*$", "purpose": "Consent-State für Userlike", "default_class": "technically_necessary"},
|
||||
{"regex": "^userlike-[\\w-]+-id$", "purpose": "Visitor-Identifier", "default_class": "consent_required"}
|
||||
],
|
||||
"typical_retention_days": 365,
|
||||
"tn_functions": ["chat-continuation", "consent-state"],
|
||||
"cp_functions": ["visitor-tracking"]
|
||||
},
|
||||
"zendesk_chat": {
|
||||
"company": "Zendesk Inc",
|
||||
"country": "US",
|
||||
"type": "Chat & Customer-Support",
|
||||
"ai_capable": true,
|
||||
"patterns": [
|
||||
{"regex": "^__zlcmid$", "purpose": "Live-Chat-Identifier", "default_class": "technically_necessary"},
|
||||
{"regex": "^_zendesk_[\\w-]+$", "purpose": "Session-/Tracking-Cookie", "default_class": "consent_required"}
|
||||
],
|
||||
"typical_retention_days": 365,
|
||||
"tn_functions": ["live-chat-session"],
|
||||
"cp_functions": ["analytics", "marketing-tracking"]
|
||||
},
|
||||
"liveperson": {
|
||||
"company": "LivePerson Inc",
|
||||
"country": "US",
|
||||
"type": "Conversational-AI-Platform",
|
||||
"ai_capable": true,
|
||||
"patterns": [
|
||||
{"regex": "^LP_[\\w-]+$", "purpose": "LivePerson-Visitor-ID", "default_class": "consent_required"},
|
||||
{"regex": "^liveperson-[\\w-]+$", "purpose": "Session/Engagement", "default_class": "consent_required"}
|
||||
],
|
||||
"typical_retention_days": 365,
|
||||
"tn_functions": ["chat-session"],
|
||||
"cp_functions": ["visitor-tracking", "engagement-engine", "ai-chat-analytics"]
|
||||
},
|
||||
"hubspot_chat": {
|
||||
"company": "HubSpot Inc",
|
||||
"country": "US",
|
||||
"type": "Chat + CRM-Integration",
|
||||
"ai_capable": true,
|
||||
"patterns": [
|
||||
{"regex": "^hubspotutk$", "purpose": "HubSpot Visitor-Token", "default_class": "consent_required"},
|
||||
{"regex": "^__hssc$", "purpose": "Session-Tracking", "default_class": "consent_required"},
|
||||
{"regex": "^__hssrc$", "purpose": "Browser-Restart-Detection", "default_class": "consent_required"},
|
||||
{"regex": "^__hstc$", "purpose": "Visitor-Tracking", "default_class": "consent_required"},
|
||||
{"regex": "^messagesUtk$", "purpose": "Chat-Conversation-Token", "default_class": "technically_necessary"}
|
||||
],
|
||||
"typical_retention_days": 390,
|
||||
"tn_functions": ["chat-conversation"],
|
||||
"cp_functions": ["crm-integration", "marketing-attribution", "lead-scoring"]
|
||||
},
|
||||
"vertex_ai_chatbot": {
|
||||
"company": "Google Cloud (Vertex AI)",
|
||||
"country": "US (EU-Hosting möglich)",
|
||||
"type": "AI-Chatbot (LLM-basiert)",
|
||||
"ai_capable": true,
|
||||
"patterns": [
|
||||
{"regex": "^_GRECAPTCHA$", "purpose": "reCAPTCHA-Protection für Vertex-AI-Frontend", "default_class": "technically_necessary"},
|
||||
{"regex": "^GOOGLE_AUTH.*$", "purpose": "Google-Auth-Token (wenn embedded)", "default_class": "technically_necessary"}
|
||||
],
|
||||
"typical_retention_days": 180,
|
||||
"tn_functions": ["bot-protection", "auth-token"],
|
||||
"cp_functions": ["chat-analytics", "improvement-feedback"],
|
||||
"ai_act_disclosure_required": true
|
||||
},
|
||||
"openai_chatbot": {
|
||||
"company": "OpenAI LLC",
|
||||
"country": "US",
|
||||
"type": "AI-Chatbot (GPT-Modelle)",
|
||||
"ai_capable": true,
|
||||
"patterns": [
|
||||
{"regex": "^__cf_bm$", "purpose": "Cloudflare-Bot-Schutz", "default_class": "technically_necessary"},
|
||||
{"regex": "^_cfuvid$", "purpose": "Cloudflare-Visitor-ID", "default_class": "consent_required"}
|
||||
],
|
||||
"typical_retention_days": 365,
|
||||
"tn_functions": ["bot-protection"],
|
||||
"cp_functions": ["visitor-tracking", "ai-conversation-analytics"],
|
||||
"ai_act_disclosure_required": true
|
||||
},
|
||||
"anthropic_claude": {
|
||||
"company": "Anthropic PBC",
|
||||
"country": "US",
|
||||
"type": "AI-Chatbot (Claude-Modelle)",
|
||||
"ai_capable": true,
|
||||
"patterns": [
|
||||
{"regex": "^cf_clearance$", "purpose": "Cloudflare-Anti-Bot", "default_class": "technically_necessary"}
|
||||
],
|
||||
"typical_retention_days": 30,
|
||||
"tn_functions": ["bot-protection"],
|
||||
"cp_functions": ["chat-analytics"],
|
||||
"ai_act_disclosure_required": true
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -62,8 +62,10 @@ class ScanResponse(BaseModel):
|
||||
|
||||
from routes_matrix import router as matrix_router
|
||||
from routes_mobile import router as mobile_router
|
||||
from routes_cookie_matrix import router as cookie_matrix_router
|
||||
app.include_router(matrix_router)
|
||||
app.include_router(mobile_router)
|
||||
app.include_router(cookie_matrix_router)
|
||||
|
||||
|
||||
@app.get("/health")
|
||||
|
||||
@@ -0,0 +1,28 @@
|
||||
"""POST /scan-cookie-matrix — fokussierter Multi-Browser Cookie-Test."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import logging
|
||||
from datetime import datetime, timezone
|
||||
|
||||
from fastapi import APIRouter
|
||||
from pydantic import BaseModel
|
||||
|
||||
from services.cookie_behavior_per_browser import run_cookie_matrix
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
router = APIRouter()
|
||||
|
||||
|
||||
class CookieMatrixReq(BaseModel):
    """Request body for POST /scan-cookie-matrix."""

    # Page to scan; passed on unchanged to the scanner.
    url: str
    # Optional list of browser profile ids; None leaves the choice to the
    # scanner's profile resolution.
    browser_profiles: list[str] | None = None
|
||||
|
||||
|
||||
@router.post("/scan-cookie-matrix")
async def scan_cookie_matrix(req: CookieMatrixReq):
    """Run the multi-browser cookie scan and return its result.

    The scanner's result dict is returned with a ``scanned_at`` entry added,
    holding the current UTC time in ISO-8601 form.
    """
    profile_desc = req.browser_profiles or "default"
    logger.info("Cookie-matrix scan %s profiles=%s", req.url, profile_desc)
    matrix = await run_cookie_matrix(req.url, req.browser_profiles)
    matrix["scanned_at"] = datetime.now(timezone.utc).isoformat()
    return matrix
|
||||
@@ -0,0 +1,209 @@
|
||||
"""Cookie behavior per browser — fokussierter Multi-Engine Cookie-Test.
|
||||
|
||||
Stage 1.b ohne consent_scanner-Edit:
|
||||
- Eigener kleiner Playwright-basierter Cookie-Scanner
|
||||
- Pro Browser-Profile: cookies VOR Banner / NACH "Alle ablehnen" /
|
||||
NACH "Alle akzeptieren"
|
||||
- Echte Engine-Diversität: chromium / firefox / webkit /
|
||||
iphone-mobile-safari nutzen jeweils `p.chromium` / `p.firefox` /
|
||||
`p.webkit.launch()`
|
||||
- Output: Cookie-Delta pro Phase pro Browser → Tabelle zeigt ob
|
||||
Banner-Reject in allen Browsern gleich wirkt
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import logging
|
||||
from typing import Any
|
||||
|
||||
from .browser_profiles import resolve_profiles
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
# Labels (German and English) of consent-banner buttons that accept cookies.
# `_try_click` walks the tuple in order, so more specific labels come first.
_ACCEPT_TEXTS = (
    "alle akzeptieren", "alles akzeptieren", "akzeptieren",
    "zustimmen", "agree", "accept all", "accept",
    "i agree", "ok", "got it",
)
# Labels (German and English) of consent-banner buttons that reject cookies.
_REJECT_TEXTS = (
    "alle ablehnen", "ablehnen", "nur essenzielle",
    "nur notwendige", "reject all", "decline", "deny",
    "only necessary", "essential only",
)
|
||||
|
||||
|
||||
async def _try_click(page, texts: tuple[str, ...]) -> bool:
    """Click the first element whose label matches one of *texts*.

    Labels are tried in the given order. Each one is looked up first among
    elements with the button role, then among any element by its text.
    Matching ignores case, treats the label as literal text, tolerates any
    run of whitespace between its words, and must start at a word boundary.
    The boundary stops short labels from matching inside other words ("ok"
    no longer hits "Cookie-Einstellungen"), while inflected forms such as
    "Okay" still match.

    Args:
        page: Playwright page holding the consent banner.
        texts: Candidate labels, most specific first.

    Returns:
        True once a click succeeded, False if no label could be clicked.
    """
    import re  # function-scope: this module has no top-level ``re`` import

    for txt in texts:
        literal = r"\s+".join(re.escape(part) for part in txt.split())
        pattern = re.compile(r"\b" + literal, re.IGNORECASE)
        for strategy in ("button role", "text"):
            try:
                if strategy == "button role":
                    loc = page.get_by_role("button", name=pattern)
                else:
                    loc = page.get_by_text(pattern)
                target = loc.first
                if await target.count() > 0:
                    await target.click(timeout=4000)
                    # Give the banner time to react before cookies are read.
                    await page.wait_for_timeout(1500)
                    return True
            except Exception as e:
                # Best-effort: note the failure and try the next candidate.
                logger.debug("click %r via %s failed: %s", txt, strategy, e)
    return False
|
||||
|
||||
|
||||
def _cookie_summary(cookies: list[dict]) -> dict:
    """Summarise a cookie list.

    Returns a dict with ``count`` (number of cookies), ``names`` (every
    cookie name in input order, ``""`` when missing) and ``by_domain`` (up to
    eight ``(domain, count)`` pairs, most frequent first; ties keep the order
    in which the domains first appeared).
    """
    per_domain: dict[str, int] = {}
    cookie_names: list[str] = []
    for cookie in cookies:
        cookie_names.append(cookie.get("name", ""))
        domain = cookie.get("domain", "")
        per_domain[domain] = per_domain.get(domain, 0) + 1
    # sorted() is stable with reverse=True too, so ties keep insertion order.
    ranked = sorted(per_domain.items(), key=lambda item: item[1], reverse=True)
    return {
        "count": len(cookies),
        "names": cookie_names,
        "by_domain": ranked[:8],
    }
|
||||
|
||||
|
||||
async def _scan_accept_branch(
    browser, ctx_kw: dict[str, Any], url: str, profile_id: str,
) -> tuple[bool, list[dict]]:
    """Load *url* in a fresh context, try to accept cookies, read cookies.

    Every step is best-effort: a failing step is logged at debug level and
    the remaining steps still run. Returns ``(accept_clicked, cookies)``,
    which stays ``(False, [])`` when the context or page cannot be created.
    """
    try:
        context = await browser.new_context(**ctx_kw)
        page = await context.new_page()
    except Exception as e:
        logger.info("accept branch failed for %s: %s", profile_id, e)
        return False, []

    async def attempt(step: str, start, default=None):
        # ``start`` is called inside the try so that errors raised while
        # creating the awaitable are handled like errors while awaiting it.
        try:
            return await start()
        except Exception as e:
            logger.debug("accept branch step %s failed for %s: %s",
                         step, profile_id, e)
            return default

    await attempt("goto", lambda: page.goto(
        url, wait_until="domcontentloaded", timeout=30000))
    await attempt("settle", lambda: page.wait_for_timeout(2500))
    accept_clicked = await attempt(
        "click", lambda: _try_click(page, _ACCEPT_TEXTS), False)
    await attempt("settle", lambda: page.wait_for_timeout(1500))
    after_accept = await attempt("cookies", lambda: context.cookies(), [])
    return accept_clicked, after_accept


async def _scan_one(p, url: str, profile: dict) -> dict[str, Any]:
    """Scan *url* with one browser profile and report its cookie behaviour.

    Cookies are read after page load, then after trying to click a reject
    button in the same context, and finally after trying to click an accept
    button in a second, fresh context of the same browser.

    Args:
        p: Playwright handle providing ``chromium`` / ``firefox`` / ``webkit``
            and ``devices``.
        url: Page to load.
        profile: Profile dict. ``id``, ``engine`` and ``label`` are required;
            ``channel``, ``executable_path``, ``locale``, ``timezone``,
            ``device`` and ``viewport`` are optional.

    Returns:
        On success a dict with the click flags, the three cookie summaries
        and the count deltas against the initial cookies. If the engine is
        unknown, the browser cannot be launched or the first navigation
        fails, ``{"profile_id": ..., "error": ...}`` is returned instead.
    """
    engine = profile["engine"]
    launcher_attr = {
        "blink": "chromium", "gecko": "firefox", "webkit": "webkit",
    }.get(engine)
    if launcher_attr is None:
        return {"profile_id": profile["id"], "error": f"unknown engine {engine}"}
    bt = getattr(p, launcher_attr)

    launch_kw: dict[str, Any] = {"headless": True}
    if profile.get("channel"):
        launch_kw["channel"] = profile["channel"]
    if profile.get("executable_path"):
        launch_kw["executable_path"] = profile["executable_path"]
    try:
        browser = await bt.launch(**launch_kw)
    except Exception as e:
        return {"profile_id": profile["id"], "error": f"launch: {e}"[:200]}

    try:
        ctx_kw: dict[str, Any] = {
            "locale": profile.get("locale", "de-DE"),
            "timezone_id": profile.get("timezone", "Europe/Berlin"),
        }
        if profile.get("device"):
            # A device preset takes precedence over an explicit viewport.
            preset = p.devices.get(profile["device"]) or {}
            ctx_kw.update(preset)
        elif profile.get("viewport"):
            ctx_kw["viewport"] = profile["viewport"]
        context = await browser.new_context(**ctx_kw)
        page = await context.new_page()
        try:
            await page.goto(url, wait_until="domcontentloaded", timeout=30000)
        except Exception as e:
            # The browser is closed by the ``finally`` below.
            return {"profile_id": profile["id"],
                    "error": f"goto: {e}"[:200]}
        await page.wait_for_timeout(2500)

        before = await context.cookies()

        # Reject branch: same context and page as the "before" snapshot.
        reject_clicked = await _try_click(page, _REJECT_TEXTS)
        await page.wait_for_timeout(1500)
        after_reject = await context.cookies()

        # Accept branch: separate context so the reject click cannot leak in.
        accept_clicked, after_accept = await _scan_accept_branch(
            browser, ctx_kw, url, profile["id"])

        return {
            "profile_id": profile["id"],
            "label": profile["label"],
            "engine": engine,
            "reject_clicked": reject_clicked,
            "accept_clicked": accept_clicked,
            "before": _cookie_summary(before),
            "after_reject": _cookie_summary(after_reject),
            "after_accept": _cookie_summary(after_accept),
            "reject_minus_before_count": (
                len(after_reject) - len(before)
            ),
            "accept_minus_before_count": (
                len(after_accept) - len(before)
            ),
        }
    finally:
        try:
            await browser.close()
        except Exception as e:
            logger.debug("browser close failed for %s: %s", profile["id"], e)
|
||||
|
||||
|
||||
async def run_cookie_matrix(
    url: str, requested_profiles: list[str] | None = None,
) -> dict:
    """Scan *url* with every resolved browser profile and aggregate results.

    Profiles are scanned one after another, not in parallel. A profile whose
    scan raises is recorded as an error entry instead of aborting the run.

    Returns:
        A dict with ``url``, ``profile_count``, the per-profile ``results``
        and an ``aggregate`` holding the after-reject cookie count per
        successful profile plus ``inconsistent_reject``, which is True when
        those counts differ by two or more.
    """
    from playwright.async_api import async_playwright

    profiles = resolve_profiles(requested_profiles)
    results: list[dict] = []
    async with async_playwright() as p:
        # One browser at a time; parallel runs are avoided on purpose.
        for prof in profiles:
            try:
                outcome = await _scan_one(p, url, prof)
            except Exception as e:
                logger.warning("scan_one %s crashed: %s", prof["id"], e)
                outcome = {"profile_id": prof["id"], "error": f"crash: {e}"[:200]}
            results.append(outcome)

    # Cross-browser comparison covers only profiles that finished cleanly.
    after_reject_counts: dict = {}
    for entry in results:
        if "error" in entry:
            continue
        after_reject_counts[entry["profile_id"]] = (
            entry.get("after_reject", {}).get("count", 0)
        )

    inconsistent = False
    if after_reject_counts:
        observed = after_reject_counts.values()
        inconsistent = (max(observed) - min(observed)) >= 2

    aggregate = {
        "reject_cookie_counts": after_reject_counts,
        "inconsistent_reject": inconsistent,
    }
    return {
        "url": url,
        "profile_count": len(profiles),
        "results": results,
        "aggregate": aggregate,
    }
|
||||
Reference in New Issue
Block a user