feat: B12 Chatbot-Cookie-Klassifikation (#19) + Cookie-Matrix scan + safetykon test
#19 Chatbot-Cookie-Klassifikation: - chat_providers.json KB mit 11 Providern (iAdvize, Intercom, Tidio, Drift, Userlike, Zendesk, LivePerson, HubSpot, Vertex AI, OpenAI, Anthropic Claude). Pro Provider: Cookie-Pattern-Regex, typical_retention_days, tn_functions vs cp_functions, ai_capable. - chatbot_cookie_classification_check.py mit 4 KORRIGIERTEN Checks: CHAT-COOKIE-CLASS-001 (MED) — TN deklariert + Vendor-Purpose erwähnt Targeting/Analytics/A-B-Tests CHAT-COOKIE-CLASS-002 (MED) — Provider hat tn+cp Funktionen, Tabelle nennt nur eine Seite → keine Einwilligungs-Differenzierung CHAT-COOKIE-PURPOSE-001 (LOW) — Zweck zu generisch (Art. 13 DSGVO konkret) CHAT-COOKIE-RETENTION-001 (HIGH) — deklariert <90d, KB-typisch >365d → vermutlich unterdeklariert NEU vs vorigem Plan: kein "eigene Banner-Kategorie Chat/AI"-Check — gesetzlich nicht vorgeschrieben (Vermischung Zweck-Transparenz vs Kategorie-Name). Anwender-Frage berechtigt, Konzept geschärft. - _b12_wiring.py + Orchestrator-Wire + V2-Compose-Slot - Cookie-Inventar mit [Chat]/[Chat+AI]-Tag pro Cookie-Name (KB-Lookup) - Smoke (3 Vendors / 5 Cookies): 9 findings korrekt (3 HIGH RETENTION, 3 MEDIUM CLASS-001, 4 LOW PURPOSE) Cookie-Matrix Scan (Browser-Vergleich gegen safetykon.de): - consent-tester/services/cookie_behavior_per_browser.py: eigener fokussierter Scanner. Pro Browser-Profile: cookies before / after reject / after accept in separaten Kontexten. Sequenzielle Runs statt parallel (Race-Conditions). - routes_cookie_matrix.py POST /scan-cookie-matrix - Live-Test safetykon.de: chromium=1, firefox=0, webkit=1, mobile- safari=1 nach reject — Firefox setzt KEIN Cookie nach Reject! (consent-tester Rebuild brachte playwright install-deps für system-libs) Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -0,0 +1,209 @@
|
||||
"""Cookie behavior per browser — fokussierter Multi-Engine Cookie-Test.
|
||||
|
||||
Stage 1.b ohne consent_scanner-Edit:
|
||||
- Eigener kleiner Playwright-basierter Cookie-Scanner
|
||||
- Pro Browser-Profile: cookies VOR Banner / NACH "Alle ablehnen" /
|
||||
NACH "Alle akzeptieren"
|
||||
- Echte Engine-Diversität: chromium / firefox / webkit /
|
||||
iphone-mobile-safari nutzen jeweils `p.chromium` / `p.firefox` /
|
||||
`p.webkit.launch()`
|
||||
- Output: Cookie-Delta pro Phase pro Browser → Tabelle zeigt ob
|
||||
Banner-Reject in allen Browsern gleich wirkt
|
||||
"""
|
||||
|
||||
from __future__ import annotations

import logging
import re
from typing import Any

from .browser_profiles import resolve_profiles
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
# Labels that identify an "accept" control on a consent banner.
# The scanner tries them from first to last, so the order is significant.
_ACCEPT_TEXTS = (
    # German
    "alle akzeptieren",
    "alles akzeptieren",
    "akzeptieren",
    "zustimmen",
    # English
    "agree",
    "accept all",
    "accept",
    "i agree",
    "ok",
    "got it",
)
|
||||
# Labels that identify a "reject" / "essential only" control on a consent
# banner. The scanner tries them from first to last, so the order is
# significant.
_REJECT_TEXTS = (
    # German
    "alle ablehnen",
    "ablehnen",
    "nur essenzielle",
    "nur notwendige",
    # English
    "reject all",
    "decline",
    "deny",
    "only necessary",
    "essential only",
)
|
||||
|
||||
|
||||
async def _click_if_present(page, locator) -> bool:
    """Click the first element matched by *locator*, if there is one.

    Returns True after a click (followed by a short settle delay), False
    when the locator matches nothing. Click errors propagate to the caller.
    """
    if await locator.count() == 0:
        return False
    await locator.first.click(timeout=4000)
    # Give the banner's consent scripts time to react to the click.
    await page.wait_for_timeout(1500)
    return True


async def _try_click(page, texts: tuple[str, ...]) -> bool:
    """Click the first element on *page* whose label matches one of *texts*.

    Each entry of *texts* is used as a case-insensitive regular-expression
    fragment; entries are tried in order. For every entry the accessible
    name of ``button`` roles is checked first; if that does not lead to a
    click, any element found by Playwright's ``text=`` selector is used as
    a fallback.

    Fragments are wrapped in word boundaries so that short labels only
    match whole words: ``ok`` must not hit "Cookie settings" and ``agree``
    must not hit "Disagree" — either would click the wrong control and
    still report success.

    Args:
        page: Playwright page (async API) showing the consent banner.
        texts: Candidate labels in the order they should be tried.

    Returns:
        True as soon as one click succeeded, False if nothing could be
        clicked. Lookup and click errors are swallowed on purpose: the
        click is best-effort and callers only consume the boolean.
    """
    for txt in texts:
        # The non-capturing group keeps both boundaries attached to the
        # whole fragment even if it contains an alternation.
        source = rf"\b(?:{txt})\b"
        try:
            by_role = page.get_by_role(
                "button", name=re.compile(source, re.IGNORECASE))
            if await _click_if_present(page, by_role):
                return True
        except Exception:
            # Best-effort: fall through to the plain-text lookup.
            pass
        # Fallback: any element whose text matches, not only buttons.
        try:
            by_text = page.locator(f"text=/{source}/i")
            if await _click_if_present(page, by_text):
                return True
        except Exception:
            # Best-effort: move on to the next candidate label.
            continue
    return False
|
||||
|
||||
|
||||
def _cookie_summary(cookies: list[dict]) -> dict:
    """Condense a cookie list into a count, all names, and the top domains.

    Args:
        cookies: Cookie dicts; the ``name`` and ``domain`` keys are read and
            default to an empty string when missing.

    Returns:
        A dict with ``count`` (number of cookies), ``names`` (every cookie
        name, in input order) and ``by_domain`` (up to eight
        ``(domain, cookie_count)`` pairs, most frequent domain first; ties
        keep first-seen order).
    """
    per_domain: dict[str, int] = {}
    for cookie in cookies:
        domain = cookie.get("domain", "")
        per_domain.setdefault(domain, 0)
        per_domain[domain] += 1

    # A stable descending sort keeps equally frequent domains in the order
    # in which they were first encountered.
    ranked = sorted(per_domain.items(), key=lambda pair: pair[1], reverse=True)

    summary = {
        "count": len(cookies),
        "names": [cookie.get("name", "") for cookie in cookies],
        "by_domain": ranked[:8],
    }
    return summary
|
||||
|
||||
|
||||
async def _attempt(step: str, awaitable, errors: list[str]) -> Any:
    """Await one best-effort step and record its failure instead of raising.

    Returns the awaited value, or None after appending ``"<step>: <error>"``
    (capped at 200 characters) to *errors* when the step raised.
    """
    try:
        return await awaitable
    except Exception as e:
        errors.append(f"{step}: {e}"[:200])
        return None


async def _scan_one(p, url: str, profile: dict) -> dict[str, Any]:
    """Record the cookies one browser profile holds before and after consent.

    Three snapshots are taken:

    1. ``before``: page loaded, banner untouched.
    2. ``after_reject``: the same browser context, after trying to click a
       reject control.
    3. ``after_accept``: a second, fresh context of the same browser, after
       trying to click an accept control, so the earlier reject cannot
       influence it.

    Args:
        p: Playwright driver object (async API); its ``chromium`` /
            ``firefox`` / ``webkit`` launchers and ``devices`` are used.
        url: Page to scan.
        profile: Profile description. Reads ``id``, ``label`` and ``engine``
            (``blink`` / ``gecko`` / ``webkit``) plus the optional keys
            ``channel``, ``executable_path``, ``locale``, ``timezone``,
            ``device`` and ``viewport``.

    Returns:
        On success a dict with the profile identity, the two click flags,
        one cookie summary per snapshot, the two count deltas relative to
        ``before`` and ``accept_errors`` (messages of accept-branch steps
        that failed; empty when the branch ran cleanly). For an unknown
        engine, a failed launch or a failed first navigation the dict only
        holds ``profile_id`` and ``error``.
    """
    engine = profile["engine"]
    # Rendering-engine name -> attribute of the Playwright driver.
    launcher_names = {"blink": "chromium", "gecko": "firefox", "webkit": "webkit"}
    if engine not in launcher_names:
        return {"profile_id": profile["id"], "error": f"unknown engine {engine}"}
    bt = getattr(p, launcher_names[engine])

    launch_kw: dict[str, Any] = {"headless": True}
    for key in ("channel", "executable_path"):
        if profile.get(key):
            launch_kw[key] = profile[key]
    try:
        browser = await bt.launch(**launch_kw)
    except Exception as e:
        return {"profile_id": profile["id"], "error": f"launch: {e}"[:200]}

    try:
        ctx_kw: dict[str, Any] = {
            "locale": profile.get("locale", "de-DE"),
            "timezone_id": profile.get("timezone", "Europe/Berlin"),
        }
        if profile.get("device"):
            # A device preset wins over an explicit viewport.
            ctx_kw.update(p.devices.get(profile["device"]) or {})
        elif profile.get("viewport"):
            ctx_kw["viewport"] = profile["viewport"]

        # "before" and "after reject" deliberately share one context: the
        # reject click has to act on the very session that was measured.
        context = await browser.new_context(**ctx_kw)
        page = await context.new_page()
        try:
            await page.goto(url, wait_until="domcontentloaded", timeout=30000)
        except Exception as e:
            # The finally clause below closes the browser exactly once.
            return {"profile_id": profile["id"],
                    "error": f"goto: {e}"[:200]}
        await page.wait_for_timeout(2500)
        before = await context.cookies()

        reject_clicked = await _try_click(page, _REJECT_TEXTS)
        await page.wait_for_timeout(1500)
        after_reject = await context.cookies()

        # Accept branch: fresh context so no state from the reject leaks in.
        # Every step stays best-effort, but failures are collected so that a
        # zero cookie count can be told apart from a branch that never ran.
        accept_clicked = False
        after_accept: list[dict] = []
        accept_errors: list[str] = []
        try:
            context2 = await browser.new_context(**ctx_kw)
            page2 = await context2.new_page()
        except Exception as e:
            logger.info("accept branch failed for %s: %s",
                        profile["id"], e)
            accept_errors.append(f"context: {e}"[:200])
        else:
            await _attempt(
                "goto",
                page2.goto(url, wait_until="domcontentloaded", timeout=30000),
                accept_errors,
            )
            await _attempt("wait", page2.wait_for_timeout(2500), accept_errors)
            accept_clicked = bool(await _attempt(
                "click", _try_click(page2, _ACCEPT_TEXTS), accept_errors))
            await _attempt("wait", page2.wait_for_timeout(1500), accept_errors)
            after_accept = await _attempt(
                "cookies", context2.cookies(), accept_errors) or []
            if accept_errors:
                logger.info("accept branch incomplete for %s: %s",
                            profile["id"], accept_errors)

        return {
            "profile_id": profile["id"],
            "label": profile["label"],
            "engine": engine,
            "reject_clicked": reject_clicked,
            "accept_clicked": accept_clicked,
            "before": _cookie_summary(before),
            "after_reject": _cookie_summary(after_reject),
            "after_accept": _cookie_summary(after_accept),
            "reject_minus_before_count": len(after_reject) - len(before),
            # NOTE: compares the fresh accept context against the "before"
            # snapshot of the first context.
            "accept_minus_before_count": len(after_accept) - len(before),
            "accept_errors": accept_errors,
        }
    finally:
        try:
            await browser.close()
        except Exception:
            # Closing is cleanup only; a failure here must not replace the
            # scan result or the original error.
            pass
|
||||
|
||||
|
||||
def _reject_count_aggregate(results: list[dict], min_spread: int) -> dict:
    """Compare the post-reject cookie counts of all profiles without error.

    Returns the per-profile counts plus a flag that is True when the gap
    between the highest and the lowest count reaches *min_spread*.
    """
    counts = {
        r["profile_id"]: r.get("after_reject", {}).get("count", 0)
        for r in results
        if "error" not in r
    }
    spread = max(counts.values()) - min(counts.values()) if counts else 0
    return {
        "reject_cookie_counts": counts,
        # With no usable result there is nothing to compare.
        "inconsistent_reject": bool(counts) and spread >= min_spread,
    }


async def run_cookie_matrix(
    url: str, requested_profiles: list[str] | None = None,
    *, min_count_spread: int = 2,
) -> dict:
    """Run the focused cookie-behavior scan for each resolved browser profile.

    Args:
        url: Page to scan.
        requested_profiles: Profile selection handed to ``resolve_profiles``;
            None leaves the choice to that function.
        min_count_spread: Smallest difference between the highest and the
            lowest post-reject cookie count that marks the browsers as
            inconsistent. Defaults to 2.

    Returns:
        A dict with ``url``, ``profile_count``, the per-profile ``results``
        (a profile that crashed is represented by ``profile_id`` and
        ``error``) and ``aggregate`` holding ``reject_cookie_counts`` and
        ``inconsistent_reject``.
    """
    from playwright.async_api import async_playwright

    profiles = resolve_profiles(requested_profiles)
    results: list[dict] = []
    async with async_playwright() as p:
        # Sequential to avoid resource contention on the Mac Mini
        # (4 browsers in parallel sometimes hits target-closed races).
        for prof in profiles:
            try:
                r = await _scan_one(p, url, prof)
            except Exception as e:
                # One broken profile must not abort the remaining ones.
                logger.warning("scan_one %s crashed: %s", prof["id"], e)
                r = {"profile_id": prof["id"], "error": f"crash: {e}"[:200]}
            results.append(r)

    # Aggregate: cross-browser inconsistency detection
    return {
        "url": url,
        "profile_count": len(profiles),
        "results": results,
        "aggregate": _reject_count_aggregate(results, min_count_spread),
    }
|
||||
Reference in New Issue
Block a user