Files
breakpilot-compliance/consent-tester/services/cookie_behavior_per_browser.py
T
Benjamin Admin ff796fb480 feat: B12 Chatbot-Cookie-Klassifikation (#19) + Cookie-Matrix scan + safetykon test
#19 Chatbot-Cookie-Klassifikation:
  - chat_providers.json KB mit 11 Providern (iAdvize, Intercom, Tidio,
    Drift, Userlike, Zendesk, LivePerson, HubSpot, Vertex AI, OpenAI,
    Anthropic Claude). Pro Provider: Cookie-Pattern-Regex,
    typical_retention_days, tn_functions vs cp_functions, ai_capable.
  - chatbot_cookie_classification_check.py mit 4 KORRIGIERTEN Checks:
      CHAT-COOKIE-CLASS-001 (MED) — TN deklariert + Vendor-Purpose
        erwähnt Targeting/Analytics/A-B-Tests
      CHAT-COOKIE-CLASS-002 (MED) — Provider hat tn+cp Funktionen,
        Tabelle nennt nur eine Seite → keine Einwilligungs-Differenzierung
      CHAT-COOKIE-PURPOSE-001 (LOW) — Zweck zu generisch (Art. 13
        DSGVO konkret)
      CHAT-COOKIE-RETENTION-001 (HIGH) — deklariert <90d, KB-typisch
        >365d → vermutlich unterdeklariert
    NEU vs vorigem Plan: kein "eigene Banner-Kategorie Chat/AI"-Check —
    gesetzlich nicht vorgeschrieben (Vermischung Zweck-Transparenz vs
    Kategorie-Name). Anwender-Frage berechtigt, Konzept geschärft.
  - _b12_wiring.py + Orchestrator-Wire + V2-Compose-Slot
  - Cookie-Inventar mit [Chat]/[Chat+AI]-Tag pro Cookie-Name (KB-Lookup)
  - Smoke (3 Vendors / 5 Cookies): 9 findings korrekt (3 HIGH RETENTION,
    3 MEDIUM CLASS-001, 4 LOW PURPOSE)

Cookie-Matrix Scan (Browser-Vergleich gegen safetykon.de):
  - consent-tester/services/cookie_behavior_per_browser.py: eigener
    fokussierter Scanner. Pro Browser-Profile: cookies before / after
    reject / after accept in separaten Kontexten. Sequenzielle Runs
    statt parallel (Race-Conditions).
  - routes_cookie_matrix.py POST /scan-cookie-matrix
  - Live-Test safetykon.de: chromium=1, firefox=0, webkit=1, mobile-
    safari=1 nach reject — Firefox setzt KEIN Cookie nach Reject!
    (consent-tester Rebuild brachte playwright install-deps für system-libs)

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-06-06 23:25:20 +02:00

210 lines
7.1 KiB
Python

"""Cookie behavior per browser — fokussierter Multi-Engine Cookie-Test.
Stage 1.b, without editing consent_scanner:
- Small, self-contained Playwright-based cookie scanner
- Per browser profile: cookies BEFORE the banner / AFTER "reject all" /
AFTER "accept all"
- Real engine diversity: the chromium / firefox / webkit /
iphone-mobile-safari profiles each launch through `p.chromium` /
`p.firefox` / `p.webkit`
- Output: cookie delta per phase per browser → the table shows whether
a banner reject has the same effect in every browser
"""
from __future__ import annotations
import logging
from typing import Any
from .browser_profiles import resolve_profiles
logger = logging.getLogger(__name__)
# Labels that identify an "accept all" control on a consent banner.
# _try_click walks this tuple in order and stops at the first label it can
# click, so the longer, more specific phrases are listed before the short ones.
_ACCEPT_TEXTS = (
    "alle akzeptieren",
    "alles akzeptieren",
    "akzeptieren",
    "zustimmen",
    "agree",
    "accept all",
    "accept",
    "i agree",
    "ok",
    "got it",
)
# Labels that identify a "reject all" / "essential only" control on a consent
# banner. _try_click walks this tuple in order and stops at the first label it
# can click.
_REJECT_TEXTS = (
    "alle ablehnen",
    "ablehnen",
    "nur essenzielle",
    "nur notwendige",
    "reject all",
    "decline",
    "deny",
    "only necessary",
    "essential only",
)
async def _try_click(page, texts: tuple[str, ...]) -> bool:
    """Click the first element whose label matches one of *texts*.

    Labels are tried in the order given. For each label the function first
    looks for an element with the ARIA role ``button`` whose accessible name
    matches, then falls back to any element containing matching text (links
    or generic elements used as buttons).

    Matching is case-insensitive and on whole words: a label only matches
    when it is delimited by word boundaries, and runs of whitespace inside a
    multi-word label may be any whitespace. A plain substring search is not
    used because short labels would hit unrelated controls ("ok" is a
    substring of "Cookie").

    Args:
        page: Playwright page (or any object offering ``get_by_role``,
            ``get_by_text`` and ``wait_for_timeout``).
        texts: Candidate labels, most specific first. Labels are treated as
            literal text, not as regular expressions.

    Returns:
        True as soon as one click succeeded, False if no label could be
        clicked. Errors raised while locating or clicking are logged at
        debug level and treated as "no match".
    """
    import re  # local import: ``re`` is not in this module's import block

    for txt in texts:
        words = [re.escape(word) for word in txt.split()]
        if not words:
            # An empty label would compile to a match-everything pattern.
            continue
        pattern = re.compile(
            r"\b" + r"\s+".join(words) + r"\b", re.IGNORECASE,
        )
        for strategy in ("role", "text"):
            try:
                if strategy == "role":
                    loc = page.get_by_role("button", name=pattern)
                else:
                    # Fallback for controls that are not exposed as buttons.
                    loc = page.get_by_text(pattern)
                if await loc.count() > 0:
                    await loc.first.click(timeout=4000)
                    # Pause so the page can react to the click before the
                    # caller inspects its state.
                    await page.wait_for_timeout(1500)
                    return True
            except Exception as exc:
                # Best effort: a failed lookup/click must not abort the scan;
                # move on to the next strategy or label.
                logger.debug(
                    "click via %s failed for label %r: %s",
                    strategy, txt, exc,
                )
    return False
def _cookie_summary(cookies: list[dict]) -> dict:
    """Condense a cookie list into count, names and a per-domain ranking.

    Returns a dict with:
        count: number of cookies,
        names: every cookie's ``name`` ("" when the key is missing),
        by_domain: up to eight ``(domain, cookie_count)`` pairs, highest
            count first; domains with equal counts keep first-seen order.
    """
    seen_domains = [cookie.get("domain", "") for cookie in cookies]

    # dict.fromkeys keeps first-occurrence order, which the stable sort below
    # relies on to break ties.
    tally: dict[str, int] = dict.fromkeys(seen_domains, 0)
    for domain in seen_domains:
        tally[domain] += 1

    ranked = sorted(tally.items(), key=lambda item: item[1], reverse=True)
    top_domains = ranked[:8]

    summary = {
        "count": len(cookies),
        "names": [cookie.get("name", "") for cookie in cookies],
        "by_domain": top_domains,
    }
    return summary
async def _scan_accept_branch(
    browser, ctx_kw: dict[str, Any], url: str, profile_id: str,
) -> tuple[bool, list[dict]]:
    """Load *url* in a fresh context, click "accept" and read the cookies.

    Every step is best effort: a failure is logged and the function carries
    on with whatever it has, so the caller always gets a
    ``(accept_clicked, cookies)`` pair.
    """
    clicked = False
    cookies: list[dict] = []
    context = None
    try:
        context = await browser.new_context(**ctx_kw)
        page = await context.new_page()
        try:
            await page.goto(url, wait_until="domcontentloaded", timeout=30000)
        except Exception as exc:
            # Keep going: the click is still attempted on whatever loaded.
            logger.warning(
                "accept branch goto failed for %s: %s", profile_id, exc,
            )
        try:
            await page.wait_for_timeout(2500)
            clicked = await _try_click(page, _ACCEPT_TEXTS)
            await page.wait_for_timeout(1500)
        except Exception as exc:
            logger.info(
                "accept branch interaction failed for %s: %s",
                profile_id, exc,
            )
        try:
            cookies = await context.cookies()
        except Exception as exc:
            logger.info(
                "accept branch cookie read failed for %s: %s",
                profile_id, exc,
            )
    except Exception as exc:
        logger.info("accept branch failed for %s: %s", profile_id, exc)
    finally:
        if context is not None:
            try:
                await context.close()
            except Exception as exc:
                logger.debug(
                    "closing accept context failed for %s: %s",
                    profile_id, exc,
                )
    return clicked, cookies


async def _scan_one(p, url: str, profile: dict) -> dict[str, Any]:
    """Run the three-phase cookie scan for one browser profile.

    Phases:
        1. "before": load *url* in a new context and read the cookies
           before any banner interaction.
        2. "after_reject": click a reject control in that same context and
           read the cookies again.
        3. "after_accept": load *url* in a second, fresh context, click an
           accept control and read the cookies.

    Args:
        p: Playwright handle exposing ``chromium`` / ``firefox`` /
            ``webkit`` and ``devices``.
        url: Page to scan.
        profile: Profile dict. ``id`` and ``engine`` ("blink", "gecko" or
            "webkit") are required; ``label``, ``channel``,
            ``executable_path``, ``locale``, ``timezone``, ``device`` and
            ``viewport`` are optional.

    Returns:
        On success a dict with ``profile_id``, ``label``, ``engine``,
        ``reject_clicked``, ``accept_clicked``, the three cookie summaries
        and the cookie-count deltas relative to "before". For an unknown
        engine, a failed browser launch or a failed first navigation, a
        dict with ``profile_id`` and ``error`` (message capped at 200
        characters).

    Raises:
        Exception: errors from the "before" and reject phases (other than
        the first navigation) are not caught here.
    """
    engine = profile["engine"]
    browser_attr = {
        "blink": "chromium",
        "gecko": "firefox",
        "webkit": "webkit",
    }.get(engine)
    if browser_attr is None:
        return {"profile_id": profile["id"], "error": f"unknown engine {engine}"}
    bt = getattr(p, browser_attr)

    launch_kw: dict[str, Any] = {"headless": True}
    if profile.get("channel"):
        launch_kw["channel"] = profile["channel"]
    if profile.get("executable_path"):
        launch_kw["executable_path"] = profile["executable_path"]
    try:
        browser = await bt.launch(**launch_kw)
    except Exception as e:
        return {"profile_id": profile["id"], "error": f"launch: {e}"[:200]}

    try:
        ctx_kw: dict[str, Any] = {
            "locale": profile.get("locale", "de-DE"),
            "timezone_id": profile.get("timezone", "Europe/Berlin"),
        }
        if profile.get("device"):
            # A device preset takes precedence over an explicit viewport.
            preset = p.devices.get(profile["device"]) or {}
            ctx_kw.update(preset)
        elif profile.get("viewport"):
            ctx_kw["viewport"] = profile["viewport"]

        context = await browser.new_context(**ctx_kw)
        page = await context.new_page()
        try:
            await page.goto(url, wait_until="domcontentloaded", timeout=30000)
        except Exception as e:
            # The browser is closed by the ``finally`` below.
            return {"profile_id": profile["id"],
                    "error": f"goto: {e}"[:200]}
        await page.wait_for_timeout(2500)
        before = await context.cookies()

        # Reject phase: same context as "before", so the delta reflects
        # exactly what the reject click changed.
        reject_clicked = await _try_click(page, _REJECT_TEXTS)
        await page.wait_for_timeout(1500)
        after_reject = await context.cookies()

        # Accept phase: fresh context so the reject decision cannot leak in.
        accept_clicked, after_accept = await _scan_accept_branch(
            browser, ctx_kw, url, profile["id"],
        )

        return {
            "profile_id": profile["id"],
            # Fall back to the id so a missing label does not throw away a
            # completed scan.
            "label": profile.get("label", profile["id"]),
            "engine": engine,
            "reject_clicked": reject_clicked,
            "accept_clicked": accept_clicked,
            "before": _cookie_summary(before),
            "after_reject": _cookie_summary(after_reject),
            "after_accept": _cookie_summary(after_accept),
            "reject_minus_before_count": len(after_reject) - len(before),
            "accept_minus_before_count": len(after_accept) - len(before),
        }
    finally:
        try:
            await browser.close()
        except Exception as exc:
            logger.debug(
                "closing browser failed for %s: %s", profile["id"], exc,
            )
async def run_cookie_matrix(
    url: str, requested_profiles: list[str] | None = None,
) -> dict:
    """Scan *url* with every resolved browser profile and compare results.

    Profiles come from ``resolve_profiles(requested_profiles)`` and are
    scanned one after another. A profile whose scan raises is recorded as
    ``{"profile_id": ..., "error": "crash: ..."}`` instead of aborting the
    run.

    Returns:
        A dict with ``url``, ``profile_count``, the per-profile ``results``
        and an ``aggregate`` holding the after-reject cookie count of every
        profile that produced no error, plus ``inconsistent_reject`` — True
        when the highest and lowest of those counts differ by two or more.
    """
    from playwright.async_api import async_playwright

    selected = resolve_profiles(requested_profiles)
    scans: list[dict] = []
    async with async_playwright() as pw:
        # One profile at a time: the original author notes that running the
        # browsers in parallel sometimes ended in target-closed races.
        for profile in selected:
            try:
                outcome = await _scan_one(pw, url, profile)
            except Exception as exc:
                logger.warning("scan_one %s crashed: %s", profile["id"], exc)
                outcome = {
                    "profile_id": profile["id"],
                    "error": f"crash: {exc}"[:200],
                }
            scans.append(outcome)

    # Cross-browser comparison over the profiles that scanned cleanly.
    reject_counts: dict = {}
    for outcome in scans:
        if "error" in outcome:
            continue
        reject_summary = outcome.get("after_reject", {})
        reject_counts[outcome["profile_id"]] = reject_summary.get("count", 0)

    spread = 0
    if reject_counts:
        observed = reject_counts.values()
        spread = max(observed) - min(observed)
    differs = spread >= 2

    return {
        "url": url,
        "profile_count": len(selected),
        "results": scans,
        "aggregate": {
            "reject_cookie_counts": reject_counts,
            "inconsistent_reject": differs,
        },
    }