ff796fb480
#19 Chatbot-Cookie-Klassifikation: - chat_providers.json KB mit 11 Providern (iAdvize, Intercom, Tidio, Drift, Userlike, Zendesk, LivePerson, HubSpot, Vertex AI, OpenAI, Anthropic Claude). Pro Provider: Cookie-Pattern-Regex, typical_retention_days, tn_functions vs cp_functions, ai_capable. - chatbot_cookie_classification_check.py mit 4 KORRIGIERTEN Checks: CHAT-COOKIE-CLASS-001 (MED) — TN deklariert + Vendor-Purpose erwähnt Targeting/Analytics/A-B-Tests CHAT-COOKIE-CLASS-002 (MED) — Provider hat tn+cp Funktionen, Tabelle nennt nur eine Seite → keine Einwilligungs-Differenzierung CHAT-COOKIE-PURPOSE-001 (LOW) — Zweck zu generisch (Art. 13 DSGVO konkret) CHAT-COOKIE-RETENTION-001 (HIGH) — deklariert <90d, KB-typisch >365d → vermutlich unterdeklariert NEU vs vorigem Plan: kein "eigene Banner-Kategorie Chat/AI"-Check — gesetzlich nicht vorgeschrieben (Vermischung Zweck-Transparenz vs Kategorie-Name). Anwender-Frage berechtigt, Konzept geschärft. - _b12_wiring.py + Orchestrator-Wire + V2-Compose-Slot - Cookie-Inventar mit [Chat]/[Chat+AI]-Tag pro Cookie-Name (KB-Lookup) - Smoke (3 Vendors / 5 Cookies): 9 findings korrekt (3 HIGH RETENTION, 3 MEDIUM CLASS-001, 4 LOW PURPOSE) Cookie-Matrix Scan (Browser-Vergleich gegen safetykon.de): - consent-tester/services/cookie_behavior_per_browser.py: eigener fokussierter Scanner. Pro Browser-Profile: cookies before / after reject / after accept in separaten Kontexten. Sequenzielle Runs statt parallel (Race-Conditions). - routes_cookie_matrix.py POST /scan-cookie-matrix - Live-Test safetykon.de: chromium=1, firefox=0, webkit=1, mobile- safari=1 nach reject — Firefox setzt KEIN Cookie nach Reject! (consent-tester Rebuild brachte playwright install-deps für system-libs) Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
210 lines
7.1 KiB
Python
210 lines
7.1 KiB
Python
"""Cookie behavior per browser — focused multi-engine cookie test.

Stage 1.b, implemented without editing consent_scanner:
- A small, self-contained Playwright-based cookie scanner.
- Per browser profile: cookies BEFORE the banner is answered / AFTER
  "reject all" / AFTER "accept all".
- Real engine diversity: the chromium / firefox / webkit /
  iphone-mobile-safari profiles each use `p.chromium` / `p.firefox` /
  `p.webkit.launch()`.
- Output: cookie delta per phase per browser, so the resulting table
  shows whether a banner reject has the same effect in every browser.
"""
|
|
|
|
from __future__ import annotations
|
|
|
|
import logging
|
|
from typing import Any
|
|
|
|
from .browser_profiles import resolve_profiles
|
|
|
|
# Module-level logger, named after this module.
logger = logging.getLogger(__name__)


# Lower-case label fragments (German and English) of "accept" controls.
# `_try_click` walks the tuple in order and stops at the first entry it
# manages to click, so more specific phrases are listed before generic ones.
_ACCEPT_TEXTS = (
    "alle akzeptieren", "alles akzeptieren", "akzeptieren",
    "zustimmen", "agree", "accept all", "accept",
    "i agree", "ok", "got it",
)
# Lower-case label fragments of "reject" / "only necessary" controls;
# consumed by `_try_click` in the same first-match-wins order.
_REJECT_TEXTS = (
    "alle ablehnen", "ablehnen", "nur essenzielle",
    "nur notwendige", "reject all", "decline", "deny",
    "only necessary", "essential only",
)
|
|
|
|
|
|
async def _try_click(page, texts: tuple[str, ...]) -> bool:
|
|
"""Try clicking the first visible button/link matching any of the texts."""
|
|
for txt in texts:
|
|
try:
|
|
loc = page.get_by_role("button",
|
|
name=__import__("re").compile(txt, 2))
|
|
if await loc.count() > 0:
|
|
await loc.first.click(timeout=4000)
|
|
await page.wait_for_timeout(1500)
|
|
return True
|
|
except Exception:
|
|
pass
|
|
# fallback by text
|
|
try:
|
|
loc = page.locator(f"text=/{txt}/i").first
|
|
if await loc.count() > 0:
|
|
await loc.click(timeout=4000)
|
|
await page.wait_for_timeout(1500)
|
|
return True
|
|
except Exception:
|
|
continue
|
|
return False
|
|
|
|
|
|
def _cookie_summary(cookies: list[dict]) -> dict:
|
|
"""Compact summary: count + sample names + by-domain."""
|
|
names = [c.get("name", "") for c in cookies]
|
|
domains: dict[str, int] = {}
|
|
for c in cookies:
|
|
d = c.get("domain", "")
|
|
domains[d] = domains.get(d, 0) + 1
|
|
return {
|
|
"count": len(cookies),
|
|
"names": names,
|
|
"by_domain": sorted(domains.items(), key=lambda x: -x[1])[:8],
|
|
}
|
|
|
|
|
|
async def _scan_one(p, url: str, profile: dict) -> dict[str, Any]:
|
|
engine = profile["engine"]
|
|
if engine == "blink":
|
|
bt = p.chromium
|
|
elif engine == "gecko":
|
|
bt = p.firefox
|
|
elif engine == "webkit":
|
|
bt = p.webkit
|
|
else:
|
|
return {"profile_id": profile["id"], "error": f"unknown engine {engine}"}
|
|
launch_kw: dict[str, Any] = {"headless": True}
|
|
if profile.get("channel"):
|
|
launch_kw["channel"] = profile["channel"]
|
|
if profile.get("executable_path"):
|
|
launch_kw["executable_path"] = profile["executable_path"]
|
|
try:
|
|
browser = await bt.launch(**launch_kw)
|
|
except Exception as e:
|
|
return {"profile_id": profile["id"], "error": f"launch: {e}"[:200]}
|
|
try:
|
|
ctx_kw: dict[str, Any] = {
|
|
"locale": profile.get("locale", "de-DE"),
|
|
"timezone_id": profile.get("timezone", "Europe/Berlin"),
|
|
}
|
|
if profile.get("device"):
|
|
preset = p.devices.get(profile["device"]) or {}
|
|
ctx_kw.update(preset)
|
|
elif profile.get("viewport"):
|
|
ctx_kw["viewport"] = profile["viewport"]
|
|
context = await browser.new_context(**ctx_kw)
|
|
page = await context.new_page()
|
|
try:
|
|
await page.goto(url, wait_until="domcontentloaded", timeout=30000)
|
|
except Exception as e:
|
|
await browser.close()
|
|
return {"profile_id": profile["id"],
|
|
"error": f"goto: {e}"[:200]}
|
|
await page.wait_for_timeout(2500)
|
|
|
|
before = await context.cookies()
|
|
|
|
# Reject branch (fresh context)
|
|
reject_clicked = await _try_click(page, _REJECT_TEXTS)
|
|
await page.wait_for_timeout(1500)
|
|
after_reject = await context.cookies()
|
|
|
|
# Accept branch (fresh context to isolate)
|
|
accept_clicked = False
|
|
after_accept: list[dict] = []
|
|
try:
|
|
context2 = await browser.new_context(**ctx_kw)
|
|
page2 = await context2.new_page()
|
|
try:
|
|
await page2.goto(url, wait_until="domcontentloaded",
|
|
timeout=30000)
|
|
except Exception:
|
|
pass
|
|
try:
|
|
await page2.wait_for_timeout(2500)
|
|
except Exception:
|
|
pass
|
|
try:
|
|
accept_clicked = await _try_click(page2, _ACCEPT_TEXTS)
|
|
except Exception:
|
|
pass
|
|
try:
|
|
await page2.wait_for_timeout(1500)
|
|
except Exception:
|
|
pass
|
|
try:
|
|
after_accept = await context2.cookies()
|
|
except Exception:
|
|
pass
|
|
except Exception as e:
|
|
logger.info("accept branch failed for %s: %s",
|
|
profile["id"], e)
|
|
|
|
return {
|
|
"profile_id": profile["id"],
|
|
"label": profile["label"],
|
|
"engine": engine,
|
|
"reject_clicked": reject_clicked,
|
|
"accept_clicked": accept_clicked,
|
|
"before": _cookie_summary(before),
|
|
"after_reject": _cookie_summary(after_reject),
|
|
"after_accept": _cookie_summary(after_accept),
|
|
"reject_minus_before_count": (
|
|
len(after_reject) - len(before)
|
|
),
|
|
"accept_minus_before_count": (
|
|
len(after_accept) - len(before)
|
|
),
|
|
}
|
|
finally:
|
|
try:
|
|
await browser.close()
|
|
except Exception:
|
|
pass
|
|
|
|
|
|
async def run_cookie_matrix(
    url: str, requested_profiles: list[str] | None = None,
) -> dict:
    """Scan *url* once per browser profile and compare the reject results.

    Args:
        url: Page to scan.
        requested_profiles: Passed unchanged to ``resolve_profiles`` to
            select the profiles to run.

    Returns:
        Dict with ``url``, ``profile_count``, the per-profile ``results``
        (a scan result or an error entry each) and ``aggregate``, which
        holds the cookie count after reject per error-free profile and the
        ``inconsistent_reject`` flag (set when the highest and the lowest
        of those counts differ by two or more).
    """
    from playwright.async_api import async_playwright

    profiles = resolve_profiles(requested_profiles)
    results: list[dict] = []
    async with async_playwright() as p:
        # Profiles run one after another on purpose: this avoids resource
        # contention on the Mac Mini (4 browsers in parallel sometimes
        # hit target-closed races).
        for prof in profiles:
            try:
                outcome = await _scan_one(p, url, prof)
            except Exception as e:
                logger.warning("scan_one %s crashed: %s", prof["id"], e)
                outcome = {"profile_id": prof["id"], "error": f"crash: {e}"[:200]}
            results.append(outcome)

    # Cross-browser comparison over the profiles that finished cleanly.
    after_reject_counts = {}
    for entry in results:
        if "error" in entry:
            continue
        after_reject_counts[entry["profile_id"]] = (
            entry.get("after_reject", {}).get("count", 0)
        )

    counts = list(after_reject_counts.values())
    inconsistent = bool(counts) and (max(counts) - min(counts)) >= 2

    aggregate = {
        "reject_cookie_counts": after_reject_counts,
        "inconsistent_reject": inconsistent,
    }
    return {
        "url": url,
        "profile_count": len(profiles),
        "results": results,
        "aggregate": aggregate,
    }
|