"""Cookie behavior per browser — fokussierter Multi-Engine Cookie-Test. Stage 1.b ohne consent_scanner-Edit: - Eigener kleiner Playwright-basierter Cookie-Scanner - Pro Browser-Profile: cookies VOR Banner / NACH "Alle ablehnen" / NACH "Alle akzeptieren" - Echte Engine-Diversität: chromium / firefox / webkit / iphone-mobile-safari nutzen jeweils `p.chromium` / `p.firefox` / `p.webkit.launch()` - Output: Cookie-Delta pro Phase pro Browser → Tabelle zeigt ob Banner-Reject in allen Browsern gleich wirkt """ from __future__ import annotations import logging from typing import Any from .browser_profiles import resolve_profiles logger = logging.getLogger(__name__) _ACCEPT_TEXTS = ( "alle akzeptieren", "alles akzeptieren", "akzeptieren", "zustimmen", "agree", "accept all", "accept", "i agree", "ok", "got it", ) _REJECT_TEXTS = ( "alle ablehnen", "ablehnen", "nur essenzielle", "nur notwendige", "reject all", "decline", "deny", "only necessary", "essential only", ) async def _try_click(page, texts: tuple[str, ...]) -> bool: """Try clicking the first visible button/link matching any of the texts.""" for txt in texts: try: loc = page.get_by_role("button", name=__import__("re").compile(txt, 2)) if await loc.count() > 0: await loc.first.click(timeout=4000) await page.wait_for_timeout(1500) return True except Exception: pass # fallback by text try: loc = page.locator(f"text=/{txt}/i").first if await loc.count() > 0: await loc.click(timeout=4000) await page.wait_for_timeout(1500) return True except Exception: continue return False def _cookie_summary(cookies: list[dict]) -> dict: """Compact summary: count + sample names + by-domain.""" names = [c.get("name", "") for c in cookies] domains: dict[str, int] = {} for c in cookies: d = c.get("domain", "") domains[d] = domains.get(d, 0) + 1 return { "count": len(cookies), "names": names, "by_domain": sorted(domains.items(), key=lambda x: -x[1])[:8], } async def _scan_one(p, url: str, profile: dict) -> dict[str, Any]: engine = profile["engine"] if engine == "blink": bt = p.chromium elif engine == "gecko": bt = p.firefox elif engine == "webkit": bt = p.webkit else: return {"profile_id": profile["id"], "error": f"unknown engine {engine}"} launch_kw: dict[str, Any] = {"headless": True} if profile.get("channel"): launch_kw["channel"] = profile["channel"] if profile.get("executable_path"): launch_kw["executable_path"] = profile["executable_path"] try: browser = await bt.launch(**launch_kw) except Exception as e: return {"profile_id": profile["id"], "error": f"launch: {e}"[:200]} try: ctx_kw: dict[str, Any] = { "locale": profile.get("locale", "de-DE"), "timezone_id": profile.get("timezone", "Europe/Berlin"), } if profile.get("device"): preset = p.devices.get(profile["device"]) or {} ctx_kw.update(preset) elif profile.get("viewport"): ctx_kw["viewport"] = profile["viewport"] context = await browser.new_context(**ctx_kw) page = await context.new_page() try: await page.goto(url, wait_until="domcontentloaded", timeout=30000) except Exception as e: await browser.close() return {"profile_id": profile["id"], "error": f"goto: {e}"[:200]} await page.wait_for_timeout(2500) before = await context.cookies() # Reject branch (fresh context) reject_clicked = await _try_click(page, _REJECT_TEXTS) await page.wait_for_timeout(1500) after_reject = await context.cookies() # Accept branch (fresh context to isolate) accept_clicked = False after_accept: list[dict] = [] try: context2 = await browser.new_context(**ctx_kw) page2 = await context2.new_page() try: await page2.goto(url, wait_until="domcontentloaded", timeout=30000) except Exception: pass try: await page2.wait_for_timeout(2500) except Exception: pass try: accept_clicked = await _try_click(page2, _ACCEPT_TEXTS) except Exception: pass try: await page2.wait_for_timeout(1500) except Exception: pass try: after_accept = await context2.cookies() except Exception: pass except Exception as e: logger.info("accept branch failed for %s: %s", profile["id"], e) return { "profile_id": profile["id"], "label": profile["label"], "engine": engine, "reject_clicked": reject_clicked, "accept_clicked": accept_clicked, "before": _cookie_summary(before), "after_reject": _cookie_summary(after_reject), "after_accept": _cookie_summary(after_accept), "reject_minus_before_count": ( len(after_reject) - len(before) ), "accept_minus_before_count": ( len(after_accept) - len(before) ), } finally: try: await browser.close() except Exception: pass async def run_cookie_matrix( url: str, requested_profiles: list[str] | None = None, ) -> dict: """Run focused cookie behavior scan across all default profiles.""" from playwright.async_api import async_playwright profiles = resolve_profiles(requested_profiles) results: list[dict] = [] async with async_playwright() as p: # Sequential to avoid resource contention on the Mac Mini # (4 browsers in parallel sometimes hits target-closed races). for prof in profiles: try: r = await _scan_one(p, url, prof) except Exception as e: logger.warning("scan_one %s crashed: %s", prof["id"], e) r = {"profile_id": prof["id"], "error": f"crash: {e}"[:200]} results.append(r) # Aggregate: cross-browser inconsistency detection after_reject_counts = { r["profile_id"]: r.get("after_reject", {}).get("count", 0) for r in results if "error" not in r } inconsistent = False if after_reject_counts: cmin = min(after_reject_counts.values()) cmax = max(after_reject_counts.values()) inconsistent = (cmax - cmin) >= 2 return { "url": url, "profile_count": len(profiles), "results": results, "aggregate": { "reject_cookie_counts": after_reject_counts, "inconsistent_reject": inconsistent, }, }