feat(consent-tester): /scan-matrix echt — Profil je Engine + Per-Engine-Summary (Phase 1.2)

- _scanner_run reicht browser_profile an run_consent_test durch (statt Single-Chromium-Shim)
- neue scan_matrix_summary.matrix_scan_dict: ConsentTestResult -> schlanke
  Matrix-dict-Form (phases fuer _extract_dimensions + kompakter `summary`:
  cookies_before_consent/after_reject, reject_respected-Heuristik [keine
  Verstoesse UND kein neuer Tracker], surface, screenshot)
- multi_browser_scanner._run_one hebt summary + engine + is_mobile an die
  Zeile, verwirft die vollen Cookie-Listen (JSONB-Persistenz schlank)
- consent_scanner: _ctx_base mit Mobile-Device-Emulation (iPhone-Profil ->
  echtes Mobile-Viewport/Touch), alle 5 new_context auf **_ctx_base
- Tests: test_scan_matrix_summary (6) inkl. _extract_dimensions-Vertrag

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
This commit is contained in:
Benjamin Admin
2026-06-12 22:46:42 +02:00
parent c816827720
commit 881e9c28de
5 changed files with 208 additions and 44 deletions
+23 -30
View File
@@ -172,15 +172,28 @@ async def run_consent_test(
_launch["executable_path"] = _prof["executable_path"]
browser = await p.chromium.launch(**_launch)
# Gemeinsame Context-Optionen. Bei Mobile-Profilen (Profil nennt ein
# Playwright-`device`, z.B. „iPhone 15") echte Mobile-Emulation
# (Viewport/UA/Touch) statt Desktop — sonst wäre die Mobile-Matrix-
# Zeile nur Desktop-WebKit. Nur bekannte new_context-kwargs kopieren
# (NICHT das volle Device-dict spreaden → default_browser_type bricht).
_device = p.devices.get(_prof["device"]) if _prof.get("device") else None
_ctx_base: dict = {
"user_agent": USER_AGENT,
"viewport": {"width": 1920, "height": 1080},
"locale": "de-DE",
"timezone_id": "Europe/Berlin",
}
if _device:
for _k in ("user_agent", "viewport", "device_scale_factor",
"is_mobile", "has_touch"):
if _k in _device:
_ctx_base[_k] = _device[_k]
try:
# ── Phase A: Before consent ──────────────────────────
logger.info("Phase A: First visit (no interaction)")
ctx_a = await browser.new_context(
user_agent=USER_AGENT,
viewport={"width": 1920, "height": 1080},
locale="de-DE",
timezone_id="Europe/Berlin",
)
ctx_a = await browser.new_context(**_ctx_base)
page_a = await ctx_a.new_page()
await page_a.add_init_script(_INTERCEPTOR_INIT)
if HAS_STEALTH:
@@ -271,12 +284,7 @@ async def run_consent_test(
# ── Phase B: After rejecting ─────────────────────────
logger.info("Phase B: Reject consent (%s)", banner.provider)
ctx_b = await browser.new_context(
user_agent=USER_AGENT,
viewport={"width": 1920, "height": 1080},
locale="de-DE",
timezone_id="Europe/Berlin",
)
ctx_b = await browser.new_context(**_ctx_base)
page_b = await ctx_b.new_page()
await page_b.add_init_script(_INTERCEPTOR_INIT)
if HAS_STEALTH:
@@ -338,12 +346,7 @@ async def run_consent_test(
# ── Phase C: After accepting ─────────────────────────
logger.info("Phase C: Accept consent (%s)", banner.provider)
ctx_c = await browser.new_context(
user_agent=USER_AGENT,
viewport={"width": 1920, "height": 1080},
locale="de-DE",
timezone_id="Europe/Berlin",
)
ctx_c = await browser.new_context(**_ctx_base)
page_c = await ctx_c.new_page()
await page_c.add_init_script(_INTERCEPTOR_INIT)
if HAS_STEALTH:
@@ -411,12 +414,7 @@ async def run_consent_test(
try:
from services.category_tester import detect_categories, test_single_category
ctx_cat = await browser.new_context(
user_agent=USER_AGENT,
viewport={"width": 1920, "height": 1080},
locale="de-DE",
timezone_id="Europe/Berlin",
)
ctx_cat = await browser.new_context(**_ctx_base)
page_cat = await ctx_cat.new_page()
if HAS_STEALTH:
await stealth_async(page_cat)
@@ -461,12 +459,7 @@ async def run_consent_test(
"skipping remaining %d categories",
len(unique_cats) - len(result.category_tests))
break
cat_ctx = await browser.new_context(
user_agent=USER_AGENT,
viewport={"width": 1920, "height": 1080},
locale="de-DE",
timezone_id="Europe/Berlin",
)
cat_ctx = await browser.new_context(**_ctx_base)
try:
cat_result = await asyncio.wait_for(
test_single_category(cat_ctx, url, cat, banner, wait_ms),
@@ -128,16 +128,24 @@ async def run_matrix(
logger.warning("matrix profile %s failed: %s", prof["id"], e)
return {
"profile_id": prof["id"], "label": prof["label"],
"scan": None, "error": str(e)[:200],
"engine": prof.get("engine"),
"is_mobile": bool(prof.get("device")),
"summary": None, "error": str(e)[:200],
"dimensions": {"pre_consent": 0, "reject_respect": 0,
"banner_design": 0},
"score": 0, "verbal": "Scan fehlgeschlagen",
}
dims = _extract_dimensions(scan or {})
score = _score(dims)
# Nur den kompakten `summary` an die Zeile heben — die vollen
# phases/Cookie-Listen werden für das Scoring konsumiert und dann
# verworfen (sonst bläht 6× volle Cookie-Liste die JSONB-Persistenz).
summary = (scan or {}).get("summary") if isinstance(scan, dict) else None
return {
"profile_id": prof["id"], "label": prof["label"],
"scan": scan, "dimensions": dims, "score": score,
"engine": prof.get("engine"),
"is_mobile": bool(prof.get("device")),
"summary": summary, "dimensions": dims, "score": score,
"verbal": _verbal(score),
}
@@ -0,0 +1,85 @@
"""Kompakte Per-Engine-Projektion eines ConsentTestResult für die Browser-Matrix.
Die Matrix braucht NICHT die volle `/scan`-Antwort — nur die Felder, die je
Browser-Zeile angezeigt + persistiert werden: Cookies vor Consent / nach
Ablehnen, ob „Ablehnen" respektiert wurde, Oberflächen-Signale, Screenshot.
Bewusst schlank gehalten, damit der in `banner_result.browser_matrix` (JSONB)
persistierte Block klein bleibt — 6 Engines × voller Cookie-Liste + Screenshot
würde sonst schnell mehrere MB groß (BMW: ~780 Cookies je Phase).
"""
from __future__ import annotations
from typing import Any
# Caps for the samples embedded in the matrix summary: the matrix shows counts
# plus a few examples, not the full lists (per the original note, the full
# list lives in the text-based cookie module).
# _NAME_CAP: max entries kept from each per-phase cookie list
# (`cookies_before_names`, `cookies_after_reject_names`).
_NAME_CAP = 40
# _TRACK_CAP: max entries kept in the summary's `reject_new_tracking` list.
_TRACK_CAP = 20
def _vdict(v: Any) -> dict:
"""Violation (dataclass/obj/dict) → serialisierbares dict."""
if isinstance(v, dict):
return v
return getattr(v, "__dict__", None) or {"text": str(v)}
def matrix_scan_dict(result: Any) -> dict:
    """Project a ``ConsentTestResult`` onto the slim browser-matrix dict.

    The returned dict carries the minimal ``phases`` / ``banner_checks``
    structure intended for ``multi_browser_scanner._extract_dimensions`` plus
    a compact ``summary`` block for frontend and persistence.

    Every field is read with ``getattr`` and a default, so an input lacking
    the expected attributes (for example an already serialized dict) does not
    raise — the defaults (empty lists, ``False``, ``""``) are used instead.

    Args:
        result: The scan result object to project.

    Returns:
        A dict with the keys ``banner_detected``, ``banner_provider``,
        ``phases``, ``banner_checks`` and ``summary``.
    """

    def _listed(attr: str) -> list:
        # Missing or falsy attributes collapse to a fresh empty list.
        return list(getattr(result, attr, []) or [])

    cookies_pre = _listed("before_cookies")
    cookies_post = _listed("reject_cookies")
    pre_violations = _listed("before_violations")
    post_violations = _listed("reject_violations")
    new_trackers = _listed("reject_new_tracking")
    text_violations = _listed("banner_text_violations")

    provider_name = getattr(result, "banner_provider", "") or ""
    detected = bool(getattr(result, "banner_detected", False))
    screenshot = getattr(result, "banner_screenshot_b64", "") or ""
    text_issue_count = len(text_violations)

    # "Reject respected" means: no violations after clicking reject AND no
    # newly appearing tracker. Remaining essential cookies (e.g. the stored
    # consent decision itself) are allowed, so the raw cookie count is
    # deliberately not part of the verdict (it would yield false positives).
    reject_ok = not (post_violations or new_trackers)

    surface_signals = {
        "has_impressum_link": bool(
            getattr(result, "banner_has_impressum_link", False)),
        "has_dse_link": bool(
            getattr(result, "banner_has_dse_link", False)),
        "banner_text_issues": text_issue_count,
    }
    violation_counts = {
        "before_consent": len(pre_violations),
        "after_reject": len(post_violations),
        "banner_text": text_issue_count,
    }

    compact = {
        "cookies_before_consent": len(cookies_pre),
        "cookies_after_reject": len(cookies_post),
        "cookies_before_names": cookies_pre[:_NAME_CAP],
        "cookies_after_reject_names": cookies_post[:_NAME_CAP],
        "reject_respected": reject_ok,
        "reject_new_tracking": new_trackers[:_TRACK_CAP],
        "banner_detected": detected,
        "banner_provider": provider_name,
        "banner_screenshot_b64": screenshot,
        "surface": surface_signals,
        "violations": violation_counts,
    }

    # Minimal shape for the dimension extraction: only the per-phase cookie
    # lists and the banner-text violations converted to dicts.
    phase_block = {
        "before_consent": {"cookies": cookies_pre},
        "after_reject": {"cookies": cookies_post},
    }
    check_block = {
        "violations": [_vdict(item) for item in text_violations],
    }

    return {
        "banner_detected": detected,
        "banner_provider": provider_name,
        "phases": phase_block,
        "banner_checks": check_block,
        "summary": compact,
    }