feat(consent-tester): /scan-matrix echt — Profil je Engine + Per-Engine-Summary (Phase 1.2)
- _scanner_run reicht browser_profile an run_consent_test durch (statt Single-Chromium-Shim)
- neue scan_matrix_summary.matrix_scan_dict: ConsentTestResult -> schlanke Matrix-dict-Form (phases fuer _extract_dimensions + kompakter `summary`: cookies_before_consent/after_reject, reject_respected-Heuristik [keine Verstoesse UND kein neuer Tracker], surface, screenshot)
- multi_browser_scanner._run_one hebt summary + engine + is_mobile an die Zeile, verwirft die vollen Cookie-Listen (JSONB-Persistenz schlank)
- consent_scanner: _ctx_base mit Mobile-Device-Emulation (iPhone-Profil -> echtes Mobile-Viewport/Touch), alle 5 new_context auf **_ctx_base
- Tests: test_scan_matrix_summary (6) inkl. _extract_dimensions-Vertrag

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
This commit is contained in:
@@ -172,15 +172,28 @@ async def run_consent_test(
|
||||
_launch["executable_path"] = _prof["executable_path"]
|
||||
browser = await p.chromium.launch(**_launch)
|
||||
|
||||
# Gemeinsame Context-Optionen. Bei Mobile-Profilen (Profil nennt ein
|
||||
# Playwright-`device`, z.B. „iPhone 15") echte Mobile-Emulation
|
||||
# (Viewport/UA/Touch) statt Desktop — sonst wäre die Mobile-Matrix-
|
||||
# Zeile nur Desktop-WebKit. Nur bekannte new_context-kwargs kopieren
|
||||
# (NICHT das volle Device-dict spreaden → default_browser_type bricht).
|
||||
_device = p.devices.get(_prof["device"]) if _prof.get("device") else None
|
||||
_ctx_base: dict = {
|
||||
"user_agent": USER_AGENT,
|
||||
"viewport": {"width": 1920, "height": 1080},
|
||||
"locale": "de-DE",
|
||||
"timezone_id": "Europe/Berlin",
|
||||
}
|
||||
if _device:
|
||||
for _k in ("user_agent", "viewport", "device_scale_factor",
|
||||
"is_mobile", "has_touch"):
|
||||
if _k in _device:
|
||||
_ctx_base[_k] = _device[_k]
|
||||
|
||||
try:
|
||||
# ── Phase A: Before consent ──────────────────────────
|
||||
logger.info("Phase A: First visit (no interaction)")
|
||||
ctx_a = await browser.new_context(
|
||||
user_agent=USER_AGENT,
|
||||
viewport={"width": 1920, "height": 1080},
|
||||
locale="de-DE",
|
||||
timezone_id="Europe/Berlin",
|
||||
)
|
||||
ctx_a = await browser.new_context(**_ctx_base)
|
||||
page_a = await ctx_a.new_page()
|
||||
await page_a.add_init_script(_INTERCEPTOR_INIT)
|
||||
if HAS_STEALTH:
|
||||
@@ -271,12 +284,7 @@ async def run_consent_test(
|
||||
|
||||
# ── Phase B: After rejecting ─────────────────────────
|
||||
logger.info("Phase B: Reject consent (%s)", banner.provider)
|
||||
ctx_b = await browser.new_context(
|
||||
user_agent=USER_AGENT,
|
||||
viewport={"width": 1920, "height": 1080},
|
||||
locale="de-DE",
|
||||
timezone_id="Europe/Berlin",
|
||||
)
|
||||
ctx_b = await browser.new_context(**_ctx_base)
|
||||
page_b = await ctx_b.new_page()
|
||||
await page_b.add_init_script(_INTERCEPTOR_INIT)
|
||||
if HAS_STEALTH:
|
||||
@@ -338,12 +346,7 @@ async def run_consent_test(
|
||||
|
||||
# ── Phase C: After accepting ─────────────────────────
|
||||
logger.info("Phase C: Accept consent (%s)", banner.provider)
|
||||
ctx_c = await browser.new_context(
|
||||
user_agent=USER_AGENT,
|
||||
viewport={"width": 1920, "height": 1080},
|
||||
locale="de-DE",
|
||||
timezone_id="Europe/Berlin",
|
||||
)
|
||||
ctx_c = await browser.new_context(**_ctx_base)
|
||||
page_c = await ctx_c.new_page()
|
||||
await page_c.add_init_script(_INTERCEPTOR_INIT)
|
||||
if HAS_STEALTH:
|
||||
@@ -411,12 +414,7 @@ async def run_consent_test(
|
||||
try:
|
||||
from services.category_tester import detect_categories, test_single_category
|
||||
|
||||
ctx_cat = await browser.new_context(
|
||||
user_agent=USER_AGENT,
|
||||
viewport={"width": 1920, "height": 1080},
|
||||
locale="de-DE",
|
||||
timezone_id="Europe/Berlin",
|
||||
)
|
||||
ctx_cat = await browser.new_context(**_ctx_base)
|
||||
page_cat = await ctx_cat.new_page()
|
||||
if HAS_STEALTH:
|
||||
await stealth_async(page_cat)
|
||||
@@ -461,12 +459,7 @@ async def run_consent_test(
|
||||
"skipping remaining %d categories",
|
||||
len(unique_cats) - len(result.category_tests))
|
||||
break
|
||||
cat_ctx = await browser.new_context(
|
||||
user_agent=USER_AGENT,
|
||||
viewport={"width": 1920, "height": 1080},
|
||||
locale="de-DE",
|
||||
timezone_id="Europe/Berlin",
|
||||
)
|
||||
cat_ctx = await browser.new_context(**_ctx_base)
|
||||
try:
|
||||
cat_result = await asyncio.wait_for(
|
||||
test_single_category(cat_ctx, url, cat, banner, wait_ms),
|
||||
|
||||
@@ -128,16 +128,24 @@ async def run_matrix(
|
||||
logger.warning("matrix profile %s failed: %s", prof["id"], e)
|
||||
return {
|
||||
"profile_id": prof["id"], "label": prof["label"],
|
||||
"scan": None, "error": str(e)[:200],
|
||||
"engine": prof.get("engine"),
|
||||
"is_mobile": bool(prof.get("device")),
|
||||
"summary": None, "error": str(e)[:200],
|
||||
"dimensions": {"pre_consent": 0, "reject_respect": 0,
|
||||
"banner_design": 0},
|
||||
"score": 0, "verbal": "Scan fehlgeschlagen",
|
||||
}
|
||||
dims = _extract_dimensions(scan or {})
|
||||
score = _score(dims)
|
||||
# Nur den kompakten `summary` an die Zeile heben — die vollen
|
||||
# phases/Cookie-Listen werden für das Scoring konsumiert und dann
|
||||
# verworfen (sonst bläht 6× volle Cookie-Liste die JSONB-Persistenz).
|
||||
summary = (scan or {}).get("summary") if isinstance(scan, dict) else None
|
||||
return {
|
||||
"profile_id": prof["id"], "label": prof["label"],
|
||||
"scan": scan, "dimensions": dims, "score": score,
|
||||
"engine": prof.get("engine"),
|
||||
"is_mobile": bool(prof.get("device")),
|
||||
"summary": summary, "dimensions": dims, "score": score,
|
||||
"verbal": _verbal(score),
|
||||
}
|
||||
|
||||
|
||||
@@ -0,0 +1,85 @@
|
||||
"""Kompakte Per-Engine-Projektion eines ConsentTestResult für die Browser-Matrix.
|
||||
|
||||
Die Matrix braucht NICHT die volle `/scan`-Antwort — nur die Felder, die je
|
||||
Browser-Zeile angezeigt + persistiert werden: Cookies vor Consent / nach
|
||||
Ablehnen, ob „Ablehnen" respektiert wurde, Oberflächen-Signale, Screenshot.
|
||||
Bewusst schlank gehalten, damit der in `banner_result.browser_matrix` (JSONB)
|
||||
persistierte Block klein bleibt — 6 Engines × voller Cookie-Liste + Screenshot
|
||||
würde sonst schnell mehrere MB groß (BMW: ~780 Cookies je Phase).
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from typing import Any
|
||||
|
||||
# Cookie-Namen je Phase deckeln — die Matrix zeigt Zahlen + Beispiele, nicht
|
||||
# die volle Liste (die steckt im textbasierten Cookie-Modul).
|
||||
_NAME_CAP = 40
|
||||
_TRACK_CAP = 20
|
||||
|
||||
|
||||
def _vdict(v: Any) -> dict:
    """Convert a violation (dict, dataclass or plain object) into a dict.

    Args:
        v: A violation given as a dict, a dataclass instance, an object
            carrying instance attributes, or any other value.

    Returns:
        ``v`` itself when it is already a dict. Otherwise a shallow copy of
        the object's instance attributes; for dataclasses without a
        ``__dict__`` (declared with ``__slots__``) a dict of the declared
        fields; and as a last resort ``{"text": str(v)}``.
    """
    if isinstance(v, dict):
        return v

    attrs = getattr(v, "__dict__", None)
    if attrs:
        # Copy, so mutating the result never changes the violation object.
        return dict(attrs)

    # Function-scope import keeps this helper self-contained.
    import dataclasses

    # Slotted dataclasses have no __dict__; read their declared fields instead
    # of degrading them to an opaque text blob.
    if dataclasses.is_dataclass(v) and not isinstance(v, type):
        field_values = {
            f.name: getattr(v, f.name) for f in dataclasses.fields(v)
        }
        if field_values:
            return field_values

    return {"text": str(v)}
|
||||
|
||||
|
||||
def matrix_scan_dict(result: Any) -> dict:
    """Project a ``ConsentTestResult`` onto the slim browser-matrix dict.

    The returned dict has the shape that
    ``multi_browser_scanner._extract_dimensions`` reads (``phases`` and
    ``banner_checks``) plus a compact ``summary`` block intended for the
    frontend and for persistence.

    Every field is read defensively with ``getattr``. If ``result`` does not
    expose an attribute (for example because it is an already serialised
    dict), the empty default is used for that field.

    Args:
        result: The scan result object to project.

    Returns:
        A dict with the keys ``banner_detected``, ``banner_provider``,
        ``phases``, ``banner_checks`` and ``summary``.
    """

    def _listed(attr: str) -> list:
        # A missing attribute or a falsy value both collapse to an empty list.
        return list(getattr(result, attr, []) or [])

    cookies_before = _listed("before_cookies")
    cookies_rejected = _listed("reject_cookies")
    violations_before = _listed("before_violations")
    violations_rejected = _listed("reject_violations")
    new_trackers = _listed("reject_new_tracking")
    text_violations = _listed("banner_text_violations")

    provider_name = getattr(result, "banner_provider", "") or ""
    banner_found = bool(getattr(result, "banner_detected", False))
    text_issue_count = len(text_violations)

    # "Reject respected" means: after clicking "reject" there are no
    # violations AND no new tracker. Remaining essential cookies (e.g. the
    # stored consent decision itself) are allowed, so the raw cookie count is
    # deliberately NOT used here (it would produce false positives).
    reject_ok = not violations_rejected and not new_trackers

    surface = {
        "has_impressum_link": bool(
            getattr(result, "banner_has_impressum_link", False)
        ),
        "has_dse_link": bool(getattr(result, "banner_has_dse_link", False)),
        "banner_text_issues": text_issue_count,
    }
    violation_counts = {
        "before_consent": len(violations_before),
        "after_reject": len(violations_rejected),
        "banner_text": text_issue_count,
    }

    summary = {
        "cookies_before_consent": len(cookies_before),
        "cookies_after_reject": len(cookies_rejected),
        "cookies_before_names": cookies_before[:_NAME_CAP],
        "cookies_after_reject_names": cookies_rejected[:_NAME_CAP],
        "reject_respected": reject_ok,
        "reject_new_tracking": new_trackers[:_TRACK_CAP],
        "banner_detected": banner_found,
        "banner_provider": provider_name,
        "banner_screenshot_b64": (
            getattr(result, "banner_screenshot_b64", "") or ""
        ),
        "surface": surface,
        "violations": violation_counts,
    }

    # Minimal form for _extract_dimensions: only the cookie lists and the
    # banner-text violations.
    phases = {
        "before_consent": {"cookies": cookies_before},
        "after_reject": {"cookies": cookies_rejected},
    }
    banner_checks = {
        "violations": [_vdict(item) for item in text_violations],
    }

    return {
        "banner_detected": banner_found,
        "banner_provider": provider_name,
        "phases": phases,
        "banner_checks": banner_checks,
        "summary": summary,
    }
|
||||
Reference in New Issue
Block a user