diff --git a/consent-tester/routes_matrix.py b/consent-tester/routes_matrix.py index 8ef3fb1b..a8662dac 100644 --- a/consent-tester/routes_matrix.py +++ b/consent-tester/routes_matrix.py @@ -5,11 +5,9 @@ returns the aggregated robustness-score per browser plus a worst-of/best-of summary. Kept in its own module so main.py stays under the 500-LOC cap. -KNOWN LIMITATION (stage 1.a): - The underlying `run_consent_test` does not yet accept a - `browser_profile` kwarg — all profiles currently execute on the - same Chromium instance. Engine diversity (real Firefox/WebKit - contexts) ships in stage 1.b once consent_scanner is split. +Stage 1.b (erledigt): `run_consent_test` nimmt jetzt einen `browser_profile` +kwarg → echte Engine-Diversität (Firefox/Gecko, WebKit/Safari, Blink inkl. +Chrome-/Edge-Channel + Brave). `_scanner_run` reicht das Profil durch. """ from __future__ import annotations @@ -22,6 +20,7 @@ from pydantic import BaseModel from services.consent_scanner import run_consent_test from services.multi_browser_scanner import run_matrix +from services.scan_matrix_summary import matrix_scan_dict logger = logging.getLogger(__name__) router = APIRouter() @@ -36,12 +35,18 @@ class MatrixScanRequest(BaseModel): browser_profiles: list[str] | None = None -async def _scanner_shim(url: str, browser_profile: dict | None = None, - timeout_per_phase: int = 10, - categories: list[str] | None = None): - """Shim that ignores `browser_profile` until consent_scanner accepts it.""" - return await run_consent_test(url, timeout_per_phase, - categories or []) +async def _scanner_run(url: str, browser_profile: dict | None = None, + timeout_per_phase: int = 10, + categories: list[str] | None = None): + """Adapter: reicht das aufgelöste `browser_profile` (Engine/Channel/Device) + an `run_consent_test` durch, damit jede Matrix-Zeile auf der echten Engine + läuft (Firefox/WebKit/Blink + Chrome-/Edge-Channel + Brave). Projiziert + das ConsentTestResult auf die schlanke Matrix-dict-Form (phases + + kompakter `summary`).""" + result = await run_consent_test(url, timeout_per_phase, + categories or [], + browser_profile=browser_profile) + return matrix_scan_dict(result) @router.post("/scan-matrix") @@ -50,7 +55,7 @@ async def scan_matrix(req: MatrixScanRequest): logger.info("Matrix scan for %s profiles=%s", req.url, req.browser_profiles or "default") matrix = await run_matrix( - _scanner_shim, + _scanner_run, req.url, requested_profiles=req.browser_profiles, timeout_per_phase=req.timeout_per_phase, diff --git a/consent-tester/services/consent_scanner.py b/consent-tester/services/consent_scanner.py index bdf44306..d5af8b11 100644 --- a/consent-tester/services/consent_scanner.py +++ b/consent-tester/services/consent_scanner.py @@ -172,15 +172,28 @@ async def run_consent_test( _launch["executable_path"] = _prof["executable_path"] browser = await p.chromium.launch(**_launch) + # Gemeinsame Context-Optionen. Bei Mobile-Profilen (Profil nennt ein + # Playwright-`device`, z.B. „iPhone 15") echte Mobile-Emulation + # (Viewport/UA/Touch) statt Desktop — sonst wäre die Mobile-Matrix- + # Zeile nur Desktop-WebKit. Nur bekannte new_context-kwargs kopieren + # (NICHT das volle Device-dict spreaden → default_browser_type bricht). + _device = p.devices.get(_prof["device"]) if _prof.get("device") else None + _ctx_base: dict = { + "user_agent": USER_AGENT, + "viewport": {"width": 1920, "height": 1080}, + "locale": "de-DE", + "timezone_id": "Europe/Berlin", + } + if _device: + for _k in ("user_agent", "viewport", "device_scale_factor", + "is_mobile", "has_touch"): + if _k in _device: + _ctx_base[_k] = _device[_k] + try: # ── Phase A: Before consent ────────────────────────── logger.info("Phase A: First visit (no interaction)") - ctx_a = await browser.new_context( - user_agent=USER_AGENT, - viewport={"width": 1920, "height": 1080}, - locale="de-DE", - timezone_id="Europe/Berlin", - ) + ctx_a = await browser.new_context(**_ctx_base) page_a = await ctx_a.new_page() await page_a.add_init_script(_INTERCEPTOR_INIT) if HAS_STEALTH: @@ -271,12 +284,7 @@ async def run_consent_test( # ── Phase B: After rejecting ───────────────────────── logger.info("Phase B: Reject consent (%s)", banner.provider) - ctx_b = await browser.new_context( - user_agent=USER_AGENT, - viewport={"width": 1920, "height": 1080}, - locale="de-DE", - timezone_id="Europe/Berlin", - ) + ctx_b = await browser.new_context(**_ctx_base) page_b = await ctx_b.new_page() await page_b.add_init_script(_INTERCEPTOR_INIT) if HAS_STEALTH: @@ -338,12 +346,7 @@ async def run_consent_test( # ── Phase C: After accepting ───────────────────────── logger.info("Phase C: Accept consent (%s)", banner.provider) - ctx_c = await browser.new_context( - user_agent=USER_AGENT, - viewport={"width": 1920, "height": 1080}, - locale="de-DE", - timezone_id="Europe/Berlin", - ) + ctx_c = await browser.new_context(**_ctx_base) page_c = await ctx_c.new_page() await page_c.add_init_script(_INTERCEPTOR_INIT) if HAS_STEALTH: @@ -411,12 +414,7 @@ async def run_consent_test( try: from services.category_tester import detect_categories, test_single_category - ctx_cat = await browser.new_context( - user_agent=USER_AGENT, - viewport={"width": 1920, "height": 1080}, - locale="de-DE", - timezone_id="Europe/Berlin", - ) + ctx_cat = await browser.new_context(**_ctx_base) page_cat = await ctx_cat.new_page() if HAS_STEALTH: await stealth_async(page_cat) @@ -461,12 +459,7 @@ async def run_consent_test( "skipping remaining %d categories", len(unique_cats) - len(result.category_tests)) break - cat_ctx = await browser.new_context( - user_agent=USER_AGENT, - viewport={"width": 1920, "height": 1080}, - locale="de-DE", - timezone_id="Europe/Berlin", - ) + cat_ctx = await browser.new_context(**_ctx_base) try: cat_result = await asyncio.wait_for( test_single_category(cat_ctx, url, cat, banner, wait_ms), diff --git a/consent-tester/services/multi_browser_scanner.py b/consent-tester/services/multi_browser_scanner.py index 61fb8059..dbc9c7e5 100644 --- a/consent-tester/services/multi_browser_scanner.py +++ b/consent-tester/services/multi_browser_scanner.py @@ -128,16 +128,24 @@ async def run_matrix( logger.warning("matrix profile %s failed: %s", prof["id"], e) return { "profile_id": prof["id"], "label": prof["label"], - "scan": None, "error": str(e)[:200], + "engine": prof.get("engine"), + "is_mobile": bool(prof.get("device")), + "summary": None, "error": str(e)[:200], "dimensions": {"pre_consent": 0, "reject_respect": 0, "banner_design": 0}, "score": 0, "verbal": "Scan fehlgeschlagen", } dims = _extract_dimensions(scan or {}) score = _score(dims) + # Nur den kompakten `summary` an die Zeile heben — die vollen + # phases/Cookie-Listen werden für das Scoring konsumiert und dann + # verworfen (sonst bläht 6× volle Cookie-Liste die JSONB-Persistenz). + summary = (scan or {}).get("summary") if isinstance(scan, dict) else None return { "profile_id": prof["id"], "label": prof["label"], - "scan": scan, "dimensions": dims, "score": score, + "engine": prof.get("engine"), + "is_mobile": bool(prof.get("device")), + "summary": summary, "dimensions": dims, "score": score, "verbal": _verbal(score), } diff --git a/consent-tester/services/scan_matrix_summary.py b/consent-tester/services/scan_matrix_summary.py new file mode 100644 index 00000000..c8658646 --- /dev/null +++ b/consent-tester/services/scan_matrix_summary.py @@ -0,0 +1,85 @@ +"""Kompakte Per-Engine-Projektion eines ConsentTestResult für die Browser-Matrix. + +Die Matrix braucht NICHT die volle `/scan`-Antwort — nur die Felder, die je +Browser-Zeile angezeigt + persistiert werden: Cookies vor Consent / nach +Ablehnen, ob „Ablehnen" respektiert wurde, Oberflächen-Signale, Screenshot. +Bewusst schlank gehalten, damit der in `banner_result.browser_matrix` (JSONB) +persistierte Block klein bleibt — 6 Engines × voller Cookie-Liste + Screenshot +würde sonst schnell mehrere MB groß (BMW: ~780 Cookies je Phase). +""" + +from __future__ import annotations + +from typing import Any + +# Cookie-Namen je Phase deckeln — die Matrix zeigt Zahlen + Beispiele, nicht +# die volle Liste (die steckt im textbasierten Cookie-Modul). +_NAME_CAP = 40 +_TRACK_CAP = 20 + + +def _vdict(v: Any) -> dict: + """Violation (dataclass/obj/dict) → serialisierbares dict.""" + if isinstance(v, dict): + return v + return getattr(v, "__dict__", None) or {"text": str(v)} + + +def matrix_scan_dict(result: Any) -> dict: + """`ConsentTestResult` → dict in der Form, die + `multi_browser_scanner._extract_dimensions` liest (phases/banner_checks) + plus ein kompakter `summary`-Block für Frontend + Persistenz. + + Defensiv via getattr — funktioniert auch, falls der Scanner mal ein + bereits serialisiertes dict liefert (dann greifen die Defaults).""" + before = list(getattr(result, "before_cookies", []) or []) + after = list(getattr(result, "reject_cookies", []) or []) + before_violations = list(getattr(result, "before_violations", []) or []) + reject_violations = list(getattr(result, "reject_violations", []) or []) + reject_new_tracking = list(getattr(result, "reject_new_tracking", []) or []) + banner_text_violations = list( + getattr(result, "banner_text_violations", []) or []) + provider = getattr(result, "banner_provider", "") or "" + + summary = { + "cookies_before_consent": len(before), + "cookies_after_reject": len(after), + "cookies_before_names": before[:_NAME_CAP], + "cookies_after_reject_names": after[:_NAME_CAP], + # „Ablehnen respektiert" = nach dem Klick auf „Ablehnen" keine Verstöße + # UND kein neuer Tracker. Verbleibende essentielle Cookies (z.B. die + # gespeicherte Consent-Entscheidung selbst) sind erlaubt → NICHT über + # die reine Cookie-Zahl bewerten (sonst False Positive). + "reject_respected": (len(reject_violations) == 0 + and len(reject_new_tracking) == 0), + "reject_new_tracking": reject_new_tracking[:_TRACK_CAP], + "banner_detected": bool(getattr(result, "banner_detected", False)), + "banner_provider": provider, + "banner_screenshot_b64": getattr(result, "banner_screenshot_b64", "") or "", + "surface": { + "has_impressum_link": bool( + getattr(result, "banner_has_impressum_link", False)), + "has_dse_link": bool( + getattr(result, "banner_has_dse_link", False)), + "banner_text_issues": len(banner_text_violations), + }, + "violations": { + "before_consent": len(before_violations), + "after_reject": len(reject_violations), + "banner_text": len(banner_text_violations), + }, + } + + return { + "banner_detected": bool(getattr(result, "banner_detected", False)), + "banner_provider": provider, + # Minimal-Form für _extract_dimensions (nur cookies-Listen + violations): + "phases": { + "before_consent": {"cookies": before}, + "after_reject": {"cookies": after}, + }, + "banner_checks": { + "violations": [_vdict(v) for v in banner_text_violations], + }, + "summary": summary, + } diff --git a/consent-tester/tests/test_scan_matrix_summary.py b/consent-tester/tests/test_scan_matrix_summary.py new file mode 100644 index 00000000..2254e490 --- /dev/null +++ b/consent-tester/tests/test_scan_matrix_summary.py @@ -0,0 +1,73 @@ +"""Per-Engine-Projektion der Browser-Matrix (`scan_matrix_summary`). + +Sichert: ConsentTestResult → schlanke Matrix-dict-Form mit (a) phases, die +`multi_browser_scanner._extract_dimensions` lesen kann, und (b) kompaktem +`summary` (cookies_before/after_reject, reject_respected-Heuristik, Surface). +""" + +from types import SimpleNamespace + +from services.scan_matrix_summary import matrix_scan_dict +from services.multi_browser_scanner import _extract_dimensions + + +def _result(**kw): + base = dict( + banner_detected=True, banner_provider="Usercentrics", + before_cookies=[], reject_cookies=[], + before_violations=[], reject_violations=[], reject_new_tracking=[], + banner_text_violations=[], + banner_has_impressum_link=True, banner_has_dse_link=True, + banner_screenshot_b64="iVBOR_fake", + ) + base.update(kw) + return SimpleNamespace(**base) + + +def test_cookie_counts_and_names_capped(): + r = _result(before_cookies=[f"c{i}" for i in range(50)], + reject_cookies=["a", "b"]) + out = matrix_scan_dict(r) + s = out["summary"] + assert s["cookies_before_consent"] == 50 + assert s["cookies_after_reject"] == 2 + assert len(s["cookies_before_names"]) == 40 # gedeckelt + assert s["cookies_after_reject_names"] == ["a", "b"] + + +def test_reject_respected_true_when_no_violation_no_tracking(): + r = _result(reject_cookies=["consent_choice"]) # essentielles Cookie bleibt + out = matrix_scan_dict(r) + # Verbleibendes essentielles Cookie allein darf NICHT als Verstoß zählen. + assert out["summary"]["reject_respected"] is True + + +def test_reject_respected_false_on_reject_violation(): + r = _result(reject_violations=[SimpleNamespace(severity="HIGH", text="x")]) + assert matrix_scan_dict(r)["summary"]["reject_respected"] is False + + +def test_reject_respected_false_on_new_tracking(): + r = _result(reject_new_tracking=["google-analytics"]) + assert matrix_scan_dict(r)["summary"]["reject_respected"] is False + + +def test_surface_and_screenshot_passthrough(): + r = _result(banner_has_impressum_link=False, + banner_text_violations=[SimpleNamespace(severity="LOW", text="y")]) + out = matrix_scan_dict(r) + assert out["summary"]["surface"]["has_impressum_link"] is False + assert out["summary"]["surface"]["has_dse_link"] is True + assert out["summary"]["surface"]["banner_text_issues"] == 1 + assert out["summary"]["banner_screenshot_b64"] == "iVBOR_fake" + + +def test_phases_shape_readable_by_extract_dimensions(): + # Vertrag: die Projektion MUSS von _extract_dimensions konsumierbar sein. + r = _result(before_cookies=["a", "b", "c"], reject_cookies=["d"]) + out = matrix_scan_dict(r) + dims = _extract_dimensions(out) + assert set(dims) == {"pre_consent", "reject_respect", "banner_design"} + # 3 Pre-Cookies → pre_consent < 1.0; weniger Reject-Cookies → höher. + assert 0.0 <= dims["pre_consent"] <= 1.0 + assert dims["reject_respect"] <= 1.0