feat(consent-tester): /scan-matrix echt — Profil je Engine + Per-Engine-Summary (Phase 1.2)
- _scanner_run reicht browser_profile an run_consent_test durch (statt Single-Chromium-Shim)
- neue scan_matrix_summary.matrix_scan_dict: ConsentTestResult -> schlanke Matrix-dict-Form (phases fuer _extract_dimensions + kompakter `summary`: cookies_before_consent/after_reject, reject_respected-Heuristik [keine Verstoesse UND kein neuer Tracker], surface, screenshot)
- multi_browser_scanner._run_one hebt summary + engine + is_mobile an die Zeile, verwirft die vollen Cookie-Listen (JSONB-Persistenz schlank)
- consent_scanner: _ctx_base mit Mobile-Device-Emulation (iPhone-Profil -> echtes Mobile-Viewport/Touch), alle 5 new_context auf **_ctx_base
- Tests: test_scan_matrix_summary (6) inkl. _extract_dimensions-Vertrag

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
This commit is contained in:
@@ -5,11 +5,9 @@ returns the aggregated robustness-score per browser plus a
|
||||
worst-of/best-of summary. Kept in its own module so main.py stays
|
||||
under the 500-LOC cap.
|
||||
|
||||
KNOWN LIMITATION (stage 1.a):
|
||||
The underlying `run_consent_test` does not yet accept a
|
||||
`browser_profile` kwarg — all profiles currently execute on the
|
||||
same Chromium instance. Engine diversity (real Firefox/WebKit
|
||||
contexts) ships in stage 1.b once consent_scanner is split.
|
||||
Stage 1.b (erledigt): `run_consent_test` nimmt jetzt einen `browser_profile`
|
||||
kwarg → echte Engine-Diversität (Firefox/Gecko, WebKit/Safari, Blink inkl.
|
||||
Chrome-/Edge-Channel + Brave). `_scanner_run` reicht das Profil durch.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
@@ -22,6 +20,7 @@ from pydantic import BaseModel
|
||||
|
||||
from services.consent_scanner import run_consent_test
|
||||
from services.multi_browser_scanner import run_matrix
|
||||
from services.scan_matrix_summary import matrix_scan_dict
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
router = APIRouter()
|
||||
@@ -36,12 +35,18 @@ class MatrixScanRequest(BaseModel):
|
||||
browser_profiles: list[str] | None = None
|
||||
|
||||
|
||||
async def _scanner_shim(url: str, browser_profile: dict | None = None,
                        timeout_per_phase: int = 10,
                        categories: list[str] | None = None):
    """Run the consent test without forwarding `browser_profile`.

    The profile argument is accepted only to satisfy the matrix runner's
    call signature; it is not passed on. A missing or empty `categories`
    value is forwarded as an empty list.
    """
    selected_categories = categories if categories else []
    outcome = await run_consent_test(url, timeout_per_phase, selected_categories)
    return outcome
|
||||
async def _scanner_run(url: str, browser_profile: dict | None = None,
                       timeout_per_phase: int = 10,
                       categories: list[str] | None = None):
    """Adapter between the matrix runner and `run_consent_test`.

    Forwards the resolved `browser_profile` to `run_consent_test` as a
    keyword argument, then projects the scan result onto the slim matrix
    dict via `matrix_scan_dict`. A missing or empty `categories` value is
    forwarded as an empty list.
    """
    selected_categories = categories if categories else []
    consent_result = await run_consent_test(
        url,
        timeout_per_phase,
        selected_categories,
        browser_profile=browser_profile,
    )
    return matrix_scan_dict(consent_result)
|
||||
|
||||
|
||||
@router.post("/scan-matrix")
|
||||
@@ -50,7 +55,7 @@ async def scan_matrix(req: MatrixScanRequest):
|
||||
logger.info("Matrix scan for %s profiles=%s", req.url,
|
||||
req.browser_profiles or "default")
|
||||
matrix = await run_matrix(
|
||||
_scanner_shim,
|
||||
_scanner_run,
|
||||
req.url,
|
||||
requested_profiles=req.browser_profiles,
|
||||
timeout_per_phase=req.timeout_per_phase,
|
||||
|
||||
@@ -172,15 +172,28 @@ async def run_consent_test(
|
||||
_launch["executable_path"] = _prof["executable_path"]
|
||||
browser = await p.chromium.launch(**_launch)
|
||||
|
||||
# Gemeinsame Context-Optionen. Bei Mobile-Profilen (Profil nennt ein
|
||||
# Playwright-`device`, z.B. „iPhone 15") echte Mobile-Emulation
|
||||
# (Viewport/UA/Touch) statt Desktop — sonst wäre die Mobile-Matrix-
|
||||
# Zeile nur Desktop-WebKit. Nur bekannte new_context-kwargs kopieren
|
||||
# (NICHT das volle Device-dict spreaden → default_browser_type bricht).
|
||||
_device = p.devices.get(_prof["device"]) if _prof.get("device") else None
|
||||
_ctx_base: dict = {
|
||||
"user_agent": USER_AGENT,
|
||||
"viewport": {"width": 1920, "height": 1080},
|
||||
"locale": "de-DE",
|
||||
"timezone_id": "Europe/Berlin",
|
||||
}
|
||||
if _device:
|
||||
for _k in ("user_agent", "viewport", "device_scale_factor",
|
||||
"is_mobile", "has_touch"):
|
||||
if _k in _device:
|
||||
_ctx_base[_k] = _device[_k]
|
||||
|
||||
try:
|
||||
# ── Phase A: Before consent ──────────────────────────
|
||||
logger.info("Phase A: First visit (no interaction)")
|
||||
ctx_a = await browser.new_context(
|
||||
user_agent=USER_AGENT,
|
||||
viewport={"width": 1920, "height": 1080},
|
||||
locale="de-DE",
|
||||
timezone_id="Europe/Berlin",
|
||||
)
|
||||
ctx_a = await browser.new_context(**_ctx_base)
|
||||
page_a = await ctx_a.new_page()
|
||||
await page_a.add_init_script(_INTERCEPTOR_INIT)
|
||||
if HAS_STEALTH:
|
||||
@@ -271,12 +284,7 @@ async def run_consent_test(
|
||||
|
||||
# ── Phase B: After rejecting ─────────────────────────
|
||||
logger.info("Phase B: Reject consent (%s)", banner.provider)
|
||||
ctx_b = await browser.new_context(
|
||||
user_agent=USER_AGENT,
|
||||
viewport={"width": 1920, "height": 1080},
|
||||
locale="de-DE",
|
||||
timezone_id="Europe/Berlin",
|
||||
)
|
||||
ctx_b = await browser.new_context(**_ctx_base)
|
||||
page_b = await ctx_b.new_page()
|
||||
await page_b.add_init_script(_INTERCEPTOR_INIT)
|
||||
if HAS_STEALTH:
|
||||
@@ -338,12 +346,7 @@ async def run_consent_test(
|
||||
|
||||
# ── Phase C: After accepting ─────────────────────────
|
||||
logger.info("Phase C: Accept consent (%s)", banner.provider)
|
||||
ctx_c = await browser.new_context(
|
||||
user_agent=USER_AGENT,
|
||||
viewport={"width": 1920, "height": 1080},
|
||||
locale="de-DE",
|
||||
timezone_id="Europe/Berlin",
|
||||
)
|
||||
ctx_c = await browser.new_context(**_ctx_base)
|
||||
page_c = await ctx_c.new_page()
|
||||
await page_c.add_init_script(_INTERCEPTOR_INIT)
|
||||
if HAS_STEALTH:
|
||||
@@ -411,12 +414,7 @@ async def run_consent_test(
|
||||
try:
|
||||
from services.category_tester import detect_categories, test_single_category
|
||||
|
||||
ctx_cat = await browser.new_context(
|
||||
user_agent=USER_AGENT,
|
||||
viewport={"width": 1920, "height": 1080},
|
||||
locale="de-DE",
|
||||
timezone_id="Europe/Berlin",
|
||||
)
|
||||
ctx_cat = await browser.new_context(**_ctx_base)
|
||||
page_cat = await ctx_cat.new_page()
|
||||
if HAS_STEALTH:
|
||||
await stealth_async(page_cat)
|
||||
@@ -461,12 +459,7 @@ async def run_consent_test(
|
||||
"skipping remaining %d categories",
|
||||
len(unique_cats) - len(result.category_tests))
|
||||
break
|
||||
cat_ctx = await browser.new_context(
|
||||
user_agent=USER_AGENT,
|
||||
viewport={"width": 1920, "height": 1080},
|
||||
locale="de-DE",
|
||||
timezone_id="Europe/Berlin",
|
||||
)
|
||||
cat_ctx = await browser.new_context(**_ctx_base)
|
||||
try:
|
||||
cat_result = await asyncio.wait_for(
|
||||
test_single_category(cat_ctx, url, cat, banner, wait_ms),
|
||||
|
||||
@@ -128,16 +128,24 @@ async def run_matrix(
|
||||
logger.warning("matrix profile %s failed: %s", prof["id"], e)
|
||||
return {
|
||||
"profile_id": prof["id"], "label": prof["label"],
|
||||
"scan": None, "error": str(e)[:200],
|
||||
"engine": prof.get("engine"),
|
||||
"is_mobile": bool(prof.get("device")),
|
||||
"summary": None, "error": str(e)[:200],
|
||||
"dimensions": {"pre_consent": 0, "reject_respect": 0,
|
||||
"banner_design": 0},
|
||||
"score": 0, "verbal": "Scan fehlgeschlagen",
|
||||
}
|
||||
dims = _extract_dimensions(scan or {})
|
||||
score = _score(dims)
|
||||
# Nur den kompakten `summary` an die Zeile heben — die vollen
|
||||
# phases/Cookie-Listen werden für das Scoring konsumiert und dann
|
||||
# verworfen (sonst bläht 6× volle Cookie-Liste die JSONB-Persistenz).
|
||||
summary = (scan or {}).get("summary") if isinstance(scan, dict) else None
|
||||
return {
|
||||
"profile_id": prof["id"], "label": prof["label"],
|
||||
"scan": scan, "dimensions": dims, "score": score,
|
||||
"engine": prof.get("engine"),
|
||||
"is_mobile": bool(prof.get("device")),
|
||||
"summary": summary, "dimensions": dims, "score": score,
|
||||
"verbal": _verbal(score),
|
||||
}
|
||||
|
||||
|
||||
@@ -0,0 +1,85 @@
|
||||
"""Kompakte Per-Engine-Projektion eines ConsentTestResult für die Browser-Matrix.
|
||||
|
||||
Die Matrix braucht NICHT die volle `/scan`-Antwort — nur die Felder, die je
|
||||
Browser-Zeile angezeigt + persistiert werden: Cookies vor Consent / nach
|
||||
Ablehnen, ob „Ablehnen" respektiert wurde, Oberflächen-Signale, Screenshot.
|
||||
Bewusst schlank gehalten, damit der in `banner_result.browser_matrix` (JSONB)
|
||||
persistierte Block klein bleibt — 6 Engines × voller Cookie-Liste + Screenshot
|
||||
würde sonst schnell mehrere MB groß (BMW: ~780 Cookies je Phase).
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from typing import Any
|
||||
|
||||
# Cookie-Namen je Phase deckeln — die Matrix zeigt Zahlen + Beispiele, nicht
|
||||
# die volle Liste (die steckt im textbasierten Cookie-Modul).
|
||||
_NAME_CAP = 40
|
||||
_TRACK_CAP = 20
|
||||
|
||||
|
||||
def _vdict(v: Any) -> dict:
|
||||
"""Violation (dataclass/obj/dict) → serialisierbares dict."""
|
||||
if isinstance(v, dict):
|
||||
return v
|
||||
return getattr(v, "__dict__", None) or {"text": str(v)}
|
||||
|
||||
|
||||
def matrix_scan_dict(result: Any) -> dict:
    """Project a consent-scan result onto the slim browser-matrix dict.

    The returned dict carries the ``phases`` / ``banner_checks`` entries
    (the shape `multi_browser_scanner._extract_dimensions` is expected to
    read) plus a compact ``summary`` block for frontend and persistence.

    `result` may be an object exposing the fields as attributes or an
    already-serialized dict using the same field names as keys. Missing or
    falsy fields fall back to empty defaults. Reading dict keys (instead of
    only attributes) avoids reporting an empty, seemingly clean scan when
    the scanner hands over a dict.
    """

    def _field(name: str, default: Any) -> Any:
        # Key lookup for dicts, attribute lookup otherwise; falsy values
        # (None, "", []) collapse to the default.
        if isinstance(result, dict):
            value = result.get(name, default)
        else:
            value = getattr(result, name, default)
        return value or default

    before = list(_field("before_cookies", []))
    after = list(_field("reject_cookies", []))
    before_violations = list(_field("before_violations", []))
    reject_violations = list(_field("reject_violations", []))
    reject_new_tracking = list(_field("reject_new_tracking", []))
    banner_text_violations = list(_field("banner_text_violations", []))
    provider = _field("banner_provider", "")
    banner_detected = bool(_field("banner_detected", False))

    summary = {
        "cookies_before_consent": len(before),
        "cookies_after_reject": len(after),
        # Only capped samples are kept; the counts above cover the full lists.
        "cookies_before_names": before[:_NAME_CAP],
        "cookies_after_reject_names": after[:_NAME_CAP],
        # "Reject respected" = no violations AND no new tracker after the
        # reject click. Remaining essential cookies (e.g. the stored consent
        # decision itself) are allowed, so the raw cookie count must NOT be
        # used here (it would produce false positives).
        "reject_respected": (not reject_violations
                             and not reject_new_tracking),
        "reject_new_tracking": reject_new_tracking[:_TRACK_CAP],
        "banner_detected": banner_detected,
        "banner_provider": provider,
        "banner_screenshot_b64": _field("banner_screenshot_b64", ""),
        "surface": {
            "has_impressum_link": bool(
                _field("banner_has_impressum_link", False)),
            "has_dse_link": bool(_field("banner_has_dse_link", False)),
            "banner_text_issues": len(banner_text_violations),
        },
        "violations": {
            "before_consent": len(before_violations),
            "after_reject": len(reject_violations),
            "banner_text": len(banner_text_violations),
        },
    }

    return {
        "banner_detected": banner_detected,
        "banner_provider": provider,
        # Minimal shape for _extract_dimensions (cookie lists + violations):
        "phases": {
            "before_consent": {"cookies": before},
            "after_reject": {"cookies": after},
        },
        "banner_checks": {
            "violations": [_vdict(v) for v in banner_text_violations],
        },
        "summary": summary,
    }
|
||||
@@ -0,0 +1,73 @@
|
||||
"""Per-Engine-Projektion der Browser-Matrix (`scan_matrix_summary`).
|
||||
|
||||
Sichert: ConsentTestResult → schlanke Matrix-dict-Form mit (a) phases, die
|
||||
`multi_browser_scanner._extract_dimensions` lesen kann, und (b) kompaktem
|
||||
`summary` (cookies_before/after_reject, reject_respected-Heuristik, Surface).
|
||||
"""
|
||||
|
||||
from types import SimpleNamespace
|
||||
|
||||
from services.scan_matrix_summary import matrix_scan_dict
|
||||
from services.multi_browser_scanner import _extract_dimensions
|
||||
|
||||
|
||||
def _result(**kw):
|
||||
base = dict(
|
||||
banner_detected=True, banner_provider="Usercentrics",
|
||||
before_cookies=[], reject_cookies=[],
|
||||
before_violations=[], reject_violations=[], reject_new_tracking=[],
|
||||
banner_text_violations=[],
|
||||
banner_has_impressum_link=True, banner_has_dse_link=True,
|
||||
banner_screenshot_b64="iVBOR_fake",
|
||||
)
|
||||
base.update(kw)
|
||||
return SimpleNamespace(**base)
|
||||
|
||||
|
||||
def test_cookie_counts_and_names_capped():
    """Counts cover the full cookie lists; the name sample is capped at 40."""
    fifty_names = [f"c{i}" for i in range(50)]
    scan = _result(before_cookies=fifty_names, reject_cookies=["a", "b"])
    summary = matrix_scan_dict(scan)["summary"]
    assert summary["cookies_before_consent"] == 50
    assert summary["cookies_after_reject"] == 2
    assert len(summary["cookies_before_names"]) == 40  # capped
    assert summary["cookies_after_reject_names"] == ["a", "b"]
|
||||
|
||||
|
||||
def test_reject_respected_true_when_no_violation_no_tracking():
    """A leftover essential cookie alone must not count as a violation."""
    scan = _result(reject_cookies=["consent_choice"])  # essential cookie stays
    summary = matrix_scan_dict(scan)["summary"]
    assert summary["reject_respected"] is True
|
||||
|
||||
|
||||
def test_reject_respected_false_on_reject_violation():
    """Any violation recorded after rejecting flips `reject_respected`."""
    violation = SimpleNamespace(severity="HIGH", text="x")
    summary = matrix_scan_dict(_result(reject_violations=[violation]))["summary"]
    assert summary["reject_respected"] is False
|
||||
|
||||
|
||||
def test_reject_respected_false_on_new_tracking():
    """A new tracker after rejecting flips `reject_respected`."""
    scan = _result(reject_new_tracking=["google-analytics"])
    summary = matrix_scan_dict(scan)["summary"]
    assert summary["reject_respected"] is False
|
||||
|
||||
|
||||
def test_surface_and_screenshot_passthrough():
    """Surface flags, banner-text issue count and screenshot pass through."""
    text_issue = SimpleNamespace(severity="LOW", text="y")
    scan = _result(banner_has_impressum_link=False,
                   banner_text_violations=[text_issue])
    summary = matrix_scan_dict(scan)["summary"]
    surface = summary["surface"]
    assert surface["has_impressum_link"] is False
    assert surface["has_dse_link"] is True
    assert surface["banner_text_issues"] == 1
    assert summary["banner_screenshot_b64"] == "iVBOR_fake"
|
||||
|
||||
|
||||
def test_phases_shape_readable_by_extract_dimensions():
    """Contract: the projection must be consumable by `_extract_dimensions`."""
    scan = _result(before_cookies=["a", "b", "c"], reject_cookies=["d"])
    dimensions = _extract_dimensions(matrix_scan_dict(scan))
    expected_keys = {"pre_consent", "reject_respect", "banner_design"}
    assert set(dimensions) == expected_keys
    # Only the value ranges are pinned here, not exact scores.
    assert 0.0 <= dimensions["pre_consent"] <= 1.0
    assert dimensions["reject_respect"] <= 1.0
|
||||
Reference in New Issue
Block a user