feat(consent-tester): /scan-matrix echt — Profil je Engine + Per-Engine-Summary (Phase 1.2)
- _scanner_run reicht browser_profile an run_consent_test durch (statt Single-Chromium-Shim)
- neue scan_matrix_summary.matrix_scan_dict: ConsentTestResult -> schlanke Matrix-dict-Form (phases fuer _extract_dimensions + kompakter `summary`: cookies_before_consent/after_reject, reject_respected-Heuristik [keine Verstoesse UND kein neuer Tracker], surface, screenshot)
- multi_browser_scanner._run_one hebt summary + engine + is_mobile an die Zeile, verwirft die vollen Cookie-Listen (JSONB-Persistenz schlank)
- consent_scanner: _ctx_base mit Mobile-Device-Emulation (iPhone-Profil -> echtes Mobile-Viewport/Touch), alle 5 new_context auf **_ctx_base
- Tests: test_scan_matrix_summary (6) inkl. _extract_dimensions-Vertrag

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
This commit is contained in:
@@ -5,11 +5,9 @@ returns the aggregated robustness-score per browser plus a
|
|||||||
worst-of/best-of summary. Kept in its own module so main.py stays
|
worst-of/best-of summary. Kept in its own module so main.py stays
|
||||||
under the 500-LOC cap.
|
under the 500-LOC cap.
|
||||||
|
|
||||||
KNOWN LIMITATION (stage 1.a):
|
Stage 1.b (erledigt): `run_consent_test` nimmt jetzt einen `browser_profile`
|
||||||
The underlying `run_consent_test` does not yet accept a
|
kwarg → echte Engine-Diversität (Firefox/Gecko, WebKit/Safari, Blink inkl.
|
||||||
`browser_profile` kwarg — all profiles currently execute on the
|
Chrome-/Edge-Channel + Brave). `_scanner_run` reicht das Profil durch.
|
||||||
same Chromium instance. Engine diversity (real Firefox/WebKit
|
|
||||||
contexts) ships in stage 1.b once consent_scanner is split.
|
|
||||||
"""
|
"""
|
||||||
|
|
||||||
from __future__ import annotations
|
from __future__ import annotations
|
||||||
@@ -22,6 +20,7 @@ from pydantic import BaseModel
|
|||||||
|
|
||||||
from services.consent_scanner import run_consent_test
|
from services.consent_scanner import run_consent_test
|
||||||
from services.multi_browser_scanner import run_matrix
|
from services.multi_browser_scanner import run_matrix
|
||||||
|
from services.scan_matrix_summary import matrix_scan_dict
|
||||||
|
|
||||||
logger = logging.getLogger(__name__)
|
logger = logging.getLogger(__name__)
|
||||||
router = APIRouter()
|
router = APIRouter()
|
||||||
@@ -36,12 +35,18 @@ class MatrixScanRequest(BaseModel):
|
|||||||
browser_profiles: list[str] | None = None
|
browser_profiles: list[str] | None = None
|
||||||
|
|
||||||
|
|
||||||
async def _scanner_shim(url: str, browser_profile: dict | None = None,
|
async def _scanner_run(url: str, browser_profile: dict | None = None,
|
||||||
timeout_per_phase: int = 10,
|
timeout_per_phase: int = 10,
|
||||||
categories: list[str] | None = None):
|
categories: list[str] | None = None):
|
||||||
"""Shim that ignores `browser_profile` until consent_scanner accepts it."""
|
"""Adapter: reicht das aufgelöste `browser_profile` (Engine/Channel/Device)
|
||||||
return await run_consent_test(url, timeout_per_phase,
|
an `run_consent_test` durch, damit jede Matrix-Zeile auf der echten Engine
|
||||||
categories or [])
|
läuft (Firefox/WebKit/Blink + Chrome-/Edge-Channel + Brave). Projiziert
|
||||||
|
das ConsentTestResult auf die schlanke Matrix-dict-Form (phases +
|
||||||
|
kompakter `summary`)."""
|
||||||
|
result = await run_consent_test(url, timeout_per_phase,
|
||||||
|
categories or [],
|
||||||
|
browser_profile=browser_profile)
|
||||||
|
return matrix_scan_dict(result)
|
||||||
|
|
||||||
|
|
||||||
@router.post("/scan-matrix")
|
@router.post("/scan-matrix")
|
||||||
@@ -50,7 +55,7 @@ async def scan_matrix(req: MatrixScanRequest):
|
|||||||
logger.info("Matrix scan for %s profiles=%s", req.url,
|
logger.info("Matrix scan for %s profiles=%s", req.url,
|
||||||
req.browser_profiles or "default")
|
req.browser_profiles or "default")
|
||||||
matrix = await run_matrix(
|
matrix = await run_matrix(
|
||||||
_scanner_shim,
|
_scanner_run,
|
||||||
req.url,
|
req.url,
|
||||||
requested_profiles=req.browser_profiles,
|
requested_profiles=req.browser_profiles,
|
||||||
timeout_per_phase=req.timeout_per_phase,
|
timeout_per_phase=req.timeout_per_phase,
|
||||||
|
|||||||
@@ -172,15 +172,28 @@ async def run_consent_test(
|
|||||||
_launch["executable_path"] = _prof["executable_path"]
|
_launch["executable_path"] = _prof["executable_path"]
|
||||||
browser = await p.chromium.launch(**_launch)
|
browser = await p.chromium.launch(**_launch)
|
||||||
|
|
||||||
|
# Gemeinsame Context-Optionen. Bei Mobile-Profilen (Profil nennt ein
|
||||||
|
# Playwright-`device`, z.B. „iPhone 15") echte Mobile-Emulation
|
||||||
|
# (Viewport/UA/Touch) statt Desktop — sonst wäre die Mobile-Matrix-
|
||||||
|
# Zeile nur Desktop-WebKit. Nur bekannte new_context-kwargs kopieren
|
||||||
|
# (NICHT das volle Device-dict spreaden → default_browser_type bricht).
|
||||||
|
_device = p.devices.get(_prof["device"]) if _prof.get("device") else None
|
||||||
|
_ctx_base: dict = {
|
||||||
|
"user_agent": USER_AGENT,
|
||||||
|
"viewport": {"width": 1920, "height": 1080},
|
||||||
|
"locale": "de-DE",
|
||||||
|
"timezone_id": "Europe/Berlin",
|
||||||
|
}
|
||||||
|
if _device:
|
||||||
|
for _k in ("user_agent", "viewport", "device_scale_factor",
|
||||||
|
"is_mobile", "has_touch"):
|
||||||
|
if _k in _device:
|
||||||
|
_ctx_base[_k] = _device[_k]
|
||||||
|
|
||||||
try:
|
try:
|
||||||
# ── Phase A: Before consent ──────────────────────────
|
# ── Phase A: Before consent ──────────────────────────
|
||||||
logger.info("Phase A: First visit (no interaction)")
|
logger.info("Phase A: First visit (no interaction)")
|
||||||
ctx_a = await browser.new_context(
|
ctx_a = await browser.new_context(**_ctx_base)
|
||||||
user_agent=USER_AGENT,
|
|
||||||
viewport={"width": 1920, "height": 1080},
|
|
||||||
locale="de-DE",
|
|
||||||
timezone_id="Europe/Berlin",
|
|
||||||
)
|
|
||||||
page_a = await ctx_a.new_page()
|
page_a = await ctx_a.new_page()
|
||||||
await page_a.add_init_script(_INTERCEPTOR_INIT)
|
await page_a.add_init_script(_INTERCEPTOR_INIT)
|
||||||
if HAS_STEALTH:
|
if HAS_STEALTH:
|
||||||
@@ -271,12 +284,7 @@ async def run_consent_test(
|
|||||||
|
|
||||||
# ── Phase B: After rejecting ─────────────────────────
|
# ── Phase B: After rejecting ─────────────────────────
|
||||||
logger.info("Phase B: Reject consent (%s)", banner.provider)
|
logger.info("Phase B: Reject consent (%s)", banner.provider)
|
||||||
ctx_b = await browser.new_context(
|
ctx_b = await browser.new_context(**_ctx_base)
|
||||||
user_agent=USER_AGENT,
|
|
||||||
viewport={"width": 1920, "height": 1080},
|
|
||||||
locale="de-DE",
|
|
||||||
timezone_id="Europe/Berlin",
|
|
||||||
)
|
|
||||||
page_b = await ctx_b.new_page()
|
page_b = await ctx_b.new_page()
|
||||||
await page_b.add_init_script(_INTERCEPTOR_INIT)
|
await page_b.add_init_script(_INTERCEPTOR_INIT)
|
||||||
if HAS_STEALTH:
|
if HAS_STEALTH:
|
||||||
@@ -338,12 +346,7 @@ async def run_consent_test(
|
|||||||
|
|
||||||
# ── Phase C: After accepting ─────────────────────────
|
# ── Phase C: After accepting ─────────────────────────
|
||||||
logger.info("Phase C: Accept consent (%s)", banner.provider)
|
logger.info("Phase C: Accept consent (%s)", banner.provider)
|
||||||
ctx_c = await browser.new_context(
|
ctx_c = await browser.new_context(**_ctx_base)
|
||||||
user_agent=USER_AGENT,
|
|
||||||
viewport={"width": 1920, "height": 1080},
|
|
||||||
locale="de-DE",
|
|
||||||
timezone_id="Europe/Berlin",
|
|
||||||
)
|
|
||||||
page_c = await ctx_c.new_page()
|
page_c = await ctx_c.new_page()
|
||||||
await page_c.add_init_script(_INTERCEPTOR_INIT)
|
await page_c.add_init_script(_INTERCEPTOR_INIT)
|
||||||
if HAS_STEALTH:
|
if HAS_STEALTH:
|
||||||
@@ -411,12 +414,7 @@ async def run_consent_test(
|
|||||||
try:
|
try:
|
||||||
from services.category_tester import detect_categories, test_single_category
|
from services.category_tester import detect_categories, test_single_category
|
||||||
|
|
||||||
ctx_cat = await browser.new_context(
|
ctx_cat = await browser.new_context(**_ctx_base)
|
||||||
user_agent=USER_AGENT,
|
|
||||||
viewport={"width": 1920, "height": 1080},
|
|
||||||
locale="de-DE",
|
|
||||||
timezone_id="Europe/Berlin",
|
|
||||||
)
|
|
||||||
page_cat = await ctx_cat.new_page()
|
page_cat = await ctx_cat.new_page()
|
||||||
if HAS_STEALTH:
|
if HAS_STEALTH:
|
||||||
await stealth_async(page_cat)
|
await stealth_async(page_cat)
|
||||||
@@ -461,12 +459,7 @@ async def run_consent_test(
|
|||||||
"skipping remaining %d categories",
|
"skipping remaining %d categories",
|
||||||
len(unique_cats) - len(result.category_tests))
|
len(unique_cats) - len(result.category_tests))
|
||||||
break
|
break
|
||||||
cat_ctx = await browser.new_context(
|
cat_ctx = await browser.new_context(**_ctx_base)
|
||||||
user_agent=USER_AGENT,
|
|
||||||
viewport={"width": 1920, "height": 1080},
|
|
||||||
locale="de-DE",
|
|
||||||
timezone_id="Europe/Berlin",
|
|
||||||
)
|
|
||||||
try:
|
try:
|
||||||
cat_result = await asyncio.wait_for(
|
cat_result = await asyncio.wait_for(
|
||||||
test_single_category(cat_ctx, url, cat, banner, wait_ms),
|
test_single_category(cat_ctx, url, cat, banner, wait_ms),
|
||||||
|
|||||||
@@ -128,16 +128,24 @@ async def run_matrix(
|
|||||||
logger.warning("matrix profile %s failed: %s", prof["id"], e)
|
logger.warning("matrix profile %s failed: %s", prof["id"], e)
|
||||||
return {
|
return {
|
||||||
"profile_id": prof["id"], "label": prof["label"],
|
"profile_id": prof["id"], "label": prof["label"],
|
||||||
"scan": None, "error": str(e)[:200],
|
"engine": prof.get("engine"),
|
||||||
|
"is_mobile": bool(prof.get("device")),
|
||||||
|
"summary": None, "error": str(e)[:200],
|
||||||
"dimensions": {"pre_consent": 0, "reject_respect": 0,
|
"dimensions": {"pre_consent": 0, "reject_respect": 0,
|
||||||
"banner_design": 0},
|
"banner_design": 0},
|
||||||
"score": 0, "verbal": "Scan fehlgeschlagen",
|
"score": 0, "verbal": "Scan fehlgeschlagen",
|
||||||
}
|
}
|
||||||
dims = _extract_dimensions(scan or {})
|
dims = _extract_dimensions(scan or {})
|
||||||
score = _score(dims)
|
score = _score(dims)
|
||||||
|
# Nur den kompakten `summary` an die Zeile heben — die vollen
|
||||||
|
# phases/Cookie-Listen werden für das Scoring konsumiert und dann
|
||||||
|
# verworfen (sonst bläht 6× volle Cookie-Liste die JSONB-Persistenz).
|
||||||
|
summary = (scan or {}).get("summary") if isinstance(scan, dict) else None
|
||||||
return {
|
return {
|
||||||
"profile_id": prof["id"], "label": prof["label"],
|
"profile_id": prof["id"], "label": prof["label"],
|
||||||
"scan": scan, "dimensions": dims, "score": score,
|
"engine": prof.get("engine"),
|
||||||
|
"is_mobile": bool(prof.get("device")),
|
||||||
|
"summary": summary, "dimensions": dims, "score": score,
|
||||||
"verbal": _verbal(score),
|
"verbal": _verbal(score),
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@@ -0,0 +1,85 @@
|
|||||||
|
"""Kompakte Per-Engine-Projektion eines ConsentTestResult für die Browser-Matrix.
|
||||||
|
|
||||||
|
Die Matrix braucht NICHT die volle `/scan`-Antwort — nur die Felder, die je
|
||||||
|
Browser-Zeile angezeigt + persistiert werden: Cookies vor Consent / nach
|
||||||
|
Ablehnen, ob „Ablehnen" respektiert wurde, Oberflächen-Signale, Screenshot.
|
||||||
|
Bewusst schlank gehalten, damit der in `banner_result.browser_matrix` (JSONB)
|
||||||
|
persistierte Block klein bleibt — 6 Engines × voller Cookie-Liste + Screenshot
|
||||||
|
würde sonst schnell mehrere MB groß (BMW: ~780 Cookies je Phase).
|
||||||
|
"""
|
||||||
|
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
from typing import Any
|
||||||
|
|
||||||
|
# Cookie-Namen je Phase deckeln — die Matrix zeigt Zahlen + Beispiele, nicht
|
||||||
|
# die volle Liste (die steckt im textbasierten Cookie-Modul).
|
||||||
|
_NAME_CAP = 40
|
||||||
|
_TRACK_CAP = 20
|
||||||
|
|
||||||
|
|
||||||
|
def _vdict(v: Any) -> dict:
|
||||||
|
"""Violation (dataclass/obj/dict) → serialisierbares dict."""
|
||||||
|
if isinstance(v, dict):
|
||||||
|
return v
|
||||||
|
return getattr(v, "__dict__", None) or {"text": str(v)}
|
||||||
|
|
||||||
|
|
||||||
|
def matrix_scan_dict(result: Any) -> dict:
    """Project a ``ConsentTestResult`` onto the slim browser-matrix dict.

    The returned dict carries the ``phases`` / ``banner_checks`` structure
    intended for ``multi_browser_scanner._extract_dimensions`` plus a compact
    ``summary`` block for the frontend and for persistence.

    Fields are read defensively: by attribute from an object, by key from an
    already serialised dict. A missing field falls back to an empty default,
    so a dict input no longer yields an all-default ("clean") summary when it
    actually carries the data.

    Args:
        result: The scan result, either an object exposing the fields as
            attributes or a dict exposing them as keys.

    Returns:
        A dict with the keys ``banner_detected``, ``banner_provider``,
        ``phases``, ``banner_checks`` and ``summary``.
    """
    def _field(name: str, default: Any) -> Any:
        # Dicts are read by key, everything else by attribute.
        if isinstance(result, dict):
            return result.get(name, default)
        return getattr(result, name, default)

    def _as_list(name: str) -> list:
        # ``None`` and other falsy values collapse to an empty list.
        return list(_field(name, []) or [])

    before = _as_list("before_cookies")
    after = _as_list("reject_cookies")
    before_violations = _as_list("before_violations")
    reject_violations = _as_list("reject_violations")
    reject_new_tracking = _as_list("reject_new_tracking")
    banner_text_violations = _as_list("banner_text_violations")
    provider = _field("banner_provider", "") or ""
    banner_detected = bool(_field("banner_detected", False))

    summary = {
        "cookies_before_consent": len(before),
        "cookies_after_reject": len(after),
        "cookies_before_names": before[:_NAME_CAP],
        "cookies_after_reject_names": after[:_NAME_CAP],
        # "Reject respected" = after clicking "reject" there are no
        # violations AND no new tracker. Remaining essential cookies (e.g.
        # the stored consent decision itself) are allowed, so this is NOT
        # judged by the raw cookie count (that would be a false positive).
        "reject_respected": not reject_violations and not reject_new_tracking,
        "reject_new_tracking": reject_new_tracking[:_TRACK_CAP],
        "banner_detected": banner_detected,
        "banner_provider": provider,
        "banner_screenshot_b64": _field("banner_screenshot_b64", "") or "",
        "surface": {
            "has_impressum_link": bool(
                _field("banner_has_impressum_link", False)),
            "has_dse_link": bool(_field("banner_has_dse_link", False)),
            "banner_text_issues": len(banner_text_violations),
        },
        "violations": {
            "before_consent": len(before_violations),
            "after_reject": len(reject_violations),
            "banner_text": len(banner_text_violations),
        },
    }

    return {
        "banner_detected": banner_detected,
        "banner_provider": provider,
        # Minimal shape for _extract_dimensions (cookie lists + violations).
        "phases": {
            "before_consent": {"cookies": before},
            "after_reject": {"cookies": after},
        },
        "banner_checks": {
            "violations": [_vdict(v) for v in banner_text_violations],
        },
        "summary": summary,
    }
|
||||||
@@ -0,0 +1,73 @@
|
|||||||
|
"""Per-Engine-Projektion der Browser-Matrix (`scan_matrix_summary`).
|
||||||
|
|
||||||
|
Sichert: ConsentTestResult → schlanke Matrix-dict-Form mit (a) phases, die
|
||||||
|
`multi_browser_scanner._extract_dimensions` lesen kann, und (b) kompaktem
|
||||||
|
`summary` (cookies_before/after_reject, reject_respected-Heuristik, Surface).
|
||||||
|
"""
|
||||||
|
|
||||||
|
from types import SimpleNamespace
|
||||||
|
|
||||||
|
from services.scan_matrix_summary import matrix_scan_dict
|
||||||
|
from services.multi_browser_scanner import _extract_dimensions
|
||||||
|
|
||||||
|
|
||||||
|
def _result(**kw):
|
||||||
|
base = dict(
|
||||||
|
banner_detected=True, banner_provider="Usercentrics",
|
||||||
|
before_cookies=[], reject_cookies=[],
|
||||||
|
before_violations=[], reject_violations=[], reject_new_tracking=[],
|
||||||
|
banner_text_violations=[],
|
||||||
|
banner_has_impressum_link=True, banner_has_dse_link=True,
|
||||||
|
banner_screenshot_b64="iVBOR_fake",
|
||||||
|
)
|
||||||
|
base.update(kw)
|
||||||
|
return SimpleNamespace(**base)
|
||||||
|
|
||||||
|
|
||||||
|
def test_cookie_counts_and_names_capped():
    """Counts reflect the full lists while the name lists are capped."""
    fifty = [f"c{i}" for i in range(50)]
    scan = matrix_scan_dict(
        _result(before_cookies=fifty, reject_cookies=["a", "b"]))
    summary = scan["summary"]

    assert summary["cookies_before_consent"] == 50
    assert summary["cookies_after_reject"] == 2
    assert len(summary["cookies_before_names"]) == 40  # capped
    assert summary["cookies_after_reject_names"] == ["a", "b"]
|
||||||
|
|
||||||
|
|
||||||
|
def test_reject_respected_true_when_no_violation_no_tracking():
    """A surviving essential cookie alone must not count as a violation."""
    # The essential cookie remains after rejecting.
    scan = matrix_scan_dict(_result(reject_cookies=["consent_choice"]))
    respected = scan["summary"]["reject_respected"]
    assert respected is True
|
||||||
|
|
||||||
|
|
||||||
|
def test_reject_respected_false_on_reject_violation():
    """Any violation recorded after rejecting flips the flag to False."""
    violation = SimpleNamespace(severity="HIGH", text="x")
    summary = matrix_scan_dict(_result(reject_violations=[violation]))["summary"]
    assert summary["reject_respected"] is False
|
||||||
|
|
||||||
|
|
||||||
|
def test_reject_respected_false_on_new_tracking():
    """A newly appearing tracker after rejecting flips the flag to False."""
    summary = matrix_scan_dict(
        _result(reject_new_tracking=["google-analytics"]))["summary"]
    assert summary["reject_respected"] is False
|
||||||
|
|
||||||
|
|
||||||
|
def test_surface_and_screenshot_passthrough():
    """Surface signals and the screenshot are forwarded into the summary."""
    text_issue = SimpleNamespace(severity="LOW", text="y")
    summary = matrix_scan_dict(_result(
        banner_has_impressum_link=False,
        banner_text_violations=[text_issue],
    ))["summary"]
    surface = summary["surface"]

    assert surface["has_impressum_link"] is False
    assert surface["has_dse_link"] is True
    assert surface["banner_text_issues"] == 1
    assert summary["banner_screenshot_b64"] == "iVBOR_fake"
|
||||||
|
|
||||||
|
|
||||||
|
def test_phases_shape_readable_by_extract_dimensions():
    """Contract: the projection MUST be consumable by _extract_dimensions."""
    projected = matrix_scan_dict(
        _result(before_cookies=["a", "b", "c"], reject_cookies=["d"]))
    dims = _extract_dimensions(projected)

    expected_keys = {"pre_consent", "reject_respect", "banner_design"}
    assert set(dims) == expected_keys
    # Three pre-consent cookies were supplied; only the score bounds are
    # asserted here.
    assert 0.0 <= dims["pre_consent"] <= 1.0
    assert dims["reject_respect"] <= 1.0
|
||||||
Reference in New Issue
Block a user