feat(b17): Playwright Audit-Walk-Video (Stufe 1, #7)

Nimmt einen kompletten Site-Walk als WebKit-Browser-Session
inkl. Video auf. Reviewer kann nachträglich exakt nachvollziehen,
wie die Engine zum Befund kam.

consent-tester:
  - services/audit_walk_recorder.py: Playwright record_video_dir,
    Desktop-Viewport 1280×800 (kein iPhone-Viewport). Goto Homepage →
    Banner-Accept (Best-Effort: 12 Text-Phrasen + 5 CMP-Fallback-Selektoren) →
    Footer-Links sammeln (compliance-relevant gefiltert) →
    pro Link navigate + Dwell-Time → JSON-Action-Index mit
    UTC-Timestamps + SHA-256 vom Video als Manipulationsschutz.
  - routes_audit_walk.py: POST /scan-audit-walk; statische
    Serves für /audit-walks/{walk_id}/video.webm + walk.json.
  - main.py: Router registriert.

backend:
  - _b17_wiring.py: Triggert /scan-audit-walk, speichert
    Walk-Metadata in state["audit_walk"]. Render-Block mit
    HTML-Tabelle aller Actions (HH:MM:SS + Aktion + Detail) +
    Links zu Video und walk.json.
  - _orchestrator.py: run_b17 nach run_b16, async-aufgerufen.
  - mail_render_v2/_compose.py: audit_walk_html im V2-Layout.
  - test_b17_audit_walk.py: 8 Tests (Render-Pfade + Wiring).

Stufe-2 (Akkordeon-Expansion) und Stufe-3 (DSMS-CID-Anchor)
folgen separat.

Real-World-Smoke gegen Elli:
  - 581 KB Video, SHA-256 verifizierbar
  - 3 Footer-Links besucht (Impressum, Datenschutzerkl., Nutzungs-)
  - 6 Actions im JSON-Index

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
Benjamin Admin
2026-06-07 17:20:13 +02:00
parent 529c032641
commit cb4b352846
7 changed files with 562 additions and 0 deletions
@@ -0,0 +1,133 @@
"""B17 wiring — Audit-Walk-Recorder.
Triggert beim consent-tester einen kompletten Playwright-Site-Walk
mit Video-Aufzeichnung. Result: Video + JSON-Action-Index mit
Timestamps + SHA-256-Hash für Manipulation-Schutz.
Speichert nur die Walk-Metadata + Video-URL im state. Der eigentliche
File-Body bleibt im consent-tester-Volume (Stufe 1). Stufe 3 wird das
Video zu DSMS-IPFS hochladen und die CID hier einbinden.
"""
from __future__ import annotations
import html
import logging
from urllib.parse import urlparse
import httpx
from ._constants import CONSENT_TESTER_URL
logger = logging.getLogger(__name__)
async def run_b17(state: dict) -> None:
    """Trigger an audit-walk recording and store its metadata in ``state``.

    The homepage is derived from the first document URL of ``state["req"]``
    that has both a scheme and a host. The consent-tester endpoint
    ``/scan-audit-walk`` is then called with that homepage. On success the
    decoded response is stored under ``state["audit_walk"]`` and its rendered
    HTML block under ``state["audit_walk_html"]``.

    This step is best-effort: a missing request, no usable URL, a transport
    error, a non-200 answer or a malformed payload all leave ``state``
    untouched and never raise into the caller.

    Args:
        state: Mutable pipeline state; read for ``"req"`` and updated in place.
    """
    req = state.get("req")
    if req is None:
        return

    # Reduce the first absolute document URL to its site root.
    homepage = ""
    for d in req.documents:
        if not d.url:
            continue
        p = urlparse(d.url)
        if p.scheme and p.netloc:
            homepage = f"{p.scheme}://{p.netloc}/"
            break
    if not homepage:
        return

    try:
        # The client-level timeout already applies to every request, so it is
        # not repeated on the individual call.
        async with httpx.AsyncClient(timeout=180.0) as c:
            r = await c.post(
                f"{CONSENT_TESTER_URL}/scan-audit-walk",
                json={"url": homepage, "dwell_s": 4.0, "max_links": 8},
            )
        if r.status_code != 200:
            logger.warning(
                "B17 audit-walk request returned HTTP %s", r.status_code
            )
            return
        walk = r.json()
    except Exception as e:
        # Deliberately broad: this is an optional evidence step and must not
        # abort the surrounding compliance check.
        logger.warning("B17 audit-walk request failed: %s", e)
        return

    # Guard against non-object JSON (list, string, null) before using .get().
    if not isinstance(walk, dict) or not walk.get("walk_id"):
        logger.warning("B17 audit-walk response has no usable walk_id")
        return

    state["audit_walk"] = walk
    state["audit_walk_html"] = _render(walk)

    video = walk.get("video") or {}
    logger.info(
        "B17 audit-walk: %s · %d actions · video %d bytes · sha256 %s",
        walk.get("walk_id"),
        len(walk.get("actions") or []),
        video.get("size_bytes", 0),
        (video.get("sha256") or "")[:12],
    )
def _video_link(walk_id: str) -> str:
    """Return the consent-tester URL of the recorded video for ``walk_id``.

    The link only resolves for a reviewer who can reach the consent-tester
    service directly.
    """
    walks_root = f"{CONSENT_TESTER_URL}/audit-walks"
    return f"{walks_root}/{walk_id}/video.webm"
def _render(walk: dict) -> str:
    """Render the audit-walk metadata as an HTML block.

    The block contains links to the video and to ``walk.json``, the video
    size with a 12-character SHA-256 prefix, the number of ``navigate``
    actions, and one table row per recorded action.

    Args:
        walk: Walk payload; reads ``walk_id``, ``video`` and ``actions``.

    Returns:
        The HTML fragment as a single string.
    """
    walk_id = walk.get("walk_id") or ""
    video_meta = walk.get("video") or {}
    entries = walk.get("actions") or []

    visited = sum(entry.get("action") == "navigate" for entry in entries)
    sha_prefix = (video_meta.get("sha256") or "")[:12]
    size_kb = round((video_meta.get("size_bytes") or 0) / 1024, 1)
    video_url = _video_link(walk_id)
    meta_url = f"{CONSENT_TESTER_URL}/audit-walks/{walk_id}/walk.json"

    def _describe(entry: dict, kind: str) -> str:
        # Human-readable detail text for one action; empty for unknown kinds.
        if kind in ("goto", "navigate"):
            text = (entry.get("url") or "")[:120]
            if entry.get("status"):
                text = text + f" → HTTP {entry['status']}"
            return text
        if kind == "accept_banner":
            if (entry.get("result") or "") != "clicked":
                return "Kein Accept-Button gefunden"
            return f"Banner akzeptiert ({entry.get('phrase') or entry.get('selector') or ''})"
        if kind == "discover_footer_links":
            return f"{entry.get('count', 0)} Compliance-Links im Footer"
        return ""

    def _row(entry: dict) -> str:
        # Characters 11..18 of an ISO timestamp are the HH:MM:SS part.
        clock = (entry.get("timestamp") or "")[11:19]
        kind = entry.get("action") or ""
        info = _describe(entry, kind)
        return (
            "<tr><td style='padding:4px 8px;font-family:monospace;"
            f"color:#475569;'>{html.escape(clock)}</td>"
            f"<td style='padding:4px 8px;'>{html.escape(kind)}</td>"
            "<td style='padding:4px 8px;color:#475569;'>"
            f"{html.escape(info)}</td></tr>"
        )

    table_rows = [_row(entry) for entry in entries]

    intro = (
        "<div style='margin:24px 0;padding:16px;border-left:4px solid #0ea5e9;"
        "background:#f0f9ff;border-radius:4px;'>"
        "<h2 style='margin:0 0 8px;color:#0c4a6e;font-size:16px;'>"
        "🎥 Audit-Walk-Video (Beweis-Aufzeichnung)"
        "</h2>"
    )
    links_line = (
        "<p style='margin:0 0 8px;font-size:13px;color:#475569;'>"
        "<strong>Video:</strong> "
        f"<a href='{html.escape(video_url)}' style='color:#0369a1;'>video.webm</a> "
        f"({size_kb} KB, SHA-256 <code>{html.escape(sha_prefix)}…</code>) · "
        "<strong>Metadata:</strong> "
        f"<a href='{html.escape(meta_url)}' style='color:#0369a1;'>walk.json</a>"
        "</p>"
    )
    summary_line = (
        "<p style='margin:0 0 8px;font-size:13px;color:#475569;'>"
        f"{visited} Compliance-Seiten besucht, jede 4 Sek "
        "verweilt — Reviewer kann den Audit-Walk nachverfolgen."
        "</p>"
    )
    table_open = (
        "<table style='font-size:12px;width:100%;border-collapse:collapse;"
        "background:#fff;border-radius:4px;'>"
        "<thead><tr style='background:#e0f2fe;'>"
        "<th style='padding:6px 8px;text-align:left;'>Zeit (UTC)</th>"
        "<th style='padding:6px 8px;text-align:left;'>Aktion</th>"
        "<th style='padding:6px 8px;text-align:left;'>Detail</th>"
        "</tr></thead><tbody>"
    )
    table_close = "</tbody></table></div>"

    return "".join(
        [intro, links_line, summary_line, table_open, *table_rows, table_close]
    )
@@ -27,6 +27,7 @@ from ._b13_wiring import run_b13
from ._b14_wiring import run_b14
from ._b15_wiring import run_b15
from ._b16_wiring import run_b16
from ._b17_wiring import run_b17
from ._constants import _compliance_check_jobs
from ._phase_a_resolve import run_phase_a
from ._phase_b_profile_check import run_phase_b
@@ -78,6 +79,7 @@ async def run_compliance_check(check_id: str, req) -> None:
run_b14(state) # Widersprüchliche Speicherdauer im selben Doc
run_b15(state) # AI-Act Rechtsgrundlage (LLM-Vendor auf lit. f)
run_b16(state) # Footer-Label-vs-URL-Slug-Drift
await run_b17(state) # Audit-Walk-Video (Beweis-Aufzeichnung)
# Phase D-3 top/mid/bot: Step 5 HTML blocks
await run_phase_d3_top(state)
await run_phase_d3_mid(state)
@@ -56,6 +56,8 @@ def compose_v2(state: dict) -> str:
state.get("ai_legal_basis_html", ""),
# B16 Footer-Label-vs-URL-Slug-Drift (SEO / Bookmarks)
state.get("url_slug_drift_html", ""),
# B17 Audit-Walk-Video (Beweis-Aufzeichnung)
state.get("audit_walk_html", ""),
# Browser-Matrix (Stage 1.c)
state.get("browser_matrix_html", ""),
# All legacy build_*_html() wrapped in V2 sections — preserves
@@ -0,0 +1,95 @@
"""Tests for B17 Audit-Walk-Wiring (Stufe 1)."""
import asyncio
from unittest.mock import patch, MagicMock, AsyncMock
import pytest
from compliance.api.agent_check._b17_wiring import _render, run_b17
# Canned /scan-audit-walk response used by all tests below: four actions
# (goto, accept_banner, discover_footer_links, one navigate) plus video
# metadata with a 64-character SHA-256 hex digest.
_FAKE_WALK = {
    "walk_id": "abc123def456",
    "url": "https://example.com/",
    "started_at": "2026-06-07T10:00:00+00:00",
    "completed_at": "2026-06-07T10:00:30+00:00",
    "engine": "playwright/webkit",
    "viewport": "1280x800",
    "actions": [
        {"timestamp": "2026-06-07T10:00:00+00:00", "action": "goto",
         "url": "https://example.com/", "status": 200},
        {"timestamp": "2026-06-07T10:00:02+00:00", "action": "accept_banner",
         "result": "clicked", "phrase": "alle akzeptieren"},
        {"timestamp": "2026-06-07T10:00:04+00:00",
         "action": "discover_footer_links", "count": 3, "links": []},
        # The only "navigate" entry, so the rendered page count is 1.
        {"timestamp": "2026-06-07T10:00:06+00:00", "action": "navigate",
         "url": "https://example.com/datenschutz",
         "anchor_text": "Datenschutz", "status": 200,
         "title": "Datenschutzerklärung"},
    ],
    "video": {
        "filename": "video.webm",
        "size_bytes": 512000,
        "sha256": "a1b2c3d4e5f67890fedcba0987654321ffffeeeeddddccccbbbbaaaa00001111",
    },
}
class TestRender:
    """Checks the HTML block that ``_render`` builds from a walk payload."""

    def test_renders_walk_id_and_link(self):
        rendered = _render(_FAKE_WALK)
        assert "abc123def456" in rendered
        assert "video.webm" in rendered
        assert "walk.json" in rendered

    def test_includes_sha_prefix(self):
        rendered = _render(_FAKE_WALK)
        # Only the first 12 characters of the digest are shown.
        assert "a1b2c3d4e5f6" in rendered
        assert _FAKE_WALK["video"]["sha256"] not in rendered

    def test_action_table_lists_all_actions(self):
        rendered = _render(_FAKE_WALK)
        # The header row is emitted as "<tr style=...>", so a bare "<tr>"
        # matches data rows only: exactly one per action.
        assert rendered.count("<tr>") == len(_FAKE_WALK["actions"])

    def test_nav_count_reflects_navigate_actions(self):
        rendered = _render(_FAKE_WALK)
        # The fixture contains exactly one "navigate" action.
        assert "1 Compliance-Seiten" in rendered
class TestRunB17:
    """Checks ``run_b17`` wiring: skip paths, failure handling, success."""

    def test_no_request_skipped(self):
        state = {}
        asyncio.run(run_b17(state))
        assert "audit_walk" not in state

    def test_no_url_skipped(self):
        state = {"req": MagicMock(documents=[MagicMock(url="")])}
        asyncio.run(run_b17(state))
        assert "audit_walk" not in state

    def test_consent_tester_failure_skipped(self):
        req = MagicMock(documents=[MagicMock(url="https://example.com/dse")])
        state = {"req": req}
        with patch(
            "compliance.api.agent_check._b17_wiring.httpx.AsyncClient"
        ) as mock_client:
            instance = mock_client.return_value.__aenter__.return_value
            instance.post = AsyncMock(side_effect=Exception("nope"))
            asyncio.run(run_b17(state))
        assert "audit_walk" not in state

    def test_non_200_response_skipped(self):
        # A non-200 answer must be ignored even if its body looks valid.
        req = MagicMock(documents=[MagicMock(url="https://example.com/dse")])
        state = {"req": req}
        resp = MagicMock(status_code=503)
        resp.json = MagicMock(return_value=_FAKE_WALK)
        with patch(
            "compliance.api.agent_check._b17_wiring.httpx.AsyncClient"
        ) as mock_client:
            instance = mock_client.return_value.__aenter__.return_value
            instance.post = AsyncMock(return_value=resp)
            asyncio.run(run_b17(state))
        assert "audit_walk" not in state
        assert "audit_walk_html" not in state

    def test_success_populates_state(self):
        req = MagicMock(documents=[MagicMock(url="https://example.com/dse")])
        state = {"req": req}
        resp = MagicMock(status_code=200)
        resp.json = MagicMock(return_value=_FAKE_WALK)
        with patch(
            "compliance.api.agent_check._b17_wiring.httpx.AsyncClient"
        ) as mock_client:
            instance = mock_client.return_value.__aenter__.return_value
            instance.post = AsyncMock(return_value=resp)
            asyncio.run(run_b17(state))
        assert state["audit_walk"]["walk_id"] == "abc123def456"
        assert "video.webm" in state["audit_walk_html"]
        # The walk must start at the site root, not at the document URL.
        instance.post.assert_called_once()
        sent = instance.post.call_args.kwargs["json"]
        assert sent["url"] == "https://example.com/"