"""B17 wiring — Audit-Walk-Recorder.

Triggers a complete Playwright site walk with video recording on the
consent-tester. Result: video + JSON action index with timestamps +
SHA-256 hash as tamper protection.

Only the walk metadata + video URL are stored in the state. The actual
file body stays in the consent-tester volume (stage 1). Stage 3 will
upload the video to DSMS-IPFS and include the CID here.
"""

from __future__ import annotations

import html
import logging
import os
from urllib.parse import urlparse

import httpx

from ._constants import CONSENT_TESTER_URL

logger = logging.getLogger(__name__)

# Optional override for the public IPFS gateway URL. DSMS internally
# returns http://dsms-node:8080/ipfs/{cid} — but for the mail, reviewers
# need an externally reachable URL.
DSMS_PUBLIC_GATEWAY = os.environ.get(
    "DSMS_PUBLIC_GATEWAY",
    "https://dsms-dev.breakpilot.ai",
)


def _publicize_gateway_url(internal_url: str) -> str:
    """Replace internal dsms-node host with the public gateway.

    Args:
        internal_url: Gateway URL that may point at an internal DSMS host.
            May be empty.

    Returns:
        ``internal_url`` with every occurrence of
        ``http://dsms-node:8080`` or ``http://bp-compliance-dsms-node:8080``
        replaced by ``DSMS_PUBLIC_GATEWAY``. An empty string is returned for
        falsy input; URLs containing neither internal origin are returned
        unchanged.
    """
    if not internal_url:
        return ""
    # NOTE(review): plain substring replacement — if DSMS_PUBLIC_GATEWAY is
    # configured with a trailing slash the result would contain "//" before
    # the path. Confirm the deployment value has no trailing slash.
    return internal_url.replace(
        "http://dsms-node:8080", DSMS_PUBLIC_GATEWAY,
    ).replace(
        "http://bp-compliance-dsms-node:8080", DSMS_PUBLIC_GATEWAY,
    )


async def run_b17(state: dict) -> None:
    """Trigger walk recording + store metadata in state."""
    req = state.get("req")
    if req is None:
        return
    # Derive the site root ("scheme://host/") from the first document URL
    # that parses with both a scheme and a network location.
    homepage = ""
    for d in req.documents:
        if d.url:
            p = urlparse(d.url)
            if p.scheme and p.netloc:
                homepage = f"{p.scheme}://{p.netloc}/"
                break
    if not homepage:
        return
    walk: dict = {}
    walk_error: str | None = None
    # Any failure of the request is captured in walk_error instead of
    # propagating to the caller.
    try:
        async with httpx.AsyncClient(timeout=300.0) as c:
            r = await c.post(
                f"{CONSENT_TESTER_URL}/scan-audit-walk",
                json={"url": homepage, "dwell_s": 4.0, "max_links": 8},
                timeout=300.0,
            )
            if r.status_code == 200:
                walk = r.json()
            else:
                walk_error = f"consent-tester HTTP {r.status_code}"
    except Exception as e:
        # Only the exception type and the first 120 characters of its
        # message are kept.
        walk_error = f"{type(e).__name__}: {str(e)[:120]}"
        logger.warning("B17 audit-walk request failed: %s", walk_error)
    if not walk or not
walk.get("walk_id"): # Fallback-Stub damit Audit-Report einen Hinweis bekommt # statt "audit_walk: None". Reviewer sieht den Fail. state["audit_walk"] = { "walk_id": "", "url": homepage, "video": {}, "actions": [], "annotations": [], "error": walk_error or "unknown (no walk_id returned)", } state["audit_walk_html"] = ( "
"
f"Site: {homepage} · Ursache: "
f"{walk_error or 'unknown'}. Mögliche "
"Gründe: komplexes CMP-Banner (lange Tour-Zeit), Anti-Bot-"
"Protection, oder consent-tester überlastet.
{html.escape(video_cid[:20])}…"
if video_gw else
f"{html.escape(video_cid)}")
parts.append(f"Video-CID: {link}")
if meta_cid:
link = (f""
f"{html.escape(meta_cid[:20])}…"
if meta_gw else
f"{html.escape(meta_cid)}")
parts.append(f"walk.json-CID: {link}")
dsms_html = (
"" "🔒 DSMS-Anchor (manipulationssicher): " + " · ".join(parts) + "
" ) rows = [] for a in actions: ts = (a.get("timestamp") or "")[11:19] # HH:MM:SS act = a.get("action") or "" detail = "" if act == "goto" or act == "navigate": detail = (a.get("url") or "")[:120] if a.get("status"): detail += f" → HTTP {a['status']}" elif act == "accept_banner": r = a.get("result") or "" if r == "clicked": detail = f"Banner akzeptiert ({a.get('phrase') or a.get('selector') or ''})" else: detail = "Kein Accept-Button gefunden" elif act == "discover_footer_links": detail = f"{a.get('count', 0)} Compliance-Links im Footer" elif act == "expand_accordions": n = a.get("expanded", 0) detail = (f"{n} Akkordeon/Details-Sektion(en) entfaltet" if n else "Keine Akkordeons gefunden") elif act == "tour_cookie_banner": n = a.get("clicks", 0) opened = "Settings geöffnet" if a.get("settings_opened") \ else "kein Settings-Trigger gefunden" detail = f"Cookie-Banner-Tour: {n} Klicks ({opened})" rows.append( f""
f"Video: "
f"video.webm "
f"({size_kb} KB, SHA-256 {html.escape(sha)}…) · "
f"Metadata: "
f"walk.json"
"
" f"{nav_count} Compliance-Seiten besucht, jede 4 Sek " "verweilt — Reviewer kann den Audit-Walk nachverfolgen." "
" + dsms_html + "| Zeit (UTC) | " "Aktion | " "Detail | " "
|---|