"""B17 wiring — Audit-Walk-Recorder. Triggert beim consent-tester einen kompletten Playwright-Site-Walk mit Video-Aufzeichnung. Result: Video + JSON-Action-Index mit Timestamps + SHA-256-Hash für Manipulation-Schutz. Speichert nur die Walk-Metadata + Video-URL im state. Der eigentliche File-Body bleibt im consent-tester-Volume (Stufe 1). Stufe 3 wird das Video zu DSMS-IPFS hochladen und die CID hier einbinden. """ from __future__ import annotations import html import logging import os from urllib.parse import urlparse import httpx from ._constants import CONSENT_TESTER_URL logger = logging.getLogger(__name__) # Optionaler Override für die öffentliche IPFS-Gateway-URL. DSMS gibt # intern http://dsms-node:8080/ipfs/{cid} zurück — für die Mail brauchen # Reviewer aber eine extern erreichbare URL. DSMS_PUBLIC_GATEWAY = os.environ.get( "DSMS_PUBLIC_GATEWAY", "https://dsms-dev.breakpilot.ai", ) def _publicize_gateway_url(internal_url: str) -> str: """Replace internal dsms-node host with the public gateway.""" if not internal_url: return "" return internal_url.replace( "http://dsms-node:8080", DSMS_PUBLIC_GATEWAY, ).replace( "http://bp-compliance-dsms-node:8080", DSMS_PUBLIC_GATEWAY, ) async def run_b17(state: dict) -> None: """Trigger walk recording + store metadata in state.""" req = state.get("req") if req is None: return homepage = "" for d in req.documents: if d.url: p = urlparse(d.url) if p.scheme and p.netloc: homepage = f"{p.scheme}://{p.netloc}/" break if not homepage: return walk: dict = {} try: async with httpx.AsyncClient(timeout=180.0) as c: r = await c.post( f"{CONSENT_TESTER_URL}/scan-audit-walk", json={"url": homepage, "dwell_s": 4.0, "max_links": 8}, timeout=180.0, ) if r.status_code == 200: walk = r.json() except Exception as e: logger.warning("B17 audit-walk request failed: %s", e) return if not walk or not walk.get("walk_id"): return state["audit_walk"] = walk state["audit_walk_html"] = _render(walk) logger.info( "B17 audit-walk: %s · %d actions · video %d bytes · sha256 %s", walk.get("walk_id"), len(walk.get("actions") or []), (walk.get("video") or {}).get("size_bytes", 0), ((walk.get("video") or {}).get("sha256") or "")[:12], ) def _video_link(walk_id: str) -> str: """External URL for the recorded video (when consent-tester is reachable from the audit reviewer).""" return f"{CONSENT_TESTER_URL}/audit-walks/{walk_id}/video.webm" def _render(walk: dict) -> str: wid = walk.get("walk_id") or "" video = walk.get("video") or {} actions = walk.get("actions") or [] nav_count = sum(1 for a in actions if a.get("action") == "navigate") sha = (video.get("sha256") or "")[:12] size_kb = round((video.get("size_bytes") or 0) / 1024, 1) walk_link = _video_link(wid) meta_link = f"{CONSENT_TESTER_URL}/audit-walks/{wid}/walk.json" # Stufe-3 DSMS-Anchor video_dsms = (video.get("dsms") or {}) meta_dsms = (walk.get("walk_json_dsms") or {}) video_cid = video_dsms.get("cid") or "" meta_cid = meta_dsms.get("cid") or "" video_gw = _publicize_gateway_url(video_dsms.get("gateway_url") or "") meta_gw = _publicize_gateway_url(meta_dsms.get("gateway_url") or "") dsms_html = "" if video_cid or meta_cid: parts = [] if video_cid: link = (f"" f"{html.escape(video_cid[:20])}…" if video_gw else f"{html.escape(video_cid)}") parts.append(f"Video-CID: {link}") if meta_cid: link = (f"" f"{html.escape(meta_cid[:20])}…" if meta_gw else f"{html.escape(meta_cid)}") parts.append(f"walk.json-CID: {link}") dsms_html = ( "

" "🔒 DSMS-Anchor (manipulationssicher): " + " · ".join(parts) + "

" ) rows = [] for a in actions: ts = (a.get("timestamp") or "")[11:19] # HH:MM:SS act = a.get("action") or "" detail = "" if act == "goto" or act == "navigate": detail = (a.get("url") or "")[:120] if a.get("status"): detail += f" → HTTP {a['status']}" elif act == "accept_banner": r = a.get("result") or "" if r == "clicked": detail = f"Banner akzeptiert ({a.get('phrase') or a.get('selector') or ''})" else: detail = "Kein Accept-Button gefunden" elif act == "discover_footer_links": detail = f"{a.get('count', 0)} Compliance-Links im Footer" elif act == "expand_accordions": n = a.get("expanded", 0) detail = (f"{n} Akkordeon/Details-Sektion(en) entfaltet" if n else "Keine Akkordeons gefunden") rows.append( f"{html.escape(ts)}" f"{html.escape(act)}" f"" f"{html.escape(detail)}" ) return ( "
" "

" "🎥 Audit-Walk-Video (Beweis-Aufzeichnung)" "

" "

" f"Video: " f"video.webm " f"({size_kb} KB, SHA-256 {html.escape(sha)}…) · " f"Metadata: " f"walk.json" "

" "

" f"{nav_count} Compliance-Seiten besucht, jede 4 Sek " "verweilt — Reviewer kann den Audit-Walk nachverfolgen." "

" + dsms_html + "" "" "" "" "" "" + "".join(rows) + "
Zeit (UTC)AktionDetail
" "
" )