"""B17 wiring — Audit-Walk-Recorder. Triggert beim consent-tester einen kompletten Playwright-Site-Walk mit Video-Aufzeichnung. Result: Video + JSON-Action-Index mit Timestamps + SHA-256-Hash für Manipulation-Schutz. Speichert nur die Walk-Metadata + Video-URL im state. Der eigentliche File-Body bleibt im consent-tester-Volume (Stufe 1). Stufe 3 wird das Video zu DSMS-IPFS hochladen und die CID hier einbinden. """ from __future__ import annotations import html import logging import os from urllib.parse import urlparse import httpx from ._constants import CONSENT_TESTER_URL logger = logging.getLogger(__name__) # Optionaler Override für die öffentliche IPFS-Gateway-URL. DSMS gibt # intern http://dsms-node:8080/ipfs/{cid} zurück — für die Mail brauchen # Reviewer aber eine extern erreichbare URL. DSMS_PUBLIC_GATEWAY = os.environ.get( "DSMS_PUBLIC_GATEWAY", "https://dsms-dev.breakpilot.ai", ) def _publicize_gateway_url(internal_url: str) -> str: """Replace internal dsms-node host with the public gateway.""" if not internal_url: return "" return internal_url.replace( "http://dsms-node:8080", DSMS_PUBLIC_GATEWAY, ).replace( "http://bp-compliance-dsms-node:8080", DSMS_PUBLIC_GATEWAY, ) async def run_b17(state: dict) -> None: """Trigger walk recording + store metadata in state.""" req = state.get("req") if req is None: return homepage = "" for d in req.documents: if d.url: p = urlparse(d.url) if p.scheme and p.netloc: homepage = f"{p.scheme}://{p.netloc}/" break if not homepage: return walk: dict = {} try: async with httpx.AsyncClient(timeout=180.0) as c: r = await c.post( f"{CONSENT_TESTER_URL}/scan-audit-walk", json={"url": homepage, "dwell_s": 4.0, "max_links": 8}, timeout=180.0, ) if r.status_code == 200: walk = r.json() except Exception as e: logger.warning("B17 audit-walk request failed: %s", e) return if not walk or not walk.get("walk_id"): return # Stufe-5: annotierte Screenshots pro Finding. Schickt die # gesammelten findings (B1 mobile + B16 slug-drift + B13 widerruf) # zum consent-tester der pro Finding ein PNG erzeugt. annotations: list[dict] = [] try: findings_for_annot: list[dict] = [] rf = state.get("reachability_finding") if rf and not rf.get("passed", True): findings_for_annot.append({ "check_id": "COOKIE-CONSENT-UX-001", "mobile_playwright": rf.get("mobile_playwright") or {}, }) for f in (state.get("extra_findings") or []): cid = (f.get("check_id") or "").upper() if cid in ("URL-SLUG-DRIFT-001", "WIDERRUF-REACH-001"): findings_for_annot.append(f) if findings_for_annot: async with httpx.AsyncClient(timeout=120.0) as c: r = await c.post( f"{CONSENT_TESTER_URL}/annotate-findings", json={"findings": findings_for_annot, "home_url": homepage}, timeout=120.0, ) if r.status_code == 200: annotations = (r.json() or {}).get("annotations") or [] logger.info( "B17 annotations: %d Screenshots erzeugt", len(annotations), ) except Exception as e: logger.warning("annotate-findings request failed: %s", e) walk["annotations"] = annotations state["audit_walk"] = walk state["audit_walk_html"] = _render(walk) logger.info( "B17 audit-walk: %s · %d actions · video %d bytes · sha256 %s", walk.get("walk_id"), len(walk.get("actions") or []), (walk.get("video") or {}).get("size_bytes", 0), ((walk.get("video") or {}).get("sha256") or "")[:12], ) def _video_link(walk_id: str) -> str: """External URL for the recorded video (when consent-tester is reachable from the audit reviewer).""" return f"{CONSENT_TESTER_URL}/audit-walks/{walk_id}/video.webm" def _render(walk: dict) -> str: wid = walk.get("walk_id") or "" video = walk.get("video") or {} actions = walk.get("actions") or [] nav_count = sum(1 for a in actions if a.get("action") == "navigate") sha = (video.get("sha256") or "")[:12] size_kb = round((video.get("size_bytes") or 0) / 1024, 1) walk_link = _video_link(wid) meta_link = f"{CONSENT_TESTER_URL}/audit-walks/{wid}/walk.json" # Stufe-3 DSMS-Anchor video_dsms = (video.get("dsms") or {}) meta_dsms = (walk.get("walk_json_dsms") or {}) video_cid = video_dsms.get("cid") or "" meta_cid = meta_dsms.get("cid") or "" video_gw = _publicize_gateway_url(video_dsms.get("gateway_url") or "") meta_gw = _publicize_gateway_url(meta_dsms.get("gateway_url") or "") dsms_html = "" if video_cid or meta_cid: parts = [] if video_cid: link = (f"" f"{html.escape(video_cid[:20])}…" if video_gw else f"{html.escape(video_cid)}") parts.append(f"Video-CID: {link}") if meta_cid: link = (f"" f"{html.escape(meta_cid[:20])}…" if meta_gw else f"{html.escape(meta_cid)}") parts.append(f"walk.json-CID: {link}") dsms_html = ( "

" "🔒 DSMS-Anchor (manipulationssicher): " + " · ".join(parts) + "

" ) rows = [] for a in actions: ts = (a.get("timestamp") or "")[11:19] # HH:MM:SS act = a.get("action") or "" detail = "" if act == "goto" or act == "navigate": detail = (a.get("url") or "")[:120] if a.get("status"): detail += f" → HTTP {a['status']}" elif act == "accept_banner": r = a.get("result") or "" if r == "clicked": detail = f"Banner akzeptiert ({a.get('phrase') or a.get('selector') or ''})" else: detail = "Kein Accept-Button gefunden" elif act == "discover_footer_links": detail = f"{a.get('count', 0)} Compliance-Links im Footer" elif act == "expand_accordions": n = a.get("expanded", 0) detail = (f"{n} Akkordeon/Details-Sektion(en) entfaltet" if n else "Keine Akkordeons gefunden") elif act == "tour_cookie_banner": n = a.get("clicks", 0) opened = "Settings geöffnet" if a.get("settings_opened") \ else "kein Settings-Trigger gefunden" detail = f"Cookie-Banner-Tour: {n} Klicks ({opened})" rows.append( f"{html.escape(ts)}" f"{html.escape(act)}" f"" f"{html.escape(detail)}" ) return ( "
" "

" "🎥 Audit-Walk-Video (Beweis-Aufzeichnung)" "

" "

" f"Video: " f"video.webm " f"({size_kb} KB, SHA-256 {html.escape(sha)}…) · " f"Metadata: " f"walk.json" "

" "

" f"{nav_count} Compliance-Seiten besucht, jede 4 Sek " "verweilt — Reviewer kann den Audit-Walk nachverfolgen." "

" + dsms_html + "" "" "" "" "" "" + "".join(rows) + "
Zeit (UTC)AktionDetail
" "
" )