"""Bundle the audit-walk video plus metadata into a ZIP for email attachment.

The backend has no direct access to the consent-tester volume, so the video
is downloaded over HTTP from the consent-tester (stage-1 endpoint).

DSMS CIDs are part of the walk dict and are additionally written into
README.txt so the recipient can also verify the original through an IPFS
gateway.

Output: bytes (ZIP stream), ready to be used as an SMTP attachment.
"""

from __future__ import annotations

import base64
import io
import json
import logging
import zipfile
from urllib.parse import quote

import httpx

logger = logging.getLogger(__name__)


def _readme(walk: dict) -> str:
    """Render the README.txt text that describes the evidence package.

    Every field is read defensively from ``walk``; missing values fall back
    to ``"?"`` (identifiers, timestamps, hash), ``0`` (video size) or ``"—"``
    (DSMS CIDs).

    Args:
        walk: The audit-walk record. Keys read here: ``walk_id``, ``url``,
            ``started_at``, ``completed_at``, ``video`` (``sha256``,
            ``size_bytes``, ``dsms.cid``), ``walk_json_dsms.cid`` and
            ``actions``.

    Returns:
        The README text (German, user-facing).
    """
    wid = walk.get("walk_id") or "?"
    url = walk.get("url") or "?"
    started = walk.get("started_at") or "?"
    completed = walk.get("completed_at") or "?"

    video = walk.get("video") or {}
    # str() keeps the slice in the template safe for a non-string hash value.
    sha = str(video.get("sha256") or "?")
    size = video.get("size_bytes") or 0
    video_cid = (video.get("dsms") or {}).get("cid") or "—"
    meta_cid = (walk.get("walk_json_dsms") or {}).get("cid") or "—"

    # Evaluate the action list once and ignore entries that are not dicts, so
    # a malformed entry cannot abort the whole bundle.
    actions = [a for a in (walk.get("actions") or []) if isinstance(a, dict)]
    nav = sum(1 for a in actions if a.get("action") == "navigate")
    accs = sum(
        (a.get("expanded") or 0)
        for a in actions
        if a.get("action") == "expand_accordions"
    )

    # NOTE(review): the line breaks of this template were reconstructed from a
    # whitespace-collapsed source; confirm the layout against the original.
    return f"""BreakPilot Compliance — Audit-Walk-Beweis-Paket

Walk-ID: {wid}
Site: {url}
Aufgenommen: {started} → {completed}
Engine: Playwright WebKit (Mobile-Viewport 1280×800)

Inhalt dieses Pakets:
- video.webm {size:,} Bytes, SHA-256 {sha[:32]}…
- walk.json Action-Index mit UTC-Timestamps pro Schritt
- README.txt diese Datei

Walk-Statistik:
- {nav} Compliance-Seiten besucht (Datenschutz, Impressum, AGB, ...)
- {accs} Akkordeon-/Details-Sektionen automatisch entfaltet

DSMS-Anker (IPFS, manipulationssicher):
Video: {video_cid}
walk.json: {meta_cid}

Zur Verifikation:
1. Lade das Original via https://dsms-dev.breakpilot.ai/ipfs/
2. Vergleiche SHA-256 mit obigem Hash
3. Öffne video.webm in einem modernen Browser (VLC / Chrome)
4. Lies walk.json um die Klick-Sequenz nachzuvollziehen
"""


def _safe_member_name(name: object) -> str:
    """Return ``name`` as a relative ZIP member path without traversal parts.

    Backslashes are treated as separators; empty, ``.`` and ``..`` components
    are dropped, which also removes any leading slash. Returns ``""`` when
    nothing usable remains.
    """
    parts = str(name).replace("\\", "/").split("/")
    return "/".join(p for p in parts if p not in ("", ".", ".."))


def _fetch_video(consent_tester_url: str, wid: object) -> bytes:
    """Download the walk video from the consent-tester; ``b""`` on failure."""
    # Percent-encode the id so characters such as "/", "?" or "#" cannot
    # change the request target.
    video_url = (
        f"{consent_tester_url.rstrip('/')}"
        f"/audit-walks/{quote(str(wid), safe='')}/video.webm"
    )
    try:
        with httpx.Client(timeout=60.0) as client:
            resp = client.get(video_url)
            if resp.status_code == 200:
                return resp.content
            logger.warning(
                "audit-walk video fetch returned HTTP %s for walk %s",
                resp.status_code,
                wid,
            )
    except Exception as e:
        # Deliberately broad: the video is best-effort and the ZIP is still
        # built without it.
        logger.warning("audit-walk video fetch failed: %s", e)
    return b""


def build_audit_walk_zip(
    walk: dict,
    consent_tester_url: str = "http://bp-compliance-consent-tester:8094",
    extra_files: dict[str, bytes] | None = None,
) -> bytes:
    """Fetch the video from the consent-tester and bundle it into a ZIP.

    The archive contains ``walk.json`` and ``README.txt``, plus ``video.webm``
    when the download succeeded, one ``findings/<filename>`` entry per usable
    annotation and any non-empty ``extra_files``.

    Args:
        walk: The audit-walk record. ``walk_id`` is required; ``annotations``
            is an optional list of dicts with ``filename`` and ``png_b64``
            (base64-encoded image data).
        consent_tester_url: Base URL of the consent-tester service.
        extra_files: Optional name→bytes mapping (e.g. cookies-full.csv from
            the B19 export). Entries with empty content are skipped.

    Returns:
        The ZIP archive as bytes, or ``b""`` when ``walk`` has no ``walk_id``.

    Raises:
        TypeError: If ``walk`` contains values ``json.dumps`` cannot
            serialise.
    """
    wid = walk.get("walk_id") or ""
    if not wid:
        return b""

    # Pull the video binary from the consent-tester (stage-1 endpoint).
    video_bytes = _fetch_video(consent_tester_url, wid)

    walk_json_bytes = json.dumps(walk, indent=2, ensure_ascii=False).encode(
        "utf-8",
    )
    readme_bytes = _readme(walk).encode("utf-8")

    # Annotated screenshots per finding (stage 5).
    annotations = walk.get("annotations") or []

    buf = io.BytesIO()
    with zipfile.ZipFile(buf, "w", zipfile.ZIP_DEFLATED) as z:
        if video_bytes:
            z.writestr("video.webm", video_bytes)
        z.writestr("walk.json", walk_json_bytes)
        z.writestr("README.txt", readme_bytes)

        for ann in annotations:
            if not isinstance(ann, dict):
                continue
            # Sanitised so an entry can never leave the findings/ folder.
            fname = _safe_member_name(ann.get("filename") or "")
            b64 = ann.get("png_b64") or ""
            if not fname or not b64:
                continue
            try:
                z.writestr(f"findings/{fname}", base64.b64decode(b64))
            except Exception as e:
                # Best-effort: one broken annotation must not lose the bundle.
                logger.warning("annotation %s write failed: %s", fname, e)

        for raw_name, content in (extra_files or {}).items():
            if not content:
                continue
            member = _safe_member_name(raw_name)
            if not member:
                logger.warning(
                    "extra-file %r skipped: no usable file name", raw_name,
                )
                continue
            try:
                z.writestr(member, content)
            except Exception as e:
                logger.warning("extra-file %s write failed: %s", member, e)

    return buf.getvalue()