# breakpilot-compliance/backend-compliance/compliance/services/audit_walk_zip_builder.py

"""Bundle the audit-walk video and its metadata into a ZIP for email attachment.

The backend has no direct access to the consent-tester volume, so the
video is downloaded via HTTP from the consent-tester (stage-1 endpoint).
The DSMS CIDs are part of the walk dict and are additionally written to
README.txt so that the recipient can also verify the original via an
IPFS gateway.

Output: bytes (ZIP stream), ready to be used as an SMTP attachment.
"""

from __future__ import annotations

import io
import json
import logging
import zipfile

import httpx

logger = logging.getLogger(__name__)


def _readme(walk: dict) -> str:
    """Render the README.txt text that accompanies an audit-walk bundle.

    Reads identification, timing, video and DSMS fields from ``walk``.
    Missing or empty text fields are rendered as ``"?"``, missing CIDs as
    ``"—"`` and a missing video size as ``0``. The walk statistics are
    derived from ``walk["actions"]``: the number of ``navigate`` actions
    and the sum of ``expanded`` over all ``expand_accordions`` actions.
    """
    video_meta = walk.get("video") or {}
    video_anchor = video_meta.get("dsms") or {}
    json_anchor = walk.get("walk_json_dsms") or {}

    walk_id = walk.get("walk_id") or "?"
    site = walk.get("url") or "?"
    began = walk.get("started_at") or "?"
    ended = walk.get("completed_at") or "?"
    digest = video_meta.get("sha256") or "?"
    size_bytes = video_meta.get("size_bytes") or 0
    video_cid = video_anchor.get("cid") or "—"
    json_cid = json_anchor.get("cid") or "—"

    # Single pass over the recorded actions to collect both statistics.
    pages_visited = 0
    sections_expanded = 0
    for step in walk.get("actions") or []:
        kind = step.get("action")
        if kind == "navigate":
            pages_visited += 1
        elif kind == "expand_accordions":
            sections_expanded += step.get("expanded") or 0

    return f"""BreakPilot Compliance — Audit-Walk-Beweis-Paket

Walk-ID:           {walk_id}
Site:              {site}
Aufgenommen:       {began} → {ended}
Engine:            Playwright WebKit (Mobile-Viewport 1280×800)

Inhalt dieses Pakets:
  - video.webm     {size_bytes:,} Bytes, SHA-256 {digest[:32]}…
  - walk.json      Action-Index mit UTC-Timestamps pro Schritt
  - README.txt     diese Datei

Walk-Statistik:
  - {pages_visited} Compliance-Seiten besucht (Datenschutz, Impressum, AGB, ...)
  - {sections_expanded} Akkordeon-/Details-Sektionen automatisch entfaltet

DSMS-Anker (IPFS, manipulationssicher):
  Video:      {video_cid}
  walk.json:  {json_cid}

Zur Verifikation:
  1. Lade das Original via https://dsms-dev.breakpilot.ai/ipfs/<CID>
  2. Vergleiche SHA-256 mit obigem Hash
  3. Öffne video.webm in einem modernen Browser (VLC / Chrome)
  4. Lies walk.json um die Klick-Sequenz nachzuvollziehen
"""


def build_audit_walk_zip(
    walk: dict,
    consent_tester_url: str = "http://bp-compliance-consent-tester:8094",
    extra_files: dict[str, bytes] | None = None,
) -> bytes:
    """Fetch video from consent-tester + bundle with walk.json + README.

    `extra_files` is optional name→bytes mapping (e.g. cookies-full.csv
    from B19 export). Placed at the ZIP root next to video.webm.
    """
    wid = walk.get("walk_id") or ""
    if not wid:
        return b""

    # Pull video binary from consent-tester (Stufe 1 endpoint)
    video_bytes = b""
    try:
        with httpx.Client(timeout=60.0) as c:
            r = c.get(f"{consent_tester_url}/audit-walks/{wid}/video.webm")
            if r.status_code == 200:
                video_bytes = r.content
    except Exception as e:
        logger.warning("audit-walk video fetch failed: %s", e)

    walk_json_bytes = json.dumps(walk, indent=2, ensure_ascii=False).encode(
        "utf-8",
    )
    readme_bytes = _readme(walk).encode("utf-8")

    # Annotierte Screenshots pro Finding (Stufe 5)
    import base64
    annotations = walk.get("annotations") or []

    buf = io.BytesIO()
    with zipfile.ZipFile(buf, "w", zipfile.ZIP_DEFLATED) as z:
        if video_bytes:
            z.writestr("video.webm", video_bytes)
        z.writestr("walk.json", walk_json_bytes)
        z.writestr("README.txt", readme_bytes)
        for a in annotations:
            fname = a.get("filename") or ""
            b64 = a.get("png_b64") or ""
            if not fname or not b64:
                continue
            try:
                z.writestr(f"findings/{fname}", base64.b64decode(b64))
            except Exception as e:
                logger.warning("annotation %s write failed: %s",
                                fname, e)
        for fname, content in (extra_files or {}).items():
            if content:
                try:
                    z.writestr(fname, content)
                except Exception as e:
                    logger.warning("extra-file %s write failed: %s",
                                    fname, e)
    return buf.getvalue()