""" Evidence ZIP Builder — bundles cookie-evidence slices into one ZIP suitable as email attachment for the audit trail. Why: capture_cookie_evidence_slices() produces N PNG slices per check with timestamps + per-slice SHA256. Without an attachment to the compliance report, the evidence chain stops at the backend. The ZIP makes the slices portable so a DSB / lawyer can hand them to an auditor or supervisory authority. ZIP layout: evidence.zip ├── manifest.json # per-slice metadata ├── audit_metadata.json # run-level (check_id, url, build_sha, ...) └── slice_001.png ... # binary PNG per slice """ from __future__ import annotations import base64 import io import json import logging import os import zipfile from datetime import datetime, timezone logger = logging.getLogger(__name__) def build_evidence_zip( slices: list[dict], meta: dict | None = None, check_id: str = "", ) -> bytes: """Build a ZIP archive with all slices + a manifest. Args: slices: list of dicts from capture_cookie_evidence_slices(): each {"idx", "ts", "top_y", "bot_y", "sha256", "png_b64", "png_size"} meta: run-level dict from the same call: {"total_height_px", "width_px", "accepted_banner", "expanded", "url", "captured_at", "slice_count"} check_id: the compliance-check job id Returns: raw ZIP bytes (suitable as email attachment payload) """ buf = io.BytesIO() manifest_slices: list[dict] = [] with zipfile.ZipFile(buf, "w", zipfile.ZIP_DEFLATED) as zf: for s in slices or []: idx = int(s.get("idx", 0)) fname = f"slice_{idx + 1:03d}.png" try: png = base64.b64decode(s.get("png_b64", "")) except Exception as e: logger.warning( "evidence_zip: skip slice %s, b64 decode failed: %s", idx, e, ) continue zf.writestr(fname, png) manifest_slices.append({ "filename": fname, "slice_idx": idx, "captured_at": s.get("ts", ""), "top_y_px": s.get("top_y"), "bot_y_px": s.get("bot_y"), "sha256_short": s.get("sha256", ""), "png_size_bytes": s.get("png_size", len(png)), }) manifest = { "schema_version": "1.0", "check_id": check_id, "slices": manifest_slices, "slice_count": len(manifest_slices), } zf.writestr( "manifest.json", json.dumps(manifest, indent=2, ensure_ascii=False), ) audit_meta = { "schema_version": "1.0", "check_id": check_id, "build_sha": os.environ.get("BUILD_SHA", "unknown"), "generated_at": datetime.now(timezone.utc).isoformat(), "source_url": (meta or {}).get("url", ""), "captured_at": (meta or {}).get("captured_at", ""), "accepted_banner": (meta or {}).get("accepted_banner"), "expanded": (meta or {}).get("expanded"), "total_height_px": (meta or {}).get("total_height_px"), "width_px": (meta or {}).get("width_px"), "slice_count": (meta or {}).get( "slice_count", len(manifest_slices), ), "note": ( "Each slice_NNN.png is an overlapping screenshot fragment " "of the cookie policy page captured at captured_at. " "sha256_short is the first 16 hex chars of the SHA-256 of " "the raw PNG bytes — use it to verify the slice was not " "modified after capture." ), } zf.writestr( "audit_metadata.json", json.dumps(audit_meta, indent=2, ensure_ascii=False), ) data = buf.getvalue() logger.info( "evidence_zip built: %d slices, %d bytes, check_id=%s", len(manifest_slices), len(data), check_id, ) return data