c2c8783fee
Phase-5 split of agent_compliance_check_routes.py — the 2700-line
monolith was decomposed into 19 modules in compliance/api/agent_check/:
- Phase A-F: resolve / profile+check / banner+TCF / vendors raw+finalize /
HTML blocks top+mid+bot / email / persist
- Helpers: _constants, _helpers, _fetch, _discovery, _single_check
- Schemas + State + thin _orchestrator
A1 Native ZIP attachment in _phase_e_email: evidence_zip_builder.py bundles
slices + manifest.json + audit_metadata.json (SHA256 per slice +
build_sha + source_url). smtp_sender.py extended with an attachments parameter.
B1 COOKIE-CONSENT-UX-001 (Mobile Reachability): consent_reachability_check.py
parses footer anchors, classifies intent (reopen_cmp / info_only /
browser_deflect) + target (same_page_cmp / new_tab / external).
_b1_wiring.py fetches homepage with iPhone-UA + renders Art-7-Abs-3
severity-coloured block.
B3 TH-RETENTION (Cross-Doc Speicherdauer): retention_comparator.py
compares DSI claim ↔ cookie-table duration ↔ actual Max-Age/expires
with 5% tolerance + severity hierarchy (dsi_under_actual HIGH,
table_under_actual HIGH, dsi_vs_table MEDIUM, actual_under_table LOW
Safari-ITP-Hint). _b3_wiring.py + Top-10 mismatches table in mail.
Side-effects:
- Fixed silent UnboundLocalError in original Step 5 (gf_one_pager used
audit_quality_findings before declaration, caught by surrounding
except → block never rendered). New _phase_d3_blocks_bot.py runs
audit-quality FIRST.
- agent_compliance_check_routes.py removed from loc-exceptions.txt
("Phase 5 split target" — done).
Tests: 55/55 green (B1 22 + B3 27 + saving_scan 6).
E2E: smoke against Elli DSE+Cookie produced HIGH/missing B1 finding,
TH-RETENTION table (17 cookies / 3 ✓ / 3 ✗ / 11 ?), evidence-zip
with 2 slices + manifest + audit_metadata (12089B, SHA256-chained,
source verified), email sent (attachments=1).
Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
120 lines
4.0 KiB
Python
120 lines
4.0 KiB
Python
"""
|
|
Evidence ZIP Builder — bundles cookie-evidence slices into one ZIP
suitable as email attachment for the audit trail.

Why: capture_cookie_evidence_slices() produces N PNG slices per check
with timestamps + per-slice SHA256. Without an attachment to the
compliance report, the evidence chain stops at the backend. The ZIP
makes the slices portable so a DSB / lawyer can hand them to an
auditor or supervisory authority.

ZIP layout:
    evidence.zip
    ├── manifest.json          # per-slice metadata
    ├── audit_metadata.json    # run-level (check_id, url, build_sha, ...)
    └── slice_001.png ...      # binary PNG per slice
|
|
"""
|
|
|
|
from __future__ import annotations
|
|
|
|
import base64
|
|
import io
|
|
import json
|
|
import logging
|
|
import os
|
|
import zipfile
|
|
from datetime import datetime, timezone
|
|
|
|
logger = logging.getLogger(__name__)


def _slice_index(raw_idx: object, fallback: int) -> int:
    """Return ``raw_idx`` as an int, or ``fallback`` if it is missing or not numeric."""
    try:
        return int(raw_idx)
    except (TypeError, ValueError, OverflowError):
        return fallback


def _unique_slice_name(idx: int, used_names: set[str]) -> str:
    """Return ``slice_NNN.png`` for ``idx``, suffixed (``_2``, ``_3``, ...) if already taken."""
    base = f"slice_{idx + 1:03d}"
    fname = f"{base}.png"
    dup = 1
    while fname in used_names:
        dup += 1
        fname = f"{base}_{dup}.png"
    return fname


def build_evidence_zip(
    slices: list[dict],
    meta: dict | None = None,
    check_id: str = "",
) -> bytes:
    """Build a ZIP archive with all slices + a manifest.

    The archive contains one ``slice_NNN.png`` per usable slice (NNN is the
    slice's ``idx`` + 1), a ``manifest.json`` with per-slice metadata and an
    ``audit_metadata.json`` with run-level metadata.

    Slices are skipped (with a warning) instead of aborting the archive when
    they are not dicts, when ``png_b64`` cannot be base64-decoded, or when it
    decodes to zero bytes — an empty PNG is not usable evidence. A missing or
    non-numeric ``idx`` falls back to the slice's position in ``slices``, and
    a filename that is already taken gets a numeric suffix
    (``slice_001_2.png``) so no slice overwrites another on extraction.

    Args:
        slices: list of dicts from capture_cookie_evidence_slices():
            each {"idx", "ts", "top_y", "bot_y", "sha256", "png_b64",
            "png_size"}. ``None`` or an empty list yields an archive with
            only the two JSON files.
        meta: run-level dict from the same call:
            {"total_height_px", "width_px", "accepted_banner",
            "expanded", "url", "captured_at", "slice_count"}
        check_id: the compliance-check job id

    Returns:
        raw ZIP bytes (suitable as email attachment payload)
    """
    run_meta = meta or {}
    buf = io.BytesIO()
    manifest_slices: list[dict] = []
    used_names: set[str] = set()

    with zipfile.ZipFile(buf, "w", zipfile.ZIP_DEFLATED) as zf:
        for pos, s in enumerate(slices or []):
            if not isinstance(s, dict):
                logger.warning(
                    "evidence_zip: skip slice at position %d, not a dict",
                    pos,
                )
                continue

            idx = _slice_index(s.get("idx"), pos)
            try:
                # binascii.Error (bad padding) is a ValueError subclass;
                # non-str/bytes payloads raise TypeError.
                png = base64.b64decode(s.get("png_b64") or "")
            except (TypeError, ValueError) as e:
                logger.warning(
                    "evidence_zip: skip slice %s, b64 decode failed: %s",
                    idx, e,
                )
                continue
            if not png:
                logger.warning(
                    "evidence_zip: skip slice %s, empty PNG payload",
                    idx,
                )
                continue

            fname = _unique_slice_name(idx, used_names)
            used_names.add(fname)
            zf.writestr(fname, png)
            manifest_slices.append({
                "filename": fname,
                "slice_idx": idx,
                "captured_at": s.get("ts", ""),
                "top_y_px": s.get("top_y"),
                "bot_y_px": s.get("bot_y"),
                "sha256_short": s.get("sha256", ""),
                "png_size_bytes": s.get("png_size", len(png)),
            })

        manifest = {
            "schema_version": "1.0",
            "check_id": check_id,
            "slices": manifest_slices,
            "slice_count": len(manifest_slices),
        }
        zf.writestr(
            "manifest.json",
            json.dumps(manifest, indent=2, ensure_ascii=False),
        )

        audit_meta = {
            "schema_version": "1.0",
            "check_id": check_id,
            "build_sha": os.environ.get("BUILD_SHA", "unknown"),
            "generated_at": datetime.now(timezone.utc).isoformat(),
            "source_url": run_meta.get("url", ""),
            "captured_at": run_meta.get("captured_at", ""),
            "accepted_banner": run_meta.get("accepted_banner"),
            "expanded": run_meta.get("expanded"),
            "total_height_px": run_meta.get("total_height_px"),
            "width_px": run_meta.get("width_px"),
            # Capture-time count as reported by the caller; manifest.json
            # carries the number of slices actually written to the archive.
            "slice_count": run_meta.get(
                "slice_count", len(manifest_slices),
            ),
            "note": (
                "Each slice_NNN.png is an overlapping screenshot fragment "
                "of the cookie policy page captured at captured_at. "
                "sha256_short is the first 16 hex chars of the SHA-256 of "
                "the raw PNG bytes — use it to verify the slice was not "
                "modified after capture."
            ),
        }
        zf.writestr(
            "audit_metadata.json",
            json.dumps(audit_meta, indent=2, ensure_ascii=False),
        )

    # Read the buffer only after the ZipFile is closed, so the central
    # directory has been written.
    data = buf.getvalue()
    logger.info(
        "evidence_zip built: %d slices, %d bytes, check_id=%s",
        len(manifest_slices), len(data), check_id,
    )
    return data
|