refactor(agent-check): split routes file (2692→347 LOC) + wire B1/B3/A1 [guardrail-change]
Phase-5 split of agent_compliance_check_routes.py — the 2700-line
monolith was decomposed into 19 modules in compliance/api/agent_check/:
- Phase A-F: resolve / profile+check / banner+TCF / vendors raw+finalize /
HTML blocks top+mid+bot / email / persist
- Helpers: _constants, _helpers, _fetch, _discovery, _single_check
- Schemas + State + thin _orchestrator
A1 ZIP-Anhang nativ in _phase_e_email: evidence_zip_builder.py bundles
slices + manifest.json + audit_metadata.json (SHA256 per slice +
build_sha + source_url). smtp_sender.py erweitert um attachments-Parameter.
B1 COOKIE-CONSENT-UX-001 (Mobile Reachability): consent_reachability_check.py
parses footer anchors, classifies intent (reopen_cmp / info_only /
browser_deflect) + target (same_page_cmp / new_tab / external).
_b1_wiring.py fetches homepage with iPhone-UA + renders Art-7-Abs-3
severity-coloured block.
B3 TH-RETENTION (Cross-Doc Speicherdauer): retention_comparator.py
compares DSI claim ↔ cookie-table duration ↔ actual Max-Age/expires
with 5% tolerance + severity hierarchy (dsi_under_actual HIGH,
table_under_actual HIGH, dsi_vs_table MEDIUM, actual_under_table LOW
Safari-ITP-Hint). _b3_wiring.py + Top-10 mismatches table in mail.
Side-effects:
- Fixed silent UnboundLocalError in original Step 5 (gf_one_pager used
audit_quality_findings before declaration, caught by surrounding
except → block never rendered). New _phase_d3_blocks_bot.py runs
audit-quality FIRST.
- agent_compliance_check_routes.py removed from loc-exceptions.txt
("Phase 5 split target" — done).
Tests: 55/55 grün (B1 22 + B3 27 + saving_scan 6).
E2E: smoke against Elli DSE+Cookie produced HIGH/missing B1 finding,
TH-RETENTION table (17 cookies / 3 ✓ / 3 ✗ / 11 ?), evidence-zip
with 2 slices + manifest + audit_metadata (12089B, SHA256-chained,
source verified), email sent (attachments=1).
Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -0,0 +1,189 @@
|
||||
"""B3 wiring — Cross-doc retention consistency check + HTML block.
|
||||
|
||||
Combines three sources of retention truth per cookie:
|
||||
|
||||
- DSI text (state["doc_texts"]["dse"], falling back to state["doc_texts"]["cookie"])
|
||||
- cookie-table `duration` from cmp_vendors[i]["cookies"][j]
|
||||
- actual cookie expiry from banner_result["cookies_detailed"][k]
|
||||
|
||||
and produces per-cookie findings + a TH-RETENTION theme summary. Only
|
||||
renders an HTML block when there are findings to show; the block is
|
||||
sorted by severity (HIGH first) and shows the top-10 mismatches.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import html
|
||||
import logging
|
||||
import time
|
||||
|
||||
from compliance.services.retention_comparator import (
|
||||
build_retention_theme_summary,
|
||||
compare_retention,
|
||||
extract_retention_claims,
|
||||
)
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
def _actual_max_age_seconds(cookie: dict) -> float | None:
    """Return the remaining lifetime of *cookie* in seconds, if any.

    Lookup order:

    1. ``cookie["max_age"]`` — used as-is when it is a positive number.
    2. ``cookie["expires"]`` — when it is a positive number it is treated
       as a Unix timestamp (seconds since epoch, which is what Playwright
       reports according to the original note) and converted into the
       number of seconds left from the current wall-clock time.

    Returns ``None`` when neither field yields a positive lifetime, e.g.
    for session cookies (``-1`` / ``0``) or timestamps already in the past.
    """
    max_age = cookie.get("max_age")
    if isinstance(max_age, (int, float)) and max_age > 0:
        return float(max_age)

    expires_at = cookie.get("expires")
    if not (isinstance(expires_at, (int, float)) and expires_at > 0):
        # Missing, non-numeric or non-positive expiry: no usable lifetime.
        return None

    # Relative lifetime depends on "now"; an expiry in the past counts
    # as having no lifetime at all.
    remaining = expires_at - time.time()
    return float(remaining) if remaining > 0 else None
|
||||
|
||||
|
||||
def run_b3(state: dict) -> None:
    """Run the cross-document retention check and store the results.

    Reads ``state["doc_texts"]``, ``state["cmp_vendors"]`` and
    ``state["banner_result"]``. Returns early, leaving ``state`` untouched,
    when there is no DSI text (neither ``dse`` nor ``cookie``) or when no
    vendor lists a cookie with a non-blank name.

    Otherwise writes three keys into ``state``:

    - ``retention_findings``: one ``compare_retention`` result per cookie
    - ``retention_theme_summary``: ``build_retention_theme_summary`` output
    - ``retention_html``: the rendered block (may be an empty string)
    """
    doc_texts = state["doc_texts"]
    cmp_vendors = state["cmp_vendors"]
    banner_result = state["banner_result"]

    # Prefer the privacy notice text, fall back to the cookie policy text.
    dsi_text = doc_texts.get("dse") or doc_texts.get("cookie") or ""
    if not dsi_text:
        return

    records: list[dict] = []
    names: list[str] = []
    vendors: list[str] = []
    for vendor in cmp_vendors or []:
        vendor_name = (vendor.get("name") or "").strip()
        if vendor_name:
            vendors.append(vendor_name)
        for entry in vendor.get("cookies") or []:
            cookie_name = (entry.get("name") or "").strip()
            if not cookie_name:
                continue
            # The duration may be stored under any of three keys.
            table_duration = (
                entry.get("duration")
                or entry.get("persistence")
                or entry.get("expiry")
                or ""
            )
            names.append(cookie_name)
            records.append({
                "name": cookie_name,
                "vendor": vendor_name,
                "table_duration": table_duration,
                "actual_max_age": None,
            })

    if not records:
        return

    # Attach the observed lifetime by case-insensitive cookie name; when
    # several observed cookies share a name, the last one wins.
    if banner_result:
        observed: dict[str, dict] = {}
        for detail in banner_result.get("cookies_detailed") or []:
            key = (detail.get("name") or "").lower()
            if key:
                observed[key] = detail
        for record in records:
            hit = observed.get(record["name"].lower())
            if hit is not None:
                record["actual_max_age"] = _actual_max_age_seconds(hit)

    claims = extract_retention_claims(dsi_text, names, vendors)

    findings: list[dict] = [
        compare_retention(
            cookie_name=record["name"],
            table_duration=record["table_duration"],
            actual_max_age_seconds=record["actual_max_age"],
            dsi_claims=claims,
            vendor_name=record["vendor"] or None,
        )
        for record in records
    ]

    summary = build_retention_theme_summary(findings)
    state["retention_findings"] = findings
    state["retention_theme_summary"] = summary
    state["retention_html"] = _render_block(summary, findings)
    logger.info(
        "B3 Retention: %d findings, %d passed, %d failed, %d incomplete",
        summary["total"],
        summary["passed"],
        summary["failed"],
        summary["incomplete"],
    )
|
||||
|
||||
|
||||
def _fmt_days(d: float | None) -> str:
    """Format a duration given in days as a short human-readable label.

    - ``None``        -> ``"—"``
    - below 1 day     -> whole hours, e.g. ``"12h"``
    - below 30 days   -> whole days, e.g. ``"5d"``
    - below 365 days  -> whole 30-day months, e.g. ``"3mo"``
    - otherwise       -> years with one decimal, e.g. ``"2.0y"``

    Hours, days and months are truncated towards zero, not rounded.
    """
    if d is None:
        return "—"

    if d < 1:
        label = f"{int(d * 24)}h"
    elif d < 30:
        label = f"{int(d)}d"
    elif d < 365:
        label = f"{int(d / 30)}mo"
    else:
        label = f"{d / 365:.1f}y"
    return label
|
||||
|
||||
|
||||
def _render_block(summary: dict, findings: list[dict]) -> str:
    """Render the TH-RETENTION HTML block for the report mail.

    Returns an empty string when ``summary["total"]`` is 0 or when no
    finding is a real mismatch (a finding counts as a mismatch when its
    ``matches`` value is falsy and its ``severity_reason`` is not
    ``"incomplete"``).

    Otherwise the block contains the total / passed / failed / incomplete
    counters from ``summary`` and a table with at most ten mismatches,
    ordered by severity (HIGH, MEDIUM, LOW, then anything else) and,
    within one severity, by ``diff_days`` descending.
    """
    if summary["total"] == 0:
        return ""

    mismatches = [
        item for item in findings
        if not item.get("matches")
        and item.get("severity_reason") != "incomplete"
    ]
    if not mismatches:
        # Everything either matched or could not be evaluated.
        return ""

    severity_order = {"HIGH": 0, "MEDIUM": 1, "LOW": 2}

    def _priority(item):
        # Unknown severities sort last; larger diffs sort first.
        rank = severity_order.get((item.get("severity") or "").upper(), 9)
        return rank, -(item.get("diff_days") or 0)

    mismatches.sort(key=_priority)

    severity_colors = {"HIGH": "#dc2626", "MEDIUM": "#f59e0b"}
    cell_open = "<td style='padding:6px 10px;border-bottom:1px solid #e5e7eb;'>"
    header_open = "<th style='text-align:left;padding:6px 10px;'>"

    table_rows = []
    for item in mismatches[:10]:
        severity = (item.get("severity") or "").upper()
        color = severity_colors.get(severity, "#64748b")
        # Cookie and vendor names originate from the scanned site, so
        # they are HTML-escaped before being embedded.
        cookie_cell = html.escape(item.get("cookie_name") or "—")
        vendor_cell = html.escape(item.get("vendor_name") or "—")
        values_cell = (
            f"DSI: {_fmt_days(item.get('dsi_days'))} • "
            f"Tabelle: {_fmt_days(item.get('table_days'))} • "
            f"Realität: {_fmt_days(item.get('actual_days'))}"
        )
        mismatch_cell = html.escape(item.get("mismatch_type") or "—")
        table_rows.append("".join((
            "<tr>",
            cell_open, "<code>", cookie_cell, "</code></td>",
            cell_open, vendor_cell, "</td>",
            cell_open, values_cell, "</td>",
            "<td style='padding:6px 10px;border-bottom:1px solid #e5e7eb;",
            f"color:{color};font-weight:600;'>",
            f"{severity} ({mismatch_cell})</td>",
            "</tr>",
        )))

    total = summary["total"]
    passed = summary["passed"]
    failed = summary["failed"]
    incomplete = summary["incomplete"]

    heading = (
        "<h2 style='margin:0 0 8px;color:#854d0e;font-size:16px;'>"
        "TH-RETENTION — Speicherdauer-Konsistenz (DSI ↔ Cookie-Tabelle ↔ Realität)"
        "</h2>"
    )
    counters = (
        "<p style='margin:0 0 8px;font-size:14px;color:#3f3f46;'>"
        f"<strong>{total}</strong> Cookies verglichen: "
        f"<strong style='color:#15803d;'>{passed} ✓</strong> / "
        f"<strong style='color:#dc2626;'>{failed} ✗</strong> / "
        f"<strong style='color:#64748b;'>{incomplete} ?</strong></p>"
    )
    column_titles = ("Cookie", "Vendor", "Werte", "Mismatch")
    table_head = (
        "<thead><tr style='background:#f1f5f9;'>"
        + "".join(f"{header_open}{title}</th>" for title in column_titles)
        + "</tr></thead>"
    )
    table = (
        "<table style='width:100%;border-collapse:collapse;font-size:13px;"
        "margin-top:8px;background:#fff;'>"
        f"{table_head}<tbody>{''.join(table_rows)}</tbody>"
        "</table>"
    )
    return (
        "<div style='margin:24px 0;padding:16px;border-left:4px solid #dc2626;"
        "background:#fefce8;border-radius:4px;'>"
        f"{heading}{counters}{table}"
        "</div>"
    )
|
||||
Reference in New Issue
Block a user