c2c8783fee
Phase-5 split of agent_compliance_check_routes.py — the 2700-line
monolith was decomposed into 19 modules in compliance/api/agent_check/:
- Phase A-F: resolve / profile+check / banner+TCF / vendors raw+finalize /
HTML blocks top+mid+bot / email / persist
- Helpers: _constants, _helpers, _fetch, _discovery, _single_check
- Schemas + State + thin _orchestrator
A1 ZIP-Anhang nativ in _phase_e_email: evidence_zip_builder.py bundles
slices + manifest.json + audit_metadata.json (SHA256 per slice +
build_sha + source_url). smtp_sender.py erweitert um attachments-Parameter.
B1 COOKIE-CONSENT-UX-001 (Mobile Reachability): consent_reachability_check.py
parses footer anchors, classifies intent (reopen_cmp / info_only /
browser_deflect) + target (same_page_cmp / new_tab / external).
_b1_wiring.py fetches homepage with iPhone-UA + renders Art-7-Abs-3
severity-coloured block.
B3 TH-RETENTION (Cross-Doc Speicherdauer): retention_comparator.py
compares DSI claim ↔ cookie-table duration ↔ actual Max-Age/expires
with 5% tolerance + severity hierarchy (dsi_under_actual HIGH,
table_under_actual HIGH, dsi_vs_table MEDIUM, actual_under_table LOW
Safari-ITP-Hint). _b3_wiring.py + Top-10 mismatches table in mail.
Side-effects:
- Fixed silent UnboundLocalError in original Step 5 (gf_one_pager used
audit_quality_findings before declaration, caught by surrounding
except → block never rendered). New _phase_d3_blocks_bot.py runs
audit-quality FIRST.
- agent_compliance_check_routes.py removed from loc-exceptions.txt
("Phase 5 split target" — done).
Tests: 55/55 grün (B1 22 + B3 27 + saving_scan 6).
E2E: smoke against Elli DSE+Cookie produced HIGH/missing B1 finding,
TH-RETENTION table (17 cookies / 3 ✓ / 3 ✗ / 11 ?), evidence-zip
with 2 slices + manifest + audit_metadata (12089B, SHA256-chained,
source verified), email sent (attachments=1).
Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
106 lines
3.8 KiB
Python
106 lines
3.8 KiB
Python
"""B1 wiring — Mobile Consent-Reachability check + HTML block.
|
|
|
|
Fetches the homepage of the first submitted URL, runs the static
|
|
`evaluate_reachability` analysis on the footer, and renders the
|
|
result as an HTML block for the audit mail.
|
|
|
|
Only renders a block when the check FAILS — a passing site doesn't
|
|
need a block. The block is severity-colored and lists the specific
|
|
notes that triggered the finding (missing reopen anchor, new-tab
|
|
break, browser-deflection language).
|
|
"""
|
|
|
|
from __future__ import annotations
|
|
|
|
import html
|
|
import logging
|
|
|
|
import httpx
|
|
|
|
from compliance.services.consent_reachability_check import (
|
|
evaluate_reachability,
|
|
)
|
|
|
|
from ._helpers import _update
|
|
|
|
logger = logging.getLogger(__name__)
|
|
|
|
|
|
async def run_b1(state: dict) -> None:
    """Check mobile consent reachability and record the outcome on ``state``.

    The site root (``scheme://netloc/``) is derived from the first submitted
    document whose URL carries both a scheme and a host. That page is fetched
    with an iPhone Safari User-Agent and passed to ``evaluate_reachability``.

    On success two keys are written:

    * ``state["reachability_finding"]`` - the raw finding
    * ``state["reachability_html"]`` - the rendered audit-mail block

    The function returns early without raising, and without writing either
    key, when no usable URL exists, the fetch raises, or the final response
    status is not 200.
    """
    from urllib.parse import urlparse

    request = state["req"]
    check_id = state["check_id"]

    # Lazily parse document URLs; the first one with scheme + host wins.
    parsed_urls = (urlparse(doc.url) for doc in request.documents if doc.url)
    homepage_url = next(
        (
            f"{parts.scheme}://{parts.netloc}/"
            for parts in parsed_urls
            if parts.scheme and parts.netloc
        ),
        "",
    )
    if not homepage_url:
        return

    _update(check_id, "Mobile Consent-Reachability prüfen...", 95)

    # Request the page the way Mobile Safari on an iPhone would.
    mobile_safari_ua = (
        "Mozilla/5.0 (iPhone; CPU iPhone OS 17_5 "
        "like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) "
        "Version/17.5 Mobile/15E148 Safari/604.1"
    )
    try:
        async with httpx.AsyncClient(
            timeout=20.0,
            follow_redirects=True,
            headers={"User-Agent": mobile_safari_ua},
        ) as client:
            response = await client.get(homepage_url)
            if response.status_code != 200:
                logger.info(
                    "B1: homepage fetch %s → HTTP %d",
                    homepage_url,
                    response.status_code,
                )
                return
            page_html = response.text
    except Exception as exc:
        # Best effort: a failed fetch is logged and the check is skipped.
        logger.warning("B1: homepage fetch failed: %s", exc)
        return

    result = evaluate_reachability(page_html, homepage_url)
    state["reachability_finding"] = result
    state["reachability_html"] = _render_block(result)
    logger.info(
        "B1 Reachability: passed=%s severity=%s reason=%s",
        result["passed"],
        result.get("severity"),
        result.get("severity_reason"),
    )
|
|
|
|
|
|
def _render_block(finding: dict) -> str:
    """Render a failed reachability finding as an audit-mail HTML block.

    Args:
        finding: Result dict of the reachability check. ``passed`` is
            required. ``severity``, ``severity_reason``, ``notes`` and
            ``reopen_anchor`` (with ``text``, ``href``, ``target_class``)
            are optional.

    Returns:
        An empty string when ``finding["passed"]`` is truthy, otherwise a
        ``<div>`` whose border and heading are red for severity ``HIGH`` and
        amber for anything else.

    Raises:
        KeyError: if ``finding`` has no ``passed`` key.

    Every value taken from ``finding`` is coerced to ``str`` and HTML-escaped
    before interpolation, so neither markup in the scanned page nor a
    non-string value can break the mail.
    """
    if finding["passed"]:
        return ""

    sev = str(finding.get("severity") or "").upper()
    color = "#dc2626" if sev == "HIGH" else "#f59e0b"

    notes_html = "".join(
        f"<li>{html.escape(str(note))}</li>"
        for note in finding.get("notes") or []
    )

    anchor = finding.get("reopen_anchor") or {}
    anchor_html = ""
    if anchor:
        # Truncate BEFORE escaping so an entity is never cut in half.
        anchor_text = str(anchor.get("text") or "")[:80]
        anchor_href = str(anchor.get("href") or "")[:120]
        target_class = str(anchor.get("target_class") or "—")
        anchor_html = (
            "<p style='margin:8px 0 0;font-size:13px;color:#475569;'>"
            "Gefundener Footer-Link: "
            f"<code>{html.escape(anchor_text)}</code> "
            f"→ <code>{html.escape(anchor_href)}</code> "
            f"(target_class: {html.escape(target_class)})"
            "</p>"
        )

    reason = html.escape(str(finding.get("severity_reason") or ""))
    return (
        f"<div style='margin:24px 0;padding:16px;border-left:4px solid {color};"
        "background:#fef2f2;border-radius:4px;'>"
        f"<h2 style='margin:0 0 8px;color:{color};font-size:16px;'>"
        "COOKIE-CONSENT-UX-001 — Mobile Consent-Reachability</h2>"
        "<p style='margin:0 0 8px;font-size:14px;'><strong>Severity:</strong> "
        f"{html.escape(sev)} ({reason})</p>"
        "<p style='margin:0 0 4px;font-size:14px;'>"
        "Art. 7 Abs. 3 DSGVO: Widerruf muss so einfach wie Erteilung sein. "
        "Auf Mobile-Safari konnten wir folgendes Problem feststellen:</p>"
        "<ul style='margin:8px 0 0 20px;font-size:14px;color:#7f1d1d;'>"
        f"{notes_html}</ul>"
        f"{anchor_html}"
        "</div>"
    )
|