Files
breakpilot-compliance/consent-tester/services/finding_annotator.py
T
Benjamin Admin b16130369a feat(b17): Stufe 4 banner-tour + Stufe 5 annotierte Screenshots + V2-default
Stufe 4 — Cookie-Banner-Tour vor dem Accept-Klick:
  - audit_walk_banner_tour.tour_cookie_banner(): öffnet Settings
    (16 Phrase-Varianten), scrollt vertikal, aktiviert jedes
    [role=tab], expandet jedes [aria-expanded=false] / details /
    summary + 14 CMP-spezifische Selektoren. Max 35 Klicks,
    Best-Effort.
  - audit_walk_recorder ruft tour_cookie_banner() VOR
    _try_accept_banner auf — Reviewer sieht den vollen Consent-
    Katalog im Video (Vendor-Liste, Kategorien, Zwecke).
  - Recorder unter 500 LOC (412+155 split).

Stufe 5 — Annotierte Screenshots pro Finding:
  - finding_annotator.annotate_url(): WebKit headless, JS-Inject
    eines rot-banner-Labels oben + roter Outline um das Element
    (Selector oder Text-Match).
  - finding_annotator.annotate_findings(): dispatched 3 Cases —
    B1 Tap-Target (Anchor markiert mit "Tap-Target X×Y px"),
    B16 URL-Slug-Drift (404-Seite mit "/<slug> 404"),
    B13 Widerruf (Footer markiert "Widerruf-Link fehlt").
  - routes_audit_walk.POST /annotate-findings (consent-tester).
  - _b17_wiring ruft annotate-findings nach record_audit_walk und
    speichert annotations in walk.annotations.
  - audit_walk_zip_builder packt PNGs nach findings/<name>.png ins
    ZIP — Reviewer hat Beweis-Bilder im Postfach.

Plausibility Circuit-Breaker:
  - Nach 6 consecutive empty batches (PLAUSIBILITY_EMPTY_BUDGET=6)
    bricht die ganze Phase ab statt 200 Calls zu warten. Fix für
    qwen3-down + große DSE-Sites (BMW: ohne Breaker 21min, mit
    Breaker ~3min).

audit_walk_zip_builder fängt walk.annotations ab und legt sie unter
  findings/<fname>.png im ZIP-Anhang ab.

V2-Default:
  - docker-compose.yml backend-compliance.environment.MAIL_RENDER_V2:
    default 'true'. Ohne diesen Override liefert die Engine
    weiterhin das alte Legacy-Mail-Layout, in dem die B-Wiring-
    Blöcke nicht sichtbar sind.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-06-07 20:44:42 +02:00

184 lines
6.3 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
"""Finding-Annotator — pro Befund ein PNG mit Marker.
Erzeugt für die Audit-Mail einen visuellen Beweis pro Finding:
roter Rand um das verletzende Element + Label-Banner darüber.
Aktuell unterstützte Annotation-Typen:
- "tap_target_too_small" B1 Mobile-Reachability — markiert den
Footer-Anchor mit Größenanforderung
- "slug_404" B16 URL-Slug-Drift — Screenshot des
404-Slugs mit "Standard-Slug 404"
- "missing_link_in_footer" B13 Widerruf — Footer-Screenshot mit
"→ Widerruf-Link erwartet, fehlt"
Output: bytes (PNG). Best-Effort: Fehler beim Rendern werfen nicht,
sondern returnen leere bytes (caller dropt das Asset).
"""
from __future__ import annotations
import base64
import logging
from typing import Any
logger = logging.getLogger(__name__)
# JavaScript arrow function that injects the label banner shown at the top
# of the screenshot. It receives the label text as its single argument,
# creates a <div> pinned to the top edge of the viewport (position:fixed,
# maximum z-index so it sits above page content), styles it as a red bar
# with bold white centred text, sets the label via textContent (so the
# text is never interpreted as HTML) and appends it to document.body.
_LABEL_JS = """
(label) => {
const div = document.createElement('div');
div.style.cssText = `
position:fixed;top:0;left:0;right:0;z-index:2147483647;
background:#dc2626;color:#fff;padding:14px 24px;
font:bold 18px/1.2 -apple-system,sans-serif;
box-shadow:0 4px 12px rgba(0,0,0,0.4);
border-bottom:3px solid #7f1d1d;text-align:center;
`;
div.textContent = label;
document.body.appendChild(div);
}
"""
# JavaScript arrow function that highlights one element on the page.
# Argument: an object with the keys `selector` and `text`.
#   1. If `selector` is non-empty, document.querySelector(selector) is
#      tried; an exception (e.g. invalid selector syntax) is swallowed.
#   2. If nothing was found and `text` is non-empty, the first
#      <a>, <button> or [role=button] element whose trimmed innerText
#      contains `text` (case-insensitive) is used instead.
# Returns false when no element matched. Otherwise the element is scrolled
# to the vertical centre of the viewport, gets a 4px red outline with a 4px
# offset plus a translucent red inset overlay (alpha 0.15), and the
# function returns true.
_MARK_JS = """
(args) => {
const sel = args.selector;
const text = args.text;
let el = null;
if (sel) {
try { el = document.querySelector(sel); } catch(e) {}
}
if (!el && text) {
const all = document.querySelectorAll('a, button, [role=button]');
for (const c of all) {
if ((c.innerText || '').trim().toLowerCase()
.includes(text.toLowerCase())) {
el = c; break;
}
}
}
if (!el) return false;
el.scrollIntoView({behavior:'instant', block:'center'});
el.style.outline = '4px solid #dc2626';
el.style.outlineOffset = '4px';
el.style.boxShadow = 'inset 0 0 0 9999px rgba(220,38,38,0.15)';
return true;
}
"""
async def annotate_url(
    url: str,
    label: str,
    selector: str | None = None,
    text_match: str | None = None,
    viewport_w: int = 1280,
    viewport_h: int = 800,
    timeout_s: float = 20.0,
) -> bytes:
    """Open ``url`` in headless WebKit, annotate it and return a PNG.

    A label banner (``_LABEL_JS``) is injected at the top of the page. When
    ``selector`` or ``text_match`` is given, ``_MARK_JS`` additionally tries
    to highlight one element (selector first, text match as fallback).

    Args:
        url: Page to load.
        label: Text shown in the injected banner.
        selector: Optional CSS selector of the element to highlight.
        text_match: Optional text used to find the element when the
            selector is missing or matches nothing.
        viewport_w: Viewport width in CSS pixels.
        viewport_h: Viewport height in CSS pixels.
        timeout_s: Navigation timeout in seconds (applies to ``goto`` only).

    Returns:
        The viewport screenshot as PNG bytes, or ``b""`` if Playwright is
        not importable or anything fails while rendering. This function is
        best-effort and does not raise for rendering problems.
    """
    try:
        from playwright.async_api import async_playwright
    except Exception as e:
        logger.warning("playwright unavailable: %s", e)
        return b""

    # The whole Playwright session lives inside the try so that a failing
    # driver start-up is reported as "no screenshot" instead of raising.
    try:
        async with async_playwright() as p:
            browser = await p.webkit.launch(headless=True)
            try:
                context = await browser.new_context(
                    viewport={"width": viewport_w, "height": viewport_h},
                    locale="de-DE",
                )
                page = await context.new_page()
                try:
                    await page.goto(url, wait_until="domcontentloaded",
                                    timeout=timeout_s * 1000)
                except Exception as e:
                    # Deliberately non-fatal: a partially loaded page is
                    # still annotated and captured.
                    logger.info("annotate_url goto failed for %s: %s",
                                url, e)
                # Give late-loading content a moment before injecting.
                await page.wait_for_timeout(1500)
                await page.evaluate(_LABEL_JS, label)
                if selector or text_match:
                    try:
                        await page.evaluate(
                            _MARK_JS,
                            {"selector": selector or "",
                             "text": text_match or ""},
                        )
                    except Exception as e:
                        # Marking is optional; the banner alone is still
                        # useful evidence.
                        logger.info("annotate mark failed: %s", e)
                # Let scroll/outline changes settle before the capture.
                await page.wait_for_timeout(500)
                return await page.screenshot(full_page=False, type="png")
            finally:
                # Always release the browser (this also closes its
                # contexts), on success and on every error path. A failure
                # while closing must not discard a captured screenshot.
                try:
                    await browser.close()
                except Exception as e:
                    logger.debug("annotate_url browser close failed: %s", e)
    except Exception as e:
        logger.warning("annotate_url failed: %s", e)
        return b""
async def annotate_findings(
    findings: list[dict],
    home_url: str,
) -> list[dict[str, Any]]:
    """Render one annotated PNG per supported finding.

    Supported finding shapes (matched on upper-cased ``check_id``):
      - ``COOKIE-CONSENT-UX-001`` with
        ``mobile_playwright: {anchor_text: str, tap_target_px: {w, h}}``:
        the homepage with the anchor highlighted by text match.
      - ``URL-SLUG-DRIFT-001`` with ``alt_slugs_404: [slug, ...]`` and an
        optional ``doc_type``: a screenshot of the first listed slug,
        resolved against the origin of ``home_url``.
      - ``WIDERRUF-REACH-001``: the homepage with ``<footer>`` highlighted.

    Anything else is skipped silently: non-dict entries, unknown or
    non-string check ids, and missing or malformed sub-structures.

    Args:
        findings: Finding dicts; ``None`` is treated as an empty list.
        home_url: Homepage URL of the audited site.

    Returns:
        A list of ``{"filename": str, "png_b64": str}`` dicts, one per
        finding for which ``annotate_url`` produced a non-empty PNG.
    """
    # Function-scope imports keep the module's import block untouched.
    import re
    from urllib.parse import urlparse

    out: list[dict[str, Any]] = []
    for f in findings or []:
        if not isinstance(f, dict):
            continue  # unsupported shape, skip as documented
        cid = str(f.get("check_id") or "").upper()
        png = b""
        fname = ""
        if cid == "COOKIE-CONSENT-UX-001":
            mob = f.get("mobile_playwright")
            if not isinstance(mob, dict):
                mob = {}
            anchor_text = mob.get("anchor_text")
            if isinstance(anchor_text, str) and anchor_text:
                tp = mob.get("tap_target_px")
                if not isinstance(tp, dict):
                    tp = {}
                label = (
                    f"B1 — Tap-Target {tp.get('w', '?')}×{tp.get('h', '?')} "
                    "px (Anforderung ≥ 44×44)"
                )
                png = await annotate_url(
                    home_url, label, text_match=anchor_text,
                )
                fname = "finding-tap-target.png"
        elif cid == "URL-SLUG-DRIFT-001":
            alts = f.get("alt_slugs_404")
            slug = ""
            if isinstance(alts, (list, tuple)) and alts:
                # Only the first 404 slug is captured. Leading slashes are
                # dropped so the URL does not become "origin//slug".
                slug = str(alts[0] or "").lstrip("/")
            if slug:  # an empty slug would just screenshot the homepage
                doc_type = str(f.get("doc_type") or "x")
                parsed = urlparse(home_url)
                origin = f"{parsed.scheme}://{parsed.netloc}"
                slug_url = f"{origin}/{slug}"
                label = (
                    f"B16 — Standard-Slug 404 ({doc_type}): /{slug}"
                )
                png = await annotate_url(slug_url, label)
                # doc_type comes from the finding payload and ends up in a
                # file name: strip path separators, dots and other
                # non-word characters so it cannot escape its directory.
                safe_doc_type = (
                    re.sub(r"[^\w-]+", "_", doc_type).strip("_") or "x"
                )
                fname = f"finding-slug-404-{safe_doc_type}.png"
        elif cid == "WIDERRUF-REACH-001":
            label = "B13 — Widerrufsbelehrung im Footer erwartet, fehlt"
            png = await annotate_url(
                home_url, label, selector="footer",
            )
            fname = "finding-widerruf-missing.png"
        if png and fname:
            out.append({
                "filename": fname,
                "png_b64": base64.b64encode(png).decode("ascii"),
            })
    return out