feat: Browser-Matrix Stufe 1.a + 2 weitere GT-Findings + Plausibility-LLM-Härtung
Stage 1.a Browser-Matrix (Task #15) — Multi-Engine Scaffolding:
- consent-tester/Dockerfile: firefox + webkit + Xvfb deps
- playwright install chromium firefox webkit
- services/browser_profiles.py: Registry mit DEFAULT_PROFILES (Chromium-Headed/Firefox-Headed/WebKit-Headed/Mobile-Safari) + EXTRA_PROFILES (Chrome-Channel, Edge, Brave)
- services/multi_browser_scanner.py: run_matrix() orchestriert N parallele Scans + worst-of-Aggregation + 3 Sub-Scores (Pre-Consent 50%, Reject-Respekt 30%, Banner-Design 20%) + Hard-Fail-Cap auf <60% bei Pre-Consent/Reject-Verstoß
- routes_matrix.py: POST /scan-matrix Endpoint (eigenes Modul, damit main.py unter 500 LOC bleibt)

KNOWN: Stage 1.a-Shim ruft alle Profile auf demselben Chromium, echte Engine-Diversität in Stage 1.b (consent_scanner.py Param)

Coverage-Gap 3 (Task #17): 2/3 verbleibende GT-Lücken geschlossen:
- B9 impressum_multi_entity_check (IMPRESSUM-001): erkennt USt-IdNr/HR/GF-Fehlen pro Entity bei multi-entity Impressen (Elli: USt-IdNr nur bei Elli Mobility, fehlt bei VW Group Charging)
- B10 transfer_mechanism_check (TRANSFER-001): pro Non-EU-Vendor in cmp_vendors prüft DSE auf DPF/SCCs/BCRs/Einwilligung im ±400-char-Window. Findet Vendors ohne benannten Mechanismus.
- TH-RETENTION-002 (AI-Datenkategorie-Differenzierung) bleibt semantisch-tief, vorgesehen für Specialist-Agents Task #18.

Plausibility-LLM Empty-Response-Härtung (Task #16):
- BATCH_SIZE 8 → 4, EXCERPT 4000 → 1500 chars, TIMEOUT 60 → 45s
- Single-retry mit halbierter Batch wenn LLM empty content zurückgibt — qwen3:30b-a3b rejektiert manchmal ≥6-Item-Prompts unter format='json'. Falls auch Half-Batch empty: log + skip.
- Pipeline läuft jetzt nicht mehr 10min in Timeouts.

GT-Coverage Sprung: 10/13 → 11/13 (85%). 4/4 HIGH ✓, 5/6 MEDIUM ✓, 2/3 LOW ✓.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -0,0 +1,92 @@
|
||||
"""B9 + B10 wiring — Multi-Entity-Impressum + Drittland-Mechanismus.
|
||||
|
||||
Runs after B6/B7/B8. Adds Findings into `state["extra_findings"]`
|
||||
and re-renders the extra-block HTML.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import html
|
||||
import logging
|
||||
|
||||
from compliance.services.impressum_multi_entity_check import (
|
||||
check_multi_entity_impressum,
|
||||
)
|
||||
from compliance.services.transfer_mechanism_check import (
|
||||
check_transfer_mechanism,
|
||||
)
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
def run_b9b10(state: dict) -> None:
    """Run the B9 and B10 checks and merge their findings into ``state``.

    Calls ``check_multi_entity_impressum`` and ``check_transfer_mechanism``
    with the shared pipeline ``state``. When at least one of them reports
    something, the new findings are appended to ``state["extra_findings"]``
    and ``state["extra_findings_html"]`` is rebuilt from the full list.
    When neither check reports anything, ``state`` is left untouched.

    Args:
        state: Mutable pipeline state dict; updated in place.
    """
    # Read the existing list before running the checks; a missing or empty
    # entry is replaced by a fresh list.
    merged = state.get("extra_findings") or []
    fresh: list[dict] = [
        *check_multi_entity_impressum(state),
        *check_transfer_mechanism(state),
    ]
    if fresh:
        merged.extend(fresh)
        state["extra_findings"] = merged
        # Re-render the whole block so earlier findings and the new ones
        # end up in a single HTML fragment.
        state["extra_findings_html"] = _render(merged)
        logger.info(
            "B9/B10 added %d findings (total extra=%d)",
            len(fresh),
            len(merged),
        )
|
||||
|
||||
|
||||
def _render(findings: list[dict]) -> str:
|
||||
cards = []
|
||||
for f in findings:
|
||||
sev = (f.get("severity") or "").upper()
|
||||
color = "#dc2626" if sev == "HIGH" else (
|
||||
"#f59e0b" if sev == "MEDIUM" else "#64748b"
|
||||
)
|
||||
meta = ""
|
||||
if f.get("entities_missing"):
|
||||
meta = ("<div style='font-size:12px;color:#475569;margin-top:6px;'>"
|
||||
f"<em>Fehlt bei: "
|
||||
f"{html.escape(', '.join(f['entities_missing']))}</em>"
|
||||
"</div>")
|
||||
elif f.get("vendor"):
|
||||
meta = ("<div style='font-size:12px;color:#475569;margin-top:6px;'>"
|
||||
f"<em>Vendor: {html.escape(f['vendor'])} "
|
||||
f"({html.escape(f.get('country','?'))})</em>"
|
||||
"</div>")
|
||||
elif f.get("doc_date"):
|
||||
meta = ("<div style='font-size:12px;color:#475569;margin-top:6px;'>"
|
||||
f"<em>Stand: {html.escape(f['doc_date'])} "
|
||||
f"({f.get('age_years','?')} J. alt)</em>"
|
||||
"</div>")
|
||||
elif f.get("detected_provider"):
|
||||
meta = ("<div style='font-size:12px;color:#475569;margin-top:6px;'>"
|
||||
f"<em>Erkannter Provider: "
|
||||
f"{html.escape(f['detected_provider'])}</em>"
|
||||
"</div>")
|
||||
elif f.get("evidence_dse"):
|
||||
meta = ("<div style='font-size:12px;color:#475569;margin-top:6px;'>"
|
||||
f"<em>In DSE: {html.escape(', '.join(f['evidence_dse']))}</em>"
|
||||
"</div>")
|
||||
cards.append(
|
||||
f"<div style='margin:12px 0;padding:14px;background:#fff;"
|
||||
f"border-left:3px solid {color};border-radius:4px;'>"
|
||||
f"<div style='font-weight:600;color:{color};font-size:14px;'>"
|
||||
f"{sev} · {html.escape(f.get('check_id') or '')}</div>"
|
||||
f"<div style='font-size:14px;margin-top:4px;'>"
|
||||
f"<strong>{html.escape(f.get('title') or '')}</strong></div>"
|
||||
f"<div style='font-size:12px;color:#64748b;margin-top:2px;'>"
|
||||
f"{html.escape(f.get('norm') or '')}</div>"
|
||||
f"{meta}"
|
||||
f"<div style='font-size:13px;margin-top:8px;background:#dcfce7;"
|
||||
f"padding:8px 10px;border-radius:4px;'>"
|
||||
f"<strong>→ Empfehlung:</strong> "
|
||||
f"{html.escape(f.get('action') or '')}</div>"
|
||||
"</div>"
|
||||
)
|
||||
return (
|
||||
"<div style='margin:24px 0;padding:16px;border-left:4px solid #f59e0b;"
|
||||
"background:#fffbeb;border-radius:4px;'>"
|
||||
"<h2 style='margin:0 0 8px;color:#92400e;font-size:16px;'>"
|
||||
"📌 Zusätzliche Cross-Doc-Befunde"
|
||||
"</h2>"
|
||||
+ "".join(cards) +
|
||||
"</div>"
|
||||
)
|
||||
@@ -21,6 +21,7 @@ from ._b3_wiring import run_b3
|
||||
from ._b4_wiring import run_b4
|
||||
from ._b5_wiring import run_b5
|
||||
from ._b6b7b8_wiring import run_b6b7b8
|
||||
from ._b9b10_wiring import run_b9b10
|
||||
from ._constants import _compliance_check_jobs
|
||||
from ._phase_a_resolve import run_phase_a
|
||||
from ._phase_b_profile_check import run_phase_b
|
||||
@@ -63,6 +64,7 @@ async def run_compliance_check(check_id: str, req) -> None:
|
||||
run_b4(state) # Cross-doc vendor-consistency (Elli Vertex↔Iadvize)
|
||||
run_b5(state) # AI-Act Art. 50 transparency
|
||||
run_b6b7b8(state) # DPO-cross-doc + Doc-Staleness + CMP-fingerprint
|
||||
run_b9b10(state) # Multi-Entity-Impressum + Drittland-Mechanismus
|
||||
# Phase D-3 top/mid/bot: Step 5 HTML blocks
|
||||
await run_phase_d3_top(state)
|
||||
await run_phase_d3_mid(state)
|
||||
|
||||
Reference in New Issue
Block a user