feat(agent): SSE — progressive Themen-Tabs (Phase 2)

Der Compliance-Check streamt jetzt progressive Events; der Impressum-Tab
erscheint, sobald das Thema fertig ist, statt am Ende alles auf einmal.
Additiv — das Polling fürs finale Ergebnis bleibt.

- backend: _sse.py (Queue/emit/event_generator) + Endpoint
  /compliance-check/{id}/stream; _update emittiert progress,
  run_agent_outputs emittiert topic (läuft jetzt früh nach Phase B),
  Orchestrator emittiert complete/error.
- frontend: SSE-Proxy-Route + EventSource in ComplianceCheckTab merged
  topic-Events in agent_outputs -> Tab erscheint progressiv.
- Tests: backend 5 passed (SSE + agent_outputs); tsc 0 neue Fehler,
  vitest 2 passed, check-loc 0.

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
This commit is contained in:
Benjamin Admin
2026-06-10 19:07:26 +02:00
parent e21984e0ad
commit 65de90114a
8 changed files with 246 additions and 5 deletions
@@ -17,6 +17,8 @@ import logging
from compliance.services.specialist_agents import REGISTRY, AgentInput
from ._sse import emit
logger = logging.getLogger(__name__)
# topic key (matches state["doc_texts"]) -> registered agent_id
@@ -43,7 +45,9 @@ def _derive_scope(profile_dict: dict) -> list[str]:
async def run_agent_outputs(state: dict) -> None:
"""Für jedes Topic mit registriertem v3-Agent + ausreichend Text:
Agent laufen lassen und den strukturierten AgentOutput ablegen."""
Agent laufen lassen, AgentOutput ablegen + als SSE topic-Event
emittieren (Tab füllt sich progressiv)."""
check_id = state.get("check_id", "")
doc_texts = state.get("doc_texts") or {}
profile_dict = state.get("profile_dict") or {}
req = state.get("req")
@@ -75,6 +79,8 @@ async def run_agent_outputs(state: dict) -> None:
origin_domain=origin_domain,
))
outputs[topic] = out.model_dump(mode="json")
emit(check_id, {"type": "topic", "topic": topic,
"output": outputs[topic]})
logger.info(
"agent_outputs[%s]: %d findings, confidence %.2f",
topic, len(out.findings), out.confidence,
@@ -16,6 +16,7 @@ from ._constants import (
_DOC_TYPE_LABELS,
_compliance_check_jobs,
)
from ._sse import emit
logger = logging.getLogger(__name__)
@@ -26,6 +27,8 @@ def _update(check_id: str, msg: str, pct: int | None = None) -> None:
job["progress"] = msg
if pct is not None:
job["progress_pct"] = max(0, min(100, int(pct)))
emit(check_id, {"type": "progress", "msg": msg,
"pct": job.get("progress_pct", 0)})
def _doc_type_label(doc_type: str) -> str:
@@ -34,6 +34,7 @@ from ._b19_wiring import run_b19
from ._b20_wiring import run_b20
from ._b22_wiring import run_b22
from ._constants import _compliance_check_jobs
from ._sse import emit
from ._phase_a_resolve import run_phase_a
from ._phase_b_profile_check import run_phase_b
from ._phase_c_banner import run_phase_c
@@ -71,6 +72,10 @@ async def run_compliance_check(check_id: str, req) -> None:
logger.warning("chatbot-policy enrichment skipped: %s", e)
# Phase B: Step 2 (profile detect) + Step 3 (per-doc checks)
await run_phase_b(state)
# Strukturierter v3-AgentOutput pro Thema — früh (Impressum-Text +
# Profil liegen vor) → SSE topic-Event, Tab erscheint progressiv,
# während Banner/Vendor/B-Wirings noch laufen. Additiv zu B18.
await run_agent_outputs(state)
# Phase C: Step 3b-d (banner + cross-check + TCF) + Step 4
await run_phase_c(state)
# Phase C-2: optional browser-matrix scan (env BROWSER_MATRIX=true)
@@ -96,9 +101,6 @@ async def run_compliance_check(check_id: str, req) -> None:
run_b16(state) # Footer-Label-vs-URL-Slug-Drift
await run_b17(state) # Audit-Walk-Video (Beweis-Aufzeichnung)
await run_b18(state) # Impressum-Specialist-Agent (Pattern+LLM)
# Strukturierter v3-AgentOutput pro Thema → standardisierte
# Ergebnis-Tabs im Frontend (additiv zu B18-HTML).
await run_agent_outputs(state)
run_b19(state) # Cookie-Coherence (Salesforce-as-essential)
await run_b20(state) # Legacy-URL-Discovery (Sitemap+Wayback)
run_b22(state) # Cross-Domain-Legal-Doc-Hosting (Elli/LogPay)
@@ -110,8 +112,10 @@ async def run_compliance_check(check_id: str, req) -> None:
run_phase_e(state)
# Phase F: Step 7 persist + audit log + unified findings
run_phase_f(state)
emit(check_id, {"type": "complete", "status": "completed"})
except Exception as e:
logger.error("Compliance check %s failed: %s",
check_id, e, exc_info=True)
_compliance_check_jobs[check_id]["status"] = "failed"
_compliance_check_jobs[check_id]["error"] = str(e)[:500]
emit(check_id, {"type": "error", "error": str(e)[:300]})
@@ -0,0 +1,82 @@
"""SSE-Plumbing für den Compliance-Check — pro check_id eine Event-Queue
+ Generator. Spiegelt das Agent-Test-SSE (specialist_agent_routes).
ADDITIV: Das Polling auf GET /compliance-check/{check_id} bleibt die
Wahrheit fürs finale Ergebnis. SSE liefert nur **progressive** Events,
damit sich die Themen-Tabs füllen, sobald ein Thema fertig ist:
- {type:"progress", msg, pct} (aus _update)
- {type:"topic", topic, output} (aus run_agent_outputs, pro Thema)
- {type:"complete", status} (Orchestrator-Ende)
Geht ein Event verloren (Queue voll / kein Client) ist das unkritisch —
der Tab kommt spätestens mit dem finalen Poll.
"""
from __future__ import annotations
import asyncio
import json
import logging
from collections.abc import AsyncGenerator
from ._constants import _compliance_check_jobs
logger = logging.getLogger(__name__)
# In-memory event queues, keyed by check_id. They do not survive a process
# restart, which is acceptable for a live stream (polling is the persistent
# path).
_check_queues: dict[str, "asyncio.Queue[dict]"] = {}
# Job statuses at which event_generator stops streaming and closes the stream.
_TERMINAL_JOB_STATES = ("completed", "failed", "skipped_tdm")
def new_queue(check_id: str) -> None:
    """Register a fresh, bounded event queue for ``check_id``.

    Meant to be called when a check is started (POST /compliance-check).
    A queue already registered under the same id is replaced.
    """
    fresh_queue: "asyncio.Queue[dict]" = asyncio.Queue(maxsize=500)
    _check_queues[check_id] = fresh_queue
def emit(check_id: str, event: dict) -> None:
    """Push ``event`` onto the check's queue without blocking (best effort).

    The function is synchronous on purpose so that synchronous callers such
    as ``_update()`` can emit too. It never raises for delivery problems:

    - no queue registered for ``check_id`` -> the event is ignored;
    - queue full -> the event is dropped and the drop is logged at debug
      level (previously it vanished silently). Clients recover the state
      through the regular polling endpoint.
    """
    q = _check_queues.get(check_id)
    if q is None:
        return
    try:
        q.put_nowait(event)
    except asyncio.QueueFull:
        # Consumer is too slow or absent; polling catches up on the state.
        logger.debug(
            "SSE queue full for check %s; dropping %r event",
            check_id, event.get("type"),
        )
def _format_sse(payload: dict) -> str:
    """Serialize ``payload`` as a single SSE ``data:`` frame.

    Values that JSON cannot encode natively are converted with ``str``.
    The frame is terminated by the blank line that ends an SSE event.
    """
    encoded = json.dumps(payload, default=str)
    return "data: " + encoded + "\n\n"
async def event_generator(check_id: str) -> AsyncGenerator[str, None]:
    """Stream the check's queued events as SSE frames until it is terminal.

    Frames, in order:

    - no queue registered -> one ``stream_close`` frame with reason
      ``no_queue`` (the client should fall back to polling), then stop;
    - otherwise a ``hello`` frame, followed by every queued event;
    - after 25 s without an event a ``heartbeat`` frame is sent and the job
      status is checked; a terminal status ends the stream with
      ``complete`` + ``stream_close``;
    - a queued ``complete`` or ``error`` event ends the stream with
      ``stream_close``.

    Queue release: 5 minutes after the stream ends the queue is removed from
    the registry, but only once the check is no longer running and only if
    the registry still holds this very queue. Releasing unconditionally
    (as before) removed the queue of a still-running check after an early
    client disconnect, so ``emit()`` silently dropped all later events.
    """
    q = _check_queues.get(check_id)
    if q is None:
        # Check may already be finished (queue cleaned up) -> client polls.
        yield _format_sse({"type": "stream_close", "reason": "no_queue"})
        return

    # We are always driven from a running loop here; get_event_loop() is
    # deprecated for this use.
    loop = asyncio.get_running_loop()

    def _release_queue() -> None:
        job = _compliance_check_jobs.get(check_id)
        if job is not None and job.get("status") not in _TERMINAL_JOB_STATES:
            # Check still running: keep the queue so emit() keeps reaching
            # reconnecting clients, and look again later.
            loop.call_later(300, _release_queue)
            return
        # Never remove a newer queue registered under the same id.
        if _check_queues.get(check_id) is q:
            del _check_queues[check_id]

    yield _format_sse({"type": "hello", "check_id": check_id})
    try:
        while True:
            try:
                event = await asyncio.wait_for(q.get(), timeout=25.0)
            except asyncio.TimeoutError:
                yield _format_sse({"type": "heartbeat"})
                job = _compliance_check_jobs.get(check_id) or {}
                if job.get("status") in _TERMINAL_JOB_STATES:
                    # Terminal event was missed (e.g. dropped or consumed by
                    # another stream) -> synthesize it from the job status.
                    yield _format_sse({"type": "complete",
                                       "status": job.get("status")})
                    yield _format_sse({"type": "stream_close"})
                    return
                continue
            yield _format_sse(event)
            if event.get("type") in ("complete", "error"):
                yield _format_sse({"type": "stream_close"})
                return
    finally:
        # Release the queue only after 5 minutes (late reconnects).
        loop.call_later(300, _release_queue)