feat(agent): SSE — progressive Themen-Tabs (Phase 2)

Der Compliance-Check streamt jetzt progressive Events; der Impressum-Tab erscheint, sobald das Thema fertig ist, statt am Ende alles auf einmal. Additiv — das Polling fürs finale Ergebnis bleibt. - backend: _sse.py (Queue/emit/event_generator) + Endpoint /compliance-check/{id}/stream; _update emittiert progress, run_agent_outputs emittiert topic (laeuft jetzt frueh nach Phase B), Orchestrator emittiert complete/error. - frontend: SSE-Proxy-Route + EventSource in ComplianceCheckTab merged topic-Events in agent_outputs -> Tab erscheint progressiv. - Tests: backend 5 passed (SSE + agent_outputs); tsc 0 neue Fehler, vitest 2 passed, check-loc 0. Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
2026-06-10 19:07:26 +02:00
parent e21984e0ad
commit 65de90114a
8 changed files with 246 additions and 5 deletions
@@ -17,6 +17,8 @@ import logging

 from compliance.services.specialist_agents import REGISTRY, AgentInput

+from ._sse import emit
+
 logger = logging.getLogger(__name__)

 # topic key (matches state["doc_texts"]) -> registered agent_id
@@ -43,7 +45,9 @@ def _derive_scope(profile_dict: dict) -> list[str]:

 async def run_agent_outputs(state: dict) -> None:
    """Für jedes Topic mit registriertem v3-Agent + ausreichend Text:
-    Agent laufen lassen und den strukturierten AgentOutput ablegen."""
+    Agent laufen lassen, AgentOutput ablegen + als SSE topic-Event
+    emittieren (Tab füllt sich progressiv)."""
+    check_id = state.get("check_id", "")
    doc_texts = state.get("doc_texts") or {}
    profile_dict = state.get("profile_dict") or {}
    req = state.get("req")
@@ -75,6 +79,8 @@ async def run_agent_outputs(state: dict) -> None:
                origin_domain=origin_domain,
            ))
            outputs[topic] = out.model_dump(mode="json")
+            emit(check_id, {"type": "topic", "topic": topic,
+                            "output": outputs[topic]})
            logger.info(
                "agent_outputs[%s]: %d findings, confidence %.2f",
                topic, len(out.findings), out.confidence,
@@ -16,6 +16,7 @@ from ._constants import (
    _DOC_TYPE_LABELS,
    _compliance_check_jobs,
 )
+from ._sse import emit

 logger = logging.getLogger(__name__)

@@ -26,6 +27,8 @@ def _update(check_id: str, msg: str, pct: int | None = None) -> None:
    job["progress"] = msg
    if pct is not None:
        job["progress_pct"] = max(0, min(100, int(pct)))
+    emit(check_id, {"type": "progress", "msg": msg,
+                    "pct": job.get("progress_pct", 0)})


 def _doc_type_label(doc_type: str) -> str:
@@ -34,6 +34,7 @@ from ._b19_wiring import run_b19
 from ._b20_wiring import run_b20
 from ._b22_wiring import run_b22
 from ._constants import _compliance_check_jobs
+from ._sse import emit
 from ._phase_a_resolve import run_phase_a
 from ._phase_b_profile_check import run_phase_b
 from ._phase_c_banner import run_phase_c
@@ -71,6 +72,10 @@ async def run_compliance_check(check_id: str, req) -> None:
            logger.warning("chatbot-policy enrichment skipped: %s", e)
        # Phase B: Step 2 (profile detect) + Step 3 (per-doc checks)
        await run_phase_b(state)
+        # Strukturierter v3-AgentOutput pro Thema — früh (Impressum-Text +
+        # Profil liegen vor) → SSE topic-Event, Tab erscheint progressiv,
+        # während Banner/Vendor/B-Wirings noch laufen. Additiv zu B18.
+        await run_agent_outputs(state)
        # Phase C: Step 3b-d (banner + cross-check + TCF) + Step 4
        await run_phase_c(state)
        # Phase C-2: optional browser-matrix scan (env BROWSER_MATRIX=true)
@@ -96,9 +101,6 @@ async def run_compliance_check(check_id: str, req) -> None:
        run_b16(state)  # Footer-Label-vs-URL-Slug-Drift
        await run_b17(state)  # Audit-Walk-Video (Beweis-Aufzeichnung)
        await run_b18(state)  # Impressum-Specialist-Agent (Pattern+LLM)
-        # Strukturierter v3-AgentOutput pro Thema → standardisierte
-        # Ergebnis-Tabs im Frontend (additiv zu B18-HTML).
-        await run_agent_outputs(state)
        run_b19(state)  # Cookie-Coherence (Salesforce-as-essential)
        await run_b20(state)  # Legacy-URL-Discovery (Sitemap+Wayback)
        run_b22(state)  # Cross-Domain-Legal-Doc-Hosting (Elli/LogPay)
@@ -110,8 +112,10 @@ async def run_compliance_check(check_id: str, req) -> None:
        run_phase_e(state)
        # Phase F: Step 7 persist + audit log + unified findings
        run_phase_f(state)
+        emit(check_id, {"type": "complete", "status": "completed"})
    except Exception as e:
        logger.error("Compliance check %s failed: %s",
                     check_id, e, exc_info=True)
        _compliance_check_jobs[check_id]["status"] = "failed"
        _compliance_check_jobs[check_id]["error"] = str(e)[:500]
+        emit(check_id, {"type": "error", "error": str(e)[:300]})
@@ -0,0 +1,82 @@
+"""SSE-Plumbing für den Compliance-Check — pro check_id eine Event-Queue
+ Generator. Spiegelt das Agent-Test-SSE (specialist_agent_routes).
+
+ADDITIV: Das Polling auf GET /compliance-check/{check_id} bleibt die
+Wahrheit fürs finale Ergebnis. SSE liefert nur **progressive** Events,
+damit sich die Themen-Tabs füllen, sobald ein Thema fertig ist:
+  - {type:"progress", msg, pct}   (aus _update)
+  - {type:"topic", topic, output} (aus run_agent_outputs, pro Thema)
+  - {type:"complete", status}     (Orchestrator-Ende)
+  - {type:"error", error}         (Orchestrator-Fehler)
+Der Generator selbst sendet zusätzlich hello, heartbeat und stream_close.
+Geht ein Event verloren (Queue voll / kein Client), ist das unkritisch —
+der Tab kommt spätestens mit dem finalen Poll.
+"""
+
+from __future__ import annotations
+
+import asyncio
+import json
+import logging
+from collections.abc import AsyncGenerator
+
+from ._constants import _compliance_check_jobs
+
+logger = logging.getLogger(__name__)
+
+# In-memory event queues, keyed by check_id. They do not survive a process
+# restart, which is acceptable for a live stream (polling is the persistent
+# path).
+_check_queues: dict[str, "asyncio.Queue[dict]"] = {}
+
+# Job statuses that event_generator treats as "the check is finished".
+_TERMINAL_JOB_STATES = ("completed", "failed", "skipped_tdm")
+
+
+def new_queue(check_id: str) -> None:
+    """Register a fresh, bounded event queue for *check_id*.
+
+    Meant to be called from POST /compliance-check. A queue already
+    registered under the same id is replaced.
+    """
+    queue = asyncio.Queue(maxsize=500)
+    _check_queues[check_id] = queue
+
+
+def emit(check_id: str, event: dict) -> None:
+    """Push *event* onto the check's queue without blocking (best effort).
+
+    Deliberately synchronous so that the synchronous ``_update()`` can emit
+    as well. The event is discarded when no queue is registered for
+    *check_id* or when the queue is full; neither case raises.
+    """
+    q = _check_queues.get(check_id)
+    if q is None:
+        return
+    try:
+        q.put_nowait(event)
+    except asyncio.QueueFull:
+        # Consumer too slow (or absent) — the final poll catches up. Leave
+        # a debug trace so missing progressive events can be diagnosed.
+        logger.debug(
+            "SSE queue full for check %s, dropped %s event",
+            check_id, event.get("type"),
+        )
+
+
+def _format_sse(payload: dict) -> str:
+    """Serialize *payload* as a single SSE ``data:`` frame.
+
+    Values that ``json`` cannot encode natively are converted with ``str``.
+    The frame ends with the blank line that terminates an SSE event.
+    """
+    encoded = json.dumps(payload, default=str)
+    return "data: " + encoded + "\n\n"
+
+
+async def event_generator(check_id: str) -> AsyncGenerator[str, None]:
+    """Stream the queued events of one check as SSE frames.
+
+    Yields a ``hello`` frame, then every queued event. After 25 s without
+    an event a ``heartbeat`` frame is sent and the job status is checked,
+    so the stream still ends if the terminal event never reached the
+    queue. The stream ends with ``stream_close`` after a ``complete`` or
+    ``error`` event, or once the job status is terminal. If no queue is
+    registered for *check_id*, a single ``stream_close`` frame with reason
+    ``no_queue`` is yielded so the client falls back to polling.
+
+    On exit the queue is released 5 minutes later (late reconnects), but
+    only once the job is terminal or unknown: a client that disconnects
+    mid-check must not remove the queue the running check still emits to.
+    """
+    q = _check_queues.get(check_id)
+    if q is None:
+        # Check may already be finished (queue cleaned up) -> client polls.
+        yield _format_sse({"type": "stream_close", "reason": "no_queue"})
+        return
+
+    release_delay = 300.0
+    loop = asyncio.get_running_loop()
+
+    def _release() -> None:
+        job = _compliance_check_jobs.get(check_id) or {}
+        if job and job.get("status") not in _TERMINAL_JOB_STATES:
+            # Check still running: keep the queue and look again later.
+            if not loop.is_closed():
+                loop.call_later(release_delay, _release)
+            return
+        # Only drop the queue this generator was reading from.
+        if _check_queues.get(check_id) is q:
+            _check_queues.pop(check_id, None)
+
+    yield _format_sse({"type": "hello", "check_id": check_id})
+    try:
+        while True:
+            try:
+                event = await asyncio.wait_for(q.get(), timeout=25.0)
+            except asyncio.TimeoutError:
+                yield _format_sse({"type": "heartbeat"})
+                job = _compliance_check_jobs.get(check_id) or {}
+                if job.get("status") in _TERMINAL_JOB_STATES:
+                    yield _format_sse({"type": "complete",
+                                       "status": job.get("status")})
+                    yield _format_sse({"type": "stream_close"})
+                    return
+                continue
+            yield _format_sse(event)
+            if event.get("type") in ("complete", "error"):
+                yield _format_sse({"type": "stream_close"})
+                return
+    finally:
+        # Release the queue only after a grace period (late reconnects).
+        try:
+            loop.call_later(release_delay, _release)
+        except RuntimeError:
+            # Loop already closed: no timer possible, clean up right away.
+            _release()