feat(agents): SSE-Endpoint + Agent-Test-Tab (5-URL parallel)

Backend: - specialist_agent_routes.py: GET /agents, POST /test/start (run_id), GET /test/stream/{run_id} (SSE), GET /run/{run_id}/result, GET /run/{run_id}/artifacts, GET /run/{run_id}/artifact/{path}, DELETE /run/{run_id}, GET /runs. - Per-URL async orchestrator: text fetch via consent-tester dsi-discovery → agent.evaluate() → vault.put_json + stream events. - Tests: 7/7 grün. Frontend: - /api/sdk/v1/specialist-agent proxy mit SSE-passthrough. - AgentTestTab.tsx: Agent-Wähler + 5 URL-Slots + Live-Events + Speedometer (OK/N-A/HIGH/MEDIUM/LOW) + Findings + Recommendations + Eskalations-Log + Artefakt-Link pro Slot. - Neuer Tab "Agent-Test" in /sdk/agent. User-Wunsch 2026-06-08: pro Agent isoliert testen, 5 URLs gleichzeitig, Live-Updates statt Polling-Wartespiel. Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-06-08 17:47:05 +02:00
parent f4357a2e9b
commit 3ae4e60c9d
6 changed files with 1064 additions and 1 deletions
@@ -0,0 +1,112 @@
+/**
+ * Specialist-Agent API Proxy
+ * Proxies /api/sdk/v1/specialist-agent/* → backend-compliance:8002/api/v1/specialist-agent/*
+ *
+ * Streaming routes (SSE /test/stream/{run_id}) pass through unmodified.
+ */
+
+import { NextRequest, NextResponse } from 'next/server'
+
+const BACKEND_URL = process.env.BACKEND_URL || 'http://backend-compliance:8002'
+
+async function proxyRequest(
+  request: NextRequest,
+  pathSegments: string[] | undefined,
+  method: string,
+) {
+  const pathStr = pathSegments?.join('/') || ''
+  const searchParams = request.nextUrl.searchParams.toString()
+  const basePath = `${BACKEND_URL}/api/v1/specialist-agent`
+  const url = pathStr
+    ? `${basePath}/${pathStr}${searchParams ? `?${searchParams}` : ''}`
+    : `${basePath}${searchParams ? `?${searchParams}` : ''}`
+
+  const isSSE = pathStr.startsWith('test/stream/')
+
+  try {
+    const headers: HeadersInit = {}
+    if (!isSSE) headers['Content-Type'] = 'application/json'
+
+    const fetchOptions: RequestInit = {
+      method,
+      headers,
+      signal: AbortSignal.timeout(isSSE ? 600000 : 60000),
+    }
+    if (method === 'POST' || method === 'PUT' || method === 'PATCH' ||
+        method === 'DELETE') {
+      const body = await request.text()
+      if (body) fetchOptions.body = body
+    }
+
+    const response = await fetch(url, fetchOptions)
+
+    if (isSSE) {
+      return new NextResponse(response.body, {
+        status: response.status,
+        headers: {
+          'Content-Type': 'text/event-stream',
+          'Cache-Control': 'no-cache',
+          'Connection': 'keep-alive',
+          'X-Accel-Buffering': 'no',
+        },
+      })
+    }
+
+    if (!response.ok) {
+      const errText = await response.text()
+      let errJson
+      try { errJson = JSON.parse(errText) }
+      catch { errJson = { error: errText } }
+      return NextResponse.json(
+        { error: `Backend Error: ${response.status}`, ...errJson },
+        { status: response.status },
+      )
+    }
+
+    const ct = response.headers.get('content-type') || ''
+    if (ct.includes('application/json')) {
+      const data = await response.json()
+      return NextResponse.json(data)
+    }
+    // Binary asset (image/video/csv etc.)
+    const blob = await response.blob()
+    return new NextResponse(blob, {
+      status: response.status,
+      headers: {
+        'Content-Type': ct || 'application/octet-stream',
+        'Content-Disposition':
+          response.headers.get('content-disposition') || '',
+      },
+    })
+  } catch (e) {
+    console.error('specialist-agent proxy error:', e)
+    return NextResponse.json(
+      { error: 'Verbindung zum Backend fehlgeschlagen' },
+      { status: 503 },
+    )
+  }
+}
+
+export async function GET(
+  request: NextRequest,
+  { params }: { params: Promise<{ path?: string[] }> },
+) {
+  const { path } = await params
+  return proxyRequest(request, path, 'GET')
+}
+
+export async function POST(
+  request: NextRequest,
+  { params }: { params: Promise<{ path?: string[] }> },
+) {
+  const { path } = await params
+  return proxyRequest(request, path, 'POST')
+}
+
+export async function DELETE(
+  request: NextRequest,
+  { params }: { params: Promise<{ path?: string[] }> },
+) {
+  const { path } = await params
+  return proxyRequest(request, path, 'DELETE')
+}
@@ -0,0 +1,440 @@
+'use client'
+
+import React, { useEffect, useMemo, useRef, useState } from 'react'
+
+type AgentInfo = {
+  agent_id: string
+  agent_version: string
+  doc_type: string
+  mc_count: number
+}
+
+type Finding = {
+  check_id: string
+  agent: string
+  agent_version: string
+  field_id?: string
+  severity: 'HIGH' | 'MEDIUM' | 'LOW' | 'INFO'
+  title: string
+  norm?: string
+  evidence?: string
+  action?: string
+  confidence?: number
+  sources?: { source_type: string; source_id: string; detail?: string }[]
+}
+
+type Recommendation = {
+  recommendation_id: string
+  title: string
+  body: string
+  severity: string
+  related_finding_ids: string[]
+  estimated_effort_hours: number
+}
+
+type SlotOutput = {
+  agent: string
+  agent_version: string
+  findings: Finding[]
+  recommendations: Recommendation[]
+  mc_total: number
+  mc_ok: number
+  mc_na: number
+  mc_high: number
+  mc_medium: number
+  mc_low: number
+  duration_ms: number
+  confidence: number
+  escalation_log: { stage: string; model: string; success: boolean; duration_ms: number }[]
+}
+
+type RunResult = {
+  run_id: string
+  agent_id: string
+  finished: boolean
+  results: Record<string, SlotOutput>
+  vault_url: string
+}
+
+type StreamEvent = {
+  type: string
+  slot?: string
+  [key: string]: any
+}
+
+const STORAGE_KEY = 'agent-test-state-v1'
+const MAX_SLOTS = 5
+
+export function AgentTestTab() {
+  const [agents, setAgents] = useState<AgentInfo[]>([])
+  const [agentId, setAgentId] = useState<string>('')
+  const [urls, setUrls] = useState<string[]>(['', '', '', '', ''])
+  const [running, setRunning] = useState(false)
+  const [runId, setRunId] = useState<string>('')
+  const [events, setEvents] = useState<StreamEvent[]>([])
+  const [result, setResult] = useState<RunResult | null>(null)
+  const [error, setError] = useState<string>('')
+  const eventSrcRef = useRef<EventSource | null>(null)
+
+  // Restore state from localStorage
+  useEffect(() => {
+    try {
+      const s = localStorage.getItem(STORAGE_KEY)
+      if (s) {
+        const parsed = JSON.parse(s)
+        if (parsed.agentId) setAgentId(parsed.agentId)
+        if (Array.isArray(parsed.urls))
+          setUrls(parsed.urls.slice(0, MAX_SLOTS).concat(
+            new Array(MAX_SLOTS).fill('')).slice(0, MAX_SLOTS))
+      }
+    } catch { /* noop */ }
+  }, [])
+  useEffect(() => {
+    try {
+      localStorage.setItem(STORAGE_KEY,
+        JSON.stringify({ agentId, urls }))
+    } catch { /* quota */ }
+  }, [agentId, urls])
+
+  // Load agents
+  useEffect(() => {
+    fetch('/api/sdk/v1/specialist-agent/agents')
+      .then(r => r.json())
+      .then(d => {
+        const list: AgentInfo[] = d.agents || []
+        setAgents(list)
+        if (list.length && !agentId) setAgentId(list[0].agent_id)
+      })
+      .catch(e => setError(`Agent-Liste fehlgeschlagen: ${e}`))
+    // eslint-disable-next-line react-hooks/exhaustive-deps
+  }, [])
+
+  const startTest = async () => {
+    setError('')
+    setResult(null)
+    setEvents([])
+    const cleanUrls = urls.map(u => u.trim()).filter(Boolean)
+    if (!agentId) { setError('Kein Agent ausgewählt.'); return }
+    if (cleanUrls.length === 0) { setError('Mind. eine URL angeben.'); return }
+    setRunning(true)
+    try {
+      const r = await fetch(
+        '/api/sdk/v1/specialist-agent/test/start',
+        {
+          method: 'POST',
+          headers: { 'Content-Type': 'application/json' },
+          body: JSON.stringify({
+            agent_id: agentId,
+            urls: cleanUrls,
+          }),
+        },
+      )
+      if (!r.ok) {
+        const j = await r.json().catch(() => ({}))
+        throw new Error(j.error || `HTTP ${r.status}`)
+      }
+      const data = await r.json()
+      setRunId(data.run_id)
+      openStream(data.run_id)
+      pollResult(data.run_id)
+    } catch (e: any) {
+      setError(e.message || String(e))
+      setRunning(false)
+    }
+  }
+
+  const openStream = (rid: string) => {
+    try { eventSrcRef.current?.close() } catch { /* noop */ }
+    const es = new EventSource(
+      `/api/sdk/v1/specialist-agent/test/stream/${rid}`,
+    )
+    eventSrcRef.current = es
+    es.onmessage = (ev) => {
+      try {
+        const data: StreamEvent = JSON.parse(ev.data)
+        setEvents(prev => [...prev, data])
+        if (data.type === 'stream_close' || data.type === 'run_complete') {
+          try { es.close() } catch { /* noop */ }
+        }
+      } catch { /* noop */ }
+    }
+    es.onerror = () => { try { es.close() } catch { /* noop */ } }
+  }
+
+  const pollResult = async (rid: string) => {
+    for (let i = 0; i < 360; i++) {
+      try {
+        const r = await fetch(
+          `/api/sdk/v1/specialist-agent/run/${rid}/result`,
+        )
+        if (r.ok) {
+          const d: RunResult = await r.json()
+          if (d.finished) {
+            setResult(d); setRunning(false); return
+          }
+        }
+      } catch { /* noop */ }
+      await new Promise(s => setTimeout(s, 2000))
+    }
+    setRunning(false)
+  }
+
+  const slotOutputs = useMemo(() => {
+    if (!result) return []
+    const items: { slot: string; output: SlotOutput }[] = []
+    for (const slot of Object.keys(result.results)) {
+      items.push({ slot, output: result.results[slot] })
+    }
+    return items.sort((a, b) => a.slot.localeCompare(b.slot))
+  }, [result])
+
+  const selectedAgent = agents.find(a => a.agent_id === agentId)
+
+  return (
+    <div className="space-y-4">
+      <div className="rounded-lg border bg-white p-4 space-y-3">
+        <h2 className="text-lg font-semibold">Agent-Test (max. {MAX_SLOTS} URLs)</h2>
+        <p className="text-xs text-gray-500">
+          Wählt einen Spezialisten-Agent und feuert ihn gegen 1-5 URLs gleichzeitig.
+          Pro URL Speedometer + Findings + Empfehlungen mit Quellen-Herkunft (MC / Regex / LLM-Stufe).
+          Keine Aussagen "rechtssicher" oder "garantiert" — alle solchen Wörter werden vor Ausgabe gelöscht.
+        </p>
+        <div className="flex flex-wrap gap-3 items-end">
+          <div>
+            <label className="block text-xs font-medium text-gray-600">Agent</label>
+            <select value={agentId}
+                    onChange={e => setAgentId(e.target.value)}
+                    className="border rounded px-2 py-1 text-sm">
+              {agents.map(a => (
+                <option key={a.agent_id} value={a.agent_id}>
+                  {a.agent_id} v{a.agent_version} ({a.mc_count} MCs)
+                </option>
+              ))}
+            </select>
+          </div>
+          {selectedAgent && (
+            <div className="text-xs text-gray-500">
+              Doc-Type: <code>{selectedAgent.doc_type}</code>
+            </div>
+          )}
+        </div>
+        <div className="space-y-1">
+          {urls.map((u, i) => (
+            <div key={i} className="flex gap-2">
+              <span className="text-xs font-mono text-gray-500 w-8 pt-1.5">URL{i+1}</span>
+              <input value={u}
+                     onChange={e => {
+                       const next = [...urls]; next[i] = e.target.value
+                       setUrls(next)
+                     }}
+                     placeholder="https://example.com/impressum"
+                     className="flex-1 border rounded px-2 py-1 text-sm font-mono"/>
+            </div>
+          ))}
+        </div>
+        <div className="flex gap-2">
+          <button onClick={startTest}
+                  disabled={running}
+                  className="bg-blue-600 hover:bg-blue-700 disabled:bg-gray-400 text-white text-sm px-4 py-2 rounded">
+            {running ? 'Laufend...' : 'Test starten'}
+          </button>
+          {runId && (
+            <span className="text-xs text-gray-500 self-center">
+              Run-ID: <code>{runId}</code>
+            </span>
+          )}
+        </div>
+        {error && (
+          <div className="bg-red-50 border-l-4 border-red-400 p-2 text-sm text-red-700">
+            {error}
+          </div>
+        )}
+      </div>
+
+      {running && events.length > 0 && (
+        <div className="rounded-lg border bg-gray-50 p-3 max-h-48 overflow-y-auto">
+          <div className="text-xs font-mono space-y-1">
+            {events.slice(-30).map((ev, i) => (
+              <div key={i}>
+                <span className="text-gray-400">[{ev.type}]</span>{' '}
+                {ev.slot && <span className="text-blue-600">{ev.slot}</span>}{' '}
+                {ev.severity && (
+                  <span className={severityColor(ev.severity)}>
+                    {ev.severity}
+                  </span>
+                )}{' '}
+                {ev.title || ev.error || ev.label || ev.model || ev.url || ''}
+              </div>
+            ))}
+          </div>
+        </div>
+      )}
+
+      {slotOutputs.length > 0 && (
+        <div className="space-y-3">
+          {slotOutputs.map(({ slot, output }) => (
+            <SlotCard key={slot} slot={slot} output={output} runId={runId}/>
+          ))}
+        </div>
+      )}
+    </div>
+  )
+}
+
+function SlotCard({ slot, output, runId }: {
+  slot: string
+  output: SlotOutput
+  runId: string
+}) {
+  const [showAll, setShowAll] = useState(false)
+  const visibleFindings = showAll ? output.findings : output.findings.slice(0, 8)
+  return (
+    <div className="rounded-lg border bg-white p-4 space-y-3">
+      <div className="flex items-baseline gap-3">
+        <h3 className="font-semibold text-gray-800">Slot: {slot}</h3>
+        <span className="text-xs text-gray-500">
+          {output.duration_ms} ms · confidence {(output.confidence * 100).toFixed(0)}%
+        </span>
+        <a className="text-xs text-blue-600 hover:underline ml-auto"
+           href={`/api/sdk/v1/specialist-agent/run/${runId}/artifacts`}
+           target="_blank" rel="noreferrer">
+          Artefakte ↗
+        </a>
+      </div>
+      <Speedometer
+        total={output.mc_total}
+        ok={output.mc_ok}
+        na={output.mc_na}
+        high={output.mc_high}
+        medium={output.mc_medium}
+        low={output.mc_low}
+      />
+      {output.escalation_log.length > 0 && (
+        <div className="text-xs text-gray-500">
+          Eskalationen:{' '}
+          {output.escalation_log.map((e, i) => (
+            <span key={i} className="mr-2">
+              {e.stage}/{e.model} {e.success ? '✓' : '✗'} ({e.duration_ms} ms)
+            </span>
+          ))}
+        </div>
+      )}
+      {output.findings.length > 0 && (
+        <div className="space-y-1">
+          <div className="text-xs font-semibold uppercase text-gray-600">
+            Findings ({output.findings.length})
+          </div>
+          {visibleFindings.map(f => (
+            <FindingRow key={f.check_id} f={f}/>
+          ))}
+          {output.findings.length > 8 && (
+            <button onClick={() => setShowAll(x => !x)}
+                    className="text-xs text-blue-600 hover:underline">
+              {showAll ? 'Weniger anzeigen' : `Alle ${output.findings.length} anzeigen`}
+            </button>
+          )}
+        </div>
+      )}
+      {output.recommendations.length > 0 && (
+        <div className="space-y-1">
+          <div className="text-xs font-semibold uppercase text-gray-600">
+            Empfehlungen ({output.recommendations.length}, gerollupt)
+          </div>
+          {output.recommendations.map(r => (
+            <div key={r.recommendation_id}
+                 className="border-l-2 border-emerald-400 bg-emerald-50 p-2 text-xs">
+              <div className="font-semibold">{r.title}</div>
+              <div className="text-gray-600">{r.body}</div>
+              <div className="text-[10px] text-gray-500 mt-1">
+                {r.related_finding_ids.length} Finding(s) · ~{r.estimated_effort_hours}h
+              </div>
+            </div>
+          ))}
+        </div>
+      )}
+    </div>
+  )
+}
+
+function Speedometer({ total, ok, na, high, medium, low }: {
+  total: number
+  ok: number
+  na: number
+  high: number
+  medium: number
+  low: number
+}) {
+  const safeTotal = Math.max(total, 1)
+  return (
+    <div className="space-y-1">
+      <div className="text-xs text-gray-500">{total} MCs geprüft</div>
+      <div className="flex h-4 rounded overflow-hidden border">
+        <Bar pct={(ok / safeTotal) * 100} color="#10b981"/>
+        <Bar pct={(na / safeTotal) * 100} color="#94a3b8"/>
+        <Bar pct={(high / safeTotal) * 100} color="#dc2626"/>
+        <Bar pct={(medium / safeTotal) * 100} color="#f59e0b"/>
+        <Bar pct={(low / safeTotal) * 100} color="#3b82f6"/>
+      </div>
+      <div className="flex flex-wrap gap-2 text-xs">
+        <Legend color="#10b981" label={`OK ${ok}`}/>
+        <Legend color="#94a3b8" label={`n/a ${na}`}/>
+        <Legend color="#dc2626" label={`HIGH ${high}`}/>
+        <Legend color="#f59e0b" label={`MEDIUM ${medium}`}/>
+        <Legend color="#3b82f6" label={`LOW ${low}`}/>
+      </div>
+    </div>
+  )
+}
+
+function Bar({ pct, color }: { pct: number; color: string }) {
+  return <div style={{ width: `${pct}%`, background: color }}/>
+}
+
+function Legend({ color, label }: { color: string; label: string }) {
+  return (
+    <span className="inline-flex items-center gap-1">
+      <span style={{ background: color }} className="w-2 h-2 inline-block rounded"/>
+      <span>{label}</span>
+    </span>
+  )
+}
+
+function FindingRow({ f }: { f: Finding }) {
+  const color = severityHex(f.severity)
+  const sourceTags = (f.sources || [])
+    .map(s => s.source_type)
+    .filter((v, i, arr) => arr.indexOf(v) === i)
+  return (
+    <div className="p-2 border-l-2" style={{ borderColor: color }}>
+      <div className="flex items-baseline gap-2 text-xs">
+        <span style={{ color }} className="font-semibold">{f.severity}</span>
+        <code className="text-gray-500">{f.check_id}</code>
+        {sourceTags.map(t => (
+          <span key={t} className="px-1 bg-gray-100 rounded text-[10px]">{t}</span>
+        ))}
+      </div>
+      <div className="text-sm">{f.title}</div>
+      {f.norm && <div className="text-[11px] text-gray-500">{f.norm}</div>}
+      {f.evidence && (
+        <div className="text-[11px] italic text-gray-600 mt-1">„{f.evidence}"</div>
+      )}
+      {f.action && (
+        <div className="text-[11px] text-emerald-700 mt-1">
+          → {f.action}
+        </div>
+      )}
+    </div>
+  )
+}
+
+function severityColor(sev: string) {
+  return sev === 'HIGH' ? 'text-red-600 font-semibold' :
+         sev === 'MEDIUM' ? 'text-amber-600 font-semibold' :
+         sev === 'LOW' ? 'text-blue-600' : 'text-gray-600'
+}
+
+function severityHex(sev: string) {
+  return sev === 'HIGH' ? '#dc2626' :
+         sev === 'MEDIUM' ? '#f59e0b' :
+         sev === 'LOW' ? '#3b82f6' : '#94a3b8'
+}
@@ -5,13 +5,15 @@ import { ScanResult } from './_components/ScanResult'
 import { ComplianceCheckTab } from './_components/ComplianceCheckTab'
 import { BannerCheckTab } from './_components/BannerCheckTab'
 import { ComplianceFAQ } from './_components/ComplianceFAQ'
+import { AgentTestTab } from './_components/AgentTestTab'

-type AnalysisTab = 'scan' | 'compliance-check' | 'banner-check'
+type AnalysisTab = 'scan' | 'compliance-check' | 'banner-check' | 'agent-test'

 const TABS: { id: AnalysisTab; label: string; desc: string }[] = [
  { id: 'scan', label: 'Website-Scan', desc: 'Rechtliche Dokumente finden + Dienstleister erkennen' },
  { id: 'compliance-check', label: 'Compliance-Check', desc: 'Alle rechtlichen Dokumente zusammen pruefen' },
  { id: 'banner-check', label: 'Banner-Check', desc: 'Cookie-Banner auf DSGVO-Konformitaet testen' },
+  { id: 'agent-test', label: 'Agent-Test', desc: 'Specialist-Agent gegen 5 URLs isoliert testen' },
 ]

 export default function AgentPage() {
@@ -186,6 +188,7 @@ export default function AgentPage() {

      {tab === 'compliance-check' && <ComplianceCheckTab />}
      {tab === 'banner-check' && <BannerCheckTab />}
+      {tab === 'agent-test' && <AgentTestTab />}

      <ComplianceFAQ />
    </div>
@@ -74,6 +74,7 @@ _ROUTER_MODULES = [
    "founding_wizard_routes",
    "licenses_routes",
    "template_rule_routes",
+    "specialist_agent_routes",
 ]

 _loaded_count = 0
@@ -0,0 +1,378 @@
+"""SSE-Endpoint für den Agent-Test-Harness.
+
+User-Vorgabe 2026-06-08: pro Agent isoliert testen mit z.B. 5 URLs
+gleichzeitig. Live-Stream der Events ins Frontend.
+
+Endpoints:
+  GET  /specialist-agent/agents
+  POST /specialist-agent/test/start            { agent_id, urls }
+  GET  /specialist-agent/test/stream/{run_id}  → SSE-Stream
+  GET  /specialist-agent/run/{run_id}/artifacts
+  GET  /specialist-agent/run/{run_id}/artifact/{relpath}
+"""
+
+from __future__ import annotations
+
+import asyncio
+import json
+import logging
+import os
+import uuid
+from collections.abc import AsyncGenerator
+from typing import Any
+
+import httpx
+from fastapi import APIRouter, HTTPException
+from fastapi.responses import FileResponse, StreamingResponse
+from pydantic import BaseModel, Field
+
+from compliance.services.specialist_agents import REGISTRY, AgentInput
+from compliance.services.specialist_agents._evidence_vault import (
+    EvidenceVault,
+    delete_run as vault_delete_run,
+    list_runs as vault_list_runs,
+)
+
+logger = logging.getLogger(__name__)
+
+CONSENT_TESTER_URL = os.environ.get(
+    "CONSENT_TESTER_URL",
+    "http://bp-compliance-consent-tester:8094",
+)
+
+router = APIRouter(prefix="/specialist-agent", tags=["specialist-agent"])
+
+
+# In-memory event-queues pro run_id. Restart-fragil aber für ein
+# Live-Test-Tool ausreichend (keine Persistenz nötig).
+_run_queues: dict[str, asyncio.Queue] = {}
+_run_states: dict[str, dict[str, Any]] = {}
+
+
+class TestStartRequest(BaseModel):
+    agent_id: str
+    urls: list[str] = Field(default_factory=list, max_length=10)
+    raw_texts: list[str] = Field(default_factory=list, max_length=10)
+    business_scope: list[str] = Field(default_factory=list)
+    company_name: str = ""
+    origin_domain: str = ""
+
+
+class TestStartResponse(BaseModel):
+    run_id: str
+    agent_id: str
+    slot_count: int
+
+
+@router.get("/agents")
+async def list_agents() -> dict[str, Any]:
+    """Liefert die registrierten Specialist-Agenten."""
+    return {"agents": REGISTRY.list_agents()}
+
+
+@router.post("/test/start", response_model=TestStartResponse)
+async def start_test(req: TestStartRequest) -> TestStartResponse:
+    """Startet einen Multi-URL-Test gegen einen Agent.
+
+    Liefert eine run_id zurück. Der Frontend-Client öffnet danach
+    einen SSE-Stream auf /test/stream/{run_id} um Events zu empfangen.
+    """
+    agent = REGISTRY.get(req.agent_id)
+    if agent is None:
+        raise HTTPException(404, f"agent '{req.agent_id}' nicht registriert")
+    slots = max(len(req.urls), len(req.raw_texts))
+    if slots == 0:
+        raise HTTPException(400, "urls oder raw_texts dürfen nicht leer sein")
+
+    run_id = uuid.uuid4().hex[:16]
+    queue: asyncio.Queue = asyncio.Queue(maxsize=500)
+    _run_queues[run_id] = queue
+    _run_states[run_id] = {
+        "agent_id": req.agent_id,
+        "started": False,
+        "finished": False,
+        "slot_count": slots,
+        "results": {},
+    }
+    vault = EvidenceVault(agent.agent_id, agent.agent_version,
+                            run_id=run_id)
+    asyncio.create_task(_run_test_orchestrator(run_id, req, vault))
+    return TestStartResponse(
+        run_id=run_id,
+        agent_id=req.agent_id,
+        slot_count=slots,
+    )
+
+
+@router.get("/test/stream/{run_id}")
+async def stream_test(run_id: str) -> StreamingResponse:
+    """SSE-Stream der Events für einen laufenden Test."""
+    if run_id not in _run_queues:
+        raise HTTPException(404, "run_id unbekannt")
+    return StreamingResponse(
+        _event_generator(run_id),
+        media_type="text/event-stream",
+        headers={
+            "Cache-Control": "no-cache",
+            "X-Accel-Buffering": "no",   # nginx
+            "Connection": "keep-alive",
+        },
+    )
+
+
+async def _event_generator(run_id: str) -> AsyncGenerator[str, None]:
+    """Reads events from the queue until the run is finished."""
+    queue = _run_queues[run_id]
+    # Initial hello
+    yield _format_sse({"type": "hello", "run_id": run_id})
+    try:
+        while True:
+            try:
+                event = await asyncio.wait_for(queue.get(), timeout=30.0)
+            except asyncio.TimeoutError:
+                # heartbeat
+                yield _format_sse({"type": "heartbeat"})
+                if _run_states.get(run_id, {}).get("finished"):
+                    yield _format_sse({"type": "stream_close"})
+                    return
+                continue
+            yield _format_sse(event)
+            if event.get("type") in ("run_complete", "run_error"):
+                yield _format_sse({"type": "stream_close"})
+                return
+    finally:
+        # Defer cleanup: keep state for 5 min so late GETs can read
+        # results from _run_states. The queue can be released earlier.
+        asyncio.get_event_loop().call_later(
+            300, lambda: _run_queues.pop(run_id, None),
+        )
+
+
+def _format_sse(payload: dict) -> str:
+    """SSE event line format."""
+    return f"data: {json.dumps(payload, default=str)}\n\n"
+
+
+async def _emit(run_id: str, event: dict) -> None:
+    q = _run_queues.get(run_id)
+    if q is None:
+        return
+    try:
+        await q.put(event)
+    except Exception:
+        pass
+
+
+async def _run_test_orchestrator(
+    run_id: str,
+    req: TestStartRequest,
+    vault: EvidenceVault,
+) -> None:
+    """Kernlogik: pro URL / raw_text parallel den Agent feuern."""
+    agent = REGISTRY.get(req.agent_id)
+    if agent is None:
+        await _emit(run_id, {"type": "run_error",
+                             "error": "agent gone"})
+        return
+    _run_states[run_id]["started"] = True
+    await _emit(run_id, {
+        "type": "run_started",
+        "agent_id": agent.agent_id,
+        "agent_version": agent.agent_version,
+        "slot_count": _run_states[run_id]["slot_count"],
+    })
+    slot_jobs: list[asyncio.Task] = []
+    # URLs first, then raw_texts. Slots numbered url1, url2, …, text1, …
+    for i, url in enumerate(req.urls, start=1):
+        slot = f"url{i}"
+        slot_jobs.append(asyncio.create_task(
+            _process_slot(run_id, slot, agent, url, "", req, vault),
+        ))
+    for j, raw in enumerate(req.raw_texts, start=1):
+        slot = f"text{j}"
+        slot_jobs.append(asyncio.create_task(
+            _process_slot(run_id, slot, agent, "", raw, req, vault),
+        ))
+    try:
+        await asyncio.gather(*slot_jobs, return_exceptions=True)
+    finally:
+        manifest = vault.finalize()
+        _run_states[run_id]["finished"] = True
+        await _emit(run_id, {
+            "type": "run_complete",
+            "vault_url": vault.url(),
+            "manifest_asset_count": len(manifest.get("assets") or []),
+        })
+
+
+async def _process_slot(
+    run_id: str,
+    slot: str,
+    agent,
+    url: str,
+    raw_text: str,
+    req: TestStartRequest,
+    vault: EvidenceVault,
+) -> None:
+    """Holt den Text (URL oder raw), ruft Agent, vault-speichert Output."""
+    label = url or f"text-slot-{slot}"
+    await _emit(run_id, {"type": "slot_started", "slot": slot,
+                         "label": label})
+    text = raw_text
+    fetch_err = ""
+    if url and not raw_text:
+        await _emit(run_id, {"type": "slot_fetching",
+                             "slot": slot, "url": url})
+        text, fetch_err = await _fetch_text(url)
+        if fetch_err:
+            await _emit(run_id, {
+                "type": "slot_fetch_error",
+                "slot": slot,
+                "error": fetch_err,
+            })
+    if text:
+        vault.put_bytes("raw", slot, "source.txt",
+                         text.encode("utf-8"),
+                         mime="text/plain")
+    await _emit(run_id, {
+        "type": "slot_text_ready",
+        "slot": slot,
+        "char_count": len(text),
+    })
+    agent_input = AgentInput(
+        doc_type=agent.doc_type,
+        text=text,
+        url=url,
+        business_scope=req.business_scope,
+        company_name=req.company_name,
+        origin_domain=req.origin_domain,
+    )
+    await _emit(run_id, {"type": "slot_agent_running", "slot": slot})
+    try:
+        output = await agent.evaluate(agent_input)
+    except Exception as e:
+        logger.exception("agent crashed slot=%s", slot)
+        await _emit(run_id, {
+            "type": "slot_agent_error", "slot": slot,
+            "error": f"{type(e).__name__}: {str(e)[:160]}",
+        })
+        return
+    # Persist findings as JSON in vault
+    vault.put_json("finding", slot, "output.json",
+                    json.loads(output.model_dump_json()))
+    # Update state for later /artifacts query
+    _run_states[run_id]["results"][slot] = json.loads(
+        output.model_dump_json(),
+    )
+    # Stream finding-emitted events
+    for f in output.findings:
+        await _emit(run_id, {
+            "type": "finding",
+            "slot": slot,
+            "check_id": f.check_id,
+            "severity": f.severity,
+            "title": f.title,
+            "field_id": f.field_id,
+        })
+    for esc in output.escalation_log:
+        await _emit(run_id, {
+            "type": "escalation",
+            "slot": slot,
+            "stage": esc.stage,
+            "model": esc.model,
+            "success": esc.success,
+            "duration_ms": esc.duration_ms,
+        })
+    await _emit(run_id, {
+        "type": "slot_complete",
+        "slot": slot,
+        "duration_ms": output.duration_ms,
+        "mc_total": output.mc_total,
+        "mc_ok": output.mc_ok,
+        "mc_na": output.mc_na,
+        "mc_high": output.mc_high,
+        "mc_medium": output.mc_medium,
+        "mc_low": output.mc_low,
+        "findings_count": len(output.findings),
+        "recommendations_count": len(output.recommendations),
+        "confidence": output.confidence,
+    })
+
+
+async def _fetch_text(url: str) -> tuple[str, str]:
+    """Nutzt den consent-tester DSI-Discovery für Volltext."""
+    try:
+        async with httpx.AsyncClient(timeout=120.0) as client:
+            resp = await client.post(
+                f"{CONSENT_TESTER_URL}/dsi-discovery",
+                json={"url": url, "max_documents": 5},
+                timeout=120.0,
+            )
+            if resp.status_code != 200:
+                return "", f"HTTP {resp.status_code}"
+            data = resp.json()
+            docs = data.get("documents", []) or []
+            if not docs:
+                return "", "no documents discovered"
+            texts: list[str] = []
+            for doc in docs:
+                t = (doc.get("full_text", "") or
+                     doc.get("text_preview", "") or "")
+                if t and len(t) > 50:
+                    texts.append(t)
+            return "\n\n".join(texts), ""
+    except Exception as e:
+        return "", f"{type(e).__name__}: {str(e)[:160]}"
+
+
+# ── Run / Vault Queries ──────────────────────────────────────────────
+
+
+@router.get("/run/{run_id}/result")
+async def get_run_result(run_id: str) -> dict[str, Any]:
+    """Komplette Ergebnisse eines Runs (für Frontend-Refresh)."""
+    state = _run_states.get(run_id)
+    if state is None:
+        raise HTTPException(404, "run unbekannt")
+    return {
+        "run_id": run_id,
+        "agent_id": state["agent_id"],
+        "finished": state["finished"],
+        "results": state["results"],
+        "vault_url": f"/api/v1/specialist-agent/run/{run_id}/artifacts",
+    }
+
+
+@router.get("/run/{run_id}/artifacts")
+async def list_run_artifacts(run_id: str) -> dict[str, Any]:
+    """Listet die Assets eines Runs."""
+    vault = EvidenceVault("?", "?", run_id=run_id)
+    return {
+        "run_id": run_id,
+        "manifest": vault._manifest,
+    }
+
+
+@router.get("/run/{run_id}/artifact/{path:path}")
+async def get_run_artifact(run_id: str, path: str):
+    """Liefert ein einzelnes Artefakt aus dem Vault."""
+    vault = EvidenceVault("?", "?", run_id=run_id)
+    p = vault.asset_path(path)
+    if p is None:
+        raise HTTPException(404, "asset not found")
+    return FileResponse(str(p))
+
+
+@router.delete("/run/{run_id}")
+async def delete_run(run_id: str) -> dict[str, bool]:
+    """DSR Art. 17: löscht den ganzen Run + Vault."""
+    deleted_vault = vault_delete_run(run_id)
+    _run_queues.pop(run_id, None)
+    _run_states.pop(run_id, None)
+    return {"deleted": deleted_vault}
+
+
+@router.get("/runs")
+async def list_runs(limit: int = 20) -> dict[str, Any]:
+    """Listet die letzten Runs im Vault."""
+    return {"runs": vault_list_runs(limit)}
@@ -0,0 +1,129 @@
+"""Tests für SSE-Endpoints des Specialist-Agent-Test-Harness."""
+
+from __future__ import annotations
+
+import asyncio
+import json
+from unittest.mock import AsyncMock, patch
+
+import pytest
+from fastapi.testclient import TestClient
+
+
+@pytest.fixture
+def app(tmp_path, monkeypatch):
+    monkeypatch.setenv("EVIDENCE_VAULT_ROOT", str(tmp_path / "vault"))
+    from fastapi import FastAPI
+    from compliance.api.specialist_agent_routes import router
+    app = FastAPI()
+    app.include_router(router, prefix="/api/v1")
+    return app
+
+
+@pytest.fixture
+def client(app):
+    return TestClient(app)
+
+
+def test_list_agents(client):
+    r = client.get("/api/v1/specialist-agent/agents")
+    assert r.status_code == 200
+    data = r.json()
+    agent_ids = {a["agent_id"] for a in data["agents"]}
+    assert "impressum" in agent_ids
+    assert "cookie_policy" in agent_ids
+
+
+def test_start_test_invalid_agent(client):
+    r = client.post("/api/v1/specialist-agent/test/start",
+                     json={"agent_id": "ghost",
+                           "raw_texts": ["test"]})
+    assert r.status_code == 404
+
+
+def test_start_test_no_input(client):
+    r = client.post("/api/v1/specialist-agent/test/start",
+                     json={"agent_id": "impressum"})
+    assert r.status_code == 400
+
+
+def test_start_test_with_raw_text(client):
+    r = client.post("/api/v1/specialist-agent/test/start",
+                     json={"agent_id": "impressum",
+                           "raw_texts": ["Tesla Germany GmbH "
+                                         "Berlin Email: x@y.com "
+                                         "HRB 123 Charlottenburg"]})
+    assert r.status_code == 200
+    data = r.json()
+    assert data["agent_id"] == "impressum"
+    assert data["slot_count"] == 1
+    assert data["run_id"]
+
+
+def test_stream_unknown_run(client):
+    r = client.get("/api/v1/specialist-agent/test/stream/ghost")
+    assert r.status_code == 404
+
+
+def test_run_result_after_text_input(client, monkeypatch):
+    # Skip LLM
+    async def _no_cascade(*a, **kw): return None, []
+    monkeypatch.setattr(
+        "compliance.services.specialist_agents.impressum.agent.cascade",
+        _no_cascade,
+    )
+    r = client.post("/api/v1/specialist-agent/test/start",
+                     json={"agent_id": "impressum",
+                           "raw_texts": [
+                               "Tesla Germany GmbH\nLudwig-Prandtl-Strasse 25\n"
+                               "12526 Berlin\nDeutschland\nEmail: x@y.com\n"
+                               "Tel: +49 89 1250 16 800\n"
+                               "Management: Elon Musk\n"
+                               "HRB 218904 B Charlottenburg",
+                           ]})
+    run_id = r.json()["run_id"]
+    # Give async task time to finish (small text → fast)
+    for _ in range(40):
+        rr = client.get(
+            f"/api/v1/specialist-agent/run/{run_id}/result",
+        )
+        if rr.json().get("finished"):
+            break
+        import time; time.sleep(0.05)
+    res = client.get(f"/api/v1/specialist-agent/run/{run_id}/result")
+    body = res.json()
+    assert body["finished"]
+    assert "text1" in body["results"]
+    out = body["results"]["text1"]
+    field_ids = {f["field_id"] for f in out["findings"]}
+    # Tesla pattern: German-label fehlt + USt fehlt
+    assert "vertretungsberechtigte_label_korrekt" in field_ids
+
+
+def test_artifacts_listing(client, monkeypatch):
+    async def _no_cascade(*a, **kw): return None, []
+    monkeypatch.setattr(
+        "compliance.services.specialist_agents.impressum.agent.cascade",
+        _no_cascade,
+    )
+    r = client.post("/api/v1/specialist-agent/test/start",
+                     json={"agent_id": "impressum",
+                           "raw_texts": ["Tesla Germany GmbH "
+                                         "Berlin Email: x@y.com "
+                                         "HRB 123 Charlottenburg"]})
+    run_id = r.json()["run_id"]
+    for _ in range(40):
+        rr = client.get(
+            f"/api/v1/specialist-agent/run/{run_id}/result",
+        )
+        if rr.json().get("finished"):
+            break
+        import time; time.sleep(0.05)
+    arts = client.get(
+        f"/api/v1/specialist-agent/run/{run_id}/artifacts",
+    )
+    assert arts.status_code == 200
+    manifest = arts.json()["manifest"]
+    kinds = {a["kind"] for a in manifest["assets"]}
+    assert "finding" in kinds
+    assert "raw" in kinds