feat(agents): SSE-Endpoint + Agent-Test-Tab (5-URL parallel)
CI / detect-changes (push) Successful in 7s
CI / branch-name (push) Has been skipped
CI / guardrail-integrity (push) Has been skipped
CI / secret-scan (push) Has been skipped
CI / dep-audit (push) Has been skipped
CI / sbom-scan (push) Has been skipped
CI / build-sha-integrity (push) Failing after 4s
CI / validate-canonical-controls (push) Successful in 12s
CI / loc-budget (push) Successful in 14s
CI / go-lint (push) Has been skipped
CI / python-lint (push) Has been skipped
CI / nodejs-lint (push) Has been skipped
CI / nodejs-build (push) Successful in 2m24s
CI / test-go (push) Has been skipped
CI / iace-gt-coverage (push) Has been skipped
CI / test-python-backend (push) Successful in 29s
CI / test-python-document-crawler (push) Has been skipped
CI / test-python-dsms-gateway (push) Has been skipped

Backend:
- specialist_agent_routes.py: GET /agents, POST /test/start (run_id),
  GET /test/stream/{run_id} (SSE), GET /run/{run_id}/result,
  GET /run/{run_id}/artifacts, GET /run/{run_id}/artifact/{path},
  DELETE /run/{run_id}, GET /runs.
- Per-URL async orchestrator: text fetch via consent-tester
  dsi-discovery → agent.evaluate() → vault.put_json + stream events.
- Tests: 7/7 grün.

Frontend:
- /api/sdk/v1/specialist-agent proxy mit SSE-passthrough.
- AgentTestTab.tsx: Agent-Wähler + 5 URL-Slots + Live-Events +
  Speedometer (OK/N-A/HIGH/MEDIUM/LOW) + Findings + Recommendations +
  Eskalations-Log + Artefakt-Link pro Slot.
- Neuer Tab "Agent-Test" in /sdk/agent.

User-Wunsch 2026-06-08: pro Agent isoliert testen, 5 URLs gleichzeitig,
Live-Updates statt Polling-Wartespiel.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
Benjamin Admin
2026-06-08 17:47:05 +02:00
parent f4357a2e9b
commit 3ae4e60c9d
6 changed files with 1064 additions and 1 deletions
@@ -0,0 +1,112 @@
/**
* Specialist-Agent API Proxy
* Proxies /api/sdk/v1/specialist-agent/* → backend-compliance:8002/api/v1/specialist-agent/*
*
* Streaming routes (SSE /test/stream/{run_id}) pass through unmodified.
*/
import { NextRequest, NextResponse } from 'next/server'
const BACKEND_URL = process.env.BACKEND_URL || 'http://backend-compliance:8002'
/**
 * Forwards one incoming request to the specialist-agent backend and relays
 * the backend's answer to the caller.
 *
 * Behaviour by response kind:
 * - Paths under `test/stream/` are Server-Sent-Event routes. A successful
 *   upstream body is piped through unbuffered with SSE headers. A failed
 *   upstream response is NOT relabelled as an event stream; it goes through
 *   the regular error path so the real status and message reach the client.
 * - Non-2xx responses are returned as JSON with the backend's status code.
 * - JSON responses are re-serialised; everything else is returned as a blob
 *   with the backend's content type.
 *
 * @param request       Incoming Next.js request (query string and body are forwarded).
 * @param pathSegments  Catch-all route segments after `/specialist-agent`.
 * @param method        HTTP method to use against the backend.
 * @returns The relayed response, or a 503 JSON error when the backend cannot be reached.
 */
async function proxyRequest(
  request: NextRequest,
  pathSegments: string[] | undefined,
  method: string,
) {
  const pathStr = pathSegments?.join('/') || ''
  const searchParams = request.nextUrl.searchParams.toString()
  const query = searchParams ? `?${searchParams}` : ''
  const basePath = `${BACKEND_URL}/api/v1/specialist-agent`
  const url = pathStr ? `${basePath}/${pathStr}${query}` : `${basePath}${query}`
  const isSSE = pathStr.startsWith('test/stream/')
  try {
    // A plain string record keeps the index assignment below type-safe
    // (HeadersInit is a union that cannot be indexed by string).
    const headers: Record<string, string> = {}
    if (!isSSE) headers['Content-Type'] = 'application/json'
    const fetchOptions: RequestInit = {
      method,
      headers,
      // Streams may legitimately stay open for minutes; regular calls may not.
      signal: AbortSignal.timeout(isSSE ? 600000 : 60000),
    }
    if (method === 'POST' || method === 'PUT' || method === 'PATCH' ||
        method === 'DELETE') {
      const body = await request.text()
      if (body) fetchOptions.body = body
    }
    const response = await fetch(url, fetchOptions)
    if (isSSE && response.ok && response.body) {
      return new NextResponse(response.body, {
        status: response.status,
        headers: {
          'Content-Type': 'text/event-stream',
          'Cache-Control': 'no-cache',
          'Connection': 'keep-alive',
          'X-Accel-Buffering': 'no',
        },
      })
    }
    if (!response.ok) {
      const errText = await response.text()
      let errJson: Record<string, unknown>
      try {
        const parsed: unknown = JSON.parse(errText)
        // Only spread real objects; a bare JSON string or array would
        // otherwise be spread index by index into the error payload.
        errJson = parsed !== null && typeof parsed === 'object' && !Array.isArray(parsed)
          ? (parsed as Record<string, unknown>)
          : { error: errText }
      } catch {
        errJson = { error: errText }
      }
      return NextResponse.json(
        { error: `Backend Error: ${response.status}`, ...errJson },
        { status: response.status },
      )
    }
    const ct = response.headers.get('content-type') || ''
    if (ct.includes('application/json')) {
      const data = await response.json()
      return NextResponse.json(data)
    }
    // Binary asset (image/video/csv etc.)
    const blob = await response.blob()
    const outHeaders: Record<string, string> = {
      'Content-Type': ct || 'application/octet-stream',
    }
    // Forward the disposition only when the backend actually sent one.
    const disposition = response.headers.get('content-disposition')
    if (disposition) outHeaders['Content-Disposition'] = disposition
    return new NextResponse(blob, {
      status: response.status,
      headers: outHeaders,
    })
  } catch (e) {
    console.error('specialist-agent proxy error:', e)
    return NextResponse.json(
      { error: 'Verbindung zum Backend fehlgeschlagen' },
      { status: 503 },
    )
  }
}
/** Route handler: relays GET requests (including SSE streams) to the backend. */
export async function GET(
  request: NextRequest,
  context: { params: Promise<{ path?: string[] }> },
) {
  const resolved = await context.params
  return proxyRequest(request, resolved.path, 'GET')
}
/** Route handler: relays POST requests, body included, to the backend. */
export async function POST(
  request: NextRequest,
  context: { params: Promise<{ path?: string[] }> },
) {
  const resolved = await context.params
  return proxyRequest(request, resolved.path, 'POST')
}
/** Route handler: relays DELETE requests to the backend. */
export async function DELETE(
  request: NextRequest,
  context: { params: Promise<{ path?: string[] }> },
) {
  const resolved = await context.params
  return proxyRequest(request, resolved.path, 'DELETE')
}
@@ -0,0 +1,440 @@
'use client'
import React, { useEffect, useMemo, useRef, useState } from 'react'
/** One entry of the agent list loaded from the `/agents` endpoint. */
type AgentInfo = {
  agent_id: string
  agent_version: string
  doc_type: string
  /** Shown in the agent selector as "(N MCs)". */
  mc_count: number
}
/** A single finding as rendered by `FindingRow`. */
type Finding = {
  check_id: string
  agent: string
  agent_version: string
  field_id?: string
  severity: 'HIGH' | 'MEDIUM' | 'LOW' | 'INFO'
  title: string
  norm?: string
  evidence?: string
  action?: string
  confidence?: number
  /** Origin of the finding; the distinct `source_type` values are shown as tags. */
  sources?: { source_type: string; source_id: string; detail?: string }[]
}
/** A recommendation as rendered in the slot card. */
type Recommendation = {
  recommendation_id: string
  title: string
  body: string
  severity: string
  related_finding_ids: string[]
  estimated_effort_hours: number
}
/** Result of one slot: findings, recommendations and the counters fed to `Speedometer`. */
type SlotOutput = {
  agent: string
  agent_version: string
  findings: Finding[]
  recommendations: Recommendation[]
  mc_total: number
  mc_ok: number
  mc_na: number
  mc_high: number
  mc_medium: number
  mc_low: number
  duration_ms: number
  /** Rendered as a percentage (value * 100). */
  confidence: number
  escalation_log: { stage: string; model: string; success: boolean; duration_ms: number }[]
}
/** Payload of `/run/{run_id}/result`; `results` is keyed by slot name. */
type RunResult = {
  run_id: string
  agent_id: string
  finished: boolean
  results: Record<string, SlotOutput>
  vault_url: string
}
/** One parsed SSE message; fields beyond `type` and `slot` depend on the event type. */
type StreamEvent = {
  type: string
  slot?: string
  [key: string]: any
}
/** localStorage key under which the selected agent and the URL inputs are persisted. */
const STORAGE_KEY = 'agent-test-state-v1'
/** Number of URL input slots offered by the form. */
const MAX_SLOTS = 5
/**
 * "Agent-Test" tab: pick one specialist agent, enter up to MAX_SLOTS URLs,
 * start a run, watch the live event stream and show per-slot results.
 *
 * Flow: POST `/test/start` returns a run id; an EventSource on
 * `/test/stream/{run_id}` feeds the live log while `/run/{run_id}/result`
 * is polled until the backend reports `finished`.
 *
 * The selected agent and the URL inputs are persisted in localStorage.
 * Persisting only starts after the stored state has been restored, so the
 * defaults of the first render can never overwrite a saved state.
 */
export function AgentTestTab() {
  const [agents, setAgents] = useState<AgentInfo[]>([])
  const [agentId, setAgentId] = useState<string>('')
  const [urls, setUrls] = useState<string[]>(
    () => Array.from({ length: MAX_SLOTS }, () => ''),
  )
  const [running, setRunning] = useState(false)
  const [runId, setRunId] = useState<string>('')
  const [events, setEvents] = useState<StreamEvent[]>([])
  const [result, setResult] = useState<RunResult | null>(null)
  const [error, setError] = useState<string>('')
  // Becomes true once the localStorage restore has been attempted.
  const [restored, setRestored] = useState(false)
  const eventSrcRef = useRef<EventSource | null>(null)
  // Run id the polling loop is allowed to serve; '' means "stop polling".
  const activeRunRef = useRef<string>('')

  // Restore state from localStorage
  useEffect(() => {
    try {
      const s = localStorage.getItem(STORAGE_KEY)
      if (s) {
        const parsed = JSON.parse(s)
        if (typeof parsed.agentId === 'string' && parsed.agentId)
          setAgentId(parsed.agentId)
        if (Array.isArray(parsed.urls)) {
          const stored: string[] = parsed.urls
            .filter((u: unknown): u is string => typeof u === 'string')
          setUrls(stored.slice(0, MAX_SLOTS).concat(
            new Array(MAX_SLOTS).fill('')).slice(0, MAX_SLOTS))
        }
      }
    } catch { /* corrupt or unavailable storage: keep defaults */ }
    setRestored(true)
  }, [])

  // Persist state, but never before the restore above has been applied.
  useEffect(() => {
    if (!restored) return
    try {
      localStorage.setItem(STORAGE_KEY,
        JSON.stringify({ agentId, urls }))
    } catch { /* quota */ }
  }, [restored, agentId, urls])

  // Load agents
  useEffect(() => {
    let cancelled = false
    fetch('/api/sdk/v1/specialist-agent/agents')
      .then(r => {
        if (!r.ok) throw new Error(`HTTP ${r.status}`)
        return r.json()
      })
      .then(d => {
        if (cancelled) return
        const list: AgentInfo[] = d.agents || []
        setAgents(list)
        // Functional update: sees the restored selection instead of the
        // empty value captured when this effect was created, and falls
        // back to the first agent when the stored one no longer exists.
        if (list.length > 0) {
          setAgentId(prev =>
            list.some(a => a.agent_id === prev) ? prev : list[0].agent_id)
        }
      })
      .catch(e => {
        if (!cancelled) setError(`Agent-Liste fehlgeschlagen: ${e}`)
      })
    return () => { cancelled = true }
  }, [])

  // Stop streaming and polling when the tab is unmounted.
  useEffect(() => () => {
    activeRunRef.current = ''
    try { eventSrcRef.current?.close() } catch { /* noop */ }
    eventSrcRef.current = null
  }, [])

  const startTest = async () => {
    setError('')
    setResult(null)
    setEvents([])
    const cleanUrls = urls.map(u => u.trim()).filter(Boolean)
    if (!agentId) { setError('Kein Agent ausgewählt.'); return }
    if (cleanUrls.length === 0) { setError('Mind. eine URL angeben.'); return }
    setRunning(true)
    try {
      const r = await fetch(
        '/api/sdk/v1/specialist-agent/test/start',
        {
          method: 'POST',
          headers: { 'Content-Type': 'application/json' },
          body: JSON.stringify({
            agent_id: agentId,
            urls: cleanUrls,
          }),
        },
      )
      if (!r.ok) {
        const j = await r.json().catch(() => ({}))
        throw new Error(j.error || `HTTP ${r.status}`)
      }
      const data = await r.json()
      setRunId(data.run_id)
      activeRunRef.current = data.run_id
      openStream(data.run_id)
      void pollResult(data.run_id)
    } catch (e: unknown) {
      setError(e instanceof Error ? e.message : String(e))
      setRunning(false)
    }
  }

  const openStream = (rid: string) => {
    try { eventSrcRef.current?.close() } catch { /* noop */ }
    const es = new EventSource(
      `/api/sdk/v1/specialist-agent/test/stream/${rid}`,
    )
    eventSrcRef.current = es
    es.onmessage = (ev) => {
      try {
        const data: StreamEvent = JSON.parse(ev.data)
        setEvents(prev => [...prev, data])
        if (data.type === 'stream_close' || data.type === 'run_complete') {
          try { es.close() } catch { /* noop */ }
        }
      } catch { /* ignore malformed event payloads */ }
    }
    // Close instead of letting the browser reconnect; polling still
    // delivers the final result.
    es.onerror = () => { try { es.close() } catch { /* noop */ } }
  }

  const pollResult = async (rid: string) => {
    const maxAttempts = 360
    const intervalMs = 2000
    for (let i = 0; i < maxAttempts; i++) {
      // A newer run or an unmount supersedes this loop.
      if (activeRunRef.current !== rid) return
      try {
        const r = await fetch(
          `/api/sdk/v1/specialist-agent/run/${rid}/result`,
        )
        if (activeRunRef.current !== rid) return
        if (r.ok) {
          const d: RunResult = await r.json()
          if (d.finished) {
            setResult(d); setRunning(false); return
          }
        } else if (r.status === 404) {
          // The backend no longer knows this run; waiting will not help.
          setError(`Run ${rid} ist dem Backend nicht (mehr) bekannt.`)
          setRunning(false)
          return
        }
      } catch { /* transient network error: retry on next tick */ }
      await new Promise(s => setTimeout(s, intervalMs))
    }
    if (activeRunRef.current !== rid) return
    setError('Zeitüberschreitung: kein Ergebnis vom Backend erhalten.')
    setRunning(false)
  }

  const slotOutputs = useMemo(() => {
    if (!result) return []
    return Object.entries(result.results)
      .map(([slot, output]) => ({ slot, output }))
      .sort((a, b) => a.slot.localeCompare(b.slot))
  }, [result])

  const selectedAgent = agents.find(a => a.agent_id === agentId)

  return (
    <div className="space-y-4">
      <div className="rounded-lg border bg-white p-4 space-y-3">
        <h2 className="text-lg font-semibold">Agent-Test (max. {MAX_SLOTS} URLs)</h2>
        <p className="text-xs text-gray-500">
          Wählt einen Spezialisten-Agent und feuert ihn gegen 1-5 URLs gleichzeitig.
          Pro URL Speedometer + Findings + Empfehlungen mit Quellen-Herkunft (MC / Regex / LLM-Stufe).
          Keine Aussagen "rechtssicher" oder "garantiert" alle solchen Wörter werden vor Ausgabe gelöscht.
        </p>
        <div className="flex flex-wrap gap-3 items-end">
          <div>
            <label className="block text-xs font-medium text-gray-600">Agent</label>
            <select value={agentId}
              onChange={e => setAgentId(e.target.value)}
              className="border rounded px-2 py-1 text-sm">
              {agents.map(a => (
                <option key={a.agent_id} value={a.agent_id}>
                  {a.agent_id} v{a.agent_version} ({a.mc_count} MCs)
                </option>
              ))}
            </select>
          </div>
          {selectedAgent && (
            <div className="text-xs text-gray-500">
              Doc-Type: <code>{selectedAgent.doc_type}</code>
            </div>
          )}
        </div>
        <div className="space-y-1">
          {urls.map((u, i) => (
            <div key={i} className="flex gap-2">
              <span className="text-xs font-mono text-gray-500 w-8 pt-1.5">URL{i+1}</span>
              <input value={u}
                onChange={e => {
                  const value = e.target.value
                  setUrls(prev => prev.map((old, idx) => (idx === i ? value : old)))
                }}
                placeholder="https://example.com/impressum"
                className="flex-1 border rounded px-2 py-1 text-sm font-mono"/>
            </div>
          ))}
        </div>
        <div className="flex gap-2">
          <button onClick={startTest}
            disabled={running}
            className="bg-blue-600 hover:bg-blue-700 disabled:bg-gray-400 text-white text-sm px-4 py-2 rounded">
            {running ? 'Laufend...' : 'Test starten'}
          </button>
          {runId && (
            <span className="text-xs text-gray-500 self-center">
              Run-ID: <code>{runId}</code>
            </span>
          )}
        </div>
        {error && (
          <div className="bg-red-50 border-l-4 border-red-400 p-2 text-sm text-red-700">
            {error}
          </div>
        )}
      </div>
      {running && events.length > 0 && (
        <div className="rounded-lg border bg-gray-50 p-3 max-h-48 overflow-y-auto">
          <div className="text-xs font-mono space-y-1">
            {events.slice(-30).map((ev, i) => (
              <div key={i}>
                <span className="text-gray-400">[{ev.type}]</span>{' '}
                {ev.slot && <span className="text-blue-600">{ev.slot}</span>}{' '}
                {ev.severity && (
                  <span className={severityColor(ev.severity)}>
                    {ev.severity}
                  </span>
                )}{' '}
                {ev.title || ev.error || ev.label || ev.model || ev.url || ''}
              </div>
            ))}
          </div>
        </div>
      )}
      {slotOutputs.length > 0 && (
        <div className="space-y-3">
          {slotOutputs.map(({ slot, output }) => (
            <SlotCard key={slot} slot={slot} output={output} runId={runId}/>
          ))}
        </div>
      )}
    </div>
  )
}
/**
 * Result card for one slot: timing and confidence, link to the run's
 * artifact listing, speedometer, escalation log, findings (first eight
 * unless expanded) and recommendations.
 */
function SlotCard({ slot, output, runId }: {
  slot: string
  output: SlotOutput
  runId: string
}) {
  const [showAll, setShowAll] = useState(false)
  const { findings, recommendations, escalation_log: escalations } = output
  const findingCount = findings.length
  const visibleFindings = showAll ? findings : findings.slice(0, 8)
  const confidencePct = (output.confidence * 100).toFixed(0)
  const artifactsHref = `/api/sdk/v1/specialist-agent/run/${runId}/artifacts`
  const toggleLabel = showAll ? 'Weniger anzeigen' : `Alle ${findingCount} anzeigen`
  return (
    <div className="rounded-lg border bg-white p-4 space-y-3">
      <div className="flex items-baseline gap-3">
        <h3 className="font-semibold text-gray-800">Slot: {slot}</h3>
        <span className="text-xs text-gray-500">
          {output.duration_ms} ms · confidence {confidencePct}%
        </span>
        <a className="text-xs text-blue-600 hover:underline ml-auto"
          href={artifactsHref}
          target="_blank" rel="noreferrer">
          Artefakte
        </a>
      </div>
      <Speedometer
        total={output.mc_total}
        ok={output.mc_ok}
        na={output.mc_na}
        high={output.mc_high}
        medium={output.mc_medium}
        low={output.mc_low}
      />
      {escalations.length > 0 && (
        <div className="text-xs text-gray-500">
          Eskalationen:{' '}
          {escalations.map((step, idx) => (
            <span key={idx} className="mr-2">
              {step.stage}/{step.model} {step.success ? '✓' : '✗'} ({step.duration_ms} ms)
            </span>
          ))}
        </div>
      )}
      {findingCount > 0 && (
        <div className="space-y-1">
          <div className="text-xs font-semibold uppercase text-gray-600">
            Findings ({findingCount})
          </div>
          {visibleFindings.map(finding => (
            <FindingRow key={finding.check_id} f={finding}/>
          ))}
          {findingCount > 8 && (
            <button onClick={() => setShowAll(current => !current)}
              className="text-xs text-blue-600 hover:underline">
              {toggleLabel}
            </button>
          )}
        </div>
      )}
      {recommendations.length > 0 && (
        <div className="space-y-1">
          <div className="text-xs font-semibold uppercase text-gray-600">
            Empfehlungen ({recommendations.length}, gerollupt)
          </div>
          {recommendations.map(rec => (
            <div key={rec.recommendation_id}
              className="border-l-2 border-emerald-400 bg-emerald-50 p-2 text-xs">
              <div className="font-semibold">{rec.title}</div>
              <div className="text-gray-600">{rec.body}</div>
              <div className="text-[10px] text-gray-500 mt-1">
                {rec.related_finding_ids.length} Finding(s) · ~{rec.estimated_effort_hours}h
              </div>
            </div>
          ))}
        </div>
      )}
    </div>
  )
}
/**
 * Stacked bar plus legend for the check counters of one slot.
 * Each segment's width is its share of `total`; a total of zero is
 * treated as one so the division stays defined.
 */
function Speedometer({ total, ok, na, high, medium, low }: {
  total: number
  ok: number
  na: number
  high: number
  medium: number
  low: number
}) {
  const safeTotal = Math.max(total, 1)
  const segments = [
    { name: 'OK', count: ok, color: '#10b981' },
    { name: 'n/a', count: na, color: '#94a3b8' },
    { name: 'HIGH', count: high, color: '#dc2626' },
    { name: 'MEDIUM', count: medium, color: '#f59e0b' },
    { name: 'LOW', count: low, color: '#3b82f6' },
  ]
  return (
    <div className="space-y-1">
      <div className="text-xs text-gray-500">{total} MCs geprüft</div>
      <div className="flex h-4 rounded overflow-hidden border">
        {segments.map(seg => (
          <Bar key={seg.name} pct={(seg.count / safeTotal) * 100} color={seg.color}/>
        ))}
      </div>
      <div className="flex flex-wrap gap-2 text-xs">
        {segments.map(seg => (
          <Legend key={seg.name} color={seg.color} label={`${seg.name} ${seg.count}`}/>
        ))}
      </div>
    </div>
  )
}
/** One coloured segment of the speedometer bar, `pct` percent wide. */
function Bar({ pct, color }: { pct: number; color: string }) {
  const segmentStyle = { width: `${pct}%`, background: color }
  return <div style={segmentStyle}/>
}
/** Legend entry: a small colour swatch followed by its label. */
function Legend({ color, label }: { color: string; label: string }) {
  const swatch = (
    <span style={{ background: color }} className="w-2 h-2 inline-block rounded"/>
  )
  return (
    <span className="inline-flex items-center gap-1">
      {swatch}
      <span>{label}</span>
    </span>
  )
}
/**
 * Renders one finding: severity, check id, distinct source-type tags,
 * title and, when present, norm, quoted evidence and suggested action.
 */
function FindingRow({ f }: { f: Finding }) {
  const color = severityHex(f.severity)
  // Distinct source types, first occurrence order preserved.
  const sourceTags = [...new Set((f.sources ?? []).map(s => s.source_type))]
  return (
    <div className="p-2 border-l-2" style={{ borderColor: color }}>
      <div className="flex items-baseline gap-2 text-xs">
        <span style={{ color }} className="font-semibold">{f.severity}</span>
        <code className="text-gray-500">{f.check_id}</code>
        {sourceTags.map(t => (
          <span key={t} className="px-1 bg-gray-100 rounded text-[10px]">{t}</span>
        ))}
      </div>
      <div className="text-sm">{f.title}</div>
      {f.norm && <div className="text-[11px] text-gray-500">{f.norm}</div>}
      {f.evidence && (
        // Evidence is a quotation: quote it on both sides (the closing
        // quote used to be rendered without an opening one).
        <div className="text-[11px] italic text-gray-600 mt-1">&quot;{f.evidence}&quot;</div>
      )}
      {f.action && (
        <div className="text-[11px] text-emerald-700 mt-1">
          {f.action}
        </div>
      )}
    </div>
  )
}
/** Maps a severity label to the Tailwind text classes used in the live event log. */
function severityColor(sev: string) {
  switch (sev) {
    case 'HIGH':
      return 'text-red-600 font-semibold'
    case 'MEDIUM':
      return 'text-amber-600 font-semibold'
    case 'LOW':
      return 'text-blue-600'
    default:
      return 'text-gray-600'
  }
}
/** Maps a severity label to the hex colour used for finding rows. */
function severityHex(sev: string) {
  switch (sev) {
    case 'HIGH':
      return '#dc2626'
    case 'MEDIUM':
      return '#f59e0b'
    case 'LOW':
      return '#3b82f6'
    default:
      return '#94a3b8'
  }
}
+4 -1
View File
@@ -5,13 +5,15 @@ import { ScanResult } from './_components/ScanResult'
import { ComplianceCheckTab } from './_components/ComplianceCheckTab'
import { BannerCheckTab } from './_components/BannerCheckTab'
import { ComplianceFAQ } from './_components/ComplianceFAQ'
import { AgentTestTab } from './_components/AgentTestTab'
type AnalysisTab = 'scan' | 'compliance-check' | 'banner-check'
type AnalysisTab = 'scan' | 'compliance-check' | 'banner-check' | 'agent-test'
const TABS: { id: AnalysisTab; label: string; desc: string }[] = [
{ id: 'scan', label: 'Website-Scan', desc: 'Rechtliche Dokumente finden + Dienstleister erkennen' },
{ id: 'compliance-check', label: 'Compliance-Check', desc: 'Alle rechtlichen Dokumente zusammen pruefen' },
{ id: 'banner-check', label: 'Banner-Check', desc: 'Cookie-Banner auf DSGVO-Konformitaet testen' },
{ id: 'agent-test', label: 'Agent-Test', desc: 'Specialist-Agent gegen 5 URLs isoliert testen' },
]
export default function AgentPage() {
@@ -186,6 +188,7 @@ export default function AgentPage() {
{tab === 'compliance-check' && <ComplianceCheckTab />}
{tab === 'banner-check' && <BannerCheckTab />}
{tab === 'agent-test' && <AgentTestTab />}
<ComplianceFAQ />
</div>
@@ -74,6 +74,7 @@ _ROUTER_MODULES = [
"founding_wizard_routes",
"licenses_routes",
"template_rule_routes",
"specialist_agent_routes",
]
_loaded_count = 0
@@ -0,0 +1,378 @@
"""SSE-Endpoint für den Agent-Test-Harness.
User-Vorgabe 2026-06-08: pro Agent isoliert testen mit z.B. 5 URLs
gleichzeitig. Live-Stream der Events ins Frontend.
Endpoints:
GET /specialist-agent/agents
POST /specialist-agent/test/start { agent_id, urls, raw_texts }
GET /specialist-agent/test/stream/{run_id} → SSE-Stream
GET /specialist-agent/run/{run_id}/result
GET /specialist-agent/run/{run_id}/artifacts
GET /specialist-agent/run/{run_id}/artifact/{relpath}
DELETE /specialist-agent/run/{run_id}
GET /specialist-agent/runs
"""
from __future__ import annotations
import asyncio
import json
import logging
import os
import uuid
from collections.abc import AsyncGenerator
from typing import Any
import httpx
from fastapi import APIRouter, HTTPException
from fastapi.responses import FileResponse, StreamingResponse
from pydantic import BaseModel, Field
from compliance.services.specialist_agents import REGISTRY, AgentInput
from compliance.services.specialist_agents._evidence_vault import (
EvidenceVault,
delete_run as vault_delete_run,
list_runs as vault_list_runs,
)
logger = logging.getLogger(__name__)
# Base URL of the consent-tester service that _fetch_text posts to;
# can be overridden with the CONSENT_TESTER_URL environment variable.
CONSENT_TESTER_URL = os.environ.get(
    "CONSENT_TESTER_URL",
    "http://bp-compliance-consent-tester:8094",
)
router = APIRouter(prefix="/specialist-agent", tags=["specialist-agent"])
# In-memory event queues, one per run_id. Lost on restart, but sufficient
# for a live test tool (no persistence needed).
_run_queues: dict[str, asyncio.Queue] = {}
# Per-run bookkeeping (agent_id, started/finished flags, slot_count and the
# per-slot results), read by the result endpoint.
_run_states: dict[str, dict[str, Any]] = {}
class TestStartRequest(BaseModel):
    """Request body of POST /test/start."""

    # Id of the agent to run; looked up in REGISTRY.
    agent_id: str
    # Each URL becomes one slot ("url1", "url2", ...) whose text is fetched first.
    urls: list[str] = Field(default_factory=list, max_length=10)
    # Each raw text becomes one slot ("text1", "text2", ...) evaluated as given.
    raw_texts: list[str] = Field(default_factory=list, max_length=10)
    # The remaining fields are passed unchanged into every slot's AgentInput.
    business_scope: list[str] = Field(default_factory=list)
    company_name: str = ""
    origin_domain: str = ""
class TestStartResponse(BaseModel):
    """Response of POST /test/start."""

    # Id to use for the stream, result, artifact and delete endpoints.
    run_id: str
    agent_id: str
    # Slot count as computed by start_test for this run.
    slot_count: int
@router.get("/agents")
async def list_agents() -> dict[str, Any]:
    """Return the specialist agents currently known to the registry."""
    registered = REGISTRY.list_agents()
    return {"agents": registered}
@router.post("/test/start", response_model=TestStartResponse)
async def start_test(req: TestStartRequest) -> TestStartResponse:
    """Start a multi-slot test run against one agent.

    Registers the event queue and the run state, launches the orchestrator
    as a background task and returns immediately. The client then opens the
    SSE stream at /test/stream/{run_id} to receive events.

    Raises:
        HTTPException 404: the requested agent is not registered.
        HTTPException 400: neither urls nor raw_texts were supplied.
    """
    agent = REGISTRY.get(req.agent_id)
    if agent is None:
        raise HTTPException(404, f"agent '{req.agent_id}' nicht registriert")
    # The orchestrator starts one slot per URL and one per raw text, so
    # the reported slot count is the sum of both lists.
    slots = len(req.urls) + len(req.raw_texts)
    if slots == 0:
        raise HTTPException(400, "urls oder raw_texts dürfen nicht leer sein")
    run_id = uuid.uuid4().hex[:16]
    _run_queues[run_id] = asyncio.Queue(maxsize=500)
    state: dict[str, Any] = {
        "agent_id": req.agent_id,
        "started": False,
        "finished": False,
        "slot_count": slots,
        "results": {},
    }
    _run_states[run_id] = state
    vault = EvidenceVault(agent.agent_id, agent.agent_version,
                          run_id=run_id)
    # Keep a strong reference to the task: the event loop only holds weak
    # references, so an unreferenced task may be garbage-collected before
    # it finishes.
    state["task"] = asyncio.create_task(
        _run_test_orchestrator(run_id, req, vault),
    )
    return TestStartResponse(
        run_id=run_id,
        agent_id=req.agent_id,
        slot_count=slots,
    )
@router.get("/test/stream/{run_id}")
async def stream_test(run_id: str) -> StreamingResponse:
    """Open the SSE stream for a run; 404 when no queue exists for run_id."""
    if run_id not in _run_queues:
        raise HTTPException(404, "run_id unbekannt")
    sse_headers = {
        "Cache-Control": "no-cache",
        "X-Accel-Buffering": "no",  # nginx
        "Connection": "keep-alive",
    }
    return StreamingResponse(
        _event_generator(run_id),
        media_type="text/event-stream",
        headers=sse_headers,
    )
async def _event_generator(run_id: str) -> AsyncGenerator[str, None]:
"""Reads events from the queue until the run is finished."""
queue = _run_queues[run_id]
# Initial hello
yield _format_sse({"type": "hello", "run_id": run_id})
try:
while True:
try:
event = await asyncio.wait_for(queue.get(), timeout=30.0)
except asyncio.TimeoutError:
# heartbeat
yield _format_sse({"type": "heartbeat"})
if _run_states.get(run_id, {}).get("finished"):
yield _format_sse({"type": "stream_close"})
return
continue
yield _format_sse(event)
if event.get("type") in ("run_complete", "run_error"):
yield _format_sse({"type": "stream_close"})
return
finally:
# Defer cleanup: keep state for 5 min so late GETs can read
# results from _run_states. The queue can be released earlier.
asyncio.get_event_loop().call_later(
300, lambda: _run_queues.pop(run_id, None),
)
def _format_sse(payload: dict) -> str:
"""SSE event line format."""
return f"data: {json.dumps(payload, default=str)}\n\n"
async def _emit(run_id: str, event: dict) -> None:
q = _run_queues.get(run_id)
if q is None:
return
try:
await q.put(event)
except Exception:
pass
async def _run_test_orchestrator(
run_id: str,
req: TestStartRequest,
vault: EvidenceVault,
) -> None:
"""Kernlogik: pro URL / raw_text parallel den Agent feuern."""
agent = REGISTRY.get(req.agent_id)
if agent is None:
await _emit(run_id, {"type": "run_error",
"error": "agent gone"})
return
_run_states[run_id]["started"] = True
await _emit(run_id, {
"type": "run_started",
"agent_id": agent.agent_id,
"agent_version": agent.agent_version,
"slot_count": _run_states[run_id]["slot_count"],
})
slot_jobs: list[asyncio.Task] = []
# URLs first, then raw_texts. Slots numbered url1, url2, …, text1, …
for i, url in enumerate(req.urls, start=1):
slot = f"url{i}"
slot_jobs.append(asyncio.create_task(
_process_slot(run_id, slot, agent, url, "", req, vault),
))
for j, raw in enumerate(req.raw_texts, start=1):
slot = f"text{j}"
slot_jobs.append(asyncio.create_task(
_process_slot(run_id, slot, agent, "", raw, req, vault),
))
try:
await asyncio.gather(*slot_jobs, return_exceptions=True)
finally:
manifest = vault.finalize()
_run_states[run_id]["finished"] = True
await _emit(run_id, {
"type": "run_complete",
"vault_url": vault.url(),
"manifest_asset_count": len(manifest.get("assets") or []),
})
async def _process_slot(
    run_id: str,
    slot: str,
    agent,
    url: str,
    raw_text: str,
    req: TestStartRequest,
    vault: EvidenceVault,
) -> None:
    """Obtain the text (URL fetch or raw text), run the agent, store the output.

    Steps, each announced as a stream event:
      1. "slot_started"; for URL slots "slot_fetching" and, on failure,
         "slot_fetch_error".
      2. A non-empty text is stored in the vault as raw/source.txt
         ("slot_text_ready").
      3. The agent is evaluated ("slot_agent_running"); a crash is reported
         as "slot_agent_error" and ends the slot.
      4. The output is stored in the vault and in the run state, then one
         "finding" event per finding, one "escalation" event per escalation
         step and a final "slot_complete" summary are emitted.

    Note: the agent is evaluated even when fetching produced no text.
    """
    label = url or f"text-slot-{slot}"
    await _emit(run_id, {"type": "slot_started", "slot": slot,
                         "label": label})
    text = raw_text
    fetch_err = ""
    if url and not raw_text:
        await _emit(run_id, {"type": "slot_fetching",
                             "slot": slot, "url": url})
        text, fetch_err = await _fetch_text(url)
        if fetch_err:
            await _emit(run_id, {
                "type": "slot_fetch_error",
                "slot": slot,
                "error": fetch_err,
            })
    if text:
        vault.put_bytes("raw", slot, "source.txt",
                        text.encode("utf-8"),
                        mime="text/plain")
        await _emit(run_id, {
            "type": "slot_text_ready",
            "slot": slot,
            "char_count": len(text),
        })
    agent_input = AgentInput(
        doc_type=agent.doc_type,
        text=text,
        url=url,
        business_scope=req.business_scope,
        company_name=req.company_name,
        origin_domain=req.origin_domain,
    )
    await _emit(run_id, {"type": "slot_agent_running", "slot": slot})
    try:
        output = await agent.evaluate(agent_input)
    except Exception as e:
        logger.exception("agent crashed slot=%s", slot)
        await _emit(run_id, {
            "type": "slot_agent_error", "slot": slot,
            "error": f"{type(e).__name__}: {str(e)[:160]}",
        })
        return
    # Serialise the model once; parse it separately for the vault and for
    # the run state so the two consumers never share a mutable object.
    serialized = output.model_dump_json()
    # Persist findings as JSON in vault
    vault.put_json("finding", slot, "output.json", json.loads(serialized))
    # Update state for the later result query. The run may have been
    # deleted in the meantime, in which case there is nothing to update.
    state = _run_states.get(run_id)
    if state is not None:
        state["results"][slot] = json.loads(serialized)
    # Stream finding-emitted events
    for f in output.findings:
        await _emit(run_id, {
            "type": "finding",
            "slot": slot,
            "check_id": f.check_id,
            "severity": f.severity,
            "title": f.title,
            "field_id": f.field_id,
        })
    for esc in output.escalation_log:
        await _emit(run_id, {
            "type": "escalation",
            "slot": slot,
            "stage": esc.stage,
            "model": esc.model,
            "success": esc.success,
            "duration_ms": esc.duration_ms,
        })
    await _emit(run_id, {
        "type": "slot_complete",
        "slot": slot,
        "duration_ms": output.duration_ms,
        "mc_total": output.mc_total,
        "mc_ok": output.mc_ok,
        "mc_na": output.mc_na,
        "mc_high": output.mc_high,
        "mc_medium": output.mc_medium,
        "mc_low": output.mc_low,
        "findings_count": len(output.findings),
        "recommendations_count": len(output.recommendations),
        "confidence": output.confidence,
    })
async def _fetch_text(url: str) -> tuple[str, str]:
"""Nutzt den consent-tester DSI-Discovery für Volltext."""
try:
async with httpx.AsyncClient(timeout=120.0) as client:
resp = await client.post(
f"{CONSENT_TESTER_URL}/dsi-discovery",
json={"url": url, "max_documents": 5},
timeout=120.0,
)
if resp.status_code != 200:
return "", f"HTTP {resp.status_code}"
data = resp.json()
docs = data.get("documents", []) or []
if not docs:
return "", "no documents discovered"
texts: list[str] = []
for doc in docs:
t = (doc.get("full_text", "") or
doc.get("text_preview", "") or "")
if t and len(t) > 50:
texts.append(t)
return "\n\n".join(texts), ""
except Exception as e:
return "", f"{type(e).__name__}: {str(e)[:160]}"
# ── Run / Vault Queries ──────────────────────────────────────────────
@router.get("/run/{run_id}/result")
async def get_run_result(run_id: str) -> dict[str, Any]:
    """Return the complete results of a run (used by the frontend to refresh).

    Responds with 404 when no state exists for run_id.
    """
    state = _run_states.get(run_id)
    if state is None:
        raise HTTPException(404, "run unbekannt")
    artifacts_url = f"/api/v1/specialist-agent/run/{run_id}/artifacts"
    payload: dict[str, Any] = {"run_id": run_id}
    payload["agent_id"] = state["agent_id"]
    payload["finished"] = state["finished"]
    payload["results"] = state["results"]
    payload["vault_url"] = artifacts_url
    return payload
@router.get("/run/{run_id}/artifacts")
async def list_run_artifacts(run_id: str) -> dict[str, Any]:
    """List the assets of a run by returning the vault's manifest."""
    # Agent id and version are unknown here; placeholders are passed instead.
    run_vault = EvidenceVault("?", "?", run_id=run_id)
    manifest = run_vault._manifest
    return {"run_id": run_id, "manifest": manifest}
@router.get("/run/{run_id}/artifact/{path:path}")
async def get_run_artifact(run_id: str, path: str):
    """Serve a single artifact from the run's vault; 404 when it cannot be resolved."""
    run_vault = EvidenceVault("?", "?", run_id=run_id)
    resolved = run_vault.asset_path(path)
    if resolved is not None:
        return FileResponse(str(resolved))
    raise HTTPException(404, "asset not found")
@router.delete("/run/{run_id}")
async def delete_run(run_id: str) -> dict[str, bool]:
    """DSR Art. 17: delete the whole run, vault included.

    The vault is removed first, then the in-memory queue and state. The
    response reports what the vault deletion returned.
    """
    deleted_vault = vault_delete_run(run_id)
    for registry in (_run_queues, _run_states):
        registry.pop(run_id, None)
    return {"deleted": deleted_vault}
@router.get("/runs")
async def list_runs(limit: int = 20) -> dict[str, Any]:
    """List the most recent runs stored in the vault (at most `limit`)."""
    recent = vault_list_runs(limit)
    return {"runs": recent}
@@ -0,0 +1,129 @@
"""Tests für SSE-Endpoints des Specialist-Agent-Test-Harness."""
from __future__ import annotations
import asyncio
import json
from unittest.mock import AsyncMock, patch
import pytest
from fastapi.testclient import TestClient
@pytest.fixture
def app(tmp_path, monkeypatch):
    """FastAPI app with only the specialist-agent router, vault root in tmp_path."""
    vault_root = tmp_path / "vault"
    monkeypatch.setenv("EVIDENCE_VAULT_ROOT", str(vault_root))
    # Imported after the environment variable is set.
    from fastapi import FastAPI
    from compliance.api.specialist_agent_routes import router
    application = FastAPI()
    application.include_router(router, prefix="/api/v1")
    return application
@pytest.fixture
def client(app):
    """Test client bound to the fixture app."""
    test_client = TestClient(app)
    return test_client
def test_list_agents(client):
    """The agent listing contains the impressum and cookie_policy agents."""
    response = client.get("/api/v1/specialist-agent/agents")
    assert response.status_code == 200
    registered = {entry["agent_id"] for entry in response.json()["agents"]}
    assert "impressum" in registered
    assert "cookie_policy" in registered
def test_start_test_invalid_agent(client):
    """Starting a run for an unregistered agent yields 404."""
    body = {"agent_id": "ghost", "raw_texts": ["test"]}
    response = client.post("/api/v1/specialist-agent/test/start", json=body)
    assert response.status_code == 404
def test_start_test_no_input(client):
    """Starting a run without urls and raw_texts yields 400."""
    body = {"agent_id": "impressum"}
    response = client.post("/api/v1/specialist-agent/test/start", json=body)
    assert response.status_code == 400
def test_start_test_with_raw_text(client):
    """A single raw text starts a run with one slot and returns a run id."""
    sample = ("Tesla Germany GmbH "
              "Berlin Email: x@y.com "
              "HRB 123 Charlottenburg")
    response = client.post(
        "/api/v1/specialist-agent/test/start",
        json={"agent_id": "impressum", "raw_texts": [sample]},
    )
    assert response.status_code == 200
    started = response.json()
    assert started["agent_id"] == "impressum"
    assert started["slot_count"] == 1
    assert started["run_id"]
def test_stream_unknown_run(client):
    """Opening the stream for an unknown run id yields 404."""
    response = client.get("/api/v1/specialist-agent/test/stream/ghost")
    assert response.status_code == 404
def test_run_result_after_text_input(client, monkeypatch):
    """A raw-text run finishes and reports the expected impressum finding."""
    import time

    # Skip LLM
    async def _no_cascade(*a, **kw):
        return None, []

    monkeypatch.setattr(
        "compliance.services.specialist_agents.impressum.agent.cascade",
        _no_cascade,
    )
    sample = (
        "Tesla Germany GmbH\nLudwig-Prandtl-Strasse 25\n"
        "12526 Berlin\nDeutschland\nEmail: x@y.com\n"
        "Tel: +49 89 1250 16 800\n"
        "Management: Elon Musk\n"
        "HRB 218904 B Charlottenburg"
    )
    started = client.post(
        "/api/v1/specialist-agent/test/start",
        json={"agent_id": "impressum", "raw_texts": [sample]},
    )
    run_id = started.json()["run_id"]
    result_url = f"/api/v1/specialist-agent/run/{run_id}/result"
    # Give the async task time to finish (small text → fast)
    attempts_left = 40
    while attempts_left > 0:
        if client.get(result_url).json().get("finished"):
            break
        time.sleep(0.05)
        attempts_left -= 1
    body = client.get(result_url).json()
    assert body["finished"]
    assert "text1" in body["results"]
    slot_output = body["results"]["text1"]
    field_ids = {finding["field_id"] for finding in slot_output["findings"]}
    # Tesla pattern: German label missing + VAT id missing
    assert "vertretungsberechtigte_label_korrekt" in field_ids
def test_artifacts_listing(client, monkeypatch):
    """After a finished run the manifest lists both "finding" and "raw" assets."""
    import time

    async def _no_cascade(*a, **kw):
        return None, []

    monkeypatch.setattr(
        "compliance.services.specialist_agents.impressum.agent.cascade",
        _no_cascade,
    )
    sample = ("Tesla Germany GmbH "
              "Berlin Email: x@y.com "
              "HRB 123 Charlottenburg")
    started = client.post(
        "/api/v1/specialist-agent/test/start",
        json={"agent_id": "impressum", "raw_texts": [sample]},
    )
    run_id = started.json()["run_id"]
    result_url = f"/api/v1/specialist-agent/run/{run_id}/result"
    attempts_left = 40
    while attempts_left > 0:
        if client.get(result_url).json().get("finished"):
            break
        time.sleep(0.05)
        attempts_left -= 1
    listing = client.get(
        f"/api/v1/specialist-agent/run/{run_id}/artifacts",
    )
    assert listing.status_code == 200
    manifest = listing.json()["manifest"]
    kinds = {asset["kind"] for asset in manifest["assets"]}
    assert "finding" in kinds
    assert "raw" in kinds