This repository has been archived on 2026-02-15. You can view files and clone it. You cannot open issues or pull requests or push a commit.
Files
breakpilot-pwa/backend/api/tests/runners/bqas_runner.py
Benjamin Admin bfdaf63ba9 fix: Restore all files lost during destructive rebase
A previous `git pull --rebase origin main` dropped 177 local commits,
losing 3400+ files across admin-v2, backend, studio-v2, website,
klausur-service, and many other services. The partial restore attempt
(660295e2) only recovered some files.

This commit restores all missing files from pre-rebase ref 98933f5e
while preserving post-rebase additions (night-scheduler, night-mode UI,
NightModeWidget dashboard integration).

Restored features include:
- AI Module Sidebar (FAB), OCR Labeling, OCR Compare
- GPU Dashboard, RAG Pipeline, Magic Help
- Klausur-Korrektur (8 files), Abitur-Archiv (5+ files)
- Companion, Zeugnisse-Crawler, Screen Flow
- Full backend, studio-v2, website, klausur-service
- All compliance SDKs, agent-core, voice-service
- CI/CD configs, documentation, scripts

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-02-09 09:51:32 +01:00

286 lines
8.9 KiB
Python

"""
BQAS Test Runner
Proxy zu den BQAS-Endpoints im Voice-Service.
"""
import httpx
from datetime import datetime
from typing import Dict, Optional
from dataclasses import dataclass, field
@dataclass
class BQASResult:
    """Outcome of a single BQAS test-suite run.

    Only ``suite_type`` is mandatory. Every other field has a neutral
    default (zero, empty container or empty string), so a result can be
    built from a partial or failed run.
    """

    # Suite that produced the result: "golden", "rag" or "synthetic".
    suite_type: str

    # Test counters.
    total_tests: int = 0
    passed_tests: int = 0
    failed_tests: int = 0

    # Aggregate score and run time in seconds.
    avg_score: float = 0.0
    duration_seconds: float = 0.0

    # Per-run details; each instance gets its own fresh dict / list.
    metrics: Dict = field(default_factory=dict)
    failed_test_ids: list = field(default_factory=list)

    # Free-form text: stringified payload, error text or a status note.
    raw_output: str = ""
class BQASRunner:
    """Runner for the BQAS test suites.

    Forwards requests to the BQAS endpoints of the voice service (by
    default ``http://localhost:8091``) and converts the answers into
    ``BQASResult`` objects or plain dicts / lists. When the service cannot
    be reached, canned demo data is returned instead of an error.
    """

    VOICE_SERVICE_URL = "http://localhost:8091"

    # Sentinel returned by ``_fetch_json`` when no usable response was
    # received. A dedicated object is required because ``None`` is itself a
    # valid decoded JSON payload.
    _UNAVAILABLE = object()

    def __init__(self, api_base: Optional[str] = None):
        """Create a runner.

        Args:
            api_base: Base URL of the voice service. ``None`` or an empty
                string selects ``VOICE_SERVICE_URL``.
        """
        self.api_base = api_base or self.VOICE_SERVICE_URL

    async def run_golden(self, timeout: int = 120) -> "BQASResult":
        """Run the golden test suite.

        Args:
            timeout: HTTP timeout in seconds for the request.

        Returns:
            BQASResult with all metrics.
        """
        return await self._run_suite("golden", timeout)

    async def run_rag(self, timeout: int = 120) -> "BQASResult":
        """Run the RAG test suite.

        Args:
            timeout: HTTP timeout in seconds for the request.

        Returns:
            BQASResult with all metrics.
        """
        return await self._run_suite("rag", timeout)

    async def run_synthetic(self, timeout: int = 300) -> "BQASResult":
        """Run the synthetic test suite.

        Takes longer because of LLM generation, hence the larger default
        timeout.

        Args:
            timeout: HTTP timeout in seconds for the request.

        Returns:
            BQASResult with all metrics.
        """
        return await self._run_suite("synthetic", timeout)

    async def _run_suite(self, suite_type: str, timeout: int) -> "BQASResult":
        """POST to the suite's run endpoint and wrap the answer.

        Never raises for request or decoding problems; every failure is
        reported through the returned result's ``raw_output``.

        Args:
            suite_type: Suite name, used as the last URL path segment.
            timeout: Timeout in seconds handed to ``httpx.AsyncClient``.

        Returns:
            BQASResult filled from the response's ``metrics`` object on
            HTTP 200; a result carrying only duration and an error text on
            other status codes, timeouts and unexpected errors; demo data
            when the connection cannot be established.
        """
        start_time = datetime.now()

        def elapsed() -> float:
            return (datetime.now() - start_time).total_seconds()

        try:
            async with httpx.AsyncClient(timeout=float(timeout)) as client:
                response = await client.post(
                    f"{self.api_base}/api/v1/bqas/run/{suite_type}",
                )
                if response.status_code != 200:
                    # Report the elapsed time here as well, like the other
                    # failure paths below.
                    return BQASResult(
                        suite_type=suite_type,
                        duration_seconds=elapsed(),
                        raw_output=f"HTTP {response.status_code}: {response.text}",
                    )
                data = response.json()
                # "metrics" may be absent or explicitly null; treat both as empty.
                metrics = data.get("metrics") or {}
                return BQASResult(
                    suite_type=suite_type,
                    total_tests=metrics.get("total_tests", 0),
                    passed_tests=metrics.get("passed_tests", 0),
                    failed_tests=metrics.get("failed_tests", 0),
                    avg_score=metrics.get("avg_composite_score", 0.0),
                    duration_seconds=elapsed(),
                    metrics=metrics,
                    failed_test_ids=metrics.get("failed_test_ids", []),
                    raw_output=str(data),
                )
        except httpx.TimeoutException:
            return BQASResult(
                suite_type=suite_type,
                duration_seconds=elapsed(),
                raw_output=f"Timeout nach {timeout} Sekunden",
            )
        except httpx.ConnectError:
            # Demo data when the service is not reachable.
            return self._get_demo_result(suite_type)
        except Exception as e:
            # Deliberate catch-all: callers receive a result object, never
            # an exception, for anything that goes wrong during the run.
            return BQASResult(
                suite_type=suite_type,
                duration_seconds=elapsed(),
                raw_output=str(e),
            )

    def _get_demo_result(self, suite_type: str) -> "BQASResult":
        """Return canned demo data for when the service is not reachable.

        Args:
            suite_type: "golden" and "rag" have dedicated demo data; any
                other value receives the synthetic-suite demo data.

        Returns:
            BQASResult labelled with ``suite_type`` and a demo-mode note in
            ``raw_output``.
        """
        if suite_type == "golden":
            return BQASResult(
                suite_type=suite_type,
                total_tests=97,
                passed_tests=89,
                failed_tests=8,
                avg_score=4.15,
                duration_seconds=45.2,
                metrics={
                    "avg_intent_accuracy": 91.7,
                    "avg_faithfulness": 4.2,
                    "avg_relevance": 4.1,
                    "avg_coherence": 4.3,
                    "safety_pass_rate": 0.98,
                },
                failed_test_ids=[
                    "GT-023", "GT-045", "GT-067", "GT-072",
                    "GT-081", "GT-089", "GT-092", "GT-095",
                ],
                raw_output="Demo-Modus: Voice-Service nicht erreichbar",
            )
        elif suite_type == "rag":
            return BQASResult(
                suite_type=suite_type,
                total_tests=20,
                passed_tests=18,
                failed_tests=2,
                avg_score=4.25,
                duration_seconds=62.1,
                metrics={
                    "avg_faithfulness": 4.3,
                    "avg_relevance": 4.2,
                    "citation_accuracy": 0.92,
                },
                failed_test_ids=["RAG-EH-003", "RAG-HAL-002"],
                raw_output="Demo-Modus: Voice-Service nicht erreichbar",
            )
        else:  # synthetic
            return BQASResult(
                suite_type=suite_type,
                total_tests=50,
                passed_tests=45,
                failed_tests=5,
                avg_score=3.95,
                duration_seconds=180.5,
                metrics={
                    "avg_robustness": 3.8,
                    "avg_coherence": 4.1,
                },
                failed_test_ids=["SYN-001", "SYN-015", "SYN-023", "SYN-041", "SYN-048"],
                raw_output="Demo-Modus: Voice-Service nicht erreichbar",
            )

    async def _fetch_json(self, path: str, params: Optional[Dict] = None) -> object:
        """GET ``api_base + path`` and return the decoded JSON body.

        Args:
            path: URL path appended to ``api_base``.
            params: Optional query parameters.

        Returns:
            The decoded JSON payload on HTTP 200, otherwise the
            ``_UNAVAILABLE`` sentinel (non-200 status or any error).
        """
        try:
            async with httpx.AsyncClient(timeout=10.0) as client:
                response = await client.get(f"{self.api_base}{path}", params=params)
                if response.status_code == 200:
                    return response.json()
        except Exception:
            # Best effort by design: every failure (connection, timeout,
            # invalid JSON, ...) makes the caller fall back to demo data.
            return self._UNAVAILABLE
        return self._UNAVAILABLE

    async def get_latest_metrics(self) -> Optional[Dict]:
        """Fetch the latest metrics from the voice service.

        Returns:
            The service's JSON payload as-is on HTTP 200; otherwise demo
            data keyed by suite name ("synthetic" is ``None`` there).
        """
        data = await self._fetch_json("/api/v1/bqas/latest-metrics")
        if data is not self._UNAVAILABLE:
            return data

        # Demo data
        now = datetime.now().isoformat()
        return {
            "golden": {
                "total_tests": 97,
                "passed_tests": 89,
                "failed_tests": 8,
                "avg_composite_score": 4.15,
                "last_run": now,
            },
            "rag": {
                "total_tests": 20,
                "passed_tests": 18,
                "failed_tests": 2,
                "avg_composite_score": 4.25,
                "last_run": now,
            },
            "synthetic": None,
        }

    async def get_trend(self, days: int = 30) -> Optional[Dict]:
        """Fetch trend data.

        Args:
            days: Number of days, sent as the ``days`` query parameter.

        Returns:
            The service's JSON payload as-is on HTTP 200; otherwise demo
            trend data.
        """
        data = await self._fetch_json("/api/v1/bqas/trend", params={"days": days})
        if data is not self._UNAVAILABLE:
            return data

        # Demo data
        return {
            "dates": ["2026-01-02", "2026-01-09", "2026-01-16", "2026-01-23", "2026-01-30"],
            "scores": [3.9, 4.0, 4.1, 4.15, 4.15],
            "trend": "improving",
        }

    async def get_runs(self, limit: int = 20) -> list:
        """Fetch the most recent test runs.

        Args:
            limit: Maximum number of runs, sent as the ``limit`` query
                parameter.

        Returns:
            The ``runs`` list of the service's answer (empty list when the
            key is missing or null); demo runs when the service gives no
            usable JSON object.
        """
        data = await self._fetch_json("/api/v1/bqas/runs", params={"limit": limit})
        if isinstance(data, dict):
            return data.get("runs") or []

        # Demo data
        return [
            {
                "id": 1,
                "timestamp": "2026-01-30T07:00:00Z",
                "git_commit": "abc1234",
                "golden_score": 4.15,
                "total_tests": 97,
                "passed_tests": 89,
                "failed_tests": 8,
                "duration_seconds": 45.2,
            },
            {
                "id": 2,
                "timestamp": "2026-01-29T07:00:00Z",
                "git_commit": "def5678",
                "golden_score": 4.12,
                "total_tests": 97,
                "passed_tests": 88,
                "failed_tests": 9,
                "duration_seconds": 44.8,
            },
        ]