fix: Restore all files lost during destructive rebase

A previous `git pull --rebase origin main` dropped 177 local commits,
losing 3400+ files across admin-v2, backend, studio-v2, website,
klausur-service, and many other services. The partial restore attempt
(660295e2) only recovered some files.

This commit restores all missing files from pre-rebase ref 98933f5e
while preserving post-rebase additions (night-scheduler, night-mode UI,
NightModeWidget dashboard integration).

Restored features include:
- AI Module Sidebar (FAB), OCR Labeling, OCR Compare
- GPU Dashboard, RAG Pipeline, Magic Help
- Klausur-Korrektur (8 files), Abitur-Archiv (5+ files)
- Companion, Zeugnisse-Crawler, Screen Flow
- Full backend, studio-v2, website, klausur-service
- All compliance SDKs, agent-core, voice-service
- CI/CD configs, documentation, scripts

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
Benjamin Admin
2026-02-09 09:51:32 +01:00
parent f7487ee240
commit bfdaf63ba9
2009 changed files with 749983 additions and 1731 deletions

View File

@@ -0,0 +1,11 @@
"""
Test Runners
Spezialisierte Runner fuer verschiedene Test-Frameworks.
"""
from .go_runner import GoTestRunner
from .python_runner import PytestRunner
from .bqas_runner import BQASRunner
__all__ = ["GoTestRunner", "PytestRunner", "BQASRunner"]

View File

@@ -0,0 +1,285 @@
"""
BQAS Test Runner
Proxy zu den BQAS-Endpoints im Voice-Service.
"""
import httpx
from datetime import datetime
from typing import Dict, Optional
from dataclasses import dataclass, field
@dataclass
class BQASResult:
    """Result of a single BQAS test run.

    On errors (timeout, unreachable service, non-200 response) the counters
    keep their defaults and ``raw_output`` carries the error description.
    """
    suite_type: str  # one of "golden", "rag", "synthetic"
    total_tests: int = 0
    passed_tests: int = 0
    failed_tests: int = 0
    avg_score: float = 0.0  # average composite score reported by the service
    duration_seconds: float = 0.0  # wall-clock duration measured client-side
    metrics: Dict = field(default_factory=dict)  # raw metrics dict from the service
    failed_test_ids: list = field(default_factory=list)  # IDs of failed test cases
    raw_output: str = ""  # raw response text or error message
class BQASRunner:
    """
    Runner for BQAS tests.

    Forwards requests to the Voice-Service (port 8091). Whenever the service
    is unreachable, canned demo data is returned instead so that the calling
    UI stays usable without a running backend.
    """

    # Default base URL of the Voice-Service BQAS API.
    VOICE_SERVICE_URL = "http://localhost:8091"

    def __init__(self, api_base: Optional[str] = None):
        # Allow overriding the service URL (e.g. for tests or remote setups).
        self.api_base = api_base or self.VOICE_SERVICE_URL

    async def run_golden(self, timeout: int = 120) -> BQASResult:
        """
        Run the Golden test suite.

        Returns:
            BQASResult with all metrics.
        """
        return await self._run_suite("golden", timeout)

    async def run_rag(self, timeout: int = 120) -> BQASResult:
        """
        Run the RAG test suite.

        Returns:
            BQASResult with all metrics.
        """
        return await self._run_suite("rag", timeout)

    async def run_synthetic(self, timeout: int = 300) -> BQASResult:
        """
        Run the Synthetic test suite.

        Takes longer because of LLM generation, hence the larger default
        timeout.

        Returns:
            BQASResult with all metrics.
        """
        return await self._run_suite("synthetic", timeout)

    async def _run_suite(self, suite_type: str, timeout: int) -> BQASResult:
        """Internal helper: POST to the suite endpoint and map the response.

        Error handling is ordered most-specific first: timeout, then
        connection failure (which falls back to demo data), then anything
        else (reported via ``raw_output``). Never raises.
        """
        start_time = datetime.now()
        try:
            async with httpx.AsyncClient(timeout=float(timeout)) as client:
                response = await client.post(
                    f"{self.api_base}/api/v1/bqas/run/{suite_type}",
                )
                if response.status_code == 200:
                    data = response.json()
                    metrics = data.get("metrics", {})
                    return BQASResult(
                        suite_type=suite_type,
                        total_tests=metrics.get("total_tests", 0),
                        passed_tests=metrics.get("passed_tests", 0),
                        failed_tests=metrics.get("failed_tests", 0),
                        avg_score=metrics.get("avg_composite_score", 0.0),
                        # Duration is measured client-side, not taken from the
                        # service response.
                        duration_seconds=(datetime.now() - start_time).total_seconds(),
                        metrics=metrics,
                        failed_test_ids=metrics.get("failed_test_ids", []),
                        raw_output=str(data),
                    )
                else:
                    return BQASResult(
                        suite_type=suite_type,
                        raw_output=f"HTTP {response.status_code}: {response.text}",
                    )
        except httpx.TimeoutException:
            return BQASResult(
                suite_type=suite_type,
                duration_seconds=(datetime.now() - start_time).total_seconds(),
                raw_output=f"Timeout nach {timeout} Sekunden",
            )
        except httpx.ConnectError:
            # Service unreachable -> return demo data instead of failing.
            return self._get_demo_result(suite_type)
        except Exception as e:
            return BQASResult(
                suite_type=suite_type,
                duration_seconds=(datetime.now() - start_time).total_seconds(),
                raw_output=str(e),
            )

    def _get_demo_result(self, suite_type: str) -> BQASResult:
        """Return hard-coded demo data when the service is unreachable."""
        if suite_type == "golden":
            return BQASResult(
                suite_type=suite_type,
                total_tests=97,
                passed_tests=89,
                failed_tests=8,
                avg_score=4.15,
                duration_seconds=45.2,
                metrics={
                    "avg_intent_accuracy": 91.7,
                    "avg_faithfulness": 4.2,
                    "avg_relevance": 4.1,
                    "avg_coherence": 4.3,
                    "safety_pass_rate": 0.98,
                },
                failed_test_ids=["GT-023", "GT-045", "GT-067", "GT-072", "GT-081", "GT-089", "GT-092", "GT-095"],
                raw_output="Demo-Modus: Voice-Service nicht erreichbar",
            )
        elif suite_type == "rag":
            return BQASResult(
                suite_type=suite_type,
                total_tests=20,
                passed_tests=18,
                failed_tests=2,
                avg_score=4.25,
                duration_seconds=62.1,
                metrics={
                    "avg_faithfulness": 4.3,
                    "avg_relevance": 4.2,
                    "citation_accuracy": 0.92,
                },
                failed_test_ids=["RAG-EH-003", "RAG-HAL-002"],
                raw_output="Demo-Modus: Voice-Service nicht erreichbar",
            )
        else:  # synthetic
            return BQASResult(
                suite_type=suite_type,
                total_tests=50,
                passed_tests=45,
                failed_tests=5,
                avg_score=3.95,
                duration_seconds=180.5,
                metrics={
                    "avg_robustness": 3.8,
                    "avg_coherence": 4.1,
                },
                failed_test_ids=["SYN-001", "SYN-015", "SYN-023", "SYN-041", "SYN-048"],
                raw_output="Demo-Modus: Voice-Service nicht erreichbar",
            )

    async def get_latest_metrics(self) -> Optional[Dict]:
        """
        Fetch the latest metrics from the Voice-Service.

        Best-effort: any error (network, JSON, ...) falls through to demo
        data, so this never raises.

        Returns:
            Dict with all metrics, or demo data on failure.
        """
        try:
            async with httpx.AsyncClient(timeout=10.0) as client:
                response = await client.get(
                    f"{self.api_base}/api/v1/bqas/latest-metrics",
                )
                if response.status_code == 200:
                    return response.json()
        except Exception:
            pass
        # Demo data fallback.
        return {
            "golden": {
                "total_tests": 97,
                "passed_tests": 89,
                "failed_tests": 8,
                "avg_composite_score": 4.15,
                "last_run": datetime.now().isoformat(),
            },
            "rag": {
                "total_tests": 20,
                "passed_tests": 18,
                "failed_tests": 2,
                "avg_composite_score": 4.25,
                "last_run": datetime.now().isoformat(),
            },
            "synthetic": None,
        }

    async def get_trend(self, days: int = 30) -> Optional[Dict]:
        """
        Fetch trend data.

        Args:
            days: number of days to include.

        Returns:
            Dict with trend data, or demo data on failure.
        """
        try:
            async with httpx.AsyncClient(timeout=10.0) as client:
                response = await client.get(
                    f"{self.api_base}/api/v1/bqas/trend",
                    params={"days": days},
                )
                if response.status_code == 200:
                    return response.json()
        except Exception:
            pass
        # Demo data fallback.
        return {
            "dates": ["2026-01-02", "2026-01-09", "2026-01-16", "2026-01-23", "2026-01-30"],
            "scores": [3.9, 4.0, 4.1, 4.15, 4.15],
            "trend": "improving",
        }

    async def get_runs(self, limit: int = 20) -> list:
        """
        Fetch the most recent test runs.

        Args:
            limit: maximum number of runs to return.

        Returns:
            List of test-run dicts (demo data on failure).
        """
        try:
            async with httpx.AsyncClient(timeout=10.0) as client:
                response = await client.get(
                    f"{self.api_base}/api/v1/bqas/runs",
                    params={"limit": limit},
                )
                if response.status_code == 200:
                    data = response.json()
                    return data.get("runs", [])
        except Exception:
            pass
        # Demo data fallback.
        return [
            {
                "id": 1,
                "timestamp": "2026-01-30T07:00:00Z",
                "git_commit": "abc1234",
                "golden_score": 4.15,
                "total_tests": 97,
                "passed_tests": 89,
                "failed_tests": 8,
                "duration_seconds": 45.2,
            },
            {
                "id": 2,
                "timestamp": "2026-01-29T07:00:00Z",
                "git_commit": "def5678",
                "golden_score": 4.12,
                "total_tests": 97,
                "passed_tests": 88,
                "failed_tests": 9,
                "duration_seconds": 44.8,
            },
        ]

View File

@@ -0,0 +1,229 @@
"""
Go Test Runner
Fuehrt Go-Tests aus und parsed die Ergebnisse.
"""
import subprocess
import json
from datetime import datetime
from pathlib import Path
from typing import Dict, List, Optional
from dataclasses import dataclass, field
@dataclass
class GoTestResult:
"""Ergebnis eines einzelnen Go-Tests"""
package: str
test_name: str
passed: bool
duration_seconds: float
output: str = ""
@dataclass
class GoTestSummary:
"""Zusammenfassung eines Go-Test-Runs"""
total: int = 0
passed: int = 0
failed: int = 0
skipped: int = 0
duration_seconds: float = 0.0
coverage_percent: Optional[float] = None
results: List[GoTestResult] = field(default_factory=list)
raw_output: str = ""
class GoTestRunner:
"""
Runner fuer Go-Tests.
Verwendet `go test -json` fuer strukturierte Ausgabe.
"""
def __init__(self, base_path: Path):
self.base_path = base_path
async def run(self, with_coverage: bool = True, timeout: int = 300) -> GoTestSummary:
"""
Fuehrt Go-Tests aus.
Args:
with_coverage: Coverage erfassen
timeout: Timeout in Sekunden
Returns:
GoTestSummary mit allen Ergebnissen
"""
if not self.base_path.exists():
return GoTestSummary(raw_output="Pfad existiert nicht")
cmd = ["go", "test", "-v", "-json"]
if with_coverage:
cmd.extend(["-cover", "-coverprofile=coverage.out"])
cmd.append("./...")
try:
result = subprocess.run(
cmd,
cwd=str(self.base_path),
capture_output=True,
text=True,
timeout=timeout,
)
return self._parse_output(result.stdout, result.stderr)
except subprocess.TimeoutExpired:
return GoTestSummary(raw_output=f"Timeout nach {timeout} Sekunden")
except FileNotFoundError:
return GoTestSummary(raw_output="Go nicht installiert")
except Exception as e:
return GoTestSummary(raw_output=str(e))
def _parse_output(self, stdout: str, stderr: str) -> GoTestSummary:
"""Parsed die JSON-Ausgabe von go test"""
summary = GoTestSummary(raw_output=stdout[:10000] if stdout else stderr[:10000])
current_test: Dict[str, str] = {}
test_outputs: Dict[str, List[str]] = {}
for line in stdout.split("\n"):
if not line.strip():
continue
try:
event = json.loads(line)
action = event.get("Action")
package = event.get("Package", "")
test = event.get("Test", "")
elapsed = event.get("Elapsed", 0)
output = event.get("Output", "")
# Test-Output sammeln
if test and output:
key = f"{package}:{test}"
if key not in test_outputs:
test_outputs[key] = []
test_outputs[key].append(output)
# Test-Ergebnis
if action == "pass" and test:
summary.passed += 1
summary.total += 1
summary.results.append(GoTestResult(
package=package,
test_name=test,
passed=True,
duration_seconds=elapsed,
output="".join(test_outputs.get(f"{package}:{test}", [])),
))
elif action == "fail" and test:
summary.failed += 1
summary.total += 1
summary.results.append(GoTestResult(
package=package,
test_name=test,
passed=False,
duration_seconds=elapsed,
output="".join(test_outputs.get(f"{package}:{test}", [])),
))
elif action == "skip" and test:
summary.skipped += 1
summary.total += 1
# Package-Ergebnis (Gesamtdauer)
elif action in ["pass", "fail"] and not test and elapsed:
summary.duration_seconds = max(summary.duration_seconds, elapsed)
except json.JSONDecodeError:
# Nicht-JSON-Zeilen ignorieren (z.B. Coverage-Output)
if "coverage:" in line.lower():
# z.B. "coverage: 75.2% of statements"
try:
parts = line.split("coverage:")
if len(parts) > 1:
percent_str = parts[1].strip().split("%")[0]
summary.coverage_percent = float(percent_str)
except (ValueError, IndexError):
pass
return summary
async def run_single_test(self, test_name: str, timeout: int = 60) -> GoTestResult:
"""
Fuehrt einen einzelnen Test aus.
Args:
test_name: Name des Tests (z.B. "TestMyFunction")
timeout: Timeout in Sekunden
Returns:
GoTestResult fuer den spezifischen Test
"""
cmd = ["go", "test", "-v", "-run", test_name, "./..."]
try:
result = subprocess.run(
cmd,
cwd=str(self.base_path),
capture_output=True,
text=True,
timeout=timeout,
)
passed = "PASS" in result.stdout
return GoTestResult(
package=str(self.base_path),
test_name=test_name,
passed=passed,
duration_seconds=0.0,
output=result.stdout + result.stderr,
)
except Exception as e:
return GoTestResult(
package=str(self.base_path),
test_name=test_name,
passed=False,
duration_seconds=0.0,
output=str(e),
)
async def get_coverage_report(self) -> Optional[Dict]:
"""
Liest den Coverage-Bericht.
Returns:
Dict mit Coverage-Details oder None
"""
coverage_file = self.base_path / "coverage.out"
if not coverage_file.exists():
return None
try:
result = subprocess.run(
["go", "tool", "cover", "-func=coverage.out"],
cwd=str(self.base_path),
capture_output=True,
text=True,
)
# Parse "total:" Zeile
for line in result.stdout.split("\n"):
if "total:" in line:
parts = line.split()
if len(parts) >= 3:
percent_str = parts[-1].replace("%", "")
return {
"total_coverage": float(percent_str),
"raw_output": result.stdout,
}
except Exception:
pass
return None

View File

@@ -0,0 +1,266 @@
"""
Python Test Runner (pytest)
Fuehrt Python-Tests aus und parsed die Ergebnisse.
"""
import subprocess
import json
import re
from datetime import datetime
from pathlib import Path
from typing import Dict, List, Optional
from dataclasses import dataclass, field
@dataclass
class PytestResult:
    """Result of a single pytest test."""
    node_id: str  # pytest node id, e.g. "tests/test_x.py::test_a"
    test_name: str
    file_path: str
    passed: bool
    duration_seconds: float
    output: str = ""
    error_message: Optional[str] = None


@dataclass
class PytestSummary:
    """Summary of one pytest run."""
    total: int = 0
    passed: int = 0
    failed: int = 0
    skipped: int = 0
    errors: int = 0
    duration_seconds: float = 0.0
    coverage_percent: Optional[float] = None  # only set when coverage output is present
    results: List[PytestResult] = field(default_factory=list)
    raw_output: str = ""  # first 10k chars of combined stdout+stderr


class PytestRunner:
    """
    Runner for Python tests with pytest.

    Invokes pytest as a subprocess and parses its verbose terminal output.
    """

    def __init__(self, base_path: Path, venv_path: Optional[Path] = None):
        # Directory containing the tests; venv_path optionally points to a
        # virtualenv whose interpreter should be used instead of "python".
        self.base_path = base_path
        self.venv_path = venv_path

    def _get_python_cmd(self) -> str:
        """Return the Python executable (from the venv when available)."""
        if self.venv_path and (self.venv_path / "bin" / "python").exists():
            return str(self.venv_path / "bin" / "python")
        return "python"

    async def run(self, with_coverage: bool = True, timeout: int = 300) -> PytestSummary:
        """
        Run pytest over ``base_path``.

        Args:
            with_coverage: collect coverage via pytest-cov
            timeout: timeout in seconds

        Returns:
            PytestSummary with all results; on any error a summary whose
            ``raw_output`` describes the problem. Never raises.
        """
        if not self.base_path.exists():
            return PytestSummary(raw_output="Pfad existiert nicht")
        python_cmd = self._get_python_cmd()
        cmd = [python_cmd, "-m", "pytest", "-v", "--tb=short"]
        if with_coverage:
            # NOTE(review): "--cov=." is relative to the *current* working
            # directory, not base_path — confirm this matches the intended
            # coverage scope before relying on the numbers.
            cmd.extend(["--cov=.", "--cov-report=term-missing"])
        cmd.append(str(self.base_path))
        try:
            result = subprocess.run(
                cmd,
                capture_output=True,
                text=True,
                timeout=timeout,
            )
            return self._parse_output(result.stdout, result.stderr)
        except subprocess.TimeoutExpired:
            return PytestSummary(raw_output=f"Timeout nach {timeout} Sekunden")
        except FileNotFoundError:
            return PytestSummary(raw_output="Python/pytest nicht installiert")
        except Exception as e:
            return PytestSummary(raw_output=str(e))

    def _parse_output(self, stdout: str, stderr: str) -> PytestSummary:
        """Parse pytest's verbose terminal output into a PytestSummary."""
        output = stdout + stderr
        summary = PytestSummary(raw_output=output[:10000])
        # Per-test lines, e.g. "tests/test_file.py::test_name PASSED".
        test_pattern = re.compile(r"([\w/]+\.py)::(\w+)(?:\[.+\])?\s+(PASSED|FAILED|SKIPPED|ERROR)")
        for match in test_pattern.finditer(output):
            file_path, test_name, status = match.groups()
            summary.results.append(PytestResult(
                node_id=f"{file_path}::{test_name}",
                test_name=test_name,
                file_path=file_path,
                passed=status == "PASSED",
                duration_seconds=0.0,
            ))
            if status == "PASSED":
                summary.passed += 1
            elif status == "FAILED":
                summary.failed += 1
            elif status == "SKIPPED":
                summary.skipped += 1
            elif status == "ERROR":
                summary.errors += 1
        summary.total = len(summary.results)
        # Final summary line, e.g. "== 2 failed, 3 passed in 0.50s ==".
        # BUGFIX: pytest orders the counters by status (failed before passed),
        # so each "<count> <status>" token is parsed independently instead of
        # assuming "passed" always comes first.
        summary_match = re.search(r"=+\s*(.*?)\s*in\s+([\d.]+)s", output)
        if summary_match:
            counts = {
                status: int(count)
                for count, status in re.findall(
                    r"(\d+)\s+(passed|failed|skipped|error)", summary_match.group(1)
                )
            }
            # Summary-line counts override the per-line tallies when present.
            summary.passed = counts.get("passed", summary.passed)
            summary.failed = counts.get("failed", summary.failed)
            summary.skipped = counts.get("skipped", summary.skipped)
            summary.errors = counts.get("error", summary.errors)
            summary.duration_seconds = float(summary_match.group(2))
            summary.total = summary.passed + summary.failed + summary.skipped + summary.errors
        # Coverage total line, e.g. "TOTAL    1234    567    54%".
        coverage_match = re.search(r"TOTAL\s+\d+\s+\d+\s+(\d+)%", output)
        if coverage_match:
            summary.coverage_percent = float(coverage_match.group(1))
        return summary

    async def run_single_test(self, test_path: str, timeout: int = 60) -> PytestResult:
        """
        Run a single test.

        Args:
            test_path: pytest node id (e.g. "test_file.py::test_name")
            timeout: timeout in seconds

        Returns:
            PytestResult for that test. Never raises.
        """
        python_cmd = self._get_python_cmd()
        cmd = [python_cmd, "-m", "pytest", "-v", test_path]
        try:
            result = subprocess.run(
                cmd,
                cwd=str(self.base_path),
                capture_output=True,
                text=True,
                timeout=timeout,
            )
            # Heuristic: success iff the output mentions "passed" and never
            # "failed" (covers pytest's summary wording).
            passed = "passed" in result.stdout.lower() and "failed" not in result.stdout.lower()
            return PytestResult(
                node_id=test_path,
                test_name=test_path.split("::")[-1] if "::" in test_path else test_path,
                file_path=test_path.split("::")[0] if "::" in test_path else test_path,
                passed=passed,
                duration_seconds=0.0,
                output=result.stdout + result.stderr,
            )
        except Exception as e:
            return PytestResult(
                node_id=test_path,
                test_name=test_path,
                file_path="",
                passed=False,
                duration_seconds=0.0,
                output=str(e),
            )

    async def get_coverage_report(self, format: str = "term") -> Optional[Dict]:
        """
        Generate a coverage report.

        Args:
            format: "term", "html", or "xml"

        Returns:
            Dict with coverage details, or None on failure.
        """
        python_cmd = self._get_python_cmd()
        cmd = [python_cmd, "-m", "pytest", "--cov=.", f"--cov-report={format}"]
        try:
            result = subprocess.run(
                cmd,
                cwd=str(self.base_path),
                capture_output=True,
                text=True,
                timeout=120,
            )
            # Parse the "TOTAL" line for the overall percentage.
            coverage_pattern = re.compile(r"TOTAL\s+\d+\s+\d+\s+(\d+)%")
            match = coverage_pattern.search(result.stdout)
            if match:
                return {
                    "total_coverage": float(match.group(1)),
                    "format": format,
                    "raw_output": result.stdout,
                }
        except Exception:
            pass
        return None

    async def list_tests(self) -> List[str]:
        """
        List all collectable tests.

        Returns:
            List of pytest node ids (empty on any error).
        """
        python_cmd = self._get_python_cmd()
        cmd = [python_cmd, "-m", "pytest", "--collect-only", "-q"]
        try:
            result = subprocess.run(
                cmd,
                cwd=str(self.base_path),
                capture_output=True,
                text=True,
                timeout=30,
            )
            tests = []
            for line in result.stdout.split("\n"):
                line = line.strip()
                # Node ids contain "::"; "<Module ...>" style lines do not count.
                if "::" in line and not line.startswith("<"):
                    tests.append(line)
            return tests
        except Exception:
            return []