fix: Restore all files lost during destructive rebase

A previous `git pull --rebase origin main` dropped 177 local commits,
losing 3400+ files across admin-v2, backend, studio-v2, website,
klausur-service, and many other services. The partial restore attempt
(660295e2) only recovered some files.

This commit restores all missing files from pre-rebase ref 98933f5e
while preserving post-rebase additions (night-scheduler, night-mode UI,
NightModeWidget dashboard integration).

Restored features include:
- AI Module Sidebar (FAB), OCR Labeling, OCR Compare
- GPU Dashboard, RAG Pipeline, Magic Help
- Klausur-Korrektur (8 files), Abitur-Archiv (5+ files)
- Companion, Zeugnisse-Crawler, Screen Flow
- Full backend, studio-v2, website, klausur-service
- All compliance SDKs, agent-core, voice-service
- CI/CD configs, documentation, scripts

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
Benjamin Admin
2026-02-09 09:51:32 +01:00
parent f7487ee240
commit bfdaf63ba9
2009 changed files with 749983 additions and 1731 deletions

View File

@@ -0,0 +1,11 @@
"""
Test Runners
Spezialisierte Runner fuer verschiedene Test-Frameworks.
"""
from .go_runner import GoTestRunner
from .python_runner import PytestRunner
from .bqas_runner import BQASRunner
__all__ = ["GoTestRunner", "PytestRunner", "BQASRunner"]

View File

@@ -0,0 +1,285 @@
"""
BQAS Test Runner
Proxy zu den BQAS-Endpoints im Voice-Service.
"""
import httpx
from datetime import datetime
from typing import Dict, Optional
from dataclasses import dataclass, field
@dataclass
class BQASResult:
    """Result of a single BQAS test run.

    On errors (timeout, unreachable service, non-200 response) the counters
    keep their defaults and ``raw_output`` carries the error description.
    """
    suite_type: str  # one of "golden", "rag", "synthetic"
    total_tests: int = 0
    passed_tests: int = 0
    failed_tests: int = 0
    avg_score: float = 0.0  # average composite score reported by the service
    duration_seconds: float = 0.0  # wall-clock duration measured client-side
    metrics: Dict = field(default_factory=dict)  # raw metrics dict from the service
    failed_test_ids: list = field(default_factory=list)  # IDs of failed test cases
    raw_output: str = ""  # raw response text or error message
class BQASRunner:
    """
    Runner for BQAS tests.

    Forwards requests to the Voice-Service (port 8091). Whenever the service
    is unreachable, canned demo data is returned instead so that the calling
    UI stays usable without a running backend.
    """

    # Default base URL of the Voice-Service BQAS API.
    VOICE_SERVICE_URL = "http://localhost:8091"

    def __init__(self, api_base: Optional[str] = None):
        # Allow overriding the service URL (e.g. for tests or remote setups).
        self.api_base = api_base or self.VOICE_SERVICE_URL

    async def run_golden(self, timeout: int = 120) -> BQASResult:
        """
        Run the Golden test suite.

        Returns:
            BQASResult with all metrics.
        """
        return await self._run_suite("golden", timeout)

    async def run_rag(self, timeout: int = 120) -> BQASResult:
        """
        Run the RAG test suite.

        Returns:
            BQASResult with all metrics.
        """
        return await self._run_suite("rag", timeout)

    async def run_synthetic(self, timeout: int = 300) -> BQASResult:
        """
        Run the Synthetic test suite.

        Takes longer because of LLM generation, hence the larger default
        timeout.

        Returns:
            BQASResult with all metrics.
        """
        return await self._run_suite("synthetic", timeout)

    async def _run_suite(self, suite_type: str, timeout: int) -> BQASResult:
        """Internal helper: POST to the suite endpoint and map the response.

        Error handling is ordered most-specific first: timeout, then
        connection failure (which falls back to demo data), then anything
        else (reported via ``raw_output``). Never raises.
        """
        start_time = datetime.now()
        try:
            async with httpx.AsyncClient(timeout=float(timeout)) as client:
                response = await client.post(
                    f"{self.api_base}/api/v1/bqas/run/{suite_type}",
                )
                if response.status_code == 200:
                    data = response.json()
                    metrics = data.get("metrics", {})
                    return BQASResult(
                        suite_type=suite_type,
                        total_tests=metrics.get("total_tests", 0),
                        passed_tests=metrics.get("passed_tests", 0),
                        failed_tests=metrics.get("failed_tests", 0),
                        avg_score=metrics.get("avg_composite_score", 0.0),
                        # Duration is measured client-side, not taken from the
                        # service response.
                        duration_seconds=(datetime.now() - start_time).total_seconds(),
                        metrics=metrics,
                        failed_test_ids=metrics.get("failed_test_ids", []),
                        raw_output=str(data),
                    )
                else:
                    return BQASResult(
                        suite_type=suite_type,
                        raw_output=f"HTTP {response.status_code}: {response.text}",
                    )
        except httpx.TimeoutException:
            return BQASResult(
                suite_type=suite_type,
                duration_seconds=(datetime.now() - start_time).total_seconds(),
                raw_output=f"Timeout nach {timeout} Sekunden",
            )
        except httpx.ConnectError:
            # Service unreachable -> return demo data instead of failing.
            return self._get_demo_result(suite_type)
        except Exception as e:
            return BQASResult(
                suite_type=suite_type,
                duration_seconds=(datetime.now() - start_time).total_seconds(),
                raw_output=str(e),
            )

    def _get_demo_result(self, suite_type: str) -> BQASResult:
        """Return hard-coded demo data when the service is unreachable."""
        if suite_type == "golden":
            return BQASResult(
                suite_type=suite_type,
                total_tests=97,
                passed_tests=89,
                failed_tests=8,
                avg_score=4.15,
                duration_seconds=45.2,
                metrics={
                    "avg_intent_accuracy": 91.7,
                    "avg_faithfulness": 4.2,
                    "avg_relevance": 4.1,
                    "avg_coherence": 4.3,
                    "safety_pass_rate": 0.98,
                },
                failed_test_ids=["GT-023", "GT-045", "GT-067", "GT-072", "GT-081", "GT-089", "GT-092", "GT-095"],
                raw_output="Demo-Modus: Voice-Service nicht erreichbar",
            )
        elif suite_type == "rag":
            return BQASResult(
                suite_type=suite_type,
                total_tests=20,
                passed_tests=18,
                failed_tests=2,
                avg_score=4.25,
                duration_seconds=62.1,
                metrics={
                    "avg_faithfulness": 4.3,
                    "avg_relevance": 4.2,
                    "citation_accuracy": 0.92,
                },
                failed_test_ids=["RAG-EH-003", "RAG-HAL-002"],
                raw_output="Demo-Modus: Voice-Service nicht erreichbar",
            )
        else:  # synthetic
            return BQASResult(
                suite_type=suite_type,
                total_tests=50,
                passed_tests=45,
                failed_tests=5,
                avg_score=3.95,
                duration_seconds=180.5,
                metrics={
                    "avg_robustness": 3.8,
                    "avg_coherence": 4.1,
                },
                failed_test_ids=["SYN-001", "SYN-015", "SYN-023", "SYN-041", "SYN-048"],
                raw_output="Demo-Modus: Voice-Service nicht erreichbar",
            )

    async def get_latest_metrics(self) -> Optional[Dict]:
        """
        Fetch the latest metrics from the Voice-Service.

        Best-effort: any error (network, JSON, ...) falls through to demo
        data, so this never raises.

        Returns:
            Dict with all metrics, or demo data on failure.
        """
        try:
            async with httpx.AsyncClient(timeout=10.0) as client:
                response = await client.get(
                    f"{self.api_base}/api/v1/bqas/latest-metrics",
                )
                if response.status_code == 200:
                    return response.json()
        except Exception:
            pass
        # Demo data fallback.
        return {
            "golden": {
                "total_tests": 97,
                "passed_tests": 89,
                "failed_tests": 8,
                "avg_composite_score": 4.15,
                "last_run": datetime.now().isoformat(),
            },
            "rag": {
                "total_tests": 20,
                "passed_tests": 18,
                "failed_tests": 2,
                "avg_composite_score": 4.25,
                "last_run": datetime.now().isoformat(),
            },
            "synthetic": None,
        }

    async def get_trend(self, days: int = 30) -> Optional[Dict]:
        """
        Fetch trend data.

        Args:
            days: number of days to include.

        Returns:
            Dict with trend data, or demo data on failure.
        """
        try:
            async with httpx.AsyncClient(timeout=10.0) as client:
                response = await client.get(
                    f"{self.api_base}/api/v1/bqas/trend",
                    params={"days": days},
                )
                if response.status_code == 200:
                    return response.json()
        except Exception:
            pass
        # Demo data fallback.
        return {
            "dates": ["2026-01-02", "2026-01-09", "2026-01-16", "2026-01-23", "2026-01-30"],
            "scores": [3.9, 4.0, 4.1, 4.15, 4.15],
            "trend": "improving",
        }

    async def get_runs(self, limit: int = 20) -> list:
        """
        Fetch the most recent test runs.

        Args:
            limit: maximum number of runs to return.

        Returns:
            List of test-run dicts (demo data on failure).
        """
        try:
            async with httpx.AsyncClient(timeout=10.0) as client:
                response = await client.get(
                    f"{self.api_base}/api/v1/bqas/runs",
                    params={"limit": limit},
                )
                if response.status_code == 200:
                    data = response.json()
                    return data.get("runs", [])
        except Exception:
            pass
        # Demo data fallback.
        return [
            {
                "id": 1,
                "timestamp": "2026-01-30T07:00:00Z",
                "git_commit": "abc1234",
                "golden_score": 4.15,
                "total_tests": 97,
                "passed_tests": 89,
                "failed_tests": 8,
                "duration_seconds": 45.2,
            },
            {
                "id": 2,
                "timestamp": "2026-01-29T07:00:00Z",
                "git_commit": "def5678",
                "golden_score": 4.12,
                "total_tests": 97,
                "passed_tests": 88,
                "failed_tests": 9,
                "duration_seconds": 44.8,
            },
        ]

View File

@@ -0,0 +1,229 @@
"""
Go Test Runner
Fuehrt Go-Tests aus und parsed die Ergebnisse.
"""
import subprocess
import json
from datetime import datetime
from pathlib import Path
from typing import Dict, List, Optional
from dataclasses import dataclass, field
@dataclass
class GoTestResult:
"""Ergebnis eines einzelnen Go-Tests"""
package: str
test_name: str
passed: bool
duration_seconds: float
output: str = ""
@dataclass
class GoTestSummary:
"""Zusammenfassung eines Go-Test-Runs"""
total: int = 0
passed: int = 0
failed: int = 0
skipped: int = 0
duration_seconds: float = 0.0
coverage_percent: Optional[float] = None
results: List[GoTestResult] = field(default_factory=list)
raw_output: str = ""
class GoTestRunner:
"""
Runner fuer Go-Tests.
Verwendet `go test -json` fuer strukturierte Ausgabe.
"""
def __init__(self, base_path: Path):
self.base_path = base_path
async def run(self, with_coverage: bool = True, timeout: int = 300) -> GoTestSummary:
"""
Fuehrt Go-Tests aus.
Args:
with_coverage: Coverage erfassen
timeout: Timeout in Sekunden
Returns:
GoTestSummary mit allen Ergebnissen
"""
if not self.base_path.exists():
return GoTestSummary(raw_output="Pfad existiert nicht")
cmd = ["go", "test", "-v", "-json"]
if with_coverage:
cmd.extend(["-cover", "-coverprofile=coverage.out"])
cmd.append("./...")
try:
result = subprocess.run(
cmd,
cwd=str(self.base_path),
capture_output=True,
text=True,
timeout=timeout,
)
return self._parse_output(result.stdout, result.stderr)
except subprocess.TimeoutExpired:
return GoTestSummary(raw_output=f"Timeout nach {timeout} Sekunden")
except FileNotFoundError:
return GoTestSummary(raw_output="Go nicht installiert")
except Exception as e:
return GoTestSummary(raw_output=str(e))
def _parse_output(self, stdout: str, stderr: str) -> GoTestSummary:
"""Parsed die JSON-Ausgabe von go test"""
summary = GoTestSummary(raw_output=stdout[:10000] if stdout else stderr[:10000])
current_test: Dict[str, str] = {}
test_outputs: Dict[str, List[str]] = {}
for line in stdout.split("\n"):
if not line.strip():
continue
try:
event = json.loads(line)
action = event.get("Action")
package = event.get("Package", "")
test = event.get("Test", "")
elapsed = event.get("Elapsed", 0)
output = event.get("Output", "")
# Test-Output sammeln
if test and output:
key = f"{package}:{test}"
if key not in test_outputs:
test_outputs[key] = []
test_outputs[key].append(output)
# Test-Ergebnis
if action == "pass" and test:
summary.passed += 1
summary.total += 1
summary.results.append(GoTestResult(
package=package,
test_name=test,
passed=True,
duration_seconds=elapsed,
output="".join(test_outputs.get(f"{package}:{test}", [])),
))
elif action == "fail" and test:
summary.failed += 1
summary.total += 1
summary.results.append(GoTestResult(
package=package,
test_name=test,
passed=False,
duration_seconds=elapsed,
output="".join(test_outputs.get(f"{package}:{test}", [])),
))
elif action == "skip" and test:
summary.skipped += 1
summary.total += 1
# Package-Ergebnis (Gesamtdauer)
elif action in ["pass", "fail"] and not test and elapsed:
summary.duration_seconds = max(summary.duration_seconds, elapsed)
except json.JSONDecodeError:
# Nicht-JSON-Zeilen ignorieren (z.B. Coverage-Output)
if "coverage:" in line.lower():
# z.B. "coverage: 75.2% of statements"
try:
parts = line.split("coverage:")
if len(parts) > 1:
percent_str = parts[1].strip().split("%")[0]
summary.coverage_percent = float(percent_str)
except (ValueError, IndexError):
pass
return summary
async def run_single_test(self, test_name: str, timeout: int = 60) -> GoTestResult:
"""
Fuehrt einen einzelnen Test aus.
Args:
test_name: Name des Tests (z.B. "TestMyFunction")
timeout: Timeout in Sekunden
Returns:
GoTestResult fuer den spezifischen Test
"""
cmd = ["go", "test", "-v", "-run", test_name, "./..."]
try:
result = subprocess.run(
cmd,
cwd=str(self.base_path),
capture_output=True,
text=True,
timeout=timeout,
)
passed = "PASS" in result.stdout
return GoTestResult(
package=str(self.base_path),
test_name=test_name,
passed=passed,
duration_seconds=0.0,
output=result.stdout + result.stderr,
)
except Exception as e:
return GoTestResult(
package=str(self.base_path),
test_name=test_name,
passed=False,
duration_seconds=0.0,
output=str(e),
)
async def get_coverage_report(self) -> Optional[Dict]:
"""
Liest den Coverage-Bericht.
Returns:
Dict mit Coverage-Details oder None
"""
coverage_file = self.base_path / "coverage.out"
if not coverage_file.exists():
return None
try:
result = subprocess.run(
["go", "tool", "cover", "-func=coverage.out"],
cwd=str(self.base_path),
capture_output=True,
text=True,
)
# Parse "total:" Zeile
for line in result.stdout.split("\n"):
if "total:" in line:
parts = line.split()
if len(parts) >= 3:
percent_str = parts[-1].replace("%", "")
return {
"total_coverage": float(percent_str),
"raw_output": result.stdout,
}
except Exception:
pass
return None

View File

@@ -0,0 +1,266 @@
"""
Python Test Runner (pytest)
Fuehrt Python-Tests aus und parsed die Ergebnisse.
"""
import subprocess
import json
import re
from datetime import datetime
from pathlib import Path
from typing import Dict, List, Optional
from dataclasses import dataclass, field
@dataclass
class PytestResult:
    """Result of a single pytest test."""
    node_id: str  # pytest node id, e.g. "tests/test_x.py::test_a"
    test_name: str
    file_path: str
    passed: bool
    duration_seconds: float
    output: str = ""
    error_message: Optional[str] = None


@dataclass
class PytestSummary:
    """Summary of one pytest run."""
    total: int = 0
    passed: int = 0
    failed: int = 0
    skipped: int = 0
    errors: int = 0
    duration_seconds: float = 0.0
    coverage_percent: Optional[float] = None  # only set when coverage output is present
    results: List[PytestResult] = field(default_factory=list)
    raw_output: str = ""  # first 10k chars of combined stdout+stderr


class PytestRunner:
    """
    Runner for Python tests with pytest.

    Invokes pytest as a subprocess and parses its verbose terminal output.
    """

    def __init__(self, base_path: Path, venv_path: Optional[Path] = None):
        # Directory containing the tests; venv_path optionally points to a
        # virtualenv whose interpreter should be used instead of "python".
        self.base_path = base_path
        self.venv_path = venv_path

    def _get_python_cmd(self) -> str:
        """Return the Python executable (from the venv when available)."""
        if self.venv_path and (self.venv_path / "bin" / "python").exists():
            return str(self.venv_path / "bin" / "python")
        return "python"

    async def run(self, with_coverage: bool = True, timeout: int = 300) -> PytestSummary:
        """
        Run pytest over ``base_path``.

        Args:
            with_coverage: collect coverage via pytest-cov
            timeout: timeout in seconds

        Returns:
            PytestSummary with all results; on any error a summary whose
            ``raw_output`` describes the problem. Never raises.
        """
        if not self.base_path.exists():
            return PytestSummary(raw_output="Pfad existiert nicht")
        python_cmd = self._get_python_cmd()
        cmd = [python_cmd, "-m", "pytest", "-v", "--tb=short"]
        if with_coverage:
            # NOTE(review): "--cov=." is relative to the *current* working
            # directory, not base_path — confirm this matches the intended
            # coverage scope before relying on the numbers.
            cmd.extend(["--cov=.", "--cov-report=term-missing"])
        cmd.append(str(self.base_path))
        try:
            result = subprocess.run(
                cmd,
                capture_output=True,
                text=True,
                timeout=timeout,
            )
            return self._parse_output(result.stdout, result.stderr)
        except subprocess.TimeoutExpired:
            return PytestSummary(raw_output=f"Timeout nach {timeout} Sekunden")
        except FileNotFoundError:
            return PytestSummary(raw_output="Python/pytest nicht installiert")
        except Exception as e:
            return PytestSummary(raw_output=str(e))

    def _parse_output(self, stdout: str, stderr: str) -> PytestSummary:
        """Parse pytest's verbose terminal output into a PytestSummary."""
        output = stdout + stderr
        summary = PytestSummary(raw_output=output[:10000])
        # Per-test lines, e.g. "tests/test_file.py::test_name PASSED".
        test_pattern = re.compile(r"([\w/]+\.py)::(\w+)(?:\[.+\])?\s+(PASSED|FAILED|SKIPPED|ERROR)")
        for match in test_pattern.finditer(output):
            file_path, test_name, status = match.groups()
            summary.results.append(PytestResult(
                node_id=f"{file_path}::{test_name}",
                test_name=test_name,
                file_path=file_path,
                passed=status == "PASSED",
                duration_seconds=0.0,
            ))
            if status == "PASSED":
                summary.passed += 1
            elif status == "FAILED":
                summary.failed += 1
            elif status == "SKIPPED":
                summary.skipped += 1
            elif status == "ERROR":
                summary.errors += 1
        summary.total = len(summary.results)
        # Final summary line, e.g. "== 2 failed, 3 passed in 0.50s ==".
        # BUGFIX: pytest orders the counters by status (failed before passed),
        # so each "<count> <status>" token is parsed independently instead of
        # assuming "passed" always comes first.
        summary_match = re.search(r"=+\s*(.*?)\s*in\s+([\d.]+)s", output)
        if summary_match:
            counts = {
                status: int(count)
                for count, status in re.findall(
                    r"(\d+)\s+(passed|failed|skipped|error)", summary_match.group(1)
                )
            }
            # Summary-line counts override the per-line tallies when present.
            summary.passed = counts.get("passed", summary.passed)
            summary.failed = counts.get("failed", summary.failed)
            summary.skipped = counts.get("skipped", summary.skipped)
            summary.errors = counts.get("error", summary.errors)
            summary.duration_seconds = float(summary_match.group(2))
            summary.total = summary.passed + summary.failed + summary.skipped + summary.errors
        # Coverage total line, e.g. "TOTAL    1234    567    54%".
        coverage_match = re.search(r"TOTAL\s+\d+\s+\d+\s+(\d+)%", output)
        if coverage_match:
            summary.coverage_percent = float(coverage_match.group(1))
        return summary

    async def run_single_test(self, test_path: str, timeout: int = 60) -> PytestResult:
        """
        Run a single test.

        Args:
            test_path: pytest node id (e.g. "test_file.py::test_name")
            timeout: timeout in seconds

        Returns:
            PytestResult for that test. Never raises.
        """
        python_cmd = self._get_python_cmd()
        cmd = [python_cmd, "-m", "pytest", "-v", test_path]
        try:
            result = subprocess.run(
                cmd,
                cwd=str(self.base_path),
                capture_output=True,
                text=True,
                timeout=timeout,
            )
            # Heuristic: success iff the output mentions "passed" and never
            # "failed" (covers pytest's summary wording).
            passed = "passed" in result.stdout.lower() and "failed" not in result.stdout.lower()
            return PytestResult(
                node_id=test_path,
                test_name=test_path.split("::")[-1] if "::" in test_path else test_path,
                file_path=test_path.split("::")[0] if "::" in test_path else test_path,
                passed=passed,
                duration_seconds=0.0,
                output=result.stdout + result.stderr,
            )
        except Exception as e:
            return PytestResult(
                node_id=test_path,
                test_name=test_path,
                file_path="",
                passed=False,
                duration_seconds=0.0,
                output=str(e),
            )

    async def get_coverage_report(self, format: str = "term") -> Optional[Dict]:
        """
        Generate a coverage report.

        Args:
            format: "term", "html", or "xml"

        Returns:
            Dict with coverage details, or None on failure.
        """
        python_cmd = self._get_python_cmd()
        cmd = [python_cmd, "-m", "pytest", "--cov=.", f"--cov-report={format}"]
        try:
            result = subprocess.run(
                cmd,
                cwd=str(self.base_path),
                capture_output=True,
                text=True,
                timeout=120,
            )
            # Parse the "TOTAL" line for the overall percentage.
            coverage_pattern = re.compile(r"TOTAL\s+\d+\s+\d+\s+(\d+)%")
            match = coverage_pattern.search(result.stdout)
            if match:
                return {
                    "total_coverage": float(match.group(1)),
                    "format": format,
                    "raw_output": result.stdout,
                }
        except Exception:
            pass
        return None

    async def list_tests(self) -> List[str]:
        """
        List all collectable tests.

        Returns:
            List of pytest node ids (empty on any error).
        """
        python_cmd = self._get_python_cmd()
        cmd = [python_cmd, "-m", "pytest", "--collect-only", "-q"]
        try:
            result = subprocess.run(
                cmd,
                cwd=str(self.base_path),
                capture_output=True,
                text=True,
                timeout=30,
            )
            tests = []
            for line in result.stdout.split("\n"):
                line = line.strip()
                # Node ids contain "::"; "<Module ...>" style lines do not count.
                if "::" in line and not line.startswith("<"):
                    tests.append(line)
            return tests
        except Exception:
            return []