feat: voice-service von lehrer nach core verschoben, Pipeline erweitert (voice, BQAS, embedding, night-scheduler)

2026-02-15 13:26:06 +01:00
parent a7e4500ea6
commit 1089c73b46
59 changed files with 12921 additions and 20 deletions
@@ -0,0 +1,197 @@
+"""
+BQAS Test Fixtures
+"""
+import os
+import pytest
+import pytest_asyncio
+import yaml
+from pathlib import Path
+from typing import List, Dict, Any
+import httpx
+
+# Prepend this file's third ancestor directory to sys.path so the bqas imports below resolve
+import sys
+sys.path.insert(0, str(Path(__file__).parent.parent.parent))
+
+from bqas.judge import LLMJudge
+from bqas.rag_judge import RAGJudge
+from bqas.config import BQASConfig
+from bqas.regression_tracker import RegressionTracker
+from bqas.synthetic_generator import SyntheticGenerator
+from bqas.backlog_generator import BacklogGenerator
+
+
+@pytest.fixture(scope="session")
+def bqas_config():
+    """BQAS configuration for tests."""
+    return BQASConfig(
+        ollama_base_url=os.getenv("OLLAMA_BASE_URL", "http://localhost:11434"),
+        judge_model=os.getenv("BQAS_JUDGE_MODEL", "qwen2.5:32b"),
+        voice_service_url=os.getenv("VOICE_SERVICE_URL", "http://localhost:8091"),
+        db_path=os.getenv("BQAS_DB_PATH", "bqas_test_history.db"),
+    )
+
+
+@pytest.fixture(scope="session")
+def llm_judge(bqas_config):
+    """LLM Judge instance."""
+    return LLMJudge(config=bqas_config)
+
+
+@pytest.fixture(scope="session")
+def rag_judge(bqas_config):
+    """RAG Judge instance for RAG/Correction tests."""
+    return RAGJudge(config=bqas_config)
+
+
+@pytest.fixture(scope="session")
+def regression_tracker(bqas_config):
+    """Regression tracker instance."""
+    return RegressionTracker(config=bqas_config)
+
+
+@pytest.fixture(scope="session")
+def synthetic_generator(bqas_config):
+    """Synthetic test generator instance."""
+    return SyntheticGenerator(config=bqas_config)
+
+
+@pytest.fixture(scope="session")
+def backlog_generator(bqas_config):
+    """Backlog generator instance."""
+    return BacklogGenerator(config=bqas_config)
+
+
+@pytest_asyncio.fixture
+async def voice_service_client(bqas_config):
+    """Async HTTP client for voice service."""
+    async with httpx.AsyncClient(
+        base_url=bqas_config.voice_service_url,
+        timeout=30.0,
+    ) as client:
+        yield client
+
+
+def load_golden_tests_from_file(yaml_path: Path) -> List[Dict[str, Any]]:
+    """Load test cases from a YAML file."""
+    with open(yaml_path, 'r', encoding='utf-8') as f:
+        data = yaml.safe_load(f)
+
+    tests = []
+    # Handle different YAML structures
+    if 'tests' in data:
+        tests.extend(data['tests'])
+    if 'edge_cases' in data:
+        tests.extend(data['edge_cases'])
+    if 'workflow_tests' in data:
+        # Flatten workflow tests - take first step
+        for wf in data['workflow_tests']:
+            if 'steps' in wf and wf['steps']:
+                first_step = wf['steps'][0]
+                tests.append({
+                    'id': wf.get('id', 'WF-XXX'),
+                    'name': wf.get('name', 'Workflow'),
+                    'input': first_step.get('input', ''),
+                    'expected_intent': first_step.get('expected_intent', 'unknown'),
+                    'min_score': 3.0,
+                })
+
+    return tests
+
+
+@pytest.fixture(scope="session")
+def golden_tests() -> List[Dict[str, Any]]:
+    """Load all golden tests from YAML files."""
+    golden_dir = Path(__file__).parent / "golden_tests"
+    all_tests = []
+
+    for yaml_file in golden_dir.glob("*.yaml"):
+        tests = load_golden_tests_from_file(yaml_file)
+        all_tests.extend(tests)
+
+    return all_tests
+
+
+@pytest.fixture(scope="session")
+def intent_tests() -> List[Dict[str, Any]]:
+    """Load only intent tests."""
+    yaml_path = Path(__file__).parent / "golden_tests" / "intent_tests.yaml"
+    return load_golden_tests_from_file(yaml_path)
+
+
+@pytest.fixture(scope="session")
+def edge_case_tests() -> List[Dict[str, Any]]:
+    """Load only edge case tests."""
+    yaml_path = Path(__file__).parent / "golden_tests" / "edge_cases.yaml"
+    return load_golden_tests_from_file(yaml_path)
+
+
+def load_rag_tests_from_file(yaml_path: Path) -> List[Dict[str, Any]]:
+    """Load RAG test cases from a YAML file with multiple documents."""
+    with open(yaml_path, 'r', encoding='utf-8') as f:
+        content = f.read()
+
+    tests = []
+    # Handle YAML with multiple documents (separated by ---)
+    documents = list(yaml.safe_load_all(content))
+
+    for doc in documents:
+        if doc and 'tests' in doc:
+            tests.extend(doc['tests'])
+        if doc and 'edge_cases' in doc:
+            tests.extend(doc['edge_cases'])
+
+    return tests
+
+
+@pytest.fixture(scope="session")
+def rag_tests() -> List[Dict[str, Any]]:
+    """Load RAG/Correction tests from golden suite."""
+    yaml_path = Path(__file__).parent / "golden_tests" / "golden_rag_correction_v1.yaml"
+    if yaml_path.exists():
+        return load_rag_tests_from_file(yaml_path)
+    return []
+
+
+@pytest.fixture(scope="session")
+def rag_retrieval_tests(rag_tests) -> List[Dict[str, Any]]:
+    """Load only EH retrieval tests."""
+    return [t for t in rag_tests if t.get("category") == "eh_retrieval"]
+
+
+@pytest.fixture(scope="session")
+def rag_operator_tests(rag_tests) -> List[Dict[str, Any]]:
+    """Load only operator alignment tests."""
+    return [t for t in rag_tests if t.get("category") == "operator_alignment"]
+
+
+@pytest.fixture(scope="session")
+def rag_privacy_tests(rag_tests) -> List[Dict[str, Any]]:
+    """Load only privacy compliance tests."""
+    return [t for t in rag_tests if t.get("category") == "privacy_compliance"]
+
+
+@pytest.fixture
+def sample_test_result():
+    """Sample test result for testing."""
+    from datetime import datetime, timezone
+    from bqas.metrics import TestResult
+
+    return TestResult(
+        test_id="TEST-001",
+        test_name="Sample Test",
+        user_input="Notiz zu Max: heute gestoert",
+        expected_intent="student_observation",
+        detected_intent="student_observation",
+        response="Notiz gespeichert",
+        intent_accuracy=100,
+        faithfulness=5,
+        relevance=5,
+        coherence=5,
+        safety="pass",
+        composite_score=4.8,
+        passed=True,
+        reasoning="Perfect match",
+        timestamp=datetime.now(timezone.utc),
+        duration_ms=1500,
+    )