feat: voice-service von lehrer nach core verschoben, Pipeline erweitert (voice, BQAS, embedding, night-scheduler)
This commit is contained in:
197
voice-service/tests/bqas/conftest.py
Normal file
197
voice-service/tests/bqas/conftest.py
Normal file
@@ -0,0 +1,197 @@
|
||||
"""
|
||||
BQAS Test Fixtures
|
||||
"""
|
||||
import os
|
||||
import pytest
|
||||
import pytest_asyncio
|
||||
import yaml
|
||||
from pathlib import Path
|
||||
from typing import List, Dict, Any
|
||||
import httpx
|
||||
|
||||
# Add the service root (three directory levels above this file) to sys.path so the bqas imports below resolve
|
||||
import sys
|
||||
sys.path.insert(0, str(Path(__file__).parent.parent.parent))
|
||||
|
||||
from bqas.judge import LLMJudge
|
||||
from bqas.rag_judge import RAGJudge
|
||||
from bqas.config import BQASConfig
|
||||
from bqas.regression_tracker import RegressionTracker
|
||||
from bqas.synthetic_generator import SyntheticGenerator
|
||||
from bqas.backlog_generator import BacklogGenerator
|
||||
|
||||
|
||||
@pytest.fixture(scope="session")
def bqas_config():
    """Build the BQASConfig shared by the whole test session.

    Every setting is taken from an environment variable and falls back to
    the default given here when that variable is not set.
    """
    env = os.environ
    ollama_url = env.get("OLLAMA_BASE_URL", "http://localhost:11434")
    judge_model_name = env.get("BQAS_JUDGE_MODEL", "qwen2.5:32b")
    voice_url = env.get("VOICE_SERVICE_URL", "http://localhost:8091")
    history_db = env.get("BQAS_DB_PATH", "bqas_test_history.db")

    return BQASConfig(
        ollama_base_url=ollama_url,
        judge_model=judge_model_name,
        voice_service_url=voice_url,
        db_path=history_db,
    )
|
||||
|
||||
|
||||
@pytest.fixture(scope="session")
def llm_judge(bqas_config):
    """Provide a single LLMJudge, constructed from the session BQAS config."""
    judge = LLMJudge(config=bqas_config)
    return judge
|
||||
|
||||
|
||||
@pytest.fixture(scope="session")
def rag_judge(bqas_config):
    """Provide a single RAGJudge (used by RAG/Correction tests) built from the session BQAS config."""
    judge = RAGJudge(config=bqas_config)
    return judge
|
||||
|
||||
|
||||
@pytest.fixture(scope="session")
def regression_tracker(bqas_config):
    """Provide a single RegressionTracker built from the session BQAS config."""
    tracker = RegressionTracker(config=bqas_config)
    return tracker
|
||||
|
||||
|
||||
@pytest.fixture(scope="session")
def synthetic_generator(bqas_config):
    """Provide a single SyntheticGenerator built from the session BQAS config."""
    generator = SyntheticGenerator(config=bqas_config)
    return generator
|
||||
|
||||
|
||||
@pytest.fixture(scope="session")
def backlog_generator(bqas_config):
    """Provide a single BacklogGenerator built from the session BQAS config."""
    generator = BacklogGenerator(config=bqas_config)
    return generator
|
||||
|
||||
|
||||
@pytest_asyncio.fixture
async def voice_service_client(bqas_config):
    """Yield an httpx.AsyncClient whose base URL is the configured voice service.

    Requests use a 30 second timeout. The client is used as an async context
    manager, so that context is exited once the requesting test is done.
    """
    client_options = {
        "base_url": bqas_config.voice_service_url,
        "timeout": 30.0,
    }
    async with httpx.AsyncClient(**client_options) as http_client:
        yield http_client
|
||||
|
||||
|
||||
def load_golden_tests_from_file(yaml_path: Path) -> List[Dict[str, Any]]:
    """Load test cases from a single-document YAML file.

    Three optional top-level sections are recognised:

    * ``tests`` and ``edge_cases`` - their entries are returned unchanged.
    * ``workflow_tests`` - each workflow with at least one step is flattened
      into one test case built from its first step only, with a fixed
      ``min_score`` of 3.0. Missing fields fall back to placeholder values.

    Args:
        yaml_path: Path of the YAML file to read (decoded as UTF-8).

    Returns:
        A flat list of test-case dicts. The list is empty when the file is
        empty, when its top level is not a mapping, or when none of the
        recognised sections is present.
    """
    with open(yaml_path, 'r', encoding='utf-8') as f:
        data = yaml.safe_load(f)

    # yaml.safe_load returns None for an empty file and a list/scalar for a
    # non-mapping document; neither has named sections to read.
    if not isinstance(data, dict):
        return []

    tests: List[Dict[str, Any]] = []

    # A section key written without a value parses as None, hence `or []`.
    tests.extend(data.get('tests') or [])
    tests.extend(data.get('edge_cases') or [])

    # Flatten workflow tests - only the first step of each workflow is used.
    for wf in data.get('workflow_tests') or []:
        steps = wf.get('steps')
        if not steps:
            continue
        first_step = steps[0]
        tests.append({
            'id': wf.get('id', 'WF-XXX'),
            'name': wf.get('name', 'Workflow'),
            'input': first_step.get('input', ''),
            'expected_intent': first_step.get('expected_intent', 'unknown'),
            'min_score': 3.0,
        })

    return tests
|
||||
|
||||
|
||||
@pytest.fixture(scope="session")
def golden_tests() -> List[Dict[str, Any]]:
    """Load all golden tests from the ``*.yaml`` files in ``golden_tests/``.

    The directory is resolved relative to this file. Files are processed in
    sorted path order so the combined list is identical on every run;
    ``Path.glob`` itself yields entries in no guaranteed order.

    Returns:
        The concatenated test cases of every matched file.
    """
    golden_dir = Path(__file__).parent / "golden_tests"
    all_tests: List[Dict[str, Any]] = []

    # NOTE(review): this glob also matches golden_rag_correction_v1.yaml, which
    # the rag_tests fixture reads with the multi-document loader. If that file
    # really contains several YAML documents, yaml.safe_load (used by
    # load_golden_tests_from_file) will reject it - confirm or exclude it here.
    for yaml_file in sorted(golden_dir.glob("*.yaml")):
        all_tests.extend(load_golden_tests_from_file(yaml_file))

    return all_tests
|
||||
|
||||
|
||||
@pytest.fixture(scope="session")
def intent_tests() -> List[Dict[str, Any]]:
    """Return the test cases read from ``golden_tests/intent_tests.yaml`` next to this file."""
    here = Path(__file__).parent
    return load_golden_tests_from_file(
        here.joinpath("golden_tests", "intent_tests.yaml")
    )
|
||||
|
||||
|
||||
@pytest.fixture(scope="session")
def edge_case_tests() -> List[Dict[str, Any]]:
    """Return the test cases read from ``golden_tests/edge_cases.yaml`` next to this file."""
    here = Path(__file__).parent
    return load_golden_tests_from_file(
        here.joinpath("golden_tests", "edge_cases.yaml")
    )
|
||||
|
||||
|
||||
def load_rag_tests_from_file(yaml_path: Path) -> List[Dict[str, Any]]:
    """Collect RAG test cases from a YAML file that may hold several documents.

    The file is read as UTF-8 and every ``---``-separated document is parsed.
    Empty documents are skipped; from each remaining one the entries under
    ``tests`` and then ``edge_cases`` are appended to the result.

    Args:
        yaml_path: Path of the YAML file to read.

    Returns:
        A flat list with the test cases of all documents, in file order.
    """
    with open(yaml_path, 'r', encoding='utf-8') as handle:
        raw_yaml = handle.read()

    # Parse every document up front before collecting anything.
    parsed_documents = list(yaml.safe_load_all(raw_yaml))

    collected = []
    for document in parsed_documents:
        if not document:
            continue
        for section in ('tests', 'edge_cases'):
            if section in document:
                collected.extend(document[section])

    return collected
|
||||
|
||||
|
||||
@pytest.fixture(scope="session")
def rag_tests() -> List[Dict[str, Any]]:
    """Return the RAG/Correction golden suite, or an empty list if its file is absent.

    The suite is expected at ``golden_tests/golden_rag_correction_v1.yaml``
    next to this file.
    """
    suite_file = Path(__file__).parent.joinpath(
        "golden_tests", "golden_rag_correction_v1.yaml"
    )
    if not suite_file.exists():
        return []
    return load_rag_tests_from_file(suite_file)
|
||||
|
||||
|
||||
@pytest.fixture(scope="session")
def rag_retrieval_tests(rag_tests) -> List[Dict[str, Any]]:
    """Return the RAG tests whose ``category`` is ``eh_retrieval``."""
    wanted_category = "eh_retrieval"
    return [
        case for case in rag_tests
        if case.get("category") == wanted_category
    ]
|
||||
|
||||
|
||||
@pytest.fixture(scope="session")
def rag_operator_tests(rag_tests) -> List[Dict[str, Any]]:
    """Return the RAG tests whose ``category`` is ``operator_alignment``."""
    wanted_category = "operator_alignment"
    return [
        case for case in rag_tests
        if case.get("category") == wanted_category
    ]
|
||||
|
||||
|
||||
@pytest.fixture(scope="session")
def rag_privacy_tests(rag_tests) -> List[Dict[str, Any]]:
    """Return the RAG tests whose ``category`` is ``privacy_compliance``."""
    wanted_category = "privacy_compliance"
    return [
        case for case in rag_tests
        if case.get("category") == wanted_category
    ]
|
||||
|
||||
|
||||
@pytest.fixture
def sample_test_result():
    """Build a passing TestResult with fixed sample values for use in tests.

    The result describes a correctly detected ``student_observation`` intent
    and is timestamped with the current UTC time.
    """
    from datetime import datetime, timezone
    from bqas.metrics import TestResult

    intent = "student_observation"
    result_fields = {
        "test_id": "TEST-001",
        "test_name": "Sample Test",
        "user_input": "Notiz zu Max: heute gestoert",
        "expected_intent": intent,
        "detected_intent": intent,
        "response": "Notiz gespeichert",
        "intent_accuracy": 100,
        "faithfulness": 5,
        "relevance": 5,
        "coherence": 5,
        "safety": "pass",
        "composite_score": 4.8,
        "passed": True,
        "reasoning": "Perfect match",
        "timestamp": datetime.now(timezone.utc),
        "duration_ms": 1500,
    }
    return TestResult(**result_fields)
|
||||
Reference in New Issue
Block a user