Files
breakpilot-lehrer/voice-service/tests/bqas/conftest.py
Benjamin Admin 9912997187
Some checks failed
CI / go-lint (push) Has been skipped
CI / python-lint (push) Has been skipped
CI / nodejs-lint (push) Has been skipped
CI / test-go-school (push) Successful in 25s
CI / test-go-edu-search (push) Successful in 26s
CI / test-python-klausur (push) Failing after 1m55s
CI / test-python-agent-core (push) Successful in 16s
CI / test-nodejs-website (push) Successful in 18s
refactor: Jitsi/Matrix/Voice von Core übernommen, Camunda/BPMN gelöscht, Kommunikation-Nav
- Voice-Service von Core nach Lehrer verschoben (bp-lehrer-voice-service)
- 4 Jitsi-Services + 2 Synapse-Services in docker-compose.yml aufgenommen
- Camunda komplett gelöscht: workflow pages, workflow-config.ts, bpmn-js deps
- CAMUNDA_URL aus backend-lehrer environment entfernt
- Sidebar: Kategorie "Compliance SDK" + "Katalogverwaltung" entfernt
- Sidebar: Neue Kategorie "Kommunikation" mit Video & Chat, Voice Service, Alerts

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-03-05 17:01:47 +01:00

198 lines
5.9 KiB
Python

"""
BQAS Test Fixtures
"""
import os
import pytest
import pytest_asyncio
import yaml
from pathlib import Path
from typing import List, Dict, Any
import httpx
# Add the directory three levels above this file to sys.path so the `bqas` package can be imported
import sys
sys.path.insert(0, str(Path(__file__).parent.parent.parent))
from bqas.judge import LLMJudge
from bqas.rag_judge import RAGJudge
from bqas.config import BQASConfig
from bqas.regression_tracker import RegressionTracker
from bqas.synthetic_generator import SyntheticGenerator
from bqas.backlog_generator import BacklogGenerator
@pytest.fixture(scope="session")
def bqas_config():
    """Build the session-wide BQAS configuration.

    Every setting is taken from its environment variable and falls back
    to the listed default when that variable is unset.
    """
    # config field -> (environment variable, fallback value)
    env_defaults = {
        "ollama_base_url": ("OLLAMA_BASE_URL", "http://localhost:11434"),
        "judge_model": ("BQAS_JUDGE_MODEL", "qwen2.5:32b"),
        "voice_service_url": ("VOICE_SERVICE_URL", "http://localhost:8091"),
        "db_path": ("BQAS_DB_PATH", "bqas_test_history.db"),
    }
    settings = {
        field: os.getenv(variable, fallback)
        for field, (variable, fallback) in env_defaults.items()
    }
    return BQASConfig(**settings)
@pytest.fixture(scope="session")
def llm_judge(bqas_config):
    """Provide one LLMJudge per session, built from the BQAS config."""
    judge = LLMJudge(config=bqas_config)
    return judge
@pytest.fixture(scope="session")
def rag_judge(bqas_config):
    """Provide one RAGJudge per session for the RAG/Correction tests."""
    judge = RAGJudge(config=bqas_config)
    return judge
@pytest.fixture(scope="session")
def regression_tracker(bqas_config):
    """Provide one RegressionTracker per session, built from the BQAS config."""
    tracker = RegressionTracker(config=bqas_config)
    return tracker
@pytest.fixture(scope="session")
def synthetic_generator(bqas_config):
    """Provide one SyntheticGenerator per session, built from the BQAS config."""
    generator = SyntheticGenerator(config=bqas_config)
    return generator
@pytest.fixture(scope="session")
def backlog_generator(bqas_config):
    """Provide one BacklogGenerator per session, built from the BQAS config."""
    generator = BacklogGenerator(config=bqas_config)
    return generator
@pytest_asyncio.fixture
async def voice_service_client(bqas_config):
    """Yield an async HTTP client whose base URL is the voice service.

    The client uses a 30-second timeout and is closed when the
    ``async with`` block exits after the test has finished with it.
    """
    client_options = {
        "base_url": bqas_config.voice_service_url,
        "timeout": 30.0,
    }
    async with httpx.AsyncClient(**client_options) as http:
        yield http
def load_golden_tests_from_file(yaml_path: Path) -> List[Dict[str, Any]]:
    """Load test cases from a single-document YAML file.

    Collects the entries under the top-level ``tests`` and ``edge_cases``
    keys, then appends one flattened case per ``workflow_tests`` entry,
    built from that workflow's first step.

    Args:
        yaml_path: Path of the YAML file to read (decoded as UTF-8).

    Returns:
        The collected test-case dicts. The list is empty when the file
        is empty, when its top level is not a mapping, or when none of
        the known sections is present.
    """
    with open(yaml_path, 'r', encoding='utf-8') as f:
        data = yaml.safe_load(f)

    # safe_load returns None for an empty file; anything that is not a
    # mapping cannot contain the sections handled below.
    if not isinstance(data, dict):
        return []

    tests: List[Dict[str, Any]] = []

    # A section key with no entries parses as None, hence the `or []`.
    tests.extend(data.get('tests') or [])
    tests.extend(data.get('edge_cases') or [])

    # Flatten workflow tests: only the first step of each workflow is used.
    for wf in data.get('workflow_tests') or []:
        steps = wf.get('steps')
        if not steps:
            continue
        first_step = steps[0]
        tests.append({
            'id': wf.get('id', 'WF-XXX'),
            'name': wf.get('name', 'Workflow'),
            'input': first_step.get('input', ''),
            'expected_intent': first_step.get('expected_intent', 'unknown'),
            'min_score': 3.0,
        })

    return tests
@pytest.fixture(scope="session")
def golden_tests() -> List[Dict[str, Any]]:
    """Load all golden tests from the YAML files in ``golden_tests``.

    Returns:
        The test cases of every ``*.yaml`` file in the ``golden_tests``
        directory next to this file, concatenated in sorted path order.
    """
    golden_dir = Path(__file__).parent / "golden_tests"
    all_tests: List[Dict[str, Any]] = []
    # Path.glob makes no ordering guarantee; sorting keeps the combined
    # test list in the same order on every run and machine.
    for yaml_file in sorted(golden_dir.glob("*.yaml")):
        all_tests.extend(load_golden_tests_from_file(yaml_file))
    return all_tests
@pytest.fixture(scope="session")
def intent_tests() -> List[Dict[str, Any]]:
    """Return the cases stored in ``golden_tests/intent_tests.yaml``."""
    suite_dir = Path(__file__).parent / "golden_tests"
    return load_golden_tests_from_file(suite_dir / "intent_tests.yaml")
@pytest.fixture(scope="session")
def edge_case_tests() -> List[Dict[str, Any]]:
    """Return the cases stored in ``golden_tests/edge_cases.yaml``."""
    here = Path(__file__).parent
    suite_file = here.joinpath("golden_tests", "edge_cases.yaml")
    return load_golden_tests_from_file(suite_file)
def load_rag_tests_from_file(yaml_path: Path) -> List[Dict[str, Any]]:
    """Load RAG test cases from a YAML file that may hold several documents.

    For every document in the file, the entries under its ``tests`` and
    ``edge_cases`` keys are appended to the result, in document order.

    Args:
        yaml_path: Path of the YAML file to read (decoded as UTF-8).

    Returns:
        The collected test-case dicts; empty when no document contains
        either section.
    """
    with open(yaml_path, 'r', encoding='utf-8') as f:
        content = f.read()

    tests: List[Dict[str, Any]] = []

    # safe_load_all yields one object per `---`-separated document.
    for doc in yaml.safe_load_all(content):
        # Empty documents parse as None, and a non-mapping document has
        # no sections to read.
        if not isinstance(doc, dict):
            continue
        # A section key with no entries parses as None, hence the `or []`.
        tests.extend(doc.get('tests') or [])
        tests.extend(doc.get('edge_cases') or [])

    return tests
@pytest.fixture(scope="session")
def rag_tests() -> List[Dict[str, Any]]:
    """Return the RAG/Correction golden suite, or an empty list if its file is absent."""
    suite_file = Path(__file__).parent / "golden_tests" / "golden_rag_correction_v1.yaml"
    if not suite_file.exists():
        return []
    return load_rag_tests_from_file(suite_file)
@pytest.fixture(scope="session")
def rag_retrieval_tests(rag_tests) -> List[Dict[str, Any]]:
    """Return the RAG tests whose category is ``eh_retrieval``."""
    selected = []
    for case in rag_tests:
        if case.get("category") == "eh_retrieval":
            selected.append(case)
    return selected
@pytest.fixture(scope="session")
def rag_operator_tests(rag_tests) -> List[Dict[str, Any]]:
    """Return the RAG tests whose category is ``operator_alignment``."""
    return list(
        filter(lambda case: case.get("category") == "operator_alignment", rag_tests)
    )
@pytest.fixture(scope="session")
def rag_privacy_tests(rag_tests) -> List[Dict[str, Any]]:
    """Return the RAG tests whose category is ``privacy_compliance``."""
    wanted_category = "privacy_compliance"
    return [case for case in rag_tests if case.get("category") == wanted_category]
@pytest.fixture
def sample_test_result():
    """Build a passing sample TestResult with fixed values.

    Only the timestamp varies: it is the current time in UTC.
    """
    from datetime import datetime, timezone
    from bqas.metrics import TestResult

    fields = {
        "test_id": "TEST-001",
        "test_name": "Sample Test",
        "user_input": "Notiz zu Max: heute gestoert",
        "expected_intent": "student_observation",
        "detected_intent": "student_observation",
        "response": "Notiz gespeichert",
        "intent_accuracy": 100,
        "faithfulness": 5,
        "relevance": 5,
        "coherence": 5,
        "safety": "pass",
        "composite_score": 4.8,
        "passed": True,
        "reasoning": "Perfect match",
        "timestamp": datetime.now(timezone.utc),
        "duration_ms": 1500,
    }
    return TestResult(**fields)