Some checks failed
CI / go-lint (push) Has been skipped
CI / python-lint (push) Has been skipped
CI / nodejs-lint (push) Has been skipped
CI / test-go-school (push) Successful in 25s
CI / test-go-edu-search (push) Successful in 26s
CI / test-python-klausur (push) Failing after 1m55s
CI / test-python-agent-core (push) Successful in 16s
CI / test-nodejs-website (push) Successful in 18s
- Voice-Service von Core nach Lehrer verschoben (bp-lehrer-voice-service) - 4 Jitsi-Services + 2 Synapse-Services in docker-compose.yml aufgenommen - Camunda komplett gelöscht: workflow pages, workflow-config.ts, bpmn-js deps - CAMUNDA_URL aus backend-lehrer environment entfernt - Sidebar: Kategorie "Compliance SDK" + "Katalogverwaltung" entfernt - Sidebar: Neue Kategorie "Kommunikation" mit Video & Chat, Voice Service, Alerts Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
198 lines
5.9 KiB
Python
198 lines
5.9 KiB
Python
"""
|
|
BQAS Test Fixtures
|
|
"""
|
|
import os
|
|
import pytest
|
|
import pytest_asyncio
|
|
import yaml
|
|
from pathlib import Path
|
|
from typing import List, Dict, Any
|
|
import httpx
|
|
|
|
# Add parent to path for imports
|
|
import sys
|
|
sys.path.insert(0, str(Path(__file__).parent.parent.parent))
|
|
|
|
from bqas.judge import LLMJudge
|
|
from bqas.rag_judge import RAGJudge
|
|
from bqas.config import BQASConfig
|
|
from bqas.regression_tracker import RegressionTracker
|
|
from bqas.synthetic_generator import SyntheticGenerator
|
|
from bqas.backlog_generator import BacklogGenerator
|
|
|
|
|
|
@pytest.fixture(scope="session")
def bqas_config():
    """Build the session-wide BQAS configuration.

    Every setting is read from its environment variable and falls back to
    the default shown below when that variable is not set.
    """
    settings = {
        "ollama_base_url": os.getenv("OLLAMA_BASE_URL", "http://localhost:11434"),
        "judge_model": os.getenv("BQAS_JUDGE_MODEL", "qwen2.5:32b"),
        "voice_service_url": os.getenv("VOICE_SERVICE_URL", "http://localhost:8091"),
        "db_path": os.getenv("BQAS_DB_PATH", "bqas_test_history.db"),
    }
    return BQASConfig(**settings)
|
|
|
|
|
|
@pytest.fixture(scope="session")
def llm_judge(bqas_config):
    """Provide one ``LLMJudge`` per test session, built from ``bqas_config``."""
    judge = LLMJudge(config=bqas_config)
    return judge
|
|
|
|
|
|
@pytest.fixture(scope="session")
def rag_judge(bqas_config):
    """Provide one ``RAGJudge`` per test session for the RAG/Correction tests."""
    judge = RAGJudge(config=bqas_config)
    return judge
|
|
|
|
|
|
@pytest.fixture(scope="session")
def regression_tracker(bqas_config):
    """Provide one ``RegressionTracker`` per test session, built from ``bqas_config``."""
    tracker = RegressionTracker(config=bqas_config)
    return tracker
|
|
|
|
|
|
@pytest.fixture(scope="session")
def synthetic_generator(bqas_config):
    """Provide one ``SyntheticGenerator`` per test session, built from ``bqas_config``."""
    generator = SyntheticGenerator(config=bqas_config)
    return generator
|
|
|
|
|
|
@pytest.fixture(scope="session")
def backlog_generator(bqas_config):
    """Provide one ``BacklogGenerator`` per test session, built from ``bqas_config``."""
    generator = BacklogGenerator(config=bqas_config)
    return generator
|
|
|
|
|
|
@pytest_asyncio.fixture
async def voice_service_client(bqas_config):
    """Yield an async HTTP client pointed at the voice service.

    The client uses ``bqas_config.voice_service_url`` as its base URL and a
    30-second timeout. Leaving the ``async with`` block on fixture teardown
    closes the client.
    """
    client_options = {
        "base_url": bqas_config.voice_service_url,
        "timeout": 30.0,
    }
    async with httpx.AsyncClient(**client_options) as http_client:
        yield http_client
|
|
|
|
|
|
def load_golden_tests_from_file(yaml_path: Path) -> List[Dict[str, Any]]:
    """Load test cases from a single-document golden-test YAML file.

    Entries under the top-level ``tests`` and ``edge_cases`` keys are taken
    as they are. Each entry under ``workflow_tests`` that has a non-empty
    ``steps`` list is flattened into one test case built from its first
    step, with a fixed ``min_score`` of 3.0.

    Args:
        yaml_path: Path of the YAML file to read (opened as UTF-8).

    Returns:
        The collected test cases in the order ``tests``, ``edge_cases``,
        flattened workflows. An empty file, a file whose top level is not a
        mapping, or a section whose value is null contributes no test cases
        instead of raising ``TypeError``.
    """
    with open(yaml_path, 'r', encoding='utf-8') as f:
        data = yaml.safe_load(f)

    # safe_load returns None for an empty document; membership tests on None
    # would raise TypeError, so anything that is not a mapping yields no tests.
    if not isinstance(data, dict):
        return []

    tests: List[Dict[str, Any]] = []

    # Handle different YAML structures. ``or []`` tolerates a key that is
    # present but has a null value (e.g. a bare ``tests:`` line).
    tests.extend(data.get('tests') or [])
    tests.extend(data.get('edge_cases') or [])

    # Flatten workflow tests - only the first step of each workflow is used.
    for wf in data.get('workflow_tests') or []:
        steps = wf.get('steps')
        if not steps:
            continue
        first_step = steps[0]
        tests.append({
            'id': wf.get('id', 'WF-XXX'),
            'name': wf.get('name', 'Workflow'),
            'input': first_step.get('input', ''),
            'expected_intent': first_step.get('expected_intent', 'unknown'),
            'min_score': 3.0,
        })

    return tests
|
|
|
|
|
|
@pytest.fixture(scope="session")
def golden_tests() -> List[Dict[str, Any]]:
    """Load all golden tests from the YAML files in ``golden_tests/``.

    Every ``*.yaml`` file in the ``golden_tests`` directory next to this
    module is passed to ``load_golden_tests_from_file`` and the results are
    concatenated.

    Returns:
        The combined test cases, ordered by file path and then by position
        within each file.
    """
    golden_dir = Path(__file__).parent / "golden_tests"
    all_tests: List[Dict[str, Any]] = []

    # Sort the paths so the combined list has the same order on every run and
    # machine; the order in which glob yields entries is not guaranteed.
    for yaml_file in sorted(golden_dir.glob("*.yaml")):
        all_tests.extend(load_golden_tests_from_file(yaml_file))

    return all_tests
|
|
|
|
|
|
@pytest.fixture(scope="session")
def intent_tests() -> List[Dict[str, Any]]:
    """Return the test cases loaded from ``golden_tests/intent_tests.yaml``."""
    module_dir = Path(__file__).parent
    intent_file = module_dir / "golden_tests" / "intent_tests.yaml"
    loaded = load_golden_tests_from_file(intent_file)
    return loaded
|
|
|
|
|
|
@pytest.fixture(scope="session")
def edge_case_tests() -> List[Dict[str, Any]]:
    """Return the test cases loaded from ``golden_tests/edge_cases.yaml``."""
    module_dir = Path(__file__).parent
    edge_case_file = module_dir / "golden_tests" / "edge_cases.yaml"
    loaded = load_golden_tests_from_file(edge_case_file)
    return loaded
|
|
|
|
|
|
def load_rag_tests_from_file(yaml_path: Path) -> List[Dict[str, Any]]:
    """Collect RAG test cases from a YAML file holding several documents.

    The file is read as UTF-8 and parsed as a stream of YAML documents
    (separated by ``---``). For every non-empty document, the entries under
    ``tests`` and then ``edge_cases`` are appended to the result.

    Args:
        yaml_path: Path of the multi-document YAML file.

    Returns:
        All collected test cases, in document order.
    """
    with open(yaml_path, 'r', encoding='utf-8') as handle:
        raw_text = handle.read()

    # Parse every document up front so a syntax error anywhere in the file
    # surfaces before any test cases are collected.
    parsed_docs = list(yaml.safe_load_all(raw_text))

    collected = []
    for document in parsed_docs:
        if not document:
            # Empty documents (e.g. a trailing ``---``) carry no tests.
            continue
        for section in ('tests', 'edge_cases'):
            if section in document:
                collected.extend(document[section])

    return collected
|
|
|
|
|
|
@pytest.fixture(scope="session")
def rag_tests() -> List[Dict[str, Any]]:
    """Return the RAG/Correction golden suite, or an empty list if its file is missing.

    The suite is read from ``golden_tests/golden_rag_correction_v1.yaml``
    next to this module.
    """
    suite_file = Path(__file__).parent / "golden_tests" / "golden_rag_correction_v1.yaml"
    if not suite_file.exists():
        return []
    return load_rag_tests_from_file(suite_file)
|
|
|
|
|
|
@pytest.fixture(scope="session")
def rag_retrieval_tests(rag_tests) -> List[Dict[str, Any]]:
    """Return the RAG tests whose ``category`` is ``"eh_retrieval"``."""
    selected = []
    for case in rag_tests:
        if case.get("category") == "eh_retrieval":
            selected.append(case)
    return selected
|
|
|
|
|
|
@pytest.fixture(scope="session")
def rag_operator_tests(rag_tests) -> List[Dict[str, Any]]:
    """Return the RAG tests whose ``category`` is ``"operator_alignment"``."""
    selected = []
    for case in rag_tests:
        if case.get("category") == "operator_alignment":
            selected.append(case)
    return selected
|
|
|
|
|
|
@pytest.fixture(scope="session")
def rag_privacy_tests(rag_tests) -> List[Dict[str, Any]]:
    """Return the RAG tests whose ``category`` is ``"privacy_compliance"``."""
    selected = []
    for case in rag_tests:
        if case.get("category") == "privacy_compliance":
            selected.append(case)
    return selected
|
|
|
|
|
|
@pytest.fixture
def sample_test_result():
    """Build a passing ``TestResult`` with fixed sample values.

    All fields are constants except ``timestamp``, which is the current
    time in UTC when the fixture runs.
    """
    from datetime import datetime, timezone
    from bqas.metrics import TestResult

    fields = {
        "test_id": "TEST-001",
        "test_name": "Sample Test",
        "user_input": "Notiz zu Max: heute gestoert",
        "expected_intent": "student_observation",
        "detected_intent": "student_observation",
        "response": "Notiz gespeichert",
        "intent_accuracy": 100,
        "faithfulness": 5,
        "relevance": 5,
        "coherence": 5,
        "safety": "pass",
        "composite_score": 4.8,
        "passed": True,
        "reasoning": "Perfect match",
        "timestamp": datetime.now(timezone.utc),
        "duration_ms": 1500,
    }
    return TestResult(**fields)
|