# breakpilot-lehrer/voice-service/tests/bqas/conftest.py

"""
BQAS Test Fixtures
"""
import os
import pytest
import pytest_asyncio
import yaml
from pathlib import Path
from typing import List, Dict, Any
import httpx

# Put the directory two levels above this file's directory on sys.path
# so that the `bqas` package imports below resolve.
import sys
sys.path.insert(0, str(Path(__file__).parent.parent.parent))

from bqas.judge import LLMJudge
from bqas.rag_judge import RAGJudge
from bqas.config import BQASConfig
from bqas.regression_tracker import RegressionTracker
from bqas.synthetic_generator import SyntheticGenerator
from bqas.backlog_generator import BacklogGenerator


@pytest.fixture(scope="session")
def bqas_config():
    """Build the session-wide BQAS configuration.

    Every setting is taken from an environment variable and falls back to
    the literal default below when that variable is not set.
    """
    ollama_url = os.getenv("OLLAMA_BASE_URL", "http://localhost:11434")
    judge_model_name = os.getenv("BQAS_JUDGE_MODEL", "qwen2.5:32b")
    voice_url = os.getenv("VOICE_SERVICE_URL", "http://localhost:8091")
    history_db = os.getenv("BQAS_DB_PATH", "bqas_test_history.db")

    return BQASConfig(
        ollama_base_url=ollama_url,
        judge_model=judge_model_name,
        voice_service_url=voice_url,
        db_path=history_db,
    )


@pytest.fixture(scope="session")
def llm_judge(bqas_config):
    """Provide one LLMJudge per test session, built from ``bqas_config``."""
    judge = LLMJudge(config=bqas_config)
    return judge


@pytest.fixture(scope="session")
def rag_judge(bqas_config):
    """Provide one RAGJudge per test session for the RAG/Correction tests."""
    judge = RAGJudge(config=bqas_config)
    return judge


@pytest.fixture(scope="session")
def regression_tracker(bqas_config):
    """Provide one RegressionTracker per test session, built from ``bqas_config``."""
    tracker = RegressionTracker(config=bqas_config)
    return tracker


@pytest.fixture(scope="session")
def synthetic_generator(bqas_config):
    """Provide one SyntheticGenerator per test session, built from ``bqas_config``."""
    generator = SyntheticGenerator(config=bqas_config)
    return generator


@pytest.fixture(scope="session")
def backlog_generator(bqas_config):
    """Provide one BacklogGenerator per test session, built from ``bqas_config``."""
    generator = BacklogGenerator(config=bqas_config)
    return generator


@pytest_asyncio.fixture
async def voice_service_client(bqas_config):
    """Yield an async HTTP client whose base URL is the voice service.

    The client is created with a 30-second timeout and is used as an async
    context manager, so it is exited once the requesting test is done.
    """
    client_options = {
        "base_url": bqas_config.voice_service_url,
        "timeout": 30.0,
    }
    async with httpx.AsyncClient(**client_options) as http_client:
        yield http_client


def load_golden_tests_from_file(yaml_path: Path) -> List[Dict[str, Any]]:
    """Load golden test cases from a YAML file.

    Recognised top-level keys in each YAML document:

    - ``tests`` and ``edge_cases``: their entries are appended unchanged.
    - ``workflow_tests``: every workflow that has a non-empty ``steps`` list
      is flattened into a single test case built from its first step
      (``id``/``name`` from the workflow, ``input``/``expected_intent`` from
      the step, and a fixed ``min_score`` of 3.0).

    The file may be empty or contain several ``---``-separated documents;
    documents that are not mappings and sections whose value is null are
    treated as contributing no tests instead of raising ``TypeError``.

    Args:
        yaml_path: Path of the YAML file to read (decoded as UTF-8).

    Returns:
        The collected test cases, in document order; an empty list when the
        file holds no recognised sections.
    """
    with open(yaml_path, 'r', encoding='utf-8') as f:
        # safe_load_all is lazy, so materialise it while the file is open.
        # Unlike safe_load it accepts empty files and multi-document streams.
        documents = list(yaml.safe_load_all(f))

    tests: List[Dict[str, Any]] = []
    for data in documents:
        # An empty document parses to None; scalars/lists carry no sections.
        if not isinstance(data, dict):
            continue

        # `or []` covers a key that is present but has a null value.
        tests.extend(data.get('tests') or [])
        tests.extend(data.get('edge_cases') or [])

        # Flatten workflow tests - only the first step is turned into a test.
        for wf in data.get('workflow_tests') or []:
            if 'steps' not in wf or not wf['steps']:
                continue
            first_step = wf['steps'][0]
            tests.append({
                'id': wf.get('id', 'WF-XXX'),
                'name': wf.get('name', 'Workflow'),
                'input': first_step.get('input', ''),
                'expected_intent': first_step.get('expected_intent', 'unknown'),
                'min_score': 3.0,
            })

    return tests


@pytest.fixture(scope="session")
def golden_tests() -> List[Dict[str, Any]]:
    """Load all golden tests from the YAML files next to this conftest.

    Every ``*.yaml`` file in the ``golden_tests`` directory is read with
    ``load_golden_tests_from_file`` and the results are concatenated.

    Returns:
        All test cases, ordered by file path and then by position in the file.
    """
    golden_dir = Path(__file__).parent / "golden_tests"
    all_tests: List[Dict[str, Any]] = []

    # glob() yields entries in filesystem order, which differs between
    # machines; sort so the combined test order is reproducible.
    for yaml_file in sorted(golden_dir.glob("*.yaml")):
        all_tests.extend(load_golden_tests_from_file(yaml_file))

    return all_tests


@pytest.fixture(scope="session")
def intent_tests() -> List[Dict[str, Any]]:
    """Return the test cases stored in ``golden_tests/intent_tests.yaml``."""
    golden_dir = Path(__file__).parent / "golden_tests"
    intent_file = golden_dir / "intent_tests.yaml"
    return load_golden_tests_from_file(intent_file)


@pytest.fixture(scope="session")
def edge_case_tests() -> List[Dict[str, Any]]:
    """Return the test cases stored in ``golden_tests/edge_cases.yaml``."""
    golden_dir = Path(__file__).parent / "golden_tests"
    edge_case_file = golden_dir / "edge_cases.yaml"
    return load_golden_tests_from_file(edge_case_file)


def load_rag_tests_from_file(yaml_path: Path) -> List[Dict[str, Any]]:
    """Load RAG test cases from a YAML file that may hold several documents.

    For every ``---``-separated document, the entries under the top-level
    ``tests`` key and then those under ``edge_cases`` are collected.
    Documents that are empty or not mappings, and sections whose value is
    null, contribute nothing instead of raising ``TypeError``.

    Args:
        yaml_path: Path of the YAML file to read (decoded as UTF-8).

    Returns:
        The collected test cases in document order.
    """
    with open(yaml_path, 'r', encoding='utf-8') as f:
        content = f.read()

    tests: List[Dict[str, Any]] = []
    # Handle YAML with multiple documents (separated by ---)
    for doc in yaml.safe_load_all(content):
        # Empty documents parse to None; scalars/lists have no sections.
        if not isinstance(doc, dict):
            continue
        # `or []` covers a key that is present but has a null value.
        tests.extend(doc.get('tests') or [])
        tests.extend(doc.get('edge_cases') or [])

    return tests


@pytest.fixture(scope="session")
def rag_tests() -> List[Dict[str, Any]]:
    """Return the RAG/Correction golden suite, or an empty list if it is absent.

    The suite is read from ``golden_tests/golden_rag_correction_v1.yaml``.
    """
    golden_dir = Path(__file__).parent / "golden_tests"
    suite_file = golden_dir / "golden_rag_correction_v1.yaml"
    if not suite_file.exists():
        return []
    return load_rag_tests_from_file(suite_file)


@pytest.fixture(scope="session")
def rag_retrieval_tests(rag_tests) -> List[Dict[str, Any]]:
    """Return the RAG tests whose ``category`` is ``eh_retrieval``."""
    selected = []
    for case in rag_tests:
        if case.get("category") == "eh_retrieval":
            selected.append(case)
    return selected


@pytest.fixture(scope="session")
def rag_operator_tests(rag_tests) -> List[Dict[str, Any]]:
    """Return the RAG tests whose ``category`` is ``operator_alignment``."""
    selected = []
    for case in rag_tests:
        if case.get("category") == "operator_alignment":
            selected.append(case)
    return selected


@pytest.fixture(scope="session")
def rag_privacy_tests(rag_tests) -> List[Dict[str, Any]]:
    """Return the RAG tests whose ``category`` is ``privacy_compliance``."""
    selected = []
    for case in rag_tests:
        if case.get("category") == "privacy_compliance":
            selected.append(case)
    return selected


@pytest.fixture
def sample_test_result():
    """Build a passing ``TestResult`` populated with fixed sample values.

    Only ``timestamp`` varies between calls: it is the current UTC time.
    """
    from datetime import datetime, timezone
    from bqas.metrics import TestResult

    # Collect the field values first, then hand them over in one call.
    result_fields = {
        "test_id": "TEST-001",
        "test_name": "Sample Test",
        "user_input": "Notiz zu Max: heute gestoert",
        "expected_intent": "student_observation",
        "detected_intent": "student_observation",
        "response": "Notiz gespeichert",
        "intent_accuracy": 100,
        "faithfulness": 5,
        "relevance": 5,
        "coherence": 5,
        "safety": "pass",
        "composite_score": 4.8,
        "passed": True,
        "reasoning": "Perfect match",
        "timestamp": datetime.now(timezone.utc),
        "duration_ms": 1500,
    }
    return TestResult(**result_fields)