Services: Admin-Lehrer, Backend-Lehrer, Studio v2, Website, Klausur-Service, School-Service, Voice-Service, Geo-Service, BreakPilot Drive, Agent-Core Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
129 lines
4.2 KiB
Python
129 lines
4.2 KiB
Python
"""
Synthetic Tests

Tests using synthetically generated test cases
"""
|
|
import pytest
|
|
from typing import Dict, List
|
|
|
|
from bqas.synthetic_generator import SyntheticGenerator, TEACHER_PATTERNS
|
|
from bqas.judge import LLMJudge
|
|
|
|
|
|
class TestSyntheticGenerator:
    """Tests for synthetic test generation."""

    def test_teacher_patterns_exist(self):
        """Verify that TEACHER_PATTERNS is non-empty and has the core intents."""
        assert len(TEACHER_PATTERNS) > 0
        for intent in ("student_observation", "worksheet_generate", "parent_letter"):
            assert intent in TEACHER_PATTERNS

    @pytest.mark.asyncio
    async def test_fallback_generation(self, synthetic_generator: SyntheticGenerator):
        """Test fallback pattern-based generation.

        Requests five variations for ``student_observation`` and checks that
        exactly five come back, each tagged with the requested intent and
        carrying a non-empty input.
        """
        variations = synthetic_generator._generate_fallback(
            intent="student_observation",
            count=5,
        )

        assert len(variations) == 5
        for v in variations:
            assert v.expected_intent == "student_observation"
            assert len(v.input) > 0

    @pytest.mark.asyncio
    async def test_generate_variations(self, synthetic_generator: SyntheticGenerator):
        """Test LLM-based variation generation.

        The test is skipped when the generation call itself raises (e.g. the
        Ollama backend is not reachable).
        """
        # Only the generation call is guarded. The assertions must stay
        # outside the ``try``: AssertionError is a subclass of Exception, so
        # keeping them inside would turn real failures into skips.
        try:
            variations = await synthetic_generator.generate_variations(
                intent="student_observation",
                count=3,
            )
        except Exception as e:
            pytest.skip(f"Ollama not available: {e}")

        assert len(variations) >= 1  # At least fallback should work
        for v in variations:
            assert v.expected_intent == "student_observation"
|
|
|
|
|
|
class TestSyntheticEvaluation:
    """Evaluate synthetic tests with LLM Judge."""

    @pytest.mark.asyncio
    @pytest.mark.parametrize("intent", [
        "student_observation",
        "worksheet_generate",
        "reminder",
    ])
    async def test_synthetic_intent_quality(
        self,
        llm_judge: LLMJudge,
        synthetic_generator: SyntheticGenerator,
        intent: str,
    ):
        """Test quality of synthetic test cases.

        For each parametrized intent, three fallback variations are generated
        and passed to the judge with a fixed response and a detected intent
        equal to the expected one. The mean ``composite_score`` must be at
        least 3.0. The test is skipped when the judge's health check reports
        it is unavailable.
        """
        is_available = await llm_judge.health_check()
        if not is_available:
            pytest.skip("LLM judge not available")

        # Generate fallback variations (fast, doesn't need LLM)
        variations = synthetic_generator._generate_fallback(intent, count=3)

        # Fail with a readable message instead of a ZeroDivisionError in the
        # average below when nothing was generated for this intent.
        assert variations, f"No fallback variations generated for {intent}"

        scores = []
        for var in variations:
            result = await llm_judge.evaluate(
                user_input=var.input,
                detected_intent=intent,
                response="Verstanden.",
                expected_intent=intent,
            )
            scores.append(result.composite_score)

        avg_score = sum(scores) / len(scores)
        assert avg_score >= 3.0, f"Average score {avg_score} too low for {intent}"
|
|
|
|
|
|
class TestSyntheticCoverage:
    """Test coverage of synthetic generation."""

    def test_all_intents_have_patterns(self):
        """Verify all main intents have at least two patterns each."""
        required_intents = [
            "student_observation",
            "reminder",
            "homework_check",
            "worksheet_generate",
            "parent_letter",
            "class_message",
            "quiz_generate",
            "quick_activity",
            "canvas_edit",
            "canvas_layout",
            "operator_checklist",
            "eh_passage",
            "feedback_suggest",
            "reminder_schedule",
            "task_summary",
        ]

        for intent in required_intents:
            assert intent in TEACHER_PATTERNS, f"Missing patterns for: {intent}"
            assert len(TEACHER_PATTERNS[intent]) >= 2, f"Too few patterns for: {intent}"

    def test_pattern_placeholders(self):
        """Verify that no pattern contains an empty ``{}`` placeholder."""
        import re

        # ``\w*`` (not ``\w+``) so that an empty ``{}`` is captured as ""
        # and can be rejected below; with ``\w+`` the check could never fail.
        placeholder_re = re.compile(r'\{(\w*)\}')

        for intent, patterns in TEACHER_PATTERNS.items():
            for pattern in patterns:
                # Find all placeholders
                placeholders = placeholder_re.findall(pattern)

                # Verify no empty placeholders
                for ph in placeholders:
                    assert len(ph) > 0, f"Empty placeholder in {intent}: {pattern}"
|