This repository has been archived on 2026-02-15. You can view files and clone it. You cannot open issues or pull requests or push a commit.
Files
breakpilot-pwa/voice-service/tests/bqas/test_synthetic.py
BreakPilot Dev 19855efacc
Some checks failed
Tests / Go Tests (push) Has been cancelled
Tests / Python Tests (push) Has been cancelled
Tests / Integration Tests (push) Has been cancelled
Tests / Go Lint (push) Has been cancelled
Tests / Python Lint (push) Has been cancelled
Tests / Security Scan (push) Has been cancelled
Tests / All Checks Passed (push) Has been cancelled
Security Scanning / Secret Scanning (push) Has been cancelled
Security Scanning / Dependency Vulnerability Scan (push) Has been cancelled
Security Scanning / Go Security Scan (push) Has been cancelled
Security Scanning / Python Security Scan (push) Has been cancelled
Security Scanning / Node.js Security Scan (push) Has been cancelled
Security Scanning / Docker Image Security (push) Has been cancelled
Security Scanning / Security Summary (push) Has been cancelled
CI/CD Pipeline / Go Tests (push) Has been cancelled
CI/CD Pipeline / Python Tests (push) Has been cancelled
CI/CD Pipeline / Website Tests (push) Has been cancelled
CI/CD Pipeline / Linting (push) Has been cancelled
CI/CD Pipeline / Security Scan (push) Has been cancelled
CI/CD Pipeline / Docker Build & Push (push) Has been cancelled
CI/CD Pipeline / Integration Tests (push) Has been cancelled
CI/CD Pipeline / Deploy to Staging (push) Has been cancelled
CI/CD Pipeline / Deploy to Production (push) Has been cancelled
CI/CD Pipeline / CI Summary (push) Has been cancelled
ci/woodpecker/manual/build-ci-image Pipeline was successful
ci/woodpecker/manual/main Pipeline failed
feat: BreakPilot PWA - Full codebase (clean push without large binaries)
All services: admin-v2, studio-v2, website, ai-compliance-sdk,
consent-service, klausur-service, voice-service, and infrastructure.
Large PDFs and compiled binaries excluded via .gitignore.
2026-02-11 13:25:58 +01:00

129 lines
4.2 KiB
Python

"""
Synthetic Tests
Tests using synthetically generated test cases
"""
import re
from typing import Dict, List

import pytest

from bqas.judge import LLMJudge
from bqas.synthetic_generator import SyntheticGenerator, TEACHER_PATTERNS
class TestSyntheticGenerator:
    """Tests for synthetic test generation."""

    def test_teacher_patterns_exist(self):
        """Verify the core teacher intent patterns are defined."""
        assert len(TEACHER_PATTERNS) > 0
        assert "student_observation" in TEACHER_PATTERNS
        assert "worksheet_generate" in TEACHER_PATTERNS
        assert "parent_letter" in TEACHER_PATTERNS

    def test_fallback_generation(self, synthetic_generator: SyntheticGenerator):
        """Test fallback pattern-based generation.

        ``_generate_fallback`` is synchronous, so this test needs neither
        the ``asyncio`` marker nor an ``async`` signature (previously both
        were present, pointlessly routing the test through an event loop).
        """
        variations = synthetic_generator._generate_fallback(
            intent="student_observation",
            count=5,
        )
        assert len(variations) == 5
        for v in variations:
            assert v.expected_intent == "student_observation"
            assert len(v.input) > 0

    @pytest.mark.asyncio
    async def test_generate_variations(self, synthetic_generator: SyntheticGenerator):
        """Test LLM-based variation generation.

        Skipped when the Ollama backend is unreachable. Only the LLM call
        itself is inside the ``try`` — assertions are kept outside so a
        real test failure is reported as a failure, not masked as a skip
        (``AssertionError`` is an ``Exception`` and would otherwise be
        swallowed by the broad handler).
        """
        try:
            variations = await synthetic_generator.generate_variations(
                intent="student_observation",
                count=3,
            )
        except Exception as e:  # broad on purpose: any backend error means "not available"
            pytest.skip(f"Ollama not available: {e}")
        # At least the fallback path should have produced something.
        assert len(variations) >= 1
        for v in variations:
            assert v.expected_intent == "student_observation"
class TestSyntheticEvaluation:
    """Evaluate synthetic tests with LLM Judge."""

    @pytest.mark.asyncio
    @pytest.mark.parametrize("intent", [
        "student_observation",
        "worksheet_generate",
        "reminder",
    ])
    async def test_synthetic_intent_quality(
        self,
        llm_judge: LLMJudge,
        synthetic_generator: SyntheticGenerator,
        intent: str,
    ):
        """Test quality of synthetic test cases."""
        # Guard clause: without a reachable judge there is nothing to score.
        if not await llm_judge.health_check():
            pytest.skip("LLM judge not available")

        # Pattern-based fallback generation is fast and needs no LLM round-trip.
        cases = synthetic_generator._generate_fallback(intent, count=3)

        evaluations = [
            await llm_judge.evaluate(
                user_input=case.input,
                detected_intent=intent,
                response="Verstanden.",
                expected_intent=intent,
            )
            for case in cases
        ]
        composite_scores = [ev.composite_score for ev in evaluations]
        avg_score = sum(composite_scores) / len(composite_scores)
        assert avg_score >= 3.0, f"Average score {avg_score} too low for {intent}"
class TestSyntheticCoverage:
    """Test coverage of synthetic generation."""

    def test_all_intents_have_patterns(self):
        """Verify all main intents have at least two patterns each."""
        required_intents = [
            "student_observation",
            "reminder",
            "homework_check",
            "worksheet_generate",
            "parent_letter",
            "class_message",
            "quiz_generate",
            "quick_activity",
            "canvas_edit",
            "canvas_layout",
            "operator_checklist",
            "eh_passage",
            "feedback_suggest",
            "reminder_schedule",
            "task_summary",
        ]
        for intent in required_intents:
            assert intent in TEACHER_PATTERNS, f"Missing patterns for: {intent}"
            assert len(TEACHER_PATTERNS[intent]) >= 2, f"Too few patterns for: {intent}"

    def test_pattern_placeholders(self):
        """Verify patterns have valid (non-empty) placeholders.

        The previous regex required at least one word character between
        the braces, so a captured placeholder could never be empty and the
        non-empty assertion was vacuous. Matching zero-or-more word chars
        makes a literal empty placeholder pair actually fail the check.
        """
        # Compiled once; hoisted out of the nested loops.
        placeholder_re = re.compile(r'\{(\w*)\}')
        for intent, patterns in TEACHER_PATTERNS.items():
            for pattern in patterns:
                for ph in placeholder_re.findall(pattern):
                    assert len(ph) > 0, f"Empty placeholder in {intent}: {pattern}"