[split-required] Split remaining 500-680 LOC files (final batch)

website (17 pages + 3 components):
- multiplayer/wizard, middleware/wizard+test-wizard, communication
- builds/wizard, staff-search, voice, sbom/wizard
- foerderantrag, mail/tasks, tools/communication, sbom
- compliance/evidence, uni-crawler, brandbook (already done)
- CollectionsTab, IngestionTab, RiskHeatmap

backend-lehrer (5 files):
- letters_api (641 → 2), certificates_api (636 → 2)
- alerts_agent/db/models (636 → 3)
- llm_gateway/communication_service (614 → 2)
- game/database already done in prior batch

klausur-service (2 files):
- hybrid_vocab_extractor (664 → 2)
- klausur-service/frontend: api.ts (620 → 3), EHUploadWizard (591 → 2)

voice-service (3 files):
- bqas/rag_judge (618 → 3), runner (529 → 2)
- enhanced_task_orchestrator (519 → 2)

studio-v2 (6 files):
- korrektur/[klausurId] (578 → 4), fairness (569 → 2)
- AlertsWizard (552 → 2), OnboardingWizard (513 → 2)
- korrektur/api.ts (506 → 3), geo-lernwelt (501 → 2)

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
Benjamin Admin
2026-04-25 08:56:45 +02:00
parent b4613e26f3
commit 451365a312
115 changed files with 10694 additions and 13839 deletions

View File

@@ -0,0 +1,162 @@
"""
BQAS Golden Suite Runner - Loads and executes golden test cases
"""
import yaml
import structlog
from pathlib import Path
from typing import List, Dict, Any, Optional
from datetime import datetime
from bqas.metrics import TestResult
# Module-level structlog logger, named after this module.
logger = structlog.get_logger(__name__)
async def load_golden_tests() -> List[Dict[str, Any]]:
    """
    Collect the golden test cases from the bundled YAML suites.

    Reads ``intent_tests.yaml``, ``edge_cases.yaml`` and
    ``workflow_tests.yaml`` from ``tests/bqas/golden_tests`` under the parent
    of this module's directory. Each case found under a file's top-level
    ``tests`` entry is tagged with a ``source_file`` key holding the name of
    the file it came from.

    Files that do not exist are skipped silently. A file that raises while
    being read or processed is skipped with a logged warning.

    Returns:
        The collected test cases, in the order the files are listed.
    """
    suite_dir = Path(__file__).parent.parent / "tests" / "bqas" / "golden_tests"
    collected = []

    for filename in ("intent_tests.yaml", "edge_cases.yaml", "workflow_tests.yaml"):
        filepath = suite_dir / filename
        if not filepath.exists():
            continue

        try:
            with open(filepath, 'r', encoding='utf-8') as handle:
                payload = yaml.safe_load(handle)

            if not payload or 'tests' not in payload:
                continue

            cases = payload['tests']
            # Tag every case first; the batch is only added to the result
            # once all of its cases were tagged without error.
            for case in cases:
                case['source_file'] = filename
            collected.extend(cases)
        except Exception as e:
            logger.warning(f"Failed to load {filename}", error=str(e))

    return collected
async def load_rag_tests() -> List[Dict[str, Any]]:
    """
    Load RAG test cases from the multi-document golden YAML file.

    Reads ``golden_rag_correction_v1.yaml`` from ``tests/bqas/golden_tests``
    under the parent of this module's directory. For every YAML document in
    the file, the entries under ``tests`` are collected first, followed by
    the entries under ``edge_cases``.

    Documents that are not mappings, and sections that are empty (a bare
    ``tests:`` key loads as ``None``), are skipped so that one empty section
    does not discard the remainder of the file.

    Returns:
        The collected test cases. The list is empty when the file does not
        exist. If reading or processing raises, a warning is logged and
        whatever was collected up to that point is returned.
    """
    tests: List[Dict[str, Any]] = []
    rag_file = (
        Path(__file__).parent.parent
        / "tests" / "bqas" / "golden_tests" / "golden_rag_correction_v1.yaml"
    )
    if not rag_file.exists():
        return tests

    try:
        with open(rag_file, 'r', encoding='utf-8') as f:
            # Materialise all documents while the file is still open.
            documents = list(yaml.safe_load_all(f))

        for doc in documents:
            if not isinstance(doc, dict):
                continue
            for section in ('tests', 'edge_cases'):
                # `or []` covers both a missing key and an explicit null.
                tests.extend(doc.get(section) or [])
    except Exception as e:
        logger.warning("Failed to load RAG tests", error=str(e))

    return tests
def simulate_response(user_input: str, expected_intent: str, accuracy: float = 0.90) -> tuple:
    """
    Simulate a voice service response for testing without a live service.

    With probability ``accuracy`` the expected intent is reported as
    detected; otherwise a different intent is drawn at random from a small
    fixed fallback list.

    Args:
        user_input: Utterance that is echoed back in the canned response.
        expected_intent: Intent the test case expects.
        accuracy: Probability in [0, 1] that the simulated detection equals
            ``expected_intent``. Defaults to 0.90, the previously hard-coded
            value. 1.0 never misclassifies and 0.0 always does, which makes
            simulated runs reproducible when needed.

    Returns:
        A ``(detected_intent, response)`` tuple of two strings. Intents
        without a canned template get the generic ``"Verstanden: ..."``
        reply.
    """
    import random

    if random.random() < accuracy:
        detected_intent = expected_intent
    else:
        fallback_intents = [
            "student_observation",
            "reminder",
            "worksheet_generate",
            "parent_letter",
            "smalltalk",
        ]
        # At most one entry is removed, so the candidate list is never empty.
        detected_intent = random.choice(
            [intent for intent in fallback_intents if intent != expected_intent]
        )

    # Every canned reply has the shape "<prefix>: <user_input>", so only the
    # prefix is stored per intent and the reply is formatted once.
    response_prefixes = {
        "student_observation": "Notiz wurde gespeichert",
        "reminder": "Erinnerung erstellt",
        "worksheet_generate": "Arbeitsblatt wird generiert basierend auf",
        "homework_check": "Hausaufgabenkontrolle eingetragen",
        "parent_letter": "Elternbrief-Entwurf erstellt",
        "class_message": "Nachricht an Klasse vorbereitet",
        "quiz_generate": "Quiz wird erstellt",
        "quick_activity": "Einstiegsaktivitaet geplant",
        "canvas_edit": "Aenderung am Canvas wird ausgefuehrt",
        "canvas_layout": "Layout wird angepasst",
        "operator_checklist": "Operatoren-Checkliste geladen",
        "eh_passage": "EH-Passage gefunden",
        "feedback_suggest": "Feedback-Vorschlag",
        "reminder_schedule": "Erinnerung geplant",
        "task_summary": "Aufgabenuebersicht",
        "conference_topic": "Konferenzthema notiert",
        "correction_note": "Korrekturnotiz gespeichert",
        "worksheet_differentiate": "Differenzierung wird erstellt",
    }
    prefix = response_prefixes.get(detected_intent, "Verstanden")
    response = f"{prefix}: {user_input}"
    return detected_intent, response
def create_error_result(test_case: Dict[str, Any], error: str) -> TestResult:
    """
    Build the failed ``TestResult`` recorded when a test could not be executed.

    Identifying fields are copied from ``test_case`` with placeholder
    defaults for missing keys. The result is marked as not passed, with
    ``detected_intent='error'``, an empty response, and fixed score values.

    Args:
        test_case: Test case definition; the keys ``id``, ``name``, ``input``
            and ``expected_intent`` are read if present.
        error: Description of the failure, embedded in ``reasoning``.

    Returns:
        A ``TestResult`` with ``passed=False`` and a naive UTC timestamp.
    """
    # Function-scope import: the module only imports `datetime` itself.
    from datetime import timezone

    # datetime.utcnow() is deprecated since Python 3.12. Dropping tzinfo
    # keeps the value a naive UTC datetime, exactly as before.
    timestamp = datetime.now(timezone.utc).replace(tzinfo=None)

    return TestResult(
        test_id=test_case.get('id', 'UNKNOWN'),
        test_name=test_case.get('name', 'Error'),
        user_input=test_case.get('input', ''),
        expected_intent=test_case.get('expected_intent', ''),
        detected_intent='error',
        response='',
        intent_accuracy=0,
        faithfulness=1,
        relevance=1,
        coherence=1,
        safety='fail',
        composite_score=0.0,
        passed=False,
        reasoning=f"Test execution error: {error}",
        timestamp=timestamp,
        duration_ms=0,
    )
async def simulate_rag_response(test_case: Dict[str, Any]) -> Dict[str, Any]:
    """
    Simulate a RAG service response for a golden RAG test case.

    The shape of the returned dict depends on ``test_case['category']``:

    - ``eh_retrieval``: ``passage`` (mentions up to three of the expected
      ``must_contain_concepts``), ``source`` and ``relevance_score``.
    - ``operator_alignment``: ``operator``, ``definition`` (mentions up to
      two ``expected_actions``) and ``afb_level`` (default ``'II'``).
    - ``hallucination_control``: ``response`` and ``grounded``.
    - ``privacy_compliance``: ``response`` and ``contains_pii``.
    - ``namespace_isolation``: ``response`` and ``namespace_violation``.
    - anything else: a generic ``response`` with ``success``.

    Missing, null or non-mapping ``input`` / ``expected`` entries are treated
    as empty, and null concept or action lists as empty lists, so sparsely
    filled test cases do not raise.

    Args:
        test_case: Test case definition; ``category``, ``input`` and
            ``expected`` are read if present.

    Returns:
        A freshly built dict with the simulated response.
    """
    category = test_case.get('category', '')

    # A key that is present but null would bypass a `.get(key, {})` default,
    # so the values are type-checked instead.
    raw_input = test_case.get('input')
    input_data = raw_input if isinstance(raw_input, dict) else {}
    raw_expected = test_case.get('expected')
    expected = raw_expected if isinstance(raw_expected, dict) else {}

    if category == 'eh_retrieval':
        concepts = expected.get('must_contain_concepts') or []
        listed = ', '.join(str(concept) for concept in concepts[:3])
        passage = f"Der Erwartungshorizont sieht folgende Aspekte vor: {listed}. "
        passage += "Diese muessen im Rahmen der Aufgabenbearbeitung beruecksichtigt werden."
        return {
            "passage": passage,
            "source": "EH_Deutsch_Abitur_2024_NI.pdf",
            "relevance_score": 0.85,
        }

    if category == 'operator_alignment':
        operator = input_data.get('operator', '')
        afb = expected.get('afb_level', 'II')
        actions = expected.get('expected_actions') or []
        listed = ', '.join(str(action) for action in actions[:2])
        return {
            "operator": operator,
            "definition": f"'{operator}' gehoert zu Anforderungsbereich {afb}. Erwartete Handlungen: {listed}.",
            "afb_level": afb,
        }

    if category == 'hallucination_control':
        return {
            "response": "Basierend auf den verfuegbaren Informationen kann ich folgendes feststellen...",
            "grounded": True,
        }

    if category == 'privacy_compliance':
        return {
            "response": "Die Arbeit zeigt folgende Merkmale... [anonymisiert]",
            "contains_pii": False,
        }

    if category == 'namespace_isolation':
        return {
            "response": "Zugriff nur auf Daten im eigenen Namespace.",
            "namespace_violation": False,
        }

    return {"response": "Simulated response", "success": True}