This repository has been archived on 2026-02-15. You can view files and clone it. You cannot open issues or pull requests or push a commit.
Files
breakpilot-pwa/voice-service/bqas/synthetic_generator.py
Benjamin Admin 21a844cb8a fix: Restore all files lost during destructive rebase
A previous `git pull --rebase origin main` dropped 177 local commits,
losing 3400+ files across admin-v2, backend, studio-v2, website,
klausur-service, and many other services. The partial restore attempt
(660295e2) only recovered some files.

This commit restores all missing files from pre-rebase ref 98933f5e
while preserving post-rebase additions (night-scheduler, night-mode UI,
NightModeWidget dashboard integration).

Restored features include:
- AI Module Sidebar (FAB), OCR Labeling, OCR Compare
- GPU Dashboard, RAG Pipeline, Magic Help
- Klausur-Korrektur (8 files), Abitur-Archiv (5+ files)
- Companion, Zeugnisse-Crawler, Screen Flow
- Full backend, studio-v2, website, klausur-service
- All compliance SDKs, agent-core, voice-service
- CI/CD configs, documentation, scripts

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-02-09 09:51:32 +01:00

302 lines
9.7 KiB
Python

"""
Synthetic Test Generator
Generates realistic teacher voice command variations using LLM
"""
import json
import structlog
import httpx
from typing import List, Dict, Any, Optional
from dataclasses import dataclass
from bqas.config import BQASConfig
from bqas.prompts import SYNTHETIC_GENERATION_PROMPT
logger = structlog.get_logger(__name__)
# Teacher speech patterns, keyed by intent name.
# Each entry is a list of German command templates (umlauts are written in
# ASCII, e.g. "ue", "ae"). "{placeholder}" fields such as {name} or {topic}
# mark where a slot value goes. SyntheticGenerator lists these templates in
# the LLM prompt and also fills them with sample values in
# SyntheticGenerator._generate_fallback.
TEACHER_PATTERNS = {
    "student_observation": [
        "Notiz zu {name}: {observation}",
        "Kurze Bemerkung zu {name}, {observation}",
        "{name} hat heute {observation}",
        "Bitte merken: {name} - {observation}",
        "Beobachtung {name}: {observation}",
    ],
    "reminder": [
        "Erinner mich an {task}",
        "Nicht vergessen: {task}",
        "Reminder: {task}",
        "Denk dran: {task}",
    ],
    "homework_check": [
        "Hausaufgabe kontrollieren",
        "{class_name} {subject} Hausaufgabe kontrollieren",
        "HA Check {class_name}",
        "Hausaufgaben {subject} pruefen",
    ],
    "worksheet_generate": [
        "Mach mir ein Arbeitsblatt zu {topic}",
        "Erstelle bitte {count} Aufgaben zu {topic}",
        "Ich brauche ein Uebungsblatt fuer {topic}",
        "Generiere Lueckentexte zu {topic}",
        "Arbeitsblatt {topic} erstellen",
    ],
    "parent_letter": [
        "Schreib einen Elternbrief wegen {reason}",
        "Formuliere eine Nachricht an die Eltern von {name} zu {reason}",
        "Ich brauche einen neutralen Brief an Eltern wegen {reason}",
        "Elternbrief {reason}",
    ],
    "class_message": [
        "Nachricht an {class_name}: {content}",
        "Info an die Klasse {class_name}",
        "Klassennachricht {class_name}",
        "Mitteilung an {class_name}: {content}",
    ],
    "quiz_generate": [
        "Vokabeltest erstellen",
        "Quiz mit {count} Fragen",
        "{duration} Minuten Test",
        "Kurzer Test zu {topic}",
    ],
    "quick_activity": [
        "{duration} Minuten Einstieg",
        "Schnelle Aktivitaet {topic}",
        "Warming Up {duration} Minuten",
        "Einstiegsaufgabe",
    ],
    "canvas_edit": [
        "Ueberschriften groesser",
        "Bild {number} nach {direction}",
        "Pfeil von {source} auf {target}",
        "Kasten hinzufuegen",
    ],
    "canvas_layout": [
        "Alles auf eine Seite",
        "Drucklayout A4",
        "Layout aendern",
        "Seitenformat anpassen",
    ],
    "operator_checklist": [
        "Operatoren-Checkliste fuer {task_type}",
        "Welche Operatoren fuer {topic}",
        "Zeig Operatoren",
    ],
    "eh_passage": [
        "Erwartungshorizont zu {topic}",
        "Was steht im EH zu {topic}",
        "EH Passage suchen",
    ],
    "feedback_suggest": [
        "Feedback vorschlagen",
        "Formuliere Rueckmeldung",
        "Wie formuliere ich Feedback zu {topic}",
    ],
    "reminder_schedule": [
        "Erinner mich morgen an {task}",
        "In {time_offset} erinnern: {task}",
        "Naechste Woche: {task}",
    ],
    "task_summary": [
        "Offene Aufgaben",
        "Was steht noch an",
        "Zusammenfassung",
        "Diese Woche",
    ],
}
@dataclass
class SyntheticTest:
    """
    One generated test case: a command text plus the labels expected for it.
    """

    # Command text of the test case.
    input: str
    # Intent name this input is expected to map to.
    expected_intent: str
    # Slot names mapped to the values contained in the input.
    slots: Dict[str, Any]
    # Origin tag of the test case; "synthetic" unless the creator overrides it.
    source: str = "synthetic"
class SyntheticGenerator:
    """
    Generates realistic variations of teacher voice commands.

    Variations are requested from an LLM by POSTing a prompt to
    ``{config.ollama_base_url}/api/generate`` and may include:
    - Different phrasings
    - Optional typos
    - Regional dialects
    - Natural speech patterns

    When the request fails, or the response contains no usable variation,
    simple template-based variations built from ``TEACHER_PATTERNS`` are
    returned instead.
    """

    # Sample values used to fill "{placeholder}" fields in the fallback path.
    # Every placeholder that occurs in TEACHER_PATTERNS needs an entry here,
    # otherwise the raw "{placeholder}" text would leak into the test input.
    _FALLBACK_SLOT_VALUES: Dict[str, List[str]] = {
        "name": ["Max", "Lisa", "Tim", "Anna", "Paul", "Emma"],
        "observation": ["heute sehr aufmerksam", "braucht Hilfe", "war abgelenkt"],
        "task": ["Hausaufgaben kontrollieren", "Elternbrief schreiben", "Test vorbereiten"],
        "class_name": ["7a", "8b", "9c", "10d"],
        "subject": ["Mathe", "Deutsch", "Englisch", "Physik"],
        "topic": ["Bruchrechnung", "Vokabeln", "Grammatik", "Prozentrechnung"],
        "count": ["3", "5", "10"],
        "duration": ["10", "15", "20"],
        "reason": ["fehlende Hausaufgaben", "wiederholte Stoerungen", "positives Verhalten"],
        "content": ["Hausaufgaben bis Freitag", "Test naechste Woche"],
        "number": ["1", "2", "3"],
        "direction": ["links", "rechts", "oben", "unten"],
        "source": ["Bild 1", "Kasten 1", "Ueberschrift"],
        "target": ["Bild 2", "Kasten 2", "Tabelle"],
        "task_type": ["Analyse", "Eroerterung", "Interpretation"],
        "time_offset": ["2 Stunden", "3 Tagen", "einer Woche"],
    }

    def __init__(self, config: Optional[BQASConfig] = None):
        """
        Args:
            config: Settings to use. When omitted, ``BQASConfig.from_env()``
                supplies them.
        """
        self.config = config or BQASConfig.from_env()
        # Created lazily by _get_client(); released by close().
        self._client: Optional[httpx.AsyncClient] = None

    async def _get_client(self) -> httpx.AsyncClient:
        """Get or create the shared HTTP client."""
        if self._client is None:
            self._client = httpx.AsyncClient(timeout=self.config.judge_timeout)
        return self._client

    async def generate_variations(
        self,
        intent: str,
        count: int = 10,
        include_typos: bool = True,
        include_dialect: bool = True,
    ) -> List[SyntheticTest]:
        """
        Generate realistic variations for an intent.

        Args:
            intent: Target intent type (a key of ``TEACHER_PATTERNS``)
            count: Number of variations to request
            include_typos: Include occasional typos
            include_dialect: Include regional variants (Austrian, Swiss)

        Returns:
            List of SyntheticTest objects. Empty when the intent has no
            patterns. LLM results are returned as parsed, so their number
            may differ from ``count``. Pattern-based fallbacks are returned
            when the request fails or yields no usable variation.
        """
        patterns = TEACHER_PATTERNS.get(intent, [])
        if not patterns:
            logger.warning(f"No patterns for intent: {intent}")
            return []

        typo_instruction = "Fuege gelegentlich Tippfehler ein" if include_typos else "Keine Tippfehler"
        dialect_instruction = (
            "Beruecksichtige regionale Varianten (Oesterreich, Schweiz)"
            if include_dialect
            else "Nur Hochdeutsch"
        )
        prompt = SYNTHETIC_GENERATION_PROMPT.format(
            count=count,
            intent=intent,
            patterns="\n".join(f"- {p}" for p in patterns),
            typo_instruction=typo_instruction,
            dialect_instruction=dialect_instruction,
        )

        client = await self._get_client()
        try:
            resp = await client.post(
                f"{self.config.ollama_base_url}/api/generate",
                json={
                    "model": self.config.judge_model,
                    "prompt": prompt,
                    "stream": False,
                    "options": {
                        "temperature": 0.8,
                        "num_predict": 2000,
                    },
                },
            )
            resp.raise_for_status()
            result_text = resp.json().get("response", "")
            variations = self._parse_variations(result_text, intent)
        except Exception as e:
            # Deliberate best-effort boundary: any transport, HTTP or decoding
            # problem degrades to pattern-based fallbacks instead of failing.
            logger.error("Failed to generate variations", intent=intent, error=str(e))
            return self._generate_fallback(intent, count)

        if not variations:
            # The LLM answered, but nothing usable could be extracted; treat
            # this like a failed request rather than returning nothing.
            logger.warning("No usable variations in LLM response", intent=intent)
            return self._generate_fallback(intent, count)
        return variations

    def _parse_variations(self, text: str, intent: str) -> List[SyntheticTest]:
        """
        Parse JSON variations from an LLM response.

        The text between the first "[" and the last "]" is decoded as JSON.
        Only array items that are objects with a non-empty "input" are kept;
        anything else is skipped so one malformed item cannot discard the
        whole response.

        Args:
            text: Raw response text, possibly with prose around the JSON array
            intent: Intent used when an item carries no "expected_intent"

        Returns:
            Parsed test cases tagged with source "llm_generated"; an empty
            list when no JSON array is found or it cannot be decoded.
        """
        if not isinstance(text, str):
            return []

        # Find JSON array in response
        start = text.find("[")
        end = text.rfind("]") + 1
        if start < 0 or end <= start:
            return []

        try:
            data = json.loads(text[start:end])
        except json.JSONDecodeError as e:
            logger.warning("Failed to parse variations", error=str(e))
            return []
        if not isinstance(data, list):
            return []

        variations = []
        for item in data:
            # LLM output is untrusted: items may be numbers, strings or null.
            if not isinstance(item, dict) or not item.get("input"):
                continue
            slots = item.get("slots")
            variations.append(SyntheticTest(
                input=item["input"],
                expected_intent=item.get("expected_intent") or intent,
                slots=slots if isinstance(slots, dict) else {},
                source="llm_generated",
            ))
        return variations

    def _generate_fallback(self, intent: str, count: int) -> List[SyntheticTest]:
        """
        Generate simple variations by filling pattern placeholders.

        Patterns of the intent are used round-robin; each placeholder is
        replaced by a randomly chosen sample value, and that same value is
        recorded as the slot value.

        Args:
            intent: Intent whose patterns are used
            count: Number of test cases to produce

        Returns:
            Test cases tagged with source "pattern_generated"; an empty list
            when the intent has no patterns.
        """
        import random  # kept as a function-level import, as in the original

        patterns = TEACHER_PATTERNS.get(intent, [])
        if not patterns:
            return []

        results = []
        for i in range(count):
            pattern = patterns[i % len(patterns)]
            filled = pattern
            slots = {}
            for key, values in self._FALLBACK_SLOT_VALUES.items():
                placeholder = f"{{{key}}}"
                if placeholder in filled:
                    value = random.choice(values)
                    filled = filled.replace(placeholder, value, 1)
                    # Record the value actually inserted instead of searching
                    # the filled text for it afterwards.
                    slots[key] = value
            results.append(SyntheticTest(
                input=filled,
                expected_intent=intent,
                slots=slots,
                source="pattern_generated",
            ))
        return results

    async def generate_all_intents(
        self,
        count_per_intent: int = 10,
    ) -> Dict[str, List[SyntheticTest]]:
        """
        Generate variations for all known intents.

        Intents are processed one after another, using the typo and dialect
        settings from the config.

        Args:
            count_per_intent: Number of variations to request per intent

        Returns:
            Mapping of intent name to its generated test cases.
        """
        results = {}
        for intent in TEACHER_PATTERNS:
            logger.info(f"Generating variations for intent: {intent}")
            variations = await self.generate_variations(
                intent=intent,
                count=count_per_intent,
                include_typos=self.config.include_typos,
                include_dialect=self.config.include_dialect,
            )
            results[intent] = variations
            logger.info(f"Generated {len(variations)} variations for {intent}")
        return results

    async def close(self):
        """Close the HTTP client, if one was created."""
        if self._client is not None:
            await self._client.aclose()
            self._client = None