refactor: voice-service entfernt (verschoben nach breakpilot-core)
This commit is contained in:
@@ -1,301 +0,0 @@
|
||||
"""
|
||||
Synthetic Test Generator
|
||||
Generates realistic teacher voice command variations using LLM
|
||||
"""
|
||||
import json
|
||||
import structlog
|
||||
import httpx
|
||||
from typing import List, Dict, Any, Optional
|
||||
from dataclasses import dataclass
|
||||
|
||||
from bqas.config import BQASConfig
|
||||
from bqas.prompts import SYNTHETIC_GENERATION_PROMPT
|
||||
|
||||
logger = structlog.get_logger(__name__)
|
||||
|
||||
|
||||
# Teacher speech patterns by intent
|
||||
TEACHER_PATTERNS = {
|
||||
"student_observation": [
|
||||
"Notiz zu {name}: {observation}",
|
||||
"Kurze Bemerkung zu {name}, {observation}",
|
||||
"{name} hat heute {observation}",
|
||||
"Bitte merken: {name} - {observation}",
|
||||
"Beobachtung {name}: {observation}",
|
||||
],
|
||||
"reminder": [
|
||||
"Erinner mich an {task}",
|
||||
"Nicht vergessen: {task}",
|
||||
"Reminder: {task}",
|
||||
"Denk dran: {task}",
|
||||
],
|
||||
"homework_check": [
|
||||
"Hausaufgabe kontrollieren",
|
||||
"{class_name} {subject} Hausaufgabe kontrollieren",
|
||||
"HA Check {class_name}",
|
||||
"Hausaufgaben {subject} pruefen",
|
||||
],
|
||||
"worksheet_generate": [
|
||||
"Mach mir ein Arbeitsblatt zu {topic}",
|
||||
"Erstelle bitte {count} Aufgaben zu {topic}",
|
||||
"Ich brauche ein Uebungsblatt fuer {topic}",
|
||||
"Generiere Lueckentexte zu {topic}",
|
||||
"Arbeitsblatt {topic} erstellen",
|
||||
],
|
||||
"parent_letter": [
|
||||
"Schreib einen Elternbrief wegen {reason}",
|
||||
"Formuliere eine Nachricht an die Eltern von {name} zu {reason}",
|
||||
"Ich brauche einen neutralen Brief an Eltern wegen {reason}",
|
||||
"Elternbrief {reason}",
|
||||
],
|
||||
"class_message": [
|
||||
"Nachricht an {class_name}: {content}",
|
||||
"Info an die Klasse {class_name}",
|
||||
"Klassennachricht {class_name}",
|
||||
"Mitteilung an {class_name}: {content}",
|
||||
],
|
||||
"quiz_generate": [
|
||||
"Vokabeltest erstellen",
|
||||
"Quiz mit {count} Fragen",
|
||||
"{duration} Minuten Test",
|
||||
"Kurzer Test zu {topic}",
|
||||
],
|
||||
"quick_activity": [
|
||||
"{duration} Minuten Einstieg",
|
||||
"Schnelle Aktivitaet {topic}",
|
||||
"Warming Up {duration} Minuten",
|
||||
"Einstiegsaufgabe",
|
||||
],
|
||||
"canvas_edit": [
|
||||
"Ueberschriften groesser",
|
||||
"Bild {number} nach {direction}",
|
||||
"Pfeil von {source} auf {target}",
|
||||
"Kasten hinzufuegen",
|
||||
],
|
||||
"canvas_layout": [
|
||||
"Alles auf eine Seite",
|
||||
"Drucklayout A4",
|
||||
"Layout aendern",
|
||||
"Seitenformat anpassen",
|
||||
],
|
||||
"operator_checklist": [
|
||||
"Operatoren-Checkliste fuer {task_type}",
|
||||
"Welche Operatoren fuer {topic}",
|
||||
"Zeig Operatoren",
|
||||
],
|
||||
"eh_passage": [
|
||||
"Erwartungshorizont zu {topic}",
|
||||
"Was steht im EH zu {topic}",
|
||||
"EH Passage suchen",
|
||||
],
|
||||
"feedback_suggest": [
|
||||
"Feedback vorschlagen",
|
||||
"Formuliere Rueckmeldung",
|
||||
"Wie formuliere ich Feedback zu {topic}",
|
||||
],
|
||||
"reminder_schedule": [
|
||||
"Erinner mich morgen an {task}",
|
||||
"In {time_offset} erinnern: {task}",
|
||||
"Naechste Woche: {task}",
|
||||
],
|
||||
"task_summary": [
|
||||
"Offene Aufgaben",
|
||||
"Was steht noch an",
|
||||
"Zusammenfassung",
|
||||
"Diese Woche",
|
||||
],
|
||||
}
|
||||
|
||||
|
||||
@dataclass
class SyntheticTest:
    """A synthetically generated test case.

    Attributes:
        input: The utterance text of the test case.
        expected_intent: Intent name the utterance is expected to map to.
        slots: Slot name -> value mapping associated with the utterance.
        source: Origin tag of the test case; defaults to ``"synthetic"``.
    """

    # NOTE: ``input`` shadows the builtin inside this class body only; the
    # name is part of the public constructor signature and must stay.
    input: str
    expected_intent: str
    slots: Dict[str, Any]
    source: str = "synthetic"
|
||||
|
||||
|
||||
class SyntheticGenerator:
    """
    Generates realistic variations of teacher voice commands.

    Uses LLM to create variations with:
    - Different phrasings
    - Optional typos
    - Regional dialects
    - Natural speech patterns

    When the LLM request fails, or its answer contains no usable entries,
    variations are built from ``TEACHER_PATTERNS`` with sample slot values
    instead. The HTTP client is created lazily on first use; call
    ``close()`` when the generator is no longer needed.
    """

    # Sample values for every placeholder that occurs in TEACHER_PATTERNS.
    # Used only by the pattern-based fallback. A placeholder missing here
    # would be emitted verbatim (e.g. "Bild {number} nach {direction}").
    _FALLBACK_SLOT_VALUES: Dict[str, List[str]] = {
        "name": ["Max", "Lisa", "Tim", "Anna", "Paul", "Emma"],
        "observation": ["heute sehr aufmerksam", "braucht Hilfe", "war abgelenkt"],
        "task": ["Hausaufgaben kontrollieren", "Elternbrief schreiben", "Test vorbereiten"],
        "class_name": ["7a", "8b", "9c", "10d"],
        "subject": ["Mathe", "Deutsch", "Englisch", "Physik"],
        "topic": ["Bruchrechnung", "Vokabeln", "Grammatik", "Prozentrechnung"],
        "count": ["3", "5", "10"],
        "duration": ["10", "15", "20"],
        "reason": ["fehlende Hausaufgaben", "wiederholte Stoerungen", "positives Verhalten"],
        "content": ["Hausaufgaben bis Freitag", "Test naechste Woche"],
        "number": ["1", "2", "3"],
        "direction": ["links", "rechts", "oben", "unten"],
        "source": ["Bild 1", "Textfeld", "Ueberschrift"],
        "target": ["Kasten 2", "Tabelle", "Bild 3"],
        "task_type": ["Analyse", "Eroerterung", "Interpretation"],
        "time_offset": ["2 Stunden", "3 Tagen", "einer Woche"],
    }

    def __init__(self, config: Optional[BQASConfig] = None):
        """Store the configuration; falls back to ``BQASConfig.from_env()``."""
        self.config = config or BQASConfig.from_env()
        self._client: Optional[httpx.AsyncClient] = None

    async def _get_client(self) -> httpx.AsyncClient:
        """Get or create HTTP client."""
        if self._client is None:
            self._client = httpx.AsyncClient(timeout=self.config.judge_timeout)
        return self._client

    async def generate_variations(
        self,
        intent: str,
        count: int = 10,
        include_typos: bool = True,
        include_dialect: bool = True,
    ) -> List[SyntheticTest]:
        """
        Generate realistic variations for an intent.

        Args:
            intent: Target intent type
            count: Number of variations to generate
            include_typos: Include occasional typos
            include_dialect: Include regional variants (Austrian, Swiss)

        Returns:
            List of SyntheticTest objects. Empty when the intent has no
            patterns or ``count`` is not positive. Entries come from the
            LLM when its answer could be parsed, otherwise from the
            pattern-based fallback.
        """
        patterns = TEACHER_PATTERNS.get(intent, [])
        if not patterns:
            logger.warning(f"No patterns for intent: {intent}")
            return []

        # Nothing requested: skip the LLM round trip entirely.
        if count <= 0:
            return []

        typo_instruction = "Fuege gelegentlich Tippfehler ein" if include_typos else "Keine Tippfehler"
        dialect_instruction = (
            "Beruecksichtige regionale Varianten (Oesterreich, Schweiz)"
            if include_dialect
            else "Nur Hochdeutsch"
        )

        prompt = SYNTHETIC_GENERATION_PROMPT.format(
            count=count,
            intent=intent,
            patterns="\n".join(f"- {p}" for p in patterns),
            typo_instruction=typo_instruction,
            dialect_instruction=dialect_instruction,
        )

        client = await self._get_client()

        try:
            resp = await client.post(
                f"{self.config.ollama_base_url}/api/generate",
                json={
                    "model": self.config.judge_model,
                    "prompt": prompt,
                    "stream": False,
                    "options": {
                        "temperature": 0.8,
                        "num_predict": 2000,
                    },
                },
            )
            resp.raise_for_status()

            result_text = resp.json().get("response", "")
            variations = self._parse_variations(result_text, intent)
        except Exception as e:
            # Deliberately broad: generation is best-effort, and any
            # transport or decoding failure degrades to the fallback.
            logger.error("Failed to generate variations", intent=intent, error=str(e))
            # Return pattern-based fallbacks
            return self._generate_fallback(intent, count)

        if not variations:
            # The request succeeded but nothing usable came back; treat it
            # like a failure so the caller never silently gets zero cases.
            logger.warning(
                "No usable variations in LLM response, using pattern fallback",
                intent=intent,
            )
            return self._generate_fallback(intent, count)

        return variations

    def _parse_variations(self, text: str, intent: str) -> List[SyntheticTest]:
        """Parse JSON variations from LLM response.

        Takes the span from the first ``[`` to the last ``]`` in ``text`` and
        decodes it as JSON. Only dict items with a non-empty string
        ``"input"`` are kept; anything else is skipped instead of raising.

        Args:
            text: Raw LLM response text.
            intent: Intent used when an item has no ``"expected_intent"``.

        Returns:
            Parsed test cases tagged ``source="llm_generated"``; an empty
            list when no JSON array could be decoded.
        """
        if not isinstance(text, str):
            return []

        # Find JSON array in response
        start = text.find("[")
        end = text.rfind("]") + 1
        if start < 0 or end <= start:
            return []

        try:
            data = json.loads(text[start:end])
        except (json.JSONDecodeError, TypeError) as e:
            logger.warning("Failed to parse variations", error=str(e))
            return []

        if not isinstance(data, list):
            return []

        variations: List[SyntheticTest] = []
        for item in data:
            # LLMs sometimes answer with a list of bare strings or nulls.
            if not isinstance(item, dict):
                continue
            utterance = item.get("input")
            if not isinstance(utterance, str) or not utterance:
                continue
            slots = item.get("slots", {})
            variations.append(
                SyntheticTest(
                    input=utterance,
                    expected_intent=item.get("expected_intent") or intent,
                    slots=slots if isinstance(slots, dict) else {},
                    source="llm_generated",
                )
            )
        return variations

    def _generate_fallback(self, intent: str, count: int) -> List[SyntheticTest]:
        """Generate simple variations from patterns.

        Cycles through the intent's patterns and replaces each known
        placeholder with a randomly chosen sample value.

        Args:
            intent: Intent whose patterns are used.
            count: Number of test cases to produce.

        Returns:
            ``count`` test cases tagged ``source="pattern_generated"``, or
            an empty list when the intent has no patterns.
        """
        # Function-scope import: ``random`` is not imported at module level.
        import random

        patterns = TEACHER_PATTERNS.get(intent, [])
        if not patterns:
            return []

        results: List[SyntheticTest] = []
        for i in range(count):
            pattern = patterns[i % len(patterns)]

            # Fill in placeholders, recording the value actually inserted.
            # Searching the filled text afterwards is unreliable because a
            # sample value can also match unrelated text in the utterance.
            filled = pattern
            slots: Dict[str, Any] = {}
            for key, values in self._FALLBACK_SLOT_VALUES.items():
                placeholder = f"{{{key}}}"
                if placeholder in filled:
                    chosen = random.choice(values)
                    filled = filled.replace(placeholder, chosen, 1)
                    slots[key] = chosen

            results.append(
                SyntheticTest(
                    input=filled,
                    expected_intent=intent,
                    slots=slots,
                    source="pattern_generated",
                )
            )

        return results

    async def generate_all_intents(
        self,
        count_per_intent: int = 10,
    ) -> Dict[str, List[SyntheticTest]]:
        """Generate variations for all known intents.

        Intents are processed one after another. Typo and dialect options
        are read from ``self.config``.

        Args:
            count_per_intent: Number of variations requested per intent.

        Returns:
            Mapping of intent name to its generated test cases.
        """
        results: Dict[str, List[SyntheticTest]] = {}

        for intent in TEACHER_PATTERNS:
            logger.info(f"Generating variations for intent: {intent}")
            variations = await self.generate_variations(
                intent=intent,
                count=count_per_intent,
                include_typos=self.config.include_typos,
                include_dialect=self.config.include_dialect,
            )
            results[intent] = variations
            logger.info(f"Generated {len(variations)} variations for {intent}")

        return results

    async def close(self):
        """Close HTTP client."""
        if self._client is not None:
            await self._client.aclose()
            self._client = None
|
||||
Reference in New Issue
Block a user