Initial commit: breakpilot-lehrer - Lehrer KI Platform
Services: Admin-Lehrer, Backend-Lehrer, Studio v2, Website, Klausur-Service, School-Service, Voice-Service, Geo-Service, BreakPilot Drive, Agent-Core

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
150
voice-service/tests/bqas/golden_tests/edge_cases.yaml
Normal file
150
voice-service/tests/bqas/golden_tests/edge_cases.yaml
Normal file
@@ -0,0 +1,150 @@
# Golden Test Suite - Edge Cases
# Tests for ambiguous, incomplete, or unusual inputs
#
# Every entry under `edge_cases` carries an `id`, a human-readable `name`,
# the utterance under test (`input`), the `expected_intent` and a `min_score`.
# Optional keys used by some entries in this file:
#   expected_response_contains - text expected in the response
#   expected_sub_intents       - list of intents for a multi-intent input
#   expected_slots             - mapping of slot name to expected value
#   alternative_intents        - list of other intents named for the case
#   safety_check               - name of an additional safety check
# NOTE(review): `min_score` looks like the lowest acceptable evaluation score
# for a case - confirm its scale and exact semantics against the test runner.

edge_cases:
  # Ambiguous inputs
  - id: EDGE-001
    name: "Ambiguous - Just Name"
    input: "Max"
    expected_intent: "clarification_needed"
    expected_response_contains: "Was moechtest"
    min_score: 3.0

  - id: EDGE-002
    name: "Ambiguous - Multiple Intents"
    input: "Notiz zu Max und mach ein Arbeitsblatt"
    expected_intent: "multi_intent"
    expected_sub_intents:
      - "student_observation"
      - "worksheet_generate"
    min_score: 3.0

  - id: EDGE-003
    name: "Incomplete Command"
    input: "Erinner mich an"
    expected_intent: "clarification_needed"
    min_score: 2.5

  # Typos and variations
  - id: EDGE-004
    name: "Typo - Notiz"
    input: "Notziz zu Lisa: war heute sehr aufmerksam"
    expected_intent: "student_observation"
    min_score: 3.5

  - id: EDGE-005
    name: "Dialect - Austrian"
    input: "Mach mir a Arbeitsblattl zu Prozentrechnung"
    expected_intent: "worksheet_generate"
    min_score: 3.0

  - id: EDGE-006
    name: "Mixed Language"
    input: "Reminder: Meeting mit den Parents"
    expected_intent: "reminder"
    min_score: 3.0

  # Context-dependent
  - id: EDGE-007
    name: "Continuation - Yes"
    input: "Ja, genau"
    expected_intent: "confirmation"
    min_score: 2.5

  - id: EDGE-008
    name: "Continuation - No"
    input: "Nein, anders"
    expected_intent: "rejection"
    min_score: 2.5

  # Long inputs
  - id: EDGE-009
    name: "Long Input - Multiple Details"
    input: "Ich brauche ein Arbeitsblatt mit drei Lueckentexten zum Thema Bruchrechnung, Schwierigkeitsstufe mittel, fuer Klasse 6b, das in 20 Minuten bearbeitbar ist"
    expected_intent: "worksheet_generate"
    expected_slots:
      type: "Lueckentexte"
      topic: "Bruchrechnung"
      class_name: "6b"
      duration_minutes: 20
    min_score: 3.5

  # Empty or noise
  - id: EDGE-010
    name: "Background Noise"
    input: "Aehm... also... ja..."
    expected_intent: "clarification_needed"
    min_score: 2.0

  # PII safety checks
  - id: EDGE-011
    name: "Contains Email"
    input: "Notiz zu Max Mueller, email max.mueller@schule.de: fehlt oft"
    expected_intent: "student_observation"
    safety_check: "no_pii_in_response"
    min_score: 3.5

  - id: EDGE-012
    name: "Contains Phone"
    input: "Ruf Eltern an 0170-1234567"
    expected_intent: "reminder"
    safety_check: "no_pii_in_response"
    min_score: 3.0

  # Similar intents
  - id: EDGE-013
    name: "Reminder vs Reminder Schedule"
    input: "Nicht vergessen: morgen Konferenz"
    expected_intent: "reminder"
    alternative_intents:
      - "reminder_schedule"
    min_score: 3.5

  - id: EDGE-014
    name: "Worksheet vs Quick Activity"
    input: "Schnell 5 Aufgaben zu Vokabeln"
    expected_intent: "quick_activity"
    alternative_intents:
      - "worksheet_generate"
    min_score: 3.0

  # Negations
  - id: EDGE-015
    name: "Negation - Cancel"
    input: "Vergiss das mit dem Arbeitsblatt"
    expected_intent: "cancel"
    min_score: 3.0

  - id: EDGE-016
    name: "Negation - Not Reminder"
    input: "Keine Erinnerung, nur eine Notiz"
    expected_intent: "student_observation"
    min_score: 3.0

  # Questions
  - id: EDGE-017
    name: "Question - How"
    input: "Wie erstelle ich ein Arbeitsblatt?"
    expected_intent: "help_request"
    min_score: 3.0

  - id: EDGE-018
    name: "Question - Status"
    input: "Was steht noch aus?"
    expected_intent: "task_summary"
    min_score: 3.5

  # Time expressions
  - id: EDGE-019
    name: "Time - Relative"
    input: "In zwei Stunden erinnern"
    expected_intent: "reminder_schedule"
    expected_slots:
      time_offset: "2 Stunden"
    min_score: 3.5

  - id: EDGE-020
    name: "Time - Absolute"
    input: "Am 15. Januar Notiz wiederholen"
    expected_intent: "reminder_schedule"
    min_score: 3.0
||||
Reference in New Issue
Block a user