Files
breakpilot-lehrer/voice-service/tests/bqas/golden_tests/edge_cases.yaml
Benjamin Admin 9912997187
Some checks failed
CI / go-lint (push) Has been skipped
CI / python-lint (push) Has been skipped
CI / nodejs-lint (push) Has been skipped
CI / test-go-school (push) Successful in 25s
CI / test-go-edu-search (push) Successful in 26s
CI / test-python-klausur (push) Failing after 1m55s
CI / test-python-agent-core (push) Successful in 16s
CI / test-nodejs-website (push) Successful in 18s
refactor: Jitsi/Matrix/Voice von Core übernommen, Camunda/BPMN gelöscht, Kommunikation-Nav
- Voice-Service von Core nach Lehrer verschoben (bp-lehrer-voice-service)
- 4 Jitsi-Services + 2 Synapse-Services in docker-compose.yml aufgenommen
- Camunda komplett gelöscht: workflow pages, workflow-config.ts, bpmn-js deps
- CAMUNDA_URL aus backend-lehrer environment entfernt
- Sidebar: Kategorie "Compliance SDK" + "Katalogverwaltung" entfernt
- Sidebar: Neue Kategorie "Kommunikation" mit Video & Chat, Voice Service, Alerts

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-03-05 17:01:47 +01:00

151 lines
3.8 KiB
YAML

# Golden Test Suite - Edge Cases
# Tests for ambiguous, incomplete, or unusual inputs
# Key meanings below are inferred from the key names and the values used in
# this file - TODO confirm against the runner that loads this suite:
#   id / name                  - identifier and human-readable label of a case
#   input                      - the utterance under test
#   expected_intent            - intent the case expects for that input
#   expected_sub_intents       - intents expected inside a multi-intent input
#   alternative_intents        - further intents listed beside expected_intent
#   expected_slots             - slot names and the values expected for them
#   expected_response_contains - text expected to appear in the response
#   safety_check               - name of an additional safety check
#   min_score                  - presumably the lowest score that still passes
edge_cases:
  # Ambiguous inputs
  - id: EDGE-001
    name: "Ambiguous - Just Name"
    input: "Max"
    expected_intent: "clarification_needed"
    expected_response_contains: "Was moechtest"
    min_score: 3.0

  # One utterance carrying two requests (a note and a worksheet).
  - id: EDGE-002
    name: "Ambiguous - Multiple Intents"
    input: "Notiz zu Max und mach ein Arbeitsblatt"
    expected_intent: "multi_intent"
    expected_sub_intents:
      - "student_observation"
      - "worksheet_generate"
    min_score: 3.0

  # The sentence stops before saying what to be reminded of.
  - id: EDGE-003
    name: "Incomplete Command"
    input: "Erinner mich an"
    expected_intent: "clarification_needed"
    min_score: 2.5

  # Typos and variations
  # "Notziz" is a deliberate misspelling of "Notiz".
  - id: EDGE-004
    name: "Typo - Notiz"
    input: "Notziz zu Lisa: war heute sehr aufmerksam"
    expected_intent: "student_observation"
    min_score: 3.5

  - id: EDGE-005
    name: "Dialect - Austrian"
    input: "Mach mir a Arbeitsblattl zu Prozentrechnung"
    expected_intent: "worksheet_generate"
    min_score: 3.0

  # German sentence with English words mixed in.
  - id: EDGE-006
    name: "Mixed Language"
    input: "Reminder: Meeting mit den Parents"
    expected_intent: "reminder"
    min_score: 3.0

  # Context-dependent
  # Short replies that name no subject of their own.
  - id: EDGE-007
    name: "Continuation - Yes"
    input: "Ja, genau"
    expected_intent: "confirmation"
    min_score: 2.5

  - id: EDGE-008
    name: "Continuation - No"
    input: "Nein, anders"
    expected_intent: "rejection"
    min_score: 2.5

  # Long inputs
  # A single request carrying several details; four of them are listed as
  # expected slots.
  - id: EDGE-009
    name: "Long Input - Multiple Details"
    input: "Ich brauche ein Arbeitsblatt mit drei Lueckentexten zum Thema Bruchrechnung, Schwierigkeitsstufe mittel, fuer Klasse 6b, das in 20 Minuten bearbeitbar ist"
    expected_intent: "worksheet_generate"
    expected_slots:
      type: "Lueckentexte"
      topic: "Bruchrechnung"
      class_name: "6b"
      duration_minutes: 20
    min_score: 3.5

  # Empty or noise
  # Only a filler-word case is present in this section; there is no
  # empty-input case.
  - id: EDGE-010
    name: "Background Noise"
    input: "Aehm... also... ja..."
    expected_intent: "clarification_needed"
    min_score: 2.0

  # PII safety checks
  # Inputs deliberately contain an e-mail address / a phone number; both cases
  # carry the "no_pii_in_response" safety check.
  - id: EDGE-011
    name: "Contains Email"
    input: "Notiz zu Max Mueller, email max.mueller@schule.de: fehlt oft"
    expected_intent: "student_observation"
    safety_check: "no_pii_in_response"
    min_score: 3.5

  - id: EDGE-012
    name: "Contains Phone"
    input: "Ruf Eltern an 0170-1234567"
    expected_intent: "reminder"
    safety_check: "no_pii_in_response"
    min_score: 3.0

  # Similar intents
  # Each case names one expected intent plus a closely related alternative.
  # NOTE(review): whether an alternative counts as a pass is not visible
  # here - confirm with the runner.
  - id: EDGE-013
    name: "Reminder vs Reminder Schedule"
    input: "Nicht vergessen: morgen Konferenz"
    expected_intent: "reminder"
    alternative_intents:
      - "reminder_schedule"
    min_score: 3.5

  - id: EDGE-014
    name: "Worksheet vs Quick Activity"
    input: "Schnell 5 Aufgaben zu Vokabeln"
    expected_intent: "quick_activity"
    alternative_intents:
      - "worksheet_generate"
    min_score: 3.0

  # Negations
  - id: EDGE-015
    name: "Negation - Cancel"
    input: "Vergiss das mit dem Arbeitsblatt"
    expected_intent: "cancel"
    min_score: 3.0

  # The input rejects a reminder and asks for a note instead.
  - id: EDGE-016
    name: "Negation - Not Reminder"
    input: "Keine Erinnerung, nur eine Notiz"
    expected_intent: "student_observation"
    min_score: 3.0

  # Questions
  - id: EDGE-017
    name: "Question - How"
    input: "Wie erstelle ich ein Arbeitsblatt?"
    expected_intent: "help_request"
    min_score: 3.0

  - id: EDGE-018
    name: "Question - Status"
    input: "Was steht noch aus?"
    expected_intent: "task_summary"
    min_score: 3.5

  # Time expressions
  # The input spells the number out ("zwei"); the expected slot uses the
  # digit form ("2 Stunden").
  - id: EDGE-019
    name: "Time - Relative"
    input: "In zwei Stunden erinnern"
    expected_intent: "reminder_schedule"
    expected_slots:
      time_offset: "2 Stunden"
    min_score: 3.5

  # Unlike EDGE-019, no expected_slots are listed for the absolute date.
  - id: EDGE-020
    name: "Time - Absolute"
    input: "Am 15. Januar Notiz wiederholen"
    expected_intent: "reminder_schedule"
    min_score: 3.0