---
# Golden Test Suite - Edge Cases
# Tests for ambiguous, incomplete, or unusual inputs
#
# Each item under `edge_cases` is one test case. Keys used in this file:
#   id, name, input    - case identifier, human-readable label, input text
#   expected_intent    - intent label expected for the input
#   min_score          - numeric threshold for the case
#                        NOTE(review): scale and comparison rule are defined
#                        by the test runner, not by this file - confirm there
# Optional per-case keys: expected_response_contains, expected_sub_intents,
# expected_slots, alternative_intents, safety_check.
# NOTE(review): how the runner interprets the optional keys is not visible
# here - verify against the consumer before adding new ones.

edge_cases:
  # Ambiguous inputs
  - id: EDGE-001
    name: "Ambiguous - Just Name"
    input: "Max"
    expected_intent: "clarification_needed"
    expected_response_contains: "Was moechtest"
    min_score: 3.0

  - id: EDGE-002
    name: "Ambiguous - Multiple Intents"
    input: "Notiz zu Max und mach ein Arbeitsblatt"
    expected_intent: "multi_intent"
    expected_sub_intents:
      - "student_observation"
      - "worksheet_generate"
    min_score: 3.0

  - id: EDGE-003
    name: "Incomplete Command"
    input: "Erinner mich an"
    expected_intent: "clarification_needed"
    min_score: 2.5

  # Typos and variations
  - id: EDGE-004
    name: "Typo - Notiz"
    # The misspelling "Notziz" is intentional - it is the thing under test.
    input: "Notziz zu Lisa: war heute sehr aufmerksam"
    expected_intent: "student_observation"
    min_score: 3.5

  - id: EDGE-005
    name: "Dialect - Austrian"
    input: "Mach mir a Arbeitsblattl zu Prozentrechnung"
    expected_intent: "worksheet_generate"
    min_score: 3.0

  - id: EDGE-006
    name: "Mixed Language"
    input: "Reminder: Meeting mit den Parents"
    expected_intent: "reminder"
    min_score: 3.0

  # Context-dependent
  - id: EDGE-007
    name: "Continuation - Yes"
    input: "Ja, genau"
    expected_intent: "confirmation"
    min_score: 2.5

  - id: EDGE-008
    name: "Continuation - No"
    input: "Nein, anders"
    expected_intent: "rejection"
    min_score: 2.5

  # Long inputs
  - id: EDGE-009
    name: "Long Input - Multiple Details"
    # Folded scalar (>-): the lines below are joined with single spaces and
    # no trailing newline, i.e. the value is one single-line string.
    input: >-
      Ich brauche ein Arbeitsblatt mit drei Lueckentexten zum Thema
      Bruchrechnung, Schwierigkeitsstufe mittel, fuer Klasse 6b, das in
      20 Minuten bearbeitbar ist
    expected_intent: "worksheet_generate"
    expected_slots:
      type: "Lueckentexte"
      topic: "Bruchrechnung"
      class_name: "6b"
      duration_minutes: 20
    min_score: 3.5

  # Empty or noise
  - id: EDGE-010
    name: "Background Noise"
    input: "Aehm... also... ja..."
    expected_intent: "clarification_needed"
    min_score: 2.0

  # PII safety checks
  - id: EDGE-011
    name: "Contains Email"
    input: "Notiz zu Max Mueller, email max.mueller@schule.de: fehlt oft"
    expected_intent: "student_observation"
    safety_check: "no_pii_in_response"
    min_score: 3.5

  - id: EDGE-012
    name: "Contains Phone"
    input: "Ruf Eltern an 0170-1234567"
    expected_intent: "reminder"
    safety_check: "no_pii_in_response"
    min_score: 3.0

  # Similar intents
  - id: EDGE-013
    name: "Reminder vs Reminder Schedule"
    input: "Nicht vergessen: morgen Konferenz"
    expected_intent: "reminder"
    alternative_intents:
      - "reminder_schedule"
    min_score: 3.5

  - id: EDGE-014
    name: "Worksheet vs Quick Activity"
    input: "Schnell 5 Aufgaben zu Vokabeln"
    expected_intent: "quick_activity"
    alternative_intents:
      - "worksheet_generate"
    min_score: 3.0

  # Negations
  - id: EDGE-015
    name: "Negation - Cancel"
    input: "Vergiss das mit dem Arbeitsblatt"
    expected_intent: "cancel"
    min_score: 3.0

  - id: EDGE-016
    name: "Negation - Not Reminder"
    input: "Keine Erinnerung, nur eine Notiz"
    expected_intent: "student_observation"
    min_score: 3.0

  # Questions
  - id: EDGE-017
    name: "Question - How"
    input: "Wie erstelle ich ein Arbeitsblatt?"
    expected_intent: "help_request"
    min_score: 3.0

  - id: EDGE-018
    name: "Question - Status"
    input: "Was steht noch aus?"
    expected_intent: "task_summary"
    min_score: 3.5

  # Time expressions
  - id: EDGE-019
    name: "Time - Relative"
    input: "In zwei Stunden erinnern"
    expected_intent: "reminder_schedule"
    expected_slots:
      time_offset: "2 Stunden"
    min_score: 3.5

  - id: EDGE-020
    name: "Time - Absolute"
    input: "Am 15. Januar Notiz wiederholen"
    expected_intent: "reminder_schedule"
    min_score: 3.0