# breakpilot-pwa/backend/klausur/tests/test_magic_onboarding.py

"""
Tests for Magic Onboarding functionality.

Tests cover:
- OnboardingSession lifecycle
- Student detection and confirmation
- Roster parsing
- School resolution
- Module linking
"""

import pytest
from unittest.mock import AsyncMock, MagicMock, patch
from datetime import datetime

# Import models
from klausur.db_models import (
    OnboardingSession, DetectedStudent, ModuleLink,
    OnboardingStatus, ModuleLinkType
)

# Import services
from klausur.services.roster_parser import RosterParser, RosterEntry, NameMatch
from klausur.services.school_resolver import SchoolResolver, BUNDESLAENDER, SCHULFORMEN
from klausur.services.module_linker import (
    ModuleLinker, CorrectionResult, MeetingUrgency, ParentMeetingSuggestion
)


# =============================================================================
# ROSTER PARSER TESTS
# =============================================================================

class TestRosterParser:
    """Tests for the RosterParser service and its RosterEntry/NameMatch records."""

    def test_match_first_names_exact_match(self):
        """Detected first names identical to roster first names match exactly."""
        parser = RosterParser()

        roster = [
            RosterEntry(first_name="Max", last_name="Mueller"),
            RosterEntry(first_name="Anna", last_name="Schmidt"),
            RosterEntry(first_name="Tim", last_name="Weber"),
        ]

        detected = ["Max", "Anna", "Tim"]
        matches = parser.match_first_names(detected, roster)

        # One result is expected per detected name.
        assert len(matches) == 3

        # Every detected name must resolve to the roster entry carrying the
        # same first name; a count check alone would not notice a wrong or
        # missing pairing.
        matched_last_names = {
            m.detected_name: (
                m.matched_entry.last_name if m.matched_entry is not None else None
            )
            for m in matches
        }
        assert matched_last_names == {
            "Max": "Mueller",
            "Anna": "Schmidt",
            "Tim": "Weber",
        }

        # The default keeps a missing "Max" result an assertion failure
        # rather than an unhandled StopIteration.
        max_match = next((m for m in matches if m.detected_name == "Max"), None)
        assert max_match is not None
        assert max_match.matched_entry is not None
        assert max_match.matched_entry.last_name == "Mueller"
        assert max_match.match_type == "exact"
        assert max_match.confidence == 1.0

    def test_match_first_names_fuzzy_match(self):
        """A short detected name yields a consistent result for a longer roster name.

        The parser is allowed to leave "Max" unmatched, but whichever outcome
        it picks must be internally consistent, so the test never passes
        without asserting anything about the result.
        """
        parser = RosterParser()

        roster = [
            RosterEntry(first_name="Maximilian", last_name="Mueller"),
            RosterEntry(first_name="Anna-Lena", last_name="Schmidt"),
        ]

        # "Max" should fuzzy-match "Maximilian" (starts with)
        detected = ["Max"]
        matches = parser.match_first_names(detected, roster)

        assert len(matches) == 1
        max_match = matches[0]
        assert max_match.detected_name == "Max"

        if max_match.matched_entry is not None:
            # A match must point at Maximilian and be flagged as non-exact.
            assert max_match.matched_entry.first_name == "Maximilian"
            assert max_match.match_type in ["first_name", "fuzzy"]
        else:
            # Unmatched results are reported with match_type "none".
            assert max_match.match_type == "none"

    def test_match_first_names_no_match(self):
        """Detected names absent from the roster are reported as unmatched."""
        parser = RosterParser()

        roster = [
            RosterEntry(first_name="Max", last_name="Mueller"),
        ]

        detected = ["Sophie", "Lisa"]
        matches = parser.match_first_names(detected, roster)

        # Both should be unmatched, and each detected name must be reported.
        assert len(matches) == 2
        assert {m.detected_name for m in matches} == {"Sophie", "Lisa"}
        for match in matches:
            assert match.matched_entry is None
            assert match.match_type == "none"

    def test_roster_entry_creation(self):
        """RosterEntry keeps every value passed to its constructor."""
        entry = RosterEntry(
            first_name="Max",
            last_name="Mueller",
            student_number="12345",
            parent_email="eltern@example.com",
            parent_phone="+49123456789"
        )

        assert entry.first_name == "Max"
        assert entry.last_name == "Mueller"
        assert entry.student_number == "12345"
        assert entry.parent_email == "eltern@example.com"
        assert entry.parent_phone == "+49123456789"

    def test_name_match_dataclass(self):
        """NameMatch keeps the detected name, entry, confidence and match type."""
        entry = RosterEntry(first_name="Max", last_name="Mueller")
        match = NameMatch(
            detected_name="Max",
            matched_entry=entry,
            confidence=1.0,
            match_type="exact"
        )

        assert match.detected_name == "Max"
        assert match.matched_entry.last_name == "Mueller"
        assert match.confidence == 1.0
        assert match.match_type == "exact"


# =============================================================================
# SCHOOL RESOLVER TESTS
# =============================================================================

class TestSchoolResolver:
    """Checks for the SchoolResolver service and its lookup tables."""

    def test_bundeslaender_completeness(self):
        """BUNDESLAENDER holds 16 entries keyed by state code."""
        assert len(BUNDESLAENDER) == 16
        # Spot-check a few codes: Niedersachsen, Bayern, Berlin.
        for state_code in ("NI", "BY", "BE"):
            assert state_code in BUNDESLAENDER
        # A code maps to the state's name.
        assert BUNDESLAENDER["NI"] == "Niedersachsen"

    def test_schulformen_have_grades(self):
        """Every SCHULFORMEN entry carries a non-empty list under "grades"."""
        for info in SCHULFORMEN.values():
            assert "grades" in info
            grades = info["grades"]
            assert isinstance(grades, list)
            assert len(grades) > 0

    def test_detect_grade_from_class_name(self):
        """Class names in several notations resolve to the expected grade."""
        school_resolver = SchoolResolver()

        # (class name, expected grade) pairs covering the supported formats.
        expectations = [
            ("3a", 3),
            ("10b", 10),
            ("Q1", 11),
            ("Q2", 12),
            ("12", 12),
        ]
        for class_name, expected_grade in expectations:
            assert school_resolver.detect_grade_from_class_name(class_name) == expected_grade

    def test_detect_grade_returns_none_for_invalid(self):
        """Class names without a recognizable grade resolve to None."""
        school_resolver = SchoolResolver()

        for class_name in ("abc", ""):
            assert school_resolver.detect_grade_from_class_name(class_name) is None

    def test_local_storage_initialization(self):
        """A fresh resolver has empty local school and class stores."""
        school_resolver = SchoolResolver()
        assert school_resolver._local_schools == {}
        assert school_resolver._local_classes == {}


# =============================================================================
# MODULE LINKER TESTS
# =============================================================================

class TestModuleLinker:
    """Tests for the ModuleLinker service and its CorrectionResult records."""

    @staticmethod
    def _result(doc_token, score, grade):
        """Build a CorrectionResult scored out of 100 points with empty feedback."""
        return CorrectionResult(
            doc_token=doc_token, score=score, max_score=100,
            grade=grade, feedback=""
        )

    def test_suggest_elternabend_for_weak_students(self):
        """Each result at or below the threshold grade yields one suggestion."""
        linker = ModuleLinker()

        results = [
            self._result("token1", 25, "5"),
            self._result("token2", 85, "2"),
            self._result("token3", 30, "5-"),
            self._result("token4", 20, "6"),
        ]

        suggestions = linker.suggest_elternabend(
            results, subject="Mathematik", threshold_grade="4"
        )

        # Should suggest meetings for students with grades 4 or worse
        # Grades 5, 5-, and 6 should trigger meetings
        assert len(suggestions) == 3

        # Suggestions are keyed by doc_token (privacy). Comparing the sorted
        # tokens also catches a duplicated or missing student, which a
        # per-item membership check would let through.
        assert sorted(s.doc_token for s in suggestions) == [
            "token1", "token3", "token4"
        ]

    def test_suggest_elternabend_empty_for_good_class(self):
        """No suggestions are produced when every grade beats the threshold."""
        linker = ModuleLinker()

        results = [
            self._result("token1", 95, "1"),
            self._result("token2", 85, "2"),
            self._result("token3", 78, "3"),
        ]

        suggestions = linker.suggest_elternabend(
            results, subject="Deutsch", threshold_grade="4"
        )

        assert len(suggestions) == 0

    def test_calculate_grade_statistics(self):
        """Statistics report the count, per-grade distribution and pass/fail split."""
        linker = ModuleLinker()

        results = [
            self._result("t1", 95, "1"),
            self._result("t2", 85, "2"),
            self._result("t3", 85, "2"),
            self._result("t4", 75, "3"),
            self._result("t5", 55, "4"),
            self._result("t6", 25, "5"),
        ]

        stats = linker.calculate_grade_statistics(results)

        assert isinstance(stats, dict)
        assert stats["count"] == 6

        # Check the grade distribution for every grade present in the input.
        expected_distribution = {"1": 1, "2": 2, "3": 1, "4": 1, "5": 1}
        for grade, expected_count in expected_distribution.items():
            assert stats["distribution"].get(grade, 0) == expected_count

        # Check passing/failing counts
        assert stats["passing_count"] == 5  # Grades 1-4 pass
        assert stats["failing_count"] == 1  # Grade 5 fails

    def test_calculate_statistics_empty_results(self):
        """Statistics for an empty result list are an empty dict."""
        linker = ModuleLinker()

        stats = linker.calculate_grade_statistics([])

        assert stats == {}

    def test_correction_result_creation(self):
        """CorrectionResult keeps every value passed to its constructor."""
        question_results = [{"aufgabe": 1, "punkte": 10}]
        result = CorrectionResult(
            doc_token="abc-123",
            score=87,
            max_score=100,
            grade="2+",
            feedback="Gut geloest",
            question_results=question_results
        )

        assert result.doc_token == "abc-123"
        assert result.score == 87
        assert result.max_score == 100
        assert result.grade == "2+"
        assert result.feedback == "Gut geloest"
        assert result.question_results == [{"aufgabe": 1, "punkte": 10}]


# =============================================================================
# DB MODEL TESTS
# =============================================================================

class TestOnboardingModels:
    """Checks for the Magic Onboarding database models and their enums."""

    def test_onboarding_status_enum_values(self):
        """Each OnboardingStatus member carries its lowercase string value."""
        expected_values = [
            (OnboardingStatus.ANALYZING, "analyzing"),
            (OnboardingStatus.CONFIRMING, "confirming"),
            (OnboardingStatus.PROCESSING, "processing"),
            (OnboardingStatus.LINKING, "linking"),
            (OnboardingStatus.COMPLETE, "complete"),
        ]
        for member, raw_value in expected_values:
            assert member.value == raw_value

    def test_module_link_type_enum_values(self):
        """Each ModuleLinkType member carries its lowercase string value."""
        expected_values = [
            (ModuleLinkType.NOTENBUCH, "notenbuch"),
            (ModuleLinkType.ELTERNABEND, "elternabend"),
            (ModuleLinkType.ZEUGNIS, "zeugnis"),
            (ModuleLinkType.CALENDAR, "calendar"),
            (ModuleLinkType.KLASSENBUCH, "klassenbuch"),
        ]
        for member, raw_value in expected_values:
            assert member.value == raw_value

    def test_onboarding_session_repr(self):
        """repr() of a session mentions its id prefix, class and status."""
        session_fields = {
            "id": "12345678-1234-1234-1234-123456789abc",
            "teacher_id": "teacher-1",
            "detected_class": "3a",
            "status": OnboardingStatus.ANALYZING,
        }
        text = repr(OnboardingSession(**session_fields))

        for fragment in ("12345678", "3a", "analyzing"):
            assert fragment in text

    def test_detected_student_repr(self):
        """repr() of a detected student mentions the detected first name."""
        detected = DetectedStudent(
            id="12345678-1234-1234-1234-123456789abc",
            detected_first_name="Max",
        )

        assert "Max" in repr(detected)

    def test_module_link_repr(self):
        """repr() of a module link mentions the link type and target module."""
        link_fields = {
            "id": "12345678-1234-1234-1234-123456789abc",
            "klausur_session_id": "session-1",
            "link_type": ModuleLinkType.NOTENBUCH,
            "target_module": "school",
        }
        text = repr(ModuleLink(**link_fields))

        for fragment in ("notenbuch", "school"):
            assert fragment in text


# =============================================================================
# PRIVACY TESTS
# =============================================================================

class TestPrivacyInMagicOnboarding:
    """Tests ensuring privacy is maintained in Magic Onboarding."""

    def test_detected_student_no_full_last_name_in_detection(self):
        """A freshly detected student carries a last-name hint but no confirmed name."""
        student = DetectedStudent(
            id="12345678-1234-1234-1234-123456789abc",
            detected_first_name="Max",
            detected_last_name_hint="M."  # Only initial/hint, not full name
        )

        # The detection phase should only have hints
        assert student.detected_last_name_hint == "M."
        # Full name is only set after teacher confirmation
        assert student.confirmed_last_name is None

    def test_module_link_uses_doc_tokens_not_names(self):
        """Parent-meeting suggestions identify students by pseudonymized doc_token."""
        linker = ModuleLinker()

        # Results should only contain doc_tokens, not student names
        results = [
            CorrectionResult(
                doc_token="uuid-token-1", score=45, max_score=100,
                grade="4", feedback=""
            ),
        ]

        suggestions = linker.suggest_elternabend(
            results, subject="Deutsch", threshold_grade="4"
        )

        # A grade equal to the threshold triggers a suggestion. Without this
        # guard an empty result would skip the loop below and the test would
        # pass without checking anything.
        assert suggestions, "expected a suggestion for a grade at the threshold"

        # Suggestions reference doc_tokens, not names
        for suggestion in suggestions:
            assert hasattr(suggestion, 'doc_token')
            # Verify doc_token is the pseudonymized one
            assert suggestion.doc_token == "uuid-token-1"


# =============================================================================
# INTEGRATION FLOW TESTS
# =============================================================================

class TestMagicOnboardingFlow:
    """Flow-level checks for Magic Onboarding: status order, grading, urgency."""

    def test_onboarding_status_progression(self):
        """OnboardingStatus members are declared in the order the flow runs."""
        declared = list(OnboardingStatus)

        # Expected order of the flow, first step to last.
        expected_flow = [
            OnboardingStatus.ANALYZING,
            OnboardingStatus.CONFIRMING,
            OnboardingStatus.PROCESSING,
            OnboardingStatus.LINKING,
            OnboardingStatus.COMPLETE,
        ]
        for step, expected_status in enumerate(expected_flow):
            assert declared[step] == expected_status

    def test_grade_conversion_german_scale(self):
        """On the German 1-6 scale, grades 1-4 count as passing and 5-6 as failing."""
        linker = ModuleLinker()

        # (doc_token, score, grade) for one result per grade on the scale.
        graded = [
            ("t1", 95, "1"),
            ("t2", 80, "2"),
            ("t3", 65, "3"),
            ("t4", 50, "4"),
            ("t5", 30, "5"),
            ("t6", 15, "6"),
        ]
        results = [
            CorrectionResult(
                doc_token=token, score=points, max_score=100,
                grade=mark, feedback=""
            )
            for token, points, mark in graded
        ]

        stats = linker.calculate_grade_statistics(results)

        # Grades 1-4 pass (4 results); grades 5 and 6 fail (2 results).
        assert stats["passing_count"] == 4
        assert stats["failing_count"] == 2

    def test_meeting_urgency_levels(self):
        """Every result at or below the threshold gets an urgency; grade 6 is HIGH."""
        linker = ModuleLinker()

        # (doc_token, score, grade) for results at and below the threshold.
        graded = [
            ("t1", 55, "4"),
            ("t2", 30, "5"),
            ("t3", 15, "6"),
        ]
        results = [
            CorrectionResult(
                doc_token=token, score=points, max_score=100,
                grade=mark, feedback=""
            )
            for token, points, mark in graded
        ]

        suggestions = linker.suggest_elternabend(
            results, subject="Mathe", threshold_grade="4"
        )

        # Each suggestion exposes an urgency, one per input result.
        urgency_levels = [item.urgency for item in suggestions]
        assert len(urgency_levels) == 3

        # The worst grade must be flagged with high urgency.
        worst = next(item for item in suggestions if item.grade == "6")
        assert worst.urgency == MeetingUrgency.HIGH