# breakpilot-lehrer/backend-lehrer/classroom_engine/analytics.py

"""
Analytics-Modul fuer Classroom Engine (Phase 5).

Bietet Statistiken und Auswertungen fuer Unterrichtsstunden:
- Phasen-Dauer Statistiken
- Overtime-Analyse
- Lehrer-Dashboard Daten
- Post-Lesson Reflection

WICHTIG: Keine wertenden Metriken (z.B. "Sie haben 70% geredet").
Fokus auf neutrale, hilfreiche Statistiken.
"""

from datetime import datetime
from typing import Optional, List, Dict, Any

from .analytics_models import (
    PhaseStatistics,
    SessionSummary,
    TeacherAnalytics,
    LessonReflection,
)

# Re-export models for backward compatibility
__all__ = [
    "PhaseStatistics",
    "SessionSummary",
    "TeacherAnalytics",
    "LessonReflection",
    "AnalyticsCalculator",
]


class AnalyticsCalculator:
    """Compute analytics from raw session data.

    The class is stateless: every public entry point is a classmethod that
    takes plain dictionaries (session records and their phase-history
    entries) and returns one of the analytics models.
    """

    # Display label per lesson phase, keyed by the phase identifier found in
    # session data. The keys also define which phases the teacher analytics
    # aggregate and how many phases a session is reported to have in total.
    PHASE_DISPLAY_NAMES = {
        "einstieg": "Einstieg",
        "erarbeitung": "Erarbeitung",
        "sicherung": "Sicherung",
        "transfer": "Transfer",
        "reflexion": "Reflexion",
    }

    # Phase-history markers that the session summary skips entirely.
    _NON_LESSON_PHASES = ("not_started", "ended")

    @staticmethod
    def _parse_timestamp(value: Any) -> Optional[datetime]:
        """Return *value* as a datetime, or None when it is missing.

        Strings are parsed as ISO-8601; anything else that is truthy is
        returned unchanged. Falsy values (None, "") yield None so callers can
        treat "no timestamp" uniformly.
        """
        if not value:
            return None
        if isinstance(value, str):
            # datetime.fromisoformat() only understands a trailing "Z" from
            # Python 3.11 on, so rewrite it to an explicit UTC offset.
            return datetime.fromisoformat(value.replace("Z", "+00:00"))
        return value

    @staticmethod
    def _planned_seconds(phase_durations: Dict[str, Any], phase: str) -> int:
        """Return the planned length of *phase* in seconds.

        ``phase_durations`` holds minutes per phase; a missing or None entry
        counts as 0.
        """
        return (phase_durations.get(phase) or 0) * 60

    @classmethod
    def calculate_session_summary(
        cls,
        session_data: Dict[str, Any],
        phase_history: List[Dict[str, Any]]
    ) -> SessionSummary:
        """Build the summary of a single session.

        Args:
            session_data: Session record. Keys read: ``session_id``,
                ``teacher_id``, ``class_id``, ``subject``, ``topic``,
                ``lesson_started_at``, ``lesson_ended_at`` (datetime or ISO
                string) and ``phase_durations`` (planned minutes per phase).
            phase_history: One dict per phase the session went through. Keys
                read: ``phase``, ``duration_seconds``, ``pause_count``,
                ``total_pause_seconds``, ``ended_at``, ``was_extended`` and
                ``extension_minutes``. Missing or None values count as
                0 / False.

        Returns:
            A ``SessionSummary`` with one ``PhaseStatistics`` per history
            entry, except entries whose phase is "not_started" or "ended".

        Raises:
            ValueError: If a timestamp string is not valid ISO-8601.
        """
        lesson_started = cls._parse_timestamp(session_data.get("lesson_started_at"))
        lesson_ended = cls._parse_timestamp(session_data.get("lesson_ended_at"))

        # Without both timestamps the real duration is unknown; report 0.
        total_duration = 0
        if lesson_started and lesson_ended:
            total_duration = int((lesson_ended - lesson_started).total_seconds())

        # `or {}` / `or 0`: the key may be present with an explicit None.
        phase_durations = session_data.get("phase_durations") or {}
        planned_duration = sum(minutes or 0 for minutes in phase_durations.values()) * 60

        phase_stats = []
        total_overtime = 0
        phases_with_overtime = 0
        total_pause_count = 0
        total_pause_seconds = 0
        phases_completed = 0

        for entry in phase_history or []:
            phase = entry.get("phase", "")
            if phase in cls._NON_LESSON_PHASES:
                continue

            planned_seconds = cls._planned_seconds(phase_durations, phase)
            actual_seconds = entry.get("duration_seconds") or 0
            difference = actual_seconds - planned_seconds

            # A phase without a planned duration has planned_seconds == 0, so
            # any time spent in it is reported as overtime.
            had_overtime = difference > 0
            overtime_seconds = max(0, difference)
            if had_overtime:
                total_overtime += overtime_seconds
                phases_with_overtime += 1

            pause_count = entry.get("pause_count") or 0
            pause_seconds = entry.get("total_pause_seconds") or 0
            total_pause_count += pause_count
            total_pause_seconds += pause_seconds

            # A phase counts as completed once it carries an end timestamp.
            if entry.get("ended_at"):
                phases_completed += 1

            phase_stats.append(PhaseStatistics(
                phase=phase,
                display_name=cls.PHASE_DISPLAY_NAMES.get(phase, phase),
                planned_duration_seconds=planned_seconds,
                actual_duration_seconds=actual_seconds,
                difference_seconds=difference,
                had_overtime=had_overtime,
                overtime_seconds=overtime_seconds,
                was_extended=bool(entry.get("was_extended")),
                extension_minutes=entry.get("extension_minutes") or 0,
                pause_count=pause_count,
                total_pause_seconds=pause_seconds,
            ))

        return SessionSummary(
            session_id=session_data.get("session_id", ""),
            teacher_id=session_data.get("teacher_id", ""),
            class_id=session_data.get("class_id", ""),
            subject=session_data.get("subject", ""),
            topic=session_data.get("topic"),
            # Falls back to the current time when the lesson never started.
            date=lesson_started or datetime.now(),
            total_duration_seconds=total_duration,
            planned_duration_seconds=planned_duration,
            phases_completed=phases_completed,
            total_phases=len(cls.PHASE_DISPLAY_NAMES),
            phase_statistics=phase_stats,
            total_overtime_seconds=total_overtime,
            phases_with_overtime=phases_with_overtime,
            total_pause_count=total_pause_count,
            total_pause_seconds=total_pause_seconds,
        )

    @classmethod
    def calculate_teacher_analytics(
        cls,
        sessions: List[Dict[str, Any]],
        period_start: datetime,
        period_end: datetime
    ) -> TeacherAnalytics:
        """Aggregate statistics over a teacher's sessions.

        Args:
            sessions: Session records. Besides the keys used for a session
                summary, each record may carry its ``phase_history`` list.
                The teacher id is taken from the first record.
            period_start: Start of the reporting period; passed through.
            period_end: End of the reporting period; passed through.

        Returns:
            A ``TeacherAnalytics`` object. For an empty ``sessions`` list only
            ``teacher_id`` (empty), ``period_start`` and ``period_end`` are
            set and every other field keeps the model's default.

        Raises:
            ValueError: If a timestamp string is not valid ISO-8601.
        """
        if not sessions:
            return TeacherAnalytics(teacher_id="", period_start=period_start, period_end=period_end)

        teacher_id = sessions[0].get("teacher_id", "")
        total_sessions = len(sessions)

        completed_sessions = 0
        total_minutes = 0.0
        durations_by_phase: Dict[str, List[int]] = {
            phase: [] for phase in cls.PHASE_DISPLAY_NAMES
        }
        overtime_count = 0
        overtime_seconds_total = 0
        phase_overtime: Dict[str, int] = {}
        pause_counts = []
        pause_durations = []
        subjects: Dict[str, int] = {}
        classes: Dict[str, int] = {}

        for session in sessions:
            raw_started = session.get("lesson_started_at")
            raw_ended = session.get("lesson_ended_at")

            # A session counts as completed as soon as it has an end timestamp.
            if raw_ended:
                completed_sessions += 1
            # Teaching time needs both ends; timestamps are parsed only then.
            if raw_started and raw_ended:
                elapsed = cls._parse_timestamp(raw_ended) - cls._parse_timestamp(raw_started)
                total_minutes += elapsed.total_seconds() / 60

            subject = session.get("subject", "")
            class_id = session.get("class_id", "")
            subjects[subject] = subjects.get(subject, 0) + 1
            classes[class_id] = classes.get(class_id, 0) + 1

            # `or` fallbacks: the keys may be present with an explicit None.
            history = session.get("phase_history") or []
            planned_by_phase = session.get("phase_durations") or {}

            session_has_overtime = False
            session_pause_count = 0
            session_pause_duration = 0

            for entry in history:
                phase = entry.get("phase", "")
                # Durations and overtime are tracked for known phases only.
                if phase in durations_by_phase:
                    duration = entry.get("duration_seconds") or 0
                    durations_by_phase[phase].append(duration)

                    planned = cls._planned_seconds(planned_by_phase, phase)
                    if duration > planned:
                        overtime = duration - planned
                        overtime_seconds_total += overtime
                        session_has_overtime = True
                        phase_overtime[phase] = phase_overtime.get(phase, 0) + overtime

                # Pauses are summed over every history entry, including
                # entries whose phase is not one of the known lesson phases.
                session_pause_count += entry.get("pause_count") or 0
                session_pause_duration += entry.get("total_pause_seconds") or 0

            if session_has_overtime:
                overtime_count += 1
            pause_counts.append(session_pause_count)
            pause_durations.append(session_pause_duration)

        # Mean duration per phase in whole seconds; 0 for phases never seen.
        avg_durations = {
            phase: round(sum(durations) / len(durations)) if durations else 0
            for phase, durations in durations_by_phase.items()
        }

        # Phase with the largest accumulated overtime, if any overtime occurred.
        most_overtime_phase = None
        if phase_overtime:
            most_overtime_phase = max(phase_overtime, key=phase_overtime.get)

        return TeacherAnalytics(
            teacher_id=teacher_id,
            period_start=period_start,
            period_end=period_end,
            total_sessions=total_sessions,
            completed_sessions=completed_sessions,
            total_teaching_minutes=int(total_minutes),
            avg_phase_durations=avg_durations,
            sessions_with_overtime=overtime_count,
            # Averaged over all sessions, not only those with overtime.
            avg_overtime_seconds=overtime_seconds_total / max(total_sessions, 1),
            most_overtime_phase=most_overtime_phase,
            avg_pause_count=sum(pause_counts) / max(len(pause_counts), 1),
            avg_pause_duration_seconds=sum(pause_durations) / max(len(pause_durations), 1),
            subjects_taught=subjects,
            classes_taught=classes,
        )