[split-required] Split 700-870 LOC files across all services
backend-lehrer (11 files): - llm_gateway/routes/schools.py (867 → 5), recording_api.py (848 → 6) - messenger_api.py (840 → 5), print_generator.py (824 → 5) - unit_analytics_api.py (751 → 5), classroom/routes/context.py (726 → 4) - llm_gateway/routes/edu_search_seeds.py (710 → 4) klausur-service (12 files): - ocr_labeling_api.py (845 → 4), metrics_db.py (833 → 4) - legal_corpus_api.py (790 → 4), page_crop.py (758 → 3) - mail/ai_service.py (747 → 4), github_crawler.py (767 → 3) - trocr_service.py (730 → 4), full_compliance_pipeline.py (723 → 4) - dsfa_rag_api.py (715 → 4), ocr_pipeline_auto.py (705 → 4) website (6 pages): - audit-checklist (867 → 8), content (806 → 6) - screen-flow (790 → 4), scraper (789 → 5) - zeugnisse (776 → 5), modules (745 → 4) Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
394
backend-lehrer/unit_analytics_routes.py
Normal file
394
backend-lehrer/unit_analytics_routes.py
Normal file
@@ -0,0 +1,394 @@
|
||||
"""
|
||||
Unit Analytics API - Routes.
|
||||
|
||||
All API endpoints for learning gain, stop-level, misconception,
|
||||
student timeline, class comparison, export, and dashboard analytics.
|
||||
"""
|
||||
|
||||
import logging
|
||||
import statistics
|
||||
from datetime import datetime
|
||||
from typing import Optional, Dict, Any, List
|
||||
|
||||
from fastapi import APIRouter, Query
|
||||
|
||||
from unit_analytics_models import (
|
||||
TimeRange,
|
||||
LearningGainData,
|
||||
LearningGainSummary,
|
||||
StopPerformance,
|
||||
UnitPerformanceDetail,
|
||||
MisconceptionEntry,
|
||||
MisconceptionReport,
|
||||
StudentProgressTimeline,
|
||||
ClassComparisonData,
|
||||
)
|
||||
from unit_analytics_helpers import (
|
||||
get_analytics_database,
|
||||
calculate_gain_distribution,
|
||||
calculate_trend,
|
||||
calculate_difficulty_rating,
|
||||
)
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
router = APIRouter(tags=["Unit Analytics"])
|
||||
|
||||
|
||||
# ==============================================
|
||||
# API Endpoints - Learning Gain
|
||||
# ==============================================
|
||||
|
||||
# NOTE: Static routes must come BEFORE dynamic routes like /{unit_id}
|
||||
@router.get("/learning-gain/compare")
async def compare_learning_gains(
    unit_ids: str = Query(..., description="Comma-separated unit IDs"),
    class_id: Optional[str] = Query(None),
    time_range: TimeRange = Query(TimeRange.MONTH),
) -> Dict[str, Any]:
    """
    Compare learning gains across multiple units.

    Delegates to get_learning_gain_analysis() per unit; units whose lookup
    fails are logged and omitted from the result. Comparisons are returned
    sorted by average gain, best first.
    """
    results: List[Dict[str, Any]] = []

    for unit_id in (token.strip() for token in unit_ids.split(",")):
        try:
            summary = await get_learning_gain_analysis(unit_id, class_id, time_range)
            results.append({
                "unit_id": unit_id,
                "avg_gain": summary.avg_gain,
                "median_gain": summary.median_gain,
                "total_students": summary.total_students,
                # Guard against division by zero for units with no students.
                "positive_rate": summary.positive_gain_count / max(summary.total_students, 1),
            })
        except Exception as e:
            # Best-effort: a failing unit is skipped, not fatal.
            logger.error(f"Failed to get comparison for {unit_id}: {e}")

    results.sort(key=lambda entry: entry["avg_gain"], reverse=True)
    return {
        "time_range": time_range.value,
        "class_id": class_id,
        "comparisons": results,
    }
|
||||
|
||||
|
||||
@router.get("/learning-gain/{unit_id}", response_model=LearningGainSummary)
async def get_learning_gain_analysis(
    unit_id: str,
    class_id: Optional[str] = Query(None, description="Filter by class"),
    time_range: TimeRange = Query(TimeRange.MONTH, description="Time range for analysis"),
) -> LearningGainSummary:
    """
    Get detailed pre/post learning gain analysis for a unit.

    Fetches unit sessions, keeps only those with BOTH a precheck and a
    postcheck score, and computes per-student gains plus aggregate
    statistics (mean, median, stdev, distribution buckets) and a
    percentile rank per student.

    Returns an all-zero summary when the analytics database is
    unavailable or no complete score pair exists.
    """
    db = await get_analytics_database()
    individual_gains: List[LearningGainData] = []

    if db:
        try:
            sessions = await db.get_unit_sessions_with_scores(
                unit_id=unit_id,
                class_id=class_id,
                time_range=time_range.value
            )

            for session in sessions:
                # Sessions missing either score cannot contribute a gain.
                if session.get("precheck_score") is not None and session.get("postcheck_score") is not None:
                    gain = session["postcheck_score"] - session["precheck_score"]
                    individual_gains.append(LearningGainData(
                        student_id=session["student_id"],
                        student_name=session.get("student_name", session["student_id"][:8]),
                        unit_id=unit_id,
                        precheck_score=session["precheck_score"],
                        postcheck_score=session["postcheck_score"],
                        learning_gain=gain,
                    ))
        except Exception as e:
            # Best-effort: fall through to the empty summary on DB errors.
            logger.error(f"Failed to get learning gain data: {e}")

    # No usable data: return a zeroed-out summary rather than erroring.
    if not individual_gains:
        return LearningGainSummary(
            unit_id=unit_id,
            unit_title=f"Unit {unit_id}",
            total_students=0,
            avg_precheck=0.0, avg_postcheck=0.0,
            avg_gain=0.0, median_gain=0.0, std_deviation=0.0,
            positive_gain_count=0, negative_gain_count=0, no_change_count=0,
            gain_distribution={}, individual_gains=[],
        )

    gains = [g.learning_gain for g in individual_gains]
    prechecks = [g.precheck_score for g in individual_gains]
    postchecks = [g.postcheck_score for g in individual_gains]

    avg_gain = statistics.mean(gains)
    median_gain = statistics.median(gains)
    # stdev() requires at least two data points.
    std_dev = statistics.stdev(gains) if len(gains) > 1 else 0.0

    # Percentile rank per student. Ties share the LOWEST rank for their
    # value (matching list.index() semantics of the previous version),
    # but the rank table is built once in O(n log n) instead of calling
    # .index() inside the loop (O(n^2)).
    sorted_gains = sorted(gains)
    first_rank: Dict[float, int] = {}
    for rank, value in enumerate(sorted_gains, start=1):
        first_rank.setdefault(value, rank)
    for data in individual_gains:
        data.percentile = first_rank[data.learning_gain] / len(sorted_gains) * 100

    return LearningGainSummary(
        unit_id=unit_id,
        unit_title=f"Unit {unit_id}",
        total_students=len(individual_gains),
        avg_precheck=statistics.mean(prechecks),
        avg_postcheck=statistics.mean(postchecks),
        avg_gain=avg_gain,
        median_gain=median_gain,
        std_deviation=std_dev,
        positive_gain_count=sum(1 for g in gains if g > 0.01),
        negative_gain_count=sum(1 for g in gains if g < -0.01),
        no_change_count=sum(1 for g in gains if -0.01 <= g <= 0.01),
        gain_distribution=calculate_gain_distribution(gains),
        individual_gains=sorted(individual_gains, key=lambda x: x.learning_gain, reverse=True),
    )
|
||||
|
||||
|
||||
# ==============================================
|
||||
# API Endpoints - Stop-Level Analytics
|
||||
# ==============================================
|
||||
|
||||
@router.get("/unit/{unit_id}/stops", response_model=UnitPerformanceDetail)
async def get_unit_stop_analytics(
    unit_id: str,
    class_id: Optional[str] = Query(None),
    time_range: TimeRange = Query(TimeRange.MONTH),
) -> UnitPerformanceDetail:
    """
    Get detailed stop-level performance analytics.

    Builds one StopPerformance entry per stop and flags stops as
    bottlenecks when they are rated difficult or have a low success rate.
    DB failures degrade to an empty stop list and zeroed unit stats.
    """
    db = await get_analytics_database()
    stops_data: List[StopPerformance] = []
    unit_stats: Dict[str, Any] = {}

    if db:
        try:
            raw_stops = await db.get_stop_performance(
                unit_id=unit_id, class_id=class_id, time_range=time_range.value
            )

            for raw in raw_stops:
                stops_data.append(StopPerformance(
                    stop_id=raw["stop_id"],
                    stop_label=raw.get("stop_label", raw["stop_id"]),
                    attempts_total=raw.get("total_attempts", 0),
                    success_rate=raw.get("success_rate", 0.0),
                    avg_time_seconds=raw.get("avg_time_seconds", 0.0),
                    avg_attempts_before_success=raw.get("avg_attempts", 1.0),
                    common_errors=raw.get("common_errors", []),
                    difficulty_rating=calculate_difficulty_rating(
                        raw.get("success_rate", 0.5),
                        raw.get("avg_attempts", 1.0)
                    ),
                ))

            unit_stats = await db.get_unit_overall_stats(unit_id, class_id, time_range.value)
        except Exception as e:
            logger.error(f"Failed to get stop analytics: {e}")
            unit_stats = {}

    # Flag bottlenecks: hard stops or stops most students fail.
    bottlenecks: List[str] = []
    for stop in stops_data:
        if stop.difficulty_rating > 3.5 or stop.success_rate < 0.6:
            bottlenecks.append(stop.stop_id)

    return UnitPerformanceDetail(
        unit_id=unit_id,
        unit_title=f"Unit {unit_id}",
        template=unit_stats.get("template", "unknown"),
        total_sessions=unit_stats.get("total_sessions", 0),
        completed_sessions=unit_stats.get("completed_sessions", 0),
        completion_rate=unit_stats.get("completion_rate", 0.0),
        avg_duration_minutes=unit_stats.get("avg_duration_minutes", 0.0),
        stops=stops_data,
        bottleneck_stops=bottlenecks,
    )
|
||||
|
||||
|
||||
# ==============================================
|
||||
# API Endpoints - Misconception Tracking
|
||||
# ==============================================
|
||||
|
||||
@router.get("/misconceptions", response_model=MisconceptionReport)
async def get_misconception_report(
    class_id: Optional[str] = Query(None),
    unit_id: Optional[str] = Query(None),
    time_range: TimeRange = Query(TimeRange.MONTH),
    limit: int = Query(20, ge=1, le=100),
) -> MisconceptionReport:
    """
    Get comprehensive misconception report.

    Aggregates detected misconceptions, grouped by unit, with the top-10
    most frequent entries surfaced separately. DB failures degrade to an
    empty report.
    """
    db = await get_analytics_database()
    misconceptions: List[MisconceptionEntry] = []

    if db:
        try:
            rows = await db.get_misconceptions(
                class_id=class_id, unit_id=unit_id,
                time_range=time_range.value, limit=limit
            )

            for row in rows:
                misconceptions.append(MisconceptionEntry(
                    concept_id=row["concept_id"],
                    concept_label=row["concept_label"],
                    misconception_text=row["misconception_text"],
                    frequency=row["frequency"],
                    affected_student_ids=row.get("student_ids", []),
                    unit_id=row["unit_id"],
                    stop_id=row["stop_id"],
                    detected_via=row.get("detected_via", "unknown"),
                    # NOTE(review): utcnow() is naive and deprecated since
                    # Python 3.12 — consider datetime.now(timezone.utc),
                    # but verify the model accepts aware datetimes first.
                    first_detected=row.get("first_detected", datetime.utcnow()),
                    last_detected=row.get("last_detected", datetime.utcnow()),
                ))
        except Exception as e:
            logger.error(f"Failed to get misconceptions: {e}")

    # Bucket entries per unit for the by-unit breakdown.
    by_unit: Dict[str, List[MisconceptionEntry]] = {}
    for entry in misconceptions:
        by_unit.setdefault(entry.unit_id, []).append(entry)

    # Placeholder trend data: first three entries, no resolved tracking yet.
    trending_up = misconceptions[:3]
    resolved: List[MisconceptionEntry] = []

    ranked = sorted(misconceptions, key=lambda entry: entry.frequency, reverse=True)

    return MisconceptionReport(
        class_id=class_id,
        time_range=time_range.value,
        total_misconceptions=sum(entry.frequency for entry in misconceptions),
        unique_concepts=len({entry.concept_id for entry in misconceptions}),
        most_common=ranked[:10],
        by_unit=by_unit,
        trending_up=trending_up,
        resolved=resolved,
    )
|
||||
|
||||
|
||||
@router.get("/misconceptions/student/{student_id}")
async def get_student_misconceptions(
    student_id: str,
    time_range: TimeRange = Query(TimeRange.ALL),
) -> Dict[str, Any]:
    """
    Get misconceptions for a specific student.

    Includes up to five remediation suggestions pointing back at the
    unit/stop where each misconception was detected. Any failure (or a
    missing DB) yields an empty payload for the student.
    """
    db = await get_analytics_database()

    if db:
        try:
            found = await db.get_student_misconceptions(
                student_id=student_id, time_range=time_range.value
            )
            remediation = [
                {"concept": item["concept_label"], "activity": f"Review {item['unit_id']}/{item['stop_id']}"}
                for item in found[:5]
            ]
            return {
                "student_id": student_id,
                "misconceptions": found,
                "recommended_remediation": remediation,
            }
        except Exception as e:
            logger.error(f"Failed to get student misconceptions: {e}")

    # Fallback: empty payload when the DB is missing or the query failed.
    return {
        "student_id": student_id,
        "misconceptions": [],
        "recommended_remediation": [],
    }
|
||||
|
||||
|
||||
# ==============================================
|
||||
# API Endpoints - Student Progress Timeline
|
||||
# ==============================================
|
||||
|
||||
@router.get("/student/{student_id}/timeline", response_model=StudentProgressTimeline)
async def get_student_timeline(
    student_id: str,
    time_range: TimeRange = Query(TimeRange.ALL),
) -> StudentProgressTimeline:
    """
    Get detailed progress timeline for a student.

    One timeline entry per session, plus aggregate completion count,
    total time, average postcheck score, and a trend classification.
    DB failures degrade to an empty timeline.
    """
    db = await get_analytics_database()
    timeline: List[Dict[str, Any]] = []
    scores: List[Any] = []

    if db:
        try:
            rows = await db.get_student_sessions(
                student_id=student_id, time_range=time_range.value
            )

            for rec in rows:
                timeline.append({
                    "date": rec.get("started_at"),
                    "unit_id": rec.get("unit_id"),
                    "completed": rec.get("completed_at") is not None,
                    "precheck": rec.get("precheck_score"),
                    "postcheck": rec.get("postcheck_score"),
                    # assumes duration_seconds is an int when present — TODO confirm
                    "duration_minutes": rec.get("duration_seconds", 0) // 60,
                })
                if rec.get("postcheck_score") is not None:
                    scores.append(rec["postcheck_score"])
        except Exception as e:
            logger.error(f"Failed to get student timeline: {e}")

    trend = "insufficient_data" if not scores else calculate_trend(scores)

    completed_units = sum(1 for entry in timeline if entry["completed"])
    minutes_total = sum(entry["duration_minutes"] for entry in timeline)

    return StudentProgressTimeline(
        student_id=student_id,
        student_name=f"Student {student_id[:8]}",
        units_completed=completed_units,
        total_time_minutes=minutes_total,
        avg_score=statistics.mean(scores) if scores else 0.0,
        trend=trend,
        timeline=timeline,
    )
|
||||
|
||||
|
||||
# ==============================================
|
||||
# API Endpoints - Class Comparison
|
||||
# ==============================================
|
||||
|
||||
@router.get("/compare/classes", response_model=List[ClassComparisonData])
async def compare_classes(
    class_ids: str = Query(..., description="Comma-separated class IDs"),
    time_range: TimeRange = Query(TimeRange.MONTH),
) -> List[ClassComparisonData]:
    """
    Compare performance across multiple classes.

    Fetches aggregate stats per class; classes whose lookup fails are
    logged and omitted. Results are sorted by average learning gain,
    best first.
    """
    requested = [token.strip() for token in class_ids.split(",")]
    comparisons: List[ClassComparisonData] = []

    db = await get_analytics_database()
    if db:
        for class_id in requested:
            try:
                stats = await db.get_class_aggregate_stats(class_id, time_range.value)
                comparisons.append(ClassComparisonData(
                    class_id=class_id,
                    class_name=stats.get("class_name", f"Klasse {class_id[:8]}"),
                    student_count=stats.get("student_count", 0),
                    units_assigned=stats.get("units_assigned", 0),
                    avg_completion_rate=stats.get("avg_completion_rate", 0.0),
                    avg_learning_gain=stats.get("avg_learning_gain", 0.0),
                    avg_time_per_unit=stats.get("avg_time_per_unit", 0.0),
                ))
            except Exception as e:
                # Best-effort: a failing class is skipped, not fatal.
                logger.error(f"Failed to get stats for class {class_id}: {e}")

    return sorted(comparisons, key=lambda item: item.avg_learning_gain, reverse=True)
|
||||
|
||||
|
||||
Reference in New Issue
Block a user