# breakpilot-pwa/backend/correction_api.py

"""
Correction API - REST API für Klassenarbeits-Korrektur.

Workflow:
1. Upload: Gescannte Klassenarbeit hochladen
2. OCR: Text aus Handschrift extrahieren
3. Analyse: Antworten analysieren und bewerten
4. Feedback: KI-generiertes Feedback erstellen
5. Export: Korrigierte Arbeit als PDF exportieren

Integriert:
- FileProcessor für OCR
- PDFService für Export
- LLM für Analyse und Feedback
"""

import logging
import uuid
import os
from datetime import datetime
from typing import List, Dict, Any, Optional
from enum import Enum
from pathlib import Path

from fastapi import APIRouter, HTTPException, UploadFile, File, Form, BackgroundTasks
from pydantic import BaseModel, Field

# FileProcessor requires OpenCV with libGL - make optional for CI
try:
    from services.file_processor import FileProcessor, ProcessingResult
    _ocr_available = True
except (ImportError, OSError):
    FileProcessor = None  # type: ignore
    ProcessingResult = None  # type: ignore
    _ocr_available = False

# PDF service requires WeasyPrint with system libraries - make optional for CI
try:
    from services.pdf_service import PDFService, CorrectionData, StudentInfo
    _pdf_available = True
except (ImportError, OSError):
    PDFService = None  # type: ignore
    CorrectionData = None  # type: ignore
    StudentInfo = None  # type: ignore
    _pdf_available = False

# Module-level logger named after this module.
logger = logging.getLogger(__name__)

# All endpoints declared in this file are registered under the /corrections prefix.
router = APIRouter(
    prefix="/corrections",
    tags=["corrections"],
)

# Directory where uploaded exam scans are stored.
# NOTE: the directory is created as an import-time side effect of this module.
UPLOAD_DIR = Path("/tmp/corrections")
UPLOAD_DIR.mkdir(parents=True, exist_ok=True)


# ============================================================================
# Enums and Models
# ============================================================================

class CorrectionStatus(str, Enum):
    """Lifecycle status of a correction.

    Subclasses ``str`` so members compare and serialize as their plain
    string values.
    """
    UPLOADED = "uploaded"           # initial status; also set when OCR is retried
    PROCESSING = "processing"       # OCR is running
    OCR_COMPLETE = "ocr_complete"   # OCR finished, text extracted
    ANALYZING = "analyzing"         # analysis is running
    ANALYZED = "analyzed"           # analysis finished
    REVIEWING = "reviewing"         # teacher is reviewing
    COMPLETED = "completed"         # correction finished
    ERROR = "error"                 # an error occurred


class AnswerEvaluation(BaseModel):
    """Evaluation of a single answer."""
    # 1-based number of the question this evaluation belongs to.
    question_number: int
    # Answer text as extracted from the scan.
    extracted_text: str
    # Maximum points obtainable for this question.
    points_possible: float
    # Points actually awarded for this answer.
    points_awarded: float
    # Feedback text for this answer.
    feedback: str
    # Whether the answer was judged correct.
    is_correct: bool
    confidence: float  # 0-1, how certain the OCR/analysis is


class CorrectionCreate(BaseModel):
    """Request body for creating a new correction."""
    student_id: str
    student_name: str
    class_name: str
    exam_title: str
    subject: str
    # Maximum achievable points; must be >= 0, defaults to 100.
    max_points: float = Field(default=100.0, ge=0)
    # NOTE(review): create_correction in this file does not copy this field
    # onto the stored Correction - confirm whether that is intended.
    expected_answers: Optional[Dict[str, str]] = None  # model solution


class CorrectionUpdate(BaseModel):
    """Request body for updating a correction.

    Every field is optional; update_correction only applies fields that are
    not ``None``.
    """
    # Replaces the stored per-answer evaluations.
    evaluations: Optional[List[AnswerEvaluation]] = None
    # Explicit total score.
    total_points: Optional[float] = None
    grade: Optional[str] = None
    teacher_notes: Optional[str] = None
    status: Optional[CorrectionStatus] = None


class Correction(BaseModel):
    """A correction record for one student's exam."""
    # Unique id; create_correction generates it as a UUID4 string.
    id: str
    student_id: str
    student_name: str
    class_name: str
    exam_title: str
    subject: str
    # Maximum achievable points for the exam.
    max_points: float
    # Points achieved so far.
    total_points: float = 0.0
    # total_points relative to max_points, in percent.
    percentage: float = 0.0
    # Grade as a string; None until analysed or set manually.
    grade: Optional[str] = None
    status: CorrectionStatus
    # Path of the uploaded scan on disk; None until a file has been uploaded.
    file_path: Optional[str] = None
    # Text produced by OCR; None until OCR completed successfully.
    extracted_text: Optional[str] = None
    # Per-answer evaluations.
    evaluations: List[AnswerEvaluation] = []
    teacher_notes: Optional[str] = None
    ai_feedback: Optional[str] = None
    # Timestamps; the endpoints in this file set them with datetime.utcnow().
    created_at: datetime
    updated_at: datetime


class CorrectionResponse(BaseModel):
    """Response envelope carrying a single correction."""
    # True when the operation succeeded.
    success: bool
    # The affected correction.
    correction: Optional[Correction] = None
    # Error message when the operation failed.
    error: Optional[str] = None


class OCRResponse(BaseModel):
    """Response envelope for an OCR result."""
    success: bool
    extracted_text: Optional[str] = None
    # NOTE(review): the structure of a region dict is not defined in this file.
    regions: List[Dict[str, Any]] = []
    confidence: float = 0.0
    error: Optional[str] = None


class AnalysisResponse(BaseModel):
    """Response envelope for an analysis result."""
    success: bool
    # Per-answer evaluations produced by the analysis.
    evaluations: List[AnswerEvaluation] = []
    # Sum of the awarded points.
    total_points: float = 0.0
    # total_points relative to the correction's max_points, in percent.
    percentage: float = 0.0
    # Grade derived from the percentage.
    suggested_grade: Optional[str] = None
    ai_feedback: Optional[str] = None
    # Error message when the analysis failed.
    error: Optional[str] = None


# ============================================================================
# In-memory storage (to be replaced by a database later)
# ============================================================================

# Maps correction id -> Correction. Lives only in this process's memory.
_corrections: Dict[str, Correction] = {}


# ============================================================================
# Helper Functions
# ============================================================================

def _calculate_grade(percentage: float) -> str:
    """Map a percentage score to a German school grade ("1" best, "6" worst)."""
    # (minimum percentage, grade), ordered from best to worst grade.
    grade_floors = ((92, "1"), (81, "2"), (67, "3"), (50, "4"), (30, "5"))
    for floor, grade in grade_floors:
        if percentage >= floor:
            return grade
    # Anything below the lowest floor is the failing grade.
    return "6"


def _generate_ai_feedback(
    evaluations: List[AnswerEvaluation],
    total_points: float,
    max_points: float,
    subject: str
) -> str:
    """Build a short template-based feedback text from the evaluations.

    No LLM is involved. The text consists of an opening sentence chosen by
    the achieved percentage, up to three confidently correct question
    numbers, up to three incorrect question numbers, and a result line.
    ``subject`` is accepted but not used.
    """
    share = (total_points / max_points * 100) if max_points > 0 else 0

    # Opening sentence: first tier whose floor is reached, else the fallback.
    opening = "Es sind deutliche Wissenslücken erkennbar."
    tiers = (
        (90, "Hervorragende Leistung!"),
        (75, "Gute Arbeit!"),
        (60, "Insgesamt eine solide Leistung."),
        (50, "Die Arbeit zeigt Grundkenntnisse, aber es gibt Verbesserungsbedarf."),
    )
    for floor, sentence in tiers:
        if share >= floor:
            opening = sentence
            break

    solved = [ev for ev in evaluations if ev.is_correct]
    missed = [ev for ev in evaluations if not ev.is_correct]
    # Only correct answers with high confidence count as strengths.
    confident = [ev for ev in solved if ev.confidence > 0.8]

    sentences = [opening]

    if confident:
        numbers = ', '.join(str(ev.question_number) for ev in confident[:3])
        sentences.append(f"Besonders gut gelöst: Aufgabe(n) {numbers}.")

    if missed:
        numbers = ', '.join(str(ev.question_number) for ev in missed[:3])
        sentences.append(f"Übungsbedarf bei: Aufgabe(n) {numbers}.")

    sentences.append(
        f"Ergebnis: {len(solved)} von {len(evaluations)} Aufgaben korrekt ({share:.1f}%)."
    )

    return " ".join(sentences)


async def _process_ocr(correction_id: str, file_path: str):
    """Run OCR on an uploaded file and store the result on the correction.

    Scheduled as a background task by the upload and retry endpoints.
    Does nothing if the correction no longer exists. On success the
    extracted text is stored and the status becomes OCR_COMPLETE; in every
    other case (OCR backend missing, unsuccessful result, exception) the
    status becomes ERROR. Exceptions are logged and never propagated.

    Args:
        correction_id: Id of the correction in the in-memory store.
        file_path: Path of the uploaded file to process.
    """
    correction = _corrections.get(correction_id)
    if not correction:
        return

    try:
        correction.status = CorrectionStatus.PROCESSING
        _corrections[correction_id] = correction

        # FileProcessor is None when its optional import failed; fail with a
        # clear message instead of "'NoneType' object is not callable".
        if not _ocr_available:
            raise RuntimeError("OCR backend (FileProcessor) is not available")

        # NOTE(review): process_file is called synchronously inside this
        # coroutine - confirm whether it should be moved off the event loop.
        processor = FileProcessor()
        result = processor.process_file(file_path)

        if result.success and result.text:
            correction.extracted_text = result.text
            correction.status = CorrectionStatus.OCR_COMPLETE
        else:
            # Make the otherwise silent failure visible in the logs.
            logger.warning("OCR produced no usable text for %s", correction_id)
            correction.status = CorrectionStatus.ERROR

        correction.updated_at = datetime.utcnow()
        _corrections[correction_id] = correction

    except Exception:
        # logger.exception keeps the traceback for diagnosis.
        logger.exception("OCR error for %s", correction_id)
        correction.status = CorrectionStatus.ERROR
        correction.updated_at = datetime.utcnow()
        _corrections[correction_id] = correction


# ============================================================================
# API Endpoints
# ============================================================================

@router.post("/", response_model=CorrectionResponse)
async def create_correction(data: CorrectionCreate):
    """
    Create a new correction record.

    No file is attached yet - the scan is uploaded separately. The record
    starts in status UPLOADED with both timestamps set to the creation time.
    """
    new_id = str(uuid.uuid4())
    timestamp = datetime.utcnow()

    fields = {
        "id": new_id,
        "student_id": data.student_id,
        "student_name": data.student_name,
        "class_name": data.class_name,
        "exam_title": data.exam_title,
        "subject": data.subject,
        "max_points": data.max_points,
        "status": CorrectionStatus.UPLOADED,
        "created_at": timestamp,
        "updated_at": timestamp,
    }
    record = Correction(**fields)

    _corrections[new_id] = record
    logger.info(f"Created correction {new_id} for {data.student_name}")

    return CorrectionResponse(success=True, correction=record)


@router.post("/{correction_id}/upload", response_model=CorrectionResponse)
async def upload_exam(
    correction_id: str,
    background_tasks: BackgroundTasks,
    file: UploadFile = File(...)
):
    """
    Upload a scanned exam and start OCR in the background.

    Supported formats: PDF, PNG, JPG, JPEG (decided by file extension).

    The file is stored as ``<correction_id><extension>`` in UPLOAD_DIR. If an
    earlier upload for this correction was stored under a different path
    (different extension), that file is removed so it is not orphaned.

    Raises:
        HTTPException 404: the correction does not exist.
        HTTPException 400: the file extension is not allowed.

    Returns:
        CorrectionResponse with ``success=True`` and the updated correction,
        or ``success=False`` and an error message if storing the file failed.
    """
    correction = _corrections.get(correction_id)
    if not correction:
        raise HTTPException(status_code=404, detail="Korrektur nicht gefunden")

    # Validate the file format by extension.
    allowed_extensions = {".pdf", ".png", ".jpg", ".jpeg"}
    file_ext = Path(file.filename).suffix.lower() if file.filename else ""

    if file_ext not in allowed_extensions:
        # sorted(): set iteration order is arbitrary, keep the message stable.
        raise HTTPException(
            status_code=400,
            detail=f"Ungültiges Dateiformat. Erlaubt: {', '.join(sorted(allowed_extensions))}"
        )

    file_path = UPLOAD_DIR / f"{correction_id}{file_ext}"
    previous_path = correction.file_path

    try:
        content = await file.read()
        with open(file_path, "wb") as f:
            f.write(content)

        # A re-upload with another extension lands at a new path; drop the
        # old file, which nothing would reference any more.
        if previous_path and previous_path != str(file_path):
            try:
                os.remove(previous_path)
            except FileNotFoundError:
                pass
            except OSError as e:
                logger.warning(f"Could not delete file {previous_path}: {e}")

        correction.file_path = str(file_path)
        correction.updated_at = datetime.utcnow()
        _corrections[correction_id] = correction

        # Start OCR in the background once the response has been sent.
        background_tasks.add_task(_process_ocr, correction_id, str(file_path))

        logger.info(f"Uploaded file for correction {correction_id}: {file.filename}")

        return CorrectionResponse(success=True, correction=correction)

    except Exception as e:
        # Keep the traceback in the logs; the response shape is unchanged.
        logger.exception("Upload error for %s", correction_id)
        return CorrectionResponse(success=False, error=str(e))


@router.get("/{correction_id}", response_model=CorrectionResponse)
async def get_correction(correction_id: str):
    """Return the stored correction with the given id, or respond with 404."""
    found = _corrections.get(correction_id)
    if found:
        return CorrectionResponse(success=True, correction=found)

    raise HTTPException(status_code=404, detail="Korrektur nicht gefunden")


@router.get("/", response_model=Dict[str, Any])
async def list_corrections(
    class_name: Optional[str] = None,
    status: Optional[CorrectionStatus] = None,
    limit: int = 50
):
    """List corrections, optionally filtered, newest first.

    Args:
        class_name: If given (non-empty), only corrections of this class.
        status: If given, only corrections in this status.
        limit: Maximum number of corrections to return. Negative values are
            treated as 0 rather than being used as a negative slice bound.

    Returns:
        Dict with ``total`` (number of corrections matching the filters,
        before applying ``limit``) and ``corrections`` (at most ``limit``
        entries as dicts).
    """
    corrections = [
        c for c in _corrections.values()
        if (not class_name or c.class_name == class_name)
        and (not status or c.status == status)
    ]

    # Sort by creation time, newest first.
    corrections.sort(key=lambda x: x.created_at, reverse=True)

    # A negative limit would otherwise slice from the end and silently drop
    # the oldest items.
    page = corrections[:max(limit, 0)]

    return {
        "total": len(corrections),
        "corrections": [c.dict() for c in page]
    }


def _gradable_answer_parts(extracted_text: str, max_answers: int = 10) -> List[tuple]:
    """Split OCR text on blank lines into ``(question_number, text)`` pairs.

    Only the first ``max_answers`` segments are considered, and segments with
    fewer than 5 non-whitespace-trimmed characters are skipped. Numbers are
    the 1-based position of the segment in the text, so skipped segments
    leave gaps in the numbering.
    """
    segments = extracted_text.split('\n\n')
    return [
        (number, segment)
        for number, segment in enumerate(segments[:max_answers], start=1)
        if len(segment.strip()) >= 5
    ]


@router.post("/{correction_id}/analyze", response_model=AnalysisResponse)
async def analyze_correction(
    correction_id: str,
    expected_answers: Optional[Dict[str, str]] = None
):
    """
    Analyse the extracted answers and store the result on the correction.

    This is a simple analysis without an LLM: the OCR text is split on blank
    lines, and each segment is compared against the model solution by
    case-insensitive substring match. Correct answers get full points, all
    others half points. ``max_points`` is distributed evenly over the
    segments that are actually graded.

    Args:
        correction_id: Id of the correction to analyse.
        expected_answers: Optional model solution, keyed by the question
            number as a string ("1", "2", ...).

    Raises:
        HTTPException 404: the correction does not exist.
        HTTPException 400: the correction is not in status OCR_COMPLETE or
            ANALYZED, or it has no extracted text.

    Returns:
        AnalysisResponse with the evaluations, totals, suggested grade and
        feedback; ``success=False`` with an error message if the analysis
        itself failed (the correction is then set to ERROR).
    """
    correction = _corrections.get(correction_id)
    if not correction:
        raise HTTPException(status_code=404, detail="Korrektur nicht gefunden")

    if correction.status not in [CorrectionStatus.OCR_COMPLETE, CorrectionStatus.ANALYZED]:
        raise HTTPException(
            status_code=400,
            detail=f"Korrektur im falschen Status: {correction.status}"
        )

    if not correction.extracted_text:
        raise HTTPException(status_code=400, detail="Kein extrahierter Text vorhanden")

    try:
        correction.status = CorrectionStatus.ANALYZING
        _corrections[correction_id] = correction

        answers = _gradable_answer_parts(correction.extracted_text)

        # Divide by the number of graded answers. Dividing by the count of
        # all segments (including skipped ones) would make the achievable
        # maximum smaller than max_points.
        points = correction.max_points / len(answers) if answers else 0

        evaluations = []
        for number, part in answers:
            expected = expected_answers.get(str(number), "") if expected_answers else ""

            # Plain text comparison; an empty expectation never counts as correct.
            is_correct = bool(expected and expected.lower() in part.lower())

            evaluations.append(AnswerEvaluation(
                question_number=number,
                extracted_text=part[:200],  # shortened for the response
                points_possible=points,
                points_awarded=points if is_correct else points * 0.5,  # partial credit
                feedback=f"Antwort zu Aufgabe {number}" + (" korrekt." if is_correct else " mit Verbesserungsbedarf."),
                is_correct=is_correct,
                confidence=0.7  # simulated confidence
            ))

        # Overall result.
        total_points = sum(e.points_awarded for e in evaluations)
        percentage = (total_points / correction.max_points * 100) if correction.max_points > 0 else 0
        suggested_grade = _calculate_grade(percentage)

        ai_feedback = _generate_ai_feedback(
            evaluations, total_points, correction.max_points, correction.subject
        )

        # Persist the result on the correction.
        correction.evaluations = evaluations
        correction.total_points = total_points
        correction.percentage = percentage
        correction.grade = suggested_grade
        correction.ai_feedback = ai_feedback
        correction.status = CorrectionStatus.ANALYZED
        correction.updated_at = datetime.utcnow()
        _corrections[correction_id] = correction

        logger.info(f"Analysis complete for {correction_id}: {total_points}/{correction.max_points}")

        return AnalysisResponse(
            success=True,
            evaluations=evaluations,
            total_points=total_points,
            percentage=percentage,
            suggested_grade=suggested_grade,
            ai_feedback=ai_feedback
        )

    except Exception as e:
        # Keep the traceback in the logs; the response shape is unchanged.
        logger.exception("Analysis error for %s", correction_id)
        correction.status = CorrectionStatus.ERROR
        correction.updated_at = datetime.utcnow()
        _corrections[correction_id] = correction
        return AnalysisResponse(success=False, error=str(e))


@router.put("/{correction_id}", response_model=CorrectionResponse)
async def update_correction(correction_id: str, data: CorrectionUpdate):
    """
    Apply a teacher's manual adjustments to a correction.

    Only fields of ``data`` that are not ``None`` are applied. New
    evaluations replace the stored ones and recompute the total from their
    awarded points; an explicit ``total_points`` takes precedence over that
    computed total. The percentage follows whichever total ends up stored.
    """
    record = _corrections.get(correction_id)
    if not record:
        raise HTTPException(status_code=404, detail="Korrektur nicht gefunden")

    # Work out the new total first; an explicit total_points wins.
    new_total = None
    if data.evaluations is not None:
        record.evaluations = data.evaluations
        new_total = sum(e.points_awarded for e in data.evaluations)
    if data.total_points is not None:
        new_total = data.total_points

    if new_total is not None:
        record.total_points = new_total
        if record.max_points > 0:
            record.percentage = new_total / record.max_points * 100
        else:
            record.percentage = 0

    # Plain pass-through fields.
    for field_name in ("grade", "teacher_notes", "status"):
        value = getattr(data, field_name)
        if value is not None:
            setattr(record, field_name, value)

    record.updated_at = datetime.utcnow()
    _corrections[correction_id] = record

    return CorrectionResponse(success=True, correction=record)


@router.post("/{correction_id}/complete", response_model=CorrectionResponse)
async def complete_correction(correction_id: str):
    """Set the correction's status to COMPLETED, or respond with 404."""
    record = _corrections.get(correction_id)
    if record:
        record.status = CorrectionStatus.COMPLETED
        record.updated_at = datetime.utcnow()
        _corrections[correction_id] = record

        logger.info(f"Correction {correction_id} completed: {record.grade}")

        return CorrectionResponse(success=True, correction=record)

    raise HTTPException(status_code=404, detail="Korrektur nicht gefunden")


def _pdf_attachment_header(student_name: str, exam_title: str) -> str:
    """Build a Content-Disposition value that is safe for arbitrary names.

    Student name and exam title are user input. Control characters (including
    CR/LF) are removed, an ASCII-only ``filename`` fallback is provided, and
    the full name is sent percent-encoded via ``filename*``.
    """
    import re
    from urllib.parse import quote

    raw_name = f"korrektur_{student_name}_{exam_title}.pdf"
    # Control characters must never reach a header value.
    clean_name = re.sub(r"[\x00-\x1f\x7f]", "", raw_name)
    # Fallback: anything outside a conservative ASCII set becomes "_", which
    # also removes quotes that would break the quoted-string.
    ascii_name = re.sub(r"[^A-Za-z0-9._-]+", "_", clean_name)
    encoded_name = quote(clean_name, safe="")
    return (
        f'attachment; filename="{ascii_name}"; '
        f"filename*=UTF-8''{encoded_name}"
    )


@router.get("/{correction_id}/export-pdf")
async def export_correction_pdf(correction_id: str):
    """
    Export the correction as a PDF download.

    The data handed to the PDF service contains the student, exam metadata,
    per-answer evaluations, teacher notes, AI feedback and the overall result.

    Raises:
        HTTPException 404: the correction does not exist.
        HTTPException 503: the PDF backend could not be imported.
        HTTPException 500: PDF generation failed.

    Returns:
        A ``application/pdf`` response served as an attachment.
    """
    from fastapi.responses import Response

    correction = _corrections.get(correction_id)
    if not correction:
        raise HTTPException(status_code=404, detail="Korrektur nicht gefunden")

    # PDFService is None when its optional import failed; report that
    # explicitly instead of a generic 500 with an obscure message.
    if not _pdf_available:
        raise HTTPException(status_code=503, detail="PDF-Export nicht verfügbar")

    try:
        pdf_service = PDFService()

        correction_data = CorrectionData(
            student=StudentInfo(
                student_id=correction.student_id,
                name=correction.student_name,
                class_name=correction.class_name
            ),
            exam_title=correction.exam_title,
            subject=correction.subject,
            date=correction.created_at.strftime("%d.%m.%Y"),
            max_points=correction.max_points,
            achieved_points=correction.total_points,
            grade=correction.grade or "",
            percentage=correction.percentage,
            corrections=[
                {
                    "question": f"Aufgabe {e.question_number}",
                    "answer": e.extracted_text,
                    "points": f"{e.points_awarded}/{e.points_possible}",
                    "feedback": e.feedback
                }
                for e in correction.evaluations
            ],
            teacher_notes=correction.teacher_notes or "",
            ai_feedback=correction.ai_feedback or ""
        )

        pdf_bytes = pdf_service.generate_correction_pdf(correction_data)

        return Response(
            content=pdf_bytes,
            media_type="application/pdf",
            headers={
                "Content-Disposition": _pdf_attachment_header(
                    correction.student_name, correction.exam_title
                )
            }
        )

    except Exception as e:
        # Keep the traceback in the logs and chain the cause.
        logger.exception("PDF export error for %s", correction_id)
        raise HTTPException(status_code=500, detail=f"PDF-Export fehlgeschlagen: {str(e)}") from e


@router.delete("/{correction_id}")
async def delete_correction(correction_id: str):
    """Delete a correction and, best effort, its uploaded file."""
    record = _corrections.get(correction_id)
    if record is None:
        raise HTTPException(status_code=404, detail="Korrektur nicht gefunden")

    # Remove the uploaded file too; a failure here is only logged and does
    # not stop the record from being deleted.
    has_file = bool(record.file_path) and os.path.exists(record.file_path)
    if has_file:
        try:
            os.remove(record.file_path)
        except Exception as e:
            logger.warning(f"Could not delete file {record.file_path}: {e}")

    _corrections.pop(correction_id)
    logger.info(f"Deleted correction {correction_id}")

    return {"status": "deleted", "id": correction_id}


@router.get("/class/{class_name}/summary")
async def get_class_summary(class_name: str):
    """
    Return summary statistics for one class.

    Only corrections of the class that are in status COMPLETED are counted.

    Returns:
        Dict with ``class_name``, ``total_students``, ``average_percentage``,
        ``average_points``, ``grade_distribution`` (grade -> count, "?" for
        corrections without a grade) and ``corrections`` (per-student rows
        sorted by student name). All keys are present even when the class
        has no completed corrections; the averages are then 0.
    """
    from collections import Counter

    class_corrections = [
        c for c in _corrections.values()
        if c.class_name == class_name and c.status == CorrectionStatus.COMPLETED
    ]
    count = len(class_corrections)

    # Guard the divisions so the empty case takes the same path and returns
    # the same keys as the populated case.
    average_percentage = (
        sum(c.percentage for c in class_corrections) / count if count else 0
    )
    average_points = (
        sum(c.total_points for c in class_corrections) / count if count else 0
    )

    # Grade distribution; plain dict for serialization.
    grade_distribution = dict(Counter(c.grade or "?" for c in class_corrections))

    return {
        "class_name": class_name,
        "total_students": count,
        "average_percentage": round(average_percentage, 1),
        "average_points": round(average_points, 1),
        "grade_distribution": grade_distribution,
        "corrections": [
            {
                "id": c.id,
                "student_name": c.student_name,
                "total_points": c.total_points,
                "percentage": c.percentage,
                "grade": c.grade
            }
            for c in sorted(class_corrections, key=lambda x: x.student_name)
        ]
    }


@router.post("/{correction_id}/ocr/retry", response_model=CorrectionResponse)
async def retry_ocr(correction_id: str, background_tasks: BackgroundTasks):
    """
    Run OCR again for an already uploaded file.

    Useful when the first run failed. Responds with 404 if the correction
    does not exist and with 400 if it has no file or the file is gone.
    """
    record = _corrections.get(correction_id)
    if not record:
        raise HTTPException(status_code=404, detail="Korrektur nicht gefunden")

    stored_file = record.file_path
    if not stored_file:
        raise HTTPException(status_code=400, detail="Keine Datei vorhanden")
    if not os.path.exists(stored_file):
        raise HTTPException(status_code=400, detail="Datei nicht mehr vorhanden")

    # Reset to the pre-OCR state, then schedule OCR again.
    record.extracted_text = None
    record.status = CorrectionStatus.UPLOADED
    record.updated_at = datetime.utcnow()
    _corrections[correction_id] = record

    background_tasks.add_task(_process_ocr, correction_id, stored_file)

    return CorrectionResponse(success=True, correction=record)