This repository has been archived on 2026-02-15. You can view files and clone it. You cannot open issues or pull requests or push a commit.
Files
breakpilot-pwa/backend/klausur/routes.py
Benjamin Admin bfdaf63ba9 fix: Restore all files lost during destructive rebase
A previous `git pull --rebase origin main` dropped 177 local commits,
losing 3400+ files across admin-v2, backend, studio-v2, website,
klausur-service, and many other services. The partial restore attempt
(660295e2) only recovered some files.

This commit restores all missing files from pre-rebase ref 98933f5e
while preserving post-rebase additions (night-scheduler, night-mode UI,
NightModeWidget dashboard integration).

Restored features include:
- AI Module Sidebar (FAB), OCR Labeling, OCR Compare
- GPU Dashboard, RAG Pipeline, Magic Help
- Klausur-Korrektur (8 files), Abitur-Archiv (5+ files)
- Companion, Zeugnisse-Crawler, Screen Flow
- Full backend, studio-v2, website, klausur-service
- All compliance SDKs, agent-core, voice-service
- CI/CD configs, documentation, scripts

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-02-09 09:51:32 +01:00

1971 lines
63 KiB
Python

"""
Klausurkorrektur API Routes.
Privacy-by-Design exam correction with QR-code based pseudonymization.
All endpoints are teacher-scoped - no cross-teacher data access possible.
DSGVO Compliance:
- No student names stored in backend
- Only doc_tokens (pseudonymized IDs) used
- Identity mapping encrypted client-side
- All data auto-deleted after retention period
"""
import uuid
import logging
import re
import json
from datetime import datetime, timedelta
from typing import Optional, List
from io import BytesIO
from fastapi import APIRouter, HTTPException, Query, Depends, UploadFile, File, Response, BackgroundTasks
from fastapi.responses import StreamingResponse
from sqlalchemy.orm import Session
from pydantic import BaseModel, Field
from .database import get_db
from .db_models import (
ExamSession, PseudonymizedDocument, QRBatchJob,
SessionStatus, DocumentStatus
)
from .repository import KlausurRepository
from .services.pseudonymizer import get_pseudonymizer
from .services.correction_service import get_correction_service, QuestionRubric
from .services.storage_service import get_storage_service
from .services.processing_service import get_processing_service
logger = logging.getLogger(__name__)
router = APIRouter(prefix="/klausur", tags=["Klausurkorrektur"])
# =============================================================================
# Pydantic Schemas
# =============================================================================
class SessionCreate(BaseModel):
    """Request to create a new exam session."""
    name: str = Field(..., min_length=1, max_length=200, description="Session name (e.g., 'Mathe 10a - Klausur 1')")
    subject: str = Field(default="", max_length=100)
    class_name: str = Field(default="", max_length=100, description="Class name (e.g., '10a')")
    total_points: int = Field(default=100, ge=1, le=1000)
    rubric: str = Field(default="", description="General grading criteria")
    # NOTE(review): Pydantic copies Field defaults per model instance, so the
    # mutable [] default is safe here (unlike a plain function default).
    questions: List[dict] = Field(default=[], description="Question definitions with rubrics")
    retention_days: int = Field(default=30, ge=1, le=365, description="Auto-delete after N days")


class SessionResponse(BaseModel):
    """Response for an exam session."""
    id: str
    name: str
    subject: str
    class_name: str
    total_points: int
    status: str  # SessionStatus enum value serialized as a plain string
    document_count: int
    processed_count: int
    created_at: datetime
    completed_at: Optional[datetime] = None
    retention_until: Optional[datetime] = None  # auto-deletion deadline

    class Config:
        # Allow construction directly from ORM rows (attribute access).
        from_attributes = True


class SessionListResponse(BaseModel):
    """List of exam sessions."""
    sessions: List[SessionResponse]
    total: int


class DocumentResponse(BaseModel):
    """Response for a pseudonymized document."""
    doc_token: str  # pseudonymized document ID (never a student name)
    session_id: str
    status: str
    page_number: int
    total_pages: int
    ocr_confidence: int
    ai_score: Optional[int] = None
    ai_grade: Optional[str] = None
    ai_feedback: Optional[str] = None
    created_at: datetime
    processing_completed_at: Optional[datetime] = None

    class Config:
        # Allow construction directly from ORM rows (attribute access).
        from_attributes = True


class DocumentListResponse(BaseModel):
    """List of documents in a session."""
    documents: List[DocumentResponse]
    total: int
class QRBatchRequest(BaseModel):
    """Request to generate QR code batch."""
    student_count: int = Field(..., ge=1, le=100, description="Number of QR codes to generate")
    labels: Optional[List[str]] = Field(default=None, description="Optional labels (numbers only, NO names!)")


class QRBatchResponse(BaseModel):
    """Response with generated QR batch."""
    batch_id: str
    session_id: str
    student_count: int
    generated_tokens: List[str]  # the random doc_tokens baked into the QR codes


class IdentityMapUpdate(BaseModel):
    """Request to store encrypted identity map."""
    # The map is encrypted client-side; the server never sees the plaintext.
    encrypted_data: str = Field(..., description="Base64-encoded encrypted identity map")
    iv: str = Field(..., description="Initialization vector for decryption")


class ProcessingStats(BaseModel):
    """Processing statistics for a session."""
    session_id: str
    total_documents: int
    processed_documents: int
    status_breakdown: dict  # status name -> document count
    score_average: Optional[float] = None
    score_min: Optional[int] = None
    score_max: Optional[int] = None


class CorrectionResultResponse(BaseModel):
    """AI correction result (pseudonymized)."""
    doc_token: str
    total_score: int
    max_score: int
    grade: str
    overall_feedback: str
    question_results: List[dict]  # per-question scores/feedback from the AI
# =============================================================================
# Helper Functions
# =============================================================================
def get_teacher_id(request=None) -> str:
    """
    Resolve the teacher ID for the current request context.

    Placeholder implementation: production code should extract the
    teacher ID from the verified JWT (e.g. request.state.teacher_id).
    Until then, every call is attributed to a fixed default teacher.
    """
    # TODO: Implement proper JWT extraction
    # return request.state.teacher_id
    return "default_teacher"
# =============================================================================
# Session Endpoints
# =============================================================================
@router.post("/sessions", response_model=SessionResponse, status_code=201)
async def create_session(
    data: SessionCreate,
    db: Session = Depends(get_db)
):
    """
    Create a new exam correction session.

    Initializes a pseudonymized correction workspace for the current
    teacher. No student data is stored at this point.
    """
    repo = KlausurRepository(db)
    session = repo.create_session(
        teacher_id=get_teacher_id(),
        name=data.name,
        subject=data.subject,
        class_name=data.class_name,
        total_points=data.total_points,
        rubric=data.rubric,
        questions=data.questions,
        retention_days=data.retention_days
    )
    # Map the ORM row onto the response schema; the status enum is
    # flattened to its plain string value.
    attrs = ("id", "name", "subject", "class_name", "total_points",
             "document_count", "processed_count", "created_at",
             "completed_at", "retention_until")
    return SessionResponse(status=session.status.value,
                           **{a: getattr(session, a) for a in attrs})
@router.get("/sessions", response_model=SessionListResponse)
async def list_sessions(
    include_archived: bool = Query(False, description="Include archived sessions"),
    limit: int = Query(50, ge=1, le=100),
    offset: int = Query(0, ge=0),
    db: Session = Depends(get_db)
):
    """List all exam sessions for the current teacher."""
    repo = KlausurRepository(db)
    rows = repo.list_sessions(
        teacher_id=get_teacher_id(),
        include_archived=include_archived,
        limit=limit,
        offset=offset
    )
    # Flatten each ORM row into the response schema (enum -> string).
    attrs = ("id", "name", "subject", "class_name", "total_points",
             "document_count", "processed_count", "created_at",
             "completed_at", "retention_until")
    items = [
        SessionResponse(status=row.status.value,
                        **{a: getattr(row, a) for a in attrs})
        for row in rows
    ]
    return SessionListResponse(sessions=items, total=len(items))
@router.get("/sessions/{session_id}", response_model=SessionResponse)
async def get_session(
    session_id: str,
    db: Session = Depends(get_db)
):
    """Get details of a specific session (404 if not owned by this teacher)."""
    repo = KlausurRepository(db)
    session = repo.get_session(session_id, get_teacher_id())
    if not session:
        raise HTTPException(status_code=404, detail="Session not found")
    # Flatten the ORM row into the response schema (enum -> string).
    attrs = ("id", "name", "subject", "class_name", "total_points",
             "document_count", "processed_count", "created_at",
             "completed_at", "retention_until")
    return SessionResponse(status=session.status.value,
                           **{a: getattr(session, a) for a in attrs})
@router.delete("/sessions/{session_id}", status_code=204)
async def delete_session(
    session_id: str,
    hard_delete: bool = Query(False, description="Permanently delete (vs soft delete)"),
    db: Session = Depends(get_db)
):
    """Delete an exam session and all associated documents."""
    repo = KlausurRepository(db)
    deleted = repo.delete_session(session_id, get_teacher_id(), hard_delete=hard_delete)
    if not deleted:
        raise HTTPException(status_code=404, detail="Session not found")
    return Response(status_code=204)
# =============================================================================
# QR Code Generation Endpoints
# =============================================================================
@router.post("/sessions/{session_id}/qr-batch", response_model=QRBatchResponse)
async def generate_qr_batch(
    session_id: str,
    data: QRBatchRequest,
    db: Session = Depends(get_db)
):
    """
    Generate QR codes for exam pseudonymization.

    Each QR code carries a random doc_token used to track the exam
    through correction WITHOUT revealing the student's identity.

    IMPORTANT: Labels should be numbers only (e.g., "Nr. 1", "Nr. 2"),
    NOT student names!
    """
    teacher_id = get_teacher_id()
    repo = KlausurRepository(db)
    if not repo.get_session(session_id, teacher_id):
        raise HTTPException(status_code=404, detail="Session not found")
    # Random doc_tokens are the only identifiers the backend ever sees.
    tokens = get_pseudonymizer().generate_batch_tokens(data.student_count)
    batch = repo.create_qr_batch(
        session_id=session_id,
        teacher_id=teacher_id,
        student_count=data.student_count,
        generated_tokens=tokens
    )
    return QRBatchResponse(
        batch_id=batch.id,
        session_id=session_id,
        student_count=data.student_count,
        generated_tokens=tokens
    )
@router.get("/sessions/{session_id}/qr-sheet")
async def download_qr_sheet(
    session_id: str,
    batch_id: Optional[str] = Query(None),
    db: Session = Depends(get_db)
):
    """
    Download printable QR code sheet as PNG.

    The sheet contains QR codes with the doc_tokens students attach to
    their exams for pseudonymized tracking.
    """
    teacher_id = get_teacher_id()
    repo = KlausurRepository(db)
    if not repo.get_session(session_id, teacher_id):
        raise HTTPException(status_code=404, detail="Session not found")
    if batch_id:
        # Explicit batch requested - it must exist and belong to this teacher.
        batch = repo.get_qr_batch(batch_id, teacher_id)
        if not batch:
            raise HTTPException(status_code=404, detail="QR batch not found")
        tokens = batch.generated_tokens
    else:
        # No batch given: fall back to the tokens of the uploaded documents.
        tokens = [doc.doc_token for doc in repo.list_documents(session_id, teacher_id)]
    if not tokens:
        raise HTTPException(status_code=400, detail="No documents or QR batch found")
    try:
        sheet_bytes = get_pseudonymizer().generate_qr_sheet(tokens)
    except RuntimeError as e:
        raise HTTPException(status_code=500, detail=str(e))
    headers = {
        "Content-Disposition": f"attachment; filename=qr_sheet_{session_id[:8]}.png"
    }
    return StreamingResponse(BytesIO(sheet_bytes), media_type="image/png", headers=headers)
# =============================================================================
# Document Upload & Processing Endpoints
# =============================================================================
@router.post("/sessions/{session_id}/upload", response_model=DocumentResponse)
async def upload_document(
    session_id: str,
    file: UploadFile = File(...),
    auto_redact: bool = Query(True, description="Automatically redact header area"),
    db: Session = Depends(get_db)
):
    """
    Upload a scanned exam page.

    The document will be:
    1. Scanned for QR code to extract doc_token
    2. Header area redacted to remove personal data (if auto_redact=True)
    3. Stored for OCR processing

    PRIVACY: Header redaction removes student name/class before storage.
    """
    teacher_id = get_teacher_id()
    repo = KlausurRepository(db)
    session = repo.get_session(session_id, teacher_id)
    if not session:
        raise HTTPException(status_code=404, detail="Session not found")
    # Read the whole upload into memory before any processing.
    content = await file.read()
    pseudonymizer = get_pseudonymizer()
    # Try to recover the doc_token from the QR code printed on the page.
    qr_result = pseudonymizer.detect_qr_code(content)
    doc_token = qr_result.doc_token
    if not doc_token:
        # No QR code found - mint a fresh token so the upload is still tracked.
        doc_token = pseudonymizer.generate_doc_token()
        logger.warning(f"No QR code found in upload, generated new token: {doc_token[:8]}")
    # Black out the header area (name/class) BEFORE anything is persisted.
    if auto_redact:
        redaction_result = pseudonymizer.smart_redact_header(content, preserve_qr=True)
        if redaction_result.redaction_applied:
            content = redaction_result.redacted_image
            logger.info(f"Redacted {redaction_result.redacted_height}px header from document")
    # Create the pseudonymized document record in the database.
    doc = repo.create_document(
        session_id=session_id,
        teacher_id=teacher_id,
        doc_token=doc_token
    )
    if not doc:
        raise HTTPException(status_code=500, detail="Failed to create document")
    # Store content in MinIO storage (best effort - the DB record is
    # authoritative; a storage failure does not fail the upload).
    try:
        storage = get_storage_service()
        file_ext = file.filename.split(".")[-1] if file.filename else "png"
        # NOTE(review): is_redacted reflects the *request* flag, not whether
        # redaction was actually applied above - confirm this is intended.
        storage.upload_document(
            session_id=session_id,
            doc_token=doc_token,
            file_data=content,
            file_extension=file_ext,
            is_redacted=auto_redact
        )
        logger.info(f"Stored document {doc_token[:8]} in MinIO")
    except Exception as e:
        logger.warning(f"Failed to store document in MinIO (continuing anyway): {e}")
    return DocumentResponse(
        doc_token=doc.doc_token,
        session_id=doc.session_id,
        status=doc.status.value,
        page_number=doc.page_number,
        total_pages=doc.total_pages,
        ocr_confidence=doc.ocr_confidence,
        ai_score=doc.ai_score,
        ai_grade=doc.ai_grade,
        ai_feedback=doc.ai_feedback,
        created_at=doc.created_at,
        processing_completed_at=doc.processing_completed_at
    )
@router.get("/sessions/{session_id}/documents", response_model=DocumentListResponse)
async def list_documents(
    session_id: str,
    db: Session = Depends(get_db)
):
    """List all documents in a session (pseudonymized)."""
    teacher_id = get_teacher_id()
    repo = KlausurRepository(db)
    if not repo.get_session(session_id, teacher_id):
        raise HTTPException(status_code=404, detail="Session not found")
    # Flatten each ORM row into the response schema (enum -> string).
    attrs = ("doc_token", "session_id", "page_number", "total_pages",
             "ocr_confidence", "ai_score", "ai_grade", "ai_feedback",
             "created_at", "processing_completed_at")
    items = [
        DocumentResponse(status=doc.status.value,
                         **{a: getattr(doc, a) for a in attrs})
        for doc in repo.list_documents(session_id, teacher_id)
    ]
    return DocumentListResponse(documents=items, total=len(items))
@router.get("/documents/{doc_token}", response_model=DocumentResponse)
async def get_document(
    doc_token: str,
    db: Session = Depends(get_db)
):
    """Get details of a specific document by its pseudonymized token."""
    repo = KlausurRepository(db)
    doc = repo.get_document(doc_token, get_teacher_id())
    if not doc:
        raise HTTPException(status_code=404, detail="Document not found")
    # Flatten the ORM row into the response schema (enum -> string).
    attrs = ("doc_token", "session_id", "page_number", "total_pages",
             "ocr_confidence", "ai_score", "ai_grade", "ai_feedback",
             "created_at", "processing_completed_at")
    return DocumentResponse(status=doc.status.value,
                            **{a: getattr(doc, a) for a in attrs})
# =============================================================================
# Processing & Correction Endpoints
# =============================================================================
@router.post("/sessions/{session_id}/process", status_code=202)
async def start_processing(
    session_id: str,
    background_tasks: BackgroundTasks,
    use_ai: bool = Query(default=True, description="Run AI correction (requires LLM)"),
    db: Session = Depends(get_db)
):
    """
    Start OCR and AI correction for all uploaded documents.

    This triggers background processing:
    1. OCR extraction of student answers (via TrOCR on Mac Mini)
    2. AI-assisted correction using self-hosted LLM
    3. Grade calculation

    PRIVACY: Only pseudonymized text is sent to LLM.
    No student names or personal data.

    Returns 202 immediately; progress is observable via the stats endpoint.
    """
    teacher_id = get_teacher_id()
    repo = KlausurRepository(db)
    session = repo.get_session(session_id, teacher_id)
    if not session:
        raise HTTPException(status_code=404, detail="Session not found")
    if session.document_count == 0:
        raise HTTPException(status_code=400, detail="No documents to process")
    if session.status == SessionStatus.PROCESSING:
        # Reject double-starts; 409 signals the session is already busy.
        raise HTTPException(status_code=409, detail="Session is already processing")
    # Flip the status BEFORE scheduling so concurrent requests see PROCESSING.
    repo.update_session_status(session_id, teacher_id, SessionStatus.PROCESSING)
    # Start background processing task
    async def run_processing():
        """Background task wrapper.

        Opens its own DB session because the request-scoped `db` is closed
        by the time the background task actually runs.
        """
        from .database import SessionLocal
        db_session = SessionLocal()
        try:
            service = get_processing_service(db_session)
            await service.process_session(
                session_id=session_id,
                teacher_id=teacher_id,
                use_ai_correction=use_ai
            )
        except Exception as e:
            logger.error(f"Background processing failed: {e}")
            # Roll the session back to CREATED so the teacher can retry.
            try:
                repo_err = KlausurRepository(db_session)
                repo_err.update_session_status(session_id, teacher_id, SessionStatus.CREATED)
            except Exception:
                # Best-effort status reset; nothing more we can do here.
                pass
        finally:
            db_session.close()
    # Add to background tasks (runs after the response is sent)
    background_tasks.add_task(run_processing)
    logger.info(f"Started background processing for session {session_id} with {session.document_count} documents")
    return {
        "status": "processing",
        "message": "Background processing started",
        "session_id": session_id,
        "document_count": session.document_count,
        "use_ai_correction": use_ai
    }
@router.get("/sessions/{session_id}/stats", response_model=ProcessingStats)
async def get_processing_stats(
    session_id: str,
    db: Session = Depends(get_db)
):
    """Get anonymized processing statistics for a session."""
    stats = KlausurRepository(db).get_session_stats(session_id, get_teacher_id())
    if not stats:
        raise HTTPException(status_code=404, detail="Session not found")
    return ProcessingStats(**stats)
@router.get("/sessions/{session_id}/results", response_model=List[CorrectionResultResponse])
async def get_correction_results(
    session_id: str,
    db: Session = Depends(get_db)
):
    """
    Get AI correction results (pseudonymized).

    Returns doc_token + scores/grades WITHOUT student names. The
    teacher's client can rejoin these with the encrypted identity map
    to reveal which student each result belongs to.
    """
    teacher_id = get_teacher_id()
    repo = KlausurRepository(db)
    session = repo.get_session(session_id, teacher_id)
    if not session:
        raise HTTPException(status_code=404, detail="Session not found")
    # Only fully corrected documents carry reportable results.
    return [
        CorrectionResultResponse(
            doc_token=doc.doc_token,
            total_score=doc.ai_score or 0,
            max_score=session.total_points,
            grade=doc.ai_grade or "",
            overall_feedback=doc.ai_feedback or "",
            question_results=(doc.ai_details or {}).get("question_results", [])
        )
        for doc in repo.list_documents(session_id, teacher_id)
        if doc.status == DocumentStatus.COMPLETED
    ]
# =============================================================================
# Identity Map (Client-Side Encryption) Endpoints
# =============================================================================
@router.post("/sessions/{session_id}/identity-map", status_code=204)
async def store_identity_map(
    session_id: str,
    data: IdentityMapUpdate,
    db: Session = Depends(get_db)
):
    """
    Store encrypted identity map for a session.

    PRIVACY DESIGN (zero-knowledge storage):
    - The identity map (doc_token -> student name) is encrypted with
      the teacher's password BEFORE being sent to the server
    - The server stores only the opaque encrypted blob
    - The server CANNOT decrypt the mapping
    - Only the teacher (with their password) can rejoin results
    """
    import base64
    try:
        encrypted_bytes = base64.b64decode(data.encrypted_data)
    except Exception:
        raise HTTPException(status_code=400, detail="Invalid base64 data")
    updated = KlausurRepository(db).update_session_identity_map(
        session_id=session_id,
        teacher_id=get_teacher_id(),
        encrypted_map=encrypted_bytes,
        iv=data.iv
    )
    if not updated:
        raise HTTPException(status_code=404, detail="Session not found")
    return Response(status_code=204)
@router.get("/sessions/{session_id}/identity-map")
async def get_identity_map(
    session_id: str,
    db: Session = Depends(get_db)
):
    """
    Retrieve the encrypted identity map.

    Returns the opaque encrypted blob (plus IV) that the teacher's
    client decrypts locally to rejoin results with student names.
    """
    session = KlausurRepository(db).get_session(session_id, get_teacher_id())
    if not session:
        raise HTTPException(status_code=404, detail="Session not found")
    if not session.encrypted_identity_map:
        raise HTTPException(status_code=404, detail="No identity map stored")
    import base64
    return {
        "encrypted_data": base64.b64encode(session.encrypted_identity_map).decode(),
        "iv": session.identity_map_iv
    }
# =============================================================================
# Data Retention Endpoint
# =============================================================================
@router.post("/maintenance/cleanup", status_code=200)
async def cleanup_expired_data(
    db: Session = Depends(get_db)
):
    """
    Clean up expired sessions (data retention).

    This should be called periodically (e.g., daily cron job).
    Deletes sessions past their retention_until date.

    Returns the number of deleted sessions and an ISO-8601 UTC timestamp.
    """
    # datetime.utcnow() is deprecated (naive timestamps); use an aware UTC time.
    from datetime import timezone
    repo = KlausurRepository(db)
    deleted_count = repo.cleanup_expired_sessions()
    return {
        "status": "ok",
        "deleted_sessions": deleted_count,
        # Timestamp now carries an explicit +00:00 offset.
        "timestamp": datetime.now(timezone.utc).isoformat()
    }
# =============================================================================
# Magic Onboarding Endpoints
# =============================================================================
# Import additional models for Magic Onboarding
from .db_models import OnboardingSession, DetectedStudent, ModuleLink, OnboardingStatus, ModuleLinkType
from .services.roster_parser import get_roster_parser
from .services.school_resolver import get_school_resolver, BUNDESLAENDER, SCHULFORMEN, FAECHER
from .services.module_linker import get_module_linker, CorrectionResult
class MagicAnalysisRequest(BaseModel):
    """Request for magic header analysis (client-side results)."""
    detected_class: Optional[str] = None
    detected_subject: Optional[str] = None
    detected_date: Optional[str] = None
    students: List[dict] = Field(default=[])  # [{firstName, lastNameHint, confidence}]
    confidence: float = Field(default=0.0, ge=0.0, le=1.0)


class MagicAnalysisResponse(BaseModel):
    """Response after magic analysis."""
    onboarding_id: str
    detected_class: Optional[str]
    detected_subject: Optional[str]
    detected_date: Optional[str]
    student_count: int
    confidence: float
    bundeslaender: dict  # For school cascade selection
    schulformen: dict
    existing_classes: List[dict]  # Teacher's existing classes


class OnboardingConfirmRequest(BaseModel):
    """Request to confirm onboarding data."""
    onboarding_id: str
    # School context
    bundesland: str
    schulform: str
    school_name: str
    # Class info
    class_name: str
    subject: str
    # Students (confirmed by the teacher)
    students: List[dict]  # [{firstName, lastName, parentEmail?, parentPhone?}]
    # Options
    create_class: bool = Field(default=True)
    link_to_existing_class_id: Optional[str] = None


class OnboardingConfirmResponse(BaseModel):
    """Response after confirmation."""
    session_id: str
    onboarding_id: str
    class_id: Optional[str]
    student_count: int
    ready_for_correction: bool


class RosterUploadResponse(BaseModel):
    """Response after roster upload."""
    parsed_count: int
    matched_count: int
    entries: List[dict]  # [{firstName, lastName, parentEmail?, matched: bool}]
    warnings: List[str]


class MagicCorrectionRequest(BaseModel):
    """Request to start magic correction."""
    onboarding_id: str
    rubric: str = Field(default="")
    questions: List[dict] = Field(default=[])


class ResultsWithLinksResponse(BaseModel):
    """Results with module links."""
    results: List[CorrectionResultResponse]
    statistics: dict
    module_links: List[dict]
    parent_meeting_suggestions: List[dict]
class FileExtractionRequest(BaseModel):
    """Request to extract info from uploaded exam files."""
    filenames: List[str] = Field(default=[], description="Original filenames for metadata extraction")
    use_llm: bool = Field(default=True, description="Use LLM for intelligent extraction")


class ExamExtractionResult(BaseModel):
    """Extracted information from an exam file."""
    filename: str
    detected_student_name: Optional[str] = None
    detected_last_name_hint: Optional[str] = None  # e.g. "M." - never a full last name
    detected_class: Optional[str] = None
    detected_subject: Optional[str] = None
    detected_date: Optional[str] = None
    detected_grade: Optional[str] = None  # German grade notation, e.g. "2+"
    detected_score: Optional[int] = None
    detected_max_score: Optional[int] = None
    is_nachschreiben: bool = False  # make-up exam detected from filename
    is_separate_page: bool = False
    page_number: Optional[int] = None
    question_scores: List[dict] = Field(default=[])  # [{question: 1, score: 5, max: 10}]
    raw_text: Optional[str] = None  # extracted text, truncated to 500 chars
    confidence: float = 0.0


class FileExtractionResponse(BaseModel):
    """Response with extracted exam information."""
    results: List[ExamExtractionResult]
    detected_class: Optional[str] = None  # majority vote across files
    detected_subject: Optional[str] = None
    detected_date: Optional[str] = None
    student_count: int = 0
    overall_confidence: float = 0.0
@router.post("/magic-onboarding/extract", response_model=FileExtractionResponse)
async def extract_exam_info(
    files: List[UploadFile] = File(...),
    db: Session = Depends(get_db)
):
    """
    Server-side extraction of exam information using OCR and LLM.

    Extracts:
    - Student names from headers
    - Class and subject from context
    - Grades and scores if already corrected
    - Question-level scores

    Uses:
    1. Filename parsing for initial metadata
    2. OCR for text extraction
    3. Ollama/Qwen for intelligent parsing (if available)
    """
    results = []
    class_votes = {}
    subject_votes = {}
    date_votes = {}
    for file in files:
        filename = file.filename or ""
        content = await file.read()
        # Filename parsing gives cheap baseline metadata.
        filename_info = _parse_exam_filename(filename)
        result = ExamExtractionResult(
            filename=filename,
            detected_class=filename_info.get('class'),
            detected_subject=filename_info.get('subject'),
            detected_date=filename_info.get('date'),
            is_nachschreiben=filename_info.get('nachschreiben', False),
            is_separate_page=filename_info.get('separate_page', False),
            page_number=filename_info.get('page_number'),
            confidence=0.5  # Base confidence from filename only
        )
        if filename_info.get('student_name'):
            result.detected_student_name = filename_info['student_name']
            result.confidence = 0.7
        # Majority vote across files for the shared class/subject/date.
        if result.detected_class:
            class_votes[result.detected_class] = class_votes.get(result.detected_class, 0) + 1
        if result.detected_subject:
            subject_votes[result.detected_subject] = subject_votes.get(result.detected_subject, 0) + 1
        if result.detected_date:
            date_votes[result.detected_date] = date_votes.get(result.detected_date, 0) + 1
        # LLM refinement is best-effort; a failure never aborts the request.
        try:
            llm_result = await _extract_with_ollama(content, filename)
            if llm_result:
                result.detected_student_name = llm_result.get('student_name') or result.detected_student_name
                result.detected_last_name_hint = llm_result.get('last_name_hint')
                result.detected_grade = llm_result.get('grade')
                result.detected_score = llm_result.get('score')
                result.detected_max_score = llm_result.get('max_score')
                result.question_scores = llm_result.get('question_scores', [])
                result.raw_text = llm_result.get('raw_text', '')[:500]  # Truncate for response size
                result.confidence = max(result.confidence, llm_result.get('confidence', 0.0))
        except Exception as e:
            # BUGFIX: the log message previously printed the literal
            # "(unknown)" instead of the failing filename.
            logger.warning(f"LLM extraction failed for {filename}: {e}")
        results.append(result)
    # Pick the most frequent value per field (first-seen wins ties).
    detected_class = max(class_votes, key=class_votes.get) if class_votes else None
    detected_subject = max(subject_votes, key=subject_votes.get) if subject_votes else None
    detected_date = max(date_votes, key=date_votes.get) if date_votes else None
    overall_confidence = sum(r.confidence for r in results) / len(results) if results else 0.0
    return FileExtractionResponse(
        results=results,
        detected_class=detected_class,
        detected_subject=detected_subject,
        detected_date=detected_date,
        student_count=len(results),
        overall_confidence=overall_confidence
    )
def _parse_exam_filename(filename: str) -> dict:
    """
    Parse exam filename for metadata.

    Expected patterns:
    - 20260119_103820_Mathe_Klasse_3-1_2026-01-15_085630.pdf
    - Mathe_Klasse_3_Nachschreiben_2026-01-15_090901.pdf
    - Mathe_Klasse_3-2_Miguel_Seite_2_2026-01-15_090620.pdf

    Returns a dict with keys 'class', 'subject', 'date', 'nachschreiben',
    'separate_page', 'page_number', 'student_name'; values stay
    None/False when not detected.
    """
    result = {
        'class': None,
        'subject': None,
        'date': None,
        'nachschreiben': False,
        'separate_page': False,
        'page_number': None,
        'student_name': None
    }
    # Remove extension; the module-level `re` import is used throughout.
    name = filename.rsplit('.', 1)[0] if '.' in filename else filename
    lowered = name.lower()
    # Detect subject (common German subjects)
    subjects = ['Mathe', 'Mathematik', 'Deutsch', 'Englisch', 'Physik', 'Chemie', 'Bio', 'Biologie',
                'Geschichte', 'Erdkunde', 'Geographie', 'Kunst', 'Musik', 'Sport', 'Informatik',
                'Französisch', 'Latein', 'Spanisch', 'Religion', 'Ethik', 'Politik', 'Wirtschaft']
    # BUGFIX: match the longest names first so 'Mathematik' is not reported
    # as 'Mathe' and 'Biologie' is not reported as 'Bio'.
    for subject in sorted(subjects, key=len, reverse=True):
        if subject.lower() in lowered:
            result['subject'] = subject
            break
    # Detect class (e.g., Klasse_3-1, 3a, 10b, Q1)
    class_patterns = [
        r'Klasse[_\s]*(\d+[-a-zA-Z0-9]*)',  # Klasse_3-1, Klasse 10a
        r'(\d{1,2}[a-zA-Z])',  # 3a, 10b
        r'(Q[12])',  # Q1, Q2 (Oberstufe)
        r'(E[PF])',  # EP, EF (Einfuehrungsphase)
    ]
    for pattern in class_patterns:
        match = re.search(pattern, name, re.IGNORECASE)
        if match:
            result['class'] = match.group(1)
            break
    # Detect date (YYYY-MM-DD or DD.MM.YYYY)
    date_patterns = [
        r'(\d{4}-\d{2}-\d{2})',  # 2026-01-15
        r'(\d{2}\.\d{2}\.\d{4})',  # 15.01.2026
    ]
    for pattern in date_patterns:
        match = re.search(pattern, name)
        if match:
            result['date'] = match.group(1)
            break
    # Detect make-up exams ("Nachschreiben")
    if 'nachschreib' in lowered:
        result['nachschreiben'] = True
    # Detect separate page markers (e.g. Seite_2)
    page_match = re.search(r'Seite[_\s]*(\d+)', name, re.IGNORECASE)
    if page_match:
        result['separate_page'] = True
        result['page_number'] = int(page_match.group(1))
    # Student first name usually sits between the class and the date,
    # e.g. ...Klasse_3-2_Miguel_Seite...
    name_match = re.search(r'Klasse[_\s]*\d+[-a-zA-Z0-9]*[_\s]+([A-Z][a-z]+)(?:[_\s]|$)', name)
    if name_match:
        potential_name = name_match.group(1)
        # Exclude common structural words that are not names
        if potential_name not in ['Seite', 'Nachschreiben', 'Teil', 'Aufgabe']:
            result['student_name'] = potential_name
    return result
async def _extract_with_ollama(content: bytes, filename: str) -> Optional[dict]:
    """
    Use Ollama (local or Mac Mini) to extract information from exam content.

    Tries local Ollama first, then Mac Mini if reachable. Returns the
    parsed JSON dict from the model response, or None when no endpoint
    or model is usable, or the response cannot be parsed.
    """
    import httpx
    import base64
    # Ollama endpoints to try, in order of preference
    ollama_endpoints = [
        "http://localhost:11434",  # Local
        "http://192.168.178.163:11434",  # Mac Mini
    ]
    # Normalize the input to a single image the vision model can consume.
    image_data = None
    if filename.lower().endswith('.pdf'):
        try:
            # Render the first PDF page as an image (PyMuPDF/pdf2image).
            image_data = await _pdf_to_image(content)
        except Exception as e:
            logger.warning(f"PDF conversion failed: {e}")
            return None
    elif filename.lower().endswith(('.png', '.jpg', '.jpeg')):
        image_data = content
    if not image_data:
        return None
    # Create prompt for extraction
    prompt = """Analysiere dieses Bild einer Klausur/Klassenarbeit und extrahiere folgende Informationen im JSON-Format:
{
"student_name": "Vorname des Schülers (falls sichtbar)",
"last_name_hint": "Anfangsbuchstabe des Nachnamens (z.B. 'M.' falls sichtbar)",
"grade": "Note falls eingetragen (z.B. '2+', '3', '5-')",
"score": Punktzahl als Zahl (falls vorhanden),
"max_score": Maximale Punktzahl als Zahl (falls vorhanden),
"question_scores": [{"question": 1, "score": 5, "max": 10}],
"confidence": Konfidenz 0.0-1.0
}
Antworte NUR mit dem JSON, kein zusätzlicher Text."""
    # Try each endpoint until one answers
    for endpoint in ollama_endpoints:
        try:
            async with httpx.AsyncClient(timeout=30.0) as client:
                # Check which models the endpoint offers
                response = await client.get(f"{endpoint}/api/tags")
                if response.status_code != 200:
                    continue
                models = response.json().get('models', [])
                # Prefer vision models: llava, bakllava, moondream, qwen2-vl
                vision_model = None
                for m in models:
                    name = m.get('name', '').lower()
                    if any(vm in name for vm in ['llava', 'moondream', 'qwen', 'vision']):
                        vision_model = m['name']
                        break
                # Fall back to any available text model
                model = vision_model or (models[0]['name'] if models else None)
                if not model:
                    continue
                request_data = {
                    "model": model,
                    "prompt": prompt,
                    "stream": False
                }
                if vision_model and image_data:
                    request_data["images"] = [base64.b64encode(image_data).decode()]
                response = await client.post(
                    f"{endpoint}/api/generate",
                    json=request_data
                )
                if response.status_code == 200:
                    result_text = response.json().get('response', '')
                    try:
                        # BUGFIX: the previous pattern r'\{[^}]+\}' stopped at the
                        # FIRST closing brace, truncating any response with nested
                        # objects (question_scores) into unparseable JSON. Match
                        # greedily from the first '{' to the last '}' instead.
                        json_match = re.search(r'\{.*\}', result_text, re.DOTALL)
                        if json_match:
                            return json.loads(json_match.group())
                    except json.JSONDecodeError:
                        logger.warning("Failed to parse LLM response as JSON")
                    return None
        except Exception as e:
            logger.debug(f"Ollama endpoint {endpoint} failed: {e}")
            continue
    return None
async def _pdf_to_image(content: bytes) -> Optional[bytes]:
    """Convert the first page of a PDF to a PNG image.

    Tries PyMuPDF (fitz) first, then pdf2image, both at 150 DPI.

    Args:
        content: Raw PDF bytes.

    Returns:
        PNG bytes of the first page, or None when neither backend is
        installed.

    Raises:
        Exception: backend-specific errors (corrupt/empty PDF) propagate to
        the caller, which treats them as a failed conversion.
    """
    try:
        import fitz  # PyMuPDF
        doc = fitz.open(stream=content, filetype="pdf")
        try:
            page = doc[0]
            pix = page.get_pixmap(dpi=150)
            return pix.tobytes("png")
        finally:
            # BUGFIX: the document handle was previously never closed,
            # leaking native resources on every conversion.
            doc.close()
    except ImportError:
        pass
    try:
        from pdf2image import convert_from_bytes
        images = convert_from_bytes(content, first_page=1, last_page=1, dpi=150)
        if images:
            buffer = BytesIO()
            images[0].save(buffer, format='PNG')
            return buffer.getvalue()
    except ImportError:
        pass
    return None
@router.post("/magic-onboarding/analyze", response_model=MagicAnalysisResponse)
async def magic_analyze(
    data: MagicAnalysisRequest,
    db: Session = Depends(get_db)
):
    """
    Phase 1: Store client-side analysis results and prepare for confirmation.

    The actual header extraction happens client-side using the local LLM.
    This endpoint persists those results and returns the school cascade
    data (Bundeslaender, Schulformen, existing classes) for the UI.
    """
    teacher_id = get_teacher_id()
    repo = KlausurRepository(db)
    resolver = get_school_resolver()
    # Persist a new onboarding session carrying the client-side detections.
    onboarding = OnboardingSession(
        teacher_id=teacher_id,
        detected_class=data.detected_class,
        detected_subject=data.detected_subject,
        detected_student_count=len(data.students),
        detection_confidence=int(data.confidence * 100),
        status=OnboardingStatus.CONFIRMING
    )
    onboarding.analysis_completed_at = datetime.utcnow()
    db.add(onboarding)
    # Persist each detected student (confidence rescaled to integer percent).
    for entry in data.students:
        db.add(DetectedStudent(
            onboarding_session_id=onboarding.id,
            detected_first_name=entry.get('firstName'),
            detected_last_name_hint=entry.get('lastNameHint'),
            confidence=int(entry.get('confidence', 0) * 100)
        ))
    db.commit()
    db.refresh(onboarding)
    # Offer the teacher's existing classes so the UI can link instead of create.
    existing_classes = await resolver.get_classes_for_teacher(teacher_id)
    class_summaries = [
        {'id': c.id, 'name': c.name, 'grade_level': c.grade_level}
        for c in existing_classes
    ]
    return MagicAnalysisResponse(
        onboarding_id=onboarding.id,
        detected_class=onboarding.detected_class,
        detected_subject=onboarding.detected_subject,
        detected_date=data.detected_date,
        student_count=onboarding.detected_student_count,
        confidence=data.confidence,
        bundeslaender=BUNDESLAENDER,
        schulformen={k: v['name'] for k, v in SCHULFORMEN.items()},
        existing_classes=class_summaries
    )
@router.post("/magic-onboarding/upload-roster", response_model=RosterUploadResponse)
async def upload_roster(
    onboarding_id: str = Query(...),
    file: UploadFile = File(...),
    db: Session = Depends(get_db)
):
    """
    Phase 2a: Upload Klassenbuch photo or roster file.

    Parses the uploaded file (image, PDF or CSV) and matches the parsed
    names against the students detected during analysis. Matches above a
    0.7 confidence threshold are written back to the detected students.

    Raises:
        HTTPException 404: onboarding session not found for this teacher.
        HTTPException 400: missing filename or unsupported file format.
    """
    teacher_id = get_teacher_id()
    parser = get_roster_parser()
    # Get onboarding session (teacher-scoped: no cross-teacher access)
    onboarding = db.query(OnboardingSession).filter(
        OnboardingSession.id == onboarding_id,
        OnboardingSession.teacher_id == teacher_id
    ).first()
    if not onboarding:
        raise HTTPException(status_code=404, detail="Onboarding session not found")
    # Read file
    content = await file.read()
    # BUGFIX: UploadFile.filename may be None; previously this raised an
    # AttributeError (HTTP 500) instead of a clean validation error.
    if not file.filename:
        raise HTTPException(status_code=400, detail="Unsupported file format")
    filename = file.filename.lower()
    # Parse based on file type
    if filename.endswith(('.png', '.jpg', '.jpeg')):
        roster = parser.parse_klassenbuch_image(content)
    elif filename.endswith('.pdf'):
        roster = parser.parse_pdf_roster(content)
    elif filename.endswith('.csv'):
        roster = parser.parse_csv_roster(content.decode('utf-8'))
    else:
        raise HTTPException(status_code=400, detail="Unsupported file format")
    # Names detected during analysis, to be matched against the roster
    detected_students = db.query(DetectedStudent).filter(
        DetectedStudent.onboarding_session_id == onboarding_id
    ).all()
    detected_names = [s.detected_first_name for s in detected_students if s.detected_first_name]
    # Match names
    matches = parser.match_first_names(detected_names, roster.entries)
    # Write confident matches (> 0.7) back onto the detected students
    matched_count = 0
    for match in matches:
        if match.matched_entry and match.confidence > 0.7:
            for student in detected_students:
                if student.detected_first_name == match.detected_name:
                    student.confirmed_first_name = match.matched_entry.first_name
                    student.confirmed_last_name = match.matched_entry.last_name
                    student.parent_email = match.matched_entry.parent_email
                    student.parent_phone = match.matched_entry.parent_phone
                    matched_count += 1
                    break
    db.commit()
    return RosterUploadResponse(
        parsed_count=len(roster.entries),
        matched_count=matched_count,
        entries=[{
            'firstName': e.first_name,
            'lastName': e.last_name,
            'parentEmail': e.parent_email,
            'parentPhone': e.parent_phone,
            'matched': any(
                m.matched_entry and m.matched_entry.first_name == e.first_name
                for m in matches
            )
        } for e in roster.entries],
        warnings=roster.warnings
    )
@router.post("/magic-onboarding/confirm", response_model=OnboardingConfirmResponse)
async def confirm_onboarding(
    data: OnboardingConfirmRequest,
    db: Session = Depends(get_db)
):
    """
    Phase 2b: Confirm onboarding data and create class/session.

    Records the confirmed school context, optionally auto-creates the
    school and class, and creates the exam session used for correction.
    """
    teacher_id = get_teacher_id()
    repo = KlausurRepository(db)
    resolver = get_school_resolver()
    # Look up the onboarding session, scoped to this teacher.
    onboarding = db.query(OnboardingSession).filter(
        OnboardingSession.id == data.onboarding_id,
        OnboardingSession.teacher_id == teacher_id
    ).first()
    if not onboarding:
        raise HTTPException(status_code=404, detail="Onboarding session not found")
    # Record the confirmed school context on the session.
    onboarding.bundesland = data.bundesland
    onboarding.schulform = data.schulform
    onboarding.school_name = data.school_name
    onboarding.confirmed_class = data.class_name
    onboarding.confirmed_subject = data.subject
    onboarding.confirmation_completed_at = datetime.utcnow()
    class_id = data.link_to_existing_class_id
    # Auto-create school + class unless linking to an existing class.
    if data.create_class and not class_id:
        from .services.school_resolver import DetectedClassInfo
        school = await resolver.get_or_create_school(
            teacher_id=teacher_id,
            bundesland=data.bundesland,
            schulform=data.schulform,
            school_name=data.school_name
        )
        onboarding.linked_school_id = school.id
        detected = DetectedClassInfo(
            class_name=data.class_name,
            students=data.students
        )
        new_class = await resolver.auto_create_class(
            teacher_id=teacher_id,
            school_id=school.id,
            detected_info=detected
        )
        class_id = new_class.id
    onboarding.linked_class_id = class_id
    # Create the exam session and tie it back to this onboarding flow.
    session = repo.create_session(
        teacher_id=teacher_id,
        name=f"{data.subject} - {data.class_name}",
        subject=data.subject,
        class_name=data.class_name,
        total_points=100
    )
    session.linked_school_class_id = class_id
    onboarding.klausur_session_id = session.id
    onboarding.status = OnboardingStatus.PROCESSING
    # Copy confirmed names/contact data onto the matching detected students.
    for entry in data.students:
        first_name = entry.get('firstName')
        if not first_name:
            continue
        student = db.query(DetectedStudent).filter(
            DetectedStudent.onboarding_session_id == data.onboarding_id,
            DetectedStudent.detected_first_name == first_name
        ).first()
        if student:
            student.confirmed_first_name = first_name
            student.confirmed_last_name = entry.get('lastName', '')
            student.parent_email = entry.get('parentEmail')
            student.parent_phone = entry.get('parentPhone')
    db.commit()
    return OnboardingConfirmResponse(
        session_id=session.id,
        onboarding_id=onboarding.id,
        class_id=class_id,
        student_count=len(data.students),
        ready_for_correction=True
    )
@router.post("/magic-onboarding/start-correction")
async def start_magic_correction(
    data: MagicCorrectionRequest,
    db: Session = Depends(get_db)
):
    """
    Phase 3: Start background correction.

    Marks the onboarding session as processing; the actual correction work
    is triggered via the existing /sessions/{id}/process endpoint.
    """
    teacher_id = get_teacher_id()
    # Look up the onboarding session, scoped to the calling teacher.
    onboarding = db.query(OnboardingSession).filter(
        OnboardingSession.id == data.onboarding_id,
        OnboardingSession.teacher_id == teacher_id
    ).first()
    if not onboarding:
        raise HTTPException(status_code=404, detail="Onboarding session not found")
    if not onboarding.klausur_session_id:
        raise HTTPException(status_code=400, detail="Session not confirmed yet")
    # Record when processing started before handing off.
    onboarding.processing_started_at = datetime.utcnow()
    db.commit()
    return {
        "status": "started",
        "session_id": onboarding.klausur_session_id,
        "onboarding_id": onboarding.id,
        "message": "Korrektur gestartet. Verwende /sessions/{id}/progress-stream fuer Updates."
    }
@router.get("/sessions/{session_id}/results-with-links", response_model=ResultsWithLinksResponse)
async def get_results_with_links(
    session_id: str,
    db: Session = Depends(get_db)
):
    """
    Phase 4: Get results with module links.

    Returns correction results, grade statistics, existing module links and
    parent-meeting suggestions for the given session.
    """
    teacher_id = get_teacher_id()
    repo = KlausurRepository(db)
    linker = get_module_linker()
    session = repo.get_session(session_id, teacher_id)
    if not session:
        raise HTTPException(status_code=404, detail="Session not found")
    # Only documents whose AI correction finished contribute results.
    completed = [
        d for d in repo.list_documents(session_id, teacher_id)
        if d.status == DocumentStatus.COMPLETED
    ]
    # Build both the API response objects and the linker's input objects.
    results = []
    correction_results = []
    for doc in completed:
        question_results = doc.ai_details.get('question_results', []) if doc.ai_details else []
        results.append(CorrectionResultResponse(
            doc_token=doc.doc_token,
            total_score=doc.ai_score or 0,
            max_score=session.total_points,
            grade=doc.ai_grade or "",
            overall_feedback=doc.ai_feedback or "",
            question_results=question_results
        ))
        correction_results.append(CorrectionResult(
            doc_token=doc.doc_token,
            score=float(doc.ai_score or 0),
            max_score=float(session.total_points),
            grade=doc.ai_grade or "",
            feedback=doc.ai_feedback or ""
        ))
    stats = linker.calculate_grade_statistics(correction_results)
    # Previously created links to other modules for this session.
    links = db.query(ModuleLink).filter(
        ModuleLink.klausur_session_id == session_id
    ).all()
    link_payload = [
        {
            'id': link.id,
            'type': link.link_type.value,
            'module': link.target_module,
            'url': link.target_url
        }
        for link in links
    ]
    # Suggest parent meetings for students whose results warrant one.
    meeting_suggestions = linker.suggest_elternabend(
        results=correction_results,
        subject=session.subject
    )
    suggestion_payload = [
        {
            'doc_token': s.doc_token,
            'reason': s.reason,
            'urgency': s.urgency.value,
            'grade': s.grade,
            'topics': s.suggested_topics
        }
        for s in meeting_suggestions
    ]
    return ResultsWithLinksResponse(
        results=results,
        statistics=stats,
        module_links=link_payload,
        parent_meeting_suggestions=suggestion_payload
    )
@router.post("/sessions/{session_id}/link-to-module")
async def create_module_link(
    session_id: str,
    link_type: str = Query(..., description="notenbuch, elternabend, zeugnis, calendar"),
    db: Session = Depends(get_db)
):
    """
    Phase 4: Create a link to another module.

    Dispatches on link_type to create the actual connection to Notenbuch,
    Elternabend, Zeugnis, or the calendar, and stores the resulting link.
    """
    teacher_id = get_teacher_id()
    repo = KlausurRepository(db)
    linker = get_module_linker()
    session = repo.get_session(session_id, teacher_id)
    if not session:
        raise HTTPException(status_code=404, detail="Session not found")
    # Collect the finished corrections in the linker's input format.
    docs = repo.list_documents(session_id, teacher_id)
    correction_results = [
        CorrectionResult(
            doc_token=d.doc_token,
            score=float(d.ai_score or 0),
            max_score=float(session.total_points),
            grade=d.ai_grade or "",
            feedback=d.ai_feedback or ""
        )
        for d in docs
        if d.status == DocumentStatus.COMPLETED
    ]
    result = None
    if link_type == "notenbuch":
        result = await linker.link_to_notenbuch(
            session_id=session_id,
            class_id=session.linked_school_class_id or "",
            subject=session.subject,
            results=correction_results,
            exam_name=session.name,
            exam_date=session.created_at.strftime("%Y-%m-%d")
        )
    elif link_type == "elternabend":
        suggestions = linker.suggest_elternabend(
            results=correction_results,
            subject=session.subject
        )
        result = await linker.create_elternabend_link(
            session_id=session_id,
            suggestions=suggestions,
            teacher_id=teacher_id
        )
    elif link_type == "zeugnis":
        grades = {r.doc_token: r.grade for r in correction_results}
        result = await linker.update_zeugnis(
            class_id=session.linked_school_class_id or "",
            subject=session.subject,
            grades=grades
        )
    elif link_type == "calendar":
        suggestions = linker.suggest_elternabend(
            results=correction_results,
            subject=session.subject
        )
        events = await linker.create_calendar_events(
            teacher_id=teacher_id,
            meetings=suggestions
        )
        # Ad-hoc result object mirroring the other linker results.
        result = type('obj', (object,), {
            'success': len(events) > 0,
            'message': f"{len(events)} Kalendereintraege erstellt"
        })()
    else:
        raise HTTPException(status_code=400, detail=f"Unknown link type: {link_type}")
    if result and result.success:
        # Persist the created link so the UI can show it later.
        link_obj = getattr(result, 'link', None)
        db.add(ModuleLink(
            klausur_session_id=session_id,
            link_type=ModuleLinkType(link_type),
            target_module=link_type,
            target_entity_id=link_obj.target_entity_id if link_obj else "",
            target_url=getattr(result, 'target_url', None)
        ))
        db.commit()
    if not result:
        return {"success": False, "message": "Unknown error", "target_url": None}
    return {
        "success": result.success,
        "message": result.message,
        "target_url": getattr(result, 'target_url', None)
    }
@router.get("/school-data/bundeslaender")
async def get_bundeslaender():
    """Return the static list of German federal states (Bundeslaender)."""
    return {"bundeslaender": BUNDESLAENDER}
@router.get("/school-data/schulformen")
async def get_schulformen():
    """Return the school types as an id -> display-name mapping."""
    names = {key: info['name'] for key, info in SCHULFORMEN.items()}
    return {"schulformen": names}
@router.get("/school-data/faecher")
async def get_faecher():
    """Return the subjects as an id -> display-name mapping."""
    names = {key: info['name'] for key, info in FAECHER.items()}
    return {"faecher": names}
# =============================================================================
# TrOCR HANDWRITING RECOGNITION ENDPOINTS
# =============================================================================
class TrOCRExtractRequest(BaseModel):
    """Request for TrOCR text extraction.

    NOTE(review): the /trocr/extract route takes these as query parameters,
    not this body model — possibly kept for client compatibility; confirm.
    """
    # When True, the OCR service segments the image into individual text
    # lines and recognizes each line separately.
    detect_lines: bool = Field(default=True, description="Detect and process text lines separately")
class TrOCRTrainingRequest(BaseModel):
    """Request to add a training example.

    NOTE(review): /trocr/training/add takes ground_truth as a query
    parameter, not this body model — possibly kept for clients; confirm.
    """
    # Teacher-corrected transcription serving as the label for the image.
    ground_truth: str = Field(..., min_length=1, description="Correct text for the image")
class TrOCRFineTuneRequest(BaseModel):
    """Request to start fine-tuning."""
    # Number of passes over the collected training examples (bounded 1-10).
    epochs: int = Field(default=3, ge=1, le=10)
    # Fine-tuning learning rate; must lie in the open interval (0, 1).
    learning_rate: float = Field(default=5e-5, gt=0, lt=1)
@router.post("/trocr/extract")
async def trocr_extract(
    file: UploadFile = File(...),
    detect_lines: bool = Query(default=True),
    teacher_id: str = Query(default="teacher_1")
):
    """
    Extract handwritten text from an image using TrOCR.

    Uses Microsoft's TrOCR model optimized for handwriting. Processing
    happens on the Mac Mini TrOCR service when reachable, with a fallback
    to the locally installed service - no cloud, only local network.

    Args:
        file: Image file (PNG, JPG)
        detect_lines: If True, detect individual text lines
        teacher_id: Teacher ID for logging

    Returns:
        Extracted text with confidence scores

    Raises:
        HTTPException 503: neither remote nor local TrOCR is available.
        HTTPException 500: local extraction failed.
    """
    # Preferred path: the remote TrOCR service on the Mac Mini.
    try:
        from .services.trocr_client import get_trocr_client
        remote = get_trocr_client()
        if await remote.is_available():
            payload = await file.read()
            extraction = await remote.extract_text(
                payload,
                filename=file.filename or "image.png",
                detect_lines=detect_lines
            )
            return {
                "text": extraction.text,
                "confidence": extraction.confidence,
                "bounding_boxes": [],
                "processing_time_ms": extraction.processing_time_ms,
                "model": "trocr-base-handwritten",
                "device": extraction.device,
                "service": "mac-mini"
            }
    except Exception as e:
        logger.warning(f"Remote TrOCR client failed: {e}")
    # Fallback: the locally installed TrOCR service.
    try:
        from .services.trocr_service import get_trocr_service
        local = get_trocr_service()
        payload = await file.read()
        extraction = await local.extract_text(payload, detect_lines=detect_lines)
        return {
            "text": extraction.text,
            "confidence": extraction.confidence,
            "bounding_boxes": extraction.bounding_boxes,
            "processing_time_ms": extraction.processing_time_ms,
            "model": local.model_name,
            "has_lora_adapter": local._lora_adapter is not None,
            "service": "local"
        }
    except ImportError as e:
        logger.error(f"TrOCR not available locally or remotely: {e}")
        raise HTTPException(
            status_code=503,
            detail="TrOCR not available. Mac Mini service unreachable and local dependencies missing."
        )
    except Exception as e:
        logger.error(f"TrOCR extraction failed: {e}")
        raise HTTPException(status_code=500, detail=str(e))
@router.post("/trocr/batch-extract")
async def trocr_batch_extract(
    files: List[UploadFile] = File(...),
    detect_lines: bool = Query(default=True),
    teacher_id: str = Query(default="teacher_1")
):
    """
    Extract handwritten text from multiple images.

    Args:
        files: List of image files
        detect_lines: If True, detect individual text lines
        teacher_id: Teacher ID for logging

    Returns:
        List of extraction results

    Raises:
        HTTPException 503: local TrOCR dependencies are missing.
        HTTPException 500: extraction failed.
    """
    try:
        from .services.trocr_service import get_trocr_service
        service = get_trocr_service()
        # Read every upload into memory before handing off to the model.
        images = []
        for upload in files:
            images.append(await upload.read())
        extractions = await service.batch_extract(images, detect_lines=detect_lines)
        per_file = [
            {
                "filename": upload.filename,
                "text": extraction.text,
                "confidence": extraction.confidence,
                "processing_time_ms": extraction.processing_time_ms
            }
            for upload, extraction in zip(files, extractions)
        ]
        return {
            "results": per_file,
            "total_files": len(files),
            "model": service.model_name
        }
    except ImportError as e:
        raise HTTPException(status_code=503, detail=f"TrOCR not available: {e}")
    except Exception as e:
        logger.error(f"TrOCR batch extraction failed: {e}")
        raise HTTPException(status_code=500, detail=str(e))
@router.post("/trocr/training/add")
async def trocr_add_training_example(
    file: UploadFile = File(...),
    ground_truth: str = Query(..., min_length=1),
    teacher_id: str = Query(default="teacher_1")
):
    """
    Add a training example for TrOCR fine-tuning.

    When a teacher corrects OCR output, submit the correction here
    to improve future recognition accuracy.

    Args:
        file: Image file with handwritten text
        ground_truth: The correct text (teacher-corrected)
        teacher_id: Teacher ID (for tracking)

    Returns:
        Example ID plus the running total of stored examples.

    Raises:
        HTTPException 500: the example could not be stored.
    """
    try:
        from .services.trocr_service import get_trocr_service
        service = get_trocr_service()
        image_bytes = await file.read()
        # Store the (image, corrected text) pair for later fine-tuning.
        example_id = service.add_training_example(
            image_data=image_bytes,
            ground_truth=ground_truth,
            teacher_id=teacher_id
        )
        model_info = service.get_model_info()
        return {
            "example_id": example_id,
            "ground_truth": ground_truth,
            "teacher_id": teacher_id,
            "total_examples": model_info["training_examples_count"],
            "message": "Training example added successfully"
        }
    except Exception as e:
        logger.error(f"Failed to add training example: {e}")
        raise HTTPException(status_code=500, detail=str(e))
@router.post("/trocr/training/fine-tune")
async def trocr_fine_tune(
    request: TrOCRFineTuneRequest,
    # BUGFIX: annotation was `str` with a None default; Optional matches the
    # actual contract (teacher filter is optional) and FastAPI's handling.
    teacher_id: Optional[str] = Query(default=None)
):
    """
    Start fine-tuning TrOCR with collected training examples.

    Uses LoRA for efficient fine-tuning. Requires at least 10 training examples.

    Args:
        request: Fine-tuning parameters
        teacher_id: If provided, only use examples from this teacher

    Returns:
        Training results

    Raises:
        HTTPException 503: fine-tuning dependencies (peft) are missing.
        HTTPException 500: fine-tuning failed.
    """
    try:
        from .services.trocr_service import get_trocr_service
        service = get_trocr_service()
        # Run fine-tuning
        result = await service.fine_tune(
            teacher_id=teacher_id,
            epochs=request.epochs,
            learning_rate=request.learning_rate
        )
        return result
    except ImportError as e:
        raise HTTPException(
            status_code=503,
            detail=f"Fine-tuning dependencies not installed: {e}. Install with: pip install peft"
        )
    except Exception as e:
        logger.error(f"Fine-tuning failed: {e}")
        raise HTTPException(status_code=500, detail=str(e))
@router.get("/trocr/training/examples")
async def trocr_list_training_examples(
    # BUGFIX: annotation was `str` with a None default; Optional matches the
    # actual contract (teacher filter is optional) and FastAPI's handling.
    teacher_id: Optional[str] = Query(default=None)
):
    """
    List training examples.

    Args:
        teacher_id: If provided, filter by teacher

    Returns:
        List of training examples (ground truth truncated for display).

    Raises:
        HTTPException 500: examples could not be listed.
    """
    try:
        from .services.trocr_service import get_trocr_service
        service = get_trocr_service()
        examples = service.get_training_examples(teacher_id)
        return {
            "examples": [
                {
                    "image_path": e.image_path,
                    # Truncate long ground-truth strings for list display.
                    "ground_truth": e.ground_truth[:100] + "..." if len(e.ground_truth) > 100 else e.ground_truth,
                    "teacher_id": e.teacher_id,
                    "created_at": e.created_at
                }
                for e in examples
            ],
            "total": len(examples)
        }
    except Exception as e:
        logger.error(f"Failed to list training examples: {e}")
        raise HTTPException(status_code=500, detail=str(e))
@router.get("/trocr/status")
async def trocr_status():
    """
    Get TrOCR model status and info.

    Probes the Mac Mini service first, then the local installation, and
    reports which (if any) is the primary service.

    Returns:
        Model information including device, adapter status, etc.
    """
    status_report = {"status": "unavailable", "services": {}}
    # Probe the remote Mac Mini TrOCR service first.
    try:
        from .services.trocr_client import get_trocr_client
        client = get_trocr_client()
        if await client.is_available():
            remote_status = await client.get_status()
            status_report["services"]["mac_mini"] = {"status": "available", **remote_status}
            status_report["status"] = "available"
            status_report["primary_service"] = "mac_mini"
    except Exception as e:
        status_report["services"]["mac_mini"] = {"status": "error", "error": str(e)}
    # Probe the local TrOCR installation.
    try:
        from .services.trocr_service import get_trocr_service
        service = get_trocr_service()
        info = service.get_model_info()
        status_report["services"]["local"] = {"status": "available", **info}
        # Only promote "local" to primary if the Mac Mini was unavailable.
        if status_report["status"] != "available":
            status_report["status"] = "available"
            status_report["primary_service"] = "local"
    except ImportError as e:
        status_report["services"]["local"] = {"status": "not_installed", "error": str(e)}
    except Exception as e:
        status_report["services"]["local"] = {"status": "error", "error": str(e)}
    return status_report