A previous `git pull --rebase origin main` dropped 177 local commits,
losing 3400+ files across admin-v2, backend, studio-v2, website,
klausur-service, and many other services. The partial restore attempt
(660295e2) only recovered some files.
This commit restores all missing files from pre-rebase ref 98933f5e
while preserving post-rebase additions (night-scheduler, night-mode UI,
NightModeWidget dashboard integration).
Restored features include:
- AI Module Sidebar (FAB), OCR Labeling, OCR Compare
- GPU Dashboard, RAG Pipeline, Magic Help
- Klausur-Korrektur (8 files), Abitur-Archiv (5+ files)
- Companion, Zeugnisse-Crawler, Screen Flow
- Full backend, studio-v2, website, klausur-service
- All compliance SDKs, agent-core, voice-service
- CI/CD configs, documentation, scripts
Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
1971 lines
63 KiB
Python
1971 lines
63 KiB
Python
"""
|
|
Klausurkorrektur API Routes.
|
|
|
|
Privacy-by-Design exam correction with QR-code based pseudonymization.
|
|
All endpoints are teacher-scoped - no cross-teacher data access possible.
|
|
|
|
DSGVO Compliance:
|
|
- No student names stored in backend
|
|
- Only doc_tokens (pseudonymized IDs) used
|
|
- Identity mapping encrypted client-side
|
|
- All data auto-deleted after retention period
|
|
"""
|
|
|
|
import uuid
|
|
import logging
|
|
import re
|
|
import json
|
|
from datetime import datetime, timedelta
|
|
from typing import Optional, List
|
|
from io import BytesIO
|
|
|
|
from fastapi import APIRouter, HTTPException, Query, Depends, UploadFile, File, Response, BackgroundTasks
|
|
from fastapi.responses import StreamingResponse
|
|
from sqlalchemy.orm import Session
|
|
from pydantic import BaseModel, Field
|
|
|
|
from .database import get_db
|
|
from .db_models import (
|
|
ExamSession, PseudonymizedDocument, QRBatchJob,
|
|
SessionStatus, DocumentStatus
|
|
)
|
|
from .repository import KlausurRepository
|
|
from .services.pseudonymizer import get_pseudonymizer
|
|
from .services.correction_service import get_correction_service, QuestionRubric
|
|
from .services.storage_service import get_storage_service
|
|
from .services.processing_service import get_processing_service
|
|
|
|
logger = logging.getLogger(__name__)
|
|
|
|
router = APIRouter(prefix="/klausur", tags=["Klausurkorrektur"])
|
|
|
|
|
|
# =============================================================================
|
|
# Pydantic Schemas
|
|
# =============================================================================
|
|
|
|
class SessionCreate(BaseModel):
    """Request payload for creating a new exam session.

    Carries only exam metadata -- no student-identifying data is
    accepted or stored at session creation time.
    """
    name: str = Field(..., min_length=1, max_length=200, description="Session name (e.g., 'Mathe 10a - Klausur 1')")
    subject: str = Field(default="", max_length=100)
    class_name: str = Field(default="", max_length=100, description="Class name (e.g., '10a')")
    total_points: int = Field(default=100, ge=1, le=1000)
    rubric: str = Field(default="", description="General grading criteria")
    # Free-form per-question definitions; the structure of each dict is
    # defined by the client (see QuestionRubric in the correction service).
    questions: List[dict] = Field(default=[], description="Question definitions with rubrics")
    retention_days: int = Field(default=30, ge=1, le=365, description="Auto-delete after N days")
|
|
|
|
|
|
class SessionResponse(BaseModel):
    """Wire representation of an exam session (teacher-scoped, no student data)."""
    id: str
    name: str
    subject: str
    class_name: str
    total_points: int
    # Serialized value of the SessionStatus enum.
    status: str
    # Counters maintained by the repository as documents are uploaded/processed.
    document_count: int
    processed_count: int
    created_at: datetime
    completed_at: Optional[datetime] = None
    # Date after which the retention cleanup job deletes the session.
    retention_until: Optional[datetime] = None

    class Config:
        # Allow construction directly from ORM rows.
        from_attributes = True
|
|
|
|
|
|
class SessionListResponse(BaseModel):
    """Paginated list of exam sessions for one teacher."""
    sessions: List[SessionResponse]
    # Number of sessions in this response page (not the global count).
    total: int
|
|
|
|
|
|
class DocumentResponse(BaseModel):
    """Wire representation of a pseudonymized exam document.

    Identified only by its doc_token -- never by student identity.
    """
    doc_token: str
    session_id: str
    # Serialized value of the DocumentStatus enum.
    status: str
    page_number: int
    total_pages: int
    # OCR confidence as an integer score (scale defined by the OCR service).
    ocr_confidence: int
    # AI correction outputs; None until processing has completed.
    ai_score: Optional[int] = None
    ai_grade: Optional[str] = None
    ai_feedback: Optional[str] = None
    created_at: datetime
    processing_completed_at: Optional[datetime] = None

    class Config:
        # Allow construction directly from ORM rows.
        from_attributes = True
|
|
|
|
|
|
class DocumentListResponse(BaseModel):
    """List of pseudonymized documents belonging to one session."""
    documents: List[DocumentResponse]
    # Number of documents returned.
    total: int
|
|
|
|
|
|
class QRBatchRequest(BaseModel):
    """Request to generate a batch of pseudonymization QR codes."""
    student_count: int = Field(..., ge=1, le=100, description="Number of QR codes to generate")
    # PRIVACY: labels must never contain student names -- numbers only.
    labels: Optional[List[str]] = Field(default=None, description="Optional labels (numbers only, NO names!)")
|
|
|
|
|
|
class QRBatchResponse(BaseModel):
    """Result of QR batch generation: one random doc_token per student."""
    batch_id: str
    session_id: str
    student_count: int
    # The freshly minted pseudonymous tokens, in generation order.
    generated_tokens: List[str]
|
|
|
|
|
|
class IdentityMapUpdate(BaseModel):
    """Upload payload for the client-side-encrypted identity map.

    The server stores this blob opaquely and cannot decrypt it.
    """
    encrypted_data: str = Field(..., description="Base64-encoded encrypted identity map")
    iv: str = Field(..., description="Initialization vector for decryption")
|
|
|
|
|
|
class ProcessingStats(BaseModel):
    """Anonymized processing statistics for a session (no per-student data)."""
    session_id: str
    total_documents: int
    processed_documents: int
    # Mapping of document status name -> count.
    status_breakdown: dict
    # Aggregates over AI scores; None when no document has been scored yet.
    score_average: Optional[float] = None
    score_min: Optional[int] = None
    score_max: Optional[int] = None
|
|
|
|
|
|
class CorrectionResultResponse(BaseModel):
    """AI correction result for one document, keyed by doc_token only.

    The teacher's client rejoins these with the locally decrypted
    identity map to attribute results to students.
    """
    doc_token: str
    total_score: int
    max_score: int
    grade: str
    overall_feedback: str
    # Per-question breakdown as produced by the correction service.
    question_results: List[dict]
|
|
|
|
|
|
# =============================================================================
|
|
# Helper Functions
|
|
# =============================================================================
|
|
|
|
def get_teacher_id(request=None) -> str:
    """
    Resolve the identity of the teacher issuing the current request.

    In production this must come from the verified JWT on the request
    (e.g. ``request.state.teacher_id``); until authentication is wired
    up, a fixed placeholder identity is returned for every caller.
    """
    # TODO: Implement proper JWT extraction
    # return request.state.teacher_id
    return "default_teacher"
|
|
|
|
|
|
# =============================================================================
|
|
# Session Endpoints
|
|
# =============================================================================
|
|
|
|
@router.post("/sessions", response_model=SessionResponse, status_code=201)
async def create_session(
    data: SessionCreate,
    db: Session = Depends(get_db)
):
    """
    Create a new exam correction session.

    Initializes a workspace for pseudonymized exam correction.
    No student data is stored at this point.
    """
    repo = KlausurRepository(db)

    session = repo.create_session(
        teacher_id=get_teacher_id(),
        name=data.name,
        subject=data.subject,
        class_name=data.class_name,
        total_points=data.total_points,
        rubric=data.rubric,
        questions=data.questions,
        retention_days=data.retention_days
    )

    # Project the ORM row onto the wire schema.
    fields = {
        "id": session.id,
        "name": session.name,
        "subject": session.subject,
        "class_name": session.class_name,
        "total_points": session.total_points,
        "status": session.status.value,
        "document_count": session.document_count,
        "processed_count": session.processed_count,
        "created_at": session.created_at,
        "completed_at": session.completed_at,
        "retention_until": session.retention_until,
    }
    return SessionResponse(**fields)
|
|
|
|
|
|
@router.get("/sessions", response_model=SessionListResponse)
async def list_sessions(
    include_archived: bool = Query(False, description="Include archived sessions"),
    limit: int = Query(50, ge=1, le=100),
    offset: int = Query(0, ge=0),
    db: Session = Depends(get_db)
):
    """List all exam sessions belonging to the current teacher."""
    repo = KlausurRepository(db)

    rows = repo.list_sessions(
        teacher_id=get_teacher_id(),
        include_archived=include_archived,
        limit=limit,
        offset=offset
    )

    def to_schema(row) -> SessionResponse:
        # Project one ORM row onto the wire schema.
        return SessionResponse(
            id=row.id,
            name=row.name,
            subject=row.subject,
            class_name=row.class_name,
            total_points=row.total_points,
            status=row.status.value,
            document_count=row.document_count,
            processed_count=row.processed_count,
            created_at=row.created_at,
            completed_at=row.completed_at,
            retention_until=row.retention_until
        )

    return SessionListResponse(
        sessions=[to_schema(row) for row in rows],
        total=len(rows)
    )
|
|
|
|
|
|
@router.get("/sessions/{session_id}", response_model=SessionResponse)
async def get_session(
    session_id: str,
    db: Session = Depends(get_db)
):
    """Fetch details of one exam session owned by the current teacher."""
    repo = KlausurRepository(db)

    session = repo.get_session(session_id, get_teacher_id())
    if session is None:
        raise HTTPException(status_code=404, detail="Session not found")

    fields = {
        "id": session.id,
        "name": session.name,
        "subject": session.subject,
        "class_name": session.class_name,
        "total_points": session.total_points,
        "status": session.status.value,
        "document_count": session.document_count,
        "processed_count": session.processed_count,
        "created_at": session.created_at,
        "completed_at": session.completed_at,
        "retention_until": session.retention_until,
    }
    return SessionResponse(**fields)
|
|
|
|
|
|
@router.delete("/sessions/{session_id}", status_code=204)
async def delete_session(
    session_id: str,
    hard_delete: bool = Query(False, description="Permanently delete (vs soft delete)"),
    db: Session = Depends(get_db)
):
    """Delete an exam session together with all associated documents."""
    repo = KlausurRepository(db)

    deleted = repo.delete_session(session_id, get_teacher_id(), hard_delete=hard_delete)
    if not deleted:
        raise HTTPException(status_code=404, detail="Session not found")

    return Response(status_code=204)
|
|
|
|
|
|
# =============================================================================
|
|
# QR Code Generation Endpoints
|
|
# =============================================================================
|
|
|
|
@router.post("/sessions/{session_id}/qr-batch", response_model=QRBatchResponse)
async def generate_qr_batch(
    session_id: str,
    data: QRBatchRequest,
    db: Session = Depends(get_db)
):
    """
    Generate QR codes for exam pseudonymization.

    Each QR code carries a random doc_token that tracks the exam
    through the correction process WITHOUT revealing the student's
    identity.

    IMPORTANT: Labels should be numbers only (e.g., "Nr. 1", "Nr. 2"),
    NOT student names!
    """
    teacher_id = get_teacher_id()
    repo = KlausurRepository(db)

    if repo.get_session(session_id, teacher_id) is None:
        raise HTTPException(status_code=404, detail="Session not found")

    # Mint one random pseudonymous token per student.
    tokens = get_pseudonymizer().generate_batch_tokens(data.student_count)

    # Persist the batch so the printable sheet can be re-downloaded later.
    batch = repo.create_qr_batch(
        session_id=session_id,
        teacher_id=teacher_id,
        student_count=data.student_count,
        generated_tokens=tokens
    )

    return QRBatchResponse(
        batch_id=batch.id,
        session_id=session_id,
        student_count=data.student_count,
        generated_tokens=tokens
    )
|
|
|
|
|
|
@router.get("/sessions/{session_id}/qr-sheet")
async def download_qr_sheet(
    session_id: str,
    batch_id: Optional[str] = Query(None),
    db: Session = Depends(get_db)
):
    """
    Download a printable QR-code sheet as PNG.

    The sheet contains QR codes with doc_tokens that students attach
    to their exams for pseudonymized tracking.
    """
    teacher_id = get_teacher_id()
    repo = KlausurRepository(db)

    if repo.get_session(session_id, teacher_id) is None:
        raise HTTPException(status_code=404, detail="Session not found")

    # Prefer an explicitly requested batch; otherwise fall back to the
    # tokens of documents already uploaded to the session.
    if batch_id:
        batch = repo.get_qr_batch(batch_id, teacher_id)
        if not batch:
            raise HTTPException(status_code=404, detail="QR batch not found")
        tokens = batch.generated_tokens
    else:
        tokens = [doc.doc_token for doc in repo.list_documents(session_id, teacher_id)]
        if not tokens:
            raise HTTPException(status_code=400, detail="No documents or QR batch found")

    try:
        sheet_bytes = get_pseudonymizer().generate_qr_sheet(tokens)
    except RuntimeError as e:
        raise HTTPException(status_code=500, detail=str(e))

    disposition = f"attachment; filename=qr_sheet_{session_id[:8]}.png"
    return StreamingResponse(
        BytesIO(sheet_bytes),
        media_type="image/png",
        headers={"Content-Disposition": disposition}
    )
|
|
|
|
|
|
# =============================================================================
|
|
# Document Upload & Processing Endpoints
|
|
# =============================================================================
|
|
|
|
@router.post("/sessions/{session_id}/upload", response_model=DocumentResponse)
async def upload_document(
    session_id: str,
    file: UploadFile = File(...),
    auto_redact: bool = Query(True, description="Automatically redact header area"),
    db: Session = Depends(get_db)
):
    """
    Upload a scanned exam page.

    Pipeline:
    1. Scan for a QR code to recover the doc_token (or mint a new one).
    2. Redact the header area to strip personal data (if auto_redact=True).
    3. Store the (possibly redacted) image for OCR processing.

    PRIVACY: Header redaction removes student name/class before storage.
    """
    teacher_id = get_teacher_id()
    repo = KlausurRepository(db)

    session = repo.get_session(session_id, teacher_id)
    if not session:
        raise HTTPException(status_code=404, detail="Session not found")

    # Read file content
    content = await file.read()

    pseudonymizer = get_pseudonymizer()

    # Try to recover the pseudonymous token from an embedded QR code.
    qr_result = pseudonymizer.detect_qr_code(content)
    doc_token = qr_result.doc_token

    if not doc_token:
        # Generate new token if QR not found
        doc_token = pseudonymizer.generate_doc_token()
        logger.warning(f"No QR code found in upload, generated new token: {doc_token[:8]}")

    # Redact header if requested, tracking whether redaction actually
    # happened so the storage metadata stays truthful.
    redaction_applied = False
    if auto_redact:
        redaction_result = pseudonymizer.smart_redact_header(content, preserve_qr=True)
        if redaction_result.redaction_applied:
            content = redaction_result.redacted_image
            redaction_applied = True
            logger.info(f"Redacted {redaction_result.redacted_height}px header from document")

    # Create document record
    doc = repo.create_document(
        session_id=session_id,
        teacher_id=teacher_id,
        doc_token=doc_token
    )

    if not doc:
        raise HTTPException(status_code=500, detail="Failed to create document")

    # Store content in MinIO storage (best effort -- the DB record is authoritative).
    try:
        storage = get_storage_service()
        # Only treat the suffix as an extension when the filename actually
        # contains a dot; previously a dotless filename became its own "extension".
        if file.filename and "." in file.filename:
            file_ext = file.filename.rsplit(".", 1)[-1]
        else:
            file_ext = "png"
        storage.upload_document(
            session_id=session_id,
            doc_token=doc_token,
            file_data=content,
            file_extension=file_ext,
            # BUGFIX: previously passed auto_redact here, which labeled scans
            # as redacted even when smart_redact_header applied no redaction.
            is_redacted=redaction_applied
        )
        logger.info(f"Stored document {doc_token[:8]} in MinIO")
    except Exception as e:
        logger.warning(f"Failed to store document in MinIO (continuing anyway): {e}")

    return DocumentResponse(
        doc_token=doc.doc_token,
        session_id=doc.session_id,
        status=doc.status.value,
        page_number=doc.page_number,
        total_pages=doc.total_pages,
        ocr_confidence=doc.ocr_confidence,
        ai_score=doc.ai_score,
        ai_grade=doc.ai_grade,
        ai_feedback=doc.ai_feedback,
        created_at=doc.created_at,
        processing_completed_at=doc.processing_completed_at
    )
|
|
|
|
|
|
@router.get("/sessions/{session_id}/documents", response_model=DocumentListResponse)
async def list_documents(
    session_id: str,
    db: Session = Depends(get_db)
):
    """List all documents in a session (pseudonymized)."""
    teacher_id = get_teacher_id()
    repo = KlausurRepository(db)

    if repo.get_session(session_id, teacher_id) is None:
        raise HTTPException(status_code=404, detail="Session not found")

    docs = repo.list_documents(session_id, teacher_id)

    def to_schema(d) -> DocumentResponse:
        # Map one ORM row onto the pseudonymized wire schema.
        return DocumentResponse(
            doc_token=d.doc_token,
            session_id=d.session_id,
            status=d.status.value,
            page_number=d.page_number,
            total_pages=d.total_pages,
            ocr_confidence=d.ocr_confidence,
            ai_score=d.ai_score,
            ai_grade=d.ai_grade,
            ai_feedback=d.ai_feedback,
            created_at=d.created_at,
            processing_completed_at=d.processing_completed_at
        )

    return DocumentListResponse(
        documents=[to_schema(d) for d in docs],
        total=len(docs)
    )
|
|
|
|
|
|
@router.get("/documents/{doc_token}", response_model=DocumentResponse)
async def get_document(
    doc_token: str,
    db: Session = Depends(get_db)
):
    """Get details of a specific document by its pseudonymous token."""
    repo = KlausurRepository(db)

    doc = repo.get_document(doc_token, get_teacher_id())
    if doc is None:
        raise HTTPException(status_code=404, detail="Document not found")

    fields = {
        "doc_token": doc.doc_token,
        "session_id": doc.session_id,
        "status": doc.status.value,
        "page_number": doc.page_number,
        "total_pages": doc.total_pages,
        "ocr_confidence": doc.ocr_confidence,
        "ai_score": doc.ai_score,
        "ai_grade": doc.ai_grade,
        "ai_feedback": doc.ai_feedback,
        "created_at": doc.created_at,
        "processing_completed_at": doc.processing_completed_at,
    }
    return DocumentResponse(**fields)
|
|
|
|
|
|
# =============================================================================
|
|
# Processing & Correction Endpoints
|
|
# =============================================================================
|
|
|
|
@router.post("/sessions/{session_id}/process", status_code=202)
async def start_processing(
    session_id: str,
    background_tasks: BackgroundTasks,
    use_ai: bool = Query(default=True, description="Run AI correction (requires LLM)"),
    db: Session = Depends(get_db)
):
    """
    Start OCR and AI correction for all uploaded documents.

    Returns 202 immediately; the actual work runs as a FastAPI
    background task:
    1. OCR extraction of student answers (via TrOCR on Mac Mini)
    2. AI-assisted correction using self-hosted LLM
    3. Grade calculation

    Raises:
        404 if the session does not exist (or belongs to another teacher),
        400 if the session has no documents,
        409 if processing is already running.

    PRIVACY: Only pseudonymized text is sent to LLM.
    No student names or personal data.
    """
    teacher_id = get_teacher_id()
    repo = KlausurRepository(db)

    session = repo.get_session(session_id, teacher_id)
    if not session:
        raise HTTPException(status_code=404, detail="Session not found")

    if session.document_count == 0:
        raise HTTPException(status_code=400, detail="No documents to process")

    if session.status == SessionStatus.PROCESSING:
        raise HTTPException(status_code=409, detail="Session is already processing")

    # Mark the session as PROCESSING up front so concurrent start requests
    # are rejected by the 409 check above.
    repo.update_session_status(session_id, teacher_id, SessionStatus.PROCESSING)

    # Start background processing task
    async def run_processing():
        """Background task wrapper.

        Opens its own DB session: the request-scoped one from get_db()
        is closed once this request returns, before the task runs.
        """
        from .database import SessionLocal
        db_session = SessionLocal()
        try:
            service = get_processing_service(db_session)
            await service.process_session(
                session_id=session_id,
                teacher_id=teacher_id,
                use_ai_correction=use_ai
            )
        except Exception as e:
            logger.error(f"Background processing failed: {e}")
            # On failure, roll the status back to CREATED so the teacher
            # can retry (there is no dedicated FAILED state here).
            try:
                repo_err = KlausurRepository(db_session)
                repo_err.update_session_status(session_id, teacher_id, SessionStatus.CREATED)
            except Exception:
                # Best effort only -- never mask the original failure.
                pass
        finally:
            db_session.close()

    # Schedule the task to run after the 202 response is sent.
    background_tasks.add_task(run_processing)

    logger.info(f"Started background processing for session {session_id} with {session.document_count} documents")

    return {
        "status": "processing",
        "message": "Background processing started",
        "session_id": session_id,
        "document_count": session.document_count,
        "use_ai_correction": use_ai
    }
|
|
|
|
|
|
@router.get("/sessions/{session_id}/stats", response_model=ProcessingStats)
async def get_processing_stats(
    session_id: str,
    db: Session = Depends(get_db)
):
    """Return anonymized processing statistics for one session."""
    repo = KlausurRepository(db)

    stats = repo.get_session_stats(session_id, get_teacher_id())
    if not stats:
        raise HTTPException(status_code=404, detail="Session not found")

    return ProcessingStats(**stats)
|
|
|
|
|
|
@router.get("/sessions/{session_id}/results", response_model=List[CorrectionResultResponse])
async def get_correction_results(
    session_id: str,
    db: Session = Depends(get_db)
):
    """
    Get AI correction results (pseudonymized).

    Returns doc_token + scores/grades WITHOUT student names.
    The teacher's client can rejoin these with the encrypted
    identity map to reveal which student each result belongs to.
    """
    teacher_id = get_teacher_id()
    repo = KlausurRepository(db)

    session = repo.get_session(session_id, teacher_id)
    if session is None:
        raise HTTPException(status_code=404, detail="Session not found")

    # Only fully corrected documents carry results worth returning.
    completed_docs = (
        d for d in repo.list_documents(session_id, teacher_id)
        if d.status == DocumentStatus.COMPLETED
    )

    return [
        CorrectionResultResponse(
            doc_token=d.doc_token,
            total_score=d.ai_score or 0,
            max_score=session.total_points,
            grade=d.ai_grade or "",
            overall_feedback=d.ai_feedback or "",
            question_results=d.ai_details.get("question_results", []) if d.ai_details else []
        )
        for d in completed_docs
    ]
|
|
|
|
|
|
# =============================================================================
|
|
# Identity Map (Client-Side Encryption) Endpoints
|
|
# =============================================================================
|
|
|
|
@router.post("/sessions/{session_id}/identity-map", status_code=204)
async def store_identity_map(
    session_id: str,
    data: IdentityMapUpdate,
    db: Session = Depends(get_db)
):
    """
    Store encrypted identity map for a session.

    PRIVACY DESIGN (zero-knowledge storage):
    - The identity map (doc_token -> student name) is encrypted with
      the teacher's password BEFORE being sent to the server.
    - The server stores only the encrypted blob and CANNOT decrypt it.
    - Only the teacher (with their password) can rejoin results.
    """
    import base64

    teacher_id = get_teacher_id()
    repo = KlausurRepository(db)

    try:
        blob = base64.b64decode(data.encrypted_data)
    except Exception:
        raise HTTPException(status_code=400, detail="Invalid base64 data")

    updated = repo.update_session_identity_map(
        session_id=session_id,
        teacher_id=teacher_id,
        encrypted_map=blob,
        iv=data.iv
    )
    if not updated:
        raise HTTPException(status_code=404, detail="Session not found")

    return Response(status_code=204)
|
|
|
|
|
|
@router.get("/sessions/{session_id}/identity-map")
async def get_identity_map(
    session_id: str,
    db: Session = Depends(get_db)
):
    """
    Retrieve the encrypted identity map.

    Returns the opaque blob that the teacher's client decrypts locally
    to rejoin results with student names.
    """
    import base64

    session = KlausurRepository(db).get_session(session_id, get_teacher_id())

    if session is None:
        raise HTTPException(status_code=404, detail="Session not found")
    if not session.encrypted_identity_map:
        raise HTTPException(status_code=404, detail="No identity map stored")

    return {
        "encrypted_data": base64.b64encode(session.encrypted_identity_map).decode(),
        "iv": session.identity_map_iv
    }
|
|
|
|
|
|
# =============================================================================
|
|
# Data Retention Endpoint
|
|
# =============================================================================
|
|
|
|
@router.post("/maintenance/cleanup", status_code=200)
async def cleanup_expired_data(
    db: Session = Depends(get_db)
):
    """
    Purge sessions whose retention period has elapsed (data retention).

    Intended to be invoked periodically, e.g. by a daily cron job;
    deletes every session past its retention_until date.
    """
    removed = KlausurRepository(db).cleanup_expired_sessions()

    return {
        "status": "ok",
        "deleted_sessions": removed,
        "timestamp": datetime.utcnow().isoformat()
    }
|
|
|
|
|
|
# =============================================================================
|
|
# Magic Onboarding Endpoints
|
|
# =============================================================================
|
|
|
|
# Import additional models for Magic Onboarding
|
|
from .db_models import OnboardingSession, DetectedStudent, ModuleLink, OnboardingStatus, ModuleLinkType
|
|
from .services.roster_parser import get_roster_parser
|
|
from .services.school_resolver import get_school_resolver, BUNDESLAENDER, SCHULFORMEN, FAECHER
|
|
from .services.module_linker import get_module_linker, CorrectionResult
|
|
|
|
|
|
class MagicAnalysisRequest(BaseModel):
    """Client-side header-analysis results submitted for magic onboarding."""
    detected_class: Optional[str] = None
    detected_subject: Optional[str] = None
    detected_date: Optional[str] = None
    # One dict per detected student: {firstName, lastNameHint, confidence}.
    students: List[dict] = Field(default=[])  # [{firstName, lastNameHint, confidence}]
    # Overall detection confidence in [0, 1].
    confidence: float = Field(default=0.0, ge=0.0, le=1.0)
|
|
|
|
|
|
class MagicAnalysisResponse(BaseModel):
    """Server response after magic analysis, including school-picker data."""
    onboarding_id: str
    detected_class: Optional[str]
    detected_subject: Optional[str]
    detected_date: Optional[str]
    student_count: int
    confidence: float
    # Lookup data for the cascading school selector in the UI.
    bundeslaender: dict  # For school cascade
    schulformen: dict
    # The teacher's already-existing classes, for optional linking.
    existing_classes: List[dict]  # Teacher's existing classes
|
|
|
|
|
|
class OnboardingConfirmRequest(BaseModel):
    """Teacher-confirmed onboarding data (school, class, students)."""
    onboarding_id: str
    # School context
    bundesland: str
    schulform: str
    school_name: str
    # Class info
    class_name: str
    subject: str
    # Students (confirmed); one dict per student.
    students: List[dict]  # [{firstName, lastName, parentEmail?, parentPhone?}]
    # Options: create a new class, or link to an existing one instead.
    create_class: bool = Field(default=True)
    link_to_existing_class_id: Optional[str] = None
|
|
|
|
|
|
class OnboardingConfirmResponse(BaseModel):
    """Result of confirming onboarding: the created exam session context."""
    session_id: str
    onboarding_id: str
    # None when no class was created or linked.
    class_id: Optional[str]
    student_count: int
    ready_for_correction: bool
|
|
|
|
|
|
class RosterUploadResponse(BaseModel):
    """Outcome of parsing an uploaded class roster."""
    parsed_count: int
    # Entries that could be matched against detected students.
    matched_count: int
    entries: List[dict]  # [{firstName, lastName, parentEmail?, matched: bool}]
    # Human-readable parse warnings for the teacher.
    warnings: List[str]
|
|
|
|
|
|
class MagicCorrectionRequest(BaseModel):
    """Request to start magic correction for an onboarded session."""
    onboarding_id: str
    # General grading criteria; empty means no global rubric.
    rubric: str = Field(default="")
    # Per-question definitions with rubrics.
    questions: List[dict] = Field(default=[])
|
|
|
|
|
|
class ResultsWithLinksResponse(BaseModel):
    """Correction results enriched with module links and follow-up suggestions."""
    results: List[CorrectionResultResponse]
    statistics: dict
    # Links into other platform modules derived from the results.
    module_links: List[dict]
    # Suggested parent-meeting entries derived from the results.
    parent_meeting_suggestions: List[dict]
|
|
|
|
|
|
class FileExtractionRequest(BaseModel):
    """Options for extracting exam info from uploaded files."""
    filenames: List[str] = Field(default=[], description="Original filenames for metadata extraction")
    use_llm: bool = Field(default=True, description="Use LLM for intelligent extraction")
|
|
|
|
|
|
class ExamExtractionResult(BaseModel):
    """Information extracted from a single uploaded exam file."""
    filename: str
    detected_student_name: Optional[str] = None
    detected_last_name_hint: Optional[str] = None
    detected_class: Optional[str] = None
    detected_subject: Optional[str] = None
    detected_date: Optional[str] = None
    # Grade/score fields are filled only when the exam was already corrected.
    detected_grade: Optional[str] = None
    detected_score: Optional[int] = None
    detected_max_score: Optional[int] = None
    # True when the filename marks a make-up exam ("Nachschreiben").
    is_nachschreiben: bool = False
    # True when the file is a continuation page of another exam.
    is_separate_page: bool = False
    page_number: Optional[int] = None
    question_scores: List[dict] = Field(default=[])  # [{question: 1, score: 5, max: 10}]
    # Truncated OCR text, for debugging/inspection in the client.
    raw_text: Optional[str] = None
    # Extraction confidence in [0, 1].
    confidence: float = 0.0
|
|
|
|
|
|
class FileExtractionResponse(BaseModel):
    """Aggregated extraction results across all uploaded exam files."""
    results: List[ExamExtractionResult]
    # Session-level values determined by majority vote over per-file results.
    detected_class: Optional[str] = None
    detected_subject: Optional[str] = None
    detected_date: Optional[str] = None
    student_count: int = 0
    overall_confidence: float = 0.0
|
|
|
|
|
|
@router.post("/magic-onboarding/extract", response_model=FileExtractionResponse)
async def extract_exam_info(
    files: List[UploadFile] = File(...),
    db: Session = Depends(get_db)
):
    """
    Server-side extraction of exam information using OCR and LLM.

    Extracts:
    - Student names from headers
    - Class and subject from context
    - Grades and scores if already corrected
    - Question-level scores

    Uses:
    1. Filename parsing for initial metadata
    2. OCR for text extraction
    3. Ollama/Qwen for intelligent parsing (if available)
    """
    results = []
    # Majority-vote tallies across files for session-level metadata.
    class_votes = {}
    subject_votes = {}
    date_votes = {}

    for file in files:
        filename = file.filename or ""
        content = await file.read()

        # Cheap first pass: metadata encoded in the filename.
        filename_info = _parse_exam_filename(filename)

        result = ExamExtractionResult(
            filename=filename,
            detected_class=filename_info.get('class'),
            detected_subject=filename_info.get('subject'),
            detected_date=filename_info.get('date'),
            is_nachschreiben=filename_info.get('nachschreiben', False),
            is_separate_page=filename_info.get('separate_page', False),
            page_number=filename_info.get('page_number'),
            confidence=0.5  # Base confidence from filename
        )

        # Filename-derived student name bumps confidence slightly.
        if filename_info.get('student_name'):
            result.detected_student_name = filename_info['student_name']
            result.confidence = 0.7

        # Vote for session-wide class/subject/date.
        if result.detected_class:
            class_votes[result.detected_class] = class_votes.get(result.detected_class, 0) + 1
        if result.detected_subject:
            subject_votes[result.detected_subject] = subject_votes.get(result.detected_subject, 0) + 1
        if result.detected_date:
            date_votes[result.detected_date] = date_votes.get(result.detected_date, 0) + 1

        # Second pass: LLM extraction via Ollama (best effort; failure only
        # downgrades this file to filename-derived metadata).
        try:
            llm_result = await _extract_with_ollama(content, filename)
            if llm_result:
                result.detected_student_name = llm_result.get('student_name') or result.detected_student_name
                result.detected_last_name_hint = llm_result.get('last_name_hint')
                result.detected_grade = llm_result.get('grade')
                result.detected_score = llm_result.get('score')
                result.detected_max_score = llm_result.get('max_score')
                result.question_scores = llm_result.get('question_scores', [])
                result.raw_text = llm_result.get('raw_text', '')[:500]  # Truncate for response
                result.confidence = max(result.confidence, llm_result.get('confidence', 0.0))
        except Exception as e:
            # BUGFIX: log the actual filename -- previously the message
            # contained the literal text "(unknown)" instead of the file.
            logger.warning(f"LLM extraction failed for {filename or '(unknown)'}: {e}")

        results.append(result)

    # Determine overall detected values by majority vote.
    detected_class = max(class_votes.items(), key=lambda x: x[1])[0] if class_votes else None
    detected_subject = max(subject_votes.items(), key=lambda x: x[1])[0] if subject_votes else None
    detected_date = max(date_votes.items(), key=lambda x: x[1])[0] if date_votes else None
    overall_confidence = sum(r.confidence for r in results) / len(results) if results else 0.0

    return FileExtractionResponse(
        results=results,
        detected_class=detected_class,
        detected_subject=detected_subject,
        detected_date=detected_date,
        student_count=len(results),
        overall_confidence=overall_confidence
    )
|
|
|
|
|
|
def _parse_exam_filename(filename: str) -> dict:
|
|
"""
|
|
Parse exam filename for metadata.
|
|
|
|
Expected patterns:
|
|
- 20260119_103820_Mathe_Klasse_3-1_2026-01-15_085630.pdf
|
|
- Mathe_Klasse_3_Nachschreiben_2026-01-15_090901.pdf
|
|
- Mathe_Klasse_3-2_Miguel_Seite_2_2026-01-15_090620.pdf
|
|
"""
|
|
import re
|
|
|
|
result = {
|
|
'class': None,
|
|
'subject': None,
|
|
'date': None,
|
|
'nachschreiben': False,
|
|
'separate_page': False,
|
|
'page_number': None,
|
|
'student_name': None
|
|
}
|
|
|
|
# Remove extension
|
|
name = filename.rsplit('.', 1)[0] if '.' in filename else filename
|
|
|
|
# Detect subject (common German subjects)
|
|
subjects = ['Mathe', 'Mathematik', 'Deutsch', 'Englisch', 'Physik', 'Chemie', 'Bio', 'Biologie',
|
|
'Geschichte', 'Erdkunde', 'Geographie', 'Kunst', 'Musik', 'Sport', 'Informatik',
|
|
'Französisch', 'Latein', 'Spanisch', 'Religion', 'Ethik', 'Politik', 'Wirtschaft']
|
|
for subject in subjects:
|
|
if subject.lower() in name.lower():
|
|
result['subject'] = subject
|
|
break
|
|
|
|
# Detect class (e.g., Klasse_3-1, 3a, 10b, Q1)
|
|
class_patterns = [
|
|
r'Klasse[_\s]*(\d+[-a-zA-Z0-9]*)', # Klasse_3-1, Klasse 10a
|
|
r'(\d{1,2}[a-zA-Z])', # 3a, 10b
|
|
r'(Q[12])', # Q1, Q2 (Oberstufe)
|
|
r'(E[PF])', # EP, EF (Einführungsphase)
|
|
]
|
|
for pattern in class_patterns:
|
|
match = re.search(pattern, name, re.IGNORECASE)
|
|
if match:
|
|
result['class'] = match.group(1)
|
|
break
|
|
|
|
# Detect date (YYYY-MM-DD or DD.MM.YYYY)
|
|
date_patterns = [
|
|
r'(\d{4}-\d{2}-\d{2})', # 2026-01-15
|
|
r'(\d{2}\.\d{2}\.\d{4})', # 15.01.2026
|
|
]
|
|
for pattern in date_patterns:
|
|
match = re.search(pattern, name)
|
|
if match:
|
|
result['date'] = match.group(1)
|
|
break
|
|
|
|
# Detect Nachschreiben
|
|
if 'nachschreib' in name.lower():
|
|
result['nachschreiben'] = True
|
|
|
|
# Detect separate page (Seite_2)
|
|
page_match = re.search(r'Seite[_\s]*(\d+)', name, re.IGNORECASE)
|
|
if page_match:
|
|
result['separate_page'] = True
|
|
result['page_number'] = int(page_match.group(1))
|
|
|
|
# Try to extract student name (usually after class, before date)
|
|
# Pattern: ...Klasse_3-2_Miguel_Seite...
|
|
name_match = re.search(r'Klasse[_\s]*\d+[-a-zA-Z0-9]*[_\s]+([A-Z][a-z]+)(?:[_\s]|$)', name)
|
|
if name_match:
|
|
potential_name = name_match.group(1)
|
|
# Exclude common non-name words
|
|
if potential_name not in ['Seite', 'Nachschreiben', 'Teil', 'Aufgabe']:
|
|
result['student_name'] = potential_name
|
|
|
|
return result
|
|
|
|
|
|
async def _extract_with_ollama(content: bytes, filename: str) -> Optional[dict]:
    """
    Use Ollama (local or Mac Mini) to extract information from exam content.

    Tries local Ollama first, then Mac Mini if configured. Returns the
    parsed extraction dict, or None when no endpoint produced usable JSON.
    """
    import httpx
    import base64

    # Ollama endpoints to try, in priority order
    ollama_endpoints = [
        "http://localhost:11434",  # Local
        "http://192.168.178.163:11434",  # Mac Mini
    ]

    # Convert PDF first page to image if needed
    image_data = None
    if filename.lower().endswith('.pdf'):
        try:
            # Requires PyMuPDF or pdf2image; returns None if neither is installed
            image_data = await _pdf_to_image(content)
        except Exception as e:
            logger.warning(f"PDF conversion failed: {e}")
            return None
    elif filename.lower().endswith(('.png', '.jpg', '.jpeg')):
        image_data = content

    if not image_data:
        return None

    # Create prompt for extraction
    prompt = """Analysiere dieses Bild einer Klausur/Klassenarbeit und extrahiere folgende Informationen im JSON-Format:
{
"student_name": "Vorname des Schülers (falls sichtbar)",
"last_name_hint": "Anfangsbuchstabe des Nachnamens (z.B. 'M.' falls sichtbar)",
"grade": "Note falls eingetragen (z.B. '2+', '3', '5-')",
"score": Punktzahl als Zahl (falls vorhanden),
"max_score": Maximale Punktzahl als Zahl (falls vorhanden),
"question_scores": [{"question": 1, "score": 5, "max": 10}],
"confidence": Konfidenz 0.0-1.0
}
Antworte NUR mit dem JSON, kein zusätzlicher Text."""

    # Try each endpoint
    for endpoint in ollama_endpoints:
        try:
            async with httpx.AsyncClient(timeout=30.0) as client:
                # Check if a vision model is available
                response = await client.get(f"{endpoint}/api/tags")
                if response.status_code != 200:
                    continue

                models = response.json().get('models', [])
                # Prefer vision models: llava, bakllava, moondream, qwen2-vl
                vision_model = None
                for m in models:
                    name = m.get('name', '').lower()
                    if any(vm in name for vm in ['llava', 'moondream', 'qwen', 'vision']):
                        vision_model = m['name']
                        break

                # Fall back to text model with OCR
                model = vision_model or (models[0]['name'] if models else None)
                if not model:
                    continue

                # Call Ollama
                request_data = {
                    "model": model,
                    "prompt": prompt,
                    "stream": False
                }

                if vision_model and image_data:
                    request_data["images"] = [base64.b64encode(image_data).decode()]

                response = await client.post(
                    f"{endpoint}/api/generate",
                    json=request_data
                )

                if response.status_code == 200:
                    result_text = response.json().get('response', '')
                    # Extract the JSON object as the span from the first '{'
                    # to the last '}'. A non-nested regex like \{[^}]+\} would
                    # truncate at the first closing brace and therefore fail
                    # whenever the model returns the nested question_scores
                    # objects that the prompt explicitly asks for.
                    start = result_text.find('{')
                    end = result_text.rfind('}')
                    if start != -1 and end > start:
                        try:
                            return json.loads(result_text[start:end + 1])
                        except json.JSONDecodeError:
                            logger.warning("Failed to parse LLM response as JSON")
                    return None

        except Exception as e:
            logger.debug(f"Ollama endpoint {endpoint} failed: {e}")
            continue

    return None
|
|
|
|
|
|
async def _pdf_to_image(content: bytes) -> Optional[bytes]:
|
|
"""Convert first page of PDF to PNG image."""
|
|
try:
|
|
import fitz # PyMuPDF
|
|
doc = fitz.open(stream=content, filetype="pdf")
|
|
page = doc[0]
|
|
pix = page.get_pixmap(dpi=150)
|
|
return pix.tobytes("png")
|
|
except ImportError:
|
|
pass
|
|
|
|
try:
|
|
from pdf2image import convert_from_bytes
|
|
images = convert_from_bytes(content, first_page=1, last_page=1, dpi=150)
|
|
if images:
|
|
from io import BytesIO
|
|
buffer = BytesIO()
|
|
images[0].save(buffer, format='PNG')
|
|
return buffer.getvalue()
|
|
except ImportError:
|
|
pass
|
|
|
|
return None
|
|
|
|
|
|
@router.post("/magic-onboarding/analyze", response_model=MagicAnalysisResponse)
async def magic_analyze(
    data: MagicAnalysisRequest,
    db: Session = Depends(get_db)
):
    """
    Phase 1: Store client-side analysis results and prepare for confirmation.

    The actual header extraction happens client-side using the local LLM.
    This endpoint stores the results and provides school cascade data.

    Args:
        data: Client-detected class/subject/date plus per-student detections.
        db: SQLAlchemy session (injected).

    Returns:
        MagicAnalysisResponse with the new onboarding session id, the
        Bundesland/Schulform lookup tables, and the teacher's existing
        classes for the "link to existing class" step.
    """
    teacher_id = get_teacher_id()
    repo = KlausurRepository(db)
    resolver = get_school_resolver()

    # Create onboarding session; confidence is persisted as an int percentage
    onboarding = OnboardingSession(
        teacher_id=teacher_id,
        detected_class=data.detected_class,
        detected_subject=data.detected_subject,
        detected_student_count=len(data.students),
        detection_confidence=int(data.confidence * 100),
        status=OnboardingStatus.CONFIRMING
    )
    onboarding.analysis_completed_at = datetime.utcnow()
    db.add(onboarding)

    # Store detected students.
    # NOTE(review): onboarding.id is read before commit/flush — this relies on
    # the model assigning its id client-side (e.g. a uuid default); confirm.
    for student_data in data.students:
        student = DetectedStudent(
            onboarding_session_id=onboarding.id,
            detected_first_name=student_data.get('firstName'),
            detected_last_name_hint=student_data.get('lastNameHint'),
            confidence=int(student_data.get('confidence', 0) * 100)
        )
        db.add(student)

    db.commit()
    db.refresh(onboarding)

    # Get teacher's existing classes (offered as link targets in the UI)
    existing_classes = await resolver.get_classes_for_teacher(teacher_id)

    return MagicAnalysisResponse(
        onboarding_id=onboarding.id,
        detected_class=onboarding.detected_class,
        detected_subject=onboarding.detected_subject,
        detected_date=data.detected_date,
        student_count=onboarding.detected_student_count,
        confidence=data.confidence,
        bundeslaender=BUNDESLAENDER,
        schulformen={k: v['name'] for k, v in SCHULFORMEN.items()},
        existing_classes=[{
            'id': c.id,
            'name': c.name,
            'grade_level': c.grade_level
        } for c in existing_classes]
    )
|
|
|
|
|
|
@router.post("/magic-onboarding/upload-roster", response_model=RosterUploadResponse)
async def upload_roster(
    onboarding_id: str = Query(...),
    file: UploadFile = File(...),
    db: Session = Depends(get_db)
):
    """
    Phase 2a: Upload Klassenbuch photo or roster file.

    Parses the uploaded file (image, PDF or CSV) and matches names to
    the students detected during analysis.

    Raises:
        HTTPException 404: onboarding session not found for this teacher.
        HTTPException 400: unsupported (or missing) file name/format.
    """
    teacher_id = get_teacher_id()
    parser = get_roster_parser()

    # Teacher-scoped lookup — prevents cross-teacher access
    onboarding = db.query(OnboardingSession).filter(
        OnboardingSession.id == onboarding_id,
        OnboardingSession.teacher_id == teacher_id
    ).first()

    if not onboarding:
        raise HTTPException(status_code=404, detail="Onboarding session not found")

    # Read file. UploadFile.filename can be None for anonymous multipart
    # parts; guard so that case yields a clean 400 instead of a 500.
    content = await file.read()
    filename = (file.filename or '').lower()

    # Parse based on file type
    if filename.endswith(('.png', '.jpg', '.jpeg')):
        roster = parser.parse_klassenbuch_image(content)
    elif filename.endswith('.pdf'):
        roster = parser.parse_pdf_roster(content)
    elif filename.endswith('.csv'):
        roster = parser.parse_csv_roster(content.decode('utf-8'))
    else:
        raise HTTPException(status_code=400, detail="Unsupported file format")

    # Get detected students for this onboarding session
    detected_students = db.query(DetectedStudent).filter(
        DetectedStudent.onboarding_session_id == onboarding_id
    ).all()

    detected_names = [s.detected_first_name for s in detected_students if s.detected_first_name]

    # Fuzzy-match detected first names against roster entries
    matches = parser.match_first_names(detected_names, roster.entries)

    # Update detected students with matched data (match threshold 0.7)
    matched_count = 0
    for match in matches:
        if match.matched_entry and match.confidence > 0.7:
            for student in detected_students:
                if student.detected_first_name == match.detected_name:
                    student.confirmed_first_name = match.matched_entry.first_name
                    student.confirmed_last_name = match.matched_entry.last_name
                    student.parent_email = match.matched_entry.parent_email
                    student.parent_phone = match.matched_entry.parent_phone
                    matched_count += 1
                    break

    db.commit()

    return RosterUploadResponse(
        parsed_count=len(roster.entries),
        matched_count=matched_count,
        entries=[{
            'firstName': e.first_name,
            'lastName': e.last_name,
            'parentEmail': e.parent_email,
            'parentPhone': e.parent_phone,
            'matched': any(
                m.matched_entry and m.matched_entry.first_name == e.first_name
                for m in matches
            )
        } for e in roster.entries],
        warnings=roster.warnings
    )
|
|
|
|
|
|
@router.post("/magic-onboarding/confirm", response_model=OnboardingConfirmResponse)
async def confirm_onboarding(
    data: OnboardingConfirmRequest,
    db: Session = Depends(get_db)
):
    """
    Phase 2b: Confirm onboarding data and create class/session.

    Creates the school class (if requested) and exam session.

    Args:
        data: Teacher-confirmed school context, class/subject names and
            the final student list.
        db: SQLAlchemy session (injected).

    Returns:
        OnboardingConfirmResponse with the new exam session id and class id.

    Raises:
        HTTPException 404: onboarding session not found for this teacher.
    """
    teacher_id = get_teacher_id()
    repo = KlausurRepository(db)
    resolver = get_school_resolver()

    # Teacher-scoped lookup — prevents cross-teacher access
    onboarding = db.query(OnboardingSession).filter(
        OnboardingSession.id == data.onboarding_id,
        OnboardingSession.teacher_id == teacher_id
    ).first()

    if not onboarding:
        raise HTTPException(status_code=404, detail="Onboarding session not found")

    # Update school context with the teacher-confirmed values
    onboarding.bundesland = data.bundesland
    onboarding.schulform = data.schulform
    onboarding.school_name = data.school_name
    onboarding.confirmed_class = data.class_name
    onboarding.confirmed_subject = data.subject
    onboarding.confirmation_completed_at = datetime.utcnow()

    # Either link to an existing class or create a new one below
    class_id = data.link_to_existing_class_id

    # Create class if requested (and no existing class was selected)
    if data.create_class and not class_id:
        from .services.school_resolver import DetectedClassInfo

        # Get or create school
        school = await resolver.get_or_create_school(
            teacher_id=teacher_id,
            bundesland=data.bundesland,
            schulform=data.schulform,
            school_name=data.school_name
        )
        onboarding.linked_school_id = school.id

        # Create class
        class_info = DetectedClassInfo(
            class_name=data.class_name,
            students=data.students
        )
        school_class = await resolver.auto_create_class(
            teacher_id=teacher_id,
            school_id=school.id,
            detected_info=class_info
        )
        class_id = school_class.id
        onboarding.linked_class_id = class_id

    # Create exam session.
    # NOTE(review): total_points is hard-coded to 100 here — presumably
    # adjusted later in the correction flow; confirm.
    session = repo.create_session(
        teacher_id=teacher_id,
        name=f"{data.subject} - {data.class_name}",
        subject=data.subject,
        class_name=data.class_name,
        total_points=100
    )
    session.linked_school_class_id = class_id
    onboarding.klausur_session_id = session.id
    onboarding.status = OnboardingStatus.PROCESSING

    # Update detected students with the teacher-confirmed data
    for student_data in data.students:
        # Update or create detected student (only updates are performed here;
        # students without a matching detection row are skipped)
        first_name = student_data.get('firstName')
        if first_name:
            student = db.query(DetectedStudent).filter(
                DetectedStudent.onboarding_session_id == data.onboarding_id,
                DetectedStudent.detected_first_name == first_name
            ).first()

            if student:
                student.confirmed_first_name = first_name
                student.confirmed_last_name = student_data.get('lastName', '')
                student.parent_email = student_data.get('parentEmail')
                student.parent_phone = student_data.get('parentPhone')

    db.commit()

    return OnboardingConfirmResponse(
        session_id=session.id,
        onboarding_id=onboarding.id,
        class_id=class_id,
        student_count=len(data.students),
        ready_for_correction=True
    )
|
|
|
|
|
|
@router.post("/magic-onboarding/start-correction")
async def start_magic_correction(
    data: MagicCorrectionRequest,
    db: Session = Depends(get_db)
):
    """
    Phase 3: Start background correction.

    Marks the onboarding session as processing; the actual AI correction
    is driven through the existing /sessions/{id}/process endpoint.
    """
    teacher_id = get_teacher_id()

    # Teacher-scoped lookup of the onboarding session
    onboarding = (
        db.query(OnboardingSession)
        .filter(
            OnboardingSession.id == data.onboarding_id,
            OnboardingSession.teacher_id == teacher_id,
        )
        .first()
    )

    if onboarding is None:
        raise HTTPException(status_code=404, detail="Onboarding session not found")

    if not onboarding.klausur_session_id:
        raise HTTPException(status_code=400, detail="Session not confirmed yet")

    # Record when processing started and persist it
    onboarding.processing_started_at = datetime.utcnow()
    db.commit()

    # The actual correction is triggered via the existing /sessions/{id}/process endpoint
    return {
        "status": "started",
        "session_id": onboarding.klausur_session_id,
        "onboarding_id": onboarding.id,
        "message": "Korrektur gestartet. Verwende /sessions/{id}/progress-stream fuer Updates."
    }
|
|
|
|
|
|
@router.get("/sessions/{session_id}/results-with-links", response_model=ResultsWithLinksResponse)
async def get_results_with_links(
    session_id: str,
    db: Session = Depends(get_db)
):
    """
    Phase 4: Get results with module links.

    Returns correction results along with suggestions for module linking.

    Args:
        session_id: Exam session id (teacher-scoped).
        db: SQLAlchemy session (injected).

    Returns:
        ResultsWithLinksResponse with per-document results, grade
        statistics, existing module links and parent-meeting suggestions.

    Raises:
        HTTPException 404: session not found for this teacher.
    """
    teacher_id = get_teacher_id()
    repo = KlausurRepository(db)
    linker = get_module_linker()

    session = repo.get_session(session_id, teacher_id)
    if not session:
        raise HTTPException(status_code=404, detail="Session not found")

    # Get documents; only fully corrected ones contribute results
    documents = repo.list_documents(session_id, teacher_id)
    completed_docs = [d for d in documents if d.status == DocumentStatus.COMPLETED]

    # Build correction results — two parallel shapes: the API response
    # objects and the linker's internal CorrectionResult records.
    results = []
    correction_results = []  # For linker

    for doc in completed_docs:
        result = CorrectionResultResponse(
            doc_token=doc.doc_token,
            total_score=doc.ai_score or 0,
            max_score=session.total_points,
            grade=doc.ai_grade or "",
            overall_feedback=doc.ai_feedback or "",
            question_results=doc.ai_details.get('question_results', []) if doc.ai_details else []
        )
        results.append(result)

        correction_results.append(CorrectionResult(
            doc_token=doc.doc_token,
            score=float(doc.ai_score or 0),
            max_score=float(session.total_points),
            grade=doc.ai_grade or "",
            feedback=doc.ai_feedback or ""
        ))

    # Calculate statistics
    stats = linker.calculate_grade_statistics(correction_results)

    # Get existing module links
    links = db.query(ModuleLink).filter(
        ModuleLink.klausur_session_id == session_id
    ).all()

    # Generate parent meeting suggestions
    meeting_suggestions = linker.suggest_elternabend(
        results=correction_results,
        subject=session.subject
    )

    return ResultsWithLinksResponse(
        results=results,
        statistics=stats,
        module_links=[{
            'id': link.id,
            'type': link.link_type.value,
            'module': link.target_module,
            'url': link.target_url
        } for link in links],
        parent_meeting_suggestions=[{
            'doc_token': s.doc_token,
            'reason': s.reason,
            'urgency': s.urgency.value,
            'grade': s.grade,
            'topics': s.suggested_topics
        } for s in meeting_suggestions]
    )
|
|
|
|
|
|
@router.post("/sessions/{session_id}/link-to-module")
async def create_module_link(
    session_id: str,
    link_type: str = Query(..., description="notenbuch, elternabend, zeugnis, calendar"),
    db: Session = Depends(get_db)
):
    """
    Phase 4: Create a link to another module.

    Creates the actual connection to Notenbuch, Elternabend, etc.

    Args:
        session_id: Exam session id (teacher-scoped).
        link_type: One of notenbuch, elternabend, zeugnis, calendar.
        db: SQLAlchemy session (injected).

    Returns:
        dict with success flag, message and optional target_url.

    Raises:
        HTTPException 404: session not found for this teacher.
        HTTPException 400: unknown link_type.
    """
    teacher_id = get_teacher_id()
    repo = KlausurRepository(db)
    linker = get_module_linker()

    session = repo.get_session(session_id, teacher_id)
    if not session:
        raise HTTPException(status_code=404, detail="Session not found")

    # Get documents; only completed corrections feed the target module
    documents = repo.list_documents(session_id, teacher_id)
    completed_docs = [d for d in documents if d.status == DocumentStatus.COMPLETED]

    # Build correction results in the linker's record shape
    correction_results = [
        CorrectionResult(
            doc_token=doc.doc_token,
            score=float(doc.ai_score or 0),
            max_score=float(session.total_points),
            grade=doc.ai_grade or "",
            feedback=doc.ai_feedback or ""
        )
        for doc in completed_docs
    ]

    # Dispatch on link_type; each branch sets a result object exposing
    # at least .success and .message.
    result = None

    if link_type == "notenbuch":
        result = await linker.link_to_notenbuch(
            session_id=session_id,
            class_id=session.linked_school_class_id or "",
            subject=session.subject,
            results=correction_results,
            exam_name=session.name,
            exam_date=session.created_at.strftime("%Y-%m-%d")
        )

    elif link_type == "elternabend":
        suggestions = linker.suggest_elternabend(
            results=correction_results,
            subject=session.subject
        )
        result = await linker.create_elternabend_link(
            session_id=session_id,
            suggestions=suggestions,
            teacher_id=teacher_id
        )

    elif link_type == "zeugnis":
        grades = {r.doc_token: r.grade for r in correction_results}
        result = await linker.update_zeugnis(
            class_id=session.linked_school_class_id or "",
            subject=session.subject,
            grades=grades
        )

    elif link_type == "calendar":
        suggestions = linker.suggest_elternabend(
            results=correction_results,
            subject=session.subject
        )
        events = await linker.create_calendar_events(
            teacher_id=teacher_id,
            meetings=suggestions
        )
        # Ad-hoc object so the calendar branch matches the .success/.message
        # shape returned by the other linker calls
        result = type('obj', (object,), {
            'success': len(events) > 0,
            'message': f"{len(events)} Kalendereintraege erstellt"
        })()

    else:
        raise HTTPException(status_code=400, detail=f"Unknown link type: {link_type}")

    if result and result.success:
        # Store the link. The attribute access on result.link is guarded by
        # the conditional expression: it is only evaluated when hasattr()
        # and truthiness both hold, so the {} default is never dereferenced.
        link = ModuleLink(
            klausur_session_id=session_id,
            link_type=ModuleLinkType(link_type),
            target_module=link_type,
            target_entity_id=getattr(result, 'link', {}).target_entity_id if hasattr(result, 'link') and result.link else "",
            target_url=getattr(result, 'target_url', None)
        )
        db.add(link)
        db.commit()

    return {
        "success": result.success if result else False,
        "message": result.message if result else "Unknown error",
        "target_url": getattr(result, 'target_url', None) if result else None
    }
|
|
|
|
|
|
@router.get("/school-data/bundeslaender")
async def get_bundeslaender():
    """Return the list of German federal states (Bundesländer)."""
    payload = {"bundeslaender": BUNDESLAENDER}
    return payload
|
|
|
|
|
|
@router.get("/school-data/schulformen")
async def get_schulformen():
    """Return the id -> display-name mapping of supported school types."""
    names = {}
    for key, info in SCHULFORMEN.items():
        names[key] = info['name']
    return {"schulformen": names}
|
|
|
|
|
|
@router.get("/school-data/faecher")
async def get_faecher():
    """Return the id -> display-name mapping of supported subjects."""
    names = {}
    for key, info in FAECHER.items():
        names[key] = info['name']
    return {"faecher": names}
|
|
|
|
|
|
# =============================================================================
|
|
# TrOCR HANDWRITING RECOGNITION ENDPOINTS
|
|
# =============================================================================
|
|
|
|
class TrOCRExtractRequest(BaseModel):
    """Request body for TrOCR text extraction.

    Field descriptions feed the generated OpenAPI schema.
    """
    # When True, the image is segmented into individual text lines first
    detect_lines: bool = Field(default=True, description="Detect and process text lines separately")
|
|
|
|
|
|
class TrOCRTrainingRequest(BaseModel):
    """Request body to add a TrOCR training example (teacher-corrected text)."""
    # The human-verified transcription paired with the uploaded image
    ground_truth: str = Field(..., min_length=1, description="Correct text for the image")
|
|
|
|
|
|
class TrOCRFineTuneRequest(BaseModel):
    """Request body to start a TrOCR fine-tuning run."""
    # Number of passes over the training set (bounded to keep runs short)
    epochs: int = Field(default=3, ge=1, le=10)
    # Optimizer learning rate; 5e-5 is the usual LoRA starting point
    learning_rate: float = Field(default=5e-5, gt=0, lt=1)
|
|
|
|
|
|
@router.post("/trocr/extract")
async def trocr_extract(
    file: UploadFile = File(...),
    detect_lines: bool = Query(default=True),
    teacher_id: str = Query(default="teacher_1")
):
    """
    Extract handwritten text from an image using TrOCR.

    This endpoint uses Microsoft's TrOCR model optimized for handwriting.
    Processing happens on Mac Mini TrOCR service - no cloud, only local network.

    Args:
        file: Image file (PNG, JPG)
        detect_lines: If True, detect individual text lines
        teacher_id: Teacher ID for logging

    Returns:
        Extracted text with confidence scores

    Raises:
        HTTPException 503: neither remote nor local TrOCR is available.
        HTTPException 500: extraction failed.
    """
    # Read the upload exactly once. Previously the remote branch read the
    # stream and the local fallback read it again — after a failed remote
    # attempt the second read() returned empty bytes (stream exhausted),
    # silently corrupting the fallback path.
    content = await file.read()

    # Try remote TrOCR client first (Mac Mini)
    try:
        from .services.trocr_client import get_trocr_client

        client = get_trocr_client()

        if await client.is_available():
            result = await client.extract_text(
                content,
                filename=file.filename or "image.png",
                detect_lines=detect_lines
            )

            return {
                "text": result.text,
                "confidence": result.confidence,
                "bounding_boxes": [],
                "processing_time_ms": result.processing_time_ms,
                "model": "trocr-base-handwritten",
                "device": result.device,
                "service": "mac-mini"
            }
    except Exception as e:
        # Best-effort: fall through to the local service on any remote error
        logger.warning(f"Remote TrOCR client failed: {e}")

    # Fallback to local TrOCR service
    try:
        from .services.trocr_service import get_trocr_service

        service = get_trocr_service()
        result = await service.extract_text(content, detect_lines=detect_lines)

        return {
            "text": result.text,
            "confidence": result.confidence,
            "bounding_boxes": result.bounding_boxes,
            "processing_time_ms": result.processing_time_ms,
            "model": service.model_name,
            "has_lora_adapter": service._lora_adapter is not None,
            "service": "local"
        }

    except ImportError as e:
        logger.error(f"TrOCR not available locally or remotely: {e}")
        raise HTTPException(
            status_code=503,
            detail="TrOCR not available. Mac Mini service unreachable and local dependencies missing."
        )
    except Exception as e:
        logger.error(f"TrOCR extraction failed: {e}")
        raise HTTPException(status_code=500, detail=str(e))
|
|
|
|
|
|
@router.post("/trocr/batch-extract")
async def trocr_batch_extract(
    files: List[UploadFile] = File(...),
    detect_lines: bool = Query(default=True),
    teacher_id: str = Query(default="teacher_1")
):
    """
    Extract handwritten text from multiple images.

    Args:
        files: List of image files
        detect_lines: If True, detect individual text lines
        teacher_id: Teacher ID for logging

    Returns:
        List of extraction results
    """
    try:
        from .services.trocr_service import get_trocr_service

        service = get_trocr_service()

        # Load every upload into memory before running the batch
        payloads = []
        for upload in files:
            payloads.append(await upload.read())

        # Run recognition over the whole batch at once
        extraction = await service.batch_extract(payloads, detect_lines=detect_lines)

        per_file = []
        for idx, item in enumerate(extraction):
            per_file.append({
                "filename": files[idx].filename,
                "text": item.text,
                "confidence": item.confidence,
                "processing_time_ms": item.processing_time_ms
            })

        return {
            "results": per_file,
            "total_files": len(files),
            "model": service.model_name
        }

    except ImportError as e:
        raise HTTPException(status_code=503, detail=f"TrOCR not available: {e}")
    except Exception as e:
        logger.error(f"TrOCR batch extraction failed: {e}")
        raise HTTPException(status_code=500, detail=str(e))
|
|
|
|
|
|
@router.post("/trocr/training/add")
async def trocr_add_training_example(
    file: UploadFile = File(...),
    ground_truth: str = Query(..., min_length=1),
    teacher_id: str = Query(default="teacher_1")
):
    """
    Add a training example for TrOCR fine-tuning.

    When a teacher corrects OCR output, submit the correction here
    to improve future recognition accuracy.

    Args:
        file: Image file with handwritten text
        ground_truth: The correct text (teacher-corrected)
        teacher_id: Teacher ID (for tracking)

    Returns:
        Example ID
    """
    try:
        from .services.trocr_service import get_trocr_service

        service = get_trocr_service()

        image_bytes = await file.read()

        # Persist the (image, correct text) pair for the next fine-tune run
        example_id = service.add_training_example(
            image_data=image_bytes,
            ground_truth=ground_truth,
            teacher_id=teacher_id
        )

        model_info = service.get_model_info()

        return {
            "example_id": example_id,
            "ground_truth": ground_truth,
            "teacher_id": teacher_id,
            "total_examples": model_info["training_examples_count"],
            "message": "Training example added successfully"
        }

    except Exception as e:
        logger.error(f"Failed to add training example: {e}")
        raise HTTPException(status_code=500, detail=str(e))
|
|
|
|
|
|
@router.post("/trocr/training/fine-tune")
async def trocr_fine_tune(
    request: TrOCRFineTuneRequest,
    teacher_id: str = Query(default=None)
):
    """
    Start fine-tuning TrOCR with collected training examples.

    Uses LoRA for efficient fine-tuning. Requires at least 10 training examples.

    Args:
        request: Fine-tuning parameters
        teacher_id: If provided, only use examples from this teacher

    Returns:
        Training results
    """
    try:
        from .services.trocr_service import get_trocr_service

        trocr = get_trocr_service()

        # Delegate the whole LoRA training run to the service layer and
        # return its result payload unchanged
        return await trocr.fine_tune(
            teacher_id=teacher_id,
            epochs=request.epochs,
            learning_rate=request.learning_rate
        )

    except ImportError as e:
        raise HTTPException(
            status_code=503,
            detail=f"Fine-tuning dependencies not installed: {e}. Install with: pip install peft"
        )
    except Exception as e:
        logger.error(f"Fine-tuning failed: {e}")
        raise HTTPException(status_code=500, detail=str(e))
|
|
|
|
|
|
@router.get("/trocr/training/examples")
async def trocr_list_training_examples(
    teacher_id: str = Query(default=None)
):
    """
    List training examples.

    Args:
        teacher_id: If provided, filter by teacher

    Returns:
        List of training examples
    """
    try:
        from .services.trocr_service import get_trocr_service

        service = get_trocr_service()
        examples = service.get_training_examples(teacher_id)

        listing = []
        for ex in examples:
            # Truncate long ground-truth strings for a compact listing
            if len(ex.ground_truth) > 100:
                preview = ex.ground_truth[:100] + "..."
            else:
                preview = ex.ground_truth
            listing.append({
                "image_path": ex.image_path,
                "ground_truth": preview,
                "teacher_id": ex.teacher_id,
                "created_at": ex.created_at
            })

        return {
            "examples": listing,
            "total": len(examples)
        }

    except Exception as e:
        logger.error(f"Failed to list training examples: {e}")
        raise HTTPException(status_code=500, detail=str(e))
|
|
|
|
|
|
@router.get("/trocr/status")
async def trocr_status():
    """
    Get TrOCR model status and info.

    Probes the remote (Mac Mini) service first, then the local one, and
    reports both under "services". "primary_service" names whichever
    available service was found first.

    Returns:
        Model information including device, adapter status, etc.
    """
    result = {
        "status": "unavailable",
        "services": {}
    }

    # Check Mac Mini TrOCR service.
    # NOTE(review): when the client import succeeds but is_available() is
    # False, no "mac_mini" entry is recorded at all — confirm intended.
    try:
        from .services.trocr_client import get_trocr_client

        client = get_trocr_client()
        if await client.is_available():
            remote_status = await client.get_status()
            result["services"]["mac_mini"] = {
                "status": "available",
                **remote_status
            }
            result["status"] = "available"
            result["primary_service"] = "mac_mini"
    except Exception as e:
        result["services"]["mac_mini"] = {
            "status": "error",
            "error": str(e)
        }

    # Check local TrOCR service; it only becomes primary when the remote
    # one was not already marked available.
    try:
        from .services.trocr_service import get_trocr_service

        service = get_trocr_service()
        info = service.get_model_info()
        result["services"]["local"] = {
            "status": "available",
            **info
        }
        if result["status"] != "available":
            result["status"] = "available"
            result["primary_service"] = "local"

    except ImportError as e:
        # Local dependencies (transformers etc.) not installed
        result["services"]["local"] = {
            "status": "not_installed",
            "error": str(e)
        }
    except Exception as e:
        result["services"]["local"] = {
            "status": "error",
            "error": str(e)
        }

    return result
|