feat: BreakPilot PWA - Full codebase (clean push without large binaries)
Some checks failed
Tests / Go Tests (push) Has been cancelled
Tests / Python Tests (push) Has been cancelled
Tests / Integration Tests (push) Has been cancelled
Tests / Go Lint (push) Has been cancelled
Tests / Python Lint (push) Has been cancelled
Tests / Security Scan (push) Has been cancelled
Tests / All Checks Passed (push) Has been cancelled
Security Scanning / Secret Scanning (push) Has been cancelled
Security Scanning / Dependency Vulnerability Scan (push) Has been cancelled
Security Scanning / Go Security Scan (push) Has been cancelled
Security Scanning / Python Security Scan (push) Has been cancelled
Security Scanning / Node.js Security Scan (push) Has been cancelled
Security Scanning / Docker Image Security (push) Has been cancelled
Security Scanning / Security Summary (push) Has been cancelled
CI/CD Pipeline / Go Tests (push) Has been cancelled
CI/CD Pipeline / Python Tests (push) Has been cancelled
CI/CD Pipeline / Website Tests (push) Has been cancelled
CI/CD Pipeline / Linting (push) Has been cancelled
CI/CD Pipeline / Security Scan (push) Has been cancelled
CI/CD Pipeline / Docker Build & Push (push) Has been cancelled
CI/CD Pipeline / Integration Tests (push) Has been cancelled
CI/CD Pipeline / Deploy to Staging (push) Has been cancelled
CI/CD Pipeline / Deploy to Production (push) Has been cancelled
CI/CD Pipeline / CI Summary (push) Has been cancelled
ci/woodpecker/manual/build-ci-image Pipeline was successful
ci/woodpecker/manual/main Pipeline failed

All services: admin-v2, studio-v2, website, ai-compliance-sdk,
consent-service, klausur-service, voice-service, and infrastructure.
Large PDFs and compiled binaries excluded via .gitignore.
This commit is contained in:
BreakPilot Dev
2026-02-11 13:25:58 +01:00
commit 19855efacc
2512 changed files with 933814 additions and 0 deletions

View File

@@ -0,0 +1,54 @@
"""
Klausurkorrektur Module - Privacy-by-Design Exam Correction.
DSGVO-compliant exam correction with QR-code based pseudonymization.
No personal data is sent to the LLM.
Architecture:
- Pseudonymization via doc_token (128-bit UUID)
- Teacher namespace isolation
- Self-hosted LLM at SysEleven
- Zero-knowledge identity mapping (encrypted client-side)
"""
from .db_models import (
ExamSession, PseudonymizedDocument, QRBatchJob,
SessionStatus, DocumentStatus,
# Magic Onboarding
OnboardingSession, DetectedStudent, ModuleLink,
OnboardingStatus, ModuleLinkType
)
from .repository import KlausurRepository
from .database import get_db, init_db
# Services
from .services.roster_parser import RosterParser, get_roster_parser
from .services.school_resolver import SchoolResolver, get_school_resolver
from .services.module_linker import ModuleLinker, get_module_linker
__all__ = [
# Models
"ExamSession",
"PseudonymizedDocument",
"QRBatchJob",
"SessionStatus",
"DocumentStatus",
# Magic Onboarding Models
"OnboardingSession",
"DetectedStudent",
"ModuleLink",
"OnboardingStatus",
"ModuleLinkType",
# Repository
"KlausurRepository",
# Database
"get_db",
"init_db",
# Services
"RosterParser",
"get_roster_parser",
"SchoolResolver",
"get_school_resolver",
"ModuleLinker",
"get_module_linker",
]

View File

@@ -0,0 +1,47 @@
"""
Database Configuration for Klausur Module.
Uses the same PostgreSQL database as the main backend.
"""
import os
from sqlalchemy import create_engine
from sqlalchemy.ext.declarative import declarative_base
from sqlalchemy.orm import sessionmaker
# Database URL from environment (uses same DB as Backend)
# Database URL from environment (uses same DB as Backend).
# NOTE(review): the fallback embeds development credentials — confirm that
# production deployments always set DATABASE_URL explicitly.
_raw_url = os.getenv(
    "DATABASE_URL",
    "postgresql://breakpilot:breakpilot123@localhost:5432/breakpilot"
)
# SQLAlchemy 2.0 requires "postgresql://" instead of the legacy "postgres://"
# scheme (as emitted by e.g. Heroku); only the first occurrence is rewritten.
DATABASE_URL = _raw_url.replace("postgres://", "postgresql://", 1) if _raw_url.startswith("postgres://") else _raw_url
# Engine configuration
engine = create_engine(
    DATABASE_URL,
    pool_pre_ping=True,  # validate pooled connections before handing them out
    pool_size=5,
    max_overflow=10,
    echo=os.getenv("SQL_ECHO", "false").lower() == "true"  # opt-in SQL logging
)
# Declarative Base shared by every model in this module
Base = declarative_base()
# Session factory: explicit commit/flush control (no autocommit/autoflush)
SessionLocal = sessionmaker(autocommit=False, autoflush=False, bind=engine)
def get_db():
    """Yield a database session for FastAPI dependency injection.

    The session is created per request and always closed afterwards,
    even if the request handler raises.
    """
    session = SessionLocal()
    try:
        yield session
    finally:
        session.close()
def init_db():
    """Creates all tables (for development).

    Importing db_models registers every model class on Base.metadata so
    create_all sees them. NOTE(review): no migration tooling is visible here —
    confirm production schema changes do not rely on this function.
    """
    from . import db_models  # Import models to register them
    Base.metadata.create_all(bind=engine)

View File

@@ -0,0 +1,377 @@
"""
SQLAlchemy Database Models for Klausurkorrektur Module.
Privacy-by-Design: No personal data (student names) is stored in these models.
Only pseudonymized doc_tokens are used to reference exam documents.
"""
from datetime import datetime
from sqlalchemy import (
Column, String, Integer, DateTime, JSON,
Boolean, Text, Enum as SQLEnum, ForeignKey, LargeBinary
)
from sqlalchemy.orm import relationship
import enum
import uuid
from .database import Base
class SessionStatus(str, enum.Enum):
    """Lifecycle status of an exam correction session.

    Inherits from str so values compare/serialize as plain strings.
    """
    CREATED = "created"        # Session created, awaiting uploads
    UPLOADING = "uploading"    # Documents being uploaded
    PROCESSING = "processing"  # OCR and AI correction in progress
    COMPLETED = "completed"    # All documents processed
    ARCHIVED = "archived"      # Session archived (data retention)
    DELETED = "deleted"        # Soft delete
class OnboardingStatus(str, enum.Enum):
    """Status of a magic onboarding session (ordered pipeline stages)."""
    ANALYZING = "analyzing"    # Local LLM extracting headers
    CONFIRMING = "confirming"  # User confirming detected data
    PROCESSING = "processing"  # Cloud LLM correcting exams
    LINKING = "linking"        # Creating module links
    COMPLETE = "complete"      # Onboarding finished
class ModuleLinkType(str, enum.Enum):
    """Type of cross-module link from the Klausur module to other modules."""
    NOTENBUCH = "notenbuch"      # Link to grade book
    ELTERNABEND = "elternabend"  # Link to parent meetings
    ZEUGNIS = "zeugnis"          # Link to certificates
    CALENDAR = "calendar"        # Link to calendar events
    KLASSENBUCH = "klassenbuch"  # Link to class book
class DocumentStatus(str, enum.Enum):
    """Processing status of a single pseudonymized document."""
    UPLOADED = "uploaded"              # Document uploaded, awaiting OCR
    OCR_PROCESSING = "ocr_processing"  # OCR in progress
    OCR_COMPLETED = "ocr_completed"    # OCR done, awaiting AI correction
    AI_PROCESSING = "ai_processing"    # AI correction in progress
    COMPLETED = "completed"            # Fully processed
    FAILED = "failed"                  # Processing failed
class ExamSession(Base):
    """
    Exam Correction Session.

    Groups multiple pseudonymized documents for a single exam correction task.
    No personal data is stored - teacher_id is the only identifying info.
    """
    __tablename__ = 'klausur_sessions'

    # Primary key: random UUID string
    id = Column(String(36), primary_key=True, default=lambda: str(uuid.uuid4()))

    # Teacher isolation (mandatory) — every query must filter on this column
    teacher_id = Column(String(100), nullable=False, index=True)

    # Session metadata
    name = Column(String(200), nullable=False)  # e.g., "Mathe 10a - Klausur 1"
    subject = Column(String(100), default="")
    class_name = Column(String(100), default="")  # e.g., "10a"

    # Exam configuration
    total_points = Column(Integer, default=100)
    rubric = Column(Text, default="")  # grading criteria (free text)
    questions = Column(JSON, default=list)  # [{question, points, rubric}]

    # Lifecycle status (see SessionStatus)
    status = Column(
        SQLEnum(SessionStatus),
        default=SessionStatus.CREATED,
        nullable=False,
        index=True
    )

    # Statistics (anonymized counters, no per-student data)
    document_count = Column(Integer, default=0)
    processed_count = Column(Integer, default=0)

    # Encrypted identity map (only teacher can decrypt)
    # This is stored encrypted with teacher's password
    encrypted_identity_map = Column(LargeBinary, nullable=True)
    identity_map_iv = Column(String(64), nullable=True)  # IV for AES decryption

    # Timestamps
    # NOTE(review): datetime.utcnow yields naive timestamps and is deprecated
    # in Python 3.12 — consider timezone-aware datetimes; confirm DB expects naive UTC.
    created_at = Column(DateTime, default=datetime.utcnow)
    updated_at = Column(DateTime, default=datetime.utcnow, onupdate=datetime.utcnow)
    completed_at = Column(DateTime, nullable=True)

    # Data retention: auto-delete after this date
    retention_until = Column(DateTime, nullable=True)

    # Magic Onboarding: Link to school class (optional)
    linked_school_class_id = Column(String(36), nullable=True)
    linked_subject_id = Column(String(36), nullable=True)

    # Child documents are deleted with the session (delete-orphan cascade)
    documents = relationship(
        "PseudonymizedDocument",
        back_populates="session",
        cascade="all, delete-orphan"
    )

    def __repr__(self):
        return f"<ExamSession {self.id[:8]}: {self.name} ({self.status.value})>"
class PseudonymizedDocument(Base):
    """
    Pseudonymized Exam Document.

    PRIVACY DESIGN:
    - doc_token is a 128-bit random UUID, NOT derivable from student identity
    - No student name or personal info is stored here
    - Identity mapping is stored encrypted in ExamSession.encrypted_identity_map
    - The backend CANNOT de-pseudonymize documents

    Only the teacher (with their encryption key) can map doc_token -> student name.
    """
    __tablename__ = 'klausur_documents'

    # Primary key doubles as the pseudonymization token
    doc_token = Column(String(36), primary_key=True, default=lambda: str(uuid.uuid4()))

    # Owning session
    session_id = Column(String(36), ForeignKey('klausur_sessions.id'), nullable=False, index=True)

    # Processing status (see DocumentStatus)
    status = Column(
        SQLEnum(DocumentStatus),
        default=DocumentStatus.UPLOADED,
        nullable=False,
        index=True
    )

    # Page info
    page_number = Column(Integer, default=1)
    total_pages = Column(Integer, default=1)

    # OCR result (redacted - no header/name visible)
    ocr_text = Column(Text, default="")
    ocr_confidence = Column(Integer, default=0)  # 0-100

    # AI correction result (pseudonymized)
    ai_feedback = Column(Text, default="")
    ai_score = Column(Integer, nullable=True)  # Points achieved
    ai_grade = Column(String(10), nullable=True)  # e.g., "2+" or "B"
    ai_details = Column(JSON, default=dict)  # Per-question scores

    # Processing metadata
    processing_started_at = Column(DateTime, nullable=True)
    processing_completed_at = Column(DateTime, nullable=True)
    processing_error = Column(Text, nullable=True)

    # Timestamps (naive UTC via datetime.utcnow)
    created_at = Column(DateTime, default=datetime.utcnow)
    updated_at = Column(DateTime, default=datetime.utcnow, onupdate=datetime.utcnow)

    # Back-reference to the owning ExamSession
    session = relationship("ExamSession", back_populates="documents")

    def __repr__(self):
        return f"<PseudonymizedDocument {self.doc_token[:8]} ({self.status.value})>"
class QRBatchJob(Base):
    """
    QR Code Generation Batch Job.

    Tracks generation of QR overlay sheets for printing.
    The generated PDF contains QR codes with doc_tokens.
    """
    __tablename__ = 'klausur_qr_batches'

    id = Column(String(36), primary_key=True, default=lambda: str(uuid.uuid4()))

    # Owning session and teacher scope
    session_id = Column(String(36), ForeignKey('klausur_sessions.id'), nullable=False, index=True)
    teacher_id = Column(String(100), nullable=False, index=True)

    # Batch info
    student_count = Column(Integer, nullable=False)
    generated_tokens = Column(JSON, default=list)  # List of generated doc_tokens

    # Generated PDF (stored as path reference, not in DB)
    pdf_path = Column(String(500), nullable=True)

    # Timestamps
    created_at = Column(DateTime, default=datetime.utcnow)
    downloaded_at = Column(DateTime, nullable=True)

    def __repr__(self):
        return f"<QRBatchJob {self.id[:8]}: {self.student_count} students>"
class OnboardingSession(Base):
    """
    Magic Onboarding Session.

    Tracks the automatic class/student detection and setup process.
    Temporary data structure - merged into ExamSession after confirmation.
    """
    __tablename__ = 'klausur_onboarding_sessions'

    id = Column(String(36), primary_key=True, default=lambda: str(uuid.uuid4()))

    # Links: optional exam session plus mandatory teacher scope
    klausur_session_id = Column(String(36), ForeignKey('klausur_sessions.id'), nullable=True)
    teacher_id = Column(String(100), nullable=False, index=True)

    # Detected metadata (from local LLM)
    detected_class = Column(String(100), nullable=True)
    detected_subject = Column(String(100), nullable=True)
    detected_date = Column(DateTime, nullable=True)
    detected_student_count = Column(Integer, default=0)
    detection_confidence = Column(Integer, default=0)  # 0-100

    # Confirmed data (after user review)
    confirmed_class = Column(String(100), nullable=True)
    confirmed_subject = Column(String(100), nullable=True)

    # Linked school entities (after confirmation)
    linked_school_id = Column(String(36), nullable=True)
    linked_class_id = Column(String(36), nullable=True)

    # School context (German federal state / school type / name)
    bundesland = Column(String(50), nullable=True)
    schulform = Column(String(50), nullable=True)
    school_name = Column(String(200), nullable=True)

    # Pipeline status (see OnboardingStatus)
    status = Column(
        SQLEnum(OnboardingStatus),
        default=OnboardingStatus.ANALYZING,
        nullable=False,
        index=True
    )

    # Progress tracking — one timestamp per completed pipeline stage
    analysis_completed_at = Column(DateTime, nullable=True)
    confirmation_completed_at = Column(DateTime, nullable=True)
    processing_started_at = Column(DateTime, nullable=True)
    processing_completed_at = Column(DateTime, nullable=True)

    # Timestamps
    created_at = Column(DateTime, default=datetime.utcnow)
    updated_at = Column(DateTime, default=datetime.utcnow, onupdate=datetime.utcnow)

    # Detected students are deleted with their onboarding session
    detected_students = relationship(
        "DetectedStudent",
        back_populates="onboarding_session",
        cascade="all, delete-orphan"
    )

    def __repr__(self):
        return f"<OnboardingSession {self.id[:8]}: {self.detected_class} ({self.status.value})>"
class DetectedStudent(Base):
    """
    Student detected during Magic Onboarding.

    Temporary storage for detected student data before confirmation.
    After confirmation, students are created in the School Service.
    """
    __tablename__ = 'klausur_detected_students'

    id = Column(String(36), primary_key=True, default=lambda: str(uuid.uuid4()))

    # Owning onboarding session
    onboarding_session_id = Column(
        String(36),
        ForeignKey('klausur_onboarding_sessions.id'),
        nullable=False,
        index=True
    )

    # Detected data (from exam header)
    detected_first_name = Column(String(100), nullable=True)
    detected_last_name_hint = Column(String(100), nullable=True)  # Partial, e.g. "M."

    # Confirmed data (after roster matching)
    confirmed_first_name = Column(String(100), nullable=True)
    confirmed_last_name = Column(String(100), nullable=True)

    # Matched to School Service student
    matched_student_id = Column(String(36), nullable=True)

    # Parent contact (extracted from roster)
    parent_email = Column(String(200), nullable=True)
    parent_phone = Column(String(50), nullable=True)

    # Link to pseudonymized document
    doc_token = Column(String(36), nullable=True)

    # Detection confidence
    confidence = Column(Integer, default=0)  # 0-100

    # Timestamps
    created_at = Column(DateTime, default=datetime.utcnow)

    # Back-reference to the owning OnboardingSession
    onboarding_session = relationship("OnboardingSession", back_populates="detected_students")

    def __repr__(self):
        name = self.confirmed_first_name or self.detected_first_name or "?"
        return f"<DetectedStudent {self.id[:8]}: {name}>"
class ModuleLink(Base):
    """
    Cross-module link from Klausur to other BreakPilot modules.

    Tracks connections to: Notenbuch, Elternabend, Zeugnis, Calendar
    """
    __tablename__ = 'klausur_module_links'

    id = Column(String(36), primary_key=True, default=lambda: str(uuid.uuid4()))

    # Source exam session
    klausur_session_id = Column(
        String(36),
        ForeignKey('klausur_sessions.id'),
        nullable=False,
        index=True
    )

    # Link type (see ModuleLinkType)
    link_type = Column(
        SQLEnum(ModuleLinkType),
        nullable=False,
        index=True
    )

    # Target module reference — entity id and/or direct URL may be set
    target_module = Column(String(50), nullable=False)  # school, calendar, etc.
    target_entity_id = Column(String(36), nullable=True)
    target_url = Column(String(500), nullable=True)

    # Free-form metadata about the link
    link_metadata = Column(JSON, default=dict)

    # Timestamps
    created_at = Column(DateTime, default=datetime.utcnow)

    def __repr__(self):
        return f"<ModuleLink {self.id[:8]}: {self.link_type.value} -> {self.target_module}>"
# Export all models
__all__ = [
"SessionStatus",
"DocumentStatus",
"OnboardingStatus",
"ModuleLinkType",
"ExamSession",
"PseudonymizedDocument",
"QRBatchJob",
"OnboardingSession",
"DetectedStudent",
"ModuleLink",
]

View File

@@ -0,0 +1,377 @@
"""
Repository for Klausurkorrektur Module.
All queries are filtered by teacher_id to ensure complete namespace isolation.
No cross-teacher data access is possible.
"""
from datetime import datetime, timedelta
from typing import Optional, List
from sqlalchemy.orm import Session
from sqlalchemy import and_, func
from .db_models import (
ExamSession, PseudonymizedDocument, QRBatchJob,
SessionStatus, DocumentStatus
)
class KlausurRepository:
    """
    Repository for exam correction data.

    PRIVACY DESIGN:
    - All queries MUST include teacher_id filter
    - No method allows access to other teachers' data
    - Bulk operations are scoped to teacher namespace
    """

    def __init__(self, db: Session):
        # The caller owns the session lifecycle; the repository only commits.
        self.db = db

    # ==================== Session Operations ====================

    def create_session(
        self,
        teacher_id: str,
        name: str,
        subject: str = "",
        class_name: str = "",
        total_points: int = 100,
        rubric: str = "",
        questions: Optional[List[dict]] = None,
        retention_days: int = 30
    ) -> ExamSession:
        """Create a new exam correction session.

        The retention deadline is fixed at creation time so that
        cleanup_expired_sessions() can purge the session later.
        """
        session = ExamSession(
            teacher_id=teacher_id,
            name=name,
            subject=subject,
            class_name=class_name,
            total_points=total_points,
            rubric=rubric,
            questions=questions or [],
            status=SessionStatus.CREATED,
            retention_until=datetime.utcnow() + timedelta(days=retention_days)
        )
        self.db.add(session)
        self.db.commit()
        self.db.refresh(session)
        return session

    def get_session(
        self,
        session_id: str,
        teacher_id: str
    ) -> Optional[ExamSession]:
        """Get a session by ID (teacher-scoped); soft-deleted sessions are invisible."""
        return self.db.query(ExamSession).filter(
            and_(
                ExamSession.id == session_id,
                ExamSession.teacher_id == teacher_id,
                ExamSession.status != SessionStatus.DELETED
            )
        ).first()

    def list_sessions(
        self,
        teacher_id: str,
        include_archived: bool = False,
        limit: int = 50,
        offset: int = 0
    ) -> List[ExamSession]:
        """List all sessions for a teacher, newest first (paginated)."""
        query = self.db.query(ExamSession).filter(
            and_(
                ExamSession.teacher_id == teacher_id,
                ExamSession.status != SessionStatus.DELETED
            )
        )
        if not include_archived:
            query = query.filter(ExamSession.status != SessionStatus.ARCHIVED)
        return query.order_by(ExamSession.created_at.desc()).offset(offset).limit(limit).all()

    def update_session_status(
        self,
        session_id: str,
        teacher_id: str,
        status: SessionStatus
    ) -> Optional[ExamSession]:
        """Update session status; stamps completed_at when moving to COMPLETED."""
        session = self.get_session(session_id, teacher_id)
        if session:
            session.status = status
            if status == SessionStatus.COMPLETED:
                session.completed_at = datetime.utcnow()
            self.db.commit()
            self.db.refresh(session)
        return session

    def update_session_identity_map(
        self,
        session_id: str,
        teacher_id: str,
        encrypted_map: bytes,
        iv: str
    ) -> Optional[ExamSession]:
        """Store encrypted identity map (teacher-scoped).

        The map is opaque ciphertext to the backend; only the teacher holds
        the decryption key.
        """
        session = self.get_session(session_id, teacher_id)
        if session:
            session.encrypted_identity_map = encrypted_map
            session.identity_map_iv = iv
            self.db.commit()
            self.db.refresh(session)
        return session

    def delete_session(
        self,
        session_id: str,
        teacher_id: str,
        hard_delete: bool = False
    ) -> bool:
        """Delete a session.

        Soft delete (default) flips the status to DELETED; hard delete removes
        the row (documents go with it via the delete-orphan cascade).
        Returns True when a session was found and deleted.
        """
        session = self.get_session(session_id, teacher_id)
        if not session:
            return False
        if hard_delete:
            self.db.delete(session)
        else:
            session.status = SessionStatus.DELETED
        self.db.commit()
        return True

    # ==================== Document Operations ====================

    def create_document(
        self,
        session_id: str,
        teacher_id: str,
        doc_token: Optional[str] = None,
        page_number: int = 1,
        total_pages: int = 1
    ) -> Optional[PseudonymizedDocument]:
        """Create a new pseudonymized document.

        Returns None when the session does not belong to the teacher.
        A caller-supplied doc_token (e.g. from a pre-printed QR sheet)
        overrides the auto-generated one.
        """
        # Verify session belongs to teacher before touching anything
        session = self.get_session(session_id, teacher_id)
        if not session:
            return None
        doc = PseudonymizedDocument(
            session_id=session_id,
            page_number=page_number,
            total_pages=total_pages,
            status=DocumentStatus.UPLOADED
        )
        if doc_token:
            doc.doc_token = doc_token
        self.db.add(doc)
        # Keep the denormalized counter in sync
        session.document_count += 1
        self.db.commit()
        self.db.refresh(doc)
        return doc

    def get_document(
        self,
        doc_token: str,
        teacher_id: str
    ) -> Optional[PseudonymizedDocument]:
        """Get a document by token (teacher-scoped via its session)."""
        return self.db.query(PseudonymizedDocument).join(
            ExamSession
        ).filter(
            and_(
                PseudonymizedDocument.doc_token == doc_token,
                ExamSession.teacher_id == teacher_id,
                ExamSession.status != SessionStatus.DELETED
            )
        ).first()

    def list_documents(
        self,
        session_id: str,
        teacher_id: str
    ) -> List[PseudonymizedDocument]:
        """List all documents in a session (teacher-scoped), oldest first."""
        # Verify session belongs to teacher
        session = self.get_session(session_id, teacher_id)
        if not session:
            return []
        return self.db.query(PseudonymizedDocument).filter(
            PseudonymizedDocument.session_id == session_id
        ).order_by(PseudonymizedDocument.created_at).all()

    def update_document_ocr(
        self,
        doc_token: str,
        teacher_id: str,
        ocr_text: str,
        confidence: int = 0
    ) -> Optional[PseudonymizedDocument]:
        """Update document with OCR results and advance it to OCR_COMPLETED."""
        doc = self.get_document(doc_token, teacher_id)
        if doc:
            doc.ocr_text = ocr_text
            doc.ocr_confidence = confidence
            doc.status = DocumentStatus.OCR_COMPLETED
            self.db.commit()
            self.db.refresh(doc)
        return doc

    def update_document_ai_result(
        self,
        doc_token: str,
        teacher_id: str,
        feedback: str,
        score: Optional[int] = None,
        grade: Optional[str] = None,
        details: Optional[dict] = None
    ) -> Optional[PseudonymizedDocument]:
        """Update document with AI correction results.

        BUGFIX: processed_count is only incremented the first time a document
        reaches COMPLETED. Previously every call incremented it, so re-running
        a correction inflated the counter and could prematurely flip the
        session to COMPLETED.
        """
        doc = self.get_document(doc_token, teacher_id)
        if doc:
            first_completion = doc.status != DocumentStatus.COMPLETED
            doc.ai_feedback = feedback
            doc.ai_score = score
            doc.ai_grade = grade
            doc.ai_details = details or {}
            doc.status = DocumentStatus.COMPLETED
            doc.processing_completed_at = datetime.utcnow()
            session = doc.session
            if first_completion:
                session.processed_count += 1
            # Close the session once every document has been processed
            if session.processed_count >= session.document_count:
                session.status = SessionStatus.COMPLETED
                session.completed_at = datetime.utcnow()
            self.db.commit()
            self.db.refresh(doc)
        return doc

    def update_document_status(
        self,
        doc_token: str,
        teacher_id: str,
        status: DocumentStatus,
        error: Optional[str] = None
    ) -> Optional[PseudonymizedDocument]:
        """Update document processing status; records a start timestamp when
        entering either processing phase and keeps any error message."""
        doc = self.get_document(doc_token, teacher_id)
        if doc:
            doc.status = status
            if error:
                doc.processing_error = error
            if status in [DocumentStatus.OCR_PROCESSING, DocumentStatus.AI_PROCESSING]:
                doc.processing_started_at = datetime.utcnow()
            self.db.commit()
            self.db.refresh(doc)
        return doc

    # ==================== QR Batch Operations ====================

    def create_qr_batch(
        self,
        session_id: str,
        teacher_id: str,
        student_count: int,
        generated_tokens: List[str]
    ) -> Optional[QRBatchJob]:
        """Create a QR code batch job (returns None if the session is not
        owned by the teacher)."""
        # Verify session belongs to teacher
        session = self.get_session(session_id, teacher_id)
        if not session:
            return None
        batch = QRBatchJob(
            session_id=session_id,
            teacher_id=teacher_id,
            student_count=student_count,
            generated_tokens=generated_tokens
        )
        self.db.add(batch)
        self.db.commit()
        self.db.refresh(batch)
        return batch

    def get_qr_batch(
        self,
        batch_id: str,
        teacher_id: str
    ) -> Optional[QRBatchJob]:
        """Get a QR batch by ID (teacher-scoped)."""
        return self.db.query(QRBatchJob).filter(
            and_(
                QRBatchJob.id == batch_id,
                QRBatchJob.teacher_id == teacher_id
            )
        ).first()

    # ==================== Statistics (Anonymized) ====================

    def get_session_stats(
        self,
        session_id: str,
        teacher_id: str
    ) -> dict:
        """Get anonymized statistics for a session.

        Returns {} for an unknown/foreign session. BUGFIX: score_average is
        checked with `is not None` — the old truthiness check turned a
        legitimate average of 0 into None.
        """
        session = self.get_session(session_id, teacher_id)
        if not session:
            return {}
        # Count documents by status
        status_counts = self.db.query(
            PseudonymizedDocument.status,
            func.count(PseudonymizedDocument.doc_token)
        ).filter(
            PseudonymizedDocument.session_id == session_id
        ).group_by(PseudonymizedDocument.status).all()
        # Score statistics (anonymized aggregates only)
        score_stats = self.db.query(
            func.avg(PseudonymizedDocument.ai_score),
            func.min(PseudonymizedDocument.ai_score),
            func.max(PseudonymizedDocument.ai_score)
        ).filter(
            and_(
                PseudonymizedDocument.session_id == session_id,
                PseudonymizedDocument.ai_score.isnot(None)
            )
        ).first()
        avg_score = score_stats[0]
        return {
            "session_id": session_id,
            "total_documents": session.document_count,
            "processed_documents": session.processed_count,
            "status_breakdown": {s.value: c for s, c in status_counts},
            "score_average": float(avg_score) if avg_score is not None else None,
            "score_min": score_stats[1],
            "score_max": score_stats[2]
        }

    # ==================== Data Retention ====================

    def cleanup_expired_sessions(self) -> int:
        """Soft-delete sessions past their retention date and wipe their
        encrypted identity maps. Returns the number of sessions deleted."""
        now = datetime.utcnow()
        expired = self.db.query(ExamSession).filter(
            and_(
                ExamSession.retention_until < now,
                ExamSession.status != SessionStatus.DELETED
            )
        ).all()
        count = len(expired)
        for session in expired:
            session.status = SessionStatus.DELETED
            # Clear sensitive data so the identity mapping is unrecoverable
            session.encrypted_identity_map = None
            session.identity_map_iv = None
        self.db.commit()
        return count

1970
backend/klausur/routes.py Normal file

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,28 @@
"""
Services for Klausurkorrektur Module.
- PseudonymizationService: QR code generation, header redaction
- CorrectionService: LLM integration for AI-assisted grading
- RosterParser: Parse Klassenbuch photos and roster files
- SchoolResolver: School/class selection and auto-creation
- ModuleLinker: Cross-module links (Notenbuch, Elternabend, etc.)
"""
from .pseudonymizer import PseudonymizationService, get_pseudonymizer
from .correction_service import ExamCorrectionService, get_correction_service
from .roster_parser import RosterParser, get_roster_parser
from .school_resolver import SchoolResolver, get_school_resolver
from .module_linker import ModuleLinker, get_module_linker
__all__ = [
"PseudonymizationService",
"get_pseudonymizer",
"ExamCorrectionService",
"get_correction_service",
"RosterParser",
"get_roster_parser",
"SchoolResolver",
"get_school_resolver",
"ModuleLinker",
"get_module_linker",
]

View File

@@ -0,0 +1,379 @@
"""
Exam Correction Service using Self-Hosted LLM.
PRIVACY BY DESIGN:
- Only pseudonymized text (doc_token + OCR content) is sent to LLM
- No student names or personal data in prompts
- All processing happens on self-hosted infrastructure (SysEleven)
- No data sent to external APIs (unless explicitly configured)
This service generates AI-assisted corrections and feedback for exam answers.
"""
import logging
from typing import Optional, List
from dataclasses import dataclass
from llm_gateway.services.inference import get_inference_service, InferenceResult
from llm_gateway.models.chat import ChatCompletionRequest, ChatMessage
from llm_gateway.config import get_config
logger = logging.getLogger(__name__)
@dataclass
class QuestionRubric:
    """Rubric for a single exam question.

    All fields are embedded verbatim into the (pseudonymized) grading prompt.
    """
    question_number: int
    question_text: str
    max_points: int
    expected_answer: str
    grading_criteria: str
@dataclass
class QuestionResult:
    """AI correction result for a single question."""
    question_number: int
    points_awarded: int
    max_points: int
    feedback: str
    # Bullet-point lists extracted from the LLM's JSON response
    strengths: List[str]
    improvements: List[str]
@dataclass
class CorrectionResult:
    """Complete correction result for an exam."""
    doc_token: str  # Pseudonymized identifier (never a student name)
    total_score: int
    max_score: int
    grade: str  # German grade label, e.g. "2+"
    overall_feedback: str
    question_results: List[QuestionResult]
    processing_time_ms: int  # wall-clock time spent in correct_exam
# German grading scale (can be customized).
# Ordered best-to-worst as (minimum percentage, grade label); the first
# threshold that the percentage reaches determines the grade.
GERMAN_GRADES = [
    (95, "1+"),  # sehr gut plus
    (90, "1"),   # sehr gut
    (85, "1-"),  # sehr gut minus
    (80, "2+"),  # gut plus
    (75, "2"),   # gut
    (70, "2-"),  # gut minus
    (65, "3+"),  # befriedigend plus
    (60, "3"),   # befriedigend
    (55, "3-"),  # befriedigend minus
    (50, "4+"),  # ausreichend plus
    (45, "4"),   # ausreichend
    (40, "4-"),  # ausreichend minus
    (33, "5+"),  # mangelhaft plus
    (27, "5"),   # mangelhaft minus -- see scale above
    (20, "5-"),  # mangelhaft minus
    (0, "6"),    # ungenuegend
]


def calculate_grade(percentage: float) -> str:
    """Map a score percentage onto the German grade scale.

    Returns the grade of the first (highest) threshold the percentage
    reaches; anything below every threshold falls back to "6".
    """
    return next(
        (label for minimum, label in GERMAN_GRADES if percentage >= minimum),
        "6",
    )
class ExamCorrectionService:
    """
    Service for AI-assisted exam correction.

    PRIVACY GUARANTEES:
    1. Prompts contain NO personal data
    2. Only doc_token is used as reference
    3. Processing on self-hosted LLM
    4. Results stored with pseudonymized identifiers
    """

    # System prompt for exam correction (German). It instructs the model to
    # grade content only, ignore spelling (outside German exams), and answer
    # with exactly one JSON object: {points, feedback, strengths, improvements}.
    # correct_question() parses that JSON from the response.
    CORRECTION_SYSTEM_PROMPT = """Du bist ein erfahrener Lehrer und korrigierst Schuelerantworten.
WICHTIGE REGELN:
1. Bewerte NUR den fachlichen Inhalt der Antwort
2. Ignoriere Rechtschreibfehler (ausser bei Deutschklausuren)
3. Gib konstruktives, ermutigzendes Feedback
4. Beziehe dich auf die Bewertungskriterien
5. Sei fair und konsistent
AUSGABEFORMAT (JSON):
{
"points": <Punktzahl>,
"feedback": "<Kurze Begruendung der Bewertung>",
"strengths": ["<Staerke 1>", "<Staerke 2>"],
"improvements": ["<Verbesserungsvorschlag 1>"]
}
Antworte NUR mit dem JSON-Objekt, ohne weitere Erklaerungen."""

    # Template for the whole-exam summary; filled via str.format in
    # _generate_overall_feedback with the per-question results and totals.
    OVERALL_FEEDBACK_PROMPT = """Basierend auf den einzelnen Bewertungen, erstelle eine Gesamtrueckmeldung.
Einzelbewertungen:
{question_results}
Gesamtpunktzahl: {total_score}/{max_score} ({percentage}%)
Note: {grade}
Erstelle eine motivierende Gesamtrueckmeldung (2-3 Saetze), die:
1. Die Staerken hervorhebt
2. Konstruktive Verbesserungsvorschlaege macht
3. Ermutigt und motiviert
Antworte nur mit dem Feedback-Text, ohne JSON-Formatierung."""
def __init__(self, model: Optional[str] = None):
    """
    Initialize the correction service.

    Args:
        model: LLM model to use; falls back to the configured
            correction model (default: qwen2.5:14b from config).

    PRIVACY:
        The model is served by the self-hosted inference service, so no
        data is sent to external servers. NOTE(review): the original
        comment said the model runs "locally on the Mac Mini via Ollama",
        while the module header says "Self-hosted LLM at SysEleven" —
        confirm which deployment is current.
    """
    config = get_config()
    # Use configured correction model (default: qwen2.5:14b)
    self.model = model or config.correction_model
    self.inference = get_inference_service()
    logger.info(f"Correction service initialized with model: {self.model}")
async def correct_question(
    self,
    student_answer: str,
    rubric: QuestionRubric,
    subject: str = "Allgemein"
) -> QuestionResult:
    """
    Correct a single question answer.

    Args:
        student_answer: The student's OCR-extracted answer (pseudonymized)
        rubric: Grading rubric for this question
        subject: Subject for context

    Returns:
        QuestionResult with points and feedback. On LLM or parse failure a
        zero-point result flagged for manual review is returned instead of
        raising.
    """
    import json

    # Build prompt with NO personal data
    user_prompt = f"""Fach: {subject}
Frage {rubric.question_number}: {rubric.question_text}
Maximale Punktzahl: {rubric.max_points}
Erwartete Antwort:
{rubric.expected_answer}
Bewertungskriterien:
{rubric.grading_criteria}
---
Schuelerantwort:
{student_answer}
---
Bewerte diese Antwort nach den Kriterien."""
    request = ChatCompletionRequest(
        model=self.model,
        messages=[
            ChatMessage(role="system", content=self.CORRECTION_SYSTEM_PROMPT),
            ChatMessage(role="user", content=user_prompt),
        ],
        temperature=0.3,  # Lower temperature for consistent grading
        max_tokens=500,
    )
    try:
        response = await self.inference.complete(request)
        content = response.choices[0].message.content or "{}"
        # Parse JSON response
        try:
            result = json.loads(content)
        except json.JSONDecodeError:
            # Fallback: model ignored the JSON-only instruction — award half
            # points and flag the document for manual review
            logger.warning(f"Failed to parse LLM response as JSON: {content[:100]}")
            result = {
                "points": rubric.max_points // 2,
                "feedback": content[:200],
                "strengths": [],
                "improvements": ["Automatische Bewertung fehlgeschlagen - manuelle Pruefung erforderlich"]
            }
        # BUGFIX: the old int(...) crashed on float-like strings ("5.5"),
        # which the broad except below silently turned into 0 points, and the
        # bare min() allowed negative scores. Parse tolerantly and clamp to
        # the valid range [0, max_points].
        try:
            points = int(float(result.get("points", 0)))
        except (TypeError, ValueError):
            points = 0
        points = max(0, min(points, rubric.max_points))
        return QuestionResult(
            question_number=rubric.question_number,
            points_awarded=points,
            max_points=rubric.max_points,
            feedback=result.get("feedback", ""),
            strengths=result.get("strengths", []),
            improvements=result.get("improvements", []),
        )
    except Exception as e:
        # Never propagate — a failed question becomes a zero-point result
        # that asks the teacher for manual correction.
        logger.error(f"Correction failed for question {rubric.question_number}: {e}")
        return QuestionResult(
            question_number=rubric.question_number,
            points_awarded=0,
            max_points=rubric.max_points,
            feedback=f"Automatische Bewertung fehlgeschlagen: {str(e)}",
            strengths=[],
            improvements=["Manuelle Korrektur erforderlich"],
        )
async def correct_exam(
    self,
    doc_token: str,
    ocr_text: str,
    rubrics: List[QuestionRubric],
    subject: str = "Allgemein"
) -> CorrectionResult:
    """
    Correct a complete exam with multiple questions.

    Args:
        doc_token: Pseudonymized document identifier
        ocr_text: Full OCR text of the exam (already redacted)
        rubrics: List of question rubrics
        subject: Subject name

    Returns:
        CorrectionResult with all per-question scores, the total, the
        German grade, an overall feedback text, and the processing time.
    """
    import time
    started = time.time()

    # Heuristically split the redacted OCR text into one answer per rubric;
    # pad with empty strings so every rubric gets graded.
    raw_answers = self._extract_answers(ocr_text, len(rubrics))
    answers = (raw_answers + [""] * len(rubrics))[:len(rubrics)]

    # Grade each question sequentially
    per_question = []
    for question_rubric, answer_text in zip(rubrics, answers):
        per_question.append(
            await self.correct_question(answer_text, question_rubric, subject)
        )

    # Aggregate totals and derive the German grade
    achieved = sum(q.points_awarded for q in per_question)
    possible = sum(q.max_points for q in per_question)
    pct = (achieved / possible * 100) if possible > 0 else 0
    final_grade = calculate_grade(pct)

    summary = await self._generate_overall_feedback(
        per_question, achieved, possible, pct, final_grade
    )

    elapsed_ms = int((time.time() - started) * 1000)
    return CorrectionResult(
        doc_token=doc_token,
        total_score=achieved,
        max_score=possible,
        grade=final_grade,
        overall_feedback=summary,
        question_results=per_question,
        processing_time_ms=elapsed_ms,
    )
async def _generate_overall_feedback(
self,
question_results: List[QuestionResult],
total_score: int,
max_score: int,
percentage: float,
grade: str
) -> str:
"""Generate motivating overall feedback."""
# Summarize question results
results_summary = "\n".join([
f"Frage {r.question_number}: {r.points_awarded}/{r.max_points} Punkte - {r.feedback[:100]}"
for r in question_results
])
prompt = self.OVERALL_FEEDBACK_PROMPT.format(
question_results=results_summary,
total_score=total_score,
max_score=max_score,
percentage=f"{percentage:.1f}",
grade=grade,
)
request = ChatCompletionRequest(
model=self.model,
messages=[
ChatMessage(role="user", content=prompt),
],
temperature=0.5,
max_tokens=200,
)
try:
response = await self.inference.complete(request)
return response.choices[0].message.content or "Gute Arbeit! Weiter so."
except Exception as e:
logger.error(f"Failed to generate overall feedback: {e}")
return f"Gesamtergebnis: {total_score}/{max_score} Punkte ({grade})"
def _extract_answers(self, ocr_text: str, num_questions: int) -> List[str]:
"""
Extract individual answers from OCR text.
Simple heuristic: split by question markers (1., 2., etc.)
More sophisticated extraction can be implemented.
"""
import re
# Try to find question markers
pattern = r'(?:^|\n)\s*(\d+)[.\)]\s*'
parts = re.split(pattern, ocr_text)
answers = []
i = 1 # Skip first empty part
while i < len(parts):
if i + 1 < len(parts):
# parts[i] is the question number, parts[i+1] is the answer
answers.append(parts[i + 1].strip())
i += 2
# Pad with empty answers if needed
while len(answers) < num_questions:
answers.append("")
return answers[:num_questions]
# Singleton instance (lazily created by get_correction_service)
_correction_service: Optional[ExamCorrectionService] = None
def get_correction_service(model: Optional[str] = None) -> ExamCorrectionService:
    """
    Return the process-wide correction service, creating it on first use.

    Args:
        model: Optional model override. If None, uses
            config.correction_model (qwen2.5:14b).

    Returns:
        ExamCorrectionService instance.

    PRIVACY: all processing happens locally via Ollama - no cloud API.
    """
    global _correction_service
    # Recreate only on first use or when a different model is requested.
    needs_new = (
        _correction_service is None
        or (model is not None and _correction_service.model != model)
    )
    if needs_new:
        _correction_service = ExamCorrectionService(model=model)
    return _correction_service

View File

@@ -0,0 +1,630 @@
"""
Module Linker Service - Cross-Module Verknuepfungen.
Verknuepft Klausur-Ergebnisse mit anderen BreakPilot-Modulen:
- Notenbuch (School Service)
- Elternabend (Gespraechsvorschlaege)
- Zeugnisse (Notenuebernahme)
- Kalender (Termine)
Privacy:
- Verknuepfungen nutzen doc_tokens (pseudonymisiert)
- Deanonymisierung nur Client-seitig moeglich
"""
import logging
import os
from dataclasses import dataclass, field
from datetime import datetime, timedelta
from enum import Enum
from typing import Any, Dict, List, Optional

import httpx
# ============================================================================
# DATA CLASSES
# ============================================================================
class LinkType(str, Enum):
    """Kind of cross-module link created from an exam session."""
    NOTENBUCH = "notenbuch"      # grade book (school service)
    ELTERNABEND = "elternabend"  # parent-meeting module
    ZEUGNIS = "zeugnis"          # report cards / grade aggregation
    CALENDAR = "calendar"        # calendar events
    KLASSENBUCH = "klassenbuch"  # class register
class MeetingUrgency(str, Enum):
    """Urgency level of a suggested parent meeting (German display values)."""
    LOW = "niedrig"
    MEDIUM = "mittel"
    HIGH = "hoch"
@dataclass
class CorrectionResult:
    """Correction result for one exam document (pseudonymized)."""
    doc_token: str    # pseudonymous document identifier, never a name
    score: float      # points achieved
    max_score: float  # maximum achievable points
    grade: str        # display grade, e.g. "2+"
    feedback: str     # overall feedback text
    question_results: List[Dict[str, Any]] = field(default_factory=list)  # per-question detail dicts
@dataclass
class GradeEntry:
    """Grade-book entry linking a pseudonymous result to a student."""
    student_id: str  # real student ID (grade-book side)
    doc_token: str   # pseudonymized token (exam side)
    grade: str       # display grade
    points: float
    max_points: float
    exam_name: str
    date: str        # exam date; format set by the caller (presumably ISO) - confirm
@dataclass
class ParentMeetingSuggestion:
    """Suggestion for a parent meeting (pseudonymized)."""
    doc_token: str  # pseudonymized; de-anonymization happens client-side
    reason: str     # human-readable trigger, e.g. "Note 5 in Mathematik"
    urgency: MeetingUrgency
    grade: str
    subject: str
    suggested_topics: List[str] = field(default_factory=list)  # talking points
@dataclass
class CalendarEvent:
    """Calendar entry, e.g. a scheduled parent meeting."""
    id: str
    title: str
    description: str
    start_time: datetime
    end_time: datetime
    event_type: str  # e.g. "parent_meeting"
    linked_doc_tokens: List[str] = field(default_factory=list)  # pseudonymized links
@dataclass
class ModuleLink:
    """Link from an exam session to an entity in another module."""
    id: str
    klausur_session_id: str
    link_type: LinkType
    target_module: str     # e.g. "school", "elternabend"
    target_entity_id: str  # ID inside the target module
    target_url: Optional[str] = None  # deep link into the target UI
    link_metadata: Dict[str, Any] = field(default_factory=dict)
    created_at: datetime = field(default_factory=datetime.utcnow)  # naive UTC timestamp
@dataclass
class LinkResult:
    """Outcome of a linking operation."""
    success: bool
    link: Optional[ModuleLink] = None  # set on success where a link entity exists
    message: str = ""                  # human-readable status (German, user-facing)
    target_url: Optional[str] = None   # where the UI should navigate next
# ============================================================================
# MODULE LINKER
# ============================================================================
class ModuleLinker:
    """
    Links exam-correction results to other BreakPilot modules.

    Supported targets:
        - Notenbuch (school service): transfer grades
        - Elternabend: suggest parent meetings for weak results
        - Zeugnis: feed grade aggregation for report cards
        - Kalender: create meeting slots

    Privacy:
        All operations work on pseudonymized ``doc_token`` values. The
        mapping to real students stays client-side unless the teacher
        explicitly supplies an ``identity_map``.

    Example:
        linker = ModuleLinker()

        # Transfer grades into the grade book
        result = await linker.link_to_notenbuch(
            session_id="session-123",
            class_id="class-456",
            results=correction_results
        )

        # Suggest parent meetings
        suggestions = linker.suggest_elternabend(
            results=correction_results,
            subject="Mathematik"
        )
    """

    # Ordered grade scale (best to worst) with the minimum score fraction
    # required for each grade. Key ORDER matters: suggest_elternabend uses
    # the key positions as a severity scale.
    GRADE_THRESHOLDS = {
        "1+": 0.95, "1": 0.90, "1-": 0.85,
        "2+": 0.80, "2": 0.75, "2-": 0.70,
        "3+": 0.65, "3": 0.60, "3-": 0.55,
        "4+": 0.50, "4": 0.45, "4-": 0.40,
        "5+": 0.33, "5": 0.25, "5-": 0.17,
        "6": 0.0
    }

    # Grades that trigger a meeting at the default threshold ("4").
    MEETING_TRIGGER_GRADES = ["4", "4-", "5+", "5", "5-", "6"]

    # Numeric value per display grade, shared by all statistics helpers so
    # averages and medians cannot drift apart (previously this table was
    # duplicated in two methods).
    GRADE_VALUES = {
        "1+": 0.7, "1": 1.0, "1-": 1.3,
        "2+": 1.7, "2": 2.0, "2-": 2.3,
        "3+": 2.7, "3": 3.0, "3-": 3.3,
        "4+": 3.7, "4": 4.0, "4-": 4.3,
        "5+": 4.7, "5": 5.0, "5-": 5.3,
        "6": 6.0
    }

    # Upper bounds for mapping a numeric grade value back to its display
    # form; anything above the last bound is a "6".
    _GRADE_CUTOFFS = [
        (1.15, "1+"), (1.5, "1"), (1.85, "1-"),
        (2.15, "2+"), (2.5, "2"), (2.85, "2-"),
        (3.15, "3+"), (3.5, "3"), (3.85, "3-"),
        (4.15, "4+"), (4.5, "4"), (4.85, "4-"),
        (5.15, "5+"), (5.5, "5"), (5.85, "5-"),
    ]

    def __init__(self):
        # Service endpoints are configurable via environment for tests
        # and alternative deployments.
        self.school_service_url = os.getenv(
            "SCHOOL_SERVICE_URL",
            "http://school-service:8084"
        )
        self.calendar_service_url = os.getenv(
            "CALENDAR_SERVICE_URL",
            "http://calendar-service:8085"
        )

    # =========================================================================
    # NOTENBUCH INTEGRATION
    # =========================================================================

    async def link_to_notenbuch(
        self,
        session_id: str,
        class_id: str,
        subject: str,
        results: List[CorrectionResult],
        exam_name: str,
        exam_date: str,
        identity_map: Optional[Dict[str, str]] = None
    ) -> LinkResult:
        """
        Transfer grades into the grade book (school service).

        Args:
            session_id: Exam session ID.
            class_id: Class ID inside the school service.
            subject: Subject name.
            results: Correction results (pseudonymized).
            exam_name: Name of the exam.
            exam_date: Date of the exam.
            identity_map: Optional doc_token -> student_id mapping.

        Note:
            The identity_map is only used server-side when the teacher
            explicitly releases the link; normally the mapping stays on
            the client.
        """
        try:
            # Build one grade record per pseudonymized result.
            grades_data = []
            for result in results:
                grade_entry = {
                    "doc_token": result.doc_token,
                    "grade": result.grade,
                    "points": result.score,
                    "max_points": result.max_score,
                    "percentage": result.score / result.max_score if result.max_score > 0 else 0
                }
                # Attach the real student ID only when explicitly provided.
                if identity_map and result.doc_token in identity_map:
                    grade_entry["student_id"] = identity_map[result.doc_token]
                grades_data.append(grade_entry)
            async with httpx.AsyncClient(timeout=10.0) as client:
                response = await client.post(
                    f"{self.school_service_url}/api/classes/{class_id}/exams",
                    json={
                        "name": exam_name,
                        "subject": subject,
                        "date": exam_date,
                        "max_points": results[0].max_score if results else 100,
                        "grades": grades_data,
                        "klausur_session_id": session_id
                    }
                )
            if response.status_code in (200, 201):
                data = response.json()
                return LinkResult(
                    success=True,
                    link=ModuleLink(
                        id=data.get('id', ''),
                        klausur_session_id=session_id,
                        link_type=LinkType.NOTENBUCH,
                        target_module="school",
                        target_entity_id=data.get('id', ''),
                        target_url=f"/app?module=school&class={class_id}&exam={data.get('id')}"
                    ),
                    message=f"Noten erfolgreich uebertragen ({len(results)} Eintraege)",
                    target_url=f"/app?module=school&class={class_id}"
                )
            return LinkResult(
                success=False,
                message=f"Fehler beim Uebertragen: {response.status_code}"
            )
        except Exception as e:
            return LinkResult(
                success=False,
                message=f"Verbindungsfehler: {str(e)}"
            )

    # =========================================================================
    # ELTERNABEND SUGGESTIONS
    # =========================================================================

    def suggest_elternabend(
        self,
        results: List[CorrectionResult],
        subject: str,
        threshold_grade: str = "4"
    ) -> List[ParentMeetingSuggestion]:
        """
        Suggest parent meetings for students with weak results.

        Args:
            results: Correction results (pseudonymized).
            subject: Subject name.
            threshold_grade: Grades equal to or worse than this trigger a
                suggestion. The default "4" matches MEETING_TRIGGER_GRADES.

        Returns:
            Suggestions (pseudonymized), most urgent first.
        """
        # BUGFIX: threshold_grade was previously computed into an index
        # that was never used; the ordered grade scale now actually
        # drives the trigger. Default behavior is unchanged.
        scale = list(self.GRADE_THRESHOLDS.keys())
        if threshold_grade in scale:
            threshold_idx = scale.index(threshold_grade)
        else:
            threshold_idx = scale.index("4")
        suggestions: List[ParentMeetingSuggestion] = []
        for result in results:
            # Grades not on the scale cannot be compared - skip them.
            if result.grade not in scale:
                continue
            # Positions grow toward worse grades; trigger at/after threshold.
            if scale.index(result.grade) < threshold_idx:
                continue
            suggestions.append(ParentMeetingSuggestion(
                doc_token=result.doc_token,
                reason=f"Note {result.grade} in {subject}",
                urgency=self._determine_urgency(result.grade),
                grade=result.grade,
                subject=subject,
                suggested_topics=self._generate_meeting_topics(result, subject)
            ))
        # Most urgent meetings first.
        urgency_order = {
            MeetingUrgency.HIGH: 0,
            MeetingUrgency.MEDIUM: 1,
            MeetingUrgency.LOW: 2
        }
        suggestions.sort(key=lambda s: urgency_order[s.urgency])
        return suggestions

    def _determine_urgency(self, grade: str) -> MeetingUrgency:
        """Map a display grade to a meeting urgency level."""
        if grade in ["5-", "6"]:
            return MeetingUrgency.HIGH
        elif grade in ["5", "5+"]:
            return MeetingUrgency.MEDIUM
        else:
            return MeetingUrgency.LOW

    def _generate_meeting_topics(
        self,
        result: CorrectionResult,
        subject: str
    ) -> List[str]:
        """Derive talking points for a parent meeting from the result."""
        topics = [f"Leistungsstand in {subject}"]
        # BUGFIX: the original compared the capitalized needle
        # "Verstaendnis" against an already lowercased string, so the
        # check could never match.
        feedback_lc = result.feedback.lower()
        if "verstaendnis" in feedback_lc or "grundlagen" in feedback_lc:
            topics.append("Grundlagenverstaendnis foerdern")
        if "uebung" in feedback_lc:
            topics.append("Zusaetzliche Uebungsmoeglichkeiten")
        # Flag weak questions (< 50% of the available points).
        if result.question_results:
            weak_areas = [
                qr.get('question_text', '')
                for qr in result.question_results
                # "or 1" guards against an explicit max_points of 0.
                if qr.get('points_awarded', 0) / (qr.get('max_points', 1) or 1) < 0.5
            ]
            if weak_areas:
                topics.append("Gezielte Foerderung in Schwachstellen")
        # Top up with standard topics so there are always >= 3.
        if len(topics) < 3:
            topics.extend([
                "Lernstrategien besprechen",
                "Unterstuetzungsmoeglichkeiten zu Hause",
                "Nachhilfe-Optionen"
            ])
        return topics[:5]  # at most 5 topics

    async def create_elternabend_link(
        self,
        session_id: str,
        suggestions: List[ParentMeetingSuggestion],
        teacher_id: str
    ) -> LinkResult:
        """Create a link to the parent-meeting module (metadata only)."""
        # TODO: integrate with the Elternabend module; currently only
        # summary metadata is stored.
        return LinkResult(
            success=True,
            link=ModuleLink(
                id=f"elternabend-{session_id}",
                klausur_session_id=session_id,
                link_type=LinkType.ELTERNABEND,
                target_module="elternabend",
                target_entity_id="",
                link_metadata={
                    "suggestion_count": len(suggestions),
                    "high_urgency_count": sum(
                        1 for s in suggestions if s.urgency == MeetingUrgency.HIGH
                    )
                }
            ),
            message=f"{len(suggestions)} Elterngespraeche vorgeschlagen",
            target_url="/app?module=elternabend"
        )

    # =========================================================================
    # ZEUGNIS INTEGRATION
    # =========================================================================

    async def update_zeugnis(
        self,
        class_id: str,
        subject: str,
        grades: Dict[str, str],
        exam_weight: float = 1.0
    ) -> LinkResult:
        """
        Update the report-card aggregation with new grades.

        Args:
            class_id: Class ID.
            subject: Subject name.
            grades: doc_token -> grade mapping (pseudonymized).
            exam_weight: Weight of this exam (default: 1.0).
        """
        try:
            async with httpx.AsyncClient(timeout=10.0) as client:
                response = await client.post(
                    f"{self.school_service_url}/api/classes/{class_id}/grades/aggregate",
                    json={
                        "subject": subject,
                        "grades": grades,
                        "weight": exam_weight,
                        "type": "klausur"
                    }
                )
            if response.status_code in (200, 201):
                return LinkResult(
                    success=True,
                    message="Zeugnis-Daten aktualisiert",
                    target_url=f"/app?module=school&class={class_id}&tab=certificates"
                )
            return LinkResult(
                success=False,
                message=f"Fehler: {response.status_code}"
            )
        except Exception as e:
            return LinkResult(
                success=False,
                message=f"Verbindungsfehler: {str(e)}"
            )

    # =========================================================================
    # CALENDAR INTEGRATION
    # =========================================================================

    async def create_calendar_events(
        self,
        teacher_id: str,
        suggestions: List[ParentMeetingSuggestion],
        default_duration_minutes: int = 30
    ) -> List[CalendarEvent]:
        """
        Create calendar entries for suggested parent meetings.

        Slots start next Monday at 14:00 and are assigned back to back;
        once a slot would start at or after 18:00, scheduling continues
        on the following day at 14:00. Events are returned even when the
        calendar service cannot be reached (best effort push).

        Args:
            teacher_id: Teacher the events belong to.
            suggestions: Meeting suggestions (pseudonymized).
            default_duration_minutes: Duration per meeting slot.
        """
        events: List[CalendarEvent] = []
        # First slot: next Monday, 14:00 local time.
        start_date = datetime.now() + timedelta(days=7 - datetime.now().weekday())
        start_date = start_date.replace(hour=14, minute=0, second=0, microsecond=0)
        slot_index = 0
        for suggestion in suggestions:
            event_start = start_date + timedelta(minutes=slot_index * default_duration_minutes)
            event_end = event_start + timedelta(minutes=default_duration_minutes)
            # Roll over to the next day once the afternoon window is full.
            if event_start.hour >= 18:
                start_date += timedelta(days=1)
                start_date = start_date.replace(hour=14)
                slot_index = 0
                event_start = start_date
                event_end = event_start + timedelta(minutes=default_duration_minutes)
            events.append(CalendarEvent(
                id=f"meeting-{suggestion.doc_token[:8]}",
                title=f"Elterngespraech ({suggestion.grade})",
                description=f"Anlass: {suggestion.reason}\n\nThemen:\n" +
                            "\n".join(f"- {t}" for t in suggestion.suggested_topics),
                start_time=event_start,
                end_time=event_end,
                event_type="parent_meeting",
                linked_doc_tokens=[suggestion.doc_token]
            ))
            slot_index += 1
        # Push events to the calendar service.
        try:
            async with httpx.AsyncClient(timeout=10.0) as client:
                for event in events:
                    await client.post(
                        f"{self.calendar_service_url}/api/events",
                        json={
                            "teacher_id": teacher_id,
                            "title": event.title,
                            "description": event.description,
                            "start": event.start_time.isoformat(),
                            "end": event.end_time.isoformat(),
                            "type": event.event_type,
                            "metadata": {
                                "doc_tokens": event.linked_doc_tokens
                            }
                        }
                    )
        except Exception as e:
            # FIX: log via the logging framework instead of print() so
            # failures show up in the service logs.
            logging.getLogger(__name__).warning(
                "[ModuleLinker] Calendar service error: %s", e
            )
        return events

    # =========================================================================
    # STATISTICS
    # =========================================================================

    def calculate_grade_statistics(
        self,
        results: List[CorrectionResult]
    ) -> Dict[str, Any]:
        """
        Compute grade statistics for a set of correction results.

        Returns:
            Dict with average, distribution, median, pass/fail counts
            etc.; an empty dict for empty input.
        """
        if not results:
            return {}
        grades = [r.grade for r in results]
        points = [r.score for r in results]
        max_points = results[0].max_score if results else 100
        # Numeric average; unknown grades count as a neutral 4.0.
        numeric_grades = [self.GRADE_VALUES.get(g, 4.0) for g in grades]
        avg_grade = sum(numeric_grades) / len(numeric_grades)
        # Raw per-grade counts.
        distribution: Dict[str, int] = {}
        for grade in grades:
            distribution[grade] = distribution.get(grade, 0) + 1
        # Coarse counts per full grade (display labels are user-facing).
        percent_distribution = {
            "sehr gut (1)": sum(1 for g in grades if g.startswith("1")),
            "gut (2)": sum(1 for g in grades if g.startswith("2")),
            "befriedigend (3)": sum(1 for g in grades if g.startswith("3")),
            "ausreichend (4)": sum(1 for g in grades if g.startswith("4")),
            "mangelhaft (5)": sum(1 for g in grades if g.startswith("5")),
            "ungenuegend (6)": sum(1 for g in grades if g == "6")
        }
        avg_points = sum(points) / len(points)
        # Guard against a zero max score (would raise ZeroDivisionError).
        avg_percent = round(avg_points / max_points * 100, 1) if max_points else 0.0
        return {
            "count": len(results),
            "average_grade": round(avg_grade, 2),
            "average_grade_display": self._numeric_to_grade(avg_grade),
            "average_points": round(avg_points, 1),
            "max_points": max_points,
            "average_percent": avg_percent,
            # FIX: the worst-grade lookup previously defaulted unknown
            # grades to 0 (the "best" value), so they could never be
            # selected as worst. Unknown grades now pessimistically
            # count as 6.0 in both lookups.
            "best_grade": min(grades, key=lambda g: self.GRADE_VALUES.get(g, 6.0)),
            "worst_grade": max(grades, key=lambda g: self.GRADE_VALUES.get(g, 6.0)),
            "median_grade": self._calculate_median_grade(grades),
            "distribution": distribution,
            "percent_distribution": percent_distribution,
            "passing_count": sum(1 for g in grades if not g.startswith("5") and g != "6"),
            "failing_count": sum(1 for g in grades if g.startswith("5") or g == "6")
        }

    def _numeric_to_grade(self, value: float) -> str:
        """Map a numeric grade value back to its display grade."""
        for upper_bound, grade in self._GRADE_CUTOFFS:
            if value <= upper_bound:
                return grade
        return "6"

    def _calculate_median_grade(self, grades: List[str]) -> str:
        """Return the median grade of a non-empty grade list."""
        numeric = sorted(self.GRADE_VALUES.get(g, 4.0) for g in grades)
        n = len(numeric)
        if n % 2 == 0:
            median = (numeric[n // 2 - 1] + numeric[n // 2]) / 2
        else:
            median = numeric[n // 2]
        return self._numeric_to_grade(median)
# Singleton (lazily created by get_module_linker)
_module_linker: Optional[ModuleLinker] = None
def get_module_linker() -> ModuleLinker:
    """Return the process-wide ModuleLinker, creating it on first use."""
    global _module_linker
    linker = _module_linker
    if linker is None:
        linker = ModuleLinker()
        _module_linker = linker
    return linker

View File

@@ -0,0 +1,424 @@
"""
Background Processing Service for Klausur Correction.
Orchestrates the complete correction pipeline:
1. Load documents from storage
2. Run TrOCR for text extraction
3. Run AI correction for grading
4. Save results to database
PRIVACY BY DESIGN:
- Only pseudonymized doc_tokens used throughout
- No student names in processing pipeline
- All data stays on self-hosted infrastructure
"""
import asyncio
import logging
from datetime import datetime
from typing import Optional, List, Callable
from dataclasses import dataclass
from sqlalchemy.orm import Session
from ..db_models import (
ExamSession, PseudonymizedDocument,
SessionStatus, DocumentStatus
)
from ..repository import KlausurRepository
from .trocr_client import get_trocr_client, TrOCRClient
from .vision_ocr_service import get_vision_ocr_service, VisionOCRService
from .correction_service import (
get_correction_service, ExamCorrectionService,
QuestionRubric, CorrectionResult
)
from .storage_service import get_storage_service, KlausurStorageService
logger = logging.getLogger(__name__)
@dataclass
class ProcessingProgress:
    """Progress snapshot streamed to the client via SSE."""
    session_id: str
    total_documents: int
    processed_documents: int
    current_document: Optional[str] = None  # truncated doc_token of the current document
    current_step: str = "idle"  # ocr, correction, saving
    error: Optional[str] = None

    @property
    def percentage(self) -> int:
        """Completed fraction as a truncated integer percentage (0 when empty)."""
        return (
            int(self.processed_documents / self.total_documents * 100)
            if self.total_documents != 0
            else 0
        )
class ProcessingService:
    """
    Background service for exam correction processing.

    Pipeline per document: load image from storage -> OCR (Vision-LLM or
    TrOCR) -> optional AI correction -> persist results. Only
    pseudonymized doc_tokens appear in logs and processing.

    Usage:
        service = ProcessingService(db_session)
        await service.process_session(session_id, teacher_id)
    """
    def __init__(
        self,
        db: Session,
        trocr_client: Optional[TrOCRClient] = None,
        vision_ocr_service: Optional[VisionOCRService] = None,
        correction_service: Optional[ExamCorrectionService] = None,
        storage_service: Optional[KlausurStorageService] = None,
        prefer_vision_ocr: bool = True  # prefer the Vision-LLM for handwriting OCR
    ):
        # Dependencies are injectable for tests; defaults come from the
        # module-level singleton accessors.
        self.db = db
        self.repo = KlausurRepository(db)
        self.trocr = trocr_client or get_trocr_client()
        self.vision_ocr = vision_ocr_service or get_vision_ocr_service()
        self.correction = correction_service or get_correction_service()
        self.storage = storage_service or get_storage_service()
        self.prefer_vision_ocr = prefer_vision_ocr
        # Progress callback for SSE streaming (set via set_progress_callback)
        self._progress_callback: Optional[Callable[[ProcessingProgress], None]] = None
    def set_progress_callback(self, callback: Callable[[ProcessingProgress], None]):
        """Set callback for progress updates (SSE streaming)."""
        self._progress_callback = callback
    def _notify_progress(self, progress: ProcessingProgress):
        """Notify progress to callback if set; callback errors never abort processing."""
        if self._progress_callback:
            try:
                self._progress_callback(progress)
            except Exception as e:
                logger.warning(f"Progress callback failed: {e}")
    async def process_session(
        self,
        session_id: str,
        teacher_id: str,
        use_ai_correction: bool = True
    ) -> bool:
        """
        Process all documents in a session.

        A failing document is marked FAILED and does not stop the rest of
        the batch; the session is marked COMPLETED at the end either way.

        Args:
            session_id: Exam session ID
            teacher_id: Teacher ID for isolation
            use_ai_correction: Whether to run AI correction (requires LLM)

        Returns:
            True if processing completed successfully
        """
        # Get session (scoped to the teacher for isolation)
        session = self.repo.get_session(session_id, teacher_id)
        if not session:
            logger.error(f"Session not found: {session_id}")
            return False
        # Get documents
        documents = self.repo.list_documents(session_id, teacher_id)
        if not documents:
            logger.warning(f"No documents in session: {session_id}")
            return False
        total = len(documents)
        processed = 0
        logger.info(f"Starting processing for session {session_id}: {total} documents")
        # Check OCR service availability (Vision-LLM preferred for handwriting)
        vision_ocr_available = await self.vision_ocr.is_available()
        trocr_available = await self.trocr.is_available()
        if vision_ocr_available and self.prefer_vision_ocr:
            logger.info("Using Vision-LLM (llama3.2-vision) for OCR - optimal for handwriting")
            use_vision_ocr = True
        elif trocr_available:
            logger.info("Using TrOCR for OCR")
            use_vision_ocr = False
        elif vision_ocr_available:
            logger.info("TrOCR not available, falling back to Vision-LLM")
            use_vision_ocr = True
        else:
            logger.warning("No OCR service available - OCR will be skipped")
            use_vision_ocr = False
            # NOTE(review): trocr_available is already False on this branch;
            # this assignment is redundant but harmless.
            trocr_available = False
        # Process each document
        for doc in documents:
            # Only a truncated doc_token is exposed in progress updates.
            progress = ProcessingProgress(
                session_id=session_id,
                total_documents=total,
                processed_documents=processed,
                current_document=doc.doc_token[:8],
                current_step="ocr"
            )
            self._notify_progress(progress)
            try:
                # Step 1: OCR extraction (Vision-LLM or TrOCR); only for
                # documents still in the freshly-uploaded state.
                if (vision_ocr_available or trocr_available) and doc.status == DocumentStatus.UPLOADED:
                    await self._process_ocr(session_id, doc, teacher_id, use_vision_ocr=use_vision_ocr)
                # Step 2: AI correction (requires OCR text to exist)
                progress.current_step = "correction"
                self._notify_progress(progress)
                if use_ai_correction and doc.ocr_text:
                    await self._process_correction(session, doc, teacher_id)
                else:
                    # Just mark as completed without AI
                    self._mark_document_completed(doc, teacher_id)
                processed += 1
            except Exception as e:
                # One bad document must not abort the whole batch.
                logger.error(f"Failed to process document {doc.doc_token}: {e}")
                self._mark_document_failed(doc, str(e), teacher_id)
        # Update session status
        self.repo.update_session_status(session_id, teacher_id, SessionStatus.COMPLETED)
        # Final progress
        progress = ProcessingProgress(
            session_id=session_id,
            total_documents=total,
            processed_documents=processed,
            current_step="complete"
        )
        self._notify_progress(progress)
        logger.info(f"Completed processing session {session_id}: {processed}/{total} documents")
        return True
    async def _process_ocr(
        self,
        session_id: str,
        doc: PseudonymizedDocument,
        teacher_id: str,
        use_vision_ocr: bool = True
    ):
        """
        Run OCR on a document.

        OCR failures are recorded in the OCR text itself and the document
        still advances to OCR_COMPLETED so that later stages can run.

        Args:
            session_id: Session ID
            doc: Document to process
            teacher_id: Teacher ID
            use_vision_ocr: True to use Vision-LLM (llama3.2-vision), False for TrOCR
        """
        # Update status
        doc.status = DocumentStatus.OCR_PROCESSING
        doc.processing_started_at = datetime.utcnow()
        self.db.commit()
        # Try to get document from storage (check both redacted and original)
        image_data = None
        for is_redacted in [True, False]:  # Prefer redacted version (privacy)
            for ext in ["png", "jpg", "jpeg", "pdf"]:
                image_data = self.storage.get_document(
                    session_id, doc.doc_token, ext, is_redacted=is_redacted
                )
                if image_data:
                    logger.debug(f"Found document: {doc.doc_token[:8]}.{ext} (redacted={is_redacted})")
                    break
            if image_data:
                break
        if not image_data:
            logger.warning(f"No image found for document {doc.doc_token}")
            # Use placeholder OCR text for testing
            doc.ocr_text = "[Kein Bild gefunden - Manuelle Eingabe erforderlich]"
            doc.ocr_confidence = 0
            doc.status = DocumentStatus.OCR_COMPLETED
            self.db.commit()
            return
        # Call OCR service (Vision-LLM or TrOCR)
        try:
            if use_vision_ocr:
                # Use Vision-LLM (llama3.2-vision) - better for handwriting
                result = await self.vision_ocr.extract_text(
                    image_data,
                    filename=f"{doc.doc_token}.png",
                    is_handwriting=True  # Assume handwriting for exams
                )
                ocr_method = "Vision-LLM"
            else:
                # Use TrOCR
                result = await self.trocr.extract_text(
                    image_data,
                    filename=f"{doc.doc_token}.png",
                    detect_lines=True
                )
                ocr_method = "TrOCR"
            # Confidence is stored as an integer percentage (0-100).
            doc.ocr_text = result.text
            doc.ocr_confidence = int(result.confidence * 100)
            doc.status = DocumentStatus.OCR_COMPLETED
            logger.info(
                f"OCR completed ({ocr_method}) for {doc.doc_token[:8]}: "
                f"{len(result.text)} chars, {result.confidence:.0%} confidence"
            )
        except Exception as e:
            logger.error(f"OCR failed for {doc.doc_token}: {e}")
            doc.ocr_text = f"[OCR Fehler: {str(e)[:100]}]"
            doc.ocr_confidence = 0
            doc.status = DocumentStatus.OCR_COMPLETED  # Continue to AI anyway
        self.db.commit()
    async def _process_correction(
        self,
        session: ExamSession,
        doc: PseudonymizedDocument,
        teacher_id: str
    ):
        """Run AI correction on a document; failures still mark it COMPLETED."""
        doc.status = DocumentStatus.AI_PROCESSING
        self.db.commit()
        # Build rubrics from session questions
        rubrics = self._build_rubrics(session)
        if not rubrics:
            # No rubrics defined - use simple scoring
            doc.ai_feedback = "Keine Bewertungskriterien definiert. Manuelle Korrektur empfohlen."
            doc.ai_score = None
            doc.ai_grade = None
            doc.status = DocumentStatus.COMPLETED
            doc.processing_completed_at = datetime.utcnow()
            self.db.commit()
            # Update session stats
            session.processed_count += 1
            self.db.commit()
            return
        try:
            # Run AI correction
            result = await self.correction.correct_exam(
                doc_token=doc.doc_token,
                ocr_text=doc.ocr_text,
                rubrics=rubrics,
                subject=session.subject or "Allgemein"
            )
            # Save results (per-question details go into a JSON column)
            doc.ai_feedback = result.overall_feedback
            doc.ai_score = result.total_score
            doc.ai_grade = result.grade
            doc.ai_details = {
                "max_score": result.max_score,
                "processing_time_ms": result.processing_time_ms,
                "questions": [
                    {
                        "number": q.question_number,
                        "points": q.points_awarded,
                        "max_points": q.max_points,
                        "feedback": q.feedback,
                        "strengths": q.strengths,
                        "improvements": q.improvements
                    }
                    for q in result.question_results
                ]
            }
            doc.status = DocumentStatus.COMPLETED
            doc.processing_completed_at = datetime.utcnow()
            logger.info(
                f"Correction completed for {doc.doc_token[:8]}: "
                f"{result.total_score}/{result.max_score} ({result.grade})"
            )
        except Exception as e:
            logger.error(f"AI correction failed for {doc.doc_token}: {e}")
            doc.ai_feedback = f"KI-Korrektur fehlgeschlagen: {str(e)[:200]}"
            doc.status = DocumentStatus.COMPLETED  # Mark complete anyway
            doc.processing_completed_at = datetime.utcnow()
        # Update session stats
        session.processed_count += 1
        self.db.commit()
    def _build_rubrics(self, session: ExamSession) -> List[QuestionRubric]:
        """Build QuestionRubric list from session questions (empty when none defined)."""
        rubrics = []
        if not session.questions:
            return rubrics
        for i, q in enumerate(session.questions):
            # Missing fields fall back to sensible defaults; a per-question
            # rubric overrides the session-wide one.
            rubric = QuestionRubric(
                question_number=q.get("number", i + 1),
                question_text=q.get("text", f"Frage {i + 1}"),
                max_points=q.get("points", 10),
                expected_answer=q.get("expected_answer", ""),
                grading_criteria=q.get("rubric", session.rubric or "")
            )
            rubrics.append(rubric)
        return rubrics
    def _mark_document_completed(
        self,
        doc: PseudonymizedDocument,
        teacher_id: str
    ):
        """Mark document as completed without AI correction."""
        doc.status = DocumentStatus.COMPLETED
        doc.processing_completed_at = datetime.utcnow()
        if not doc.ai_feedback:
            doc.ai_feedback = "Verarbeitung abgeschlossen (ohne KI-Korrektur)"
        self.db.commit()
        # Update session stats
        if doc.session:
            doc.session.processed_count += 1
            self.db.commit()
    def _mark_document_failed(
        self,
        doc: PseudonymizedDocument,
        error: str,
        teacher_id: str
    ):
        """Mark document as failed, storing a truncated error message."""
        doc.status = DocumentStatus.FAILED
        doc.processing_error = error[:500]  # keep DB column within bounds
        doc.processing_completed_at = datetime.utcnow()
        self.db.commit()
# Background task function for FastAPI
async def process_session_background(
    session_id: str,
    teacher_id: str,
    db_url: str
):
    """
    Entry point for FastAPI background tasks.

    Opens a dedicated database session (background tasks must not reuse
    the request-scoped session) and runs the full processing pipeline,
    closing the session afterwards in all cases.
    """
    from ..database import SessionLocal
    db = SessionLocal()
    try:
        await ProcessingService(db).process_session(session_id, teacher_id)
    finally:
        db.close()
# Singleton for main service
# NOTE(review): this slot is never assigned anywhere in this file -
# get_processing_service always returns a fresh instance per DB session.
_processing_service: Optional[ProcessingService] = None
def get_processing_service(db: Session) -> ProcessingService:
    """Get processing service instance bound to the given DB session (new per call)."""
    return ProcessingService(db)

View File

@@ -0,0 +1,376 @@
"""
Pseudonymization Service for Klausurkorrektur.
Implements privacy-by-design principles:
- QR code generation with random doc_tokens
- Header redaction to remove personal data before OCR
- No student identity data leaves the teacher's device
DSGVO Art. 4 Nr. 5 Compliance:
The doc_token is a 128-bit random UUID that cannot be used to
identify a student without the encrypted identity map.
"""
import uuid
import io
import logging
from typing import List, Tuple, Optional
from dataclasses import dataclass
from PIL import Image, ImageDraw, ImageFont
logger = logging.getLogger(__name__)
# Optional imports (graceful fallback if not installed)
try:
import qrcode
HAS_QRCODE = True
except ImportError:
HAS_QRCODE = False
logger.warning("qrcode not installed - QR generation disabled")
try:
import cv2
import numpy as np
HAS_CV2 = True
except ImportError:
HAS_CV2 = False
logger.warning("opencv-python not installed - image processing disabled")
try:
from pyzbar.pyzbar import decode as pyzbar_decode
HAS_PYZBAR = True
except ImportError:
HAS_PYZBAR = False
logger.warning("pyzbar not installed - QR reading disabled")
@dataclass
class RedactionResult:
    """Result of header redaction."""
    redacted_image: bytes     # image bytes after redaction
    original_height: int      # image height before redaction (presumably pixels - confirm)
    redacted_height: int      # height after/of redaction - confirm against producer
    redaction_applied: bool   # False when redaction was skipped or unavailable
@dataclass
class QRDetectionResult:
    """Result of QR code detection."""
    doc_token: Optional[str]  # decoded token; None when no QR code was found
    confidence: float         # detection confidence (presumably 0..1 - confirm)
    bbox: Optional[Tuple[int, int, int, int]]  # x, y, width, height
class PseudonymizationService:
    """
    Service for document pseudonymization.
    PRIVACY GUARANTEES:
    1. doc_tokens are cryptographically random (UUID4)
    2. No deterministic relationship between token and student
    3. Header redaction removes visible personal data
    4. Identity mapping is encrypted client-side
    """
    # Default header height to redact (in pixels, assuming 300 DPI scan)
    DEFAULT_HEADER_HEIGHT = 300 # ~1 inch / 2.5cm
    @staticmethod
    def generate_doc_token() -> str:
        """
        Generate a cryptographically random document token.
        Uses UUID4 which provides 122 bits of randomness.
        This ensures no correlation between tokens is possible.
        """
        return str(uuid.uuid4())
    @staticmethod
    def generate_batch_tokens(count: int) -> List[str]:
        """Generate multiple unique doc_tokens."""
        return [PseudonymizationService.generate_doc_token() for _ in range(count)]
    def generate_qr_code(
        self,
        doc_token: str,
        size: int = 200,
        border: int = 2
    ) -> bytes:
        """
        Generate a QR code image for a doc_token.
        Args:
            doc_token: The pseudonymization token
            size: Size of the QR code in pixels
            border: Border size in QR modules
        Returns:
            PNG image as bytes
        Raises:
            RuntimeError: if the optional qrcode dependency is missing
        """
        if not HAS_QRCODE:
            raise RuntimeError("qrcode library not installed")
        qr = qrcode.QRCode(
            version=1,
            error_correction=qrcode.constants.ERROR_CORRECT_M,
            box_size=10,
            border=border,
        )
        qr.add_data(doc_token)
        qr.make(fit=True)
        img = qr.make_image(fill_color="black", back_color="white")
        # NOTE(review): assumes qrcode's PilImage forwards resize() to the
        # wrapped PIL image (resize then returns a plain PIL Image) — confirm
        # against the installed qrcode version.
        img = img.resize((size, size), Image.Resampling.LANCZOS)
        buffer = io.BytesIO()
        img.save(buffer, format="PNG")
        return buffer.getvalue()
    def generate_qr_sheet(
        self,
        doc_tokens: List[str],
        page_size: Tuple[int, int] = (2480, 3508), # A4 at 300 DPI
        qr_size: int = 200,
        margin: int = 100,
        labels: Optional[List[str]] = None
    ) -> bytes:
        """
        Generate a printable sheet of QR codes.
        Tokens that do not fit into the page grid are dropped with a warning
        (no second page is generated).
        Args:
            doc_tokens: List of tokens to generate QR codes for
            page_size: Page dimensions (width, height) in pixels
            qr_size: Size of each QR code
            margin: Page margin
            labels: Optional labels (e.g., "Nr. 1", "Nr. 2") - NO student names!
        Returns:
            PNG image of the full sheet
        """
        if not HAS_QRCODE:
            raise RuntimeError("qrcode library not installed")
        width, height = page_size
        img = Image.new('RGB', (width, height), 'white')
        draw = ImageDraw.Draw(img)
        # Calculate grid: fixed-size cells with extra vertical room for labels.
        usable_width = width - 2 * margin
        usable_height = height - 2 * margin
        cell_width = qr_size + 50
        cell_height = qr_size + 80 # Extra space for label
        cols = usable_width // cell_width
        rows = usable_height // cell_height
        # Try to load a font (fallback to default)
        try:
            font = ImageFont.truetype("/usr/share/fonts/truetype/dejavu/DejaVuSans.ttf", 16)
        except (IOError, OSError):
            font = ImageFont.load_default()
        # Generate QR codes, filling the grid row by row.
        for i, token in enumerate(doc_tokens):
            if i >= cols * rows:
                logger.warning(f"Sheet full, skipping {len(doc_tokens) - i} tokens")
                break
            row = i // cols
            col = i % cols
            x = margin + col * cell_width
            y = margin + row * cell_height
            # Generate QR code
            qr_bytes = self.generate_qr_code(token, qr_size)
            qr_img = Image.open(io.BytesIO(qr_bytes))
            img.paste(qr_img, (x, y))
            # Add label (number only, NO names)
            label = labels[i] if labels and i < len(labels) else f"Nr. {i + 1}"
            draw.text((x, y + qr_size + 5), label, fill="black", font=font)
            # Add truncated token for verification (first 8 chars only)
            token_short = token[:8] + "..."
            draw.text((x, y + qr_size + 25), token_short, fill="gray", font=font)
        buffer = io.BytesIO()
        img.save(buffer, format="PNG")
        return buffer.getvalue()
    def detect_qr_code(self, image_bytes: bytes) -> QRDetectionResult:
        """
        Detect and decode QR code from an image.
        Payloads that do not parse as a UUID are skipped, so stray QR codes
        on a page cannot be mistaken for doc_tokens.
        Args:
            image_bytes: Image data (PNG, JPEG, etc.)
        Returns:
            QRDetectionResult with doc_token if found; an empty result when
            pyzbar is unavailable or nothing valid was decoded
        """
        if not HAS_PYZBAR:
            return QRDetectionResult(
                doc_token=None,
                confidence=0.0,
                bbox=None
            )
        try:
            img = Image.open(io.BytesIO(image_bytes))
            # Decode QR codes (pyzbar may return several symbols per image)
            decoded = pyzbar_decode(img)
            for obj in decoded:
                if obj.type == 'QRCODE':
                    token = obj.data.decode('utf-8')
                    # Validate it looks like a UUID
                    try:
                        uuid.UUID(token)
                        rect = obj.rect
                        return QRDetectionResult(
                            doc_token=token,
                            confidence=1.0,
                            bbox=(rect.left, rect.top, rect.width, rect.height)
                        )
                    except ValueError:
                        continue
            return QRDetectionResult(doc_token=None, confidence=0.0, bbox=None)
        except Exception as e:
            logger.error(f"QR detection failed: {e}")
            return QRDetectionResult(doc_token=None, confidence=0.0, bbox=None)
    def redact_header(
        self,
        image_bytes: bytes,
        header_height: Optional[int] = None,
        fill_color: Tuple[int, int, int] = (255, 255, 255)
    ) -> RedactionResult:
        """
        Redact the header area of a scanned exam page.
        This removes the area where student name/class/date typically appears.
        The redaction is permanent - no original data is preserved.
        On any failure the ORIGINAL bytes are returned with
        redaction_applied=False, so callers must check that flag.
        Args:
            image_bytes: Original scanned image
            header_height: Height in pixels to redact (None = auto-detect)
            fill_color: RGB color to fill redacted area (default: white)
        Returns:
            RedactionResult with redacted image
        """
        try:
            img = Image.open(io.BytesIO(image_bytes))
            width, height = img.size
            # Determine header height
            redact_height = header_height or self.DEFAULT_HEADER_HEIGHT
            # Create a copy and redact header
            # NOTE(review): fill_color is an RGB tuple; a grayscale ('L') scan
            # may reject it — confirm the expected input modes.
            redacted = img.copy()
            draw = ImageDraw.Draw(redacted)
            draw.rectangle([(0, 0), (width, redact_height)], fill=fill_color)
            # Save result
            buffer = io.BytesIO()
            redacted.save(buffer, format="PNG")
            return RedactionResult(
                redacted_image=buffer.getvalue(),
                original_height=height,
                redacted_height=redact_height,
                redaction_applied=True
            )
        except Exception as e:
            logger.error(f"Header redaction failed: {e}")
            return RedactionResult(
                redacted_image=image_bytes,
                original_height=0,
                redacted_height=0,
                redaction_applied=False
            )
    def smart_redact_header(
        self,
        image_bytes: bytes,
        preserve_qr: bool = True
    ) -> RedactionResult:
        """
        Smart header redaction that detects text regions.
        Uses OCR confidence to identify and redact only the header
        area containing personal data. Falls back to the fixed-height
        redact_header() when OpenCV is unavailable or anything fails.
        Args:
            image_bytes: Original scanned image
            preserve_qr: If True, don't redact QR code areas
        Returns:
            RedactionResult with intelligently redacted image
        """
        if not HAS_CV2:
            # Fallback to simple redaction
            return self.redact_header(image_bytes)
        try:
            # Convert to OpenCV format
            nparr = np.frombuffer(image_bytes, np.uint8)
            img = cv2.imdecode(nparr, cv2.IMREAD_COLOR)
            height, width = img.shape[:2]
            # Detect QR code position if present
            qr_result = self.detect_qr_code(image_bytes)
            # Calculate redaction area (top portion of page)
            # Typically header is in top 10-15% of page
            header_height = int(height * 0.12)
            # If QR code is in header area, adjust redaction
            if preserve_qr and qr_result.bbox:
                qr_x, qr_y, qr_w, qr_h = qr_result.bbox
                if qr_y < header_height:
                    # QR is in header - redact around it
                    # Create mask: 255 = redact, 0 = keep (QR area)
                    mask = np.ones((header_height, width), dtype=np.uint8) * 255
                    # Leave QR area unredacted
                    mask[max(0, qr_y):min(header_height, qr_y + qr_h),
                         max(0, qr_x):min(width, qr_x + qr_w)] = 0
                    # Apply white fill where mask is 255 (the slice is a view,
                    # so the assignment writes through into img)
                    img[:header_height][mask == 255] = [255, 255, 255]
                else:
                    # QR not in header - simple redaction
                    img[:header_height] = [255, 255, 255]
            else:
                # Simple header redaction
                img[:header_height] = [255, 255, 255]
            # Encode result
            _, buffer = cv2.imencode('.png', img)
            return RedactionResult(
                redacted_image=buffer.tobytes(),
                original_height=height,
                redacted_height=header_height,
                redaction_applied=True
            )
        except Exception as e:
            logger.error(f"Smart redaction failed: {e}")
            return self.redact_header(image_bytes)
# Module-level singleton holder, populated lazily on first access.
_pseudonymizer: Optional[PseudonymizationService] = None
def get_pseudonymizer() -> PseudonymizationService:
    """Return the shared PseudonymizationService, creating it on first use."""
    global _pseudonymizer
    if _pseudonymizer is not None:
        return _pseudonymizer
    _pseudonymizer = PseudonymizationService()
    return _pseudonymizer

View File

@@ -0,0 +1,502 @@
"""
Roster Parser Service - Klassenbuch und Schuelerlisten parsen.
Unterstuetzt:
- Klassenbuch-Fotos (OCR mit PaddleOCR)
- PDF-Schuelerlisten (SchILD, ASV, etc.)
- CSV-Dateien
- Manuelle Eingabe
Privacy-First:
- Alle Verarbeitung serverseitig (kein externer Upload)
- Daten bleiben im Lehrer-Namespace
"""
import re
import csv
import io
from dataclasses import dataclass, field
from typing import List, Optional, Dict, Tuple
from difflib import SequenceMatcher
# Optionale Imports
try:
from services.file_processor import get_file_processor, ProcessingResult
HAS_OCR = True
except ImportError:
HAS_OCR = False
try:
import fitz # PyMuPDF
HAS_PDF = True
except ImportError:
HAS_PDF = False
@dataclass
class RosterEntry:
    """One entry in a student roster."""
    first_name: str
    last_name: str
    student_number: Optional[str] = None
    parent_email: Optional[str] = None
    parent_phone: Optional[str] = None
    birth_date: Optional[str] = None     # kept as the raw detected string (e.g. "01.02.2015")
    additional_data: Dict[str, str] = field(default_factory=dict)
@dataclass
class ParsedRoster:
    """Result of roster parsing."""
    entries: List[RosterEntry]
    source_type: str # klassenbuch, pdf, csv
    confidence: float            # 0.0 when nothing could be parsed
    warnings: List[str] = field(default_factory=list)
    raw_text: Optional[str] = None   # full extracted text, useful for debugging
@dataclass
class NameMatch:
    """Result of matching one detected name against the roster."""
    detected_name: str
    matched_entry: Optional[RosterEntry]   # None when no candidate cleared the threshold
    confidence: float
    match_type: str # exact, first_name, fuzzy, none
class RosterParser:
    """
    Parses class rosters from various sources.
    Example:
        parser = RosterParser()
        # Class-register (Klassenbuch) photo
        roster = parser.parse_klassenbuch_image(image_bytes)
        # PDF roster
        roster = parser.parse_pdf_roster(pdf_bytes)
        # Match names
        matches = parser.match_first_names(
            detected=["Max", "Anna", "Tim"],
            roster=roster.entries
        )
    """
    # Regex patterns for contact data
    EMAIL_PATTERN = re.compile(r'[\w.+-]+@[\w-]+\.[\w.-]+')
    PHONE_PATTERN = re.compile(r'(?:\+49|0)[\s.-]?\d{2,4}[\s.-]?\d{3,}[\s.-]?\d{2,}')
    DATE_PATTERN = re.compile(r'\b(\d{1,2})\.(\d{1,2})\.(\d{2,4})\b')
    # Common German first names (sample set; currently not referenced by the
    # parsing logic below — presumably intended for validation)
    COMMON_FIRST_NAMES = {
        'max', 'anna', 'tim', 'lena', 'paul', 'marie', 'felix', 'emma',
        'leon', 'sophia', 'lukas', 'mia', 'jonas', 'hannah', 'elias', 'emilia',
        'ben', 'lea', 'noah', 'lina', 'finn', 'amelie', 'luis', 'laura',
        'moritz', 'clara', 'henry', 'julia', 'julian', 'emily', 'david', 'johanna',
        'niklas', 'charlotte', 'simon', 'maja', 'alexander', 'sarah', 'jan', 'lisa',
        'tom', 'nele', 'luca', 'sophie', 'erik', 'alina', 'fabian', 'paula',
        'philipp', 'luisa', 'tobias', 'melina', 'vincent', 'lara', 'maximilian', 'elena'
    }
    def __init__(self):
        # OCR backend is optional; None disables image parsing gracefully.
        self.file_processor = get_file_processor() if HAS_OCR else None
    # =========================================================================
    # KLASSENBUCH PHOTO PARSING
    # =========================================================================
    def parse_klassenbuch_image(self, image_bytes: bytes) -> ParsedRoster:
        """
        Parse a class-register (Klassenbuch) photo via OCR.
        Args:
            image_bytes: image as bytes (PNG, JPG)
        Returns:
            ParsedRoster with extracted student data; empty with a warning
            when OCR is unavailable
        """
        if not HAS_OCR or not self.file_processor:
            return ParsedRoster(
                entries=[],
                source_type='klassenbuch',
                confidence=0.0,
                warnings=['OCR nicht verfuegbar (PaddleOCR nicht installiert)']
            )
        # Run OCR
        result: ProcessingResult = self.file_processor.process_file(
            image_bytes,
            filename='klassenbuch.png',
            processing_mode='ocr_handwriting'
        )
        # Split text into lines; very short lines are noise and skipped
        lines = result.text.split('\n')
        entries = []
        warnings = []
        for line in lines:
            line = line.strip()
            if not line or len(line) < 3:
                continue
            entry = self._parse_roster_line(line)
            if entry:
                entries.append(entry)
        return ParsedRoster(
            entries=entries,
            source_type='klassenbuch',
            confidence=result.confidence,
            warnings=warnings,
            raw_text=result.text
        )
    def _parse_roster_line(self, line: str) -> Optional[RosterEntry]:
        """Parse a single class-register line into a RosterEntry (or None)."""
        # Normalize whitespace
        line = re.sub(r'\s+', ' ', line).strip()
        # Strip a leading list number (e.g. "1. Max Mustermann")
        line = re.sub(r'^\d+[\.\)\s]+', '', line)
        # Extract email, then remove it from the line
        email_match = self.EMAIL_PATTERN.search(line)
        email = email_match.group() if email_match else None
        if email:
            line = line.replace(email, '')
        # Extract phone number
        phone_match = self.PHONE_PATTERN.search(line)
        phone = phone_match.group() if phone_match else None
        if phone:
            line = line.replace(phone, '')
        # Extract birth date (German dd.mm.yyyy format)
        date_match = self.DATE_PATTERN.search(line)
        birth_date = date_match.group() if date_match else None
        if birth_date:
            line = line.replace(birth_date, '')
        # Whatever remains is treated as the name
        line = re.sub(r'\s+', ' ', line).strip()
        if not line:
            return None
        first_name, last_name = self._parse_name(line)
        if not first_name:
            return None
        return RosterEntry(
            first_name=first_name,
            last_name=last_name or '',
            parent_email=email,
            parent_phone=phone,
            birth_date=birth_date
        )
    def _parse_name(self, text: str) -> Tuple[Optional[str], Optional[str]]:
        """
        Split a name into first and last name.
        Supported formats:
            - "Max Mustermann"
            - "Mustermann, Max"
            - "Max M."
            - "Max"
        Returns:
            (first_name, last_name) — either part may be None/empty
        """
        text = text.strip()
        if not text:
            return None, None
        # Format: "Lastname, Firstname"
        if ',' in text:
            parts = text.split(',', 1)
            last_name = parts[0].strip()
            first_name = parts[1].strip() if len(parts) > 1 else ''
            return first_name, last_name
        # Format: "Firstname Lastname" or just "Firstname"
        parts = text.split()
        if len(parts) == 1:
            return parts[0], None
        elif len(parts) == 2:
            return parts[0], parts[1]
        else:
            # First token is the first name, the rest is the last name
            return parts[0], ' '.join(parts[1:])
    # =========================================================================
    # PDF ROSTER PARSING
    # =========================================================================
    def parse_pdf_roster(self, pdf_bytes: bytes) -> ParsedRoster:
        """
        Parse a PDF student roster.
        Supports common school-administration exports:
        - SchILD-NRW
        - ASV (Bavaria)
        - Untis
        - Generic CSV-in-PDF
        Returns an empty roster with a warning if PyMuPDF is missing.
        """
        if not HAS_PDF:
            return ParsedRoster(
                entries=[],
                source_type='pdf',
                confidence=0.0,
                warnings=['PDF-Parsing nicht verfuegbar (PyMuPDF nicht installiert)']
            )
        entries = []
        warnings = []
        raw_text = ''
        try:
            doc = fitz.open(stream=pdf_bytes, filetype='pdf')
            for page in doc:
                text = page.get_text()
                raw_text += text + '\n'
                # Extract tables
                # NOTE(review): find_tables() returns a TableFinder object;
                # verify that `not tables` below is actually falsy when tables
                # exist (i.e. that it defines __len__/__bool__), otherwise the
                # line-wise fallback may never/always trigger.
                tables = page.find_tables()
                for table in tables:
                    df = table.to_pandas()
                    for _, row in df.iterrows():
                        entry = self._parse_table_row(row.to_dict())
                        if entry:
                            entries.append(entry)
                # No tables found: fall back to line-wise parsing
                if not tables:
                    for line in text.split('\n'):
                        entry = self._parse_roster_line(line)
                        if entry:
                            entries.append(entry)
            doc.close()
        except Exception as e:
            warnings.append(f'PDF-Parsing Fehler: {str(e)}')
        # Remove duplicates (same first+last name across pages/tables)
        entries = self._deduplicate_entries(entries)
        return ParsedRoster(
            entries=entries,
            source_type='pdf',
            confidence=0.9 if entries else 0.0,
            warnings=warnings,
            raw_text=raw_text
        )
    def _parse_table_row(self, row: Dict) -> Optional[RosterEntry]:
        """Parse one table row (column-name keyed dict) into a RosterEntry."""
        # Column mappings (various export formats)
        name_columns = ['name', 'schueler', 'schüler', 'student', 'nachname', 'last_name']
        first_name_columns = ['vorname', 'first_name', 'firstname']
        email_columns = ['email', 'e-mail', 'mail', 'eltern_email', 'parent_email']
        phone_columns = ['telefon', 'phone', 'tel', 'handy', 'mobile', 'eltern_tel']
        first_name = None
        last_name = None
        email = None
        phone = None
        for key, value in row.items():
            if not value or str(value).strip() == '':
                continue
            key_lower = str(key).lower()
            value_str = str(value).strip()
            # first_name columns are checked before the generic name columns on
            # purpose: 'vorname' also contains the substring 'name'
            if any(col in key_lower for col in first_name_columns):
                first_name = value_str
            elif any(col in key_lower for col in name_columns):
                # May be "Firstname Lastname" or just "Lastname"
                if first_name:
                    last_name = value_str
                else:
                    first_name, last_name = self._parse_name(value_str)
            elif any(col in key_lower for col in email_columns):
                if self.EMAIL_PATTERN.match(value_str):
                    email = value_str
            elif any(col in key_lower for col in phone_columns):
                phone = value_str
        if not first_name:
            return None
        return RosterEntry(
            first_name=first_name,
            last_name=last_name or '',
            parent_email=email,
            parent_phone=phone
        )
    # =========================================================================
    # CSV PARSING
    # =========================================================================
    def parse_csv_roster(self, csv_content: str) -> ParsedRoster:
        """
        Parse a CSV student roster.
        Args:
            csv_content: CSV as a string
        Returns:
            ParsedRoster
        """
        entries = []
        warnings = []
        try:
            # Sniff the delimiter from the first KB
            dialect = csv.Sniffer().sniff(csv_content[:1024])
            reader = csv.DictReader(io.StringIO(csv_content), dialect=dialect)
            for row in reader:
                entry = self._parse_table_row(row)
                if entry:
                    entries.append(entry)
        except csv.Error as e:
            warnings.append(f'CSV-Parsing Fehler: {str(e)}')
            # Fallback: parse line by line with the free-text heuristics
            for line in csv_content.split('\n'):
                entry = self._parse_roster_line(line)
                if entry:
                    entries.append(entry)
        return ParsedRoster(
            entries=entries,
            source_type='csv',
            confidence=0.95 if entries else 0.0,
            warnings=warnings,
            raw_text=csv_content
        )
    # =========================================================================
    # NAME MATCHING
    # =========================================================================
    def match_first_names(
        self,
        detected: List[str],
        roster: List[RosterEntry],
        threshold: float = 0.7
    ) -> List[NameMatch]:
        """
        Match detected first names to roster entries.
        Each roster entry is used at most once (greedy, in detection order).
        Args:
            detected: list of detected first names (e.g. ["Max", "Anna"])
            roster: the full student roster
            threshold: minimum confidence for fuzzy matching
        Returns:
            one NameMatch per detected name (match_type 'none' when unmatched)
        """
        matches = []
        used_entries = set()
        for name in detected:
            name_lower = name.lower().strip()
            best_match = None
            best_confidence = 0.0
            match_type = 'none'
            for i, entry in enumerate(roster):
                if i in used_entries:
                    continue
                entry_first_lower = entry.first_name.lower().strip()
                # Exact match wins immediately
                if name_lower == entry_first_lower:
                    best_match = entry
                    best_confidence = 1.0
                    match_type = 'exact'
                    used_entries.add(i)
                    break
                # Prefix match (e.g. "Max" matches "Maximilian");
                # confidence is the length ratio of the two names
                if entry_first_lower.startswith(name_lower) or name_lower.startswith(entry_first_lower):
                    confidence = min(len(name_lower), len(entry_first_lower)) / max(len(name_lower), len(entry_first_lower))
                    if confidence > best_confidence and confidence >= threshold:
                        best_match = entry
                        best_confidence = confidence
                        match_type = 'first_name'
                # Fuzzy match via SequenceMatcher ratio
                ratio = SequenceMatcher(None, name_lower, entry_first_lower).ratio()
                if ratio > best_confidence and ratio >= threshold:
                    best_match = entry
                    best_confidence = ratio
                    match_type = 'fuzzy'
            if best_match and match_type != 'exact':
                # Mark the chosen entry as used (exact matches did this above)
                for i, entry in enumerate(roster):
                    if entry is best_match:
                        used_entries.add(i)
                        break
            matches.append(NameMatch(
                detected_name=name,
                matched_entry=best_match,
                confidence=best_confidence,
                match_type=match_type
            ))
        return matches
    # =========================================================================
    # HELPERS
    # =========================================================================
    def _deduplicate_entries(self, entries: List[RosterEntry]) -> List[RosterEntry]:
        """Remove duplicates keyed on (first_name, last_name), case-insensitive."""
        seen = set()
        unique = []
        for entry in entries:
            key = (entry.first_name.lower(), entry.last_name.lower())
            if key not in seen:
                seen.add(key)
                unique.append(entry)
        return unique
    def validate_entry(self, entry: RosterEntry) -> List[str]:
        """Validate a RosterEntry and return a list of warning strings (German)."""
        warnings = []
        # Check first name
        if not entry.first_name:
            warnings.append('Kein Vorname')
        elif len(entry.first_name) < 2:
            warnings.append('Vorname zu kurz')
        # Validate email
        if entry.parent_email and not self.EMAIL_PATTERN.match(entry.parent_email):
            warnings.append('Ungueltige Email-Adresse')
        return warnings
# Module-level singleton holder, created lazily.
_roster_parser: Optional[RosterParser] = None
def get_roster_parser() -> RosterParser:
    """Return the process-wide RosterParser, instantiating it on first call."""
    global _roster_parser
    if _roster_parser is not None:
        return _roster_parser
    _roster_parser = RosterParser()
    return _roster_parser

View File

@@ -0,0 +1,613 @@
"""
School Resolver Service - Schul-Auswahl und Klassen-Erstellung.
Funktionen:
- Bundesland -> Schulform -> Schule Kaskade
- Auto-Erstellung von Klassen aus erkannten Daten
- Integration mit Go School Service (Port 8084)
Privacy:
- Schuldaten sind Stammdaten (kein DSGVO-Problem)
- Schueler-Erstellung nur im Lehrer-Namespace
"""
import httpx
import os
from dataclasses import dataclass, field
from typing import List, Optional, Dict, Any
from enum import Enum
# ============================================================================
# CONSTANTS
# ============================================================================
# German federal states, keyed by the official two-letter code.
BUNDESLAENDER = {
    "BW": "Baden-Wuerttemberg",
    "BY": "Bayern",
    "BE": "Berlin",
    "BB": "Brandenburg",
    "HB": "Bremen",
    "HH": "Hamburg",
    "HE": "Hessen",
    "MV": "Mecklenburg-Vorpommern",
    "NI": "Niedersachsen",
    "NW": "Nordrhein-Westfalen",
    "RP": "Rheinland-Pfalz",
    "SL": "Saarland",
    "SN": "Sachsen",
    "ST": "Sachsen-Anhalt",
    "SH": "Schleswig-Holstein",
    "TH": "Thueringen"
}
# School types with display name, the grade levels they cover, and a short code.
SCHULFORMEN = {
    "grundschule": {
        "name": "Grundschule",
        "grades": [1, 2, 3, 4],
        "short": "GS"
    },
    "hauptschule": {
        "name": "Hauptschule",
        "grades": [5, 6, 7, 8, 9, 10],
        "short": "HS"
    },
    "realschule": {
        "name": "Realschule",
        "grades": [5, 6, 7, 8, 9, 10],
        "short": "RS"
    },
    "gymnasium": {
        "name": "Gymnasium",
        "grades": [5, 6, 7, 8, 9, 10, 11, 12, 13],
        "short": "GYM"
    },
    "gesamtschule": {
        "name": "Gesamtschule",
        "grades": [5, 6, 7, 8, 9, 10, 11, 12, 13],
        "short": "IGS"
    },
    "oberschule": {
        "name": "Oberschule",
        "grades": [5, 6, 7, 8, 9, 10],
        "short": "OBS"
    },
    "sekundarschule": {
        "name": "Sekundarschule",
        "grades": [5, 6, 7, 8, 9, 10],
        "short": "SEK"
    },
    "foerderschule": {
        "name": "Foerderschule",
        "grades": [1, 2, 3, 4, 5, 6, 7, 8, 9, 10],
        "short": "FS"
    },
    "berufsschule": {
        "name": "Berufsschule",
        "grades": [10, 11, 12, 13],
        "short": "BS"
    },
    "fachoberschule": {
        "name": "Fachoberschule",
        "grades": [11, 12, 13],
        "short": "FOS"
    }
}
# Subjects with their standard German names and abbreviations
FAECHER = {
    "mathematik": {"name": "Mathematik", "short": "Ma"},
    "deutsch": {"name": "Deutsch", "short": "De"},
    "englisch": {"name": "Englisch", "short": "En"},
    "franzoesisch": {"name": "Franzoesisch", "short": "Fr"},
    "spanisch": {"name": "Spanisch", "short": "Sp"},
    "latein": {"name": "Latein", "short": "La"},
    "physik": {"name": "Physik", "short": "Ph"},
    "chemie": {"name": "Chemie", "short": "Ch"},
    "biologie": {"name": "Biologie", "short": "Bio"},
    "geschichte": {"name": "Geschichte", "short": "Ge"},
    "erdkunde": {"name": "Erdkunde", "short": "Ek"},
    "politik": {"name": "Politik", "short": "Po"},
    "wirtschaft": {"name": "Wirtschaft", "short": "Wi"},
    "kunst": {"name": "Kunst", "short": "Ku"},
    "musik": {"name": "Musik", "short": "Mu"},
    "sport": {"name": "Sport", "short": "Sp"},
    "religion": {"name": "Religion", "short": "Re"},
    "ethik": {"name": "Ethik", "short": "Et"},
    "informatik": {"name": "Informatik", "short": "If"},
    "sachunterricht": {"name": "Sachunterricht", "short": "SU"}
}
# ============================================================================
# DATA CLASSES
# ============================================================================
@dataclass
class School:
    """School master data (no personal data)."""
    id: str
    name: str
    bundesland: str    # federal-state code, e.g. "NI"
    schulform: str     # school-type key, e.g. "grundschule"
    address: Optional[str] = None
    city: Optional[str] = None
@dataclass
class SchoolClass:
    """A school class."""
    id: str
    school_id: str
    name: str # e.g. "3a"
    grade_level: int # e.g. 3
    school_year: str # e.g. "2025/2026"
    teacher_id: str
    student_count: int = 0
@dataclass
class Student:
    """Student (master data; no PII in the exam-correction context)."""
    id: str
    class_id: str
    first_name: str
    last_name: str
    student_number: Optional[str] = None
@dataclass
class DetectedClassInfo:
    """Class information detected from scanned exams."""
    class_name: str # e.g. "3a"
    grade_level: Optional[int] = None # e.g. 3
    subject: Optional[str] = None
    date: Optional[str] = None
    students: List[Dict[str, str]] = field(default_factory=list)   # dicts with firstName/lastName keys
    confidence: float = 0.0
@dataclass
class SchoolContext:
    """Complete school context for one teacher."""
    teacher_id: str
    school: Optional[School] = None
    classes: List[SchoolClass] = field(default_factory=list)
    current_school_year: str = "2025/2026"
# ============================================================================
# SCHOOL RESOLVER
# ============================================================================
class SchoolResolver:
    """
    Manages school and class context.
    Talks to the Go school service over HTTP; on any service failure it falls
    back to local in-memory objects so the teacher workflow keeps working.
    Example:
        resolver = SchoolResolver()
        # School cascade
        schools = await resolver.search_schools("Niedersachsen", "Grundschule", "Jever")
        # Auto-create a class
        class_obj = await resolver.auto_create_class(
            teacher_id="teacher-123",
            school_id="school-456",
            detected_info=DetectedClassInfo(
                class_name="3a",
                students=[{"firstName": "Max"}, {"firstName": "Anna"}]
            )
        )
    """
    def __init__(self):
        self.school_service_url = os.getenv(
            "SCHOOL_SERVICE_URL",
            "http://school-service:8084"
        )
        # Fallback to local data when the service is unreachable
        # (in-memory only; lost on restart)
        self._local_schools: Dict[str, School] = {}
        self._local_classes: Dict[str, SchoolClass] = {}
    # =========================================================================
    # BUNDESLAND / SCHULFORM LOOKUP
    # =========================================================================
    def get_bundeslaender(self) -> Dict[str, str]:
        """Return all German federal states (code -> name)."""
        return BUNDESLAENDER
    def get_schulformen(self) -> Dict[str, Dict]:
        """Return all school types."""
        return SCHULFORMEN
    def get_faecher(self) -> Dict[str, Dict]:
        """Return all subjects."""
        return FAECHER
    def get_grades_for_schulform(self, schulform: str) -> List[int]:
        """Return the grade levels for a school type (all levels if unknown)."""
        if schulform in SCHULFORMEN:
            return SCHULFORMEN[schulform]["grades"]
        return list(range(1, 14)) # Default: all grade levels
    def detect_grade_from_class_name(self, class_name: str) -> Optional[int]:
        """
        Detect the grade level from a class name.
        Examples:
            - "3a" -> 3
            - "10b" -> 10
            - "Q1" -> 11
            - "EF" -> 10
        Returns None when the name matches no known format.
        """
        import re
        # Standard format: number + optional letter
        match = re.match(r'^(\d{1,2})[a-zA-Z]?$', class_name)
        if match:
            return int(match.group(1))
        # Upper-secondary formats (EF/Q1/Q2 etc.)
        upper_grades = {
            'ef': 10, 'e': 10,
            'q1': 11, 'q2': 12,
            'k1': 11, 'k2': 12,
            '11': 11, '12': 12, '13': 13
        }
        class_lower = class_name.lower()
        if class_lower in upper_grades:
            return upper_grades[class_lower]
        return None
    def normalize_subject(self, detected_subject: str) -> Optional[str]:
        """
        Normalize a detected subject name to a FAECHER key.
        Example: "Mathe" -> "mathematik"; returns None when unrecognized.
        """
        subject_lower = detected_subject.lower().strip()
        # Direct matches
        if subject_lower in FAECHER:
            return subject_lower
        # Abbreviations and variants
        subject_aliases = {
            'mathe': 'mathematik',
            'bio': 'biologie',
            'phy': 'physik',
            'che': 'chemie',
            'geo': 'erdkunde',
            'geographie': 'erdkunde',
            'powi': 'politik',
            'sowi': 'politik',
            'reli': 'religion',
            'info': 'informatik',
            'su': 'sachunterricht'
        }
        if subject_lower in subject_aliases:
            return subject_aliases[subject_lower]
        # Substring match (either direction, first hit wins)
        for key in FAECHER:
            if key.startswith(subject_lower) or subject_lower.startswith(key[:3]):
                return key
        return None
    # =========================================================================
    # SCHOOL SERVICE INTEGRATION
    # =========================================================================
    async def search_schools(
        self,
        bundesland: Optional[str] = None,
        schulform: Optional[str] = None,
        name_query: Optional[str] = None,
        limit: int = 20
    ) -> List[School]:
        """
        Search schools in the school service.
        Returns an empty list on any service error (school can then be
        created manually).
        Args:
            bundesland: federal-state code (e.g. "NI")
            schulform: school-type key (e.g. "grundschule")
            name_query: search term for the school name
            limit: max number of results
        """
        try:
            async with httpx.AsyncClient(timeout=10.0) as client:
                params = {}
                if bundesland:
                    params['state'] = bundesland
                if schulform:
                    params['type'] = schulform
                if name_query:
                    params['q'] = name_query
                params['limit'] = limit
                response = await client.get(
                    f"{self.school_service_url}/api/schools",
                    params=params
                )
                if response.status_code == 200:
                    data = response.json()
                    return [
                        School(
                            id=s['id'],
                            name=s['name'],
                            bundesland=s.get('state', bundesland or ''),
                            schulform=s.get('type', schulform or ''),
                            address=s.get('address'),
                            city=s.get('city')
                        )
                        for s in data.get('schools', [])
                    ]
        except Exception as e:
            # NOTE(review): consider logging instead of print for consistency
            # with the other services.
            print(f"[SchoolResolver] Service error: {e}")
        # Fallback: empty list (school can be created manually)
        return []
    async def get_or_create_school(
        self,
        teacher_id: str,
        bundesland: str,
        schulform: str,
        school_name: str,
        city: Optional[str] = None
    ) -> School:
        """
        Get or create a school.
        If a matching school exists it is returned; otherwise it is created
        via the service, or locally in memory when the service fails.
        """
        # Search first
        existing = await self.search_schools(
            bundesland=bundesland,
            schulform=schulform,
            name_query=school_name,
            limit=1
        )
        if existing:
            return existing[0]
        # Create a new one via the service
        try:
            async with httpx.AsyncClient(timeout=10.0) as client:
                response = await client.post(
                    f"{self.school_service_url}/api/schools",
                    json={
                        "name": school_name,
                        "state": bundesland,
                        "type": schulform,
                        "city": city,
                        "created_by": teacher_id
                    }
                )
                if response.status_code in (200, 201):
                    data = response.json()
                    return School(
                        id=data['id'],
                        name=school_name,
                        bundesland=bundesland,
                        schulform=schulform,
                        city=city
                    )
        except Exception as e:
            print(f"[SchoolResolver] Create school error: {e}")
        # Fallback: create a local in-memory school
        import uuid
        school_id = str(uuid.uuid4())
        school = School(
            id=school_id,
            name=school_name,
            bundesland=bundesland,
            schulform=schulform,
            city=city
        )
        self._local_schools[school_id] = school
        return school
    # =========================================================================
    # CLASS MANAGEMENT
    # =========================================================================
    async def get_classes_for_teacher(
        self,
        teacher_id: str,
        school_id: Optional[str] = None
    ) -> List[SchoolClass]:
        """Get all classes of a teacher (local fallback on service error)."""
        try:
            async with httpx.AsyncClient(timeout=10.0) as client:
                params = {"teacher_id": teacher_id}
                if school_id:
                    params["school_id"] = school_id
                response = await client.get(
                    f"{self.school_service_url}/api/classes",
                    params=params
                )
                if response.status_code == 200:
                    data = response.json()
                    return [
                        SchoolClass(
                            id=c['id'],
                            school_id=c.get('school_id', ''),
                            name=c['name'],
                            grade_level=c.get('grade_level', 0),
                            school_year=c.get('school_year', '2025/2026'),
                            teacher_id=teacher_id,
                            student_count=c.get('student_count', 0)
                        )
                        for c in data.get('classes', [])
                    ]
        except Exception as e:
            print(f"[SchoolResolver] Get classes error: {e}")
        # Fallback returns ALL locally created classes (not filtered by teacher)
        return list(self._local_classes.values())
    async def auto_create_class(
        self,
        teacher_id: str,
        school_id: str,
        detected_info: DetectedClassInfo,
        school_year: str = "2025/2026"
    ) -> SchoolClass:
        """
        Automatically create a class from detected data.
        Students are bulk-created in the service when available; on service
        failure the class exists only in local memory.
        Args:
            teacher_id: teacher ID
            school_id: school ID
            detected_info: information detected from exams
            school_year: school year
        """
        grade_level = detected_info.grade_level or self.detect_grade_from_class_name(
            detected_info.class_name
        ) or 0
        try:
            async with httpx.AsyncClient(timeout=10.0) as client:
                response = await client.post(
                    f"{self.school_service_url}/api/classes",
                    json={
                        "school_id": school_id,
                        "name": detected_info.class_name,
                        "grade_level": grade_level,
                        "school_year": school_year,
                        "teacher_id": teacher_id
                    }
                )
                if response.status_code in (200, 201):
                    data = response.json()
                    class_id = data['id']
                    # Add students
                    if detected_info.students:
                        await self._bulk_create_students(
                            class_id,
                            detected_info.students
                        )
                    return SchoolClass(
                        id=class_id,
                        school_id=school_id,
                        name=detected_info.class_name,
                        grade_level=grade_level,
                        school_year=school_year,
                        teacher_id=teacher_id,
                        student_count=len(detected_info.students)
                    )
        except Exception as e:
            print(f"[SchoolResolver] Create class error: {e}")
        # Fallback: local in-memory class (students are NOT stored locally)
        import uuid
        class_id = str(uuid.uuid4())
        school_class = SchoolClass(
            id=class_id,
            school_id=school_id,
            name=detected_info.class_name,
            grade_level=grade_level,
            school_year=school_year,
            teacher_id=teacher_id,
            student_count=len(detected_info.students)
        )
        self._local_classes[class_id] = school_class
        return school_class
    async def _bulk_create_students(
        self,
        class_id: str,
        students: List[Dict[str, str]]
    ) -> List[Student]:
        """Create several students in one service call; [] on failure."""
        created = []
        try:
            async with httpx.AsyncClient(timeout=10.0) as client:
                response = await client.post(
                    f"{self.school_service_url}/api/classes/{class_id}/students/bulk",
                    json={
                        "students": [
                            {
                                # Accept both camelCase and snake_case input keys
                                "first_name": s.get("firstName", s.get("first_name", "")),
                                "last_name": s.get("lastName", s.get("last_name", ""))
                            }
                            for s in students
                        ]
                    }
                )
                if response.status_code in (200, 201):
                    data = response.json()
                    created = [
                        Student(
                            id=s['id'],
                            class_id=class_id,
                            first_name=s['first_name'],
                            last_name=s.get('last_name', '')
                        )
                        for s in data.get('students', [])
                    ]
        except Exception as e:
            print(f"[SchoolResolver] Bulk create students error: {e}")
        return created
    # =========================================================================
    # CONTEXT MANAGEMENT
    # =========================================================================
    async def get_teacher_context(self, teacher_id: str) -> SchoolContext:
        """
        Get the full school context for a teacher.
        Includes school, classes, and the current school year. The school is
        derived from the first class by scanning an unfiltered school search.
        """
        context = SchoolContext(teacher_id=teacher_id)
        # Load classes
        classes = await self.get_classes_for_teacher(teacher_id)
        context.classes = classes
        # Derive the school from the first class
        if classes and classes[0].school_id:
            schools = await self.search_schools()
            for school in schools:
                if school.id == classes[0].school_id:
                    context.school = school
                    break
        return context
# Module-level singleton; created lazily on first access.
_school_resolver: Optional[SchoolResolver] = None
def get_school_resolver() -> SchoolResolver:
    """Return the process-wide SchoolResolver, creating it on first use."""
    global _school_resolver
    if _school_resolver is not None:
        return _school_resolver
    _school_resolver = SchoolResolver()
    return _school_resolver

View File

@@ -0,0 +1,197 @@
"""
Storage Service for Klausur Documents.
PRIVACY BY DESIGN:
- Documents stored with doc_token as identifier (not student names)
- Organized by session_id/doc_token for teacher isolation
- Auto-cleanup when retention period expires
"""
import os
import io
import logging
from typing import Optional, BinaryIO
from pathlib import Path
from minio import Minio
from minio.error import S3Error
logger = logging.getLogger(__name__)
class KlausurStorageService:
    """
    MinIO/S3 storage service for exam documents.

    Bucket layout:
        klausur-exams/
            {session_id}/
                {doc_token}.{ext}
                {doc_token}_redacted.{ext}   # after header redaction

    Privacy by design: objects are addressed by pseudonymous doc tokens
    (never student names) and grouped per session for teacher isolation.
    """

    # Supported extensions and their MIME types; anything else is stored
    # as a generic binary stream.
    _CONTENT_TYPES = {
        "png": "image/png",
        "jpg": "image/jpeg",
        "jpeg": "image/jpeg",
        "pdf": "application/pdf",
    }

    def __init__(self):
        # Connection settings come from the environment so the same code
        # runs against dev and production MinIO instances.
        self.endpoint = os.getenv("MINIO_ENDPOINT", "minio:9000")
        self.access_key = os.getenv("MINIO_ROOT_USER", "breakpilot_dev")
        self.secret_key = os.getenv("MINIO_ROOT_PASSWORD", "breakpilot_dev_123")
        self.secure = os.getenv("MINIO_SECURE", "false").lower() == "true"
        self.bucket_name = os.getenv("KLAUSUR_BUCKET", "klausur-exams")
        self._client: Optional[Minio] = None

    @staticmethod
    def _object_key(
        session_id: str,
        doc_token: str,
        file_extension: str,
        is_redacted: bool
    ) -> str:
        """Build the storage path for a (session, token) pair."""
        suffix = "_redacted" if is_redacted else ""
        return f"{session_id}/{doc_token}{suffix}.{file_extension}"

    @property
    def client(self) -> Minio:
        """Lazily connect to MinIO; ensures the bucket exists on first use."""
        if self._client is None:
            self._client = Minio(
                self.endpoint,
                access_key=self.access_key,
                secret_key=self.secret_key,
                secure=self.secure
            )
            self._ensure_bucket()
        return self._client

    def _ensure_bucket(self):
        """Create the exam bucket if missing; a failed check is only logged."""
        try:
            if not self._client.bucket_exists(self.bucket_name):
                self._client.make_bucket(self.bucket_name)
                logger.info(f"Created Klausur bucket: {self.bucket_name}")
        except S3Error as e:
            logger.warning(f"MinIO bucket check failed: {e}")

    def upload_document(
        self,
        session_id: str,
        doc_token: str,
        file_data: bytes,
        file_extension: str = "png",
        is_redacted: bool = False
    ) -> str:
        """
        Upload exam document to storage.

        Args:
            session_id: Exam session ID
            doc_token: Pseudonymized document token
            file_data: Document binary data
            file_extension: File extension (png, jpg, pdf)
            is_redacted: Whether this is the redacted version

        Returns:
            Object path in storage

        Raises:
            S3Error: If the upload fails.
        """
        object_name = self._object_key(session_id, doc_token, file_extension, is_redacted)
        content_type = self._CONTENT_TYPES.get(
            file_extension.lower(), "application/octet-stream"
        )
        try:
            self.client.put_object(
                bucket_name=self.bucket_name,
                object_name=object_name,
                data=io.BytesIO(file_data),
                length=len(file_data),
                content_type=content_type
            )
        except S3Error as e:
            logger.error(f"Failed to upload document: {e}")
            raise
        logger.info(f"Uploaded document: {object_name}")
        return object_name

    def get_document(
        self,
        session_id: str,
        doc_token: str,
        file_extension: str = "png",
        is_redacted: bool = False
    ) -> Optional[bytes]:
        """
        Download exam document from storage.

        Args:
            session_id: Exam session ID
            doc_token: Pseudonymized document token
            file_extension: File extension
            is_redacted: Whether to get the redacted version

        Returns:
            Document binary data, or None when the object does not exist.

        Raises:
            S3Error: For storage errors other than a missing key.
        """
        object_name = self._object_key(session_id, doc_token, file_extension, is_redacted)
        try:
            response = self.client.get_object(self.bucket_name, object_name)
            try:
                return response.read()
            finally:
                response.close()
                response.release_conn()
        except S3Error as e:
            if e.code == "NoSuchKey":
                logger.warning(f"Document not found: {object_name}")
                return None
            logger.error(f"Failed to get document: {e}")
            raise

    def delete_session_documents(self, session_id: str) -> int:
        """
        Delete all documents for a session.

        Args:
            session_id: Exam session ID

        Returns:
            Number of deleted objects

        Raises:
            S3Error: If listing or deletion fails.
        """
        prefix = f"{session_id}/"
        deleted_count = 0
        try:
            for obj in self.client.list_objects(self.bucket_name, prefix=prefix):
                self.client.remove_object(self.bucket_name, obj.object_name)
                deleted_count += 1
                logger.debug(f"Deleted: {obj.object_name}")
            logger.info(f"Deleted {deleted_count} documents for session {session_id}")
        except S3Error as e:
            logger.error(f"Failed to delete session documents: {e}")
            raise
        return deleted_count

    def document_exists(
        self,
        session_id: str,
        doc_token: str,
        file_extension: str = "png"
    ) -> bool:
        """Check if the (non-redacted) document exists in storage."""
        object_name = self._object_key(session_id, doc_token, file_extension, False)
        try:
            self.client.stat_object(self.bucket_name, object_name)
        except S3Error:
            return False
        return True
# Module-level singleton; created lazily on first access.
_storage_service: Optional[KlausurStorageService] = None
def get_storage_service() -> KlausurStorageService:
    """Return the process-wide storage service, creating it on first use."""
    global _storage_service
    if _storage_service is not None:
        return _storage_service
    _storage_service = KlausurStorageService()
    return _storage_service

View File

@@ -0,0 +1,214 @@
"""
TrOCR Client - Connects to external TrOCR service (Mac Mini).
This client forwards OCR requests to the TrOCR service running on
the Mac Mini, enabling handwriting recognition without requiring
local GPU/ML dependencies.
Privacy: Images are sent over the local network only - no cloud.
"""
import os
import httpx
import logging
from typing import Optional, List, Dict
from dataclasses import dataclass
logger = logging.getLogger(__name__)
# Base URL of the TrOCR service running on the Mac Mini. Overridable via
# the TROCR_SERVICE_URL env var; requests stay inside the local network.
TROCR_SERVICE_URL = os.environ.get(
    "TROCR_SERVICE_URL",
    "http://192.168.178.163:8084"
)
@dataclass
class OCRResult:
    """Result from TrOCR extraction."""
    text: str  # extracted text (may be empty on failure)
    confidence: float  # 0.0-1.0 confidence as reported by the service
    processing_time_ms: int  # processing time reported by the service
    device: str = "remote"  # backend device label; "remote" for this client
class TrOCRClient:
    """
    Async HTTP client for the external TrOCR service.

    Usage:
        client = TrOCRClient()
        # Check if service is available
        if await client.is_available():
            result = await client.extract_text(image_bytes)
            print(result.text)
    """

    def __init__(self, base_url: Optional[str] = None):
        self.base_url = base_url or TROCR_SERVICE_URL
        self._client: Optional[httpx.AsyncClient] = None

    async def _get_client(self) -> httpx.AsyncClient:
        """Return the shared HTTP client, (re)creating it when closed."""
        if self._client is None or self._client.is_closed:
            # Generous timeout: the remote side may still be loading the model.
            self._client = httpx.AsyncClient(
                base_url=self.base_url,
                timeout=300.0
            )
        return self._client

    async def close(self):
        """Release the underlying HTTP connection pool."""
        if self._client and not self._client.is_closed:
            await self._client.aclose()

    async def is_available(self) -> bool:
        """Return True when the TrOCR service answers its health check."""
        try:
            http = await self._get_client()
            health = await http.get("/health", timeout=5.0)
            return health.status_code == 200
        except Exception as e:
            logger.warning(f"TrOCR service not available: {e}")
            return False

    async def get_status(self) -> Dict:
        """Fetch the remote service's status document (or an error dict)."""
        try:
            http = await self._get_client()
            response = await http.get("/api/v1/status")
            response.raise_for_status()
            return response.json()
        except Exception as e:
            logger.error(f"Failed to get TrOCR status: {e}")
            return {"status": "unavailable", "error": str(e)}

    async def extract_text(
        self,
        image_data: bytes,
        filename: str = "image.png",
        detect_lines: bool = True
    ) -> OCRResult:
        """
        Extract text from an image using TrOCR.

        Args:
            image_data: Raw image bytes
            filename: Original filename (forwarded as form metadata)
            detect_lines: Whether to detect individual lines

        Returns:
            OCRResult with extracted text

        Raises:
            httpx.TimeoutException: If the service does not answer in time.
            Exception: Any other transport or HTTP error.
        """
        try:
            http = await self._get_client()
            response = await http.post(
                "/api/v1/extract",
                files={"file": (filename, image_data, "image/png")},
                params={"detect_lines": str(detect_lines).lower()}
            )
            response.raise_for_status()
            payload = response.json()
            return OCRResult(
                text=payload.get("text", ""),
                confidence=payload.get("confidence", 0.0),
                processing_time_ms=payload.get("processing_time_ms", 0),
                device=payload.get("device", "remote")
            )
        except httpx.TimeoutException:
            logger.error("TrOCR request timed out (model may be loading)")
            raise
        except Exception as e:
            logger.error(f"TrOCR extraction failed: {e}")
            raise

    async def batch_extract(
        self,
        images: List[bytes],
        filenames: Optional[List[str]] = None,
        detect_lines: bool = True
    ) -> List[OCRResult]:
        """
        Extract text from multiple images in a single request.

        Args:
            images: List of image bytes
            filenames: Optional list of filenames (auto-generated if omitted)
            detect_lines: Accepted for API symmetry; note it is not
                forwarded to the batch endpoint by this implementation.

        Returns:
            List of OCRResult
        """
        if filenames is None:
            filenames = [f"image_{i}.png" for i in range(len(images))]
        try:
            http = await self._get_client()
            multipart = [
                ("files", (name, img, "image/png"))
                for name, img in zip(filenames, images)
            ]
            response = await http.post("/api/v1/batch-extract", files=multipart)
            response.raise_for_status()
            payload = response.json()
            # Batch responses carry no per-item timing; report 0 ms.
            return [
                OCRResult(
                    text=item.get("text", ""),
                    confidence=item.get("confidence", 0.85),
                    processing_time_ms=0,
                    device="remote"
                )
                for item in payload.get("results", [])
            ]
        except Exception as e:
            logger.error(f"TrOCR batch extraction failed: {e}")
            raise
# Module-level singleton; created lazily on first access.
_trocr_client: Optional[TrOCRClient] = None
def get_trocr_client() -> TrOCRClient:
    """Return the process-wide TrOCR client, creating it on first use."""
    global _trocr_client
    if _trocr_client is not None:
        return _trocr_client
    _trocr_client = TrOCRClient()
    return _trocr_client
async def extract_text_from_image(
    image_data: bytes,
    filename: str = "image.png"
) -> OCRResult:
    """
    Convenience wrapper: run OCR on one image via the shared client.

    Args:
        image_data: Raw image bytes
        filename: Original filename

    Returns:
        OCRResult with extracted text
    """
    return await get_trocr_client().extract_text(image_data, filename)

View File

@@ -0,0 +1,577 @@
"""
TrOCR Service for Handwriting Recognition.
Uses Microsoft's TrOCR model for extracting handwritten text from exam images.
Supports fine-tuning with teacher corrections via LoRA adapters.
PRIVACY BY DESIGN:
- All processing happens locally
- No data sent to external services
- Fine-tuning data stays on-premise
"""
import logging
import os
from pathlib import Path
from typing import Optional, List, Dict, Tuple
from dataclasses import dataclass
from io import BytesIO
import json
logger = logging.getLogger(__name__)
# Filesystem locations for cached model weights, LoRA adapters, and
# collected training examples; each overridable via environment variables.
MODEL_CACHE_DIR = Path(os.environ.get("TROCR_CACHE_DIR", "/app/models/trocr"))
LORA_ADAPTERS_DIR = Path(os.environ.get("TROCR_LORA_DIR", "/app/models/trocr/lora"))
TRAINING_DATA_DIR = Path(os.environ.get("TROCR_TRAINING_DIR", "/app/data/trocr_training"))
@dataclass
class OCRResult:
    """Result from TrOCR extraction."""
    text: str  # extracted text; lines joined with "\n" when line detection ran
    confidence: float  # 0.0-1.0 estimate derived from generation scores
    bounding_boxes: List[Dict]  # [{"x": 0, "y": 0, "w": 100, "h": 20, "text": "..."}]
    processing_time_ms: int  # wall-clock extraction time
@dataclass
class TrainingExample:
    """A single training example for fine-tuning."""
    image_path: str  # path of the saved example image on disk
    ground_truth: str  # teacher-provided correct transcription
    teacher_id: str  # ID of the teacher who supplied the correction
    created_at: str  # ISO-format UTC timestamp
class TrOCRService:
    """
    Handwriting recognition service using TrOCR.

    Features:
    - Line-by-line handwriting extraction
    - Confidence scoring
    - LoRA fine-tuning support
    - Batch processing

    The model, processor, and optional LoRA adapter are lazy-loaded on
    first use; all processing and training data stay on-premise.
    """
    # Available models (from smallest to largest)
    MODELS = {
        "trocr-small": "microsoft/trocr-small-handwritten",
        "trocr-base": "microsoft/trocr-base-handwritten",  # Recommended
        "trocr-large": "microsoft/trocr-large-handwritten",
    }
    def __init__(self, model_name: str = "trocr-base", device: str = "auto"):
        """
        Initialize TrOCR service.

        Args:
            model_name: One of "trocr-small", "trocr-base", "trocr-large"
            device: "cpu", "cuda", "mps" (Apple Silicon), or "auto"
        """
        self.model_name = model_name
        # Unknown names silently fall back to the recommended base model.
        self.model_id = self.MODELS.get(model_name, self.MODELS["trocr-base"])
        self.device = self._get_device(device)
        # Lazy-loaded on first extraction (see _load_model).
        self._processor = None
        self._model = None
        self._lora_adapter = None
        # Create directories
        MODEL_CACHE_DIR.mkdir(parents=True, exist_ok=True)
        LORA_ADAPTERS_DIR.mkdir(parents=True, exist_ok=True)
        TRAINING_DATA_DIR.mkdir(parents=True, exist_ok=True)
        logger.info(f"TrOCR Service initialized: model={model_name}, device={self.device}")
    def _get_device(self, device: str) -> str:
        """Determine the best device for inference (explicit choice wins)."""
        if device != "auto":
            return device
        try:
            import torch
            if torch.cuda.is_available():
                return "cuda"
            elif torch.backends.mps.is_available():
                return "mps"
            return "cpu"
        except ImportError:
            # torch missing: fall back to CPU so the service stays importable.
            return "cpu"
    def _load_model(self):
        """Lazy-load the TrOCR model (no-op when already loaded)."""
        if self._model is not None:
            return
        try:
            from transformers import TrOCRProcessor, VisionEncoderDecoderModel
            import torch
            logger.info(f"Loading TrOCR model: {self.model_id}")
            self._processor = TrOCRProcessor.from_pretrained(
                self.model_id,
                cache_dir=str(MODEL_CACHE_DIR)
            )
            self._model = VisionEncoderDecoderModel.from_pretrained(
                self.model_id,
                cache_dir=str(MODEL_CACHE_DIR)
            )
            # Move to device
            if self.device == "cuda":
                self._model = self._model.cuda()
            elif self.device == "mps":
                self._model = self._model.to("mps")
            # Load LoRA adapter if exists
            adapter_path = LORA_ADAPTERS_DIR / f"{self.model_name}_adapter"
            if adapter_path.exists():
                self._load_lora_adapter(adapter_path)
            logger.info(f"TrOCR model loaded successfully on {self.device}")
        except ImportError as e:
            logger.error(f"Missing dependencies: {e}")
            logger.error("Install with: pip install transformers torch pillow")
            raise
        except Exception as e:
            logger.error(f"Failed to load TrOCR model: {e}")
            raise
    def _load_lora_adapter(self, adapter_path: Path):
        """Load a LoRA adapter for fine-tuned model (best-effort)."""
        try:
            from peft import PeftModel
            logger.info(f"Loading LoRA adapter from {adapter_path}")
            self._model = PeftModel.from_pretrained(self._model, str(adapter_path))
            self._lora_adapter = str(adapter_path)
            logger.info("LoRA adapter loaded successfully")
        except ImportError:
            logger.warning("peft not installed, skipping LoRA adapter")
        except Exception as e:
            # Adapter failures are non-fatal: the base model still works.
            logger.warning(f"Failed to load LoRA adapter: {e}")
    async def extract_text(
        self,
        image_data: bytes,
        detect_lines: bool = True
    ) -> OCRResult:
        """
        Extract handwritten text from an image.

        Args:
            image_data: Raw image bytes (PNG, JPG, etc.)
            detect_lines: If True, detect and process individual lines

        Returns:
            OCRResult with extracted text and confidence; on failure an
            empty result (text="", confidence=0.0) is returned rather
            than raising.
        """
        import time
        start_time = time.time()
        self._load_model()
        try:
            from PIL import Image
            import torch
            # Load image
            image = Image.open(BytesIO(image_data)).convert("RGB")
            if detect_lines:
                # Detect text lines and process each
                lines, bboxes = await self._detect_and_extract_lines(image)
                text = "\n".join(lines)
                confidence = 0.85  # Average confidence estimate
            else:
                # Process whole image
                text, confidence = await self._extract_single(image)
                bboxes = []
            processing_time_ms = int((time.time() - start_time) * 1000)
            return OCRResult(
                text=text,
                confidence=confidence,
                bounding_boxes=bboxes,
                processing_time_ms=processing_time_ms
            )
        except Exception as e:
            logger.error(f"OCR extraction failed: {e}")
            return OCRResult(
                text="",
                confidence=0.0,
                bounding_boxes=[],
                processing_time_ms=int((time.time() - start_time) * 1000)
            )
    async def _extract_single(self, image) -> Tuple[str, float]:
        """Extract text from a single image (no line detection)."""
        import torch
        # Preprocess
        pixel_values = self._processor(
            images=image,
            return_tensors="pt"
        ).pixel_values
        if self.device == "cuda":
            pixel_values = pixel_values.cuda()
        elif self.device == "mps":
            pixel_values = pixel_values.to("mps")
        # Generate
        with torch.no_grad():
            generated_ids = self._model.generate(
                pixel_values,
                max_length=128,
                num_beams=4,
                return_dict_in_generate=True,
                output_scores=True
            )
        # Decode
        text = self._processor.batch_decode(
            generated_ids.sequences,
            skip_special_tokens=True
        )[0]
        # Estimate confidence from generation scores
        confidence = self._estimate_confidence(generated_ids)
        return text.strip(), confidence
    async def _detect_and_extract_lines(self, image) -> Tuple[List[str], List[Dict]]:
        """Detect text lines and extract each separately.

        Returns a tuple of (line texts, bounding boxes). Falls back to
        whole-image extraction (with no boxes) when no lines are found.
        """
        from PIL import Image
        import numpy as np
        # Convert to numpy for line detection
        img_array = np.array(image.convert("L"))  # Grayscale
        # Simple horizontal projection for line detection
        lines_y = self._detect_line_positions(img_array)
        if not lines_y:
            # Fallback: process whole image
            text, _ = await self._extract_single(image)
            return [text], []
        # Extract each line
        results = []
        bboxes = []
        width = image.width
        for i, (y_start, y_end) in enumerate(lines_y):
            # Crop line
            line_img = image.crop((0, y_start, width, y_end))
            # Ensure minimum height
            if line_img.height < 20:
                continue
            # Extract text
            text, conf = await self._extract_single(line_img)
            if text.strip():
                results.append(text)
                bboxes.append({
                    "x": 0,
                    "y": y_start,
                    "w": width,
                    "h": y_end - y_start,
                    "text": text,
                    "confidence": conf
                })
        return results, bboxes
    def _detect_line_positions(self, img_array) -> List[Tuple[int, int]]:
        """Detect horizontal text line positions using projection profile.

        Args:
            img_array: 2-D grayscale numpy array (0 = black, 255 = white).

        Returns:
            List of (start_row, end_row) pairs, padded by a few pixels.
        """
        import numpy as np
        # Horizontal projection (sum of pixels per row)
        projection = np.sum(255 - img_array, axis=1)
        # Threshold to find text rows
        threshold = np.max(projection) * 0.1
        text_rows = projection > threshold
        # Find line boundaries
        lines = []
        in_line = False
        line_start = 0
        for i, is_text in enumerate(text_rows):
            if is_text and not in_line:
                in_line = True
                line_start = max(0, i - 5)  # Add padding
            elif not is_text and in_line:
                in_line = False
                line_end = min(len(text_rows) - 1, i + 5)  # Add padding
                if line_end - line_start > 15:  # Minimum line height
                    lines.append((line_start, line_end))
        # Handle last line
        if in_line:
            lines.append((line_start, len(text_rows) - 1))
        return lines
    def _estimate_confidence(self, generated_output) -> float:
        """Estimate confidence from generation scores.

        Averages the max softmax probability of each generated token;
        returns a 0.75 default when scores are unavailable.
        """
        try:
            import torch
            if hasattr(generated_output, 'scores') and generated_output.scores:
                # Average probability of selected tokens
                probs = []
                for score in generated_output.scores:
                    prob = torch.softmax(score, dim=-1).max().item()
                    probs.append(prob)
                return sum(probs) / len(probs) if probs else 0.5
            return 0.75  # Default confidence
        except Exception:
            return 0.75
    async def batch_extract(
        self,
        images: List[bytes],
        detect_lines: bool = True
    ) -> List[OCRResult]:
        """
        Extract text from multiple images (processed sequentially).

        Args:
            images: List of image bytes
            detect_lines: If True, detect lines in each image

        Returns:
            List of OCRResult
        """
        results = []
        for img_data in images:
            result = await self.extract_text(img_data, detect_lines)
            results.append(result)
        return results
    # ==========================================
    # FINE-TUNING SUPPORT
    # ==========================================
    def add_training_example(
        self,
        image_data: bytes,
        ground_truth: str,
        teacher_id: str
    ) -> str:
        """
        Add a training example for fine-tuning.

        The image and a JSON metadata file are stored side by side in
        TRAINING_DATA_DIR under a fresh UUID.

        Args:
            image_data: Image bytes
            ground_truth: Correct text (teacher-provided)
            teacher_id: ID of the teacher providing correction

        Returns:
            Example ID
        """
        import uuid
        from datetime import datetime
        example_id = str(uuid.uuid4())
        # Save image
        image_path = TRAINING_DATA_DIR / f"{example_id}.png"
        with open(image_path, "wb") as f:
            f.write(image_data)
        # Save metadata
        example = TrainingExample(
            image_path=str(image_path),
            ground_truth=ground_truth,
            teacher_id=teacher_id,
            created_at=datetime.utcnow().isoformat()
        )
        meta_path = TRAINING_DATA_DIR / f"{example_id}.json"
        with open(meta_path, "w") as f:
            json.dump(example.__dict__, f, indent=2)
        logger.info(f"Training example added: {example_id}")
        return example_id
    def get_training_examples(self, teacher_id: Optional[str] = None) -> List[TrainingExample]:
        """Get all training examples, optionally filtered by teacher."""
        examples = []
        for meta_file in TRAINING_DATA_DIR.glob("*.json"):
            with open(meta_file) as f:
                data = json.load(f)
            example = TrainingExample(**data)
            if teacher_id is None or example.teacher_id == teacher_id:
                examples.append(example)
        return examples
    async def fine_tune(
        self,
        teacher_id: Optional[str] = None,
        epochs: int = 3,
        learning_rate: float = 5e-5
    ) -> Dict:
        """
        Fine-tune the model with collected training examples.

        Uses LoRA for efficient fine-tuning. Requires at least 10
        examples; on success the adapter is saved and the model is
        reloaded with it. Errors are reported in the returned dict,
        never raised.

        Args:
            teacher_id: If provided, only use examples from this teacher
            epochs: Number of training epochs
            learning_rate: Learning rate for fine-tuning

        Returns:
            Training statistics (or {"status": "error", ...})
        """
        examples = self.get_training_examples(teacher_id)
        if len(examples) < 10:
            return {
                "status": "error",
                "message": f"Need at least 10 examples, have {len(examples)}"
            }
        try:
            from peft import LoraConfig, get_peft_model, TaskType
            from transformers import Trainer, TrainingArguments
            from PIL import Image
            import torch
            self._load_model()
            logger.info(f"Starting fine-tuning with {len(examples)} examples")
            # Configure LoRA
            lora_config = LoraConfig(
                task_type=TaskType.SEQ_2_SEQ_LM,
                r=16,  # LoRA rank
                lora_alpha=32,
                lora_dropout=0.1,
                target_modules=["q_proj", "v_proj"]  # Attention layers
            )
            # Apply LoRA
            model = get_peft_model(self._model, lora_config)
            # Prepare dataset
            class OCRDataset(torch.utils.data.Dataset):
                def __init__(self, examples, processor):
                    self.examples = examples
                    self.processor = processor
                def __len__(self):
                    return len(self.examples)
                def __getitem__(self, idx):
                    ex = self.examples[idx]
                    image = Image.open(ex.image_path).convert("RGB")
                    pixel_values = self.processor(
                        images=image, return_tensors="pt"
                    ).pixel_values.squeeze()
                    labels = self.processor.tokenizer(
                        ex.ground_truth,
                        return_tensors="pt",
                        padding="max_length",
                        max_length=128
                    ).input_ids.squeeze()
                    return {
                        "pixel_values": pixel_values,
                        "labels": labels
                    }
            dataset = OCRDataset(examples, self._processor)
            # Training arguments
            output_dir = LORA_ADAPTERS_DIR / f"{self.model_name}_adapter"
            training_args = TrainingArguments(
                output_dir=str(output_dir),
                num_train_epochs=epochs,
                per_device_train_batch_size=4,
                learning_rate=learning_rate,
                save_strategy="epoch",
                logging_steps=10,
                remove_unused_columns=False,
            )
            # Train
            trainer = Trainer(
                model=model,
                args=training_args,
                train_dataset=dataset,
            )
            train_result = trainer.train()
            # Save adapter
            model.save_pretrained(str(output_dir))
            # Reload model with new adapter
            self._model = None
            self._load_model()
            return {
                "status": "success",
                "examples_used": len(examples),
                "epochs": epochs,
                "adapter_path": str(output_dir),
                "train_loss": train_result.training_loss
            }
        except ImportError as e:
            logger.error(f"Missing dependencies for fine-tuning: {e}")
            return {
                "status": "error",
                "message": f"Missing dependencies: {e}. Install with: pip install peft"
            }
        except Exception as e:
            logger.error(f"Fine-tuning failed: {e}")
            return {
                "status": "error",
                "message": str(e)
            }
    def get_model_info(self) -> Dict:
        """Get information about the loaded model."""
        adapter_path = LORA_ADAPTERS_DIR / f"{self.model_name}_adapter"
        return {
            "model_name": self.model_name,
            "model_id": self.model_id,
            "device": self.device,
            "is_loaded": self._model is not None,
            "has_lora_adapter": adapter_path.exists(),
            "lora_adapter_path": str(adapter_path) if adapter_path.exists() else None,
            "training_examples_count": len(list(TRAINING_DATA_DIR.glob("*.json"))),
        }
# Module-level singleton; replaced when a different model name is requested.
_trocr_service: Optional[TrOCRService] = None
def get_trocr_service(model_name: str = "trocr-base") -> TrOCRService:
    """Return the shared TrOCRService, recreating it on model change."""
    global _trocr_service
    needs_new = _trocr_service is None or _trocr_service.model_name != model_name
    if needs_new:
        _trocr_service = TrOCRService(model_name=model_name)
    return _trocr_service

View File

@@ -0,0 +1,309 @@
"""
Vision-OCR Service - Handschrifterkennung mit Llama 3.2 Vision.
DATENSCHUTZ/PRIVACY BY DESIGN:
- Alle Verarbeitung erfolgt lokal auf dem Mac Mini
- Keine Daten verlassen das lokale Netzwerk
- Keine Cloud-APIs beteiligt
- Perfekt für DSGVO-konforme Schulumgebungen
Verwendet llama3.2-vision:11b über Ollama für OCR/Handschrifterkennung.
Dies ist eine Alternative zu TrOCR mit besserer Handschrifterkennung.
"""
import os
import base64
import httpx
import logging
import time
from typing import Optional
from dataclasses import dataclass
from llm_gateway.config import get_config
logger = logging.getLogger(__name__)
@dataclass
class VisionOCRResult:
    """Result from Vision-LLM OCR extraction."""
    text: str  # transcribed text (stripped)
    confidence: float  # heuristic 0.0-1.0 estimate (not a model probability)
    processing_time_ms: int  # wall-clock request time
    model: str = "llama3.2-vision:11b"  # Ollama model that produced the text
    device: str = "local-ollama"  # marker that processing stayed on-premise
# System prompt for handwriting OCR (kept in German — the exams are German).
# It forbids inventing text, preserves line structure, and asks the model
# to mark illegible spans with "[unleserlich]" or "[?]".
HANDWRITING_OCR_PROMPT = """Du bist ein Experte für Handschrifterkennung (OCR).
AUFGABE: Extrahiere den handschriftlichen Text aus dem Bild so genau wie möglich.
WICHTIGE REGELN:
1. Transkribiere NUR den sichtbaren Text - erfinde nichts dazu
2. Behalte die Zeilenstruktur bei (jede Zeile auf einer neuen Zeile)
3. Bei unleserlichen Stellen: [unleserlich] oder [?] verwenden
4. Ignoriere Linien, Kästchen und andere Formatierungen
5. Korrigiere KEINE Rechtschreibfehler - transkribiere exakt was da steht
6. Bei Aufzählungen: Nummern/Punkte beibehalten (1., 2., a), b), etc.)
AUSGABE: Nur der transkribierte Text, keine Erklärungen oder Kommentare."""
# Alternative prompt for printed (non-handwritten) text.
PRINTED_OCR_PROMPT = """Extrahiere den gesamten Text aus diesem Bild.
Behalte die Struktur bei (Absätze, Listen, etc.).
Gib nur den extrahierten Text zurück, ohne Kommentare."""
class VisionOCRService:
    """
    OCR service backed by Llama 3.2 Vision via a local Ollama instance.

    Everything runs on the local Mac Mini — no cloud connection needed,
    which keeps the pipeline suitable for GDPR-compliant exam correction
    in schools.

    Usage:
        service = VisionOCRService()
        if await service.is_available():
            result = await service.extract_text(image_bytes)
            print(result.text)
    """

    def __init__(self, ollama_url: Optional[str] = None, model: Optional[str] = None):
        """
        Initialize Vision OCR Service.

        Args:
            ollama_url: Ollama API URL (default: from config)
            model: Vision model to use (default: the config's vision_model,
                typically llama3.2-vision:11b)
        """
        config = get_config()
        # Fall back to the standard local Ollama port when the gateway
        # config carries no Ollama section.
        self.ollama_url = ollama_url or (config.ollama.base_url if config.ollama else "http://localhost:11434")
        self.model = model or config.vision_model
        self._client: Optional[httpx.AsyncClient] = None

    async def _get_client(self) -> httpx.AsyncClient:
        """Return the shared HTTP client, recreating it if closed."""
        if self._client is None or self._client.is_closed:
            self._client = httpx.AsyncClient(
                timeout=300.0  # 5 min timeout for large images
            )
        return self._client

    async def close(self):
        """Close the HTTP client."""
        if self._client and not self._client.is_closed:
            await self._client.aclose()

    async def is_available(self) -> bool:
        """Check if Ollama is reachable and has a vision-capable model."""
        try:
            client = await self._get_client()
            # Check Ollama health
            response = await client.get(
                f"{self.ollama_url}/api/tags",
                timeout=5.0
            )
            if response.status_code != 200:
                return False
            # Check if a vision model is installed (llava also qualifies)
            data = response.json()
            models = [m.get("name", "") for m in data.get("models", [])]
            has_vision = any(
                "vision" in m.lower() or "llava" in m.lower()
                for m in models
            )
            if not has_vision:
                logger.warning(f"No vision model found. Available: {models}")
                return False
            return True
        except Exception as e:
            logger.warning(f"Vision OCR service not available: {e}")
            return False

    async def get_status(self) -> dict:
        """Return a status dict describing the Ollama host and its models."""
        try:
            client = await self._get_client()
            response = await client.get(f"{self.ollama_url}/api/tags")
            if response.status_code == 200:
                data = response.json()
                models = data.get("models", [])
                vision_models = [
                    m for m in models
                    if "vision" in m.get("name", "").lower() or "llava" in m.get("name", "").lower()
                ]
                return {
                    "status": "available",
                    "ollama_url": self.ollama_url,
                    "configured_model": self.model,
                    "vision_models": [m.get("name") for m in vision_models],
                    "total_models": len(models)
                }
            else:
                return {
                    "status": "unavailable",
                    "error": f"HTTP {response.status_code}"
                }
        except Exception as e:
            return {
                "status": "unavailable",
                "error": str(e)
            }

    async def extract_text(
        self,
        image_data: bytes,
        filename: str = "image.png",
        is_handwriting: bool = True
    ) -> VisionOCRResult:
        """
        Extract text from an image using the Vision LLM.

        Args:
            image_data: Raw image bytes (PNG, JPG, etc.)
            filename: Original filename (used for logging)
            is_handwriting: True for handwriting, False for printed text

        Returns:
            VisionOCRResult with extracted text

        Raises:
            httpx.TimeoutException: If Ollama does not answer in time.
            Exception: Any other transport or HTTP error.
        """
        start_time = time.time()
        try:
            client = await self._get_client()
            # Ollama expects the image base64-encoded inside the chat message.
            image_base64 = base64.b64encode(image_data).decode("utf-8")
            # Select appropriate prompt
            prompt = HANDWRITING_OCR_PROMPT if is_handwriting else PRINTED_OCR_PROMPT
            # Ollama Vision API request
            payload = {
                "model": self.model,
                "messages": [
                    {
                        "role": "user",
                        "content": prompt,
                        "images": [image_base64]
                    }
                ],
                "stream": False,
                "options": {
                    "temperature": 0.1,  # Low temperature for consistent OCR
                    "num_predict": 2048,  # Max tokens for extracted text
                }
            }
            # BUGFIX: log messages previously contained the literal text
            # "(unknown)" and ignored the filename parameter entirely.
            logger.info(f"Sending image to Vision OCR: {filename} ({len(image_data)} bytes)")
            response = await client.post(
                f"{self.ollama_url}/api/chat",
                json=payload,
                timeout=180.0  # 3 min timeout
            )
            response.raise_for_status()
            data = response.json()
            extracted_text = data.get("message", {}).get("content", "")
            processing_time_ms = int((time.time() - start_time) * 1000)
            # Estimate confidence based on response quality
            confidence = self._estimate_confidence(extracted_text)
            logger.info(
                f"Vision OCR completed for {filename}: "
                f"{len(extracted_text)} chars in {processing_time_ms}ms"
            )
            return VisionOCRResult(
                text=extracted_text.strip(),
                confidence=confidence,
                processing_time_ms=processing_time_ms,
                model=self.model,
                device="local-ollama"
            )
        except httpx.TimeoutException:
            logger.error(f"Vision OCR timed out for {filename}")
            raise
        except Exception as e:
            logger.error(f"Vision OCR failed for {filename}: {e}")
            raise

    def _estimate_confidence(self, text: str) -> float:
        """
        Estimate OCR confidence based on text quality.

        This is a heuristic — real confidence would need model output.
        Empty output scores 0.0; otherwise start at 0.85 and subtract
        0.05 per uncertainty marker (capped at 0.3) and 0.1 for very
        short results, with a floor of 0.1.
        """
        if not text:
            return 0.0
        # Count the markers the prompt asks the model to emit for
        # illegible spans.
        uncertain_markers = text.count("[unleserlich]") + text.count("[?]")
        # Length of the actual transcription without the markers
        text_length = len(text.replace("[unleserlich]", "").replace("[?]", ""))
        if text_length == 0:
            return 0.1
        # Base confidence
        confidence = 0.85
        # Reduce for uncertain markers
        confidence -= min(uncertain_markers * 0.05, 0.3)
        # Very short text might be incomplete
        if text_length < 20:
            confidence -= 0.1
        return max(confidence, 0.1)
# Module-level singleton; created lazily on first access.
_vision_ocr_service: Optional[VisionOCRService] = None
def get_vision_ocr_service() -> VisionOCRService:
    """Return the shared Vision OCR service, creating it on first use."""
    global _vision_ocr_service
    if _vision_ocr_service is not None:
        return _vision_ocr_service
    _vision_ocr_service = VisionOCRService()
    return _vision_ocr_service
async def extract_handwriting(
    image_data: bytes,
    filename: str = "image.png"
) -> VisionOCRResult:
    """
    Convenience wrapper: extract handwriting from one image.

    Uses Llama 3.2 Vision locally via Ollama; all processing stays on
    the local Mac Mini (GDPR-compliant, no cloud).

    Args:
        image_data: Raw image bytes
        filename: Original filename

    Returns:
        VisionOCRResult with extracted text
    """
    return await get_vision_ocr_service().extract_text(
        image_data, filename, is_handwriting=True
    )

View File

@@ -0,0 +1,9 @@
"""
Tests for Klausurkorrektur Module.
Tests cover:
- Database models and repository
- Pseudonymization service
- API routes
- Privacy guarantees
"""

View File

@@ -0,0 +1,455 @@
"""
Tests for Magic Onboarding functionality.
Tests cover:
- OnboardingSession lifecycle
- Student detection and confirmation
- Roster parsing
- School resolution
- Module linking
"""
import pytest
from unittest.mock import AsyncMock, MagicMock, patch
from datetime import datetime
# Import models
from klausur.db_models import (
OnboardingSession, DetectedStudent, ModuleLink,
OnboardingStatus, ModuleLinkType
)
# Import services
from klausur.services.roster_parser import RosterParser, RosterEntry, NameMatch
from klausur.services.school_resolver import SchoolResolver, BUNDESLAENDER, SCHULFORMEN
from klausur.services.module_linker import (
ModuleLinker, CorrectionResult, MeetingUrgency, ParentMeetingSuggestion
)
# =============================================================================
# ROSTER PARSER TESTS
# =============================================================================
class TestRosterParser:
    """Unit tests for the RosterParser name-matching service."""

    def test_match_first_names_exact_match(self):
        """An exactly spelled first name matches with full confidence."""
        parser = RosterParser()
        entries = [
            RosterEntry(first_name="Max", last_name="Mueller"),
            RosterEntry(first_name="Anna", last_name="Schmidt"),
            RosterEntry(first_name="Tim", last_name="Weber"),
        ]
        matches = parser.match_first_names(["Max", "Anna", "Tim"], entries)
        # Every detected name should produce a match record.
        assert len(matches) == 3
        max_match = next(m for m in matches if m.detected_name == "Max")
        assert max_match.matched_entry is not None
        assert max_match.matched_entry.last_name == "Mueller"
        assert max_match.match_type == "exact"
        assert max_match.confidence == 1.0

    def test_match_first_names_fuzzy_match(self):
        """A shortened name may fuzzy-match its longer roster counterpart."""
        parser = RosterParser()
        entries = [
            RosterEntry(first_name="Maximilian", last_name="Mueller"),
            RosterEntry(first_name="Anna-Lena", last_name="Schmidt"),
        ]
        # "Max" is a prefix of "Maximilian", so fuzzy matching may apply.
        matches = parser.match_first_names(["Max"], entries)
        assert len(matches) == 1
        candidate = matches[0]
        # Only check match_type when the parser actually found a match.
        if candidate.matched_entry is not None:
            assert candidate.match_type in ("first_name", "fuzzy")

    def test_match_first_names_no_match(self):
        """Names absent from the roster come back without a matched entry."""
        parser = RosterParser()
        entries = [RosterEntry(first_name="Max", last_name="Mueller")]
        matches = parser.match_first_names(["Sophie", "Lisa"], entries)
        assert len(matches) == 2
        assert all(m.matched_entry is None for m in matches)
        assert all(m.match_type == "none" for m in matches)

    def test_roster_entry_creation(self):
        """RosterEntry stores all optional contact fields."""
        entry = RosterEntry(
            first_name="Max",
            last_name="Mueller",
            student_number="12345",
            parent_email="eltern@example.com",
            parent_phone="+49123456789"
        )
        assert entry.first_name == "Max"
        assert entry.last_name == "Mueller"
        assert entry.parent_email == "eltern@example.com"

    def test_name_match_dataclass(self):
        """NameMatch links a detected name to its roster entry."""
        entry = RosterEntry(first_name="Max", last_name="Mueller")
        match = NameMatch(
            detected_name="Max",
            matched_entry=entry,
            confidence=1.0,
            match_type="exact"
        )
        assert match.detected_name == "Max"
        assert match.matched_entry.last_name == "Mueller"
        assert match.confidence == 1.0
# =============================================================================
# SCHOOL RESOLVER TESTS
# =============================================================================
class TestSchoolResolver:
    """Unit tests for the SchoolResolver service and its static tables."""

    def test_bundeslaender_completeness(self):
        """All 16 German federal states must be present, keyed by code."""
        assert len(BUNDESLAENDER) == 16
        # Spot-check a few state codes.
        for code in ("NI", "BY", "BE"):
            assert code in BUNDESLAENDER
        # And a full name mapping.
        assert BUNDESLAENDER["NI"] == "Niedersachsen"

    def test_schulformen_have_grades(self):
        """Every school form carries a non-empty list of grades."""
        for info in SCHULFORMEN.values():
            assert "grades" in info
            assert isinstance(info["grades"], list)
            assert info["grades"]

    def test_detect_grade_from_class_name(self):
        """Grade numbers are extracted from common class-name formats."""
        resolver = SchoolResolver()
        expectations = {"3a": 3, "10b": 10, "Q1": 11, "Q2": 12, "12": 12}
        for class_name, grade in expectations.items():
            assert resolver.detect_grade_from_class_name(class_name) == grade

    def test_detect_grade_returns_none_for_invalid(self):
        """Unparseable class names yield None instead of raising."""
        resolver = SchoolResolver()
        assert resolver.detect_grade_from_class_name("abc") is None
        assert resolver.detect_grade_from_class_name("") is None

    def test_local_storage_initialization(self):
        """A freshly built resolver starts with empty local caches."""
        resolver = SchoolResolver()
        assert resolver._local_schools == {}
        assert resolver._local_classes == {}
# =============================================================================
# MODULE LINKER TESTS
# =============================================================================
class TestModuleLinker:
    """Tests for ModuleLinker service (parent-meeting suggestions and grade stats)."""
    def test_suggest_elternabend_for_weak_students(self):
        """Parent-meeting suggestions are produced for grades at/below the threshold."""
        linker = ModuleLinker()
        # Mixed class: grades 5, 2, 5-, and 6 — only the 2 is above threshold.
        results = [
            CorrectionResult(
                doc_token="token1", score=25, max_score=100,
                grade="5", feedback=""
            ),
            CorrectionResult(
                doc_token="token2", score=85, max_score=100,
                grade="2", feedback=""
            ),
            CorrectionResult(
                doc_token="token3", score=30, max_score=100,
                grade="5-", feedback=""
            ),
            CorrectionResult(
                doc_token="token4", score=20, max_score=100,
                grade="6", feedback=""
            ),
        ]
        suggestions = linker.suggest_elternabend(
            results, subject="Mathematik", threshold_grade="4"
        )
        # Should suggest meetings for students with grades 4 or worse
        # Grades 5, 5-, and 6 should trigger meetings
        assert len(suggestions) == 3
        # Verify suggestions use doc_tokens (privacy)
        for suggestion in suggestions:
            assert suggestion.doc_token in ["token1", "token3", "token4"]
    def test_suggest_elternabend_empty_for_good_class(self):
        """No suggestions are emitted when every grade beats the threshold."""
        linker = ModuleLinker()
        results = [
            CorrectionResult(
                doc_token="token1", score=95, max_score=100,
                grade="1", feedback=""
            ),
            CorrectionResult(
                doc_token="token2", score=85, max_score=100,
                grade="2", feedback=""
            ),
            CorrectionResult(
                doc_token="token3", score=78, max_score=100,
                grade="3", feedback=""
            ),
        ]
        suggestions = linker.suggest_elternabend(
            results, subject="Deutsch", threshold_grade="4"
        )
        assert len(suggestions) == 0
    def test_calculate_grade_statistics(self):
        """Grade distribution and pass/fail counts are aggregated correctly."""
        linker = ModuleLinker()
        # Six results covering grades 1-5, with grade 2 appearing twice.
        results = [
            CorrectionResult(doc_token="t1", score=95, max_score=100, grade="1", feedback=""),
            CorrectionResult(doc_token="t2", score=85, max_score=100, grade="2", feedback=""),
            CorrectionResult(doc_token="t3", score=85, max_score=100, grade="2", feedback=""),
            CorrectionResult(doc_token="t4", score=75, max_score=100, grade="3", feedback=""),
            CorrectionResult(doc_token="t5", score=55, max_score=100, grade="4", feedback=""),
            CorrectionResult(doc_token="t6", score=25, max_score=100, grade="5", feedback=""),
        ]
        stats = linker.calculate_grade_statistics(results)
        assert isinstance(stats, dict)
        assert stats["count"] == 6
        # Check grade distribution
        assert stats["distribution"].get("1", 0) == 1
        assert stats["distribution"].get("2", 0) == 2
        assert stats["distribution"].get("3", 0) == 1
        # Check passing/failing counts
        assert stats["passing_count"] == 5 # Grades 1-4 pass
        assert stats["failing_count"] == 1 # Grade 5 fails
    def test_calculate_statistics_empty_results(self):
        """An empty result list yields an empty stats dict (not an error)."""
        linker = ModuleLinker()
        stats = linker.calculate_grade_statistics([])
        assert stats == {}
    def test_correction_result_creation(self):
        """CorrectionResult accepts per-question detail alongside the totals."""
        result = CorrectionResult(
            doc_token="abc-123",
            score=87,
            max_score=100,
            grade="2+",
            feedback="Gut geloest",
            question_results=[{"aufgabe": 1, "punkte": 10}]
        )
        assert result.doc_token == "abc-123"
        assert result.score == 87
        assert result.grade == "2+"
# =============================================================================
# DB MODEL TESTS
# =============================================================================
class TestOnboardingModels:
    """Tests for the Magic Onboarding database models and enums."""

    def test_onboarding_status_enum_values(self):
        """OnboardingStatus exposes every stage of the onboarding flow."""
        expected = {
            OnboardingStatus.ANALYZING: "analyzing",
            OnboardingStatus.CONFIRMING: "confirming",
            OnboardingStatus.PROCESSING: "processing",
            OnboardingStatus.LINKING: "linking",
            OnboardingStatus.COMPLETE: "complete",
        }
        for member, value in expected.items():
            assert member.value == value

    def test_module_link_type_enum_values(self):
        """ModuleLinkType covers every linkable target module."""
        expected = {
            ModuleLinkType.NOTENBUCH: "notenbuch",
            ModuleLinkType.ELTERNABEND: "elternabend",
            ModuleLinkType.ZEUGNIS: "zeugnis",
            ModuleLinkType.CALENDAR: "calendar",
            ModuleLinkType.KLASSENBUCH: "klassenbuch",
        }
        for member, value in expected.items():
            assert member.value == value

    def test_onboarding_session_repr(self):
        """repr() of a session surfaces the id prefix, class, and status."""
        session = OnboardingSession(
            id="12345678-1234-1234-1234-123456789abc",
            teacher_id="teacher-1",
            detected_class="3a",
            status=OnboardingStatus.ANALYZING
        )
        text = repr(session)
        for fragment in ("12345678", "3a", "analyzing"):
            assert fragment in text

    def test_detected_student_repr(self):
        """repr() of a detected student includes the first name."""
        student = DetectedStudent(
            id="12345678-1234-1234-1234-123456789abc",
            detected_first_name="Max"
        )
        assert "Max" in repr(student)

    def test_module_link_repr(self):
        """repr() of a module link includes link type and target module."""
        link = ModuleLink(
            id="12345678-1234-1234-1234-123456789abc",
            klausur_session_id="session-1",
            link_type=ModuleLinkType.NOTENBUCH,
            target_module="school"
        )
        text = repr(link)
        assert "notenbuch" in text
        assert "school" in text
# =============================================================================
# PRIVACY TESTS
# =============================================================================
class TestPrivacyInMagicOnboarding:
    """Checks that Magic Onboarding never handles more PII than necessary."""

    def test_detected_student_no_full_last_name_in_detection(self):
        """Detection stores only a last-name hint until the teacher confirms."""
        student = DetectedStudent(
            id="12345678-1234-1234-1234-123456789abc",
            detected_first_name="Max",
            detected_last_name_hint="M." # Only initial/hint, not full name
        )
        # During detection only the hint exists...
        assert student.detected_last_name_hint == "M."
        # ...and the confirmed full name stays unset until teacher approval.
        assert student.confirmed_last_name is None

    def test_module_link_uses_doc_tokens_not_names(self):
        """Module-link suggestions carry pseudonymized tokens, never names."""
        linker = ModuleLinker()
        weak_result = CorrectionResult(
            doc_token="uuid-token-1", score=45, max_score=100,
            grade="4", feedback=""
        )
        suggestions = linker.suggest_elternabend(
            [weak_result], subject="Deutsch", threshold_grade="4"
        )
        for suggestion in suggestions:
            # Suggestions reference doc_tokens, not student names.
            assert hasattr(suggestion, 'doc_token')
            assert suggestion.doc_token == "uuid-token-1"
# =============================================================================
# INTEGRATION FLOW TESTS
# =============================================================================
class TestMagicOnboardingFlow:
    """Flow-level tests for the complete Magic Onboarding process."""

    def test_onboarding_status_progression(self):
        """The first five enum members mirror the flow's progression order."""
        statuses = list(OnboardingStatus)
        assert statuses[:5] == [
            OnboardingStatus.ANALYZING,
            OnboardingStatus.CONFIRMING,
            OnboardingStatus.PROCESSING,
            OnboardingStatus.LINKING,
            OnboardingStatus.COMPLETE,
        ]

    def test_grade_conversion_german_scale(self):
        """German grades 1-4 count as passing; 5 and 6 count as failing."""
        linker = ModuleLinker()
        rows = [("t1", 95, "1"), ("t2", 80, "2"), ("t3", 65, "3"),
                ("t4", 50, "4"), ("t5", 30, "5"), ("t6", 15, "6")]
        results = [
            CorrectionResult(doc_token=t, score=s, max_score=100, grade=g, feedback="")
            for t, s, g in rows
        ]
        stats = linker.calculate_grade_statistics(results)
        # 4 passing (grades 1-4), 2 failing (grades 5, 6).
        assert stats["passing_count"] == 4
        assert stats["failing_count"] == 2

    def test_meeting_urgency_levels(self):
        """Each threshold-or-worse grade gets a suggestion; grade 6 is HIGH."""
        linker = ModuleLinker()
        rows = [("t1", 55, "4"), ("t2", 30, "5"), ("t3", 15, "6")]
        results = [
            CorrectionResult(doc_token=t, score=s, max_score=100, grade=g, feedback="")
            for t, s, g in rows
        ]
        suggestions = linker.suggest_elternabend(
            results, subject="Mathe", threshold_grade="4"
        )
        urgencies = [s.urgency for s in suggestions]
        assert len(urgencies) == 3
        # The worst grade must be flagged as high urgency.
        worst = next(s for s in suggestions if s.grade == "6")
        assert worst.urgency == MeetingUrgency.HIGH

View File

@@ -0,0 +1,209 @@
"""
Tests for PseudonymizationService.
Verifies that:
- doc_tokens are cryptographically random
- QR codes are generated correctly
- Header redaction works as expected
- No personal data leaks through pseudonymization
"""
import pytest
import uuid
from unittest.mock import patch, MagicMock
from klausur.services.pseudonymizer import (
PseudonymizationService,
get_pseudonymizer,
RedactionResult,
QRDetectionResult,
)
class TestDocTokenGeneration:
    """Tests for pseudonymous doc_token generation."""

    def test_generate_doc_token_returns_valid_uuid(self):
        """A doc_token must parse as a version-4 (random) UUID."""
        token = PseudonymizationService().generate_doc_token()
        assert uuid.UUID(token).version == 4

    def test_generate_doc_token_is_unique(self):
        """1000 consecutive tokens must all be distinct."""
        service = PseudonymizationService()
        tokens = {service.generate_doc_token() for _ in range(1000)}
        assert len(tokens) == 1000

    def test_generate_batch_tokens_correct_count(self):
        """Batch generation yields exactly the requested number of unique tokens."""
        batch = PseudonymizationService().generate_batch_tokens(25)
        assert len(batch) == 25
        assert len(set(batch)) == 25

    def test_token_no_correlation_to_index(self):
        """Separate batches never overlap, so order carries no information."""
        service = PseudonymizationService()
        first = service.generate_batch_tokens(10)
        second = service.generate_batch_tokens(10)
        assert set(first).isdisjoint(second)
class TestQRCodeGeneration:
    """Tests for QR code PNG generation."""

    def test_generate_qr_code_returns_bytes(self):
        """QR generation returns bytes that start with the PNG signature."""
        service = PseudonymizationService()
        token = service.generate_doc_token()
        try:
            payload = service.generate_qr_code(token)
        except RuntimeError:
            # The optional qrcode dependency is absent in this environment.
            pytest.skip("qrcode library not installed")
        assert isinstance(payload, bytes)
        assert payload.startswith(b'\x89PNG\r\n\x1a\n')

    def test_generate_qr_code_custom_size(self):
        """The size parameter is honoured: a bigger size yields a bigger PNG."""
        service = PseudonymizationService()
        token = service.generate_doc_token()
        try:
            compact = service.generate_qr_code(token, size=100)
            big = service.generate_qr_code(token, size=400)
        except RuntimeError:
            pytest.skip("qrcode library not installed")
        signature = b'\x89PNG\r\n\x1a\n'
        assert compact.startswith(signature)
        assert big.startswith(signature)
        assert len(big) > len(compact)
class TestHeaderRedaction:
    """Tests for header redaction (blacking out the name field of a scan)."""
    def test_redact_header_returns_redaction_result(self):
        """A valid image yields a RedactionResult with redacted image bytes."""
        service = PseudonymizationService()
        # Create a simple test image (1x1 white pixel PNG)
        # This is a minimal valid PNG
        test_png = (
            b'\x89PNG\r\n\x1a\n' # PNG signature
            b'\x00\x00\x00\rIHDR' # IHDR chunk
            b'\x00\x00\x00\x01' # Width: 1
            b'\x00\x00\x00\x01' # Height: 1
            b'\x08\x02' # Bit depth: 8, Color type: RGB
            b'\x00\x00\x00' # Compression, Filter, Interlace
            b'\x90wS\xde' # CRC
            b'\x00\x00\x00\x0cIDATx\x9cc\xf8\x0f\x00\x00\x01\x01\x00\x05\x18\xd8N' # IDAT
            b'\x00\x00\x00\x00IEND\xaeB`\x82' # IEND
        )
        result = service.redact_header(test_png)
        assert isinstance(result, RedactionResult)
        assert isinstance(result.redacted_image, bytes)
    def test_redact_header_with_invalid_image_returns_original(self):
        """Undecodable input falls back to the original bytes, unredacted."""
        service = PseudonymizationService()
        invalid_data = b'not an image'
        result = service.redact_header(invalid_data)
        # Fallback keeps the bytes intact and reports that nothing was redacted.
        assert result.redacted_image == invalid_data
        assert result.redaction_applied is False
class TestQRDetection:
    """Tests for QR code detection on uploaded images."""

    def test_detect_qr_code_no_qr_returns_none(self):
        """Input without a QR code yields a None token at zero confidence."""
        outcome = PseudonymizationService().detect_qr_code(b'not an image with qr')
        assert outcome.doc_token is None
        assert outcome.confidence == 0.0
class TestSingleton:
    """Tests for the module-level pseudonymizer singleton."""

    def test_get_pseudonymizer_returns_same_instance(self):
        """Repeated calls hand back the identical object."""
        assert get_pseudonymizer() is get_pseudonymizer()

    def test_pseudonymizer_is_service_instance(self):
        """The singleton is an actual PseudonymizationService."""
        assert isinstance(get_pseudonymizer(), PseudonymizationService)
class TestPrivacyGuarantees:
    """Tests verifying that tokens cannot leak personal data."""

    def test_token_cannot_be_reversed_to_name(self):
        """Generated tokens contain no fragment of any student name."""
        service = PseudonymizationService()
        names = ["Max Mustermann", "Anna Schmidt", "Tim Mueller"]
        for token in service.generate_batch_tokens(len(names)):
            lowered = token.lower()
            for name in names:
                assert name.lower() not in lowered
                for fragment in name.split():
                    assert fragment.lower() not in lowered

    def test_token_generation_is_not_deterministic(self):
        """Equal-sized batches never reproduce the same tokens."""
        service = PseudonymizationService()
        assert set(service.generate_batch_tokens(5)).isdisjoint(
            service.generate_batch_tokens(5)
        )

    def test_token_entropy(self):
        """Tokens follow the UUID shape and use a broad hex alphabet."""
        tokens = PseudonymizationService().generate_batch_tokens(100)
        for token in tokens:
            # UUID textual form: 8-4-4-4-12 hex groups, 36 chars in total.
            assert len(token) == 36
            assert token.count('-') == 4
        # Rough entropy check: across 100 tokens all hex digits should appear.
        hex_chars = set(''.join(t.replace('-', '') for t in tokens))
        assert len(hex_chars) >= 10

View File

@@ -0,0 +1,248 @@
"""
Tests for KlausurRepository.
Verifies:
- Teacher isolation (critical for privacy)
- CRUD operations
- Data retention cleanup
"""
import pytest
from datetime import datetime, timedelta
from unittest.mock import MagicMock, patch
from sqlalchemy.orm import Session
from klausur.repository import KlausurRepository
from klausur.db_models import (
ExamSession, PseudonymizedDocument, QRBatchJob,
SessionStatus, DocumentStatus
)
@pytest.fixture
def mock_db():
    """Create a mock database session (MagicMock constrained to the Session API)."""
    return MagicMock(spec=Session)
@pytest.fixture
def repo(mock_db):
    """Create a KlausurRepository wired to the mocked DB session."""
    return KlausurRepository(mock_db)
class TestTeacherIsolation:
    """Tests for teacher namespace isolation (CRITICAL for privacy)."""
    def test_get_session_requires_teacher_id(self, repo, mock_db):
        """Getting a session must require teacher_id."""
        # Setup mock: the fluent query chain returns itself so any
        # filter/first sequence resolves without a real database.
        mock_query = MagicMock()
        mock_db.query.return_value = mock_query
        mock_query.filter.return_value = mock_query
        mock_query.first.return_value = None
        # Attempt to get session
        result = repo.get_session("session-123", "teacher-A")
        # Verify filter was called (teacher isolation)
        mock_db.query.assert_called_with(ExamSession)
        mock_query.filter.assert_called()
    def test_list_sessions_only_returns_teacher_sessions(self, repo, mock_db):
        """Listing sessions must filter by teacher_id."""
        # Chain every query-builder step back onto the same mock.
        mock_query = MagicMock()
        mock_db.query.return_value = mock_query
        mock_query.filter.return_value = mock_query
        mock_query.order_by.return_value = mock_query
        mock_query.offset.return_value = mock_query
        mock_query.limit.return_value = mock_query
        mock_query.all.return_value = []
        result = repo.list_sessions("teacher-A")
        # Verify query chain
        mock_db.query.assert_called_with(ExamSession)
    def test_get_document_verifies_teacher_ownership(self, repo, mock_db):
        """Getting a document must verify teacher owns the session."""
        mock_query = MagicMock()
        mock_db.query.return_value = mock_query
        mock_query.join.return_value = mock_query
        mock_query.filter.return_value = mock_query
        mock_query.first.return_value = None
        result = repo.get_document("doc-token-123", "teacher-A")
        # Must join with ExamSession to verify teacher_id
        mock_query.join.assert_called()
    def test_different_teachers_cannot_see_each_others_sessions(self, repo, mock_db):
        """Teacher A cannot access Teacher B's sessions."""
        # Create mock session owned by teacher-B
        session_b = MagicMock(spec=ExamSession)
        session_b.teacher_id = "teacher-B"
        session_b.id = "session-123"
        mock_query = MagicMock()
        mock_db.query.return_value = mock_query
        mock_query.filter.return_value = mock_query
        # Return None because filter should exclude teacher-B's session
        mock_query.first.return_value = None
        # Teacher A tries to access
        result = repo.get_session("session-123", "teacher-A")
        assert result is None
class TestSessionOperations:
    """Tests for session CRUD operations."""
    def test_create_session_sets_teacher_id(self, repo, mock_db):
        """Creating a session must set the teacher_id."""
        repo.create_session(
            teacher_id="teacher-123",
            name="Mathe Klausur",
            subject="Mathematik"
        )
        # Verify session was added with teacher_id.
        # call_args[0][0] is the first positional argument of db.add().
        mock_db.add.assert_called_once()
        added_session = mock_db.add.call_args[0][0]
        assert added_session.teacher_id == "teacher-123"
        assert added_session.name == "Mathe Klausur"
    def test_create_session_sets_retention_date(self, repo, mock_db):
        """Sessions must have a retention date for auto-deletion."""
        repo.create_session(
            teacher_id="teacher-123",
            name="Test",
            retention_days=30
        )
        added_session = mock_db.add.call_args[0][0]
        assert added_session.retention_until is not None
        # Should be approximately 30 days in the future
        # (60-second tolerance absorbs test-runtime clock drift).
        expected = datetime.utcnow() + timedelta(days=30)
        diff = abs((added_session.retention_until - expected).total_seconds())
        assert diff < 60 # Within 1 minute
    def test_delete_session_soft_delete_by_default(self, repo, mock_db):
        """Deleting should soft-delete by default."""
        mock_session = MagicMock(spec=ExamSession)
        mock_session.status = SessionStatus.CREATED
        mock_query = MagicMock()
        mock_db.query.return_value = mock_query
        mock_query.filter.return_value = mock_query
        mock_query.first.return_value = mock_session
        result = repo.delete_session("session-123", "teacher-A")
        # Should set status to DELETED, not actually delete
        assert mock_session.status == SessionStatus.DELETED
        mock_db.delete.assert_not_called()
    def test_delete_session_hard_delete_when_requested(self, repo, mock_db):
        """Hard delete should actually delete the record."""
        mock_session = MagicMock(spec=ExamSession)
        mock_query = MagicMock()
        mock_db.query.return_value = mock_query
        mock_query.filter.return_value = mock_query
        mock_query.first.return_value = mock_session
        result = repo.delete_session("session-123", "teacher-A", hard_delete=True)
        # hard_delete=True must translate into a real DB delete.
        mock_db.delete.assert_called_once_with(mock_session)
class TestDocumentOperations:
    """Tests for document CRUD operations."""

    def test_create_document_requires_valid_session(self, repo, mock_db):
        """Creation returns None when the teacher owns no such session."""
        query = MagicMock()
        mock_db.query.return_value = query
        query.filter.return_value = query
        # Session lookup misses (wrong teacher, or it does not exist).
        query.first.return_value = None
        created = repo.create_document(
            session_id="session-123",
            teacher_id="teacher-A"
        )
        assert created is None

    def test_update_document_ocr_changes_status(self, repo, mock_db):
        """Storing OCR output also advances the document status."""
        doc = MagicMock(spec=PseudonymizedDocument)
        doc.status = DocumentStatus.UPLOADED
        # Bypass the DB lookup so only the update logic is under test.
        with patch.object(repo, 'get_document', return_value=doc):
            repo.update_document_ocr(
                doc_token="doc-123",
                teacher_id="teacher-A",
                ocr_text="Student answer text",
                confidence=95
            )
        assert doc.ocr_text == "Student answer text"
        assert doc.ocr_confidence == 95
        assert doc.status == DocumentStatus.OCR_COMPLETED
class TestDataRetention:
    """Tests for retention-driven cleanup of expired sessions."""

    def test_cleanup_expired_sessions(self, repo, mock_db):
        """Expired sessions are soft-deleted and their identity map wiped."""
        # A completed session whose retention window ended yesterday.
        stale = MagicMock(spec=ExamSession)
        stale.retention_until = datetime.utcnow() - timedelta(days=1)
        stale.status = SessionStatus.COMPLETED
        stale.encrypted_identity_map = b"encrypted_data"
        query = MagicMock()
        mock_db.query.return_value = query
        query.filter.return_value = query
        query.all.return_value = [stale]
        assert repo.cleanup_expired_sessions() == 1
        assert stale.status == SessionStatus.DELETED
        # The encrypted name-to-token map must not survive past retention.
        assert stale.encrypted_identity_map is None
class TestStatistics:
    """Tests for anonymized statistics."""
    def test_get_session_stats_returns_anonymized_data(self, repo, mock_db):
        """Statistics should not contain any PII."""
        mock_session = MagicMock(spec=ExamSession)
        mock_session.document_count = 25
        mock_session.processed_count = 20
        mock_query = MagicMock()
        mock_db.query.return_value = mock_query
        mock_query.filter.return_value = mock_query
        # first() is called twice: once for status counts and once for score stats
        # Return a tuple for score_stats that can be subscripted
        mock_query.first.return_value = (85.0, 60, 100) # avg, min, max scores
        mock_query.group_by.return_value = mock_query
        mock_query.all.return_value = []
        # Bypass the session lookup so only the stats aggregation is tested.
        with patch.object(repo, 'get_session', return_value=mock_session):
            stats = repo.get_session_stats("session-123", "teacher-A")
        # Stats should contain only aggregate data, no PII
        assert "session_id" in stats
        assert "total_documents" in stats
        # Should NOT contain student names or tokens
        assert "student_names" not in stats
        assert "doc_tokens" not in stats

View File

@@ -0,0 +1,346 @@
"""
Tests for Klausur API Routes.
Verifies:
- API endpoint behavior
- Request validation
- Response format
- Privacy guarantees at API level
"""
import pytest
from unittest.mock import MagicMock, patch, AsyncMock
from fastapi.testclient import TestClient
from fastapi import FastAPI
from klausur.routes import router
from klausur.db_models import SessionStatus, DocumentStatus
@pytest.fixture
def app():
    """Create a minimal FastAPI app with the klausur router mounted under /api."""
    app = FastAPI()
    app.include_router(router, prefix="/api")
    return app
@pytest.fixture
def client(app):
    """Create a synchronous TestClient bound to the test app."""
    return TestClient(app)
class TestSessionEndpoints:
    """Tests for session-related endpoints."""
    # NOTE: @patch decorators apply bottom-up, so the bottom patch (get_db)
    # maps to the first mock parameter and KlausurRepository to the second.
    @patch('klausur.routes.KlausurRepository')
    @patch('klausur.routes.get_db')
    def test_create_session_returns_201(self, mock_get_db, mock_repo_class, client):
        """Creating a session should return 201."""
        # Setup mocks: get_db is a generator dependency, hence iter([...]).
        mock_db = MagicMock()
        mock_get_db.return_value = iter([mock_db])
        mock_repo = MagicMock()
        mock_repo_class.return_value = mock_repo
        # Fully populated session mock so response serialization succeeds.
        mock_session = MagicMock()
        mock_session.id = "session-123"
        mock_session.name = "Test Klausur"
        mock_session.subject = "Mathe"
        mock_session.class_name = "10a"
        mock_session.total_points = 100
        mock_session.status = SessionStatus.CREATED
        mock_session.document_count = 0
        mock_session.processed_count = 0
        mock_session.created_at = "2024-01-15T10:00:00"
        mock_session.completed_at = None
        mock_session.retention_until = "2024-02-15T10:00:00"
        mock_repo.create_session.return_value = mock_session
        response = client.post("/api/klausur/sessions", json={
            "name": "Test Klausur",
            "subject": "Mathe",
            "class_name": "10a"
        })
        assert response.status_code == 201
        data = response.json()
        assert data["name"] == "Test Klausur"
        assert data["status"] == "created"
    @patch('klausur.routes.KlausurRepository')
    @patch('klausur.routes.get_db')
    def test_create_session_validates_name(self, mock_get_db, mock_repo_class, client):
        """Session name is required and must not be empty."""
        response = client.post("/api/klausur/sessions", json={
            "name": "", # Empty name
            "subject": "Mathe"
        })
        assert response.status_code == 422 # Validation error
    @patch('klausur.routes.KlausurRepository')
    @patch('klausur.routes.get_db')
    def test_list_sessions_returns_array(self, mock_get_db, mock_repo_class, client):
        """Listing sessions should return an array."""
        mock_db = MagicMock()
        mock_get_db.return_value = iter([mock_db])
        mock_repo = MagicMock()
        mock_repo_class.return_value = mock_repo
        mock_repo.list_sessions.return_value = []
        response = client.get("/api/klausur/sessions")
        assert response.status_code == 200
        data = response.json()
        assert "sessions" in data
        assert isinstance(data["sessions"], list)
    @patch('klausur.routes.KlausurRepository')
    @patch('klausur.routes.get_db')
    def test_get_session_404_when_not_found(self, mock_get_db, mock_repo_class, client):
        """Getting non-existent session should return 404."""
        mock_db = MagicMock()
        mock_get_db.return_value = iter([mock_db])
        mock_repo = MagicMock()
        mock_repo_class.return_value = mock_repo
        mock_repo.get_session.return_value = None
        response = client.get("/api/klausur/sessions/nonexistent-123")
        assert response.status_code == 404
class TestQREndpoints:
    """Tests for QR code generation endpoints."""
    # NOTE: @patch decorators apply bottom-up — the mock parameters mirror
    # the decorator list in reverse order.
    @patch('klausur.routes.KlausurRepository')
    @patch('klausur.routes.get_pseudonymizer')
    @patch('klausur.routes.get_db')
    def test_generate_qr_batch_creates_tokens(
        self, mock_get_db, mock_get_pseudonymizer, mock_repo_class, client
    ):
        """QR batch generation should create correct number of tokens."""
        mock_db = MagicMock()
        mock_get_db.return_value = iter([mock_db])
        mock_repo = MagicMock()
        mock_repo_class.return_value = mock_repo
        mock_session = MagicMock()
        mock_repo.get_session.return_value = mock_session
        mock_batch = MagicMock()
        mock_batch.id = "batch-123"
        mock_batch.student_count = 5
        mock_repo.create_qr_batch.return_value = mock_batch
        # Pseudonymizer returns a fixed batch of five tokens.
        mock_pseudonymizer = MagicMock()
        mock_pseudonymizer.generate_batch_tokens.return_value = [
            "token-1", "token-2", "token-3", "token-4", "token-5"
        ]
        mock_get_pseudonymizer.return_value = mock_pseudonymizer
        response = client.post("/api/klausur/sessions/session-123/qr-batch", json={
            "student_count": 5
        })
        assert response.status_code == 200
        data = response.json()
        assert len(data["generated_tokens"]) == 5
    @patch('klausur.routes.KlausurRepository')
    @patch('klausur.routes.get_db')
    def test_qr_batch_validates_student_count(self, mock_get_db, mock_repo_class, client):
        """Student count must be within valid range."""
        # Too many students
        response = client.post("/api/klausur/sessions/session-123/qr-batch", json={
            "student_count": 200 # Max is 100
        })
        assert response.status_code == 422
class TestUploadEndpoints:
    """Tests for document upload endpoints."""
    # NOTE: @patch decorators apply bottom-up — the mock parameters mirror
    # the decorator list in reverse order.
    @patch('klausur.routes.KlausurRepository')
    @patch('klausur.routes.get_pseudonymizer')
    @patch('klausur.routes.get_db')
    def test_upload_applies_redaction_by_default(
        self, mock_get_db, mock_get_pseudonymizer, mock_repo_class, client
    ):
        """Upload should apply header redaction by default."""
        mock_db = MagicMock()
        mock_get_db.return_value = iter([mock_db])
        mock_repo = MagicMock()
        mock_repo_class.return_value = mock_repo
        mock_session = MagicMock()
        mock_repo.get_session.return_value = mock_session
        # Fully populated document mock so the response model serializes.
        mock_doc = MagicMock()
        mock_doc.doc_token = "doc-token-123"
        mock_doc.session_id = "session-123"
        mock_doc.status = DocumentStatus.UPLOADED
        mock_doc.page_number = 1
        mock_doc.total_pages = 1
        mock_doc.ocr_confidence = 0
        mock_doc.ai_score = None
        mock_doc.ai_grade = None
        mock_doc.ai_feedback = None
        mock_doc.created_at = "2024-01-15T10:00:00"
        mock_doc.processing_completed_at = None
        mock_repo.create_document.return_value = mock_doc
        # No QR in the upload, so a fresh token is generated and the
        # header region is redacted.
        mock_pseudonymizer = MagicMock()
        mock_pseudonymizer.detect_qr_code.return_value = MagicMock(doc_token=None)
        mock_pseudonymizer.generate_doc_token.return_value = "doc-token-123"
        mock_pseudonymizer.smart_redact_header.return_value = MagicMock(
            redaction_applied=True,
            redacted_image=b"redacted",
            redacted_height=300
        )
        mock_get_pseudonymizer.return_value = mock_pseudonymizer
        # Create a minimal file upload
        response = client.post(
            "/api/klausur/sessions/session-123/upload",
            files={"file": ("test.png", b"fake image data", "image/png")}
        )
        # Verify redaction was called
        mock_pseudonymizer.smart_redact_header.assert_called_once()
class TestResultsEndpoints:
    """Tests for results endpoints."""

    @patch('klausur.routes.KlausurRepository')
    @patch('klausur.routes.get_db')
    def test_results_only_return_pseudonymized_data(
        self, mock_get_db, mock_repo_class, client
    ):
        """Results should only contain doc_tokens, not names."""
        db = MagicMock()
        mock_get_db.return_value = iter([db])
        repo = MagicMock()
        mock_repo_class.return_value = repo

        session = MagicMock()
        session.total_points = 100
        repo.get_session.return_value = session

        # A completed correction identified solely by its pseudonymous token.
        completed_doc = MagicMock()
        completed_doc.doc_token = "anonymous-token-123"
        completed_doc.status = DocumentStatus.COMPLETED
        completed_doc.ai_score = 85
        completed_doc.ai_grade = "2+"
        completed_doc.ai_feedback = "Good work"
        completed_doc.ai_details = {}
        repo.list_documents.return_value = [completed_doc]

        response = client.get("/api/klausur/sessions/session-123/results")
        assert response.status_code == 200

        body = response.json()
        assert len(body) == 1
        entry = body[0]
        # Only the pseudonymous token may identify a document -- never a name.
        assert "doc_token" in entry
        assert "student_name" not in entry
        assert "name" not in entry
class TestIdentityMapEndpoints:
    """Tests for identity map (vault) endpoints."""

    @patch('klausur.routes.KlausurRepository')
    @patch('klausur.routes.get_db')
    def test_store_identity_map_accepts_encrypted_data(
        self, mock_get_db, mock_repo_class, client
    ):
        """Identity map endpoint should accept encrypted data."""
        import base64

        db = MagicMock()
        mock_get_db.return_value = iter([db])
        repo = MagicMock()
        mock_repo_class.return_value = repo
        repo.update_session_identity_map.return_value = MagicMock()

        # Simulate a client-side encrypted blob (base64 transport encoding).
        ciphertext = base64.b64encode(b"encrypted identity map").decode()
        response = client.post(
            "/api/klausur/sessions/session-123/identity-map",
            json={"encrypted_data": ciphertext, "iv": "base64iv=="},
        )
        assert response.status_code == 204

    @patch('klausur.routes.KlausurRepository')
    @patch('klausur.routes.get_db')
    def test_get_identity_map_returns_encrypted_blob(
        self, mock_get_db, mock_repo_class, client
    ):
        """Getting identity map should return encrypted blob."""
        db = MagicMock()
        mock_get_db.return_value = iter([db])
        repo = MagicMock()
        mock_repo_class.return_value = repo

        session = MagicMock()
        session.encrypted_identity_map = b"encrypted data"
        session.identity_map_iv = "ivvalue"
        repo.get_session.return_value = session

        response = client.get("/api/klausur/sessions/session-123/identity-map")
        assert response.status_code == 200

        payload = response.json()
        # The server must hand back only the ciphertext plus its IV.
        assert "encrypted_data" in payload
        assert "iv" in payload
class TestPrivacyAtAPILevel:
"""Tests verifying privacy guarantees at API level."""
def test_no_student_names_in_any_response_schema(self):
    """Verify response schemas don't include student names."""
    from klausur.routes import (
        SessionResponse, DocumentResponse, CorrectionResultResponse
    )

    # Collect every declared field name across all response models.
    models = (SessionResponse, DocumentResponse, CorrectionResultResponse)
    all_fields = [name for model in models for name in model.model_fields.keys()]

    # Exact-match blocklist of PII-carrying field names.
    # A bare "name" stays allowed: it denotes the exam/session title
    # (e.g. "Mathe Klausur"), not a person.
    forbidden = {"student_name", "schueler_name", "student", "pupil", "schueler"}
    for field_name in all_fields:
        assert field_name.lower() not in forbidden, (
            f"Field '{field_name}' may contain PII"
        )
def test_identity_map_request_requires_encryption(self):
    """Identity map must be encrypted before storage."""
    from klausur.routes import IdentityMapUpdate

    schema_fields = set(IdentityMapUpdate.model_fields.keys())
    # The API accepts only ciphertext -- plaintext name fields must not exist.
    assert "encrypted_data" in schema_fields
    assert "names" not in schema_fields
    assert "student_names" not in schema_fields