fix: Restore all files lost during destructive rebase

A previous `git pull --rebase origin main` dropped 177 local commits, losing 3400+ files across admin-v2, backend, studio-v2, website, klausur-service, and many other services. The partial restore attempt (660295e2) only recovered some files. This commit restores all missing files from pre-rebase ref 98933f5e while preserving post-rebase additions (night-scheduler, night-mode UI, NightModeWidget dashboard integration). Restored features include: - AI Module Sidebar (FAB), OCR Labeling, OCR Compare - GPU Dashboard, RAG Pipeline, Magic Help - Klausur-Korrektur (8 files), Abitur-Archiv (5+ files) - Companion, Zeugnisse-Crawler, Screen Flow - Full backend, studio-v2, website, klausur-service - All compliance SDKs, agent-core, voice-service - CI/CD configs, documentation, scripts Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-02-09 09:51:32 +01:00
parent f7487ee240
commit bfdaf63ba9
2009 changed files with 749983 additions and 1731 deletions
--- a/backend/abitur_docs_api.py
+++ b/backend/abitur_docs_api.py
@@ -0,0 +1,956 @@
+"""
+Abitur Document Store API - Verwaltung von Abitur-Aufgaben und Erwartungshorizonten.
+
+Unterstützt:
+- Bundesland-spezifische Dokumente
+- Fach, Jahr, Niveau (eA/gA), Aufgabennummer
+- KI-basierte Dokumentenerkennung
+- RAG-Integration mit Vector Store
+
+Dateinamen-Schema (NiBiS Niedersachsen):
+- 2025_Deutsch_eA_I.pdf - Aufgabe
+- 2025_Deutsch_eA_I_EWH.pdf - Erwartungshorizont
+"""
+
+import logging
+import uuid
+import os
+import re
+import zipfile
+import tempfile
+from datetime import datetime
+from typing import List, Dict, Any, Optional
+from enum import Enum
+from pathlib import Path
+from dataclasses import dataclass
+
+from fastapi import APIRouter, HTTPException, UploadFile, File, Form, BackgroundTasks
+from fastapi.responses import FileResponse
+from pydantic import BaseModel, Field
+
+# Module-level logger named after this module.
+logger = logging.getLogger(__name__)
+
+# Every endpoint in this file is registered on this router under /abitur-docs.
+router = APIRouter(
+    prefix="/abitur-docs",
+    tags=["abitur-docs"],
+)
+
+# Storage directory for uploaded and imported files.
+# NOTE(review): the directory lives under /tmp and is created as an
+# import-time side effect; presumably a placeholder location - confirm
+# before relying on the files surviving a restart.
+DOCS_DIR = Path("/tmp/abitur-docs")
+DOCS_DIR.mkdir(parents=True, exist_ok=True)
+
+
+# ============================================================================
+# Enums
+# ============================================================================
+
+class Bundesland(str, Enum):
+    """German federal states (Bundesländer) with a central Abitur exam."""
+    # Values are lower-case ASCII identifiers; umlauts are transliterated
+    # (ue) and multi-word names are joined with an underscore.
+    NIEDERSACHSEN = "niedersachsen"
+    BAYERN = "bayern"
+    BADEN_WUERTTEMBERG = "baden_wuerttemberg"
+    NORDRHEIN_WESTFALEN = "nordrhein_westfalen"
+    HESSEN = "hessen"
+    SACHSEN = "sachsen"
+    THUERINGEN = "thueringen"
+    BERLIN = "berlin"
+    HAMBURG = "hamburg"
+    SCHLESWIG_HOLSTEIN = "schleswig_holstein"
+    BREMEN = "bremen"
+    BRANDENBURG = "brandenburg"
+    MECKLENBURG_VORPOMMERN = "mecklenburg_vorpommern"
+    SACHSEN_ANHALT = "sachsen_anhalt"
+    RHEINLAND_PFALZ = "rheinland_pfalz"
+    SAARLAND = "saarland"
+
+
+class Fach(str, Enum):
+    """Abitur subjects."""
+    # Values are lower-case ASCII identifiers (umlauts transliterated).
+    DEUTSCH = "deutsch"
+    ENGLISCH = "englisch"
+    MATHEMATIK = "mathematik"
+    BIOLOGIE = "biologie"
+    CHEMIE = "chemie"
+    PHYSIK = "physik"
+    GESCHICHTE = "geschichte"
+    ERDKUNDE = "erdkunde"
+    POLITIK_WIRTSCHAFT = "politik_wirtschaft"
+    FRANZOESISCH = "franzoesisch"
+    SPANISCH = "spanisch"
+    LATEIN = "latein"
+    GRIECHISCH = "griechisch"
+    KUNST = "kunst"
+    MUSIK = "musik"
+    SPORT = "sport"
+    INFORMATIK = "informatik"
+    EV_RELIGION = "ev_religion"
+    KATH_RELIGION = "kath_religion"
+    WERTE_NORMEN = "werte_normen"
+    BRC = "brc"  # business administration with accounting (Betriebswirtschaft mit Rechnungswesen)
+    BVW = "bvw"  # economics (Volkswirtschaft)
+    ERNAEHRUNG = "ernaehrung"
+    MECHATRONIK = "mechatronik"
+    GESUNDHEIT_PFLEGE = "gesundheit_pflege"
+    PAEDAGOGIK_PSYCHOLOGIE = "paedagogik_psychologie"
+
+
+class Niveau(str, Enum):
+    """Requirement level of an exam."""
+    EA = "eA"  # increased requirement level (erhöhtes Anforderungsniveau, Leistungskurs)
+    GA = "gA"  # basic requirement level (grundlegendes Anforderungsniveau, Grundkurs)
+
+
+class DokumentTyp(str, Enum):
+    """Kind of document."""
+    AUFGABE = "aufgabe"  # exam task
+    ERWARTUNGSHORIZONT = "erwartungshorizont"  # expected-answer outline (EWH)
+    DECKBLATT = "deckblatt"  # cover sheet
+    MATERIAL = "material"
+    HOERVERSTEHEN = "hoerverstehen"  # listening comprehension, for language subjects
+    SPRACHMITTLUNG = "sprachmittlung"  # language mediation, for language subjects
+    BEWERTUNGSBOGEN = "bewertungsbogen"  # assessment sheet
+
+
+class VerarbeitungsStatus(str, Enum):
+    """Processing status of a document."""
+    PENDING = "pending"
+    PROCESSING = "processing"
+    RECOGNIZED = "recognized"  # AI has recognized the metadata
+    CONFIRMED = "confirmed"  # a developer has confirmed the metadata
+    INDEXED = "indexed"  # stored in the vector store
+    ERROR = "error"
+
+
+# ============================================================================
+# Fach-Mapping für Dateinamen
+# ============================================================================
+
+# Maps subject spellings found in file names to the Fach enum.
+# Keys are lower-case and contain no separators. parse_nibis_filename strips
+# "_" and "-" from the file name and takes the first key (in insertion order)
+# that occurs as a substring, so several spellings may map to one subject.
+FACH_NAME_MAPPING = {
+    "deutsch": Fach.DEUTSCH,
+    "englisch": Fach.ENGLISCH,
+    "mathe": Fach.MATHEMATIK,
+    "mathematik": Fach.MATHEMATIK,
+    "biologie": Fach.BIOLOGIE,
+    "bio": Fach.BIOLOGIE,
+    "chemie": Fach.CHEMIE,
+    "physik": Fach.PHYSIK,
+    "geschichte": Fach.GESCHICHTE,
+    "erdkunde": Fach.ERDKUNDE,
+    "geographie": Fach.ERDKUNDE,
+    "politikwirtschaft": Fach.POLITIK_WIRTSCHAFT,
+    "politik": Fach.POLITIK_WIRTSCHAFT,
+    "franzoesisch": Fach.FRANZOESISCH,
+    "franz": Fach.FRANZOESISCH,
+    "spanisch": Fach.SPANISCH,
+    "latein": Fach.LATEIN,
+    "griechisch": Fach.GRIECHISCH,
+    "kunst": Fach.KUNST,
+    "musik": Fach.MUSIK,
+    "sport": Fach.SPORT,
+    "informatik": Fach.INFORMATIK,
+    "evreligion": Fach.EV_RELIGION,
+    "kathreligion": Fach.KATH_RELIGION,
+    "wertenormen": Fach.WERTE_NORMEN,
+    "brc": Fach.BRC,
+    "bvw": Fach.BVW,
+    "ernaehrung": Fach.ERNAEHRUNG,
+    "mecha": Fach.MECHATRONIK,
+    "mechatronik": Fach.MECHATRONIK,
+    "technikmecha": Fach.MECHATRONIK,
+    "gespfl": Fach.GESUNDHEIT_PFLEGE,
+    "paedpsych": Fach.PAEDAGOGIK_PSYCHOLOGIE,
+}
+
+
+# ============================================================================
+# Pydantic Models
+# ============================================================================
+
+class DokumentCreate(BaseModel):
+    """Payload for creating a document manually."""
+    bundesland: Bundesland
+    fach: Fach
+    jahr: int = Field(ge=2000, le=2100)  # year, validated to 2000..2100
+    niveau: Niveau
+    typ: DokumentTyp
+    aufgaben_nummer: Optional[str] = None  # task number: I, II, III, 1, 2, etc.
+
+
+class DokumentUpdate(BaseModel):
+    """Partial update of recognized metadata."""
+    # Every field is optional; update_dokument applies only the fields that
+    # are not None.
+    bundesland: Optional[Bundesland] = None
+    fach: Optional[Fach] = None
+    jahr: Optional[int] = None
+    niveau: Optional[Niveau] = None
+    typ: Optional[DokumentTyp] = None
+    aufgaben_nummer: Optional[str] = None
+    status: Optional[VerarbeitungsStatus] = None
+
+
+class DokumentResponse(BaseModel):
+    """API response describing one document."""
+    id: str
+    dateiname: str  # name of the file as stored on disk
+    original_dateiname: str  # name of the file as uploaded
+    bundesland: Bundesland
+    fach: Fach
+    jahr: int
+    niveau: Niveau
+    typ: DokumentTyp
+    aufgaben_nummer: Optional[str]
+    status: VerarbeitungsStatus
+    confidence: float  # recognition confidence
+    file_path: str
+    file_size: int
+    indexed: bool
+    vector_ids: List[str]
+    created_at: datetime
+    updated_at: datetime
+
+
+class ImportResult(BaseModel):
+    """Result of a ZIP import."""
+    total_files: int  # number of PDF entries considered
+    recognized: int  # number of entries stored as documents
+    errors: int  # number of entries that were skipped or failed
+    documents: List[DokumentResponse]
+
+
+class RecognitionResult(BaseModel):
+    """Outcome of recognizing document metadata from a file name."""
+    success: bool
+    bundesland: Optional[Bundesland]
+    fach: Optional[Fach]
+    jahr: Optional[int]
+    niveau: Optional[Niveau]
+    typ: Optional[DokumentTyp]
+    aufgaben_nummer: Optional[str]
+    confidence: float
+    raw_filename: str
+    suggestions: List[Dict[str, Any]]
+
+    @property
+    def extracted(self) -> Dict[str, Any]:
+        """Backwards-compatible view: the truthy metadata fields as a plain dict.
+
+        Enum members are reduced to their ``value``; fields that are unset
+        (or otherwise falsy) are left out.
+        """
+        field_order = ("bundesland", "fach", "jahr", "niveau", "typ", "aufgaben_nummer")
+        found: Dict[str, Any] = {}
+        for key in field_order:
+            current = getattr(self, key)
+            if not current:
+                continue
+            found[key] = current.value if isinstance(current, Enum) else current
+        return found
+
+    @property
+    def method(self) -> str:
+        """Backwards-compatible name of the recognition method (always file-name based)."""
+        return "filename_pattern"
+
+
+# ============================================================================
+# Internal Data Classes
+# ============================================================================
+
+@dataclass
+class AbiturDokument:
+    """Internal document record; mirrors the fields of DokumentResponse."""
+    id: str
+    dateiname: str  # name of the file as stored on disk
+    original_dateiname: str  # name of the file as uploaded
+    bundesland: Bundesland
+    fach: Fach
+    jahr: int
+    niveau: Niveau
+    typ: DokumentTyp
+    aufgaben_nummer: Optional[str]
+    status: VerarbeitungsStatus
+    confidence: float  # score produced by the file-name recognition
+    file_path: str
+    file_size: int
+    indexed: bool
+    vector_ids: List[str]
+    created_at: datetime
+    updated_at: datetime
+
+
+# ============================================================================
+# In-Memory Storage
+# ============================================================================
+
+# Registry of all known documents, keyed by document id. It is a plain
+# module-level dict, so its contents exist only in this process's memory.
+_dokumente: Dict[str, AbiturDokument] = {}
+
+
+# ============================================================================
+# Helper Functions - Dokumentenerkennung
+# ============================================================================
+
+def parse_nibis_filename(filename: str) -> RecognitionResult:
+    """
+    Recognize document metadata from a NiBiS-style file name.
+
+    Examples:
+    - 2025_Deutsch_eA_I.pdf
+    - 2025_Deutsch_eA_I_EWH.pdf
+    - 2025_Biologie_gA_1.pdf
+    - 2025_Englisch_eA_HV.pdf (listening comprehension)
+
+    Args:
+        filename: File name (with or without extension) to analyse.
+
+    Returns:
+        A RecognitionResult holding whatever could be extracted. ``success``
+        is True only when both subject and year were found; ``confidence``
+        is the sum of the per-field scores, capped at 1.0.
+    """
+    result = RecognitionResult(
+        success=False,
+        bundesland=Bundesland.NIEDERSACHSEN,  # NiBiS = Lower Saxony
+        fach=None,
+        jahr=None,
+        niveau=None,
+        typ=None,
+        aufgaben_nummer=None,
+        confidence=0.0,
+        raw_filename=filename,
+        suggestions=[]
+    )
+
+    # Work on the lower-cased stem (extension removed).
+    name = Path(filename).stem.lower()
+    # Separator-delimited parts, used to match short codes exactly.
+    tokens = set(re.split(r'[_\-\s]+', name))
+    # Subject keys contain no separators; compute the stripped name once.
+    compact_name = name.replace("_", "").replace("-", "")
+
+    def has_code(*codes: str) -> bool:
+        """Return True if a token equals one of *codes*, ignoring trailing digits."""
+        return any(token.rstrip("0123456789") in codes for token in tokens)
+
+    # Year: four digits at the very start.
+    jahr_match = re.match(r'^(\d{4})', name)
+    if jahr_match:
+        result.jahr = int(jahr_match.group(1))
+        result.confidence += 0.2
+
+    # Subject: first mapping key contained in the separator-free name.
+    for fach_key, fach_enum in FACH_NAME_MAPPING.items():
+        if fach_key in compact_name:
+            result.fach = fach_enum
+            result.confidence += 0.3
+            break
+
+    # Level (eA/gA): an exact token wins. The looser substring test is kept
+    # only as a fallback so names without a clean token behave as before.
+    if "ea" in tokens:
+        result.niveau = Niveau.EA
+    elif "ga" in tokens:
+        result.niveau = Niveau.GA
+    elif "_ea" in name or "ea_" in name:
+        result.niveau = Niveau.EA
+    elif "_ga" in name or "ga_" in name:
+        result.niveau = Niveau.GA
+    if result.niveau is not None:
+        result.confidence += 0.2
+
+    # Document type. The two-letter codes (hv, sm, me) must be whole tokens:
+    # a plain "_me" substring test would also hit "_mecha..." and classify
+    # every Mechatronik file as Sprachmittlung.
+    if "_ewh" in name:
+        result.typ = DokumentTyp.ERWARTUNGSHORIZONT
+        result.confidence += 0.2
+    elif has_code("hv") or "hoerverstehen" in name:
+        result.typ = DokumentTyp.HOERVERSTEHEN
+        result.confidence += 0.15
+    elif has_code("sm", "me") or "sprachmittlung" in name:
+        result.typ = DokumentTyp.SPRACHMITTLUNG
+        result.confidence += 0.15
+    elif "deckblatt" in name:
+        result.typ = DokumentTyp.DECKBLATT
+        result.confidence += 0.15
+    elif "material" in name:
+        result.typ = DokumentTyp.MATERIAL
+        result.confidence += 0.15
+    elif "bewertung" in name:
+        result.typ = DokumentTyp.BEWERTUNGSBOGEN
+        result.confidence += 0.15
+    else:
+        # Nothing more specific found: treat the file as a task sheet.
+        result.typ = DokumentTyp.AUFGABE
+        result.confidence += 0.1
+
+    # Task number (roman or arabic), stored upper-cased.
+    aufgabe_match = re.search(r'_([ivx]+|[1-4][abc]?)(?:_|\.pdf|$)', name, re.IGNORECASE)
+    if aufgabe_match:
+        result.aufgaben_nummer = aufgabe_match.group(1).upper()
+        result.confidence += 0.1
+
+    # Success requires at least subject and year.
+    if result.fach and result.jahr:
+        result.success = True
+
+    # Cap the accumulated confidence at 1.0.
+    result.confidence = min(result.confidence, 1.0)
+
+    return result
+
+
+def _to_dokument_response(doc: AbiturDokument) -> DokumentResponse:
+    """Build the API response model from an internal document record."""
+    # Both types use the same field names, so the values are copied by name.
+    field_names = (
+        "id",
+        "dateiname",
+        "original_dateiname",
+        "bundesland",
+        "fach",
+        "jahr",
+        "niveau",
+        "typ",
+        "aufgaben_nummer",
+        "status",
+        "confidence",
+        "file_path",
+        "file_size",
+        "indexed",
+        "vector_ids",
+        "created_at",
+        "updated_at",
+    )
+    payload = {field: getattr(doc, field) for field in field_names}
+    return DokumentResponse(**payload)
+
+
+# ============================================================================
+# API Endpoints - Dokumente
+# ============================================================================
+
+@router.post("/upload", response_model=DokumentResponse)
+async def upload_dokument(
+    file: UploadFile = File(...),
+    bundesland: Optional[Bundesland] = Form(None),
+    fach: Optional[Fach] = Form(None),
+    jahr: Optional[int] = Form(None),
+    niveau: Optional[Niveau] = Form(None),
+    typ: Optional[DokumentTyp] = Form(None),
+    aufgaben_nummer: Optional[str] = Form(None)
+):
+    """
+    Upload a single document.
+
+    Metadata given in the form takes precedence; anything left out is taken
+    from the file name via parse_nibis_filename and finally from defaults
+    (Lower Saxony, current year, eA, task sheet).
+
+    Raises:
+        HTTPException: 400 if the upload has no file name or no subject can
+            be determined.
+    """
+    original_name = file.filename
+    if not original_name:
+        raise HTTPException(status_code=400, detail="Kein Dateiname")
+
+    # Recognition from the file name supplies the fallbacks.
+    erkannt = parse_nibis_filename(original_name)
+
+    # The subject has no default, so it is checked first.
+    resolved_fach = fach or erkannt.fach
+    if not resolved_fach:
+        raise HTTPException(status_code=400, detail="Fach konnte nicht erkannt werden")
+
+    resolved_bundesland = bundesland or erkannt.bundesland or Bundesland.NIEDERSACHSEN
+    resolved_jahr = jahr or erkannt.jahr or datetime.now().year
+    resolved_niveau = niveau or erkannt.niveau or Niveau.EA
+    resolved_typ = typ or erkannt.typ or DokumentTyp.AUFGABE
+    resolved_aufgabe = aufgaben_nummer or erkannt.aufgaben_nummer
+
+    # Store the file under a generated id, keeping only the extension.
+    doc_id = str(uuid.uuid4())
+    stored_name = doc_id + Path(original_name).suffix
+    target = DOCS_DIR / stored_name
+
+    payload = await file.read()
+    target.write_bytes(payload)
+
+    if erkannt.success:
+        initial_status = VerarbeitungsStatus.RECOGNIZED
+    else:
+        initial_status = VerarbeitungsStatus.PENDING
+
+    timestamp = datetime.utcnow()
+    dokument = AbiturDokument(
+        id=doc_id,
+        dateiname=stored_name,
+        original_dateiname=original_name,
+        bundesland=resolved_bundesland,
+        fach=resolved_fach,
+        jahr=resolved_jahr,
+        niveau=resolved_niveau,
+        typ=resolved_typ,
+        aufgaben_nummer=resolved_aufgabe,
+        status=initial_status,
+        confidence=erkannt.confidence,
+        file_path=str(target),
+        file_size=len(payload),
+        indexed=False,
+        vector_ids=[],
+        created_at=timestamp,
+        updated_at=timestamp
+    )
+    _dokumente[doc_id] = dokument
+
+    logger.info(f"Uploaded document {doc_id}: {file.filename}")
+    return _to_dokument_response(dokument)
+
+
+@router.post("/import-zip", response_model=ImportResult)
+async def import_zip(
+    file: UploadFile = File(...),
+    bundesland: Bundesland = Form(Bundesland.NIEDERSACHSEN),
+    background_tasks: BackgroundTasks = None
+):
+    """
+    Import every PDF contained in an uploaded ZIP archive.
+
+    Metadata is recognized from each entry's file name. Entries whose
+    subject cannot be recognized are counted as errors and skipped.
+
+    Args:
+        file: The uploaded ZIP archive.
+        bundesland: Federal state assigned to every imported document.
+        background_tasks: Accepted for interface compatibility; not used.
+
+    Returns:
+        ImportResult with the number of PDF entries seen, the number stored,
+        the number of failures, and the stored documents.
+
+    Raises:
+        HTTPException: 400 if the upload is not named *.zip or is not a
+            readable ZIP archive.
+    """
+    # Compare the extension case-insensitively, like the PDF check below.
+    if not file.filename or not file.filename.lower().endswith(".zip"):
+        raise HTTPException(status_code=400, detail="ZIP-Datei erforderlich")
+
+    documents = []
+    total = 0
+    recognized = 0
+    errors = 0
+
+    # The archive is spooled to a named temp file (delete=False) and removed
+    # in the finally block, including when reading the upload itself fails.
+    tmp = tempfile.NamedTemporaryFile(delete=False, suffix=".zip")
+    tmp_path = tmp.name
+    try:
+        with tmp:
+            tmp.write(await file.read())
+
+        try:
+            zip_ref = zipfile.ZipFile(tmp_path, 'r')
+        except zipfile.BadZipFile as exc:
+            # A corrupt upload is a client error, not a server error.
+            raise HTTPException(status_code=400, detail="Ungültige ZIP-Datei") from exc
+
+        with zip_ref:
+            for zip_info in zip_ref.infolist():
+                entry_path = Path(zip_info.filename)
+                basename = entry_path.name
+
+                # PDFs only
+                if zip_info.is_dir() or not basename.lower().endswith(".pdf"):
+                    continue
+
+                # Skip macOS metadata and hidden entries at any depth
+                # (e.g. "folder/._file.pdf"), not only at the archive root.
+                if "__MACOSX" in zip_info.filename or any(
+                    part.startswith(".") for part in entry_path.parts
+                ):
+                    continue
+
+                total += 1
+
+                try:
+                    # Recognize metadata from the bare file name.
+                    recognition = parse_nibis_filename(basename)
+
+                    if not recognition.fach:
+                        errors += 1
+                        logger.warning(f"Konnte Fach nicht erkennen: {basename}")
+                        continue
+
+                    # Extract and store under a generated id.
+                    doc_id = str(uuid.uuid4())
+                    file_ext = Path(basename).suffix
+                    safe_filename = f"{doc_id}{file_ext}"
+                    file_path = DOCS_DIR / safe_filename
+
+                    # NOTE(review): the entry is read into memory without a
+                    # size limit; an oversized or highly compressed entry can
+                    # exhaust memory. Consider enforcing a maximum size.
+                    with zip_ref.open(zip_info.filename) as source:
+                        file_content = source.read()
+                    file_path.write_bytes(file_content)
+
+                    now = datetime.utcnow()
+
+                    dokument = AbiturDokument(
+                        id=doc_id,
+                        dateiname=safe_filename,
+                        original_dateiname=basename,
+                        bundesland=bundesland,
+                        fach=recognition.fach,
+                        jahr=recognition.jahr or datetime.now().year,
+                        niveau=recognition.niveau or Niveau.EA,
+                        typ=recognition.typ or DokumentTyp.AUFGABE,
+                        aufgaben_nummer=recognition.aufgaben_nummer,
+                        status=VerarbeitungsStatus.RECOGNIZED,
+                        confidence=recognition.confidence,
+                        file_path=str(file_path),
+                        file_size=len(file_content),
+                        indexed=False,
+                        vector_ids=[],
+                        created_at=now,
+                        updated_at=now
+                    )
+
+                    _dokumente[doc_id] = dokument
+                    documents.append(_to_dokument_response(dokument))
+                    recognized += 1
+
+                except Exception as e:
+                    # One bad entry must not abort the whole import.
+                    errors += 1
+                    logger.error(f"Fehler bei {zip_info.filename}: {e}")
+
+    finally:
+        # Remove the temporary ZIP.
+        os.unlink(tmp_path)
+
+    logger.info(f"ZIP-Import: {recognized}/{total} erkannt, {errors} Fehler")
+
+    return ImportResult(
+        total_files=total,
+        recognized=recognized,
+        errors=errors,
+        documents=documents
+    )
+
+
+@router.get("/", response_model=List[DokumentResponse])
+async def list_dokumente(
+    bundesland: Optional[Bundesland] = None,
+    fach: Optional[Fach] = None,
+    jahr: Optional[int] = None,
+    niveau: Optional[Niveau] = None,
+    typ: Optional[DokumentTyp] = None,
+    status: Optional[VerarbeitungsStatus] = None,
+    indexed: Optional[bool] = None
+):
+    """List documents, optionally filtered, sorted descending by year, subject and level."""
+    # Each entry pairs "is this filter active?" with the check it applies.
+    # All filters except `indexed` are switched on by truthiness.
+    criteria = [
+        (bool(bundesland), lambda d: d.bundesland == bundesland),
+        (bool(fach), lambda d: d.fach == fach),
+        (bool(jahr), lambda d: d.jahr == jahr),
+        (bool(niveau), lambda d: d.niveau == niveau),
+        (bool(typ), lambda d: d.typ == typ),
+        (bool(status), lambda d: d.status == status),
+        (indexed is not None, lambda d: d.indexed == indexed),
+    ]
+    active_checks = [check for enabled, check in criteria if enabled]
+
+    matching = [
+        entry for entry in _dokumente.values()
+        if all(check(entry) for check in active_checks)
+    ]
+    ordered = sorted(
+        matching,
+        key=lambda entry: (entry.jahr, entry.fach.value, entry.niveau.value),
+        reverse=True,
+    )
+    return list(map(_to_dokument_response, ordered))
+
+
+@router.get("/{doc_id}", response_model=DokumentResponse)
+async def get_dokument(doc_id: str):
+    """Return the document with the given id; respond 404 if it is unknown."""
+    found = _dokumente.get(doc_id)
+    if found:
+        return _to_dokument_response(found)
+    raise HTTPException(status_code=404, detail="Dokument nicht gefunden")
+
+
+@router.put("/{doc_id}", response_model=DokumentResponse)
+async def update_dokument(doc_id: str, data: DokumentUpdate):
+    """Update document metadata (by a developer, after AI recognition).
+
+    Only fields of ``data`` that are not None are applied. Responds 404 if
+    the document is unknown.
+    """
+    doc = _dokumente.get(doc_id)
+    if not doc:
+        raise HTTPException(status_code=404, detail="Dokument nicht gefunden")
+
+    updatable = (
+        "bundesland",
+        "fach",
+        "jahr",
+        "niveau",
+        "typ",
+        "aufgaben_nummer",
+        "status",
+    )
+    for attr in updatable:
+        new_value = getattr(data, attr)
+        if new_value is not None:
+            setattr(doc, attr, new_value)
+
+    doc.updated_at = datetime.utcnow()
+    return _to_dokument_response(doc)
+
+
+@router.post("/{doc_id}/confirm", response_model=DokumentResponse)
+async def confirm_dokument(doc_id: str):
+    """Mark the recognized metadata of a document as confirmed; 404 if unknown."""
+    entry = _dokumente.get(doc_id)
+    if not entry:
+        raise HTTPException(status_code=404, detail="Dokument nicht gefunden")
+
+    entry.updated_at = datetime.utcnow()
+    entry.status = VerarbeitungsStatus.CONFIRMED
+    return _to_dokument_response(entry)
+
+
+@router.post("/{doc_id}/index", response_model=DokumentResponse)
+async def index_dokument(doc_id: str):
+    """Index a document in the vector store (currently simulated).
+
+    Responds 404 if the document is unknown and 400 unless its status is
+    CONFIRMED or RECOGNIZED.
+    """
+    entry = _dokumente.get(doc_id)
+    if not entry:
+        raise HTTPException(status_code=404, detail="Dokument nicht gefunden")
+
+    indexable = (VerarbeitungsStatus.CONFIRMED, VerarbeitungsStatus.RECOGNIZED)
+    if entry.status not in indexable:
+        raise HTTPException(status_code=400, detail="Dokument muss erst bestätigt werden")
+
+    # TODO: vector store integration
+    # 1. Read the PDF and extract its text
+    # 2. Split the text into chunks
+    # 3. Generate embeddings
+    # 4. Store them with metadata in the vector store
+
+    # Demo: simulate indexing with three placeholder vector ids.
+    demo_ids = []
+    for i in range(3):
+        demo_ids.append(f"vec_{doc_id}_{i}")
+    entry.vector_ids = demo_ids
+    entry.indexed = True
+    entry.status = VerarbeitungsStatus.INDEXED
+    entry.updated_at = datetime.utcnow()
+
+    logger.info(f"Document {doc_id} indexed (demo)")
+    return _to_dokument_response(entry)
+
+
+@router.delete("/{doc_id}")
+async def delete_dokument(doc_id: str):
+    """Delete a document and its stored file.
+
+    Returns:
+        ``{"status": "deleted", "id": doc_id}``.
+
+    Raises:
+        HTTPException: 404 if the document is unknown.
+    """
+    doc = _dokumente.get(doc_id)
+    if doc is None:
+        raise HTTPException(status_code=404, detail="Dokument nicht gefunden")
+
+    # Remove the file directly instead of checking existence first, so a
+    # file that disappears in between cannot make the request fail.
+    try:
+        os.remove(doc.file_path)
+    except FileNotFoundError:
+        # The file is already gone; the registry entry is still removed.
+        pass
+
+    # TODO: remove from the vector store
+
+    del _dokumente[doc_id]
+
+    return {"status": "deleted", "id": doc_id}
+
+
+@router.get("/{doc_id}/download")
+async def download_dokument(doc_id: str):
+    """Download the stored file under its original name.
+
+    The Content-Type is derived from the original file name, because the
+    upload endpoint accepts files of any extension. "application/pdf" stays
+    the fallback when the type cannot be guessed.
+
+    Raises:
+        HTTPException: 404 if the document is unknown or its file is missing.
+    """
+    # Local import: keeps this block self-contained.
+    import mimetypes
+
+    doc = _dokumente.get(doc_id)
+    if doc is None:
+        raise HTTPException(status_code=404, detail="Dokument nicht gefunden")
+
+    if not os.path.exists(doc.file_path):
+        raise HTTPException(status_code=404, detail="Datei nicht gefunden")
+
+    guessed_type, _encoding = mimetypes.guess_type(doc.original_dateiname)
+
+    return FileResponse(
+        doc.file_path,
+        filename=doc.original_dateiname,
+        media_type=guessed_type or "application/pdf"
+    )
+
+
+# ============================================================================
+# API Endpoints - Erkennung
+# ============================================================================
+
+@router.post("/recognize", response_model=RecognitionResult)
+async def recognize_filename(filename: str):
+    """Recognize metadata from a file name.
+
+    Thin wrapper that returns parse_nibis_filename(filename) unchanged.
+    """
+    return parse_nibis_filename(filename)
+
+
+@router.post("/bulk-confirm")
+async def bulk_confirm(doc_ids: List[str]):
+    """Confirm several documents at once.
+
+    Only documents that exist and are in status RECOGNIZED are confirmed;
+    all other ids are silently ignored. Returns the number confirmed and
+    the number of ids received.
+    """
+    confirmed = 0
+    for current_id in doc_ids:
+        entry = _dokumente.get(current_id)
+        if not entry or entry.status != VerarbeitungsStatus.RECOGNIZED:
+            continue
+        entry.status = VerarbeitungsStatus.CONFIRMED
+        entry.updated_at = datetime.utcnow()
+        confirmed += 1
+
+    return dict(confirmed=confirmed, total=len(doc_ids))
+
+
+@router.post("/bulk-index")
+async def bulk_index(doc_ids: List[str]):
+    """Index several documents at once (simulated).
+
+    Only documents that exist and are CONFIRMED or RECOGNIZED are indexed;
+    all other ids are silently ignored. Returns the number indexed and the
+    number of ids received.
+    """
+    indexable = (VerarbeitungsStatus.CONFIRMED, VerarbeitungsStatus.RECOGNIZED)
+    indexed = 0
+    for doc_id in doc_ids:
+        entry = _dokumente.get(doc_id)
+        if not entry or entry.status not in indexable:
+            continue
+        # Demo indexing with three placeholder vector ids.
+        entry.vector_ids = [f"vec_{doc_id}_{i}" for i in range(3)]
+        entry.indexed = True
+        entry.status = VerarbeitungsStatus.INDEXED
+        entry.updated_at = datetime.utcnow()
+        indexed += 1
+
+    return dict(indexed=indexed, total=len(doc_ids))
+
+
+# ============================================================================
+# API Endpoints - Statistiken
+# ============================================================================
+
+@router.get("/stats/overview")
+async def get_stats_overview():
+    """Return an overview of all documents: totals plus counts per state, subject, year and status."""
+    docs = list(_dokumente.values())
+
+    def tally(keys):
+        """Count occurrences of each key, keeping first-seen order."""
+        counts = {}
+        for key in keys:
+            counts[key] = counts.get(key, 0) + 1
+        return counts
+
+    indexed_count = len([d for d in docs if d.indexed])
+    pending_count = len([d for d in docs if d.status == VerarbeitungsStatus.PENDING])
+
+    return {
+        "total": len(docs),
+        "indexed": indexed_count,
+        "pending": pending_count,
+        "by_bundesland": tally(d.bundesland.value for d in docs),
+        "by_fach": tally(d.fach.value for d in docs),
+        "by_jahr": tally(d.jahr for d in docs),
+        "by_status": tally(d.status.value for d in docs)
+    }
+
+
+# ============================================================================
+# API Endpoints - Suche (für Klausur-Korrektur)
+# ============================================================================
+
+@router.get("/search", response_model=List[DokumentResponse])
+async def search_dokumente(
+    bundesland: Bundesland,
+    fach: Fach,
+    jahr: Optional[int] = None,
+    niveau: Optional[Niveau] = None,
+    nur_indexed: bool = True
+):
+    """
+    Search documents for exam correction (Klausur-Korrektur).
+
+    Args:
+        bundesland: Required federal-state filter.
+        fach: Required subject filter.
+        jahr: Optional year filter.
+        niveau: Optional level filter.
+        nur_indexed: If True (default), only indexed documents are returned.
+
+    Returns:
+        Task sheets, each directly followed by its matching expectation
+        outline (same year, level and task number) if one exists, then the
+        remaining expectation outlines, then all other document types.
+        Every document appears at most once.
+    """
+    # Mandatory filters
+    docs = [
+        d for d in _dokumente.values()
+        if d.bundesland == bundesland and d.fach == fach
+    ]
+
+    # Optional filters
+    if jahr:
+        docs = [d for d in docs if d.jahr == jahr]
+    if niveau:
+        docs = [d for d in docs if d.niveau == niveau]
+    if nur_indexed:
+        docs = [d for d in docs if d.indexed]
+
+    aufgaben = [d for d in docs if d.typ == DokumentTyp.AUFGABE]
+    ewh = [d for d in docs if d.typ == DokumentTyp.ERWARTUNGSHORIZONT]
+    andere = [d for d in docs if d.typ not in [DokumentTyp.AUFGABE, DokumentTyp.ERWARTUNGSHORIZONT]]
+
+    # Track paired outlines by id, so an outline is never emitted twice and
+    # no response models have to be built just to compare them.
+    ordered = []
+    paired_ids = set()
+    for aufgabe in aufgaben:
+        ordered.append(aufgabe)
+        matching_ewh = next(
+            (e for e in ewh
+             if e.id not in paired_ids
+             and e.jahr == aufgabe.jahr
+             and e.niveau == aufgabe.niveau
+             and e.aufgaben_nummer == aufgabe.aufgaben_nummer),
+            None
+        )
+        if matching_ewh is not None:
+            paired_ids.add(matching_ewh.id)
+            ordered.append(matching_ewh)
+
+    # Remaining outlines, then everything else.
+    ordered.extend(e for e in ewh if e.id not in paired_ids)
+    ordered.extend(andere)
+
+    return [_to_dokument_response(d) for d in ordered]
+
+
+# Routes are matched in registration order, and GET "/{doc_id}" is registered
+# earlier in this module, so it would capture "/search" as a document id and
+# answer 404. Move the route that was just registered to the front so the
+# literal path wins.
+router.routes.insert(0, router.routes.pop())
+
+
+# ============================================================================
+# Enums Endpoint (für Frontend)
+# ============================================================================
+
+@router.get("/enums/bundeslaender")
+async def get_bundeslaender():
+    """Return all federal states as value/label pairs for the frontend."""
+    options = []
+    for land in Bundesland:
+        # The label is the enum value with underscores as spaces, title-cased.
+        readable = land.value.replace("_", " ").title()
+        options.append({"value": land.value, "label": readable})
+    return options
+
+
+@router.get("/enums/faecher")
+async def get_faecher():
+    """Return all subjects as value/label pairs for the frontend."""
+    # Display names per subject; a subject missing here falls back to its value.
+    display_names = dict([
+        (Fach.DEUTSCH, "Deutsch"),
+        (Fach.ENGLISCH, "Englisch"),
+        (Fach.MATHEMATIK, "Mathematik"),
+        (Fach.BIOLOGIE, "Biologie"),
+        (Fach.CHEMIE, "Chemie"),
+        (Fach.PHYSIK, "Physik"),
+        (Fach.GESCHICHTE, "Geschichte"),
+        (Fach.ERDKUNDE, "Erdkunde"),
+        (Fach.POLITIK_WIRTSCHAFT, "Politik-Wirtschaft"),
+        (Fach.FRANZOESISCH, "Französisch"),
+        (Fach.SPANISCH, "Spanisch"),
+        (Fach.LATEIN, "Latein"),
+        (Fach.GRIECHISCH, "Griechisch"),
+        (Fach.KUNST, "Kunst"),
+        (Fach.MUSIK, "Musik"),
+        (Fach.SPORT, "Sport"),
+        (Fach.INFORMATIK, "Informatik"),
+        (Fach.EV_RELIGION, "Ev. Religion"),
+        (Fach.KATH_RELIGION, "Kath. Religion"),
+        (Fach.WERTE_NORMEN, "Werte und Normen"),
+        (Fach.BRC, "BRC (Betriebswirtschaft)"),
+        (Fach.BVW, "BVW (Volkswirtschaft)"),
+        (Fach.ERNAEHRUNG, "Ernährung"),
+        (Fach.MECHATRONIK, "Mechatronik"),
+        (Fach.GESUNDHEIT_PFLEGE, "Gesundheit-Pflege"),
+        (Fach.PAEDAGOGIK_PSYCHOLOGIE, "Pädagogik-Psychologie"),
+    ])
+    options = []
+    for subject in Fach:
+        caption = display_names.get(subject, subject.value)
+        options.append({"value": subject.value, "label": caption})
+    return options
+
+
+@router.get("/enums/niveaus")
+async def get_niveaus():
+    """Return both requirement levels as value/label pairs for the frontend."""
+    captions = (
+        (Niveau.EA, "eA (erhöhtes Anforderungsniveau)"),
+        (Niveau.GA, "gA (grundlegendes Anforderungsniveau)"),
+    )
+    return [{"value": level.value, "label": caption} for level, caption in captions]
+
+
+@router.get("/enums/typen")
+async def get_typen():
+    """Return all document types as value/label pairs for the frontend."""
+    # Display names per type; a type missing here falls back to its value.
+    display_names = dict([
+        (DokumentTyp.AUFGABE, "Aufgabe"),
+        (DokumentTyp.ERWARTUNGSHORIZONT, "Erwartungshorizont"),
+        (DokumentTyp.DECKBLATT, "Deckblatt"),
+        (DokumentTyp.MATERIAL, "Material"),
+        (DokumentTyp.HOERVERSTEHEN, "Hörverstehen"),
+        (DokumentTyp.SPRACHMITTLUNG, "Sprachmittlung"),
+        (DokumentTyp.BEWERTUNGSBOGEN, "Bewertungsbogen"),
+    ])
+    options = []
+    for doc_type in DokumentTyp:
+        caption = display_names.get(doc_type, doc_type.value)
+        options.append({"value": doc_type.value, "label": caption})
+    return options
+
+
+# ============================================================================
+# Backwards-compatibility aliases (used by tests)
+# ============================================================================
+# Older names kept as aliases. documents_db refers to the very same dict
+# object as _dokumente, so changes made through either name are shared.
+AbiturFach = Fach
+Anforderungsniveau = Niveau
+documents_db = _dokumente
+
+
+class DocumentMetadata(BaseModel):
+    """Backwards-compatible metadata model for tests."""
+    # All fields are optional and typed as plain str/int rather than the
+    # enums used by the current models.
+    jahr: Optional[int] = None
+    bundesland: Optional[str] = None
+    fach: Optional[str] = None
+    niveau: Optional[str] = None
+    dokument_typ: Optional[str] = None
+    aufgaben_nummer: Optional[str] = None
+
+
+# Backwards-compatible AbiturDokument for tests (different from the internal
+# AbiturDokument dataclass: metadata is nested instead of flat).
+class AbiturDokumentCompat(BaseModel):
+    """Backwards-compatible AbiturDokument model for tests."""
+    id: str
+    filename: str
+    file_path: str
+    metadata: DocumentMetadata
+    status: VerarbeitungsStatus
+    recognition_result: Optional[RecognitionResult] = None
+    created_at: datetime
+    updated_at: datetime
+
+    # Pydantic model configuration.
+    class Config:
+        arbitrary_types_allowed = True