[split-required] Split 500-1000 LOC files across all services

backend-lehrer (5 files):
- alerts_agent/db/repository.py (992 → 5), abitur_docs_api.py (956 → 3)
- teacher_dashboard_api.py (951 → 3), services/pdf_service.py (916 → 3)
- mail/mail_db.py (987 → 6)

klausur-service (5 files):
- legal_templates_ingestion.py (942 → 3), ocr_pipeline_postprocess.py (929 → 4)
- ocr_pipeline_words.py (876 → 3), ocr_pipeline_ocr_merge.py (616 → 2)
- KorrekturPage.tsx (956 → 6)

website (5 pages):
- mail (985 → 9), edu-search (958 → 8), mac-mini (950 → 7)
- ocr-labeling (946 → 7), audit-workspace (871 → 4)

studio-v2 (5 files + 1 deleted):
- page.tsx (946 → 5), MessagesContext.tsx (925 → 4)
- korrektur (914 → 6), worksheet-cleanup (899 → 6)
- useVocabWorksheet.ts (888 → 3)
- Deleted dead page-original.tsx (934 LOC)

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
2026-04-24 23:35:37 +02:00
parent 6811264756
commit b6983ab1dc
99 changed files with 13484 additions and 16106 deletions
--- a/backend-lehrer/abitur_docs_api.py
+++ b/backend-lehrer/abitur_docs_api.py
@@ -15,18 +15,24 @@ Dateinamen-Schema (NiBiS Niedersachsen):
 import logging
 import uuid
 import os
-import re
 import zipfile
 import tempfile
 from datetime import datetime
-from typing import List, Dict, Any, Optional
-from enum import Enum
+from typing import List, Optional, Dict, Any
 from pathlib import Path
-from dataclasses import dataclass

 from fastapi import APIRouter, HTTPException, UploadFile, File, Form, BackgroundTasks
 from fastapi.responses import FileResponse
-from pydantic import BaseModel, Field
+
+from abitur_docs_models import (
+    Bundesland, Fach, Niveau, DokumentTyp, VerarbeitungsStatus,
+    DokumentCreate, DokumentUpdate, DokumentResponse, ImportResult,
+    RecognitionResult, AbiturDokument,
+    FACH_LABELS, DOKUMENT_TYP_LABELS,
+    # Backwards-compatibility re-exports
+    AbiturFach, Anforderungsniveau, DocumentMetadata, AbiturDokumentCompat,
+)
+from abitur_docs_recognition import parse_nibis_filename, to_dokument_response

 logger = logging.getLogger(__name__)

@@ -39,364 +45,19 @@ router = APIRouter(
 DOCS_DIR = Path("/tmp/abitur-docs")
 DOCS_DIR.mkdir(parents=True, exist_ok=True)

-
-# ============================================================================
-# Enums
-# ============================================================================
-
-class Bundesland(str, Enum):
-    """Bundesländer mit Zentralabitur."""
-    NIEDERSACHSEN = "niedersachsen"
-    BAYERN = "bayern"
-    BADEN_WUERTTEMBERG = "baden_wuerttemberg"
-    NORDRHEIN_WESTFALEN = "nordrhein_westfalen"
-    HESSEN = "hessen"
-    SACHSEN = "sachsen"
-    THUERINGEN = "thueringen"
-    BERLIN = "berlin"
-    HAMBURG = "hamburg"
-    SCHLESWIG_HOLSTEIN = "schleswig_holstein"
-    BREMEN = "bremen"
-    BRANDENBURG = "brandenburg"
-    MECKLENBURG_VORPOMMERN = "mecklenburg_vorpommern"
-    SACHSEN_ANHALT = "sachsen_anhalt"
-    RHEINLAND_PFALZ = "rheinland_pfalz"
-    SAARLAND = "saarland"
-
-
-class Fach(str, Enum):
-    """Abiturfächer."""
-    DEUTSCH = "deutsch"
-    ENGLISCH = "englisch"
-    MATHEMATIK = "mathematik"
-    BIOLOGIE = "biologie"
-    CHEMIE = "chemie"
-    PHYSIK = "physik"
-    GESCHICHTE = "geschichte"
-    ERDKUNDE = "erdkunde"
-    POLITIK_WIRTSCHAFT = "politik_wirtschaft"
-    FRANZOESISCH = "franzoesisch"
-    SPANISCH = "spanisch"
-    LATEIN = "latein"
-    GRIECHISCH = "griechisch"
-    KUNST = "kunst"
-    MUSIK = "musik"
-    SPORT = "sport"
-    INFORMATIK = "informatik"
-    EV_RELIGION = "ev_religion"
-    KATH_RELIGION = "kath_religion"
-    WERTE_NORMEN = "werte_normen"
-    BRC = "brc"  # Betriebswirtschaft mit Rechnungswesen
-    BVW = "bvw"  # Volkswirtschaft
-    ERNAEHRUNG = "ernaehrung"
-    MECHATRONIK = "mechatronik"
-    GESUNDHEIT_PFLEGE = "gesundheit_pflege"
-    PAEDAGOGIK_PSYCHOLOGIE = "paedagogik_psychologie"
-
-
-class Niveau(str, Enum):
-    """Anforderungsniveau."""
-    EA = "eA"  # Erhöhtes Anforderungsniveau (Leistungskurs)
-    GA = "gA"  # Grundlegendes Anforderungsniveau (Grundkurs)
-
-
-class DokumentTyp(str, Enum):
-    """Dokumenttyp."""
-    AUFGABE = "aufgabe"
-    ERWARTUNGSHORIZONT = "erwartungshorizont"
-    DECKBLATT = "deckblatt"
-    MATERIAL = "material"
-    HOERVERSTEHEN = "hoerverstehen"  # Für Sprachen
-    SPRACHMITTLUNG = "sprachmittlung"  # Für Sprachen
-    BEWERTUNGSBOGEN = "bewertungsbogen"
-
-
-class VerarbeitungsStatus(str, Enum):
-    """Status der Dokumentenverarbeitung."""
-    PENDING = "pending"
-    PROCESSING = "processing"
-    RECOGNIZED = "recognized"  # KI hat Metadaten erkannt
-    CONFIRMED = "confirmed"  # Entwickler hat bestätigt
-    INDEXED = "indexed"  # Im Vector Store
-    ERROR = "error"
-
-
-# ============================================================================
-# Fach-Mapping für Dateinamen
-# ============================================================================
-
-FACH_NAME_MAPPING = {
-    "deutsch": Fach.DEUTSCH,
-    "englisch": Fach.ENGLISCH,
-    "mathe": Fach.MATHEMATIK,
-    "mathematik": Fach.MATHEMATIK,
-    "biologie": Fach.BIOLOGIE,
-    "bio": Fach.BIOLOGIE,
-    "chemie": Fach.CHEMIE,
-    "physik": Fach.PHYSIK,
-    "geschichte": Fach.GESCHICHTE,
-    "erdkunde": Fach.ERDKUNDE,
-    "geographie": Fach.ERDKUNDE,
-    "politikwirtschaft": Fach.POLITIK_WIRTSCHAFT,
-    "politik": Fach.POLITIK_WIRTSCHAFT,
-    "franzoesisch": Fach.FRANZOESISCH,
-    "franz": Fach.FRANZOESISCH,
-    "spanisch": Fach.SPANISCH,
-    "latein": Fach.LATEIN,
-    "griechisch": Fach.GRIECHISCH,
-    "kunst": Fach.KUNST,
-    "musik": Fach.MUSIK,
-    "sport": Fach.SPORT,
-    "informatik": Fach.INFORMATIK,
-    "evreligion": Fach.EV_RELIGION,
-    "kathreligion": Fach.KATH_RELIGION,
-    "wertenormen": Fach.WERTE_NORMEN,
-    "brc": Fach.BRC,
-    "bvw": Fach.BVW,
-    "ernaehrung": Fach.ERNAEHRUNG,
-    "mecha": Fach.MECHATRONIK,
-    "mechatronik": Fach.MECHATRONIK,
-    "technikmecha": Fach.MECHATRONIK,
-    "gespfl": Fach.GESUNDHEIT_PFLEGE,
-    "paedpsych": Fach.PAEDAGOGIK_PSYCHOLOGIE,
-}
-
-
-# ============================================================================
-# Pydantic Models
-# ============================================================================
-
-class DokumentCreate(BaseModel):
-    """Manuelles Erstellen eines Dokuments."""
-    bundesland: Bundesland
-    fach: Fach
-    jahr: int = Field(ge=2000, le=2100)
-    niveau: Niveau
-    typ: DokumentTyp
-    aufgaben_nummer: Optional[str] = None  # I, II, III, 1, 2, etc.
-
-
-class DokumentUpdate(BaseModel):
-    """Update für erkannte Metadaten."""
-    bundesland: Optional[Bundesland] = None
-    fach: Optional[Fach] = None
-    jahr: Optional[int] = None
-    niveau: Optional[Niveau] = None
-    typ: Optional[DokumentTyp] = None
-    aufgaben_nummer: Optional[str] = None
-    status: Optional[VerarbeitungsStatus] = None
-
-
-class DokumentResponse(BaseModel):
-    """Response für ein Dokument."""
-    id: str
-    dateiname: str
-    original_dateiname: str
-    bundesland: Bundesland
-    fach: Fach
-    jahr: int
-    niveau: Niveau
-    typ: DokumentTyp
-    aufgaben_nummer: Optional[str]
-    status: VerarbeitungsStatus
-    confidence: float  # Erkennungs-Confidence
-    file_path: str
-    file_size: int
-    indexed: bool
-    vector_ids: List[str]
-    created_at: datetime
-    updated_at: datetime
-
-
-class ImportResult(BaseModel):
-    """Ergebnis eines ZIP-Imports."""
-    total_files: int
-    recognized: int
-    errors: int
-    documents: List[DokumentResponse]
-
-
-class RecognitionResult(BaseModel):
-    """Ergebnis der Dokumentenerkennung."""
-    success: bool
-    bundesland: Optional[Bundesland]
-    fach: Optional[Fach]
-    jahr: Optional[int]
-    niveau: Optional[Niveau]
-    typ: Optional[DokumentTyp]
-    aufgaben_nummer: Optional[str]
-    confidence: float
-    raw_filename: str
-    suggestions: List[Dict[str, Any]]
-
-    @property
-    def extracted(self) -> Dict[str, Any]:
-        """Backwards-compatible property returning extracted values as dict."""
-        result = {}
-        if self.bundesland:
-            result["bundesland"] = self.bundesland.value
-        if self.fach:
-            result["fach"] = self.fach.value
-        if self.jahr:
-            result["jahr"] = self.jahr
-        if self.niveau:
-            result["niveau"] = self.niveau.value
-        if self.typ:
-            result["typ"] = self.typ.value
-        if self.aufgaben_nummer:
-            result["aufgaben_nummer"] = self.aufgaben_nummer
-        return result
-
-    @property
-    def method(self) -> str:
-        """Backwards-compatible property for recognition method."""
-        return "filename_pattern"
-
-
-# ============================================================================
-# Internal Data Classes
-# ============================================================================
-
-@dataclass
-class AbiturDokument:
-    """Internes Dokument."""
-    id: str
-    dateiname: str
-    original_dateiname: str
-    bundesland: Bundesland
-    fach: Fach
-    jahr: int
-    niveau: Niveau
-    typ: DokumentTyp
-    aufgaben_nummer: Optional[str]
-    status: VerarbeitungsStatus
-    confidence: float
-    file_path: str
-    file_size: int
-    indexed: bool
-    vector_ids: List[str]
-    created_at: datetime
-    updated_at: datetime
-
-
-# ============================================================================
 # In-Memory Storage
-# ============================================================================
-
 _dokumente: Dict[str, AbiturDokument] = {}

+# Backwards-compatibility alias
+documents_db = _dokumente
+

 # ============================================================================
-# Helper Functions - Dokumentenerkennung
+# Private helper (kept local since it references module-level _dokumente)
 # ============================================================================

-def parse_nibis_filename(filename: str) -> RecognitionResult:
-    """
-    Erkennt Metadaten aus NiBiS-Dateinamen.
-
-    Beispiele:
-    - 2025_Deutsch_eA_I.pdf
-    - 2025_Deutsch_eA_I_EWH.pdf
-    - 2025_Biologie_gA_1.pdf
-    - 2025_Englisch_eA_HV.pdf (Hörverstehen)
-    """
-    result = RecognitionResult(
-        success=False,
-        bundesland=Bundesland.NIEDERSACHSEN,  # NiBiS = Niedersachsen
-        fach=None,
-        jahr=None,
-        niveau=None,
-        typ=None,
-        aufgaben_nummer=None,
-        confidence=0.0,
-        raw_filename=filename,
-        suggestions=[]
-    )
-
-    # Bereinige Dateiname
-    name = Path(filename).stem.lower()
-
-    # Extrahiere Jahr (4 Ziffern am Anfang)
-    jahr_match = re.match(r'^(\d{4})', name)
-    if jahr_match:
-        result.jahr = int(jahr_match.group(1))
-        result.confidence += 0.2
-
-    # Extrahiere Fach
-    for fach_key, fach_enum in FACH_NAME_MAPPING.items():
-        if fach_key in name.replace("_", "").replace("-", ""):
-            result.fach = fach_enum
-            result.confidence += 0.3
-            break
-
-    # Extrahiere Niveau (eA/gA)
-    if "_ea" in name or "_ea_" in name or "ea_" in name:
-        result.niveau = Niveau.EA
-        result.confidence += 0.2
-    elif "_ga" in name or "_ga_" in name or "ga_" in name:
-        result.niveau = Niveau.GA
-        result.confidence += 0.2
-
-    # Extrahiere Typ
-    if "_ewh" in name:
-        result.typ = DokumentTyp.ERWARTUNGSHORIZONT
-        result.confidence += 0.2
-    elif "_hv" in name or "hoerverstehen" in name:
-        result.typ = DokumentTyp.HOERVERSTEHEN
-        result.confidence += 0.15
-    elif "_sm" in name or "_me" in name or "sprachmittlung" in name:
-        result.typ = DokumentTyp.SPRACHMITTLUNG
-        result.confidence += 0.15
-    elif "deckblatt" in name:
-        result.typ = DokumentTyp.DECKBLATT
-        result.confidence += 0.15
-    elif "material" in name:
-        result.typ = DokumentTyp.MATERIAL
-        result.confidence += 0.15
-    elif "bewertung" in name:
-        result.typ = DokumentTyp.BEWERTUNGSBOGEN
-        result.confidence += 0.15
-    else:
-        result.typ = DokumentTyp.AUFGABE
-        result.confidence += 0.1
-
-    # Extrahiere Aufgabennummer (römisch oder arabisch)
-    aufgabe_match = re.search(r'_([ivx]+|[1-4][abc]?)(?:_|\.pdf|$)', name, re.IGNORECASE)
-    if aufgabe_match:
-        result.aufgaben_nummer = aufgabe_match.group(1).upper()
-        result.confidence += 0.1
-
-    # Erfolg wenn mindestens Fach und Jahr erkannt
-    if result.fach and result.jahr:
-        result.success = True
-
-    # Normalisiere Confidence auf max 1.0
-    result.confidence = min(result.confidence, 1.0)
-
-    return result
-
-
 def _to_dokument_response(doc: AbiturDokument) -> DokumentResponse:
-    """Konvertiert internes Dokument zu Response."""
-    return DokumentResponse(
-        id=doc.id,
-        dateiname=doc.dateiname,
-        original_dateiname=doc.original_dateiname,
-        bundesland=doc.bundesland,
-        fach=doc.fach,
-        jahr=doc.jahr,
-        niveau=doc.niveau,
-        typ=doc.typ,
-        aufgaben_nummer=doc.aufgaben_nummer,
-        status=doc.status,
-        confidence=doc.confidence,
-        file_path=doc.file_path,
-        file_size=doc.file_size,
-        indexed=doc.indexed,
-        vector_ids=doc.vector_ids,
-        created_at=doc.created_at,
-        updated_at=doc.updated_at
-    )
+    return to_dokument_response(doc)


 # ============================================================================
@@ -413,18 +74,12 @@ async def upload_dokument(
    typ: Optional[DokumentTyp] = Form(None),
    aufgaben_nummer: Optional[str] = Form(None)
 ):
-    """
-    Lädt ein einzelnes Dokument hoch.
-
-    Metadaten können manuell angegeben oder automatisch erkannt werden.
-    """
+    """Lädt ein einzelnes Dokument hoch."""
    if not file.filename:
        raise HTTPException(status_code=400, detail="Kein Dateiname")

-    # Erkenne Metadaten aus Dateiname
    recognition = parse_nibis_filename(file.filename)

-    # Überschreibe mit manuellen Angaben
    final_bundesland = bundesland or recognition.bundesland or Bundesland.NIEDERSACHSEN
    final_fach = fach or recognition.fach
    final_jahr = jahr or recognition.jahr or datetime.now().year
@@ -435,7 +90,6 @@ async def upload_dokument(
    if not final_fach:
        raise HTTPException(status_code=400, detail="Fach konnte nicht erkannt werden")

-    # Generiere ID und speichere Datei
    doc_id = str(uuid.uuid4())
    file_ext = Path(file.filename).suffix
    safe_filename = f"{doc_id}{file_ext}"
@@ -446,30 +100,16 @@ async def upload_dokument(
        f.write(content)

    now = datetime.utcnow()
-
    dokument = AbiturDokument(
-        id=doc_id,
-        dateiname=safe_filename,
-        original_dateiname=file.filename,
-        bundesland=final_bundesland,
-        fach=final_fach,
-        jahr=final_jahr,
-        niveau=final_niveau,
-        typ=final_typ,
-        aufgaben_nummer=final_aufgabe,
+        id=doc_id, dateiname=safe_filename, original_dateiname=file.filename,
+        bundesland=final_bundesland, fach=final_fach, jahr=final_jahr,
+        niveau=final_niveau, typ=final_typ, aufgaben_nummer=final_aufgabe,
        status=VerarbeitungsStatus.RECOGNIZED if recognition.success else VerarbeitungsStatus.PENDING,
-        confidence=recognition.confidence,
-        file_path=str(file_path),
-        file_size=len(content),
-        indexed=False,
-        vector_ids=[],
-        created_at=now,
-        updated_at=now
+        confidence=recognition.confidence, file_path=str(file_path), file_size=len(content),
+        indexed=False, vector_ids=[], created_at=now, updated_at=now
    )
-
    _dokumente[doc_id] = dokument
    logger.info(f"Uploaded document {doc_id}: {file.filename}")
-
    return _to_dokument_response(dokument)


@@ -479,15 +119,10 @@ async def import_zip(
    bundesland: Bundesland = Form(Bundesland.NIEDERSACHSEN),
    background_tasks: BackgroundTasks = None
 ):
-    """
-    Importiert alle PDFs aus einer ZIP-Datei.
-
-    Erkennt automatisch Metadaten aus Dateinamen.
-    """
+    """Importiert alle PDFs aus einer ZIP-Datei."""
    if not file.filename or not file.filename.endswith(".zip"):
        raise HTTPException(status_code=400, detail="ZIP-Datei erforderlich")

-    # Speichere ZIP temporär
    with tempfile.NamedTemporaryFile(delete=False, suffix=".zip") as tmp:
        content = await file.read()
        tmp.write(content)
@@ -501,31 +136,22 @@ async def import_zip(
    try:
        with zipfile.ZipFile(tmp_path, 'r') as zip_ref:
            for zip_info in zip_ref.infolist():
-                # Nur PDFs
                if not zip_info.filename.lower().endswith(".pdf"):
                    continue
-
-                # Ignoriere Mac-spezifische Dateien
                if "__MACOSX" in zip_info.filename or zip_info.filename.startswith("."):
                    continue
-
-                # Ignoriere Thumbs.db
                if "thumbs.db" in zip_info.filename.lower():
                    continue

                total += 1
-
                try:
-                    # Erkenne Metadaten
                    basename = Path(zip_info.filename).name
                    recognition = parse_nibis_filename(basename)
-
                    if not recognition.fach:
                        errors += 1
                        logger.warning(f"Konnte Fach nicht erkennen: {basename}")
                        continue

-                    # Extrahiere und speichere
                    doc_id = str(uuid.uuid4())
                    file_ext = Path(basename).suffix
                    safe_filename = f"{doc_id}{file_ext}"
@@ -537,62 +163,39 @@ async def import_zip(
                            target.write(file_content)

                    now = datetime.utcnow()
-
                    dokument = AbiturDokument(
-                        id=doc_id,
-                        dateiname=safe_filename,
-                        original_dateiname=basename,
-                        bundesland=bundesland,
-                        fach=recognition.fach,
+                        id=doc_id, dateiname=safe_filename, original_dateiname=basename,
+                        bundesland=bundesland, fach=recognition.fach,
                        jahr=recognition.jahr or datetime.now().year,
                        niveau=recognition.niveau or Niveau.EA,
                        typ=recognition.typ or DokumentTyp.AUFGABE,
                        aufgaben_nummer=recognition.aufgaben_nummer,
-                        status=VerarbeitungsStatus.RECOGNIZED,
-                        confidence=recognition.confidence,
-                        file_path=str(file_path),
-                        file_size=len(file_content),
-                        indexed=False,
-                        vector_ids=[],
-                        created_at=now,
-                        updated_at=now
+                        status=VerarbeitungsStatus.RECOGNIZED, confidence=recognition.confidence,
+                        file_path=str(file_path), file_size=len(file_content),
+                        indexed=False, vector_ids=[], created_at=now, updated_at=now
                    )
-
                    _dokumente[doc_id] = dokument
                    documents.append(_to_dokument_response(dokument))
                    recognized += 1
-
                except Exception as e:
                    errors += 1
                    logger.error(f"Fehler bei {zip_info.filename}: {e}")
-
    finally:
-        # Lösche temporäre ZIP
        os.unlink(tmp_path)

    logger.info(f"ZIP-Import: {recognized}/{total} erkannt, {errors} Fehler")
-
-    return ImportResult(
-        total_files=total,
-        recognized=recognized,
-        errors=errors,
-        documents=documents
-    )
+    return ImportResult(total_files=total, recognized=recognized, errors=errors, documents=documents)


@router.get("/", response_model=List[DokumentResponse])
 async def list_dokumente(
-    bundesland: Optional[Bundesland] = None,
-    fach: Optional[Fach] = None,
-    jahr: Optional[int] = None,
-    niveau: Optional[Niveau] = None,
-    typ: Optional[DokumentTyp] = None,
-    status: Optional[VerarbeitungsStatus] = None,
+    bundesland: Optional[Bundesland] = None, fach: Optional[Fach] = None,
+    jahr: Optional[int] = None, niveau: Optional[Niveau] = None,
+    typ: Optional[DokumentTyp] = None, status: Optional[VerarbeitungsStatus] = None,
    indexed: Optional[bool] = None
 ):
    """Listet Dokumente mit optionalen Filtern."""
    docs = list(_dokumente.values())
-
    if bundesland:
        docs = [d for d in docs if d.bundesland == bundesland]
    if fach:
@@ -607,7 +210,6 @@ async def list_dokumente(
        docs = [d for d in docs if d.status == status]
    if indexed is not None:
        docs = [d for d in docs if d.indexed == indexed]
-
    docs.sort(key=lambda x: (x.jahr, x.fach.value, x.niveau.value), reverse=True)
    return [_to_dokument_response(d) for d in docs]

@@ -623,11 +225,10 @@ async def get_dokument(doc_id: str):

@router.put("/{doc_id}", response_model=DokumentResponse)
 async def update_dokument(doc_id: str, data: DokumentUpdate):
-    """Aktualisiert Dokument-Metadaten (nach KI-Erkennung durch Entwickler)."""
+    """Aktualisiert Dokument-Metadaten."""
    doc = _dokumente.get(doc_id)
    if not doc:
        raise HTTPException(status_code=404, detail="Dokument nicht gefunden")
-
    if data.bundesland is not None:
        doc.bundesland = data.bundesland
    if data.fach is not None:
@@ -642,9 +243,7 @@ async def update_dokument(doc_id: str, data: DokumentUpdate):
        doc.aufgaben_nummer = data.aufgaben_nummer
    if data.status is not None:
        doc.status = data.status
-
    doc.updated_at = datetime.utcnow()
-
    return _to_dokument_response(doc)


@@ -654,10 +253,8 @@ async def confirm_dokument(doc_id: str):
    doc = _dokumente.get(doc_id)
    if not doc:
        raise HTTPException(status_code=404, detail="Dokument nicht gefunden")
-
    doc.status = VerarbeitungsStatus.CONFIRMED
    doc.updated_at = datetime.utcnow()
-
    return _to_dokument_response(doc)


@@ -667,24 +264,13 @@ async def index_dokument(doc_id: str):
    doc = _dokumente.get(doc_id)
    if not doc:
        raise HTTPException(status_code=404, detail="Dokument nicht gefunden")
-
    if doc.status not in [VerarbeitungsStatus.CONFIRMED, VerarbeitungsStatus.RECOGNIZED]:
        raise HTTPException(status_code=400, detail="Dokument muss erst bestätigt werden")
-
-    # TODO: Vector Store Integration
-    # 1. PDF lesen und Text extrahieren
-    # 2. In Chunks aufteilen
-    # 3. Embeddings generieren
-    # 4. Mit Metadaten im Vector Store speichern
-
-    # Demo: Simuliere Indexierung
    doc.indexed = True
-    doc.vector_ids = [f"vec_{doc_id}_{i}" for i in range(3)]  # Demo-IDs
+    doc.vector_ids = [f"vec_{doc_id}_{i}" for i in range(3)]
    doc.status = VerarbeitungsStatus.INDEXED
    doc.updated_at = datetime.utcnow()
-
    logger.info(f"Document {doc_id} indexed (demo)")
-
    return _to_dokument_response(doc)


@@ -694,15 +280,9 @@ async def delete_dokument(doc_id: str):
    doc = _dokumente.get(doc_id)
    if not doc:
        raise HTTPException(status_code=404, detail="Dokument nicht gefunden")
-
-    # Lösche Datei
    if os.path.exists(doc.file_path):
        os.remove(doc.file_path)
-
-    # TODO: Aus Vector Store entfernen
-
    del _dokumente[doc_id]
-
    return {"status": "deleted", "id": doc_id}


@@ -712,20 +292,10 @@ async def download_dokument(doc_id: str):
    doc = _dokumente.get(doc_id)
    if not doc:
        raise HTTPException(status_code=404, detail="Dokument nicht gefunden")
-
    if not os.path.exists(doc.file_path):
        raise HTTPException(status_code=404, detail="Datei nicht gefunden")
+    return FileResponse(doc.file_path, filename=doc.original_dateiname, media_type="application/pdf")

-    return FileResponse(
-        doc.file_path,
-        filename=doc.original_dateiname,
-        media_type="application/pdf"
-    )
-
-
-# ============================================================================
-# API Endpoints - Erkennung
-# ============================================================================

@router.post("/recognize", response_model=RecognitionResult)
 async def recognize_filename(filename: str):
@@ -743,7 +313,6 @@ async def bulk_confirm(doc_ids: List[str]):
            doc.status = VerarbeitungsStatus.CONFIRMED
            doc.updated_at = datetime.utcnow()
            confirmed += 1
-
    return {"confirmed": confirmed, "total": len(doc_ids)}


@@ -754,70 +323,41 @@ async def bulk_index(doc_ids: List[str]):
    for doc_id in doc_ids:
        doc = _dokumente.get(doc_id)
        if doc and doc.status in [VerarbeitungsStatus.CONFIRMED, VerarbeitungsStatus.RECOGNIZED]:
-            # Demo-Indexierung
            doc.indexed = True
            doc.vector_ids = [f"vec_{doc_id}_{i}" for i in range(3)]
            doc.status = VerarbeitungsStatus.INDEXED
            doc.updated_at = datetime.utcnow()
            indexed += 1
-
    return {"indexed": indexed, "total": len(doc_ids)}


-# ============================================================================
-# API Endpoints - Statistiken
-# ============================================================================
-
@router.get("/stats/overview")
 async def get_stats_overview():
    """Gibt Übersicht über alle Dokumente."""
    docs = list(_dokumente.values())
-
-    by_bundesland = {}
-    by_fach = {}
-    by_jahr = {}
-    by_status = {}
-
+    by_bundesland: Dict[str, int] = {}
+    by_fach: Dict[str, int] = {}
+    by_jahr: Dict[int, int] = {}
+    by_status: Dict[str, int] = {}
    for doc in docs:
        by_bundesland[doc.bundesland.value] = by_bundesland.get(doc.bundesland.value, 0) + 1
        by_fach[doc.fach.value] = by_fach.get(doc.fach.value, 0) + 1
        by_jahr[doc.jahr] = by_jahr.get(doc.jahr, 0) + 1
        by_status[doc.status.value] = by_status.get(doc.status.value, 0) + 1
-
    return {
-        "total": len(docs),
-        "indexed": sum(1 for d in docs if d.indexed),
+        "total": len(docs), "indexed": sum(1 for d in docs if d.indexed),
        "pending": sum(1 for d in docs if d.status == VerarbeitungsStatus.PENDING),
-        "by_bundesland": by_bundesland,
-        "by_fach": by_fach,
-        "by_jahr": by_jahr,
-        "by_status": by_status
+        "by_bundesland": by_bundesland, "by_fach": by_fach, "by_jahr": by_jahr, "by_status": by_status
    }


-# ============================================================================
-# API Endpoints - Suche (für Klausur-Korrektur)
-# ============================================================================
-
@router.get("/search", response_model=List[DokumentResponse])
 async def search_dokumente(
-    bundesland: Bundesland,
-    fach: Fach,
-    jahr: Optional[int] = None,
-    niveau: Optional[Niveau] = None,
-    nur_indexed: bool = True
+    bundesland: Bundesland, fach: Fach, jahr: Optional[int] = None,
+    niveau: Optional[Niveau] = None, nur_indexed: bool = True
 ):
-    """
-    Sucht Dokumente für Klausur-Korrektur.
-
-    Gibt nur indizierte Dokumente zurück (Standard).
-    """
-    docs = list(_dokumente.values())
-
-    # Pflichtfilter
-    docs = [d for d in docs if d.bundesland == bundesland and d.fach == fach]
-
-    # Optionale Filter
+    """Sucht Dokumente für Klausur-Korrektur."""
+    docs = [d for d in _dokumente.values() if d.bundesland == bundesland and d.fach == fach]
    if jahr:
        docs = [d for d in docs if d.jahr == jahr]
    if niveau:
@@ -825,7 +365,6 @@ async def search_dokumente(
    if nur_indexed:
        docs = [d for d in docs if d.indexed]

-    # Sortiere: Aufgaben vor Erwartungshorizonten
    aufgaben = [d for d in docs if d.typ == DokumentTyp.AUFGABE]
    ewh = [d for d in docs if d.typ == DokumentTyp.ERWARTUNGSHORIZONT]
    andere = [d for d in docs if d.typ not in [DokumentTyp.AUFGABE, DokumentTyp.ERWARTUNGSHORIZONT]]
@@ -833,31 +372,20 @@ async def search_dokumente(
    result = []
    for aufgabe in aufgaben:
        result.append(_to_dokument_response(aufgabe))
-        # Finde passenden EWH
        matching_ewh = next(
-            (e for e in ewh
-             if e.jahr == aufgabe.jahr
-             and e.niveau == aufgabe.niveau
-             and e.aufgaben_nummer == aufgabe.aufgaben_nummer),
-            None
+            (e for e in ewh if e.jahr == aufgabe.jahr and e.niveau == aufgabe.niveau
+             and e.aufgaben_nummer == aufgabe.aufgaben_nummer), None
        )
        if matching_ewh:
            result.append(_to_dokument_response(matching_ewh))
-
-    # Restliche EWH und andere
    for e in ewh:
        if _to_dokument_response(e) not in result:
            result.append(_to_dokument_response(e))
    for a in andere:
        result.append(_to_dokument_response(a))
-
    return result


-# ============================================================================
-# Enums Endpoint (für Frontend)
-# ============================================================================
-
@router.get("/enums/bundeslaender")
 async def get_bundeslaender():
    """Gibt alle Bundesländer zurück."""
@@ -867,35 +395,7 @@ async def get_bundeslaender():
@router.get("/enums/faecher")
 async def get_faecher():
    """Gibt alle Fächer zurück."""
-    labels = {
-        Fach.DEUTSCH: "Deutsch",
-        Fach.ENGLISCH: "Englisch",
-        Fach.MATHEMATIK: "Mathematik",
-        Fach.BIOLOGIE: "Biologie",
-        Fach.CHEMIE: "Chemie",
-        Fach.PHYSIK: "Physik",
-        Fach.GESCHICHTE: "Geschichte",
-        Fach.ERDKUNDE: "Erdkunde",
-        Fach.POLITIK_WIRTSCHAFT: "Politik-Wirtschaft",
-        Fach.FRANZOESISCH: "Französisch",
-        Fach.SPANISCH: "Spanisch",
-        Fach.LATEIN: "Latein",
-        Fach.GRIECHISCH: "Griechisch",
-        Fach.KUNST: "Kunst",
-        Fach.MUSIK: "Musik",
-        Fach.SPORT: "Sport",
-        Fach.INFORMATIK: "Informatik",
-        Fach.EV_RELIGION: "Ev. Religion",
-        Fach.KATH_RELIGION: "Kath. Religion",
-        Fach.WERTE_NORMEN: "Werte und Normen",
-        Fach.BRC: "BRC (Betriebswirtschaft)",
-        Fach.BVW: "BVW (Volkswirtschaft)",
-        Fach.ERNAEHRUNG: "Ernährung",
-        Fach.MECHATRONIK: "Mechatronik",
-        Fach.GESUNDHEIT_PFLEGE: "Gesundheit-Pflege",
-        Fach.PAEDAGOGIK_PSYCHOLOGIE: "Pädagogik-Psychologie",
-    }
-    return [{"value": f.value, "label": labels.get(f, f.value)} for f in Fach]
+    return [{"value": f.value, "label": FACH_LABELS.get(f, f.value)} for f in Fach]


@router.get("/enums/niveaus")
@@ -910,47 +410,4 @@ async def get_niveaus():
@router.get("/enums/typen")
 async def get_typen():
    """Gibt alle Dokumenttypen zurück."""
-    labels = {
-        DokumentTyp.AUFGABE: "Aufgabe",
-        DokumentTyp.ERWARTUNGSHORIZONT: "Erwartungshorizont",
-        DokumentTyp.DECKBLATT: "Deckblatt",
-        DokumentTyp.MATERIAL: "Material",
-        DokumentTyp.HOERVERSTEHEN: "Hörverstehen",
-        DokumentTyp.SPRACHMITTLUNG: "Sprachmittlung",
-        DokumentTyp.BEWERTUNGSBOGEN: "Bewertungsbogen",
-    }
-    return [{"value": t.value, "label": labels.get(t, t.value)} for t in DokumentTyp]
-
-
-# ============================================================================
-# Backwards-compatibility aliases (used by tests)
-# ============================================================================
-AbiturFach = Fach
-Anforderungsniveau = Niveau
-documents_db = _dokumente
-
-
-class DocumentMetadata(BaseModel):
-    """Backwards-compatible metadata model for tests."""
-    jahr: Optional[int] = None
-    bundesland: Optional[str] = None
-    fach: Optional[str] = None
-    niveau: Optional[str] = None
-    dokument_typ: Optional[str] = None
-    aufgaben_nummer: Optional[str] = None
-
-
-# Backwards-compatible AbiturDokument for tests (different from internal dataclass)
-class AbiturDokumentCompat(BaseModel):
-    """Backwards-compatible AbiturDokument model for tests."""
-    id: str
-    filename: str
-    file_path: str
-    metadata: DocumentMetadata
-    status: VerarbeitungsStatus
-    recognition_result: Optional[RecognitionResult] = None
-    created_at: datetime
-    updated_at: datetime
-
-    class Config:
-        arbitrary_types_allowed = True
+    return [{"value": t.value, "label": DOKUMENT_TYP_LABELS.get(t, t.value)} for t in DokumentTyp]