[split-required] Split 500-1000 LOC files across all services
backend-lehrer (5 files):
- alerts_agent/db/repository.py (992 → 5), abitur_docs_api.py (956 → 3)
- teacher_dashboard_api.py (951 → 3), services/pdf_service.py (916 → 3)
- mail/mail_db.py (987 → 6)

klausur-service (5 files):
- legal_templates_ingestion.py (942 → 3), ocr_pipeline_postprocess.py (929 → 4)
- ocr_pipeline_words.py (876 → 3), ocr_pipeline_ocr_merge.py (616 → 2)
- KorrekturPage.tsx (956 → 6)

website (5 pages):
- mail (985 → 9), edu-search (958 → 8), mac-mini (950 → 7)
- ocr-labeling (946 → 7), audit-workspace (871 → 4)

studio-v2 (5 files + 1 deleted):
- page.tsx (946 → 5), MessagesContext.tsx (925 → 4)
- korrektur (914 → 6), worksheet-cleanup (899 → 6)
- useVocabWorksheet.ts (888 → 3)
- Deleted dead page-original.tsx (934 LOC)

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
327
backend-lehrer/abitur_docs_models.py
Normal file
327
backend-lehrer/abitur_docs_models.py
Normal file
@@ -0,0 +1,327 @@
|
||||
"""
|
||||
Abitur Document Store - Enums, Pydantic Models, Data Classes.
|
||||
|
||||
Shared types for abitur_docs_api and abitur_docs_recognition.
|
||||
"""
|
||||
|
||||
from datetime import datetime
|
||||
from typing import List, Dict, Any, Optional
|
||||
from enum import Enum
|
||||
from dataclasses import dataclass
|
||||
|
||||
from pydantic import BaseModel, Field
|
||||
|
||||
|
||||
# ============================================================================
|
||||
# Enums
|
||||
# ============================================================================
|
||||
|
||||
class Bundesland(str, Enum):
    """German federal states with a centralized Abitur examination.

    Members mix in ``str``, so each one compares equal to its lower-case,
    ASCII-transliterated value (for example ``"baden_wuerttemberg"``).
    """

    NIEDERSACHSEN = "niedersachsen"
    BAYERN = "bayern"
    BADEN_WUERTTEMBERG = "baden_wuerttemberg"
    NORDRHEIN_WESTFALEN = "nordrhein_westfalen"
    HESSEN = "hessen"
    SACHSEN = "sachsen"
    THUERINGEN = "thueringen"
    BERLIN = "berlin"
    HAMBURG = "hamburg"
    SCHLESWIG_HOLSTEIN = "schleswig_holstein"
    BREMEN = "bremen"
    BRANDENBURG = "brandenburg"
    MECKLENBURG_VORPOMMERN = "mecklenburg_vorpommern"
    SACHSEN_ANHALT = "sachsen_anhalt"
    RHEINLAND_PFALZ = "rheinland_pfalz"
    SAARLAND = "saarland"
|
||||
|
||||
|
||||
class Fach(str, Enum):
    """Abitur subjects.

    Members mix in ``str``; each value is the lower-case,
    ASCII-transliterated identifier of the subject.
    """

    DEUTSCH = "deutsch"
    ENGLISCH = "englisch"
    MATHEMATIK = "mathematik"
    BIOLOGIE = "biologie"
    CHEMIE = "chemie"
    PHYSIK = "physik"
    GESCHICHTE = "geschichte"
    ERDKUNDE = "erdkunde"
    POLITIK_WIRTSCHAFT = "politik_wirtschaft"
    FRANZOESISCH = "franzoesisch"
    SPANISCH = "spanisch"
    LATEIN = "latein"
    GRIECHISCH = "griechisch"
    KUNST = "kunst"
    MUSIK = "musik"
    SPORT = "sport"
    INFORMATIK = "informatik"
    EV_RELIGION = "ev_religion"
    KATH_RELIGION = "kath_religion"
    WERTE_NORMEN = "werte_normen"
    BRC = "brc"
    BVW = "bvw"
    ERNAEHRUNG = "ernaehrung"
    MECHATRONIK = "mechatronik"
    GESUNDHEIT_PFLEGE = "gesundheit_pflege"
    PAEDAGOGIK_PSYCHOLOGIE = "paedagogik_psychologie"
|
||||
|
||||
|
||||
class Niveau(str, Enum):
    """Requirement level of an exam.

    Note that the values are mixed-case (``"eA"`` / ``"gA"``), unlike the
    other enums in this module.
    """

    # NOTE(review): presumably "erhöhtes" vs. "grundlegendes"
    # Anforderungsniveau - confirm with domain owners.
    EA = "eA"
    GA = "gA"
|
||||
|
||||
|
||||
class DokumentTyp(str, Enum):
    """Kind of document stored for an Abitur exam.

    Members mix in ``str`` and compare equal to their lower-case values.
    """

    AUFGABE = "aufgabe"
    ERWARTUNGSHORIZONT = "erwartungshorizont"
    DECKBLATT = "deckblatt"
    MATERIAL = "material"
    HOERVERSTEHEN = "hoerverstehen"
    SPRACHMITTLUNG = "sprachmittlung"
    BEWERTUNGSBOGEN = "bewertungsbogen"
|
||||
|
||||
|
||||
class VerarbeitungsStatus(str, Enum):
    """Processing status of a document.

    Members mix in ``str`` and compare equal to their lower-case values.
    """

    # NOTE(review): the declaration order looks like the intended lifecycle
    # (pending -> ... -> indexed, with error as a failure state) - confirm.
    PENDING = "pending"
    PROCESSING = "processing"
    RECOGNIZED = "recognized"
    CONFIRMED = "confirmed"
    INDEXED = "indexed"
    ERROR = "error"
|
||||
|
||||
|
||||
# ============================================================================
|
||||
# Fach-Mapping für Dateinamen
|
||||
# ============================================================================
|
||||
|
||||
# Lower-case name fragments (full names and abbreviations) mapped to the
# subject they denote. Several keys may point at the same subject.
# Insertion order is kept exactly as originally declared in case consumers
# iterate the mapping.
FACH_NAME_MAPPING: Dict[str, Fach] = {
    "deutsch": Fach.DEUTSCH,
    "englisch": Fach.ENGLISCH,
    # Mathematics: short and long form.
    "mathe": Fach.MATHEMATIK,
    "mathematik": Fach.MATHEMATIK,
    # Biology: long and short form.
    "biologie": Fach.BIOLOGIE,
    "bio": Fach.BIOLOGIE,
    "chemie": Fach.CHEMIE,
    "physik": Fach.PHYSIK,
    "geschichte": Fach.GESCHICHTE,
    # Both names resolve to the same subject.
    "erdkunde": Fach.ERDKUNDE,
    "geographie": Fach.ERDKUNDE,
    "politikwirtschaft": Fach.POLITIK_WIRTSCHAFT,
    "politik": Fach.POLITIK_WIRTSCHAFT,
    "franzoesisch": Fach.FRANZOESISCH,
    "franz": Fach.FRANZOESISCH,
    "spanisch": Fach.SPANISCH,
    "latein": Fach.LATEIN,
    "griechisch": Fach.GRIECHISCH,
    "kunst": Fach.KUNST,
    "musik": Fach.MUSIK,
    "sport": Fach.SPORT,
    "informatik": Fach.INFORMATIK,
    # Keys are written without separators, unlike the enum values.
    "evreligion": Fach.EV_RELIGION,
    "kathreligion": Fach.KATH_RELIGION,
    "wertenormen": Fach.WERTE_NORMEN,
    "brc": Fach.BRC,
    "bvw": Fach.BVW,
    "ernaehrung": Fach.ERNAEHRUNG,
    # Three spellings for mechatronics.
    "mecha": Fach.MECHATRONIK,
    "mechatronik": Fach.MECHATRONIK,
    "technikmecha": Fach.MECHATRONIK,
    "gespfl": Fach.GESUNDHEIT_PFLEGE,
    "paedpsych": Fach.PAEDAGOGIK_PSYCHOLOGIE,
}
|
||||
|
||||
|
||||
# ============================================================================
|
||||
# Pydantic Models
|
||||
# ============================================================================
|
||||
|
||||
class DokumentCreate(BaseModel):
    """Payload for creating a document manually.

    All fields except ``aufgaben_nummer`` are required.
    """

    bundesland: Bundesland
    fach: Fach
    # Year is validated to lie within 2000..2100 (both inclusive).
    jahr: int = Field(ge=2000, le=2100)
    niveau: Niveau
    typ: DokumentTyp
    # Optional task number; omitted means None.
    aufgaben_nummer: Optional[str] = None
|
||||
|
||||
|
||||
class DokumentUpdate(BaseModel):
    """Update payload for recognized document metadata.

    Every field is optional and defaults to ``None``.
    NOTE(review): presumably a ``None`` field means "leave unchanged" -
    confirm against the handler that applies the update.

    ``jahr`` is validated against the same 2000..2100 range that
    ``DokumentCreate`` enforces, so an update cannot store a year that
    creation would have rejected. ``None`` is still accepted.
    """

    bundesland: Optional[Bundesland] = None
    fach: Optional[Fach] = None
    # Same bounds as DokumentCreate.jahr; the default stays None.
    jahr: Optional[int] = Field(default=None, ge=2000, le=2100)
    niveau: Optional[Niveau] = None
    typ: Optional[DokumentTyp] = None
    aufgaben_nummer: Optional[str] = None
    status: Optional[VerarbeitungsStatus] = None
|
||||
|
||||
|
||||
class DokumentResponse(BaseModel):
    """Response model describing a single document."""

    # --- identity and file names ---
    id: str
    dateiname: str
    original_dateiname: str

    # --- classification metadata ---
    bundesland: Bundesland
    fach: Fach
    jahr: int
    niveau: Niveau
    typ: DokumentTyp
    aufgaben_nummer: Optional[str]

    # --- processing state ---
    status: VerarbeitungsStatus
    confidence: float

    # --- storage and indexing ---
    file_path: str
    file_size: int
    indexed: bool
    vector_ids: List[str]

    # --- timestamps ---
    created_at: datetime
    updated_at: datetime
|
||||
|
||||
|
||||
class ImportResult(BaseModel):
    """Outcome of a ZIP import.

    Carries three integer counters plus the list of documents as
    ``DokumentResponse`` objects.
    """

    total_files: int
    recognized: int
    errors: int
    documents: List[DokumentResponse]
|
||||
|
||||
|
||||
class RecognitionResult(BaseModel):
    """Outcome of document recognition.

    The metadata fields (``bundesland`` through ``aufgaben_nummer``) are
    optional and may be ``None``.
    """

    success: bool
    bundesland: Optional[Bundesland]
    fach: Optional[Fach]
    jahr: Optional[int]
    niveau: Optional[Niveau]
    typ: Optional[DokumentTyp]
    aufgaben_nummer: Optional[str]
    confidence: float
    raw_filename: str
    suggestions: List[Dict[str, Any]]

    @property
    def extracted(self) -> Dict[str, Any]:
        """Backwards-compatible view of the extracted values as a dict.

        Only truthy fields are included. Enum-typed fields contribute their
        ``.value``; ``jahr`` and ``aufgaben_nummer`` are passed through
        unchanged. Keys appear in field-declaration order.
        """
        field_names = (
            "bundesland",
            "fach",
            "jahr",
            "niveau",
            "typ",
            "aufgaben_nummer",
        )
        found: Dict[str, Any] = {}
        for name in field_names:
            current = getattr(self, name)
            if not current:
                continue
            # Enum members are flattened to their plain string value.
            found[name] = current.value if isinstance(current, Enum) else current
        return found

    @property
    def method(self) -> str:
        """Backwards-compatible recognition method name (a constant)."""
        return "filename_pattern"
|
||||
|
||||
|
||||
# ============================================================================
|
||||
# Internal Data Classes
|
||||
# ============================================================================
|
||||
|
||||
@dataclass
class AbiturDokument:
    """Internal document record.

    Declares the same fields, in the same order, as ``DokumentResponse``.
    None of the fields has a default, so all must be supplied.
    """

    # --- identity and file names ---
    id: str
    dateiname: str
    original_dateiname: str

    # --- classification metadata ---
    bundesland: Bundesland
    fach: Fach
    jahr: int
    niveau: Niveau
    typ: DokumentTyp
    aufgaben_nummer: Optional[str]

    # --- processing state ---
    status: VerarbeitungsStatus
    confidence: float

    # --- storage and indexing ---
    file_path: str
    file_size: int
    indexed: bool
    vector_ids: List[str]

    # --- timestamps ---
    created_at: datetime
    updated_at: datetime
|
||||
|
||||
|
||||
# ============================================================================
|
||||
# Backwards-compatibility aliases (used by tests)
|
||||
# ============================================================================
|
||||
# Legacy names bound to the current enums so existing imports keep resolving.
AbiturFach = Fach
Anforderungsniveau = Niveau
|
||||
|
||||
|
||||
class DocumentMetadata(BaseModel):
    """Backwards-compatible metadata model kept for tests.

    Every field is optional and defaults to ``None``. Apart from ``jahr``,
    the fields are plain strings rather than the enum types of this module.
    """

    jahr: Optional[int] = None
    bundesland: Optional[str] = None
    fach: Optional[str] = None
    niveau: Optional[str] = None
    dokument_typ: Optional[str] = None
    aufgaben_nummer: Optional[str] = None
|
||||
|
||||
|
||||
class AbiturDokumentCompat(BaseModel):
    """Backwards-compatible AbiturDokument model kept for tests.

    Metadata is grouped under a nested ``DocumentMetadata`` object instead of
    being declared as individual fields.
    """

    id: str
    filename: str
    file_path: str
    metadata: DocumentMetadata
    status: VerarbeitungsStatus
    # Optional recognition result; omitted means None.
    recognition_result: Optional[RecognitionResult] = None
    created_at: datetime
    updated_at: datetime

    class Config:
        # Pydantic setting: permit field types it has no built-in validator for.
        arbitrary_types_allowed = True
|
||||
|
||||
|
||||
# ============================================================================
|
||||
# Fach Labels (für Frontend Enum-Endpoint)
|
||||
# ============================================================================
|
||||
|
||||
# Display label for every Fach member (labels keep umlauts and punctuation,
# unlike the ASCII enum values).
FACH_LABELS: Dict[Fach, str] = {
    # Core subjects and sciences
    Fach.DEUTSCH: "Deutsch",
    Fach.ENGLISCH: "Englisch",
    Fach.MATHEMATIK: "Mathematik",
    Fach.BIOLOGIE: "Biologie",
    Fach.CHEMIE: "Chemie",
    Fach.PHYSIK: "Physik",
    # Social sciences
    Fach.GESCHICHTE: "Geschichte",
    Fach.ERDKUNDE: "Erdkunde",
    Fach.POLITIK_WIRTSCHAFT: "Politik-Wirtschaft",
    # Further languages
    Fach.FRANZOESISCH: "Französisch",
    Fach.SPANISCH: "Spanisch",
    Fach.LATEIN: "Latein",
    Fach.GRIECHISCH: "Griechisch",
    # Arts, sport, computing
    Fach.KUNST: "Kunst",
    Fach.MUSIK: "Musik",
    Fach.SPORT: "Sport",
    Fach.INFORMATIK: "Informatik",
    # Religion and ethics
    Fach.EV_RELIGION: "Ev. Religion",
    Fach.KATH_RELIGION: "Kath. Religion",
    Fach.WERTE_NORMEN: "Werte und Normen",
    # Remaining subjects
    Fach.BRC: "BRC (Betriebswirtschaft)",
    Fach.BVW: "BVW (Volkswirtschaft)",
    Fach.ERNAEHRUNG: "Ernährung",
    Fach.MECHATRONIK: "Mechatronik",
    Fach.GESUNDHEIT_PFLEGE: "Gesundheit-Pflege",
    Fach.PAEDAGOGIK_PSYCHOLOGIE: "Pädagogik-Psychologie",
}
|
||||
|
||||
# Display label for every DokumentTyp member.
DOKUMENT_TYP_LABELS: Dict[DokumentTyp, str] = {
    DokumentTyp.AUFGABE: "Aufgabe",
    DokumentTyp.ERWARTUNGSHORIZONT: "Erwartungshorizont",
    DokumentTyp.DECKBLATT: "Deckblatt",
    DokumentTyp.MATERIAL: "Material",
    DokumentTyp.HOERVERSTEHEN: "Hörverstehen",
    DokumentTyp.SPRACHMITTLUNG: "Sprachmittlung",
    DokumentTyp.BEWERTUNGSBOGEN: "Bewertungsbogen",
}
|
||||
Reference in New Issue
Block a user