Restructure: Move 43 files into 8 domain packages (backend-lehrer)
Some checks failed
CI / go-lint (push) Has been skipped
CI / python-lint (push) Has been skipped
CI / nodejs-lint (push) Has been skipped
CI / test-go-school (push) Successful in 27s
CI / test-go-edu-search (push) Successful in 40s
CI / test-python-klausur (push) Failing after 2m30s
CI / test-python-agent-core (push) Successful in 28s
CI / test-nodejs-website (push) Successful in 20s

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
Benjamin Admin
2026-04-25 22:32:45 +02:00
parent 165c493d1e
commit dde45b29db
93 changed files with 9469 additions and 9290 deletions

View File

@@ -1,124 +1,4 @@
"""
Abitur Document Store - Dateinamen-Erkennung und Helfer.
Erkennt Metadaten aus NiBiS-Dateinamen (Niedersachsen).
"""
import re
from typing import Dict, Any
from pathlib import Path
from abitur_docs_models import (
Bundesland, Fach, Niveau, DokumentTyp, VerarbeitungsStatus,
RecognitionResult, AbiturDokument, DokumentResponse,
FACH_NAME_MAPPING,
)
def parse_nibis_filename(filename: str) -> RecognitionResult:
    """
    Recognize document metadata from a NiBiS file name.

    The lowercased file stem is scanned for a leading year, a subject key
    from ``FACH_NAME_MAPPING``, a level marker (eA/gA), a document-type
    marker and a task number. Every recognized part raises ``confidence``.

    Examples:
    - 2025_Deutsch_eA_I.pdf
    - 2025_Deutsch_eA_I_EWH.pdf
    - 2025_Biologie_gA_1.pdf
    - 2025_Englisch_eA_HV.pdf (listening comprehension)

    Args:
        filename: File name (or path) to analyse; only its stem is used.

    Returns:
        A ``RecognitionResult`` whose ``bundesland`` is always
        ``Bundesland.NIEDERSACHSEN``. ``success`` is True only when both a
        subject and a year were recognized; ``confidence`` is capped at 1.0.
    """
    result = RecognitionResult(
        success=False,
        bundesland=Bundesland.NIEDERSACHSEN,
        fach=None,
        jahr=None,
        niveau=None,
        typ=None,
        aufgaben_nummer=None,
        confidence=0.0,
        raw_filename=filename,
        suggestions=[]
    )

    # Normalize: drop directory and last extension, compare case-insensitively.
    name = Path(filename).stem.lower()

    # Year: exactly the four digits at the very start of the name.
    jahr_match = re.match(r'^(\d{4})', name)
    if jahr_match:
        result.jahr = int(jahr_match.group(1))
        result.confidence += 0.2

    # Subject: keys are matched against the name with "_" and "-" removed.
    # The stripped name does not depend on the mapping entry, so it is built
    # once instead of once per key. The first matching key (in mapping
    # iteration order) wins.
    compact_name = name.replace("_", "").replace("-", "")
    for fach_key, fach_enum in FACH_NAME_MAPPING.items():
        if fach_key in compact_name:
            result.fach = fach_enum
            result.confidence += 0.3
            break

    # Level (eA/gA). A former "_ea_" / "_ga_" check was dropped because it is
    # fully covered by the "_ea" / "_ga" substring test.
    # NOTE(review): the bare "ea_" / "ga_" test also fires inside longer
    # tokens (e.g. "area_"); confirm whether that looseness is intended.
    if "_ea" in name or "ea_" in name:
        result.niveau = Niveau.EA
        result.confidence += 0.2
    elif "_ga" in name or "ga_" in name:
        result.niveau = Niveau.GA
        result.confidence += 0.2

    # Document type: first matching marker wins; a name without any marker
    # is treated as a plain task sheet with a lower confidence bonus.
    if "_ewh" in name:
        result.typ = DokumentTyp.ERWARTUNGSHORIZONT
        result.confidence += 0.2
    elif "_hv" in name or "hoerverstehen" in name:
        result.typ = DokumentTyp.HOERVERSTEHEN
        result.confidence += 0.15
    elif "_sm" in name or "_me" in name or "sprachmittlung" in name:
        result.typ = DokumentTyp.SPRACHMITTLUNG
        result.confidence += 0.15
    elif "deckblatt" in name:
        result.typ = DokumentTyp.DECKBLATT
        result.confidence += 0.15
    elif "material" in name:
        result.typ = DokumentTyp.MATERIAL
        result.confidence += 0.15
    elif "bewertung" in name:
        result.typ = DokumentTyp.BEWERTUNGSBOGEN
        result.confidence += 0.15
    else:
        result.typ = DokumentTyp.AUFGABE
        result.confidence += 0.1

    # Task number: a whole "_"-delimited token that is either a roman numeral
    # built from i/v/x or a digit 1-4 with an optional a/b/c suffix. It is
    # stored in upper case.
    aufgabe_match = re.search(r'_([ivx]+|[1-4][abc]?)(?:_|\.pdf|$)', name, re.IGNORECASE)
    if aufgabe_match:
        result.aufgaben_nummer = aufgabe_match.group(1).upper()
        result.confidence += 0.1

    # Recognition counts as successful once subject and year are both known.
    if result.fach and result.jahr:
        result.success = True

    # Clamp the accumulated confidence to at most 1.0.
    result.confidence = min(result.confidence, 1.0)
    return result
def to_dokument_response(doc: AbiturDokument) -> DokumentResponse:
    """Convert an internal document record into its response model.

    Every field listed below is read from ``doc`` and passed unchanged to
    ``DokumentResponse`` under the same keyword name.
    """
    copied_fields = (
        "id",
        "dateiname",
        "original_dateiname",
        "bundesland",
        "fach",
        "jahr",
        "niveau",
        "typ",
        "aufgaben_nummer",
        "status",
        "confidence",
        "file_path",
        "file_size",
        "indexed",
        "vector_ids",
        "created_at",
        "updated_at",
    )
    # Attributes are read in the listed order, then expanded as keywords.
    payload = {field_name: getattr(doc, field_name) for field_name in copied_fields}
    return DokumentResponse(**payload)
# Backward-compat shim -- module moved to abitur/recognition.py.
# The entry for this module's own name in sys.modules is replaced with the
# module object returned by importing "abitur.recognition", so lookups of
# this module's name through sys.modules yield the relocated module.
# The imports are aliased with a leading underscore to mark them non-public.
import importlib as _importlib
import sys as _sys
_sys.modules[__name__] = _importlib.import_module("abitur.recognition")