diff --git a/backend-lehrer/abitur_docs_api.py b/backend-lehrer/abitur_docs_api.py
index a761f48..bcf0190 100644
--- a/backend-lehrer/abitur_docs_api.py
+++ b/backend-lehrer/abitur_docs_api.py
@@ -15,18 +15,24 @@ Dateinamen-Schema (NiBiS Niedersachsen):
import logging
import uuid
import os
-import re
import zipfile
import tempfile
from datetime import datetime
-from typing import List, Dict, Any, Optional
-from enum import Enum
+from typing import List, Optional, Dict, Any
from pathlib import Path
-from dataclasses import dataclass
from fastapi import APIRouter, HTTPException, UploadFile, File, Form, BackgroundTasks
from fastapi.responses import FileResponse
-from pydantic import BaseModel, Field
+
+from abitur_docs_models import (
+ Bundesland, Fach, Niveau, DokumentTyp, VerarbeitungsStatus,
+ DokumentCreate, DokumentUpdate, DokumentResponse, ImportResult,
+ RecognitionResult, AbiturDokument,
+ FACH_LABELS, DOKUMENT_TYP_LABELS,
+ # Backwards-compatibility re-exports
+ AbiturFach, Anforderungsniveau, DocumentMetadata, AbiturDokumentCompat,
+)
+from abitur_docs_recognition import parse_nibis_filename, to_dokument_response
logger = logging.getLogger(__name__)
@@ -39,364 +45,19 @@ router = APIRouter(
DOCS_DIR = Path("/tmp/abitur-docs")
DOCS_DIR.mkdir(parents=True, exist_ok=True)
-
-# ============================================================================
-# Enums
-# ============================================================================
-
-class Bundesland(str, Enum):
- """Bundesländer mit Zentralabitur."""
- NIEDERSACHSEN = "niedersachsen"
- BAYERN = "bayern"
- BADEN_WUERTTEMBERG = "baden_wuerttemberg"
- NORDRHEIN_WESTFALEN = "nordrhein_westfalen"
- HESSEN = "hessen"
- SACHSEN = "sachsen"
- THUERINGEN = "thueringen"
- BERLIN = "berlin"
- HAMBURG = "hamburg"
- SCHLESWIG_HOLSTEIN = "schleswig_holstein"
- BREMEN = "bremen"
- BRANDENBURG = "brandenburg"
- MECKLENBURG_VORPOMMERN = "mecklenburg_vorpommern"
- SACHSEN_ANHALT = "sachsen_anhalt"
- RHEINLAND_PFALZ = "rheinland_pfalz"
- SAARLAND = "saarland"
-
-
-class Fach(str, Enum):
- """Abiturfächer."""
- DEUTSCH = "deutsch"
- ENGLISCH = "englisch"
- MATHEMATIK = "mathematik"
- BIOLOGIE = "biologie"
- CHEMIE = "chemie"
- PHYSIK = "physik"
- GESCHICHTE = "geschichte"
- ERDKUNDE = "erdkunde"
- POLITIK_WIRTSCHAFT = "politik_wirtschaft"
- FRANZOESISCH = "franzoesisch"
- SPANISCH = "spanisch"
- LATEIN = "latein"
- GRIECHISCH = "griechisch"
- KUNST = "kunst"
- MUSIK = "musik"
- SPORT = "sport"
- INFORMATIK = "informatik"
- EV_RELIGION = "ev_religion"
- KATH_RELIGION = "kath_religion"
- WERTE_NORMEN = "werte_normen"
- BRC = "brc" # Betriebswirtschaft mit Rechnungswesen
- BVW = "bvw" # Volkswirtschaft
- ERNAEHRUNG = "ernaehrung"
- MECHATRONIK = "mechatronik"
- GESUNDHEIT_PFLEGE = "gesundheit_pflege"
- PAEDAGOGIK_PSYCHOLOGIE = "paedagogik_psychologie"
-
-
-class Niveau(str, Enum):
- """Anforderungsniveau."""
- EA = "eA" # Erhöhtes Anforderungsniveau (Leistungskurs)
- GA = "gA" # Grundlegendes Anforderungsniveau (Grundkurs)
-
-
-class DokumentTyp(str, Enum):
- """Dokumenttyp."""
- AUFGABE = "aufgabe"
- ERWARTUNGSHORIZONT = "erwartungshorizont"
- DECKBLATT = "deckblatt"
- MATERIAL = "material"
- HOERVERSTEHEN = "hoerverstehen" # Für Sprachen
- SPRACHMITTLUNG = "sprachmittlung" # Für Sprachen
- BEWERTUNGSBOGEN = "bewertungsbogen"
-
-
-class VerarbeitungsStatus(str, Enum):
- """Status der Dokumentenverarbeitung."""
- PENDING = "pending"
- PROCESSING = "processing"
- RECOGNIZED = "recognized" # KI hat Metadaten erkannt
- CONFIRMED = "confirmed" # Entwickler hat bestätigt
- INDEXED = "indexed" # Im Vector Store
- ERROR = "error"
-
-
-# ============================================================================
-# Fach-Mapping für Dateinamen
-# ============================================================================
-
-FACH_NAME_MAPPING = {
- "deutsch": Fach.DEUTSCH,
- "englisch": Fach.ENGLISCH,
- "mathe": Fach.MATHEMATIK,
- "mathematik": Fach.MATHEMATIK,
- "biologie": Fach.BIOLOGIE,
- "bio": Fach.BIOLOGIE,
- "chemie": Fach.CHEMIE,
- "physik": Fach.PHYSIK,
- "geschichte": Fach.GESCHICHTE,
- "erdkunde": Fach.ERDKUNDE,
- "geographie": Fach.ERDKUNDE,
- "politikwirtschaft": Fach.POLITIK_WIRTSCHAFT,
- "politik": Fach.POLITIK_WIRTSCHAFT,
- "franzoesisch": Fach.FRANZOESISCH,
- "franz": Fach.FRANZOESISCH,
- "spanisch": Fach.SPANISCH,
- "latein": Fach.LATEIN,
- "griechisch": Fach.GRIECHISCH,
- "kunst": Fach.KUNST,
- "musik": Fach.MUSIK,
- "sport": Fach.SPORT,
- "informatik": Fach.INFORMATIK,
- "evreligion": Fach.EV_RELIGION,
- "kathreligion": Fach.KATH_RELIGION,
- "wertenormen": Fach.WERTE_NORMEN,
- "brc": Fach.BRC,
- "bvw": Fach.BVW,
- "ernaehrung": Fach.ERNAEHRUNG,
- "mecha": Fach.MECHATRONIK,
- "mechatronik": Fach.MECHATRONIK,
- "technikmecha": Fach.MECHATRONIK,
- "gespfl": Fach.GESUNDHEIT_PFLEGE,
- "paedpsych": Fach.PAEDAGOGIK_PSYCHOLOGIE,
-}
-
-
-# ============================================================================
-# Pydantic Models
-# ============================================================================
-
-class DokumentCreate(BaseModel):
- """Manuelles Erstellen eines Dokuments."""
- bundesland: Bundesland
- fach: Fach
- jahr: int = Field(ge=2000, le=2100)
- niveau: Niveau
- typ: DokumentTyp
- aufgaben_nummer: Optional[str] = None # I, II, III, 1, 2, etc.
-
-
-class DokumentUpdate(BaseModel):
- """Update für erkannte Metadaten."""
- bundesland: Optional[Bundesland] = None
- fach: Optional[Fach] = None
- jahr: Optional[int] = None
- niveau: Optional[Niveau] = None
- typ: Optional[DokumentTyp] = None
- aufgaben_nummer: Optional[str] = None
- status: Optional[VerarbeitungsStatus] = None
-
-
-class DokumentResponse(BaseModel):
- """Response für ein Dokument."""
- id: str
- dateiname: str
- original_dateiname: str
- bundesland: Bundesland
- fach: Fach
- jahr: int
- niveau: Niveau
- typ: DokumentTyp
- aufgaben_nummer: Optional[str]
- status: VerarbeitungsStatus
- confidence: float # Erkennungs-Confidence
- file_path: str
- file_size: int
- indexed: bool
- vector_ids: List[str]
- created_at: datetime
- updated_at: datetime
-
-
-class ImportResult(BaseModel):
- """Ergebnis eines ZIP-Imports."""
- total_files: int
- recognized: int
- errors: int
- documents: List[DokumentResponse]
-
-
-class RecognitionResult(BaseModel):
- """Ergebnis der Dokumentenerkennung."""
- success: bool
- bundesland: Optional[Bundesland]
- fach: Optional[Fach]
- jahr: Optional[int]
- niveau: Optional[Niveau]
- typ: Optional[DokumentTyp]
- aufgaben_nummer: Optional[str]
- confidence: float
- raw_filename: str
- suggestions: List[Dict[str, Any]]
-
- @property
- def extracted(self) -> Dict[str, Any]:
- """Backwards-compatible property returning extracted values as dict."""
- result = {}
- if self.bundesland:
- result["bundesland"] = self.bundesland.value
- if self.fach:
- result["fach"] = self.fach.value
- if self.jahr:
- result["jahr"] = self.jahr
- if self.niveau:
- result["niveau"] = self.niveau.value
- if self.typ:
- result["typ"] = self.typ.value
- if self.aufgaben_nummer:
- result["aufgaben_nummer"] = self.aufgaben_nummer
- return result
-
- @property
- def method(self) -> str:
- """Backwards-compatible property for recognition method."""
- return "filename_pattern"
-
-
-# ============================================================================
-# Internal Data Classes
-# ============================================================================
-
-@dataclass
-class AbiturDokument:
- """Internes Dokument."""
- id: str
- dateiname: str
- original_dateiname: str
- bundesland: Bundesland
- fach: Fach
- jahr: int
- niveau: Niveau
- typ: DokumentTyp
- aufgaben_nummer: Optional[str]
- status: VerarbeitungsStatus
- confidence: float
- file_path: str
- file_size: int
- indexed: bool
- vector_ids: List[str]
- created_at: datetime
- updated_at: datetime
-
-
-# ============================================================================
# In-Memory Storage
-# ============================================================================
-
_dokumente: Dict[str, AbiturDokument] = {}
+# Backwards-compatibility alias
+documents_db = _dokumente
+
# ============================================================================
-# Helper Functions - Dokumentenerkennung
+# Private wrapper retained so call sites keep the old name; delegates to abitur_docs_recognition
# ============================================================================
-def parse_nibis_filename(filename: str) -> RecognitionResult:
- """
- Erkennt Metadaten aus NiBiS-Dateinamen.
-
- Beispiele:
- - 2025_Deutsch_eA_I.pdf
- - 2025_Deutsch_eA_I_EWH.pdf
- - 2025_Biologie_gA_1.pdf
- - 2025_Englisch_eA_HV.pdf (Hörverstehen)
- """
- result = RecognitionResult(
- success=False,
- bundesland=Bundesland.NIEDERSACHSEN, # NiBiS = Niedersachsen
- fach=None,
- jahr=None,
- niveau=None,
- typ=None,
- aufgaben_nummer=None,
- confidence=0.0,
- raw_filename=filename,
- suggestions=[]
- )
-
- # Bereinige Dateiname
- name = Path(filename).stem.lower()
-
- # Extrahiere Jahr (4 Ziffern am Anfang)
- jahr_match = re.match(r'^(\d{4})', name)
- if jahr_match:
- result.jahr = int(jahr_match.group(1))
- result.confidence += 0.2
-
- # Extrahiere Fach
- for fach_key, fach_enum in FACH_NAME_MAPPING.items():
- if fach_key in name.replace("_", "").replace("-", ""):
- result.fach = fach_enum
- result.confidence += 0.3
- break
-
- # Extrahiere Niveau (eA/gA)
- if "_ea" in name or "_ea_" in name or "ea_" in name:
- result.niveau = Niveau.EA
- result.confidence += 0.2
- elif "_ga" in name or "_ga_" in name or "ga_" in name:
- result.niveau = Niveau.GA
- result.confidence += 0.2
-
- # Extrahiere Typ
- if "_ewh" in name:
- result.typ = DokumentTyp.ERWARTUNGSHORIZONT
- result.confidence += 0.2
- elif "_hv" in name or "hoerverstehen" in name:
- result.typ = DokumentTyp.HOERVERSTEHEN
- result.confidence += 0.15
- elif "_sm" in name or "_me" in name or "sprachmittlung" in name:
- result.typ = DokumentTyp.SPRACHMITTLUNG
- result.confidence += 0.15
- elif "deckblatt" in name:
- result.typ = DokumentTyp.DECKBLATT
- result.confidence += 0.15
- elif "material" in name:
- result.typ = DokumentTyp.MATERIAL
- result.confidence += 0.15
- elif "bewertung" in name:
- result.typ = DokumentTyp.BEWERTUNGSBOGEN
- result.confidence += 0.15
- else:
- result.typ = DokumentTyp.AUFGABE
- result.confidence += 0.1
-
- # Extrahiere Aufgabennummer (römisch oder arabisch)
- aufgabe_match = re.search(r'_([ivx]+|[1-4][abc]?)(?:_|\.pdf|$)', name, re.IGNORECASE)
- if aufgabe_match:
- result.aufgaben_nummer = aufgabe_match.group(1).upper()
- result.confidence += 0.1
-
- # Erfolg wenn mindestens Fach und Jahr erkannt
- if result.fach and result.jahr:
- result.success = True
-
- # Normalisiere Confidence auf max 1.0
- result.confidence = min(result.confidence, 1.0)
-
- return result
-
-
def _to_dokument_response(doc: AbiturDokument) -> DokumentResponse:
- """Konvertiert internes Dokument zu Response."""
- return DokumentResponse(
- id=doc.id,
- dateiname=doc.dateiname,
- original_dateiname=doc.original_dateiname,
- bundesland=doc.bundesland,
- fach=doc.fach,
- jahr=doc.jahr,
- niveau=doc.niveau,
- typ=doc.typ,
- aufgaben_nummer=doc.aufgaben_nummer,
- status=doc.status,
- confidence=doc.confidence,
- file_path=doc.file_path,
- file_size=doc.file_size,
- indexed=doc.indexed,
- vector_ids=doc.vector_ids,
- created_at=doc.created_at,
- updated_at=doc.updated_at
- )
+ return to_dokument_response(doc)
# ============================================================================
@@ -413,18 +74,12 @@ async def upload_dokument(
typ: Optional[DokumentTyp] = Form(None),
aufgaben_nummer: Optional[str] = Form(None)
):
- """
- Lädt ein einzelnes Dokument hoch.
-
- Metadaten können manuell angegeben oder automatisch erkannt werden.
- """
+ """Lädt ein einzelnes Dokument hoch."""
if not file.filename:
raise HTTPException(status_code=400, detail="Kein Dateiname")
- # Erkenne Metadaten aus Dateiname
recognition = parse_nibis_filename(file.filename)
- # Überschreibe mit manuellen Angaben
final_bundesland = bundesland or recognition.bundesland or Bundesland.NIEDERSACHSEN
final_fach = fach or recognition.fach
final_jahr = jahr or recognition.jahr or datetime.now().year
@@ -435,7 +90,6 @@ async def upload_dokument(
if not final_fach:
raise HTTPException(status_code=400, detail="Fach konnte nicht erkannt werden")
- # Generiere ID und speichere Datei
doc_id = str(uuid.uuid4())
file_ext = Path(file.filename).suffix
safe_filename = f"{doc_id}{file_ext}"
@@ -446,30 +100,16 @@ async def upload_dokument(
f.write(content)
now = datetime.utcnow()
-
dokument = AbiturDokument(
- id=doc_id,
- dateiname=safe_filename,
- original_dateiname=file.filename,
- bundesland=final_bundesland,
- fach=final_fach,
- jahr=final_jahr,
- niveau=final_niveau,
- typ=final_typ,
- aufgaben_nummer=final_aufgabe,
+ id=doc_id, dateiname=safe_filename, original_dateiname=file.filename,
+ bundesland=final_bundesland, fach=final_fach, jahr=final_jahr,
+ niveau=final_niveau, typ=final_typ, aufgaben_nummer=final_aufgabe,
status=VerarbeitungsStatus.RECOGNIZED if recognition.success else VerarbeitungsStatus.PENDING,
- confidence=recognition.confidence,
- file_path=str(file_path),
- file_size=len(content),
- indexed=False,
- vector_ids=[],
- created_at=now,
- updated_at=now
+ confidence=recognition.confidence, file_path=str(file_path), file_size=len(content),
+ indexed=False, vector_ids=[], created_at=now, updated_at=now
)
-
_dokumente[doc_id] = dokument
logger.info(f"Uploaded document {doc_id}: {file.filename}")
-
return _to_dokument_response(dokument)
@@ -479,15 +119,10 @@ async def import_zip(
bundesland: Bundesland = Form(Bundesland.NIEDERSACHSEN),
background_tasks: BackgroundTasks = None
):
- """
- Importiert alle PDFs aus einer ZIP-Datei.
-
- Erkennt automatisch Metadaten aus Dateinamen.
- """
+ """Importiert alle PDFs aus einer ZIP-Datei."""
if not file.filename or not file.filename.endswith(".zip"):
raise HTTPException(status_code=400, detail="ZIP-Datei erforderlich")
- # Speichere ZIP temporär
with tempfile.NamedTemporaryFile(delete=False, suffix=".zip") as tmp:
content = await file.read()
tmp.write(content)
@@ -501,31 +136,22 @@ async def import_zip(
try:
with zipfile.ZipFile(tmp_path, 'r') as zip_ref:
for zip_info in zip_ref.infolist():
- # Nur PDFs
if not zip_info.filename.lower().endswith(".pdf"):
continue
-
- # Ignoriere Mac-spezifische Dateien
if "__MACOSX" in zip_info.filename or zip_info.filename.startswith("."):
continue
-
- # Ignoriere Thumbs.db
if "thumbs.db" in zip_info.filename.lower():
continue
total += 1
-
try:
- # Erkenne Metadaten
basename = Path(zip_info.filename).name
recognition = parse_nibis_filename(basename)
-
if not recognition.fach:
errors += 1
logger.warning(f"Konnte Fach nicht erkennen: {basename}")
continue
- # Extrahiere und speichere
doc_id = str(uuid.uuid4())
file_ext = Path(basename).suffix
safe_filename = f"{doc_id}{file_ext}"
@@ -537,62 +163,39 @@ async def import_zip(
target.write(file_content)
now = datetime.utcnow()
-
dokument = AbiturDokument(
- id=doc_id,
- dateiname=safe_filename,
- original_dateiname=basename,
- bundesland=bundesland,
- fach=recognition.fach,
+ id=doc_id, dateiname=safe_filename, original_dateiname=basename,
+ bundesland=bundesland, fach=recognition.fach,
jahr=recognition.jahr or datetime.now().year,
niveau=recognition.niveau or Niveau.EA,
typ=recognition.typ or DokumentTyp.AUFGABE,
aufgaben_nummer=recognition.aufgaben_nummer,
- status=VerarbeitungsStatus.RECOGNIZED,
- confidence=recognition.confidence,
- file_path=str(file_path),
- file_size=len(file_content),
- indexed=False,
- vector_ids=[],
- created_at=now,
- updated_at=now
+ status=VerarbeitungsStatus.RECOGNIZED, confidence=recognition.confidence,
+ file_path=str(file_path), file_size=len(file_content),
+ indexed=False, vector_ids=[], created_at=now, updated_at=now
)
-
_dokumente[doc_id] = dokument
documents.append(_to_dokument_response(dokument))
recognized += 1
-
except Exception as e:
errors += 1
logger.error(f"Fehler bei {zip_info.filename}: {e}")
-
finally:
- # Lösche temporäre ZIP
os.unlink(tmp_path)
logger.info(f"ZIP-Import: {recognized}/{total} erkannt, {errors} Fehler")
-
- return ImportResult(
- total_files=total,
- recognized=recognized,
- errors=errors,
- documents=documents
- )
+ return ImportResult(total_files=total, recognized=recognized, errors=errors, documents=documents)
@router.get("/", response_model=List[DokumentResponse])
async def list_dokumente(
- bundesland: Optional[Bundesland] = None,
- fach: Optional[Fach] = None,
- jahr: Optional[int] = None,
- niveau: Optional[Niveau] = None,
- typ: Optional[DokumentTyp] = None,
- status: Optional[VerarbeitungsStatus] = None,
+ bundesland: Optional[Bundesland] = None, fach: Optional[Fach] = None,
+ jahr: Optional[int] = None, niveau: Optional[Niveau] = None,
+ typ: Optional[DokumentTyp] = None, status: Optional[VerarbeitungsStatus] = None,
indexed: Optional[bool] = None
):
"""Listet Dokumente mit optionalen Filtern."""
docs = list(_dokumente.values())
-
if bundesland:
docs = [d for d in docs if d.bundesland == bundesland]
if fach:
@@ -607,7 +210,6 @@ async def list_dokumente(
docs = [d for d in docs if d.status == status]
if indexed is not None:
docs = [d for d in docs if d.indexed == indexed]
-
docs.sort(key=lambda x: (x.jahr, x.fach.value, x.niveau.value), reverse=True)
return [_to_dokument_response(d) for d in docs]
@@ -623,11 +225,10 @@ async def get_dokument(doc_id: str):
@router.put("/{doc_id}", response_model=DokumentResponse)
async def update_dokument(doc_id: str, data: DokumentUpdate):
- """Aktualisiert Dokument-Metadaten (nach KI-Erkennung durch Entwickler)."""
+ """Aktualisiert Dokument-Metadaten."""
doc = _dokumente.get(doc_id)
if not doc:
raise HTTPException(status_code=404, detail="Dokument nicht gefunden")
-
if data.bundesland is not None:
doc.bundesland = data.bundesland
if data.fach is not None:
@@ -642,9 +243,7 @@ async def update_dokument(doc_id: str, data: DokumentUpdate):
doc.aufgaben_nummer = data.aufgaben_nummer
if data.status is not None:
doc.status = data.status
-
doc.updated_at = datetime.utcnow()
-
return _to_dokument_response(doc)
@@ -654,10 +253,8 @@ async def confirm_dokument(doc_id: str):
doc = _dokumente.get(doc_id)
if not doc:
raise HTTPException(status_code=404, detail="Dokument nicht gefunden")
-
doc.status = VerarbeitungsStatus.CONFIRMED
doc.updated_at = datetime.utcnow()
-
return _to_dokument_response(doc)
@@ -667,24 +264,13 @@ async def index_dokument(doc_id: str):
doc = _dokumente.get(doc_id)
if not doc:
raise HTTPException(status_code=404, detail="Dokument nicht gefunden")
-
if doc.status not in [VerarbeitungsStatus.CONFIRMED, VerarbeitungsStatus.RECOGNIZED]:
raise HTTPException(status_code=400, detail="Dokument muss erst bestätigt werden")
-
- # TODO: Vector Store Integration
- # 1. PDF lesen und Text extrahieren
- # 2. In Chunks aufteilen
- # 3. Embeddings generieren
- # 4. Mit Metadaten im Vector Store speichern
-
- # Demo: Simuliere Indexierung
doc.indexed = True
- doc.vector_ids = [f"vec_{doc_id}_{i}" for i in range(3)] # Demo-IDs
+ doc.vector_ids = [f"vec_{doc_id}_{i}" for i in range(3)]
doc.status = VerarbeitungsStatus.INDEXED
doc.updated_at = datetime.utcnow()
-
logger.info(f"Document {doc_id} indexed (demo)")
-
return _to_dokument_response(doc)
@@ -694,15 +280,9 @@ async def delete_dokument(doc_id: str):
doc = _dokumente.get(doc_id)
if not doc:
raise HTTPException(status_code=404, detail="Dokument nicht gefunden")
-
- # Lösche Datei
if os.path.exists(doc.file_path):
os.remove(doc.file_path)
-
- # TODO: Aus Vector Store entfernen
-
del _dokumente[doc_id]
-
return {"status": "deleted", "id": doc_id}
@@ -712,20 +292,10 @@ async def download_dokument(doc_id: str):
doc = _dokumente.get(doc_id)
if not doc:
raise HTTPException(status_code=404, detail="Dokument nicht gefunden")
-
if not os.path.exists(doc.file_path):
raise HTTPException(status_code=404, detail="Datei nicht gefunden")
+ return FileResponse(doc.file_path, filename=doc.original_dateiname, media_type="application/pdf")
- return FileResponse(
- doc.file_path,
- filename=doc.original_dateiname,
- media_type="application/pdf"
- )
-
-
-# ============================================================================
-# API Endpoints - Erkennung
-# ============================================================================
@router.post("/recognize", response_model=RecognitionResult)
async def recognize_filename(filename: str):
@@ -743,7 +313,6 @@ async def bulk_confirm(doc_ids: List[str]):
doc.status = VerarbeitungsStatus.CONFIRMED
doc.updated_at = datetime.utcnow()
confirmed += 1
-
return {"confirmed": confirmed, "total": len(doc_ids)}
@@ -754,70 +323,41 @@ async def bulk_index(doc_ids: List[str]):
for doc_id in doc_ids:
doc = _dokumente.get(doc_id)
if doc and doc.status in [VerarbeitungsStatus.CONFIRMED, VerarbeitungsStatus.RECOGNIZED]:
- # Demo-Indexierung
doc.indexed = True
doc.vector_ids = [f"vec_{doc_id}_{i}" for i in range(3)]
doc.status = VerarbeitungsStatus.INDEXED
doc.updated_at = datetime.utcnow()
indexed += 1
-
return {"indexed": indexed, "total": len(doc_ids)}
-# ============================================================================
-# API Endpoints - Statistiken
-# ============================================================================
-
@router.get("/stats/overview")
async def get_stats_overview():
"""Gibt Übersicht über alle Dokumente."""
docs = list(_dokumente.values())
-
- by_bundesland = {}
- by_fach = {}
- by_jahr = {}
- by_status = {}
-
+ by_bundesland: Dict[str, int] = {}
+ by_fach: Dict[str, int] = {}
+ by_jahr: Dict[int, int] = {}
+ by_status: Dict[str, int] = {}
for doc in docs:
by_bundesland[doc.bundesland.value] = by_bundesland.get(doc.bundesland.value, 0) + 1
by_fach[doc.fach.value] = by_fach.get(doc.fach.value, 0) + 1
by_jahr[doc.jahr] = by_jahr.get(doc.jahr, 0) + 1
by_status[doc.status.value] = by_status.get(doc.status.value, 0) + 1
-
return {
- "total": len(docs),
- "indexed": sum(1 for d in docs if d.indexed),
+ "total": len(docs), "indexed": sum(1 for d in docs if d.indexed),
"pending": sum(1 for d in docs if d.status == VerarbeitungsStatus.PENDING),
- "by_bundesland": by_bundesland,
- "by_fach": by_fach,
- "by_jahr": by_jahr,
- "by_status": by_status
+ "by_bundesland": by_bundesland, "by_fach": by_fach, "by_jahr": by_jahr, "by_status": by_status
}
-# ============================================================================
-# API Endpoints - Suche (für Klausur-Korrektur)
-# ============================================================================
-
@router.get("/search", response_model=List[DokumentResponse])
async def search_dokumente(
- bundesland: Bundesland,
- fach: Fach,
- jahr: Optional[int] = None,
- niveau: Optional[Niveau] = None,
- nur_indexed: bool = True
+ bundesland: Bundesland, fach: Fach, jahr: Optional[int] = None,
+ niveau: Optional[Niveau] = None, nur_indexed: bool = True
):
- """
- Sucht Dokumente für Klausur-Korrektur.
-
- Gibt nur indizierte Dokumente zurück (Standard).
- """
- docs = list(_dokumente.values())
-
- # Pflichtfilter
- docs = [d for d in docs if d.bundesland == bundesland and d.fach == fach]
-
- # Optionale Filter
+ """Sucht Dokumente für Klausur-Korrektur."""
+ docs = [d for d in _dokumente.values() if d.bundesland == bundesland and d.fach == fach]
if jahr:
docs = [d for d in docs if d.jahr == jahr]
if niveau:
@@ -825,7 +365,6 @@ async def search_dokumente(
if nur_indexed:
docs = [d for d in docs if d.indexed]
- # Sortiere: Aufgaben vor Erwartungshorizonten
aufgaben = [d for d in docs if d.typ == DokumentTyp.AUFGABE]
ewh = [d for d in docs if d.typ == DokumentTyp.ERWARTUNGSHORIZONT]
andere = [d for d in docs if d.typ not in [DokumentTyp.AUFGABE, DokumentTyp.ERWARTUNGSHORIZONT]]
@@ -833,31 +372,20 @@ async def search_dokumente(
result = []
for aufgabe in aufgaben:
result.append(_to_dokument_response(aufgabe))
- # Finde passenden EWH
matching_ewh = next(
- (e for e in ewh
- if e.jahr == aufgabe.jahr
- and e.niveau == aufgabe.niveau
- and e.aufgaben_nummer == aufgabe.aufgaben_nummer),
- None
+ (e for e in ewh if e.jahr == aufgabe.jahr and e.niveau == aufgabe.niveau
+ and e.aufgaben_nummer == aufgabe.aufgaben_nummer), None
)
if matching_ewh:
result.append(_to_dokument_response(matching_ewh))
-
- # Restliche EWH und andere
for e in ewh:
if _to_dokument_response(e) not in result:
result.append(_to_dokument_response(e))
for a in andere:
result.append(_to_dokument_response(a))
-
return result
-# ============================================================================
-# Enums Endpoint (für Frontend)
-# ============================================================================
-
@router.get("/enums/bundeslaender")
async def get_bundeslaender():
"""Gibt alle Bundesländer zurück."""
@@ -867,35 +395,7 @@ async def get_bundeslaender():
@router.get("/enums/faecher")
async def get_faecher():
"""Gibt alle Fächer zurück."""
- labels = {
- Fach.DEUTSCH: "Deutsch",
- Fach.ENGLISCH: "Englisch",
- Fach.MATHEMATIK: "Mathematik",
- Fach.BIOLOGIE: "Biologie",
- Fach.CHEMIE: "Chemie",
- Fach.PHYSIK: "Physik",
- Fach.GESCHICHTE: "Geschichte",
- Fach.ERDKUNDE: "Erdkunde",
- Fach.POLITIK_WIRTSCHAFT: "Politik-Wirtschaft",
- Fach.FRANZOESISCH: "Französisch",
- Fach.SPANISCH: "Spanisch",
- Fach.LATEIN: "Latein",
- Fach.GRIECHISCH: "Griechisch",
- Fach.KUNST: "Kunst",
- Fach.MUSIK: "Musik",
- Fach.SPORT: "Sport",
- Fach.INFORMATIK: "Informatik",
- Fach.EV_RELIGION: "Ev. Religion",
- Fach.KATH_RELIGION: "Kath. Religion",
- Fach.WERTE_NORMEN: "Werte und Normen",
- Fach.BRC: "BRC (Betriebswirtschaft)",
- Fach.BVW: "BVW (Volkswirtschaft)",
- Fach.ERNAEHRUNG: "Ernährung",
- Fach.MECHATRONIK: "Mechatronik",
- Fach.GESUNDHEIT_PFLEGE: "Gesundheit-Pflege",
- Fach.PAEDAGOGIK_PSYCHOLOGIE: "Pädagogik-Psychologie",
- }
- return [{"value": f.value, "label": labels.get(f, f.value)} for f in Fach]
+ return [{"value": f.value, "label": FACH_LABELS.get(f, f.value)} for f in Fach]
@router.get("/enums/niveaus")
@@ -910,47 +410,4 @@ async def get_niveaus():
@router.get("/enums/typen")
async def get_typen():
"""Gibt alle Dokumenttypen zurück."""
- labels = {
- DokumentTyp.AUFGABE: "Aufgabe",
- DokumentTyp.ERWARTUNGSHORIZONT: "Erwartungshorizont",
- DokumentTyp.DECKBLATT: "Deckblatt",
- DokumentTyp.MATERIAL: "Material",
- DokumentTyp.HOERVERSTEHEN: "Hörverstehen",
- DokumentTyp.SPRACHMITTLUNG: "Sprachmittlung",
- DokumentTyp.BEWERTUNGSBOGEN: "Bewertungsbogen",
- }
- return [{"value": t.value, "label": labels.get(t, t.value)} for t in DokumentTyp]
-
-
-# ============================================================================
-# Backwards-compatibility aliases (used by tests)
-# ============================================================================
-AbiturFach = Fach
-Anforderungsniveau = Niveau
-documents_db = _dokumente
-
-
-class DocumentMetadata(BaseModel):
- """Backwards-compatible metadata model for tests."""
- jahr: Optional[int] = None
- bundesland: Optional[str] = None
- fach: Optional[str] = None
- niveau: Optional[str] = None
- dokument_typ: Optional[str] = None
- aufgaben_nummer: Optional[str] = None
-
-
-# Backwards-compatible AbiturDokument for tests (different from internal dataclass)
-class AbiturDokumentCompat(BaseModel):
- """Backwards-compatible AbiturDokument model for tests."""
- id: str
- filename: str
- file_path: str
- metadata: DocumentMetadata
- status: VerarbeitungsStatus
- recognition_result: Optional[RecognitionResult] = None
- created_at: datetime
- updated_at: datetime
-
- class Config:
- arbitrary_types_allowed = True
+ return [{"value": t.value, "label": DOKUMENT_TYP_LABELS.get(t, t.value)} for t in DokumentTyp]
diff --git a/backend-lehrer/abitur_docs_models.py b/backend-lehrer/abitur_docs_models.py
new file mode 100644
index 0000000..c49e6c1
--- /dev/null
+++ b/backend-lehrer/abitur_docs_models.py
@@ -0,0 +1,327 @@
+"""
+Abitur Document Store - Enums, Pydantic Models, Data Classes.
+
+Shared types for abitur_docs_api and abitur_docs_recognition.
+"""
+
+from datetime import datetime
+from typing import List, Dict, Any, Optional
+from enum import Enum
+from dataclasses import dataclass
+
+from pydantic import BaseModel, Field
+
+
+# ============================================================================
+# Enums
+# ============================================================================
+
class Bundesland(str, Enum):
    """German federal states with a centralized Abitur.

    Enum values are the lowercase slugs used in API payloads and filenames.
    """
    NIEDERSACHSEN = "niedersachsen"
    BAYERN = "bayern"
    BADEN_WUERTTEMBERG = "baden_wuerttemberg"
    NORDRHEIN_WESTFALEN = "nordrhein_westfalen"
    HESSEN = "hessen"
    SACHSEN = "sachsen"
    THUERINGEN = "thueringen"
    BERLIN = "berlin"
    HAMBURG = "hamburg"
    SCHLESWIG_HOLSTEIN = "schleswig_holstein"
    BREMEN = "bremen"
    BRANDENBURG = "brandenburg"
    MECKLENBURG_VORPOMMERN = "mecklenburg_vorpommern"
    SACHSEN_ANHALT = "sachsen_anhalt"
    RHEINLAND_PFALZ = "rheinland_pfalz"
    SAARLAND = "saarland"
+
+
class Fach(str, Enum):
    """Abitur subjects.

    Values are lowercase ASCII slugs (umlauts transliterated); display names
    live in FACH_LABELS, filename aliases in FACH_NAME_MAPPING.
    """
    DEUTSCH = "deutsch"
    ENGLISCH = "englisch"
    MATHEMATIK = "mathematik"
    BIOLOGIE = "biologie"
    CHEMIE = "chemie"
    PHYSIK = "physik"
    GESCHICHTE = "geschichte"
    ERDKUNDE = "erdkunde"
    POLITIK_WIRTSCHAFT = "politik_wirtschaft"
    FRANZOESISCH = "franzoesisch"
    SPANISCH = "spanisch"
    LATEIN = "latein"
    GRIECHISCH = "griechisch"
    KUNST = "kunst"
    MUSIK = "musik"
    SPORT = "sport"
    INFORMATIK = "informatik"
    EV_RELIGION = "ev_religion"
    KATH_RELIGION = "kath_religion"
    WERTE_NORMEN = "werte_normen"
    # Vocational-track subjects (see FACH_LABELS for the expanded names).
    BRC = "brc"
    BVW = "bvw"
    ERNAEHRUNG = "ernaehrung"
    MECHATRONIK = "mechatronik"
    GESUNDHEIT_PFLEGE = "gesundheit_pflege"
    PAEDAGOGIK_PSYCHOLOGIE = "paedagogik_psychologie"
+
+
class Niveau(str, Enum):
    """Requirement level (Anforderungsniveau): eA = erhöht (advanced), gA = grundlegend (basic)."""
    EA = "eA"
    GA = "gA"
+
+
class DokumentTyp(str, Enum):
    """Document type within an exam set (task sheet, grading guide, cover sheet, ...)."""
    AUFGABE = "aufgabe"
    ERWARTUNGSHORIZONT = "erwartungshorizont"
    DECKBLATT = "deckblatt"
    MATERIAL = "material"
    HOERVERSTEHEN = "hoerverstehen"
    SPRACHMITTLUNG = "sprachmittlung"
    BEWERTUNGSBOGEN = "bewertungsbogen"
+
+
class VerarbeitungsStatus(str, Enum):
    """Processing status of a document.

    Typical progression (inferred from the names — confirm against the API
    handlers): pending -> processing -> recognized -> confirmed -> indexed,
    with error as the failure terminal state.
    """
    PENDING = "pending"
    PROCESSING = "processing"
    RECOGNIZED = "recognized"
    CONFIRMED = "confirmed"
    INDEXED = "indexed"
    ERROR = "error"
+
+
+# ============================================================================
+# Fach-Mapping für Dateinamen
+# ============================================================================
+
# Maps lowercase filename fragments to subjects.  Consumers (see
# parse_nibis_filename) take the FIRST key found in the separator-free
# filename, so insertion order is the match priority — e.g.
# "politikwirtschaft" is deliberately listed before its prefix "politik".
FACH_NAME_MAPPING = {
    "deutsch": Fach.DEUTSCH,
    "englisch": Fach.ENGLISCH,
    "mathe": Fach.MATHEMATIK,
    "mathematik": Fach.MATHEMATIK,
    "biologie": Fach.BIOLOGIE,
    "bio": Fach.BIOLOGIE,
    "chemie": Fach.CHEMIE,
    "physik": Fach.PHYSIK,
    "geschichte": Fach.GESCHICHTE,
    "erdkunde": Fach.ERDKUNDE,
    "geographie": Fach.ERDKUNDE,
    "politikwirtschaft": Fach.POLITIK_WIRTSCHAFT,
    "politik": Fach.POLITIK_WIRTSCHAFT,
    "franzoesisch": Fach.FRANZOESISCH,
    "franz": Fach.FRANZOESISCH,
    "spanisch": Fach.SPANISCH,
    "latein": Fach.LATEIN,
    "griechisch": Fach.GRIECHISCH,
    "kunst": Fach.KUNST,
    "musik": Fach.MUSIK,
    "sport": Fach.SPORT,
    "informatik": Fach.INFORMATIK,
    "evreligion": Fach.EV_RELIGION,
    "kathreligion": Fach.KATH_RELIGION,
    "wertenormen": Fach.WERTE_NORMEN,
    "brc": Fach.BRC,
    "bvw": Fach.BVW,
    "ernaehrung": Fach.ERNAEHRUNG,
    "mecha": Fach.MECHATRONIK,
    "mechatronik": Fach.MECHATRONIK,
    "technikmecha": Fach.MECHATRONIK,
    "gespfl": Fach.GESUNDHEIT_PFLEGE,
    "paedpsych": Fach.PAEDAGOGIK_PSYCHOLOGIE,
}
+
+
+# ============================================================================
+# Pydantic Models
+# ============================================================================
+
class DokumentCreate(BaseModel):
    """Request payload for manually creating a document (metadata supplied by the user)."""
    bundesland: Bundesland
    fach: Fach
    jahr: int = Field(ge=2000, le=2100)  # exam year; bounds guard against typos
    niveau: Niveau
    typ: DokumentTyp
    aufgaben_nummer: Optional[str] = None  # e.g. "I", "2a" — not every type has one
+
+
class DokumentUpdate(BaseModel):
    """Partial update for recognized metadata; every field is optional (None = leave unchanged).

    NOTE(review): unlike DokumentCreate, `jahr` has no ge/le bounds here —
    confirm whether update validation should match create validation.
    """
    bundesland: Optional[Bundesland] = None
    fach: Optional[Fach] = None
    jahr: Optional[int] = None
    niveau: Optional[Niveau] = None
    typ: Optional[DokumentTyp] = None
    aufgaben_nummer: Optional[str] = None
    status: Optional[VerarbeitungsStatus] = None
+
+
class DokumentResponse(BaseModel):
    """API response for a single document.

    Field names and order mirror the internal AbiturDokument dataclass 1:1
    (to_dokument_response relies on this correspondence).
    """
    id: str
    dateiname: str           # normalized filename
    original_dateiname: str  # filename as uploaded
    bundesland: Bundesland
    fach: Fach
    jahr: int
    niveau: Niveau
    typ: DokumentTyp
    aufgaben_nummer: Optional[str]
    status: VerarbeitungsStatus
    confidence: float        # recognition confidence, 0.0–1.0
    file_path: str
    file_size: int
    indexed: bool            # whether the document is in the vector index
    vector_ids: List[str]
    created_at: datetime
    updated_at: datetime
+
+
class ImportResult(BaseModel):
    """Summary of a ZIP bulk import: per-file counts plus the created documents."""
    total_files: int
    recognized: int
    errors: int
    documents: List[DokumentResponse]
+
+
class RecognitionResult(BaseModel):
    """Outcome of filename-based metadata recognition for one document."""
    success: bool
    bundesland: Optional[Bundesland]
    fach: Optional[Fach]
    jahr: Optional[int]
    niveau: Optional[Niveau]
    typ: Optional[DokumentTyp]
    aufgaben_nummer: Optional[str]
    confidence: float
    raw_filename: str
    suggestions: List[Dict[str, Any]]

    @property
    def extracted(self) -> Dict[str, Any]:
        """Backwards-compatible property returning extracted values as dict."""
        # Only truthy fields are included; enum members are flattened to
        # their string .value, scalar fields pass through unchanged.  The
        # tuple below fixes both the inclusion order and which fields are
        # enum-valued.
        field_spec = (
            ("bundesland", True),
            ("fach", True),
            ("jahr", False),
            ("niveau", True),
            ("typ", True),
            ("aufgaben_nummer", False),
        )
        extracted: Dict[str, Any] = {}
        for field_name, is_enum in field_spec:
            value = getattr(self, field_name)
            if value:
                extracted[field_name] = value.value if is_enum else value
        return extracted

    @property
    def method(self) -> str:
        """Backwards-compatible property for recognition method."""
        return "filename_pattern"
+
+
+# ============================================================================
+# Internal Data Classes
+# ============================================================================
+
@dataclass
class AbiturDokument:
    """Internal document record.

    Field names mirror DokumentResponse exactly, so instances can be
    converted field-for-field (see to_dokument_response).
    """
    id: str
    dateiname: str
    original_dateiname: str
    bundesland: Bundesland
    fach: Fach
    jahr: int
    niveau: Niveau
    typ: DokumentTyp
    aufgaben_nummer: Optional[str]
    status: VerarbeitungsStatus
    confidence: float
    file_path: str
    file_size: int
    indexed: bool
    vector_ids: List[str]
    created_at: datetime
    updated_at: datetime
+
+
+# ============================================================================
+# Backwards-compatibility aliases (used by tests)
+# ============================================================================
AbiturFach = Fach            # legacy name kept so existing tests keep importing it
Anforderungsniveau = Niveau  # legacy name for Niveau
+
+
class DocumentMetadata(BaseModel):
    """Backwards-compatible metadata model for tests.

    All fields are optional plain strings/ints (not the typed enums) to
    match the legacy shape the tests expect.
    """
    jahr: Optional[int] = None
    bundesland: Optional[str] = None
    fach: Optional[str] = None
    niveau: Optional[str] = None
    dokument_typ: Optional[str] = None
    aufgaben_nummer: Optional[str] = None
+
+
class AbiturDokumentCompat(BaseModel):
    """Backwards-compatible AbiturDokument model for tests.

    Intentionally distinct from the internal AbiturDokument dataclass:
    it nests metadata instead of flattening the fields.
    """
    id: str
    filename: str
    file_path: str
    metadata: DocumentMetadata
    status: VerarbeitungsStatus
    recognition_result: Optional[RecognitionResult] = None
    created_at: datetime
    updated_at: datetime

    class Config:
        # recognition_result may carry non-pydantic values in legacy tests
        arbitrary_types_allowed = True
+
+
+# ============================================================================
+# Fach Labels (für Frontend Enum-Endpoint)
+# ============================================================================
+
# Human-readable display names for the Fach enum, served to the frontend
# via the /enums/faecher endpoint.
FACH_LABELS = {
    Fach.DEUTSCH: "Deutsch",
    Fach.ENGLISCH: "Englisch",
    Fach.MATHEMATIK: "Mathematik",
    Fach.BIOLOGIE: "Biologie",
    Fach.CHEMIE: "Chemie",
    Fach.PHYSIK: "Physik",
    Fach.GESCHICHTE: "Geschichte",
    Fach.ERDKUNDE: "Erdkunde",
    Fach.POLITIK_WIRTSCHAFT: "Politik-Wirtschaft",
    Fach.FRANZOESISCH: "Französisch",
    Fach.SPANISCH: "Spanisch",
    Fach.LATEIN: "Latein",
    Fach.GRIECHISCH: "Griechisch",
    Fach.KUNST: "Kunst",
    Fach.MUSIK: "Musik",
    Fach.SPORT: "Sport",
    Fach.INFORMATIK: "Informatik",
    Fach.EV_RELIGION: "Ev. Religion",
    Fach.KATH_RELIGION: "Kath. Religion",
    Fach.WERTE_NORMEN: "Werte und Normen",
    Fach.BRC: "BRC (Betriebswirtschaft)",
    Fach.BVW: "BVW (Volkswirtschaft)",
    Fach.ERNAEHRUNG: "Ernährung",
    Fach.MECHATRONIK: "Mechatronik",
    Fach.GESUNDHEIT_PFLEGE: "Gesundheit-Pflege",
    Fach.PAEDAGOGIK_PSYCHOLOGIE: "Pädagogik-Psychologie",
}
+
# Display names for DokumentTyp, served via the /enums/typen endpoint.
DOKUMENT_TYP_LABELS = {
    DokumentTyp.AUFGABE: "Aufgabe",
    DokumentTyp.ERWARTUNGSHORIZONT: "Erwartungshorizont",
    DokumentTyp.DECKBLATT: "Deckblatt",
    DokumentTyp.MATERIAL: "Material",
    DokumentTyp.HOERVERSTEHEN: "Hörverstehen",
    DokumentTyp.SPRACHMITTLUNG: "Sprachmittlung",
    DokumentTyp.BEWERTUNGSBOGEN: "Bewertungsbogen",
}
diff --git a/backend-lehrer/abitur_docs_recognition.py b/backend-lehrer/abitur_docs_recognition.py
new file mode 100644
index 0000000..69aae1b
--- /dev/null
+++ b/backend-lehrer/abitur_docs_recognition.py
@@ -0,0 +1,124 @@
+"""
+Abitur Document Store - Dateinamen-Erkennung und Helfer.
+
+Erkennt Metadaten aus NiBiS-Dateinamen (Niedersachsen).
+"""
+
+import re
+from typing import Dict, Any
+from pathlib import Path
+
+from abitur_docs_models import (
+ Bundesland, Fach, Niveau, DokumentTyp, VerarbeitungsStatus,
+ RecognitionResult, AbiturDokument, DokumentResponse,
+ FACH_NAME_MAPPING,
+)
+
+
def parse_nibis_filename(filename: str) -> RecognitionResult:
    """
    Extract document metadata from a NiBiS (Lower Saxony) filename.

    Examples:
        - 2025_Deutsch_eA_I.pdf
        - 2025_Deutsch_eA_I_EWH.pdf
        - 2025_Biologie_gA_1.pdf
        - 2025_Englisch_eA_HV.pdf  (listening comprehension)

    Returns a RecognitionResult whose ``success`` flag is set once at least
    the year and the subject were recognized.  ``confidence`` accumulates a
    fixed weight per recognized component and is capped at 1.0.
    """
    result = RecognitionResult(
        success=False,
        bundesland=Bundesland.NIEDERSACHSEN,  # NiBiS filenames are always Lower Saxony
        fach=None,
        jahr=None,
        niveau=None,
        typ=None,
        aufgaben_nummer=None,
        confidence=0.0,
        raw_filename=filename,
        suggestions=[]
    )

    # Normalize: drop the extension, lowercase for case-insensitive matching.
    name = Path(filename).stem.lower()
    # Underscore-separated tokens, e.g. "2025_deutsch_ea_i" -> ["2025", "deutsch", "ea", "i"].
    # Matching short markers ("ea", "hv", "me", ...) against whole tokens avoids
    # the substring false positives the previous version had — e.g. "_me" used
    # to match inside "_mechatronik" and misclassify the file as Sprachmittlung.
    tokens = name.split("_")
    token_set = set(tokens)

    # Year: four digits at the very start of the name.
    jahr_match = re.match(r'^(\d{4})', name)
    if jahr_match:
        result.jahr = int(jahr_match.group(1))
        result.confidence += 0.2

    # Subject: first FACH_NAME_MAPPING key contained in the separator-free
    # name (insertion order of the mapping is the match priority).
    compact = name.replace("_", "").replace("-", "")  # hoisted out of the loop
    for fach_key, fach_enum in FACH_NAME_MAPPING.items():
        if fach_key in compact:
            result.fach = fach_enum
            result.confidence += 0.3
            break

    # Requirement level: "ea"/"ga" must stand as its own token ("..._eA_...").
    if "ea" in token_set:
        result.niveau = Niveau.EA
        result.confidence += 0.2
    elif "ga" in token_set:
        result.niveau = Niveau.GA
        result.confidence += 0.2

    # Document type, checked from the most specific marker down to the
    # default.  Long markers remain substring checks; short abbreviations
    # are token checks for the reason explained above.
    if "ewh" in token_set:
        result.typ = DokumentTyp.ERWARTUNGSHORIZONT
        result.confidence += 0.2
    elif "hv" in token_set or "hoerverstehen" in name:
        result.typ = DokumentTyp.HOERVERSTEHEN
        result.confidence += 0.15
    elif "sm" in token_set or "me" in token_set or "sprachmittlung" in name:
        result.typ = DokumentTyp.SPRACHMITTLUNG
        result.confidence += 0.15
    elif "deckblatt" in name:
        result.typ = DokumentTyp.DECKBLATT
        result.confidence += 0.15
    elif "material" in name:
        result.typ = DokumentTyp.MATERIAL
        result.confidence += 0.15
    elif "bewertung" in name:
        result.typ = DokumentTyp.BEWERTUNGSBOGEN
        result.confidence += 0.15
    else:
        result.typ = DokumentTyp.AUFGABE
        result.confidence += 0.1

    # Task number: the first token (after the leading year) that is entirely
    # a Roman numeral or an arabic digit 1-4 with optional a/b/c suffix.
    # (The previous regex carried a dead "\.pdf" alternative — the extension
    # is already stripped above — and a redundant IGNORECASE flag.)
    for token in tokens[1:]:
        if re.fullmatch(r'[ivx]+|[1-4][abc]?', token):
            result.aufgaben_nummer = token.upper()
            result.confidence += 0.1
            break

    # Recognition counts as successful once subject and year are both known.
    if result.fach and result.jahr:
        result.success = True

    # Cap the accumulated confidence at 1.0.
    result.confidence = min(result.confidence, 1.0)

    return result
+
+
def to_dokument_response(doc: AbiturDokument) -> DokumentResponse:
    """Convert the internal dataclass record into its API response model."""
    # AbiturDokument and DokumentResponse declare exactly the same field
    # names, so the dataclass's attribute dict maps 1:1 onto the model's
    # keyword arguments.
    return DokumentResponse(**vars(doc))
diff --git a/backend-lehrer/alerts_agent/db/item_repository.py b/backend-lehrer/alerts_agent/db/item_repository.py
new file mode 100644
index 0000000..f187c92
--- /dev/null
+++ b/backend-lehrer/alerts_agent/db/item_repository.py
@@ -0,0 +1,394 @@
+"""
+Repository für Alert Items (einzelne Alerts/Artikel).
+"""
+import hashlib
+import urllib.parse
+import uuid
+from datetime import datetime, timedelta
+from typing import Optional, List, Dict, Any
+from sqlalchemy.orm import Session as DBSession
+from sqlalchemy import or_, func
+
+from .models import (
+ AlertItemDB, AlertSourceEnum, AlertStatusEnum, RelevanceDecisionEnum
+)
+
+
class AlertItemRepository:
    """Repository for alert items (individual alerts/articles).

    Thin CRUD layer over ``AlertItemDB``.  Every mutating method commits
    immediately on the injected session; the caller owns the session's
    lifecycle.
    """

    def __init__(self, db: DBSession):
        self.db = db

    # ==================== CREATE ====================

    def create(
        self,
        topic_id: str,
        title: str,
        url: str,
        snippet: str = "",
        source: str = "google_alerts_rss",
        published_at: Optional[datetime] = None,
        lang: str = "de",
    ) -> AlertItemDB:
        """Create and persist a new alert.

        Raises ValueError if *source* is not a valid AlertSourceEnum value.
        """
        # Hash of the normalized URL — the dedup key used elsewhere.
        url_hash = self._compute_url_hash(url)

        alert = AlertItemDB(
            id=str(uuid.uuid4()),
            topic_id=topic_id,
            title=title,
            url=url,
            snippet=snippet,
            source=AlertSourceEnum(source),
            published_at=published_at,
            lang=lang,
            url_hash=url_hash,
            canonical_url=self._normalize_url(url),
        )
        self.db.add(alert)
        self.db.commit()
        self.db.refresh(alert)
        return alert

    def create_if_not_exists(
        self,
        topic_id: str,
        title: str,
        url: str,
        snippet: str = "",
        source: str = "google_alerts_rss",
        published_at: Optional[datetime] = None,
    ) -> Optional[AlertItemDB]:
        """Create an alert only if the URL is not known yet; returns None on duplicate.

        NOTE(review): the duplicate check is global (url_hash only), not
        scoped to *topic_id* — the same URL under a second topic is rejected.
        Confirm that this is intended.
        """
        url_hash = self._compute_url_hash(url)

        existing = self.db.query(AlertItemDB).filter(
            AlertItemDB.url_hash == url_hash
        ).first()

        if existing:
            return None  # duplicate URL

        return self.create(
            topic_id=topic_id,
            title=title,
            url=url,
            snippet=snippet,
            source=source,
            published_at=published_at,
        )

    # ==================== READ ====================

    def get_by_id(self, alert_id: str) -> Optional[AlertItemDB]:
        """Fetch a single alert by primary key, or None."""
        return self.db.query(AlertItemDB).filter(
            AlertItemDB.id == alert_id
        ).first()

    def get_by_url_hash(self, url_hash: str) -> Optional[AlertItemDB]:
        """Fetch a single alert by its URL hash, or None."""
        return self.db.query(AlertItemDB).filter(
            AlertItemDB.url_hash == url_hash
        ).first()

    def get_inbox(
        self,
        user_id: Optional[str] = None,
        topic_id: Optional[str] = None,
        decision: Optional[str] = None,
        status: Optional[str] = None,
        limit: int = 50,
        offset: int = 0,
    ) -> List[AlertItemDB]:
        """
        Fetch inbox items with optional filters, best score first.

        Without an explicit *decision* filter, KEEP, REVIEW and not-yet-scored
        (decision NULL) items are returned.  *decision* and *status* must be
        valid enum values (ValueError otherwise).

        NOTE(review): *user_id* is accepted but never applied as a filter —
        confirm whether topic ownership is enforced elsewhere.
        """
        query = self.db.query(AlertItemDB)

        if topic_id:
            query = query.filter(AlertItemDB.topic_id == topic_id)

        if decision:
            query = query.filter(
                AlertItemDB.relevance_decision == RelevanceDecisionEnum(decision)
            )
        else:
            # Default: KEEP, REVIEW, and unscored items.
            query = query.filter(
                or_(
                    AlertItemDB.relevance_decision == RelevanceDecisionEnum.KEEP,
                    AlertItemDB.relevance_decision == RelevanceDecisionEnum.REVIEW,
                    AlertItemDB.relevance_decision.is_(None)
                )
            )

        if status:
            query = query.filter(AlertItemDB.status == AlertStatusEnum(status))

        # Highest relevance first; NULL scores (unscored) sort last, ties
        # broken by most recently fetched.
        return query.order_by(
            AlertItemDB.relevance_score.desc().nullslast(),
            AlertItemDB.fetched_at.desc()
        ).offset(offset).limit(limit).all()

    def get_unscored(
        self,
        topic_id: Optional[str] = None,
        limit: int = 100,
    ) -> List[AlertItemDB]:
        """Fetch alerts still in status NEW (never scored), newest first."""
        query = self.db.query(AlertItemDB).filter(
            AlertItemDB.status == AlertStatusEnum.NEW
        )

        if topic_id:
            query = query.filter(AlertItemDB.topic_id == topic_id)

        return query.order_by(AlertItemDB.fetched_at.desc()).limit(limit).all()

    def get_by_topic(
        self,
        topic_id: str,
        limit: int = 100,
        offset: int = 0,
    ) -> List[AlertItemDB]:
        """Fetch all alerts of one topic, newest first."""
        return self.db.query(AlertItemDB).filter(
            AlertItemDB.topic_id == topic_id
        ).order_by(
            AlertItemDB.fetched_at.desc()
        ).offset(offset).limit(limit).all()

    def count_by_status(self, topic_id: Optional[str] = None) -> Dict[str, int]:
        """Count alerts grouped by status; keys are status enum values."""
        query = self.db.query(
            AlertItemDB.status,
            func.count(AlertItemDB.id).label('count')
        )

        if topic_id:
            query = query.filter(AlertItemDB.topic_id == topic_id)

        results = query.group_by(AlertItemDB.status).all()

        return {r[0].value: r[1] for r in results}

    def count_by_decision(self, topic_id: Optional[str] = None) -> Dict[str, int]:
        """Count alerts grouped by relevance decision; NULL decisions appear under "unscored"."""
        query = self.db.query(
            AlertItemDB.relevance_decision,
            func.count(AlertItemDB.id).label('count')
        )

        if topic_id:
            query = query.filter(AlertItemDB.topic_id == topic_id)

        results = query.group_by(AlertItemDB.relevance_decision).all()

        return {
            (r[0].value if r[0] else "unscored"): r[1]
            for r in results
        }

    # ==================== UPDATE ====================

    def update_scoring(
        self,
        alert_id: str,
        score: float,
        decision: str,
        reasons: Optional[List[str]] = None,
        summary: Optional[str] = None,
        model: Optional[str] = None,
    ) -> Optional[AlertItemDB]:
        """Store the scoring result and move the alert to status SCORED.

        Returns None if the alert does not exist.  *decision* must be a valid
        RelevanceDecisionEnum value.
        """
        alert = self.get_by_id(alert_id)
        if not alert:
            return None

        alert.relevance_score = score
        alert.relevance_decision = RelevanceDecisionEnum(decision)
        alert.relevance_reasons = reasons or []
        alert.relevance_summary = summary
        alert.scored_by_model = model
        # Naive UTC timestamps (datetime.utcnow() is deprecated since 3.12);
        # kept as-is for consistency with the rest of the module.
        alert.scored_at = datetime.utcnow()
        alert.status = AlertStatusEnum.SCORED
        alert.processed_at = datetime.utcnow()

        self.db.commit()
        self.db.refresh(alert)
        return alert

    def update_status(
        self,
        alert_id: str,
        status: str,
    ) -> Optional[AlertItemDB]:
        """Set the alert's status; returns None if the alert does not exist."""
        alert = self.get_by_id(alert_id)
        if not alert:
            return None

        alert.status = AlertStatusEnum(status)

        self.db.commit()
        self.db.refresh(alert)
        return alert

    def mark_reviewed(
        self,
        alert_id: str,
        is_relevant: bool,
        notes: Optional[str] = None,
        tags: Optional[List[str]] = None,
    ) -> Optional[AlertItemDB]:
        """Mark an alert as human-reviewed and record the user's feedback.

        Empty *notes*/*tags* are not written, so previously stored values
        survive a feedback update without them.
        """
        alert = self.get_by_id(alert_id)
        if not alert:
            return None

        alert.status = AlertStatusEnum.REVIEWED
        alert.user_marked_relevant = is_relevant
        if notes:
            alert.user_notes = notes
        if tags:
            alert.user_tags = tags

        self.db.commit()
        self.db.refresh(alert)
        return alert

    def archive(self, alert_id: str) -> Optional[AlertItemDB]:
        """Archive an alert (shortcut for update_status(..., "archived"))."""
        return self.update_status(alert_id, "archived")

    # ==================== DELETE ====================

    def delete(self, alert_id: str) -> bool:
        """Delete one alert; returns False when it does not exist."""
        alert = self.get_by_id(alert_id)
        if not alert:
            return False

        self.db.delete(alert)
        self.db.commit()
        return True

    def delete_old(self, days: int = 90, topic_id: Optional[str] = None) -> int:
        """Bulk-delete ARCHIVED alerts fetched more than *days* ago; returns the row count."""
        cutoff = datetime.utcnow() - timedelta(days=days)

        query = self.db.query(AlertItemDB).filter(
            AlertItemDB.status == AlertStatusEnum.ARCHIVED,
            AlertItemDB.fetched_at < cutoff,
        )

        if topic_id:
            query = query.filter(AlertItemDB.topic_id == topic_id)

        count = query.delete()
        self.db.commit()
        return count

    # ==================== FOR RSS FETCHER ====================

    def get_existing_urls(self, topic_id: str) -> set:
        """
        Return all known URL hashes for a topic.

        Used by the RSS fetcher to skip duplicates before inserting.
        """
        results = self.db.query(AlertItemDB.url_hash).filter(
            AlertItemDB.topic_id == topic_id
        ).all()

        return {r[0] for r in results if r[0]}

    def create_from_alert_item(self, alert_item, topic_id: str) -> AlertItemDB:
        """
        Create an alert from an AlertItem object produced by the RSS fetcher.

        Args:
            alert_item: AlertItem from rss_fetcher (duck-typed: needs title,
                url, snippet, source, published_at)
            topic_id: Topic ID to associate with

        Returns:
            Created AlertItemDB instance
        """
        return self.create(
            topic_id=topic_id,
            title=alert_item.title,
            url=alert_item.url,
            snippet=alert_item.snippet or "",
            # source may arrive as an enum or plain string
            source=alert_item.source.value if hasattr(alert_item.source, 'value') else str(alert_item.source),
            published_at=alert_item.published_at,
        )

    # ==================== HELPER ====================

    def _compute_url_hash(self, url: str) -> str:
        """SHA-256 of the normalized URL, truncated to 16 hex chars (64 bits) as the dedup key."""
        normalized = self._normalize_url(url)
        return hashlib.sha256(normalized.encode()).hexdigest()[:16]

    def _normalize_url(self, url: str) -> str:
        """Normalize a URL for deduplication.

        Lowercases the host, strips trailing slashes from the path, drops the
        fragment, and removes common tracking query parameters.
        """
        parsed = urllib.parse.urlparse(url)

        # Tracking parameters to remove (matched case-insensitively).
        tracking_params = {
            "utm_source", "utm_medium", "utm_campaign", "utm_content", "utm_term",
            "fbclid", "gclid", "ref", "source"
        }

        query_params = urllib.parse.parse_qs(parsed.query)
        cleaned_params = {k: v for k, v in query_params.items()
                          if k.lower() not in tracking_params}

        cleaned_query = urllib.parse.urlencode(cleaned_params, doseq=True)

        # Rebuild the URL without the fragment.
        normalized = urllib.parse.urlunparse((
            parsed.scheme,
            parsed.netloc.lower(),
            parsed.path.rstrip("/"),
            parsed.params,
            cleaned_query,
            ""  # No fragment
        ))

        return normalized

    # ==================== CONVERSION ====================

    def to_dict(self, alert: AlertItemDB) -> Dict[str, Any]:
        """Serialize a DB row into the nested dict shape the API returns."""
        return {
            "id": alert.id,
            "topic_id": alert.topic_id,
            "title": alert.title,
            "url": alert.url,
            "snippet": alert.snippet,
            "source": alert.source.value,
            "lang": alert.lang,
            "published_at": alert.published_at.isoformat() if alert.published_at else None,
            "fetched_at": alert.fetched_at.isoformat() if alert.fetched_at else None,
            "status": alert.status.value,
            "relevance": {
                "score": alert.relevance_score,
                "decision": alert.relevance_decision.value if alert.relevance_decision else None,
                "reasons": alert.relevance_reasons,
                "summary": alert.relevance_summary,
                "model": alert.scored_by_model,
                "scored_at": alert.scored_at.isoformat() if alert.scored_at else None,
            },
            "user_feedback": {
                "marked_relevant": alert.user_marked_relevant,
                "tags": alert.user_tags,
                "notes": alert.user_notes,
            },
        }
diff --git a/backend-lehrer/alerts_agent/db/profile_repository.py b/backend-lehrer/alerts_agent/db/profile_repository.py
new file mode 100644
index 0000000..09cdebb
--- /dev/null
+++ b/backend-lehrer/alerts_agent/db/profile_repository.py
@@ -0,0 +1,226 @@
+"""
+Repository für Alert Profiles (Nutzer-Profile für Relevanz-Scoring).
+"""
+import uuid
+from datetime import datetime
+from typing import Optional, List, Dict, Any
+from sqlalchemy.orm import Session as DBSession
+from sqlalchemy.orm.attributes import flag_modified
+
+from .models import AlertProfileDB
+
+
class ProfileRepository:
    """Repository for alert profiles (user profiles driving relevance scoring).

    Every mutating method commits immediately on the injected session.
    """

    def __init__(self, db: DBSession):
        self.db = db

    # ==================== CREATE / GET-OR-CREATE ====================

    def get_or_create(self, user_id: Optional[str] = None) -> AlertProfileDB:
        """Return the profile for *user_id*, creating an empty one if missing.

        With user_id=None this targets the single anonymous default profile.
        """
        profile = self.get_by_user_id(user_id)
        if profile:
            return profile

        # Create a fresh profile; name derived from the user id prefix.
        profile = AlertProfileDB(
            id=str(uuid.uuid4()),
            user_id=user_id,
            name="Default" if not user_id else f"Profile {user_id[:8]}",
        )
        self.db.add(profile)
        self.db.commit()
        self.db.refresh(profile)
        return profile

    def create_default_education_profile(self, user_id: Optional[str] = None) -> AlertProfileDB:
        """Create a profile pre-seeded with German education topics (priorities, exclusions, policies)."""
        profile = AlertProfileDB(
            id=str(uuid.uuid4()),
            user_id=user_id,
            name="Bildung Default",
            priorities=[
                {
                    "label": "Inklusion",
                    "weight": 0.9,
                    "keywords": ["inklusiv", "Förderbedarf", "Behinderung", "Barrierefreiheit"],
                    "description": "Inklusive Bildung, Förderschulen, Nachteilsausgleich"
                },
                {
                    "label": "Datenschutz Schule",
                    "weight": 0.85,
                    "keywords": ["DSGVO", "Schülerfotos", "Einwilligung", "personenbezogene Daten"],
                    "description": "DSGVO in Schulen, Datenschutz bei Klassenfotos"
                },
                {
                    "label": "Schulrecht Bayern",
                    "weight": 0.8,
                    "keywords": ["BayEUG", "Schulordnung", "Kultusministerium", "Bayern"],
                    "description": "Bayerisches Schulrecht, Verordnungen"
                },
                {
                    "label": "Digitalisierung Schule",
                    "weight": 0.7,
                    "keywords": ["DigitalPakt", "Tablet-Klasse", "Lernplattform"],
                    "description": "Digitale Medien im Unterricht"
                },
            ],
            exclusions=["Stellenanzeige", "Praktikum gesucht", "Werbung", "Pressemitteilung"],
            policies={
                "prefer_german_sources": True,
                "max_age_days": 30,
                "min_content_length": 100,
            }
        )
        self.db.add(profile)
        self.db.commit()
        self.db.refresh(profile)
        return profile

    # ==================== READ ====================

    def get_by_id(self, profile_id: str) -> Optional[AlertProfileDB]:
        """Fetch a profile by primary key, or None."""
        return self.db.query(AlertProfileDB).filter(
            AlertProfileDB.id == profile_id
        ).first()

    def get_by_user_id(self, user_id: Optional[str]) -> Optional[AlertProfileDB]:
        """Fetch a profile by user id; falsy *user_id* selects the anonymous default profile."""
        if not user_id:
            # Default profile without a user.
            return self.db.query(AlertProfileDB).filter(
                AlertProfileDB.user_id.is_(None)
            ).first()

        return self.db.query(AlertProfileDB).filter(
            AlertProfileDB.user_id == user_id
        ).first()

    # ==================== UPDATE ====================

    def update_priorities(
        self,
        profile_id: str,
        priorities: List[Dict],
    ) -> Optional[AlertProfileDB]:
        """Replace the profile's priority list; returns None if the profile is missing."""
        profile = self.get_by_id(profile_id)
        if not profile:
            return None

        profile.priorities = priorities
        self.db.commit()
        self.db.refresh(profile)
        return profile

    def update_exclusions(
        self,
        profile_id: str,
        exclusions: List[str],
    ) -> Optional[AlertProfileDB]:
        """Replace the profile's exclusion terms; returns None if the profile is missing."""
        profile = self.get_by_id(profile_id)
        if not profile:
            return None

        profile.exclusions = exclusions
        self.db.commit()
        self.db.refresh(profile)
        return profile

    def add_feedback(
        self,
        profile_id: str,
        title: str,
        url: str,
        is_relevant: bool,
        reason: str = "",
    ) -> Optional[AlertProfileDB]:
        """Record user feedback as a positive/negative example and bump the counters.

        Keeps only the 20 most recent examples per list.
        """
        profile = self.get_by_id(profile_id)
        if not profile:
            return None

        example = {
            "title": title,
            "url": url,
            "reason": reason,
            "added_at": datetime.utcnow().isoformat(),
        }

        if is_relevant:
            # Copy before appending so SQLAlchemy sees a new list object;
            # flag_modified additionally forces the JSON column update.
            examples = list(profile.positive_examples or [])
            examples.append(example)
            profile.positive_examples = examples[-20:]  # keep at most 20
            profile.total_kept += 1
            flag_modified(profile, "positive_examples")
        else:
            examples = list(profile.negative_examples or [])
            examples.append(example)
            profile.negative_examples = examples[-20:]  # keep at most 20
            profile.total_dropped += 1
            flag_modified(profile, "negative_examples")

        profile.total_scored += 1
        self.db.commit()
        self.db.refresh(profile)
        return profile

    def update_stats(
        self,
        profile_id: str,
        kept: int = 0,
        dropped: int = 0,
    ) -> Optional[AlertProfileDB]:
        """Add batch scoring counts to the profile's statistics."""
        profile = self.get_by_id(profile_id)
        if not profile:
            return None

        profile.total_scored += kept + dropped
        profile.total_kept += kept
        profile.total_dropped += dropped

        self.db.commit()
        self.db.refresh(profile)
        return profile

    # ==================== DELETE ====================

    def delete(self, profile_id: str) -> bool:
        """Delete a profile; returns False when it does not exist."""
        profile = self.get_by_id(profile_id)
        if not profile:
            return False

        self.db.delete(profile)
        self.db.commit()
        return True

    # ==================== CONVERSION ====================

    def to_dict(self, profile: AlertProfileDB) -> Dict[str, Any]:
        """Serialize a profile for the API; examples are reported as counts, not contents."""
        return {
            "id": profile.id,
            "user_id": profile.user_id,
            "name": profile.name,
            "priorities": profile.priorities,
            "exclusions": profile.exclusions,
            "policies": profile.policies,
            "examples": {
                "positive": len(profile.positive_examples or []),
                "negative": len(profile.negative_examples or []),
            },
            "stats": {
                "total_scored": profile.total_scored,
                "total_kept": profile.total_kept,
                "total_dropped": profile.total_dropped,
                "accuracy_estimate": profile.accuracy_estimate,
            },
            "created_at": profile.created_at.isoformat() if profile.created_at else None,
            "updated_at": profile.updated_at.isoformat() if profile.updated_at else None,
        }
diff --git a/backend-lehrer/alerts_agent/db/repository.py b/backend-lehrer/alerts_agent/db/repository.py
index b3e5b98..cd0739b 100644
--- a/backend-lehrer/alerts_agent/db/repository.py
+++ b/backend-lehrer/alerts_agent/db/repository.py
@@ -1,992 +1,20 @@
"""
Repository für Alerts Agent - CRUD Operationen für Topics, Items, Rules und Profile.
-Abstraktion der Datenbank-Operationen.
+Barrel re-export — die eigentliche Logik lebt in:
+- topic_repository.py
+- item_repository.py
+- rule_repository.py
+- profile_repository.py
"""
-import hashlib
-from datetime import datetime
-from typing import Optional, List, Dict, Any
-from sqlalchemy.orm import Session as DBSession
-from sqlalchemy.orm.attributes import flag_modified
-from sqlalchemy import or_, and_, func
-
-from .models import (
- AlertTopicDB, AlertItemDB, AlertRuleDB, AlertProfileDB,
- AlertSourceEnum, AlertStatusEnum, RelevanceDecisionEnum,
- FeedTypeEnum, RuleActionEnum
-)
-
-
-# =============================================================================
-# TOPIC REPOSITORY
-# =============================================================================
-
-class TopicRepository:
- """Repository für Alert Topics (Feed-Quellen)."""
-
- def __init__(self, db: DBSession):
- self.db = db
-
- # ==================== CREATE ====================
-
- def create(
- self,
- name: str,
- feed_url: str = None,
- feed_type: str = "rss",
- user_id: str = None,
- description: str = "",
- fetch_interval_minutes: int = 60,
- is_active: bool = True,
- ) -> AlertTopicDB:
- """Erstellt ein neues Topic."""
- import uuid
- topic = AlertTopicDB(
- id=str(uuid.uuid4()),
- user_id=user_id,
- name=name,
- description=description,
- feed_url=feed_url,
- feed_type=FeedTypeEnum(feed_type),
- fetch_interval_minutes=fetch_interval_minutes,
- is_active=is_active,
- )
- self.db.add(topic)
- self.db.commit()
- self.db.refresh(topic)
- return topic
-
- # ==================== READ ====================
-
- def get_by_id(self, topic_id: str) -> Optional[AlertTopicDB]:
- """Holt ein Topic nach ID."""
- return self.db.query(AlertTopicDB).filter(
- AlertTopicDB.id == topic_id
- ).first()
-
- def get_all(
- self,
- user_id: str = None,
- is_active: bool = None,
- limit: int = 100,
- offset: int = 0,
- ) -> List[AlertTopicDB]:
- """Holt alle Topics mit optionalen Filtern."""
- query = self.db.query(AlertTopicDB)
-
- if user_id:
- query = query.filter(AlertTopicDB.user_id == user_id)
- if is_active is not None:
- query = query.filter(AlertTopicDB.is_active == is_active)
-
- return query.order_by(
- AlertTopicDB.created_at.desc()
- ).offset(offset).limit(limit).all()
-
- def get_active_for_fetch(self) -> List[AlertTopicDB]:
- """Holt alle aktiven Topics die gefetcht werden sollten."""
- # Topics wo fetch_interval_minutes vergangen ist
- return self.db.query(AlertTopicDB).filter(
- AlertTopicDB.is_active == True,
- AlertTopicDB.feed_url.isnot(None),
- ).all()
-
- # ==================== UPDATE ====================
-
- def update(
- self,
- topic_id: str,
- name: str = None,
- description: str = None,
- feed_url: str = None,
- feed_type: str = None,
- is_active: bool = None,
- fetch_interval_minutes: int = None,
- ) -> Optional[AlertTopicDB]:
- """Aktualisiert ein Topic."""
- topic = self.get_by_id(topic_id)
- if not topic:
- return None
-
- if name is not None:
- topic.name = name
- if description is not None:
- topic.description = description
- if feed_url is not None:
- topic.feed_url = feed_url
- if feed_type is not None:
- topic.feed_type = FeedTypeEnum(feed_type)
- if is_active is not None:
- topic.is_active = is_active
- if fetch_interval_minutes is not None:
- topic.fetch_interval_minutes = fetch_interval_minutes
-
- self.db.commit()
- self.db.refresh(topic)
- return topic
-
- def update_fetch_status(
- self,
- topic_id: str,
- last_fetch_error: str = None,
- items_fetched: int = 0,
- ) -> Optional[AlertTopicDB]:
- """Aktualisiert den Fetch-Status eines Topics."""
- topic = self.get_by_id(topic_id)
- if not topic:
- return None
-
- topic.last_fetched_at = datetime.utcnow()
- topic.last_fetch_error = last_fetch_error
- topic.total_items_fetched += items_fetched
-
- self.db.commit()
- self.db.refresh(topic)
- return topic
-
- def increment_stats(
- self,
- topic_id: str,
- kept: int = 0,
- dropped: int = 0,
- ) -> Optional[AlertTopicDB]:
- """Erhöht die Statistiken eines Topics."""
- topic = self.get_by_id(topic_id)
- if not topic:
- return None
-
- topic.items_kept += kept
- topic.items_dropped += dropped
-
- self.db.commit()
- self.db.refresh(topic)
- return topic
-
- # ==================== DELETE ====================
-
- def delete(self, topic_id: str) -> bool:
- """Löscht ein Topic (und alle zugehörigen Items via CASCADE)."""
- topic = self.get_by_id(topic_id)
- if not topic:
- return False
-
- self.db.delete(topic)
- self.db.commit()
- return True
-
- # ==================== CONVERSION ====================
-
- def to_dict(self, topic: AlertTopicDB) -> Dict[str, Any]:
- """Konvertiert DB-Model zu Dictionary."""
- return {
- "id": topic.id,
- "user_id": topic.user_id,
- "name": topic.name,
- "description": topic.description,
- "feed_url": topic.feed_url,
- "feed_type": topic.feed_type.value,
- "is_active": topic.is_active,
- "fetch_interval_minutes": topic.fetch_interval_minutes,
- "last_fetched_at": topic.last_fetched_at.isoformat() if topic.last_fetched_at else None,
- "last_fetch_error": topic.last_fetch_error,
- "stats": {
- "total_items_fetched": topic.total_items_fetched,
- "items_kept": topic.items_kept,
- "items_dropped": topic.items_dropped,
- },
- "created_at": topic.created_at.isoformat() if topic.created_at else None,
- "updated_at": topic.updated_at.isoformat() if topic.updated_at else None,
- }
-
-
-# =============================================================================
-# ALERT ITEM REPOSITORY
-# =============================================================================
-
-class AlertItemRepository:
- """Repository für Alert Items (einzelne Alerts/Artikel)."""
-
- def __init__(self, db: DBSession):
- self.db = db
-
- # ==================== CREATE ====================
-
- def create(
- self,
- topic_id: str,
- title: str,
- url: str,
- snippet: str = "",
- source: str = "google_alerts_rss",
- published_at: datetime = None,
- lang: str = "de",
- ) -> AlertItemDB:
- """Erstellt einen neuen Alert."""
- import uuid
-
- # URL-Hash berechnen
- url_hash = self._compute_url_hash(url)
-
- alert = AlertItemDB(
- id=str(uuid.uuid4()),
- topic_id=topic_id,
- title=title,
- url=url,
- snippet=snippet,
- source=AlertSourceEnum(source),
- published_at=published_at,
- lang=lang,
- url_hash=url_hash,
- canonical_url=self._normalize_url(url),
- )
- self.db.add(alert)
- self.db.commit()
- self.db.refresh(alert)
- return alert
-
- def create_if_not_exists(
- self,
- topic_id: str,
- title: str,
- url: str,
- snippet: str = "",
- source: str = "google_alerts_rss",
- published_at: datetime = None,
- ) -> Optional[AlertItemDB]:
- """Erstellt einen Alert nur wenn URL noch nicht existiert."""
- url_hash = self._compute_url_hash(url)
-
- existing = self.db.query(AlertItemDB).filter(
- AlertItemDB.url_hash == url_hash
- ).first()
-
- if existing:
- return None # Duplikat
-
- return self.create(
- topic_id=topic_id,
- title=title,
- url=url,
- snippet=snippet,
- source=source,
- published_at=published_at,
- )
-
- # ==================== READ ====================
-
- def get_by_id(self, alert_id: str) -> Optional[AlertItemDB]:
- """Holt einen Alert nach ID."""
- return self.db.query(AlertItemDB).filter(
- AlertItemDB.id == alert_id
- ).first()
-
- def get_by_url_hash(self, url_hash: str) -> Optional[AlertItemDB]:
- """Holt einen Alert nach URL-Hash."""
- return self.db.query(AlertItemDB).filter(
- AlertItemDB.url_hash == url_hash
- ).first()
-
- def get_inbox(
- self,
- user_id: str = None,
- topic_id: str = None,
- decision: str = None,
- status: str = None,
- limit: int = 50,
- offset: int = 0,
- ) -> List[AlertItemDB]:
- """
- Holt Inbox-Items mit Filtern.
-
- Ohne decision werden KEEP und REVIEW angezeigt.
- """
- query = self.db.query(AlertItemDB)
-
- if topic_id:
- query = query.filter(AlertItemDB.topic_id == topic_id)
-
- if decision:
- query = query.filter(
- AlertItemDB.relevance_decision == RelevanceDecisionEnum(decision)
- )
- else:
- # Default: KEEP und REVIEW
- query = query.filter(
- or_(
- AlertItemDB.relevance_decision == RelevanceDecisionEnum.KEEP,
- AlertItemDB.relevance_decision == RelevanceDecisionEnum.REVIEW,
- AlertItemDB.relevance_decision.is_(None)
- )
- )
-
- if status:
- query = query.filter(AlertItemDB.status == AlertStatusEnum(status))
-
- return query.order_by(
- AlertItemDB.relevance_score.desc().nullslast(),
- AlertItemDB.fetched_at.desc()
- ).offset(offset).limit(limit).all()
-
- def get_unscored(
- self,
- topic_id: str = None,
- limit: int = 100,
- ) -> List[AlertItemDB]:
- """Holt alle unbewerteten Alerts."""
- query = self.db.query(AlertItemDB).filter(
- AlertItemDB.status == AlertStatusEnum.NEW
- )
-
- if topic_id:
- query = query.filter(AlertItemDB.topic_id == topic_id)
-
- return query.order_by(AlertItemDB.fetched_at.desc()).limit(limit).all()
-
- def get_by_topic(
- self,
- topic_id: str,
- limit: int = 100,
- offset: int = 0,
- ) -> List[AlertItemDB]:
- """Holt alle Alerts eines Topics."""
- return self.db.query(AlertItemDB).filter(
- AlertItemDB.topic_id == topic_id
- ).order_by(
- AlertItemDB.fetched_at.desc()
- ).offset(offset).limit(limit).all()
-
- def count_by_status(self, topic_id: str = None) -> Dict[str, int]:
- """Zählt Alerts nach Status."""
- query = self.db.query(
- AlertItemDB.status,
- func.count(AlertItemDB.id).label('count')
- )
-
- if topic_id:
- query = query.filter(AlertItemDB.topic_id == topic_id)
-
- results = query.group_by(AlertItemDB.status).all()
-
- return {r[0].value: r[1] for r in results}
-
- def count_by_decision(self, topic_id: str = None) -> Dict[str, int]:
- """Zählt Alerts nach Relevanz-Entscheidung."""
- query = self.db.query(
- AlertItemDB.relevance_decision,
- func.count(AlertItemDB.id).label('count')
- )
-
- if topic_id:
- query = query.filter(AlertItemDB.topic_id == topic_id)
-
- results = query.group_by(AlertItemDB.relevance_decision).all()
-
- return {
- (r[0].value if r[0] else "unscored"): r[1]
- for r in results
- }
-
- # ==================== UPDATE ====================
-
- def update_scoring(
- self,
- alert_id: str,
- score: float,
- decision: str,
- reasons: List[str] = None,
- summary: str = None,
- model: str = None,
- ) -> Optional[AlertItemDB]:
- """Aktualisiert das Scoring eines Alerts."""
- alert = self.get_by_id(alert_id)
- if not alert:
- return None
-
- alert.relevance_score = score
- alert.relevance_decision = RelevanceDecisionEnum(decision)
- alert.relevance_reasons = reasons or []
- alert.relevance_summary = summary
- alert.scored_by_model = model
- alert.scored_at = datetime.utcnow()
- alert.status = AlertStatusEnum.SCORED
- alert.processed_at = datetime.utcnow()
-
- self.db.commit()
- self.db.refresh(alert)
- return alert
-
- def update_status(
- self,
- alert_id: str,
- status: str,
- ) -> Optional[AlertItemDB]:
- """Aktualisiert den Status eines Alerts."""
- alert = self.get_by_id(alert_id)
- if not alert:
- return None
-
- alert.status = AlertStatusEnum(status)
-
- self.db.commit()
- self.db.refresh(alert)
- return alert
-
- def mark_reviewed(
- self,
- alert_id: str,
- is_relevant: bool,
- notes: str = None,
- tags: List[str] = None,
- ) -> Optional[AlertItemDB]:
- """Markiert einen Alert als reviewed mit Feedback."""
- alert = self.get_by_id(alert_id)
- if not alert:
- return None
-
- alert.status = AlertStatusEnum.REVIEWED
- alert.user_marked_relevant = is_relevant
- if notes:
- alert.user_notes = notes
- if tags:
- alert.user_tags = tags
-
- self.db.commit()
- self.db.refresh(alert)
- return alert
-
- def archive(self, alert_id: str) -> Optional[AlertItemDB]:
- """Archiviert einen Alert."""
- return self.update_status(alert_id, "archived")
-
- # ==================== DELETE ====================
-
- def delete(self, alert_id: str) -> bool:
- """Löscht einen Alert."""
- alert = self.get_by_id(alert_id)
- if not alert:
- return False
-
- self.db.delete(alert)
- self.db.commit()
- return True
-
- def delete_old(self, days: int = 90, topic_id: str = None) -> int:
- """Löscht alte archivierte Alerts."""
- from datetime import timedelta
- cutoff = datetime.utcnow() - timedelta(days=days)
-
- query = self.db.query(AlertItemDB).filter(
- AlertItemDB.status == AlertStatusEnum.ARCHIVED,
- AlertItemDB.fetched_at < cutoff,
- )
-
- if topic_id:
- query = query.filter(AlertItemDB.topic_id == topic_id)
-
- count = query.delete()
- self.db.commit()
- return count
-
- # ==================== FOR RSS FETCHER ====================
-
- def get_existing_urls(self, topic_id: str) -> set:
- """
- Holt alle bekannten URL-Hashes für ein Topic.
-
- Wird vom RSS-Fetcher verwendet um Duplikate zu vermeiden.
- """
- results = self.db.query(AlertItemDB.url_hash).filter(
- AlertItemDB.topic_id == topic_id
- ).all()
-
- return {r[0] for r in results if r[0]}
-
- def create_from_alert_item(self, alert_item, topic_id: str) -> AlertItemDB:
- """
- Erstellt einen Alert aus einem AlertItem-Objekt vom RSS-Fetcher.
-
- Args:
- alert_item: AlertItem from rss_fetcher
- topic_id: Topic ID to associate with
-
- Returns:
- Created AlertItemDB instance
- """
- return self.create(
- topic_id=topic_id,
- title=alert_item.title,
- url=alert_item.url,
- snippet=alert_item.snippet or "",
- source=alert_item.source.value if hasattr(alert_item.source, 'value') else str(alert_item.source),
- published_at=alert_item.published_at,
- )
-
- # ==================== HELPER ====================
-
- def _compute_url_hash(self, url: str) -> str:
- """Berechnet SHA256 Hash der normalisierten URL."""
- normalized = self._normalize_url(url)
- return hashlib.sha256(normalized.encode()).hexdigest()[:16]
-
- def _normalize_url(self, url: str) -> str:
- """Normalisiert URL für Deduplizierung."""
- import urllib.parse
- parsed = urllib.parse.urlparse(url)
-
- # Tracking-Parameter entfernen
- tracking_params = {
- "utm_source", "utm_medium", "utm_campaign", "utm_content", "utm_term",
- "fbclid", "gclid", "ref", "source"
- }
-
- query_params = urllib.parse.parse_qs(parsed.query)
- cleaned_params = {k: v for k, v in query_params.items()
- if k.lower() not in tracking_params}
-
- cleaned_query = urllib.parse.urlencode(cleaned_params, doseq=True)
-
- # Rekonstruiere URL ohne Fragment
- normalized = urllib.parse.urlunparse((
- parsed.scheme,
- parsed.netloc.lower(),
- parsed.path.rstrip("/"),
- parsed.params,
- cleaned_query,
- "" # No fragment
- ))
-
- return normalized
-
- # ==================== CONVERSION ====================
-
- def to_dict(self, alert: AlertItemDB) -> Dict[str, Any]:
- """Konvertiert DB-Model zu Dictionary."""
- return {
- "id": alert.id,
- "topic_id": alert.topic_id,
- "title": alert.title,
- "url": alert.url,
- "snippet": alert.snippet,
- "source": alert.source.value,
- "lang": alert.lang,
- "published_at": alert.published_at.isoformat() if alert.published_at else None,
- "fetched_at": alert.fetched_at.isoformat() if alert.fetched_at else None,
- "status": alert.status.value,
- "relevance": {
- "score": alert.relevance_score,
- "decision": alert.relevance_decision.value if alert.relevance_decision else None,
- "reasons": alert.relevance_reasons,
- "summary": alert.relevance_summary,
- "model": alert.scored_by_model,
- "scored_at": alert.scored_at.isoformat() if alert.scored_at else None,
- },
- "user_feedback": {
- "marked_relevant": alert.user_marked_relevant,
- "tags": alert.user_tags,
- "notes": alert.user_notes,
- },
- }
-
-
-# =============================================================================
-# ALERT RULE REPOSITORY
-# =============================================================================
-
-class RuleRepository:
- """Repository für Alert Rules (Filterregeln)."""
-
- def __init__(self, db: DBSession):
- self.db = db
-
- # ==================== CREATE ====================
-
- def create(
- self,
- name: str,
- conditions: List[Dict],
- action_type: str = "keep",
- action_config: Dict = None,
- topic_id: str = None,
- user_id: str = None,
- description: str = "",
- priority: int = 0,
- ) -> AlertRuleDB:
- """Erstellt eine neue Regel."""
- import uuid
- rule = AlertRuleDB(
- id=str(uuid.uuid4()),
- topic_id=topic_id,
- user_id=user_id,
- name=name,
- description=description,
- conditions=conditions,
- action_type=RuleActionEnum(action_type),
- action_config=action_config or {},
- priority=priority,
- )
- self.db.add(rule)
- self.db.commit()
- self.db.refresh(rule)
- return rule
-
- # ==================== READ ====================
-
- def get_by_id(self, rule_id: str) -> Optional[AlertRuleDB]:
- """Holt eine Regel nach ID."""
- return self.db.query(AlertRuleDB).filter(
- AlertRuleDB.id == rule_id
- ).first()
-
- def get_active(
- self,
- topic_id: str = None,
- user_id: str = None,
- ) -> List[AlertRuleDB]:
- """Holt alle aktiven Regeln, sortiert nach Priorität."""
- query = self.db.query(AlertRuleDB).filter(
- AlertRuleDB.is_active == True
- )
-
- if topic_id:
- # Topic-spezifische und globale Regeln
- query = query.filter(
- or_(
- AlertRuleDB.topic_id == topic_id,
- AlertRuleDB.topic_id.is_(None)
- )
- )
-
- if user_id:
- query = query.filter(
- or_(
- AlertRuleDB.user_id == user_id,
- AlertRuleDB.user_id.is_(None)
- )
- )
-
- return query.order_by(AlertRuleDB.priority.desc()).all()
-
- def get_all(
- self,
- user_id: str = None,
- topic_id: str = None,
- is_active: bool = None,
- ) -> List[AlertRuleDB]:
- """Holt alle Regeln mit optionalen Filtern."""
- query = self.db.query(AlertRuleDB)
-
- if user_id:
- query = query.filter(AlertRuleDB.user_id == user_id)
- if topic_id:
- query = query.filter(AlertRuleDB.topic_id == topic_id)
- if is_active is not None:
- query = query.filter(AlertRuleDB.is_active == is_active)
-
- return query.order_by(AlertRuleDB.priority.desc()).all()
-
- # ==================== UPDATE ====================
-
- def update(
- self,
- rule_id: str,
- name: str = None,
- description: str = None,
- conditions: List[Dict] = None,
- action_type: str = None,
- action_config: Dict = None,
- priority: int = None,
- is_active: bool = None,
- ) -> Optional[AlertRuleDB]:
- """Aktualisiert eine Regel."""
- rule = self.get_by_id(rule_id)
- if not rule:
- return None
-
- if name is not None:
- rule.name = name
- if description is not None:
- rule.description = description
- if conditions is not None:
- rule.conditions = conditions
- if action_type is not None:
- rule.action_type = RuleActionEnum(action_type)
- if action_config is not None:
- rule.action_config = action_config
- if priority is not None:
- rule.priority = priority
- if is_active is not None:
- rule.is_active = is_active
-
- self.db.commit()
- self.db.refresh(rule)
- return rule
-
- def increment_match_count(self, rule_id: str) -> Optional[AlertRuleDB]:
- """Erhöht den Match-Counter einer Regel."""
- rule = self.get_by_id(rule_id)
- if not rule:
- return None
-
- rule.match_count += 1
- rule.last_matched_at = datetime.utcnow()
-
- self.db.commit()
- self.db.refresh(rule)
- return rule
-
- # ==================== DELETE ====================
-
- def delete(self, rule_id: str) -> bool:
- """Löscht eine Regel."""
- rule = self.get_by_id(rule_id)
- if not rule:
- return False
-
- self.db.delete(rule)
- self.db.commit()
- return True
-
- # ==================== CONVERSION ====================
-
- def to_dict(self, rule: AlertRuleDB) -> Dict[str, Any]:
- """Konvertiert DB-Model zu Dictionary."""
- return {
- "id": rule.id,
- "topic_id": rule.topic_id,
- "user_id": rule.user_id,
- "name": rule.name,
- "description": rule.description,
- "conditions": rule.conditions,
- "action_type": rule.action_type.value,
- "action_config": rule.action_config,
- "priority": rule.priority,
- "is_active": rule.is_active,
- "stats": {
- "match_count": rule.match_count,
- "last_matched_at": rule.last_matched_at.isoformat() if rule.last_matched_at else None,
- },
- "created_at": rule.created_at.isoformat() if rule.created_at else None,
- "updated_at": rule.updated_at.isoformat() if rule.updated_at else None,
- }
-
-
-# =============================================================================
-# ALERT PROFILE REPOSITORY
-# =============================================================================
-
-class ProfileRepository:
- """Repository für Alert Profiles (Nutzer-Profile für Relevanz-Scoring)."""
-
- def __init__(self, db: DBSession):
- self.db = db
-
- # ==================== CREATE / GET-OR-CREATE ====================
-
- def get_or_create(self, user_id: str = None) -> AlertProfileDB:
- """Holt oder erstellt ein Profil."""
- profile = self.get_by_user_id(user_id)
- if profile:
- return profile
-
- # Neues Profil erstellen
- import uuid
- profile = AlertProfileDB(
- id=str(uuid.uuid4()),
- user_id=user_id,
- name="Default" if not user_id else f"Profile {user_id[:8]}",
- )
- self.db.add(profile)
- self.db.commit()
- self.db.refresh(profile)
- return profile
-
- def create_default_education_profile(self, user_id: str = None) -> AlertProfileDB:
- """Erstellt ein Standard-Profil für Bildungsthemen."""
- import uuid
- profile = AlertProfileDB(
- id=str(uuid.uuid4()),
- user_id=user_id,
- name="Bildung Default",
- priorities=[
- {
- "label": "Inklusion",
- "weight": 0.9,
- "keywords": ["inklusiv", "Förderbedarf", "Behinderung", "Barrierefreiheit"],
- "description": "Inklusive Bildung, Förderschulen, Nachteilsausgleich"
- },
- {
- "label": "Datenschutz Schule",
- "weight": 0.85,
- "keywords": ["DSGVO", "Schülerfotos", "Einwilligung", "personenbezogene Daten"],
- "description": "DSGVO in Schulen, Datenschutz bei Klassenfotos"
- },
- {
- "label": "Schulrecht Bayern",
- "weight": 0.8,
- "keywords": ["BayEUG", "Schulordnung", "Kultusministerium", "Bayern"],
- "description": "Bayerisches Schulrecht, Verordnungen"
- },
- {
- "label": "Digitalisierung Schule",
- "weight": 0.7,
- "keywords": ["DigitalPakt", "Tablet-Klasse", "Lernplattform"],
- "description": "Digitale Medien im Unterricht"
- },
- ],
- exclusions=["Stellenanzeige", "Praktikum gesucht", "Werbung", "Pressemitteilung"],
- policies={
- "prefer_german_sources": True,
- "max_age_days": 30,
- "min_content_length": 100,
- }
- )
- self.db.add(profile)
- self.db.commit()
- self.db.refresh(profile)
- return profile
-
- # ==================== READ ====================
-
- def get_by_id(self, profile_id: str) -> Optional[AlertProfileDB]:
- """Holt ein Profil nach ID."""
- return self.db.query(AlertProfileDB).filter(
- AlertProfileDB.id == profile_id
- ).first()
-
- def get_by_user_id(self, user_id: str) -> Optional[AlertProfileDB]:
- """Holt ein Profil nach User-ID."""
- if not user_id:
- # Default-Profil ohne User
- return self.db.query(AlertProfileDB).filter(
- AlertProfileDB.user_id.is_(None)
- ).first()
-
- return self.db.query(AlertProfileDB).filter(
- AlertProfileDB.user_id == user_id
- ).first()
-
- # ==================== UPDATE ====================
-
- def update_priorities(
- self,
- profile_id: str,
- priorities: List[Dict],
- ) -> Optional[AlertProfileDB]:
- """Aktualisiert die Prioritäten eines Profils."""
- profile = self.get_by_id(profile_id)
- if not profile:
- return None
-
- profile.priorities = priorities
- self.db.commit()
- self.db.refresh(profile)
- return profile
-
- def update_exclusions(
- self,
- profile_id: str,
- exclusions: List[str],
- ) -> Optional[AlertProfileDB]:
- """Aktualisiert die Ausschlüsse eines Profils."""
- profile = self.get_by_id(profile_id)
- if not profile:
- return None
-
- profile.exclusions = exclusions
- self.db.commit()
- self.db.refresh(profile)
- return profile
-
- def add_feedback(
- self,
- profile_id: str,
- title: str,
- url: str,
- is_relevant: bool,
- reason: str = "",
- ) -> Optional[AlertProfileDB]:
- """Fügt Feedback als Beispiel hinzu."""
- profile = self.get_by_id(profile_id)
- if not profile:
- return None
-
- example = {
- "title": title,
- "url": url,
- "reason": reason,
- "added_at": datetime.utcnow().isoformat(),
- }
-
- if is_relevant:
- examples = list(profile.positive_examples or [])
- examples.append(example)
- profile.positive_examples = examples[-20:] # Max 20
- profile.total_kept += 1
- flag_modified(profile, "positive_examples")
- else:
- examples = list(profile.negative_examples or [])
- examples.append(example)
- profile.negative_examples = examples[-20:] # Max 20
- profile.total_dropped += 1
- flag_modified(profile, "negative_examples")
-
- profile.total_scored += 1
- self.db.commit()
- self.db.refresh(profile)
- return profile
-
- def update_stats(
- self,
- profile_id: str,
- kept: int = 0,
- dropped: int = 0,
- ) -> Optional[AlertProfileDB]:
- """Aktualisiert die Statistiken eines Profils."""
- profile = self.get_by_id(profile_id)
- if not profile:
- return None
-
- profile.total_scored += kept + dropped
- profile.total_kept += kept
- profile.total_dropped += dropped
-
- self.db.commit()
- self.db.refresh(profile)
- return profile
-
- # ==================== DELETE ====================
-
- def delete(self, profile_id: str) -> bool:
- """Löscht ein Profil."""
- profile = self.get_by_id(profile_id)
- if not profile:
- return False
-
- self.db.delete(profile)
- self.db.commit()
- return True
-
- # ==================== CONVERSION ====================
-
- def to_dict(self, profile: AlertProfileDB) -> Dict[str, Any]:
- """Konvertiert DB-Model zu Dictionary."""
- return {
- "id": profile.id,
- "user_id": profile.user_id,
- "name": profile.name,
- "priorities": profile.priorities,
- "exclusions": profile.exclusions,
- "policies": profile.policies,
- "examples": {
- "positive": len(profile.positive_examples or []),
- "negative": len(profile.negative_examples or []),
- },
- "stats": {
- "total_scored": profile.total_scored,
- "total_kept": profile.total_kept,
- "total_dropped": profile.total_dropped,
- "accuracy_estimate": profile.accuracy_estimate,
- },
- "created_at": profile.created_at.isoformat() if profile.created_at else None,
- "updated_at": profile.updated_at.isoformat() if profile.updated_at else None,
- }
+from .topic_repository import TopicRepository
+from .item_repository import AlertItemRepository
+from .rule_repository import RuleRepository
+from .profile_repository import ProfileRepository
+
+__all__ = [
+ "TopicRepository",
+ "AlertItemRepository",
+ "RuleRepository",
+ "ProfileRepository",
+]
diff --git a/backend-lehrer/alerts_agent/db/rule_repository.py b/backend-lehrer/alerts_agent/db/rule_repository.py
new file mode 100644
index 0000000..f969ee1
--- /dev/null
+++ b/backend-lehrer/alerts_agent/db/rule_repository.py
@@ -0,0 +1,187 @@
+"""
+Repository für Alert Rules (Filterregeln).
+"""
+import uuid
+from datetime import datetime
+from typing import Optional, List, Dict, Any
+from sqlalchemy.orm import Session as DBSession
+from sqlalchemy import or_
+
+from .models import AlertRuleDB, RuleActionEnum
+
+
+class RuleRepository:
+ """Repository für Alert Rules (Filterregeln)."""
+
+ def __init__(self, db: DBSession):
+ self.db = db
+
+ # ==================== CREATE ====================
+
+ def create(
+ self,
+ name: str,
+ conditions: List[Dict],
+ action_type: str = "keep",
+ action_config: Dict = None,
+ topic_id: str = None,
+ user_id: str = None,
+ description: str = "",
+ priority: int = 0,
+ ) -> AlertRuleDB:
+ """Erstellt eine neue Regel."""
+ rule = AlertRuleDB(
+ id=str(uuid.uuid4()),
+ topic_id=topic_id,
+ user_id=user_id,
+ name=name,
+ description=description,
+ conditions=conditions,
+ action_type=RuleActionEnum(action_type),
+ action_config=action_config or {},
+ priority=priority,
+ )
+ self.db.add(rule)
+ self.db.commit()
+ self.db.refresh(rule)
+ return rule
+
+ # ==================== READ ====================
+
+ def get_by_id(self, rule_id: str) -> Optional[AlertRuleDB]:
+ """Holt eine Regel nach ID."""
+ return self.db.query(AlertRuleDB).filter(
+ AlertRuleDB.id == rule_id
+ ).first()
+
+ def get_active(
+ self,
+ topic_id: str = None,
+ user_id: str = None,
+ ) -> List[AlertRuleDB]:
+ """Holt alle aktiven Regeln, sortiert nach Priorität."""
+ query = self.db.query(AlertRuleDB).filter(
+ AlertRuleDB.is_active == True
+ )
+
+ if topic_id:
+ # Topic-spezifische und globale Regeln
+ query = query.filter(
+ or_(
+ AlertRuleDB.topic_id == topic_id,
+ AlertRuleDB.topic_id.is_(None)
+ )
+ )
+
+ if user_id:
+ query = query.filter(
+ or_(
+ AlertRuleDB.user_id == user_id,
+ AlertRuleDB.user_id.is_(None)
+ )
+ )
+
+ return query.order_by(AlertRuleDB.priority.desc()).all()
+
+ def get_all(
+ self,
+ user_id: str = None,
+ topic_id: str = None,
+ is_active: bool = None,
+ ) -> List[AlertRuleDB]:
+ """Holt alle Regeln mit optionalen Filtern."""
+ query = self.db.query(AlertRuleDB)
+
+ if user_id:
+ query = query.filter(AlertRuleDB.user_id == user_id)
+ if topic_id:
+ query = query.filter(AlertRuleDB.topic_id == topic_id)
+ if is_active is not None:
+ query = query.filter(AlertRuleDB.is_active == is_active)
+
+ return query.order_by(AlertRuleDB.priority.desc()).all()
+
+ # ==================== UPDATE ====================
+
+ def update(
+ self,
+ rule_id: str,
+ name: str = None,
+ description: str = None,
+ conditions: List[Dict] = None,
+ action_type: str = None,
+ action_config: Dict = None,
+ priority: int = None,
+ is_active: bool = None,
+ ) -> Optional[AlertRuleDB]:
+ """Aktualisiert eine Regel."""
+ rule = self.get_by_id(rule_id)
+ if not rule:
+ return None
+
+ if name is not None:
+ rule.name = name
+ if description is not None:
+ rule.description = description
+ if conditions is not None:
+ rule.conditions = conditions
+ if action_type is not None:
+ rule.action_type = RuleActionEnum(action_type)
+ if action_config is not None:
+ rule.action_config = action_config
+ if priority is not None:
+ rule.priority = priority
+ if is_active is not None:
+ rule.is_active = is_active
+
+ self.db.commit()
+ self.db.refresh(rule)
+ return rule
+
+ def increment_match_count(self, rule_id: str) -> Optional[AlertRuleDB]:
+ """Erhöht den Match-Counter einer Regel."""
+ rule = self.get_by_id(rule_id)
+ if not rule:
+ return None
+
+ rule.match_count += 1
+ rule.last_matched_at = datetime.utcnow()
+
+ self.db.commit()
+ self.db.refresh(rule)
+ return rule
+
+ # ==================== DELETE ====================
+
+ def delete(self, rule_id: str) -> bool:
+ """Löscht eine Regel."""
+ rule = self.get_by_id(rule_id)
+ if not rule:
+ return False
+
+ self.db.delete(rule)
+ self.db.commit()
+ return True
+
+ # ==================== CONVERSION ====================
+
+ def to_dict(self, rule: AlertRuleDB) -> Dict[str, Any]:
+ """Konvertiert DB-Model zu Dictionary."""
+ return {
+ "id": rule.id,
+ "topic_id": rule.topic_id,
+ "user_id": rule.user_id,
+ "name": rule.name,
+ "description": rule.description,
+ "conditions": rule.conditions,
+ "action_type": rule.action_type.value,
+ "action_config": rule.action_config,
+ "priority": rule.priority,
+ "is_active": rule.is_active,
+ "stats": {
+ "match_count": rule.match_count,
+ "last_matched_at": rule.last_matched_at.isoformat() if rule.last_matched_at else None,
+ },
+ "created_at": rule.created_at.isoformat() if rule.created_at else None,
+ "updated_at": rule.updated_at.isoformat() if rule.updated_at else None,
+ }
diff --git a/backend-lehrer/alerts_agent/db/topic_repository.py b/backend-lehrer/alerts_agent/db/topic_repository.py
new file mode 100644
index 0000000..77c5bab
--- /dev/null
+++ b/backend-lehrer/alerts_agent/db/topic_repository.py
@@ -0,0 +1,185 @@
+"""
+Repository für Alert Topics (Feed-Quellen).
+"""
+import uuid
+from datetime import datetime
+from typing import Optional, List, Dict, Any
+from sqlalchemy.orm import Session as DBSession
+
+from .models import AlertTopicDB, FeedTypeEnum
+
+
+class TopicRepository:
+ """Repository für Alert Topics (Feed-Quellen)."""
+
+ def __init__(self, db: DBSession):
+ self.db = db
+
+ # ==================== CREATE ====================
+
+ def create(
+ self,
+ name: str,
+ feed_url: str = None,
+ feed_type: str = "rss",
+ user_id: str = None,
+ description: str = "",
+ fetch_interval_minutes: int = 60,
+ is_active: bool = True,
+ ) -> AlertTopicDB:
+ """Erstellt ein neues Topic."""
+ topic = AlertTopicDB(
+ id=str(uuid.uuid4()),
+ user_id=user_id,
+ name=name,
+ description=description,
+ feed_url=feed_url,
+ feed_type=FeedTypeEnum(feed_type),
+ fetch_interval_minutes=fetch_interval_minutes,
+ is_active=is_active,
+ )
+ self.db.add(topic)
+ self.db.commit()
+ self.db.refresh(topic)
+ return topic
+
+ # ==================== READ ====================
+
+ def get_by_id(self, topic_id: str) -> Optional[AlertTopicDB]:
+ """Holt ein Topic nach ID."""
+ return self.db.query(AlertTopicDB).filter(
+ AlertTopicDB.id == topic_id
+ ).first()
+
+ def get_all(
+ self,
+ user_id: str = None,
+ is_active: bool = None,
+ limit: int = 100,
+ offset: int = 0,
+ ) -> List[AlertTopicDB]:
+ """Holt alle Topics mit optionalen Filtern."""
+ query = self.db.query(AlertTopicDB)
+
+ if user_id:
+ query = query.filter(AlertTopicDB.user_id == user_id)
+ if is_active is not None:
+ query = query.filter(AlertTopicDB.is_active == is_active)
+
+ return query.order_by(
+ AlertTopicDB.created_at.desc()
+ ).offset(offset).limit(limit).all()
+
+ def get_active_for_fetch(self) -> List[AlertTopicDB]:
+ """Holt alle aktiven Topics die gefetcht werden sollten."""
+ return self.db.query(AlertTopicDB).filter(
+ AlertTopicDB.is_active == True,
+ AlertTopicDB.feed_url.isnot(None),
+ ).all()
+
+ # ==================== UPDATE ====================
+
+ def update(
+ self,
+ topic_id: str,
+ name: str = None,
+ description: str = None,
+ feed_url: str = None,
+ feed_type: str = None,
+ is_active: bool = None,
+ fetch_interval_minutes: int = None,
+ ) -> Optional[AlertTopicDB]:
+ """Aktualisiert ein Topic."""
+ topic = self.get_by_id(topic_id)
+ if not topic:
+ return None
+
+ if name is not None:
+ topic.name = name
+ if description is not None:
+ topic.description = description
+ if feed_url is not None:
+ topic.feed_url = feed_url
+ if feed_type is not None:
+ topic.feed_type = FeedTypeEnum(feed_type)
+ if is_active is not None:
+ topic.is_active = is_active
+ if fetch_interval_minutes is not None:
+ topic.fetch_interval_minutes = fetch_interval_minutes
+
+ self.db.commit()
+ self.db.refresh(topic)
+ return topic
+
+ def update_fetch_status(
+ self,
+ topic_id: str,
+ last_fetch_error: str = None,
+ items_fetched: int = 0,
+ ) -> Optional[AlertTopicDB]:
+ """Aktualisiert den Fetch-Status eines Topics."""
+ topic = self.get_by_id(topic_id)
+ if not topic:
+ return None
+
+ topic.last_fetched_at = datetime.utcnow()
+ topic.last_fetch_error = last_fetch_error
+ topic.total_items_fetched += items_fetched
+
+ self.db.commit()
+ self.db.refresh(topic)
+ return topic
+
+ def increment_stats(
+ self,
+ topic_id: str,
+ kept: int = 0,
+ dropped: int = 0,
+ ) -> Optional[AlertTopicDB]:
+ """Erhöht die Statistiken eines Topics."""
+ topic = self.get_by_id(topic_id)
+ if not topic:
+ return None
+
+ topic.items_kept += kept
+ topic.items_dropped += dropped
+
+ self.db.commit()
+ self.db.refresh(topic)
+ return topic
+
+ # ==================== DELETE ====================
+
+ def delete(self, topic_id: str) -> bool:
+ """Löscht ein Topic (und alle zugehörigen Items via CASCADE)."""
+ topic = self.get_by_id(topic_id)
+ if not topic:
+ return False
+
+ self.db.delete(topic)
+ self.db.commit()
+ return True
+
+ # ==================== CONVERSION ====================
+
+ def to_dict(self, topic: AlertTopicDB) -> Dict[str, Any]:
+ """Konvertiert DB-Model zu Dictionary."""
+ return {
+ "id": topic.id,
+ "user_id": topic.user_id,
+ "name": topic.name,
+ "description": topic.description,
+ "feed_url": topic.feed_url,
+ "feed_type": topic.feed_type.value,
+ "is_active": topic.is_active,
+ "fetch_interval_minutes": topic.fetch_interval_minutes,
+ "last_fetched_at": topic.last_fetched_at.isoformat() if topic.last_fetched_at else None,
+ "last_fetch_error": topic.last_fetch_error,
+ "stats": {
+ "total_items_fetched": topic.total_items_fetched,
+ "items_kept": topic.items_kept,
+ "items_dropped": topic.items_dropped,
+ },
+ "created_at": topic.created_at.isoformat() if topic.created_at else None,
+ "updated_at": topic.updated_at.isoformat() if topic.updated_at else None,
+ }
diff --git a/backend-lehrer/services/pdf_models.py b/backend-lehrer/services/pdf_models.py
new file mode 100644
index 0000000..6964d03
--- /dev/null
+++ b/backend-lehrer/services/pdf_models.py
@@ -0,0 +1,84 @@
+"""
+PDF Service - Data Models and Shared Types.
+
+Dataclasses for letters, certificates, and corrections.
+"""
+
+from dataclasses import dataclass
+from typing import Any, Dict, Optional, List
+
+
+@dataclass
+class SchoolInfo:
+ """Schulinformationen für Header."""
+ name: str
+ address: str
+ phone: str
+ email: str
+ logo_path: Optional[str] = None
+ website: Optional[str] = None
+ principal: Optional[str] = None
+
+
+@dataclass
+class LetterData:
+ """Daten für Elternbrief-PDF."""
+ recipient_name: str
+ recipient_address: str
+ student_name: str
+ student_class: str
+ subject: str
+ content: str
+ date: str
+ teacher_name: str
+ teacher_title: Optional[str] = None
+ school_info: Optional[SchoolInfo] = None
+ letter_type: str = "general" # general, halbjahr, fehlzeiten, elternabend, lob
+ tone: str = "professional"
+ legal_references: Optional[List[Dict[str, str]]] = None
+ gfk_principles_applied: Optional[List[str]] = None
+
+
+@dataclass
+class CertificateData:
+ """Daten für Zeugnis-PDF."""
+ student_name: str
+ student_birthdate: str
+ student_class: str
+ school_year: str
+ certificate_type: str # halbjahr, jahres, abschluss
+ subjects: List[Dict[str, Any]] # [{name, grade, note}]
+ attendance: Dict[str, int] # {days_absent, days_excused, days_unexcused}
+ remarks: Optional[str] = None
+ class_teacher: str = ""
+ principal: str = ""
+ school_info: Optional[SchoolInfo] = None
+ issue_date: str = ""
+ social_behavior: Optional[str] = None # A, B, C, D
+ work_behavior: Optional[str] = None # A, B, C, D
+
+
+@dataclass
+class StudentInfo:
+ """Schülerinformationen für Korrektur-PDFs."""
+ student_id: str
+ name: str
+ class_name: str
+
+
+@dataclass
+class CorrectionData:
+ """Daten für Korrektur-Übersicht PDF."""
+ student: StudentInfo
+ exam_title: str
+ subject: str
+ date: str
+ max_points: int
+ achieved_points: int
+ grade: str
+ percentage: float
+ corrections: List[Dict[str, Any]] # [{question, answer, points, feedback}]
+ teacher_notes: str = ""
+ ai_feedback: str = ""
+ grade_distribution: Optional[Dict[str, int]] = None # {note: anzahl}
+ class_average: Optional[float] = None
diff --git a/backend-lehrer/services/pdf_service.py b/backend-lehrer/services/pdf_service.py
index 9559964..356b5b4 100644
--- a/backend-lehrer/services/pdf_service.py
+++ b/backend-lehrer/services/pdf_service.py
@@ -7,101 +7,37 @@ Shared Service für:
- Correction (Korrektur-Übersichten)
Verwendet WeasyPrint für PDF-Rendering und Jinja2 für Templates.
+
+Split structure:
+- pdf_models.py: Data classes (SchoolInfo, LetterData, CertificateData, etc.)
+- pdf_templates.py: Inline HTML templates (letter, certificate, correction)
+- pdf_service.py: Core PDFService class + convenience functions (this file)
"""
import logging
-import os
from datetime import datetime
from pathlib import Path
-from typing import Any, Dict, Optional, List
-from dataclasses import dataclass
+from typing import Any, Dict, Optional
from jinja2 import Environment, FileSystemLoader, select_autoescape
from weasyprint import HTML, CSS
from weasyprint.text.fonts import FontConfiguration
+from .pdf_models import (
+ SchoolInfo, LetterData, CertificateData, StudentInfo, CorrectionData,
+)
+from .pdf_templates import (
+ get_letter_template_html,
+ get_certificate_template_html,
+ get_correction_template_html,
+)
+
logger = logging.getLogger(__name__)
# Template directory
TEMPLATES_DIR = Path(__file__).parent.parent / "templates" / "pdf"
-@dataclass
-class SchoolInfo:
- """Schulinformationen für Header."""
- name: str
- address: str
- phone: str
- email: str
- logo_path: Optional[str] = None
- website: Optional[str] = None
- principal: Optional[str] = None
-
-
-@dataclass
-class LetterData:
- """Daten für Elternbrief-PDF."""
- recipient_name: str
- recipient_address: str
- student_name: str
- student_class: str
- subject: str
- content: str
- date: str
- teacher_name: str
- teacher_title: Optional[str] = None
- school_info: Optional[SchoolInfo] = None
- letter_type: str = "general" # general, halbjahr, fehlzeiten, elternabend, lob
- tone: str = "professional"
- legal_references: Optional[List[Dict[str, str]]] = None
- gfk_principles_applied: Optional[List[str]] = None
-
-
-@dataclass
-class CertificateData:
- """Daten für Zeugnis-PDF."""
- student_name: str
- student_birthdate: str
- student_class: str
- school_year: str
- certificate_type: str # halbjahr, jahres, abschluss
- subjects: List[Dict[str, Any]] # [{name, grade, note}]
- attendance: Dict[str, int] # {days_absent, days_excused, days_unexcused}
- remarks: Optional[str] = None
- class_teacher: str = ""
- principal: str = ""
- school_info: Optional[SchoolInfo] = None
- issue_date: str = ""
- social_behavior: Optional[str] = None # A, B, C, D
- work_behavior: Optional[str] = None # A, B, C, D
-
-
-@dataclass
-class StudentInfo:
- """Schülerinformationen für Korrektur-PDFs."""
- student_id: str
- name: str
- class_name: str
-
-
-@dataclass
-class CorrectionData:
- """Daten für Korrektur-Übersicht PDF."""
- student: StudentInfo
- exam_title: str
- subject: str
- date: str
- max_points: int
- achieved_points: int
- grade: str
- percentage: float
- corrections: List[Dict[str, Any]] # [{question, answer, points, feedback}]
- teacher_notes: str = ""
- ai_feedback: str = ""
- grade_distribution: Optional[Dict[str, int]] = None # {note: anzahl}
- class_average: Optional[float] = None
-
-
class PDFService:
"""
Zentrale PDF-Generierung für BreakPilot.
@@ -113,18 +49,9 @@ class PDFService:
"""
def __init__(self, templates_dir: Optional[Path] = None):
- """
- Initialisiert den PDF-Service.
-
- Args:
- templates_dir: Optionaler Pfad zu Templates (Standard: backend/templates/pdf)
- """
self.templates_dir = templates_dir or TEMPLATES_DIR
-
- # Ensure templates directory exists
self.templates_dir.mkdir(parents=True, exist_ok=True)
- # Initialize Jinja2 environment
self.jinja_env = Environment(
loader=FileSystemLoader(str(self.templates_dir)),
autoescape=select_autoescape(['html', 'xml']),
@@ -132,13 +59,10 @@ class PDFService:
lstrip_blocks=True
)
- # Add custom filters
self.jinja_env.filters['date_format'] = self._date_format
self.jinja_env.filters['grade_color'] = self._grade_color
- # Font configuration for WeasyPrint
self.font_config = FontConfiguration()
-
logger.info(f"PDFService initialized with templates from {self.templates_dir}")
@staticmethod
@@ -156,16 +80,9 @@ class PDFService:
def _grade_color(grade: str) -> str:
"""Gibt Farbe basierend auf Note zurück."""
grade_colors = {
- "1": "#27ae60", # Grün
- "2": "#2ecc71", # Hellgrün
- "3": "#f1c40f", # Gelb
- "4": "#e67e22", # Orange
- "5": "#e74c3c", # Rot
- "6": "#c0392b", # Dunkelrot
- "A": "#27ae60",
- "B": "#2ecc71",
- "C": "#f1c40f",
- "D": "#e74c3c",
+ "1": "#27ae60", "2": "#2ecc71", "3": "#f1c40f",
+ "4": "#e67e22", "5": "#e74c3c", "6": "#c0392b",
+ "A": "#27ae60", "B": "#2ecc71", "C": "#f1c40f", "D": "#e74c3c",
}
return grade_colors.get(str(grade), "#333333")
@@ -181,291 +98,73 @@ class PDFService:
color: #666;
}
}
-
body {
font-family: 'DejaVu Sans', 'Liberation Sans', Arial, sans-serif;
- font-size: 11pt;
- line-height: 1.5;
- color: #333;
- }
-
- h1, h2, h3 {
- font-weight: bold;
- margin-top: 1em;
- margin-bottom: 0.5em;
- }
-
- h1 { font-size: 16pt; }
- h2 { font-size: 14pt; }
- h3 { font-size: 12pt; }
-
- .header {
- border-bottom: 2px solid #2c3e50;
- padding-bottom: 15px;
- margin-bottom: 20px;
- }
-
- .school-name {
- font-size: 18pt;
- font-weight: bold;
- color: #2c3e50;
- }
-
- .school-info {
- font-size: 9pt;
- color: #666;
- }
-
- .letter-date {
- text-align: right;
- margin-bottom: 20px;
- }
-
- .recipient {
- margin-bottom: 30px;
- }
-
- .subject {
- font-weight: bold;
- margin-bottom: 20px;
- }
-
- .content {
- text-align: justify;
- margin-bottom: 30px;
- }
-
- .signature {
- margin-top: 40px;
- }
-
- .legal-references {
- font-size: 9pt;
- color: #666;
- border-top: 1px solid #ddd;
- margin-top: 30px;
- padding-top: 10px;
- }
-
- .gfk-badge {
- display: inline-block;
- background: #e8f5e9;
- color: #27ae60;
- font-size: 8pt;
- padding: 2px 8px;
- border-radius: 10px;
- margin-right: 5px;
- }
-
- /* Zeugnis-Styles */
- .certificate-header {
- text-align: center;
- margin-bottom: 30px;
- }
-
- .certificate-title {
- font-size: 20pt;
- font-weight: bold;
- margin-bottom: 10px;
- }
-
- .student-info {
- margin-bottom: 20px;
- padding: 15px;
- background: #f9f9f9;
- border-radius: 5px;
- }
-
- .grades-table {
- width: 100%;
- border-collapse: collapse;
- margin-bottom: 20px;
- }
-
- .grades-table th,
- .grades-table td {
- border: 1px solid #ddd;
- padding: 8px 12px;
- text-align: left;
- }
-
- .grades-table th {
- background: #2c3e50;
- color: white;
- }
-
- .grades-table tr:nth-child(even) {
- background: #f9f9f9;
- }
-
- .grade-cell {
- text-align: center;
- font-weight: bold;
- font-size: 12pt;
- }
-
- .attendance-box {
- background: #fff3cd;
- padding: 15px;
- border-radius: 5px;
- margin-bottom: 20px;
- }
-
- .signatures-row {
- display: flex;
- justify-content: space-between;
- margin-top: 50px;
- }
-
- .signature-block {
- text-align: center;
- width: 40%;
- }
-
- .signature-line {
- border-top: 1px solid #333;
- margin-top: 40px;
- padding-top: 5px;
- }
-
- /* Korrektur-Styles */
- .exam-header {
- background: #2c3e50;
- color: white;
- padding: 15px;
- margin-bottom: 20px;
- }
-
- .result-box {
- background: #e8f5e9;
- padding: 20px;
- text-align: center;
- margin-bottom: 20px;
- border-radius: 5px;
- }
-
- .result-grade {
- font-size: 36pt;
- font-weight: bold;
- }
-
- .result-points {
- font-size: 14pt;
- color: #666;
- }
-
- .corrections-list {
- margin-bottom: 20px;
- }
-
- .correction-item {
- border: 1px solid #ddd;
- padding: 15px;
- margin-bottom: 10px;
- border-radius: 5px;
- }
-
- .correction-question {
- font-weight: bold;
- margin-bottom: 5px;
- }
-
- .correction-feedback {
- background: #fff8e1;
- padding: 10px;
- margin-top: 10px;
- border-left: 3px solid #ffc107;
- font-size: 10pt;
- }
-
- .stats-table {
- width: 100%;
- margin-top: 20px;
- }
-
- .stats-table td {
- padding: 5px 10px;
+ font-size: 11pt; line-height: 1.5; color: #333;
}
+ h1, h2, h3 { font-weight: bold; margin-top: 1em; margin-bottom: 0.5em; }
+ h1 { font-size: 16pt; } h2 { font-size: 14pt; } h3 { font-size: 12pt; }
+ .header { border-bottom: 2px solid #2c3e50; padding-bottom: 15px; margin-bottom: 20px; }
+ .school-name { font-size: 18pt; font-weight: bold; color: #2c3e50; }
+ .school-info { font-size: 9pt; color: #666; }
+ .letter-date { text-align: right; margin-bottom: 20px; }
+ .recipient { margin-bottom: 30px; }
+ .subject { font-weight: bold; margin-bottom: 20px; }
+ .content { text-align: justify; margin-bottom: 30px; }
+ .signature { margin-top: 40px; }
+ .legal-references { font-size: 9pt; color: #666; border-top: 1px solid #ddd; margin-top: 30px; padding-top: 10px; }
+ .gfk-badge { display: inline-block; background: #e8f5e9; color: #27ae60; font-size: 8pt; padding: 2px 8px; border-radius: 10px; margin-right: 5px; }
+ .certificate-header { text-align: center; margin-bottom: 30px; }
+ .certificate-title { font-size: 20pt; font-weight: bold; margin-bottom: 10px; }
+ .student-info { margin-bottom: 20px; padding: 15px; background: #f9f9f9; border-radius: 5px; }
+ .grades-table { width: 100%; border-collapse: collapse; margin-bottom: 20px; }
+ .grades-table th, .grades-table td { border: 1px solid #ddd; padding: 8px 12px; text-align: left; }
+ .grades-table th { background: #2c3e50; color: white; }
+ .grades-table tr:nth-child(even) { background: #f9f9f9; }
+ .grade-cell { text-align: center; font-weight: bold; font-size: 12pt; }
+ .attendance-box { background: #fff3cd; padding: 15px; border-radius: 5px; margin-bottom: 20px; }
+ .signatures-row { display: flex; justify-content: space-between; margin-top: 50px; }
+ .signature-block { text-align: center; width: 40%; }
+ .signature-line { border-top: 1px solid #333; margin-top: 40px; padding-top: 5px; }
+ .exam-header { background: #2c3e50; color: white; padding: 15px; margin-bottom: 20px; }
+ .result-box { background: #e8f5e9; padding: 20px; text-align: center; margin-bottom: 20px; border-radius: 5px; }
+ .result-grade { font-size: 36pt; font-weight: bold; }
+ .result-points { font-size: 14pt; color: #666; }
+ .corrections-list { margin-bottom: 20px; }
+ .correction-item { border: 1px solid #ddd; padding: 15px; margin-bottom: 10px; border-radius: 5px; }
+ .correction-question { font-weight: bold; margin-bottom: 5px; }
+ .correction-feedback { background: #fff8e1; padding: 10px; margin-top: 10px; border-left: 3px solid #ffc107; font-size: 10pt; }
+ .stats-table { width: 100%; margin-top: 20px; }
+ .stats-table td { padding: 5px 10px; }
"""
def generate_letter_pdf(self, data: LetterData) -> bytes:
- """
- Generiert PDF für Elternbrief.
-
- Args:
- data: LetterData mit allen Briefinformationen
-
- Returns:
- PDF als bytes
- """
+ """Generiert PDF für Elternbrief."""
logger.info(f"Generating letter PDF for student: {data.student_name}")
-
template = self._get_letter_template()
- html_content = template.render(
- data=data,
- generated_at=datetime.now().strftime("%d.%m.%Y %H:%M")
- )
-
+ html_content = template.render(data=data, generated_at=datetime.now().strftime("%d.%m.%Y %H:%M"))
css = CSS(string=self._get_base_css(), font_config=self.font_config)
- pdf_bytes = HTML(string=html_content).write_pdf(
- stylesheets=[css],
- font_config=self.font_config
- )
-
+ pdf_bytes = HTML(string=html_content).write_pdf(stylesheets=[css], font_config=self.font_config)
logger.info(f"Letter PDF generated: {len(pdf_bytes)} bytes")
return pdf_bytes
def generate_certificate_pdf(self, data: CertificateData) -> bytes:
- """
- Generiert PDF für Schulzeugnis.
-
- Args:
- data: CertificateData mit allen Zeugnisinformationen
-
- Returns:
- PDF als bytes
- """
+ """Generiert PDF für Schulzeugnis."""
logger.info(f"Generating certificate PDF for: {data.student_name}")
-
template = self._get_certificate_template()
- html_content = template.render(
- data=data,
- generated_at=datetime.now().strftime("%d.%m.%Y %H:%M")
- )
-
+ html_content = template.render(data=data, generated_at=datetime.now().strftime("%d.%m.%Y %H:%M"))
css = CSS(string=self._get_base_css(), font_config=self.font_config)
- pdf_bytes = HTML(string=html_content).write_pdf(
- stylesheets=[css],
- font_config=self.font_config
- )
-
+ pdf_bytes = HTML(string=html_content).write_pdf(stylesheets=[css], font_config=self.font_config)
logger.info(f"Certificate PDF generated: {len(pdf_bytes)} bytes")
return pdf_bytes
def generate_correction_pdf(self, data: CorrectionData) -> bytes:
- """
- Generiert PDF für Korrektur-Übersicht.
-
- Args:
- data: CorrectionData mit allen Korrekturinformationen
-
- Returns:
- PDF als bytes
- """
+ """Generiert PDF für Korrektur-Übersicht."""
logger.info(f"Generating correction PDF for: {data.student.name}")
-
template = self._get_correction_template()
- html_content = template.render(
- data=data,
- generated_at=datetime.now().strftime("%d.%m.%Y %H:%M")
- )
-
+ html_content = template.render(data=data, generated_at=datetime.now().strftime("%d.%m.%Y %H:%M"))
css = CSS(string=self._get_base_css(), font_config=self.font_config)
- pdf_bytes = HTML(string=html_content).write_pdf(
- stylesheets=[css],
- font_config=self.font_config
- )
-
+ pdf_bytes = HTML(string=html_content).write_pdf(stylesheets=[css], font_config=self.font_config)
logger.info(f"Correction PDF generated: {len(pdf_bytes)} bytes")
return pdf_bytes
@@ -474,321 +173,27 @@ class PDFService:
template_path = self.templates_dir / "letter.html"
if template_path.exists():
return self.jinja_env.get_template("letter.html")
-
- # Inline-Template als Fallback
- return self.jinja_env.from_string(self._get_letter_template_html())
+ return self.jinja_env.from_string(get_letter_template_html())
def _get_certificate_template(self):
"""Gibt Certificate-Template zurück."""
template_path = self.templates_dir / "certificate.html"
if template_path.exists():
return self.jinja_env.get_template("certificate.html")
-
- return self.jinja_env.from_string(self._get_certificate_template_html())
+ return self.jinja_env.from_string(get_certificate_template_html())
def _get_correction_template(self):
"""Gibt Correction-Template zurück."""
template_path = self.templates_dir / "correction.html"
if template_path.exists():
return self.jinja_env.get_template("correction.html")
-
- return self.jinja_env.from_string(self._get_correction_template_html())
-
- @staticmethod
- def _get_letter_template_html() -> str:
- """Inline HTML-Template für Elternbriefe."""
- return """
-
-
-
-
- {{ data.subject }}
-
-
-
-
-
- {{ data.date }}
-
-
-
- {{ data.recipient_name }}
- {{ data.recipient_address | replace('\\n', ' ') | safe }}
-
-
-
- Betreff: {{ data.subject }}
-
-
-
- Schüler/in: {{ data.student_name }} | Klasse: {{ data.student_class }}
-
-
-
- {{ data.content | replace('\\n', ' ') | safe }}
-
-
- {% if data.gfk_principles_applied %}
-
- {% for principle in data.gfk_principles_applied %}
- ✓ {{ principle }}
- {% endfor %}
-
- {% endif %}
-
-
-
Mit freundlichen Grüßen
-
- {{ data.teacher_name }}
- {% if data.teacher_title %}{{ data.teacher_title }} {% endif %}
-
-
-
- {% if data.legal_references %}
-
- Rechtliche Grundlagen:
- {% for ref in data.legal_references %}
- • {{ ref.law }} {{ ref.paragraph }}: {{ ref.title }}
- {% endfor %}
-
- {% endif %}
-
-
- Erstellt mit BreakPilot | {{ generated_at }}
-
-
-
-"""
-
- @staticmethod
- def _get_certificate_template_html() -> str:
- """Inline HTML-Template für Zeugnisse."""
- return """
-
-
-
-
- Zeugnis - {{ data.student_name }}
-
-
-
-
-
-
-
- Name: {{ data.student_name }}
- Geburtsdatum: {{ data.student_birthdate }}
-
-
- Klasse: {{ data.student_class }}
-
-
-
-
-
- Leistungen
-
-
-
- Fach
- Note
- Punkte
-
-
-
- {% for subject in data.subjects %}
-
- {{ subject.name }}
-
- {{ subject.grade }}
-
- {{ subject.points | default('-') }}
-
- {% endfor %}
-
-
-
- {% if data.social_behavior or data.work_behavior %}
- Verhalten
-
- {% if data.social_behavior %}
-
- Sozialverhalten
- {{ data.social_behavior }}
-
- {% endif %}
- {% if data.work_behavior %}
-
- Arbeitsverhalten
- {{ data.work_behavior }}
-
- {% endif %}
-
- {% endif %}
-
-
- Versäumte Tage: {{ data.attendance.days_absent | default(0) }}
- (davon entschuldigt: {{ data.attendance.days_excused | default(0) }},
- unentschuldigt: {{ data.attendance.days_unexcused | default(0) }})
-
-
- {% if data.remarks %}
-
- Bemerkungen:
- {{ data.remarks }}
-
- {% endif %}
-
-
- Ausgestellt am: {{ data.issue_date }}
-
-
-
-
-
{{ data.class_teacher }}
-
Klassenlehrer/in
-
-
-
{{ data.principal }}
-
Schulleiter/in
-
-
-
-
-
-
-"""
-
- @staticmethod
- def _get_correction_template_html() -> str:
- """Inline HTML-Template für Korrektur-Übersichten."""
- return """
-
-
-
-
- Korrektur - {{ data.exam_title }}
-
-
-
-
-
- {{ data.student.name }} | Klasse {{ data.student.class_name }}
-
-
-
-
- Note: {{ data.grade }}
-
-
- {{ data.achieved_points }} von {{ data.max_points }} Punkten
- ({{ data.percentage | round(1) }}%)
-
-
-
- Detaillierte Auswertung
-
- {% for item in data.corrections %}
-
-
- {{ item.question }}
-
- {% if item.answer %}
-
- Antwort: {{ item.answer }}
-
- {% endif %}
-
- Punkte: {{ item.points }}
-
- {% if item.feedback %}
-
- {{ item.feedback }}
-
- {% endif %}
-
- {% endfor %}
-
-
- {% if data.teacher_notes %}
-
- Lehrerkommentar:
- {{ data.teacher_notes }}
-
- {% endif %}
-
- {% if data.ai_feedback %}
-
- KI-Feedback:
- {{ data.ai_feedback }}
-
- {% endif %}
-
- {% if data.class_average or data.grade_distribution %}
- Klassenstatistik
-
- {% if data.class_average %}
-
- Klassendurchschnitt:
- {{ data.class_average }}
-
- {% endif %}
- {% if data.grade_distribution %}
-
- Notenverteilung:
-
- {% for grade, count in data.grade_distribution.items() %}
- Note {{ grade }}: {{ count }}x{% if not loop.last %}, {% endif %}
- {% endfor %}
-
-
- {% endif %}
-
- {% endif %}
-
-
-
Datum: {{ data.date }}
-
-
-
- Erstellt mit BreakPilot | {{ generated_at }}
-
-
-
-"""
+ return self.jinja_env.from_string(get_correction_template_html())
+# =============================================================================
# Convenience functions for direct usage
+# =============================================================================
+
_pdf_service: Optional[PDFService] = None
@@ -801,18 +206,8 @@ def get_pdf_service() -> PDFService:
def generate_letter_pdf(data: Dict[str, Any]) -> bytes:
- """
- Convenience function zum Generieren eines Elternbrief-PDFs.
-
- Args:
- data: Dict mit allen Briefdaten
-
- Returns:
- PDF als bytes
- """
+ """Convenience function zum Generieren eines Elternbrief-PDFs."""
service = get_pdf_service()
-
- # Convert dict to LetterData
school_info = None
if data.get("school_info"):
school_info = SchoolInfo(**data["school_info"])
@@ -833,22 +228,12 @@ def generate_letter_pdf(data: Dict[str, Any]) -> bytes:
legal_references=data.get("legal_references"),
gfk_principles_applied=data.get("gfk_principles_applied")
)
-
return service.generate_letter_pdf(letter_data)
def generate_certificate_pdf(data: Dict[str, Any]) -> bytes:
- """
- Convenience function zum Generieren eines Zeugnis-PDFs.
-
- Args:
- data: Dict mit allen Zeugnisdaten
-
- Returns:
- PDF als bytes
- """
+ """Convenience function zum Generieren eines Zeugnis-PDFs."""
service = get_pdf_service()
-
school_info = None
if data.get("school_info"):
school_info = SchoolInfo(**data["school_info"])
@@ -869,30 +254,19 @@ def generate_certificate_pdf(data: Dict[str, Any]) -> bytes:
social_behavior=data.get("social_behavior"),
work_behavior=data.get("work_behavior")
)
-
return service.generate_certificate_pdf(cert_data)
def generate_correction_pdf(data: Dict[str, Any]) -> bytes:
- """
- Convenience function zum Generieren eines Korrektur-PDFs.
-
- Args:
- data: Dict mit allen Korrekturdaten
-
- Returns:
- PDF als bytes
- """
+ """Convenience function zum Generieren eines Korrektur-PDFs."""
service = get_pdf_service()
- # Create StudentInfo from dict
student = StudentInfo(
student_id=data.get("student_id", "unknown"),
name=data.get("student_name", data.get("name", "")),
class_name=data.get("student_class", data.get("class_name", ""))
)
- # Calculate percentage if not provided
max_points = data.get("max_points", data.get("total_points", 0))
achieved_points = data.get("achieved_points", 0)
percentage = data.get("percentage", (achieved_points / max_points * 100) if max_points > 0 else 0.0)
@@ -912,5 +286,4 @@ def generate_correction_pdf(data: Dict[str, Any]) -> bytes:
grade_distribution=data.get("grade_distribution"),
class_average=data.get("class_average")
)
-
return service.generate_correction_pdf(correction_data)
diff --git a/backend-lehrer/services/pdf_templates.py b/backend-lehrer/services/pdf_templates.py
new file mode 100644
index 0000000..9c8f924
--- /dev/null
+++ b/backend-lehrer/services/pdf_templates.py
@@ -0,0 +1,298 @@
+"""
+PDF Service - Inline HTML Templates.
+
+Fallback templates when external template files don't exist.
+"""
+
+
+def get_letter_template_html() -> str:
+ """Inline HTML-Template für Elternbriefe."""
+ return """
+
+
+
+
+ {{ data.subject }}
+
+
+
+
+
+ {{ data.date }}
+
+
+
+ {{ data.recipient_name }}
+ {{ data.recipient_address | replace('\\n', ' ') | safe }}
+
+
+
+ Betreff: {{ data.subject }}
+
+
+
+ Schüler/in: {{ data.student_name }} | Klasse: {{ data.student_class }}
+
+
+
+ {{ data.content | replace('\\n', ' ') | safe }}
+
+
+ {% if data.gfk_principles_applied %}
+
+ {% for principle in data.gfk_principles_applied %}
+ ✓ {{ principle }}
+ {% endfor %}
+
+ {% endif %}
+
+
+
Mit freundlichen Grüßen
+
+ {{ data.teacher_name }}
+ {% if data.teacher_title %}{{ data.teacher_title }} {% endif %}
+
+
+
+ {% if data.legal_references %}
+
+ Rechtliche Grundlagen:
+ {% for ref in data.legal_references %}
+ • {{ ref.law }} {{ ref.paragraph }}: {{ ref.title }}
+ {% endfor %}
+
+ {% endif %}
+
+
+ Erstellt mit BreakPilot | {{ generated_at }}
+
+
+
+"""
+
+
+def get_certificate_template_html() -> str:
+ """Inline HTML-Template für Zeugnisse."""
+ return """
+
+
+
+
+ Zeugnis - {{ data.student_name }}
+
+
+
+
+
+
+
+ Name: {{ data.student_name }}
+ Geburtsdatum: {{ data.student_birthdate }}
+
+
+ Klasse: {{ data.student_class }}
+
+
+
+
+
+ Leistungen
+
+
+
+ Fach
+ Note
+ Punkte
+
+
+
+ {% for subject in data.subjects %}
+
+ {{ subject.name }}
+
+ {{ subject.grade }}
+
+ {{ subject.points | default('-') }}
+
+ {% endfor %}
+
+
+
+ {% if data.social_behavior or data.work_behavior %}
+ Verhalten
+
+ {% if data.social_behavior %}
+
+ Sozialverhalten
+ {{ data.social_behavior }}
+
+ {% endif %}
+ {% if data.work_behavior %}
+
+ Arbeitsverhalten
+ {{ data.work_behavior }}
+
+ {% endif %}
+
+ {% endif %}
+
+
+ Versäumte Tage: {{ data.attendance.days_absent | default(0) }}
+ (davon entschuldigt: {{ data.attendance.days_excused | default(0) }},
+ unentschuldigt: {{ data.attendance.days_unexcused | default(0) }})
+
+
+ {% if data.remarks %}
+
+ Bemerkungen:
+ {{ data.remarks }}
+
+ {% endif %}
+
+
+ Ausgestellt am: {{ data.issue_date }}
+
+
+
+
+
{{ data.class_teacher }}
+
Klassenlehrer/in
+
+
+
{{ data.principal }}
+
Schulleiter/in
+
+
+
+
+
+
+"""
+
+
+def get_correction_template_html() -> str:
+ """Inline HTML-Template für Korrektur-Übersichten."""
+ return """
+
+
+
+
+ Korrektur - {{ data.exam_title }}
+
+
+
+
+
+ {{ data.student.name }} | Klasse {{ data.student.class_name }}
+
+
+
+
+ Note: {{ data.grade }}
+
+
+ {{ data.achieved_points }} von {{ data.max_points }} Punkten
+ ({{ data.percentage | round(1) }}%)
+
+
+
+ Detaillierte Auswertung
+
+ {% for item in data.corrections %}
+
+
+ {{ item.question }}
+
+ {% if item.answer %}
+
+ Antwort: {{ item.answer }}
+
+ {% endif %}
+
+ Punkte: {{ item.points }}
+
+ {% if item.feedback %}
+
+ {{ item.feedback }}
+
+ {% endif %}
+
+ {% endfor %}
+
+
+ {% if data.teacher_notes %}
+
+ Lehrerkommentar:
+ {{ data.teacher_notes }}
+
+ {% endif %}
+
+ {% if data.ai_feedback %}
+
+ KI-Feedback:
+ {{ data.ai_feedback }}
+
+ {% endif %}
+
+ {% if data.class_average or data.grade_distribution %}
+ Klassenstatistik
+
+ {% if data.class_average %}
+
+ Klassendurchschnitt:
+ {{ data.class_average }}
+
+ {% endif %}
+ {% if data.grade_distribution %}
+
+ Notenverteilung:
+
+ {% for grade, count in data.grade_distribution.items() %}
+ Note {{ grade }}: {{ count }}x{% if not loop.last %}, {% endif %}
+ {% endfor %}
+
+
+ {% endif %}
+
+ {% endif %}
+
+
+
Datum: {{ data.date }}
+
+
+
+ Erstellt mit BreakPilot | {{ generated_at }}
+
+
+
+"""
diff --git a/backend-lehrer/teacher_dashboard_analytics.py b/backend-lehrer/teacher_dashboard_analytics.py
new file mode 100644
index 0000000..4a84bca
--- /dev/null
+++ b/backend-lehrer/teacher_dashboard_analytics.py
@@ -0,0 +1,267 @@
+# ==============================================
+# Teacher Dashboard - Analytics & Progress Routes
+# ==============================================
+
+from fastapi import APIRouter, HTTPException, Query, Depends, Request
+from typing import List, Optional, Dict, Any
+from datetime import datetime, timedelta
+import logging
+
+from teacher_dashboard_models import (
+ UnitAssignmentStatus, TeacherControlSettings,
+ UnitAssignment, StudentUnitProgress, ClassUnitProgress,
+ MisconceptionReport, ClassAnalyticsSummary, ContentResource,
+ get_current_teacher, get_teacher_database,
+ get_classes_for_teacher, get_students_in_class,
+ REQUIRE_AUTH,
+)
+
+logger = logging.getLogger(__name__)
+
+router = APIRouter(tags=["Teacher Dashboard"])
+
+# Shared in-memory store reference (set from teacher_dashboard_api)
+_assignments_store: Dict[str, Dict[str, Any]] = {}
+
+
+def set_assignments_store(store: Dict[str, Dict[str, Any]]):
+    """Share the in-memory assignments store from the main module.
+
+    teacher_dashboard_api calls this once at import time so that both
+    modules read and write the same fallback dict when no database is
+    available.
+    """
+    # Rebinding the module-level name makes every route handler below
+    # operate on the caller's dict instance instead of the empty default.
+    global _assignments_store
+    _assignments_store = store
+
+
+# ==============================================
+# API Endpoints - Progress & Analytics
+# ==============================================
+
+@router.get("/assignments/{assignment_id}/progress", response_model=ClassUnitProgress)
+async def get_assignment_progress(
+    assignment_id: str,
+    teacher: Dict[str, Any] = Depends(get_current_teacher)
+) -> ClassUnitProgress:
+    """Get detailed progress for an assignment.
+
+    Loads the assignment (database first, shared in-memory store as
+    fallback), then builds one StudentUnitProgress per student in the
+    class plus class-level aggregates (averages, counts).
+
+    Raises:
+        HTTPException 404: assignment missing or owned by another teacher.
+    """
+    db = await get_teacher_database()
+    assignment = None
+    if db:
+        try:
+            assignment = await db.get_assignment(assignment_id)
+        except Exception as e:
+            logger.error(f"Failed to get assignment: {e}")
+    # Fall back to the shared in-memory store when the DB is absent or errored.
+    if not assignment and assignment_id in _assignments_store:
+        assignment = _assignments_store[assignment_id]
+    # Ownership check doubles as the not-found case so foreign assignment
+    # IDs are indistinguishable from nonexistent ones.
+    if not assignment or assignment["teacher_id"] != teacher["user_id"]:
+        raise HTTPException(status_code=404, detail="Assignment not found")
+
+    students = await get_students_in_class(assignment["class_id"])
+    student_progress = []
+    # Running aggregates for the class-level averages computed at the end.
+    total_completion = 0.0
+    total_precheck = 0.0
+    total_postcheck = 0.0
+    total_time = 0
+    precheck_count = 0
+    postcheck_count = 0
+    started = 0
+    completed = 0
+
+    for student in students:
+        # NOTE(review): assumes each student dict carries "id" or
+        # "student_id"; if both are absent, student_id is None and the
+        # [:8] slice below would raise — confirm the school-service schema.
+        student_id = student.get("id", student.get("student_id"))
+        progress = StudentUnitProgress(
+            student_id=student_id,
+            student_name=student.get("name", f"Student {student_id[:8]}"),
+            status="not_started", completion_rate=0.0, stops_completed=0, total_stops=0,
+        )
+        if db:
+            try:
+                session_data = await db.get_student_unit_session(
+                    student_id=student_id, unit_id=assignment["unit_id"]
+                )
+                if session_data:
+                    progress.session_id = session_data.get("session_id")
+                    progress.status = "completed" if session_data.get("completed_at") else "in_progress"
+                    progress.completion_rate = session_data.get("completion_rate", 0.0)
+                    progress.precheck_score = session_data.get("precheck_score")
+                    progress.postcheck_score = session_data.get("postcheck_score")
+                    progress.time_spent_minutes = session_data.get("duration_seconds", 0) // 60
+                    progress.last_activity = session_data.get("updated_at")
+                    progress.stops_completed = session_data.get("stops_completed", 0)
+                    progress.total_stops = session_data.get("total_stops", 0)
+                    # Learning gain is only defined when both checks were taken.
+                    if progress.precheck_score is not None and progress.postcheck_score is not None:
+                        progress.learning_gain = progress.postcheck_score - progress.precheck_score
+                    total_completion += progress.completion_rate
+                    total_time += progress.time_spent_minutes
+                    if progress.precheck_score is not None:
+                        total_precheck += progress.precheck_score
+                        precheck_count += 1
+                    if progress.postcheck_score is not None:
+                        total_postcheck += progress.postcheck_score
+                        postcheck_count += 1
+                    if progress.status != "not_started":
+                        started += 1
+                    if progress.status == "completed":
+                        completed += 1
+            except Exception as e:
+                # Best-effort: a failed session lookup leaves the student
+                # in the default "not_started" state rather than failing
+                # the whole request.
+                logger.error(f"Failed to get student progress: {e}")
+        student_progress.append(progress)
+
+    # Guard against division by zero for an empty class; the response
+    # still reports the true total via total_students=len(students).
+    total_students = len(students) or 1
+    return ClassUnitProgress(
+        assignment_id=assignment_id, unit_id=assignment["unit_id"],
+        unit_title=f"Unit {assignment['unit_id']}", class_id=assignment["class_id"],
+        class_name=f"Class {assignment['class_id'][:8]}", total_students=len(students),
+        started_count=started, completed_count=completed,
+        avg_completion_rate=total_completion / total_students,
+        avg_precheck_score=total_precheck / precheck_count if precheck_count > 0 else None,
+        avg_postcheck_score=total_postcheck / postcheck_count if postcheck_count > 0 else None,
+        avg_learning_gain=(total_postcheck / postcheck_count - total_precheck / precheck_count)
+        if precheck_count > 0 and postcheck_count > 0 else None,
+        avg_time_minutes=total_time / started if started > 0 else 0,
+        students=student_progress,
+    )
+
+
+@router.get("/classes/{class_id}/analytics", response_model=ClassAnalyticsSummary)
+async def get_class_analytics(
+    class_id: str,
+    teacher: Dict[str, Any] = Depends(get_current_teacher)
+) -> ClassAnalyticsSummary:
+    """Get summary analytics for a class.
+
+    Aggregates the teacher's assignments for the class, per-student
+    analytics, and the most frequent misconceptions (top 5). All DB
+    access is best-effort: failures are logged and produce a sparser
+    summary rather than an error response.
+    """
+    db = await get_teacher_database()
+    assignments = []
+    if db:
+        try:
+            assignments = await db.list_assignments(teacher_id=teacher["user_id"], class_id=class_id)
+        except Exception as e:
+            logger.error(f"Failed to list assignments: {e}")
+    # Fallback: filter the shared in-memory store by class and owner.
+    if not assignments:
+        assignments = [
+            a for a in _assignments_store.values()
+            if a["class_id"] == class_id and a["teacher_id"] == teacher["user_id"]
+        ]
+
+    total_units = len(assignments)
+    # NOTE(review): in-memory records store UnitAssignmentStatus enum
+    # members; comparing against plain strings relies on the enum being a
+    # str subclass — confirm against teacher_dashboard_models.
+    completed_units = sum(1 for a in assignments if a.get("status") == "completed")
+    active_units = sum(1 for a in assignments if a.get("status") == "active")
+
+    students = await get_students_in_class(class_id)
+    student_scores = {}
+    misconceptions = []
+    if db:
+        try:
+            for student in students:
+                student_id = student.get("id", student.get("student_id"))
+                analytics = await db.get_student_analytics(student_id)
+                if analytics:
+                    student_scores[student_id] = {
+                        "name": student.get("name", student_id[:8]),
+                        "avg_score": analytics.get("avg_postcheck_score", 0),
+                        "total_time": analytics.get("total_time_minutes", 0),
+                    }
+            misconceptions_data = await db.get_class_misconceptions(class_id)
+            for m in misconceptions_data:
+                misconceptions.append(MisconceptionReport(
+                    concept_id=m["concept_id"], concept_label=m["concept_label"],
+                    misconception=m["misconception"], affected_students=m["affected_students"],
+                    frequency=m["frequency"], unit_id=m["unit_id"], stop_id=m["stop_id"],
+                ))
+        except Exception as e:
+            logger.error(f"Failed to aggregate analytics: {e}")
+
+    # Rank students by average post-check score; top 3 are "top performers",
+    # the bottom 3 count as struggling only when scoring below 0.6.
+    sorted_students = sorted(student_scores.items(), key=lambda x: x[1]["avg_score"], reverse=True)
+    top_performers = [s[1]["name"] for s in sorted_students[:3]]
+    struggling_students = [s[1]["name"] for s in sorted_students[-3:] if s[1]["avg_score"] < 0.6]
+    total_time = sum(s["total_time"] for s in student_scores.values())
+    # Zero scores are excluded so students with no results do not drag
+    # the average down.
+    avg_scores = [s["avg_score"] for s in student_scores.values() if s["avg_score"] > 0]
+    avg_completion = sum(avg_scores) / len(avg_scores) if avg_scores else 0
+
+    return ClassAnalyticsSummary(
+        class_id=class_id, class_name=f"Klasse {class_id[:8]}",
+        total_units_assigned=total_units, units_completed=completed_units,
+        active_units=active_units, avg_completion_rate=avg_completion,
+        avg_learning_gain=None, total_time_hours=total_time / 60,
+        top_performers=top_performers, struggling_students=struggling_students,
+        common_misconceptions=misconceptions[:5],
+    )
+
+
+@router.get("/students/{student_id}/progress")
+async def get_student_progress(
+    student_id: str,
+    teacher: Dict[str, Any] = Depends(get_current_teacher)
+) -> Dict[str, Any]:
+    """Get detailed progress for a specific student.
+
+    Returns the DB's full-progress record when available, otherwise an
+    empty-progress placeholder dict with the same top-level keys.
+
+    NOTE(review): `teacher` only enforces authentication — there is no
+    check that this student belongs to one of the teacher's classes, so
+    any authenticated teacher can read any student's progress. Confirm
+    this is intended.
+    """
+    db = await get_teacher_database()
+    if db:
+        try:
+            progress = await db.get_student_full_progress(student_id)
+            return progress
+        except Exception as e:
+            logger.error(f"Failed to get student progress: {e}")
+    # Fallback shape when the DB is unavailable or the lookup failed.
+    return {
+        "student_id": student_id, "units_attempted": 0, "units_completed": 0,
+        "avg_score": 0.0, "total_time_minutes": 0, "sessions": [],
+    }
+
+
+# ==============================================
+# API Endpoints - Content Resources
+# ==============================================
+
+@router.get("/assignments/{assignment_id}/resources", response_model=List[ContentResource])
+async def get_assignment_resources(
+    assignment_id: str,
+    teacher: Dict[str, Any] = Depends(get_current_teacher),
+    # FastAPI injects the Request via the annotation; the None default
+    # only applies when the function is called outside the framework.
+    request: Request = None
+) -> List[ContentResource]:
+    """Get generated content resources for an assignment.
+
+    Returns fixed links (H5P, HTML worksheet, PDF worksheet) derived from
+    the assignment's unit_id — the URLs are constructed, not checked for
+    existence.
+
+    Raises:
+        HTTPException 404: assignment missing or owned by another teacher.
+    """
+    db = await get_teacher_database()
+    assignment = None
+    if db:
+        try:
+            assignment = await db.get_assignment(assignment_id)
+        except Exception as e:
+            logger.error(f"Failed to get assignment: {e}")
+    # Fallback to the shared in-memory store, then enforce ownership.
+    if not assignment and assignment_id in _assignments_store:
+        assignment = _assignments_store[assignment_id]
+    if not assignment or assignment["teacher_id"] != teacher["user_id"]:
+        raise HTTPException(status_code=404, detail="Assignment not found")
+
+    unit_id = assignment["unit_id"]
+    # Derive absolute URLs from the incoming request; hard-coded host is
+    # only the non-framework fallback.
+    base_url = str(request.base_url).rstrip("/") if request else "http://localhost:8000"
+    return [
+        ContentResource(resource_type="h5p", title=f"{unit_id} - H5P Aktivitaeten",
+                        url=f"{base_url}/api/units/content/{unit_id}/h5p",
+                        generated_at=datetime.utcnow(), unit_id=unit_id),
+        ContentResource(resource_type="worksheet", title=f"{unit_id} - Arbeitsblatt (HTML)",
+                        url=f"{base_url}/api/units/content/{unit_id}/worksheet",
+                        generated_at=datetime.utcnow(), unit_id=unit_id),
+        ContentResource(resource_type="pdf", title=f"{unit_id} - Arbeitsblatt (PDF)",
+                        url=f"{base_url}/api/units/content/{unit_id}/worksheet.pdf",
+                        generated_at=datetime.utcnow(), unit_id=unit_id),
+    ]
+
+
+@router.post("/assignments/{assignment_id}/regenerate-content")
+async def regenerate_content(
+    assignment_id: str,
+    resource_type: str = Query("all", description="h5p, pdf, or all"),
+    teacher: Dict[str, Any] = Depends(get_current_teacher)
+) -> Dict[str, Any]:
+    """Trigger regeneration of content resources.
+
+    NOTE(review): this is currently a stub — it validates ownership and
+    logs, but no background job is actually enqueued despite the
+    "queued" status in the response. Confirm whether a worker picks this
+    up elsewhere.
+
+    Raises:
+        HTTPException 404: assignment missing or owned by another teacher.
+    """
+    db = await get_teacher_database()
+    assignment = None
+    if db:
+        try:
+            assignment = await db.get_assignment(assignment_id)
+        except Exception as e:
+            logger.error(f"Failed to get assignment: {e}")
+    # Fallback to the shared in-memory store, then enforce ownership.
+    if not assignment and assignment_id in _assignments_store:
+        assignment = _assignments_store[assignment_id]
+    if not assignment or assignment["teacher_id"] != teacher["user_id"]:
+        raise HTTPException(status_code=404, detail="Assignment not found")
+
+    logger.info(f"Content regeneration triggered for {assignment['unit_id']}: {resource_type}")
+    return {
+        "status": "queued", "assignment_id": assignment_id,
+        "unit_id": assignment["unit_id"], "resource_type": resource_type,
+        "message": "Content regeneration has been queued",
+    }
diff --git a/backend-lehrer/teacher_dashboard_api.py b/backend-lehrer/teacher_dashboard_api.py
index 04c217b..0212acf 100644
--- a/backend-lehrer/teacher_dashboard_api.py
+++ b/backend-lehrer/teacher_dashboard_api.py
@@ -1,245 +1,42 @@
# ==============================================
# Breakpilot Drive - Teacher Dashboard API
# ==============================================
-# Lehrer-Dashboard fuer Unit-Zuweisung und Analytics:
-# - Units zu Klassen zuweisen
-# - Schueler-Fortschritt einsehen
-# - Klassen-Analytics
-# - H5P und PDF Content verwalten
-# - Unit-Einstellungen pro Klasse
+# Lehrer-Dashboard fuer Unit-Zuweisung und Analytics.
+#
+# Split structure:
+# - teacher_dashboard_models.py: Models, Auth, DB/School helpers
+# - teacher_dashboard_analytics.py: Progress, analytics, content routes
+# - teacher_dashboard_api.py: Assignment CRUD, dashboard, units (this file)
-from fastapi import APIRouter, HTTPException, Query, Depends, Request
-from pydantic import BaseModel, Field
+from fastapi import APIRouter, HTTPException, Query, Depends
from typing import List, Optional, Dict, Any
from datetime import datetime, timedelta
-from enum import Enum
import uuid
-import os
import logging
-import httpx
+
+from teacher_dashboard_models import (
+ UnitAssignmentStatus, TeacherControlSettings, AssignUnitRequest,
+ UnitAssignment,
+ get_current_teacher, get_teacher_database,
+ get_classes_for_teacher,
+ REQUIRE_AUTH,
+)
+from teacher_dashboard_analytics import (
+ router as analytics_router,
+ set_assignments_store,
+)
logger = logging.getLogger(__name__)
-# Feature flags
-USE_DATABASE = os.getenv("GAME_USE_DATABASE", "true").lower() == "true"
-REQUIRE_AUTH = os.getenv("TEACHER_REQUIRE_AUTH", "true").lower() == "true"
-SCHOOL_SERVICE_URL = os.getenv("SCHOOL_SERVICE_URL", "http://school-service:8084")
-
router = APIRouter(prefix="/api/teacher", tags=["Teacher Dashboard"])
-
-# ==============================================
-# Pydantic Models
-# ==============================================
-
-class UnitAssignmentStatus(str, Enum):
- """Status of a unit assignment"""
- DRAFT = "draft"
- ACTIVE = "active"
- COMPLETED = "completed"
- ARCHIVED = "archived"
-
-
-class TeacherControlSettings(BaseModel):
- """Unit settings that teachers can configure"""
- allow_skip: bool = True
- allow_replay: bool = True
- max_time_per_stop_sec: int = 90
- show_hints: bool = True
- require_precheck: bool = True
- require_postcheck: bool = True
-
-
-class AssignUnitRequest(BaseModel):
- """Request to assign a unit to a class"""
- unit_id: str
- class_id: str
- due_date: Optional[datetime] = None
- settings: Optional[TeacherControlSettings] = None
- notes: Optional[str] = None
-
-
-class UnitAssignment(BaseModel):
- """Unit assignment record"""
- assignment_id: str
- unit_id: str
- class_id: str
- teacher_id: str
- status: UnitAssignmentStatus
- settings: TeacherControlSettings
- due_date: Optional[datetime] = None
- notes: Optional[str] = None
- created_at: datetime
- updated_at: datetime
-
-
-class StudentUnitProgress(BaseModel):
- """Progress of a single student on a unit"""
- student_id: str
- student_name: str
- session_id: Optional[str] = None
- status: str # "not_started", "in_progress", "completed"
- completion_rate: float = 0.0
- precheck_score: Optional[float] = None
- postcheck_score: Optional[float] = None
- learning_gain: Optional[float] = None
- time_spent_minutes: int = 0
- last_activity: Optional[datetime] = None
- current_stop: Optional[str] = None
- stops_completed: int = 0
- total_stops: int = 0
-
-
-class ClassUnitProgress(BaseModel):
- """Overall progress of a class on a unit"""
- assignment_id: str
- unit_id: str
- unit_title: str
- class_id: str
- class_name: str
- total_students: int
- started_count: int
- completed_count: int
- avg_completion_rate: float
- avg_precheck_score: Optional[float] = None
- avg_postcheck_score: Optional[float] = None
- avg_learning_gain: Optional[float] = None
- avg_time_minutes: float
- students: List[StudentUnitProgress]
-
-
-class MisconceptionReport(BaseModel):
- """Report of detected misconceptions"""
- concept_id: str
- concept_label: str
- misconception: str
- affected_students: List[str]
- frequency: int
- unit_id: str
- stop_id: str
-
-
-class ClassAnalyticsSummary(BaseModel):
- """Summary analytics for a class"""
- class_id: str
- class_name: str
- total_units_assigned: int
- units_completed: int
- active_units: int
- avg_completion_rate: float
- avg_learning_gain: Optional[float]
- total_time_hours: float
- top_performers: List[str]
- struggling_students: List[str]
- common_misconceptions: List[MisconceptionReport]
-
-
-class ContentResource(BaseModel):
- """Generated content resource"""
- resource_type: str # "h5p", "pdf", "worksheet"
- title: str
- url: str
- generated_at: datetime
- unit_id: str
-
-
-# ==============================================
-# Auth Dependency
-# ==============================================
-
-async def get_current_teacher(request: Request) -> Dict[str, Any]:
- """Get current teacher from JWT token."""
- if not REQUIRE_AUTH:
- # Dev mode: return demo teacher
- return {
- "user_id": "e9484ad9-32ee-4f2b-a4e1-d182e02ccf20",
- "email": "demo@breakpilot.app",
- "role": "teacher",
- "name": "Demo Lehrer"
- }
-
- auth_header = request.headers.get("Authorization", "")
- if not auth_header.startswith("Bearer "):
- raise HTTPException(status_code=401, detail="Missing authorization token")
-
- try:
- import jwt
- token = auth_header[7:]
- secret = os.getenv("JWT_SECRET", "dev-secret-key")
- payload = jwt.decode(token, secret, algorithms=["HS256"])
-
- if payload.get("role") not in ["teacher", "admin"]:
- raise HTTPException(status_code=403, detail="Teacher or admin role required")
-
- return payload
- except jwt.ExpiredSignatureError:
- raise HTTPException(status_code=401, detail="Token expired")
- except jwt.InvalidTokenError:
- raise HTTPException(status_code=401, detail="Invalid token")
-
-
-# ==============================================
-# Database Integration
-# ==============================================
-
-_teacher_db = None
-
-async def get_teacher_database():
- """Get teacher database instance with lazy initialization."""
- global _teacher_db
- if not USE_DATABASE:
- return None
- if _teacher_db is None:
- try:
- from unit.database import get_teacher_db
- _teacher_db = await get_teacher_db()
- logger.info("Teacher database initialized")
- except ImportError:
- logger.warning("Teacher database module not available")
- except Exception as e:
- logger.warning(f"Teacher database not available: {e}")
- return _teacher_db
-
-
-# ==============================================
-# School Service Integration
-# ==============================================
-
-async def get_classes_for_teacher(teacher_id: str) -> List[Dict[str, Any]]:
- """Get classes assigned to a teacher from school service."""
- async with httpx.AsyncClient(timeout=10.0) as client:
- try:
- response = await client.get(
- f"{SCHOOL_SERVICE_URL}/api/v1/school/classes",
- headers={"X-Teacher-ID": teacher_id}
- )
- if response.status_code == 200:
- return response.json()
- except Exception as e:
- logger.error(f"Failed to get classes from school service: {e}")
- return []
-
-
-async def get_students_in_class(class_id: str) -> List[Dict[str, Any]]:
- """Get students in a class from school service."""
- async with httpx.AsyncClient(timeout=10.0) as client:
- try:
- response = await client.get(
- f"{SCHOOL_SERVICE_URL}/api/v1/school/classes/{class_id}/students"
- )
- if response.status_code == 200:
- return response.json()
- except Exception as e:
- logger.error(f"Failed to get students from school service: {e}")
- return []
-
-
-# ==============================================
# In-Memory Storage (Fallback)
-# ==============================================
-
_assignments_store: Dict[str, Dict[str, Any]] = {}
+# Share the store with the analytics module and include its routes
+set_assignments_store(_assignments_store)
+router.include_router(analytics_router)
+
# ==============================================
# API Endpoints - Unit Assignment
@@ -250,28 +47,17 @@ async def assign_unit_to_class(
request_data: AssignUnitRequest,
teacher: Dict[str, Any] = Depends(get_current_teacher)
) -> UnitAssignment:
- """
- Assign a unit to a class.
-
- Creates an assignment that allows students in the class to play the unit.
- Teacher can configure settings like skip, replay, time limits.
- """
+ """Assign a unit to a class."""
assignment_id = str(uuid.uuid4())
now = datetime.utcnow()
-
settings = request_data.settings or TeacherControlSettings()
assignment = {
- "assignment_id": assignment_id,
- "unit_id": request_data.unit_id,
- "class_id": request_data.class_id,
- "teacher_id": teacher["user_id"],
- "status": UnitAssignmentStatus.ACTIVE,
- "settings": settings.model_dump(),
- "due_date": request_data.due_date,
- "notes": request_data.notes,
- "created_at": now,
- "updated_at": now,
+ "assignment_id": assignment_id, "unit_id": request_data.unit_id,
+ "class_id": request_data.class_id, "teacher_id": teacher["user_id"],
+ "status": UnitAssignmentStatus.ACTIVE, "settings": settings.model_dump(),
+ "due_date": request_data.due_date, "notes": request_data.notes,
+ "created_at": now, "updated_at": now,
}
db = await get_teacher_database()
@@ -281,22 +67,15 @@ async def assign_unit_to_class(
except Exception as e:
logger.error(f"Failed to store assignment: {e}")
- # Fallback: store in memory
_assignments_store[assignment_id] = assignment
-
logger.info(f"Unit {request_data.unit_id} assigned to class {request_data.class_id}")
return UnitAssignment(
- assignment_id=assignment_id,
- unit_id=request_data.unit_id,
- class_id=request_data.class_id,
- teacher_id=teacher["user_id"],
- status=UnitAssignmentStatus.ACTIVE,
- settings=settings,
- due_date=request_data.due_date,
- notes=request_data.notes,
- created_at=now,
- updated_at=now,
+ assignment_id=assignment_id, unit_id=request_data.unit_id,
+ class_id=request_data.class_id, teacher_id=teacher["user_id"],
+ status=UnitAssignmentStatus.ACTIVE, settings=settings,
+ due_date=request_data.due_date, notes=request_data.notes,
+ created_at=now, updated_at=now,
)
@@ -306,11 +85,7 @@ async def list_assignments(
status: Optional[UnitAssignmentStatus] = Query(None, description="Filter by status"),
teacher: Dict[str, Any] = Depends(get_current_teacher)
) -> List[UnitAssignment]:
- """
- List all unit assignments for the teacher.
-
- Optionally filter by class or status.
- """
+ """List all unit assignments for the teacher."""
db = await get_teacher_database()
assignments = []
@@ -325,7 +100,6 @@ async def list_assignments(
logger.error(f"Failed to list assignments: {e}")
if not assignments:
- # Fallback: filter in-memory store
for assignment in _assignments_store.values():
if assignment["teacher_id"] != teacher["user_id"]:
continue
@@ -337,16 +111,11 @@ async def list_assignments(
return [
UnitAssignment(
- assignment_id=a["assignment_id"],
- unit_id=a["unit_id"],
- class_id=a["class_id"],
- teacher_id=a["teacher_id"],
- status=a["status"],
- settings=TeacherControlSettings(**a["settings"]),
- due_date=a.get("due_date"),
- notes=a.get("notes"),
- created_at=a["created_at"],
- updated_at=a["updated_at"],
+ assignment_id=a["assignment_id"], unit_id=a["unit_id"],
+ class_id=a["class_id"], teacher_id=a["teacher_id"],
+ status=a["status"], settings=TeacherControlSettings(**a["settings"]),
+ due_date=a.get("due_date"), notes=a.get("notes"),
+ created_at=a["created_at"], updated_at=a["updated_at"],
)
for a in assignments
]
@@ -359,41 +128,30 @@ async def get_assignment(
) -> UnitAssignment:
"""Get details of a specific assignment."""
db = await get_teacher_database()
-
if db:
try:
assignment = await db.get_assignment(assignment_id)
if assignment and assignment["teacher_id"] == teacher["user_id"]:
return UnitAssignment(
- assignment_id=assignment["assignment_id"],
- unit_id=assignment["unit_id"],
- class_id=assignment["class_id"],
- teacher_id=assignment["teacher_id"],
+ assignment_id=assignment["assignment_id"], unit_id=assignment["unit_id"],
+ class_id=assignment["class_id"], teacher_id=assignment["teacher_id"],
status=assignment["status"],
settings=TeacherControlSettings(**assignment["settings"]),
- due_date=assignment.get("due_date"),
- notes=assignment.get("notes"),
- created_at=assignment["created_at"],
- updated_at=assignment["updated_at"],
+ due_date=assignment.get("due_date"), notes=assignment.get("notes"),
+ created_at=assignment["created_at"], updated_at=assignment["updated_at"],
)
except Exception as e:
logger.error(f"Failed to get assignment: {e}")
- # Fallback
if assignment_id in _assignments_store:
a = _assignments_store[assignment_id]
if a["teacher_id"] == teacher["user_id"]:
return UnitAssignment(
- assignment_id=a["assignment_id"],
- unit_id=a["unit_id"],
- class_id=a["class_id"],
- teacher_id=a["teacher_id"],
- status=a["status"],
- settings=TeacherControlSettings(**a["settings"]),
- due_date=a.get("due_date"),
- notes=a.get("notes"),
- created_at=a["created_at"],
- updated_at=a["updated_at"],
+ assignment_id=a["assignment_id"], unit_id=a["unit_id"],
+ class_id=a["class_id"], teacher_id=a["teacher_id"],
+ status=a["status"], settings=TeacherControlSettings(**a["settings"]),
+ due_date=a.get("due_date"), notes=a.get("notes"),
+ created_at=a["created_at"], updated_at=a["updated_at"],
)
raise HTTPException(status_code=404, detail="Assignment not found")
@@ -424,7 +182,6 @@ async def update_assignment(
if not assignment or assignment["teacher_id"] != teacher["user_id"]:
raise HTTPException(status_code=404, detail="Assignment not found")
- # Update fields
if settings:
assignment["settings"] = settings.model_dump()
if status:
@@ -444,16 +201,11 @@ async def update_assignment(
_assignments_store[assignment_id] = assignment
return UnitAssignment(
- assignment_id=assignment["assignment_id"],
- unit_id=assignment["unit_id"],
- class_id=assignment["class_id"],
- teacher_id=assignment["teacher_id"],
- status=assignment["status"],
- settings=TeacherControlSettings(**assignment["settings"]),
- due_date=assignment.get("due_date"),
- notes=assignment.get("notes"),
- created_at=assignment["created_at"],
- updated_at=assignment["updated_at"],
+ assignment_id=assignment["assignment_id"], unit_id=assignment["unit_id"],
+ class_id=assignment["class_id"], teacher_id=assignment["teacher_id"],
+ status=assignment["status"], settings=TeacherControlSettings(**assignment["settings"]),
+ due_date=assignment.get("due_date"), notes=assignment.get("notes"),
+ created_at=assignment["created_at"], updated_at=assignment["updated_at"],
)
@@ -464,7 +216,6 @@ async def delete_assignment(
) -> Dict[str, str]:
"""Delete/archive an assignment."""
db = await get_teacher_database()
-
if db:
try:
assignment = await db.get_assignment(assignment_id)
@@ -485,339 +236,6 @@ async def delete_assignment(
raise HTTPException(status_code=404, detail="Assignment not found")
-# ==============================================
-# API Endpoints - Progress & Analytics
-# ==============================================
-
-@router.get("/assignments/{assignment_id}/progress", response_model=ClassUnitProgress)
-async def get_assignment_progress(
- assignment_id: str,
- teacher: Dict[str, Any] = Depends(get_current_teacher)
-) -> ClassUnitProgress:
- """
- Get detailed progress for an assignment.
-
- Shows each student's status, scores, and time spent.
- """
- db = await get_teacher_database()
- assignment = None
-
- if db:
- try:
- assignment = await db.get_assignment(assignment_id)
- except Exception as e:
- logger.error(f"Failed to get assignment: {e}")
-
- if not assignment and assignment_id in _assignments_store:
- assignment = _assignments_store[assignment_id]
-
- if not assignment or assignment["teacher_id"] != teacher["user_id"]:
- raise HTTPException(status_code=404, detail="Assignment not found")
-
- # Get students in class
- students = await get_students_in_class(assignment["class_id"])
-
- # Get progress for each student
- student_progress = []
- total_completion = 0.0
- total_precheck = 0.0
- total_postcheck = 0.0
- total_time = 0
- precheck_count = 0
- postcheck_count = 0
- started = 0
- completed = 0
-
- for student in students:
- student_id = student.get("id", student.get("student_id"))
- progress = StudentUnitProgress(
- student_id=student_id,
- student_name=student.get("name", f"Student {student_id[:8]}"),
- status="not_started",
- completion_rate=0.0,
- stops_completed=0,
- total_stops=0,
- )
-
- if db:
- try:
- session_data = await db.get_student_unit_session(
- student_id=student_id,
- unit_id=assignment["unit_id"]
- )
- if session_data:
- progress.session_id = session_data.get("session_id")
- progress.status = "completed" if session_data.get("completed_at") else "in_progress"
- progress.completion_rate = session_data.get("completion_rate", 0.0)
- progress.precheck_score = session_data.get("precheck_score")
- progress.postcheck_score = session_data.get("postcheck_score")
- progress.time_spent_minutes = session_data.get("duration_seconds", 0) // 60
- progress.last_activity = session_data.get("updated_at")
- progress.stops_completed = session_data.get("stops_completed", 0)
- progress.total_stops = session_data.get("total_stops", 0)
-
- if progress.precheck_score is not None and progress.postcheck_score is not None:
- progress.learning_gain = progress.postcheck_score - progress.precheck_score
-
- # Aggregate stats
- total_completion += progress.completion_rate
- total_time += progress.time_spent_minutes
- if progress.precheck_score is not None:
- total_precheck += progress.precheck_score
- precheck_count += 1
- if progress.postcheck_score is not None:
- total_postcheck += progress.postcheck_score
- postcheck_count += 1
- if progress.status != "not_started":
- started += 1
- if progress.status == "completed":
- completed += 1
- except Exception as e:
- logger.error(f"Failed to get student progress: {e}")
-
- student_progress.append(progress)
-
- total_students = len(students) or 1 # Avoid division by zero
-
- return ClassUnitProgress(
- assignment_id=assignment_id,
- unit_id=assignment["unit_id"],
- unit_title=f"Unit {assignment['unit_id']}", # Would load from unit definition
- class_id=assignment["class_id"],
- class_name=f"Class {assignment['class_id'][:8]}", # Would load from school service
- total_students=len(students),
- started_count=started,
- completed_count=completed,
- avg_completion_rate=total_completion / total_students,
- avg_precheck_score=total_precheck / precheck_count if precheck_count > 0 else None,
- avg_postcheck_score=total_postcheck / postcheck_count if postcheck_count > 0 else None,
- avg_learning_gain=(total_postcheck / postcheck_count - total_precheck / precheck_count)
- if precheck_count > 0 and postcheck_count > 0 else None,
- avg_time_minutes=total_time / started if started > 0 else 0,
- students=student_progress,
- )
-
-
-@router.get("/classes/{class_id}/analytics", response_model=ClassAnalyticsSummary)
-async def get_class_analytics(
- class_id: str,
- teacher: Dict[str, Any] = Depends(get_current_teacher)
-) -> ClassAnalyticsSummary:
- """
- Get summary analytics for a class.
-
- Includes all unit assignments, overall progress, and common misconceptions.
- """
- db = await get_teacher_database()
-
- # Get all assignments for this class
- assignments = []
- if db:
- try:
- assignments = await db.list_assignments(
- teacher_id=teacher["user_id"],
- class_id=class_id
- )
- except Exception as e:
- logger.error(f"Failed to list assignments: {e}")
-
- if not assignments:
- assignments = [
- a for a in _assignments_store.values()
- if a["class_id"] == class_id and a["teacher_id"] == teacher["user_id"]
- ]
-
- total_units = len(assignments)
- completed_units = sum(1 for a in assignments if a.get("status") == "completed")
- active_units = sum(1 for a in assignments if a.get("status") == "active")
-
- # Aggregate student performance
- students = await get_students_in_class(class_id)
- student_scores = {}
- misconceptions = []
-
- if db:
- try:
- for student in students:
- student_id = student.get("id", student.get("student_id"))
- analytics = await db.get_student_analytics(student_id)
- if analytics:
- student_scores[student_id] = {
- "name": student.get("name", student_id[:8]),
- "avg_score": analytics.get("avg_postcheck_score", 0),
- "total_time": analytics.get("total_time_minutes", 0),
- }
-
- # Get common misconceptions
- misconceptions_data = await db.get_class_misconceptions(class_id)
- for m in misconceptions_data:
- misconceptions.append(MisconceptionReport(
- concept_id=m["concept_id"],
- concept_label=m["concept_label"],
- misconception=m["misconception"],
- affected_students=m["affected_students"],
- frequency=m["frequency"],
- unit_id=m["unit_id"],
- stop_id=m["stop_id"],
- ))
- except Exception as e:
- logger.error(f"Failed to aggregate analytics: {e}")
-
- # Identify top and struggling students
- sorted_students = sorted(
- student_scores.items(),
- key=lambda x: x[1]["avg_score"],
- reverse=True
- )
- top_performers = [s[1]["name"] for s in sorted_students[:3]]
- struggling_students = [s[1]["name"] for s in sorted_students[-3:] if s[1]["avg_score"] < 0.6]
-
- total_time = sum(s["total_time"] for s in student_scores.values())
- avg_scores = [s["avg_score"] for s in student_scores.values() if s["avg_score"] > 0]
- avg_completion = sum(avg_scores) / len(avg_scores) if avg_scores else 0
-
- return ClassAnalyticsSummary(
- class_id=class_id,
- class_name=f"Klasse {class_id[:8]}",
- total_units_assigned=total_units,
- units_completed=completed_units,
- active_units=active_units,
- avg_completion_rate=avg_completion,
- avg_learning_gain=None, # Would calculate from pre/post scores
- total_time_hours=total_time / 60,
- top_performers=top_performers,
- struggling_students=struggling_students,
- common_misconceptions=misconceptions[:5],
- )
-
-
-@router.get("/students/{student_id}/progress")
-async def get_student_progress(
- student_id: str,
- teacher: Dict[str, Any] = Depends(get_current_teacher)
-) -> Dict[str, Any]:
- """
- Get detailed progress for a specific student.
-
- Shows all units attempted and their performance.
- """
- db = await get_teacher_database()
-
- if db:
- try:
- progress = await db.get_student_full_progress(student_id)
- return progress
- except Exception as e:
- logger.error(f"Failed to get student progress: {e}")
-
- return {
- "student_id": student_id,
- "units_attempted": 0,
- "units_completed": 0,
- "avg_score": 0.0,
- "total_time_minutes": 0,
- "sessions": [],
- }
-
-
-# ==============================================
-# API Endpoints - Content Resources
-# ==============================================
-
-@router.get("/assignments/{assignment_id}/resources", response_model=List[ContentResource])
-async def get_assignment_resources(
- assignment_id: str,
- teacher: Dict[str, Any] = Depends(get_current_teacher),
- request: Request = None
-) -> List[ContentResource]:
- """
- Get generated content resources for an assignment.
-
- Returns links to H5P activities and PDF worksheets.
- """
- db = await get_teacher_database()
- assignment = None
-
- if db:
- try:
- assignment = await db.get_assignment(assignment_id)
- except Exception as e:
- logger.error(f"Failed to get assignment: {e}")
-
- if not assignment and assignment_id in _assignments_store:
- assignment = _assignments_store[assignment_id]
-
- if not assignment or assignment["teacher_id"] != teacher["user_id"]:
- raise HTTPException(status_code=404, detail="Assignment not found")
-
- unit_id = assignment["unit_id"]
- base_url = str(request.base_url).rstrip("/") if request else "http://localhost:8000"
-
- resources = [
- ContentResource(
- resource_type="h5p",
- title=f"{unit_id} - H5P Aktivitaeten",
- url=f"{base_url}/api/units/content/{unit_id}/h5p",
- generated_at=datetime.utcnow(),
- unit_id=unit_id,
- ),
- ContentResource(
- resource_type="worksheet",
- title=f"{unit_id} - Arbeitsblatt (HTML)",
- url=f"{base_url}/api/units/content/{unit_id}/worksheet",
- generated_at=datetime.utcnow(),
- unit_id=unit_id,
- ),
- ContentResource(
- resource_type="pdf",
- title=f"{unit_id} - Arbeitsblatt (PDF)",
- url=f"{base_url}/api/units/content/{unit_id}/worksheet.pdf",
- generated_at=datetime.utcnow(),
- unit_id=unit_id,
- ),
- ]
-
- return resources
-
-
-@router.post("/assignments/{assignment_id}/regenerate-content")
-async def regenerate_content(
- assignment_id: str,
- resource_type: str = Query("all", description="h5p, pdf, or all"),
- teacher: Dict[str, Any] = Depends(get_current_teacher)
-) -> Dict[str, Any]:
- """
- Trigger regeneration of content resources.
-
- Useful after updating unit definitions.
- """
- db = await get_teacher_database()
- assignment = None
-
- if db:
- try:
- assignment = await db.get_assignment(assignment_id)
- except Exception as e:
- logger.error(f"Failed to get assignment: {e}")
-
- if not assignment and assignment_id in _assignments_store:
- assignment = _assignments_store[assignment_id]
-
- if not assignment or assignment["teacher_id"] != teacher["user_id"]:
- raise HTTPException(status_code=404, detail="Assignment not found")
-
- # In production, this would trigger async job to regenerate content
- logger.info(f"Content regeneration triggered for {assignment['unit_id']}: {resource_type}")
-
- return {
- "status": "queued",
- "assignment_id": assignment_id,
- "unit_id": assignment["unit_id"],
- "resource_type": resource_type,
- "message": "Content regeneration has been queued",
- }
-
-
# ==============================================
# API Endpoints - Available Units
# ==============================================
@@ -829,51 +247,30 @@ async def list_available_units(
locale: str = Query("de-DE", description="Locale"),
teacher: Dict[str, Any] = Depends(get_current_teacher)
) -> List[Dict[str, Any]]:
- """
- List all available units for assignment.
-
- Teachers see all published units matching their criteria.
- """
+ """List all available units for assignment."""
db = await get_teacher_database()
-
if db:
try:
- units = await db.list_available_units(
- grade=grade,
- template=template,
- locale=locale
- )
+ units = await db.list_available_units(grade=grade, template=template, locale=locale)
return units
except Exception as e:
logger.error(f"Failed to list units: {e}")
-
- # Fallback: return demo units
return [
{
- "unit_id": "bio_eye_lightpath_v1",
- "title": "Auge - Lichtstrahl-Flug",
- "template": "flight_path",
- "grade_band": ["5", "6", "7"],
- "duration_minutes": 8,
- "difficulty": "base",
+ "unit_id": "bio_eye_lightpath_v1", "title": "Auge - Lichtstrahl-Flug",
+ "template": "flight_path", "grade_band": ["5", "6", "7"],
+ "duration_minutes": 8, "difficulty": "base",
"description": "Reise durch das Auge und folge dem Lichtstrahl",
- "learning_objectives": [
- "Verstehen des Lichtwegs durch das Auge",
- "Funktionen der Augenbestandteile benennen",
- ],
+ "learning_objectives": ["Verstehen des Lichtwegs durch das Auge",
+ "Funktionen der Augenbestandteile benennen"],
},
{
"unit_id": "math_pizza_equivalence_v1",
"title": "Pizza-Boxenstopp - Brueche und Prozent",
- "template": "station_loop",
- "grade_band": ["5", "6"],
- "duration_minutes": 10,
- "difficulty": "base",
+ "template": "station_loop", "grade_band": ["5", "6"],
+ "duration_minutes": 10, "difficulty": "base",
"description": "Entdecke die Verbindung zwischen Bruechen, Dezimalzahlen und Prozent",
- "learning_objectives": [
- "Brueche in Prozent umrechnen",
- "Aequivalenzen erkennen",
- ],
+ "learning_objectives": ["Brueche in Prozent umrechnen", "Aequivalenzen erkennen"],
},
]
@@ -886,54 +283,38 @@ async def list_available_units(
async def get_dashboard(
teacher: Dict[str, Any] = Depends(get_current_teacher)
) -> Dict[str, Any]:
- """
- Get teacher dashboard overview.
-
- Summary of all classes, active assignments, and alerts.
- """
+ """Get teacher dashboard overview."""
db = await get_teacher_database()
-
- # Get teacher's classes
classes = await get_classes_for_teacher(teacher["user_id"])
- # Get all active assignments
active_assignments = []
if db:
try:
active_assignments = await db.list_assignments(
- teacher_id=teacher["user_id"],
- status="active"
+ teacher_id=teacher["user_id"], status="active"
)
except Exception as e:
logger.error(f"Failed to list assignments: {e}")
-
if not active_assignments:
active_assignments = [
a for a in _assignments_store.values()
if a["teacher_id"] == teacher["user_id"] and a.get("status") == "active"
]
- # Calculate alerts (students falling behind, due dates, etc.)
alerts = []
for assignment in active_assignments:
if assignment.get("due_date") and assignment["due_date"] < datetime.utcnow() + timedelta(days=2):
alerts.append({
- "type": "due_soon",
- "assignment_id": assignment["assignment_id"],
- "message": f"Zuweisung endet in weniger als 2 Tagen",
+ "type": "due_soon", "assignment_id": assignment["assignment_id"],
+ "message": "Zuweisung endet in weniger als 2 Tagen",
})
return {
- "teacher": {
- "id": teacher["user_id"],
- "name": teacher.get("name", "Lehrer"),
- "email": teacher.get("email"),
- },
- "classes": len(classes),
- "active_assignments": len(active_assignments),
+ "teacher": {"id": teacher["user_id"], "name": teacher.get("name", "Lehrer"),
+ "email": teacher.get("email")},
+ "classes": len(classes), "active_assignments": len(active_assignments),
"total_students": sum(c.get("student_count", 0) for c in classes),
- "alerts": alerts,
- "recent_activity": [], # Would load recent session completions
+ "alerts": alerts, "recent_activity": [],
}
@@ -942,10 +323,7 @@ async def health_check() -> Dict[str, Any]:
"""Health check for teacher dashboard API."""
db = await get_teacher_database()
db_status = "connected" if db else "in-memory"
-
return {
- "status": "healthy",
- "service": "teacher-dashboard",
- "database": db_status,
- "auth_required": REQUIRE_AUTH,
+ "status": "healthy", "service": "teacher-dashboard",
+ "database": db_status, "auth_required": REQUIRE_AUTH,
}
diff --git a/backend-lehrer/teacher_dashboard_models.py b/backend-lehrer/teacher_dashboard_models.py
new file mode 100644
index 0000000..88e6d9c
--- /dev/null
+++ b/backend-lehrer/teacher_dashboard_models.py
@@ -0,0 +1,226 @@
+"""
+Teacher Dashboard - Pydantic Models, Auth Dependency, and Service Helpers.
+"""
+
+import os
+import logging
+from datetime import datetime
+from typing import List, Optional, Dict, Any
+from enum import Enum
+
+from fastapi import HTTPException, Request
+from pydantic import BaseModel
+import httpx
+
+logger = logging.getLogger(__name__)
+
+# Feature flags
+USE_DATABASE = os.getenv("GAME_USE_DATABASE", "true").lower() == "true"
+REQUIRE_AUTH = os.getenv("TEACHER_REQUIRE_AUTH", "true").lower() == "true"
+SCHOOL_SERVICE_URL = os.getenv("SCHOOL_SERVICE_URL", "http://school-service:8084")
+
+
+# ==============================================
+# Pydantic Models
+# ==============================================
+
class UnitAssignmentStatus(str, Enum):
    """Lifecycle status of a unit assignment.

    Subclasses ``str`` so members serialize as plain strings in JSON and
    Pydantic models.
    """
    DRAFT = "draft"
    ACTIVE = "active"
    COMPLETED = "completed"
    ARCHIVED = "archived"
+
+
class TeacherControlSettings(BaseModel):
    """Per-assignment unit settings that teachers can configure.

    Covers skip/replay permissions, the per-stop time budget, hint
    visibility, and whether pre-/post-checks are mandatory.
    """
    allow_skip: bool = True
    allow_replay: bool = True
    max_time_per_stop_sec: int = 90  # time budget per stop, in seconds
    show_hints: bool = True
    require_precheck: bool = True
    require_postcheck: bool = True
+
+
class AssignUnitRequest(BaseModel):
    """Request body for assigning a unit to a class."""
    unit_id: str
    class_id: str
    due_date: Optional[datetime] = None
    settings: Optional[TeacherControlSettings] = None  # presumably server defaults apply when omitted — TODO confirm
    notes: Optional[str] = None
+
+
class UnitAssignment(BaseModel):
    """Persisted record of a unit assigned to a class by a teacher."""
    assignment_id: str
    unit_id: str
    class_id: str
    teacher_id: str
    status: UnitAssignmentStatus
    settings: TeacherControlSettings
    due_date: Optional[datetime] = None
    notes: Optional[str] = None
    created_at: datetime
    updated_at: datetime
+
+
class StudentUnitProgress(BaseModel):
    """Progress of a single student on one assigned unit."""
    student_id: str
    student_name: str
    session_id: Optional[str] = None
    status: str  # "not_started", "in_progress", "completed"
    completion_rate: float = 0.0
    precheck_score: Optional[float] = None
    postcheck_score: Optional[float] = None
    learning_gain: Optional[float] = None  # presumably postcheck minus precheck — verify against analytics
    time_spent_minutes: int = 0
    last_activity: Optional[datetime] = None
    current_stop: Optional[str] = None
    stops_completed: int = 0
    total_stops: int = 0
+
+
class ClassUnitProgress(BaseModel):
    """Aggregated progress of one class on one assigned unit."""
    assignment_id: str
    unit_id: str
    unit_title: str
    class_id: str
    class_name: str
    total_students: int
    started_count: int
    completed_count: int
    avg_completion_rate: float
    avg_precheck_score: Optional[float] = None
    avg_postcheck_score: Optional[float] = None
    avg_learning_gain: Optional[float] = None
    avg_time_minutes: float
    students: List[StudentUnitProgress]  # per-student breakdown
+
+
class MisconceptionReport(BaseModel):
    """Report of a misconception detected at a specific stop of a unit."""
    concept_id: str
    concept_label: str
    misconception: str
    affected_students: List[str]
    frequency: int
    unit_id: str
    stop_id: str
+
+
class ClassAnalyticsSummary(BaseModel):
    """Summary analytics for a class across all of its assigned units."""
    class_id: str
    class_name: str
    total_units_assigned: int
    units_completed: int
    active_units: int
    avg_completion_rate: float
    avg_learning_gain: Optional[float]  # currently always None in the API (not yet computed)
    total_time_hours: float
    top_performers: List[str]       # names of the students with the best average scores
    struggling_students: List[str]  # names of students with average score below threshold
    common_misconceptions: List[MisconceptionReport]
+
+
class ContentResource(BaseModel):
    """A generated, downloadable content resource for a unit."""
    resource_type: str  # "h5p", "pdf", "worksheet"
    title: str
    url: str
    generated_at: datetime
    unit_id: str
+
+
+# ==============================================
+# Auth Dependency
+# ==============================================
+
async def get_current_teacher(request: Request) -> Dict[str, Any]:
    """Resolve the current teacher from the request's JWT bearer token.

    When REQUIRE_AUTH is disabled (TEACHER_REQUIRE_AUTH=false) a fixed demo
    teacher payload is returned so local development works without tokens.

    Args:
        request: Incoming request; the token is read from the
            ``Authorization: Bearer <token>`` header.

    Returns:
        The decoded JWT payload (contains at least ``user_id`` and ``role``).

    Raises:
        HTTPException: 401 for a missing, expired, or invalid token;
            403 when the token's role is neither "teacher" nor "admin".
    """
    if not REQUIRE_AUTH:
        return {
            "user_id": "e9484ad9-32ee-4f2b-a4e1-d182e02ccf20",
            "email": "demo@breakpilot.app",
            "role": "teacher",
            "name": "Demo Lehrer"
        }

    auth_header = request.headers.get("Authorization", "")
    if not auth_header.startswith("Bearer "):
        raise HTTPException(status_code=401, detail="Missing authorization token")

    # Import outside the try block: if PyJWT is missing we get a clean
    # ImportError instead of a NameError while evaluating the except clauses.
    import jwt

    token = auth_header[7:]  # strip the "Bearer " prefix
    secret = os.getenv("JWT_SECRET", "dev-secret-key")
    try:
        payload = jwt.decode(token, secret, algorithms=["HS256"])
    except jwt.ExpiredSignatureError:
        raise HTTPException(status_code=401, detail="Token expired")
    except jwt.InvalidTokenError:
        raise HTTPException(status_code=401, detail="Invalid token")

    if payload.get("role") not in ["teacher", "admin"]:
        raise HTTPException(status_code=403, detail="Teacher or admin role required")

    return payload
+
+
+# ==============================================
+# Database Integration
+# ==============================================
+
+_teacher_db = None
+
+
async def get_teacher_database():
    """Get the teacher database instance with lazy initialization.

    Returns None when GAME_USE_DATABASE is disabled or when the database
    module/connection is unavailable; callers fall back to the in-memory
    stores in that case.
    """
    global _teacher_db
    if not USE_DATABASE:
        return None
    if _teacher_db is None:
        try:
            # Imported lazily so the service still starts without the DB layer.
            from unit.database import get_teacher_db
            _teacher_db = await get_teacher_db()
            logger.info("Teacher database initialized")
        except ImportError:
            logger.warning("Teacher database module not available")
        except Exception as e:
            logger.warning(f"Teacher database not available: {e}")
    return _teacher_db
+
+
+# ==============================================
+# School Service Integration
+# ==============================================
+
async def get_classes_for_teacher(teacher_id: str) -> List[Dict[str, Any]]:
    """Fetch the classes assigned to *teacher_id* from the school service.

    The teacher is identified via the ``X-Teacher-ID`` header. Returns an
    empty list on transport errors or non-200 responses.
    """
    async with httpx.AsyncClient(timeout=10.0) as client:
        try:
            resp = await client.get(
                f"{SCHOOL_SERVICE_URL}/api/v1/school/classes",
                headers={"X-Teacher-ID": teacher_id},
            )
            if resp.status_code == 200:
                return resp.json()
        except Exception as exc:
            logger.error(f"Failed to get classes from school service: {exc}")
    return []
+
+
async def get_students_in_class(class_id: str) -> List[Dict[str, Any]]:
    """Fetch the student roster of *class_id* from the school service.

    Returns an empty list on transport errors or non-200 responses.
    """
    endpoint = f"{SCHOOL_SERVICE_URL}/api/v1/school/classes/{class_id}/students"
    async with httpx.AsyncClient(timeout=10.0) as client:
        try:
            resp = await client.get(endpoint)
            if resp.status_code == 200:
                return resp.json()
        except Exception as exc:
            logger.error(f"Failed to get students from school service: {exc}")
    return []
diff --git a/klausur-service/backend/legal_templates_chunking.py b/klausur-service/backend/legal_templates_chunking.py
new file mode 100644
index 0000000..724b0da
--- /dev/null
+++ b/klausur-service/backend/legal_templates_chunking.py
@@ -0,0 +1,282 @@
+"""
+Legal Templates Chunking — text splitting, type inference, and chunk creation.
+
+Extracted from legal_templates_ingestion.py to keep files under 500 LOC.
+
+Lizenz: Apache 2.0
+"""
+
+import re
+from dataclasses import dataclass, field
+from datetime import datetime
+from typing import List, Optional
+
+from template_sources import SourceConfig
+from github_crawler import ExtractedDocument
+
+
+# Chunking configuration defaults (can be overridden by env vars in ingestion module)
+DEFAULT_CHUNK_SIZE = 1000
+DEFAULT_CHUNK_OVERLAP = 200
+
+
@dataclass
class TemplateChunk:
    """A chunk of template text ready for indexing.

    Bundles the chunk text with license, provenance, and usage metadata so
    every indexed chunk stays attributable to its original source.
    """
    # --- Content ---
    text: str
    chunk_index: int  # 0-based position of this chunk within the document
    document_title: str
    template_type: str              # e.g. "privacy_policy"; see infer_template_type
    clause_category: Optional[str]  # e.g. "haftung"; see infer_clause_category
    language: str
    jurisdiction: str
    # --- License metadata ---
    license_id: str
    license_name: str
    license_url: str
    attribution_required: bool
    share_alike: bool
    no_derivatives: bool
    commercial_use: bool
    # --- Provenance ---
    source_name: str
    source_url: str
    source_repo: Optional[str]
    source_commit: Optional[str]
    source_file: str
    source_hash: str
    attribution_text: Optional[str]  # pre-rendered attribution line, if required
    copyright_notice: Optional[str]
    # --- Document characteristics ---
    is_complete_document: bool
    is_modular: bool
    requires_customization: bool  # True when the document contains placeholders
    placeholders: List[str]
    # --- Usage permissions derived from the license ---
    training_allowed: bool
    output_allowed: bool
    modification_allowed: bool
    distortion_prohibited: bool
+
+
@dataclass
class IngestionStatus:
    """Status of a source ingestion run."""
    source_name: str
    status: str  # "pending", "running", "completed", "failed"
    documents_found: int = 0
    chunks_created: int = 0  # chunks produced by the splitter
    chunks_indexed: int = 0  # chunks reported as written to the index
    errors: List[str] = field(default_factory=list)
    started_at: Optional[datetime] = None
    completed_at: Optional[datetime] = None
+
+
def split_sentences(text: str) -> List[str]:
    """Split *text* into sentences with basic abbreviation handling.

    Periods belonging to known abbreviations (e.g. "z.B.", "ca.") and to
    decimal numbers ("3.50") are temporarily replaced with a sentinel
    character so they do not trigger a sentence split; the sentinel is
    restored to "." afterwards. (The original code's non-ASCII sentinel
    characters were lost to encoding, which made ``s.replace('', '.')``
    insert a dot between every character; an explicit NUL sentinel fixes
    that.)

    Args:
        text: Input text; may be empty.

    Returns:
        List of non-empty, stripped sentences in input order.
    """
    # NUL cannot occur in normal text, so it is a safe sentinel.
    sentinel = '\x00'

    # Protect common German/English abbreviations.
    abbreviations = ['bzw', 'ca', 'd.h', 'etc', 'ggf', 'inkl', 'u.a', 'usw', 'z.B', 'z.b', 'e.g', 'i.e', 'vs', 'no']
    protected = text
    for abbr in abbreviations:
        pattern = re.compile(r'\b' + re.escape(abbr) + r'\.', re.IGNORECASE)
        # Replace every "." inside the matched abbreviation with the
        # sentinel, preserving the original casing of the match.
        protected = pattern.sub(lambda m: m.group(0).replace('.', sentinel), protected)

    # Protect decimal numbers (e.g. "3.50").
    protected = re.sub(r'(\d)\.(\d)', r'\1' + sentinel + r'\2', protected)

    # Split on sentence-ending punctuation followed by whitespace.
    sentences = re.split(r'(?<=[.!?])\s+', protected)

    # Restore the protected periods and drop empty fragments.
    result = []
    for s in sentences:
        s = s.replace(sentinel, '.').strip()
        if s:
            result.append(s)

    return result
+
+
def chunk_text(
    text: str,
    chunk_size: int = DEFAULT_CHUNK_SIZE,
    overlap: int = DEFAULT_CHUNK_OVERLAP,
) -> List[str]:
    """
    Split text into overlapping chunks.
    Respects paragraph and sentence boundaries where possible.

    Paragraphs (separated by blank lines) are packed into chunks of at most
    roughly `chunk_size` characters; a paragraph that is itself longer than
    `chunk_size` is split at sentence boundaries instead.

    NOTE(review): the `overlap` parameter is never read in this body.
    Overlap is only approximated in the sentence-splitting branch, which
    carries the last third of the accumulated sentences into the next chunk.

    Args:
        text: Input text; may be empty.
        chunk_size: Soft maximum chunk length in characters.
        overlap: Currently unused (see note above).

    Returns:
        List of non-empty, stripped chunk strings in document order.
    """
    if not text:
        return []

    # Short-circuit: the whole text fits into a single chunk.
    if len(text) <= chunk_size:
        return [text.strip()]

    # Split into paragraphs first
    paragraphs = text.split('\n\n')
    chunks = []
    current_chunk: List[str] = []
    current_length = 0

    for para in paragraphs:
        para = para.strip()
        if not para:
            continue

        para_length = len(para)

        if para_length > chunk_size:
            # Large paragraph: split by sentences
            # Flush whatever paragraphs were accumulated so far.
            if current_chunk:
                chunks.append('\n\n'.join(current_chunk))
                current_chunk = []
                current_length = 0

            # Split long paragraph by sentences
            sentences = split_sentences(para)
            for sentence in sentences:
                if current_length + len(sentence) + 1 > chunk_size:
                    if current_chunk:
                        chunks.append(' '.join(current_chunk))
                    # Keep overlap: carry the last third of the accumulated
                    # sentences (at least one) into the next chunk.
                    overlap_count = max(1, len(current_chunk) // 3)
                    current_chunk = current_chunk[-overlap_count:]
                    current_length = sum(len(s) + 1 for s in current_chunk)
                current_chunk.append(sentence)
                current_length += len(sentence) + 1

        elif current_length + para_length + 2 > chunk_size:
            # Paragraph would exceed chunk size
            if current_chunk:
                chunks.append('\n\n'.join(current_chunk))
                current_chunk = []
                current_length = 0
            current_chunk.append(para)
            current_length = para_length

        else:
            current_chunk.append(para)
            current_length += para_length + 2  # +2 for the "\n\n" joiner

    # Add final chunk
    if current_chunk:
        chunks.append('\n\n'.join(current_chunk))

    return [c.strip() for c in chunks if c.strip()]
+
+
def infer_template_type(doc: ExtractedDocument, source: SourceConfig) -> str:
    """Infer the template type from document content and metadata.

    Scans the document's text and title (case-insensitive) against a fixed
    indicator table, in table order. Falls back to the source's first
    declared template type, then to the generic "clause".
    """
    haystacks = (doc.text.lower(), doc.title.lower())

    type_indicators = {
        "privacy_policy": ["datenschutz", "privacy", "personal data", "personenbezogen"],
        "terms_of_service": ["nutzungsbedingungen", "terms of service", "terms of use", "agb"],
        "cookie_banner": ["cookie", "cookies", "tracking"],
        "impressum": ["impressum", "legal notice", "imprint"],
        "widerruf": ["widerruf", "cancellation", "withdrawal", "right to cancel"],
        "dpa": ["auftragsverarbeitung", "data processing agreement", "dpa"],
        "sla": ["service level", "availability", "uptime"],
        "nda": ["confidential", "non-disclosure", "geheimhaltung", "vertraulich"],
        "community_guidelines": ["community", "guidelines", "conduct", "verhaltens"],
        "acceptable_use": ["acceptable use", "acceptable usage", "nutzungsrichtlinien"],
    }

    for template_type, indicators in type_indicators.items():
        if any(term in hay for term in indicators for hay in haystacks):
            return template_type

    # Fall back to the source's declared types, then to a generic clause.
    if source.template_types:
        return source.template_types[0]
    return "clause"
+
+
def infer_clause_category(text: str) -> Optional[str]:
    """Return the first matching clause category for *text*, or None.

    Matching is a case-insensitive substring scan against a fixed table of
    German/English indicator terms; categories are tried in table order.
    """
    lowered = text.lower()

    indicator_table = {
        "haftung": ["haftung", "liability", "haftungsausschluss", "limitation"],
        "datenschutz": ["datenschutz", "privacy", "personal data", "personenbezogen"],
        "widerruf": ["widerruf", "cancellation", "withdrawal"],
        "gewaehrleistung": ["gewaehrleistung", "warranty", "garantie"],
        "kuendigung": ["kuendigung", "termination", "beendigung"],
        "zahlung": ["zahlung", "payment", "preis", "price"],
        "gerichtsstand": ["gerichtsstand", "jurisdiction", "governing law"],
        "aenderungen": ["aenderung", "modification", "amendment"],
        "schlussbestimmungen": ["schlussbestimmung", "miscellaneous", "final provisions"],
    }

    for category, indicators in indicator_table.items():
        if any(term in lowered for term in indicators):
            return category
    return None
+
+
def create_chunks(
    doc: ExtractedDocument,
    source: SourceConfig,
    chunk_size: int = DEFAULT_CHUNK_SIZE,
    chunk_overlap: int = DEFAULT_CHUNK_OVERLAP,
) -> List[TemplateChunk]:
    """Create template chunks from an extracted document.

    Splits the document text via chunk_text() and wraps each piece in a
    TemplateChunk carrying the source's license and provenance metadata.

    Args:
        doc: Extracted document (text, title, placeholders, sections, ...).
        source: Source configuration providing license info and jurisdiction.
        chunk_size: Soft maximum chunk length in characters.
        chunk_overlap: Forwarded to chunk_text() (currently unused there).

    Returns:
        One TemplateChunk per text chunk, in document order.
    """
    license_info = source.license_info
    # The template type is inferred once per document, not per chunk.
    template_type = infer_template_type(doc, source)

    # Chunk the text
    text_chunks = chunk_text(doc.text, chunk_size, chunk_overlap)

    chunks = []
    for i, chunk_text_str in enumerate(text_chunks):
        # Determine if this is a complete document or a clause
        # Heuristic: a single chunk longer than 500 chars is the whole document.
        is_complete = len(text_chunks) == 1 and len(chunk_text_str) > 500
        # Extracted sections or markdown headings indicate modular structure.
        is_modular = len(doc.sections) > 0 or '##' in doc.text
        requires_customization = len(doc.placeholders) > 0

        # Generate attribution text
        attribution_text = None
        if license_info.attribution_required:
            attribution_text = license_info.get_attribution_text(
                source.name,
                doc.source_url or source.get_source_url()
            )

        chunk = TemplateChunk(
            text=chunk_text_str,
            chunk_index=i,
            document_title=doc.title,
            template_type=template_type,
            clause_category=infer_clause_category(chunk_text_str),
            language=doc.language,
            jurisdiction=source.jurisdiction,
            license_id=license_info.id.value,
            license_name=license_info.name,
            license_url=license_info.url,
            attribution_required=license_info.attribution_required,
            share_alike=license_info.share_alike,
            no_derivatives=license_info.no_derivatives,
            commercial_use=license_info.commercial_use,
            source_name=source.name,
            source_url=doc.source_url or source.get_source_url(),
            source_repo=source.repo_url,
            source_commit=doc.source_commit,
            source_file=doc.file_path,
            source_hash=doc.source_hash,
            attribution_text=attribution_text,
            copyright_notice=None,
            is_complete_document=is_complete,
            is_modular=is_modular,
            requires_customization=requires_customization,
            placeholders=doc.placeholders,
            training_allowed=license_info.training_allowed,
            output_allowed=license_info.output_allowed,
            modification_allowed=license_info.modification_allowed,
            distortion_prohibited=license_info.distortion_prohibited,
        )
        chunks.append(chunk)

    return chunks
diff --git a/klausur-service/backend/legal_templates_cli.py b/klausur-service/backend/legal_templates_cli.py
new file mode 100644
index 0000000..f30b546
--- /dev/null
+++ b/klausur-service/backend/legal_templates_cli.py
@@ -0,0 +1,165 @@
+"""
+Legal Templates CLI — command-line entry point for ingestion and search.
+
+Extracted from legal_templates_ingestion.py to keep files under 500 LOC.
+
+Usage:
+ python legal_templates_cli.py --ingest-all
+ python legal_templates_cli.py --ingest-source github-site-policy
+ python legal_templates_cli.py --status
+ python legal_templates_cli.py --search "Datenschutzerklaerung"
+
+Lizenz: Apache 2.0
+"""
+
+import asyncio
+import json
+
+from template_sources import TEMPLATE_SOURCES, LicenseType
+from legal_templates_ingestion import LegalTemplatesIngestion
+
+
async def main():
    """CLI entry point.

    Parses command-line flags and dispatches to exactly one maintenance,
    ingestion, or search action on LegalTemplatesIngestion; prints help when
    no action flag is given. The ingestion client is always closed on exit.
    """
    import argparse

    parser = argparse.ArgumentParser(description="Legal Templates Ingestion")
    parser.add_argument(
        "--ingest-all",
        action="store_true",
        help="Ingest all enabled sources"
    )
    parser.add_argument(
        "--ingest-source",
        type=str,
        metavar="NAME",
        help="Ingest a specific source by name"
    )
    parser.add_argument(
        "--ingest-license",
        type=str,
        choices=["cc0", "mit", "cc_by_4", "public_domain"],
        help="Ingest all sources of a specific license type"
    )
    parser.add_argument(
        "--max-priority",
        type=int,
        default=3,
        help="Maximum priority level to ingest (1=highest, 5=lowest)"
    )
    parser.add_argument(
        "--status",
        action="store_true",
        help="Show collection status"
    )
    parser.add_argument(
        "--search",
        type=str,
        metavar="QUERY",
        help="Test search query"
    )
    parser.add_argument(
        "--template-type",
        type=str,
        help="Filter search by template type"
    )
    parser.add_argument(
        "--language",
        type=str,
        help="Filter search by language"
    )
    parser.add_argument(
        "--reset",
        action="store_true",
        help="Reset (delete and recreate) the collection"
    )
    parser.add_argument(
        "--delete-source",
        type=str,
        metavar="NAME",
        help="Delete all chunks from a source"
    )

    args = parser.parse_args()

    ingestion = LegalTemplatesIngestion()

    try:
        # Maintenance actions are checked first: reset, delete, status.
        if args.reset:
            ingestion.reset_collection()
            print("Collection reset successfully")

        elif args.delete_source:
            count = ingestion.delete_source(args.delete_source)
            print(f"Deleted {count} chunks from {args.delete_source}")

        elif args.status:
            status = ingestion.get_status()
            print(json.dumps(status, indent=2, default=str))

        # Ingestion actions: all sources, one source, or by license type.
        elif args.ingest_all:
            print(f"Ingesting all sources (max priority: {args.max_priority})...")
            results = await ingestion.ingest_all(max_priority=args.max_priority)
            print("\nResults:")
            for name, status in results.items():
                print(f"  {name}: {status.chunks_indexed} chunks ({status.status})")
                if status.errors:
                    for error in status.errors:
                        print(f"    ERROR: {error}")
            total = sum(s.chunks_indexed for s in results.values())
            print(f"\nTotal: {total} chunks indexed")

        elif args.ingest_source:
            source = next(
                (s for s in TEMPLATE_SOURCES if s.name == args.ingest_source),
                None
            )
            if not source:
                # Unknown name: list the valid choices and bail out.
                print(f"Unknown source: {args.ingest_source}")
                print("Available sources:")
                for s in TEMPLATE_SOURCES:
                    print(f"  - {s.name}")
                return

            print(f"Ingesting: {source.name}")
            status = await ingestion.ingest_source(source)
            print(f"\nResult: {status.chunks_indexed} chunks ({status.status})")
            if status.errors:
                for error in status.errors:
                    print(f"  ERROR: {error}")

        elif args.ingest_license:
            license_type = LicenseType(args.ingest_license)
            print(f"Ingesting all {license_type.value} sources...")
            results = await ingestion.ingest_by_license(license_type)
            print("\nResults:")
            for name, status in results.items():
                print(f"  {name}: {status.chunks_indexed} chunks ({status.status})")

        # Search action: run a test query with optional filters.
        elif args.search:
            print(f"Searching: {args.search}")
            results = await ingestion.search(
                args.search,
                template_type=args.template_type,
                language=args.language,
            )
            print(f"\nFound {len(results)} results:")
            for i, result in enumerate(results, 1):
                print(f"\n{i}. [{result['template_type']}] {result['document_title']}")
                print(f"   Score: {result['score']:.3f}")
                print(f"   License: {result['license_name']}")
                print(f"   Source: {result['source_name']}")
                print(f"   Language: {result['language']}")
                if result['attribution_required']:
                    print(f"   Attribution: {result['attribution_text']}")
                print(f"   Text: {result['text'][:200]}...")

        else:
            parser.print_help()

    finally:
        # Always release the ingestion client's resources.
        await ingestion.close()
+
+
# Script entry point: run the async CLI dispatcher.
if __name__ == "__main__":
    asyncio.run(main())
diff --git a/klausur-service/backend/legal_templates_ingestion.py b/klausur-service/backend/legal_templates_ingestion.py
index 16580cd..c1c79fa 100644
--- a/klausur-service/backend/legal_templates_ingestion.py
+++ b/klausur-service/backend/legal_templates_ingestion.py
@@ -8,18 +8,16 @@ proper attribution tracking.
Collection: bp_legal_templates
Usage:
- python legal_templates_ingestion.py --ingest-all
- python legal_templates_ingestion.py --ingest-source github-site-policy
- python legal_templates_ingestion.py --status
- python legal_templates_ingestion.py --search "Datenschutzerklaerung"
+ python legal_templates_cli.py --ingest-all
+ python legal_templates_cli.py --ingest-source github-site-policy
+ python legal_templates_cli.py --status
+ python legal_templates_cli.py --search "Datenschutzerklaerung"
"""
import asyncio
import hashlib
-import json
import logging
import os
-from dataclasses import dataclass, field
from datetime import datetime
from typing import Any, Dict, List, Optional
from urllib.parse import urlparse
@@ -50,6 +48,17 @@ from github_crawler import (
RepositoryDownloader,
)
+# Re-export from chunking module for backward compatibility
+from legal_templates_chunking import ( # noqa: F401
+ IngestionStatus,
+ TemplateChunk,
+ chunk_text,
+ create_chunks,
+ infer_clause_category,
+ infer_template_type,
+ split_sentences,
+)
+
# Configure logging
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)
@@ -78,54 +87,6 @@ MAX_RETRIES = 3
RETRY_DELAY = 3.0
-@dataclass
-class IngestionStatus:
- """Status of a source ingestion."""
- source_name: str
- status: str # "pending", "running", "completed", "failed"
- documents_found: int = 0
- chunks_created: int = 0
- chunks_indexed: int = 0
- errors: List[str] = field(default_factory=list)
- started_at: Optional[datetime] = None
- completed_at: Optional[datetime] = None
-
-
-@dataclass
-class TemplateChunk:
- """A chunk of template text ready for indexing."""
- text: str
- chunk_index: int
- document_title: str
- template_type: str
- clause_category: Optional[str]
- language: str
- jurisdiction: str
- license_id: str
- license_name: str
- license_url: str
- attribution_required: bool
- share_alike: bool
- no_derivatives: bool
- commercial_use: bool
- source_name: str
- source_url: str
- source_repo: Optional[str]
- source_commit: Optional[str]
- source_file: str
- source_hash: str
- attribution_text: Optional[str]
- copyright_notice: Optional[str]
- is_complete_document: bool
- is_modular: bool
- requires_customization: bool
- placeholders: List[str]
- training_allowed: bool
- output_allowed: bool
- modification_allowed: bool
- distortion_prohibited: bool
-
-
class LegalTemplatesIngestion:
"""Handles ingestion of legal templates into Qdrant."""
@@ -168,212 +129,6 @@ class LegalTemplatesIngestion:
logger.error(f"Embedding generation failed: {e}")
raise
- def _chunk_text(self, text: str, chunk_size: int = CHUNK_SIZE, overlap: int = CHUNK_OVERLAP) -> List[str]:
- """
- Split text into overlapping chunks.
- Respects paragraph and sentence boundaries where possible.
- """
- if not text:
- return []
-
- if len(text) <= chunk_size:
- return [text.strip()]
-
- # Split into paragraphs first
- paragraphs = text.split('\n\n')
- chunks = []
- current_chunk = []
- current_length = 0
-
- for para in paragraphs:
- para = para.strip()
- if not para:
- continue
-
- para_length = len(para)
-
- if para_length > chunk_size:
- # Large paragraph: split by sentences
- if current_chunk:
- chunks.append('\n\n'.join(current_chunk))
- current_chunk = []
- current_length = 0
-
- # Split long paragraph by sentences
- sentences = self._split_sentences(para)
- for sentence in sentences:
- if current_length + len(sentence) + 1 > chunk_size:
- if current_chunk:
- chunks.append(' '.join(current_chunk))
- # Keep overlap
- overlap_count = max(1, len(current_chunk) // 3)
- current_chunk = current_chunk[-overlap_count:]
- current_length = sum(len(s) + 1 for s in current_chunk)
- current_chunk.append(sentence)
- current_length += len(sentence) + 1
-
- elif current_length + para_length + 2 > chunk_size:
- # Paragraph would exceed chunk size
- if current_chunk:
- chunks.append('\n\n'.join(current_chunk))
- current_chunk = []
- current_length = 0
- current_chunk.append(para)
- current_length = para_length
-
- else:
- current_chunk.append(para)
- current_length += para_length + 2
-
- # Add final chunk
- if current_chunk:
- chunks.append('\n\n'.join(current_chunk))
-
- return [c.strip() for c in chunks if c.strip()]
-
- def _split_sentences(self, text: str) -> List[str]:
- """Split text into sentences with basic abbreviation handling."""
- import re
-
- # Protect common abbreviations
- abbreviations = ['bzw', 'ca', 'd.h', 'etc', 'ggf', 'inkl', 'u.a', 'usw', 'z.B', 'z.b', 'e.g', 'i.e', 'vs', 'no']
- protected = text
- for abbr in abbreviations:
- pattern = re.compile(r'\b' + re.escape(abbr) + r'\.', re.IGNORECASE)
-        protected = pattern.sub(abbr.replace('.', '\uE000') + '\uE001', protected)
-
-        # Protect decimal numbers
-        protected = re.sub(r'(\d)\.(\d)', '\\1\uE002\\2', protected)
-
-        # Split on sentence endings
-        sentences = re.split(r'(?<=[.!?])\s+', protected)
-
-        # Restore protected characters
-        result = []
-        for s in sentences:
-            s = s.replace('\uE000', '.').replace('\uE001', '.').replace('\uE002', '.')
- s = s.strip()
- if s:
- result.append(s)
-
- return result
-
- def _infer_template_type(self, doc: ExtractedDocument, source: SourceConfig) -> str:
- """Infer the template type from document content and metadata."""
- text_lower = doc.text.lower()
- title_lower = doc.title.lower()
-
- # Check known indicators
- type_indicators = {
- "privacy_policy": ["datenschutz", "privacy", "personal data", "personenbezogen"],
- "terms_of_service": ["nutzungsbedingungen", "terms of service", "terms of use", "agb"],
- "cookie_banner": ["cookie", "cookies", "tracking"],
- "impressum": ["impressum", "legal notice", "imprint"],
- "widerruf": ["widerruf", "cancellation", "withdrawal", "right to cancel"],
- "dpa": ["auftragsverarbeitung", "data processing agreement", "dpa"],
- "sla": ["service level", "availability", "uptime"],
- "nda": ["confidential", "non-disclosure", "geheimhaltung", "vertraulich"],
- "community_guidelines": ["community", "guidelines", "conduct", "verhaltens"],
- "acceptable_use": ["acceptable use", "acceptable usage", "nutzungsrichtlinien"],
- }
-
- for template_type, indicators in type_indicators.items():
- for indicator in indicators:
- if indicator in text_lower or indicator in title_lower:
- return template_type
-
- # Fall back to source's first template type
- if source.template_types:
- return source.template_types[0]
-
- return "clause" # Generic fallback
-
- def _infer_clause_category(self, text: str) -> Optional[str]:
- """Infer the clause category from text content."""
- text_lower = text.lower()
-
- categories = {
- "haftung": ["haftung", "liability", "haftungsausschluss", "limitation"],
- "datenschutz": ["datenschutz", "privacy", "personal data", "personenbezogen"],
- "widerruf": ["widerruf", "cancellation", "withdrawal"],
- "gewaehrleistung": ["gewaehrleistung", "warranty", "garantie"],
- "kuendigung": ["kuendigung", "termination", "beendigung"],
- "zahlung": ["zahlung", "payment", "preis", "price"],
- "gerichtsstand": ["gerichtsstand", "jurisdiction", "governing law"],
- "aenderungen": ["aenderung", "modification", "amendment"],
- "schlussbestimmungen": ["schlussbestimmung", "miscellaneous", "final provisions"],
- }
-
- for category, indicators in categories.items():
- for indicator in indicators:
- if indicator in text_lower:
- return category
-
- return None
-
- def _create_chunks(
- self,
- doc: ExtractedDocument,
- source: SourceConfig,
- ) -> List[TemplateChunk]:
- """Create template chunks from an extracted document."""
- license_info = source.license_info
- template_type = self._infer_template_type(doc, source)
-
- # Chunk the text
- text_chunks = self._chunk_text(doc.text)
-
- chunks = []
- for i, chunk_text in enumerate(text_chunks):
- # Determine if this is a complete document or a clause
- is_complete = len(text_chunks) == 1 and len(chunk_text) > 500
- is_modular = len(doc.sections) > 0 or '##' in doc.text
- requires_customization = len(doc.placeholders) > 0
-
- # Generate attribution text
- attribution_text = None
- if license_info.attribution_required:
- attribution_text = license_info.get_attribution_text(
- source.name,
- doc.source_url or source.get_source_url()
- )
-
- chunk = TemplateChunk(
- text=chunk_text,
- chunk_index=i,
- document_title=doc.title,
- template_type=template_type,
- clause_category=self._infer_clause_category(chunk_text),
- language=doc.language,
- jurisdiction=source.jurisdiction,
- license_id=license_info.id.value,
- license_name=license_info.name,
- license_url=license_info.url,
- attribution_required=license_info.attribution_required,
- share_alike=license_info.share_alike,
- no_derivatives=license_info.no_derivatives,
- commercial_use=license_info.commercial_use,
- source_name=source.name,
- source_url=doc.source_url or source.get_source_url(),
- source_repo=source.repo_url,
- source_commit=doc.source_commit,
- source_file=doc.file_path,
- source_hash=doc.source_hash,
- attribution_text=attribution_text,
- copyright_notice=None, # Could be extracted from doc if present
- is_complete_document=is_complete,
- is_modular=is_modular,
- requires_customization=requires_customization,
- placeholders=doc.placeholders,
- training_allowed=license_info.training_allowed,
- output_allowed=license_info.output_allowed,
- modification_allowed=license_info.modification_allowed,
- distortion_prohibited=license_info.distortion_prohibited,
- )
- chunks.append(chunk)
-
- return chunks
-
async def ingest_source(self, source: SourceConfig) -> IngestionStatus:
"""Ingest a single source into Qdrant."""
status = IngestionStatus(
@@ -405,7 +160,7 @@ class LegalTemplatesIngestion:
# Create chunks from all documents
all_chunks: List[TemplateChunk] = []
for doc in documents:
- chunks = self._create_chunks(doc, source)
+ chunks = create_chunks(doc, source, CHUNK_SIZE, CHUNK_OVERLAP)
all_chunks.extend(chunks)
status.chunks_created += len(chunks)
@@ -637,21 +392,7 @@ class LegalTemplatesIngestion:
attribution_required: Optional[bool] = None,
top_k: int = 10,
) -> List[Dict[str, Any]]:
- """
- Search the legal templates collection.
-
- Args:
- query: Search query text
- template_type: Filter by template type (e.g., "privacy_policy")
- license_types: Filter by license types (e.g., ["cc0", "mit"])
- language: Filter by language (e.g., "de")
- jurisdiction: Filter by jurisdiction (e.g., "DE")
- attribution_required: Filter by attribution requirement
- top_k: Number of results to return
-
- Returns:
- List of search results with full metadata
- """
+ """Search the legal templates collection."""
# Generate query embedding
embeddings = await self._generate_embeddings([query])
query_vector = embeddings[0]
@@ -661,45 +402,27 @@ class LegalTemplatesIngestion:
if template_type:
must_conditions.append(
- FieldCondition(
- key="template_type",
- match=MatchValue(value=template_type),
- )
+ FieldCondition(key="template_type", match=MatchValue(value=template_type))
)
-
if language:
must_conditions.append(
- FieldCondition(
- key="language",
- match=MatchValue(value=language),
- )
+ FieldCondition(key="language", match=MatchValue(value=language))
)
-
if jurisdiction:
must_conditions.append(
- FieldCondition(
- key="jurisdiction",
- match=MatchValue(value=jurisdiction),
- )
+ FieldCondition(key="jurisdiction", match=MatchValue(value=jurisdiction))
)
-
if attribution_required is not None:
must_conditions.append(
- FieldCondition(
- key="attribution_required",
- match=MatchValue(value=attribution_required),
- )
+ FieldCondition(key="attribution_required", match=MatchValue(value=attribution_required))
)
# License type filter (OR condition)
should_conditions = []
if license_types:
- for license_type in license_types:
+ for lt in license_types:
should_conditions.append(
- FieldCondition(
- key="license_id",
- match=MatchValue(value=license_type),
- )
+ FieldCondition(key="license_id", match=MatchValue(value=lt))
)
# Construct filter
@@ -747,196 +470,31 @@ class LegalTemplatesIngestion:
def delete_source(self, source_name: str) -> int:
"""Delete all chunks from a specific source."""
- # First count how many we're deleting
count_result = self.qdrant.count(
collection_name=LEGAL_TEMPLATES_COLLECTION,
count_filter=Filter(
- must=[
- FieldCondition(
- key="source_name",
- match=MatchValue(value=source_name),
- )
- ]
+ must=[FieldCondition(key="source_name", match=MatchValue(value=source_name))]
),
)
-
- # Delete by filter
self.qdrant.delete(
collection_name=LEGAL_TEMPLATES_COLLECTION,
points_selector=Filter(
- must=[
- FieldCondition(
- key="source_name",
- match=MatchValue(value=source_name),
- )
- ]
+ must=[FieldCondition(key="source_name", match=MatchValue(value=source_name))]
),
)
-
return count_result.count
def reset_collection(self):
"""Delete and recreate the collection."""
logger.warning(f"Resetting collection: {LEGAL_TEMPLATES_COLLECTION}")
-
- # Delete collection
try:
self.qdrant.delete_collection(LEGAL_TEMPLATES_COLLECTION)
except Exception:
- pass # Collection might not exist
-
- # Recreate
+ pass
self._ensure_collection()
self._ingestion_status.clear()
-
logger.info(f"Collection {LEGAL_TEMPLATES_COLLECTION} reset")
async def close(self):
"""Close HTTP client."""
await self.http_client.aclose()
-
-
-async def main():
- """CLI entry point."""
- import argparse
-
- parser = argparse.ArgumentParser(description="Legal Templates Ingestion")
- parser.add_argument(
- "--ingest-all",
- action="store_true",
- help="Ingest all enabled sources"
- )
- parser.add_argument(
- "--ingest-source",
- type=str,
- metavar="NAME",
- help="Ingest a specific source by name"
- )
- parser.add_argument(
- "--ingest-license",
- type=str,
- choices=["cc0", "mit", "cc_by_4", "public_domain"],
- help="Ingest all sources of a specific license type"
- )
- parser.add_argument(
- "--max-priority",
- type=int,
- default=3,
- help="Maximum priority level to ingest (1=highest, 5=lowest)"
- )
- parser.add_argument(
- "--status",
- action="store_true",
- help="Show collection status"
- )
- parser.add_argument(
- "--search",
- type=str,
- metavar="QUERY",
- help="Test search query"
- )
- parser.add_argument(
- "--template-type",
- type=str,
- help="Filter search by template type"
- )
- parser.add_argument(
- "--language",
- type=str,
- help="Filter search by language"
- )
- parser.add_argument(
- "--reset",
- action="store_true",
- help="Reset (delete and recreate) the collection"
- )
- parser.add_argument(
- "--delete-source",
- type=str,
- metavar="NAME",
- help="Delete all chunks from a source"
- )
-
- args = parser.parse_args()
-
- ingestion = LegalTemplatesIngestion()
-
- try:
- if args.reset:
- ingestion.reset_collection()
- print("Collection reset successfully")
-
- elif args.delete_source:
- count = ingestion.delete_source(args.delete_source)
- print(f"Deleted {count} chunks from {args.delete_source}")
-
- elif args.status:
- status = ingestion.get_status()
- print(json.dumps(status, indent=2, default=str))
-
- elif args.ingest_all:
- print(f"Ingesting all sources (max priority: {args.max_priority})...")
- results = await ingestion.ingest_all(max_priority=args.max_priority)
- print("\nResults:")
- for name, status in results.items():
- print(f" {name}: {status.chunks_indexed} chunks ({status.status})")
- if status.errors:
- for error in status.errors:
- print(f" ERROR: {error}")
- total = sum(s.chunks_indexed for s in results.values())
- print(f"\nTotal: {total} chunks indexed")
-
- elif args.ingest_source:
- source = next(
- (s for s in TEMPLATE_SOURCES if s.name == args.ingest_source),
- None
- )
- if not source:
- print(f"Unknown source: {args.ingest_source}")
- print("Available sources:")
- for s in TEMPLATE_SOURCES:
- print(f" - {s.name}")
- return
-
- print(f"Ingesting: {source.name}")
- status = await ingestion.ingest_source(source)
- print(f"\nResult: {status.chunks_indexed} chunks ({status.status})")
- if status.errors:
- for error in status.errors:
- print(f" ERROR: {error}")
-
- elif args.ingest_license:
- license_type = LicenseType(args.ingest_license)
- print(f"Ingesting all {license_type.value} sources...")
- results = await ingestion.ingest_by_license(license_type)
- print("\nResults:")
- for name, status in results.items():
- print(f" {name}: {status.chunks_indexed} chunks ({status.status})")
-
- elif args.search:
- print(f"Searching: {args.search}")
- results = await ingestion.search(
- args.search,
- template_type=args.template_type,
- language=args.language,
- )
- print(f"\nFound {len(results)} results:")
- for i, result in enumerate(results, 1):
- print(f"\n{i}. [{result['template_type']}] {result['document_title']}")
- print(f" Score: {result['score']:.3f}")
- print(f" License: {result['license_name']}")
- print(f" Source: {result['source_name']}")
- print(f" Language: {result['language']}")
- if result['attribution_required']:
- print(f" Attribution: {result['attribution_text']}")
- print(f" Text: {result['text'][:200]}...")
-
- else:
- parser.print_help()
-
- finally:
- await ingestion.close()
-
-
-if __name__ == "__main__":
- asyncio.run(main())
diff --git a/klausur-service/backend/mail/mail_db.py b/klausur-service/backend/mail/mail_db.py
index 36f5c44..8ddcce2 100644
--- a/klausur-service/backend/mail/mail_db.py
+++ b/klausur-service/backend/mail/mail_db.py
@@ -1,987 +1,70 @@
"""
Unified Inbox Mail Database Service
-PostgreSQL database operations for multi-account mail aggregation.
+Barrel re-export -- the actual logic lives in:
+- mail_db_pool.py: Connection pool and schema initialization
+- mail_db_accounts.py: Email account CRUD
+- mail_db_emails.py: Aggregated email operations
+- mail_db_tasks.py: Inbox task operations
+- mail_db_stats.py: Statistics and audit log
"""
-import os
-import json
-import uuid
-from typing import Optional, List, Dict, Any
-from datetime import datetime, timedelta
-
-# Database Configuration - from Vault or environment (test default for CI)
-DATABASE_URL = os.getenv("DATABASE_URL", "postgresql://test:test@localhost:5432/test")
-
-# Flag to check if using test defaults
-_DB_CONFIGURED = DATABASE_URL != "postgresql://test:test@localhost:5432/test"
-
-# Connection pool (shared with metrics_db)
-_pool = None
-
-
-async def get_pool():
- """Get or create database connection pool."""
- global _pool
- if _pool is None:
- try:
- import asyncpg
- _pool = await asyncpg.create_pool(DATABASE_URL, min_size=2, max_size=10)
- except ImportError:
- print("Warning: asyncpg not installed. Mail database disabled.")
- return None
- except Exception as e:
- print(f"Warning: Failed to connect to PostgreSQL: {e}")
- return None
- return _pool
-
-
-async def init_mail_tables() -> bool:
- """Initialize mail tables in PostgreSQL."""
- pool = await get_pool()
- if pool is None:
- return False
-
- create_tables_sql = """
- -- =============================================================================
- -- External Email Accounts
- -- =============================================================================
- CREATE TABLE IF NOT EXISTS external_email_accounts (
- id VARCHAR(36) PRIMARY KEY,
- user_id VARCHAR(36) NOT NULL,
- tenant_id VARCHAR(36) NOT NULL,
- email VARCHAR(255) NOT NULL,
- display_name VARCHAR(255),
- account_type VARCHAR(50) DEFAULT 'personal',
-
- -- IMAP Settings (password stored in Vault)
- imap_host VARCHAR(255) NOT NULL,
- imap_port INTEGER DEFAULT 993,
- imap_ssl BOOLEAN DEFAULT TRUE,
-
- -- SMTP Settings
- smtp_host VARCHAR(255) NOT NULL,
- smtp_port INTEGER DEFAULT 465,
- smtp_ssl BOOLEAN DEFAULT TRUE,
-
- -- Vault path for credentials
- vault_path VARCHAR(500),
-
- -- Status tracking
- status VARCHAR(20) DEFAULT 'pending',
- last_sync TIMESTAMP,
- sync_error TEXT,
- email_count INTEGER DEFAULT 0,
- unread_count INTEGER DEFAULT 0,
-
- -- Timestamps
- created_at TIMESTAMP DEFAULT NOW(),
- updated_at TIMESTAMP DEFAULT NOW(),
-
- -- Constraints
- UNIQUE(user_id, email)
- );
-
- CREATE INDEX IF NOT EXISTS idx_mail_accounts_user ON external_email_accounts(user_id);
- CREATE INDEX IF NOT EXISTS idx_mail_accounts_tenant ON external_email_accounts(tenant_id);
- CREATE INDEX IF NOT EXISTS idx_mail_accounts_status ON external_email_accounts(status);
-
- -- =============================================================================
- -- Aggregated Emails
- -- =============================================================================
- CREATE TABLE IF NOT EXISTS aggregated_emails (
- id VARCHAR(36) PRIMARY KEY,
- account_id VARCHAR(36) REFERENCES external_email_accounts(id) ON DELETE CASCADE,
- user_id VARCHAR(36) NOT NULL,
- tenant_id VARCHAR(36) NOT NULL,
-
- -- Email identification
- message_id VARCHAR(500) NOT NULL,
- folder VARCHAR(100) DEFAULT 'INBOX',
-
- -- Email content
- subject TEXT,
- sender_email VARCHAR(255),
- sender_name VARCHAR(255),
- recipients JSONB DEFAULT '[]',
- cc JSONB DEFAULT '[]',
- body_preview TEXT,
- body_text TEXT,
- body_html TEXT,
- has_attachments BOOLEAN DEFAULT FALSE,
- attachments JSONB DEFAULT '[]',
- headers JSONB DEFAULT '{}',
-
- -- Status flags
- is_read BOOLEAN DEFAULT FALSE,
- is_starred BOOLEAN DEFAULT FALSE,
- is_deleted BOOLEAN DEFAULT FALSE,
-
- -- Dates
- date_sent TIMESTAMP,
- date_received TIMESTAMP,
-
- -- AI enrichment
- category VARCHAR(50),
- sender_type VARCHAR(50),
- sender_authority_name VARCHAR(255),
- detected_deadlines JSONB DEFAULT '[]',
- suggested_priority VARCHAR(20),
- ai_summary TEXT,
- ai_analyzed_at TIMESTAMP,
-
- created_at TIMESTAMP DEFAULT NOW(),
-
- -- Prevent duplicate imports
- UNIQUE(account_id, message_id)
- );
-
- CREATE INDEX IF NOT EXISTS idx_emails_account ON aggregated_emails(account_id);
- CREATE INDEX IF NOT EXISTS idx_emails_user ON aggregated_emails(user_id);
- CREATE INDEX IF NOT EXISTS idx_emails_tenant ON aggregated_emails(tenant_id);
- CREATE INDEX IF NOT EXISTS idx_emails_date ON aggregated_emails(date_received DESC);
- CREATE INDEX IF NOT EXISTS idx_emails_category ON aggregated_emails(category);
- CREATE INDEX IF NOT EXISTS idx_emails_unread ON aggregated_emails(is_read) WHERE is_read = FALSE;
- CREATE INDEX IF NOT EXISTS idx_emails_starred ON aggregated_emails(is_starred) WHERE is_starred = TRUE;
- CREATE INDEX IF NOT EXISTS idx_emails_sender ON aggregated_emails(sender_email);
-
- -- =============================================================================
- -- Inbox Tasks (Arbeitsvorrat)
- -- =============================================================================
- CREATE TABLE IF NOT EXISTS inbox_tasks (
- id VARCHAR(36) PRIMARY KEY,
- user_id VARCHAR(36) NOT NULL,
- tenant_id VARCHAR(36) NOT NULL,
- email_id VARCHAR(36) REFERENCES aggregated_emails(id) ON DELETE SET NULL,
- account_id VARCHAR(36) REFERENCES external_email_accounts(id) ON DELETE SET NULL,
-
- -- Task content
- title VARCHAR(500) NOT NULL,
- description TEXT,
- priority VARCHAR(20) DEFAULT 'medium',
- status VARCHAR(20) DEFAULT 'pending',
- deadline TIMESTAMP,
-
- -- Source information
- source_email_subject TEXT,
- source_sender VARCHAR(255),
- source_sender_type VARCHAR(50),
-
- -- AI extraction info
- ai_extracted BOOLEAN DEFAULT FALSE,
- confidence_score FLOAT,
-
- -- Completion tracking
- completed_at TIMESTAMP,
- reminder_at TIMESTAMP,
-
- -- Timestamps
- created_at TIMESTAMP DEFAULT NOW(),
- updated_at TIMESTAMP DEFAULT NOW()
- );
-
- CREATE INDEX IF NOT EXISTS idx_tasks_user ON inbox_tasks(user_id);
- CREATE INDEX IF NOT EXISTS idx_tasks_tenant ON inbox_tasks(tenant_id);
- CREATE INDEX IF NOT EXISTS idx_tasks_status ON inbox_tasks(status);
- CREATE INDEX IF NOT EXISTS idx_tasks_deadline ON inbox_tasks(deadline) WHERE deadline IS NOT NULL;
- CREATE INDEX IF NOT EXISTS idx_tasks_priority ON inbox_tasks(priority);
- CREATE INDEX IF NOT EXISTS idx_tasks_email ON inbox_tasks(email_id) WHERE email_id IS NOT NULL;
-
- -- =============================================================================
- -- Email Templates
- -- =============================================================================
- CREATE TABLE IF NOT EXISTS email_templates (
- id VARCHAR(36) PRIMARY KEY,
- user_id VARCHAR(36), -- NULL for system templates
- tenant_id VARCHAR(36),
-
- name VARCHAR(255) NOT NULL,
- category VARCHAR(100),
- subject_template TEXT,
- body_template TEXT,
- variables JSONB DEFAULT '[]',
-
- is_system BOOLEAN DEFAULT FALSE,
- usage_count INTEGER DEFAULT 0,
-
- created_at TIMESTAMP DEFAULT NOW(),
- updated_at TIMESTAMP DEFAULT NOW()
- );
-
- CREATE INDEX IF NOT EXISTS idx_templates_user ON email_templates(user_id);
- CREATE INDEX IF NOT EXISTS idx_templates_tenant ON email_templates(tenant_id);
- CREATE INDEX IF NOT EXISTS idx_templates_system ON email_templates(is_system);
-
- -- =============================================================================
- -- Mail Audit Log
- -- =============================================================================
- CREATE TABLE IF NOT EXISTS mail_audit_log (
- id VARCHAR(36) PRIMARY KEY,
- user_id VARCHAR(36) NOT NULL,
- tenant_id VARCHAR(36),
- action VARCHAR(100) NOT NULL,
- entity_type VARCHAR(50), -- account, email, task
- entity_id VARCHAR(36),
- details JSONB,
- ip_address VARCHAR(45),
- user_agent TEXT,
- created_at TIMESTAMP DEFAULT NOW()
- );
-
- CREATE INDEX IF NOT EXISTS idx_mail_audit_user ON mail_audit_log(user_id);
- CREATE INDEX IF NOT EXISTS idx_mail_audit_created ON mail_audit_log(created_at DESC);
- CREATE INDEX IF NOT EXISTS idx_mail_audit_action ON mail_audit_log(action);
-
- -- =============================================================================
- -- Sync Status Tracking
- -- =============================================================================
- CREATE TABLE IF NOT EXISTS mail_sync_status (
- id VARCHAR(36) PRIMARY KEY,
- account_id VARCHAR(36) REFERENCES external_email_accounts(id) ON DELETE CASCADE,
- folder VARCHAR(100),
- last_uid INTEGER DEFAULT 0,
- last_sync TIMESTAMP,
- sync_errors INTEGER DEFAULT 0,
- created_at TIMESTAMP DEFAULT NOW(),
- updated_at TIMESTAMP DEFAULT NOW(),
-
- UNIQUE(account_id, folder)
- );
- """
-
- try:
- async with pool.acquire() as conn:
- await conn.execute(create_tables_sql)
- print("Mail tables initialized successfully")
- return True
- except Exception as e:
- print(f"Failed to initialize mail tables: {e}")
- return False
-
-
-# =============================================================================
-# Email Account Operations
-# =============================================================================
-
-async def create_email_account(
- user_id: str,
- tenant_id: str,
- email: str,
- display_name: str,
- account_type: str,
- imap_host: str,
- imap_port: int,
- imap_ssl: bool,
- smtp_host: str,
- smtp_port: int,
- smtp_ssl: bool,
- vault_path: str,
-) -> Optional[str]:
- """Create a new email account. Returns the account ID."""
- pool = await get_pool()
- if pool is None:
- return None
-
- account_id = str(uuid.uuid4())
- try:
- async with pool.acquire() as conn:
- await conn.execute(
- """
- INSERT INTO external_email_accounts
- (id, user_id, tenant_id, email, display_name, account_type,
- imap_host, imap_port, imap_ssl, smtp_host, smtp_port, smtp_ssl, vault_path)
- VALUES ($1, $2, $3, $4, $5, $6, $7, $8, $9, $10, $11, $12, $13)
- """,
- account_id, user_id, tenant_id, email, display_name, account_type,
- imap_host, imap_port, imap_ssl, smtp_host, smtp_port, smtp_ssl, vault_path
- )
- return account_id
- except Exception as e:
- print(f"Failed to create email account: {e}")
- return None
-
-
-async def get_email_accounts(
- user_id: str,
- tenant_id: Optional[str] = None,
-) -> List[Dict]:
- """Get all email accounts for a user."""
- pool = await get_pool()
- if pool is None:
- return []
-
- try:
- async with pool.acquire() as conn:
- if tenant_id:
- rows = await conn.fetch(
- """
- SELECT * FROM external_email_accounts
- WHERE user_id = $1 AND tenant_id = $2
- ORDER BY created_at
- """,
- user_id, tenant_id
- )
- else:
- rows = await conn.fetch(
- """
- SELECT * FROM external_email_accounts
- WHERE user_id = $1
- ORDER BY created_at
- """,
- user_id
- )
- return [dict(r) for r in rows]
- except Exception as e:
- print(f"Failed to get email accounts: {e}")
- return []
-
-
-async def get_email_account(account_id: str, user_id: str) -> Optional[Dict]:
- """Get a single email account."""
- pool = await get_pool()
- if pool is None:
- return None
-
- try:
- async with pool.acquire() as conn:
- row = await conn.fetchrow(
- """
- SELECT * FROM external_email_accounts
- WHERE id = $1 AND user_id = $2
- """,
- account_id, user_id
- )
- return dict(row) if row else None
- except Exception as e:
- print(f"Failed to get email account: {e}")
- return None
-
-
-async def update_account_status(
- account_id: str,
- status: str,
- sync_error: Optional[str] = None,
- email_count: Optional[int] = None,
- unread_count: Optional[int] = None,
-) -> bool:
- """Update account sync status."""
- pool = await get_pool()
- if pool is None:
- return False
-
- try:
- async with pool.acquire() as conn:
- await conn.execute(
- """
- UPDATE external_email_accounts SET
- status = $2,
- sync_error = $3,
- email_count = COALESCE($4, email_count),
- unread_count = COALESCE($5, unread_count),
- last_sync = NOW(),
- updated_at = NOW()
- WHERE id = $1
- """,
- account_id, status, sync_error, email_count, unread_count
- )
- return True
- except Exception as e:
- print(f"Failed to update account status: {e}")
- return False
-
-
-async def delete_email_account(account_id: str, user_id: str) -> bool:
- """Delete an email account (cascades to emails)."""
- pool = await get_pool()
- if pool is None:
- return False
-
- try:
- async with pool.acquire() as conn:
- result = await conn.execute(
- """
- DELETE FROM external_email_accounts
- WHERE id = $1 AND user_id = $2
- """,
- account_id, user_id
- )
- return "DELETE" in result
- except Exception as e:
- print(f"Failed to delete email account: {e}")
- return False
-
-
-# =============================================================================
-# Aggregated Email Operations
-# =============================================================================
-
-async def upsert_email(
- account_id: str,
- user_id: str,
- tenant_id: str,
- message_id: str,
- subject: str,
- sender_email: str,
- sender_name: Optional[str],
- recipients: List[str],
- cc: List[str],
- body_preview: Optional[str],
- body_text: Optional[str],
- body_html: Optional[str],
- has_attachments: bool,
- attachments: List[Dict],
- headers: Dict,
- folder: str,
- date_sent: datetime,
- date_received: datetime,
-) -> Optional[str]:
- """Insert or update an email. Returns the email ID."""
- pool = await get_pool()
- if pool is None:
- return None
-
- email_id = str(uuid.uuid4())
- try:
- async with pool.acquire() as conn:
- # Try insert, on conflict update (for re-sync scenarios)
- row = await conn.fetchrow(
- """
- INSERT INTO aggregated_emails
- (id, account_id, user_id, tenant_id, message_id, subject,
- sender_email, sender_name, recipients, cc, body_preview,
- body_text, body_html, has_attachments, attachments, headers,
- folder, date_sent, date_received)
- VALUES ($1, $2, $3, $4, $5, $6, $7, $8, $9, $10, $11, $12, $13, $14, $15, $16, $17, $18, $19)
- ON CONFLICT (account_id, message_id) DO UPDATE SET
- subject = EXCLUDED.subject,
- is_read = EXCLUDED.is_read,
- folder = EXCLUDED.folder
- RETURNING id
- """,
- email_id, account_id, user_id, tenant_id, message_id, subject,
- sender_email, sender_name, json.dumps(recipients), json.dumps(cc),
- body_preview, body_text, body_html, has_attachments,
- json.dumps(attachments), json.dumps(headers), folder,
- date_sent, date_received
- )
- return row['id'] if row else None
- except Exception as e:
- print(f"Failed to upsert email: {e}")
- return None
-
-
-async def get_unified_inbox(
- user_id: str,
- account_ids: Optional[List[str]] = None,
- categories: Optional[List[str]] = None,
- is_read: Optional[bool] = None,
- is_starred: Optional[bool] = None,
- limit: int = 50,
- offset: int = 0,
-) -> List[Dict]:
- """Get unified inbox with filtering."""
- pool = await get_pool()
- if pool is None:
- return []
-
- try:
- async with pool.acquire() as conn:
- # Build dynamic query
- conditions = ["user_id = $1", "is_deleted = FALSE"]
- params = [user_id]
- param_idx = 2
-
- if account_ids:
- conditions.append(f"account_id = ANY(${param_idx})")
- params.append(account_ids)
- param_idx += 1
-
- if categories:
- conditions.append(f"category = ANY(${param_idx})")
- params.append(categories)
- param_idx += 1
-
- if is_read is not None:
- conditions.append(f"is_read = ${param_idx}")
- params.append(is_read)
- param_idx += 1
-
- if is_starred is not None:
- conditions.append(f"is_starred = ${param_idx}")
- params.append(is_starred)
- param_idx += 1
-
- where_clause = " AND ".join(conditions)
- params.extend([limit, offset])
-
- query = f"""
- SELECT e.*, a.email as account_email, a.display_name as account_name
- FROM aggregated_emails e
- JOIN external_email_accounts a ON e.account_id = a.id
- WHERE {where_clause}
- ORDER BY e.date_received DESC
- LIMIT ${param_idx} OFFSET ${param_idx + 1}
- """
-
- rows = await conn.fetch(query, *params)
- return [dict(r) for r in rows]
- except Exception as e:
- print(f"Failed to get unified inbox: {e}")
- return []
-
-
-async def get_email(email_id: str, user_id: str) -> Optional[Dict]:
- """Get a single email by ID."""
- pool = await get_pool()
- if pool is None:
- return None
-
- try:
- async with pool.acquire() as conn:
- row = await conn.fetchrow(
- """
- SELECT e.*, a.email as account_email, a.display_name as account_name
- FROM aggregated_emails e
- JOIN external_email_accounts a ON e.account_id = a.id
- WHERE e.id = $1 AND e.user_id = $2
- """,
- email_id, user_id
- )
- return dict(row) if row else None
- except Exception as e:
- print(f"Failed to get email: {e}")
- return None
-
-
-async def update_email_ai_analysis(
- email_id: str,
- category: str,
- sender_type: str,
- sender_authority_name: Optional[str],
- detected_deadlines: List[Dict],
- suggested_priority: str,
- ai_summary: Optional[str],
-) -> bool:
- """Update email with AI analysis results."""
- pool = await get_pool()
- if pool is None:
- return False
-
- try:
- async with pool.acquire() as conn:
- await conn.execute(
- """
- UPDATE aggregated_emails SET
- category = $2,
- sender_type = $3,
- sender_authority_name = $4,
- detected_deadlines = $5,
- suggested_priority = $6,
- ai_summary = $7,
- ai_analyzed_at = NOW()
- WHERE id = $1
- """,
- email_id, category, sender_type, sender_authority_name,
- json.dumps(detected_deadlines), suggested_priority, ai_summary
- )
- return True
- except Exception as e:
- print(f"Failed to update email AI analysis: {e}")
- return False
-
-
-async def mark_email_read(email_id: str, user_id: str, is_read: bool = True) -> bool:
- """Mark email as read/unread."""
- pool = await get_pool()
- if pool is None:
- return False
-
- try:
- async with pool.acquire() as conn:
- await conn.execute(
- """
- UPDATE aggregated_emails SET is_read = $3
- WHERE id = $1 AND user_id = $2
- """,
- email_id, user_id, is_read
- )
- return True
- except Exception as e:
- print(f"Failed to mark email read: {e}")
- return False
-
-
-async def mark_email_starred(email_id: str, user_id: str, is_starred: bool = True) -> bool:
- """Mark email as starred/unstarred."""
- pool = await get_pool()
- if pool is None:
- return False
-
- try:
- async with pool.acquire() as conn:
- await conn.execute(
- """
- UPDATE aggregated_emails SET is_starred = $3
- WHERE id = $1 AND user_id = $2
- """,
- email_id, user_id, is_starred
- )
- return True
- except Exception as e:
- print(f"Failed to mark email starred: {e}")
- return False
-
-
-# =============================================================================
-# Inbox Task Operations
-# =============================================================================
-
-async def create_task(
- user_id: str,
- tenant_id: str,
- title: str,
- description: Optional[str] = None,
- priority: str = "medium",
- deadline: Optional[datetime] = None,
- email_id: Optional[str] = None,
- account_id: Optional[str] = None,
- source_email_subject: Optional[str] = None,
- source_sender: Optional[str] = None,
- source_sender_type: Optional[str] = None,
- ai_extracted: bool = False,
- confidence_score: Optional[float] = None,
-) -> Optional[str]:
- """Create a new inbox task."""
- pool = await get_pool()
- if pool is None:
- return None
-
- task_id = str(uuid.uuid4())
- try:
- async with pool.acquire() as conn:
- await conn.execute(
- """
- INSERT INTO inbox_tasks
- (id, user_id, tenant_id, title, description, priority, deadline,
- email_id, account_id, source_email_subject, source_sender,
- source_sender_type, ai_extracted, confidence_score)
- VALUES ($1, $2, $3, $4, $5, $6, $7, $8, $9, $10, $11, $12, $13, $14)
- """,
- task_id, user_id, tenant_id, title, description, priority, deadline,
- email_id, account_id, source_email_subject, source_sender,
- source_sender_type, ai_extracted, confidence_score
- )
- return task_id
- except Exception as e:
- print(f"Failed to create task: {e}")
- return None
-
-
-async def get_tasks(
- user_id: str,
- status: Optional[str] = None,
- priority: Optional[str] = None,
- include_completed: bool = False,
- limit: int = 50,
- offset: int = 0,
-) -> List[Dict]:
- """Get tasks for a user."""
- pool = await get_pool()
- if pool is None:
- return []
-
- try:
- async with pool.acquire() as conn:
- conditions = ["user_id = $1"]
- params = [user_id]
- param_idx = 2
-
- if not include_completed:
- conditions.append("status != 'completed'")
-
- if status:
- conditions.append(f"status = ${param_idx}")
- params.append(status)
- param_idx += 1
-
- if priority:
- conditions.append(f"priority = ${param_idx}")
- params.append(priority)
- param_idx += 1
-
- where_clause = " AND ".join(conditions)
- params.extend([limit, offset])
-
- query = f"""
- SELECT * FROM inbox_tasks
- WHERE {where_clause}
- ORDER BY
- CASE priority
- WHEN 'urgent' THEN 1
- WHEN 'high' THEN 2
- WHEN 'medium' THEN 3
- WHEN 'low' THEN 4
- END,
- deadline ASC NULLS LAST,
- created_at DESC
- LIMIT ${param_idx} OFFSET ${param_idx + 1}
- """
-
- rows = await conn.fetch(query, *params)
- return [dict(r) for r in rows]
- except Exception as e:
- print(f"Failed to get tasks: {e}")
- return []
-
-
-async def get_task(task_id: str, user_id: str) -> Optional[Dict]:
- """Get a single task."""
- pool = await get_pool()
- if pool is None:
- return None
-
- try:
- async with pool.acquire() as conn:
- row = await conn.fetchrow(
- "SELECT * FROM inbox_tasks WHERE id = $1 AND user_id = $2",
- task_id, user_id
- )
- return dict(row) if row else None
- except Exception as e:
- print(f"Failed to get task: {e}")
- return None
-
-
-async def update_task(
- task_id: str,
- user_id: str,
- title: Optional[str] = None,
- description: Optional[str] = None,
- priority: Optional[str] = None,
- status: Optional[str] = None,
- deadline: Optional[datetime] = None,
-) -> bool:
- """Update a task."""
- pool = await get_pool()
- if pool is None:
- return False
-
- try:
- async with pool.acquire() as conn:
- # Build dynamic update
- updates = ["updated_at = NOW()"]
- params = [task_id, user_id]
- param_idx = 3
-
- if title is not None:
- updates.append(f"title = ${param_idx}")
- params.append(title)
- param_idx += 1
-
- if description is not None:
- updates.append(f"description = ${param_idx}")
- params.append(description)
- param_idx += 1
-
- if priority is not None:
- updates.append(f"priority = ${param_idx}")
- params.append(priority)
- param_idx += 1
-
- if status is not None:
- updates.append(f"status = ${param_idx}")
- params.append(status)
- param_idx += 1
- if status == "completed":
- updates.append("completed_at = NOW()")
-
- if deadline is not None:
- updates.append(f"deadline = ${param_idx}")
- params.append(deadline)
- param_idx += 1
-
- set_clause = ", ".join(updates)
- await conn.execute(
- f"UPDATE inbox_tasks SET {set_clause} WHERE id = $1 AND user_id = $2",
- *params
- )
- return True
- except Exception as e:
- print(f"Failed to update task: {e}")
- return False
-
-
-async def get_task_dashboard_stats(user_id: str) -> Dict:
- """Get dashboard statistics for tasks."""
- pool = await get_pool()
- if pool is None:
- return {}
-
- try:
- async with pool.acquire() as conn:
- now = datetime.now()
- today_end = now.replace(hour=23, minute=59, second=59)
- week_end = now + timedelta(days=7)
-
- stats = await conn.fetchrow(
- """
- SELECT
- COUNT(*) as total_tasks,
- COUNT(*) FILTER (WHERE status = 'pending') as pending_tasks,
- COUNT(*) FILTER (WHERE status = 'in_progress') as in_progress_tasks,
- COUNT(*) FILTER (WHERE status = 'completed') as completed_tasks,
- COUNT(*) FILTER (WHERE status != 'completed' AND deadline < $2) as overdue_tasks,
- COUNT(*) FILTER (WHERE status != 'completed' AND deadline <= $3) as due_today,
- COUNT(*) FILTER (WHERE status != 'completed' AND deadline <= $4) as due_this_week
- FROM inbox_tasks
- WHERE user_id = $1
- """,
- user_id, now, today_end, week_end
- )
-
- by_priority = await conn.fetch(
- """
- SELECT priority, COUNT(*) as count
- FROM inbox_tasks
- WHERE user_id = $1 AND status != 'completed'
- GROUP BY priority
- """,
- user_id
- )
-
- by_sender = await conn.fetch(
- """
- SELECT source_sender_type, COUNT(*) as count
- FROM inbox_tasks
- WHERE user_id = $1 AND status != 'completed' AND source_sender_type IS NOT NULL
- GROUP BY source_sender_type
- """,
- user_id
- )
-
- return {
- "total_tasks": stats['total_tasks'] or 0,
- "pending_tasks": stats['pending_tasks'] or 0,
- "in_progress_tasks": stats['in_progress_tasks'] or 0,
- "completed_tasks": stats['completed_tasks'] or 0,
- "overdue_tasks": stats['overdue_tasks'] or 0,
- "due_today": stats['due_today'] or 0,
- "due_this_week": stats['due_this_week'] or 0,
- "by_priority": {r['priority']: r['count'] for r in by_priority},
- "by_sender_type": {r['source_sender_type']: r['count'] for r in by_sender},
- }
- except Exception as e:
- print(f"Failed to get task stats: {e}")
- return {}
-
-
-# =============================================================================
-# Statistics & Audit
-# =============================================================================
-
-async def get_mail_stats(user_id: str) -> Dict:
- """Get overall mail statistics for a user."""
- pool = await get_pool()
- if pool is None:
- return {}
-
- try:
- async with pool.acquire() as conn:
- today = datetime.now().replace(hour=0, minute=0, second=0, microsecond=0)
-
- # Account stats
- accounts = await conn.fetch(
- """
- SELECT id, email, display_name, status, email_count, unread_count, last_sync
- FROM external_email_accounts
- WHERE user_id = $1
- """,
- user_id
- )
-
- # Email counts
- email_stats = await conn.fetchrow(
- """
- SELECT
- COUNT(*) as total_emails,
- COUNT(*) FILTER (WHERE is_read = FALSE) as unread_emails,
- COUNT(*) FILTER (WHERE date_received >= $2) as emails_today,
- COUNT(*) FILTER (WHERE ai_analyzed_at >= $2) as ai_analyses_today
- FROM aggregated_emails
- WHERE user_id = $1
- """,
- user_id, today
- )
-
- # Task counts
- task_stats = await conn.fetchrow(
- """
- SELECT
- COUNT(*) as total_tasks,
- COUNT(*) FILTER (WHERE status = 'pending') as pending_tasks,
- COUNT(*) FILTER (WHERE status != 'completed' AND deadline < NOW()) as overdue_tasks
- FROM inbox_tasks
- WHERE user_id = $1
- """,
- user_id
- )
-
- return {
- "total_accounts": len(accounts),
- "active_accounts": sum(1 for a in accounts if a['status'] == 'active'),
- "error_accounts": sum(1 for a in accounts if a['status'] == 'error'),
- "total_emails": email_stats['total_emails'] or 0,
- "unread_emails": email_stats['unread_emails'] or 0,
- "total_tasks": task_stats['total_tasks'] or 0,
- "pending_tasks": task_stats['pending_tasks'] or 0,
- "overdue_tasks": task_stats['overdue_tasks'] or 0,
- "emails_today": email_stats['emails_today'] or 0,
- "ai_analyses_today": email_stats['ai_analyses_today'] or 0,
- "per_account": [
- {
- "id": a['id'],
- "email": a['email'],
- "display_name": a['display_name'],
- "status": a['status'],
- "email_count": a['email_count'],
- "unread_count": a['unread_count'],
- "last_sync": a['last_sync'].isoformat() if a['last_sync'] else None,
- }
- for a in accounts
- ],
- }
- except Exception as e:
- print(f"Failed to get mail stats: {e}")
- return {}
-
-
-async def log_mail_audit(
- user_id: str,
- action: str,
- entity_type: Optional[str] = None,
- entity_id: Optional[str] = None,
- details: Optional[Dict] = None,
- tenant_id: Optional[str] = None,
- ip_address: Optional[str] = None,
- user_agent: Optional[str] = None,
-) -> bool:
- """Log a mail action for audit trail."""
- pool = await get_pool()
- if pool is None:
- return False
-
- try:
- async with pool.acquire() as conn:
- await conn.execute(
- """
- INSERT INTO mail_audit_log
- (id, user_id, tenant_id, action, entity_type, entity_id, details, ip_address, user_agent)
- VALUES ($1, $2, $3, $4, $5, $6, $7, $8, $9)
- """,
- str(uuid.uuid4()), user_id, tenant_id, action, entity_type, entity_id,
- json.dumps(details) if details else None, ip_address, user_agent
- )
- return True
- except Exception as e:
- print(f"Failed to log mail audit: {e}")
- return False
+from .mail_db_pool import get_pool, init_mail_tables
+
+from .mail_db_accounts import (
+ create_email_account,
+ get_email_accounts,
+ get_email_account,
+ update_account_status,
+ delete_email_account,
+)
+
+from .mail_db_emails import (
+ upsert_email,
+ get_unified_inbox,
+ get_email,
+ update_email_ai_analysis,
+ mark_email_read,
+ mark_email_starred,
+)
+
+from .mail_db_tasks import (
+ create_task,
+ get_tasks,
+ get_task,
+ update_task,
+ get_task_dashboard_stats,
+)
+
+from .mail_db_stats import (
+ get_mail_stats,
+ log_mail_audit,
+)
+
+__all__ = [
+ # Pool
+ "get_pool",
+ "init_mail_tables",
+ # Accounts
+ "create_email_account",
+ "get_email_accounts",
+ "get_email_account",
+ "update_account_status",
+ "delete_email_account",
+ # Emails
+ "upsert_email",
+ "get_unified_inbox",
+ "get_email",
+ "update_email_ai_analysis",
+ "mark_email_read",
+ "mark_email_starred",
+ # Tasks
+ "create_task",
+ "get_tasks",
+ "get_task",
+ "update_task",
+ "get_task_dashboard_stats",
+ # Stats
+ "get_mail_stats",
+ "log_mail_audit",
+]
diff --git a/klausur-service/backend/mail/mail_db_accounts.py b/klausur-service/backend/mail/mail_db_accounts.py
new file mode 100644
index 0000000..503dd5a
--- /dev/null
+++ b/klausur-service/backend/mail/mail_db_accounts.py
@@ -0,0 +1,156 @@
+"""
+Mail Database - Email Account Operations.
+"""
+
+import uuid
+from typing import Optional, List, Dict
+
+from .mail_db_pool import get_pool
+
+
+async def create_email_account(
+ user_id: str,
+ tenant_id: str,
+ email: str,
+ display_name: str,
+ account_type: str,
+ imap_host: str,
+ imap_port: int,
+ imap_ssl: bool,
+ smtp_host: str,
+ smtp_port: int,
+ smtp_ssl: bool,
+ vault_path: str,
+) -> Optional[str]:
+ """Create a new email account. Returns the account ID."""
+ pool = await get_pool()
+ if pool is None:
+ return None
+
+ account_id = str(uuid.uuid4())
+ try:
+ async with pool.acquire() as conn:
+ await conn.execute(
+ """
+ INSERT INTO external_email_accounts
+ (id, user_id, tenant_id, email, display_name, account_type,
+ imap_host, imap_port, imap_ssl, smtp_host, smtp_port, smtp_ssl, vault_path)
+ VALUES ($1, $2, $3, $4, $5, $6, $7, $8, $9, $10, $11, $12, $13)
+ """,
+ account_id, user_id, tenant_id, email, display_name, account_type,
+ imap_host, imap_port, imap_ssl, smtp_host, smtp_port, smtp_ssl, vault_path
+ )
+ return account_id
+ except Exception as e:
+ print(f"Failed to create email account: {e}")
+ return None
+
+
+async def get_email_accounts(
+ user_id: str,
+ tenant_id: Optional[str] = None,
+) -> List[Dict]:
+ """Get all email accounts for a user."""
+ pool = await get_pool()
+ if pool is None:
+ return []
+
+ try:
+ async with pool.acquire() as conn:
+ if tenant_id:
+ rows = await conn.fetch(
+ """
+ SELECT * FROM external_email_accounts
+ WHERE user_id = $1 AND tenant_id = $2
+ ORDER BY created_at
+ """,
+ user_id, tenant_id
+ )
+ else:
+ rows = await conn.fetch(
+ """
+ SELECT * FROM external_email_accounts
+ WHERE user_id = $1
+ ORDER BY created_at
+ """,
+ user_id
+ )
+ return [dict(r) for r in rows]
+ except Exception as e:
+ print(f"Failed to get email accounts: {e}")
+ return []
+
+
+async def get_email_account(account_id: str, user_id: str) -> Optional[Dict]:
+ """Get a single email account."""
+ pool = await get_pool()
+ if pool is None:
+ return None
+
+ try:
+ async with pool.acquire() as conn:
+ row = await conn.fetchrow(
+ """
+ SELECT * FROM external_email_accounts
+ WHERE id = $1 AND user_id = $2
+ """,
+ account_id, user_id
+ )
+ return dict(row) if row else None
+ except Exception as e:
+ print(f"Failed to get email account: {e}")
+ return None
+
+
+async def update_account_status(
+ account_id: str,
+ status: str,
+ sync_error: Optional[str] = None,
+ email_count: Optional[int] = None,
+ unread_count: Optional[int] = None,
+) -> bool:
+ """Update account sync status."""
+ pool = await get_pool()
+ if pool is None:
+ return False
+
+ try:
+ async with pool.acquire() as conn:
+ await conn.execute(
+ """
+ UPDATE external_email_accounts SET
+ status = $2,
+ sync_error = $3,
+ email_count = COALESCE($4, email_count),
+ unread_count = COALESCE($5, unread_count),
+ last_sync = NOW(),
+ updated_at = NOW()
+ WHERE id = $1
+ """,
+ account_id, status, sync_error, email_count, unread_count
+ )
+ return True
+ except Exception as e:
+ print(f"Failed to update account status: {e}")
+ return False
+
+
+async def delete_email_account(account_id: str, user_id: str) -> bool:
+ """Delete an email account (cascades to emails)."""
+ pool = await get_pool()
+ if pool is None:
+ return False
+
+ try:
+ async with pool.acquire() as conn:
+ result = await conn.execute(
+ """
+ DELETE FROM external_email_accounts
+ WHERE id = $1 AND user_id = $2
+ """,
+ account_id, user_id
+ )
+ return "DELETE" in result
+ except Exception as e:
+ print(f"Failed to delete email account: {e}")
+ return False
diff --git a/klausur-service/backend/mail/mail_db_emails.py b/klausur-service/backend/mail/mail_db_emails.py
new file mode 100644
index 0000000..7ee8966
--- /dev/null
+++ b/klausur-service/backend/mail/mail_db_emails.py
@@ -0,0 +1,225 @@
+"""
+Mail Database - Aggregated Email Operations.
+"""
+
+import json
+import uuid
+from typing import Optional, List, Dict
+from datetime import datetime
+
+from .mail_db_pool import get_pool
+
+
+async def upsert_email(
+ account_id: str,
+ user_id: str,
+ tenant_id: str,
+ message_id: str,
+ subject: str,
+ sender_email: str,
+ sender_name: Optional[str],
+ recipients: List[str],
+ cc: List[str],
+ body_preview: Optional[str],
+ body_text: Optional[str],
+ body_html: Optional[str],
+ has_attachments: bool,
+ attachments: List[Dict],
+ headers: Dict,
+ folder: str,
+ date_sent: datetime,
+ date_received: datetime,
+) -> Optional[str]:
+ """Insert or update an email. Returns the email ID."""
+ pool = await get_pool()
+ if pool is None:
+ return None
+
+ email_id = str(uuid.uuid4())
+ try:
+ async with pool.acquire() as conn:
+ row = await conn.fetchrow(
+ """
+ INSERT INTO aggregated_emails
+ (id, account_id, user_id, tenant_id, message_id, subject,
+ sender_email, sender_name, recipients, cc, body_preview,
+ body_text, body_html, has_attachments, attachments, headers,
+ folder, date_sent, date_received)
+ VALUES ($1, $2, $3, $4, $5, $6, $7, $8, $9, $10, $11, $12, $13, $14, $15, $16, $17, $18, $19)
+ ON CONFLICT (account_id, message_id) DO UPDATE SET
+ subject = EXCLUDED.subject,
+ is_read = EXCLUDED.is_read,
+ folder = EXCLUDED.folder
+ RETURNING id
+ """,
+ email_id, account_id, user_id, tenant_id, message_id, subject,
+ sender_email, sender_name, json.dumps(recipients), json.dumps(cc),
+ body_preview, body_text, body_html, has_attachments,
+ json.dumps(attachments), json.dumps(headers), folder,
+ date_sent, date_received
+ )
+ return row['id'] if row else None
+ except Exception as e:
+ print(f"Failed to upsert email: {e}")
+ return None
+
+
+async def get_unified_inbox(
+ user_id: str,
+ account_ids: Optional[List[str]] = None,
+ categories: Optional[List[str]] = None,
+ is_read: Optional[bool] = None,
+ is_starred: Optional[bool] = None,
+ limit: int = 50,
+ offset: int = 0,
+) -> List[Dict]:
+ """Get unified inbox with filtering."""
+ pool = await get_pool()
+ if pool is None:
+ return []
+
+ try:
+ async with pool.acquire() as conn:
+ conditions = ["user_id = $1", "is_deleted = FALSE"]
+ params = [user_id]
+ param_idx = 2
+
+ if account_ids:
+ conditions.append(f"account_id = ANY(${param_idx})")
+ params.append(account_ids)
+ param_idx += 1
+
+ if categories:
+ conditions.append(f"category = ANY(${param_idx})")
+ params.append(categories)
+ param_idx += 1
+
+ if is_read is not None:
+ conditions.append(f"is_read = ${param_idx}")
+ params.append(is_read)
+ param_idx += 1
+
+ if is_starred is not None:
+ conditions.append(f"is_starred = ${param_idx}")
+ params.append(is_starred)
+ param_idx += 1
+
+ where_clause = " AND ".join(conditions)
+ params.extend([limit, offset])
+
+ query = f"""
+ SELECT e.*, a.email as account_email, a.display_name as account_name
+ FROM aggregated_emails e
+ JOIN external_email_accounts a ON e.account_id = a.id
+ WHERE {where_clause}
+ ORDER BY e.date_received DESC
+ LIMIT ${param_idx} OFFSET ${param_idx + 1}
+ """
+
+ rows = await conn.fetch(query, *params)
+ return [dict(r) for r in rows]
+ except Exception as e:
+ print(f"Failed to get unified inbox: {e}")
+ return []
+
+
+async def get_email(email_id: str, user_id: str) -> Optional[Dict]:
+ """Get a single email by ID."""
+ pool = await get_pool()
+ if pool is None:
+ return None
+
+ try:
+ async with pool.acquire() as conn:
+ row = await conn.fetchrow(
+ """
+ SELECT e.*, a.email as account_email, a.display_name as account_name
+ FROM aggregated_emails e
+ JOIN external_email_accounts a ON e.account_id = a.id
+ WHERE e.id = $1 AND e.user_id = $2
+ """,
+ email_id, user_id
+ )
+ return dict(row) if row else None
+ except Exception as e:
+ print(f"Failed to get email: {e}")
+ return None
+
+
+async def update_email_ai_analysis(
+ email_id: str,
+ category: str,
+ sender_type: str,
+ sender_authority_name: Optional[str],
+ detected_deadlines: List[Dict],
+ suggested_priority: str,
+ ai_summary: Optional[str],
+) -> bool:
+ """Update email with AI analysis results."""
+ pool = await get_pool()
+ if pool is None:
+ return False
+
+ try:
+ async with pool.acquire() as conn:
+ await conn.execute(
+ """
+ UPDATE aggregated_emails SET
+ category = $2,
+ sender_type = $3,
+ sender_authority_name = $4,
+ detected_deadlines = $5,
+ suggested_priority = $6,
+ ai_summary = $7,
+ ai_analyzed_at = NOW()
+ WHERE id = $1
+ """,
+ email_id, category, sender_type, sender_authority_name,
+ json.dumps(detected_deadlines), suggested_priority, ai_summary
+ )
+ return True
+ except Exception as e:
+ print(f"Failed to update email AI analysis: {e}")
+ return False
+
+
+async def mark_email_read(email_id: str, user_id: str, is_read: bool = True) -> bool:
+ """Mark email as read/unread."""
+ pool = await get_pool()
+ if pool is None:
+ return False
+
+ try:
+ async with pool.acquire() as conn:
+ await conn.execute(
+ """
+ UPDATE aggregated_emails SET is_read = $3
+ WHERE id = $1 AND user_id = $2
+ """,
+ email_id, user_id, is_read
+ )
+ return True
+ except Exception as e:
+ print(f"Failed to mark email read: {e}")
+ return False
+
+
+async def mark_email_starred(email_id: str, user_id: str, is_starred: bool = True) -> bool:
+ """Mark email as starred/unstarred."""
+ pool = await get_pool()
+ if pool is None:
+ return False
+
+ try:
+ async with pool.acquire() as conn:
+ await conn.execute(
+ """
+ UPDATE aggregated_emails SET is_starred = $3
+ WHERE id = $1 AND user_id = $2
+ """,
+ email_id, user_id, is_starred
+ )
+ return True
+ except Exception as e:
+ print(f"Failed to mark email starred: {e}")
+ return False
diff --git a/klausur-service/backend/mail/mail_db_pool.py b/klausur-service/backend/mail/mail_db_pool.py
new file mode 100644
index 0000000..41a50e5
--- /dev/null
+++ b/klausur-service/backend/mail/mail_db_pool.py
@@ -0,0 +1,253 @@
+"""
+Mail Database - Connection Pool and Schema Initialization.
+"""
+
+import os
+
+# Database Configuration - from Vault or environment (test default for CI)
+DATABASE_URL = os.getenv("DATABASE_URL", "postgresql://test:test@localhost:5432/test")
+
+# Flag to check if using test defaults
+_DB_CONFIGURED = DATABASE_URL != "postgresql://test:test@localhost:5432/test"
+
+# Connection pool (shared with metrics_db)
+_pool = None
+
+
+async def get_pool():
+ """Get or create database connection pool."""
+ global _pool
+ if _pool is None:
+ try:
+ import asyncpg
+ _pool = await asyncpg.create_pool(DATABASE_URL, min_size=2, max_size=10)
+ except ImportError:
+ print("Warning: asyncpg not installed. Mail database disabled.")
+ return None
+ except Exception as e:
+ print(f"Warning: Failed to connect to PostgreSQL: {e}")
+ return None
+ return _pool
+
+
+async def init_mail_tables() -> bool:
+ """Initialize mail tables in PostgreSQL."""
+ pool = await get_pool()
+ if pool is None:
+ return False
+
+ create_tables_sql = """
+ -- =============================================================================
+ -- External Email Accounts
+ -- =============================================================================
+ CREATE TABLE IF NOT EXISTS external_email_accounts (
+ id VARCHAR(36) PRIMARY KEY,
+ user_id VARCHAR(36) NOT NULL,
+ tenant_id VARCHAR(36) NOT NULL,
+ email VARCHAR(255) NOT NULL,
+ display_name VARCHAR(255),
+ account_type VARCHAR(50) DEFAULT 'personal',
+
+ -- IMAP Settings (password stored in Vault)
+ imap_host VARCHAR(255) NOT NULL,
+ imap_port INTEGER DEFAULT 993,
+ imap_ssl BOOLEAN DEFAULT TRUE,
+
+ -- SMTP Settings
+ smtp_host VARCHAR(255) NOT NULL,
+ smtp_port INTEGER DEFAULT 465,
+ smtp_ssl BOOLEAN DEFAULT TRUE,
+
+ -- Vault path for credentials
+ vault_path VARCHAR(500),
+
+ -- Status tracking
+ status VARCHAR(20) DEFAULT 'pending',
+ last_sync TIMESTAMP,
+ sync_error TEXT,
+ email_count INTEGER DEFAULT 0,
+ unread_count INTEGER DEFAULT 0,
+
+ -- Timestamps
+ created_at TIMESTAMP DEFAULT NOW(),
+ updated_at TIMESTAMP DEFAULT NOW(),
+
+ -- Constraints
+ UNIQUE(user_id, email)
+ );
+
+ CREATE INDEX IF NOT EXISTS idx_mail_accounts_user ON external_email_accounts(user_id);
+ CREATE INDEX IF NOT EXISTS idx_mail_accounts_tenant ON external_email_accounts(tenant_id);
+ CREATE INDEX IF NOT EXISTS idx_mail_accounts_status ON external_email_accounts(status);
+
+ -- =============================================================================
+ -- Aggregated Emails
+ -- =============================================================================
+ CREATE TABLE IF NOT EXISTS aggregated_emails (
+ id VARCHAR(36) PRIMARY KEY,
+ account_id VARCHAR(36) REFERENCES external_email_accounts(id) ON DELETE CASCADE,
+ user_id VARCHAR(36) NOT NULL,
+ tenant_id VARCHAR(36) NOT NULL,
+
+ -- Email identification
+ message_id VARCHAR(500) NOT NULL,
+ folder VARCHAR(100) DEFAULT 'INBOX',
+
+ -- Email content
+ subject TEXT,
+ sender_email VARCHAR(255),
+ sender_name VARCHAR(255),
+ recipients JSONB DEFAULT '[]',
+ cc JSONB DEFAULT '[]',
+ body_preview TEXT,
+ body_text TEXT,
+ body_html TEXT,
+ has_attachments BOOLEAN DEFAULT FALSE,
+ attachments JSONB DEFAULT '[]',
+ headers JSONB DEFAULT '{}',
+
+ -- Status flags
+ is_read BOOLEAN DEFAULT FALSE,
+ is_starred BOOLEAN DEFAULT FALSE,
+ is_deleted BOOLEAN DEFAULT FALSE,
+
+ -- Dates
+ date_sent TIMESTAMP,
+ date_received TIMESTAMP,
+
+ -- AI enrichment
+ category VARCHAR(50),
+ sender_type VARCHAR(50),
+ sender_authority_name VARCHAR(255),
+ detected_deadlines JSONB DEFAULT '[]',
+ suggested_priority VARCHAR(20),
+ ai_summary TEXT,
+ ai_analyzed_at TIMESTAMP,
+
+ created_at TIMESTAMP DEFAULT NOW(),
+
+ -- Prevent duplicate imports
+ UNIQUE(account_id, message_id)
+ );
+
+ CREATE INDEX IF NOT EXISTS idx_emails_account ON aggregated_emails(account_id);
+ CREATE INDEX IF NOT EXISTS idx_emails_user ON aggregated_emails(user_id);
+ CREATE INDEX IF NOT EXISTS idx_emails_tenant ON aggregated_emails(tenant_id);
+ CREATE INDEX IF NOT EXISTS idx_emails_date ON aggregated_emails(date_received DESC);
+ CREATE INDEX IF NOT EXISTS idx_emails_category ON aggregated_emails(category);
+ CREATE INDEX IF NOT EXISTS idx_emails_unread ON aggregated_emails(is_read) WHERE is_read = FALSE;
+ CREATE INDEX IF NOT EXISTS idx_emails_starred ON aggregated_emails(is_starred) WHERE is_starred = TRUE;
+ CREATE INDEX IF NOT EXISTS idx_emails_sender ON aggregated_emails(sender_email);
+
+ -- =============================================================================
+ -- Inbox Tasks (Arbeitsvorrat)
+ -- =============================================================================
+ CREATE TABLE IF NOT EXISTS inbox_tasks (
+ id VARCHAR(36) PRIMARY KEY,
+ user_id VARCHAR(36) NOT NULL,
+ tenant_id VARCHAR(36) NOT NULL,
+ email_id VARCHAR(36) REFERENCES aggregated_emails(id) ON DELETE SET NULL,
+ account_id VARCHAR(36) REFERENCES external_email_accounts(id) ON DELETE SET NULL,
+
+ -- Task content
+ title VARCHAR(500) NOT NULL,
+ description TEXT,
+ priority VARCHAR(20) DEFAULT 'medium',
+ status VARCHAR(20) DEFAULT 'pending',
+ deadline TIMESTAMP,
+
+ -- Source information
+ source_email_subject TEXT,
+ source_sender VARCHAR(255),
+ source_sender_type VARCHAR(50),
+
+ -- AI extraction info
+ ai_extracted BOOLEAN DEFAULT FALSE,
+ confidence_score FLOAT,
+
+ -- Completion tracking
+ completed_at TIMESTAMP,
+ reminder_at TIMESTAMP,
+
+ -- Timestamps
+ created_at TIMESTAMP DEFAULT NOW(),
+ updated_at TIMESTAMP DEFAULT NOW()
+ );
+
+ CREATE INDEX IF NOT EXISTS idx_tasks_user ON inbox_tasks(user_id);
+ CREATE INDEX IF NOT EXISTS idx_tasks_tenant ON inbox_tasks(tenant_id);
+ CREATE INDEX IF NOT EXISTS idx_tasks_status ON inbox_tasks(status);
+ CREATE INDEX IF NOT EXISTS idx_tasks_deadline ON inbox_tasks(deadline) WHERE deadline IS NOT NULL;
+ CREATE INDEX IF NOT EXISTS idx_tasks_priority ON inbox_tasks(priority);
+ CREATE INDEX IF NOT EXISTS idx_tasks_email ON inbox_tasks(email_id) WHERE email_id IS NOT NULL;
+
+ -- =============================================================================
+ -- Email Templates
+ -- =============================================================================
+ CREATE TABLE IF NOT EXISTS email_templates (
+ id VARCHAR(36) PRIMARY KEY,
+ user_id VARCHAR(36), -- NULL for system templates
+ tenant_id VARCHAR(36),
+
+ name VARCHAR(255) NOT NULL,
+ category VARCHAR(100),
+ subject_template TEXT,
+ body_template TEXT,
+ variables JSONB DEFAULT '[]',
+
+ is_system BOOLEAN DEFAULT FALSE,
+ usage_count INTEGER DEFAULT 0,
+
+ created_at TIMESTAMP DEFAULT NOW(),
+ updated_at TIMESTAMP DEFAULT NOW()
+ );
+
+ CREATE INDEX IF NOT EXISTS idx_templates_user ON email_templates(user_id);
+ CREATE INDEX IF NOT EXISTS idx_templates_tenant ON email_templates(tenant_id);
+ CREATE INDEX IF NOT EXISTS idx_templates_system ON email_templates(is_system);
+
+ -- =============================================================================
+ -- Mail Audit Log
+ -- =============================================================================
+ CREATE TABLE IF NOT EXISTS mail_audit_log (
+ id VARCHAR(36) PRIMARY KEY,
+ user_id VARCHAR(36) NOT NULL,
+ tenant_id VARCHAR(36),
+ action VARCHAR(100) NOT NULL,
+ entity_type VARCHAR(50), -- account, email, task
+ entity_id VARCHAR(36),
+ details JSONB,
+ ip_address VARCHAR(45),
+ user_agent TEXT,
+ created_at TIMESTAMP DEFAULT NOW()
+ );
+
+ CREATE INDEX IF NOT EXISTS idx_mail_audit_user ON mail_audit_log(user_id);
+ CREATE INDEX IF NOT EXISTS idx_mail_audit_created ON mail_audit_log(created_at DESC);
+ CREATE INDEX IF NOT EXISTS idx_mail_audit_action ON mail_audit_log(action);
+
+ -- =============================================================================
+ -- Sync Status Tracking
+ -- =============================================================================
+ CREATE TABLE IF NOT EXISTS mail_sync_status (
+ id VARCHAR(36) PRIMARY KEY,
+ account_id VARCHAR(36) REFERENCES external_email_accounts(id) ON DELETE CASCADE,
+ folder VARCHAR(100),
+ last_uid INTEGER DEFAULT 0,
+ last_sync TIMESTAMP,
+ sync_errors INTEGER DEFAULT 0,
+ created_at TIMESTAMP DEFAULT NOW(),
+ updated_at TIMESTAMP DEFAULT NOW(),
+
+ UNIQUE(account_id, folder)
+ );
+ """
+
+ try:
+ async with pool.acquire() as conn:
+ await conn.execute(create_tables_sql)
+ print("Mail tables initialized successfully")
+ return True
+ except Exception as e:
+ print(f"Failed to initialize mail tables: {e}")
+ return False
diff --git a/klausur-service/backend/mail/mail_db_stats.py b/klausur-service/backend/mail/mail_db_stats.py
new file mode 100644
index 0000000..193d117
--- /dev/null
+++ b/klausur-service/backend/mail/mail_db_stats.py
@@ -0,0 +1,118 @@
+"""
+Mail Database - Statistics and Audit Log Operations.
+"""
+
+import json
+import uuid
+from typing import Optional, Dict
+from datetime import datetime
+
+from .mail_db_pool import get_pool
+
+
+async def get_mail_stats(user_id: str) -> Dict:
+ """Get overall mail statistics for a user."""
+ pool = await get_pool()
+ if pool is None:
+ return {}
+
+ try:
+ async with pool.acquire() as conn:
+ today = datetime.now().replace(hour=0, minute=0, second=0, microsecond=0)
+
+ # Account stats
+ accounts = await conn.fetch(
+ """
+ SELECT id, email, display_name, status, email_count, unread_count, last_sync
+ FROM external_email_accounts
+ WHERE user_id = $1
+ """,
+ user_id
+ )
+
+ # Email counts
+ email_stats = await conn.fetchrow(
+ """
+ SELECT
+ COUNT(*) as total_emails,
+ COUNT(*) FILTER (WHERE is_read = FALSE) as unread_emails,
+ COUNT(*) FILTER (WHERE date_received >= $2) as emails_today,
+ COUNT(*) FILTER (WHERE ai_analyzed_at >= $2) as ai_analyses_today
+ FROM aggregated_emails
+ WHERE user_id = $1
+ """,
+ user_id, today
+ )
+
+ # Task counts
+ task_stats = await conn.fetchrow(
+ """
+ SELECT
+ COUNT(*) as total_tasks,
+ COUNT(*) FILTER (WHERE status = 'pending') as pending_tasks,
+ COUNT(*) FILTER (WHERE status != 'completed' AND deadline < NOW()) as overdue_tasks
+ FROM inbox_tasks
+ WHERE user_id = $1
+ """,
+ user_id
+ )
+
+ return {
+ "total_accounts": len(accounts),
+ "active_accounts": sum(1 for a in accounts if a['status'] == 'active'),
+ "error_accounts": sum(1 for a in accounts if a['status'] == 'error'),
+ "total_emails": email_stats['total_emails'] or 0,
+ "unread_emails": email_stats['unread_emails'] or 0,
+ "total_tasks": task_stats['total_tasks'] or 0,
+ "pending_tasks": task_stats['pending_tasks'] or 0,
+ "overdue_tasks": task_stats['overdue_tasks'] or 0,
+ "emails_today": email_stats['emails_today'] or 0,
+ "ai_analyses_today": email_stats['ai_analyses_today'] or 0,
+ "per_account": [
+ {
+ "id": a['id'],
+ "email": a['email'],
+ "display_name": a['display_name'],
+ "status": a['status'],
+ "email_count": a['email_count'],
+ "unread_count": a['unread_count'],
+ "last_sync": a['last_sync'].isoformat() if a['last_sync'] else None,
+ }
+ for a in accounts
+ ],
+ }
+ except Exception as e:
+ print(f"Failed to get mail stats: {e}")
+ return {}
+
+
+async def log_mail_audit(
+ user_id: str,
+ action: str,
+ entity_type: Optional[str] = None,
+ entity_id: Optional[str] = None,
+ details: Optional[Dict] = None,
+ tenant_id: Optional[str] = None,
+ ip_address: Optional[str] = None,
+ user_agent: Optional[str] = None,
+) -> bool:
+ """Log a mail action for audit trail."""
+ pool = await get_pool()
+ if pool is None:
+ return False
+
+ try:
+ async with pool.acquire() as conn:
+ await conn.execute(
+ """
+ INSERT INTO mail_audit_log
+ (id, user_id, tenant_id, action, entity_type, entity_id, details, ip_address, user_agent)
+ VALUES ($1, $2, $3, $4, $5, $6, $7, $8, $9)
+ """,
+ str(uuid.uuid4()), user_id, tenant_id, action, entity_type, entity_id,
+ json.dumps(details) if details else None, ip_address, user_agent
+ )
+ return True
+ except Exception as e:
+ print(f"Failed to log mail audit: {e}")
+ return False
diff --git a/klausur-service/backend/mail/mail_db_tasks.py b/klausur-service/backend/mail/mail_db_tasks.py
new file mode 100644
index 0000000..af48ef7
--- /dev/null
+++ b/klausur-service/backend/mail/mail_db_tasks.py
@@ -0,0 +1,247 @@
+"""
+Mail Database - Inbox Task Operations.
+"""
+
+import uuid
+from typing import Optional, List, Dict
+from datetime import datetime, timedelta
+
+from .mail_db_pool import get_pool
+
+
async def create_task(
    user_id: str,
    tenant_id: str,
    title: str,
    description: Optional[str] = None,
    priority: str = "medium",
    deadline: Optional[datetime] = None,
    email_id: Optional[str] = None,
    account_id: Optional[str] = None,
    source_email_subject: Optional[str] = None,
    source_sender: Optional[str] = None,
    source_sender_type: Optional[str] = None,
    ai_extracted: bool = False,
    confidence_score: Optional[float] = None,
) -> Optional[str]:
    """Create a new inbox task.

    Generates the task id (UUID4) client-side and inserts one row into
    ``inbox_tasks``.  Best-effort: failures are printed, not raised.

    Args:
        user_id / tenant_id: ownership scope of the task.
        title: task title.
        description: optional free-text body.
        priority: priority label, defaults to "medium".
        deadline: optional due timestamp.
        email_id / account_id: optional link to the originating email/account.
        source_email_subject / source_sender / source_sender_type: optional
            provenance of the email the task was derived from.
        ai_extracted: True when the task was extracted automatically.
        confidence_score: optional extraction confidence.

    Returns:
        The new task's UUID string, or None when the pool is unavailable or
        the INSERT failed.
    """
    pool = await get_pool()
    if pool is None:
        return None

    task_id = str(uuid.uuid4())
    try:
        async with pool.acquire() as conn:
            await conn.execute(
                """
                INSERT INTO inbox_tasks
                (id, user_id, tenant_id, title, description, priority, deadline,
                 email_id, account_id, source_email_subject, source_sender,
                 source_sender_type, ai_extracted, confidence_score)
                VALUES ($1, $2, $3, $4, $5, $6, $7, $8, $9, $10, $11, $12, $13, $14)
                """,
                task_id, user_id, tenant_id, title, description, priority, deadline,
                email_id, account_id, source_email_subject, source_sender,
                source_sender_type, ai_extracted, confidence_score
            )
            return task_id
    except Exception as e:
        print(f"Failed to create task: {e}")
        return None
+
+
async def get_tasks(
    user_id: str,
    status: Optional[str] = None,
    priority: Optional[str] = None,
    include_completed: bool = False,
    limit: int = 50,
    offset: int = 0,
) -> List[Dict]:
    """Get tasks for a user.

    Args:
        user_id: owner of the tasks.
        status: optional exact-match filter on task status.
        priority: optional exact-match filter on priority.
        include_completed: when False (default), completed tasks are excluded.
        limit / offset: pagination window.

    Returns:
        Task rows as dicts, ordered by priority (urgent -> low), then by
        earliest deadline (tasks without a deadline last), then newest first.
        Empty list when the pool is unavailable or the query fails.
    """
    pool = await get_pool()
    if pool is None:
        return []

    try:
        async with pool.acquire() as conn:
            # Build the WHERE clause dynamically; param_idx tracks the next
            # free $n placeholder so predicates stay aligned with params.
            # Filter values are always passed as bind parameters, never
            # interpolated into the SQL text.
            conditions = ["user_id = $1"]
            params = [user_id]
            param_idx = 2

            if not include_completed:
                # Constant predicate — consumes no placeholder.
                conditions.append("status != 'completed'")

            if status:
                conditions.append(f"status = ${param_idx}")
                params.append(status)
                param_idx += 1

            if priority:
                conditions.append(f"priority = ${param_idx}")
                params.append(priority)
                param_idx += 1

            where_clause = " AND ".join(conditions)
            # LIMIT/OFFSET take the final two placeholders.
            params.extend([limit, offset])

            query = f"""
                SELECT * FROM inbox_tasks
                WHERE {where_clause}
                ORDER BY
                    CASE priority
                        WHEN 'urgent' THEN 1
                        WHEN 'high' THEN 2
                        WHEN 'medium' THEN 3
                        WHEN 'low' THEN 4
                    END,
                    deadline ASC NULLS LAST,
                    created_at DESC
                LIMIT ${param_idx} OFFSET ${param_idx + 1}
            """

            rows = await conn.fetch(query, *params)
            return [dict(r) for r in rows]
    except Exception as e:
        print(f"Failed to get tasks: {e}")
        return []
+
+
async def get_task(task_id: str, user_id: str) -> Optional[Dict]:
    """Fetch one inbox task owned by *user_id*.

    Returns the row as a dict, or None when the task does not exist, the
    pool is unavailable, or the query fails (errors are printed, not raised).
    """
    pool = await get_pool()
    if pool is None:
        return None

    try:
        async with pool.acquire() as connection:
            record = await connection.fetchrow(
                "SELECT * FROM inbox_tasks WHERE id = $1 AND user_id = $2",
                task_id, user_id
            )
    except Exception as e:
        print(f"Failed to get task: {e}")
        return None

    if record is None:
        return None
    return dict(record)
+
+
async def update_task(
    task_id: str,
    user_id: str,
    title: Optional[str] = None,
    description: Optional[str] = None,
    priority: Optional[str] = None,
    status: Optional[str] = None,
    deadline: Optional[datetime] = None,
) -> bool:
    """Update a task.

    Only fields passed as non-None are changed; ``None`` means "leave as
    is", so this API cannot clear an existing deadline or description.
    ``updated_at`` is always refreshed; setting status to "completed" also
    stamps ``completed_at``.

    Returns:
        True when the UPDATE was issued without error (including when no row
        matched the id/user pair), False when the pool is unavailable or the
        query fails.
    """
    pool = await get_pool()
    if pool is None:
        return False

    try:
        async with pool.acquire() as conn:
            # $1/$2 are reserved for the WHERE clause below; SET value
            # placeholders therefore start at $3.
            updates = ["updated_at = NOW()"]
            params = [task_id, user_id]
            param_idx = 3

            if title is not None:
                updates.append(f"title = ${param_idx}")
                params.append(title)
                param_idx += 1

            if description is not None:
                updates.append(f"description = ${param_idx}")
                params.append(description)
                param_idx += 1

            if priority is not None:
                updates.append(f"priority = ${param_idx}")
                params.append(priority)
                param_idx += 1

            if status is not None:
                updates.append(f"status = ${param_idx}")
                params.append(status)
                param_idx += 1
                # Completing a task records the completion time server-side.
                if status == "completed":
                    updates.append("completed_at = NOW()")

            if deadline is not None:
                updates.append(f"deadline = ${param_idx}")
                params.append(deadline)
                param_idx += 1

            set_clause = ", ".join(updates)
            await conn.execute(
                f"UPDATE inbox_tasks SET {set_clause} WHERE id = $1 AND user_id = $2",
                *params
            )
            return True
    except Exception as e:
        print(f"Failed to update task: {e}")
        return False
+
+
async def get_task_dashboard_stats(user_id: str) -> Dict:
    """Get dashboard statistics for tasks.

    Computes overall counters (total / pending / in-progress / completed /
    overdue / due today / due this week) plus per-priority and
    per-sender-type breakdowns of open (non-completed) tasks.

    Returns:
        Stats dict, or {} when the pool is unavailable or a query fails
        (errors are printed, not raised).
    """
    pool = await get_pool()
    if pool is None:
        return {}

    try:
        async with pool.acquire() as conn:
            # Time windows for the deadline buckets.
            # NOTE(review): naive local datetimes are compared against DB
            # timestamps here — assumes both use the same timezone; confirm.
            now = datetime.now()
            today_end = now.replace(hour=23, minute=59, second=59)
            week_end = now + timedelta(days=7)

            stats = await conn.fetchrow(
                """
                SELECT
                    COUNT(*) as total_tasks,
                    COUNT(*) FILTER (WHERE status = 'pending') as pending_tasks,
                    COUNT(*) FILTER (WHERE status = 'in_progress') as in_progress_tasks,
                    COUNT(*) FILTER (WHERE status = 'completed') as completed_tasks,
                    COUNT(*) FILTER (WHERE status != 'completed' AND deadline < $2) as overdue_tasks,
                    COUNT(*) FILTER (WHERE status != 'completed' AND deadline <= $3) as due_today,
                    COUNT(*) FILTER (WHERE status != 'completed' AND deadline <= $4) as due_this_week
                FROM inbox_tasks
                WHERE user_id = $1
                """,
                user_id, now, today_end, week_end
            )

            # Open-task counts grouped by priority.
            by_priority = await conn.fetch(
                """
                SELECT priority, COUNT(*) as count
                FROM inbox_tasks
                WHERE user_id = $1 AND status != 'completed'
                GROUP BY priority
                """,
                user_id
            )

            # Open-task counts grouped by sender type (NULL types excluded).
            by_sender = await conn.fetch(
                """
                SELECT source_sender_type, COUNT(*) as count
                FROM inbox_tasks
                WHERE user_id = $1 AND status != 'completed' AND source_sender_type IS NOT NULL
                GROUP BY source_sender_type
                """,
                user_id
            )

            return {
                "total_tasks": stats['total_tasks'] or 0,
                "pending_tasks": stats['pending_tasks'] or 0,
                "in_progress_tasks": stats['in_progress_tasks'] or 0,
                "completed_tasks": stats['completed_tasks'] or 0,
                "overdue_tasks": stats['overdue_tasks'] or 0,
                "due_today": stats['due_today'] or 0,
                "due_this_week": stats['due_this_week'] or 0,
                "by_priority": {r['priority']: r['count'] for r in by_priority},
                "by_sender_type": {r['source_sender_type']: r['count'] for r in by_sender},
            }
    except Exception as e:
        print(f"Failed to get task stats: {e}")
        return {}
diff --git a/klausur-service/backend/ocr_merge_helpers.py b/klausur-service/backend/ocr_merge_helpers.py
new file mode 100644
index 0000000..571c116
--- /dev/null
+++ b/klausur-service/backend/ocr_merge_helpers.py
@@ -0,0 +1,272 @@
+"""
+OCR Merge Helpers — functions for combining PaddleOCR/RapidOCR with Tesseract results.
+
+Extracted from ocr_pipeline_ocr_merge.py.
+
+Lizenz: Apache 2.0
+DATENSCHUTZ: Alle Verarbeitung erfolgt lokal.
+"""
+
+import logging
+from typing import List
+
+logger = logging.getLogger(__name__)
+
+
+def _split_paddle_multi_words(words: list) -> list:
+ """Split PaddleOCR multi-word boxes into individual word boxes.
+
+ PaddleOCR often returns entire phrases as a single box, e.g.
+ "More than 200 singers took part in the" with one bounding box.
+ This splits them into individual words with proportional widths.
+ Also handles leading "!" (e.g. "!Betonung" -> ["!", "Betonung"])
+ and IPA brackets (e.g. "badge[bxd3]" -> ["badge", "[bxd3]"]).
+ """
+ import re
+
+ result = []
+ for w in words:
+ raw_text = w.get("text", "").strip()
+ if not raw_text:
+ continue
+ # Split on whitespace, before "[" (IPA), and after "!" before letter
+ tokens = re.split(
+ r'\s+|(?=\[)|(?<=!)(?=[A-Za-z\u00c0-\u024f])', raw_text
+ )
+ tokens = [t for t in tokens if t]
+
+ if len(tokens) <= 1:
+ result.append(w)
+ else:
+ # Split proportionally by character count
+ total_chars = sum(len(t) for t in tokens)
+ if total_chars == 0:
+ continue
+ n_gaps = len(tokens) - 1
+ gap_px = w["width"] * 0.02
+ usable_w = w["width"] - gap_px * n_gaps
+ cursor = w["left"]
+ for t in tokens:
+ token_w = max(1, usable_w * len(t) / total_chars)
+ result.append({
+ "text": t,
+ "left": round(cursor),
+ "top": w["top"],
+ "width": round(token_w),
+ "height": w["height"],
+ "conf": w.get("conf", 0),
+ })
+ cursor += token_w + gap_px
+ return result
+
+
+def _group_words_into_rows(words: list, row_gap: int = 12) -> list:
+ """Group words into rows by Y-position clustering.
+
+ Words whose vertical centers are within `row_gap` pixels are on the same row.
+ Returns list of rows, each row is a list of words sorted left-to-right.
+ """
+ if not words:
+ return []
+ # Sort by vertical center
+ sorted_words = sorted(words, key=lambda w: w["top"] + w.get("height", 0) / 2)
+ rows: list = []
+ current_row: list = [sorted_words[0]]
+ current_cy = sorted_words[0]["top"] + sorted_words[0].get("height", 0) / 2
+
+ for w in sorted_words[1:]:
+ cy = w["top"] + w.get("height", 0) / 2
+ if abs(cy - current_cy) <= row_gap:
+ current_row.append(w)
+ else:
+ # Sort current row left-to-right before saving
+ rows.append(sorted(current_row, key=lambda w: w["left"]))
+ current_row = [w]
+ current_cy = cy
+ if current_row:
+ rows.append(sorted(current_row, key=lambda w: w["left"]))
+ return rows
+
+
+def _row_center_y(row: list) -> float:
+ """Average vertical center of a row of words."""
+ if not row:
+ return 0.0
+ return sum(w["top"] + w.get("height", 0) / 2 for w in row) / len(row)
+
+
def _merge_row_sequences(paddle_row: list, tess_row: list) -> list:
    """Merge two word sequences from the same row using sequence alignment.

    Both sequences are sorted left-to-right. Walk through both simultaneously:
    - If words match (same/similar text): take Paddle text with averaged coords
    - If they don't match: the extra word is unique to one engine, include it

    Because both engines emit words in reading order, the lockstep walk
    prevents the same physical word from appearing twice in the output.
    """
    merged = []
    pi, ti = 0, 0

    while pi < len(paddle_row) and ti < len(tess_row):
        pw = paddle_row[pi]
        tw = tess_row[ti]

        # Check if these are the same word (case-insensitive).
        pt = pw.get("text", "").lower().strip()
        tt = tw.get("text", "").lower().strip()

        # Same text, or one contains the other (guard len > 1 so single
        # characters can't spuriously "contain" each other).
        is_same = (pt == tt) or (len(pt) > 1 and len(tt) > 1 and (pt in tt or tt in pt))

        # Spatial overlap check: if the boxes overlap >= 40% horizontally,
        # treat them as the same physical word even when the OCR texts differ
        # (e.g. "Stick" vs "Stück").
        spatial_match = False
        if not is_same:
            overlap_left = max(pw["left"], tw["left"])
            overlap_right = min(
                pw["left"] + pw.get("width", 0),
                tw["left"] + tw.get("width", 0),
            )
            overlap_w = max(0, overlap_right - overlap_left)
            min_w = min(pw.get("width", 1), tw.get("width", 1))
            if min_w > 0 and overlap_w / min_w >= 0.4:
                is_same = True
                spatial_match = True

        if is_same:
            # Matched — average the coordinates weighted by confidence.
            # Defaults (Paddle 80, Tesseract 50) bias towards Paddle when
            # confidences are missing.
            pc = pw.get("conf", 80)
            tc = tw.get("conf", 50)
            total = pc + tc
            if total == 0:
                total = 1  # avoid division by zero when both confs are 0
            # Text: on a purely spatial match prefer the higher-confidence
            # engine's reading; otherwise keep Paddle's text.
            if spatial_match and pc < tc:
                best_text = tw["text"]
            else:
                best_text = pw["text"]
            merged.append({
                "text": best_text,
                "left": round((pw["left"] * pc + tw["left"] * tc) / total),
                "top": round((pw["top"] * pc + tw["top"] * tc) / total),
                "width": round((pw["width"] * pc + tw["width"] * tc) / total),
                "height": round((pw["height"] * pc + tw["height"] * tc) / total),
                "conf": max(pc, tc),
            })
            pi += 1
            ti += 1
        else:
            # Different text — one engine found something extra.  Look ahead
            # up to 3 words: is the current Paddle word later in Tesseract?
            paddle_ahead = any(
                tess_row[t].get("text", "").lower().strip() == pt
                for t in range(ti + 1, min(ti + 4, len(tess_row)))
            )
            # ... and is the current Tesseract word later in Paddle?
            tess_ahead = any(
                paddle_row[p].get("text", "").lower().strip() == tt
                for p in range(pi + 1, min(pi + 4, len(paddle_row)))
            )

            if paddle_ahead and not tess_ahead:
                # Tesseract has an extra word here — include it if confident.
                if tw.get("conf", 0) >= 30:
                    merged.append(tw)
                ti += 1
            elif tess_ahead and not paddle_ahead:
                # Paddle has an extra word — always include it.
                merged.append(pw)
                pi += 1
            else:
                # Ambiguous — take the leftmost word first; Tesseract extras
                # still need conf >= 30 to be kept.
                if pw["left"] <= tw["left"]:
                    merged.append(pw)
                    pi += 1
                else:
                    if tw.get("conf", 0) >= 30:
                        merged.append(tw)
                    ti += 1

    # Flush remaining words: Paddle unconditionally, Tesseract gated on conf.
    while pi < len(paddle_row):
        merged.append(paddle_row[pi])
        pi += 1
    while ti < len(tess_row):
        tw = tess_row[ti]
        if tw.get("conf", 0) >= 30:
            merged.append(tw)
        ti += 1

    return merged
+
+
def _merge_paddle_tesseract(paddle_words: list, tess_words: list) -> list:
    """Merge word boxes from PaddleOCR and Tesseract using row-based sequence alignment.

    Strategy:
    1. Group each engine's words into rows (by Y-position clustering)
    2. Match rows between engines (by vertical center proximity)
    3. Within each matched row: merge sequences left-to-right, deduplicating
       words that appear in both engines at the same sequence position
    4. Unmatched rows from either engine: keep as-is (Tesseract-only words
       need conf >= 40)
    """
    if not paddle_words and not tess_words:
        return []
    if not paddle_words:
        # Tesseract only — keep just its reasonably confident words.
        return [w for w in tess_words if w.get("conf", 0) >= 40]
    if not tess_words:
        return list(paddle_words)

    # Step 1: group into rows.
    paddle_rows = _group_words_into_rows(paddle_words)
    tess_rows = _group_words_into_rows(tess_words)

    # Step 2: greedily match each Paddle row to the nearest unused
    # Tesseract row by vertical center distance.
    used_tess_rows: set = set()
    merged_all: list = []

    for pr in paddle_rows:
        pr_cy = _row_center_y(pr)
        best_dist, best_tri = float("inf"), -1
        for tri, tr in enumerate(tess_rows):
            if tri in used_tess_rows:
                continue
            tr_cy = _row_center_y(tr)
            dist = abs(pr_cy - tr_cy)
            if dist < best_dist:
                best_dist, best_tri = dist, tri

        # Acceptance threshold: the tallest word in the Paddle row (or 15px
        # minimum) — rows further apart than that are different lines.
        max_row_dist = max(
            max((w.get("height", 20) for w in pr), default=20),
            15,
        )

        if best_tri >= 0 and best_dist <= max_row_dist:
            # Matched row — merge the two sequences word-by-word.
            tr = tess_rows[best_tri]
            used_tess_rows.add(best_tri)
            merged_all.extend(_merge_row_sequences(pr, tr))
        else:
            # No matching Tesseract row — keep the Paddle row as-is.
            merged_all.extend(pr)

    # Add Tesseract-only rows (confidence-gated).
    for tri, tr in enumerate(tess_rows):
        if tri not in used_tess_rows:
            for tw in tr:
                if tw.get("conf", 0) >= 40:
                    merged_all.append(tw)

    return merged_all
+
+
+def _deduplicate_words(words: list) -> list:
+ """Remove duplicate words with same text at overlapping positions."""
+ if not words:
+ return words
+
+ result: list = []
+ for w in words:
+ wt = w.get("text", "").lower().strip()
+ if not wt:
+ continue
+ is_dup = False
+ w_right = w["left"] + w.get("width", 0)
+ w_bottom = w["top"] + w.get("height", 0)
+ for existing in result:
+ et = existing.get("text", "").lower().strip()
+ if wt != et:
+ continue
+ ox_l = max(w["left"], existing["left"])
+ ox_r = min(w_right, existing["left"] + existing.get("width", 0))
+ ox = max(0, ox_r - ox_l)
+ min_w = min(w.get("width", 1), existing.get("width", 1))
+ if min_w <= 0 or ox / min_w < 0.5:
+ continue
+ oy_t = max(w["top"], existing["top"])
+ oy_b = min(w_bottom, existing["top"] + existing.get("height", 0))
+ oy = max(0, oy_b - oy_t)
+ min_h = min(w.get("height", 1), existing.get("height", 1))
+ if min_h > 0 and oy / min_h >= 0.5:
+ is_dup = True
+ break
+ if not is_dup:
+ result.append(w)
+
+ removed = len(words) - len(result)
+ if removed:
+ logger.info("dedup: removed %d duplicate words", removed)
+ return result
diff --git a/klausur-service/backend/ocr_pipeline_llm_review.py b/klausur-service/backend/ocr_pipeline_llm_review.py
new file mode 100644
index 0000000..37e8df7
--- /dev/null
+++ b/klausur-service/backend/ocr_pipeline_llm_review.py
@@ -0,0 +1,209 @@
+"""
+OCR Pipeline LLM Review — LLM-based correction endpoints.
+
+Extracted from ocr_pipeline_postprocess.py.
+
+Lizenz: Apache 2.0
+DATENSCHUTZ: Alle Verarbeitung erfolgt lokal.
+"""
+
+import json
+import logging
+from datetime import datetime
+from typing import Dict, List
+
+from fastapi import APIRouter, HTTPException, Request
+from fastapi.responses import StreamingResponse
+
+from cv_vocab_pipeline import (
+ OLLAMA_REVIEW_MODEL,
+ llm_review_entries,
+ llm_review_entries_streaming,
+)
+from ocr_pipeline_session_store import (
+ get_session_db,
+ update_session_db,
+)
+from ocr_pipeline_common import (
+ _cache,
+ _append_pipeline_log,
+)
+
+logger = logging.getLogger(__name__)
+
+router = APIRouter(prefix="/api/v1/ocr-pipeline", tags=["ocr-pipeline"])
+
+
+# ---------------------------------------------------------------------------
+# Step 8: LLM Review
+# ---------------------------------------------------------------------------
+
+@router.post("/sessions/{session_id}/llm-review")
+async def run_llm_review(session_id: str, request: Request, stream: bool = False):
+ """Run LLM-based correction on vocab entries from Step 5.
+
+ Query params:
+ stream: false (default) for JSON response, true for SSE streaming
+ """
+ session = await get_session_db(session_id)
+ if not session:
+ raise HTTPException(status_code=404, detail=f"Session {session_id} not found")
+
+ word_result = session.get("word_result")
+ if not word_result:
+ raise HTTPException(status_code=400, detail="No word result found — run Step 5 first")
+
+ entries = word_result.get("vocab_entries") or word_result.get("entries") or []
+ if not entries:
+ raise HTTPException(status_code=400, detail="No vocab entries found — run Step 5 first")
+
+ # Optional model override from request body
+ body = {}
+ try:
+ body = await request.json()
+ except Exception:
+ pass
+ model = body.get("model") or OLLAMA_REVIEW_MODEL
+
+ if stream:
+ return StreamingResponse(
+ _llm_review_stream_generator(session_id, entries, word_result, model, request),
+ media_type="text/event-stream",
+ headers={"Cache-Control": "no-cache", "Connection": "keep-alive", "X-Accel-Buffering": "no"},
+ )
+
+ # Non-streaming path
+ try:
+ result = await llm_review_entries(entries, model=model)
+ except Exception as e:
+ import traceback
+ logger.error(f"LLM review failed for session {session_id}: {type(e).__name__}: {e}\n{traceback.format_exc()}")
+ raise HTTPException(status_code=502, detail=f"LLM review failed ({type(e).__name__}): {e}")
+
+ # Store result inside word_result as a sub-key
+ word_result["llm_review"] = {
+ "changes": result["changes"],
+ "model_used": result["model_used"],
+ "duration_ms": result["duration_ms"],
+ "entries_corrected": result["entries_corrected"],
+ }
+ await update_session_db(session_id, word_result=word_result, current_step=9)
+
+ if session_id in _cache:
+ _cache[session_id]["word_result"] = word_result
+
+ logger.info(f"LLM review session {session_id}: {len(result['changes'])} changes, "
+ f"{result['duration_ms']}ms, model={result['model_used']}")
+
+ await _append_pipeline_log(session_id, "correction", {
+ "engine": "llm",
+ "model": result["model_used"],
+ "total_entries": len(entries),
+ "corrections_proposed": len(result["changes"]),
+ }, duration_ms=result["duration_ms"])
+
+ return {
+ "session_id": session_id,
+ "changes": result["changes"],
+ "model_used": result["model_used"],
+ "duration_ms": result["duration_ms"],
+ "total_entries": len(entries),
+ "corrections_found": len(result["changes"]),
+ }
+
+
async def _llm_review_stream_generator(
    session_id: str,
    entries: List[Dict],
    word_result: Dict,
    model: str,
    request: Request,
):
    """SSE generator that yields batch-by-batch LLM review progress.

    Forwards every event from ``llm_review_entries_streaming`` as a
    ``data:`` SSE frame.  On the final "complete" event the review result is
    persisted to the session (and the in-memory cache) exactly like the
    non-streaming path.  Errors are emitted as a terminal "error" event
    rather than raised.
    """
    try:
        async for event in llm_review_entries_streaming(entries, model=model):
            # Stop producing when the client went away.
            if await request.is_disconnected():
                logger.info(f"SSE: client disconnected during LLM review for {session_id}")
                return

            yield f"data: {json.dumps(event, ensure_ascii=False)}\n\n"

            # On complete: persist to DB (after the event was delivered).
            if event.get("type") == "complete":
                word_result["llm_review"] = {
                    "changes": event["changes"],
                    "model_used": event["model_used"],
                    "duration_ms": event["duration_ms"],
                    "entries_corrected": event["entries_corrected"],
                }
                await update_session_db(session_id, word_result=word_result, current_step=9)
                if session_id in _cache:
                    _cache[session_id]["word_result"] = word_result

                logger.info(f"LLM review SSE session {session_id}: {event['corrections_found']} changes, "
                            f"{event['duration_ms']}ms, skipped={event['skipped']}, model={event['model_used']}")

    except Exception as e:
        import traceback
        logger.error(f"LLM review SSE failed for {session_id}: {type(e).__name__}: {e}\n{traceback.format_exc()}")
        error_event = {"type": "error", "detail": f"{type(e).__name__}: {e}"}
        yield f"data: {json.dumps(error_event)}\n\n"
+
+
+@router.post("/sessions/{session_id}/llm-review/apply")
+async def apply_llm_corrections(session_id: str, request: Request):
+ """Apply selected LLM corrections to vocab entries."""
+ session = await get_session_db(session_id)
+ if not session:
+ raise HTTPException(status_code=404, detail=f"Session {session_id} not found")
+
+ word_result = session.get("word_result")
+ if not word_result:
+ raise HTTPException(status_code=400, detail="No word result found")
+
+ llm_review = word_result.get("llm_review")
+ if not llm_review:
+ raise HTTPException(status_code=400, detail="No LLM review found — run /llm-review first")
+
+ body = await request.json()
+ accepted_indices = set(body.get("accepted_indices", [])) # indices into changes[]
+
+ changes = llm_review.get("changes", [])
+ entries = word_result.get("vocab_entries") or word_result.get("entries") or []
+
+ # Build a lookup: (row_index, field) -> new_value for accepted changes
+ corrections = {}
+ applied_count = 0
+ for idx, change in enumerate(changes):
+ if idx in accepted_indices:
+ key = (change["row_index"], change["field"])
+ corrections[key] = change["new"]
+ applied_count += 1
+
+ # Apply corrections to entries
+ for entry in entries:
+ row_idx = entry.get("row_index", -1)
+ for field_name in ("english", "german", "example"):
+ key = (row_idx, field_name)
+ if key in corrections:
+ entry[field_name] = corrections[key]
+ entry["llm_corrected"] = True
+
+ # Update word_result
+ word_result["vocab_entries"] = entries
+ word_result["entries"] = entries
+ word_result["llm_review"]["applied_count"] = applied_count
+ word_result["llm_review"]["applied_at"] = datetime.utcnow().isoformat()
+
+ await update_session_db(session_id, word_result=word_result)
+
+ if session_id in _cache:
+ _cache[session_id]["word_result"] = word_result
+
+ logger.info(f"Applied {applied_count}/{len(changes)} LLM corrections for session {session_id}")
+
+ return {
+ "session_id": session_id,
+ "applied_count": applied_count,
+ "total_changes": len(changes),
+ }
diff --git a/klausur-service/backend/ocr_pipeline_ocr_merge.py b/klausur-service/backend/ocr_pipeline_ocr_merge.py
index d8b4c8c..c91f8b2 100644
--- a/klausur-service/backend/ocr_pipeline_ocr_merge.py
+++ b/klausur-service/backend/ocr_pipeline_ocr_merge.py
@@ -1,10 +1,8 @@
"""
-OCR Merge Helpers and Kombi Endpoints.
+OCR Merge Kombi Endpoints — paddle-kombi and rapid-kombi endpoints.
-Contains merge helper functions for combining PaddleOCR/RapidOCR with Tesseract
-results, plus the paddle-kombi and rapid-kombi endpoints.
-
-Extracted from ocr_pipeline_api.py for modularity.
+Merge helper functions live in ocr_merge_helpers.py.
+This module re-exports them for backward compatibility.
Lizenz: Apache 2.0
DATENSCHUTZ: Alle Verarbeitung erfolgt lokal.
@@ -12,10 +10,8 @@ DATENSCHUTZ: Alle Verarbeitung erfolgt lokal.
import logging
import time
-from typing import Any, Dict, List
import cv2
-import httpx
import numpy as np
from fastapi import APIRouter, HTTPException
@@ -23,356 +19,23 @@ from cv_words_first import build_grid_from_words
from ocr_pipeline_common import _cache, _append_pipeline_log
from ocr_pipeline_session_store import get_session_image, update_session_db
+# Re-export merge helpers for backward compatibility
+from ocr_merge_helpers import ( # noqa: F401
+ _split_paddle_multi_words,
+ _group_words_into_rows,
+ _row_center_y,
+ _merge_row_sequences,
+ _merge_paddle_tesseract,
+ _deduplicate_words,
+)
+
logger = logging.getLogger(__name__)
router = APIRouter(prefix="/api/v1/ocr-pipeline", tags=["ocr-pipeline"])
-# ---------------------------------------------------------------------------
-# Merge helper functions
-# ---------------------------------------------------------------------------
-
-
-def _split_paddle_multi_words(words: list) -> list:
- """Split PaddleOCR multi-word boxes into individual word boxes.
-
- PaddleOCR often returns entire phrases as a single box, e.g.
- "More than 200 singers took part in the" with one bounding box.
- This splits them into individual words with proportional widths.
- Also handles leading "!" (e.g. "!Betonung" → ["!", "Betonung"])
- and IPA brackets (e.g. "badge[bxd3]" → ["badge", "[bxd3]"]).
- """
- import re
-
- result = []
- for w in words:
- raw_text = w.get("text", "").strip()
- if not raw_text:
- continue
- # Split on whitespace, before "[" (IPA), and after "!" before letter
- tokens = re.split(
- r'\s+|(?=\[)|(?<=!)(?=[A-Za-z\u00c0-\u024f])', raw_text
- )
- tokens = [t for t in tokens if t]
-
- if len(tokens) <= 1:
- result.append(w)
- else:
- # Split proportionally by character count
- total_chars = sum(len(t) for t in tokens)
- if total_chars == 0:
- continue
- n_gaps = len(tokens) - 1
- gap_px = w["width"] * 0.02
- usable_w = w["width"] - gap_px * n_gaps
- cursor = w["left"]
- for t in tokens:
- token_w = max(1, usable_w * len(t) / total_chars)
- result.append({
- "text": t,
- "left": round(cursor),
- "top": w["top"],
- "width": round(token_w),
- "height": w["height"],
- "conf": w.get("conf", 0),
- })
- cursor += token_w + gap_px
- return result
-
-
-def _group_words_into_rows(words: list, row_gap: int = 12) -> list:
- """Group words into rows by Y-position clustering.
-
- Words whose vertical centers are within `row_gap` pixels are on the same row.
- Returns list of rows, each row is a list of words sorted left-to-right.
- """
- if not words:
- return []
- # Sort by vertical center
- sorted_words = sorted(words, key=lambda w: w["top"] + w.get("height", 0) / 2)
- rows: list = []
- current_row: list = [sorted_words[0]]
- current_cy = sorted_words[0]["top"] + sorted_words[0].get("height", 0) / 2
-
- for w in sorted_words[1:]:
- cy = w["top"] + w.get("height", 0) / 2
- if abs(cy - current_cy) <= row_gap:
- current_row.append(w)
- else:
- # Sort current row left-to-right before saving
- rows.append(sorted(current_row, key=lambda w: w["left"]))
- current_row = [w]
- current_cy = cy
- if current_row:
- rows.append(sorted(current_row, key=lambda w: w["left"]))
- return rows
-
-
-def _row_center_y(row: list) -> float:
- """Average vertical center of a row of words."""
- if not row:
- return 0.0
- return sum(w["top"] + w.get("height", 0) / 2 for w in row) / len(row)
-
-
-def _merge_row_sequences(paddle_row: list, tess_row: list) -> list:
- """Merge two word sequences from the same row using sequence alignment.
-
- Both sequences are sorted left-to-right. Walk through both simultaneously:
- - If words match (same/similar text): take Paddle text with averaged coords
- - If they don't match: the extra word is unique to one engine, include it
-
- This prevents duplicates because both engines produce words in the same order.
- """
- merged = []
- pi, ti = 0, 0
-
- while pi < len(paddle_row) and ti < len(tess_row):
- pw = paddle_row[pi]
- tw = tess_row[ti]
-
- # Check if these are the same word
- pt = pw.get("text", "").lower().strip()
- tt = tw.get("text", "").lower().strip()
-
- # Same text or one contains the other
- is_same = (pt == tt) or (len(pt) > 1 and len(tt) > 1 and (pt in tt or tt in pt))
-
- # Spatial overlap check: if words overlap >= 40% horizontally,
- # they're the same physical word regardless of OCR text differences.
- # (40% catches borderline cases like "Stick"/"Stück" at 48% overlap)
- spatial_match = False
- if not is_same:
- overlap_left = max(pw["left"], tw["left"])
- overlap_right = min(
- pw["left"] + pw.get("width", 0),
- tw["left"] + tw.get("width", 0),
- )
- overlap_w = max(0, overlap_right - overlap_left)
- min_w = min(pw.get("width", 1), tw.get("width", 1))
- if min_w > 0 and overlap_w / min_w >= 0.4:
- is_same = True
- spatial_match = True
-
- if is_same:
- # Matched — average coordinates weighted by confidence
- pc = pw.get("conf", 80)
- tc = tw.get("conf", 50)
- total = pc + tc
- if total == 0:
- total = 1
- # Text: prefer higher-confidence engine when texts differ
- # (e.g. Tesseract "Stück" conf=98 vs PaddleOCR "Stick" conf=80)
- if spatial_match and pc < tc:
- best_text = tw["text"]
- else:
- best_text = pw["text"]
- merged.append({
- "text": best_text,
- "left": round((pw["left"] * pc + tw["left"] * tc) / total),
- "top": round((pw["top"] * pc + tw["top"] * tc) / total),
- "width": round((pw["width"] * pc + tw["width"] * tc) / total),
- "height": round((pw["height"] * pc + tw["height"] * tc) / total),
- "conf": max(pc, tc),
- })
- pi += 1
- ti += 1
- else:
- # Different text — one engine found something extra
- # Look ahead: is the current Paddle word somewhere in Tesseract ahead?
- paddle_ahead = any(
- tess_row[t].get("text", "").lower().strip() == pt
- for t in range(ti + 1, min(ti + 4, len(tess_row)))
- )
- # Is the current Tesseract word somewhere in Paddle ahead?
- tess_ahead = any(
- paddle_row[p].get("text", "").lower().strip() == tt
- for p in range(pi + 1, min(pi + 4, len(paddle_row)))
- )
-
- if paddle_ahead and not tess_ahead:
- # Tesseract has an extra word (e.g. "!" or bullet) → include it
- if tw.get("conf", 0) >= 30:
- merged.append(tw)
- ti += 1
- elif tess_ahead and not paddle_ahead:
- # Paddle has an extra word → include it
- merged.append(pw)
- pi += 1
- else:
- # Both have unique words or neither found ahead → take leftmost first
- if pw["left"] <= tw["left"]:
- merged.append(pw)
- pi += 1
- else:
- if tw.get("conf", 0) >= 30:
- merged.append(tw)
- ti += 1
-
- # Remaining words from either engine
- while pi < len(paddle_row):
- merged.append(paddle_row[pi])
- pi += 1
- while ti < len(tess_row):
- tw = tess_row[ti]
- if tw.get("conf", 0) >= 30:
- merged.append(tw)
- ti += 1
-
- return merged
-
-
-def _merge_paddle_tesseract(paddle_words: list, tess_words: list) -> list:
- """Merge word boxes from PaddleOCR and Tesseract using row-based sequence alignment.
-
- Strategy:
- 1. Group each engine's words into rows (by Y-position clustering)
- 2. Match rows between engines (by vertical center proximity)
- 3. Within each matched row: merge sequences left-to-right, deduplicating
- words that appear in both engines at the same sequence position
- 4. Unmatched rows from either engine: keep as-is
-
- This prevents:
- - Cross-line averaging (words from different lines being merged)
- - Duplicate words (same word from both engines shown twice)
- """
- if not paddle_words and not tess_words:
- return []
- if not paddle_words:
- return [w for w in tess_words if w.get("conf", 0) >= 40]
- if not tess_words:
- return list(paddle_words)
-
- # Step 1: Group into rows
- paddle_rows = _group_words_into_rows(paddle_words)
- tess_rows = _group_words_into_rows(tess_words)
-
- # Step 2: Match rows between engines by vertical center proximity
- used_tess_rows: set = set()
- merged_all: list = []
-
- for pr in paddle_rows:
- pr_cy = _row_center_y(pr)
- best_dist, best_tri = float("inf"), -1
- for tri, tr in enumerate(tess_rows):
- if tri in used_tess_rows:
- continue
- tr_cy = _row_center_y(tr)
- dist = abs(pr_cy - tr_cy)
- if dist < best_dist:
- best_dist, best_tri = dist, tri
-
- # Row height threshold — rows must be within ~1.5x typical line height
- max_row_dist = max(
- max((w.get("height", 20) for w in pr), default=20),
- 15,
- )
-
- if best_tri >= 0 and best_dist <= max_row_dist:
- # Matched row — merge sequences
- tr = tess_rows[best_tri]
- used_tess_rows.add(best_tri)
- merged_all.extend(_merge_row_sequences(pr, tr))
- else:
- # No matching Tesseract row — keep Paddle row as-is
- merged_all.extend(pr)
-
- # Add unmatched Tesseract rows
- for tri, tr in enumerate(tess_rows):
- if tri not in used_tess_rows:
- for tw in tr:
- if tw.get("conf", 0) >= 40:
- merged_all.append(tw)
-
- return merged_all
-
-
-def _deduplicate_words(words: list) -> list:
- """Remove duplicate words with same text at overlapping positions.
-
- PaddleOCR can return overlapping phrases (e.g. "von jm." and "jm. =")
- that produce duplicate words after splitting. This pass removes them.
-
- A word is a duplicate only when BOTH horizontal AND vertical overlap
- exceed 50% — same text on the same visual line at the same position.
- """
- if not words:
- return words
-
- result: list = []
- for w in words:
- wt = w.get("text", "").lower().strip()
- if not wt:
- continue
- is_dup = False
- w_right = w["left"] + w.get("width", 0)
- w_bottom = w["top"] + w.get("height", 0)
- for existing in result:
- et = existing.get("text", "").lower().strip()
- if wt != et:
- continue
- # Horizontal overlap
- ox_l = max(w["left"], existing["left"])
- ox_r = min(w_right, existing["left"] + existing.get("width", 0))
- ox = max(0, ox_r - ox_l)
- min_w = min(w.get("width", 1), existing.get("width", 1))
- if min_w <= 0 or ox / min_w < 0.5:
- continue
- # Vertical overlap — must also be on the same line
- oy_t = max(w["top"], existing["top"])
- oy_b = min(w_bottom, existing["top"] + existing.get("height", 0))
- oy = max(0, oy_b - oy_t)
- min_h = min(w.get("height", 1), existing.get("height", 1))
- if min_h > 0 and oy / min_h >= 0.5:
- is_dup = True
- break
- if not is_dup:
- result.append(w)
-
- removed = len(words) - len(result)
- if removed:
- logger.info("dedup: removed %d duplicate words", removed)
- return result
-
-
-# ---------------------------------------------------------------------------
-# Kombi endpoints
-# ---------------------------------------------------------------------------
-
-
-@router.post("/sessions/{session_id}/paddle-kombi")
-async def paddle_kombi(session_id: str):
- """Run PaddleOCR + Tesseract on the preprocessed image and merge results.
-
- Both engines run on the same preprocessed (cropped/dewarped) image.
- Word boxes are matched by IoU and coordinates are averaged weighted by
- confidence. Unmatched Tesseract words (bullets, symbols) are added.
- """
- img_png = await get_session_image(session_id, "cropped")
- if not img_png:
- img_png = await get_session_image(session_id, "dewarped")
- if not img_png:
- img_png = await get_session_image(session_id, "original")
- if not img_png:
- raise HTTPException(status_code=404, detail="No image found for this session")
-
- img_arr = np.frombuffer(img_png, dtype=np.uint8)
- img_bgr = cv2.imdecode(img_arr, cv2.IMREAD_COLOR)
- if img_bgr is None:
- raise HTTPException(status_code=400, detail="Failed to decode image")
-
- img_h, img_w = img_bgr.shape[:2]
-
- from cv_ocr_engines import ocr_region_paddle
-
- t0 = time.time()
-
- # --- PaddleOCR ---
- paddle_words = await ocr_region_paddle(img_bgr, region=None)
- if not paddle_words:
- paddle_words = []
-
- # --- Tesseract ---
+def _run_tesseract_words(img_bgr) -> list:
+ """Run Tesseract OCR on an image and return word dicts."""
from PIL import Image
import pytesseract
@@ -397,15 +60,98 @@ async def paddle_kombi(session_id: str):
"height": data["height"][i],
"conf": conf,
})
+ return tess_words
+
+
+def _build_kombi_word_result(
+ cells: list,
+ columns_meta: list,
+ img_w: int,
+ img_h: int,
+ duration: float,
+ engine_name: str,
+ raw_engine_words: list,
+ raw_engine_words_split: list,
+ tess_words: list,
+ merged_words: list,
+ raw_engine_key: str = "raw_paddle_words",
+ raw_split_key: str = "raw_paddle_words_split",
+) -> dict:
+ """Build the word_result dict for kombi endpoints."""
+ n_rows = len(set(c["row_index"] for c in cells)) if cells else 0
+ n_cols = len(columns_meta)
+ col_types = {c.get("type") for c in columns_meta}
+ is_vocab = bool(col_types & {"column_en", "column_de"})
+
+ return {
+ "cells": cells,
+ "grid_shape": {"rows": n_rows, "cols": n_cols, "total_cells": len(cells)},
+ "columns_used": columns_meta,
+ "layout": "vocab" if is_vocab else "generic",
+ "image_width": img_w,
+ "image_height": img_h,
+ "duration_seconds": round(duration, 2),
+ "ocr_engine": engine_name,
+ "grid_method": engine_name,
+ raw_engine_key: raw_engine_words,
+ raw_split_key: raw_engine_words_split,
+ "raw_tesseract_words": tess_words,
+ "summary": {
+ "total_cells": len(cells),
+ "non_empty_cells": sum(1 for c in cells if c.get("text")),
+ "low_confidence": sum(1 for c in cells if 0 < c.get("confidence", 0) < 50),
+ raw_engine_key.replace("raw_", ""): len(raw_engine_words),
+ raw_split_key.replace("raw_", ""): len(raw_engine_words_split),
+ "tesseract_words": len(tess_words),
+ "merged_words": len(merged_words),
+ },
+ }
+
+
+async def _load_session_image(session_id: str):
+ """Load preprocessed image for kombi endpoints."""
+ img_png = await get_session_image(session_id, "cropped")
+ if not img_png:
+ img_png = await get_session_image(session_id, "dewarped")
+ if not img_png:
+ img_png = await get_session_image(session_id, "original")
+ if not img_png:
+ raise HTTPException(status_code=404, detail="No image found for this session")
+
+ img_arr = np.frombuffer(img_png, dtype=np.uint8)
+ img_bgr = cv2.imdecode(img_arr, cv2.IMREAD_COLOR)
+ if img_bgr is None:
+ raise HTTPException(status_code=400, detail="Failed to decode image")
+
+ return img_png, img_bgr
+
+
+# ---------------------------------------------------------------------------
+# Kombi endpoints
+# ---------------------------------------------------------------------------
+
+@router.post("/sessions/{session_id}/paddle-kombi")
+async def paddle_kombi(session_id: str):
+ """Run PaddleOCR + Tesseract on the preprocessed image and merge results."""
+ img_png, img_bgr = await _load_session_image(session_id)
+ img_h, img_w = img_bgr.shape[:2]
+
+ from cv_ocr_engines import ocr_region_paddle
+
+ t0 = time.time()
+
+ paddle_words = await ocr_region_paddle(img_bgr, region=None)
+ if not paddle_words:
+ paddle_words = []
+
+ tess_words = _run_tesseract_words(img_bgr)
- # --- Split multi-word Paddle boxes into individual words ---
paddle_words_split = _split_paddle_multi_words(paddle_words)
logger.info(
- "paddle_kombi: split %d paddle boxes → %d individual words",
+ "paddle_kombi: split %d paddle boxes -> %d individual words",
len(paddle_words), len(paddle_words_split),
)
- # --- Merge ---
if not paddle_words_split and not tess_words:
raise HTTPException(status_code=400, detail="Both OCR engines returned no words")
@@ -418,49 +164,23 @@ async def paddle_kombi(session_id: str):
for cell in cells:
cell["ocr_engine"] = "kombi"
- n_rows = len(set(c["row_index"] for c in cells)) if cells else 0
- n_cols = len(columns_meta)
- col_types = {c.get("type") for c in columns_meta}
- is_vocab = bool(col_types & {"column_en", "column_de"})
-
- word_result = {
- "cells": cells,
- "grid_shape": {"rows": n_rows, "cols": n_cols, "total_cells": len(cells)},
- "columns_used": columns_meta,
- "layout": "vocab" if is_vocab else "generic",
- "image_width": img_w,
- "image_height": img_h,
- "duration_seconds": round(duration, 2),
- "ocr_engine": "kombi",
- "grid_method": "kombi",
- "raw_paddle_words": paddle_words,
- "raw_paddle_words_split": paddle_words_split,
- "raw_tesseract_words": tess_words,
- "summary": {
- "total_cells": len(cells),
- "non_empty_cells": sum(1 for c in cells if c.get("text")),
- "low_confidence": sum(1 for c in cells if 0 < c.get("confidence", 0) < 50),
- "paddle_words": len(paddle_words),
- "paddle_words_split": len(paddle_words_split),
- "tesseract_words": len(tess_words),
- "merged_words": len(merged_words),
- },
- }
+ word_result = _build_kombi_word_result(
+ cells, columns_meta, img_w, img_h, duration, "kombi",
+ paddle_words, paddle_words_split, tess_words, merged_words,
+ "raw_paddle_words", "raw_paddle_words_split",
+ )
await update_session_db(
- session_id,
- word_result=word_result,
- cropped_png=img_png,
- current_step=8,
+ session_id, word_result=word_result, cropped_png=img_png, current_step=8,
)
- # Update in-memory cache so detect-structure can access word_result
if session_id in _cache:
_cache[session_id]["word_result"] = word_result
logger.info(
"paddle_kombi session %s: %d cells (%d rows, %d cols) in %.2fs "
"[paddle=%d, tess=%d, merged=%d]",
- session_id, len(cells), n_rows, n_cols, duration,
+ session_id, len(cells), word_result["grid_shape"]["rows"],
+ word_result["grid_shape"]["cols"], duration,
len(paddle_words), len(tess_words), len(merged_words),
)
@@ -478,24 +198,8 @@ async def paddle_kombi(session_id: str):
@router.post("/sessions/{session_id}/rapid-kombi")
async def rapid_kombi(session_id: str):
- """Run RapidOCR + Tesseract on the preprocessed image and merge results.
-
- Same merge logic as paddle-kombi, but uses local RapidOCR (ONNX Runtime)
- instead of remote PaddleOCR service.
- """
- img_png = await get_session_image(session_id, "cropped")
- if not img_png:
- img_png = await get_session_image(session_id, "dewarped")
- if not img_png:
- img_png = await get_session_image(session_id, "original")
- if not img_png:
- raise HTTPException(status_code=404, detail="No image found for this session")
-
- img_arr = np.frombuffer(img_png, dtype=np.uint8)
- img_bgr = cv2.imdecode(img_arr, cv2.IMREAD_COLOR)
- if img_bgr is None:
- raise HTTPException(status_code=400, detail="Failed to decode image")
-
+ """Run RapidOCR + Tesseract on the preprocessed image and merge results."""
+ img_png, img_bgr = await _load_session_image(session_id)
img_h, img_w = img_bgr.shape[:2]
from cv_ocr_engines import ocr_region_rapid
@@ -503,7 +207,6 @@ async def rapid_kombi(session_id: str):
t0 = time.time()
- # --- RapidOCR (local, synchronous) ---
full_region = PageRegion(
type="full_page", x=0, y=0, width=img_w, height=img_h,
)
@@ -511,40 +214,14 @@ async def rapid_kombi(session_id: str):
if not rapid_words:
rapid_words = []
- # --- Tesseract ---
- from PIL import Image
- import pytesseract
+ tess_words = _run_tesseract_words(img_bgr)
- pil_img = Image.fromarray(cv2.cvtColor(img_bgr, cv2.COLOR_BGR2RGB))
- data = pytesseract.image_to_data(
- pil_img, lang="eng+deu",
- config="--psm 6 --oem 3",
- output_type=pytesseract.Output.DICT,
- )
- tess_words = []
- for i in range(len(data["text"])):
- text = str(data["text"][i]).strip()
- conf_raw = str(data["conf"][i])
- conf = int(conf_raw) if conf_raw.lstrip("-").isdigit() else -1
- if not text or conf < 20:
- continue
- tess_words.append({
- "text": text,
- "left": data["left"][i],
- "top": data["top"][i],
- "width": data["width"][i],
- "height": data["height"][i],
- "conf": conf,
- })
-
- # --- Split multi-word RapidOCR boxes into individual words ---
rapid_words_split = _split_paddle_multi_words(rapid_words)
logger.info(
- "rapid_kombi: split %d rapid boxes → %d individual words",
+ "rapid_kombi: split %d rapid boxes -> %d individual words",
len(rapid_words), len(rapid_words_split),
)
- # --- Merge ---
if not rapid_words_split and not tess_words:
raise HTTPException(status_code=400, detail="Both OCR engines returned no words")
@@ -557,49 +234,23 @@ async def rapid_kombi(session_id: str):
for cell in cells:
cell["ocr_engine"] = "rapid_kombi"
- n_rows = len(set(c["row_index"] for c in cells)) if cells else 0
- n_cols = len(columns_meta)
- col_types = {c.get("type") for c in columns_meta}
- is_vocab = bool(col_types & {"column_en", "column_de"})
-
- word_result = {
- "cells": cells,
- "grid_shape": {"rows": n_rows, "cols": n_cols, "total_cells": len(cells)},
- "columns_used": columns_meta,
- "layout": "vocab" if is_vocab else "generic",
- "image_width": img_w,
- "image_height": img_h,
- "duration_seconds": round(duration, 2),
- "ocr_engine": "rapid_kombi",
- "grid_method": "rapid_kombi",
- "raw_rapid_words": rapid_words,
- "raw_rapid_words_split": rapid_words_split,
- "raw_tesseract_words": tess_words,
- "summary": {
- "total_cells": len(cells),
- "non_empty_cells": sum(1 for c in cells if c.get("text")),
- "low_confidence": sum(1 for c in cells if 0 < c.get("confidence", 0) < 50),
- "rapid_words": len(rapid_words),
- "rapid_words_split": len(rapid_words_split),
- "tesseract_words": len(tess_words),
- "merged_words": len(merged_words),
- },
- }
+ word_result = _build_kombi_word_result(
+ cells, columns_meta, img_w, img_h, duration, "rapid_kombi",
+ rapid_words, rapid_words_split, tess_words, merged_words,
+ "raw_rapid_words", "raw_rapid_words_split",
+ )
await update_session_db(
- session_id,
- word_result=word_result,
- cropped_png=img_png,
- current_step=8,
+ session_id, word_result=word_result, cropped_png=img_png, current_step=8,
)
- # Update in-memory cache so detect-structure can access word_result
if session_id in _cache:
_cache[session_id]["word_result"] = word_result
logger.info(
"rapid_kombi session %s: %d cells (%d rows, %d cols) in %.2fs "
"[rapid=%d, tess=%d, merged=%d]",
- session_id, len(cells), n_rows, n_cols, duration,
+ session_id, len(cells), word_result["grid_shape"]["rows"],
+ word_result["grid_shape"]["cols"], duration,
len(rapid_words), len(tess_words), len(merged_words),
)
diff --git a/klausur-service/backend/ocr_pipeline_postprocess.py b/klausur-service/backend/ocr_pipeline_postprocess.py
index 3445800..388f5e2 100644
--- a/klausur-service/backend/ocr_pipeline_postprocess.py
+++ b/klausur-service/backend/ocr_pipeline_postprocess.py
@@ -1,929 +1,26 @@
"""
-OCR Pipeline Postprocessing API — LLM review, reconstruction, export, validation,
-image detection/generation, and handwriting removal endpoints.
+OCR Pipeline Postprocessing API — composite router assembling LLM review,
+reconstruction, export, validation, image detection/generation, and
+handwriting removal endpoints.
-Extracted from ocr_pipeline_api.py to keep the main module manageable.
+Split into sub-modules:
+ ocr_pipeline_llm_review — LLM review + apply corrections
+ ocr_pipeline_reconstruction — reconstruction save, Fabric JSON, merged entries, PDF/DOCX
+ ocr_pipeline_validation — image detection, generation, validation, handwriting removal
Lizenz: Apache 2.0
DATENSCHUTZ: Alle Verarbeitung erfolgt lokal.
"""
-import json
-import logging
-import os
-import re
-from datetime import datetime
-from typing import Any, Dict, List, Optional
-
-from fastapi import APIRouter, HTTPException, Request
-from fastapi.responses import StreamingResponse
-from pydantic import BaseModel
-
-from cv_vocab_pipeline import (
- OLLAMA_REVIEW_MODEL,
- llm_review_entries,
- llm_review_entries_streaming,
-)
-from ocr_pipeline_session_store import (
- get_session_db,
- get_session_image,
- get_sub_sessions,
- update_session_db,
-)
-from ocr_pipeline_common import (
- _cache,
- _load_session_to_cache,
- _get_cached,
- _get_base_image_png,
- _append_pipeline_log,
- RemoveHandwritingRequest,
-)
-
-logger = logging.getLogger(__name__)
-
-router = APIRouter(prefix="/api/v1/ocr-pipeline", tags=["ocr-pipeline"])
-
-# ---------------------------------------------------------------------------
-# Pydantic Models
-# ---------------------------------------------------------------------------
-
-STYLE_SUFFIXES = {
- "educational": "educational illustration, textbook style, clear, colorful",
- "cartoon": "cartoon, child-friendly, simple shapes",
- "sketch": "pencil sketch, hand-drawn, black and white",
- "clipart": "clipart, flat vector style, simple",
- "realistic": "photorealistic, high detail",
-}
-
-
-class ValidationRequest(BaseModel):
- notes: Optional[str] = None
- score: Optional[int] = None
-
-
-class GenerateImageRequest(BaseModel):
- region_index: int
- prompt: str
- style: str = "educational"
-
-
-# ---------------------------------------------------------------------------
-# Step 8: LLM Review
-# ---------------------------------------------------------------------------
-
-@router.post("/sessions/{session_id}/llm-review")
-async def run_llm_review(session_id: str, request: Request, stream: bool = False):
- """Run LLM-based correction on vocab entries from Step 5.
-
- Query params:
- stream: false (default) for JSON response, true for SSE streaming
- """
- session = await get_session_db(session_id)
- if not session:
- raise HTTPException(status_code=404, detail=f"Session {session_id} not found")
-
- word_result = session.get("word_result")
- if not word_result:
- raise HTTPException(status_code=400, detail="No word result found — run Step 5 first")
-
- entries = word_result.get("vocab_entries") or word_result.get("entries") or []
- if not entries:
- raise HTTPException(status_code=400, detail="No vocab entries found — run Step 5 first")
-
- # Optional model override from request body
- body = {}
- try:
- body = await request.json()
- except Exception:
- pass
- model = body.get("model") or OLLAMA_REVIEW_MODEL
-
- if stream:
- return StreamingResponse(
- _llm_review_stream_generator(session_id, entries, word_result, model, request),
- media_type="text/event-stream",
- headers={"Cache-Control": "no-cache", "Connection": "keep-alive", "X-Accel-Buffering": "no"},
- )
-
- # Non-streaming path
- try:
- result = await llm_review_entries(entries, model=model)
- except Exception as e:
- import traceback
- logger.error(f"LLM review failed for session {session_id}: {type(e).__name__}: {e}\n{traceback.format_exc()}")
- raise HTTPException(status_code=502, detail=f"LLM review failed ({type(e).__name__}): {e}")
-
- # Store result inside word_result as a sub-key
- word_result["llm_review"] = {
- "changes": result["changes"],
- "model_used": result["model_used"],
- "duration_ms": result["duration_ms"],
- "entries_corrected": result["entries_corrected"],
- }
- await update_session_db(session_id, word_result=word_result, current_step=9)
-
- if session_id in _cache:
- _cache[session_id]["word_result"] = word_result
-
- logger.info(f"LLM review session {session_id}: {len(result['changes'])} changes, "
- f"{result['duration_ms']}ms, model={result['model_used']}")
-
- await _append_pipeline_log(session_id, "correction", {
- "engine": "llm",
- "model": result["model_used"],
- "total_entries": len(entries),
- "corrections_proposed": len(result["changes"]),
- }, duration_ms=result["duration_ms"])
-
- return {
- "session_id": session_id,
- "changes": result["changes"],
- "model_used": result["model_used"],
- "duration_ms": result["duration_ms"],
- "total_entries": len(entries),
- "corrections_found": len(result["changes"]),
- }
-
-
-async def _llm_review_stream_generator(
- session_id: str,
- entries: List[Dict],
- word_result: Dict,
- model: str,
- request: Request,
-):
- """SSE generator that yields batch-by-batch LLM review progress."""
- try:
- async for event in llm_review_entries_streaming(entries, model=model):
- if await request.is_disconnected():
- logger.info(f"SSE: client disconnected during LLM review for {session_id}")
- return
-
- yield f"data: {json.dumps(event, ensure_ascii=False)}\n\n"
-
- # On complete: persist to DB
- if event.get("type") == "complete":
- word_result["llm_review"] = {
- "changes": event["changes"],
- "model_used": event["model_used"],
- "duration_ms": event["duration_ms"],
- "entries_corrected": event["entries_corrected"],
- }
- await update_session_db(session_id, word_result=word_result, current_step=9)
- if session_id in _cache:
- _cache[session_id]["word_result"] = word_result
-
- logger.info(f"LLM review SSE session {session_id}: {event['corrections_found']} changes, "
- f"{event['duration_ms']}ms, skipped={event['skipped']}, model={event['model_used']}")
-
- except Exception as e:
- import traceback
- logger.error(f"LLM review SSE failed for {session_id}: {type(e).__name__}: {e}\n{traceback.format_exc()}")
- error_event = {"type": "error", "detail": f"{type(e).__name__}: {e}"}
- yield f"data: {json.dumps(error_event)}\n\n"
-
-
-@router.post("/sessions/{session_id}/llm-review/apply")
-async def apply_llm_corrections(session_id: str, request: Request):
- """Apply selected LLM corrections to vocab entries."""
- session = await get_session_db(session_id)
- if not session:
- raise HTTPException(status_code=404, detail=f"Session {session_id} not found")
-
- word_result = session.get("word_result")
- if not word_result:
- raise HTTPException(status_code=400, detail="No word result found")
-
- llm_review = word_result.get("llm_review")
- if not llm_review:
- raise HTTPException(status_code=400, detail="No LLM review found — run /llm-review first")
-
- body = await request.json()
- accepted_indices = set(body.get("accepted_indices", [])) # indices into changes[]
-
- changes = llm_review.get("changes", [])
- entries = word_result.get("vocab_entries") or word_result.get("entries") or []
-
- # Build a lookup: (row_index, field) -> new_value for accepted changes
- corrections = {}
- applied_count = 0
- for idx, change in enumerate(changes):
- if idx in accepted_indices:
- key = (change["row_index"], change["field"])
- corrections[key] = change["new"]
- applied_count += 1
-
- # Apply corrections to entries
- for entry in entries:
- row_idx = entry.get("row_index", -1)
- for field_name in ("english", "german", "example"):
- key = (row_idx, field_name)
- if key in corrections:
- entry[field_name] = corrections[key]
- entry["llm_corrected"] = True
-
- # Update word_result
- word_result["vocab_entries"] = entries
- word_result["entries"] = entries
- word_result["llm_review"]["applied_count"] = applied_count
- word_result["llm_review"]["applied_at"] = datetime.utcnow().isoformat()
-
- await update_session_db(session_id, word_result=word_result)
-
- if session_id in _cache:
- _cache[session_id]["word_result"] = word_result
-
- logger.info(f"Applied {applied_count}/{len(changes)} LLM corrections for session {session_id}")
-
- return {
- "session_id": session_id,
- "applied_count": applied_count,
- "total_changes": len(changes),
- }
-
-
-# ---------------------------------------------------------------------------
-# Step 9: Reconstruction + Fabric JSON export
-# ---------------------------------------------------------------------------
-
-@router.post("/sessions/{session_id}/reconstruction")
-async def save_reconstruction(session_id: str, request: Request):
- """Save edited cell texts from reconstruction step."""
- session = await get_session_db(session_id)
- if not session:
- raise HTTPException(status_code=404, detail=f"Session {session_id} not found")
-
- word_result = session.get("word_result")
- if not word_result:
- raise HTTPException(status_code=400, detail="No word result found")
-
- body = await request.json()
- cell_updates = body.get("cells", [])
-
- if not cell_updates:
- await update_session_db(session_id, current_step=10)
- return {"session_id": session_id, "updated": 0}
-
- # Build update map: cell_id -> new text
- update_map = {c["cell_id"]: c["text"] for c in cell_updates}
-
- # Separate sub-session updates (cell_ids prefixed with "box{N}_")
- sub_updates: Dict[int, Dict[str, str]] = {} # box_index -> {original_cell_id: text}
- main_updates: Dict[str, str] = {}
- for cell_id, text in update_map.items():
- m = re.match(r'^box(\d+)_(.+)$', cell_id)
- if m:
- bi = int(m.group(1))
- original_id = m.group(2)
- sub_updates.setdefault(bi, {})[original_id] = text
- else:
- main_updates[cell_id] = text
-
- # Update main session cells
- cells = word_result.get("cells", [])
- updated_count = 0
- for cell in cells:
- if cell["cell_id"] in main_updates:
- cell["text"] = main_updates[cell["cell_id"]]
- cell["status"] = "edited"
- updated_count += 1
-
- word_result["cells"] = cells
-
- # Also update vocab_entries if present
- entries = word_result.get("vocab_entries") or word_result.get("entries") or []
- if entries:
- # Map cell_id pattern "R{row}_C{col}" to entry fields
- for entry in entries:
- row_idx = entry.get("row_index", -1)
- # Check each field's cell
- for col_idx, field_name in enumerate(["english", "german", "example"]):
- cell_id = f"R{row_idx:02d}_C{col_idx}"
- # Also try without zero-padding
- cell_id_alt = f"R{row_idx}_C{col_idx}"
- new_text = main_updates.get(cell_id) or main_updates.get(cell_id_alt)
- if new_text is not None:
- entry[field_name] = new_text
-
- word_result["vocab_entries"] = entries
- if "entries" in word_result:
- word_result["entries"] = entries
-
- await update_session_db(session_id, word_result=word_result, current_step=10)
-
- if session_id in _cache:
- _cache[session_id]["word_result"] = word_result
-
- # Route sub-session updates
- sub_updated = 0
- if sub_updates:
- subs = await get_sub_sessions(session_id)
- sub_by_index = {s.get("box_index"): s["id"] for s in subs}
- for bi, updates in sub_updates.items():
- sub_id = sub_by_index.get(bi)
- if not sub_id:
- continue
- sub_session = await get_session_db(sub_id)
- if not sub_session:
- continue
- sub_word = sub_session.get("word_result")
- if not sub_word:
- continue
- sub_cells = sub_word.get("cells", [])
- for cell in sub_cells:
- if cell["cell_id"] in updates:
- cell["text"] = updates[cell["cell_id"]]
- cell["status"] = "edited"
- sub_updated += 1
- sub_word["cells"] = sub_cells
- await update_session_db(sub_id, word_result=sub_word)
- if sub_id in _cache:
- _cache[sub_id]["word_result"] = sub_word
-
- total_updated = updated_count + sub_updated
- logger.info(f"Reconstruction saved for session {session_id}: "
- f"{updated_count} main + {sub_updated} sub-session cells updated")
-
- return {
- "session_id": session_id,
- "updated": total_updated,
- "main_updated": updated_count,
- "sub_updated": sub_updated,
- }
-
-
-@router.get("/sessions/{session_id}/reconstruction/fabric-json")
-async def get_fabric_json(session_id: str):
- """Return cell grid as Fabric.js-compatible JSON for the canvas editor.
-
- If the session has sub-sessions (box regions), their cells are merged
- into the result at the correct Y positions.
- """
- session = await get_session_db(session_id)
- if not session:
- raise HTTPException(status_code=404, detail=f"Session {session_id} not found")
-
- word_result = session.get("word_result")
- if not word_result:
- raise HTTPException(status_code=400, detail="No word result found")
-
- cells = list(word_result.get("cells", []))
- img_w = word_result.get("image_width", 800)
- img_h = word_result.get("image_height", 600)
-
- # Merge sub-session cells at box positions
- subs = await get_sub_sessions(session_id)
- if subs:
- column_result = session.get("column_result") or {}
- zones = column_result.get("zones") or []
- box_zones = [z for z in zones if z.get("zone_type") == "box" and z.get("box")]
-
- for sub in subs:
- sub_session = await get_session_db(sub["id"])
- if not sub_session:
- continue
- sub_word = sub_session.get("word_result")
- if not sub_word or not sub_word.get("cells"):
- continue
-
- bi = sub.get("box_index", 0)
- if bi < len(box_zones):
- box = box_zones[bi]["box"]
- box_y, box_x = box["y"], box["x"]
- else:
- box_y, box_x = 0, 0
-
- # Offset sub-session cells to absolute page coordinates
- for cell in sub_word["cells"]:
- cell_copy = dict(cell)
- # Prefix cell_id with box index
- cell_copy["cell_id"] = f"box{bi}_{cell_copy.get('cell_id', '')}"
- cell_copy["source"] = f"box_{bi}"
- # Offset bbox_px
- bbox = cell_copy.get("bbox_px", {})
- if bbox:
- bbox = dict(bbox)
- bbox["x"] = bbox.get("x", 0) + box_x
- bbox["y"] = bbox.get("y", 0) + box_y
- cell_copy["bbox_px"] = bbox
- cells.append(cell_copy)
-
- from services.layout_reconstruction_service import cells_to_fabric_json
- fabric_json = cells_to_fabric_json(cells, img_w, img_h)
-
- return fabric_json
-
-
-# ---------------------------------------------------------------------------
-# Vocab entries merged + PDF/DOCX export
-# ---------------------------------------------------------------------------
-
-@router.get("/sessions/{session_id}/vocab-entries/merged")
-async def get_merged_vocab_entries(session_id: str):
- """Return vocab entries from main session + all sub-sessions, sorted by Y position."""
- session = await get_session_db(session_id)
- if not session:
- raise HTTPException(status_code=404, detail=f"Session {session_id} not found")
-
- word_result = session.get("word_result") or {}
- entries = list(word_result.get("vocab_entries") or word_result.get("entries") or [])
-
- # Tag main entries
- for e in entries:
- e.setdefault("source", "main")
-
- # Merge sub-session entries
- subs = await get_sub_sessions(session_id)
- if subs:
- column_result = session.get("column_result") or {}
- zones = column_result.get("zones") or []
- box_zones = [z for z in zones if z.get("zone_type") == "box" and z.get("box")]
-
- for sub in subs:
- sub_session = await get_session_db(sub["id"])
- if not sub_session:
- continue
- sub_word = sub_session.get("word_result") or {}
- sub_entries = sub_word.get("vocab_entries") or sub_word.get("entries") or []
-
- bi = sub.get("box_index", 0)
- box_y = 0
- if bi < len(box_zones):
- box_y = box_zones[bi]["box"]["y"]
-
- for e in sub_entries:
- e_copy = dict(e)
- e_copy["source"] = f"box_{bi}"
- e_copy["source_y"] = box_y # for sorting
- entries.append(e_copy)
-
- # Sort by approximate Y position
- def _sort_key(e):
- if e.get("source", "main") == "main":
- return e.get("row_index", 0) * 100 # main entries by row index
- return e.get("source_y", 0) * 100 + e.get("row_index", 0)
-
- entries.sort(key=_sort_key)
-
- return {
- "session_id": session_id,
- "entries": entries,
- "total": len(entries),
- "sources": list(set(e.get("source", "main") for e in entries)),
- }
-
-
-@router.get("/sessions/{session_id}/reconstruction/export/pdf")
-async def export_reconstruction_pdf(session_id: str):
- """Export the reconstructed cell grid as a PDF table."""
- session = await get_session_db(session_id)
- if not session:
- raise HTTPException(status_code=404, detail=f"Session {session_id} not found")
-
- word_result = session.get("word_result")
- if not word_result:
- raise HTTPException(status_code=400, detail="No word result found")
-
- cells = word_result.get("cells", [])
- columns_used = word_result.get("columns_used", [])
- grid_shape = word_result.get("grid_shape", {})
- n_rows = grid_shape.get("rows", 0)
- n_cols = grid_shape.get("cols", 0)
-
- # Build table data: rows x columns
- table_data: list[list[str]] = []
- header = [c.get("label", c.get("type", f"Col {i}")) for i, c in enumerate(columns_used)]
- if not header:
- header = [f"Col {i}" for i in range(n_cols)]
- table_data.append(header)
-
- for r in range(n_rows):
- row_texts = []
- for ci in range(n_cols):
- cell_id = f"R{r:02d}_C{ci}"
- cell = next((c for c in cells if c.get("cell_id") == cell_id), None)
- row_texts.append(cell.get("text", "") if cell else "")
- table_data.append(row_texts)
-
- # Generate PDF with reportlab
- try:
- from reportlab.lib.pagesizes import A4
- from reportlab.lib import colors
- from reportlab.platypus import SimpleDocTemplate, Table, TableStyle
- import io as _io
-
- buf = _io.BytesIO()
- doc = SimpleDocTemplate(buf, pagesize=A4)
- if not table_data or not table_data[0]:
- raise HTTPException(status_code=400, detail="No data to export")
-
- t = Table(table_data)
- t.setStyle(TableStyle([
- ('BACKGROUND', (0, 0), (-1, 0), colors.HexColor('#0d9488')),
- ('TEXTCOLOR', (0, 0), (-1, 0), colors.white),
- ('FONTSIZE', (0, 0), (-1, -1), 9),
- ('GRID', (0, 0), (-1, -1), 0.5, colors.grey),
- ('VALIGN', (0, 0), (-1, -1), 'TOP'),
- ('WORDWRAP', (0, 0), (-1, -1), True),
- ]))
- doc.build([t])
- buf.seek(0)
-
- from fastapi.responses import StreamingResponse
- return StreamingResponse(
- buf,
- media_type="application/pdf",
- headers={"Content-Disposition": f'attachment; filename="reconstruction_{session_id}.pdf"'},
- )
- except ImportError:
- raise HTTPException(status_code=501, detail="reportlab not installed")
-
-
-@router.get("/sessions/{session_id}/reconstruction/export/docx")
-async def export_reconstruction_docx(session_id: str):
- """Export the reconstructed cell grid as a DOCX table."""
- session = await get_session_db(session_id)
- if not session:
- raise HTTPException(status_code=404, detail=f"Session {session_id} not found")
-
- word_result = session.get("word_result")
- if not word_result:
- raise HTTPException(status_code=400, detail="No word result found")
-
- cells = word_result.get("cells", [])
- columns_used = word_result.get("columns_used", [])
- grid_shape = word_result.get("grid_shape", {})
- n_rows = grid_shape.get("rows", 0)
- n_cols = grid_shape.get("cols", 0)
-
- try:
- from docx import Document
- from docx.shared import Pt
- import io as _io
-
- doc = Document()
- doc.add_heading(f'Rekonstruktion – Session {session_id[:8]}', level=1)
-
- # Build header
- header = [c.get("label", c.get("type", f"Col {i}")) for i, c in enumerate(columns_used)]
- if not header:
- header = [f"Col {i}" for i in range(n_cols)]
-
- table = doc.add_table(rows=1 + n_rows, cols=max(n_cols, 1))
- table.style = 'Table Grid'
-
- # Header row
- for ci, h in enumerate(header):
- table.rows[0].cells[ci].text = h
-
- # Data rows
- for r in range(n_rows):
- for ci in range(n_cols):
- cell_id = f"R{r:02d}_C{ci}"
- cell = next((c for c in cells if c.get("cell_id") == cell_id), None)
- table.rows[r + 1].cells[ci].text = cell.get("text", "") if cell else ""
-
- buf = _io.BytesIO()
- doc.save(buf)
- buf.seek(0)
-
- from fastapi.responses import StreamingResponse
- return StreamingResponse(
- buf,
- media_type="application/vnd.openxmlformats-officedocument.wordprocessingml.document",
- headers={"Content-Disposition": f'attachment; filename="reconstruction_{session_id}.docx"'},
- )
- except ImportError:
- raise HTTPException(status_code=501, detail="python-docx not installed")
-
-
-# ---------------------------------------------------------------------------
-# Step 8: Validation — Original vs. Reconstruction
-# ---------------------------------------------------------------------------
-
-@router.post("/sessions/{session_id}/reconstruction/detect-images")
-async def detect_image_regions(session_id: str):
- """Detect illustration/image regions in the original scan using VLM.
-
- Sends the original image to qwen2.5vl to find non-text, non-table
- image areas, returning bounding boxes (in %) and descriptions.
- """
- import base64
- import httpx
- import re
-
- session = await get_session_db(session_id)
- if not session:
- raise HTTPException(status_code=404, detail=f"Session {session_id} not found")
-
- # Get original image bytes
- original_png = await get_session_image(session_id, "original")
- if not original_png:
- raise HTTPException(status_code=400, detail="No original image found")
-
- # Build context from vocab entries for richer descriptions
- word_result = session.get("word_result") or {}
- entries = word_result.get("vocab_entries") or word_result.get("entries") or []
- vocab_context = ""
- if entries:
- sample = entries[:10]
- words = [f"{e.get('english', '')} / {e.get('german', '')}" for e in sample if e.get('english')]
- if words:
- vocab_context = f"\nContext: This is a vocabulary page with words like: {', '.join(words)}"
-
- ollama_base = os.getenv("OLLAMA_BASE_URL", "http://host.docker.internal:11434")
- model = os.getenv("OLLAMA_HTR_MODEL", "qwen2.5vl:32b")
-
- prompt = (
- "Analyze this scanned page. Find ALL illustration/image/picture regions "
- "(NOT text, NOT table cells, NOT blank areas). "
- "For each image region found, return its bounding box as percentage of page dimensions "
- "and a short English description of what the image shows. "
- "Reply with ONLY a JSON array like: "
- '[{"x": 10, "y": 20, "w": 30, "h": 25, "description": "drawing of a cat"}] '
- "where x, y, w, h are percentages (0-100) of the page width/height. "
- "If there are NO images on the page, return an empty array: []"
- f"{vocab_context}"
- )
-
- img_b64 = base64.b64encode(original_png).decode("utf-8")
- payload = {
- "model": model,
- "prompt": prompt,
- "images": [img_b64],
- "stream": False,
- }
-
- try:
- async with httpx.AsyncClient(timeout=120.0) as client:
- resp = await client.post(f"{ollama_base}/api/generate", json=payload)
- resp.raise_for_status()
- text = resp.json().get("response", "")
-
- # Parse JSON array from response
- match = re.search(r'\[.*?\]', text, re.DOTALL)
- if match:
- raw_regions = json.loads(match.group(0))
- else:
- raw_regions = []
-
- # Normalize to ImageRegion format
- regions = []
- for r in raw_regions:
- regions.append({
- "bbox_pct": {
- "x": max(0, min(100, float(r.get("x", 0)))),
- "y": max(0, min(100, float(r.get("y", 0)))),
- "w": max(1, min(100, float(r.get("w", 10)))),
- "h": max(1, min(100, float(r.get("h", 10)))),
- },
- "description": r.get("description", ""),
- "prompt": r.get("description", ""),
- "image_b64": None,
- "style": "educational",
- })
-
- # Enrich prompts with nearby vocab context
- if entries:
- for region in regions:
- ry = region["bbox_pct"]["y"]
- rh = region["bbox_pct"]["h"]
- nearby = [
- e for e in entries
- if e.get("bbox") and abs(e["bbox"].get("y", 0) - ry) < rh + 10
- ]
- if nearby:
- en_words = [e.get("english", "") for e in nearby if e.get("english")]
- de_words = [e.get("german", "") for e in nearby if e.get("german")]
- if en_words or de_words:
- context = f" (vocabulary context: {', '.join(en_words[:5])}"
- if de_words:
- context += f" / {', '.join(de_words[:5])}"
- context += ")"
- region["prompt"] = region["description"] + context
-
- # Save to ground_truth JSONB
- ground_truth = session.get("ground_truth") or {}
- validation = ground_truth.get("validation") or {}
- validation["image_regions"] = regions
- validation["detected_at"] = datetime.utcnow().isoformat()
- ground_truth["validation"] = validation
- await update_session_db(session_id, ground_truth=ground_truth)
-
- if session_id in _cache:
- _cache[session_id]["ground_truth"] = ground_truth
-
- logger.info(f"Detected {len(regions)} image regions for session {session_id}")
-
- return {"regions": regions, "count": len(regions)}
-
- except httpx.ConnectError:
- logger.warning(f"VLM not available at {ollama_base} for image detection")
- return {"regions": [], "count": 0, "error": "VLM not available"}
- except Exception as e:
- logger.error(f"Image detection failed for {session_id}: {e}")
- return {"regions": [], "count": 0, "error": str(e)}
-
-
-@router.post("/sessions/{session_id}/reconstruction/generate-image")
-async def generate_image_for_region(session_id: str, req: GenerateImageRequest):
- """Generate a replacement image for a detected region using mflux.
-
- Sends the prompt (with style suffix) to the mflux-service running
- natively on the Mac Mini (Metal GPU required).
- """
- import httpx
-
- session = await get_session_db(session_id)
- if not session:
- raise HTTPException(status_code=404, detail=f"Session {session_id} not found")
-
- ground_truth = session.get("ground_truth") or {}
- validation = ground_truth.get("validation") or {}
- regions = validation.get("image_regions") or []
-
- if req.region_index < 0 or req.region_index >= len(regions):
- raise HTTPException(status_code=400, detail=f"Invalid region_index {req.region_index}, have {len(regions)} regions")
-
- mflux_url = os.getenv("MFLUX_URL", "http://host.docker.internal:8095")
- style_suffix = STYLE_SUFFIXES.get(req.style, STYLE_SUFFIXES["educational"])
- full_prompt = f"{req.prompt}, {style_suffix}"
-
- # Determine image size from region aspect ratio (snap to multiples of 64)
- region = regions[req.region_index]
- bbox = region["bbox_pct"]
- aspect = bbox["w"] / max(bbox["h"], 1)
- if aspect > 1.3:
- width, height = 768, 512
- elif aspect < 0.7:
- width, height = 512, 768
- else:
- width, height = 512, 512
-
- try:
- async with httpx.AsyncClient(timeout=300.0) as client:
- resp = await client.post(f"{mflux_url}/generate", json={
- "prompt": full_prompt,
- "width": width,
- "height": height,
- "steps": 4,
- })
- resp.raise_for_status()
- data = resp.json()
- image_b64 = data.get("image_b64")
-
- if not image_b64:
- return {"image_b64": None, "success": False, "error": "No image returned"}
-
- # Save to ground_truth
- regions[req.region_index]["image_b64"] = image_b64
- regions[req.region_index]["prompt"] = req.prompt
- regions[req.region_index]["style"] = req.style
- validation["image_regions"] = regions
- ground_truth["validation"] = validation
- await update_session_db(session_id, ground_truth=ground_truth)
-
- if session_id in _cache:
- _cache[session_id]["ground_truth"] = ground_truth
-
- logger.info(f"Generated image for session {session_id} region {req.region_index}")
- return {"image_b64": image_b64, "success": True}
-
- except httpx.ConnectError:
- logger.warning(f"mflux-service not available at {mflux_url}")
- return {"image_b64": None, "success": False, "error": f"mflux-service not available at {mflux_url}"}
- except Exception as e:
- logger.error(f"Image generation failed for {session_id}: {e}")
- return {"image_b64": None, "success": False, "error": str(e)}
-
-
-@router.post("/sessions/{session_id}/reconstruction/validate")
-async def save_validation(session_id: str, req: ValidationRequest):
- """Save final validation results for step 8.
-
- Stores notes, score, and preserves any detected/generated image regions.
- Sets current_step = 10 to mark pipeline as complete.
- """
- session = await get_session_db(session_id)
- if not session:
- raise HTTPException(status_code=404, detail=f"Session {session_id} not found")
-
- ground_truth = session.get("ground_truth") or {}
- validation = ground_truth.get("validation") or {}
- validation["validated_at"] = datetime.utcnow().isoformat()
- validation["notes"] = req.notes
- validation["score"] = req.score
- ground_truth["validation"] = validation
-
- await update_session_db(session_id, ground_truth=ground_truth, current_step=11)
-
- if session_id in _cache:
- _cache[session_id]["ground_truth"] = ground_truth
-
- logger.info(f"Validation saved for session {session_id}: score={req.score}")
-
- return {"session_id": session_id, "validation": validation}
-
-
-@router.get("/sessions/{session_id}/reconstruction/validation")
-async def get_validation(session_id: str):
- """Retrieve saved validation data for step 8."""
- session = await get_session_db(session_id)
- if not session:
- raise HTTPException(status_code=404, detail=f"Session {session_id} not found")
-
- ground_truth = session.get("ground_truth") or {}
- validation = ground_truth.get("validation")
-
- return {
- "session_id": session_id,
- "validation": validation,
- "word_result": session.get("word_result"),
- }
-
-
-# ---------------------------------------------------------------------------
-# Remove handwriting
-# ---------------------------------------------------------------------------
-
-@router.post("/sessions/{session_id}/remove-handwriting")
-async def remove_handwriting_endpoint(session_id: str, req: RemoveHandwritingRequest):
- """
- Remove handwriting from a session image using inpainting.
-
- Steps:
- 1. Load source image (auto -> deskewed if available, else original)
- 2. Detect handwriting mask (filtered by target_ink)
- 3. Dilate mask to cover stroke edges
- 4. Inpaint the image
- 5. Store result as clean_png in the session
-
- Returns metadata including the URL to fetch the clean image.
- """
- import time as _time
- t0 = _time.monotonic()
-
- from services.handwriting_detection import detect_handwriting
- from services.inpainting_service import inpaint_image, dilate_mask as _dilate_mask, InpaintingMethod, image_to_png
-
- session = await get_session_db(session_id)
- if not session:
- raise HTTPException(status_code=404, detail=f"Session {session_id} not found")
-
- # 1. Determine source image
- source = req.use_source
- if source == "auto":
- deskewed = await get_session_image(session_id, "deskewed")
- source = "deskewed" if deskewed else "original"
-
- image_bytes = await get_session_image(session_id, source)
- if not image_bytes:
- raise HTTPException(status_code=404, detail=f"Source image '{source}' not available")
-
- # 2. Detect handwriting mask
- detection = detect_handwriting(image_bytes, target_ink=req.target_ink)
-
- # 3. Convert mask to PNG bytes and dilate
- import io
- from PIL import Image as _PILImage
- mask_img = _PILImage.fromarray(detection.mask)
- mask_buf = io.BytesIO()
- mask_img.save(mask_buf, format="PNG")
- mask_bytes = mask_buf.getvalue()
-
- if req.dilation > 0:
- mask_bytes = _dilate_mask(mask_bytes, iterations=req.dilation)
-
- # 4. Inpaint
- method_map = {
- "telea": InpaintingMethod.OPENCV_TELEA,
- "ns": InpaintingMethod.OPENCV_NS,
- "auto": InpaintingMethod.AUTO,
- }
- inpaint_method = method_map.get(req.method, InpaintingMethod.AUTO)
-
- result = inpaint_image(image_bytes, mask_bytes, method=inpaint_method)
- if not result.success:
- raise HTTPException(status_code=500, detail="Inpainting failed")
-
- elapsed_ms = int((_time.monotonic() - t0) * 1000)
-
- meta = {
- "method_used": result.method_used.value if hasattr(result.method_used, "value") else str(result.method_used),
- "handwriting_ratio": round(detection.handwriting_ratio, 4),
- "detection_confidence": round(detection.confidence, 4),
- "target_ink": req.target_ink,
- "dilation": req.dilation,
- "source_image": source,
- "processing_time_ms": elapsed_ms,
- }
-
- # 5. Persist clean image (convert BGR ndarray -> PNG bytes)
- clean_png_bytes = image_to_png(result.image)
- await update_session_db(session_id, clean_png=clean_png_bytes, handwriting_removal_meta=meta)
-
- return {
- **meta,
- "image_url": f"/api/v1/ocr-pipeline/sessions/{session_id}/image/clean",
- "session_id": session_id,
- }
from fastapi import APIRouter

from ocr_pipeline_llm_review import router as _llm_review_router
from ocr_pipeline_reconstruction import router as _reconstruction_router
from ocr_pipeline_validation import router as _validation_router

# Composite router — drop-in replacement for the old monolithic router.
# ocr_pipeline_api.py imports ``from ocr_pipeline_postprocess import router``.
router = APIRouter()
for _sub in (_llm_review_router, _reconstruction_router, _validation_router):
    router.include_router(_sub)
diff --git a/klausur-service/backend/ocr_pipeline_reconstruction.py b/klausur-service/backend/ocr_pipeline_reconstruction.py
new file mode 100644
index 0000000..99081c4
--- /dev/null
+++ b/klausur-service/backend/ocr_pipeline_reconstruction.py
@@ -0,0 +1,362 @@
+"""
+OCR Pipeline Reconstruction — save edits, Fabric JSON export, merged entries, PDF/DOCX export.
+
+Extracted from ocr_pipeline_postprocess.py.
+
+Lizenz: Apache 2.0
+DATENSCHUTZ: Alle Verarbeitung erfolgt lokal.
+"""
+
+import logging
+import re
+from typing import Dict
+
+from fastapi import APIRouter, HTTPException, Request
+from fastapi.responses import StreamingResponse
+
+from ocr_pipeline_session_store import (
+ get_session_db,
+ get_sub_sessions,
+ update_session_db,
+)
+from ocr_pipeline_common import _cache
+
+logger = logging.getLogger(__name__)
+
+router = APIRouter(prefix="/api/v1/ocr-pipeline", tags=["ocr-pipeline"])
+
+
+# ---------------------------------------------------------------------------
+# Step 9: Reconstruction + Fabric JSON export
+# ---------------------------------------------------------------------------
+
@router.post("/sessions/{session_id}/reconstruction")
async def save_reconstruction(session_id: str, request: Request):
    """Save edited cell texts from reconstruction step.

    Expects a JSON body ``{"cells": [{"cell_id": ..., "text": ...}, ...]}``.
    Cell ids of the form ``box{N}_<original_id>`` are routed to the
    sub-session for box N; all other ids update the main session's
    word_result.  Advances the session to current_step=10 in all cases.

    Returns:
        dict with total/main/sub update counts, or ``{"updated": 0}`` when
        the body contains no cell edits.

    Raises:
        HTTPException: 404 for an unknown session, 400 when the session has
            no word_result yet.
    """
    session = await get_session_db(session_id)
    if not session:
        raise HTTPException(status_code=404, detail=f"Session {session_id} not found")

    word_result = session.get("word_result")
    if not word_result:
        raise HTTPException(status_code=400, detail="No word result found")

    body = await request.json()
    cell_updates = body.get("cells", [])

    if not cell_updates:
        # Nothing to write — still advance the pipeline step.
        await update_session_db(session_id, current_step=10)
        return {"session_id": session_id, "updated": 0}

    # Build update map: cell_id -> new text
    update_map = {c["cell_id"]: c["text"] for c in cell_updates}

    # Separate sub-session updates (cell_ids prefixed with "box{N}_")
    sub_updates: Dict[int, Dict[str, str]] = {}  # box_index -> {original_cell_id: text}
    main_updates: Dict[str, str] = {}
    for cell_id, text in update_map.items():
        m = re.match(r'^box(\d+)_(.+)$', cell_id)
        if m:
            bi = int(m.group(1))
            original_id = m.group(2)
            sub_updates.setdefault(bi, {})[original_id] = text
        else:
            main_updates[cell_id] = text

    # Update main session cells; mark each touched cell as "edited".
    cells = word_result.get("cells", [])
    updated_count = 0
    for cell in cells:
        if cell["cell_id"] in main_updates:
            cell["text"] = main_updates[cell["cell_id"]]
            cell["status"] = "edited"
            updated_count += 1

    word_result["cells"] = cells

    # Also update vocab_entries if present — cell ids follow the grid
    # convention R{row:02d}_C{col} (with an unpadded fallback).
    entries = word_result.get("vocab_entries") or word_result.get("entries") or []
    if entries:
        for entry in entries:
            row_idx = entry.get("row_index", -1)
            for col_idx, field_name in enumerate(["english", "german", "example"]):
                cell_id = f"R{row_idx:02d}_C{col_idx}"
                cell_id_alt = f"R{row_idx}_C{col_idx}"
                new_text = main_updates.get(cell_id) or main_updates.get(cell_id_alt)
                if new_text is not None:
                    entry[field_name] = new_text

    # NOTE(review): this writes vocab_entries (possibly []) even when the
    # session never had vocab entries — presumably harmless; confirm.
    word_result["vocab_entries"] = entries
    if "entries" in word_result:
        word_result["entries"] = entries

    await update_session_db(session_id, word_result=word_result, current_step=10)

    # Keep the in-memory cache consistent with the DB write.
    if session_id in _cache:
        _cache[session_id]["word_result"] = word_result

    # Route sub-session updates to each box's own session record.
    sub_updated = 0
    if sub_updates:
        subs = await get_sub_sessions(session_id)
        sub_by_index = {s.get("box_index"): s["id"] for s in subs}
        for bi, updates in sub_updates.items():
            sub_id = sub_by_index.get(bi)
            if not sub_id:
                continue
            sub_session = await get_session_db(sub_id)
            if not sub_session:
                continue
            sub_word = sub_session.get("word_result")
            if not sub_word:
                continue
            sub_cells = sub_word.get("cells", [])
            for cell in sub_cells:
                if cell["cell_id"] in updates:
                    cell["text"] = updates[cell["cell_id"]]
                    cell["status"] = "edited"
                    sub_updated += 1
            sub_word["cells"] = sub_cells
            await update_session_db(sub_id, word_result=sub_word)
            if sub_id in _cache:
                _cache[sub_id]["word_result"] = sub_word

    total_updated = updated_count + sub_updated
    logger.info(f"Reconstruction saved for session {session_id}: "
                f"{updated_count} main + {sub_updated} sub-session cells updated")

    return {
        "session_id": session_id,
        "updated": total_updated,
        "main_updated": updated_count,
        "sub_updated": sub_updated,
    }
+
+
@router.get("/sessions/{session_id}/reconstruction/fabric-json")
async def get_fabric_json(session_id: str):
    """Return cell grid as Fabric.js-compatible JSON for the canvas editor.

    Merges cells from all sub-sessions into the main grid: each sub-cell's
    id is prefixed ``box{N}_`` and its pixel bbox is shifted by the
    matching box zone's origin so coordinates are in main-image space.

    Raises:
        HTTPException: 404 for an unknown session, 400 when no word_result
            exists.
    """
    session = await get_session_db(session_id)
    if not session:
        raise HTTPException(status_code=404, detail=f"Session {session_id} not found")

    word_result = session.get("word_result")
    if not word_result:
        raise HTTPException(status_code=400, detail="No word result found")

    cells = list(word_result.get("cells", []))
    img_w = word_result.get("image_width", 800)   # fallback canvas size
    img_h = word_result.get("image_height", 600)

    # Merge sub-session cells at box positions
    subs = await get_sub_sessions(session_id)
    if subs:
        column_result = session.get("column_result") or {}
        zones = column_result.get("zones") or []
        box_zones = [z for z in zones if z.get("zone_type") == "box" and z.get("box")]

        for sub in subs:
            sub_session = await get_session_db(sub["id"])
            if not sub_session:
                continue
            sub_word = sub_session.get("word_result")
            if not sub_word or not sub_word.get("cells"):
                continue

            # Locate this sub-session's box zone; fall back to the origin
            # when the zone list is shorter than the box index.
            bi = sub.get("box_index", 0)
            if bi < len(box_zones):
                box = box_zones[bi]["box"]
                box_y, box_x = box["y"], box["x"]
            else:
                box_y, box_x = 0, 0

            for cell in sub_word["cells"]:
                cell_copy = dict(cell)
                cell_copy["cell_id"] = f"box{bi}_{cell_copy.get('cell_id', '')}"
                cell_copy["source"] = f"box_{bi}"
                bbox = cell_copy.get("bbox_px", {})
                if bbox:
                    # Shift sub-image pixel coords into main-image space.
                    bbox = dict(bbox)
                    bbox["x"] = bbox.get("x", 0) + box_x
                    bbox["y"] = bbox.get("y", 0) + box_y
                    cell_copy["bbox_px"] = bbox
                cells.append(cell_copy)

    # NOTE(review): local import — presumably avoids a module-level import
    # cycle or heavy startup cost; confirm before hoisting.
    from services.layout_reconstruction_service import cells_to_fabric_json
    fabric_json = cells_to_fabric_json(cells, img_w, img_h)

    return fabric_json
+
+
+# ---------------------------------------------------------------------------
+# Vocab entries merged + PDF/DOCX export
+# ---------------------------------------------------------------------------
+
@router.get("/sessions/{session_id}/vocab-entries/merged")
async def get_merged_vocab_entries(session_id: str):
    """Return vocab entries from main session + all sub-sessions, sorted by Y position."""
    session = await get_session_db(session_id)
    if not session:
        raise HTTPException(status_code=404, detail=f"Session {session_id} not found")

    word_result = session.get("word_result") or {}
    merged = list(word_result.get("vocab_entries") or word_result.get("entries") or [])

    # Main-session entries are tagged "main" unless already labelled.
    for entry in merged:
        entry.setdefault("source", "main")

    sub_sessions = await get_sub_sessions(session_id)
    if sub_sessions:
        zones = (session.get("column_result") or {}).get("zones") or []
        box_zones = [z for z in zones if z.get("zone_type") == "box" and z.get("box")]

        for sub in sub_sessions:
            sub_data = await get_session_db(sub["id"])
            if not sub_data:
                continue
            sub_word = sub_data.get("word_result") or {}
            sub_entries = sub_word.get("vocab_entries") or sub_word.get("entries") or []

            # Y offset of this box on the page; 0 when no zone is known.
            box_index = sub.get("box_index", 0)
            y_offset = box_zones[box_index]["box"]["y"] if box_index < len(box_zones) else 0

            for entry in sub_entries:
                tagged = dict(entry)
                tagged["source"] = f"box_{box_index}"
                tagged["source_y"] = y_offset
                merged.append(tagged)

    def _position_key(entry):
        # Main entries order by row; sub entries by box Y, then row.
        if entry.get("source", "main") == "main":
            return entry.get("row_index", 0) * 100
        return entry.get("source_y", 0) * 100 + entry.get("row_index", 0)

    merged.sort(key=_position_key)

    return {
        "session_id": session_id,
        "entries": merged,
        "total": len(merged),
        "sources": list({e.get("source", "main") for e in merged}),
    }
+
+
@router.get("/sessions/{session_id}/reconstruction/export/pdf")
async def export_reconstruction_pdf(session_id: str):
    """Export the reconstructed cell grid as a PDF table.

    Builds a (1 + n_rows) x n_cols table (header + data) from the session's
    word_result and streams it as an ``application/pdf`` attachment.

    Raises:
        HTTPException: 404 for an unknown session, 400 when no word_result
            exists, 501 if reportlab is not installed.
    """
    session = await get_session_db(session_id)
    if not session:
        raise HTTPException(status_code=404, detail=f"Session {session_id} not found")

    word_result = session.get("word_result")
    if not word_result:
        raise HTTPException(status_code=400, detail="No word result found")

    cells = word_result.get("cells", [])
    columns_used = word_result.get("columns_used", [])
    grid_shape = word_result.get("grid_shape", {})
    n_rows = grid_shape.get("rows", 0)
    n_cols = grid_shape.get("cols", 0)

    # Index cells by id once — the previous linear scan per table cell was
    # O(rows * cols * len(cells)).
    cell_by_id = {c.get("cell_id"): c for c in cells}

    # Build table data: header row + n_rows data rows.
    table_data: list[list[str]] = []
    header = [c.get("label", c.get("type", f"Col {i}")) for i, c in enumerate(columns_used)]
    if not header:
        header = [f"Col {i}" for i in range(n_cols)]
    table_data.append(header)

    for r in range(n_rows):
        row_texts = []
        for ci in range(n_cols):
            cell = cell_by_id.get(f"R{r:02d}_C{ci}")
            row_texts.append(cell.get("text", "") if cell else "")
        table_data.append(row_texts)

    try:
        from reportlab.lib.pagesizes import A4
        from reportlab.lib import colors
        from reportlab.platypus import SimpleDocTemplate, Table, TableStyle
        import io as _io

        buf = _io.BytesIO()
        doc = SimpleDocTemplate(buf, pagesize=A4)

        t = Table(table_data)
        t.setStyle(TableStyle([
            ('BACKGROUND', (0, 0), (-1, 0), colors.HexColor('#0d9488')),
            ('TEXTCOLOR', (0, 0), (-1, 0), colors.white),
            ('FONTSIZE', (0, 0), (-1, -1), 9),
            ('GRID', (0, 0), (-1, -1), 0.5, colors.grey),
            ('VALIGN', (0, 0), (-1, -1), 'TOP'),
            # NOTE(review): 'WORDWRAP' is not a documented reportlab
            # TableStyle command — verify it has the intended effect.
            ('WORDWRAP', (0, 0), (-1, -1), True),
        ]))
        doc.build([t])
        buf.seek(0)

        return StreamingResponse(
            buf,
            media_type="application/pdf",
            headers={"Content-Disposition": f'attachment; filename="reconstruction_{session_id}.pdf"'},
        )
    except ImportError:
        raise HTTPException(status_code=501, detail="reportlab not installed")
+
+
@router.get("/sessions/{session_id}/reconstruction/export/docx")
async def export_reconstruction_docx(session_id: str):
    """Export the reconstructed cell grid as a DOCX table.

    Builds a (1 + n_rows) x max(n_cols, 1) 'Table Grid' table from the
    session's word_result and streams it as a .docx attachment.

    Raises:
        HTTPException: 404 for an unknown session, 400 when no word_result
            exists, 501 if python-docx is not installed.
    """
    session = await get_session_db(session_id)
    if not session:
        raise HTTPException(status_code=404, detail=f"Session {session_id} not found")

    word_result = session.get("word_result")
    if not word_result:
        raise HTTPException(status_code=400, detail="No word result found")

    cells = word_result.get("cells", [])
    columns_used = word_result.get("columns_used", [])
    grid_shape = word_result.get("grid_shape", {})
    n_rows = grid_shape.get("rows", 0)
    n_cols = grid_shape.get("cols", 0)

    # Index cells by id once — the previous linear scan per table cell was
    # O(rows * cols * len(cells)).
    cell_by_id = {c.get("cell_id"): c for c in cells}

    try:
        from docx import Document
        import io as _io

        doc = Document()
        doc.add_heading(f'Rekonstruktion -- Session {session_id[:8]}', level=1)

        header = [c.get("label", c.get("type", f"Col {i}")) for i, c in enumerate(columns_used)]
        if not header:
            header = [f"Col {i}" for i in range(n_cols)]

        n_table_cols = max(n_cols, 1)
        table = doc.add_table(rows=1 + n_rows, cols=n_table_cols)
        table.style = 'Table Grid'

        # Clamp the header to the table width — columns_used may list more
        # columns than the grid actually has, which previously raised
        # IndexError on table.rows[0].cells[ci].
        for ci, h in enumerate(header[:n_table_cols]):
            table.rows[0].cells[ci].text = h

        for r in range(n_rows):
            for ci in range(n_cols):
                cell = cell_by_id.get(f"R{r:02d}_C{ci}")
                table.rows[r + 1].cells[ci].text = cell.get("text", "") if cell else ""

        buf = _io.BytesIO()
        doc.save(buf)
        buf.seek(0)

        return StreamingResponse(
            buf,
            media_type="application/vnd.openxmlformats-officedocument.wordprocessingml.document",
            headers={"Content-Disposition": f'attachment; filename="reconstruction_{session_id}.docx"'},
        )
    except ImportError:
        raise HTTPException(status_code=501, detail="python-docx not installed")
diff --git a/klausur-service/backend/ocr_pipeline_validation.py b/klausur-service/backend/ocr_pipeline_validation.py
new file mode 100644
index 0000000..3382a3f
--- /dev/null
+++ b/klausur-service/backend/ocr_pipeline_validation.py
@@ -0,0 +1,362 @@
+"""
+OCR Pipeline Validation — image detection, generation, validation save,
+and handwriting removal endpoints.
+
+Extracted from ocr_pipeline_postprocess.py.
+
+Lizenz: Apache 2.0
+DATENSCHUTZ: Alle Verarbeitung erfolgt lokal.
+"""
+
+import json
+import logging
+import os
+from datetime import datetime
+from typing import Optional
+
+from fastapi import APIRouter, HTTPException
+from pydantic import BaseModel
+
+from ocr_pipeline_session_store import (
+ get_session_db,
+ get_session_image,
+ update_session_db,
+)
+from ocr_pipeline_common import (
+ _cache,
+ RemoveHandwritingRequest,
+)
+
+logger = logging.getLogger(__name__)
+
+router = APIRouter(prefix="/api/v1/ocr-pipeline", tags=["ocr-pipeline"])
+
+# ---------------------------------------------------------------------------
+# Pydantic Models
+# ---------------------------------------------------------------------------
+
# Prompt suffixes appended per visual style when generating replacement
# images; keys are the accepted GenerateImageRequest.style values, with
# "educational" used as the fallback in generate_image_for_region.
STYLE_SUFFIXES = {
    "educational": "educational illustration, textbook style, clear, colorful",
    "cartoon": "cartoon, child-friendly, simple shapes",
    "sketch": "pencil sketch, hand-drawn, black and white",
    "clipart": "clipart, flat vector style, simple",
    "realistic": "photorealistic, high detail",
}
+
+
class ValidationRequest(BaseModel):
    """Payload for POST .../reconstruction/validate."""
    notes: Optional[str] = None  # free-text reviewer notes
    score: Optional[int] = None  # numeric validation score
+
+
class GenerateImageRequest(BaseModel):
    """Payload for POST .../reconstruction/generate-image."""
    region_index: int  # index into ground_truth["validation"]["image_regions"]
    prompt: str  # base prompt; a style suffix is appended before generation
    style: str = "educational"  # key into STYLE_SUFFIXES
+
+
+# ---------------------------------------------------------------------------
+# Image detection + generation
+# ---------------------------------------------------------------------------
+
@router.post("/sessions/{session_id}/reconstruction/detect-images")
async def detect_image_regions(session_id: str):
    """Detect illustration/image regions in the original scan using VLM.

    Sends the original page image to an Ollama vision model, asks for
    bounding boxes (percentages of page size) plus short descriptions of
    any illustrations, enriches the prompts with nearby vocab words, and
    persists the regions under ground_truth["validation"]["image_regions"].

    Returns:
        dict with ``regions`` and ``count``; VLM/parse failures are
        reported via an ``error`` field instead of raising.

    Raises:
        HTTPException: 404 for an unknown session, 400 when no original
            image exists.
    """
    import base64
    import httpx
    import re

    session = await get_session_db(session_id)
    if not session:
        raise HTTPException(status_code=404, detail=f"Session {session_id} not found")

    original_png = await get_session_image(session_id, "original")
    if not original_png:
        raise HTTPException(status_code=400, detail="No original image found")

    # Sample up to 10 vocab entries to give the VLM page context.
    word_result = session.get("word_result") or {}
    entries = word_result.get("vocab_entries") or word_result.get("entries") or []
    vocab_context = ""
    if entries:
        sample = entries[:10]
        words = [f"{e.get('english', '')} / {e.get('german', '')}" for e in sample if e.get('english')]
        if words:
            vocab_context = f"\nContext: This is a vocabulary page with words like: {', '.join(words)}"

    ollama_base = os.getenv("OLLAMA_BASE_URL", "http://host.docker.internal:11434")
    model = os.getenv("OLLAMA_HTR_MODEL", "qwen2.5vl:32b")

    prompt = (
        "Analyze this scanned page. Find ALL illustration/image/picture regions "
        "(NOT text, NOT table cells, NOT blank areas). "
        "For each image region found, return its bounding box as percentage of page dimensions "
        "and a short English description of what the image shows. "
        "Reply with ONLY a JSON array like: "
        '[{"x": 10, "y": 20, "w": 30, "h": 25, "description": "drawing of a cat"}] '
        "where x, y, w, h are percentages (0-100) of the page width/height. "
        "If there are NO images on the page, return an empty array: []"
        f"{vocab_context}"
    )

    img_b64 = base64.b64encode(original_png).decode("utf-8")
    payload = {
        "model": model,
        "prompt": prompt,
        "images": [img_b64],
        "stream": False,
    }

    try:
        async with httpx.AsyncClient(timeout=120.0) as client:
            resp = await client.post(f"{ollama_base}/api/generate", json=payload)
            resp.raise_for_status()
            text = resp.json().get("response", "")

            # Extract the first JSON array from the free-form model reply.
            # NOTE(review): the non-greedy pattern stops at the FIRST ']',
            # so a description containing ']' or a nested array truncates
            # the match — confirm model output stays flat.
            match = re.search(r'\[.*?\]', text, re.DOTALL)
            if match:
                raw_regions = json.loads(match.group(0))
            else:
                raw_regions = []

            # Normalize: clamp x/y to [0, 100] and w/h to [1, 100].
            regions = []
            for r in raw_regions:
                regions.append({
                    "bbox_pct": {
                        "x": max(0, min(100, float(r.get("x", 0)))),
                        "y": max(0, min(100, float(r.get("y", 0)))),
                        "w": max(1, min(100, float(r.get("w", 10)))),
                        "h": max(1, min(100, float(r.get("h", 10)))),
                    },
                    "description": r.get("description", ""),
                    "prompt": r.get("description", ""),
                    "image_b64": None,
                    "style": "educational",
                })

            # Enrich prompts with nearby vocab context
            if entries:
                for region in regions:
                    ry = region["bbox_pct"]["y"]
                    rh = region["bbox_pct"]["h"]
                    # Entries whose bbox Y lies within the region's height
                    # plus a 10%-of-page margin count as "nearby".
                    nearby = [
                        e for e in entries
                        if e.get("bbox") and abs(e["bbox"].get("y", 0) - ry) < rh + 10
                    ]
                    if nearby:
                        en_words = [e.get("english", "") for e in nearby if e.get("english")]
                        de_words = [e.get("german", "") for e in nearby if e.get("german")]
                        if en_words or de_words:
                            context = f" (vocabulary context: {', '.join(en_words[:5])}"
                            if de_words:
                                context += f" / {', '.join(de_words[:5])}"
                            context += ")"
                            region["prompt"] = region["description"] + context

            # Persist under ground_truth["validation"] and mirror to cache.
            ground_truth = session.get("ground_truth") or {}
            validation = ground_truth.get("validation") or {}
            validation["image_regions"] = regions
            validation["detected_at"] = datetime.utcnow().isoformat()  # naive UTC timestamp
            ground_truth["validation"] = validation
            await update_session_db(session_id, ground_truth=ground_truth)

            if session_id in _cache:
                _cache[session_id]["ground_truth"] = ground_truth

            logger.info(f"Detected {len(regions)} image regions for session {session_id}")

            return {"regions": regions, "count": len(regions)}

    except httpx.ConnectError:
        logger.warning(f"VLM not available at {ollama_base} for image detection")
        return {"regions": [], "count": 0, "error": "VLM not available"}
    except Exception as e:
        logger.error(f"Image detection failed for {session_id}: {e}")
        return {"regions": [], "count": 0, "error": str(e)}
+
+
@router.post("/sessions/{session_id}/reconstruction/generate-image")
async def generate_image_for_region(session_id: str, req: GenerateImageRequest):
    """Generate a replacement image for a detected region using mflux.

    Appends the style suffix to ``req.prompt``, chooses an output
    resolution from the region's aspect ratio, calls the external mflux
    service, and stores the returned base64 image on the region inside
    ground_truth["validation"]["image_regions"].

    Returns:
        dict with ``image_b64`` and ``success``; service failures are
        reported via an ``error`` field instead of raising.

    Raises:
        HTTPException: 404 for an unknown session, 400 for an
            out-of-range region_index.
    """
    import httpx

    session = await get_session_db(session_id)
    if not session:
        raise HTTPException(status_code=404, detail=f"Session {session_id} not found")

    ground_truth = session.get("ground_truth") or {}
    validation = ground_truth.get("validation") or {}
    regions = validation.get("image_regions") or []

    if req.region_index < 0 or req.region_index >= len(regions):
        raise HTTPException(status_code=400, detail=f"Invalid region_index {req.region_index}, have {len(regions)} regions")

    mflux_url = os.getenv("MFLUX_URL", "http://host.docker.internal:8095")
    style_suffix = STYLE_SUFFIXES.get(req.style, STYLE_SUFFIXES["educational"])
    full_prompt = f"{req.prompt}, {style_suffix}"

    # Pick landscape / portrait / square output from the region's aspect
    # ratio (w/h in page-percent; h is >= 1 by the detection clamp).
    region = regions[req.region_index]
    bbox = region["bbox_pct"]
    aspect = bbox["w"] / max(bbox["h"], 1)
    if aspect > 1.3:
        width, height = 768, 512
    elif aspect < 0.7:
        width, height = 512, 768
    else:
        width, height = 512, 512

    try:
        async with httpx.AsyncClient(timeout=300.0) as client:
            resp = await client.post(f"{mflux_url}/generate", json={
                "prompt": full_prompt,
                "width": width,
                "height": height,
                "steps": 4,
            })
            resp.raise_for_status()
            data = resp.json()
            image_b64 = data.get("image_b64")

            if not image_b64:
                return {"image_b64": None, "success": False, "error": "No image returned"}

            # Persist the generated image on the region and mirror to cache.
            regions[req.region_index]["image_b64"] = image_b64
            regions[req.region_index]["prompt"] = req.prompt
            regions[req.region_index]["style"] = req.style
            validation["image_regions"] = regions
            ground_truth["validation"] = validation
            await update_session_db(session_id, ground_truth=ground_truth)

            if session_id in _cache:
                _cache[session_id]["ground_truth"] = ground_truth

            logger.info(f"Generated image for session {session_id} region {req.region_index}")
            return {"image_b64": image_b64, "success": True}

    except httpx.ConnectError:
        logger.warning(f"mflux-service not available at {mflux_url}")
        return {"image_b64": None, "success": False, "error": f"mflux-service not available at {mflux_url}"}
    except Exception as e:
        logger.error(f"Image generation failed for {session_id}: {e}")
        return {"image_b64": None, "success": False, "error": str(e)}
+
+
+# ---------------------------------------------------------------------------
+# Validation save/get
+# ---------------------------------------------------------------------------
+
@router.post("/sessions/{session_id}/reconstruction/validate")
async def save_validation(session_id: str, req: ValidationRequest):
    """Persist the step-8 validation result (notes + score).

    Stamps validated_at, stores the payload under
    ground_truth["validation"], and advances the session to
    current_step=11.
    """
    session = await get_session_db(session_id)
    if not session:
        raise HTTPException(status_code=404, detail=f"Session {session_id} not found")

    ground_truth = session.get("ground_truth") or {}
    validation = ground_truth.get("validation") or {}
    validation.update(
        validated_at=datetime.utcnow().isoformat(),
        notes=req.notes,
        score=req.score,
    )
    ground_truth["validation"] = validation

    await update_session_db(session_id, ground_truth=ground_truth, current_step=11)

    # Mirror the write into the in-memory cache when present.
    if session_id in _cache:
        _cache[session_id]["ground_truth"] = ground_truth

    logger.info(f"Validation saved for session {session_id}: score={req.score}")

    return {"session_id": session_id, "validation": validation}
+
+
@router.get("/sessions/{session_id}/reconstruction/validation")
async def get_validation(session_id: str):
    """Return the stored step-8 validation (or None) plus the word_result."""
    session = await get_session_db(session_id)
    if not session:
        raise HTTPException(status_code=404, detail=f"Session {session_id} not found")

    gt = session.get("ground_truth") or {}
    return {
        "session_id": session_id,
        "validation": gt.get("validation"),
        "word_result": session.get("word_result"),
    }
+
+
+# ---------------------------------------------------------------------------
+# Remove handwriting
+# ---------------------------------------------------------------------------
+
@router.post("/sessions/{session_id}/remove-handwriting")
async def remove_handwriting_endpoint(session_id: str, req: RemoveHandwritingRequest):
    """Remove handwriting from a session image using inpainting.

    Pipeline: pick the source image ("auto" prefers deskewed over
    original), detect a handwriting mask filtered by ``req.target_ink``,
    optionally dilate it, inpaint, and persist the result as the session's
    ``clean_png``.

    Returns:
        Metadata dict (method, detection stats, timing) plus ``image_url``
        pointing at the stored clean image.

    Raises:
        HTTPException: 404 for a missing session or source image, 500 when
            inpainting reports failure.
    """
    import time as _time

    # Local imports: CV services are only needed by this endpoint.
    from services.handwriting_detection import detect_handwriting
    from services.inpainting_service import inpaint_image, dilate_mask as _dilate_mask, InpaintingMethod, image_to_png

    session = await get_session_db(session_id)
    if not session:
        raise HTTPException(status_code=404, detail=f"Session {session_id} not found")

    t0 = _time.monotonic()

    # 1. Determine source image
    source = req.use_source
    if source == "auto":
        deskewed = await get_session_image(session_id, "deskewed")
        source = "deskewed" if deskewed else "original"

    image_bytes = await get_session_image(session_id, source)
    if not image_bytes:
        raise HTTPException(status_code=404, detail=f"Source image '{source}' not available")

    # 2. Detect handwriting mask
    detection = detect_handwriting(image_bytes, target_ink=req.target_ink)

    # 3. Convert mask to PNG bytes and dilate
    import io
    from PIL import Image as _PILImage
    mask_img = _PILImage.fromarray(detection.mask)
    mask_buf = io.BytesIO()
    mask_img.save(mask_buf, format="PNG")
    mask_bytes = mask_buf.getvalue()

    if req.dilation > 0:
        # Widen the mask so stroke edges are fully covered.
        mask_bytes = _dilate_mask(mask_bytes, iterations=req.dilation)

    # 4. Inpaint — map the request's method string onto the service enum,
    # falling back to AUTO for unknown values.
    method_map = {
        "telea": InpaintingMethod.OPENCV_TELEA,
        "ns": InpaintingMethod.OPENCV_NS,
        "auto": InpaintingMethod.AUTO,
    }
    inpaint_method = method_map.get(req.method, InpaintingMethod.AUTO)

    result = inpaint_image(image_bytes, mask_bytes, method=inpaint_method)
    if not result.success:
        raise HTTPException(status_code=500, detail="Inpainting failed")

    elapsed_ms = int((_time.monotonic() - t0) * 1000)

    meta = {
        "method_used": result.method_used.value if hasattr(result.method_used, "value") else str(result.method_used),
        "handwriting_ratio": round(detection.handwriting_ratio, 4),
        "detection_confidence": round(detection.confidence, 4),
        "target_ink": req.target_ink,
        "dilation": req.dilation,
        "source_image": source,
        "processing_time_ms": elapsed_ms,
    }

    # 5. Persist clean image
    clean_png_bytes = image_to_png(result.image)
    await update_session_db(session_id, clean_png=clean_png_bytes, handwriting_removal_meta=meta)

    return {
        **meta,
        "image_url": f"/api/v1/ocr-pipeline/sessions/{session_id}/image/clean",
        "session_id": session_id,
    }
diff --git a/klausur-service/backend/ocr_pipeline_words.py b/klausur-service/backend/ocr_pipeline_words.py
index c3eb7d6..a1d0f87 100644
--- a/klausur-service/backend/ocr_pipeline_words.py
+++ b/klausur-service/backend/ocr_pipeline_words.py
@@ -1,18 +1,18 @@
"""
-OCR Pipeline Words - Word detection and ground truth endpoints.
+OCR Pipeline Words — composite router for word detection, PaddleOCR direct,
+and ground truth endpoints.
-Extracted from ocr_pipeline_api.py.
-Handles:
-- POST /sessions/{session_id}/words — main SSE streaming word detection
-- POST /sessions/{session_id}/paddle-direct — PaddleOCR direct endpoint
-- POST /sessions/{session_id}/ground-truth/words — save ground truth
-- GET /sessions/{session_id}/ground-truth/words — get ground truth
+Split into sub-modules:
+ ocr_pipeline_words_detect — main detect_words endpoint (Step 7)
+ ocr_pipeline_words_stream — SSE streaming generators
+
+This barrel module contains the PaddleOCR direct endpoint and ground truth
+endpoints, and assembles all word-related routers.
Lizenz: Apache 2.0
DATENSCHUTZ: Alle Verarbeitung erfolgt lokal.
"""
-import json
import logging
import time
from datetime import datetime
@@ -20,22 +20,9 @@ from typing import Any, Dict, List, Optional
import cv2
import numpy as np
-from fastapi import APIRouter, HTTPException, Request
-from fastapi.responses import StreamingResponse
+from fastapi import APIRouter, HTTPException
from pydantic import BaseModel
-from cv_vocab_pipeline import (
- PageRegion,
- RowGeometry,
- _cells_to_vocab_entries,
- _fix_character_confusion,
- _fix_phonetic_brackets,
- fix_cell_phonetics,
- build_cell_grid_v2,
- build_cell_grid_v2_streaming,
- create_ocr_image,
- detect_column_geometry,
-)
from cv_words_first import build_grid_from_words
from ocr_pipeline_session_store import (
get_session_db,
@@ -44,15 +31,13 @@ from ocr_pipeline_session_store import (
)
from ocr_pipeline_common import (
_cache,
- _load_session_to_cache,
- _get_cached,
- _get_base_image_png,
_append_pipeline_log,
)
+from ocr_pipeline_words_detect import router as _detect_router
logger = logging.getLogger(__name__)
-router = APIRouter(prefix="/api/v1/ocr-pipeline", tags=["ocr-pipeline"])
+_local_router = APIRouter(prefix="/api/v1/ocr-pipeline", tags=["ocr-pipeline"])
# ---------------------------------------------------------------------------
@@ -65,689 +50,13 @@ class WordGroundTruthRequest(BaseModel):
notes: Optional[str] = None
-# ---------------------------------------------------------------------------
-# Word Detection Endpoint (Step 7)
-# ---------------------------------------------------------------------------
-
-@router.post("/sessions/{session_id}/words")
-async def detect_words(
- session_id: str,
- request: Request,
- engine: str = "auto",
- pronunciation: str = "british",
- stream: bool = False,
- skip_heal_gaps: bool = False,
- grid_method: str = "v2",
-):
- """Build word grid from columns × rows, OCR each cell.
-
- Query params:
- engine: 'auto' (default), 'tesseract', 'rapid', or 'paddle'
- pronunciation: 'british' (default) or 'american' — for IPA dictionary lookup
- stream: false (default) for JSON response, true for SSE streaming
- skip_heal_gaps: false (default). When true, cells keep exact row geometry
- positions without gap-healing expansion. Better for overlay rendering.
- grid_method: 'v2' (default) or 'words_first' — grid construction strategy.
- 'v2' uses pre-detected columns/rows (top-down).
- 'words_first' clusters words bottom-up (no column/row detection needed).
- """
- # PaddleOCR is full-page remote OCR → force words_first grid method
- if engine == "paddle" and grid_method != "words_first":
- logger.info("detect_words: engine=paddle requires words_first, overriding grid_method=%s", grid_method)
- grid_method = "words_first"
-
- if session_id not in _cache:
- logger.info("detect_words: session %s not in cache, loading from DB", session_id)
- await _load_session_to_cache(session_id)
- cached = _get_cached(session_id)
-
- dewarped_bgr = cached.get("cropped_bgr") if cached.get("cropped_bgr") is not None else cached.get("dewarped_bgr")
- if dewarped_bgr is None:
- logger.warning("detect_words: no cropped/dewarped image for session %s (cache keys: %s)",
- session_id, [k for k in cached.keys() if k.endswith('_bgr')])
- raise HTTPException(status_code=400, detail="Crop or dewarp must be completed before word detection")
-
- session = await get_session_db(session_id)
- if not session:
- raise HTTPException(status_code=404, detail=f"Session {session_id} not found")
-
- column_result = session.get("column_result")
- row_result = session.get("row_result")
- if not column_result or not column_result.get("columns"):
- # No column detection — synthesize a single full-page pseudo-column.
- # This enables the overlay pipeline which skips column detection.
- img_h_tmp, img_w_tmp = dewarped_bgr.shape[:2]
- column_result = {
- "columns": [{
- "type": "column_text",
- "x": 0, "y": 0,
- "width": img_w_tmp, "height": img_h_tmp,
- "classification_confidence": 1.0,
- "classification_method": "full_page_fallback",
- }],
- "zones": [],
- "duration_seconds": 0,
- }
- logger.info("detect_words: no column_result — using full-page pseudo-column %dx%d", img_w_tmp, img_h_tmp)
- if grid_method != "words_first" and (not row_result or not row_result.get("rows")):
- raise HTTPException(status_code=400, detail="Row detection must be completed first")
-
- # Convert column dicts back to PageRegion objects
- col_regions = [
- PageRegion(
- type=c["type"],
- x=c["x"], y=c["y"],
- width=c["width"], height=c["height"],
- classification_confidence=c.get("classification_confidence", 1.0),
- classification_method=c.get("classification_method", ""),
- )
- for c in column_result["columns"]
- ]
-
- # Convert row dicts back to RowGeometry objects
- row_geoms = [
- RowGeometry(
- index=r["index"],
- x=r["x"], y=r["y"],
- width=r["width"], height=r["height"],
- word_count=r.get("word_count", 0),
- words=[],
- row_type=r.get("row_type", "content"),
- gap_before=r.get("gap_before", 0),
- )
- for r in row_result["rows"]
- ]
-
- # Cell-First OCR (v2): no full-page word re-population needed.
- # Each cell is cropped and OCR'd in isolation → no neighbour bleeding.
- # We still need word_count > 0 for row filtering in build_cell_grid_v2,
- # so populate from cached words if available (just for counting).
- word_dicts = cached.get("_word_dicts")
- if word_dicts is None:
- ocr_img_tmp = create_ocr_image(dewarped_bgr)
- geo_result = detect_column_geometry(ocr_img_tmp, dewarped_bgr)
- if geo_result is not None:
- _geoms, left_x, right_x, top_y, bottom_y, word_dicts, inv = geo_result
- cached["_word_dicts"] = word_dicts
- cached["_inv"] = inv
- cached["_content_bounds"] = (left_x, right_x, top_y, bottom_y)
-
- if word_dicts:
- content_bounds = cached.get("_content_bounds")
- if content_bounds:
- _lx, _rx, top_y, _by = content_bounds
- else:
- top_y = min(r.y for r in row_geoms) if row_geoms else 0
-
- for row in row_geoms:
- row_y_rel = row.y - top_y
- row_bottom_rel = row_y_rel + row.height
- row.words = [
- w for w in word_dicts
- if row_y_rel <= w['top'] + w['height'] / 2 < row_bottom_rel
- ]
- row.word_count = len(row.words)
-
- # Exclude rows that fall within box zones.
- # Use inner box range (shrunk by border_thickness) so that rows at
- # the boundary (overlapping with the box border) are NOT excluded.
- zones = column_result.get("zones") or []
- box_ranges_inner = []
- for zone in zones:
- if zone.get("zone_type") == "box" and zone.get("box"):
- box = zone["box"]
- bt = max(box.get("border_thickness", 0), 5) # minimum 5px margin
- box_ranges_inner.append((box["y"] + bt, box["y"] + box["height"] - bt))
-
- if box_ranges_inner:
- def _row_in_box(r):
- center_y = r.y + r.height / 2
- return any(by_s <= center_y < by_e for by_s, by_e in box_ranges_inner)
-
- before_count = len(row_geoms)
- row_geoms = [r for r in row_geoms if not _row_in_box(r)]
- excluded = before_count - len(row_geoms)
- if excluded:
- logger.info(f"detect_words: excluded {excluded} rows inside box zones")
-
- # --- Words-First path: bottom-up grid from word boxes ---
- if grid_method == "words_first":
- t0 = time.time()
- img_h, img_w = dewarped_bgr.shape[:2]
-
- # For paddle engine: run remote PaddleOCR full-page instead of Tesseract
- if engine == "paddle":
- from cv_ocr_engines import ocr_region_paddle
-
- wf_word_dicts = await ocr_region_paddle(dewarped_bgr, region=None)
- # PaddleOCR returns absolute coordinates, no content_bounds offset needed
- cached["_paddle_word_dicts"] = wf_word_dicts
- else:
- # Get word_dicts from cache or run Tesseract full-page
- wf_word_dicts = cached.get("_word_dicts")
- if wf_word_dicts is None:
- ocr_img_tmp = create_ocr_image(dewarped_bgr)
- geo_result = detect_column_geometry(ocr_img_tmp, dewarped_bgr)
- if geo_result is not None:
- _geoms, left_x, right_x, top_y, bottom_y, wf_word_dicts, inv = geo_result
- cached["_word_dicts"] = wf_word_dicts
- cached["_inv"] = inv
- cached["_content_bounds"] = (left_x, right_x, top_y, bottom_y)
-
- if not wf_word_dicts:
- raise HTTPException(status_code=400, detail="No words detected — cannot build words-first grid")
-
- # Convert word coordinates to absolute image coordinates if needed
- # (detect_column_geometry returns words relative to content ROI)
- # PaddleOCR already returns absolute coordinates — skip offset.
- if engine != "paddle":
- content_bounds = cached.get("_content_bounds")
- if content_bounds:
- lx, _rx, ty, _by = content_bounds
- abs_words = []
- for w in wf_word_dicts:
- abs_words.append({
- **w,
- 'left': w['left'] + lx,
- 'top': w['top'] + ty,
- })
- wf_word_dicts = abs_words
-
- # Extract box rects for box-aware column clustering
- box_rects = []
- for zone in zones:
- if zone.get("zone_type") == "box" and zone.get("box"):
- box_rects.append(zone["box"])
-
- cells, columns_meta = build_grid_from_words(
- wf_word_dicts, img_w, img_h, box_rects=box_rects or None,
- )
- duration = time.time() - t0
-
- # Apply IPA phonetic fixes
- fix_cell_phonetics(cells, pronunciation=pronunciation)
-
- # Add zone_index for backward compat
- for cell in cells:
- cell.setdefault("zone_index", 0)
-
- col_types = {c['type'] for c in columns_meta}
- is_vocab = bool(col_types & {'column_en', 'column_de'})
- n_rows = len(set(c['row_index'] for c in cells)) if cells else 0
- n_cols = len(columns_meta)
- used_engine = "paddle" if engine == "paddle" else "words_first"
-
- word_result = {
- "cells": cells,
- "grid_shape": {
- "rows": n_rows,
- "cols": n_cols,
- "total_cells": len(cells),
- },
- "columns_used": columns_meta,
- "layout": "vocab" if is_vocab else "generic",
- "image_width": img_w,
- "image_height": img_h,
- "duration_seconds": round(duration, 2),
- "ocr_engine": used_engine,
- "grid_method": "words_first",
- "summary": {
- "total_cells": len(cells),
- "non_empty_cells": sum(1 for c in cells if c.get("text")),
- "low_confidence": sum(1 for c in cells if 0 < c.get("confidence", 0) < 50),
- },
- }
-
- if is_vocab or 'column_text' in col_types:
- entries = _cells_to_vocab_entries(cells, columns_meta)
- entries = _fix_phonetic_brackets(entries, pronunciation=pronunciation)
- word_result["vocab_entries"] = entries
- word_result["entries"] = entries
- word_result["entry_count"] = len(entries)
- word_result["summary"]["total_entries"] = len(entries)
- word_result["summary"]["with_english"] = sum(1 for e in entries if e.get("english"))
- word_result["summary"]["with_german"] = sum(1 for e in entries if e.get("german"))
-
- await update_session_db(session_id, word_result=word_result, current_step=8)
- cached["word_result"] = word_result
-
- logger.info(f"OCR Pipeline: words-first session {session_id}: "
- f"{len(cells)} cells ({duration:.2f}s), {n_rows} rows, {n_cols} cols")
-
- await _append_pipeline_log(session_id, "words", {
- "grid_method": "words_first",
- "total_cells": len(cells),
- "non_empty_cells": word_result["summary"]["non_empty_cells"],
- "ocr_engine": used_engine,
- "layout": word_result["layout"],
- }, duration_ms=int(duration * 1000))
-
- return {"session_id": session_id, **word_result}
-
- if stream:
- # Cell-First OCR v2: use batch-then-stream approach instead of
- # per-cell streaming. The parallel ThreadPoolExecutor in
- # build_cell_grid_v2 is much faster than sequential streaming.
- return StreamingResponse(
- _word_batch_stream_generator(
- session_id, cached, col_regions, row_geoms,
- dewarped_bgr, engine, pronunciation, request,
- skip_heal_gaps=skip_heal_gaps,
- ),
- media_type="text/event-stream",
- headers={
- "Cache-Control": "no-cache",
- "Connection": "keep-alive",
- "X-Accel-Buffering": "no",
- },
- )
-
- # --- Non-streaming path (grid_method=v2) ---
- t0 = time.time()
-
- # Create binarized OCR image (for Tesseract)
- ocr_img = create_ocr_image(dewarped_bgr)
- img_h, img_w = dewarped_bgr.shape[:2]
-
- # Build cell grid using Cell-First OCR (v2) — each cell cropped in isolation
- cells, columns_meta = build_cell_grid_v2(
- ocr_img, col_regions, row_geoms, img_w, img_h,
- ocr_engine=engine, img_bgr=dewarped_bgr,
- skip_heal_gaps=skip_heal_gaps,
- )
- duration = time.time() - t0
-
- # Add zone_index to each cell (default 0 for backward compatibility)
- for cell in cells:
- cell.setdefault("zone_index", 0)
-
- # Layout detection
- col_types = {c['type'] for c in columns_meta}
- is_vocab = bool(col_types & {'column_en', 'column_de'})
-
- # Count content rows and columns for grid_shape
- n_content_rows = len([r for r in row_geoms if r.row_type == 'content'])
- n_cols = len(columns_meta)
-
- # Determine which engine was actually used
- used_engine = cells[0].get("ocr_engine", "tesseract") if cells else engine
-
- # Apply IPA phonetic fixes directly to cell texts (for overlay mode)
- fix_cell_phonetics(cells, pronunciation=pronunciation)
-
- # Grid result (always generic)
- word_result = {
- "cells": cells,
- "grid_shape": {
- "rows": n_content_rows,
- "cols": n_cols,
- "total_cells": len(cells),
- },
- "columns_used": columns_meta,
- "layout": "vocab" if is_vocab else "generic",
- "image_width": img_w,
- "image_height": img_h,
- "duration_seconds": round(duration, 2),
- "ocr_engine": used_engine,
- "summary": {
- "total_cells": len(cells),
- "non_empty_cells": sum(1 for c in cells if c.get("text")),
- "low_confidence": sum(1 for c in cells if 0 < c.get("confidence", 0) < 50),
- },
- }
-
- # For vocab layout or single-column (box sub-sessions): map cells 1:1
- # to vocab entries (row→entry).
- has_text_col = 'column_text' in col_types
- if is_vocab or has_text_col:
- entries = _cells_to_vocab_entries(cells, columns_meta)
- entries = _fix_phonetic_brackets(entries, pronunciation=pronunciation)
- word_result["vocab_entries"] = entries
- word_result["entries"] = entries
- word_result["entry_count"] = len(entries)
- word_result["summary"]["total_entries"] = len(entries)
- word_result["summary"]["with_english"] = sum(1 for e in entries if e.get("english"))
- word_result["summary"]["with_german"] = sum(1 for e in entries if e.get("german"))
-
- # Persist to DB
- await update_session_db(
- session_id,
- word_result=word_result,
- current_step=8,
- )
-
- cached["word_result"] = word_result
-
- logger.info(f"OCR Pipeline: words session {session_id}: "
- f"layout={word_result['layout']}, "
- f"{len(cells)} cells ({duration:.2f}s), summary: {word_result['summary']}")
-
- await _append_pipeline_log(session_id, "words", {
- "total_cells": len(cells),
- "non_empty_cells": word_result["summary"]["non_empty_cells"],
- "low_confidence_count": word_result["summary"]["low_confidence"],
- "ocr_engine": used_engine,
- "layout": word_result["layout"],
- "entry_count": word_result.get("entry_count", 0),
- }, duration_ms=int(duration * 1000))
-
- return {
- "session_id": session_id,
- **word_result,
- }
-
-
-async def _word_batch_stream_generator(
- session_id: str,
- cached: Dict[str, Any],
- col_regions: List[PageRegion],
- row_geoms: List[RowGeometry],
- dewarped_bgr: np.ndarray,
- engine: str,
- pronunciation: str,
- request: Request,
- skip_heal_gaps: bool = False,
-):
- """SSE generator that runs batch OCR (parallel) then streams results.
-
- Unlike the old per-cell streaming, this uses build_cell_grid_v2 with
- ThreadPoolExecutor for parallel OCR, then emits all cells as SSE events.
- The 'preparing' event keeps the connection alive during OCR processing.
- """
- import asyncio
-
- t0 = time.time()
- ocr_img = create_ocr_image(dewarped_bgr)
- img_h, img_w = dewarped_bgr.shape[:2]
-
- _skip_types = {'column_ignore', 'header', 'footer', 'margin_top', 'margin_bottom', 'margin_left', 'margin_right'}
- n_content_rows = len([r for r in row_geoms if r.row_type == 'content'])
- n_cols = len([c for c in col_regions if c.type not in _skip_types])
- col_types = {c.type for c in col_regions if c.type not in _skip_types}
- is_vocab = bool(col_types & {'column_en', 'column_de'})
- total_cells = n_content_rows * n_cols
-
- # 1. Send meta event immediately
- meta_event = {
- "type": "meta",
- "grid_shape": {"rows": n_content_rows, "cols": n_cols, "total_cells": total_cells},
- "layout": "vocab" if is_vocab else "generic",
- }
- yield f"data: {json.dumps(meta_event)}\n\n"
-
- # 2. Send preparing event (keepalive for proxy)
- yield f"data: {json.dumps({'type': 'preparing', 'message': 'Cell-First OCR laeuft parallel...'})}\n\n"
-
- # 3. Run batch OCR in thread pool with periodic keepalive events.
- # The OCR takes 30-60s and proxy servers (Nginx) may drop idle SSE
- # connections after 30-60s. Send keepalive every 5s to prevent this.
- loop = asyncio.get_event_loop()
- ocr_future = loop.run_in_executor(
- None,
- lambda: build_cell_grid_v2(
- ocr_img, col_regions, row_geoms, img_w, img_h,
- ocr_engine=engine, img_bgr=dewarped_bgr,
- skip_heal_gaps=skip_heal_gaps,
- ),
- )
-
- # Send keepalive events every 5 seconds while OCR runs
- keepalive_count = 0
- while not ocr_future.done():
- try:
- cells, columns_meta = await asyncio.wait_for(
- asyncio.shield(ocr_future), timeout=5.0,
- )
- break # OCR finished
- except asyncio.TimeoutError:
- keepalive_count += 1
- elapsed = int(time.time() - t0)
- yield f"data: {json.dumps({'type': 'keepalive', 'elapsed': elapsed, 'message': f'OCR laeuft... ({elapsed}s)'})}\n\n"
- if await request.is_disconnected():
- logger.info(f"SSE batch: client disconnected during OCR for {session_id}")
- ocr_future.cancel()
- return
- else:
- cells, columns_meta = ocr_future.result()
-
- if await request.is_disconnected():
- logger.info(f"SSE batch: client disconnected after OCR for {session_id}")
- return
-
- # 4. Apply IPA phonetic fixes directly to cell texts (for overlay mode)
- fix_cell_phonetics(cells, pronunciation=pronunciation)
-
- # 5. Send columns meta
- if columns_meta:
- yield f"data: {json.dumps({'type': 'columns', 'columns_used': columns_meta})}\n\n"
-
- # 6. Stream all cells
- for idx, cell in enumerate(cells):
- cell_event = {
- "type": "cell",
- "cell": cell,
- "progress": {"current": idx + 1, "total": len(cells)},
- }
- yield f"data: {json.dumps(cell_event)}\n\n"
-
- # 6. Build final result and persist
- duration = time.time() - t0
- used_engine = cells[0].get("ocr_engine", "tesseract") if cells else engine
-
- word_result = {
- "cells": cells,
- "grid_shape": {"rows": n_content_rows, "cols": n_cols, "total_cells": len(cells)},
- "columns_used": columns_meta,
- "layout": "vocab" if is_vocab else "generic",
- "image_width": img_w,
- "image_height": img_h,
- "duration_seconds": round(duration, 2),
- "ocr_engine": used_engine,
- "summary": {
- "total_cells": len(cells),
- "non_empty_cells": sum(1 for c in cells if c.get("text")),
- "low_confidence": sum(1 for c in cells if 0 < c.get("confidence", 0) < 50),
- },
- }
-
- vocab_entries = None
- has_text_col = 'column_text' in col_types
- if is_vocab or has_text_col:
- entries = _cells_to_vocab_entries(cells, columns_meta)
- entries = _fix_phonetic_brackets(entries, pronunciation=pronunciation)
- word_result["vocab_entries"] = entries
- word_result["entries"] = entries
- word_result["entry_count"] = len(entries)
- word_result["summary"]["total_entries"] = len(entries)
- word_result["summary"]["with_english"] = sum(1 for e in entries if e.get("english"))
- word_result["summary"]["with_german"] = sum(1 for e in entries if e.get("german"))
- vocab_entries = entries
-
- await update_session_db(session_id, word_result=word_result, current_step=8)
- cached["word_result"] = word_result
-
- logger.info(f"OCR Pipeline SSE batch: words session {session_id}: "
- f"layout={word_result['layout']}, {len(cells)} cells ({duration:.2f}s)")
-
- # 7. Send complete event
- complete_event = {
- "type": "complete",
- "summary": word_result["summary"],
- "duration_seconds": round(duration, 2),
- "ocr_engine": used_engine,
- }
- if vocab_entries is not None:
- complete_event["vocab_entries"] = vocab_entries
- yield f"data: {json.dumps(complete_event)}\n\n"
-
-
-async def _word_stream_generator(
- session_id: str,
- cached: Dict[str, Any],
- col_regions: List[PageRegion],
- row_geoms: List[RowGeometry],
- dewarped_bgr: np.ndarray,
- engine: str,
- pronunciation: str,
- request: Request,
-):
- """SSE generator that yields cell-by-cell OCR progress."""
- t0 = time.time()
-
- ocr_img = create_ocr_image(dewarped_bgr)
- img_h, img_w = dewarped_bgr.shape[:2]
-
- # Compute grid shape upfront for the meta event
- n_content_rows = len([r for r in row_geoms if r.row_type == 'content'])
- _skip_types = {'column_ignore', 'header', 'footer', 'margin_top', 'margin_bottom', 'margin_left', 'margin_right'}
- n_cols = len([c for c in col_regions if c.type not in _skip_types])
-
- # Determine layout
- col_types = {c.type for c in col_regions if c.type not in _skip_types}
- is_vocab = bool(col_types & {'column_en', 'column_de'})
-
- # Start streaming — first event: meta
- columns_meta = None # will be set from first yield
- total_cells = n_content_rows * n_cols
-
- meta_event = {
- "type": "meta",
- "grid_shape": {"rows": n_content_rows, "cols": n_cols, "total_cells": total_cells},
- "layout": "vocab" if is_vocab else "generic",
- }
- yield f"data: {json.dumps(meta_event)}\n\n"
-
- # Keepalive: send preparing event so proxy doesn't timeout during OCR init
- yield f"data: {json.dumps({'type': 'preparing', 'message': 'Cell-First OCR wird initialisiert...'})}\n\n"
-
- # Stream cells one by one
- all_cells: List[Dict[str, Any]] = []
- cell_idx = 0
- last_keepalive = time.time()
-
- for cell, cols_meta, total in build_cell_grid_v2_streaming(
- ocr_img, col_regions, row_geoms, img_w, img_h,
- ocr_engine=engine, img_bgr=dewarped_bgr,
- ):
- if await request.is_disconnected():
- logger.info(f"SSE: client disconnected during streaming for {session_id}")
- return
-
- if columns_meta is None:
- columns_meta = cols_meta
- # Send columns_used as part of first cell or update meta
- meta_update = {
- "type": "columns",
- "columns_used": cols_meta,
- }
- yield f"data: {json.dumps(meta_update)}\n\n"
-
- all_cells.append(cell)
- cell_idx += 1
-
- cell_event = {
- "type": "cell",
- "cell": cell,
- "progress": {"current": cell_idx, "total": total},
- }
- yield f"data: {json.dumps(cell_event)}\n\n"
-
- # All cells done — build final result
- duration = time.time() - t0
- if columns_meta is None:
- columns_meta = []
-
- # Post-OCR: remove rows where ALL cells are empty (inter-row gaps
- # that had stray Tesseract artifacts giving word_count > 0).
- rows_with_text: set = set()
- for c in all_cells:
- if c.get("text", "").strip():
- rows_with_text.add(c["row_index"])
- before_filter = len(all_cells)
- all_cells = [c for c in all_cells if c["row_index"] in rows_with_text]
- empty_rows_removed = (before_filter - len(all_cells)) // max(n_cols, 1)
- if empty_rows_removed > 0:
- logger.info(f"SSE: removed {empty_rows_removed} all-empty rows after OCR")
-
- used_engine = all_cells[0].get("ocr_engine", "tesseract") if all_cells else engine
-
- # Apply IPA phonetic fixes directly to cell texts (for overlay mode)
- fix_cell_phonetics(all_cells, pronunciation=pronunciation)
-
- word_result = {
- "cells": all_cells,
- "grid_shape": {
- "rows": n_content_rows,
- "cols": n_cols,
- "total_cells": len(all_cells),
- },
- "columns_used": columns_meta,
- "layout": "vocab" if is_vocab else "generic",
- "image_width": img_w,
- "image_height": img_h,
- "duration_seconds": round(duration, 2),
- "ocr_engine": used_engine,
- "summary": {
- "total_cells": len(all_cells),
- "non_empty_cells": sum(1 for c in all_cells if c.get("text")),
- "low_confidence": sum(1 for c in all_cells if 0 < c.get("confidence", 0) < 50),
- },
- }
-
- # For vocab layout or single-column (box sub-sessions): map cells 1:1
- # to vocab entries (row→entry).
- vocab_entries = None
- has_text_col = 'column_text' in col_types
- if is_vocab or has_text_col:
- entries = _cells_to_vocab_entries(all_cells, columns_meta)
- entries = _fix_character_confusion(entries)
- entries = _fix_phonetic_brackets(entries, pronunciation=pronunciation)
- word_result["vocab_entries"] = entries
- word_result["entries"] = entries
- word_result["entry_count"] = len(entries)
- word_result["summary"]["total_entries"] = len(entries)
- word_result["summary"]["with_english"] = sum(1 for e in entries if e.get("english"))
- word_result["summary"]["with_german"] = sum(1 for e in entries if e.get("german"))
- vocab_entries = entries
-
- # Persist to DB
- await update_session_db(
- session_id,
- word_result=word_result,
- current_step=8,
- )
- cached["word_result"] = word_result
-
- logger.info(f"OCR Pipeline SSE: words session {session_id}: "
- f"layout={word_result['layout']}, "
- f"{len(all_cells)} cells ({duration:.2f}s)")
-
- # Final complete event
- complete_event = {
- "type": "complete",
- "summary": word_result["summary"],
- "duration_seconds": round(duration, 2),
- "ocr_engine": used_engine,
- }
- if vocab_entries is not None:
- complete_event["vocab_entries"] = vocab_entries
- yield f"data: {json.dumps(complete_event)}\n\n"
-
-
# ---------------------------------------------------------------------------
# PaddleOCR Direct Endpoint
# ---------------------------------------------------------------------------
-@router.post("/sessions/{session_id}/paddle-direct")
+@_local_router.post("/sessions/{session_id}/paddle-direct")
async def paddle_direct(session_id: str):
- """Run PaddleOCR on the preprocessed image and build a word grid directly.
-
- Expects orientation/deskew/dewarp/crop to be done already.
- Uses the cropped image (falls back to dewarped, then original).
- The used image is stored as cropped_png so OverlayReconstruction
- can display it as the background.
- """
- # Try preprocessed images first (crop > dewarp > original)
+ """Run PaddleOCR on the preprocessed image and build a word grid directly."""
img_png = await get_session_image(session_id, "cropped")
if not img_png:
img_png = await get_session_image(session_id, "dewarped")
@@ -770,13 +79,9 @@ async def paddle_direct(session_id: str):
if not word_dicts:
raise HTTPException(status_code=400, detail="PaddleOCR returned no words")
- # Reuse build_grid_from_words — same function that works in the regular
- # pipeline with PaddleOCR (engine=paddle, grid_method=words_first).
- # Handles phrase splitting, column clustering, and reading order.
cells, columns_meta = build_grid_from_words(word_dicts, img_w, img_h)
duration = time.time() - t0
- # Tag cells as paddle_direct
for cell in cells:
cell["ocr_engine"] = "paddle_direct"
@@ -787,11 +92,7 @@ async def paddle_direct(session_id: str):
word_result = {
"cells": cells,
- "grid_shape": {
- "rows": n_rows,
- "cols": n_cols,
- "total_cells": len(cells),
- },
+ "grid_shape": {"rows": n_rows, "cols": n_cols, "total_cells": len(cells)},
"columns_used": columns_meta,
"layout": "vocab" if is_vocab else "generic",
"image_width": img_w,
@@ -806,7 +107,6 @@ async def paddle_direct(session_id: str):
},
}
- # Store preprocessed image as cropped_png so OverlayReconstruction shows it
await update_session_db(
session_id,
word_result=word_result,
@@ -832,7 +132,7 @@ async def paddle_direct(session_id: str):
# Ground Truth Words Endpoints
# ---------------------------------------------------------------------------
-@router.post("/sessions/{session_id}/ground-truth/words")
+@_local_router.post("/sessions/{session_id}/ground-truth/words")
async def save_word_ground_truth(session_id: str, req: WordGroundTruthRequest):
"""Save ground truth feedback for the word recognition step."""
session = await get_session_db(session_id)
@@ -857,7 +157,7 @@ async def save_word_ground_truth(session_id: str, req: WordGroundTruthRequest):
return {"session_id": session_id, "ground_truth": gt}
-@router.get("/sessions/{session_id}/ground-truth/words")
+@_local_router.get("/sessions/{session_id}/ground-truth/words")
async def get_word_ground_truth(session_id: str):
"""Retrieve saved ground truth for word recognition."""
session = await get_session_db(session_id)
@@ -874,3 +174,12 @@ async def get_word_ground_truth(session_id: str):
"words_gt": words_gt,
"words_auto": session.get("word_result"),
}
+
+
+# ---------------------------------------------------------------------------
+# Composite router
+# ---------------------------------------------------------------------------
+
+router = APIRouter()
+router.include_router(_detect_router)
+router.include_router(_local_router)
diff --git a/klausur-service/backend/ocr_pipeline_words_detect.py b/klausur-service/backend/ocr_pipeline_words_detect.py
new file mode 100644
index 0000000..b70cff3
--- /dev/null
+++ b/klausur-service/backend/ocr_pipeline_words_detect.py
@@ -0,0 +1,393 @@
+"""
+OCR Pipeline Words Detect — main word detection endpoint (Step 7).
+
+Extracted from ocr_pipeline_words.py. Contains the ``detect_words``
+endpoint which handles both v2 and words_first grid methods.
+
+Lizenz: Apache 2.0
+DATENSCHUTZ: Alle Verarbeitung erfolgt lokal.
+"""
+
+import json
+import logging
+import time
+from typing import Any, Dict, List
+
+import numpy as np
+from fastapi import APIRouter, HTTPException, Request
+from fastapi.responses import StreamingResponse
+
+from cv_vocab_pipeline import (
+ PageRegion,
+ RowGeometry,
+ _cells_to_vocab_entries,
+ _fix_phonetic_brackets,
+ fix_cell_phonetics,
+ build_cell_grid_v2,
+ create_ocr_image,
+ detect_column_geometry,
+)
+from cv_words_first import build_grid_from_words
+from ocr_pipeline_session_store import (
+ get_session_db,
+ update_session_db,
+)
+from ocr_pipeline_common import (
+ _cache,
+ _load_session_to_cache,
+ _get_cached,
+ _append_pipeline_log,
+)
+from ocr_pipeline_words_stream import (
+ _word_batch_stream_generator,
+)
+
+logger = logging.getLogger(__name__)
+
+router = APIRouter(prefix="/api/v1/ocr-pipeline", tags=["ocr-pipeline"])
+
+
+# ---------------------------------------------------------------------------
+# Word Detection Endpoint (Step 7)
+# ---------------------------------------------------------------------------
+
@router.post("/sessions/{session_id}/words")
async def detect_words(
    session_id: str,
    request: Request,
    engine: str = "auto",
    pronunciation: str = "british",
    stream: bool = False,
    skip_heal_gaps: bool = False,
    grid_method: str = "v2",
):
    """Build word grid from columns x rows, OCR each cell.

    Query params:
        engine: 'auto' (default), 'tesseract', 'rapid', or 'paddle'
        pronunciation: 'british' (default) or 'american'
        stream: false (default) for JSON response, true for SSE streaming.
            Note: only honoured on the v2 path; words_first always returns JSON.
        skip_heal_gaps: false (default). When true, cells keep exact row geometry.
        grid_method: 'v2' (default) or 'words_first'

    Raises:
        HTTPException 400: crop/dewarp not done yet, or (v2 only) row
            detection has not been completed.
        HTTPException 404: session does not exist in the DB.
    """
    # PaddleOCR is full-page remote OCR -> force words_first grid method
    if engine == "paddle" and grid_method != "words_first":
        logger.info("detect_words: engine=paddle requires words_first, overriding grid_method=%s", grid_method)
        grid_method = "words_first"

    if session_id not in _cache:
        logger.info("detect_words: session %s not in cache, loading from DB", session_id)
        await _load_session_to_cache(session_id)
    cached = _get_cached(session_id)

    # Prefer the cropped image; fall back to the dewarped one.
    dewarped_bgr = cached.get("cropped_bgr") if cached.get("cropped_bgr") is not None else cached.get("dewarped_bgr")
    if dewarped_bgr is None:
        logger.warning("detect_words: no cropped/dewarped image for session %s (cache keys: %s)",
                       session_id, [k for k in cached.keys() if k.endswith('_bgr')])
        raise HTTPException(status_code=400, detail="Crop or dewarp must be completed before word detection")

    session = await get_session_db(session_id)
    if not session:
        raise HTTPException(status_code=404, detail=f"Session {session_id} not found")

    column_result = session.get("column_result")
    row_result = session.get("row_result")
    if not column_result or not column_result.get("columns"):
        # No column detection yet: treat the whole page as one pseudo-column.
        img_h_tmp, img_w_tmp = dewarped_bgr.shape[:2]
        column_result = {
            "columns": [{
                "type": "column_text",
                "x": 0, "y": 0,
                "width": img_w_tmp, "height": img_h_tmp,
                "classification_confidence": 1.0,
                "classification_method": "full_page_fallback",
            }],
            "zones": [],
            "duration_seconds": 0,
        }
        logger.info("detect_words: no column_result -- using full-page pseudo-column %dx%d", img_w_tmp, img_h_tmp)
    if grid_method != "words_first" and (not row_result or not row_result.get("rows")):
        raise HTTPException(status_code=400, detail="Row detection must be completed first")

    # Convert column dicts back to PageRegion objects
    col_regions = [
        PageRegion(
            type=c["type"],
            x=c["x"], y=c["y"],
            width=c["width"], height=c["height"],
            classification_confidence=c.get("classification_confidence", 1.0),
            classification_method=c.get("classification_method", ""),
        )
        for c in column_result["columns"]
    ]

    # Convert row dicts back to RowGeometry objects.
    # BUGFIX: on the words_first path (always used for engine=paddle) row
    # detection is optional, so row_result may be None/empty here -- guard
    # before subscripting. Previously this raised TypeError for
    # words_first sessions without a completed row step.
    row_dicts = (row_result or {}).get("rows") or []
    row_geoms = [
        RowGeometry(
            index=r["index"],
            x=r["x"], y=r["y"],
            width=r["width"], height=r["height"],
            word_count=r.get("word_count", 0),
            words=[],
            row_type=r.get("row_type", "content"),
            gap_before=r.get("gap_before", 0),
        )
        for r in row_dicts
    ]

    # Populate word counts from cached words
    word_dicts = cached.get("_word_dicts")
    if word_dicts is None:
        ocr_img_tmp = create_ocr_image(dewarped_bgr)
        geo_result = detect_column_geometry(ocr_img_tmp, dewarped_bgr)
        if geo_result is not None:
            _geoms, left_x, right_x, top_y, bottom_y, word_dicts, inv = geo_result
            cached["_word_dicts"] = word_dicts
            cached["_inv"] = inv
            cached["_content_bounds"] = (left_x, right_x, top_y, bottom_y)

    if word_dicts:
        content_bounds = cached.get("_content_bounds")
        if content_bounds:
            _lx, _rx, top_y, _by = content_bounds
        else:
            top_y = min(r.y for r in row_geoms) if row_geoms else 0

        # Assign each word to the row whose vertical span contains the
        # word's centre line (word coords are relative to content bounds).
        for row in row_geoms:
            row_y_rel = row.y - top_y
            row_bottom_rel = row_y_rel + row.height
            row.words = [
                w for w in word_dicts
                if row_y_rel <= w['top'] + w['height'] / 2 < row_bottom_rel
            ]
            row.word_count = len(row.words)

    # Exclude rows that fall within box zones
    zones = column_result.get("zones") or []
    box_ranges_inner = []
    for zone in zones:
        if zone.get("zone_type") == "box" and zone.get("box"):
            box = zone["box"]
            # Shrink by the border thickness (min 5px) so rows touching
            # only the border are kept.
            bt = max(box.get("border_thickness", 0), 5)
            box_ranges_inner.append((box["y"] + bt, box["y"] + box["height"] - bt))

    if box_ranges_inner:
        def _row_in_box(r):
            center_y = r.y + r.height / 2
            return any(by_s <= center_y < by_e for by_s, by_e in box_ranges_inner)

        before_count = len(row_geoms)
        row_geoms = [r for r in row_geoms if not _row_in_box(r)]
        excluded = before_count - len(row_geoms)
        if excluded:
            logger.info(f"detect_words: excluded {excluded} rows inside box zones")

    # --- Words-First path ---
    if grid_method == "words_first":
        return await _words_first_path(
            session_id, cached, dewarped_bgr, engine, pronunciation, zones,
        )

    if stream:
        return StreamingResponse(
            _word_batch_stream_generator(
                session_id, cached, col_regions, row_geoms,
                dewarped_bgr, engine, pronunciation, request,
                skip_heal_gaps=skip_heal_gaps,
            ),
            media_type="text/event-stream",
            headers={
                "Cache-Control": "no-cache",
                "Connection": "keep-alive",
                "X-Accel-Buffering": "no",
            },
        )

    # --- Non-streaming path (grid_method=v2) ---
    return await _v2_path(
        session_id, cached, col_regions, row_geoms,
        dewarped_bgr, engine, pronunciation, skip_heal_gaps,
    )
+
+
async def _words_first_path(
    session_id: str,
    cached: Dict[str, Any],
    dewarped_bgr: np.ndarray,
    engine: str,
    pronunciation: str,
    zones: list,
) -> dict:
    """Words-first grid construction path.

    Builds the cell grid directly from detected word boxes instead of an
    explicit row-detection step. For engine='paddle' the words come from
    a full-page PaddleOCR pass; otherwise the cached word dicts (or a
    fresh column-geometry detection) are used.

    Args:
        session_id: Pipeline session identifier.
        cached: In-memory session cache; read/written for word dicts,
            content bounds and the final ``word_result``.
        dewarped_bgr: BGR page image (cropped or dewarped).
        engine: 'paddle' or any other engine id (treated as local OCR).
        pronunciation: 'british' or 'american', forwarded to phonetic fixes.
        zones: Zone dicts from column detection; box zones constrain the grid.

    Returns:
        dict containing 'session_id' plus the full ``word_result`` payload.

    Raises:
        HTTPException 400: no words could be detected.
    """
    t0 = time.time()
    img_h, img_w = dewarped_bgr.shape[:2]

    if engine == "paddle":
        # Lazy import: the paddle engine is optional/heavyweight.
        from cv_ocr_engines import ocr_region_paddle
        wf_word_dicts = await ocr_region_paddle(dewarped_bgr, region=None)
        cached["_paddle_word_dicts"] = wf_word_dicts
    else:
        wf_word_dicts = cached.get("_word_dicts")
        if wf_word_dicts is None:
            # No cached words yet: run column-geometry detection once and
            # cache its by-products for later pipeline steps.
            ocr_img_tmp = create_ocr_image(dewarped_bgr)
            geo_result = detect_column_geometry(ocr_img_tmp, dewarped_bgr)
            if geo_result is not None:
                _geoms, left_x, right_x, top_y, bottom_y, wf_word_dicts, inv = geo_result
                cached["_word_dicts"] = wf_word_dicts
                cached["_inv"] = inv
                cached["_content_bounds"] = (left_x, right_x, top_y, bottom_y)

    if not wf_word_dicts:
        raise HTTPException(status_code=400, detail="No words detected -- cannot build words-first grid")

    # Convert word coordinates to absolute if needed
    # (cached word dicts are relative to the content bounds; paddle output
    # is assumed absolute already -- note the engine != "paddle" guard).
    if engine != "paddle":
        content_bounds = cached.get("_content_bounds")
        if content_bounds:
            lx, _rx, ty, _by = content_bounds
            abs_words = []
            for w in wf_word_dicts:
                abs_words.append({**w, 'left': w['left'] + lx, 'top': w['top'] + ty})
            wf_word_dicts = abs_words

    # Collect box-zone rectangles so the grid builder can respect them.
    box_rects = []
    for zone in zones:
        if zone.get("zone_type") == "box" and zone.get("box"):
            box_rects.append(zone["box"])

    cells, columns_meta = build_grid_from_words(
        wf_word_dicts, img_w, img_h, box_rects=box_rects or None,
    )
    duration = time.time() - t0

    fix_cell_phonetics(cells, pronunciation=pronunciation)
    for cell in cells:
        cell.setdefault("zone_index", 0)

    col_types = {c['type'] for c in columns_meta}
    is_vocab = bool(col_types & {'column_en', 'column_de'})
    n_rows = len(set(c['row_index'] for c in cells)) if cells else 0
    n_cols = len(columns_meta)
    # Reported engine: 'paddle' for remote OCR, else the method name.
    used_engine = "paddle" if engine == "paddle" else "words_first"

    word_result = {
        "cells": cells,
        "grid_shape": {"rows": n_rows, "cols": n_cols, "total_cells": len(cells)},
        "columns_used": columns_meta,
        "layout": "vocab" if is_vocab else "generic",
        "image_width": img_w,
        "image_height": img_h,
        "duration_seconds": round(duration, 2),
        "ocr_engine": used_engine,
        "grid_method": "words_first",
        "summary": {
            "total_cells": len(cells),
            "non_empty_cells": sum(1 for c in cells if c.get("text")),
            "low_confidence": sum(1 for c in cells if 0 < c.get("confidence", 0) < 50),
        },
    }

    # Vocab-style layouts additionally get row-wise entry pairs.
    if is_vocab or 'column_text' in col_types:
        entries = _cells_to_vocab_entries(cells, columns_meta)
        entries = _fix_phonetic_brackets(entries, pronunciation=pronunciation)
        word_result["vocab_entries"] = entries
        word_result["entries"] = entries
        word_result["entry_count"] = len(entries)
        word_result["summary"]["total_entries"] = len(entries)
        word_result["summary"]["with_english"] = sum(1 for e in entries if e.get("english"))
        word_result["summary"]["with_german"] = sum(1 for e in entries if e.get("german"))

    # Persist result and advance the pipeline to step 8.
    await update_session_db(session_id, word_result=word_result, current_step=8)
    cached["word_result"] = word_result

    logger.info(f"OCR Pipeline: words-first session {session_id}: "
                f"{len(cells)} cells ({duration:.2f}s), {n_rows} rows, {n_cols} cols")

    await _append_pipeline_log(session_id, "words", {
        "grid_method": "words_first",
        "total_cells": len(cells),
        "non_empty_cells": word_result["summary"]["non_empty_cells"],
        "ocr_engine": used_engine,
        "layout": word_result["layout"],
    }, duration_ms=int(duration * 1000))

    return {"session_id": session_id, **word_result}
+
+
async def _v2_path(
    session_id: str,
    cached: Dict[str, Any],
    col_regions: List[PageRegion],
    row_geoms: List[RowGeometry],
    dewarped_bgr: np.ndarray,
    engine: str,
    pronunciation: str,
    skip_heal_gaps: bool,
) -> dict:
    """Cell-First OCR v2 non-streaming path.

    Builds the full cell grid in one shot, applies phonetic fixes,
    derives vocab entries when the layout calls for them, persists the
    result to the session DB/cache, and returns it as a JSON payload.
    """
    started = time.time()
    page_h, page_w = dewarped_bgr.shape[:2]
    ocr_img = create_ocr_image(dewarped_bgr)

    cells, columns_meta = build_cell_grid_v2(
        ocr_img, col_regions, row_geoms, page_w, page_h,
        ocr_engine=engine, img_bgr=dewarped_bgr,
        skip_heal_gaps=skip_heal_gaps,
    )
    elapsed = time.time() - started

    # Cells default to zone 0 unless the grid builder assigned one.
    for cell in cells:
        cell.setdefault("zone_index", 0)

    column_kinds = {meta['type'] for meta in columns_meta}
    vocab_layout = bool(column_kinds & {'column_en', 'column_de'})
    content_row_count = sum(1 for geom in row_geoms if geom.row_type == 'content')
    column_count = len(columns_meta)
    engine_used = cells[0].get("ocr_engine", "tesseract") if cells else engine

    # IPA / pronunciation cleanup runs before text counts are taken.
    fix_cell_phonetics(cells, pronunciation=pronunciation)

    word_result = {
        "cells": cells,
        "grid_shape": {"rows": content_row_count, "cols": column_count, "total_cells": len(cells)},
        "columns_used": columns_meta,
        "layout": "vocab" if vocab_layout else "generic",
        "image_width": page_w,
        "image_height": page_h,
        "duration_seconds": round(elapsed, 2),
        "ocr_engine": engine_used,
        "summary": {
            "total_cells": len(cells),
            "non_empty_cells": sum(1 for cell in cells if cell.get("text")),
            "low_confidence": sum(1 for cell in cells if 0 < cell.get("confidence", 0) < 50),
        },
    }

    # Vocab layouts (and plain text columns) also get row-wise entries.
    if vocab_layout or 'column_text' in column_kinds:
        entries = _fix_phonetic_brackets(
            _cells_to_vocab_entries(cells, columns_meta),
            pronunciation=pronunciation,
        )
        word_result["vocab_entries"] = entries
        word_result["entries"] = entries
        word_result["entry_count"] = len(entries)
        summary = word_result["summary"]
        summary["total_entries"] = len(entries)
        summary["with_english"] = sum(1 for e in entries if e.get("english"))
        summary["with_german"] = sum(1 for e in entries if e.get("german"))

    await update_session_db(session_id, word_result=word_result, current_step=8)
    cached["word_result"] = word_result

    logger.info(f"OCR Pipeline: words session {session_id}: "
                f"layout={word_result['layout']}, "
                f"{len(cells)} cells ({elapsed:.2f}s), summary: {word_result['summary']}")

    await _append_pipeline_log(session_id, "words", {
        "total_cells": len(cells),
        "non_empty_cells": word_result["summary"]["non_empty_cells"],
        "low_confidence_count": word_result["summary"]["low_confidence"],
        "ocr_engine": engine_used,
        "layout": word_result["layout"],
        "entry_count": word_result.get("entry_count", 0),
    }, duration_ms=int(elapsed * 1000))

    return {"session_id": session_id, **word_result}
diff --git a/klausur-service/backend/ocr_pipeline_words_stream.py b/klausur-service/backend/ocr_pipeline_words_stream.py
new file mode 100644
index 0000000..bb7d990
--- /dev/null
+++ b/klausur-service/backend/ocr_pipeline_words_stream.py
@@ -0,0 +1,303 @@
+"""
+OCR Pipeline Words Stream — SSE streaming generators for word detection.
+
+Extracted from ocr_pipeline_words.py.
+
+Lizenz: Apache 2.0
+DATENSCHUTZ: Alle Verarbeitung erfolgt lokal.
+"""
+
+import json
+import logging
+import time
+from typing import Any, Dict, List
+
+import numpy as np
+from fastapi import Request
+
+from cv_vocab_pipeline import (
+ PageRegion,
+ RowGeometry,
+ _cells_to_vocab_entries,
+ _fix_character_confusion,
+ _fix_phonetic_brackets,
+ fix_cell_phonetics,
+ build_cell_grid_v2,
+ build_cell_grid_v2_streaming,
+ create_ocr_image,
+)
+from ocr_pipeline_session_store import update_session_db
+from ocr_pipeline_common import _cache
+
+logger = logging.getLogger(__name__)
+
+
async def _word_batch_stream_generator(
    session_id: str,
    cached: Dict[str, Any],
    col_regions: List[PageRegion],
    row_geoms: List[RowGeometry],
    dewarped_bgr: np.ndarray,
    engine: str,
    pronunciation: str,
    request: Request,
    skip_heal_gaps: bool = False,
):
    """SSE generator that runs batch OCR (parallel) then streams results.

    Uses build_cell_grid_v2 with ThreadPoolExecutor for parallel OCR,
    then emits all cells as SSE events.

    Event sequence: ``meta`` -> ``preparing`` -> zero or more
    ``keepalive`` -> ``columns`` (if any) -> one ``cell`` per grid cell
    -> ``complete``. Persists the assembled ``word_result`` to the DB
    and session cache before emitting ``complete``. Returns early (no
    ``complete`` event) if the client disconnects; a disconnect during
    OCR also cancels the executor future.
    """
    import asyncio

    t0 = time.time()
    ocr_img = create_ocr_image(dewarped_bgr)
    img_h, img_w = dewarped_bgr.shape[:2]

    # Region types that never contribute OCR cells.
    _skip_types = {'column_ignore', 'header', 'footer', 'margin_top', 'margin_bottom', 'margin_left', 'margin_right'}
    n_content_rows = len([r for r in row_geoms if r.row_type == 'content'])
    n_cols = len([c for c in col_regions if c.type not in _skip_types])
    col_types = {c.type for c in col_regions if c.type not in _skip_types}
    is_vocab = bool(col_types & {'column_en', 'column_de'})
    total_cells = n_content_rows * n_cols

    # 1. Send meta event immediately
    meta_event = {
        "type": "meta",
        "grid_shape": {"rows": n_content_rows, "cols": n_cols, "total_cells": total_cells},
        "layout": "vocab" if is_vocab else "generic",
    }
    yield f"data: {json.dumps(meta_event)}\n\n"

    # 2. Send preparing event (keepalive for proxy)
    yield f"data: {json.dumps({'type': 'preparing', 'message': 'Cell-First OCR laeuft parallel...'})}\n\n"

    # 3. Run batch OCR in thread pool with periodic keepalive events.
    loop = asyncio.get_event_loop()
    ocr_future = loop.run_in_executor(
        None,
        lambda: build_cell_grid_v2(
            ocr_img, col_regions, row_geoms, img_w, img_h,
            ocr_engine=engine, img_bgr=dewarped_bgr,
            skip_heal_gaps=skip_heal_gaps,
        ),
    )

    # Send keepalive events every 5 seconds while OCR runs.
    # asyncio.shield keeps the wait_for timeout from cancelling the job.
    # (Removed unused `keepalive_count` local; `elapsed` drives messages.)
    while not ocr_future.done():
        try:
            cells, columns_meta = await asyncio.wait_for(
                asyncio.shield(ocr_future), timeout=5.0,
            )
            break  # OCR finished
        except asyncio.TimeoutError:
            elapsed = int(time.time() - t0)
            yield f"data: {json.dumps({'type': 'keepalive', 'elapsed': elapsed, 'message': f'OCR laeuft... ({elapsed}s)'})}\n\n"
            if await request.is_disconnected():
                logger.info(f"SSE batch: client disconnected during OCR for {session_id}")
                ocr_future.cancel()
                return
    else:
        # while exited without break: future completed between checks.
        cells, columns_meta = ocr_future.result()

    if await request.is_disconnected():
        logger.info(f"SSE batch: client disconnected after OCR for {session_id}")
        return

    # 4. Apply IPA phonetic fixes
    fix_cell_phonetics(cells, pronunciation=pronunciation)

    # 5. Send columns meta
    if columns_meta:
        yield f"data: {json.dumps({'type': 'columns', 'columns_used': columns_meta})}\n\n"

    # 6. Stream all cells
    for idx, cell in enumerate(cells):
        cell_event = {
            "type": "cell",
            "cell": cell,
            "progress": {"current": idx + 1, "total": len(cells)},
        }
        yield f"data: {json.dumps(cell_event)}\n\n"

    # 7. Build final result and persist
    duration = time.time() - t0
    used_engine = cells[0].get("ocr_engine", "tesseract") if cells else engine

    word_result = {
        "cells": cells,
        "grid_shape": {"rows": n_content_rows, "cols": n_cols, "total_cells": len(cells)},
        "columns_used": columns_meta,
        "layout": "vocab" if is_vocab else "generic",
        "image_width": img_w,
        "image_height": img_h,
        "duration_seconds": round(duration, 2),
        "ocr_engine": used_engine,
        "summary": {
            "total_cells": len(cells),
            "non_empty_cells": sum(1 for c in cells if c.get("text")),
            "low_confidence": sum(1 for c in cells if 0 < c.get("confidence", 0) < 50),
        },
    }

    # Vocab layouts (and plain text columns) also get row-wise entries.
    vocab_entries = None
    has_text_col = 'column_text' in col_types
    if is_vocab or has_text_col:
        entries = _cells_to_vocab_entries(cells, columns_meta)
        entries = _fix_phonetic_brackets(entries, pronunciation=pronunciation)
        word_result["vocab_entries"] = entries
        word_result["entries"] = entries
        word_result["entry_count"] = len(entries)
        word_result["summary"]["total_entries"] = len(entries)
        word_result["summary"]["with_english"] = sum(1 for e in entries if e.get("english"))
        word_result["summary"]["with_german"] = sum(1 for e in entries if e.get("german"))
        vocab_entries = entries

    await update_session_db(session_id, word_result=word_result, current_step=8)
    cached["word_result"] = word_result

    logger.info(f"OCR Pipeline SSE batch: words session {session_id}: "
                f"layout={word_result['layout']}, {len(cells)} cells ({duration:.2f}s)")

    # 8. Send complete event
    complete_event = {
        "type": "complete",
        "summary": word_result["summary"],
        "duration_seconds": round(duration, 2),
        "ocr_engine": used_engine,
    }
    if vocab_entries is not None:
        complete_event["vocab_entries"] = vocab_entries
    yield f"data: {json.dumps(complete_event)}\n\n"
+
+
async def _word_stream_generator(
    session_id: str,
    cached: Dict[str, Any],
    col_regions: List[PageRegion],
    row_geoms: List[RowGeometry],
    dewarped_bgr: np.ndarray,
    engine: str,
    pronunciation: str,
    request: Request,
):
    """SSE generator that yields cell-by-cell OCR progress.

    Unlike the batch variant, this streams each cell as soon as
    ``build_cell_grid_v2_streaming`` produces it. Event sequence:
    ``meta`` -> ``preparing`` -> ``columns`` (once, with the first cell)
    -> one ``cell`` per grid cell -> ``complete``. Rows whose cells are
    all empty after OCR are dropped from the persisted result. Persists
    ``word_result`` to DB/cache before ``complete``; returns early (no
    ``complete`` event) if the client disconnects.
    """
    t0 = time.time()

    ocr_img = create_ocr_image(dewarped_bgr)
    img_h, img_w = dewarped_bgr.shape[:2]

    n_content_rows = len([r for r in row_geoms if r.row_type == 'content'])
    # Region types that never contribute OCR cells.
    _skip_types = {'column_ignore', 'header', 'footer', 'margin_top', 'margin_bottom', 'margin_left', 'margin_right'}
    n_cols = len([c for c in col_regions if c.type not in _skip_types])

    col_types = {c.type for c in col_regions if c.type not in _skip_types}
    is_vocab = bool(col_types & {'column_en', 'column_de'})

    columns_meta = None
    total_cells = n_content_rows * n_cols

    meta_event = {
        "type": "meta",
        "grid_shape": {"rows": n_content_rows, "cols": n_cols, "total_cells": total_cells},
        "layout": "vocab" if is_vocab else "generic",
    }
    yield f"data: {json.dumps(meta_event)}\n\n"

    yield f"data: {json.dumps({'type': 'preparing', 'message': 'Cell-First OCR wird initialisiert...'})}\n\n"

    # (Removed unused `last_keepalive` local -- it was never read.)
    all_cells: List[Dict[str, Any]] = []
    cell_idx = 0

    for cell, cols_meta, total in build_cell_grid_v2_streaming(
        ocr_img, col_regions, row_geoms, img_w, img_h,
        ocr_engine=engine, img_bgr=dewarped_bgr,
    ):
        if await request.is_disconnected():
            logger.info(f"SSE: client disconnected during streaming for {session_id}")
            return

        # Emit column metadata exactly once, alongside the first cell.
        if columns_meta is None:
            columns_meta = cols_meta
            meta_update = {"type": "columns", "columns_used": cols_meta}
            yield f"data: {json.dumps(meta_update)}\n\n"

        all_cells.append(cell)
        cell_idx += 1

        cell_event = {
            "type": "cell",
            "cell": cell,
            "progress": {"current": cell_idx, "total": total},
        }
        yield f"data: {json.dumps(cell_event)}\n\n"

    # All cells done
    duration = time.time() - t0
    if columns_meta is None:
        columns_meta = []

    # Remove all-empty rows
    rows_with_text: set = set()
    for c in all_cells:
        if c.get("text", "").strip():
            rows_with_text.add(c["row_index"])
    before_filter = len(all_cells)
    all_cells = [c for c in all_cells if c["row_index"] in rows_with_text]
    empty_rows_removed = (before_filter - len(all_cells)) // max(n_cols, 1)
    if empty_rows_removed > 0:
        logger.info(f"SSE: removed {empty_rows_removed} all-empty rows after OCR")

    used_engine = all_cells[0].get("ocr_engine", "tesseract") if all_cells else engine

    fix_cell_phonetics(all_cells, pronunciation=pronunciation)

    # NOTE: grid_shape reports the pre-filter content row count, matching
    # the earlier meta event (empty rows may have been removed since).
    word_result = {
        "cells": all_cells,
        "grid_shape": {"rows": n_content_rows, "cols": n_cols, "total_cells": len(all_cells)},
        "columns_used": columns_meta,
        "layout": "vocab" if is_vocab else "generic",
        "image_width": img_w,
        "image_height": img_h,
        "duration_seconds": round(duration, 2),
        "ocr_engine": used_engine,
        "summary": {
            "total_cells": len(all_cells),
            "non_empty_cells": sum(1 for c in all_cells if c.get("text")),
            "low_confidence": sum(1 for c in all_cells if 0 < c.get("confidence", 0) < 50),
        },
    }

    # Vocab layouts (and plain text columns) also get row-wise entries.
    vocab_entries = None
    has_text_col = 'column_text' in col_types
    if is_vocab or has_text_col:
        entries = _cells_to_vocab_entries(all_cells, columns_meta)
        entries = _fix_character_confusion(entries)
        entries = _fix_phonetic_brackets(entries, pronunciation=pronunciation)
        word_result["vocab_entries"] = entries
        word_result["entries"] = entries
        word_result["entry_count"] = len(entries)
        word_result["summary"]["total_entries"] = len(entries)
        word_result["summary"]["with_english"] = sum(1 for e in entries if e.get("english"))
        word_result["summary"]["with_german"] = sum(1 for e in entries if e.get("german"))
        vocab_entries = entries

    await update_session_db(session_id, word_result=word_result, current_step=8)
    cached["word_result"] = word_result

    logger.info(f"OCR Pipeline SSE: words session {session_id}: "
                f"layout={word_result['layout']}, "
                f"{len(all_cells)} cells ({duration:.2f}s)")

    complete_event = {
        "type": "complete",
        "summary": word_result["summary"],
        "duration_seconds": round(duration, 2),
        "ocr_engine": used_engine,
    }
    if vocab_entries is not None:
        complete_event["vocab_entries"] = vocab_entries
    yield f"data: {json.dumps(complete_event)}\n\n"
diff --git a/klausur-service/frontend/src/components/KorrekturDocumentViewer.tsx b/klausur-service/frontend/src/components/KorrekturDocumentViewer.tsx
new file mode 100644
index 0000000..f75d5a4
--- /dev/null
+++ b/klausur-service/frontend/src/components/KorrekturDocumentViewer.tsx
@@ -0,0 +1,79 @@
+/**
+ * KorrekturDocumentViewer — center panel document display.
+ *
+ * Extracted from KorrekturPage.tsx.
+ */
+
+import { StudentKlausur } from '../services/api'
+
+interface KorrekturDocumentViewerProps {
+ currentStudent: StudentKlausur | null
+}
+
+export default function KorrekturDocumentViewer({ currentStudent }: KorrekturDocumentViewerProps) {
+ return (
+
+
+
+
+ {currentStudent ? currentStudent.student_name : 'Dokument-Ansicht'}
+
+
+ {currentStudent && (
+ <>
+
+ OCR-Text
+
+
+ Original
+
+ >
+ )}
+
+
+
+
+ {!currentStudent ? (
+
+
{'\uD83D\uDCC4'}
+
Keine Arbeit ausgewaehlt
+
+ Waehlen Sie eine Schuelerarbeit aus der Liste oder laden Sie eine neue hoch
+
+
+ ) : currentStudent.file_path ? (
+
+
+ {'\uD83D\uDCC4'} {currentStudent.student_name}
+ {'\u2713'} Hochgeladen
+
+
+ {currentStudent.file_path.endsWith('.pdf') ? (
+
+ ) : (
+
+ )}
+
+
+ ) : (
+
+
{'\uD83D\uDCC4'}
+
Keine Datei vorhanden
+
+ Laden Sie eine Schuelerarbeit hoch, um mit der Korrektur zu beginnen.
+
+
+ )}
+
+
+
+ )
+}
diff --git a/klausur-service/frontend/src/components/KorrekturModals.tsx b/klausur-service/frontend/src/components/KorrekturModals.tsx
new file mode 100644
index 0000000..3c1d03f
--- /dev/null
+++ b/klausur-service/frontend/src/components/KorrekturModals.tsx
@@ -0,0 +1,197 @@
+/**
+ * KorrekturModals — upload modal and EH prompt modal.
+ *
+ * Extracted from KorrekturPage.tsx.
+ */
+
+import { useRef, useState } from 'react'
+
+// ---------------------------------------------------------------------------
+// Upload Modal
+// ---------------------------------------------------------------------------
+
+interface UploadModalProps {
+ open: boolean
+ onClose: () => void
+ studentName: string
+ onStudentNameChange: (name: string) => void
+ classStudents: Array<{ id: string; name: string }>
+ onUpload: () => void
+ uploading: boolean
+ selectedFile: File | null
+ onFileSelect: (e: React.ChangeEvent) => void
+}
+
+export function UploadModal({
+ open,
+ onClose,
+ studentName,
+ onStudentNameChange,
+ classStudents,
+ onUpload,
+ uploading,
+ selectedFile,
+ onFileSelect,
+}: UploadModalProps) {
+ const fileInputRef = useRef(null)
+ const [useStudentDropdown, setUseStudentDropdown] = useState(true)
+
+ if (!open) return null
+
+ return (
+
+
e.stopPropagation()}>
+
+
Schuelerarbeit hochladen
+
{'\u00D7'}
+
+
+
+
Schueler zuweisen
+
+ {classStudents.length > 0 && (
+
+
+ setUseStudentDropdown(e.target.checked)}
+ />
+ Aus Klassenliste waehlen
+
+
+ )}
+
+ {useStudentDropdown && classStudents.length > 0 ? (
+
onStudentNameChange(e.target.value)}
+ style={{ width: '100%' }}
+ >
+ -- Schueler waehlen --
+ {classStudents.map(s => (
+ {s.name}
+ ))}
+
+ ) : (
+
onStudentNameChange(e.target.value)}
+ />
+ )}
+
+ {classStudents.length === 0 && (
+
+ Keine Klassenliste verfuegbar. Bitte Namen manuell eingeben.
+
+ )}
+
+
+
+
Datei (PDF oder Bild)
+
fileInputRef.current?.click()}
+ >
+
+ {selectedFile ? (
+ <>
+
{'\uD83D\uDCC4'}
+
{selectedFile.name}
+ >
+ ) : (
+ <>
+
{'\uD83D\uDCC1'}
+
+ Klicken Sie hier oder ziehen Sie eine Datei hinein
+
+ >
+ )}
+
+
+
+
+
+ Abbrechen
+
+
+ {uploading ? 'Wird hochgeladen...' : 'Hochladen'}
+
+
+
+
+ )
+}
+
+// ---------------------------------------------------------------------------
+// EH Prompt Modal
+// ---------------------------------------------------------------------------
+
+interface EHPromptModalProps {
+ open: boolean
+ onUpload: () => void
+ onDismiss: () => void
+}
+
+export function EHPromptModal({ open, onUpload, onDismiss }: EHPromptModalProps) {
+ if (!open) return null
+
+ return (
+
+
e.stopPropagation()} style={{ maxWidth: 500 }}>
+
+
{'\uD83D\uDCCB'} Erwartungshorizont hochladen?
+
{'\u00D7'}
+
+
+
+ Sie haben die erste Schuelerarbeit hochgeladen. Moechten Sie jetzt einen
+ Erwartungshorizont hinzufuegen?
+
+
+
+
{'\u2713'} Vorteile:
+
+ KI-gestuetzte Korrekturvorschlaege basierend auf Ihrem EH
+ Bessere und konsistentere Bewertungen
+ Automatisch fuer alle Korrektoren verfuegbar
+ Ende-zu-Ende verschluesselt - nur Sie haben den Schluessel
+
+
+
+
+ Sie koennen den Erwartungshorizont auch spaeter hochladen.
+
+
+
+
+ Spaeter
+
+
+ Jetzt hochladen
+
+
+
+
+ )
+}
diff --git a/klausur-service/frontend/src/components/KorrekturSidebar.tsx b/klausur-service/frontend/src/components/KorrekturSidebar.tsx
new file mode 100644
index 0000000..4afde61
--- /dev/null
+++ b/klausur-service/frontend/src/components/KorrekturSidebar.tsx
@@ -0,0 +1,101 @@
+/**
+ * KorrekturSidebar — collapsible left sidebar with klausur info and student list.
+ *
+ * Extracted from KorrekturPage.tsx.
+ */
+
+import { StudentKlausur } from '../services/api'
+
+interface KorrekturSidebarProps {
+ collapsed: boolean
+ onToggle: () => void
+ klausurTitle: string
+ klausurModus: string
+ students: StudentKlausur[]
+ currentStudentId?: string
+ onSelectStudent: (id: string) => void
+ onDeleteStudent: (id: string, e: React.MouseEvent) => void
+ onUploadClick: () => void
+}
+
+export default function KorrekturSidebar({
+ collapsed,
+ onToggle,
+ klausurTitle,
+ klausurModus,
+ students,
+ currentStudentId,
+ onSelectStudent,
+ onDeleteStudent,
+ onUploadClick,
+}: KorrekturSidebarProps) {
+ return (
+
+
+ {collapsed ? '\u2192' : '\u2190'}
+
+
+ {!collapsed && (
+ <>
+
+
Klausur
+
+
{'\uD83D\uDCCB'}
+
+
{klausurTitle}
+
+ {klausurModus === 'landes_abitur' ? 'Abitur' : 'Vorabitur'} {'\u2022'} {students.length} Schueler
+
+
+
+
+
+
+
Schuelerarbeiten
+
+ {students.length === 0 ? (
+
+ Noch keine Arbeiten hochgeladen
+
+ ) : (
+ students.map((student) => (
+
onSelectStudent(student.id)}
+ >
+
{'\uD83D\uDCC4'}
+
+
{student.student_name}
+
+ {student.status === 'completed' ? `${student.grade_points} Punkte` : student.status}
+
+
+
onDeleteStudent(student.id, e)}
+ title="Loeschen"
+ >
+ {'\uD83D\uDDD1\uFE0F'}
+
+
+ ))
+ )}
+
+
+ + Arbeit hochladen
+
+
+ >
+ )}
+
+ )
+}
diff --git a/klausur-service/frontend/src/components/KorrekturWizardSteps.tsx b/klausur-service/frontend/src/components/KorrekturWizardSteps.tsx
new file mode 100644
index 0000000..359aa8c
--- /dev/null
+++ b/klausur-service/frontend/src/components/KorrekturWizardSteps.tsx
@@ -0,0 +1,285 @@
+/**
+ * KorrekturWizardSteps — right panel wizard content for korrektur, bewertung, gutachten.
+ *
+ * Extracted from KorrekturPage.tsx.
+ */
+
+import { StudentKlausur } from '../services/api'
+import { CRITERIA, GRADE_LABELS, WizardStep } from '../pages/KorrekturConstants'
+
+interface WizardStepIndicatorProps {
+ current: WizardStep
+}
+
+function WizardStepIndicator({ current }: WizardStepIndicatorProps) {
+ const steps: { key: WizardStep; label: string; number: string }[] = [
+ { key: 'korrektur', label: 'Korrektur', number: '1' },
+ { key: 'bewertung', label: 'Bewertung', number: '2' },
+ { key: 'gutachten', label: 'Gutachten', number: '3' },
+ ]
+
+ const currentIdx = steps.findIndex(s => s.key === current)
+
+ return (
+
+ {steps.map((step, idx) => {
+ const isCompleted = idx < currentIdx
+ const isActive = idx === currentIdx
+ return (
+
+ {isCompleted ? '\u2713' : step.number}
+ {step.label}
+
+ )
+ })}
+
+ )
+}
+
+interface KorrekturStepProps {
+ korrekturNotes: string
+ onNotesChange: (notes: string) => void
+ onComplete: () => void
+}
+
+export function KorrekturStep({ korrekturNotes, onNotesChange, onComplete }: KorrekturStepProps) {
+ return (
+ <>
+
+
+
+
{'\u270F\uFE0F'} Korrektur durchfuehren
+
+ Lesen Sie die Arbeit sorgfaeltig und machen Sie Anmerkungen direkt im Dokument.
+ Notieren Sie hier Ihre wichtigsten Beobachtungen.
+
+
+
+ Korrektur-Notizen
+
+
+
+
+
+ Weiter zur Bewertung {'\u2192'}
+
+
+ >
+ )
+}
+
+interface BewertungStepProps {
+ gradePoints: number
+ totalPercentage: number
+ localScores: Record
+ savingCriteria: boolean
+ allCriteriaFilled: boolean
+ onCriteriaChange: (criterion: string, value: number) => void
+ onBack: () => void
+ onComplete: () => void
+}
+
+export function BewertungStep({
+ gradePoints,
+ totalPercentage,
+ localScores,
+ savingCriteria,
+ allCriteriaFilled,
+ onCriteriaChange,
+ onBack,
+ onComplete,
+}: BewertungStepProps) {
+ return (
+ <>
+
+
+
+
{'\uD83D\uDCCA'} Gesamtnote
+
+
{gradePoints}
+
+ {GRADE_LABELS[gradePoints]} ({totalPercentage}%)
+
+
+
+
+
+
+ {'\u270F\uFE0F'} Bewertungskriterien
+ {savingCriteria && (Speichert...) }
+
+
+ {CRITERIA.map(c => (
+
+
+ {c.label} ({Math.round(c.weight * 100)}%)
+ {localScores[c.key] || 0}%
+
+
onCriteriaChange(c.key, Number(e.target.value))}
+ />
+
+ ))}
+
+
+
+
+
+ {'\u2190'} Zurueck
+
+
+ Weiter {'\u2192'}
+
+
+ {!allCriteriaFilled && (
+
+ Bitte alle Kriterien bewerten
+
+ )}
+
+ >
+ )
+}
+
+interface GutachtenStepProps {
+ gradePoints: number
+ currentStudent: StudentKlausur
+ localGutachten: { einleitung: string; hauptteil: string; fazit: string }
+ generatingGutachten: boolean
+ savingGutachten: boolean
+ finalizingStudent: boolean
+ onGutachtenChange: (field: 'einleitung' | 'hauptteil' | 'fazit', value: string) => void
+ onGenerate: () => void
+ onSave: () => void
+ onFinalize: () => void
+ onBack: () => void
+}
+
+export function GutachtenStep({
+ gradePoints,
+ currentStudent,
+ localGutachten,
+ generatingGutachten,
+ savingGutachten,
+ finalizingStudent,
+ onGutachtenChange,
+ onGenerate,
+ onSave,
+ onFinalize,
+ onBack,
+}: GutachtenStepProps) {
+ return (
+ <>
+
+
+
+
+ {'\uD83D\uDCCA'} Endergebnis: {gradePoints} Punkte ({GRADE_LABELS[gradePoints]})
+
+
+
+
+
+ {'\uD83D\uDCDD'} Gutachten
+ {savingGutachten && (Speichert...) }
+
+
+
+ {generatingGutachten ? '\u231B KI generiert...' : '\uD83E\uDD16 KI-Gutachten generieren'}
+
+
+
+ Einleitung
+
+
+ Hauptteil
+
+
+ Fazit
+
+
+
+ {savingGutachten ? '\uD83D\uDCBE Speichert...' : '\uD83D\uDCBE Gutachten speichern'}
+
+
+
+
+
+
+ {'\u2190'} Zurueck
+
+
+
+ {currentStudent.status === 'completed'
+ ? '\u2713 Abgeschlossen'
+ : finalizingStudent
+ ? 'Wird abgeschlossen...'
+ : '\u2713 Bewertung abschliessen'}
+
+
+ >
+ )
+}
diff --git a/klausur-service/frontend/src/pages/KorrekturConstants.ts b/klausur-service/frontend/src/pages/KorrekturConstants.ts
new file mode 100644
index 0000000..31eda50
--- /dev/null
+++ b/klausur-service/frontend/src/pages/KorrekturConstants.ts
@@ -0,0 +1,37 @@
+/**
+ * KorrekturPage Constants — grade tables, criteria definitions, types.
+ *
+ * Extracted from KorrekturPage.tsx.
+ */
+
+// Grade calculation
+export const GRADE_THRESHOLDS: Record = {
+ 15: 95, 14: 90, 13: 85, 12: 80, 11: 75, 10: 70,
+ 9: 65, 8: 60, 7: 55, 6: 50, 5: 45, 4: 40,
+ 3: 33, 2: 27, 1: 20, 0: 0
+}
+
+export const GRADE_LABELS: Record = {
+ 15: '1+', 14: '1', 13: '1-', 12: '2+', 11: '2', 10: '2-',
+ 9: '3+', 8: '3', 7: '3-', 6: '4+', 5: '4', 4: '4-',
+ 3: '5+', 2: '5', 1: '5-', 0: '6'
+}
+
+export const CRITERIA = [
+ { key: 'inhalt', label: 'Inhaltliche Leistung', weight: 0.40 },
+ { key: 'struktur', label: 'Aufbau & Struktur', weight: 0.15 },
+ { key: 'stil', label: 'Ausdruck & Stil', weight: 0.15 },
+ { key: 'grammatik', label: 'Grammatik', weight: 0.15 },
+ { key: 'rechtschreibung', label: 'Rechtschreibung', weight: 0.15 }
+] as const
+
+export type WizardStep = 'korrektur' | 'bewertung' | 'gutachten'
+
+export function calculateGradePoints(percentage: number): number {
+ for (const [points, threshold] of Object.entries(GRADE_THRESHOLDS).sort((a, b) => Number(b[0]) - Number(a[0]))) {
+ if (percentage >= threshold) {
+ return Number(points)
+ }
+ }
+ return 0
+}
diff --git a/klausur-service/frontend/src/pages/KorrekturPage.tsx b/klausur-service/frontend/src/pages/KorrekturPage.tsx
index e9e7b31..1614201 100644
--- a/klausur-service/frontend/src/pages/KorrekturPage.tsx
+++ b/klausur-service/frontend/src/pages/KorrekturPage.tsx
@@ -1,47 +1,29 @@
-import { useState, useEffect, useRef } from 'react'
+/**
+ * KorrekturPage — orchestrator for the Klausur correction workflow.
+ *
+ * Split into sub-components:
+ * KorrekturConstants — grades, criteria, types
+ * KorrekturSidebar — collapsible left sidebar
+ * KorrekturDocumentViewer — center document display
+ * KorrekturWizardSteps — wizard step panels (korrektur, bewertung, gutachten)
+ * KorrekturModals — upload + EH prompt modals
+ */
+
+import { useState, useEffect } from 'react'
import { useParams, useNavigate } from 'react-router-dom'
import { useKlausur } from '../hooks/useKlausur'
-import { klausurApi, uploadStudentWork, StudentKlausur, klausurEHApi, LinkedEHInfo } from '../services/api'
+import { klausurApi, uploadStudentWork, klausurEHApi, LinkedEHInfo } from '../services/api'
import EHUploadWizard from '../components/EHUploadWizard'
-
-// Grade calculation
-const GRADE_THRESHOLDS: Record = {
- 15: 95, 14: 90, 13: 85, 12: 80, 11: 75, 10: 70,
- 9: 65, 8: 60, 7: 55, 6: 50, 5: 45, 4: 40,
- 3: 33, 2: 27, 1: 20, 0: 0
-}
-
-const GRADE_LABELS: Record = {
- 15: '1+', 14: '1', 13: '1-', 12: '2+', 11: '2', 10: '2-',
- 9: '3+', 8: '3', 7: '3-', 6: '4+', 5: '4', 4: '4-',
- 3: '5+', 2: '5', 1: '5-', 0: '6'
-}
-
-const CRITERIA = [
- { key: 'inhalt', label: 'Inhaltliche Leistung', weight: 0.40 },
- { key: 'struktur', label: 'Aufbau & Struktur', weight: 0.15 },
- { key: 'stil', label: 'Ausdruck & Stil', weight: 0.15 },
- { key: 'grammatik', label: 'Grammatik', weight: 0.15 },
- { key: 'rechtschreibung', label: 'Rechtschreibung', weight: 0.15 }
-]
-
-// Wizard steps
-type WizardStep = 'korrektur' | 'bewertung' | 'gutachten'
-
-function calculateGradePoints(percentage: number): number {
- for (const [points, threshold] of Object.entries(GRADE_THRESHOLDS).sort((a, b) => Number(b[0]) - Number(a[0]))) {
- if (percentage >= threshold) {
- return Number(points)
- }
- }
- return 0
-}
+import KorrekturSidebar from '../components/KorrekturSidebar'
+import KorrekturDocumentViewer from '../components/KorrekturDocumentViewer'
+import { KorrekturStep, BewertungStep, GutachtenStep } from '../components/KorrekturWizardSteps'
+import { UploadModal, EHPromptModal } from '../components/KorrekturModals'
+import { CRITERIA, WizardStep, calculateGradePoints } from './KorrekturConstants'
export default function KorrekturPage() {
const { klausurId } = useParams<{ klausurId: string }>()
const navigate = useNavigate()
const { currentKlausur, currentStudent, selectKlausur, selectStudent, refreshAndSelectStudent, loading, error } = useKlausur()
- const fileInputRef = useRef(null)
// Wizard state
const [wizardStep, setWizardStep] = useState('korrektur')
@@ -53,7 +35,6 @@ export default function KorrekturPage() {
const [studentName, setStudentName] = useState('')
const [selectedFile, setSelectedFile] = useState(null)
const [classStudents, setClassStudents] = useState>([])
- const [useStudentDropdown, setUseStudentDropdown] = useState(true)
// Korrektur state (Step 1)
const [korrekturNotes, setKorrekturNotes] = useState('')
@@ -64,317 +45,137 @@ export default function KorrekturPage() {
// Gutachten state (Step 3)
const [generatingGutachten, setGeneratingGutachten] = useState(false)
- const [localGutachten, setLocalGutachten] = useState<{
- einleitung: string
- hauptteil: string
- fazit: string
- }>({ einleitung: '', hauptteil: '', fazit: '' })
+ const [localGutachten, setLocalGutachten] = useState({ einleitung: '', hauptteil: '', fazit: '' })
const [savingGutachten, setSavingGutachten] = useState(false)
const [finalizingStudent, setFinalizingStudent] = useState(false)
- // BYOEH state - Erwartungshorizont integration
+ // BYOEH state
const [showEHPrompt, setShowEHPrompt] = useState(false)
const [showEHWizard, setShowEHWizard] = useState(false)
const [linkedEHs, setLinkedEHs] = useState([])
const [ehPromptDismissed, setEhPromptDismissed] = useState(false)
- const [_loadingEHs, setLoadingEHs] = useState(false)
// Load klausur on mount
- useEffect(() => {
- if (klausurId) {
- selectKlausur(klausurId)
- }
- }, [klausurId, selectKlausur])
+ useEffect(() => { if (klausurId) selectKlausur(klausurId) }, [klausurId, selectKlausur])
// Load class students when upload modal opens
useEffect(() => {
if (uploadModalOpen && currentKlausur?.class_id) {
- loadClassStudents(currentKlausur.class_id)
+ fetch(`/api/school/classes/${currentKlausur.class_id}/students`)
+ .then(r => r.ok ? r.json() : [])
+ .then(setClassStudents)
+ .catch(() => {})
}
}, [uploadModalOpen, currentKlausur?.class_id])
- const loadClassStudents = async (classId: string) => {
- try {
- const resp = await fetch(`/api/school/classes/${classId}/students`)
- if (resp.ok) {
- const data = await resp.json()
- setClassStudents(data)
- }
- } catch (e) {
- console.error('Failed to load class students:', e)
- }
- }
-
- // Load linked Erwartungshorizonte for this Klausur
+ // Load linked EHs
const loadLinkedEHs = async () => {
if (!klausurId) return
- setLoadingEHs(true)
- try {
- const ehs = await klausurEHApi.getLinkedEH(klausurId)
- setLinkedEHs(ehs)
- } catch (e) {
- console.error('Failed to load linked EHs:', e)
- } finally {
- setLoadingEHs(false)
- }
+ try { setLinkedEHs(await klausurEHApi.getLinkedEH(klausurId)) } catch { /* ignore */ }
}
+ useEffect(() => { if (klausurId) loadLinkedEHs() }, [klausurId])
- // Load linked EHs when klausur changes
+ // Show EH prompt after first student upload
useEffect(() => {
- if (klausurId) {
- loadLinkedEHs()
- }
- }, [klausurId])
-
- // Show EH prompt after first student upload (if no EH is linked yet)
- useEffect(() => {
- // After upload is complete and modal is closed
- if (
- currentKlausur &&
- currentKlausur.students.length === 1 &&
- linkedEHs.length === 0 &&
- !ehPromptDismissed &&
- !uploadModalOpen &&
- !showEHWizard
- ) {
- // Check localStorage to see if prompt was already shown for this klausur
- const dismissedKey = `eh_prompt_dismissed_${klausurId}`
- if (!localStorage.getItem(dismissedKey)) {
- setShowEHPrompt(true)
- }
+ if (currentKlausur && currentKlausur.students.length === 1 && linkedEHs.length === 0
+ && !ehPromptDismissed && !uploadModalOpen && !showEHWizard) {
+ if (!localStorage.getItem(`eh_prompt_dismissed_${klausurId}`)) setShowEHPrompt(true)
}
}, [currentKlausur?.students.length, linkedEHs.length, uploadModalOpen, showEHWizard, ehPromptDismissed])
- // Handle EH prompt responses
- const handleEHPromptUpload = () => {
- setShowEHPrompt(false)
- setShowEHWizard(true)
- }
-
- const handleEHPromptDismiss = () => {
- setShowEHPrompt(false)
- setEhPromptDismissed(true)
- if (klausurId) {
- localStorage.setItem(`eh_prompt_dismissed_${klausurId}`, 'true')
- }
- }
-
- // Handle EH wizard completion
- const handleEHWizardComplete = async () => {
- setShowEHWizard(false)
- // Reload linked EHs
- await loadLinkedEHs()
- }
-
// Sync local state with current student
useEffect(() => {
if (currentStudent) {
const scores: Record = {}
- for (const c of CRITERIA) {
- scores[c.key] = currentStudent.criteria_scores?.[c.key]?.score ?? 0
- }
+ for (const c of CRITERIA) scores[c.key] = currentStudent.criteria_scores?.[c.key]?.score ?? 0
setLocalScores(scores)
-
setLocalGutachten({
einleitung: currentStudent.gutachten?.einleitung || '',
hauptteil: currentStudent.gutachten?.hauptteil || '',
fazit: currentStudent.gutachten?.fazit || ''
})
-
- // Reset wizard to first step when selecting new student
setWizardStep('korrektur')
setKorrekturNotes('')
}
}, [currentStudent?.id])
- const handleFileSelect = (e: React.ChangeEvent) => {
- if (e.target.files?.[0]) {
- setSelectedFile(e.target.files[0])
- }
- }
-
+ // --- Handlers ---
const handleUpload = async () => {
if (!klausurId || !studentName || !selectedFile) return
-
setUploading(true)
try {
const newStudent = await uploadStudentWork(klausurId, studentName, selectedFile)
- // Refresh klausur and auto-select the newly uploaded student
await refreshAndSelectStudent(klausurId, newStudent.id)
- setUploadModalOpen(false)
- setStudentName('')
- setSelectedFile(null)
- } catch (e) {
- console.error('Upload failed:', e)
- alert('Fehler beim Hochladen')
- } finally {
- setUploading(false)
- }
+ setUploadModalOpen(false); setStudentName(''); setSelectedFile(null)
+ } catch { alert('Fehler beim Hochladen') }
+ finally { setUploading(false) }
}
const handleDeleteStudent = async (studentId: string, e: React.MouseEvent) => {
e.stopPropagation()
if (!confirm('Schuelerarbeit wirklich loeschen?')) return
-
- try {
- await klausurApi.deleteStudent(studentId)
- if (klausurId) {
- await selectKlausur(klausurId, true)
- }
- } catch (e) {
- console.error('Failed to delete student:', e)
- }
+ try { await klausurApi.deleteStudent(studentId); if (klausurId) await selectKlausur(klausurId, true) } catch { /* ignore */ }
}
- // Step 1: Complete Korrektur and go to Bewertung
- const handleKorrekturComplete = () => {
- setWizardStep('bewertung')
- }
-
- // Step 2: Save criteria scores
const handleCriteriaChange = async (criterion: string, value: number) => {
setLocalScores(prev => ({ ...prev, [criterion]: value }))
-
if (!currentStudent) return
-
setSavingCriteria(true)
- try {
- await klausurApi.updateCriteria(currentStudent.id, criterion, value)
- if (klausurId) {
- await selectKlausur(klausurId, true)
- }
- } catch (e) {
- console.error('Failed to update criteria:', e)
- } finally {
- setSavingCriteria(false)
- }
+ try { await klausurApi.updateCriteria(currentStudent.id, criterion, value); if (klausurId) await selectKlausur(klausurId, true) } catch { /* ignore */ }
+ finally { setSavingCriteria(false) }
}
- // Check if all criteria are filled
const allCriteriaFilled = CRITERIA.every(c => (localScores[c.key] || 0) > 0)
- // Step 2: Complete Bewertung and go to Gutachten
- const handleBewertungComplete = () => {
- if (!allCriteriaFilled) {
- alert('Bitte alle Bewertungskriterien ausfuellen')
- return
- }
- setWizardStep('gutachten')
- }
-
- // Step 3: Generate Gutachten
const handleGenerateGutachten = async () => {
if (!currentStudent) return
-
setGeneratingGutachten(true)
try {
- const generated = await klausurApi.generateGutachten(currentStudent.id, {
- include_strengths: true,
- include_weaknesses: true,
- tone: 'formal'
- })
- setLocalGutachten({
- einleitung: generated.einleitung,
- hauptteil: generated.hauptteil,
- fazit: generated.fazit
- })
- } catch (e) {
- console.error('Failed to generate gutachten:', e)
- alert('Fehler bei der KI-Generierung')
- } finally {
- setGeneratingGutachten(false)
- }
+ const g = await klausurApi.generateGutachten(currentStudent.id, { include_strengths: true, include_weaknesses: true, tone: 'formal' })
+ setLocalGutachten({ einleitung: g.einleitung, hauptteil: g.hauptteil, fazit: g.fazit })
+ } catch { alert('Fehler bei der KI-Generierung') }
+ finally { setGeneratingGutachten(false) }
}
- // Step 3: Save Gutachten
const handleSaveGutachten = async () => {
if (!currentStudent) return
-
setSavingGutachten(true)
- try {
- await klausurApi.updateGutachten(currentStudent.id, {
- einleitung: localGutachten.einleitung,
- hauptteil: localGutachten.hauptteil,
- fazit: localGutachten.fazit
- })
- if (klausurId) {
- await selectKlausur(klausurId, true)
- }
- } catch (e) {
- console.error('Failed to save gutachten:', e)
- alert('Fehler beim Speichern des Gutachtens')
- } finally {
- setSavingGutachten(false)
- }
+ try { await klausurApi.updateGutachten(currentStudent.id, localGutachten); if (klausurId) await selectKlausur(klausurId, true) } catch { alert('Fehler beim Speichern') }
+ finally { setSavingGutachten(false) }
}
- // Finalize the correction
const handleFinalizeStudent = async () => {
- if (!currentStudent) return
- if (!confirm('Bewertung wirklich abschliessen? Dies kann nicht rueckgaengig gemacht werden.')) return
-
+ if (!currentStudent || !confirm('Bewertung wirklich abschliessen?')) return
setFinalizingStudent(true)
- try {
- await klausurApi.finalizeStudent(currentStudent.id)
- if (klausurId) {
- await selectKlausur(klausurId, true)
- }
- } catch (e) {
- console.error('Failed to finalize:', e)
- alert('Fehler beim Abschliessen der Bewertung')
- } finally {
- setFinalizingStudent(false)
- }
+ try { await klausurApi.finalizeStudent(currentStudent.id); if (klausurId) await selectKlausur(klausurId, true) } catch { alert('Fehler beim Abschliessen') }
+ finally { setFinalizingStudent(false) }
}
const calculateTotalPercentage = (): number => {
let total = 0
- for (const c of CRITERIA) {
- total += (localScores[c.key] || 0) * c.weight
- }
+ for (const c of CRITERIA) total += (localScores[c.key] || 0) * c.weight
return Math.round(total)
}
- if (loading && !currentKlausur) {
- return (
-
-
-
Klausur wird geladen...
-
- )
+ const handleEHPromptDismiss = () => {
+ setShowEHPrompt(false); setEhPromptDismissed(true)
+ if (klausurId) localStorage.setItem(`eh_prompt_dismissed_${klausurId}`, 'true')
}
- if (error) {
- return (
-
-
Fehler: {error}
-
navigate('/')}>
- Zurueck zur Startseite
-
-
- )
- }
-
- if (!currentKlausur) {
- return (
-
-
Klausur nicht gefunden
-
navigate('/')}>
- Zurueck zur Startseite
-
-
- )
- }
+ // --- Loading/Error states ---
+ if (loading && !currentKlausur) return
+ if (error) return Fehler: {error}
navigate('/')}>Zurueck zur Startseite
+ if (!currentKlausur) return Klausur nicht gefunden
navigate('/')}>Zurueck zur Startseite
const totalPercentage = calculateTotalPercentage()
const gradePoints = calculateGradePoints(totalPercentage)
- // Render right panel content based on wizard step
+ // --- Wizard content ---
const renderWizardContent = () => {
if (!currentStudent) {
return (
-
📋
+
{'\uD83D\uDCCB'}
Waehlen Sie eine Schuelerarbeit aus, um die Bewertung zu beginnen
@@ -384,568 +185,68 @@ export default function KorrekturPage() {
switch (wizardStep) {
case 'korrektur':
- return (
- <>
- {/* Step indicator */}
-
-
- 1
- Korrektur
-
-
- 2
- Bewertung
-
-
- 3
- Gutachten
-
-
-
-
-
✏️ Korrektur durchfuehren
-
- Lesen Sie die Arbeit sorgfaeltig und machen Sie Anmerkungen direkt im Dokument.
- Notieren Sie hier Ihre wichtigsten Beobachtungen.
-
-
-
- Korrektur-Notizen
-
-
-
-
-
- Weiter zur Bewertung →
-
-
- >
- )
-
+ return
setWizardStep('bewertung')} />
case 'bewertung':
- return (
- <>
- {/* Step indicator */}
-
-
- ✓
- Korrektur
-
-
- 2
- Bewertung
-
-
- 3
- Gutachten
-
-
-
-
-
- 📊 Gesamtnote
-
-
-
{gradePoints}
-
- {GRADE_LABELS[gradePoints]} ({totalPercentage}%)
-
-
-
-
-
-
- ✏️ Bewertungskriterien
- {savingCriteria && (Speichert...) }
-
-
- {CRITERIA.map(c => (
-
-
- {c.label} ({Math.round(c.weight * 100)}%)
- {localScores[c.key] || 0}%
-
-
handleCriteriaChange(c.key, Number(e.target.value))}
- />
-
- ))}
-
-
-
-
- setWizardStep('korrektur')}
- >
- ← Zurueck
-
-
- Weiter →
-
-
- {!allCriteriaFilled && (
-
- Bitte alle Kriterien bewerten
-
- )}
-
- >
- )
-
+ return setWizardStep('korrektur')} onComplete={() => { if (!allCriteriaFilled) { alert('Bitte alle Bewertungskriterien ausfuellen'); return }; setWizardStep('gutachten') }} />
case 'gutachten':
- return (
- <>
- {/* Step indicator */}
-
-
- ✓
- Korrektur
-
-
- ✓
- Bewertung
-
-
- 3
- Gutachten
-
-
-
-
-
- 📊 Endergebnis: {gradePoints} Punkte ({GRADE_LABELS[gradePoints]})
-
-
-
-
-
- 📝 Gutachten
- {savingGutachten && (Speichert...) }
-
-
-
- {generatingGutachten ? '⏳ KI generiert...' : '🤖 KI-Gutachten generieren'}
-
-
-
- Einleitung
-
-
- Hauptteil
-
-
- Fazit
-
-
-
- {savingGutachten ? '💾 Speichert...' : '💾 Gutachten speichern'}
-
-
-
-
-
- setWizardStep('bewertung')}
- >
- ← Zurueck
-
-
-
- {currentStudent.status === 'completed'
- ? '✓ Abgeschlossen'
- : finalizingStudent
- ? 'Wird abgeschlossen...'
- : '✓ Bewertung abschliessen'}
-
-
- >
- )
+ return setLocalGutachten(prev => ({ ...prev, [f]: v }))} onGenerate={handleGenerateGutachten} onSave={handleSaveGutachten} onFinalize={handleFinalizeStudent} onBack={() => setWizardStep('bewertung')} />
}
}
return (
- {/* Collapsible Left Sidebar */}
-
-
setSidebarCollapsed(!sidebarCollapsed)}
- title={sidebarCollapsed ? 'Sidebar einblenden' : 'Sidebar ausblenden'}
- >
- {sidebarCollapsed ? '→' : '←'}
-
+
setSidebarCollapsed(!sidebarCollapsed)}
+ klausurTitle={currentKlausur.title}
+ klausurModus={currentKlausur.modus}
+ students={currentKlausur.students}
+ currentStudentId={currentStudent?.id}
+ onSelectStudent={selectStudent}
+ onDeleteStudent={handleDeleteStudent}
+ onUploadClick={() => setUploadModalOpen(true)}
+ />
- {!sidebarCollapsed && (
- <>
-
-
Klausur
-
-
📋
-
-
{currentKlausur.title}
-
- {currentKlausur.modus === 'landes_abitur' ? 'Abitur' : 'Vorabitur'} • {currentKlausur.students.length} Schueler
-
-
-
-
+
-
-
Schuelerarbeiten
-
- {currentKlausur.students.length === 0 ? (
-
- Noch keine Arbeiten hochgeladen
-
- ) : (
- currentKlausur.students.map((student: StudentKlausur) => (
-
selectStudent(student.id)}
- >
-
📄
-
-
{student.student_name}
-
- {student.status === 'completed' ? `${student.grade_points} Punkte` : student.status}
-
-
-
handleDeleteStudent(student.id, e)}
- title="Loeschen"
- >
- 🗑️
-
-
- ))
- )}
-
-
setUploadModalOpen(true)}
- >
- + Arbeit hochladen
-
-
- >
- )}
-
-
- {/* Center - Document Viewer (2/3) */}
-
-
-
-
- {currentStudent ? currentStudent.student_name : 'Dokument-Ansicht'}
-
-
- {currentStudent && (
- <>
-
- OCR-Text
-
-
- Original
-
- >
- )}
-
-
-
-
- {!currentStudent ? (
-
-
📄
-
Keine Arbeit ausgewaehlt
-
- Waehlen Sie eine Schuelerarbeit aus der Liste oder laden Sie eine neue hoch
-
-
- ) : currentStudent.file_path ? (
-
-
- 📄 {currentStudent.student_name}
- ✓ Hochgeladen
-
-
- {currentStudent.file_path.endsWith('.pdf') ? (
-
- ) : (
-
- )}
-
-
- ) : (
-
-
📄
-
Keine Datei vorhanden
-
- Laden Sie eine Schuelerarbeit hoch, um mit der Korrektur zu beginnen.
-
-
- )}
-
-
-
-
- {/* Right Panel - Wizard (1/3) */}
{renderWizardContent()}
- {/* EH Info in Sidebar - Show linked Erwartungshorizonte */}
{linkedEHs.length > 0 && (
setShowEHWizard(true)}>
- 📋
+ {'\uD83D\uDCCB'}
{linkedEHs.length} Erwartungshorizont{linkedEHs.length > 1 ? 'e' : ''} verknuepft
)}
- {/* Upload Modal */}
- {uploadModalOpen && (
-
setUploadModalOpen(false)}>
-
e.stopPropagation()}>
-
-
Schuelerarbeit hochladen
-
setUploadModalOpen(false)}>×
-
-
-
-
-
-
Datei (PDF oder Bild)
-
fileInputRef.current?.click()}
- >
-
- {selectedFile ? (
- <>
-
📄
-
{selectedFile.name}
- >
- ) : (
- <>
-
📁
-
- Klicken Sie hier oder ziehen Sie eine Datei hinein
-
- >
- )}
-
-
-
-
- setUploadModalOpen(false)}
- >
- Abbrechen
-
-
- {uploading ? 'Wird hochgeladen...' : 'Hochladen'}
-
-
-
-
- )}
-
- {/* EH Prompt Modal - Shown after first student upload */}
- {showEHPrompt && (
-
-
e.stopPropagation()} style={{ maxWidth: 500 }}>
-
-
📋 Erwartungshorizont hochladen?
-
×
-
-
-
- Sie haben die erste Schuelerarbeit hochgeladen. Moechten Sie jetzt einen
- Erwartungshorizont hinzufuegen?
-
-
-
-
✓ Vorteile:
-
- KI-gestuetzte Korrekturvorschlaege basierend auf Ihrem EH
- Bessere und konsistentere Bewertungen
- Automatisch fuer alle Korrektoren verfuegbar
- Ende-zu-Ende verschluesselt - nur Sie haben den Schluessel
-
-
-
-
- Sie koennen den Erwartungshorizont auch spaeter hochladen.
-
-
-
-
- Spaeter
-
-
- Jetzt hochladen
-
-
-
-
- )}
-
- {/* EH Upload Wizard */}
{showEHWizard && currentKlausur && (
setShowEHWizard(false)}
- onComplete={handleEHWizardComplete}
+ onComplete={async () => { setShowEHWizard(false); await loadLinkedEHs() }}
defaultSubject={currentKlausur.subject}
defaultYear={currentKlausur.year}
klausurId={klausurId}
diff --git a/studio-v2/app/_components/BackgroundBlobs.tsx b/studio-v2/app/_components/BackgroundBlobs.tsx
new file mode 100644
index 0000000..e119da4
--- /dev/null
+++ b/studio-v2/app/_components/BackgroundBlobs.tsx
@@ -0,0 +1,41 @@
+'use client'
+
+interface BackgroundBlobsProps {
+ isDark: boolean
+}
+
+export function BackgroundBlobs({ isDark }: BackgroundBlobsProps) {
+ return (
+ <>
+
+
+
+ >
+ )
+}
diff --git a/studio-v2/app/_components/DashboardContent.tsx b/studio-v2/app/_components/DashboardContent.tsx
new file mode 100644
index 0000000..9098a7c
--- /dev/null
+++ b/studio-v2/app/_components/DashboardContent.tsx
@@ -0,0 +1,385 @@
+'use client'
+
+import { useRouter } from 'next/navigation'
+import { useLanguage } from '@/lib/LanguageContext'
+import { useTheme } from '@/lib/ThemeContext'
+import { useAlerts } from '@/lib/AlertsContext'
+import { useMessages, formatMessageTime, getContactInitials } from '@/lib/MessagesContext'
+import { useActivity, formatDurationCompact } from '@/lib/ActivityContext'
+
+interface StatsItem {
+ labelKey: string
+ value: string
+ icon: string
+ color: string
+}
+
+interface RecentKlausur {
+ id: number
+ title: string
+ students: number
+ completed: number
+ statusKey: string
+}
+
+interface DashboardContentProps {
+ documents: { id: string }[]
+ setShowUploadModal: (show: boolean) => void
+ setSelectedTab: (tab: string) => void
+}
+
+export function DashboardContent({ documents, setShowUploadModal, setSelectedTab }: DashboardContentProps) {
+ const router = useRouter()
+ const { t } = useLanguage()
+ const { isDark } = useTheme()
+ const { alerts, unreadCount: alertsUnreadCount, markAsRead } = useAlerts()
+ const { conversations, unreadCount: messagesUnreadCount, contacts, markAsRead: markMessageAsRead } = useMessages()
+ const { stats: activityStats } = useActivity()
+
+ const timeSaved = formatDurationCompact(activityStats.weekSavedSeconds)
+ const timeSavedDisplay = activityStats.weekSavedSeconds > 0
+ ? `${timeSaved.value}${timeSaved.unit}`
+ : '0min'
+
+ const stats: StatsItem[] = [
+ { labelKey: 'stat_open_corrections', value: '12', icon: '📋', color: 'from-blue-400 to-blue-600' },
+ { labelKey: 'stat_completed_week', value: String(activityStats.activityCount), icon: '✅', color: 'from-green-400 to-green-600' },
+ { labelKey: 'stat_average', value: '2.3', icon: '📈', color: 'from-purple-400 to-purple-600' },
+ { labelKey: 'stat_time_saved', value: timeSavedDisplay, icon: '⏱', color: 'from-orange-400 to-orange-600' },
+ ]
+
+ const recentKlausuren: RecentKlausur[] = [
+ { id: 1, title: 'Deutsch LK - Textanalyse', students: 24, completed: 18, statusKey: 'status_in_progress' },
+ { id: 2, title: 'Deutsch GK - Erörterung', students: 28, completed: 28, statusKey: 'status_completed' },
+ { id: 3, title: 'Vorabitur - Gedichtanalyse', students: 22, completed: 10, statusKey: 'status_in_progress' },
+ ]
+
+ return (
+ <>
+ {/* Stats Kacheln */}
+
+ {stats.map((stat, index) => (
+
+
+
+ {stat.icon}
+
+
+
+
+
+
{t(stat.labelKey)}
+
{stat.value}
+
+ ))}
+
+
+ {/* Dashboard Grid */}
+
+ {/* Aktuelle Klausuren Kachel */}
+
+
+
{t('recent_klausuren')}
+
+ {t('show_all')} →
+
+
+
+
+ {recentKlausuren.map((klausur) => (
+
+
+ 📝
+
+
+
{klausur.title}
+
{klausur.students} {t('students')}
+
+
+
+ {t(klausur.statusKey)}
+
+
+
+
{klausur.completed}/{klausur.students}
+
+
+
+
+
+
+ ))}
+
+
+
+ {/* Schnellaktionen Kachel */}
+
+
{t('quick_actions')}
+
+
+
+ ➕
+ {t('create_klausur')}
+
+
+
setShowUploadModal(true)}
+ className={`w-full flex items-center gap-4 p-4 rounded-2xl transition-all ${
+ isDark
+ ? 'bg-white/10 text-white hover:bg-white/20'
+ : 'bg-slate-100 text-slate-800 hover:bg-slate-200'
+ }`}>
+ 📤
+ {t('upload_work')}
+
+
+
setSelectedTab('dokumente')}
+ className={`w-full flex items-center justify-between p-4 rounded-2xl transition-all ${
+ isDark
+ ? 'bg-white/10 text-white hover:bg-white/20'
+ : 'bg-slate-100 text-slate-800 hover:bg-slate-200'
+ }`}>
+
+ 📁
+ {t('nav_dokumente')}
+
+ {documents.length > 0 && (
+
+ {documents.length}
+
+ )}
+
+
+
router.push('/worksheet-editor')}
+ className={`w-full flex items-center gap-4 p-4 rounded-2xl transition-all ${
+ isDark
+ ? 'bg-gradient-to-r from-purple-500/20 to-pink-500/20 text-white hover:from-purple-500/30 hover:to-pink-500/30 border border-purple-500/30'
+ : 'bg-gradient-to-r from-purple-50 to-pink-50 text-slate-800 hover:from-purple-100 hover:to-pink-100 border border-purple-200'
+ }`}>
+ 🎨
+ {t('nav_worksheet_editor')}
+
+
+
+ ✨
+ {t('magic_help')}
+
+
+
+ 📊
+ {t('fairness_check')}
+
+
+
+ {/* AI Insight mini */}
+
+
+ 🤖
+ {t('ai_tip')}
+
+
+ {t('ai_tip_text')}
+
+
+
+ {/* Alerts Kachel */}
+
+
+
+ 🔔 Aktuelle Alerts
+
+ {alertsUnreadCount > 0 && (
+
+ {alertsUnreadCount} neu
+
+ )}
+
+
+
+ {alerts.slice(0, 3).map(alert => (
+
{
+ markAsRead(alert.id)
+ router.push('/alerts')
+ }}
+ className={`w-full text-left p-2 rounded-lg transition-all text-sm ${
+ isDark
+ ? `hover:bg-white/10 ${!alert.isRead ? 'bg-white/5' : ''}`
+ : `hover:bg-white ${!alert.isRead ? 'bg-white/50' : ''}`
+ }`}
+ >
+
+ {!alert.isRead && (
+
+ )}
+
+ {alert.title}
+
+
+
+ ))}
+ {alerts.length === 0 && (
+
+ Keine Alerts vorhanden
+
+ )}
+
+
+
router.push('/alerts')}
+ className={`w-full mt-3 text-sm font-medium ${
+ isDark ? 'text-amber-400 hover:text-amber-300' : 'text-amber-600 hover:text-amber-700'
+ }`}
+ >
+ Alle Alerts anzeigen →
+
+
+
+ {/* Nachrichten Kachel */}
+
+
+
+ 💬 {t('nav_messages')}
+
+ {messagesUnreadCount > 0 && (
+
+ {messagesUnreadCount} neu
+
+ )}
+
+
+
+ {conversations.slice(0, 3).map(conv => {
+ const contact = contacts.find(c => conv.participant_ids.includes(c.id))
+ return (
+
{
+ if (conv.unread_count > 0) {
+ markMessageAsRead(conv.id)
+ }
+ router.push('/messages')
+ }}
+ className={`w-full text-left p-2 rounded-lg transition-all text-sm ${
+ isDark
+ ? `hover:bg-white/10 ${conv.unread_count > 0 ? 'bg-white/5' : ''}`
+ : `hover:bg-white ${conv.unread_count > 0 ? 'bg-white/50' : ''}`
+ }`}
+ >
+
+
+ {conv.title ? getContactInitials(conv.title) : '?'}
+
+
+
+ {conv.unread_count > 0 && (
+
+ )}
+
+ {conv.title || 'Unbenannt'}
+
+
+ {conv.last_message && (
+
+ {conv.last_message}
+
+ )}
+
+ {conv.last_message_time && (
+
+ {formatMessageTime(conv.last_message_time)}
+
+ )}
+
+
+ )
+ })}
+ {conversations.length === 0 && (
+
+ Keine Nachrichten vorhanden
+
+ )}
+
+
+
router.push('/messages')}
+ className={`w-full mt-3 text-sm font-medium ${
+ isDark ? 'text-green-400 hover:text-green-300' : 'text-green-600 hover:text-green-700'
+ }`}
+ >
+ Alle Nachrichten anzeigen →
+
+
+
+
+ >
+ )
+}
diff --git a/studio-v2/app/_components/DocumentsTab.tsx b/studio-v2/app/_components/DocumentsTab.tsx
new file mode 100644
index 0000000..8d2d011
--- /dev/null
+++ b/studio-v2/app/_components/DocumentsTab.tsx
@@ -0,0 +1,91 @@
+'use client'
+
+import { useTheme } from '@/lib/ThemeContext'
+import { DocumentSpace } from '@/components/DocumentSpace'
+
+interface StoredDocument {
+ id: string
+ name: string
+ type: string
+ size: number
+ uploadedAt: Date
+ url?: string
+}
+
+interface DocumentsTabProps {
+ documents: StoredDocument[]
+ onDelete: (id: string) => void
+ onRename: (id: string, newName: string) => void
+ setShowUploadModal: (show: boolean) => void
+ setShowQRModal: (show: boolean) => void
+}
+
+export function DocumentsTab({ documents, onDelete, onRename, setShowUploadModal, setShowQRModal }: DocumentsTabProps) {
+ const { isDark } = useTheme()
+
+ return (
+
+ {/* Upload-Optionen */}
+
+
setShowUploadModal(true)}
+ className={`p-6 rounded-3xl border backdrop-blur-xl text-left transition-all hover:scale-105 ${
+ isDark
+ ? 'bg-white/10 border-white/20 hover:bg-white/15'
+ : 'bg-white/70 border-black/10 hover:bg-white/90 shadow-lg'
+ }`}
+ >
+
+ 📤
+
+
+ Direkt hochladen
+
+
+ Ziehen Sie Dateien hierher oder klicken Sie zum Auswaehlen
+
+
+
+
setShowQRModal(true)}
+ className={`p-6 rounded-3xl border backdrop-blur-xl text-left transition-all hover:scale-105 ${
+ isDark
+ ? 'bg-white/10 border-white/20 hover:bg-white/15'
+ : 'bg-white/70 border-black/10 hover:bg-white/90 shadow-lg'
+ }`}
+ >
+
+ 📱
+
+
+ Mit Mobiltelefon hochladen
+
+
+ QR-Code scannen (nur im lokalen Netzwerk)
+
+
+
+
+ {/* Document Space */}
+
+
+ Meine Dokumente
+
+ doc.url && window.open(doc.url, '_blank')}
+ />
+
+
+ )
+}
diff --git a/studio-v2/app/_components/HeaderBar.tsx b/studio-v2/app/_components/HeaderBar.tsx
new file mode 100644
index 0000000..109298b
--- /dev/null
+++ b/studio-v2/app/_components/HeaderBar.tsx
@@ -0,0 +1,150 @@
+'use client'
+
+import { useRouter } from 'next/navigation'
+import { useLanguage } from '@/lib/LanguageContext'
+import { useTheme } from '@/lib/ThemeContext'
+import { useAlerts, getImportanceColor, getRelativeTime } from '@/lib/AlertsContext'
+import { LanguageDropdown } from '@/components/LanguageDropdown'
+import { ThemeToggle } from '@/components/ThemeToggle'
+
+interface HeaderBarProps {
+ showAlertsDropdown: boolean
+ setShowAlertsDropdown: (show: boolean) => void
+}
+
+export function HeaderBar({ showAlertsDropdown, setShowAlertsDropdown }: HeaderBarProps) {
+ const router = useRouter()
+ const { t } = useLanguage()
+ const { isDark } = useTheme()
+ const { alerts, unreadCount, markAsRead } = useAlerts()
+
+ return (
+
+
+
{t('dashboard')}
+
{t('dashboard_subtitle')}
+
+
+ {/* Search, Language & Actions */}
+
+
+
+
+
+
+ {/* Notifications Bell with Glow Effect */}
+
+
setShowAlertsDropdown(!showAlertsDropdown)}
+ className={`relative p-3 backdrop-blur-xl border rounded-2xl transition-all ${
+ unreadCount > 0
+ ? 'animate-pulse bg-gradient-to-r from-amber-500/20 to-orange-500/20 border-amber-500/30 shadow-lg shadow-amber-500/30'
+ : isDark
+ ? 'bg-white/10 border-white/20 hover:bg-white/20'
+ : 'bg-black/5 border-black/10 hover:bg-black/10'
+ } ${isDark ? 'text-white' : 'text-slate-700'}`}
+ >
+
+
+
+ {unreadCount > 0 && (
+
+ {unreadCount > 9 ? '9+' : unreadCount}
+
+ )}
+
+
+ {/* Alerts Dropdown */}
+ {showAlertsDropdown && (
+ <>
+
setShowAlertsDropdown(false)} />
+
+
+
+
+ Aktuelle Alerts
+
+ {unreadCount > 0 && (
+
+ {unreadCount} neu
+
+ )}
+
+
+
+ {alerts.slice(0, 5).map(alert => (
+
{
+ markAsRead(alert.id)
+ setShowAlertsDropdown(false)
+ router.push('/alerts')
+ }}
+ className={`w-full text-left p-4 transition-all ${
+ isDark
+ ? `hover:bg-white/5 ${!alert.isRead ? 'bg-amber-500/5 border-l-2 border-amber-500' : ''}`
+ : `hover:bg-slate-50 ${!alert.isRead ? 'bg-amber-50 border-l-2 border-amber-500' : ''}`
+ }`}
+ >
+
+
+ {alert.importance.slice(0, 4)}
+
+
+
+ {alert.title}
+
+
+ {getRelativeTime(alert.timestamp)}
+
+
+
+
+ ))}
+ {alerts.length === 0 && (
+
+ )}
+
+
+ {
+ setShowAlertsDropdown(false)
+ router.push('/alerts')
+ }}
+ className={`w-full py-2 text-sm font-medium rounded-lg transition-all ${
+ isDark
+ ? 'text-amber-400 hover:bg-amber-500/10'
+ : 'text-amber-600 hover:bg-amber-50'
+ }`}
+ >
+ Alle Alerts anzeigen →
+
+
+
+ >
+ )}
+
+
+
+ )
+}
diff --git a/studio-v2/app/_components/UploadModals.tsx b/studio-v2/app/_components/UploadModals.tsx
new file mode 100644
index 0000000..9098d40
--- /dev/null
+++ b/studio-v2/app/_components/UploadModals.tsx
@@ -0,0 +1,102 @@
+'use client'
+
+import { useTheme } from '@/lib/ThemeContext'
+import { DocumentUpload } from '@/components/DocumentUpload'
+import { QRCodeUpload } from '@/components/QRCodeUpload'
+
+interface StoredDocument {
+ id: string
+ name: string
+ type: string
+ size: number
+ uploadedAt: Date
+ url?: string
+}
+
+interface UploadModalProps {
+ documents: StoredDocument[]
+ onUploadComplete: (docs: any[]) => void
+ onClose: () => void
+ onGoToDocuments: () => void
+}
+
+export function UploadModal({ documents, onUploadComplete, onClose, onGoToDocuments }: UploadModalProps) {
+ const { isDark } = useTheme()
+
+ return (
+
+
+
+
+
+ Dokumente hochladen
+
+
+
+
+
+
+
+
{
+ onUploadComplete(docs)
+ }}
+ />
+ {/* Aktions-Buttons */}
+
+
+ {documents.length > 0 ? `${documents.length} Dokument${documents.length !== 1 ? 'e' : ''} gespeichert` : 'Noch keine Dokumente'}
+
+
+
+ Schliessen
+
+
+ Zu meinen Dokumenten
+
+
+
+
+
+ )
+}
+
+interface QRModalProps {
+ sessionId: string
+ onClose: () => void
+}
+
+export function QRModal({ sessionId, onClose }: QRModalProps) {
+ const { isDark } = useTheme()
+
+ return (
+
+ )
+}
diff --git a/studio-v2/app/korrektur/_components/CreateKlausurModal.tsx b/studio-v2/app/korrektur/_components/CreateKlausurModal.tsx
new file mode 100644
index 0000000..9b86f6a
--- /dev/null
+++ b/studio-v2/app/korrektur/_components/CreateKlausurModal.tsx
@@ -0,0 +1,106 @@
+'use client'
+
+import { useState } from 'react'
+import type { CreateKlausurData } from '../types'
+import { GlassCard } from './GlassCard'
+
+interface CreateKlausurModalProps {
+ isOpen: boolean
+ onClose: () => void
+ onSubmit: (data: CreateKlausurData) => void
+ isLoading: boolean
+ isDark?: boolean
+}
+
+export function CreateKlausurModal({ isOpen, onClose, onSubmit, isLoading, isDark = true }: CreateKlausurModalProps) {
+ const [title, setTitle] = useState('')
+ const [subject, setSubject] = useState('Deutsch')
+ const [year, setYear] = useState(new Date().getFullYear())
+ const [semester, setSemester] = useState('Abitur')
+ const [modus, setModus] = useState<'landes_abitur' | 'vorabitur'>('landes_abitur')
+
+ if (!isOpen) return null
+
+ const handleSubmit = (e: React.FormEvent) => {
+ e.preventDefault()
+ onSubmit({ title, subject, year, semester, modus })
+ }
+
+ const inputClasses = isDark
+ ? 'bg-white/10 border-white/20 text-white placeholder-white/40'
+ : 'bg-slate-100 border-slate-300 text-slate-900 placeholder-slate-400'
+
+ return (
+
+
+
+ Neue Klausur erstellen
+
+
+
+
+ )
+}
diff --git a/studio-v2/app/korrektur/_components/GlassCard.tsx b/studio-v2/app/korrektur/_components/GlassCard.tsx
new file mode 100644
index 0000000..fc0d5f2
--- /dev/null
+++ b/studio-v2/app/korrektur/_components/GlassCard.tsx
@@ -0,0 +1,49 @@
+'use client'
+
+import { useState, useEffect } from 'react'
+
+interface GlassCardProps {
+ children: React.ReactNode
+ className?: string
+ onClick?: () => void
+ size?: 'sm' | 'md' | 'lg'
+ delay?: number
+ isDark?: boolean
+}
+
+export function GlassCard({ children, className = '', onClick, size = 'md', delay = 0, isDark = true }: GlassCardProps) {
+ const [isVisible, setIsVisible] = useState(false)
+ const [isHovered, setIsHovered] = useState(false)
+
+ useEffect(() => {
+ const timer = setTimeout(() => setIsVisible(true), delay)
+ return () => clearTimeout(timer)
+ }, [delay])
+
+ const sizeClasses = { sm: 'p-4', md: 'p-5', lg: 'p-6' }
+
+ return (
+
setIsHovered(true)}
+ onMouseLeave={() => setIsHovered(false)}
+ onClick={onClick}
+ >
+ {children}
+
+ )
+}
diff --git a/studio-v2/app/korrektur/_components/KlausurCard.tsx b/studio-v2/app/korrektur/_components/KlausurCard.tsx
new file mode 100644
index 0000000..2e9c65a
--- /dev/null
+++ b/studio-v2/app/korrektur/_components/KlausurCard.tsx
@@ -0,0 +1,60 @@
+'use client'
+
+import type { Klausur } from '../types'
+import { GlassCard } from './GlassCard'
+
+interface KlausurCardProps {
+ klausur: Klausur
+ onClick: () => void
+ delay?: number
+ isDark?: boolean
+}
+
+export function KlausurCard({ klausur, onClick, delay = 0, isDark = true }: KlausurCardProps) {
+ const progress = klausur.student_count
+ ? Math.round(((klausur.completed_count || 0) / klausur.student_count) * 100)
+ : 0
+
+ const statusColor = klausur.status === 'completed'
+ ? '#22c55e'
+ : klausur.status === 'in_progress'
+ ? '#f97316'
+ : '#6b7280'
+
+ return (
+
+
+
+
{klausur.title}
+
+ {klausur.status === 'completed' ? 'Fertig' : klausur.status === 'in_progress' ? 'In Arbeit' : 'Entwurf'}
+
+
+
+
+ {klausur.subject} {klausur.semester} {klausur.year}
+
+
+
+
+ {klausur.student_count || 0} Arbeiten
+ {progress}%
+
+
+
+
+
+
+ )
+}
diff --git a/studio-v2/app/korrektur/_components/StatCard.tsx b/studio-v2/app/korrektur/_components/StatCard.tsx
new file mode 100644
index 0000000..b6cb23f
--- /dev/null
+++ b/studio-v2/app/korrektur/_components/StatCard.tsx
@@ -0,0 +1,31 @@
+'use client'
+
+import { GlassCard } from './GlassCard'
+
+interface StatCardProps {
+ label: string
+ value: string | number
+ icon: React.ReactNode
+ color?: string
+ delay?: number
+ isDark?: boolean
+}
+
+export function StatCard({ label, value, icon, color = '#a78bfa', delay = 0, isDark = true }: StatCardProps) {
+ return (
+
+
+
+ )
+}
diff --git a/studio-v2/app/korrektur/_components/UploadModals.tsx b/studio-v2/app/korrektur/_components/UploadModals.tsx
new file mode 100644
index 0000000..b4b74f1
--- /dev/null
+++ b/studio-v2/app/korrektur/_components/UploadModals.tsx
@@ -0,0 +1,199 @@
+'use client'
+
+import { QRCodeUpload, UploadedFile } from '@/components/QRCodeUpload'
+import { GlassCard } from './GlassCard'
+
+// =============================================================================
+// Direct Upload Modal
+// =============================================================================
+
+interface DirectUploadModalProps {
+ isDark: boolean
+ isDragging: boolean
+ uploadedFiles: File[]
+ isUploading: boolean
+ error: string | null
+ onDragOver: (e: React.DragEvent) => void
+ onDragLeave: (e: React.DragEvent) => void
+ onDrop: (e: React.DragEvent) => void
+ onFileSelect: (e: React.ChangeEvent
) => void
+ onRemoveFile: (idx: number) => void
+ onUpload: () => void
+ onClose: () => void
+}
+
+export function DirectUploadModal({
+ isDark, isDragging, uploadedFiles, isUploading, error,
+ onDragOver, onDragLeave, onDrop, onFileSelect, onRemoveFile, onUpload, onClose
+}: DirectUploadModalProps) {
+ return (
+
+
+
+ Arbeiten hochladen
+
+ Ziehen Sie eingescannte Klausuren hierher oder klicken Sie zum Auswaehlen.
+
+
+ {error && (
+ {error}
+ )}
+
+
+
+
+
+
+
+
+ {isDragging ? 'Dateien hier ablegen' : 'Dateien hierher ziehen'}
+
+
PDF oder Bilder (JPG, PNG)
+
+
+
+ {uploadedFiles.length > 0 && (
+
+
+ {uploadedFiles.length} Datei(en) ausgewaehlt:
+
+
+ {uploadedFiles.map((file, idx) => (
+
+
{file.name}
+
onRemoveFile(idx)} className="text-red-400 hover:text-red-300">
+
+
+
+
+
+ ))}
+
+
+ )}
+
+
+
+ Abbrechen
+
+
+ {isUploading ? 'Hochladen...' : `${uploadedFiles.length} Arbeiten hochladen`}
+
+
+
+
+ )
+}
+
+// =============================================================================
+// EH Upload Modal
+// =============================================================================
+
+interface EHUploadModalProps {
+ isDark: boolean
+ isDragging: boolean
+ ehFile: File | null
+ isUploading: boolean
+ onDragOver: (e: React.DragEvent) => void
+ onDragLeave: (e: React.DragEvent) => void
+ onDrop: (e: React.DragEvent) => void
+ onFileSelect: (e: React.ChangeEvent) => void
+ onRemoveFile: () => void
+ onUpload: () => void
+ onClose: () => void
+}
+
+export function EHUploadModal({
+ isDark, isDragging, ehFile, isUploading,
+ onDragOver, onDragLeave, onDrop, onFileSelect, onRemoveFile, onUpload, onClose
+}: EHUploadModalProps) {
+ return (
+
+
+
+ Erwartungshorizont hochladen
+
+ Laden Sie einen eigenen Erwartungshorizont fuer Vorabitur-Klausuren hoch.
+
+
+
+
+
+
+
+
+
+ {ehFile ? ehFile.name : 'EH-Datei hierher ziehen'}
+
+
PDF oder Word-Dokument
+
+
+
+ {ehFile && (
+
+ )}
+
+
+
+ Abbrechen
+
+
+ {isUploading ? 'Hochladen...' : 'EH hochladen'}
+
+
+
+
+ )
+}
+
+// =============================================================================
+// QR Code Modal
+// =============================================================================
+
+interface QRCodeModalProps {
+ isDark: boolean
+ sessionId: string
+ onClose: () => void
+ onFileUploaded?: (file: UploadedFile) => void
+}
+
+export function QRCodeModal({ isDark, sessionId, onClose, onFileUploaded }: QRCodeModalProps) {
+ return (
+
+ )
+}
diff --git a/studio-v2/app/korrektur/page.tsx b/studio-v2/app/korrektur/page.tsx
index 5479955..dfda4f7 100644
--- a/studio-v2/app/korrektur/page.tsx
+++ b/studio-v2/app/korrektur/page.tsx
@@ -3,319 +3,29 @@
import { useState, useEffect, useCallback } from 'react'
import { useRouter } from 'next/navigation'
import { useTheme } from '@/lib/ThemeContext'
-import { useLanguage } from '@/lib/LanguageContext'
import { Sidebar } from '@/components/Sidebar'
import { ThemeToggle } from '@/components/ThemeToggle'
import { LanguageDropdown } from '@/components/LanguageDropdown'
-import { QRCodeUpload, UploadedFile } from '@/components/QRCodeUpload'
-import {
- korrekturApi,
- getKorrekturStats,
- type KorrekturStats,
-} from '@/lib/korrektur/api'
+import type { UploadedFile } from '@/components/QRCodeUpload'
+import { korrekturApi, getKorrekturStats, type KorrekturStats } from '@/lib/korrektur/api'
import type { Klausur, CreateKlausurData } from './types'
+import { GlassCard } from './_components/GlassCard'
+import { StatCard } from './_components/StatCard'
+import { KlausurCard } from './_components/KlausurCard'
+import { CreateKlausurModal } from './_components/CreateKlausurModal'
+import { DirectUploadModal, EHUploadModal, QRCodeModal } from './_components/UploadModals'
-// LocalStorage Key for upload session
const SESSION_ID_KEY = 'bp_korrektur_session'
-// =============================================================================
-// GLASS CARD - Ultra Transparent (Apple Weather Style)
-// =============================================================================
-
-interface GlassCardProps {
- children: React.ReactNode
- className?: string
- onClick?: () => void
- size?: 'sm' | 'md' | 'lg'
- delay?: number
-}
-
-function GlassCard({ children, className = '', onClick, size = 'md', delay = 0, isDark = true }: GlassCardProps & { isDark?: boolean }) {
- const [isVisible, setIsVisible] = useState(false)
- const [isHovered, setIsHovered] = useState(false)
-
- useEffect(() => {
- const timer = setTimeout(() => setIsVisible(true), delay)
- return () => clearTimeout(timer)
- }, [delay])
-
- const sizeClasses = {
- sm: 'p-4',
- md: 'p-5',
- lg: 'p-6',
- }
-
- return (
- setIsHovered(true)}
- onMouseLeave={() => setIsHovered(false)}
- onClick={onClick}
- >
- {children}
-
- )
-}
-
-// =============================================================================
-// STAT CARD
-// =============================================================================
-
-interface StatCardProps {
- label: string
- value: string | number
- icon: React.ReactNode
- color?: string
- delay?: number
- isDark?: boolean
-}
-
-function StatCard({ label, value, icon, color = '#a78bfa', delay = 0, isDark = true }: StatCardProps) {
- return (
-
-
-
- )
-}
-
-// =============================================================================
-// KLAUSUR CARD
-// =============================================================================
-
-interface KlausurCardProps {
- klausur: Klausur
- onClick: () => void
- delay?: number
- isDark?: boolean
-}
-
-function KlausurCard({ klausur, onClick, delay = 0, isDark = true }: KlausurCardProps) {
- const progress = klausur.student_count
- ? Math.round(((klausur.completed_count || 0) / klausur.student_count) * 100)
- : 0
-
- const statusColor = klausur.status === 'completed'
- ? '#22c55e'
- : klausur.status === 'in_progress'
- ? '#f97316'
- : '#6b7280'
-
- return (
-
-
-
-
{klausur.title}
-
- {klausur.status === 'completed' ? 'Fertig' : klausur.status === 'in_progress' ? 'In Arbeit' : 'Entwurf'}
-
-
-
-
- {klausur.subject} {klausur.semester} {klausur.year}
-
-
-
-
- {klausur.student_count || 0} Arbeiten
- {progress}%
-
-
-
-
-
-
- )
-}
-
-// =============================================================================
-// CREATE KLAUSUR MODAL
-// =============================================================================
-
-interface CreateKlausurModalProps {
- isOpen: boolean
- onClose: () => void
- onSubmit: (data: CreateKlausurData) => void
- isLoading: boolean
- isDark?: boolean
-}
-
-function CreateKlausurModal({ isOpen, onClose, onSubmit, isLoading, isDark = true }: CreateKlausurModalProps) {
- const [title, setTitle] = useState('')
- const [subject, setSubject] = useState('Deutsch')
- const [year, setYear] = useState(new Date().getFullYear())
- const [semester, setSemester] = useState('Abitur')
- const [modus, setModus] = useState<'landes_abitur' | 'vorabitur'>('landes_abitur')
-
- if (!isOpen) return null
-
- const handleSubmit = (e: React.FormEvent) => {
- e.preventDefault()
- onSubmit({ title, subject, year, semester, modus })
- }
-
- const inputClasses = isDark
- ? 'bg-white/10 border-white/20 text-white placeholder-white/40'
- : 'bg-slate-100 border-slate-300 text-slate-900 placeholder-slate-400'
-
- return (
-
-
-
- Neue Klausur erstellen
-
-
-
-
- )
-}
-
-// =============================================================================
-// MAIN PAGE
-// =============================================================================
-
export default function KorrekturPage() {
const { isDark } = useTheme()
- const { t } = useLanguage()
const router = useRouter()
- // State
const [klausuren, setKlausuren] = useState([])
const [stats, setStats] = useState(null)
const [isLoading, setIsLoading] = useState(true)
const [error, setError] = useState(null)
- // Modal states
const [showCreateModal, setShowCreateModal] = useState(false)
const [isCreating, setIsCreating] = useState(false)
const [showQRModal, setShowQRModal] = useState(false)
@@ -327,161 +37,98 @@ export default function KorrekturPage() {
const [ehFile, setEhFile] = useState(null)
const [isUploading, setIsUploading] = useState(false)
- // Initialize session ID
useEffect(() => {
- let storedSessionId = localStorage.getItem(SESSION_ID_KEY)
- if (!storedSessionId) {
- storedSessionId = `korrektur-${Date.now()}-${Math.random().toString(36).substr(2, 9)}`
- localStorage.setItem(SESSION_ID_KEY, storedSessionId)
+ let sid = localStorage.getItem(SESSION_ID_KEY)
+ if (!sid) {
+ sid = `korrektur-${Date.now()}-${Math.random().toString(36).substr(2, 9)}`
+ localStorage.setItem(SESSION_ID_KEY, sid)
}
- setUploadSessionId(storedSessionId)
+ setUploadSessionId(sid)
}, [])
- // Load data
const loadData = useCallback(async () => {
- setIsLoading(true)
- setError(null)
-
+ setIsLoading(true); setError(null)
try {
- const [klausurenData, statsData] = await Promise.all([
- korrekturApi.getKlausuren(),
- getKorrekturStats(),
- ])
- setKlausuren(klausurenData)
- setStats(statsData)
+ const [kd, sd] = await Promise.all([korrekturApi.getKlausuren(), getKorrekturStats()])
+ setKlausuren(kd); setStats(sd)
} catch (err) {
- console.error('Failed to load data:', err)
setError(err instanceof Error ? err.message : 'Laden fehlgeschlagen')
- } finally {
- setIsLoading(false)
- }
+ } finally { setIsLoading(false) }
}, [])
- useEffect(() => {
- loadData()
- }, [loadData])
+ useEffect(() => { loadData() }, [loadData])
- // Create klausur
const handleCreateKlausur = async (data: CreateKlausurData) => {
setIsCreating(true)
try {
- const newKlausur = await korrekturApi.createKlausur(data)
- setKlausuren((prev) => [newKlausur, ...prev])
- setShowCreateModal(false)
- // Navigate to the new klausur
- router.push(`/korrektur/${newKlausur.id}`)
+ const nk = await korrekturApi.createKlausur(data)
+ setKlausuren(prev => [nk, ...prev]); setShowCreateModal(false)
+ router.push(`/korrektur/${nk.id}`)
} catch (err) {
- console.error('Failed to create klausur:', err)
setError(err instanceof Error ? err.message : 'Erstellung fehlgeschlagen')
- } finally {
- setIsCreating(false)
- }
+ } finally { setIsCreating(false) }
}
- // Handle QR uploaded files
- const handleMobileFileSelect = async (uploadedFile: UploadedFile) => {
- // For now, just close the modal - in production this would create a quick-start klausur
- setShowQRModal(false)
- // Could auto-create a klausur and navigate
- }
+ const handleMobileFileSelect = async (_: UploadedFile) => { setShowQRModal(false) }
- // Handle direct file upload with drag & drop
- const handleDragOver = (e: React.DragEvent) => {
- e.preventDefault()
- setIsDragging(true)
- }
-
- const handleDragLeave = (e: React.DragEvent) => {
- e.preventDefault()
- setIsDragging(false)
- }
+ const handleDragOver = (e: React.DragEvent) => { e.preventDefault(); setIsDragging(true) }
+ const handleDragLeave = (e: React.DragEvent) => { e.preventDefault(); setIsDragging(false) }
const handleDrop = (e: React.DragEvent, isEH = false) => {
- e.preventDefault()
- setIsDragging(false)
- const files = Array.from(e.dataTransfer.files).filter(
- f => f.type === 'application/pdf' || f.type.startsWith('image/')
- )
- if (isEH && files.length > 0) {
- setEhFile(files[0])
- } else {
- setUploadedFiles(prev => [...prev, ...files])
- }
+ e.preventDefault(); setIsDragging(false)
+ const files = Array.from(e.dataTransfer.files).filter(f => f.type === 'application/pdf' || f.type.startsWith('image/'))
+ if (isEH && files.length > 0) setEhFile(files[0])
+ else setUploadedFiles(prev => [...prev, ...files])
}
const handleFileSelect = (e: React.ChangeEvent, isEH = false) => {
if (!e.target.files) return
const files = Array.from(e.target.files)
- if (isEH && files.length > 0) {
- setEhFile(files[0])
- } else {
- setUploadedFiles(prev => [...prev, ...files])
- }
+ if (isEH && files.length > 0) setEhFile(files[0])
+ else setUploadedFiles(prev => [...prev, ...files])
}
const handleDirectUpload = async () => {
if (uploadedFiles.length === 0) return
setIsUploading(true)
try {
- // Create a quick-start klausur
- const newKlausur = await korrekturApi.createKlausur({
+ const nk = await korrekturApi.createKlausur({
title: `Schnellstart ${new Date().toLocaleDateString('de-DE')}`,
- subject: 'Deutsch',
- year: new Date().getFullYear(),
- semester: 'Abitur',
- modus: 'landes_abitur'
+ subject: 'Deutsch', year: new Date().getFullYear(), semester: 'Abitur', modus: 'landes_abitur'
})
-
- // Upload each file
for (let i = 0; i < uploadedFiles.length; i++) {
- await korrekturApi.uploadStudentWork(newKlausur.id, uploadedFiles[i], `Arbeit-${i + 1}`)
+ await korrekturApi.uploadStudentWork(nk.id, uploadedFiles[i], `Arbeit-${i + 1}`)
}
-
- setShowDirectUpload(false)
- setUploadedFiles([])
- router.push(`/korrektur/${newKlausur.id}`)
+ setShowDirectUpload(false); setUploadedFiles([])
+ router.push(`/korrektur/${nk.id}`)
} catch (err) {
- console.error('Upload failed:', err)
setError(err instanceof Error ? err.message : 'Upload fehlgeschlagen')
- } finally {
- setIsUploading(false)
- }
+ } finally { setIsUploading(false) }
}
const handleEHUpload = async () => {
if (!ehFile) return
setIsUploading(true)
try {
- // Upload EH to backend
await korrekturApi.uploadEH(ehFile)
- setShowEHUpload(false)
- setEhFile(null)
- loadData() // Refresh to show new EH
+ setShowEHUpload(false); setEhFile(null); loadData()
} catch (err) {
- console.error('EH Upload failed:', err)
setError(err instanceof Error ? err.message : 'EH Upload fehlgeschlagen')
- } finally {
- setIsUploading(false)
- }
+ } finally { setIsUploading(false) }
}
return (
- {/* Animated Background Blobs */}
+ {/* Background Blobs */}
- {/* Sidebar */}
-
-
-
+
- {/* Main Content */}
{/* Header */}
@@ -489,121 +136,57 @@ export default function KorrekturPage() {
Korrekturplattform
KI-gestuetzte Abiturklausur-Korrektur
-
-
-
-
+
- {/* Stats Cards */}
+ {/* Stats */}
{stats && (
-
-
-
- }
- color="#f97316"
- delay={100}
- isDark={isDark}
- />
-
-
-
- }
- color="#22c55e"
- delay={200}
- isDark={isDark}
- />
- 0 ? `${stats.averageGrade} P` : '-'}
- icon={
-
-
-
- }
- color="#3b82f6"
- delay={300}
- isDark={isDark}
- />
-
-
-
- }
- color="#a78bfa"
- delay={400}
- isDark={isDark}
- />
+ }
+ color="#f97316" delay={100} isDark={isDark} />
+ }
+ color="#22c55e" delay={200} isDark={isDark} />
+ 0 ? `${stats.averageGrade} P` : '-'}
+ icon={ }
+ color="#3b82f6" delay={300} isDark={isDark} />
+ }
+ color="#a78bfa" delay={400} isDark={isDark} />
)}
- {/* Error Display */}
{error && (
-
-
-
+
{error}
-
- Erneut versuchen
-
+
Erneut versuchen
)}
- {/* Loading */}
{isLoading && (
)}
- {/* Klausuren Grid */}
{!isLoading && (
<>
Klausuren
-
- {klausuren.map((klausur, index) => (
-
router.push(`/korrektur/${klausur.id}`)}
- delay={500 + index * 50}
- isDark={isDark}
- />
+ {klausuren.map((k, i) => (
+ router.push(`/korrektur/${k.id}`)} delay={500 + i * 50} isDark={isDark} />
))}
-
- {/* New Klausur Card */}
- setShowCreateModal(true)}
- delay={500 + klausuren.length * 50}
- className={`min-h-[180px] border-2 border-dashed ${isDark ? 'border-white/20 hover:border-purple-400/50' : 'border-slate-300 hover:border-purple-400'}`}
- isDark={isDark}
- >
+ setShowCreateModal(true)} delay={500 + klausuren.length * 50}
+ className={`min-h-[180px] border-2 border-dashed ${isDark ? 'border-white/20 hover:border-purple-400/50' : 'border-slate-300 hover:border-purple-400'}`} isDark={isDark}>
Neue Klausur
Klausur erstellen
@@ -611,304 +194,48 @@ export default function KorrekturPage() {
- {/* Quick Actions */}
Schnellaktionen
-
setShowQRModal(true)}
- delay={700}
- className="cursor-pointer"
- isDark={isDark}
- >
-
-
-
📱
+ {[
+ { onClick: () => setShowQRModal(true), icon: '📱', title: 'QR Upload', sub: 'Mit Handy scannen', bg: 'blue', delay: 700 },
+ { onClick: () => setShowDirectUpload(true), icon:
, title: 'Direkt hochladen', sub: 'Drag & Drop', bg: 'green', delay: 750 },
+ { onClick: () => setShowCreateModal(true), icon:
, title: 'Schnellstart', sub: 'Direkt loslegen', bg: 'purple', delay: 800 },
+ { onClick: () => setShowEHUpload(true), icon:
, title: 'EH hochladen', sub: 'Erwartungshorizont', bg: 'orange', delay: 850 },
+ { onClick: () => router.push('/korrektur/archiv'), icon:
, title: 'Abitur-Archiv', sub: 'EH durchsuchen', bg: 'indigo', delay: 900 },
+ ].map((action, i) => (
+
+
+
+ {typeof action.icon === 'string' ? {action.icon} : action.icon}
+
+
+
{action.title}
+
{action.sub}
+
-
-
QR Upload
-
Mit Handy scannen
-
-
-
-
-
setShowDirectUpload(true)}
- delay={750}
- className="cursor-pointer"
- isDark={isDark}
- >
-
-
-
-
Direkt hochladen
-
Drag & Drop
-
-
-
-
-
setShowCreateModal(true)}
- delay={800}
- className="cursor-pointer"
- isDark={isDark}
- >
-
-
-
-
Schnellstart
-
Direkt loslegen
-
-
-
-
-
setShowEHUpload(true)}
- delay={850}
- className="cursor-pointer"
- isDark={isDark}
- >
-
-
-
-
EH hochladen
-
Erwartungshorizont
-
-
-
-
-
router.push('/korrektur/archiv')}
- delay={900}
- className="cursor-pointer"
- isDark={isDark}
- >
-
-
-
-
Abitur-Archiv
-
EH durchsuchen
-
-
-
+
+ ))}
>
)}
- {/* Create Klausur Modal */}
- setShowCreateModal(false)}
- onSubmit={handleCreateKlausur}
- isLoading={isCreating}
- isDark={isDark}
- />
-
- {/* QR Code Modal */}
- {showQRModal && (
-
-
setShowQRModal(false)} />
-
- setShowQRModal(false)}
- onFileUploaded={handleMobileFileSelect}
- />
-
-
- )}
-
- {/* Direct Upload Modal */}
- {showDirectUpload && (
-
-
setShowDirectUpload(false)} />
-
- Arbeiten hochladen
-
- Ziehen Sie eingescannte Klausuren hierher oder klicken Sie zum Auswaehlen.
-
-
- {/* Error Display in Modal */}
- {error && (
-
- {error}
-
- )}
-
- {/* Drag & Drop Zone */}
- handleDrop(e, false)}
- className={`relative p-8 rounded-2xl border-2 border-dashed transition-colors ${
- isDragging
- ? 'border-purple-400 bg-purple-500/10'
- : isDark
- ? 'border-white/20 hover:border-white/40'
- : 'border-slate-300 hover:border-slate-400'
- }`}
- >
-
handleFileSelect(e, false)}
- className="absolute inset-0 w-full h-full opacity-0 cursor-pointer"
- />
-
-
-
-
-
- {isDragging ? 'Dateien hier ablegen' : 'Dateien hierher ziehen'}
-
-
- PDF oder Bilder (JPG, PNG)
-
-
-
-
- {/* Uploaded Files List */}
- {uploadedFiles.length > 0 && (
-
-
- {uploadedFiles.length} Datei(en) ausgewaehlt:
-
-
- {uploadedFiles.map((file, idx) => (
-
-
{file.name}
-
setUploadedFiles(prev => prev.filter((_, i) => i !== idx))}
- className="text-red-400 hover:text-red-300"
- >
-
-
-
-
-
- ))}
-
-
- )}
-
- {/* Actions */}
-
- { setShowDirectUpload(false); setUploadedFiles([]) }}
- className={`flex-1 px-4 py-3 rounded-xl transition-colors ${isDark ? 'bg-white/10 text-white hover:bg-white/20' : 'bg-slate-200 text-slate-700 hover:bg-slate-300'}`}
- >
- Abbrechen
-
-
- {isUploading ? 'Hochladen...' : `${uploadedFiles.length} Arbeiten hochladen`}
-
-
-
-
- )}
-
- {/* EH Upload Modal */}
- {showEHUpload && (
-
-
setShowEHUpload(false)} />
-
- Erwartungshorizont hochladen
-
- Laden Sie einen eigenen Erwartungshorizont fuer Vorabitur-Klausuren hoch.
-
-
- {/* Drag & Drop Zone */}
- handleDrop(e, true)}
- className={`relative p-8 rounded-2xl border-2 border-dashed transition-colors ${
- isDragging
- ? 'border-orange-400 bg-orange-500/10'
- : isDark
- ? 'border-white/20 hover:border-white/40'
- : 'border-slate-300 hover:border-slate-400'
- }`}
- >
-
handleFileSelect(e, true)}
- className="absolute inset-0 w-full h-full opacity-0 cursor-pointer"
- />
-
-
-
-
-
- {ehFile ? ehFile.name : 'EH-Datei hierher ziehen'}
-
-
- PDF oder Word-Dokument
-
-
-
-
- {/* Selected File */}
- {ehFile && (
-
-
-
setEhFile(null)}
- className="text-red-400 hover:text-red-300"
- >
-
-
-
-
-
- )}
-
- {/* Actions */}
-
- { setShowEHUpload(false); setEhFile(null) }}
- className={`flex-1 px-4 py-3 rounded-xl transition-colors ${isDark ? 'bg-white/10 text-white hover:bg-white/20' : 'bg-slate-200 text-slate-700 hover:bg-slate-300'}`}
- >
- Abbrechen
-
-
- {isUploading ? 'Hochladen...' : 'EH hochladen'}
-
-
-
-
- )}
+
setShowCreateModal(false)}
+ onSubmit={handleCreateKlausur} isLoading={isCreating} isDark={isDark} />
+ {showQRModal && setShowQRModal(false)} onFileUploaded={handleMobileFileSelect} />}
+ {showDirectUpload && handleDrop(e, false)} onFileSelect={(e) => handleFileSelect(e, false)}
+ onRemoveFile={(idx) => setUploadedFiles(prev => prev.filter((_, i) => i !== idx))}
+ onUpload={handleDirectUpload} onClose={() => { setShowDirectUpload(false); setUploadedFiles([]) }} />}
+ {showEHUpload && handleDrop(e, true)} onFileSelect={(e) => handleFileSelect(e, true)}
+ onRemoveFile={() => setEhFile(null)} onUpload={handleEHUpload}
+ onClose={() => { setShowEHUpload(false); setEhFile(null) }} />}
)
}
diff --git a/studio-v2/app/page-original.tsx b/studio-v2/app/page-original.tsx
deleted file mode 100644
index 5bfeb4c..0000000
--- a/studio-v2/app/page-original.tsx
+++ /dev/null
@@ -1,934 +0,0 @@
-'use client'
-
-import { useState, useEffect, useCallback } from 'react'
-import { useRouter } from 'next/navigation'
-import { BPIcon } from '@/components/Logo'
-import { useLanguage } from '@/lib/LanguageContext'
-import { useTheme } from '@/lib/ThemeContext'
-import { useAlerts, getImportanceColor, getRelativeTime } from '@/lib/AlertsContext'
-import { useMessages, formatMessageTime, getContactInitials } from '@/lib/MessagesContext'
-import { LanguageDropdown } from '@/components/LanguageDropdown'
-import { ThemeToggle } from '@/components/ThemeToggle'
-import { Footer } from '@/components/Footer'
-import { Sidebar } from '@/components/Sidebar'
-import { OnboardingWizard, OnboardingData } from '@/components/OnboardingWizard'
-import { DocumentUpload } from '@/components/DocumentUpload'
-import { QRCodeUpload } from '@/components/QRCodeUpload'
-import { DocumentSpace } from '@/components/DocumentSpace'
-import { ChatOverlay } from '@/components/ChatOverlay'
-
-// LocalStorage Keys
-const ONBOARDING_KEY = 'bp_onboarding_complete'
-const USER_DATA_KEY = 'bp_user_data'
-const DOCUMENTS_KEY = 'bp_documents'
-const FIRST_VISIT_KEY = 'bp_first_dashboard_visit'
-const SESSION_ID_KEY = 'bp_session_id'
-
-// BreakPilot Studio v2 - Glassmorphism Design
-
-interface StoredDocument {
- id: string
- name: string
- type: string
- size: number
- uploadedAt: Date
- url?: string
-}
-
-export default function HomePage() {
- const router = useRouter()
- const [selectedTab, setSelectedTab] = useState('dashboard')
- const [showOnboarding, setShowOnboarding] = useState
(null)
- const [userData, setUserData] = useState(null)
- const [documents, setDocuments] = useState([])
- const [showUploadModal, setShowUploadModal] = useState(false)
- const [showQRModal, setShowQRModal] = useState(false)
- const [isFirstVisit, setIsFirstVisit] = useState(false)
- const [sessionId, setSessionId] = useState('')
- const [showAlertsDropdown, setShowAlertsDropdown] = useState(false)
- const { t } = useLanguage()
- const { isDark } = useTheme()
- const { alerts, unreadCount, markAsRead } = useAlerts()
- const { conversations, unreadCount: messagesUnreadCount, contacts, markAsRead: markMessageAsRead } = useMessages()
-
- // Funktion zum Laden von Uploads aus der API
- const fetchUploadsFromAPI = useCallback(async (sid: string) => {
- if (!sid) return
- try {
- const response = await fetch(`/api/uploads?sessionId=${encodeURIComponent(sid)}`)
- if (response.ok) {
- const data = await response.json()
- if (data.uploads && data.uploads.length > 0) {
- // Konvertiere API-Uploads zu StoredDocument Format
- const apiDocs: StoredDocument[] = data.uploads.map((u: any) => ({
- id: u.id,
- name: u.name,
- type: u.type,
- size: u.size,
- uploadedAt: new Date(u.uploadedAt),
- url: u.dataUrl // Data URL direkt verwenden
- }))
- // Merge mit existierenden Dokumenten (ohne Duplikate)
- setDocuments(prev => {
- const existingIds = new Set(prev.map(d => d.id))
- const newDocs = apiDocs.filter(d => !existingIds.has(d.id))
- if (newDocs.length > 0) {
- return [...prev, ...newDocs]
- }
- return prev
- })
- }
- }
- } catch (error) {
- console.error('Error fetching uploads:', error)
- }
- }, [])
-
- // Prüfe beim Laden, ob Onboarding abgeschlossen ist
- useEffect(() => {
- const onboardingComplete = localStorage.getItem(ONBOARDING_KEY)
- const storedUserData = localStorage.getItem(USER_DATA_KEY)
- const storedDocs = localStorage.getItem(DOCUMENTS_KEY)
- const firstVisit = localStorage.getItem(FIRST_VISIT_KEY)
- let storedSessionId = localStorage.getItem(SESSION_ID_KEY)
-
- // Session ID generieren falls nicht vorhanden
- if (!storedSessionId) {
- storedSessionId = `session-${Date.now()}-${Math.random().toString(36).substr(2, 9)}`
- localStorage.setItem(SESSION_ID_KEY, storedSessionId)
- }
- setSessionId(storedSessionId)
-
- if (onboardingComplete === 'true' && storedUserData) {
- setUserData(JSON.parse(storedUserData))
- setShowOnboarding(false)
-
- // Dokumente laden
- if (storedDocs) {
- setDocuments(JSON.parse(storedDocs))
- }
-
- // Erster Dashboard-Besuch nach Onboarding?
- if (!firstVisit) {
- setIsFirstVisit(true)
- localStorage.setItem(FIRST_VISIT_KEY, 'true')
- }
-
- // Initialer Fetch von der API
- fetchUploadsFromAPI(storedSessionId)
- } else {
- setShowOnboarding(true)
- }
- }, [fetchUploadsFromAPI])
-
- // Polling fuer neue Uploads von der API (alle 3 Sekunden)
- useEffect(() => {
- if (!sessionId || showOnboarding) return
-
- const interval = setInterval(() => {
- fetchUploadsFromAPI(sessionId)
- }, 3000)
-
- return () => clearInterval(interval)
- }, [sessionId, showOnboarding, fetchUploadsFromAPI])
-
- // Dokumente in localStorage speichern
- useEffect(() => {
- if (documents.length > 0) {
- localStorage.setItem(DOCUMENTS_KEY, JSON.stringify(documents))
- }
- }, [documents])
-
- // Handler fuer neue Uploads
- const handleUploadComplete = (uploadedDocs: any[]) => {
- const newDocs: StoredDocument[] = uploadedDocs.map(d => ({
- id: d.id,
- name: d.name,
- type: d.type,
- size: d.size,
- uploadedAt: d.uploadedAt,
- url: d.url
- }))
- setDocuments(prev => [...prev, ...newDocs])
- setIsFirstVisit(false)
- }
-
- // Dokument loeschen (aus State und API)
- const handleDeleteDocument = async (id: string) => {
- setDocuments(prev => prev.filter(d => d.id !== id))
- // Auch aus API loeschen
- try {
- await fetch(`/api/uploads?id=${encodeURIComponent(id)}`, { method: 'DELETE' })
- } catch (error) {
- console.error('Error deleting from API:', error)
- }
- }
-
- // Dokument umbenennen
- const handleRenameDocument = (id: string, newName: string) => {
- setDocuments(prev => prev.map(d => d.id === id ? { ...d, name: newName } : d))
- }
-
- // Onboarding abschließen
- const handleOnboardingComplete = (data: OnboardingData) => {
- localStorage.setItem(ONBOARDING_KEY, 'true')
- localStorage.setItem(USER_DATA_KEY, JSON.stringify(data))
- setUserData(data)
- setShowOnboarding(false)
- }
-
- // Zeige Ladebildschirm während der Prüfung
- if (showOnboarding === null) {
- return (
-
- )
- }
-
- // Zeige Onboarding falls noch nicht abgeschlossen
- if (showOnboarding) {
- return
- }
-
- // Ab hier: Dashboard (bestehender Code)
-
- const stats = [
- { labelKey: 'stat_open_corrections', value: '12', icon: '📋', color: 'from-blue-400 to-blue-600' },
- { labelKey: 'stat_completed_week', value: '28', icon: '✅', color: 'from-green-400 to-green-600' },
- { labelKey: 'stat_average', value: '2.3', icon: '📈', color: 'from-purple-400 to-purple-600' },
- { labelKey: 'stat_time_saved', value: '4.2h', icon: '⏱', color: 'from-orange-400 to-orange-600' },
- ]
-
- const recentKlausuren = [
- { id: 1, title: 'Deutsch LK - Textanalyse', students: 24, completed: 18, statusKey: 'status_in_progress' },
- { id: 2, title: 'Deutsch GK - Erörterung', students: 28, completed: 28, statusKey: 'status_completed' },
- { id: 3, title: 'Vorabitur - Gedichtanalyse', students: 22, completed: 10, statusKey: 'status_in_progress' },
- ]
-
- return (
-
- {/* Animated Background Blobs */}
-
-
-
- {/* Sidebar */}
-
-
- {/* ============================================
- ARBEITSFLAECHE (Main Content)
- ============================================ */}
-
- {/* Kopfleiste (Header) */}
-
-
-
{t('dashboard')}
-
{t('dashboard_subtitle')}
-
-
- {/* Search, Language & Actions */}
-
-
-
- {/* Language Dropdown */}
-
-
- {/* Theme Toggle */}
-
-
- {/* Notifications Bell with Glow Effect */}
-
-
setShowAlertsDropdown(!showAlertsDropdown)}
- className={`relative p-3 backdrop-blur-xl border rounded-2xl transition-all ${
- unreadCount > 0
- ? 'animate-pulse bg-gradient-to-r from-amber-500/20 to-orange-500/20 border-amber-500/30 shadow-lg shadow-amber-500/30'
- : isDark
- ? 'bg-white/10 border-white/20 hover:bg-white/20'
- : 'bg-black/5 border-black/10 hover:bg-black/10'
- } ${isDark ? 'text-white' : 'text-slate-700'}`}
- >
-
-
-
- {unreadCount > 0 && (
-
- {unreadCount > 9 ? '9+' : unreadCount}
-
- )}
-
-
- {/* Alerts Dropdown */}
- {showAlertsDropdown && (
- <>
-
setShowAlertsDropdown(false)} />
-
-
-
-
- Aktuelle Alerts
-
- {unreadCount > 0 && (
-
- {unreadCount} neu
-
- )}
-
-
-
- {alerts.slice(0, 5).map(alert => (
-
{
- markAsRead(alert.id)
- setShowAlertsDropdown(false)
- router.push('/alerts')
- }}
- className={`w-full text-left p-4 transition-all ${
- isDark
- ? `hover:bg-white/5 ${!alert.isRead ? 'bg-amber-500/5 border-l-2 border-amber-500' : ''}`
- : `hover:bg-slate-50 ${!alert.isRead ? 'bg-amber-50 border-l-2 border-amber-500' : ''}`
- }`}
- >
-
-
- {alert.importance.slice(0, 4)}
-
-
-
- {alert.title}
-
-
- {getRelativeTime(alert.timestamp)}
-
-
-
-
- ))}
- {alerts.length === 0 && (
-
- )}
-
-
- {
- setShowAlertsDropdown(false)
- router.push('/alerts')
- }}
- className={`w-full py-2 text-sm font-medium rounded-lg transition-all ${
- isDark
- ? 'text-amber-400 hover:bg-amber-500/10'
- : 'text-amber-600 hover:bg-amber-50'
- }`}
- >
- Alle Alerts anzeigen →
-
-
-
- >
- )}
-
-
-
-
- {/* Willkommensnachricht fuer ersten Besuch */}
- {isFirstVisit && documents.length === 0 && (
-
-
-
- 🎉
-
-
-
- Willkommen bei BreakPilot Studio!
-
-
- Grossartig, dass Sie hier sind! Laden Sie jetzt Ihr erstes Dokument hoch,
- um die KI-gestuetzte Korrektur zu erleben. Sie koennen Dateien von Ihrem
- Computer oder Mobiltelefon hochladen.
-
-
- setShowUploadModal(true)}
- className="px-6 py-3 bg-gradient-to-r from-purple-500 to-pink-500 text-white rounded-xl font-medium hover:shadow-lg hover:shadow-purple-500/30 transition-all hover:scale-105"
- >
- Dokument hochladen
-
- setShowQRModal(true)}
- className={`px-6 py-3 rounded-xl font-medium transition-all ${
- isDark
- ? 'bg-white/20 text-white hover:bg-white/30'
- : 'bg-white text-slate-700 hover:bg-slate-50 shadow'
- }`}
- >
- Mit Mobiltelefon hochladen
-
-
-
-
setIsFirstVisit(false)}
- className={`p-2 rounded-lg ${isDark ? 'hover:bg-white/10' : 'hover:bg-white'}`}
- >
-
-
-
-
-
-
- )}
-
- {/* Stats Kacheln */}
-
- {stats.map((stat, index) => (
-
-
-
- {stat.icon}
-
-
-
-
-
-
{t(stat.labelKey)}
-
{stat.value}
-
- ))}
-
-
- {/* Tab-Content */}
- {selectedTab === 'dokumente' ? (
- /* Dokumente-Tab */
-
- {/* Upload-Optionen */}
-
-
setShowUploadModal(true)}
- className={`p-6 rounded-3xl border backdrop-blur-xl text-left transition-all hover:scale-105 ${
- isDark
- ? 'bg-white/10 border-white/20 hover:bg-white/15'
- : 'bg-white/70 border-black/10 hover:bg-white/90 shadow-lg'
- }`}
- >
-
- 📤
-
-
- Direkt hochladen
-
-
- Ziehen Sie Dateien hierher oder klicken Sie zum Auswaehlen
-
-
-
-
setShowQRModal(true)}
- className={`p-6 rounded-3xl border backdrop-blur-xl text-left transition-all hover:scale-105 ${
- isDark
- ? 'bg-white/10 border-white/20 hover:bg-white/15'
- : 'bg-white/70 border-black/10 hover:bg-white/90 shadow-lg'
- }`}
- >
-
- 📱
-
-
- Mit Mobiltelefon hochladen
-
-
- QR-Code scannen (nur im lokalen Netzwerk)
-
-
-
-
- {/* Document Space */}
-
-
- Meine Dokumente
-
- doc.url && window.open(doc.url, '_blank')}
- />
-
-
- ) : (
- /* Dashboard-Tab (Standard) */
-
- {/* Aktuelle Klausuren Kachel */}
-
-
-
{t('recent_klausuren')}
-
- {t('show_all')} →
-
-
-
-
- {recentKlausuren.map((klausur) => (
-
-
- 📝
-
-
-
{klausur.title}
-
{klausur.students} {t('students')}
-
-
-
- {t(klausur.statusKey)}
-
-
-
-
{klausur.completed}/{klausur.students}
-
-
-
-
-
-
- ))}
-
-
-
- {/* Schnellaktionen Kachel */}
-
-
{t('quick_actions')}
-
-
-
- ➕
- {t('create_klausur')}
-
-
-
setShowUploadModal(true)}
- className={`w-full flex items-center gap-4 p-4 rounded-2xl transition-all ${
- isDark
- ? 'bg-white/10 text-white hover:bg-white/20'
- : 'bg-slate-100 text-slate-800 hover:bg-slate-200'
- }`}>
- 📤
- {t('upload_work')}
-
-
-
setSelectedTab('dokumente')}
- className={`w-full flex items-center justify-between p-4 rounded-2xl transition-all ${
- isDark
- ? 'bg-white/10 text-white hover:bg-white/20'
- : 'bg-slate-100 text-slate-800 hover:bg-slate-200'
- }`}>
-
- 📁
- {t('nav_dokumente')}
-
- {documents.length > 0 && (
-
- {documents.length}
-
- )}
-
-
-
router.push('/worksheet-editor')}
- className={`w-full flex items-center gap-4 p-4 rounded-2xl transition-all ${
- isDark
- ? 'bg-gradient-to-r from-purple-500/20 to-pink-500/20 text-white hover:from-purple-500/30 hover:to-pink-500/30 border border-purple-500/30'
- : 'bg-gradient-to-r from-purple-50 to-pink-50 text-slate-800 hover:from-purple-100 hover:to-pink-100 border border-purple-200'
- }`}>
- 🎨
- {t('nav_worksheet_editor')}
-
-
-
- ✨
- {t('magic_help')}
-
-
-
- 📊
- {t('fairness_check')}
-
-
-
- {/* AI Insight mini */}
-
-
- 🤖
- {t('ai_tip')}
-
-
- {t('ai_tip_text')}
-
-
-
- {/* Alerts Kachel */}
-
-
-
- 🔔 Aktuelle Alerts
-
- {unreadCount > 0 && (
-
- {unreadCount} neu
-
- )}
-
-
- {/* Headlines Liste */}
-
- {alerts.slice(0, 3).map(alert => (
-
{
- markAsRead(alert.id)
- router.push('/alerts')
- }}
- className={`w-full text-left p-2 rounded-lg transition-all text-sm ${
- isDark
- ? `hover:bg-white/10 ${!alert.isRead ? 'bg-white/5' : ''}`
- : `hover:bg-white ${!alert.isRead ? 'bg-white/50' : ''}`
- }`}
- >
-
- {!alert.isRead && (
-
- )}
-
- {alert.title}
-
-
-
- ))}
- {alerts.length === 0 && (
-
- Keine Alerts vorhanden
-
- )}
-
-
- {/* Mehr anzeigen */}
-
router.push('/alerts')}
- className={`w-full mt-3 text-sm font-medium ${
- isDark ? 'text-amber-400 hover:text-amber-300' : 'text-amber-600 hover:text-amber-700'
- }`}
- >
- Alle Alerts anzeigen →
-
-
-
- {/* Nachrichten Kachel */}
-
-
-
- 💬 {t('nav_messages')}
-
- {messagesUnreadCount > 0 && (
-
- {messagesUnreadCount} neu
-
- )}
-
-
- {/* Conversations Liste */}
-
- {conversations.slice(0, 3).map(conv => {
- const contact = contacts.find(c => conv.participant_ids.includes(c.id))
- return (
-
{
- if (conv.unread_count > 0) {
- markMessageAsRead(conv.id)
- }
- router.push('/messages')
- }}
- className={`w-full text-left p-2 rounded-lg transition-all text-sm ${
- isDark
- ? `hover:bg-white/10 ${conv.unread_count > 0 ? 'bg-white/5' : ''}`
- : `hover:bg-white ${conv.unread_count > 0 ? 'bg-white/50' : ''}`
- }`}
- >
-
- {/* Avatar */}
-
- {conv.title ? getContactInitials(conv.title) : '?'}
-
-
-
- {conv.unread_count > 0 && (
-
- )}
-
- {conv.title || 'Unbenannt'}
-
-
- {conv.last_message && (
-
- {conv.last_message}
-
- )}
-
- {conv.last_message_time && (
-
- {formatMessageTime(conv.last_message_time)}
-
- )}
-
-
- )
- })}
- {conversations.length === 0 && (
-
- Keine Nachrichten vorhanden
-
- )}
-
-
- {/* Mehr anzeigen */}
-
router.push('/messages')}
- className={`w-full mt-3 text-sm font-medium ${
- isDark ? 'text-green-400 hover:text-green-300' : 'text-green-600 hover:text-green-700'
- }`}
- >
- Alle Nachrichten anzeigen →
-
-
-
-
- )}
-
-
-
- {/* Upload Modal */}
- {showUploadModal && (
-
-
setShowUploadModal(false)} />
-
-
-
- Dokumente hochladen
-
-
setShowUploadModal(false)}
- className={`p-2 rounded-lg ${isDark ? 'hover:bg-white/10' : 'hover:bg-slate-100'}`}
- >
-
-
-
-
-
-
{
- handleUploadComplete(docs)
- }}
- />
- {/* Aktions-Buttons */}
-
-
- {documents.length > 0 ? `${documents.length} Dokument${documents.length !== 1 ? 'e' : ''} gespeichert` : 'Noch keine Dokumente'}
-
-
- setShowUploadModal(false)}
- className={`px-4 py-2 rounded-xl text-sm font-medium ${
- isDark ? 'text-white/60 hover:text-white' : 'text-slate-500 hover:text-slate-700'
- }`}
- >
- Schliessen
-
- {
- setShowUploadModal(false)
- setSelectedTab('dokumente')
- }}
- className="px-4 py-2 bg-gradient-to-r from-purple-500 to-pink-500 text-white rounded-xl text-sm font-medium hover:shadow-lg transition-all"
- >
- Zu meinen Dokumenten
-
-
-
-
-
- )}
-
- {/* QR Code Modal */}
- {showQRModal && (
-
-
setShowQRModal(false)} />
-
- setShowQRModal(false)}
- />
-
-
- )}
-
- {/* Diegetic Chat Overlay - Cinematic message notifications */}
-
-
- {/* Footer */}
-
-
- {/* Blob Animation Styles */}
-
-
- )
-}
diff --git a/studio-v2/app/page.tsx b/studio-v2/app/page.tsx
index 1299284..3b9886f 100644
--- a/studio-v2/app/page.tsx
+++ b/studio-v2/app/page.tsx
@@ -1,23 +1,18 @@
'use client'
import { useState, useEffect, useCallback } from 'react'
-import { useRouter } from 'next/navigation'
import { BPIcon } from '@/components/Logo'
-import { useLanguage } from '@/lib/LanguageContext'
import { useTheme } from '@/lib/ThemeContext'
-import { useAlerts, getImportanceColor, getRelativeTime } from '@/lib/AlertsContext'
-import { useMessages, formatMessageTime, getContactInitials } from '@/lib/MessagesContext'
-import { useActivity, formatDurationCompact } from '@/lib/ActivityContext'
-import { LanguageDropdown } from '@/components/LanguageDropdown'
-import { ThemeToggle } from '@/components/ThemeToggle'
-import { Footer } from '@/components/Footer'
import { Sidebar } from '@/components/Sidebar'
import { OnboardingWizard, OnboardingData } from '@/components/OnboardingWizard'
-import { DocumentUpload } from '@/components/DocumentUpload'
-import { QRCodeUpload } from '@/components/QRCodeUpload'
-import { DocumentSpace } from '@/components/DocumentSpace'
import { ChatOverlay } from '@/components/ChatOverlay'
import { AiPrompt } from '@/components/AiPrompt'
+import { Footer } from '@/components/Footer'
+import { BackgroundBlobs } from './_components/BackgroundBlobs'
+import { HeaderBar } from './_components/HeaderBar'
+import { DashboardContent } from './_components/DashboardContent'
+import { DocumentsTab } from './_components/DocumentsTab'
+import { UploadModal, QRModal } from './_components/UploadModals'
// LocalStorage Keys
const ONBOARDING_KEY = 'bp_onboarding_complete'
@@ -26,8 +21,6 @@ const DOCUMENTS_KEY = 'bp_documents'
const FIRST_VISIT_KEY = 'bp_first_dashboard_visit'
const SESSION_ID_KEY = 'bp_session_id'
-// BreakPilot Studio v2 - Glassmorphism Design
-
interface StoredDocument {
id: string
name: string
@@ -38,7 +31,6 @@ interface StoredDocument {
}
export default function HomePage() {
- const router = useRouter()
const [selectedTab, setSelectedTab] = useState('dashboard')
const [showOnboarding, setShowOnboarding] = useState
(null)
const [userData, setUserData] = useState(null)
@@ -48,11 +40,7 @@ export default function HomePage() {
const [isFirstVisit, setIsFirstVisit] = useState(false)
const [sessionId, setSessionId] = useState('')
const [showAlertsDropdown, setShowAlertsDropdown] = useState(false)
- const { t } = useLanguage()
const { isDark } = useTheme()
- const { alerts, unreadCount, markAsRead } = useAlerts()
- const { conversations, unreadCount: messagesUnreadCount, contacts, markAsRead: markMessageAsRead } = useMessages()
- const { stats: activityStats } = useActivity()
// Funktion zum Laden von Uploads aus der API
const fetchUploadsFromAPI = useCallback(async (sid: string) => {
@@ -62,23 +50,14 @@ export default function HomePage() {
if (response.ok) {
const data = await response.json()
if (data.uploads && data.uploads.length > 0) {
- // Konvertiere API-Uploads zu StoredDocument Format
const apiDocs: StoredDocument[] = data.uploads.map((u: any) => ({
- id: u.id,
- name: u.name,
- type: u.type,
- size: u.size,
- uploadedAt: new Date(u.uploadedAt),
- url: u.dataUrl // Data URL direkt verwenden
+ id: u.id, name: u.name, type: u.type, size: u.size,
+ uploadedAt: new Date(u.uploadedAt), url: u.dataUrl
}))
- // Merge mit existierenden Dokumenten (ohne Duplikate)
setDocuments(prev => {
const existingIds = new Set(prev.map(d => d.id))
const newDocs = apiDocs.filter(d => !existingIds.has(d.id))
- if (newDocs.length > 0) {
- return [...prev, ...newDocs]
- }
- return prev
+ return newDocs.length > 0 ? [...prev, ...newDocs] : prev
})
}
}
@@ -95,7 +74,6 @@ export default function HomePage() {
const firstVisit = localStorage.getItem(FIRST_VISIT_KEY)
let storedSessionId = localStorage.getItem(SESSION_ID_KEY)
- // Session ID generieren falls nicht vorhanden
if (!storedSessionId) {
storedSessionId = `session-${Date.now()}-${Math.random().toString(36).substr(2, 9)}`
localStorage.setItem(SESSION_ID_KEY, storedSessionId)
@@ -105,33 +83,21 @@ export default function HomePage() {
if (onboardingComplete === 'true' && storedUserData) {
setUserData(JSON.parse(storedUserData))
setShowOnboarding(false)
-
- // Dokumente laden
- if (storedDocs) {
- setDocuments(JSON.parse(storedDocs))
- }
-
- // Erster Dashboard-Besuch nach Onboarding?
+ if (storedDocs) setDocuments(JSON.parse(storedDocs))
if (!firstVisit) {
setIsFirstVisit(true)
localStorage.setItem(FIRST_VISIT_KEY, 'true')
}
-
- // Initialer Fetch von der API
fetchUploadsFromAPI(storedSessionId)
} else {
setShowOnboarding(true)
}
}, [fetchUploadsFromAPI])
- // Polling fuer neue Uploads von der API (alle 3 Sekunden)
+ // Polling fuer neue Uploads
useEffect(() => {
if (!sessionId || showOnboarding) return
-
- const interval = setInterval(() => {
- fetchUploadsFromAPI(sessionId)
- }, 3000)
-
+ const interval = setInterval(() => fetchUploadsFromAPI(sessionId), 3000)
return () => clearInterval(interval)
}, [sessionId, showOnboarding, fetchUploadsFromAPI])
@@ -142,24 +108,17 @@ export default function HomePage() {
}
}, [documents])
- // Handler fuer neue Uploads
const handleUploadComplete = (uploadedDocs: any[]) => {
const newDocs: StoredDocument[] = uploadedDocs.map(d => ({
- id: d.id,
- name: d.name,
- type: d.type,
- size: d.size,
- uploadedAt: d.uploadedAt,
- url: d.url
+ id: d.id, name: d.name, type: d.type, size: d.size,
+ uploadedAt: d.uploadedAt, url: d.url
}))
setDocuments(prev => [...prev, ...newDocs])
setIsFirstVisit(false)
}
- // Dokument loeschen (aus State und API)
const handleDeleteDocument = async (id: string) => {
setDocuments(prev => prev.filter(d => d.id !== id))
- // Auch aus API loeschen
try {
await fetch(`/api/uploads?id=${encodeURIComponent(id)}`, { method: 'DELETE' })
} catch (error) {
@@ -167,12 +126,10 @@ export default function HomePage() {
}
}
- // Dokument umbenennen
const handleRenameDocument = (id: string, newName: string) => {
setDocuments(prev => prev.map(d => d.id === id ? { ...d, name: newName } : d))
}
- // Onboarding abschließen
const handleOnboardingComplete = (data: OnboardingData) => {
localStorage.setItem(ONBOARDING_KEY, 'true')
localStorage.setItem(USER_DATA_KEY, JSON.stringify(data))
@@ -180,7 +137,7 @@ export default function HomePage() {
setShowOnboarding(false)
}
- // Zeige Ladebildschirm während der Prüfung
+ // Loading screen
if (showOnboarding === null) {
return (
-
- Laden...
-
+ Laden...
)
}
- // Zeige Onboarding falls noch nicht abgeschlossen
if (showOnboarding) {
return
}
- // Ab hier: Dashboard (bestehender Code)
-
- // Calculate time saved from activity tracking
- const timeSaved = formatDurationCompact(activityStats.weekSavedSeconds)
- const timeSavedDisplay = activityStats.weekSavedSeconds > 0
- ? `${timeSaved.value}${timeSaved.unit}`
- : '0min'
-
- const stats = [
- { labelKey: 'stat_open_corrections', value: '12', icon: '📋', color: 'from-blue-400 to-blue-600' },
- { labelKey: 'stat_completed_week', value: String(activityStats.activityCount), icon: '✅', color: 'from-green-400 to-green-600' },
- { labelKey: 'stat_average', value: '2.3', icon: '📈', color: 'from-purple-400 to-purple-600' },
- { labelKey: 'stat_time_saved', value: timeSavedDisplay, icon: '⏱', color: 'from-orange-400 to-orange-600' },
- ]
-
- const recentKlausuren = [
- { id: 1, title: 'Deutsch LK - Textanalyse', students: 24, completed: 18, statusKey: 'status_in_progress' },
- { id: 2, title: 'Deutsch GK - Erörterung', students: 28, completed: 28, statusKey: 'status_completed' },
- { id: 3, title: 'Vorabitur - Gedichtanalyse', students: 22, completed: 10, statusKey: 'status_in_progress' },
- ]
-
return (
- {/* Animated Background Blobs */}
-
+
- {/* Sidebar */}
- {/* ============================================
- ARBEITSFLAECHE (Main Content)
- ============================================ */}
- {/* Kopfleiste (Header) */}
-
-
-
{t('dashboard')}
-
{t('dashboard_subtitle')}
-
+
- {/* Search, Language & Actions */}
-
-
-
- {/* Language Dropdown */}
-
-
- {/* Theme Toggle */}
-
-
- {/* Notifications Bell with Glow Effect */}
-
-
setShowAlertsDropdown(!showAlertsDropdown)}
- className={`relative p-3 backdrop-blur-xl border rounded-2xl transition-all ${
- unreadCount > 0
- ? 'animate-pulse bg-gradient-to-r from-amber-500/20 to-orange-500/20 border-amber-500/30 shadow-lg shadow-amber-500/30'
- : isDark
- ? 'bg-white/10 border-white/20 hover:bg-white/20'
- : 'bg-black/5 border-black/10 hover:bg-black/10'
- } ${isDark ? 'text-white' : 'text-slate-700'}`}
- >
-
-
-
- {unreadCount > 0 && (
-
- {unreadCount > 9 ? '9+' : unreadCount}
-
- )}
-
-
- {/* Alerts Dropdown */}
- {showAlertsDropdown && (
- <>
-
setShowAlertsDropdown(false)} />
-
-
-
-
- Aktuelle Alerts
-
- {unreadCount > 0 && (
-
- {unreadCount} neu
-
- )}
-
-
-
- {alerts.slice(0, 5).map(alert => (
-
{
- markAsRead(alert.id)
- setShowAlertsDropdown(false)
- router.push('/alerts')
- }}
- className={`w-full text-left p-4 transition-all ${
- isDark
- ? `hover:bg-white/5 ${!alert.isRead ? 'bg-amber-500/5 border-l-2 border-amber-500' : ''}`
- : `hover:bg-slate-50 ${!alert.isRead ? 'bg-amber-50 border-l-2 border-amber-500' : ''}`
- }`}
- >
-
-
- {alert.importance.slice(0, 4)}
-
-
-
- {alert.title}
-
-
- {getRelativeTime(alert.timestamp)}
-
-
-
-
- ))}
- {alerts.length === 0 && (
-
- )}
-
-
- {
- setShowAlertsDropdown(false)
- router.push('/alerts')
- }}
- className={`w-full py-2 text-sm font-medium rounded-lg transition-all ${
- isDark
- ? 'text-amber-400 hover:bg-amber-500/10'
- : 'text-amber-600 hover:bg-amber-50'
- }`}
- >
- Alle Alerts anzeigen →
-
-
-
- >
- )}
-
-
-
-
- {/* Willkommensnachricht fuer ersten Besuch */}
+ {/* Welcome message for first visit */}
{isFirstVisit && documents.length === 0 && (
Willkommen bei BreakPilot Studio!
@@ -406,28 +192,20 @@ export default function HomePage() {
Computer oder Mobiltelefon hochladen.
- setShowUploadModal(true)}
- className="px-6 py-3 bg-gradient-to-r from-purple-500 to-pink-500 text-white rounded-xl font-medium hover:shadow-lg hover:shadow-purple-500/30 transition-all hover:scale-105"
- >
+ setShowUploadModal(true)}
+ className="px-6 py-3 bg-gradient-to-r from-purple-500 to-pink-500 text-white rounded-xl font-medium hover:shadow-lg hover:shadow-purple-500/30 transition-all hover:scale-105">
Dokument hochladen
- setShowQRModal(true)}
+ setShowQRModal(true)}
className={`px-6 py-3 rounded-xl font-medium transition-all ${
- isDark
- ? 'bg-white/20 text-white hover:bg-white/30'
- : 'bg-white text-slate-700 hover:bg-slate-50 shadow'
- }`}
- >
+ isDark ? 'bg-white/20 text-white hover:bg-white/30' : 'bg-white text-slate-700 hover:bg-slate-50 shadow'
+ }`}>
Mit Mobiltelefon hochladen
-
setIsFirstVisit(false)}
- className={`p-2 rounded-lg ${isDark ? 'hover:bg-white/10' : 'hover:bg-white'}`}
- >
+ setIsFirstVisit(false)}
+ className={`p-2 rounded-lg ${isDark ? 'hover:bg-white/10' : 'hover:bg-white'}`}>
@@ -436,511 +214,38 @@ export default function HomePage() {
)}
- {/* KI-Assistent */}
- {/* Stats Kacheln */}
-
- {stats.map((stat, index) => (
-
-
-
- {stat.icon}
-
-
-
-
-
-
{t(stat.labelKey)}
-
{stat.value}
-
- ))}
-
-
- {/* Tab-Content */}
{selectedTab === 'dokumente' ? (
- /* Dokumente-Tab */
-
- {/* Upload-Optionen */}
-
-
setShowUploadModal(true)}
- className={`p-6 rounded-3xl border backdrop-blur-xl text-left transition-all hover:scale-105 ${
- isDark
- ? 'bg-white/10 border-white/20 hover:bg-white/15'
- : 'bg-white/70 border-black/10 hover:bg-white/90 shadow-lg'
- }`}
- >
-
- 📤
-
-
- Direkt hochladen
-
-
- Ziehen Sie Dateien hierher oder klicken Sie zum Auswaehlen
-
-
-
-
setShowQRModal(true)}
- className={`p-6 rounded-3xl border backdrop-blur-xl text-left transition-all hover:scale-105 ${
- isDark
- ? 'bg-white/10 border-white/20 hover:bg-white/15'
- : 'bg-white/70 border-black/10 hover:bg-white/90 shadow-lg'
- }`}
- >
-
- 📱
-
-
- Mit Mobiltelefon hochladen
-
-
- QR-Code scannen (nur im lokalen Netzwerk)
-
-
-
-
- {/* Document Space */}
-
-
- Meine Dokumente
-
- doc.url && window.open(doc.url, '_blank')}
- />
-
-
+
) : (
- /* Dashboard-Tab (Standard) */
-
- {/* Aktuelle Klausuren Kachel */}
-
-
-
{t('recent_klausuren')}
-
- {t('show_all')} →
-
-
-
-
- {recentKlausuren.map((klausur) => (
-
-
- 📝
-
-
-
{klausur.title}
-
{klausur.students} {t('students')}
-
-
-
- {t(klausur.statusKey)}
-
-
-
-
{klausur.completed}/{klausur.students}
-
-
-
-
-
-
- ))}
-
-
-
- {/* Schnellaktionen Kachel */}
-
-
{t('quick_actions')}
-
-
-
- ➕
- {t('create_klausur')}
-
-
-
setShowUploadModal(true)}
- className={`w-full flex items-center gap-4 p-4 rounded-2xl transition-all ${
- isDark
- ? 'bg-white/10 text-white hover:bg-white/20'
- : 'bg-slate-100 text-slate-800 hover:bg-slate-200'
- }`}>
- 📤
- {t('upload_work')}
-
-
-
setSelectedTab('dokumente')}
- className={`w-full flex items-center justify-between p-4 rounded-2xl transition-all ${
- isDark
- ? 'bg-white/10 text-white hover:bg-white/20'
- : 'bg-slate-100 text-slate-800 hover:bg-slate-200'
- }`}>
-
- 📁
- {t('nav_dokumente')}
-
- {documents.length > 0 && (
-
- {documents.length}
-
- )}
-
-
-
router.push('/worksheet-editor')}
- className={`w-full flex items-center gap-4 p-4 rounded-2xl transition-all ${
- isDark
- ? 'bg-gradient-to-r from-purple-500/20 to-pink-500/20 text-white hover:from-purple-500/30 hover:to-pink-500/30 border border-purple-500/30'
- : 'bg-gradient-to-r from-purple-50 to-pink-50 text-slate-800 hover:from-purple-100 hover:to-pink-100 border border-purple-200'
- }`}>
- 🎨
- {t('nav_worksheet_editor')}
-
-
-
- ✨
- {t('magic_help')}
-
-
-
- 📊
- {t('fairness_check')}
-
-
-
- {/* AI Insight mini */}
-
-
- 🤖
- {t('ai_tip')}
-
-
- {t('ai_tip_text')}
-
-
-
- {/* Alerts Kachel */}
-
-
-
- 🔔 Aktuelle Alerts
-
- {unreadCount > 0 && (
-
- {unreadCount} neu
-
- )}
-
-
- {/* Headlines Liste */}
-
- {alerts.slice(0, 3).map(alert => (
-
{
- markAsRead(alert.id)
- router.push('/alerts')
- }}
- className={`w-full text-left p-2 rounded-lg transition-all text-sm ${
- isDark
- ? `hover:bg-white/10 ${!alert.isRead ? 'bg-white/5' : ''}`
- : `hover:bg-white ${!alert.isRead ? 'bg-white/50' : ''}`
- }`}
- >
-
- {!alert.isRead && (
-
- )}
-
- {alert.title}
-
-
-
- ))}
- {alerts.length === 0 && (
-
- Keine Alerts vorhanden
-
- )}
-
-
- {/* Mehr anzeigen */}
-
router.push('/alerts')}
- className={`w-full mt-3 text-sm font-medium ${
- isDark ? 'text-amber-400 hover:text-amber-300' : 'text-amber-600 hover:text-amber-700'
- }`}
- >
- Alle Alerts anzeigen →
-
-
-
- {/* Nachrichten Kachel */}
-
-
-
- 💬 {t('nav_messages')}
-
- {messagesUnreadCount > 0 && (
-
- {messagesUnreadCount} neu
-
- )}
-
-
- {/* Conversations Liste */}
-
- {conversations.slice(0, 3).map(conv => {
- const contact = contacts.find(c => conv.participant_ids.includes(c.id))
- return (
-
{
- if (conv.unread_count > 0) {
- markMessageAsRead(conv.id)
- }
- router.push('/messages')
- }}
- className={`w-full text-left p-2 rounded-lg transition-all text-sm ${
- isDark
- ? `hover:bg-white/10 ${conv.unread_count > 0 ? 'bg-white/5' : ''}`
- : `hover:bg-white ${conv.unread_count > 0 ? 'bg-white/50' : ''}`
- }`}
- >
-
- {/* Avatar */}
-
- {conv.title ? getContactInitials(conv.title) : '?'}
-
-
-
- {conv.unread_count > 0 && (
-
- )}
-
- {conv.title || 'Unbenannt'}
-
-
- {conv.last_message && (
-
- {conv.last_message}
-
- )}
-
- {conv.last_message_time && (
-
- {formatMessageTime(conv.last_message_time)}
-
- )}
-
-
- )
- })}
- {conversations.length === 0 && (
-
- Keine Nachrichten vorhanden
-
- )}
-
-
- {/* Mehr anzeigen */}
-
router.push('/messages')}
- className={`w-full mt-3 text-sm font-medium ${
- isDark ? 'text-green-400 hover:text-green-300' : 'text-green-600 hover:text-green-700'
- }`}
- >
- Alle Nachrichten anzeigen →
-
-
-
-
+
)}
- {/* Upload Modal */}
{showUploadModal && (
-
-
setShowUploadModal(false)} />
-
-
-
- Dokumente hochladen
-
-
setShowUploadModal(false)}
- className={`p-2 rounded-lg ${isDark ? 'hover:bg-white/10' : 'hover:bg-slate-100'}`}
- >
-
-
-
-
-
-
{
- handleUploadComplete(docs)
- }}
- />
- {/* Aktions-Buttons */}
-
-
- {documents.length > 0 ? `${documents.length} Dokument${documents.length !== 1 ? 'e' : ''} gespeichert` : 'Noch keine Dokumente'}
-
-
- setShowUploadModal(false)}
- className={`px-4 py-2 rounded-xl text-sm font-medium ${
- isDark ? 'text-white/60 hover:text-white' : 'text-slate-500 hover:text-slate-700'
- }`}
- >
- Schliessen
-
- {
- setShowUploadModal(false)
- setSelectedTab('dokumente')
- }}
- className="px-4 py-2 bg-gradient-to-r from-purple-500 to-pink-500 text-white rounded-xl text-sm font-medium hover:shadow-lg transition-all"
- >
- Zu meinen Dokumenten
-
-
-
-
-
+
setShowUploadModal(false)}
+ onGoToDocuments={() => { setShowUploadModal(false); setSelectedTab('dokumente') }}
+ />
)}
+ {showQRModal && setShowQRModal(false)} />}
- {/* QR Code Modal */}
- {showQRModal && (
-
-
setShowQRModal(false)} />
-
- setShowQRModal(false)}
- />
-
-
- )}
-
- {/* Diegetic Chat Overlay - Cinematic message notifications */}
-
-
- {/* Footer */}
+
-
- {/* Blob Animation Styles */}
-
)
}
diff --git a/studio-v2/app/vocab-worksheet/usePageProcessing.ts b/studio-v2/app/vocab-worksheet/usePageProcessing.ts
new file mode 100644
index 0000000..20159f2
--- /dev/null
+++ b/studio-v2/app/vocab-worksheet/usePageProcessing.ts
@@ -0,0 +1,97 @@
+import type { VocabularyEntry, OcrPrompts, IpaMode, SyllableMode } from './types'
+import { getApiBase } from './constants'
+
+/**
+ * Process a single page and return vocabulary + optional scan quality info.
+ */
+export async function processSinglePage(
+ sessionId: string,
+ pageIndex: number,
+ ipa: IpaMode,
+ syllable: SyllableMode,
+ ocrPrompts: OcrPrompts,
+ ocrEnhance: boolean,
+ ocrMaxCols: number,
+ ocrMinConf: number,
+): Promise<{ success: boolean; vocabulary: VocabularyEntry[]; error?: string; scanQuality?: any }> {
+ const API_BASE = getApiBase()
+
+ try {
+ const params = new URLSearchParams({
+ ipa_mode: ipa,
+ syllable_mode: syllable,
+ enhance: String(ocrEnhance),
+ max_cols: String(ocrMaxCols),
+ min_conf: String(ocrMinConf),
+ })
+ const res = await fetch(`${API_BASE}/api/v1/vocab/sessions/${sessionId}/process-single-page/${pageIndex}?${params}`, {
+ method: 'POST',
+ headers: { 'Content-Type': 'application/json' },
+ body: JSON.stringify({ ocr_prompts: ocrPrompts }),
+ })
+
+ if (!res.ok) {
+ const errBody = await res.json().catch(() => ({}))
+ const detail = errBody.detail || `HTTP ${res.status}`
+ return { success: false, vocabulary: [], error: `Seite ${pageIndex + 1}: ${detail}` }
+ }
+
+ const data = await res.json()
+ if (!data.success) {
+ return { success: false, vocabulary: [], error: data.error || `Seite ${pageIndex + 1}: Unbekannter Fehler` }
+ }
+
+ return { success: true, vocabulary: data.vocabulary || [], scanQuality: data.scan_quality }
+ } catch (e) {
+ return { success: false, vocabulary: [], error: `Seite ${pageIndex + 1}: ${e instanceof Error ? e.message : 'Netzwerkfehler'}` }
+ }
+}
+
+/**
+ * Reprocess pages with updated IPA/syllable settings.
+ * Returns the new vocabulary array.
+ */
+export async function reprocessPagesFlow(
+ sessionId: string,
+ pagesToReprocess: number[],
+ ipa: IpaMode,
+ syllable: SyllableMode,
+ ocrPrompts: OcrPrompts,
+ ocrEnhance: boolean,
+ ocrMaxCols: number,
+ ocrMinConf: number,
+ setExtractionStatus: (s: string) => void,
+): Promise<{ vocabulary: VocabularyEntry[]; qualityInfo: string }> {
+ const API_BASE = getApiBase()
+ const allVocab: VocabularyEntry[] = []
+ let lastQuality: any = null
+
+ for (const pageIndex of pagesToReprocess) {
+ setExtractionStatus(`Verarbeite Seite ${pageIndex + 1}...`)
+ try {
+ const params = new URLSearchParams({
+ ipa_mode: ipa,
+ syllable_mode: syllable,
+ enhance: String(ocrEnhance),
+ max_cols: String(ocrMaxCols),
+ min_conf: String(ocrMinConf),
+ })
+ const res = await fetch(`${API_BASE}/api/v1/vocab/sessions/${sessionId}/process-single-page/${pageIndex}?${params}`, {
+ method: 'POST',
+ headers: { 'Content-Type': 'application/json' },
+ body: JSON.stringify({ ocr_prompts: ocrPrompts }),
+ })
+ if (res.ok) {
+ const data = await res.json()
+ if (data.vocabulary) allVocab.push(...data.vocabulary)
+ if (data.scan_quality) lastQuality = data.scan_quality
+ }
+ } catch { /* ignore individual page failures */ }
+ }
+
+ const qualityInfo = lastQuality
+ ? ` | Qualitaet: ${lastQuality.quality_pct}%${lastQuality.is_degraded ? ' (degradiert!)' : ''} | Blur: ${lastQuality.blur_score} | Kontrast: ${lastQuality.contrast_score}`
+ : ''
+
+ return { vocabulary: allVocab, qualityInfo }
+}
diff --git a/studio-v2/app/vocab-worksheet/useSessionHandlers.ts b/studio-v2/app/vocab-worksheet/useSessionHandlers.ts
new file mode 100644
index 0000000..6aa5fa2
--- /dev/null
+++ b/studio-v2/app/vocab-worksheet/useSessionHandlers.ts
@@ -0,0 +1,156 @@
+import type {
+ VocabularyEntry, Session, StoredDocument, OcrPrompts, IpaMode, SyllableMode,
+} from './types'
+import { getApiBase } from './constants'
+
+/**
+ * Start a new session: create on server, upload document, process first page or PDF.
+ */
+export async function startSessionFlow(params: {
+ sessionName: string
+ selectedDocumentId: string | null
+ directFile: File | null
+ selectedMobileFile: { dataUrl: string; type: string; name: string } | null
+ storedDocuments: StoredDocument[]
+ ocrPrompts: OcrPrompts
+ startActivity: (type: string, meta: any) => void
+ setSession: (s: Session | null | ((prev: Session | null) => Session | null)) => void
+ setWorksheetTitle: (t: string) => void
+ setExtractionStatus: (s: string) => void
+ setPdfPageCount: (n: number) => void
+ setSelectedPages: (p: number[]) => void
+ setPagesThumbnails: (t: string[]) => void
+ setIsLoadingThumbnails: (l: boolean) => void
+ setVocabulary: (v: VocabularyEntry[]) => void
+ setActiveTab: (t: 'upload' | 'pages' | 'vocabulary' | 'spreadsheet' | 'worksheet' | 'export' | 'settings') => void
+ setError: (e: string | null) => void
+}): Promise {
+ const {
+ sessionName, selectedDocumentId, directFile, selectedMobileFile, storedDocuments,
+ ocrPrompts, startActivity, setSession, setWorksheetTitle, setExtractionStatus,
+ setPdfPageCount, setSelectedPages, setPagesThumbnails, setIsLoadingThumbnails,
+ setVocabulary, setActiveTab, setError,
+ } = params
+
+ setError(null)
+ setExtractionStatus('Session wird erstellt...')
+
+ const API_BASE = getApiBase()
+
+ const sessionRes = await fetch(`${API_BASE}/api/v1/vocab/sessions`, {
+ method: 'POST',
+ headers: { 'Content-Type': 'application/json' },
+ body: JSON.stringify({ name: sessionName, ocr_prompts: ocrPrompts }),
+ })
+
+ if (!sessionRes.ok) throw new Error('Session konnte nicht erstellt werden')
+
+ const sessionData = await sessionRes.json()
+ setSession(sessionData)
+ setWorksheetTitle(sessionName)
+ startActivity('vocab_extraction', { description: sessionName })
+
+ let file: File
+ let isPdf = false
+
+ if (directFile) {
+ file = directFile
+ isPdf = directFile.type === 'application/pdf'
+ } else if (selectedMobileFile) {
+ isPdf = selectedMobileFile.type === 'application/pdf'
+ const base64Data = selectedMobileFile.dataUrl.split(',')[1]
+ const byteCharacters = atob(base64Data)
+ const byteNumbers = new Array(byteCharacters.length)
+ for (let i = 0; i < byteCharacters.length; i++) byteNumbers[i] = byteCharacters.charCodeAt(i)
+ const blob = new Blob([new Uint8Array(byteNumbers)], { type: selectedMobileFile.type })
+ file = new File([blob], selectedMobileFile.name, { type: selectedMobileFile.type })
+ } else {
+ const selectedDoc = storedDocuments.find(d => d.id === selectedDocumentId)
+ if (!selectedDoc || !selectedDoc.url) throw new Error('Das ausgewaehlte Dokument ist nicht verfuegbar.')
+ isPdf = selectedDoc.type === 'application/pdf'
+ const base64Data = selectedDoc.url.split(',')[1]
+ const byteCharacters = atob(base64Data)
+ const byteNumbers = new Array(byteCharacters.length)
+ for (let i = 0; i < byteCharacters.length; i++) byteNumbers[i] = byteCharacters.charCodeAt(i)
+ const blob = new Blob([new Uint8Array(byteNumbers)], { type: selectedDoc.type })
+ file = new File([blob], selectedDoc.name, { type: selectedDoc.type })
+ }
+
+ if (isPdf) {
+ setExtractionStatus('PDF wird hochgeladen...')
+ const formData = new FormData()
+ formData.append('file', file)
+ const pdfInfoRes = await fetch(`${API_BASE}/api/v1/vocab/sessions/${sessionData.id}/upload-pdf-info`, {
+ method: 'POST', body: formData,
+ })
+ if (!pdfInfoRes.ok) throw new Error('PDF konnte nicht verarbeitet werden')
+ const pdfInfo = await pdfInfoRes.json()
+ setPdfPageCount(pdfInfo.page_count)
+ setSelectedPages(Array.from({ length: pdfInfo.page_count }, (_, i) => i))
+ setActiveTab('pages')
+ setExtractionStatus(`${pdfInfo.page_count} Seiten erkannt. Vorschau wird geladen...`)
+ setIsLoadingThumbnails(true)
+ const thumbnails: string[] = []
+ for (let i = 0; i < pdfInfo.page_count; i++) {
+ try {
+ const thumbRes = await fetch(`${API_BASE}/api/v1/vocab/sessions/${sessionData.id}/pdf-thumbnail/${i}?hires=true`)
+ if (thumbRes.ok) { const blob = await thumbRes.blob(); thumbnails.push(URL.createObjectURL(blob)) }
+ } catch (e) { console.error(`Failed to load thumbnail for page ${i}`) }
+ }
+ setPagesThumbnails(thumbnails)
+ setIsLoadingThumbnails(false)
+ setExtractionStatus(`${pdfInfo.page_count} Seiten bereit. Waehlen Sie die zu verarbeitenden Seiten.`)
+ } else {
+ setExtractionStatus('KI analysiert das Bild... (kann 30-60 Sekunden dauern)')
+ const formData = new FormData()
+ formData.append('file', file)
+ const uploadRes = await fetch(`${API_BASE}/api/v1/vocab/sessions/${sessionData.id}/upload`, {
+ method: 'POST', body: formData,
+ })
+ if (!uploadRes.ok) throw new Error('Bild konnte nicht verarbeitet werden')
+ const uploadData = await uploadRes.json()
+ setSession(prev => prev ? { ...prev, status: 'extracted', vocabulary_count: uploadData.vocabulary_count } : null)
+ const vocabRes = await fetch(`${API_BASE}/api/v1/vocab/sessions/${sessionData.id}/vocabulary`)
+ if (vocabRes.ok) {
+ const vocabData = await vocabRes.json()
+ setVocabulary(vocabData.vocabulary || [])
+ setExtractionStatus(`${vocabData.vocabulary?.length || 0} Vokabeln gefunden!`)
+ }
+ await new Promise(r => setTimeout(r, 1000))
+ setActiveTab('vocabulary')
+ }
+
+ return sessionData
+}
+
+/**
+ * Resume an existing session from the API.
+ */
+export async function resumeSessionFlow(
+ existingSession: Session,
+ setSession: (s: Session) => void,
+ setWorksheetTitle: (t: string) => void,
+ setVocabulary: (v: VocabularyEntry[]) => void,
+ setActiveTab: (t: 'upload' | 'pages' | 'vocabulary' | 'spreadsheet' | 'worksheet' | 'export' | 'settings') => void,
+ setExtractionStatus: (s: string) => void,
+): Promise {
+ const API_BASE = getApiBase()
+ const sessionRes = await fetch(`${API_BASE}/api/v1/vocab/sessions/${existingSession.id}`)
+ if (!sessionRes.ok) throw new Error('Session nicht gefunden')
+ const sessionData = await sessionRes.json()
+ setSession(sessionData)
+ setWorksheetTitle(sessionData.name)
+
+ if (sessionData.status === 'extracted' || sessionData.status === 'completed') {
+ const vocabRes = await fetch(`${API_BASE}/api/v1/vocab/sessions/${existingSession.id}/vocabulary`)
+ if (vocabRes.ok) { const vd = await vocabRes.json(); setVocabulary(vd.vocabulary || []) }
+ setActiveTab('vocabulary')
+ setExtractionStatus('')
+ } else if (sessionData.status === 'pending') {
+ setActiveTab('upload')
+ setExtractionStatus('Diese Session hat noch keine Vokabeln. Bitte laden Sie ein Dokument hoch.')
+ } else {
+ setActiveTab('vocabulary')
+ setExtractionStatus('')
+ }
+}
diff --git a/studio-v2/app/vocab-worksheet/useVocabWorksheet.ts b/studio-v2/app/vocab-worksheet/useVocabWorksheet.ts
index a54f271..c305233 100644
--- a/studio-v2/app/vocab-worksheet/useVocabWorksheet.ts
+++ b/studio-v2/app/vocab-worksheet/useVocabWorksheet.ts
@@ -3,7 +3,6 @@
import { useState, useRef, useEffect } from 'react'
import { useTheme } from '@/lib/ThemeContext'
import { useLanguage } from '@/lib/LanguageContext'
-import { useRouter } from 'next/navigation'
import { useActivity } from '@/lib/ActivityContext'
import type { UploadedFile } from '@/components/QRCodeUpload'
@@ -16,11 +15,12 @@ import {
getApiBase, DOCUMENTS_KEY, OCR_PROMPTS_KEY, SESSION_ID_KEY,
defaultOcrPrompts, formatFileSize,
} from './constants'
+import { startSessionFlow, resumeSessionFlow } from './useSessionHandlers'
+import { processSinglePage, reprocessPagesFlow } from './usePageProcessing'
export function useVocabWorksheet(): VocabWorksheetHook {
const { isDark } = useTheme()
const { t } = useLanguage()
- const router = useRouter()
const { startActivity, completeActivity } = useActivity()
const [mounted, setMounted] = useState(false)
@@ -34,39 +34,39 @@ export function useVocabWorksheet(): VocabWorksheetHook {
const [error, setError] = useState(null)
const [extractionStatus, setExtractionStatus] = useState('')
- // Existing sessions list
+ // Existing sessions
const [existingSessions, setExistingSessions] = useState([])
const [isLoadingSessions, setIsLoadingSessions] = useState(true)
- // Documents from storage
+ // Documents
const [storedDocuments, setStoredDocuments] = useState([])
const [selectedDocumentId, setSelectedDocumentId] = useState(null)
- // Direct file upload
+ // Direct file
const [directFile, setDirectFile] = useState(null)
const [directFilePreview, setDirectFilePreview] = useState(null)
const [showFullPreview, setShowFullPreview] = useState(false)
const directFileInputRef = useRef(null)
- // PDF page selection state
+ // PDF pages
const [pdfPageCount, setPdfPageCount] = useState(0)
const [selectedPages, setSelectedPages] = useState([])
const [pagesThumbnails, setPagesThumbnails] = useState([])
const [isLoadingThumbnails, setIsLoadingThumbnails] = useState(false)
const [excludedPages, setExcludedPages] = useState([])
- // Dynamic extra columns per source page
+ // Extra columns
const [pageExtraColumns, setPageExtraColumns] = useState>({})
- // Upload state
+ // Upload
const [uploadedImage, setUploadedImage] = useState(null)
const [isExtracting, setIsExtracting] = useState(false)
const fileInputRef = useRef(null)
- // Vocabulary state
+ // Vocabulary
const [vocabulary, setVocabulary] = useState([])
- // Worksheet state
+ // Worksheet
const [selectedTypes, setSelectedTypes] = useState(['en_to_de'])
const [worksheetTitle, setWorksheetTitle] = useState('')
const [includeSolutions, setIncludeSolutions] = useState(true)
@@ -75,27 +75,25 @@ export function useVocabWorksheet(): VocabWorksheetHook {
const [ipaMode, setIpaMode] = useState('none')
const [syllableMode, setSyllableMode] = useState('none')
- // Export state
+ // Export
const [worksheetId, setWorksheetId] = useState(null)
const [isGenerating, setIsGenerating] = useState(false)
- // Processing results
+ // Processing
const [processingErrors, setProcessingErrors] = useState([])
const [successfulPages, setSuccessfulPages] = useState([])
const [failedPages, setFailedPages] = useState([])
const [currentlyProcessingPage, setCurrentlyProcessingPage] = useState(null)
const [processingQueue, setProcessingQueue] = useState([])
- // OCR Prompts/Settings
+ // OCR Settings
const [ocrPrompts, setOcrPrompts] = useState(defaultOcrPrompts)
const [showSettings, setShowSettings] = useState(false)
+ const [ocrEnhance, setOcrEnhance] = useState(true)
+ const [ocrMaxCols, setOcrMaxCols] = useState(3)
+ const [ocrMinConf, setOcrMinConf] = useState(0)
- // OCR Quality Steps (toggle individually for A/B testing)
- const [ocrEnhance, setOcrEnhance] = useState(true) // Step 3: CLAHE + denoise
- const [ocrMaxCols, setOcrMaxCols] = useState(3) // Step 2: max columns (0=unlimited)
- const [ocrMinConf, setOcrMinConf] = useState(0) // Step 1: 0=auto from quality score
-
- // QR Code Upload
+ // QR
const [showQRModal, setShowQRModal] = useState(false)
const [uploadSessionId, setUploadSessionId] = useState('')
const [mobileUploadedFiles, setMobileUploadedFiles] = useState([])
@@ -109,772 +107,260 @@ export function useVocabWorksheet(): VocabWorksheetHook {
const [ocrCompareError, setOcrCompareError] = useState(null)
// --- Effects ---
-
- // SSR Safety
useEffect(() => {
setMounted(true)
- let storedSessionId = localStorage.getItem(SESSION_ID_KEY)
- if (!storedSessionId) {
- storedSessionId = `vocab-${Date.now()}-${Math.random().toString(36).substr(2, 9)}`
- localStorage.setItem(SESSION_ID_KEY, storedSessionId)
- }
- setUploadSessionId(storedSessionId)
+ let sid = localStorage.getItem(SESSION_ID_KEY)
+ if (!sid) { sid = `vocab-${Date.now()}-${Math.random().toString(36).substr(2, 9)}`; localStorage.setItem(SESSION_ID_KEY, sid) }
+ setUploadSessionId(sid)
}, [])
- // Load OCR prompts from localStorage
useEffect(() => {
if (!mounted) return
const stored = localStorage.getItem(OCR_PROMPTS_KEY)
- if (stored) {
- try {
- setOcrPrompts({ ...defaultOcrPrompts, ...JSON.parse(stored) })
- } catch (e) {
- console.error('Failed to parse OCR prompts:', e)
- }
- }
+ if (stored) { try { setOcrPrompts({ ...defaultOcrPrompts, ...JSON.parse(stored) }) } catch {} }
}, [mounted])
- // Load documents from localStorage
useEffect(() => {
if (!mounted) return
const stored = localStorage.getItem(DOCUMENTS_KEY)
if (stored) {
- try {
- const docs = JSON.parse(stored)
- const imagesDocs = docs.filter((d: StoredDocument) =>
- d.type?.startsWith('image/') || d.type === 'application/pdf'
- )
- setStoredDocuments(imagesDocs)
- } catch (e) {
- console.error('Failed to parse stored documents:', e)
- }
+ try { setStoredDocuments(JSON.parse(stored).filter((d: StoredDocument) => d.type?.startsWith('image/') || d.type === 'application/pdf')) } catch {}
}
}, [mounted])
- // Load existing sessions from API
useEffect(() => {
if (!mounted) return
- const loadSessions = async () => {
- const API_BASE = getApiBase()
+ ;(async () => {
try {
- const res = await fetch(`${API_BASE}/api/v1/vocab/sessions`)
- if (res.ok) {
- const sessions = await res.json()
- setExistingSessions(sessions)
- }
- } catch (e) {
- console.error('Failed to load sessions:', e)
- } finally {
- setIsLoadingSessions(false)
- }
- }
- loadSessions()
+ const res = await fetch(`${getApiBase()}/api/v1/vocab/sessions`)
+ if (res.ok) setExistingSessions(await res.json())
+ } catch {} finally { setIsLoadingSessions(false) }
+ })()
}, [mounted])
// --- Glassmorphism styles ---
-
- const glassCard = isDark
- ? 'backdrop-blur-xl bg-white/10 border border-white/20'
- : 'backdrop-blur-xl bg-white/70 border border-black/10'
-
- const glassInput = isDark
- ? 'bg-white/10 border-white/20 text-white placeholder-white/40 focus:border-purple-400'
- : 'bg-white/50 border-black/10 text-slate-900 placeholder-slate-400 focus:border-purple-500'
+ const glassCard = isDark ? 'backdrop-blur-xl bg-white/10 border border-white/20' : 'backdrop-blur-xl bg-white/70 border border-black/10'
+ const glassInput = isDark ? 'bg-white/10 border-white/20 text-white placeholder-white/40 focus:border-purple-400' : 'bg-white/50 border-black/10 text-slate-900 placeholder-slate-400 focus:border-purple-500'
// --- Handlers ---
-
- const saveOcrPrompts = (prompts: OcrPrompts) => {
- setOcrPrompts(prompts)
- localStorage.setItem(OCR_PROMPTS_KEY, JSON.stringify(prompts))
- }
+ const saveOcrPrompts = (prompts: OcrPrompts) => { setOcrPrompts(prompts); localStorage.setItem(OCR_PROMPTS_KEY, JSON.stringify(prompts)) }
const handleDirectFileSelect = (e: React.ChangeEvent) => {
- const file = e.target.files?.[0]
- if (!file) return
-
- setDirectFile(file)
- setSelectedDocumentId(null)
- setSelectedMobileFile(null)
-
+ const file = e.target.files?.[0]; if (!file) return
+ setDirectFile(file); setSelectedDocumentId(null); setSelectedMobileFile(null)
if (file.type.startsWith('image/')) {
- const reader = new FileReader()
- reader.onload = (ev) => {
- setDirectFilePreview(ev.target?.result as string)
- }
- reader.readAsDataURL(file)
- } else if (file.type === 'application/pdf') {
- setDirectFilePreview(URL.createObjectURL(file))
- } else {
- setDirectFilePreview(null)
- }
+ const reader = new FileReader(); reader.onload = (ev) => setDirectFilePreview(ev.target?.result as string); reader.readAsDataURL(file)
+ } else if (file.type === 'application/pdf') { setDirectFilePreview(URL.createObjectURL(file)) }
+ else { setDirectFilePreview(null) }
}
const startSession = async () => {
- if (!sessionName.trim()) {
- setError('Bitte geben Sie einen Namen fuer die Session ein.')
- return
- }
- if (!selectedDocumentId && !directFile && !selectedMobileFile) {
- setError('Bitte waehlen Sie ein Dokument aus oder laden Sie eine Datei hoch.')
- return
- }
-
- setError(null)
+ if (!sessionName.trim()) { setError('Bitte geben Sie einen Namen fuer die Session ein.'); return }
+ if (!selectedDocumentId && !directFile && !selectedMobileFile) { setError('Bitte waehlen Sie ein Dokument aus oder laden Sie eine Datei hoch.'); return }
setIsCreatingSession(true)
- setExtractionStatus('Session wird erstellt...')
-
- const API_BASE = getApiBase()
-
try {
- const sessionRes = await fetch(`${API_BASE}/api/v1/vocab/sessions`, {
- method: 'POST',
- headers: { 'Content-Type': 'application/json' },
- body: JSON.stringify({
- name: sessionName,
- ocr_prompts: ocrPrompts
- }),
+ await startSessionFlow({
+ sessionName, selectedDocumentId, directFile, selectedMobileFile, storedDocuments,
+ ocrPrompts, startActivity, setSession, setWorksheetTitle, setExtractionStatus,
+ setPdfPageCount, setSelectedPages, setPagesThumbnails, setIsLoadingThumbnails,
+ setVocabulary, setActiveTab, setError,
})
-
- if (!sessionRes.ok) {
- throw new Error('Session konnte nicht erstellt werden')
- }
-
- const sessionData = await sessionRes.json()
- setSession(sessionData)
- setWorksheetTitle(sessionName)
-
- startActivity('vocab_extraction', { description: sessionName })
-
- let file: File
- let isPdf = false
-
- if (directFile) {
- file = directFile
- isPdf = directFile.type === 'application/pdf'
- } else if (selectedMobileFile) {
- isPdf = selectedMobileFile.type === 'application/pdf'
- const base64Data = selectedMobileFile.dataUrl.split(',')[1]
- const byteCharacters = atob(base64Data)
- const byteNumbers = new Array(byteCharacters.length)
- for (let i = 0; i < byteCharacters.length; i++) {
- byteNumbers[i] = byteCharacters.charCodeAt(i)
- }
- const byteArray = new Uint8Array(byteNumbers)
- const blob = new Blob([byteArray], { type: selectedMobileFile.type })
- file = new File([blob], selectedMobileFile.name, { type: selectedMobileFile.type })
- } else {
- const selectedDoc = storedDocuments.find(d => d.id === selectedDocumentId)
- if (!selectedDoc || !selectedDoc.url) {
- throw new Error('Das ausgewaehlte Dokument ist nicht verfuegbar.')
- }
-
- isPdf = selectedDoc.type === 'application/pdf'
-
- const base64Data = selectedDoc.url.split(',')[1]
- const byteCharacters = atob(base64Data)
- const byteNumbers = new Array(byteCharacters.length)
- for (let i = 0; i < byteCharacters.length; i++) {
- byteNumbers[i] = byteCharacters.charCodeAt(i)
- }
- const byteArray = new Uint8Array(byteNumbers)
- const blob = new Blob([byteArray], { type: selectedDoc.type })
- file = new File([blob], selectedDoc.name, { type: selectedDoc.type })
- }
-
- if (isPdf) {
- setExtractionStatus('PDF wird hochgeladen...')
-
- const formData = new FormData()
- formData.append('file', file)
-
- const pdfInfoRes = await fetch(`${API_BASE}/api/v1/vocab/sessions/${sessionData.id}/upload-pdf-info`, {
- method: 'POST',
- body: formData,
- })
-
- if (!pdfInfoRes.ok) {
- throw new Error('PDF konnte nicht verarbeitet werden')
- }
-
- const pdfInfo = await pdfInfoRes.json()
- setPdfPageCount(pdfInfo.page_count)
- setSelectedPages(Array.from({ length: pdfInfo.page_count }, (_, i) => i))
-
- setActiveTab('pages')
- setExtractionStatus(`${pdfInfo.page_count} Seiten erkannt. Vorschau wird geladen...`)
- setIsLoadingThumbnails(true)
-
- const thumbnails: string[] = []
- for (let i = 0; i < pdfInfo.page_count; i++) {
- try {
- const thumbRes = await fetch(`${API_BASE}/api/v1/vocab/sessions/${sessionData.id}/pdf-thumbnail/${i}?hires=true`)
- if (thumbRes.ok) {
- const blob = await thumbRes.blob()
- thumbnails.push(URL.createObjectURL(blob))
- }
- } catch (e) {
- console.error(`Failed to load thumbnail for page ${i}`)
- }
- }
-
- setPagesThumbnails(thumbnails)
- setIsLoadingThumbnails(false)
- setExtractionStatus(`${pdfInfo.page_count} Seiten bereit. Waehlen Sie die zu verarbeitenden Seiten.`)
-
- } else {
- setExtractionStatus('KI analysiert das Bild... (kann 30-60 Sekunden dauern)')
-
- const formData = new FormData()
- formData.append('file', file)
-
- const uploadRes = await fetch(`${API_BASE}/api/v1/vocab/sessions/${sessionData.id}/upload`, {
- method: 'POST',
- body: formData,
- })
-
- if (!uploadRes.ok) {
- throw new Error('Bild konnte nicht verarbeitet werden')
- }
-
- const uploadData = await uploadRes.json()
- setSession(prev => prev ? { ...prev, status: 'extracted', vocabulary_count: uploadData.vocabulary_count } : null)
-
- const vocabRes = await fetch(`${API_BASE}/api/v1/vocab/sessions/${sessionData.id}/vocabulary`)
- if (vocabRes.ok) {
- const vocabData = await vocabRes.json()
- setVocabulary(vocabData.vocabulary || [])
- setExtractionStatus(`${vocabData.vocabulary?.length || 0} Vokabeln gefunden!`)
- }
-
- await new Promise(r => setTimeout(r, 1000))
- setActiveTab('vocabulary')
- }
-
} catch (error) {
- console.error('Session start failed:', error)
setError(error instanceof Error ? error.message : 'Ein Fehler ist aufgetreten')
- setExtractionStatus('')
- setSession(null)
- } finally {
- setIsCreatingSession(false)
- }
- }
-
- const processSinglePage = async (pageIndex: number, ipa: IpaMode, syllable: SyllableMode): Promise<{ success: boolean; vocabulary: VocabularyEntry[]; error?: string; scanQuality?: any }> => {
- const API_BASE = getApiBase()
-
- try {
- const params = new URLSearchParams({
- ipa_mode: ipa,
- syllable_mode: syllable,
- enhance: String(ocrEnhance),
- max_cols: String(ocrMaxCols),
- min_conf: String(ocrMinConf),
- })
- const res = await fetch(`${API_BASE}/api/v1/vocab/sessions/${session!.id}/process-single-page/${pageIndex}?${params}`, {
- method: 'POST',
- headers: { 'Content-Type': 'application/json' },
- body: JSON.stringify({ ocr_prompts: ocrPrompts }),
- })
-
- if (!res.ok) {
- const errBody = await res.json().catch(() => ({}))
- const detail = errBody.detail || `HTTP ${res.status}`
- return { success: false, vocabulary: [], error: `Seite ${pageIndex + 1}: ${detail}` }
- }
-
- const data = await res.json()
-
- if (!data.success) {
- return { success: false, vocabulary: [], error: data.error || `Seite ${pageIndex + 1}: Unbekannter Fehler` }
- }
-
- return { success: true, vocabulary: data.vocabulary || [], scanQuality: data.scan_quality }
- } catch (e) {
- return { success: false, vocabulary: [], error: `Seite ${pageIndex + 1}: ${e instanceof Error ? e.message : 'Netzwerkfehler'}` }
- }
+ setExtractionStatus(''); setSession(null)
+ } finally { setIsCreatingSession(false) }
}
const processSelectedPages = async () => {
if (!session || selectedPages.length === 0) return
-
const pagesToProcess = [...selectedPages].sort((a, b) => a - b)
+ setIsExtracting(true); setProcessingErrors([]); setSuccessfulPages([]); setFailedPages([])
+ setProcessingQueue(pagesToProcess); setVocabulary([]); setActiveTab('vocabulary')
- setIsExtracting(true)
- setProcessingErrors([])
- setSuccessfulPages([])
- setFailedPages([])
- setProcessingQueue(pagesToProcess)
- setVocabulary([])
-
- setActiveTab('vocabulary')
-
- const API_BASE = getApiBase()
- const errors: string[] = []
- const successful: number[] = []
- const failed: number[] = []
+ const errors: string[] = []; const successful: number[] = []; const failed: number[] = []
for (let i = 0; i < pagesToProcess.length; i++) {
const pageIndex = pagesToProcess[i]
setCurrentlyProcessingPage(pageIndex + 1)
setExtractionStatus(`Verarbeite Seite ${pageIndex + 1} von ${pagesToProcess.length}... (kann 30-60 Sekunden dauern)`)
-
- const result = await processSinglePage(pageIndex, ipaMode, syllableMode)
-
+ const result = await processSinglePage(session.id, pageIndex, ipaMode, syllableMode, ocrPrompts, ocrEnhance, ocrMaxCols, ocrMinConf)
if (result.success) {
- successful.push(pageIndex + 1)
- setSuccessfulPages([...successful])
- setVocabulary(prev => [...prev, ...result.vocabulary])
- const qualityInfo = result.scanQuality
- ? ` | Qualitaet: ${result.scanQuality.quality_pct}%${result.scanQuality.is_degraded ? ' (degradiert!)' : ''}`
- : ''
- setExtractionStatus(`Seite ${pageIndex + 1} fertig: ${result.vocabulary.length} Vokabeln gefunden${qualityInfo}`)
+ successful.push(pageIndex + 1); setSuccessfulPages([...successful]); setVocabulary(prev => [...prev, ...result.vocabulary])
+ const qi = result.scanQuality ? ` | Qualitaet: ${result.scanQuality.quality_pct}%${result.scanQuality.is_degraded ? ' (degradiert!)' : ''}` : ''
+ setExtractionStatus(`Seite ${pageIndex + 1} fertig: ${result.vocabulary.length} Vokabeln gefunden${qi}`)
} else {
- failed.push(pageIndex + 1)
- setFailedPages([...failed])
- if (result.error) {
- errors.push(result.error)
- setProcessingErrors([...errors])
- }
+ failed.push(pageIndex + 1); setFailedPages([...failed])
+ if (result.error) { errors.push(result.error); setProcessingErrors([...errors]) }
setExtractionStatus(`Seite ${pageIndex + 1} fehlgeschlagen`)
}
-
await new Promise(r => setTimeout(r, 500))
}
- setCurrentlyProcessingPage(null)
- setProcessingQueue([])
- setIsExtracting(false)
+ setCurrentlyProcessingPage(null); setProcessingQueue([]); setIsExtracting(false)
- if (successful.length === pagesToProcess.length) {
- setExtractionStatus(`Fertig! Alle ${successful.length} Seiten verarbeitet.`)
- } else if (successful.length > 0) {
- setExtractionStatus(`${successful.length} von ${pagesToProcess.length} Seiten verarbeitet. ${failed.length} fehlgeschlagen.`)
- } else {
- setExtractionStatus(`Alle Seiten fehlgeschlagen.`)
- }
+ if (successful.length === pagesToProcess.length) setExtractionStatus(`Fertig! Alle ${successful.length} Seiten verarbeitet.`)
+ else if (successful.length > 0) setExtractionStatus(`${successful.length} von ${pagesToProcess.length} Seiten verarbeitet. ${failed.length} fehlgeschlagen.`)
+ else setExtractionStatus(`Alle Seiten fehlgeschlagen.`)
- // Reload thumbnails for processed pages (server may have rotated them)
+ // Reload thumbnails for processed pages
if (successful.length > 0 && session) {
- const updatedThumbs = [...pagesThumbnails]
+ const API_BASE = getApiBase(); const updatedThumbs = [...pagesThumbnails]
for (const pageNum of successful) {
const idx = pageNum - 1
try {
- const thumbRes = await fetch(`${API_BASE}/api/v1/vocab/sessions/${session.id}/pdf-thumbnail/${idx}?hires=true&t=${Date.now()}`)
- if (thumbRes.ok) {
- const blob = await thumbRes.blob()
- if (updatedThumbs[idx]) URL.revokeObjectURL(updatedThumbs[idx])
- updatedThumbs[idx] = URL.createObjectURL(blob)
- }
- } catch (e) {
- console.error(`Failed to refresh thumbnail for page ${pageNum}`)
- }
+ const res = await fetch(`${API_BASE}/api/v1/vocab/sessions/${session.id}/pdf-thumbnail/${idx}?hires=true&t=${Date.now()}`)
+ if (res.ok) { if (updatedThumbs[idx]) URL.revokeObjectURL(updatedThumbs[idx]); updatedThumbs[idx] = URL.createObjectURL(await res.blob()) }
+ } catch {}
}
setPagesThumbnails(updatedThumbs)
}
-
setSession(prev => prev ? { ...prev, status: 'extracted' } : null)
}
- const togglePageSelection = (pageIndex: number) => {
- setSelectedPages(prev =>
- prev.includes(pageIndex)
- ? prev.filter(p => p !== pageIndex)
- : [...prev, pageIndex].sort((a, b) => a - b)
- )
- }
-
- const selectAllPages = () => setSelectedPages(
- Array.from({ length: pdfPageCount }, (_, i) => i).filter(p => !excludedPages.includes(p))
- )
+ const togglePageSelection = (i: number) => { setSelectedPages(p => p.includes(i) ? p.filter(x => x !== i) : [...p, i].sort((a, b) => a - b)) }
+ const selectAllPages = () => setSelectedPages(Array.from({ length: pdfPageCount }, (_, i) => i).filter(p => !excludedPages.includes(p)))
const selectNoPages = () => setSelectedPages([])
-
- const excludePage = (pageIndex: number, e: React.MouseEvent) => {
- e.stopPropagation()
- setExcludedPages(prev => [...prev, pageIndex])
- setSelectedPages(prev => prev.filter(p => p !== pageIndex))
- }
-
- const restoreExcludedPages = () => {
- setExcludedPages([])
- }
+ const excludePage = (i: number, e: React.MouseEvent) => { e.stopPropagation(); setExcludedPages(p => [...p, i]); setSelectedPages(p => p.filter(x => x !== i)) }
+ const restoreExcludedPages = () => setExcludedPages([])
const runOcrComparison = async (pageIndex: number) => {
if (!session) return
-
- setOcrComparePageIndex(pageIndex)
- setShowOcrComparison(true)
- setIsComparingOcr(true)
- setOcrCompareError(null)
- setOcrCompareResult(null)
-
- const API_BASE = getApiBase()
-
+ setOcrComparePageIndex(pageIndex); setShowOcrComparison(true); setIsComparingOcr(true); setOcrCompareError(null); setOcrCompareResult(null)
try {
- const res = await fetch(`${API_BASE}/api/v1/vocab/sessions/${session.id}/compare-ocr/${pageIndex}`, {
- method: 'POST',
- })
-
- if (!res.ok) {
- throw new Error(`HTTP ${res.status}`)
- }
-
- const data = await res.json()
- setOcrCompareResult(data)
- } catch (e) {
- setOcrCompareError(e instanceof Error ? e.message : 'Vergleich fehlgeschlagen')
- } finally {
- setIsComparingOcr(false)
- }
+ const res = await fetch(`${getApiBase()}/api/v1/vocab/sessions/${session.id}/compare-ocr/${pageIndex}`, { method: 'POST' })
+ if (!res.ok) throw new Error(`HTTP ${res.status}`)
+ setOcrCompareResult(await res.json())
+ } catch (e) { setOcrCompareError(e instanceof Error ? e.message : 'Vergleich fehlgeschlagen') }
+ finally { setIsComparingOcr(false) }
}
const updateVocabularyEntry = (id: string, field: string, value: string) => {
setVocabulary(prev => prev.map(v => {
if (v.id !== id) return v
- if (field === 'english' || field === 'german' || field === 'example_sentence' || field === 'word_type') {
- return { ...v, [field]: value }
- }
+ if (field === 'english' || field === 'german' || field === 'example_sentence' || field === 'word_type') return { ...v, [field]: value }
return { ...v, extras: { ...(v.extras || {}), [field]: value } }
}))
}
const addExtraColumn = (sourcePage: number) => {
- const label = prompt('Spaltenname:')
- if (!label || !label.trim()) return
+ const label = prompt('Spaltenname:'); if (!label || !label.trim()) return
const key = `extra_${Date.now()}`
- setPageExtraColumns(prev => ({
- ...prev,
- [sourcePage]: [...(prev[sourcePage] || []), { key, label: label.trim() }],
- }))
+ setPageExtraColumns(prev => ({ ...prev, [sourcePage]: [...(prev[sourcePage] || []), { key, label: label.trim() }] }))
}
const removeExtraColumn = (sourcePage: number, key: string) => {
- setPageExtraColumns(prev => ({
- ...prev,
- [sourcePage]: (prev[sourcePage] || []).filter(c => c.key !== key),
- }))
- setVocabulary(prev => prev.map(v => {
- if (!v.extras || !(key in v.extras)) return v
- const { [key]: _, ...rest } = v.extras
- return { ...v, extras: rest }
- }))
+ setPageExtraColumns(prev => ({ ...prev, [sourcePage]: (prev[sourcePage] || []).filter(c => c.key !== key) }))
+ setVocabulary(prev => prev.map(v => { if (!v.extras || !(key in v.extras)) return v; const { [key]: _, ...rest } = v.extras; return { ...v, extras: rest } }))
}
- const getExtraColumnsForPage = (sourcePage: number): ExtraColumn[] => {
- const global = pageExtraColumns[0] || []
- const pageSpecific = pageExtraColumns[sourcePage] || []
- return [...global, ...pageSpecific]
- }
+ const getExtraColumnsForPage = (sourcePage: number): ExtraColumn[] => [...(pageExtraColumns[0] || []), ...(pageExtraColumns[sourcePage] || [])]
const getAllExtraColumns = (): ExtraColumn[] => {
- const seen = new Set()
- const result: ExtraColumn[] = []
- for (const cols of Object.values(pageExtraColumns)) {
- for (const col of cols) {
- if (!seen.has(col.key)) {
- seen.add(col.key)
- result.push(col)
- }
- }
- }
+ const seen = new Set(); const result: ExtraColumn[] = []
+ for (const cols of Object.values(pageExtraColumns)) for (const col of cols) { if (!seen.has(col.key)) { seen.add(col.key); result.push(col) } }
return result
}
- const deleteVocabularyEntry = (id: string) => {
- setVocabulary(prev => prev.filter(v => v.id !== id))
- }
-
- const toggleVocabularySelection = (id: string) => {
- setVocabulary(prev => prev.map(v =>
- v.id === id ? { ...v, selected: !v.selected } : v
- ))
- }
-
- const toggleAllSelection = () => {
- const allSelected = vocabulary.every(v => v.selected)
- setVocabulary(prev => prev.map(v => ({ ...v, selected: !allSelected })))
- }
+ const deleteVocabularyEntry = (id: string) => setVocabulary(prev => prev.filter(v => v.id !== id))
+ const toggleVocabularySelection = (id: string) => setVocabulary(prev => prev.map(v => v.id === id ? { ...v, selected: !v.selected } : v))
+ const toggleAllSelection = () => { const all = vocabulary.every(v => v.selected); setVocabulary(prev => prev.map(v => ({ ...v, selected: !all }))) }
const addVocabularyEntry = (atIndex?: number) => {
- const newEntry: VocabularyEntry = {
- id: `new-${Date.now()}`,
- english: '',
- german: '',
- example_sentence: '',
- selected: true
- }
- setVocabulary(prev => {
- if (atIndex === undefined) {
- return [...prev, newEntry]
- }
- const newList = [...prev]
- newList.splice(atIndex, 0, newEntry)
- return newList
- })
+ const ne: VocabularyEntry = { id: `new-${Date.now()}`, english: '', german: '', example_sentence: '', selected: true }
+ setVocabulary(prev => { if (atIndex === undefined) return [...prev, ne]; const nl = [...prev]; nl.splice(atIndex, 0, ne); return nl })
}
const saveVocabulary = async () => {
if (!session) return
- const API_BASE = getApiBase()
-
- try {
- await fetch(`${API_BASE}/api/v1/vocab/sessions/${session.id}/vocabulary`, {
- method: 'PUT',
- headers: { 'Content-Type': 'application/json' },
- body: JSON.stringify({ vocabulary }),
- })
- } catch (error) {
- console.error('Failed to save vocabulary:', error)
- }
+ try { await fetch(`${getApiBase()}/api/v1/vocab/sessions/${session.id}/vocabulary`, { method: 'PUT', headers: { 'Content-Type': 'application/json' }, body: JSON.stringify({ vocabulary }) }) }
+ catch (e) { console.error('Failed to save vocabulary:', e) }
}
const generateWorksheet = async () => {
- if (!session) return
- if (selectedFormat === 'standard' && selectedTypes.length === 0) return
-
+ if (!session) return; if (selectedFormat === 'standard' && selectedTypes.length === 0) return
setIsGenerating(true)
- const API_BASE = getApiBase()
-
try {
await saveVocabulary()
-
- let res: Response
-
- if (selectedFormat === 'nru') {
- res = await fetch(`${API_BASE}/api/v1/vocab/sessions/${session.id}/generate-nru`, {
- method: 'POST',
- headers: { 'Content-Type': 'application/json' },
- body: JSON.stringify({
- title: worksheetTitle || session.name,
- include_solutions: includeSolutions,
- }),
- })
- } else {
- res = await fetch(`${API_BASE}/api/v1/vocab/sessions/${session.id}/generate`, {
- method: 'POST',
- headers: { 'Content-Type': 'application/json' },
- body: JSON.stringify({
- worksheet_types: selectedTypes,
- title: worksheetTitle || session.name,
- include_solutions: includeSolutions,
- line_height: lineHeight,
- }),
- })
- }
-
- if (res.ok) {
- const data = await res.json()
- setWorksheetId(data.worksheet_id || data.id)
- setActiveTab('export')
- completeActivity({ vocabCount: vocabulary.length })
- }
- } catch (error) {
- console.error('Failed to generate worksheet:', error)
- } finally {
- setIsGenerating(false)
- }
+ const API_BASE = getApiBase()
+ const endpoint = selectedFormat === 'nru' ? 'generate-nru' : 'generate'
+ const body = selectedFormat === 'nru'
+ ? { title: worksheetTitle || session.name, include_solutions: includeSolutions }
+ : { worksheet_types: selectedTypes, title: worksheetTitle || session.name, include_solutions: includeSolutions, line_height: lineHeight }
+ const res = await fetch(`${API_BASE}/api/v1/vocab/sessions/${session.id}/${endpoint}`, {
+ method: 'POST', headers: { 'Content-Type': 'application/json' }, body: JSON.stringify(body),
+ })
+ if (res.ok) { const data = await res.json(); setWorksheetId(data.worksheet_id || data.id); setActiveTab('export'); completeActivity({ vocabCount: vocabulary.length }) }
+ } catch (e) { console.error('Failed to generate worksheet:', e) }
+ finally { setIsGenerating(false) }
}
const downloadPDF = (type: 'worksheet' | 'solution') => {
if (!worksheetId) return
- const API_BASE = getApiBase()
- const endpoint = type === 'worksheet' ? 'pdf' : 'solution'
- window.open(`${API_BASE}/api/v1/vocab/worksheets/${worksheetId}/${endpoint}`, '_blank')
+ window.open(`${getApiBase()}/api/v1/vocab/worksheets/${worksheetId}/${type === 'worksheet' ? 'pdf' : 'solution'}`, '_blank')
}
- const toggleWorksheetType = (type: WorksheetType) => {
- setSelectedTypes(prev =>
- prev.includes(type) ? prev.filter(t => t !== type) : [...prev, type]
- )
- }
+ const toggleWorksheetType = (type: WorksheetType) => setSelectedTypes(prev => prev.includes(type) ? prev.filter(t => t !== type) : [...prev, type])
const resumeSession = async (existingSession: Session) => {
- setError(null)
- setExtractionStatus('Session wird geladen...')
-
- const API_BASE = getApiBase()
-
- try {
- const sessionRes = await fetch(`${API_BASE}/api/v1/vocab/sessions/${existingSession.id}`)
- if (!sessionRes.ok) throw new Error('Session nicht gefunden')
- const sessionData = await sessionRes.json()
- setSession(sessionData)
- setWorksheetTitle(sessionData.name)
-
- if (sessionData.status === 'extracted' || sessionData.status === 'completed') {
- const vocabRes = await fetch(`${API_BASE}/api/v1/vocab/sessions/${existingSession.id}/vocabulary`)
- if (vocabRes.ok) {
- const vocabData = await vocabRes.json()
- setVocabulary(vocabData.vocabulary || [])
- }
- setActiveTab('vocabulary')
- setExtractionStatus('')
- } else if (sessionData.status === 'pending') {
- setActiveTab('upload')
- setExtractionStatus('Diese Session hat noch keine Vokabeln. Bitte laden Sie ein Dokument hoch.')
- } else {
- setActiveTab('vocabulary')
- setExtractionStatus('')
- }
-
- } catch (error) {
- console.error('Failed to resume session:', error)
- setError(error instanceof Error ? error.message : 'Fehler beim Laden der Session')
- setExtractionStatus('')
- }
+ setError(null); setExtractionStatus('Session wird geladen...')
+ try { await resumeSessionFlow(existingSession, setSession, setWorksheetTitle, setVocabulary, setActiveTab, setExtractionStatus) }
+ catch (e) { setError(e instanceof Error ? e.message : 'Fehler beim Laden der Session'); setExtractionStatus('') }
}
const resetSession = async () => {
- setSession(null)
- setSessionName('')
- setVocabulary([])
- setUploadedImage(null)
- setWorksheetId(null)
- setSelectedDocumentId(null)
- setDirectFile(null)
- setDirectFilePreview(null)
- setShowFullPreview(false)
- setPdfPageCount(0)
- setSelectedPages([])
- setPagesThumbnails([])
- setExcludedPages([])
- setActiveTab('upload')
- setError(null)
- setExtractionStatus('')
-
- const API_BASE = getApiBase()
- try {
- const res = await fetch(`${API_BASE}/api/v1/vocab/sessions`)
- if (res.ok) {
- const sessions = await res.json()
- setExistingSessions(sessions)
- }
- } catch (e) {
- console.error('Failed to reload sessions:', e)
- }
+ setSession(null); setSessionName(''); setVocabulary([]); setUploadedImage(null); setWorksheetId(null)
+ setSelectedDocumentId(null); setDirectFile(null); setDirectFilePreview(null); setShowFullPreview(false)
+ setPdfPageCount(0); setSelectedPages([]); setPagesThumbnails([]); setExcludedPages([])
+ setActiveTab('upload'); setError(null); setExtractionStatus('')
+ try { const res = await fetch(`${getApiBase()}/api/v1/vocab/sessions`); if (res.ok) setExistingSessions(await res.json()) } catch {}
}
const deleteSession = async (sessionId: string, e: React.MouseEvent) => {
e.stopPropagation()
- if (!confirm('Session wirklich loeschen? Diese Aktion kann nicht rueckgaengig gemacht werden.')) {
- return
- }
-
- const API_BASE = getApiBase()
- try {
- const res = await fetch(`${API_BASE}/api/v1/vocab/sessions/${sessionId}`, {
- method: 'DELETE',
- })
- if (res.ok) {
- setExistingSessions(prev => prev.filter(s => s.id !== sessionId))
- }
- } catch (e) {
- console.error('Failed to delete session:', e)
- }
+ if (!confirm('Session wirklich loeschen? Diese Aktion kann nicht rueckgaengig gemacht werden.')) return
+ try { const res = await fetch(`${getApiBase()}/api/v1/vocab/sessions/${sessionId}`, { method: 'DELETE' }); if (res.ok) setExistingSessions(prev => prev.filter(s => s.id !== sessionId)) } catch {}
}
- // Reprocess all successful pages with new IPA/syllable modes
const reprocessPages = (ipa: IpaMode, syllable: SyllableMode) => {
if (!session) return
+ let pages: number[]
+ if (successfulPages.length > 0) pages = successfulPages.map(p => p - 1)
+ else if (vocabulary.length > 0) pages = [...new Set(vocabulary.map(v => (v.source_page || 1) - 1))].sort((a, b) => a - b)
+ else if (selectedPages.length > 0) pages = [...selectedPages]
+ else pages = [0]
+ if (pages.length === 0) return
- // Determine pages to reprocess: use successfulPages if available,
- // otherwise derive from vocabulary source_page or selectedPages
- let pagesToReprocess: number[]
- if (successfulPages.length > 0) {
- pagesToReprocess = successfulPages.map(p => p - 1)
- } else if (vocabulary.length > 0) {
- // Derive from vocabulary entries' source_page (1-indexed → 0-indexed)
- const pageSet = new Set(vocabulary.map(v => (v.source_page || 1) - 1))
- pagesToReprocess = [...pageSet].sort((a, b) => a - b)
- } else if (selectedPages.length > 0) {
- pagesToReprocess = [...selectedPages]
- } else {
- // Fallback: try page 0
- pagesToReprocess = [0]
- }
-
- if (pagesToReprocess.length === 0) return
-
- setIsExtracting(true)
- setExtractionStatus('Verarbeite mit neuen Einstellungen...')
- const API_BASE = getApiBase()
-
+ setIsExtracting(true); setExtractionStatus('Verarbeite mit neuen Einstellungen...')
;(async () => {
- const allVocab: VocabularyEntry[] = []
- let lastQuality: any = null
- for (const pageIndex of pagesToReprocess) {
- setExtractionStatus(`Verarbeite Seite ${pageIndex + 1}...`)
- try {
- const params = new URLSearchParams({
- ipa_mode: ipa,
- syllable_mode: syllable,
- enhance: String(ocrEnhance),
- max_cols: String(ocrMaxCols),
- min_conf: String(ocrMinConf),
- })
- const res = await fetch(`${API_BASE}/api/v1/vocab/sessions/${session.id}/process-single-page/${pageIndex}?${params}`, {
- method: 'POST',
- headers: { 'Content-Type': 'application/json' },
- body: JSON.stringify({ ocr_prompts: ocrPrompts }),
- })
- if (res.ok) {
- const data = await res.json()
- if (data.vocabulary) allVocab.push(...data.vocabulary)
- if (data.scan_quality) lastQuality = data.scan_quality
- }
- } catch {}
- }
- setVocabulary(allVocab)
- setIsExtracting(false)
- const qualityInfo = lastQuality
- ? ` | Qualitaet: ${lastQuality.quality_pct}%${lastQuality.is_degraded ? ' (degradiert!)' : ''} | Blur: ${lastQuality.blur_score} | Kontrast: ${lastQuality.contrast_score}`
- : ''
+ const { vocabulary: allVocab, qualityInfo } = await reprocessPagesFlow(
+ session.id, pages, ipa, syllable, ocrPrompts, ocrEnhance, ocrMaxCols, ocrMinConf, setExtractionStatus
+ )
+ setVocabulary(allVocab); setIsExtracting(false)
setExtractionStatus(`${allVocab.length} Vokabeln mit neuen Einstellungen${qualityInfo}`)
})()
}
return {
- // Mounted
- mounted,
- // Theme
- isDark, glassCard, glassInput,
- // Tab
+ mounted, isDark, glassCard, glassInput,
activeTab, setActiveTab,
- // Session
session, sessionName, setSessionName, isCreatingSession, error, setError, extractionStatus,
- // Existing sessions
existingSessions, isLoadingSessions,
- // Documents
storedDocuments, selectedDocumentId, setSelectedDocumentId,
- // Direct file
directFile, setDirectFile, directFilePreview, showFullPreview, setShowFullPreview, directFileInputRef,
- // PDF pages
pdfPageCount, selectedPages, pagesThumbnails, isLoadingThumbnails, excludedPages,
- // Extra columns
pageExtraColumns,
- // Upload
uploadedImage, isExtracting,
- // Vocabulary
vocabulary,
- // Worksheet
selectedTypes, worksheetTitle, setWorksheetTitle,
- includeSolutions, setIncludeSolutions,
- lineHeight, setLineHeight,
- selectedFormat, setSelectedFormat,
- ipaMode, setIpaMode, syllableMode, setSyllableMode,
- // Export
+ includeSolutions, setIncludeSolutions, lineHeight, setLineHeight,
+ selectedFormat, setSelectedFormat, ipaMode, setIpaMode, syllableMode, setSyllableMode,
worksheetId, isGenerating,
- // Processing
processingErrors, successfulPages, failedPages, currentlyProcessingPage,
- // OCR settings
ocrPrompts, showSettings, setShowSettings,
- // QR
showQRModal, setShowQRModal, uploadSessionId,
mobileUploadedFiles, selectedMobileFile, setSelectedMobileFile, setMobileUploadedFiles,
- // OCR Comparison
showOcrComparison, setShowOcrComparison,
ocrComparePageIndex, ocrCompareResult, isComparingOcr, ocrCompareError,
- // Handlers
handleDirectFileSelect, startSession, processSelectedPages,
togglePageSelection, selectAllPages, selectNoPages, excludePage, restoreExcludedPages,
runOcrComparison,
diff --git a/studio-v2/app/worksheet-cleanup/_components/GlassCard.tsx b/studio-v2/app/worksheet-cleanup/_components/GlassCard.tsx
new file mode 100644
index 0000000..fc0d5f2
--- /dev/null
+++ b/studio-v2/app/worksheet-cleanup/_components/GlassCard.tsx
@@ -0,0 +1,49 @@
+'use client'
+
+import { useState, useEffect } from 'react'
+
+interface GlassCardProps {
+ children: React.ReactNode
+ className?: string
+ onClick?: () => void
+ size?: 'sm' | 'md' | 'lg'
+ delay?: number
+ isDark?: boolean
+}
+
+export function GlassCard({ children, className = '', onClick, size = 'md', delay = 0, isDark = true }: GlassCardProps) {
+ const [isVisible, setIsVisible] = useState(false)
+ const [isHovered, setIsHovered] = useState(false)
+
+ useEffect(() => {
+ const timer = setTimeout(() => setIsVisible(true), delay)
+ return () => clearTimeout(timer)
+ }, [delay])
+
+ const sizeClasses = { sm: 'p-4', md: 'p-5', lg: 'p-6' }
+
+ return (
+ setIsHovered(true)}
+ onMouseLeave={() => setIsHovered(false)}
+ onClick={onClick}
+ >
+ {children}
+
+ )
+}
diff --git a/studio-v2/app/worksheet-cleanup/_components/PreviewStep.tsx b/studio-v2/app/worksheet-cleanup/_components/PreviewStep.tsx
new file mode 100644
index 0000000..b9259bc
--- /dev/null
+++ b/studio-v2/app/worksheet-cleanup/_components/PreviewStep.tsx
@@ -0,0 +1,127 @@
+'use client'
+
+import { GlassCard } from './GlassCard'
+import { ProgressRing } from './ProgressRing'
+
+interface PreviewResult {
+ has_handwriting: boolean
+ confidence: number
+ handwriting_ratio: number
+ image_width: number
+ image_height: number
+ estimated_times_ms: {
+ detection: number
+ inpainting: number
+ reconstruction: number
+ total: number
+ }
+}
+
+interface PreviewStepProps {
+ previewResult: PreviewResult
+ previewUrl: string | null
+ maskUrl: string | null
+ removeHandwriting: boolean
+ reconstructLayout: boolean
+ isProcessing: boolean
+ onBack: () => void
+ onCleanup: () => void
+ onGetMask: () => void
+}
+
+export function PreviewStep({
+ previewResult, previewUrl, maskUrl,
+ removeHandwriting, reconstructLayout, isProcessing,
+ onBack, onCleanup, onGetMask
+}: PreviewStepProps) {
+ return (
+
+
+ Analyse
+
+
+ {previewResult.has_handwriting ? 'Handschrift erkannt' : 'Keine Handschrift gefunden'}
+
+
+
+
+ Geschätzte Zeit
+
+
+ Erkennung
+ ~{Math.round(previewResult.estimated_times_ms.detection / 1000)}s
+
+ {removeHandwriting && previewResult.has_handwriting && (
+
+ Bereinigung
+ ~{Math.round(previewResult.estimated_times_ms.inpainting / 1000)}s
+
+ )}
+ {reconstructLayout && (
+
+ Layout
+ ~{Math.round(previewResult.estimated_times_ms.reconstruction / 1000)}s
+
+ )}
+
+ Gesamt
+ ~{Math.round(previewResult.estimated_times_ms.total / 1000)}s
+
+
+
+
+
+ Bild-Info
+
+
Breite {previewResult.image_width}px
+
Höhe {previewResult.image_height}px
+
Pixel {(previewResult.image_width * previewResult.image_height / 1000000).toFixed(1)}MP
+
+
+ Maske anzeigen
+
+
+
+
+ Original
+ {previewUrl && }
+
+
+ {maskUrl && (
+
+ Maske
+
+
+ )}
+
+
+
+
+ Zurück
+
+
+
+ Bereinigen starten
+
+
+
+ )
+}
diff --git a/studio-v2/app/worksheet-cleanup/_components/ProgressRing.tsx b/studio-v2/app/worksheet-cleanup/_components/ProgressRing.tsx
new file mode 100644
index 0000000..b6045ea
--- /dev/null
+++ b/studio-v2/app/worksheet-cleanup/_components/ProgressRing.tsx
@@ -0,0 +1,47 @@
+'use client'
+
+interface ProgressRingProps {
+ progress: number
+ size?: number
+ strokeWidth?: number
+ label: string
+ value: string
+ color?: string
+}
+
+export function ProgressRing({
+ progress,
+ size = 80,
+ strokeWidth = 6,
+ label,
+ value,
+ color = '#a78bfa'
+}: ProgressRingProps) {
+ const radius = (size - strokeWidth) / 2
+ const circumference = radius * 2 * Math.PI
+ const offset = circumference - (progress / 100) * circumference
+
+ return (
+
+ )
+}
diff --git a/studio-v2/app/worksheet-cleanup/_components/ResultStep.tsx b/studio-v2/app/worksheet-cleanup/_components/ResultStep.tsx
new file mode 100644
index 0000000..38c1f74
--- /dev/null
+++ b/studio-v2/app/worksheet-cleanup/_components/ResultStep.tsx
@@ -0,0 +1,87 @@
+'use client'
+
+import { GlassCard } from './GlassCard'
+
+interface PipelineResult {
+ success: boolean
+ handwriting_detected: boolean
+ handwriting_removed: boolean
+ layout_reconstructed: boolean
+ cleaned_image_base64?: string
+ fabric_json?: any
+ metadata: any
+}
+
+interface ResultStepProps {
+ pipelineResult: PipelineResult
+ previewUrl: string | null
+ cleanedUrl: string | null
+ onReset: () => void
+ onOpenInEditor: () => void
+}
+
+export function ResultStep({ pipelineResult, previewUrl, cleanedUrl, onReset, onOpenInEditor }: ResultStepProps) {
+ return (
+
+
+
+
+ {pipelineResult.success ? (
+
+ ) : (
+
+ )}
+
+
+
+ {pipelineResult.success ? 'Erfolgreich bereinigt!' : 'Verarbeitung fehlgeschlagen'}
+
+
+ {pipelineResult.handwriting_removed
+ ? `Handschrift wurde entfernt. ${pipelineResult.metadata?.layout?.element_count || 0} Elemente erkannt.`
+ : pipelineResult.handwriting_detected
+ ? 'Handschrift erkannt, aber nicht entfernt'
+ : 'Keine Handschrift im Bild gefunden'}
+
+
+
+
+
+
+ Original
+ {previewUrl && }
+
+
+
+ Bereinigt
+ {cleanedUrl ? (
+
+ ) : (
+ Kein Bild
+ )}
+
+
+
+
+
+ Neues Bild
+
+ {cleanedUrl && (
+
+
+ Download
+
+ )}
+ {pipelineResult.layout_reconstructed && pipelineResult.fabric_json && (
+
+
+ Im Editor öffnen
+
+ )}
+
+
+ )
+}
diff --git a/studio-v2/app/worksheet-cleanup/_components/UploadStep.tsx b/studio-v2/app/worksheet-cleanup/_components/UploadStep.tsx
new file mode 100644
index 0000000..25aab00
--- /dev/null
+++ b/studio-v2/app/worksheet-cleanup/_components/UploadStep.tsx
@@ -0,0 +1,125 @@
+'use client'
+
+import { GlassCard } from './GlassCard'
+
+interface UploadStepProps {
+ isDark: boolean
+ previewUrl: string | null
+ file: File | null
+ removeHandwriting: boolean
+ setRemoveHandwriting: (v: boolean) => void
+ reconstructLayout: boolean
+ setReconstructLayout: (v: boolean) => void
+ inpaintingMethod: string
+ setInpaintingMethod: (v: string) => void
+ isPreviewing: boolean
+ onDrop: (e: React.DragEvent) => void
+ onFileSelect: (file: File) => void
+ onPreview: () => void
+ onQRClick: () => void
+}
+
+export function UploadStep({
+ isDark, previewUrl, file,
+ removeHandwriting, setRemoveHandwriting,
+ reconstructLayout, setReconstructLayout,
+ inpaintingMethod, setInpaintingMethod,
+ isPreviewing, onDrop, onFileSelect, onPreview, onQRClick
+}: UploadStepProps) {
+ return (
+
+
+ e.preventDefault()}
+ onClick={() => document.getElementById('file-input')?.click()}
+ >
+
e.target.files?.[0] && onFileSelect(e.target.files[0])}
+ className="hidden" />
+ {previewUrl ? (
+
+
+
{file?.name}
+
Klicke zum Ändern
+
+ ) : (
+ <>
+
+
+
+
Datei auswählen
+
Ziehe ein Bild hierher oder klicke
+
PNG, JPG, JPEG
+ >
+ )}
+
+
+
+
+
+
+ 📱
+
+
Mit Handy scannen
+
QR-Code scannen um Foto hochzuladen
+
Im lokalen Netzwerk
+
+
+
+ {file && (
+ <>
+
+ Optionen
+
+
+ setRemoveHandwriting(e.target.checked)}
+ className="w-5 h-5 rounded bg-white/10 border-white/20 text-purple-500 focus:ring-purple-500" />
+
+
Handschrift entfernen
+
Erkennt und entfernt handgeschriebene Inhalte
+
+
+
+ setReconstructLayout(e.target.checked)}
+ className="w-5 h-5 rounded bg-white/10 border-white/20 text-purple-500 focus:ring-purple-500" />
+
+
Layout rekonstruieren
+
Erstellt bearbeitbare Textblöcke
+
+
+
+
+
+
+ Methode
+ setInpaintingMethod(e.target.value)}
+ className="w-full p-3 rounded-xl bg-white/10 border border-white/20 text-white focus:ring-2 focus:ring-purple-500 focus:border-transparent">
+ Automatisch (empfohlen)
+ OpenCV Telea (schnell)
+ OpenCV NS (glatter)
+
+
+ Die automatische Methode wählt die beste Option basierend auf dem Bildinhalt.
+
+
+
+
+
+ {isPreviewing ? (
+ <>
Analysiere...>
+ ) : (
+ <> Vorschau>
+ )}
+
+
+ >
+ )}
+
+ )
+}
diff --git a/studio-v2/app/worksheet-cleanup/page.tsx b/studio-v2/app/worksheet-cleanup/page.tsx
index 36d9ad0..785dcbe 100644
--- a/studio-v2/app/worksheet-cleanup/page.tsx
+++ b/studio-v2/app/worksheet-cleanup/page.tsx
@@ -7,220 +7,56 @@ import { Sidebar } from '@/components/Sidebar'
import { ThemeToggle } from '@/components/ThemeToggle'
import { LanguageDropdown } from '@/components/LanguageDropdown'
import { QRCodeUpload, UploadedFile } from '@/components/QRCodeUpload'
+import { GlassCard } from './_components/GlassCard'
+import { UploadStep } from './_components/UploadStep'
+import { PreviewStep } from './_components/PreviewStep'
+import { ResultStep } from './_components/ResultStep'
-// LocalStorage Key for upload session
const SESSION_ID_KEY = 'bp_cleanup_session'
-/**
- * Worksheet Cleanup Page - Apple Weather Dashboard Style
- *
- * Design principles:
- * - Dark gradient background
- * - Ultra-translucent glass cards (~8% opacity)
- * - White text, monochrome palette
- * - Step-by-step cleanup wizard
- */
-
-// =============================================================================
-// GLASS CARD - Ultra Transparent
-// =============================================================================
-
-interface GlassCardProps {
- children: React.ReactNode
- className?: string
- onClick?: () => void
- size?: 'sm' | 'md' | 'lg'
- delay?: number
-}
-
-function GlassCard({ children, className = '', onClick, size = 'md', delay = 0, isDark = true }: GlassCardProps & { isDark?: boolean }) {
- const [isVisible, setIsVisible] = useState(false)
- const [isHovered, setIsHovered] = useState(false)
-
- useEffect(() => {
- const timer = setTimeout(() => setIsVisible(true), delay)
- return () => clearTimeout(timer)
- }, [delay])
-
- const sizeClasses = {
- sm: 'p-4',
- md: 'p-5',
- lg: 'p-6',
- }
-
- return (
- setIsHovered(true)}
- onMouseLeave={() => setIsHovered(false)}
- onClick={onClick}
- >
- {children}
-
- )
-}
-
-// =============================================================================
-// PROGRESS RING
-// =============================================================================
-
-interface ProgressRingProps {
- progress: number
- size?: number
- strokeWidth?: number
- label: string
- value: string
- color?: string
-}
-
-function ProgressRing({
- progress,
- size = 80,
- strokeWidth = 6,
- label,
- value,
- color = '#a78bfa'
-}: ProgressRingProps) {
- const radius = (size - strokeWidth) / 2
- const circumference = radius * 2 * Math.PI
- const offset = circumference - (progress / 100) * circumference
-
- return (
-
- )
-}
-
-// =============================================================================
-// TYPES
-// =============================================================================
-
interface PreviewResult {
- has_handwriting: boolean
- confidence: number
- handwriting_ratio: number
- image_width: number
- image_height: number
- estimated_times_ms: {
- detection: number
- inpainting: number
- reconstruction: number
- total: number
- }
+ has_handwriting: boolean; confidence: number; handwriting_ratio: number
+ image_width: number; image_height: number
+ estimated_times_ms: { detection: number; inpainting: number; reconstruction: number; total: number }
}
interface PipelineResult {
- success: boolean
- handwriting_detected: boolean
- handwriting_removed: boolean
- layout_reconstructed: boolean
- cleaned_image_base64?: string
- fabric_json?: any
- metadata: any
+ success: boolean; handwriting_detected: boolean; handwriting_removed: boolean
+ layout_reconstructed: boolean; cleaned_image_base64?: string; fabric_json?: any; metadata: any
}
-// =============================================================================
-// MAIN PAGE
-// =============================================================================
-
export default function WorksheetCleanupPage() {
const { isDark } = useTheme()
const router = useRouter()
- // File state
const [file, setFile] = useState(null)
const [previewUrl, setPreviewUrl] = useState(null)
const [cleanedUrl, setCleanedUrl] = useState(null)
const [maskUrl, setMaskUrl] = useState(null)
-
- // Loading states
const [isPreviewing, setIsPreviewing] = useState(false)
const [isProcessing, setIsProcessing] = useState(false)
const [error, setError] = useState(null)
-
- // Results
const [previewResult, setPreviewResult] = useState(null)
const [pipelineResult, setPipelineResult] = useState(null)
-
- // Options
const [removeHandwriting, setRemoveHandwriting] = useState(true)
const [reconstructLayout, setReconstructLayout] = useState(true)
const [inpaintingMethod, setInpaintingMethod] = useState('auto')
-
- // Step tracking
const [currentStep, setCurrentStep] = useState<'upload' | 'preview' | 'processing' | 'result'>('upload')
-
- // QR Code Upload
const [showQRModal, setShowQRModal] = useState(false)
const [uploadSessionId, setUploadSessionId] = useState('')
const [mobileUploadedFiles, setMobileUploadedFiles] = useState([])
- // Format file size
const formatFileSize = (bytes: number): string => {
if (bytes === 0) return '0 B'
- const k = 1024
- const sizes = ['B', 'KB', 'MB', 'GB']
+ const k = 1024; const sizes = ['B', 'KB', 'MB', 'GB']
const i = Math.floor(Math.log(bytes) / Math.log(k))
return parseFloat((bytes / Math.pow(k, i)).toFixed(1)) + ' ' + sizes[i]
}
- // Initialize upload session ID
useEffect(() => {
- let storedSessionId = localStorage.getItem(SESSION_ID_KEY)
- if (!storedSessionId) {
- storedSessionId = `cleanup-${Date.now()}-${Math.random().toString(36).substr(2, 9)}`
- localStorage.setItem(SESSION_ID_KEY, storedSessionId)
- }
- setUploadSessionId(storedSessionId)
+ let sid = localStorage.getItem(SESSION_ID_KEY)
+ if (!sid) { sid = `cleanup-${Date.now()}-${Math.random().toString(36).substr(2, 9)}`; localStorage.setItem(SESSION_ID_KEY, sid) }
+ setUploadSessionId(sid)
}, [])
const getApiUrl = useCallback(() => {
@@ -229,661 +65,182 @@ export default function WorksheetCleanupPage() {
return hostname === 'localhost' ? 'http://localhost:8086' : `${protocol}//${hostname}:8086`
}, [])
- // Handle file selection
const handleFileSelect = useCallback((selectedFile: File) => {
- setFile(selectedFile)
- setError(null)
- setPreviewResult(null)
- setPipelineResult(null)
- setCleanedUrl(null)
- setMaskUrl(null)
-
- const url = URL.createObjectURL(selectedFile)
- setPreviewUrl(url)
- setCurrentStep('upload')
+ setFile(selectedFile); setError(null); setPreviewResult(null); setPipelineResult(null)
+ setCleanedUrl(null); setMaskUrl(null)
+ setPreviewUrl(URL.createObjectURL(selectedFile)); setCurrentStep('upload')
}, [])
- // Handle mobile file selection - convert to File and trigger handleFileSelect
const handleMobileFileSelect = useCallback(async (uploadedFile: UploadedFile) => {
try {
const base64Data = uploadedFile.dataUrl.split(',')[1]
const byteCharacters = atob(base64Data)
const byteNumbers = new Array(byteCharacters.length)
- for (let i = 0; i < byteCharacters.length; i++) {
- byteNumbers[i] = byteCharacters.charCodeAt(i)
- }
- const byteArray = new Uint8Array(byteNumbers)
- const blob = new Blob([byteArray], { type: uploadedFile.type })
- const file = new File([blob], uploadedFile.name, { type: uploadedFile.type })
- handleFileSelect(file)
+ for (let i = 0; i < byteCharacters.length; i++) byteNumbers[i] = byteCharacters.charCodeAt(i)
+ const blob = new Blob([new Uint8Array(byteNumbers)], { type: uploadedFile.type })
+ handleFileSelect(new File([blob], uploadedFile.name, { type: uploadedFile.type }))
setShowQRModal(false)
- } catch (error) {
- console.error('Failed to convert mobile file:', error)
- setError('Fehler beim Laden der Datei vom Handy')
- }
+ } catch { setError('Fehler beim Laden der Datei vom Handy') }
}, [handleFileSelect])
- // Handle drop
const handleDrop = useCallback((e: React.DragEvent) => {
e.preventDefault()
- const droppedFile = e.dataTransfer.files[0]
- if (droppedFile && droppedFile.type.startsWith('image/')) {
- handleFileSelect(droppedFile)
- }
+ const f = e.dataTransfer.files[0]
+ if (f && f.type.startsWith('image/')) handleFileSelect(f)
}, [handleFileSelect])
- // Preview cleanup
const handlePreview = useCallback(async () => {
- if (!file) return
-
- setIsPreviewing(true)
- setError(null)
-
+ if (!file) return; setIsPreviewing(true); setError(null)
try {
- const formData = new FormData()
- formData.append('image', file)
-
- const response = await fetch(`${getApiUrl()}/api/v1/worksheet/preview-cleanup`, {
- method: 'POST',
- body: formData
- })
-
- if (!response.ok) {
- throw new Error(`HTTP ${response.status}`)
- }
-
- const result = await response.json()
- setPreviewResult(result)
- setCurrentStep('preview')
- } catch (err) {
- console.error('Preview failed:', err)
- setError(err instanceof Error ? err.message : 'Vorschau fehlgeschlagen')
- } finally {
- setIsPreviewing(false)
- }
+ const fd = new FormData(); fd.append('image', file)
+ const res = await fetch(`${getApiUrl()}/api/v1/worksheet/preview-cleanup`, { method: 'POST', body: fd })
+ if (!res.ok) throw new Error(`HTTP ${res.status}`)
+ setPreviewResult(await res.json()); setCurrentStep('preview')
+ } catch (err) { setError(err instanceof Error ? err.message : 'Vorschau fehlgeschlagen') }
+ finally { setIsPreviewing(false) }
}, [file, getApiUrl])
- // Run full cleanup pipeline
const handleCleanup = useCallback(async () => {
- if (!file) return
-
- setIsProcessing(true)
- setCurrentStep('processing')
- setError(null)
-
+ if (!file) return; setIsProcessing(true); setCurrentStep('processing'); setError(null)
try {
- const formData = new FormData()
- formData.append('image', file)
- formData.append('remove_handwriting', String(removeHandwriting))
- formData.append('reconstruct', String(reconstructLayout))
- formData.append('inpainting_method', inpaintingMethod)
-
- const response = await fetch(`${getApiUrl()}/api/v1/worksheet/cleanup-pipeline`, {
- method: 'POST',
- body: formData
- })
-
- if (!response.ok) {
- const errorData = await response.json().catch(() => ({ detail: 'Unknown error' }))
- throw new Error(errorData.detail || `HTTP ${response.status}`)
- }
-
- const result: PipelineResult = await response.json()
- setPipelineResult(result)
-
- // Create cleaned image URL
+ const fd = new FormData(); fd.append('image', file)
+ fd.append('remove_handwriting', String(removeHandwriting))
+ fd.append('reconstruct', String(reconstructLayout)); fd.append('inpainting_method', inpaintingMethod)
+ const res = await fetch(`${getApiUrl()}/api/v1/worksheet/cleanup-pipeline`, { method: 'POST', body: fd })
+ if (!res.ok) { const ed = await res.json().catch(() => ({ detail: 'Unknown error' })); throw new Error(ed.detail || `HTTP ${res.status}`) }
+ const result: PipelineResult = await res.json(); setPipelineResult(result)
if (result.cleaned_image_base64) {
- const cleanedBlob = await fetch(`data:image/png;base64,${result.cleaned_image_base64}`).then(r => r.blob())
- setCleanedUrl(URL.createObjectURL(cleanedBlob))
+ const blob = await fetch(`data:image/png;base64,${result.cleaned_image_base64}`).then(r => r.blob())
+ setCleanedUrl(URL.createObjectURL(blob))
}
-
setCurrentStep('result')
- } catch (err) {
- console.error('Cleanup failed:', err)
- setError(err instanceof Error ? err.message : 'Bereinigung fehlgeschlagen')
- setCurrentStep('preview')
- } finally {
- setIsProcessing(false)
- }
+ } catch (err) { setError(err instanceof Error ? err.message : 'Bereinigung fehlgeschlagen'); setCurrentStep('preview') }
+ finally { setIsProcessing(false) }
}, [file, removeHandwriting, reconstructLayout, inpaintingMethod, getApiUrl])
- // Get detection mask
const handleGetMask = useCallback(async () => {
if (!file) return
-
try {
- const formData = new FormData()
- formData.append('image', file)
-
- const response = await fetch(`${getApiUrl()}/api/v1/worksheet/detect-handwriting/mask`, {
- method: 'POST',
- body: formData
- })
-
- if (!response.ok) {
- throw new Error(`HTTP ${response.status}`)
- }
-
- const blob = await response.blob()
- setMaskUrl(URL.createObjectURL(blob))
- } catch (err) {
- console.error('Mask fetch failed:', err)
- }
+ const fd = new FormData(); fd.append('image', file)
+ const res = await fetch(`${getApiUrl()}/api/v1/worksheet/detect-handwriting/mask`, { method: 'POST', body: fd })
+ if (!res.ok) throw new Error(`HTTP ${res.status}`)
+ setMaskUrl(URL.createObjectURL(await res.blob()))
+ } catch (err) { console.error('Mask fetch failed:', err) }
}, [file, getApiUrl])
- // Open in worksheet editor
const handleOpenInEditor = useCallback(() => {
if (pipelineResult?.fabric_json) {
- // Store the fabric JSON in sessionStorage
sessionStorage.setItem('worksheetCleanupResult', JSON.stringify(pipelineResult.fabric_json))
router.push('/worksheet-editor')
}
}, [pipelineResult, router])
- // Reset to start
const handleReset = useCallback(() => {
- setFile(null)
- setPreviewUrl(null)
- setCleanedUrl(null)
- setMaskUrl(null)
- setPreviewResult(null)
- setPipelineResult(null)
- setError(null)
- setCurrentStep('upload')
+ setFile(null); setPreviewUrl(null); setCleanedUrl(null); setMaskUrl(null)
+ setPreviewResult(null); setPipelineResult(null); setError(null); setCurrentStep('upload')
}, [])
+ const steps = ['upload', 'preview', 'processing', 'result'] as const
+ const currentStepIdx = steps.indexOf(currentStep)
+
return (
- {/* Animated Background Blobs */}
- {/* Sidebar */}
-
-
-
+
- {/* Main Content */}
- {/* Header */}
Arbeitsblatt bereinigen
Handschrift entfernen und Layout rekonstruieren
-
-
-
-
+
{/* Step Indicator */}
- {['upload', 'preview', 'processing', 'result'].map((step, idx) => (
+ {steps.map((step, idx) => (
-
idx
- ? 'bg-green-500 text-white'
- : isDark ? 'bg-white/10 text-white/40' : 'bg-slate-200 text-slate-400'
- }
- `}>
- {['upload', 'preview', 'processing', 'result'].indexOf(currentStep) > idx ? (
-
-
-
- ) : (
- idx + 1
- )}
+
idx ? 'bg-green-500 text-white'
+ : isDark ? 'bg-white/10 text-white/40' : 'bg-slate-200 text-slate-400'
+ }`}>
+ {currentStepIdx > idx ? (
+
+ ) : idx + 1}
- {idx < 3 && (
-
idx
- ? 'bg-green-500'
- : isDark ? 'bg-white/20' : 'bg-slate-300'
- }`} />
- )}
+ {idx < 3 &&
idx ? 'bg-green-500' : isDark ? 'bg-white/20' : 'bg-slate-300'}`} />}
))}
- {/* Error Display */}
{error && (
)}
- {/* Content based on step */}
- {/* Step 1: Upload */}
{currentStep === 'upload' && (
-
- {/* Upload Options - File and QR Code side by side */}
-
- e.preventDefault()}
- onClick={() => document.getElementById('file-input')?.click()}
- >
-
e.target.files?.[0] && handleFileSelect(e.target.files[0])}
- className="hidden"
- />
- {previewUrl ? (
-
-
-
{file?.name}
-
Klicke zum Ändern
-
- ) : (
- <>
-
-
-
-
Datei auswählen
-
Ziehe ein Bild hierher oder klicke
-
PNG, JPG, JPEG
- >
- )}
-
-
-
- {/* QR Code Upload */}
-
- setShowQRModal(true)}
- >
-
- 📱
-
-
Mit Handy scannen
-
QR-Code scannen um Foto hochzuladen
-
Im lokalen Netzwerk
-
-
-
- {/* Options */}
- {file && (
- <>
-
- Optionen
-
-
- setRemoveHandwriting(e.target.checked)}
- className="w-5 h-5 rounded bg-white/10 border-white/20 text-purple-500 focus:ring-purple-500"
- />
-
-
- Handschrift entfernen
-
-
Erkennt und entfernt handgeschriebene Inhalte
-
-
-
-
- setReconstructLayout(e.target.checked)}
- className="w-5 h-5 rounded bg-white/10 border-white/20 text-purple-500 focus:ring-purple-500"
- />
-
-
- Layout rekonstruieren
-
-
Erstellt bearbeitbare Textblöcke
-
-
-
-
-
-
- Methode
- setInpaintingMethod(e.target.value)}
- className="w-full p-3 rounded-xl bg-white/10 border border-white/20 text-white focus:ring-2 focus:ring-purple-500 focus:border-transparent"
- >
- Automatisch (empfohlen)
- OpenCV Telea (schnell)
- OpenCV NS (glatter)
-
-
- Die automatische Methode wählt die beste Option basierend auf dem Bildinhalt.
-
-
-
- {/* Action Button */}
-
-
- {isPreviewing ? (
- <>
-
- Analysiere...
- >
- ) : (
- <>
-
-
-
-
- Vorschau
- >
- )}
-
-
- >
- )}
-
+
setShowQRModal(true)} />
)}
-
- {/* Step 2: Preview */}
{currentStep === 'preview' && previewResult && (
-
- {/* Stats */}
-
- Analyse
-
-
- {previewResult.has_handwriting
- ? 'Handschrift erkannt'
- : 'Keine Handschrift gefunden'}
-
-
-
- {/* Time Estimates */}
-
- Geschätzte Zeit
-
-
- Erkennung
- ~{Math.round(previewResult.estimated_times_ms.detection / 1000)}s
-
- {removeHandwriting && previewResult.has_handwriting && (
-
- Bereinigung
- ~{Math.round(previewResult.estimated_times_ms.inpainting / 1000)}s
-
- )}
- {reconstructLayout && (
-
- Layout
- ~{Math.round(previewResult.estimated_times_ms.reconstruction / 1000)}s
-
- )}
-
- Gesamt
- ~{Math.round(previewResult.estimated_times_ms.total / 1000)}s
-
-
-
-
- {/* Image Info */}
-
- Bild-Info
-
-
- Breite
- {previewResult.image_width}px
-
-
- Höhe
- {previewResult.image_height}px
-
-
- Pixel
- {(previewResult.image_width * previewResult.image_height / 1000000).toFixed(1)}MP
-
-
-
- Maske anzeigen
-
-
-
- {/* Preview Images */}
-
- Original
- {previewUrl && (
-
- )}
-
-
- {maskUrl && (
-
- Maske
-
-
- )}
-
- {/* Actions */}
-
-
setCurrentStep('upload')}
- className="px-6 py-3 rounded-xl bg-white/10 text-white hover:bg-white/20 transition-all flex items-center gap-2"
- >
-
-
-
- Zurück
-
-
-
-
-
- Bereinigen starten
-
-
-
+ setCurrentStep('upload')}
+ onCleanup={handleCleanup} onGetMask={handleGetMask} />
)}
-
- {/* Step 3: Processing */}
{currentStep === 'processing' && (
Verarbeite Bild...
-
- {removeHandwriting ? 'Handschrift wird erkannt und entfernt' : 'Bild wird analysiert'}
-
+ {removeHandwriting ? 'Handschrift wird erkannt und entfernt' : 'Bild wird analysiert'}
)}
-
- {/* Step 4: Result */}
{currentStep === 'result' && pipelineResult && (
-
- {/* Status */}
-
-
-
- {pipelineResult.success ? (
-
-
-
- ) : (
-
-
-
- )}
-
-
-
- {pipelineResult.success ? 'Erfolgreich bereinigt!' : 'Verarbeitung fehlgeschlagen'}
-
-
- {pipelineResult.handwriting_removed
- ? `Handschrift wurde entfernt. ${pipelineResult.metadata?.layout?.element_count || 0} Elemente erkannt.`
- : pipelineResult.handwriting_detected
- ? 'Handschrift erkannt, aber nicht entfernt'
- : 'Keine Handschrift im Bild gefunden'}
-
-
-
-
-
- {/* Original */}
-
- Original
- {previewUrl && (
-
- )}
-
-
- {/* Cleaned */}
-
- Bereinigt
- {cleanedUrl ? (
-
- ) : (
-
- Kein Bild
-
- )}
-
-
- {/* Actions */}
-
-
-
-
-
- Neues Bild
-
- {cleanedUrl && (
-
-
-
-
- Download
-
- )}
- {pipelineResult.layout_reconstructed && pipelineResult.fabric_json && (
-
-
-
-
- Im Editor öffnen
-
- )}
-
-
+
)}
- {/* QR Code Modal */}
{showQRModal && (
setShowQRModal(false)} />
-
setShowQRModal(false)}
- onFilesChanged={(files) => {
- setMobileUploadedFiles(files)
- }}
- />
- {/* Select button for mobile files */}
+ setShowQRModal(false)}
+ onFilesChanged={(files) => setMobileUploadedFiles(files)} />
{mobileUploadedFiles.length > 0 && (
Datei auswählen:
- {mobileUploadedFiles.map((file) => (
-
handleMobileFileSelect(file)}
- className="w-full flex items-center gap-3 p-3 rounded-xl text-left transition-all bg-white/5 hover:bg-white/10 border border-white/10"
- >
- {file.type.startsWith('image/') ? '🖼️' : '📄'}
+ {mobileUploadedFiles.map((f) => (
+ handleMobileFileSelect(f)}
+ className="w-full flex items-center gap-3 p-3 rounded-xl text-left transition-all bg-white/5 hover:bg-white/10 border border-white/10">
+ {f.type.startsWith('image/') ? '🖼️' : '📄'}
-
{file.name}
-
{formatFileSize(file.size)}
+
{f.name}
+
{formatFileSize(f.size)}
Verwenden →
diff --git a/studio-v2/lib/MessagesContext.tsx b/studio-v2/lib/MessagesContext.tsx
index 2e409ea..b703733 100644
--- a/studio-v2/lib/MessagesContext.tsx
+++ b/studio-v2/lib/MessagesContext.tsx
@@ -1,733 +1,80 @@
'use client'
import { createContext, useContext, useState, useEffect, useCallback, ReactNode } from 'react'
+import type { Contact, Conversation, Message, MessageTemplate, MessagesStats, MessagesContextType } from './messages/types'
+import { mockContacts, mockConversations, mockMessages, mockTemplates } from './messages/mock-data'
-// ============================================
-// TYPES
-// ============================================
-
-export interface Contact {
- id: string
- name: string
- email?: string
- phone?: string
- role: 'parent' | 'teacher' | 'staff' | 'student'
- student_name?: string
- class_name?: string
- notes?: string
- tags: string[]
- avatar_url?: string
- preferred_channel: 'email' | 'matrix' | 'pwa'
- online: boolean
- last_seen?: string
- created_at: string
- updated_at: string
-}
-
-export interface Message {
- id: string
- conversation_id: string
- sender_id: string // "self" for own messages
- content: string
- content_type: 'text' | 'file' | 'image' | 'voice'
- file_url?: string
- file_name?: string
- timestamp: string
- read: boolean
- read_at?: string
- delivered: boolean
- send_email: boolean
- email_sent: boolean
- email_sent_at?: string
- email_error?: string
- reply_to?: string // ID of message being replied to
- reactions?: { emoji: string; user_id: string }[]
-}
-
-export interface Conversation {
- id: string
- participant_ids: string[]
- group_id?: string
- created_at: string
- updated_at: string
- last_message?: string
- last_message_time?: string
- unread_count: number
- is_group: boolean
- title?: string
- typing?: boolean // Someone is typing
- pinned?: boolean
- muted?: boolean
- archived?: boolean
-}
-
-export interface MessageTemplate {
- id: string
- name: string
- content: string
- created_at: string
-}
-
-export interface MessagesStats {
- total_contacts: number
- total_conversations: number
- total_messages: number
- unread_messages: number
-}
-
-// ============================================
-// CONTEXT INTERFACE
-// ============================================
-
-interface MessagesContextType {
- // Data
- contacts: Contact[]
- conversations: Conversation[]
- messages: Record // conversationId -> messages
- templates: MessageTemplate[]
- stats: MessagesStats
-
- // Computed
- unreadCount: number
- recentConversations: Conversation[]
-
- // Actions
- fetchContacts: () => Promise
- fetchConversations: () => Promise
- fetchMessages: (conversationId: string) => Promise
- sendMessage: (conversationId: string, content: string, sendEmail?: boolean, replyTo?: string) => Promise
- markAsRead: (conversationId: string) => Promise
- createConversation: (contactId: string) => Promise
- addReaction: (messageId: string, emoji: string) => void
- deleteMessage: (conversationId: string, messageId: string) => void
- pinConversation: (conversationId: string) => void
- muteConversation: (conversationId: string) => void
-
- // State
- isLoading: boolean
- error: string | null
- currentConversationId: string | null
- setCurrentConversationId: (id: string | null) => void
-}
+// Re-export types and helpers for backward compatibility
+export type { Contact, Conversation, Message, MessageTemplate, MessagesStats } from './messages/types'
+export { formatMessageTime, formatMessageDate, getContactInitials, getRoleLabel, getRoleColor, emojiCategories } from './messages/helpers'
const MessagesContext = createContext(null)
-// ============================================
-// MOCK DATA - Realistic German school context
-// ============================================
-
-const mockContacts: Contact[] = [
- {
- id: 'contact_mueller',
- name: 'Familie Mueller',
- email: 'familie.mueller@gmail.com',
- phone: '+49 170 1234567',
- role: 'parent',
- student_name: 'Max Mueller',
- class_name: '10a',
- notes: 'Bevorzugt Kommunikation per E-Mail',
- tags: ['aktiv', 'Elternbeirat'],
- preferred_channel: 'email',
- online: false,
- last_seen: new Date(Date.now() - 1800000).toISOString(),
- created_at: new Date(Date.now() - 86400000 * 30).toISOString(),
- updated_at: new Date().toISOString()
- },
- {
- id: 'contact_schmidt',
- name: 'Petra Schmidt',
- email: 'p.schmidt@web.de',
- phone: '+49 171 9876543',
- role: 'parent',
- student_name: 'Lisa Schmidt',
- class_name: '10a',
- tags: ['responsive'],
- preferred_channel: 'pwa',
- online: true,
- created_at: new Date(Date.now() - 86400000 * 60).toISOString(),
- updated_at: new Date().toISOString()
- },
- {
- id: 'contact_weber',
- name: 'Sabine Weber',
- email: 's.weber@schule-musterstadt.de',
- role: 'teacher',
- tags: ['Fachschaft Deutsch', 'Klassenleitung 9b'],
- preferred_channel: 'pwa',
- online: true,
- last_seen: new Date().toISOString(),
- created_at: new Date(Date.now() - 86400000 * 90).toISOString(),
- updated_at: new Date().toISOString()
- },
- {
- id: 'contact_hoffmann',
- name: 'Thomas Hoffmann',
- email: 't.hoffmann@schule-musterstadt.de',
- role: 'teacher',
- tags: ['Fachschaft Mathe', 'Oberstufenkoordinator'],
- preferred_channel: 'pwa',
- online: false,
- last_seen: new Date(Date.now() - 3600000 * 2).toISOString(),
- created_at: new Date(Date.now() - 86400000 * 120).toISOString(),
- updated_at: new Date().toISOString()
- },
- {
- id: 'contact_becker',
- name: 'Familie Becker',
- email: 'becker.familie@gmx.de',
- phone: '+49 172 5551234',
- role: 'parent',
- student_name: 'Tim Becker',
- class_name: '10a',
- tags: [],
- preferred_channel: 'email',
- online: false,
- last_seen: new Date(Date.now() - 86400000).toISOString(),
- created_at: new Date(Date.now() - 86400000 * 45).toISOString(),
- updated_at: new Date().toISOString()
- },
- {
- id: 'contact_klein',
- name: 'Monika Klein',
- email: 'm.klein@schule-musterstadt.de',
- role: 'staff',
- tags: ['Sekretariat'],
- preferred_channel: 'pwa',
- online: true,
- created_at: new Date(Date.now() - 86400000 * 180).toISOString(),
- updated_at: new Date().toISOString()
- },
- {
- id: 'contact_fischer',
- name: 'Familie Fischer',
- email: 'fischer@t-online.de',
- phone: '+49 173 4445566',
- role: 'parent',
- student_name: 'Anna Fischer',
- class_name: '11b',
- tags: ['Foerderverein'],
- preferred_channel: 'pwa',
- online: false,
- last_seen: new Date(Date.now() - 7200000).toISOString(),
- created_at: new Date(Date.now() - 86400000 * 75).toISOString(),
- updated_at: new Date().toISOString()
- },
- {
- id: 'contact_meyer',
- name: 'Dr. Hans Meyer',
- email: 'h.meyer@schule-musterstadt.de',
- role: 'teacher',
- tags: ['Schulleitung', 'Stellvertretender Schulleiter'],
- preferred_channel: 'email',
- online: false,
- last_seen: new Date(Date.now() - 3600000).toISOString(),
- created_at: new Date(Date.now() - 86400000 * 365).toISOString(),
- updated_at: new Date().toISOString()
- }
-]
-
-const mockConversations: Conversation[] = [
- {
- id: 'conv_mueller',
- participant_ids: ['contact_mueller'],
- created_at: new Date(Date.now() - 86400000 * 7).toISOString(),
- updated_at: new Date(Date.now() - 300000).toISOString(),
- last_message: 'Vielen Dank fuer die Info! Max freut sich schon auf die Klassenfahrt 🎉',
- last_message_time: new Date(Date.now() - 300000).toISOString(),
- unread_count: 2,
- is_group: false,
- title: 'Familie Mueller',
- pinned: true
- },
- {
- id: 'conv_schmidt',
- participant_ids: ['contact_schmidt'],
- created_at: new Date(Date.now() - 86400000 * 14).toISOString(),
- updated_at: new Date(Date.now() - 3600000).toISOString(),
- last_message: 'Lisa war heute krank, sie kommt morgen wieder.',
- last_message_time: new Date(Date.now() - 3600000).toISOString(),
- unread_count: 0,
- is_group: false,
- title: 'Petra Schmidt'
- },
- {
- id: 'conv_weber',
- participant_ids: ['contact_weber'],
- created_at: new Date(Date.now() - 86400000 * 30).toISOString(),
- updated_at: new Date(Date.now() - 7200000).toISOString(),
- last_message: 'Koenntest du mir die Klausuraufgaben bis Freitag schicken? 📝',
- last_message_time: new Date(Date.now() - 7200000).toISOString(),
- unread_count: 1,
- is_group: false,
- title: 'Sabine Weber',
- typing: true
- },
- {
- id: 'conv_hoffmann',
- participant_ids: ['contact_hoffmann'],
- created_at: new Date(Date.now() - 86400000 * 5).toISOString(),
- updated_at: new Date(Date.now() - 86400000).toISOString(),
- last_message: 'Die Notenkonferenz ist am 15.02. um 14:00 Uhr.',
- last_message_time: new Date(Date.now() - 86400000).toISOString(),
- unread_count: 0,
- is_group: false,
- title: 'Thomas Hoffmann'
- },
- {
- id: 'conv_becker',
- participant_ids: ['contact_becker'],
- created_at: new Date(Date.now() - 86400000 * 3).toISOString(),
- updated_at: new Date(Date.now() - 172800000).toISOString(),
- last_message: 'Wir haben die Einverstaendniserklaerung unterschrieben.',
- last_message_time: new Date(Date.now() - 172800000).toISOString(),
- unread_count: 0,
- is_group: false,
- title: 'Familie Becker',
- muted: true
- },
- {
- id: 'conv_fachschaft',
- participant_ids: ['contact_weber', 'contact_hoffmann', 'contact_meyer'],
- created_at: new Date(Date.now() - 86400000 * 60).toISOString(),
- updated_at: new Date(Date.now() - 14400000).toISOString(),
- last_message: 'Sabine: Hat jemand die neuen Lehrplaene schon gelesen?',
- last_message_time: new Date(Date.now() - 14400000).toISOString(),
- unread_count: 3,
- is_group: true,
- title: 'Fachschaft Deutsch 📚'
- }
-]
-
-const mockMessages: Record = {
- 'conv_mueller': [
- {
- id: 'msg_m1',
- conversation_id: 'conv_mueller',
- sender_id: 'self',
- content: 'Guten Tag Frau Mueller,\n\nich moechte Sie ueber die anstehende Klassenfahrt nach Berlin informieren. Die Reise findet vom 15.-19. April statt.',
- content_type: 'text',
- timestamp: new Date(Date.now() - 86400000).toISOString(),
- read: true,
- delivered: true,
- send_email: true,
- email_sent: true,
- email_sent_at: new Date(Date.now() - 86400000).toISOString()
- },
- {
- id: 'msg_m2',
- conversation_id: 'conv_mueller',
- sender_id: 'self',
- content: 'Die Kosten belaufen sich auf 280 Euro pro Schueler. Bitte ueberweisen Sie den Betrag bis zum 01.03. auf das Schulkonto.',
- content_type: 'text',
- timestamp: new Date(Date.now() - 86400000 + 60000).toISOString(),
- read: true,
- delivered: true,
- send_email: false,
- email_sent: false
- },
- {
- id: 'msg_m3',
- conversation_id: 'conv_mueller',
- sender_id: 'contact_mueller',
- content: 'Vielen Dank fuer die Information! Wir werden den Betrag diese Woche ueberweisen.',
- content_type: 'text',
- timestamp: new Date(Date.now() - 3600000).toISOString(),
- read: false,
- delivered: true,
- send_email: false,
- email_sent: false,
- reactions: [{ emoji: '👍', user_id: 'self' }]
- },
- {
- id: 'msg_m4',
- conversation_id: 'conv_mueller',
- sender_id: 'contact_mueller',
- content: 'Vielen Dank fuer die Info! Max freut sich schon auf die Klassenfahrt 🎉',
- content_type: 'text',
- timestamp: new Date(Date.now() - 300000).toISOString(),
- read: false,
- delivered: true,
- send_email: false,
- email_sent: false
- }
- ],
- 'conv_schmidt': [
- {
- id: 'msg_s1',
- conversation_id: 'conv_schmidt',
- sender_id: 'contact_schmidt',
- content: 'Guten Morgen! Lisa ist heute leider krank und kann nicht zur Schule kommen.',
- content_type: 'text',
- timestamp: new Date(Date.now() - 86400000 * 2).toISOString(),
- read: true,
- delivered: true,
- send_email: false,
- email_sent: false
- },
- {
- id: 'msg_s2',
- conversation_id: 'conv_schmidt',
- sender_id: 'self',
- content: 'Gute Besserung an Lisa! 🤒 Soll ich ihr die Hausaufgaben zukommen lassen?',
- content_type: 'text',
- timestamp: new Date(Date.now() - 86400000 * 2 + 1800000).toISOString(),
- read: true,
- delivered: true,
- send_email: false,
- email_sent: false
- },
- {
- id: 'msg_s3',
- conversation_id: 'conv_schmidt',
- sender_id: 'contact_schmidt',
- content: 'Das waere sehr nett, vielen Dank! 🙏',
- content_type: 'text',
- timestamp: new Date(Date.now() - 86400000 * 2 + 3600000).toISOString(),
- read: true,
- delivered: true,
- send_email: false,
- email_sent: false
- },
- {
- id: 'msg_s4',
- conversation_id: 'conv_schmidt',
- sender_id: 'self',
- content: 'Hier sind die Hausaufgaben fuer diese Woche:\n\n📖 Deutsch: Seite 45-48 lesen\n📝 Mathe: Aufgaben 1-5 auf Seite 112\n🔬 Bio: Referat vorbereiten',
- content_type: 'text',
- timestamp: new Date(Date.now() - 86400000).toISOString(),
- read: true,
- delivered: true,
- send_email: true,
- email_sent: true
- },
- {
- id: 'msg_s5',
- conversation_id: 'conv_schmidt',
- sender_id: 'contact_schmidt',
- content: 'Lisa war heute krank, sie kommt morgen wieder.',
- content_type: 'text',
- timestamp: new Date(Date.now() - 3600000).toISOString(),
- read: true,
- delivered: true,
- send_email: false,
- email_sent: false
- }
- ],
- 'conv_weber': [
- {
- id: 'msg_w1',
- conversation_id: 'conv_weber',
- sender_id: 'contact_weber',
- content: 'Hi! Hast du schon die neuen Abi-Themen gesehen?',
- content_type: 'text',
- timestamp: new Date(Date.now() - 86400000 * 3).toISOString(),
- read: true,
- delivered: true,
- send_email: false,
- email_sent: false
- },
- {
- id: 'msg_w2',
- conversation_id: 'conv_weber',
- sender_id: 'self',
- content: 'Ja, habe ich! Finde ich ganz gut machbar dieses Jahr. 📚',
- content_type: 'text',
- timestamp: new Date(Date.now() - 86400000 * 3 + 1800000).toISOString(),
- read: true,
- delivered: true,
- send_email: false,
- email_sent: false
- },
- {
- id: 'msg_w3',
- conversation_id: 'conv_weber',
- sender_id: 'contact_weber',
- content: 'Koenntest du mir die Klausuraufgaben bis Freitag schicken? 📝',
- content_type: 'text',
- timestamp: new Date(Date.now() - 7200000).toISOString(),
- read: false,
- delivered: true,
- send_email: false,
- email_sent: false
- }
- ],
- 'conv_hoffmann': [
- {
- id: 'msg_h1',
- conversation_id: 'conv_hoffmann',
- sender_id: 'contact_hoffmann',
- content: 'Kurze Info: Die Notenkonferenz ist am 15.02. um 14:00 Uhr.',
- content_type: 'text',
- timestamp: new Date(Date.now() - 86400000 * 2).toISOString(),
- read: true,
- delivered: true,
- send_email: false,
- email_sent: false
- },
- {
- id: 'msg_h2',
- conversation_id: 'conv_hoffmann',
- sender_id: 'self',
- content: 'Danke fuer die Info! Bin dabei. 👍',
- content_type: 'text',
- timestamp: new Date(Date.now() - 86400000 * 2 + 3600000).toISOString(),
- read: true,
- delivered: true,
- send_email: false,
- email_sent: false
- },
- {
- id: 'msg_h3',
- conversation_id: 'conv_hoffmann',
- sender_id: 'contact_hoffmann',
- content: 'Die Notenkonferenz ist am 15.02. um 14:00 Uhr.',
- content_type: 'text',
- timestamp: new Date(Date.now() - 86400000).toISOString(),
- read: true,
- delivered: true,
- send_email: false,
- email_sent: false
- }
- ],
- 'conv_becker': [
- {
- id: 'msg_b1',
- conversation_id: 'conv_becker',
- sender_id: 'self',
- content: 'Guten Tag Familie Becker,\n\nbitte vergessen Sie nicht, die Einverstaendniserklaerung fuer den Schwimmunterricht zu unterschreiben.',
- content_type: 'text',
- timestamp: new Date(Date.now() - 86400000 * 4).toISOString(),
- read: true,
- delivered: true,
- send_email: true,
- email_sent: true
- },
- {
- id: 'msg_b2',
- conversation_id: 'conv_becker',
- sender_id: 'contact_becker',
- content: 'Wir haben die Einverstaendniserklaerung unterschrieben.',
- content_type: 'text',
- timestamp: new Date(Date.now() - 172800000).toISOString(),
- read: true,
- delivered: true,
- send_email: false,
- email_sent: false
- }
- ],
- 'conv_fachschaft': [
- {
- id: 'msg_f1',
- conversation_id: 'conv_fachschaft',
- sender_id: 'contact_meyer',
- content: 'Liebe Kolleginnen und Kollegen,\n\ndie neuen Lehrplaene sind jetzt online verfuegbar.',
- content_type: 'text',
- timestamp: new Date(Date.now() - 86400000).toISOString(),
- read: true,
- delivered: true,
- send_email: false,
- email_sent: false
- },
- {
- id: 'msg_f2',
- conversation_id: 'conv_fachschaft',
- sender_id: 'contact_hoffmann',
- content: 'Danke fuer die Info! Werde ich mir heute Abend anschauen.',
- content_type: 'text',
- timestamp: new Date(Date.now() - 72000000).toISOString(),
- read: true,
- delivered: true,
- send_email: false,
- email_sent: false
- },
- {
- id: 'msg_f3',
- conversation_id: 'conv_fachschaft',
- sender_id: 'contact_weber',
- content: 'Hat jemand die neuen Lehrplaene schon gelesen?',
- content_type: 'text',
- timestamp: new Date(Date.now() - 14400000).toISOString(),
- read: false,
- delivered: true,
- send_email: false,
- email_sent: false
- },
- {
- id: 'msg_f4',
- conversation_id: 'conv_fachschaft',
- sender_id: 'contact_hoffmann',
- content: 'Noch nicht komplett, aber sieht interessant aus! 📖',
- content_type: 'text',
- timestamp: new Date(Date.now() - 10800000).toISOString(),
- read: false,
- delivered: true,
- send_email: false,
- email_sent: false
- },
- {
- id: 'msg_f5',
- conversation_id: 'conv_fachschaft',
- sender_id: 'contact_meyer',
- content: 'Wir sollten naechste Woche eine Besprechung ansetzen.',
- content_type: 'text',
- timestamp: new Date(Date.now() - 7200000).toISOString(),
- read: false,
- delivered: true,
- send_email: false,
- email_sent: false
- }
- ]
-}
-
-const mockTemplates: MessageTemplate[] = [
- {
- id: 'tpl_1',
- name: 'Krankmeldung bestaetigen',
- content: 'Vielen Dank fuer die Krankmeldung. Gute Besserung! 🤒',
- created_at: new Date().toISOString()
- },
- {
- id: 'tpl_2',
- name: 'Hausaufgaben senden',
- content: 'Hier sind die Hausaufgaben fuer diese Woche:\n\n📖 Deutsch: \n📝 Mathe: \n🔬 Bio: ',
- created_at: new Date().toISOString()
- },
- {
- id: 'tpl_3',
- name: 'Elterngespraech anfragen',
- content: 'Guten Tag,\n\nich wuerde gerne ein Elterngespraech mit Ihnen vereinbaren. Wann haetten Sie Zeit?',
- created_at: new Date().toISOString()
- },
- {
- id: 'tpl_4',
- name: 'Termin bestaetigen',
- content: 'Vielen Dank, der Termin ist bestaetigt. Ich freue mich auf unser Gespraech! 📅',
- created_at: new Date().toISOString()
- }
-]
-
-// ============================================
-// PROVIDER
-// ============================================
-
export function MessagesProvider({ children }: { children: ReactNode }) {
const [contacts, setContacts] = useState(mockContacts)
const [conversations, setConversations] = useState(mockConversations)
const [messages, setMessages] = useState>(mockMessages)
- const [templates, setTemplates] = useState(mockTemplates)
- const [stats, setStats] = useState({
+ const [templates] = useState(mockTemplates)
+ const [stats] = useState({
total_contacts: mockContacts.length,
total_conversations: mockConversations.length,
total_messages: Object.values(mockMessages).flat().length,
unread_messages: mockConversations.reduce((sum, c) => sum + c.unread_count, 0)
})
- const [isLoading, setIsLoading] = useState(false)
- const [error, setError] = useState(null)
+ const [isLoading] = useState(false)
+ const [error] = useState(null)
const [currentConversationId, setCurrentConversationId] = useState(null)
const [mounted, setMounted] = useState(false)
- // Initialize
- useEffect(() => {
- setMounted(true)
- }, [])
+ useEffect(() => { setMounted(true) }, [])
- // Computed: unread count
const unreadCount = conversations.reduce((sum, c) => sum + c.unread_count, 0)
- // Computed: recent conversations (sorted by last_message_time, pinned first)
- const recentConversations = [...conversations]
- .sort((a, b) => {
- // Pinned conversations first
- if (a.pinned && !b.pinned) return -1
- if (!a.pinned && b.pinned) return 1
- // Then by last_message_time
- const aTime = a.last_message_time ? new Date(a.last_message_time).getTime() : 0
- const bTime = b.last_message_time ? new Date(b.last_message_time).getTime() : 0
- return bTime - aTime
- })
-
- // Actions
- const fetchContacts = useCallback(async () => {
- // Using mock data directly
- setContacts(mockContacts)
- }, [])
-
- const fetchConversations = useCallback(async () => {
- // Using mock data directly
- setConversations(mockConversations)
- }, [])
+ const recentConversations = [...conversations].sort((a, b) => {
+ if (a.pinned && !b.pinned) return -1
+ if (!a.pinned && b.pinned) return 1
+ const aTime = a.last_message_time ? new Date(a.last_message_time).getTime() : 0
+ const bTime = b.last_message_time ? new Date(b.last_message_time).getTime() : 0
+ return bTime - aTime
+ })
+ const fetchContacts = useCallback(async () => { setContacts(mockContacts) }, [])
+ const fetchConversations = useCallback(async () => { setConversations(mockConversations) }, [])
const fetchMessages = useCallback(async (conversationId: string): Promise => {
return messages[conversationId] || []
}, [messages])
const sendMessage = useCallback(async (
- conversationId: string,
- content: string,
- sendEmail: boolean = false,
- replyTo?: string
+ conversationId: string, content: string, sendEmail = false, replyTo?: string
): Promise => {
const newMsg: Message = {
- id: `msg_${Date.now()}`,
- conversation_id: conversationId,
- sender_id: 'self',
- content,
- content_type: 'text',
- timestamp: new Date().toISOString(),
- read: true,
- delivered: true,
- send_email: sendEmail,
- email_sent: sendEmail,
- reply_to: replyTo
+ id: `msg_${Date.now()}`, conversation_id: conversationId, sender_id: 'self',
+ content, content_type: 'text', timestamp: new Date().toISOString(),
+ read: true, delivered: true, send_email: sendEmail, email_sent: sendEmail, reply_to: replyTo
}
-
- setMessages(prev => ({
- ...prev,
- [conversationId]: [...(prev[conversationId] || []), newMsg]
- }))
-
- // Update conversation
+ setMessages(prev => ({ ...prev, [conversationId]: [...(prev[conversationId] || []), newMsg] }))
setConversations(prev => prev.map(c =>
c.id === conversationId
- ? {
- ...c,
- last_message: content.length > 50 ? content.slice(0, 50) + '...' : content,
- last_message_time: newMsg.timestamp,
- updated_at: newMsg.timestamp
- }
+ ? { ...c, last_message: content.length > 50 ? content.slice(0, 50) + '...' : content,
+ last_message_time: newMsg.timestamp, updated_at: newMsg.timestamp }
: c
))
-
return newMsg
}, [])
const markAsRead = useCallback(async (conversationId: string) => {
- setMessages(prev => ({
- ...prev,
- [conversationId]: (prev[conversationId] || []).map(m => ({ ...m, read: true }))
- }))
- setConversations(prev => prev.map(c =>
- c.id === conversationId ? { ...c, unread_count: 0 } : c
- ))
+ setMessages(prev => ({ ...prev, [conversationId]: (prev[conversationId] || []).map(m => ({ ...m, read: true })) }))
+ setConversations(prev => prev.map(c => c.id === conversationId ? { ...c, unread_count: 0 } : c))
}, [])
const createConversation = useCallback(async (contactId: string): Promise => {
- // Check if conversation exists
- const existing = conversations.find(c =>
- !c.is_group && c.participant_ids.includes(contactId)
- )
+ const existing = conversations.find(c => !c.is_group && c.participant_ids.includes(contactId))
if (existing) return existing
-
- // Create new conversation
const contact = contacts.find(c => c.id === contactId)
const newConv: Conversation = {
- id: `conv_${Date.now()}`,
- participant_ids: [contactId],
- created_at: new Date().toISOString(),
- updated_at: new Date().toISOString(),
- unread_count: 0,
- is_group: false,
- title: contact?.name || 'Neue Konversation'
+ id: `conv_${Date.now()}`, participant_ids: [contactId],
+ created_at: new Date().toISOString(), updated_at: new Date().toISOString(),
+ unread_count: 0, is_group: false, title: contact?.name || 'Neue Konversation'
}
setConversations(prev => [newConv, ...prev])
setMessages(prev => ({ ...prev, [newConv.id]: [] }))
@@ -739,22 +86,14 @@ export function MessagesProvider({ children }: { children: ReactNode }) {
const newMessages = { ...prev }
for (const convId of Object.keys(newMessages)) {
newMessages[convId] = newMessages[convId].map(msg => {
- if (msg.id === messageId) {
- const reactions = msg.reactions || []
- const existingIndex = reactions.findIndex(r => r.user_id === 'self')
- if (existingIndex >= 0) {
- // Toggle or change reaction
- if (reactions[existingIndex].emoji === emoji) {
- reactions.splice(existingIndex, 1)
- } else {
- reactions[existingIndex].emoji = emoji
- }
- } else {
- reactions.push({ emoji, user_id: 'self' })
- }
- return { ...msg, reactions }
- }
- return msg
+ if (msg.id !== messageId) return msg
+ const reactions = [...(msg.reactions || [])]
+ const existingIndex = reactions.findIndex(r => r.user_id === 'self')
+ if (existingIndex >= 0) {
+ if (reactions[existingIndex].emoji === emoji) { reactions.splice(existingIndex, 1) }
+ else { reactions[existingIndex] = { ...reactions[existingIndex], emoji } }
+ } else { reactions.push({ emoji, user_id: 'self' }) }
+ return { ...msg, reactions }
})
}
return newMessages
@@ -762,86 +101,30 @@ export function MessagesProvider({ children }: { children: ReactNode }) {
}, [])
const deleteMessage = useCallback((conversationId: string, messageId: string) => {
- setMessages(prev => ({
- ...prev,
- [conversationId]: (prev[conversationId] || []).filter(m => m.id !== messageId)
- }))
+ setMessages(prev => ({ ...prev, [conversationId]: (prev[conversationId] || []).filter(m => m.id !== messageId) }))
}, [])
const pinConversation = useCallback((conversationId: string) => {
- setConversations(prev => prev.map(c =>
- c.id === conversationId ? { ...c, pinned: !c.pinned } : c
- ))
+ setConversations(prev => prev.map(c => c.id === conversationId ? { ...c, pinned: !c.pinned } : c))
}, [])
const muteConversation = useCallback((conversationId: string) => {
- setConversations(prev => prev.map(c =>
- c.id === conversationId ? { ...c, muted: !c.muted } : c
- ))
+ setConversations(prev => prev.map(c => c.id === conversationId ? { ...c, muted: !c.muted } : c))
}, [])
- // SSR safety
- if (!mounted) {
- return (
- {},
- fetchConversations: async () => {},
- fetchMessages: async () => [],
- sendMessage: async () => null,
- markAsRead: async () => {},
- createConversation: async () => null,
- addReaction: () => {},
- deleteMessage: () => {},
- pinConversation: () => {},
- muteConversation: () => {},
- isLoading: false,
- error: null,
- currentConversationId: null,
- setCurrentConversationId: () => {}
- }}
- >
- {children}
-
- )
+ const value: MessagesContextType = {
+ contacts: mounted ? contacts : [], conversations: mounted ? conversations : [],
+ messages: mounted ? messages : {}, templates: mounted ? templates : [],
+ stats: mounted ? stats : { total_contacts: 0, total_conversations: 0, total_messages: 0, unread_messages: 0 },
+ unreadCount: mounted ? unreadCount : 0,
+ recentConversations: mounted ? recentConversations : [],
+ fetchContacts, fetchConversations, fetchMessages, sendMessage, markAsRead,
+ createConversation, addReaction, deleteMessage, pinConversation, muteConversation,
+ isLoading, error, currentConversationId,
+ setCurrentConversationId: mounted ? setCurrentConversationId : () => {}
}
- return (
-
- {children}
-
- )
+ return {children}
}
export function useMessages() {
@@ -851,75 +134,3 @@ export function useMessages() {
}
return context
}
-
-// ============================================
-// HELPER FUNCTIONS
-// ============================================
-
-export function formatMessageTime(timestamp: string): string {
- const date = new Date(timestamp)
- const now = new Date()
- const diffMs = now.getTime() - date.getTime()
- const diffMins = Math.floor(diffMs / 60000)
- const diffHours = Math.floor(diffMs / 3600000)
- const diffDays = Math.floor(diffMs / 86400000)
-
- if (diffMins < 1) return 'Gerade eben'
- if (diffMins < 60) return `${diffMins} Min.`
- if (diffHours < 24) return `${diffHours} Std.`
- if (diffDays === 1) return 'Gestern'
- if (diffDays < 7) return `${diffDays} Tage`
-
- return date.toLocaleDateString('de-DE', { day: '2-digit', month: '2-digit' })
-}
-
-export function formatMessageDate(timestamp: string): string {
- const date = new Date(timestamp)
- const now = new Date()
- const diffDays = Math.floor((now.getTime() - date.getTime()) / 86400000)
-
- if (diffDays === 0) return 'Heute'
- if (diffDays === 1) return 'Gestern'
- if (diffDays < 7) {
- return date.toLocaleDateString('de-DE', { weekday: 'long' })
- }
-
- return date.toLocaleDateString('de-DE', { day: '2-digit', month: 'long', year: 'numeric' })
-}
-
-export function getContactInitials(name: string): string {
- const parts = name.split(' ').filter(p => p.length > 0)
- if (parts.length >= 2) {
- return (parts[0][0] + parts[parts.length - 1][0]).toUpperCase()
- }
- return name.slice(0, 2).toUpperCase()
-}
-
-export function getRoleLabel(role: Contact['role']): string {
- const labels: Record = {
- parent: 'Eltern',
- teacher: 'Lehrkraft',
- staff: 'Verwaltung',
- student: 'Schueler/in'
- }
- return labels[role] || role
-}
-
-export function getRoleColor(role: Contact['role'], isDark: boolean): string {
- const colors: Record = {
- parent: { dark: 'bg-blue-500/20 text-blue-300', light: 'bg-blue-100 text-blue-700' },
- teacher: { dark: 'bg-purple-500/20 text-purple-300', light: 'bg-purple-100 text-purple-700' },
- staff: { dark: 'bg-amber-500/20 text-amber-300', light: 'bg-amber-100 text-amber-700' },
- student: { dark: 'bg-green-500/20 text-green-300', light: 'bg-green-100 text-green-700' }
- }
- return isDark ? colors[role].dark : colors[role].light
-}
-
-// Emoji categories for picker
-export const emojiCategories = {
- 'Häufig': ['👍', '❤️', '😊', '😂', '🙏', '👏', '🎉', '✅', '📝', '📚'],
- 'Smileys': ['😀', '😃', '😄', '😁', '😅', '😂', '🤣', '😊', '😇', '🙂', '😉', '😌', '😍', '🥰', '😘'],
- 'Gesten': ['👍', '👎', '👌', '✌️', '🤞', '🤝', '👏', '🙌', '👋', '✋', '🤚', '🖐️', '🙏'],
- 'Symbole': ['❤️', '💙', '💚', '💛', '🧡', '💜', '✅', '❌', '⭐', '🌟', '💯', '📌', '📎'],
- 'Schule': ['📚', '📖', '📝', '✏️', '📓', '📕', '📗', '📘', '🎓', '🏫', '📅', '⏰', '🔔']
-}
diff --git a/studio-v2/lib/messages/helpers.ts b/studio-v2/lib/messages/helpers.ts
new file mode 100644
index 0000000..d2b5a11
--- /dev/null
+++ b/studio-v2/lib/messages/helpers.ts
@@ -0,0 +1,69 @@
+import type { Contact } from './types'
+
+export function formatMessageTime(timestamp: string): string {
+ const date = new Date(timestamp)
+ const now = new Date()
+ const diffMs = now.getTime() - date.getTime()
+ const diffMins = Math.floor(diffMs / 60000)
+ const diffHours = Math.floor(diffMs / 3600000)
+ const diffDays = Math.floor(diffMs / 86400000)
+
+ if (diffMins < 1) return 'Gerade eben'
+ if (diffMins < 60) return `${diffMins} Min.`
+ if (diffHours < 24) return `${diffHours} Std.`
+ if (diffDays === 1) return 'Gestern'
+ if (diffDays < 7) return `${diffDays} Tage`
+
+ return date.toLocaleDateString('de-DE', { day: '2-digit', month: '2-digit' })
+}
+
+export function formatMessageDate(timestamp: string): string {
+ const date = new Date(timestamp)
+ const now = new Date()
+ const diffDays = Math.floor((now.getTime() - date.getTime()) / 86400000)
+
+ if (diffDays === 0) return 'Heute'
+ if (diffDays === 1) return 'Gestern'
+ if (diffDays < 7) {
+ return date.toLocaleDateString('de-DE', { weekday: 'long' })
+ }
+
+ return date.toLocaleDateString('de-DE', { day: '2-digit', month: 'long', year: 'numeric' })
+}
+
+export function getContactInitials(name: string): string {
+ const parts = name.split(' ').filter(p => p.length > 0)
+ if (parts.length >= 2) {
+ return (parts[0][0] + parts[parts.length - 1][0]).toUpperCase()
+ }
+ return name.slice(0, 2).toUpperCase()
+}
+
+export function getRoleLabel(role: Contact['role']): string {
+ const labels: Record = {
+ parent: 'Eltern',
+ teacher: 'Lehrkraft',
+ staff: 'Verwaltung',
+ student: 'Schueler/in'
+ }
+ return labels[role] || role
+}
+
+export function getRoleColor(role: Contact['role'], isDark: boolean): string {
+ const colors: Record = {
+ parent: { dark: 'bg-blue-500/20 text-blue-300', light: 'bg-blue-100 text-blue-700' },
+ teacher: { dark: 'bg-purple-500/20 text-purple-300', light: 'bg-purple-100 text-purple-700' },
+ staff: { dark: 'bg-amber-500/20 text-amber-300', light: 'bg-amber-100 text-amber-700' },
+ student: { dark: 'bg-green-500/20 text-green-300', light: 'bg-green-100 text-green-700' }
+ }
+ return isDark ? colors[role].dark : colors[role].light
+}
+
+// Emoji categories for picker
+export const emojiCategories = {
+ 'Häufig': ['👍', '❤️', '😊', '😂', '🙏', '👏', '🎉', '✅', '📝', '📚'],
+ 'Smileys': ['😀', '😃', '😄', '😁', '😅', '😂', '🤣', '😊', '😇', '🙂', '😉', '😌', '😍', '🥰', '😘'],
+ 'Gesten': ['👍', '👎', '👌', '✌️', '🤞', '🤝', '👏', '🙌', '👋', '✋', '🤚', '🖐️', '🙏'],
+ 'Symbole': ['❤️', '💙', '💚', '💛', '🧡', '💜', '✅', '❌', '⭐', '🌟', '💯', '📌', '📎'],
+ 'Schule': ['📚', '📖', '📝', '✏️', '📓', '📕', '📗', '📘', '🎓', '🏫', '📅', '⏰', '🔔']
+}
diff --git a/studio-v2/lib/messages/mock-data.ts b/studio-v2/lib/messages/mock-data.ts
new file mode 100644
index 0000000..12b5482
--- /dev/null
+++ b/studio-v2/lib/messages/mock-data.ts
@@ -0,0 +1,227 @@
+import type { Contact, Conversation, Message, MessageTemplate } from './types'
+
+export const mockContacts: Contact[] = [
+ {
+ id: 'contact_mueller',
+ name: 'Familie Mueller',
+ email: 'familie.mueller@gmail.com',
+ phone: '+49 170 1234567',
+ role: 'parent',
+ student_name: 'Max Mueller',
+ class_name: '10a',
+ notes: 'Bevorzugt Kommunikation per E-Mail',
+ tags: ['aktiv', 'Elternbeirat'],
+ preferred_channel: 'email',
+ online: false,
+ last_seen: new Date(Date.now() - 1800000).toISOString(),
+ created_at: new Date(Date.now() - 86400000 * 30).toISOString(),
+ updated_at: new Date().toISOString()
+ },
+ {
+ id: 'contact_schmidt',
+ name: 'Petra Schmidt',
+ email: 'p.schmidt@web.de',
+ phone: '+49 171 9876543',
+ role: 'parent',
+ student_name: 'Lisa Schmidt',
+ class_name: '10a',
+ tags: ['responsive'],
+ preferred_channel: 'pwa',
+ online: true,
+ created_at: new Date(Date.now() - 86400000 * 60).toISOString(),
+ updated_at: new Date().toISOString()
+ },
+ {
+ id: 'contact_weber',
+ name: 'Sabine Weber',
+ email: 's.weber@schule-musterstadt.de',
+ role: 'teacher',
+ tags: ['Fachschaft Deutsch', 'Klassenleitung 9b'],
+ preferred_channel: 'pwa',
+ online: true,
+ last_seen: new Date().toISOString(),
+ created_at: new Date(Date.now() - 86400000 * 90).toISOString(),
+ updated_at: new Date().toISOString()
+ },
+ {
+ id: 'contact_hoffmann',
+ name: 'Thomas Hoffmann',
+ email: 't.hoffmann@schule-musterstadt.de',
+ role: 'teacher',
+ tags: ['Fachschaft Mathe', 'Oberstufenkoordinator'],
+ preferred_channel: 'pwa',
+ online: false,
+ last_seen: new Date(Date.now() - 3600000 * 2).toISOString(),
+ created_at: new Date(Date.now() - 86400000 * 120).toISOString(),
+ updated_at: new Date().toISOString()
+ },
+ {
+ id: 'contact_becker',
+ name: 'Familie Becker',
+ email: 'becker.familie@gmx.de',
+ phone: '+49 172 5551234',
+ role: 'parent',
+ student_name: 'Tim Becker',
+ class_name: '10a',
+ tags: [],
+ preferred_channel: 'email',
+ online: false,
+ last_seen: new Date(Date.now() - 86400000).toISOString(),
+ created_at: new Date(Date.now() - 86400000 * 45).toISOString(),
+ updated_at: new Date().toISOString()
+ },
+ {
+ id: 'contact_klein',
+ name: 'Monika Klein',
+ email: 'm.klein@schule-musterstadt.de',
+ role: 'staff',
+ tags: ['Sekretariat'],
+ preferred_channel: 'pwa',
+ online: true,
+ created_at: new Date(Date.now() - 86400000 * 180).toISOString(),
+ updated_at: new Date().toISOString()
+ },
+ {
+ id: 'contact_fischer',
+ name: 'Familie Fischer',
+ email: 'fischer@t-online.de',
+ phone: '+49 173 4445566',
+ role: 'parent',
+ student_name: 'Anna Fischer',
+ class_name: '11b',
+ tags: ['Foerderverein'],
+ preferred_channel: 'pwa',
+ online: false,
+ last_seen: new Date(Date.now() - 7200000).toISOString(),
+ created_at: new Date(Date.now() - 86400000 * 75).toISOString(),
+ updated_at: new Date().toISOString()
+ },
+ {
+ id: 'contact_meyer',
+ name: 'Dr. Hans Meyer',
+ email: 'h.meyer@schule-musterstadt.de',
+ role: 'teacher',
+ tags: ['Schulleitung', 'Stellvertretender Schulleiter'],
+ preferred_channel: 'email',
+ online: false,
+ last_seen: new Date(Date.now() - 3600000).toISOString(),
+ created_at: new Date(Date.now() - 86400000 * 365).toISOString(),
+ updated_at: new Date().toISOString()
+ }
+]
+
+export const mockConversations: Conversation[] = [
+ {
+ id: 'conv_mueller',
+ participant_ids: ['contact_mueller'],
+ created_at: new Date(Date.now() - 86400000 * 7).toISOString(),
+ updated_at: new Date(Date.now() - 300000).toISOString(),
+ last_message: 'Vielen Dank fuer die Info! Max freut sich schon auf die Klassenfahrt 🎉',
+ last_message_time: new Date(Date.now() - 300000).toISOString(),
+ unread_count: 2,
+ is_group: false,
+ title: 'Familie Mueller',
+ pinned: true
+ },
+ {
+ id: 'conv_schmidt',
+ participant_ids: ['contact_schmidt'],
+ created_at: new Date(Date.now() - 86400000 * 14).toISOString(),
+ updated_at: new Date(Date.now() - 3600000).toISOString(),
+ last_message: 'Lisa war heute krank, sie kommt morgen wieder.',
+ last_message_time: new Date(Date.now() - 3600000).toISOString(),
+ unread_count: 0,
+ is_group: false,
+ title: 'Petra Schmidt'
+ },
+ {
+ id: 'conv_weber',
+ participant_ids: ['contact_weber'],
+ created_at: new Date(Date.now() - 86400000 * 30).toISOString(),
+ updated_at: new Date(Date.now() - 7200000).toISOString(),
+ last_message: 'Koenntest du mir die Klausuraufgaben bis Freitag schicken? 📝',
+ last_message_time: new Date(Date.now() - 7200000).toISOString(),
+ unread_count: 1,
+ is_group: false,
+ title: 'Sabine Weber',
+ typing: true
+ },
+ {
+ id: 'conv_hoffmann',
+ participant_ids: ['contact_hoffmann'],
+ created_at: new Date(Date.now() - 86400000 * 5).toISOString(),
+ updated_at: new Date(Date.now() - 86400000).toISOString(),
+ last_message: 'Die Notenkonferenz ist am 15.02. um 14:00 Uhr.',
+ last_message_time: new Date(Date.now() - 86400000).toISOString(),
+ unread_count: 0,
+ is_group: false,
+ title: 'Thomas Hoffmann'
+ },
+ {
+ id: 'conv_becker',
+ participant_ids: ['contact_becker'],
+ created_at: new Date(Date.now() - 86400000 * 3).toISOString(),
+ updated_at: new Date(Date.now() - 172800000).toISOString(),
+ last_message: 'Wir haben die Einverstaendniserklaerung unterschrieben.',
+ last_message_time: new Date(Date.now() - 172800000).toISOString(),
+ unread_count: 0,
+ is_group: false,
+ title: 'Familie Becker',
+ muted: true
+ },
+ {
+ id: 'conv_fachschaft',
+ participant_ids: ['contact_weber', 'contact_hoffmann', 'contact_meyer'],
+ created_at: new Date(Date.now() - 86400000 * 60).toISOString(),
+ updated_at: new Date(Date.now() - 14400000).toISOString(),
+ last_message: 'Sabine: Hat jemand die neuen Lehrplaene schon gelesen?',
+ last_message_time: new Date(Date.now() - 14400000).toISOString(),
+ unread_count: 3,
+ is_group: true,
+ title: 'Fachschaft Deutsch 📚'
+ }
+]
+
+export const mockMessages: Record = {
+ 'conv_mueller': [
+ { id: 'msg_m1', conversation_id: 'conv_mueller', sender_id: 'self', content: 'Guten Tag Frau Mueller,\n\nich moechte Sie ueber die anstehende Klassenfahrt nach Berlin informieren. Die Reise findet vom 15.-19. April statt.', content_type: 'text', timestamp: new Date(Date.now() - 86400000).toISOString(), read: true, delivered: true, send_email: true, email_sent: true, email_sent_at: new Date(Date.now() - 86400000).toISOString() },
+ { id: 'msg_m2', conversation_id: 'conv_mueller', sender_id: 'self', content: 'Die Kosten belaufen sich auf 280 Euro pro Schueler. Bitte ueberweisen Sie den Betrag bis zum 01.03. auf das Schulkonto.', content_type: 'text', timestamp: new Date(Date.now() - 86400000 + 60000).toISOString(), read: true, delivered: true, send_email: false, email_sent: false },
+ { id: 'msg_m3', conversation_id: 'conv_mueller', sender_id: 'contact_mueller', content: 'Vielen Dank fuer die Information! Wir werden den Betrag diese Woche ueberweisen.', content_type: 'text', timestamp: new Date(Date.now() - 3600000).toISOString(), read: false, delivered: true, send_email: false, email_sent: false, reactions: [{ emoji: '👍', user_id: 'self' }] },
+ { id: 'msg_m4', conversation_id: 'conv_mueller', sender_id: 'contact_mueller', content: 'Vielen Dank fuer die Info! Max freut sich schon auf die Klassenfahrt 🎉', content_type: 'text', timestamp: new Date(Date.now() - 300000).toISOString(), read: false, delivered: true, send_email: false, email_sent: false },
+ ],
+ 'conv_schmidt': [
+ { id: 'msg_s1', conversation_id: 'conv_schmidt', sender_id: 'contact_schmidt', content: 'Guten Morgen! Lisa ist heute leider krank und kann nicht zur Schule kommen.', content_type: 'text', timestamp: new Date(Date.now() - 86400000 * 2).toISOString(), read: true, delivered: true, send_email: false, email_sent: false },
+ { id: 'msg_s2', conversation_id: 'conv_schmidt', sender_id: 'self', content: 'Gute Besserung an Lisa! 🤒 Soll ich ihr die Hausaufgaben zukommen lassen?', content_type: 'text', timestamp: new Date(Date.now() - 86400000 * 2 + 1800000).toISOString(), read: true, delivered: true, send_email: false, email_sent: false },
+ { id: 'msg_s3', conversation_id: 'conv_schmidt', sender_id: 'contact_schmidt', content: 'Das waere sehr nett, vielen Dank! 🙏', content_type: 'text', timestamp: new Date(Date.now() - 86400000 * 2 + 3600000).toISOString(), read: true, delivered: true, send_email: false, email_sent: false },
+ { id: 'msg_s4', conversation_id: 'conv_schmidt', sender_id: 'self', content: 'Hier sind die Hausaufgaben fuer diese Woche:\n\n📖 Deutsch: Seite 45-48 lesen\n📝 Mathe: Aufgaben 1-5 auf Seite 112\n🔬 Bio: Referat vorbereiten', content_type: 'text', timestamp: new Date(Date.now() - 86400000).toISOString(), read: true, delivered: true, send_email: true, email_sent: true },
+ { id: 'msg_s5', conversation_id: 'conv_schmidt', sender_id: 'contact_schmidt', content: 'Lisa war heute krank, sie kommt morgen wieder.', content_type: 'text', timestamp: new Date(Date.now() - 3600000).toISOString(), read: true, delivered: true, send_email: false, email_sent: false },
+ ],
+ 'conv_weber': [
+ { id: 'msg_w1', conversation_id: 'conv_weber', sender_id: 'contact_weber', content: 'Hi! Hast du schon die neuen Abi-Themen gesehen?', content_type: 'text', timestamp: new Date(Date.now() - 86400000 * 3).toISOString(), read: true, delivered: true, send_email: false, email_sent: false },
+ { id: 'msg_w2', conversation_id: 'conv_weber', sender_id: 'self', content: 'Ja, habe ich! Finde ich ganz gut machbar dieses Jahr. 📚', content_type: 'text', timestamp: new Date(Date.now() - 86400000 * 3 + 1800000).toISOString(), read: true, delivered: true, send_email: false, email_sent: false },
+ { id: 'msg_w3', conversation_id: 'conv_weber', sender_id: 'contact_weber', content: 'Koenntest du mir die Klausuraufgaben bis Freitag schicken? 📝', content_type: 'text', timestamp: new Date(Date.now() - 7200000).toISOString(), read: false, delivered: true, send_email: false, email_sent: false },
+ ],
+ 'conv_hoffmann': [
+ { id: 'msg_h1', conversation_id: 'conv_hoffmann', sender_id: 'contact_hoffmann', content: 'Kurze Info: Die Notenkonferenz ist am 15.02. um 14:00 Uhr.', content_type: 'text', timestamp: new Date(Date.now() - 86400000 * 2).toISOString(), read: true, delivered: true, send_email: false, email_sent: false },
+ { id: 'msg_h2', conversation_id: 'conv_hoffmann', sender_id: 'self', content: 'Danke fuer die Info! Bin dabei. 👍', content_type: 'text', timestamp: new Date(Date.now() - 86400000 * 2 + 3600000).toISOString(), read: true, delivered: true, send_email: false, email_sent: false },
+ { id: 'msg_h3', conversation_id: 'conv_hoffmann', sender_id: 'contact_hoffmann', content: 'Die Notenkonferenz ist am 15.02. um 14:00 Uhr.', content_type: 'text', timestamp: new Date(Date.now() - 86400000).toISOString(), read: true, delivered: true, send_email: false, email_sent: false },
+ ],
+ 'conv_becker': [
+ { id: 'msg_b1', conversation_id: 'conv_becker', sender_id: 'self', content: 'Guten Tag Familie Becker,\n\nbitte vergessen Sie nicht, die Einverstaendniserklaerung fuer den Schwimmunterricht zu unterschreiben.', content_type: 'text', timestamp: new Date(Date.now() - 86400000 * 4).toISOString(), read: true, delivered: true, send_email: true, email_sent: true },
+ { id: 'msg_b2', conversation_id: 'conv_becker', sender_id: 'contact_becker', content: 'Wir haben die Einverstaendniserklaerung unterschrieben.', content_type: 'text', timestamp: new Date(Date.now() - 172800000).toISOString(), read: true, delivered: true, send_email: false, email_sent: false },
+ ],
+ 'conv_fachschaft': [
+ { id: 'msg_f1', conversation_id: 'conv_fachschaft', sender_id: 'contact_meyer', content: 'Liebe Kolleginnen und Kollegen,\n\ndie neuen Lehrplaene sind jetzt online verfuegbar.', content_type: 'text', timestamp: new Date(Date.now() - 86400000).toISOString(), read: true, delivered: true, send_email: false, email_sent: false },
+ { id: 'msg_f2', conversation_id: 'conv_fachschaft', sender_id: 'contact_hoffmann', content: 'Danke fuer die Info! Werde ich mir heute Abend anschauen.', content_type: 'text', timestamp: new Date(Date.now() - 72000000).toISOString(), read: true, delivered: true, send_email: false, email_sent: false },
+ { id: 'msg_f3', conversation_id: 'conv_fachschaft', sender_id: 'contact_weber', content: 'Hat jemand die neuen Lehrplaene schon gelesen?', content_type: 'text', timestamp: new Date(Date.now() - 14400000).toISOString(), read: false, delivered: true, send_email: false, email_sent: false },
+ { id: 'msg_f4', conversation_id: 'conv_fachschaft', sender_id: 'contact_hoffmann', content: 'Noch nicht komplett, aber sieht interessant aus! 📖', content_type: 'text', timestamp: new Date(Date.now() - 10800000).toISOString(), read: false, delivered: true, send_email: false, email_sent: false },
+ { id: 'msg_f5', conversation_id: 'conv_fachschaft', sender_id: 'contact_meyer', content: 'Wir sollten naechste Woche eine Besprechung ansetzen.', content_type: 'text', timestamp: new Date(Date.now() - 7200000).toISOString(), read: false, delivered: true, send_email: false, email_sent: false },
+ ]
+}
+
+export const mockTemplates: MessageTemplate[] = [
+ { id: 'tpl_1', name: 'Krankmeldung bestaetigen', content: 'Vielen Dank fuer die Krankmeldung. Gute Besserung! 🤒', created_at: new Date().toISOString() },
+ { id: 'tpl_2', name: 'Hausaufgaben senden', content: 'Hier sind die Hausaufgaben fuer diese Woche:\n\n📖 Deutsch: \n📝 Mathe: \n🔬 Bio: ', created_at: new Date().toISOString() },
+ { id: 'tpl_3', name: 'Elterngespraech anfragen', content: 'Guten Tag,\n\nich wuerde gerne ein Elterngespraech mit Ihnen vereinbaren. Wann haetten Sie Zeit?', created_at: new Date().toISOString() },
+ { id: 'tpl_4', name: 'Termin bestaetigen', content: 'Vielen Dank, der Termin ist bestaetigt. Ich freue mich auf unser Gespraech! 📅', created_at: new Date().toISOString() },
+]
diff --git a/studio-v2/lib/messages/types.ts b/studio-v2/lib/messages/types.ts
new file mode 100644
index 0000000..5235b71
--- /dev/null
+++ b/studio-v2/lib/messages/types.ts
@@ -0,0 +1,99 @@
+export interface Contact {
+ id: string
+ name: string
+ email?: string
+ phone?: string
+ role: 'parent' | 'teacher' | 'staff' | 'student'
+ student_name?: string
+ class_name?: string
+ notes?: string
+ tags: string[]
+ avatar_url?: string
+ preferred_channel: 'email' | 'matrix' | 'pwa'
+ online: boolean
+ last_seen?: string
+ created_at: string
+ updated_at: string
+}
+
+export interface Message {
+ id: string
+ conversation_id: string
+ sender_id: string // "self" for own messages
+ content: string
+ content_type: 'text' | 'file' | 'image' | 'voice'
+ file_url?: string
+ file_name?: string
+ timestamp: string
+ read: boolean
+ read_at?: string
+ delivered: boolean
+ send_email: boolean
+ email_sent: boolean
+ email_sent_at?: string
+ email_error?: string
+ reply_to?: string // ID of message being replied to
+ reactions?: { emoji: string; user_id: string }[]
+}
+
+export interface Conversation {
+ id: string
+ participant_ids: string[]
+ group_id?: string
+ created_at: string
+ updated_at: string
+ last_message?: string
+ last_message_time?: string
+ unread_count: number
+ is_group: boolean
+ title?: string
+ typing?: boolean // Someone is typing
+ pinned?: boolean
+ muted?: boolean
+ archived?: boolean
+}
+
+export interface MessageTemplate {
+ id: string
+ name: string
+ content: string
+ created_at: string
+}
+
+export interface MessagesStats {
+ total_contacts: number
+ total_conversations: number
+ total_messages: number
+ unread_messages: number
+}
+
+export interface MessagesContextType {
+ // Data
+ contacts: Contact[]
+ conversations: Conversation[]
+ messages: Record // conversationId -> messages
+ templates: MessageTemplate[]
+ stats: MessagesStats
+
+ // Computed
+ unreadCount: number
+ recentConversations: Conversation[]
+
+ // Actions
+ fetchContacts: () => Promise
+ fetchConversations: () => Promise
+ fetchMessages: (conversationId: string) => Promise
+ sendMessage: (conversationId: string, content: string, sendEmail?: boolean, replyTo?: string) => Promise
+ markAsRead: (conversationId: string) => Promise
+ createConversation: (contactId: string) => Promise
+ addReaction: (messageId: string, emoji: string) => void
+ deleteMessage: (conversationId: string, messageId: string) => void
+ pinConversation: (conversationId: string) => void
+ muteConversation: (conversationId: string) => void
+
+ // State
+ isLoading: boolean
+ error: string | null
+ currentConversationId: string | null
+ setCurrentConversationId: (id: string | null) => void
+}
diff --git a/website/app/admin/compliance/audit-workspace/_components/RequirementDetailPanel.tsx b/website/app/admin/compliance/audit-workspace/_components/RequirementDetailPanel.tsx
new file mode 100644
index 0000000..1b7d808
--- /dev/null
+++ b/website/app/admin/compliance/audit-workspace/_components/RequirementDetailPanel.tsx
@@ -0,0 +1,306 @@
+'use client'
+
+import { useState, useEffect } from 'react'
+import type { Requirement, Regulation, RequirementUpdate, AIInterpretation } from '../types'
+import { IMPLEMENTATION_STATUS, AUDIT_STATUS } from '../types'
+
+export default function RequirementDetailPanel({
+ requirement,
+ regulation,
+ onUpdate,
+ saving,
+}: {
+ requirement: Requirement
+ regulation: Regulation | undefined
+ onUpdate: (updates: RequirementUpdate) => void
+ saving: boolean
+}) {
+ const BACKEND_URL = process.env.NEXT_PUBLIC_BACKEND_URL || 'http://localhost:8000'
+ const [editMode, setEditMode] = useState(false)
+ const [aiLoading, setAiLoading] = useState(false)
+ const [aiInterpretation, setAiInterpretation] = useState(null)
+ const [showAiPanel, setShowAiPanel] = useState(false)
+ const [localData, setLocalData] = useState({
+ implementation_status: requirement.implementation_status,
+ implementation_details: requirement.implementation_details || '',
+ evidence_description: requirement.evidence_description || '',
+ audit_status: requirement.audit_status,
+ auditor_notes: requirement.auditor_notes || '',
+ is_applicable: requirement.is_applicable,
+ applicability_reason: requirement.applicability_reason || '',
+ })
+ const [newCodeRef, setNewCodeRef] = useState({ file: '', line: '', description: '' })
+
+ useEffect(() => {
+ setLocalData({
+ implementation_status: requirement.implementation_status,
+ implementation_details: requirement.implementation_details || '',
+ evidence_description: requirement.evidence_description || '',
+ audit_status: requirement.audit_status,
+ auditor_notes: requirement.auditor_notes || '',
+ is_applicable: requirement.is_applicable,
+ applicability_reason: requirement.applicability_reason || '',
+ })
+ setEditMode(false)
+ setAiInterpretation(null)
+ setShowAiPanel(false)
+ }, [requirement.id])
+
+ const generateAiInterpretation = async () => {
+ setAiLoading(true)
+ setShowAiPanel(true)
+ try {
+ const res = await fetch(`${BACKEND_URL}/api/v1/compliance/ai/interpret`, {
+ method: 'POST',
+ headers: { 'Content-Type': 'application/json' },
+ body: JSON.stringify({ requirement_id: requirement.id }),
+ })
+ if (res.ok) {
+ setAiInterpretation(await res.json())
+ } else {
+ const err = await res.json()
+ setAiInterpretation({
+ summary: '', applicability: '', technical_measures: [],
+ affected_modules: [], risk_level: 'unknown', implementation_hints: [],
+ confidence_score: 0, error: err.detail || 'Fehler bei AI-Analyse'
+ })
+ }
+ } catch (err) {
+ setAiInterpretation({
+ summary: '', applicability: '', technical_measures: [],
+ affected_modules: [], risk_level: 'unknown', implementation_hints: [],
+ confidence_score: 0, error: 'Netzwerkfehler bei AI-Analyse'
+ })
+ } finally {
+ setAiLoading(false)
+ }
+ }
+
+ const handleSave = () => {
+ onUpdate(localData)
+ setEditMode(false)
+ }
+
+ const addCodeReference = () => {
+ if (!newCodeRef.file) return
+ const refs = requirement.code_references || []
+ onUpdate({
+ code_references: [...refs, {
+ file: newCodeRef.file,
+ line: newCodeRef.line ? parseInt(newCodeRef.line) : undefined,
+ description: newCodeRef.description,
+ }],
+ })
+ setNewCodeRef({ file: '', line: '', description: '' })
+ }
+
+ return (
+
+ {/* Header */}
+
+
+
+
+
+ {requirement.article}{requirement.paragraph ? ` ${requirement.paragraph}` : ''}
+
+
+ {AUDIT_STATUS[requirement.audit_status as keyof typeof AUDIT_STATUS]?.label || requirement.audit_status}
+
+
+ {IMPLEMENTATION_STATUS[requirement.implementation_status as keyof typeof IMPLEMENTATION_STATUS]?.label || requirement.implementation_status}
+
+
+
{requirement.title}
+
+
+ {editMode ? (
+ <>
+ setEditMode(false)} className="px-3 py-1.5 text-sm text-slate-600 hover:text-slate-800">Abbrechen
+
+ {saving ? 'Speichern...' : 'Speichern'}
+
+ >
+ ) : (
+ setEditMode(true)} className="px-3 py-1.5 text-sm bg-slate-100 text-slate-700 rounded-lg hover:bg-slate-200">Bearbeiten
+ )}
+
+
+
+
+
+ {/* Original Requirement Text */}
+
+
+
+ Originaler Anforderungstext
+
+
+
{requirement.requirement_text || 'Kein Originaltext hinterlegt'}
+ {requirement.source_page && (
+
Quelle: {regulation?.code} Seite {requirement.source_page}{requirement.source_section ? `, ${requirement.source_section}` : ''}
+ )}
+
+
+
+ {/* Applicability */}
+
+ Anwendbarkeit auf Breakpilot
+ {editMode ? (
+
+
+ setLocalData({ ...localData, is_applicable: e.target.checked })} className="rounded" />
+ Anwendbar
+
+
+ ) : (
+
+
+ {requirement.is_applicable ? 'Anwendbar' : 'Nicht anwendbar'}
+
+ {requirement.applicability_reason &&
{requirement.applicability_reason}
}
+
+ )}
+
+
+ {/* Interpretation & AI Analysis */}
+
+
+
+
+ Interpretation
+
+
+ {aiLoading ? (
+ <> AI analysiert...>
+ ) : (
+ <> AI Analyse>
+ )}
+
+
+
+
{requirement.breakpilot_interpretation || 'Keine Interpretation hinterlegt'}
+
+
+ {/* AI Panel */}
+ {showAiPanel && (
+
+
+
+
+ AI-generierte Analyse
+
+ {aiInterpretation?.confidence_score ?
Konfidenz: {Math.round(aiInterpretation.confidence_score * 100)}% : null}
+
+ {aiLoading &&
Claude analysiert die Anforderung...
}
+ {aiInterpretation?.error &&
{aiInterpretation.error}
}
+ {aiInterpretation && !aiInterpretation.error && !aiLoading && (
+
+ {aiInterpretation.summary &&
Zusammenfassung
{aiInterpretation.summary}
}
+ {aiInterpretation.applicability &&
Anwendbarkeit auf Breakpilot
{aiInterpretation.applicability}
}
+ {aiInterpretation.risk_level && (
+
+ Risiko:
+ {aiInterpretation.risk_level}
+
+ )}
+ {aiInterpretation.technical_measures?.length > 0 &&
Technische Massnahmen
{aiInterpretation.technical_measures.map((m, i) => {m} )} }
+ {aiInterpretation.affected_modules?.length > 0 &&
Betroffene Module
{aiInterpretation.affected_modules.map((m, i) => {m} )}
}
+ {aiInterpretation.implementation_hints?.length > 0 &&
Implementierungshinweise
{aiInterpretation.implementation_hints.map((h, i) => {h} )} }
+
+ )}
+
+ )}
+
+
+ {/* Implementation Details */}
+
+
+
+ Umsetzung (fuer Auditor)
+
+ {editMode ? (
+
+
+ Implementierungsstatus
+ setLocalData({ ...localData, implementation_status: e.target.value })} className="px-3 py-2 border border-slate-300 rounded-lg text-sm">
+ {Object.entries(IMPLEMENTATION_STATUS).map(([key, { label }]) => {label} )}
+
+
+
+ ) : (
+
+
{requirement.implementation_details || 'Noch keine Umsetzungsdetails dokumentiert'}
+
+ )}
+
+
+ {/* Code References */}
+
+
+ {/* Evidence */}
+
+
+
+ Nachweis / Evidence
+
+ {editMode ? (
+
+
+ {/* Auditor Section */}
+
+
+
+ Auditor-Bereich
+
+ {editMode ? (
+
+
+ Audit-Status
+ setLocalData({ ...localData, audit_status: e.target.value })} className="px-3 py-2 border border-slate-300 rounded-lg text-sm">
+ {Object.entries(AUDIT_STATUS).map(([key, { label }]) => {label} )}
+
+
+
+ ) : (
+
+
{requirement.auditor_notes || 'Keine Auditor-Notizen'}
+
+ )}
+
+
+
+ )
+}
diff --git a/website/app/admin/compliance/audit-workspace/_components/RequirementList.tsx b/website/app/admin/compliance/audit-workspace/_components/RequirementList.tsx
new file mode 100644
index 0000000..bd6db4c
--- /dev/null
+++ b/website/app/admin/compliance/audit-workspace/_components/RequirementList.tsx
@@ -0,0 +1,163 @@
+'use client'
+
+import type { Requirement, Regulation, Category } from '../types'
+import { AUDIT_STATUS, IMPLEMENTATION_STATUS, PRIORITY_LABELS } from '../types'
+
+export default function RequirementList({
+ regulations,
+ selectedRegulation,
+ setSelectedRegulation,
+ filteredRequirements,
+ selectedRequirement,
+ setSelectedRequirement,
+ searchQuery,
+ setSearchQuery,
+ filterAuditStatus,
+ setFilterAuditStatus,
+ filterImplStatus,
+ setFilterImplStatus,
+}: {
+ regulations: Regulation[]
+ selectedRegulation: string | null
+ setSelectedRegulation: (code: string) => void
+ filteredRequirements: Requirement[]
+ selectedRequirement: Requirement | null
+ setSelectedRequirement: (req: Requirement) => void
+ searchQuery: string
+ setSearchQuery: (q: string) => void
+ filterAuditStatus: string
+ setFilterAuditStatus: (s: string) => void
+ filterImplStatus: string
+ setFilterImplStatus: (s: string) => void
+}) {
+ const currentRegulation = regulations.find(r => r.code === selectedRegulation)
+
+ return (
+
+ {/* Regulation Selector */}
+
+
+ Verordnung / Standard
+
+
setSelectedRegulation(e.target.value)}
+ className="w-full px-3 py-2 border border-slate-300 rounded-lg focus:ring-2 focus:ring-primary-500 focus:border-primary-500"
+ >
+ {regulations.map(reg => (
+
+ {reg.code} - {reg.name} ({reg.requirement_count})
+
+ ))}
+
+
+ {currentRegulation?.source_url && (
+
+
+
+
+ Originaldokument oeffnen
+
+ )}
+
+ {currentRegulation?.local_pdf_path && (
+
+
+
+
+ Lokale PDF
+
+ )}
+
+
+ {/* Filters */}
+
+
+ Suche
+ setSearchQuery(e.target.value)}
+ placeholder="Artikel, Titel..."
+ className="w-full px-3 py-1.5 text-sm border border-slate-300 rounded-lg"
+ />
+
+
+
+ Audit-Status
+ setFilterAuditStatus(e.target.value)}
+ className="w-full px-2 py-1.5 text-sm border border-slate-300 rounded-lg"
+ >
+ Alle
+ {Object.entries(AUDIT_STATUS).map(([key, { label }]) => (
+ {label}
+ ))}
+
+
+
+ Impl.-Status
+ setFilterImplStatus(e.target.value)}
+ className="w-full px-2 py-1.5 text-sm border border-slate-300 rounded-lg"
+ >
+ Alle
+ {Object.entries(IMPLEMENTATION_STATUS).map(([key, { label }]) => (
+ {label}
+ ))}
+
+
+
+
+
+ {/* Requirements List */}
+
+
+
+ Anforderungen ({filteredRequirements.length})
+
+
+
+ {filteredRequirements.map(req => (
+
setSelectedRequirement(req)}
+ className={`w-full text-left p-3 border-b border-slate-100 hover:bg-slate-50 transition-colors ${
+ selectedRequirement?.id === req.id ? 'bg-primary-50 border-l-4 border-l-primary-500' : ''
+ }`}
+ >
+
+
+
+
+ {req.article}{req.paragraph ? ` ${req.paragraph}` : ''}
+
+
+ {PRIORITY_LABELS[req.priority]?.label || ''}
+
+
+
{req.title}
+
+
+
+
+
+
+
+ ))}
+
+
+
+ )
+}
diff --git a/website/app/admin/compliance/audit-workspace/page.tsx b/website/app/admin/compliance/audit-workspace/page.tsx
index f412fcc..a77c936 100644
--- a/website/app/admin/compliance/audit-workspace/page.tsx
+++ b/website/app/admin/compliance/audit-workspace/page.tsx
@@ -15,72 +15,9 @@
import { useState, useEffect } from 'react'
import Link from 'next/link'
import AdminLayout from '@/components/admin/AdminLayout'
-
-// Types
-interface Regulation {
- id: string
- code: string
- name: string
- full_name: string
- regulation_type: string
- source_url: string | null
- local_pdf_path: string | null
- requirement_count: number
-}
-
-interface Requirement {
- id: string
- regulation_id: string
- regulation_code?: string
- article: string
- paragraph: string | null
- title: string
- description: string | null
- requirement_text: string | null
- breakpilot_interpretation: string | null
- implementation_status: string
- implementation_details: string | null
- code_references: Array<{ file: string; line?: number; description: string }> | null
- evidence_description: string | null
- audit_status: string
- auditor_notes: string | null
- is_applicable: boolean
- applicability_reason: string | null
- priority: number
- source_page: number | null
- source_section: string | null
-}
-
-interface RequirementUpdate {
- implementation_status?: string
- implementation_details?: string
- code_references?: Array<{ file: string; line?: number; description: string }>
- evidence_description?: string
- audit_status?: string
- auditor_notes?: string
- is_applicable?: boolean
- applicability_reason?: string
-}
-
-const IMPLEMENTATION_STATUS = {
- not_started: { label: 'Nicht gestartet', color: 'bg-slate-400' },
- in_progress: { label: 'In Arbeit', color: 'bg-yellow-500' },
- implemented: { label: 'Implementiert', color: 'bg-blue-500' },
- verified: { label: 'Verifiziert', color: 'bg-green-500' },
-}
-
-const AUDIT_STATUS = {
- pending: { label: 'Ausstehend', color: 'bg-slate-400' },
- in_review: { label: 'In Pruefung', color: 'bg-yellow-500' },
- approved: { label: 'Genehmigt', color: 'bg-green-500' },
- rejected: { label: 'Abgelehnt', color: 'bg-red-500' },
-}
-
-const PRIORITY_LABELS: Record = {
- 1: { label: 'Kritisch', color: 'text-red-600' },
- 2: { label: 'Hoch', color: 'text-orange-600' },
- 3: { label: 'Mittel', color: 'text-yellow-600' },
-}
+import type { Regulation, Requirement, RequirementUpdate } from './types'
+import RequirementList from './_components/RequirementList'
+import RequirementDetailPanel from './_components/RequirementDetailPanel'
export default function AuditWorkspacePage() {
const [regulations, setRegulations] = useState([])
@@ -95,14 +32,10 @@ export default function AuditWorkspacePage() {
const BACKEND_URL = process.env.NEXT_PUBLIC_BACKEND_URL || 'http://localhost:8000'
- useEffect(() => {
- loadRegulations()
- }, [])
+ useEffect(() => { loadRegulations() }, [])
useEffect(() => {
- if (selectedRegulation) {
- loadRequirements(selectedRegulation)
- }
+ if (selectedRegulation) loadRequirements(selectedRegulation)
}, [selectedRegulation])
const loadRegulations = async () => {
@@ -111,10 +44,7 @@ export default function AuditWorkspacePage() {
if (res.ok) {
const data = await res.json()
setRegulations(data.regulations || [])
- // Select first regulation by default
- if (data.regulations?.length > 0) {
- setSelectedRegulation(data.regulations[0].code)
- }
+ if (data.regulations?.length > 0) setSelectedRegulation(data.regulations[0].code)
}
} catch (err) {
console.error('Failed to load regulations:', err)
@@ -143,9 +73,7 @@ export default function AuditWorkspacePage() {
headers: { 'Content-Type': 'application/json' },
body: JSON.stringify(updates),
})
-
if (res.ok) {
- const updated = await res.json()
setRequirements(prev => prev.map(r => r.id === reqId ? { ...r, ...updates } : r))
if (selectedRequirement?.id === reqId) {
setSelectedRequirement({ ...selectedRequirement, ...updates })
@@ -174,7 +102,6 @@ export default function AuditWorkspacePage() {
const currentRegulation = regulations.find(r => r.code === selectedRegulation)
- // Statistics
const stats = {
total: requirements.length,
verified: requirements.filter(r => r.implementation_status === 'verified').length,
@@ -204,135 +131,22 @@ export default function AuditWorkspacePage() {
- {/* Left Sidebar - Regulation & Requirement List */}
-
- {/* Regulation Selector */}
-
-
- Verordnung / Standard
-
-
setSelectedRegulation(e.target.value)}
- className="w-full px-3 py-2 border border-slate-300 rounded-lg focus:ring-2 focus:ring-primary-500 focus:border-primary-500"
- >
- {regulations.map(reg => (
-
- {reg.code} - {reg.name} ({reg.requirement_count})
-
- ))}
-
+
- {currentRegulation?.source_url && (
-
-
-
-
- Originaldokument oeffnen
-
- )}
-
- {currentRegulation?.local_pdf_path && (
-
-
-
-
- Lokale PDF
-
- )}
-
-
- {/* Filters */}
-
-
- Suche
- setSearchQuery(e.target.value)}
- placeholder="Artikel, Titel..."
- className="w-full px-3 py-1.5 text-sm border border-slate-300 rounded-lg"
- />
-
-
-
- Audit-Status
- setFilterAuditStatus(e.target.value)}
- className="w-full px-2 py-1.5 text-sm border border-slate-300 rounded-lg"
- >
- Alle
- {Object.entries(AUDIT_STATUS).map(([key, { label }]) => (
- {label}
- ))}
-
-
-
- Impl.-Status
- setFilterImplStatus(e.target.value)}
- className="w-full px-2 py-1.5 text-sm border border-slate-300 rounded-lg"
- >
- Alle
- {Object.entries(IMPLEMENTATION_STATUS).map(([key, { label }]) => (
- {label}
- ))}
-
-
-
-
-
- {/* Requirements List */}
-
-
-
- Anforderungen ({filteredRequirements.length})
-
-
-
- {filteredRequirements.map(req => (
-
setSelectedRequirement(req)}
- className={`w-full text-left p-3 border-b border-slate-100 hover:bg-slate-50 transition-colors ${
- selectedRequirement?.id === req.id ? 'bg-primary-50 border-l-4 border-l-primary-500' : ''
- }`}
- >
-
-
-
-
- {req.article}{req.paragraph ? ` ${req.paragraph}` : ''}
-
-
- {PRIORITY_LABELS[req.priority]?.label || ''}
-
-
-
{req.title}
-
-
-
-
-
-
-
- ))}
-
-
-
-
- {/* Right Panel - Requirement Detail */}
+ {/* Right Panel */}
{selectedRequirement ? (
)
}
-
-// AI Interpretation Types
-interface AIInterpretation {
- summary: string
- applicability: string
- technical_measures: string[]
- affected_modules: string[]
- risk_level: string
- implementation_hints: string[]
- confidence_score: number
- error?: string
-}
-
-// Requirement Detail Panel Component
-function RequirementDetailPanel({
- requirement,
- regulation,
- onUpdate,
- saving,
-}: {
- requirement: Requirement
- regulation: Regulation | undefined
- onUpdate: (updates: RequirementUpdate) => void
- saving: boolean
-}) {
- const BACKEND_URL = process.env.NEXT_PUBLIC_BACKEND_URL || 'http://localhost:8000'
- const [editMode, setEditMode] = useState(false)
- const [aiLoading, setAiLoading] = useState(false)
- const [aiInterpretation, setAiInterpretation] = useState(null)
- const [showAiPanel, setShowAiPanel] = useState(false)
- const [localData, setLocalData] = useState({
- implementation_status: requirement.implementation_status,
- implementation_details: requirement.implementation_details || '',
- evidence_description: requirement.evidence_description || '',
- audit_status: requirement.audit_status,
- auditor_notes: requirement.auditor_notes || '',
- is_applicable: requirement.is_applicable,
- applicability_reason: requirement.applicability_reason || '',
- })
- const [newCodeRef, setNewCodeRef] = useState({ file: '', line: '', description: '' })
-
- useEffect(() => {
- setLocalData({
- implementation_status: requirement.implementation_status,
- implementation_details: requirement.implementation_details || '',
- evidence_description: requirement.evidence_description || '',
- audit_status: requirement.audit_status,
- auditor_notes: requirement.auditor_notes || '',
- is_applicable: requirement.is_applicable,
- applicability_reason: requirement.applicability_reason || '',
- })
- setEditMode(false)
- setAiInterpretation(null)
- setShowAiPanel(false)
- }, [requirement.id])
-
- const generateAiInterpretation = async () => {
- setAiLoading(true)
- setShowAiPanel(true)
- try {
- const res = await fetch(`${BACKEND_URL}/api/v1/compliance/ai/interpret`, {
- method: 'POST',
- headers: { 'Content-Type': 'application/json' },
- body: JSON.stringify({ requirement_id: requirement.id }),
- })
- if (res.ok) {
- const data = await res.json()
- setAiInterpretation(data)
- } else {
- const err = await res.json()
- setAiInterpretation({
- summary: '', applicability: '', technical_measures: [],
- affected_modules: [], risk_level: 'unknown', implementation_hints: [],
- confidence_score: 0, error: err.detail || 'Fehler bei AI-Analyse'
- })
- }
- } catch (err) {
- setAiInterpretation({
- summary: '', applicability: '', technical_measures: [],
- affected_modules: [], risk_level: 'unknown', implementation_hints: [],
- confidence_score: 0, error: 'Netzwerkfehler bei AI-Analyse'
- })
- } finally {
- setAiLoading(false)
- }
- }
-
- const handleSave = () => {
- onUpdate(localData)
- setEditMode(false)
- }
-
- const addCodeReference = () => {
- if (!newCodeRef.file) return
- const refs = requirement.code_references || []
- onUpdate({
- code_references: [...refs, {
- file: newCodeRef.file,
- line: newCodeRef.line ? parseInt(newCodeRef.line) : undefined,
- description: newCodeRef.description,
- }],
- })
- setNewCodeRef({ file: '', line: '', description: '' })
- }
-
- return (
-
- {/* Header */}
-
-
-
-
-
- {requirement.article}{requirement.paragraph ? ` ${requirement.paragraph}` : ''}
-
-
- {AUDIT_STATUS[requirement.audit_status as keyof typeof AUDIT_STATUS]?.label || requirement.audit_status}
-
-
- {IMPLEMENTATION_STATUS[requirement.implementation_status as keyof typeof IMPLEMENTATION_STATUS]?.label || requirement.implementation_status}
-
-
-
{requirement.title}
-
-
- {editMode ? (
- <>
- setEditMode(false)}
- className="px-3 py-1.5 text-sm text-slate-600 hover:text-slate-800"
- >
- Abbrechen
-
-
- {saving ? 'Speichern...' : 'Speichern'}
-
- >
- ) : (
- setEditMode(true)}
- className="px-3 py-1.5 text-sm bg-slate-100 text-slate-700 rounded-lg hover:bg-slate-200"
- >
- Bearbeiten
-
- )}
-
-
-
-
-
- {/* Original Requirement Text */}
-
-
-
-
-
- Originaler Anforderungstext
-
-
-
- {requirement.requirement_text || 'Kein Originaltext hinterlegt'}
-
- {requirement.source_page && (
-
- Quelle: {regulation?.code} Seite {requirement.source_page}
- {requirement.source_section ? `, ${requirement.source_section}` : ''}
-
- )}
-
-
-
- {/* Applicability */}
-
-
- Anwendbarkeit auf Breakpilot
-
- {editMode ? (
-
-
- setLocalData({ ...localData, is_applicable: e.target.checked })}
- className="rounded"
- />
- Anwendbar
-
-
- ) : (
-
-
- {requirement.is_applicable ? 'Anwendbar' : 'Nicht anwendbar'}
-
- {requirement.applicability_reason && (
-
{requirement.applicability_reason}
- )}
-
- )}
-
-
- {/* Breakpilot Interpretation & AI Analysis */}
-
-
-
-
-
-
- Interpretation
-
-
- {aiLoading ? (
- <>
-
-
-
-
- AI analysiert...
- >
- ) : (
- <>
-
-
-
- AI Analyse
- >
- )}
-
-
-
- {/* Existing interpretation */}
-
-
- {requirement.breakpilot_interpretation || 'Keine Interpretation hinterlegt'}
-
-
-
- {/* AI Interpretation Panel */}
- {showAiPanel && (
-
-
-
-
-
-
- AI-generierte Analyse
-
- {aiInterpretation?.confidence_score && (
-
- Konfidenz: {Math.round(aiInterpretation.confidence_score * 100)}%
-
- )}
-
-
- {aiLoading && (
-
-
Claude analysiert die Anforderung...
-
- )}
-
- {aiInterpretation?.error && (
-
- {aiInterpretation.error}
-
- )}
-
- {aiInterpretation && !aiInterpretation.error && !aiLoading && (
-
- {/* Summary */}
- {aiInterpretation.summary && (
-
-
Zusammenfassung
-
{aiInterpretation.summary}
-
- )}
-
- {/* Applicability */}
- {aiInterpretation.applicability && (
-
-
Anwendbarkeit auf Breakpilot
-
{aiInterpretation.applicability}
-
- )}
-
- {/* Risk Level */}
- {aiInterpretation.risk_level && (
-
- Risiko:
-
- {aiInterpretation.risk_level}
-
-
- )}
-
- {/* Technical Measures */}
- {aiInterpretation.technical_measures?.length > 0 && (
-
-
Technische Massnahmen
-
- {aiInterpretation.technical_measures.map((m, i) => (
- {m}
- ))}
-
-
- )}
-
- {/* Affected Modules */}
- {aiInterpretation.affected_modules?.length > 0 && (
-
-
Betroffene Module
-
- {aiInterpretation.affected_modules.map((m, i) => (
-
- {m}
-
- ))}
-
-
- )}
-
- {/* Implementation Hints */}
- {aiInterpretation.implementation_hints?.length > 0 && (
-
-
Implementierungshinweise
-
- {aiInterpretation.implementation_hints.map((h, i) => (
- {h}
- ))}
-
-
- )}
-
- )}
-
- )}
-
-
- {/* Implementation Details */}
-
-
-
-
-
- Umsetzung (fuer Auditor)
-
- {editMode ? (
-
-
- Implementierungsstatus
- setLocalData({ ...localData, implementation_status: e.target.value })}
- className="px-3 py-2 border border-slate-300 rounded-lg text-sm"
- >
- {Object.entries(IMPLEMENTATION_STATUS).map(([key, { label }]) => (
- {label}
- ))}
-
-
-
- ) : (
-
-
- {requirement.implementation_details || 'Noch keine Umsetzungsdetails dokumentiert'}
-
-
- )}
-
-
- {/* Code References */}
-
-
- Code-Referenzen
-
-
-
-
- {/* Evidence */}
-
-
-
-
-
- Nachweis / Evidence
-
- {editMode ? (
-
-
- {/* Auditor Section */}
-
-
-
-
-
- Auditor-Bereich
-
- {editMode ? (
-
-
- Audit-Status
- setLocalData({ ...localData, audit_status: e.target.value })}
- className="px-3 py-2 border border-slate-300 rounded-lg text-sm"
- >
- {Object.entries(AUDIT_STATUS).map(([key, { label }]) => (
- {label}
- ))}
-
-
-
- ) : (
-
-
- {requirement.auditor_notes || 'Keine Auditor-Notizen'}
-
-
- )}
-
-
-
- )
-}
diff --git a/website/app/admin/compliance/audit-workspace/types.ts b/website/app/admin/compliance/audit-workspace/types.ts
new file mode 100644
index 0000000..195afe7
--- /dev/null
+++ b/website/app/admin/compliance/audit-workspace/types.ts
@@ -0,0 +1,75 @@
+export interface Regulation {
+ id: string
+ code: string
+ name: string
+ full_name: string
+ regulation_type: string
+ source_url: string | null
+ local_pdf_path: string | null
+ requirement_count: number
+}
+
+export interface Requirement {
+ id: string
+ regulation_id: string
+ regulation_code?: string
+ article: string
+ paragraph: string | null
+ title: string
+ description: string | null
+ requirement_text: string | null
+ breakpilot_interpretation: string | null
+ implementation_status: string
+ implementation_details: string | null
+ code_references: Array<{ file: string; line?: number; description: string }> | null
+ evidence_description: string | null
+ audit_status: string
+ auditor_notes: string | null
+ is_applicable: boolean
+ applicability_reason: string | null
+ priority: number
+ source_page: number | null
+ source_section: string | null
+}
+
+export interface RequirementUpdate {
+ implementation_status?: string
+ implementation_details?: string
+ code_references?: Array<{ file: string; line?: number; description: string }>
+ evidence_description?: string
+ audit_status?: string
+ auditor_notes?: string
+ is_applicable?: boolean
+ applicability_reason?: string
+}
+
+export interface AIInterpretation {
+ summary: string
+ applicability: string
+ technical_measures: string[]
+ affected_modules: string[]
+ risk_level: string
+ implementation_hints: string[]
+ confidence_score: number
+ error?: string
+}
+
+export const IMPLEMENTATION_STATUS = {
+ not_started: { label: 'Nicht gestartet', color: 'bg-slate-400' },
+ in_progress: { label: 'In Arbeit', color: 'bg-yellow-500' },
+ implemented: { label: 'Implementiert', color: 'bg-blue-500' },
+ verified: { label: 'Verifiziert', color: 'bg-green-500' },
+}
+
+export const AUDIT_STATUS = {
+ pending: { label: 'Ausstehend', color: 'bg-slate-400' },
+ in_review: { label: 'In Pruefung', color: 'bg-yellow-500' },
+ approved: { label: 'Genehmigt', color: 'bg-green-500' },
+ rejected: { label: 'Abgelehnt', color: 'bg-red-500' },
+}
+
+export const PRIORITY_LABELS: Record = {
+ 1: { label: 'Kritisch', color: 'text-red-600' },
+ 2: { label: 'Hoch', color: 'text-orange-600' },
+ 3: { label: 'Mittel', color: 'text-yellow-600' },
+}
diff --git a/website/app/admin/edu-search/_components/CrawlTab.tsx b/website/app/admin/edu-search/_components/CrawlTab.tsx
new file mode 100644
index 0000000..0f73448
--- /dev/null
+++ b/website/app/admin/edu-search/_components/CrawlTab.tsx
@@ -0,0 +1,99 @@
+'use client'
+
+import type { CrawlStats } from '../types'
+
+export default function CrawlTab({
+ stats,
+ loading,
+ onStartCrawl,
+}: {
+ stats: CrawlStats
+ loading: boolean
+ onStartCrawl: () => void
+}) {
+ return (
+
+ {/* Crawl Status */}
+
+
+
+
Crawl-Status
+
+ Letzter Crawl: {stats.lastCrawlTime ? new Date(stats.lastCrawlTime).toLocaleString('de-DE') : 'Noch nie'}
+
+
+
+ {stats.crawlStatus === 'running' ? '🔄 Läuft...' :
+ stats.crawlStatus === 'error' ? '❌ Fehler' :
+ '✅ Bereit'}
+
+
+
+ {loading ? (
+ <>
+
+
+
+
+ Crawl läuft...
+ >
+ ) : (
+ <>
+
+
+
+
+ Crawl starten
+ >
+ )}
+
+
+
+ {/* Crawl Settings */}
+
+
+
Crawl-Einstellungen
+
+
+
+
Scheduler
+
+
+
+ Automatischer Crawl aktiviert
+
+
+ Intervall
+
+ Täglich
+ Wöchentlich
+ Monatlich
+
+
+
+
+
+
+ )
+}
diff --git a/website/app/admin/edu-search/_components/RulesTab.tsx b/website/app/admin/edu-search/_components/RulesTab.tsx
new file mode 100644
index 0000000..8183e04
--- /dev/null
+++ b/website/app/admin/edu-search/_components/RulesTab.tsx
@@ -0,0 +1,37 @@
+'use client'
+
+export default function RulesTab() {
+ return (
+
+
+
+
⚠️
+
+
Tagging-Regeln Editor
+
+ Die Tagging-Regeln werden aktuell über YAML-Dateien verwaltet.
+ Ein visueller Editor ist in Entwicklung.
+
+
+
+
+
+
+ {[
+ { name: 'Doc-Type Regeln', file: 'doc_type_rules.yaml', desc: 'Klassifiziert Dokumente (Lehrplan, Arbeitsblatt, etc.)' },
+ { name: 'Fach-Regeln', file: 'subject_rules.yaml', desc: 'Erkennt Unterrichtsfächer' },
+ { name: 'Schulstufen-Regeln', file: 'level_rules.yaml', desc: 'Erkennt Primar, SekI, SekII, etc.' },
+ { name: 'Trust-Score Regeln', file: 'trust_rules.yaml', desc: 'Domain-basierte Vertrauensbewertung' },
+ ].map(rule => (
+
+
{rule.name}
+
{rule.desc}
+
+ /rules/{rule.file}
+
+
+ ))}
+
+
+ )
+}
diff --git a/website/app/admin/edu-search/_components/SeedModal.tsx b/website/app/admin/edu-search/_components/SeedModal.tsx
new file mode 100644
index 0000000..d847a7b
--- /dev/null
+++ b/website/app/admin/edu-search/_components/SeedModal.tsx
@@ -0,0 +1,190 @@
+'use client'
+
+import { useState } from 'react'
+import type { SeedURL, Category } from '../types'
+
+export default function SeedModal({
+ seed,
+ categories,
+ onClose,
+ onSaved,
+}: {
+ seed?: SeedURL | null
+ categories: Category[]
+ onClose: () => void
+ onSaved: () => void
+}) {
+ const [formData, setFormData] = useState>(seed || {
+ url: '',
+ category: 'federal',
+ name: '',
+ description: '',
+ trustBoost: 0.5,
+ enabled: true,
+ })
+ const [saving, setSaving] = useState(false)
+ const [saveError, setSaveError] = useState(null)
+
+ const handleSubmit = async (e: React.FormEvent) => {
+ e.preventDefault()
+ setSaving(true)
+ setSaveError(null)
+
+ try {
+ const category = categories.find(c => c.name === formData.category || c.id === formData.category)
+
+ const payload = {
+ url: formData.url,
+ name: formData.name,
+ description: formData.description || '',
+ category_id: category?.id || null,
+ trust_boost: formData.trustBoost,
+ enabled: formData.enabled,
+ source_type: 'GOV',
+ scope: 'FEDERAL',
+ }
+
+ if (seed) {
+ const res = await fetch(`/api/admin/edu-search?id=${seed.id}`, {
+ method: 'PUT',
+ headers: { 'Content-Type': 'application/json' },
+ body: JSON.stringify(payload),
+ })
+ if (!res.ok) {
+ const errData = await res.json()
+ throw new Error(errData.detail || errData.error || `HTTP ${res.status}`)
+ }
+ } else {
+ const res = await fetch(`/api/admin/edu-search?action=seed`, {
+ method: 'POST',
+ headers: { 'Content-Type': 'application/json' },
+ body: JSON.stringify(payload),
+ })
+ if (!res.ok) {
+ const errData = await res.json()
+ throw new Error(errData.detail || errData.error || `HTTP ${res.status}`)
+ }
+ }
+
+ onSaved()
+ onClose()
+ } catch (err) {
+ console.error('Failed to save seed:', err)
+ setSaveError(err instanceof Error ? err.message : 'Fehler beim Speichern')
+ } finally {
+ setSaving(false)
+ }
+ }
+
+ return (
+
+
+
+
{seed ? 'Seed bearbeiten' : 'Neue Seed-URL hinzufügen'}
+
+
+
+
+ )
+}
diff --git a/website/app/admin/edu-search/_components/SeedsTab.tsx b/website/app/admin/edu-search/_components/SeedsTab.tsx
new file mode 100644
index 0000000..4f0762b
--- /dev/null
+++ b/website/app/admin/edu-search/_components/SeedsTab.tsx
@@ -0,0 +1,200 @@
+'use client'
+
+import { useState } from 'react'
+import type { SeedURL, Category } from '../types'
+import SeedModal from './SeedModal'
+
+export default function SeedsTab({
+ seeds,
+ allSeeds,
+ categories,
+ searchQuery,
+ setSearchQuery,
+ selectedCategory,
+ setSelectedCategory,
+ onToggleEnabled,
+ onDelete,
+ onSaved,
+}: {
+ seeds: SeedURL[]
+ allSeeds: SeedURL[]
+ categories: Category[]
+ searchQuery: string
+ setSearchQuery: (q: string) => void
+ selectedCategory: string
+ setSelectedCategory: (cat: string) => void
+ onToggleEnabled: (id: string) => void
+ onDelete: (id: string) => void
+ onSaved: () => void
+}) {
+ const [showAddModal, setShowAddModal] = useState(false)
+ const [editingSeed, setEditingSeed] = useState(null)
+
+ const filteredSeeds = seeds.filter(seed => {
+ const matchesCategory = selectedCategory === 'all' || seed.category === selectedCategory
+ const matchesSearch = seed.name.toLowerCase().includes(searchQuery.toLowerCase()) ||
+ seed.url.toLowerCase().includes(searchQuery.toLowerCase())
+ return matchesCategory && matchesSearch
+ })
+
+ return (
+
+ {/* Header with filters */}
+
+
+ setSearchQuery(e.target.value)}
+ />
+
+
setSelectedCategory(e.target.value)}
+ >
+ Alle Kategorien
+ {categories.map(cat => (
+ {cat.icon} {cat.display_name || cat.name}
+ ))}
+
+
setShowAddModal(true)}
+ className="px-4 py-2 bg-primary-600 text-white rounded-lg hover:bg-primary-700 transition-colors flex items-center gap-2"
+ >
+
+
+
+ Neue Seed-URL
+
+
+
+ {/* Category Quick Stats */}
+
+ {categories.map(cat => {
+ const count = allSeeds.filter(s => s.category === cat.name).length
+ return (
+
setSelectedCategory(selectedCategory === cat.name ? 'all' : cat.name)}
+ className={`p-4 rounded-lg border transition-colors text-left ${
+ selectedCategory === cat.name
+ ? 'border-primary-500 bg-primary-50'
+ : 'border-slate-200 hover:border-slate-300'
+ }`}
+ >
+ {cat.icon}
+ {cat.display_name || cat.name}
+ {count} Seeds
+
+ )
+ })}
+
+
+ {/* Seeds Table */}
+
+
+
+
+ Status
+ Name
+ URL
+ Kategorie
+ Trust
+ Dokumente
+ Aktionen
+
+
+
+ {filteredSeeds.map(seed => (
+
+
+ onToggleEnabled(seed.id)}
+ className={`w-10 h-6 rounded-full transition-colors ${
+ seed.enabled ? 'bg-green-500' : 'bg-slate-300'
+ }`}
+ >
+
+
+
+
+ {seed.name}
+ {seed.description}
+
+
+
+ {seed.url.replace(/^https?:\/\/(www\.)?/, '').slice(0, 30)}...
+
+
+
+
+ {categories.find(c => c.name === seed.category)?.icon || '📁'}
+ {categories.find(c => c.name === seed.category)?.display_name || seed.category}
+
+
+
+ = 0.4 ? 'bg-green-100 text-green-700' :
+ seed.trustBoost >= 0.2 ? 'bg-yellow-100 text-yellow-700' :
+ 'bg-slate-100 text-slate-700'
+ }`}>
+ +{seed.trustBoost.toFixed(2)}
+
+
+
+ {seed.documentCount?.toLocaleString() || '-'}
+
+
+
+
setEditingSeed(seed)}
+ className="p-1.5 text-slate-400 hover:text-slate-600 hover:bg-slate-100 rounded"
+ title="Bearbeiten"
+ >
+
+
+
+
+
onDelete(seed.id)}
+ className="p-1.5 text-slate-400 hover:text-red-600 hover:bg-red-50 rounded"
+ title="Löschen"
+ >
+
+
+
+
+
+
+
+ ))}
+
+
+
+
+ {filteredSeeds.length === 0 && (
+
+ Keine Seed-URLs gefunden
+
+ )}
+
+ {/* Modals */}
+ {(showAddModal || editingSeed) && (
+
{
+ setShowAddModal(false)
+ setEditingSeed(null)
+ }}
+ onSaved={onSaved}
+ />
+ )}
+
+ )
+}
diff --git a/website/app/admin/edu-search/_components/StatsTab.tsx b/website/app/admin/edu-search/_components/StatsTab.tsx
new file mode 100644
index 0000000..c358168
--- /dev/null
+++ b/website/app/admin/edu-search/_components/StatsTab.tsx
@@ -0,0 +1,88 @@
+'use client'
+
+import type { CrawlStats, Category } from '../types'
+
+export default function StatsTab({
+ stats,
+ categories,
+}: {
+ stats: CrawlStats
+ categories: Category[]
+}) {
+ return (
+
+ {/* Overview Stats */}
+
+
+
{stats.totalDocuments.toLocaleString()}
+
Dokumente indexiert
+
+
+
{stats.totalSeeds}
+
Seed-URLs aktiv
+
+
+
{(stats.avgTrustScore * 100).toFixed(0)}%
+
Ø Trust-Score
+
+
+
{Object.keys(stats.documentsPerDocType).length}
+
Dokumenttypen
+
+
+
+ {/* Charts */}
+
+
+
Dokumente nach Kategorie
+
+ {Object.entries(stats.documentsPerCategory).map(([cat, count]) => {
+ const category = categories.find(c => c.name === cat)
+ const percentage = stats.totalDocuments > 0 ? (count / stats.totalDocuments) * 100 : 0
+ return (
+
+
+ {category?.icon || '📁'} {category?.display_name || cat}
+ {count.toLocaleString()} ({percentage.toFixed(1)}%)
+
+
+
+ )
+ })}
+
+
+
+
+
Dokumente nach Typ
+
+ {Object.entries(stats.documentsPerDocType)
+ .sort(([,a], [,b]) => b - a)
+ .slice(0, 6)
+ .map(([docType, count]) => {
+ const percentage = (count / stats.totalDocuments) * 100
+ return (
+
+
+ {docType.replace(/_/g, ' ')}
+ {count.toLocaleString()} ({percentage.toFixed(1)}%)
+
+
+
+ )
+ })}
+
+
+
+
+ )
+}
diff --git a/website/app/admin/edu-search/page.tsx b/website/app/admin/edu-search/page.tsx
index 763f47f..6bbb883 100644
--- a/website/app/admin/edu-search/page.tsx
+++ b/website/app/admin/edu-search/page.tsx
@@ -7,531 +7,34 @@
* edu-search-service (Tavily alternative for German education content)
*/
-import { useState, useEffect, useCallback } from 'react'
+import { useState } from 'react'
import AdminLayout from '@/components/admin/AdminLayout'
+import { useEduSearchData } from './useEduSearchData'
+import SeedsTab from './_components/SeedsTab'
+import CrawlTab from './_components/CrawlTab'
+import StatsTab from './_components/StatsTab'
+import RulesTab from './_components/RulesTab'
-// All API calls go through Next.js API proxy at /api/admin/edu-search
-// This avoids CORS issues since browser calls same-origin API routes
-
-// Types
-interface SeedURL {
- id: string
- url: string
- category: string
- category_id?: string
- name: string
- description: string
- trustBoost: number
- enabled: boolean
- lastCrawled?: string
- documentCount?: number
- source_type?: string
- scope?: string
- state?: string
- crawl_depth?: number
- crawl_frequency?: string
-}
-
-interface CrawlStats {
- totalDocuments: number
- totalSeeds: number
- lastCrawlTime?: string
- crawlStatus: 'idle' | 'running' | 'error'
- documentsPerCategory: Record
- documentsPerDocType: Record
- avgTrustScore: number
-}
-
-interface Category {
- id: string
- name: string
- display_name?: string
- description: string
- icon: string
- sort_order?: number
- is_active?: boolean
-}
-
-interface ApiSeed {
- id: string
- url: string
- name: string
- description: string | null
- category: string | null // Backend returns 'category' not 'category_name'
- category_display_name: string | null
- source_type: string
- scope: string
- state: string | null
- trust_boost: number
- enabled: boolean
- crawl_depth: number
- crawl_frequency: string
- last_crawled_at: string | null
- last_crawl_status: string | null
- last_crawl_docs: number
- total_documents: number
- created_at: string
- updated_at: string
-}
-
-// Default categories (fallback if API fails)
-const DEFAULT_CATEGORIES: Category[] = [
- { id: 'federal', name: 'federal', display_name: 'Bundesebene', description: 'KMK, BMBF, Bildungsserver', icon: '🏛️' },
- { id: 'states', name: 'states', display_name: 'Bundesländer', description: 'Ministerien, Landesbildungsserver', icon: '🗺️' },
- { id: 'science', name: 'science', display_name: 'Wissenschaft', description: 'Bertelsmann, PISA, IGLU, TIMSS', icon: '🔬' },
- { id: 'universities', name: 'universities', display_name: 'Hochschulen', description: 'Universitäten, Fachhochschulen, Pädagogische Hochschulen', icon: '🎓' },
- { id: 'legal', name: 'legal', display_name: 'Recht & Schulgesetze', description: 'Schulgesetze, Erlasse, Verordnungen, Datenschutzrecht', icon: '⚖️' },
- { id: 'portals', name: 'portals', display_name: 'Bildungsportale', description: 'Lehrer-Online, 4teachers, ZUM', icon: '📚' },
- { id: 'authorities', name: 'authorities', display_name: 'Schulbehörden', description: 'Regierungspräsidien, Schulämter', icon: '📋' },
+const tabDefs = [
+ { id: 'seeds' as const, name: 'Seed-URLs', icon: '🌱' },
+ { id: 'crawl' as const, name: 'Crawl-Steuerung', icon: '🕷️' },
+ { id: 'stats' as const, name: 'Statistiken', icon: '📊' },
+ { id: 'rules' as const, name: 'Tagging-Regeln', icon: '🏷️' },
]
-// Convert API seed to frontend format
-function apiSeedToFrontend(seed: ApiSeed): SeedURL {
- return {
- id: seed.id,
- url: seed.url,
- name: seed.name,
- description: seed.description || '',
- category: seed.category || 'federal', // Backend uses 'category' field
- category_id: undefined,
- trustBoost: seed.trust_boost,
- enabled: seed.enabled,
- lastCrawled: seed.last_crawled_at || undefined,
- documentCount: seed.total_documents,
- source_type: seed.source_type,
- scope: seed.scope,
- state: seed.state || undefined,
- crawl_depth: seed.crawl_depth,
- crawl_frequency: seed.crawl_frequency,
- }
-}
-
-// Default empty stats (loaded from API)
-const DEFAULT_STATS: CrawlStats = {
- totalDocuments: 0,
- totalSeeds: 0,
- lastCrawlTime: undefined,
- crawlStatus: 'idle',
- documentsPerCategory: {},
- documentsPerDocType: {},
- avgTrustScore: 0,
-}
-
export default function EduSearchAdminPage() {
const [activeTab, setActiveTab] = useState<'seeds' | 'crawl' | 'stats' | 'rules'>('seeds')
- const [seeds, setSeeds] = useState([])
- const [allSeeds, setAllSeeds] = useState([]) // All seeds for category counts
- const [categories, setCategories] = useState(DEFAULT_CATEGORIES)
- const [stats, setStats] = useState(DEFAULT_STATS)
- const [selectedCategory, setSelectedCategory] = useState('all')
const [searchQuery, setSearchQuery] = useState('')
- const [showAddModal, setShowAddModal] = useState(false)
- const [editingSeed, setEditingSeed] = useState(null)
- const [loading, setLoading] = useState(false)
- const [initialLoading, setInitialLoading] = useState(true)
- const [error, setError] = useState(null)
- // Fetch categories from API (via proxy)
- const fetchCategories = useCallback(async () => {
- try {
- const res = await fetch(`/api/admin/edu-search?action=categories`)
- if (res.ok) {
- const data = await res.json()
- if (data.categories && data.categories.length > 0) {
- setCategories(data.categories.map((cat: { id: string; name: string; display_name: string; description: string; icon: string; sort_order: number; is_active: boolean }) => ({
- id: cat.id,
- name: cat.name,
- display_name: cat.display_name,
- description: cat.description || '',
- icon: cat.icon || '📁',
- sort_order: cat.sort_order,
- is_active: cat.is_active,
- })))
- }
- }
- } catch (err) {
- console.error('Failed to fetch categories:', err)
- }
- }, [])
-
- // Fetch all seeds from API (for category counts)
- const fetchAllSeeds = useCallback(async () => {
- try {
- const res = await fetch(`/api/admin/edu-search?action=seeds`)
- if (!res.ok) {
- throw new Error(`HTTP ${res.status}`)
- }
- const data = await res.json()
- setAllSeeds((data.seeds || []).map(apiSeedToFrontend))
- } catch (err) {
- console.error('Failed to fetch all seeds:', err)
- }
- }, [])
-
- // Fetch seeds from API (via proxy) - filtered by category
- const fetchSeeds = useCallback(async () => {
- try {
- const params = new URLSearchParams()
- params.append('action', 'seeds')
- if (selectedCategory !== 'all') {
- params.append('category', selectedCategory)
- }
- const res = await fetch(`/api/admin/edu-search?${params}`)
- if (!res.ok) {
- throw new Error(`HTTP ${res.status}`)
- }
- const data = await res.json()
- const fetchedSeeds = (data.seeds || []).map(apiSeedToFrontend)
- setSeeds(fetchedSeeds)
- // If fetching all, also update allSeeds for counts
- if (selectedCategory === 'all') {
- setAllSeeds(fetchedSeeds)
- }
- setError(null)
- } catch (err) {
- console.error('Failed to fetch seeds:', err)
- setError('Seeds konnten nicht geladen werden. API nicht erreichbar.')
- }
- }, [selectedCategory])
-
- // Fetch stats from API (via proxy)
- const fetchStats = useCallback(async (preserveCrawlStatus = false) => {
- try {
- const res = await fetch(`/api/admin/edu-search?action=stats`)
- if (res.ok) {
- const data = await res.json()
- setStats(prev => ({
- totalDocuments: data.total_documents || 0,
- totalSeeds: data.total_seeds || 0,
- lastCrawlTime: data.last_crawl_time || prev.lastCrawlTime,
- crawlStatus: preserveCrawlStatus ? prev.crawlStatus : (data.crawl_status || 'idle'),
- documentsPerCategory: data.seeds_per_category || {},
- documentsPerDocType: {},
- avgTrustScore: data.avg_trust_boost || 0,
- }))
- }
- } catch (err) {
- console.error('Failed to fetch stats:', err)
- }
- }, [])
-
- // Initial load
- useEffect(() => {
- const loadData = async () => {
- setInitialLoading(true)
- await Promise.all([fetchCategories(), fetchSeeds(), fetchAllSeeds(), fetchStats()])
- setInitialLoading(false)
- }
- loadData()
- }, [fetchCategories, fetchSeeds, fetchAllSeeds, fetchStats])
-
- // Reload seeds when category filter changes
- useEffect(() => {
- if (!initialLoading) {
- fetchSeeds()
- }
- }, [selectedCategory, initialLoading, fetchSeeds])
-
- // Filter seeds
- const filteredSeeds = seeds.filter(seed => {
- const matchesCategory = selectedCategory === 'all' || seed.category === selectedCategory
- const matchesSearch = seed.name.toLowerCase().includes(searchQuery.toLowerCase()) ||
- seed.url.toLowerCase().includes(searchQuery.toLowerCase())
- return matchesCategory && matchesSearch
- })
-
- // Add/Edit Seed Modal
- const SeedModal = ({ seed, onClose }: { seed?: SeedURL | null, onClose: () => void }) => {
- const [formData, setFormData] = useState>(seed || {
- url: '',
- category: 'federal',
- name: '',
- description: '',
- trustBoost: 0.5,
- enabled: true,
- })
- const [saving, setSaving] = useState(false)
- const [saveError, setSaveError] = useState(null)
-
- const handleSubmit = async (e: React.FormEvent) => {
- e.preventDefault()
- setSaving(true)
- setSaveError(null)
-
- try {
- // Find category ID by name
- const category = categories.find(c => c.name === formData.category || c.id === formData.category)
-
- const payload = {
- url: formData.url,
- name: formData.name,
- description: formData.description || '',
- category_id: category?.id || null,
- trust_boost: formData.trustBoost,
- enabled: formData.enabled,
- source_type: 'GOV',
- scope: 'FEDERAL',
- }
-
- if (seed) {
- // Update existing seed (via proxy)
- const res = await fetch(`/api/admin/edu-search?id=${seed.id}`, {
- method: 'PUT',
- headers: { 'Content-Type': 'application/json' },
- body: JSON.stringify(payload),
- })
- if (!res.ok) {
- const errData = await res.json()
- throw new Error(errData.detail || errData.error || `HTTP ${res.status}`)
- }
- } else {
- // Create new seed (via proxy)
- const res = await fetch(`/api/admin/edu-search?action=seed`, {
- method: 'POST',
- headers: { 'Content-Type': 'application/json' },
- body: JSON.stringify(payload),
- })
- if (!res.ok) {
- const errData = await res.json()
- throw new Error(errData.detail || errData.error || `HTTP ${res.status}`)
- }
- }
-
- // Reload seeds and close modal
- await fetchSeeds()
- await fetchStats()
- onClose()
- } catch (err) {
- console.error('Failed to save seed:', err)
- setSaveError(err instanceof Error ? err.message : 'Fehler beim Speichern')
- } finally {
- setSaving(false)
- }
- }
-
- return (
-
-
-
-
{seed ? 'Seed bearbeiten' : 'Neue Seed-URL hinzufügen'}
-
-
- {saveError && (
-
- {saveError}
-
- )}
-
- URL *
- setFormData({ ...formData, url: e.target.value })}
- />
-
-
- Name *
- setFormData({ ...formData, name: e.target.value })}
- />
-
-
- Kategorie *
- setFormData({ ...formData, category: e.target.value })}
- >
- {categories.map(cat => (
- {cat.icon} {cat.display_name || cat.name}
- ))}
-
-
-
- Beschreibung
- setFormData({ ...formData, description: e.target.value })}
- />
-
-
-
- Trust-Boost: {formData.trustBoost?.toFixed(2)}
-
-
setFormData({ ...formData, trustBoost: parseFloat(e.target.value) })}
- />
-
- Höhere Werte für vertrauenswürdigere Quellen (1.0 = max für offizielle Regierungsquellen)
-
-
-
- setFormData({ ...formData, enabled: e.target.checked })}
- />
- Aktiv (wird beim nächsten Crawl berücksichtigt)
-
-
-
- Abbrechen
-
-
- {saving && (
-
-
-
-
- )}
- {seed ? 'Speichern' : 'Hinzufügen'}
-
-
-
-
-
- )
- }
-
- const handleDelete = async (id: string) => {
- if (!confirm('Seed-URL wirklich löschen?')) return
-
- try {
- const res = await fetch(`/api/admin/edu-search?id=${id}`, {
- method: 'DELETE',
- })
- if (!res.ok) {
- throw new Error(`HTTP ${res.status}`)
- }
- await fetchSeeds()
- await fetchStats()
- } catch (err) {
- console.error('Failed to delete seed:', err)
- alert('Fehler beim Löschen')
- }
- }
-
- const handleToggleEnabled = async (id: string) => {
- const seed = seeds.find(s => s.id === id)
- if (!seed) return
-
- try {
- const res = await fetch(`/api/admin/edu-search?id=${id}`, {
- method: 'PUT',
- headers: { 'Content-Type': 'application/json' },
- body: JSON.stringify({ enabled: !seed.enabled }),
- })
- if (!res.ok) {
- throw new Error(`HTTP ${res.status}`)
- }
- // Optimistic update
- setSeeds(seeds.map(s => s.id === id ? { ...s, enabled: !s.enabled } : s))
- } catch (err) {
- console.error('Failed to toggle seed:', err)
- // Reload on error
- await fetchSeeds()
- }
- }
-
- // Poll for crawl status from backend
- const pollCrawlStatus = useCallback(async () => {
- try {
- const res = await fetch('/api/admin/edu-search?action=legal-crawler-status', {
- method: 'POST',
- headers: { 'Content-Type': 'application/json' },
- body: JSON.stringify({}),
- })
- if (res.ok) {
- const data = await res.json()
- return data.status // 'running', 'idle', 'completed', 'error'
- }
- } catch {
- // Ignore errors
- }
- return 'idle'
- }, [])
-
- const handleStartCrawl = async () => {
- setLoading(true)
- setError(null)
- setStats(prev => ({ ...prev, crawlStatus: 'running', lastCrawlTime: new Date().toISOString() }))
-
- try {
- const response = await fetch('/api/admin/edu-search?action=crawl', {
- method: 'POST',
- headers: { 'Content-Type': 'application/json' },
- body: JSON.stringify({}),
- })
-
- const data = await response.json()
-
- if (response.ok) {
- // Crawl gestartet - kontinuierlich Status prüfen
- const checkStatus = async () => {
- const status = await pollCrawlStatus()
-
- if (status === 'running') {
- // Noch am Laufen - weiter pollen
- setStats(prev => ({ ...prev, crawlStatus: 'running' }))
- setTimeout(checkStatus, 3000)
- } else if (status === 'completed' || status === 'idle') {
- // Fertig
- setStats(prev => ({ ...prev, crawlStatus: 'idle' }))
- setLoading(false)
- await fetchStats(false) // Refresh stats
- } else {
- // Fehler oder unbekannter Status
- setStats(prev => ({ ...prev, crawlStatus: 'error' }))
- setLoading(false)
- }
- }
-
- // Start polling nach kurzer Verzögerung
- setTimeout(checkStatus, 2000)
- } else {
- setError(data.error || 'Fehler beim Starten des Crawls')
- setLoading(false)
- setStats(prev => ({ ...prev, crawlStatus: 'idle' }))
- }
- } catch (err) {
- setError('Netzwerkfehler beim Starten des Crawls')
- setLoading(false)
- setStats(prev => ({ ...prev, crawlStatus: 'idle' }))
- }
- }
+ const {
+ seeds, allSeeds, categories, stats, selectedCategory, setSelectedCategory,
+ loading, initialLoading, error,
+ handleStartCrawl, handleDelete, handleToggleEnabled, handleSaved,
+ fetchSeeds, fetchStats,
+ } = useEduSearchData()
return (
- {/* Loading State */}
{initialLoading && (
@@ -542,7 +45,6 @@ export default function EduSearchAdminPage() {
)}
- {/* Error State */}
{error && !initialLoading && (
@@ -563,395 +65,45 @@ export default function EduSearchAdminPage() {
)}
- {/* Tabs */}
{!initialLoading && (
-
-
-
- {[
- { id: 'seeds', name: 'Seed-URLs', icon: '🌱' },
- { id: 'crawl', name: 'Crawl-Steuerung', icon: '🕷️' },
- { id: 'stats', name: 'Statistiken', icon: '📊' },
- { id: 'rules', name: 'Tagging-Regeln', icon: '🏷️' },
- ].map(tab => (
- setActiveTab(tab.id as typeof activeTab)}
- className={`px-6 py-4 text-sm font-medium border-b-2 transition-colors ${
- activeTab === tab.id
- ? 'border-primary-600 text-primary-600'
- : 'border-transparent text-slate-500 hover:text-slate-700 hover:border-slate-300'
- }`}
- >
- {tab.icon}
- {tab.name}
-
- ))}
-
-
-
-
- {/* Seeds Tab */}
- {activeTab === 'seeds' && (
-
- {/* Header with filters */}
-
-
- setSearchQuery(e.target.value)}
- />
-
-
setSelectedCategory(e.target.value)}
- >
- Alle Kategorien
- {categories.map(cat => (
- {cat.icon} {cat.display_name || cat.name}
- ))}
-
+
+
+
+ {tabDefs.map(tab => (
setShowAddModal(true)}
- className="px-4 py-2 bg-primary-600 text-white rounded-lg hover:bg-primary-700 transition-colors flex items-center gap-2"
+ key={tab.id}
+ onClick={() => setActiveTab(tab.id)}
+ className={`px-6 py-4 text-sm font-medium border-b-2 transition-colors ${
+ activeTab === tab.id
+ ? 'border-primary-600 text-primary-600'
+ : 'border-transparent text-slate-500 hover:text-slate-700 hover:border-slate-300'
+ }`}
>
-
-
-
- Neue Seed-URL
+ {tab.icon}
+ {tab.name}
-
+ ))}
+
+
- {/* Category Quick Stats - show all categories, use allSeeds for counts */}
-
- {categories.map(cat => {
- const count = allSeeds.filter(s => s.category === cat.name).length
- return (
-
setSelectedCategory(selectedCategory === cat.name ? 'all' : cat.name)}
- className={`p-4 rounded-lg border transition-colors text-left ${
- selectedCategory === cat.name
- ? 'border-primary-500 bg-primary-50'
- : 'border-slate-200 hover:border-slate-300'
- }`}
- >
- {cat.icon}
- {cat.display_name || cat.name}
- {count} Seeds
-
- )
- })}
-
-
- {/* Seeds Table */}
-
-
-
-
- Status
- Name
- URL
- Kategorie
- Trust
- Dokumente
- Aktionen
-
-
-
- {filteredSeeds.map(seed => (
-
-
- handleToggleEnabled(seed.id)}
- className={`w-10 h-6 rounded-full transition-colors ${
- seed.enabled ? 'bg-green-500' : 'bg-slate-300'
- }`}
- >
-
-
-
-
- {seed.name}
- {seed.description}
-
-
-
- {seed.url.replace(/^https?:\/\/(www\.)?/, '').slice(0, 30)}...
-
-
-
-
- {categories.find(c => c.name === seed.category)?.icon || '📁'}
- {categories.find(c => c.name === seed.category)?.display_name || seed.category}
-
-
-
- = 0.4 ? 'bg-green-100 text-green-700' :
- seed.trustBoost >= 0.2 ? 'bg-yellow-100 text-yellow-700' :
- 'bg-slate-100 text-slate-700'
- }`}>
- +{seed.trustBoost.toFixed(2)}
-
-
-
- {seed.documentCount?.toLocaleString() || '-'}
-
-
-
-
setEditingSeed(seed)}
- className="p-1.5 text-slate-400 hover:text-slate-600 hover:bg-slate-100 rounded"
- title="Bearbeiten"
- >
-
-
-
-
-
handleDelete(seed.id)}
- className="p-1.5 text-slate-400 hover:text-red-600 hover:bg-red-50 rounded"
- title="Löschen"
- >
-
-
-
-
-
-
-
- ))}
-
-
-
-
- {filteredSeeds.length === 0 && (
-
- Keine Seed-URLs gefunden
-
- )}
-
- )}
-
- {/* Crawl Tab */}
- {activeTab === 'crawl' && (
-
- {/* Crawl Status */}
-
-
-
-
Crawl-Status
-
- Letzter Crawl: {stats.lastCrawlTime ? new Date(stats.lastCrawlTime).toLocaleString('de-DE') : 'Noch nie'}
-
-
-
- {stats.crawlStatus === 'running' ? '🔄 Läuft...' :
- stats.crawlStatus === 'error' ? '❌ Fehler' :
- '✅ Bereit'}
-
-
-
- {loading ? (
- <>
-
-
-
-
- Crawl läuft...
- >
- ) : (
- <>
-
-
-
-
- Crawl starten
- >
- )}
-
-
-
- {/* Crawl Settings */}
-
-
-
Crawl-Einstellungen
-
-
-
-
Scheduler
-
-
-
- Automatischer Crawl aktiviert
-
-
- Intervall
-
- Täglich
- Wöchentlich
- Monatlich
-
-
-
-
-
-
- )}
-
- {/* Stats Tab */}
- {activeTab === 'stats' && (
-
- {/* Overview Stats */}
-
-
-
{stats.totalDocuments.toLocaleString()}
-
Dokumente indexiert
-
-
-
{stats.totalSeeds}
-
Seed-URLs aktiv
-
-
-
{(stats.avgTrustScore * 100).toFixed(0)}%
-
Ø Trust-Score
-
-
-
{Object.keys(stats.documentsPerDocType).length}
-
Dokumenttypen
-
-
-
- {/* Charts */}
-
-
-
Dokumente nach Kategorie
-
- {Object.entries(stats.documentsPerCategory).map(([cat, count]) => {
- const category = categories.find(c => c.name === cat)
- const percentage = stats.totalDocuments > 0 ? (count / stats.totalDocuments) * 100 : 0
- return (
-
-
- {category?.icon || '📁'} {category?.display_name || cat}
- {count.toLocaleString()} ({percentage.toFixed(1)}%)
-
-
-
- )
- })}
-
-
-
-
-
Dokumente nach Typ
-
- {Object.entries(stats.documentsPerDocType)
- .sort(([,a], [,b]) => b - a)
- .slice(0, 6)
- .map(([docType, count]) => {
- const percentage = (count / stats.totalDocuments) * 100
- return (
-
-
- {docType.replace(/_/g, ' ')}
- {count.toLocaleString()} ({percentage.toFixed(1)}%)
-
-
-
- )
- })}
-
-
-
-
- )}
-
- {/* Rules Tab */}
- {activeTab === 'rules' && (
-
-
-
-
⚠️
-
-
Tagging-Regeln Editor
-
- Die Tagging-Regeln werden aktuell über YAML-Dateien verwaltet.
- Ein visueller Editor ist in Entwicklung.
-
-
-
-
-
-
- {[
- { name: 'Doc-Type Regeln', file: 'doc_type_rules.yaml', desc: 'Klassifiziert Dokumente (Lehrplan, Arbeitsblatt, etc.)' },
- { name: 'Fach-Regeln', file: 'subject_rules.yaml', desc: 'Erkennt Unterrichtsfächer' },
- { name: 'Schulstufen-Regeln', file: 'level_rules.yaml', desc: 'Erkennt Primar, SekI, SekII, etc.' },
- { name: 'Trust-Score Regeln', file: 'trust_rules.yaml', desc: 'Domain-basierte Vertrauensbewertung' },
- ].map(rule => (
-
-
{rule.name}
-
{rule.desc}
-
- /rules/{rule.file}
-
-
- ))}
-
-
- )}
+
+ {activeTab === 'seeds' && (
+
+ )}
+ {activeTab === 'crawl' && (
+
+ )}
+ {activeTab === 'stats' && (
+
+ )}
+ {activeTab === 'rules' && }
+
-
- )}
-
- {/* Modals */}
- {(showAddModal || editingSeed) && (
-
{
- setShowAddModal(false)
- setEditingSeed(null)
- }}
- />
)}
)
diff --git a/website/app/admin/edu-search/types.ts b/website/app/admin/edu-search/types.ts
new file mode 100644
index 0000000..a1ce8c4
--- /dev/null
+++ b/website/app/admin/edu-search/types.ts
@@ -0,0 +1,102 @@
+export interface SeedURL {
+ id: string
+ url: string
+ category: string
+ category_id?: string
+ name: string
+ description: string
+ trustBoost: number
+ enabled: boolean
+ lastCrawled?: string
+ documentCount?: number
+ source_type?: string
+ scope?: string
+ state?: string
+ crawl_depth?: number
+ crawl_frequency?: string
+}
+
+export interface CrawlStats {
+ totalDocuments: number
+ totalSeeds: number
+ lastCrawlTime?: string
+ crawlStatus: 'idle' | 'running' | 'error'
+ documentsPerCategory: Record
+ documentsPerDocType: Record
+ avgTrustScore: number
+}
+
+export interface Category {
+ id: string
+ name: string
+ display_name?: string
+ description: string
+ icon: string
+ sort_order?: number
+ is_active?: boolean
+}
+
+export interface ApiSeed {
+ id: string
+ url: string
+ name: string
+ description: string | null
+ category: string | null
+ category_display_name: string | null
+ source_type: string
+ scope: string
+ state: string | null
+ trust_boost: number
+ enabled: boolean
+ crawl_depth: number
+ crawl_frequency: string
+ last_crawled_at: string | null
+ last_crawl_status: string | null
+ last_crawl_docs: number
+ total_documents: number
+ created_at: string
+ updated_at: string
+}
+
+// Default categories (fallback if API fails)
+export const DEFAULT_CATEGORIES: Category[] = [
+ { id: 'federal', name: 'federal', display_name: 'Bundesebene', description: 'KMK, BMBF, Bildungsserver', icon: '🏛️' },
+ { id: 'states', name: 'states', display_name: 'Bundesländer', description: 'Ministerien, Landesbildungsserver', icon: '🗺️' },
+ { id: 'science', name: 'science', display_name: 'Wissenschaft', description: 'Bertelsmann, PISA, IGLU, TIMSS', icon: '🔬' },
+ { id: 'universities', name: 'universities', display_name: 'Hochschulen', description: 'Universitäten, Fachhochschulen, Pädagogische Hochschulen', icon: '🎓' },
+ { id: 'legal', name: 'legal', display_name: 'Recht & Schulgesetze', description: 'Schulgesetze, Erlasse, Verordnungen, Datenschutzrecht', icon: '⚖️' },
+ { id: 'portals', name: 'portals', display_name: 'Bildungsportale', description: 'Lehrer-Online, 4teachers, ZUM', icon: '📚' },
+ { id: 'authorities', name: 'authorities', display_name: 'Schulbehörden', description: 'Regierungspräsidien, Schulämter', icon: '📋' },
+]
+
+// Default empty stats (loaded from API)
+export const DEFAULT_STATS: CrawlStats = {
+ totalDocuments: 0,
+ totalSeeds: 0,
+ lastCrawlTime: undefined,
+ crawlStatus: 'idle',
+ documentsPerCategory: {},
+ documentsPerDocType: {},
+ avgTrustScore: 0,
+}
+
+// Convert API seed to frontend format
+export function apiSeedToFrontend(seed: ApiSeed): SeedURL {
+ return {
+ id: seed.id,
+ url: seed.url,
+ name: seed.name,
+ description: seed.description || '',
+ category: seed.category || 'federal',
+ category_id: undefined,
+ trustBoost: seed.trust_boost,
+ enabled: seed.enabled,
+ lastCrawled: seed.last_crawled_at || undefined,
+ documentCount: seed.total_documents,
+ source_type: seed.source_type,
+ scope: seed.scope,
+ state: seed.state || undefined,
+ crawl_depth: seed.crawl_depth,
+ crawl_frequency: seed.crawl_frequency,
+ }
+}
diff --git a/website/app/admin/edu-search/useEduSearchData.ts b/website/app/admin/edu-search/useEduSearchData.ts
new file mode 100644
index 0000000..d84965d
--- /dev/null
+++ b/website/app/admin/edu-search/useEduSearchData.ts
@@ -0,0 +1,171 @@
+'use client'
+
+import { useState, useEffect, useCallback } from 'react'
+import type { SeedURL, Category, CrawlStats } from './types'
+import { DEFAULT_CATEGORIES, DEFAULT_STATS, apiSeedToFrontend } from './types'
+
+export function useEduSearchData() {
+ const [seeds, setSeeds] = useState([])
+ const [allSeeds, setAllSeeds] = useState([])
+ const [categories, setCategories] = useState(DEFAULT_CATEGORIES)
+ const [stats, setStats] = useState(DEFAULT_STATS)
+ const [selectedCategory, setSelectedCategory] = useState('all')
+ const [loading, setLoading] = useState(false)
+ const [initialLoading, setInitialLoading] = useState(true)
+ const [error, setError] = useState(null)
+
+ const fetchCategories = useCallback(async () => {
+ try {
+ const res = await fetch(`/api/admin/edu-search?action=categories`)
+ if (res.ok) {
+ const data = await res.json()
+ if (data.categories && data.categories.length > 0) {
+ setCategories(data.categories.map((cat: { id: string; name: string; display_name: string; description: string; icon: string; sort_order: number; is_active: boolean }) => ({
+ id: cat.id, name: cat.name, display_name: cat.display_name,
+ description: cat.description || '', icon: cat.icon || '📁',
+ sort_order: cat.sort_order, is_active: cat.is_active,
+ })))
+ }
+ }
+ } catch (err) {
+ console.error('Failed to fetch categories:', err)
+ }
+ }, [])
+
+ const fetchAllSeeds = useCallback(async () => {
+ try {
+ const res = await fetch(`/api/admin/edu-search?action=seeds`)
+ if (!res.ok) throw new Error(`HTTP ${res.status}`)
+ const data = await res.json()
+ setAllSeeds((data.seeds || []).map(apiSeedToFrontend))
+ } catch (err) {
+ console.error('Failed to fetch all seeds:', err)
+ }
+ }, [])
+
+ const fetchSeeds = useCallback(async () => {
+ try {
+ const params = new URLSearchParams()
+ params.append('action', 'seeds')
+ if (selectedCategory !== 'all') params.append('category', selectedCategory)
+ const res = await fetch(`/api/admin/edu-search?${params}`)
+ if (!res.ok) throw new Error(`HTTP ${res.status}`)
+ const data = await res.json()
+ const fetchedSeeds = (data.seeds || []).map(apiSeedToFrontend)
+ setSeeds(fetchedSeeds)
+ if (selectedCategory === 'all') setAllSeeds(fetchedSeeds)
+ setError(null)
+ } catch (err) {
+ console.error('Failed to fetch seeds:', err)
+ setError('Seeds konnten nicht geladen werden. API nicht erreichbar.')
+ }
+ }, [selectedCategory])
+
+ const fetchStats = useCallback(async (preserveCrawlStatus = false) => {
+ try {
+ const res = await fetch(`/api/admin/edu-search?action=stats`)
+ if (res.ok) {
+ const data = await res.json()
+ setStats(prev => ({
+ totalDocuments: data.total_documents || 0,
+ totalSeeds: data.total_seeds || 0,
+ lastCrawlTime: data.last_crawl_time || prev.lastCrawlTime,
+ crawlStatus: preserveCrawlStatus ? prev.crawlStatus : (data.crawl_status || 'idle'),
+ documentsPerCategory: data.seeds_per_category || {},
+ documentsPerDocType: {},
+ avgTrustScore: data.avg_trust_boost || 0,
+ }))
+ }
+ } catch (err) {
+ console.error('Failed to fetch stats:', err)
+ }
+ }, [])
+
+ useEffect(() => {
+ const loadData = async () => {
+ setInitialLoading(true)
+ await Promise.all([fetchCategories(), fetchSeeds(), fetchAllSeeds(), fetchStats()])
+ setInitialLoading(false)
+ }
+ loadData()
+ }, [fetchCategories, fetchSeeds, fetchAllSeeds, fetchStats])
+
+ useEffect(() => {
+ if (!initialLoading) fetchSeeds()
+ }, [selectedCategory, initialLoading, fetchSeeds])
+
+ const pollCrawlStatus = useCallback(async () => {
+ try {
+ const res = await fetch('/api/admin/edu-search?action=legal-crawler-status', {
+ method: 'POST', headers: { 'Content-Type': 'application/json' }, body: JSON.stringify({}),
+ })
+ if (res.ok) return (await res.json()).status
+ } catch { /* Ignore */ }
+ return 'idle'
+ }, [])
+
+ const handleStartCrawl = async () => {
+ setLoading(true)
+ setError(null)
+ setStats(prev => ({ ...prev, crawlStatus: 'running', lastCrawlTime: new Date().toISOString() }))
+ try {
+ const response = await fetch('/api/admin/edu-search?action=crawl', {
+ method: 'POST', headers: { 'Content-Type': 'application/json' }, body: JSON.stringify({}),
+ })
+ const data = await response.json()
+ if (response.ok) {
+ const checkStatus = async () => {
+ const status = await pollCrawlStatus()
+ if (status === 'running') { setStats(prev => ({ ...prev, crawlStatus: 'running' })); setTimeout(checkStatus, 3000) }
+ else if (status === 'completed' || status === 'idle') { setStats(prev => ({ ...prev, crawlStatus: 'idle' })); setLoading(false); await fetchStats(false) }
+ else { setStats(prev => ({ ...prev, crawlStatus: 'error' })); setLoading(false) }
+ }
+ setTimeout(checkStatus, 2000)
+ } else {
+ setError(data.error || 'Fehler beim Starten des Crawls')
+ setLoading(false)
+ setStats(prev => ({ ...prev, crawlStatus: 'idle' }))
+ }
+ } catch (err) {
+ setError('Netzwerkfehler beim Starten des Crawls')
+ setLoading(false)
+ setStats(prev => ({ ...prev, crawlStatus: 'idle' }))
+ }
+ }
+
+ const handleDelete = async (id: string) => {
+ if (!confirm('Seed-URL wirklich löschen?')) return
+ try {
+ const res = await fetch(`/api/admin/edu-search?id=${id}`, { method: 'DELETE' })
+ if (!res.ok) throw new Error(`HTTP ${res.status}`)
+ await fetchSeeds(); await fetchStats()
+ } catch (err) {
+ console.error('Failed to delete seed:', err)
+ alert('Fehler beim Löschen')
+ }
+ }
+
+ const handleToggleEnabled = async (id: string) => {
+ const seed = seeds.find(s => s.id === id)
+ if (!seed) return
+ try {
+ const res = await fetch(`/api/admin/edu-search?id=${id}`, {
+ method: 'PUT', headers: { 'Content-Type': 'application/json' },
+ body: JSON.stringify({ enabled: !seed.enabled }),
+ })
+ if (!res.ok) throw new Error(`HTTP ${res.status}`)
+ setSeeds(seeds.map(s => s.id === id ? { ...s, enabled: !s.enabled } : s))
+ } catch (err) {
+ console.error('Failed to toggle seed:', err); await fetchSeeds()
+ }
+ }
+
+ const handleSaved = async () => { await fetchSeeds(); await fetchStats() }
+
+ return {
+ seeds, allSeeds, categories, stats, selectedCategory, setSelectedCategory,
+ loading, initialLoading, error, setError,
+ handleStartCrawl, handleDelete, handleToggleEnabled, handleSaved,
+ fetchSeeds, fetchStats,
+ }
+}
diff --git a/website/app/admin/mac-mini/_components/DockerSection.tsx b/website/app/admin/mac-mini/_components/DockerSection.tsx
new file mode 100644
index 0000000..814903b
--- /dev/null
+++ b/website/app/admin/mac-mini/_components/DockerSection.tsx
@@ -0,0 +1,70 @@
+'use client'
+
+import type { MacMiniStatus } from '../types'
+
+export default function DockerSection({
+ status,
+ actionLoading,
+ onDockerUp,
+ onDockerDown,
+}: {
+ status: MacMiniStatus | null
+ actionLoading: string | null
+ onDockerUp: () => void
+ onDockerDown: () => void
+}) {
+ return (
+
+
+
+ 🐳 Docker Container
+
+
+
+ {actionLoading === 'docker-up' ? '...' : '▶ Start'}
+
+
+ {actionLoading === 'docker-down' ? '...' : '⏹ Stop'}
+
+
+
+
+ {status?.containers && status.containers.length > 0 ? (
+
+ {status.containers.map((container, idx) => (
+
+
+
+ {container.name}
+
+
+ {container.ports && (
+ {container.ports}
+ )}
+
+ {container.status}
+
+
+
+ ))}
+
+ ) : (
+
+ {status?.online ? 'Keine Container gefunden' : 'Server nicht erreichbar'}
+
+ )}
+
+ )
+}
diff --git a/website/app/admin/mac-mini/_components/InternetStatus.tsx b/website/app/admin/mac-mini/_components/InternetStatus.tsx
new file mode 100644
index 0000000..c9127c7
--- /dev/null
+++ b/website/app/admin/mac-mini/_components/InternetStatus.tsx
@@ -0,0 +1,56 @@
+'use client'
+
+import { INTERNET_REQUIRED_ACTIONS } from '../constants'
+
+export default function InternetStatus({ internet }: { internet?: boolean }) {
+ return (
+
+
+
+
{internet ? '🌐' : '📴'}
+
+
+ Internet: {internet ? 'Verbunden' : 'Offline (Normalbetrieb)'}
+
+
+ {internet
+ ? 'Mac Mini hat Internet-Zugang. LLM-Downloads und Updates möglich.'
+ : 'Mac Mini arbeitet offline. Für bestimmte Aktionen muss Internet aktiviert werden.'}
+
+
+
+
+ {internet ? 'Online' : 'Offline'}
+
+
+
+ {!internet && (
+
+
⚠️ Diese Aktionen benötigen Internet:
+
+ {INTERNET_REQUIRED_ACTIONS.map((item, idx) => (
+
+
•
+
+ {item.action}
+ – {item.description}
+
+
+ ))}
+
+
+ 💡 Tipp: Internet am Router/Switch nur bei Bedarf für den Mac Mini aktivieren.
+
+
+ )}
+
+ )
+}
diff --git a/website/app/admin/mac-mini/_components/OllamaSection.tsx b/website/app/admin/mac-mini/_components/OllamaSection.tsx
new file mode 100644
index 0000000..1b12f27
--- /dev/null
+++ b/website/app/admin/mac-mini/_components/OllamaSection.tsx
@@ -0,0 +1,289 @@
+'use client'
+
+import { useState } from 'react'
+import type { MacMiniStatus, DownloadProgress, ModelDescription } from '../types'
+import { MODEL_DATABASE, RECOMMENDED_MODELS } from '../constants'
+
+function getModelInfo(modelName: string): ModelDescription | null {
+ if (MODEL_DATABASE[modelName]) return MODEL_DATABASE[modelName]
+ const baseName = modelName.split(':')[0]
+ const matchingKey = Object.keys(MODEL_DATABASE).find(key =>
+ key.startsWith(baseName) || key === baseName
+ )
+ return matchingKey ? MODEL_DATABASE[matchingKey] : null
+}
+
+function formatBytes(bytes: number) {
+ if (bytes === 0) return '0 B'
+ const k = 1024
+ const sizes = ['B', 'KB', 'MB', 'GB', 'TB']
+ const i = Math.floor(Math.log(bytes) / Math.log(k))
+ return parseFloat((bytes / Math.pow(k, i)).toFixed(2)) + ' ' + sizes[i]
+}
+
+export default function OllamaSection({
+ status,
+ actionLoading,
+ downloadProgress,
+ modelInput,
+ setModelInput,
+ onPullModel,
+}: {
+ status: MacMiniStatus | null
+ actionLoading: string | null
+ downloadProgress: DownloadProgress | null
+ modelInput: string
+ setModelInput: (v: string) => void
+ onPullModel: () => void
+}) {
+ const [selectedModel, setSelectedModel] = useState(null)
+ const [showRecommendations, setShowRecommendations] = useState(false)
+
+ const isModelInstalled = (modelName: string): boolean => {
+ if (!status?.models) return false
+ return status.models.some(m =>
+ m.name === modelName || m.name.startsWith(modelName.split(':')[0])
+ )
+ }
+
+ return (
+
+
+ 🤖 Ollama LLM Modelle
+
+
+ {/* Installed Models */}
+ {status?.models && status.models.length > 0 ? (
+
+ {status.models.map((model, idx) => {
+ const modelInfo = getModelInfo(model.name)
+ return (
+
+
+
+
{model.name}
+ {modelInfo && (
+
setSelectedModel(model.name)}
+ className="text-blue-500 hover:text-blue-700 transition-colors"
+ title="Modell-Info anzeigen"
+ >
+
+
+
+
+ )}
+ {modelInfo?.category === 'vision' && (
+
Vision
+ )}
+
+
+ {model.size}
+ {model.modified}
+
+
+ )
+ })}
+
+ ) : (
+
+ {status?.ollama ? 'Keine Modelle installiert' : 'Ollama nicht erreichbar'}
+
+ )}
+
+ {/* Model Info Modal */}
+ {selectedModel && (
+
setSelectedModel(null)}>
+
e.stopPropagation()}>
+ {(() => {
+ const info = getModelInfo(selectedModel)
+ if (!info) return
Keine Informationen verfügbar
+ return (
+ <>
+
+
+
{info.name}
+
+
+ {info.category === 'vision' ? '👁️ Vision' : info.category === 'text' ? '📝 Text' : info.category}
+
+ {info.size}
+
+
+
setSelectedModel(null)} className="text-slate-400 hover:text-slate-600">
+
+
+
+
+
+
{info.description}
+
+
Geeignet für:
+
+ {info.useCases.map((useCase, i) => (
+
+ {useCase}
+
+ ))}
+
+
+ >
+ )
+ })()}
+
+
+ )}
+
+ {/* Download New Model */}
+
+
Neues Modell herunterladen
+
+ setModelInput(e.target.value)}
+ placeholder="z.B. llama3.2, mistral, qwen2.5:14b"
+ className="flex-1 px-4 py-2 border border-slate-300 rounded-lg focus:outline-none focus:ring-2 focus:ring-primary-500 focus:border-transparent"
+ disabled={actionLoading === 'pull'}
+ />
+
+ {actionLoading === 'pull' ? 'Lädt...' : 'Herunterladen'}
+
+
+
+ {/* Download Progress */}
+ {downloadProgress && (
+
+
+ {downloadProgress.model}
+
+ {formatBytes(downloadProgress.completed)} / {formatBytes(downloadProgress.total)}
+
+
+
+
+ {downloadProgress.percent}%
+
+
+ )}
+
+ {/* Toggle Recommendations */}
+
setShowRecommendations(!showRecommendations)}
+ className="mt-4 text-primary-600 hover:text-primary-700 font-medium text-sm flex items-center gap-2"
+ >
+
+
+
+ {showRecommendations ? 'Empfehlungen ausblenden' : 'Modell-Empfehlungen für Klausurkorrektur & Handschrift anzeigen'}
+
+
+
+ {/* Recommendations Section */}
+ {showRecommendations && (
+
+
📚 Empfohlene Modelle
+
+ {/* Handwriting Recognition */}
+
+
+ ✍️ Handschrifterkennung (Vision-Modelle)
+
+
+ {RECOMMENDED_MODELS.handwriting.map((rec, idx) => {
+ const info = MODEL_DATABASE[rec.model]
+ const installed = isModelInstalled(rec.model)
+ return (
+
+
+
+ {info?.name || rec.model}
+ Vision
+ {info?.recommended && ⭐ Empfohlen }
+ {installed && ✓ Installiert }
+
+
{rec.reason}
+
Größe: {info?.size || 'unbekannt'}
+
+ {!installed && (
+
{ setModelInput(rec.model); onPullModel() }}
+ disabled={actionLoading !== null || !status?.ollama}
+ className="ml-4 px-4 py-2 bg-primary-600 text-white text-sm rounded-lg font-medium hover:bg-primary-700 disabled:opacity-50 disabled:cursor-not-allowed transition-colors"
+ >
+ Installieren
+
+ )}
+
+ )
+ })}
+
+
+
+ {/* Grading / Text Analysis */}
+
+
+ 📝 Klausurkorrektur (Text-Modelle)
+
+
+ {RECOMMENDED_MODELS.grading.map((rec, idx) => {
+ const info = MODEL_DATABASE[rec.model]
+ const installed = isModelInstalled(rec.model)
+ return (
+
+
+
+ {info?.name || rec.model}
+ Text
+ {info?.recommended && ⭐ Empfohlen }
+ {installed && ✓ Installiert }
+
+
{rec.reason}
+
Größe: {info?.size || 'unbekannt'}
+
+ {!installed && (
+
{ setModelInput(rec.model); onPullModel() }}
+ disabled={actionLoading !== null || !status?.ollama}
+ className="ml-4 px-4 py-2 bg-primary-600 text-white text-sm rounded-lg font-medium hover:bg-primary-700 disabled:opacity-50 disabled:cursor-not-allowed transition-colors"
+ >
+ Installieren
+
+ )}
+
+ )
+ })}
+
+
+
+ {/* Info Box */}
+
+
+
💡
+
+
Tipp: Modell-Kombinationen
+
+ Für beste Ergebnisse bei Klausuren mit Handschrift kombiniere ein Vision-Modell (für OCR/Handschrifterkennung)
+ mit einem Text-Modell (für Bewertung und Feedback). Beispiel: llama3.2-vision:11b + qwen2.5:14b
+
+
+
+
+
+ )}
+
+ )
+}
diff --git a/website/app/admin/mac-mini/_components/PowerControls.tsx b/website/app/admin/mac-mini/_components/PowerControls.tsx
new file mode 100644
index 0000000..2679ea3
--- /dev/null
+++ b/website/app/admin/mac-mini/_components/PowerControls.tsx
@@ -0,0 +1,127 @@
+'use client'
+
+import type { MacMiniStatus } from '../types'
+
+export default function PowerControls({
+ status,
+ loading,
+ actionLoading,
+ message,
+ error,
+ onWake,
+ onRestart,
+ onShutdown,
+ onRefresh,
+}: {
+ status: MacMiniStatus | null
+ loading: boolean
+ actionLoading: string | null
+ message: string | null
+ error: string | null
+ onWake: () => void
+ onRestart: () => void
+ onShutdown: () => void
+ onRefresh: () => void
+}) {
+ const getStatusBadge = (online: boolean) => {
+ return online
+ ? 'px-3 py-1 rounded-full text-sm font-semibold bg-green-100 text-green-800'
+ : 'px-3 py-1 rounded-full text-sm font-semibold bg-red-100 text-red-800'
+ }
+
+ const getServiceStatus = (ok: boolean) => {
+ return ok
+ ? 'flex items-center gap-2 text-green-600'
+ : 'flex items-center gap-2 text-red-500'
+ }
+
+ return (
+
+
+
+
🖥️
+
+
Mac Mini Headless
+
IP: {status?.ip || '192.168.178.100'}
+
+
+
+ {loading ? 'Laden...' : status?.online ? 'Online' : 'Offline'}
+
+
+
+ {/* Power Buttons */}
+
+
+ {actionLoading === 'wake' ? '...' : '⚡ Wake on LAN'}
+
+
+ {actionLoading === 'restart' ? '...' : '🔄 Neustart'}
+
+
+ {actionLoading === 'shutdown' ? '...' : '⏻ Herunterfahren'}
+
+
+ {loading ? '...' : '🔍 Status aktualisieren'}
+
+
+ {message && {message} }
+ {error && {error} }
+
+
+ {/* Service Status Grid */}
+
+
+
Ping
+
+
+ {status?.ping ? 'Erreichbar' : 'Nicht erreichbar'}
+
+
+
+
SSH
+
+
+ {status?.ssh ? 'Verbunden' : 'Getrennt'}
+
+
+
+
Docker
+
+
+ {status?.docker ? 'Aktiv' : 'Inaktiv'}
+
+
+
+
Ollama
+
+
+ {status?.ollama ? 'Bereit' : 'Nicht bereit'}
+
+
+
+
Uptime
+
+ {status?.uptime || '-'}
+
+
+
+
+ )
+}
diff --git a/website/app/admin/mac-mini/constants.ts b/website/app/admin/mac-mini/constants.ts
new file mode 100644
index 0000000..3e6927f
--- /dev/null
+++ b/website/app/admin/mac-mini/constants.ts
@@ -0,0 +1,105 @@
+import type { ModelDescription } from './types'
+
+export const API_BASE = 'http://192.168.178.100:8000/api/mac-mini'
+
+// Aktionen die Internet benötigen
+export const INTERNET_REQUIRED_ACTIONS = [
+ { action: 'LLM Modelle herunterladen', description: 'Ollama pull benötigt Verbindung zu ollama.com' },
+ { action: 'Docker Base Images pullen', description: 'Neue Images von Docker Hub/GHCR' },
+ { action: 'npm/pip/go Packages', description: 'Beim ersten Build oder neuen Dependencies' },
+ { action: 'Git Pull/Push', description: 'Code-Synchronisation mit Remote-Repository' },
+]
+
+export const MODEL_DATABASE: Record = {
+ 'llama3.2-vision:11b': {
+ name: 'Llama 3.2 Vision 11B', category: 'vision', size: '7.8 GB',
+ description: 'Metas multimodales Vision-Modell. Kann Bilder und PDFs analysieren, Text aus Handschrift extrahieren.',
+ useCases: ['Handschrifterkennung', 'Bild-Analyse', 'Dokumentenverarbeitung', 'OCR-Aufgaben'],
+ recommended: true
+ },
+ 'llama3.2-vision:90b': {
+ name: 'Llama 3.2 Vision 90B', category: 'vision', size: '55 GB',
+ description: 'Größte Version von Llama Vision. Beste Qualität für komplexe Bildanalyse.',
+ useCases: ['Komplexe Handschrift', 'Detaillierte Bild-Analyse', 'Mathematische Formeln'],
+ },
+ 'minicpm-v': {
+ name: 'MiniCPM-V', category: 'vision', size: '5.5 GB',
+ description: 'Kompaktes Vision-Modell mit gutem Preis-Leistungs-Verhältnis für OCR.',
+ useCases: ['Schnelle OCR', 'Einfache Handschrift', 'Tabellen-Erkennung'],
+ recommended: true
+ },
+ 'llava:13b': {
+ name: 'LLaVA 13B', category: 'vision', size: '8 GB',
+ description: 'Large Language-and-Vision Assistant. Gut für Bild-zu-Text Aufgaben.',
+ useCases: ['Bildbeschreibung', 'Handschrift', 'Diagramm-Analyse'],
+ },
+ 'llava:34b': {
+ name: 'LLaVA 34B', category: 'vision', size: '20 GB',
+ description: 'Größere LLaVA-Version mit besserer Genauigkeit.',
+ useCases: ['Komplexe Dokumente', 'Wissenschaftliche Notation', 'Detailanalyse'],
+ },
+ 'bakllava': {
+ name: 'BakLLaVA', category: 'vision', size: '4.7 GB',
+ description: 'Verbesserte LLaVA-Variante mit Mistral-Basis.',
+ useCases: ['Schnelle Bildanalyse', 'Handschrift', 'Formular-Verarbeitung'],
+ },
+ 'qwen2.5:14b': {
+ name: 'Qwen 2.5 14B', category: 'text', size: '9 GB',
+ description: 'Alibabas neuestes Sprachmodell. Exzellent für deutsche Texte und Bewertungsaufgaben.',
+ useCases: ['Klausurkorrektur', 'Aufsatzbewertung', 'Feedback-Generierung', 'Grammatikprüfung'],
+ recommended: true
+ },
+ 'qwen2.5:7b': {
+ name: 'Qwen 2.5 7B', category: 'text', size: '4.7 GB',
+ description: 'Kleinere Qwen-Version, schneller bei ähnlicher Qualität.',
+ useCases: ['Schnelle Korrektur', 'Einfache Bewertungen', 'Rechtschreibprüfung'],
+ },
+ 'qwen2.5:32b': {
+ name: 'Qwen 2.5 32B', category: 'text', size: '19 GB',
+ description: 'Große Qwen-Version für komplexe Bewertungsaufgaben.',
+ useCases: ['Detaillierte Analyse', 'Abitur-Klausuren', 'Komplexe Argumentation'],
+ },
+ 'llama3.1:8b': {
+ name: 'Llama 3.1 8B', category: 'text', size: '4.7 GB',
+ description: 'Metas schnelles Textmodell. Gute Balance aus Geschwindigkeit und Qualität.',
+ useCases: ['Allgemeine Korrektur', 'Schnelles Feedback', 'Zusammenfassungen'],
+ },
+ 'llama3.1:70b': {
+ name: 'Llama 3.1 70B', category: 'text', size: '40 GB',
+ description: 'Großes Llama-Modell für anspruchsvolle Aufgaben.',
+ useCases: ['Komplexe Klausuren', 'Tiefgehende Analyse', 'Wissenschaftliche Texte'],
+ },
+ 'mistral': {
+ name: 'Mistral 7B', category: 'text', size: '4.1 GB',
+ description: 'Effizientes europäisches Modell mit guter deutscher Sprachunterstützung.',
+ useCases: ['Deutsche Texte', 'Schnelle Verarbeitung', 'Allgemeine Korrektur'],
+ },
+ 'mixtral:8x7b': {
+ name: 'Mixtral 8x7B', category: 'text', size: '26 GB',
+ description: 'Mixture-of-Experts Modell. Kombiniert Geschwindigkeit mit hoher Qualität.',
+ useCases: ['Komplexe Korrektur', 'Multi-Aspekt-Bewertung', 'Wissenschaftliche Arbeiten'],
+ },
+ 'gemma2:9b': {
+ name: 'Gemma 2 9B', category: 'text', size: '5.5 GB',
+ description: 'Googles kompaktes Modell. Gut für Instruktionen und Bewertungen.',
+ useCases: ['Strukturierte Bewertung', 'Feedback', 'Zusammenfassungen'],
+ },
+ 'phi3': {
+ name: 'Phi-3', category: 'text', size: '2.3 GB',
+ description: 'Microsofts kleines aber leistungsfähiges Modell.',
+ useCases: ['Schnelle Checks', 'Einfache Korrektur', 'Ressourcenschonend'],
+ },
+}
+
+export const RECOMMENDED_MODELS = {
+ handwriting: [
+ { model: 'llama3.2-vision:11b', reason: 'Beste Balance aus Qualität und Geschwindigkeit für Handschrift' },
+ { model: 'minicpm-v', reason: 'Schnell und ressourcenschonend für einfache Handschrift' },
+ { model: 'llava:13b', reason: 'Gute Alternative mit bewährter Vision-Architektur' },
+ ],
+ grading: [
+ { model: 'qwen2.5:14b', reason: 'Beste Qualität für deutsche Klausurkorrektur' },
+ { model: 'llama3.1:8b', reason: 'Schnell für einfache Bewertungen' },
+ { model: 'mistral', reason: 'Europäisches Modell mit guter Sprachqualität' },
+ ]
+}
diff --git a/website/app/admin/mac-mini/page.tsx b/website/app/admin/mac-mini/page.tsx
index 861e802..909b188 100644
--- a/website/app/admin/mac-mini/page.tsx
+++ b/website/app/admin/mac-mini/page.tsx
@@ -12,188 +12,12 @@
import AdminLayout from '@/components/admin/AdminLayout'
import { useEffect, useState, useCallback, useRef } from 'react'
-
-interface MacMiniStatus {
- online: boolean
- ping: boolean
- ssh: boolean
- docker: boolean
- ollama: boolean
- internet: boolean // Neuer Status: Hat Mac Mini Internet-Zugang?
- ip: string
- uptime?: string
- cpu_load?: string
- memory?: string
- containers?: ContainerInfo[]
- models?: ModelInfo[]
- error?: string
-}
-
-// Aktionen die Internet benötigen
-const INTERNET_REQUIRED_ACTIONS = [
- { action: 'LLM Modelle herunterladen', description: 'Ollama pull benötigt Verbindung zu ollama.com' },
- { action: 'Docker Base Images pullen', description: 'Neue Images von Docker Hub/GHCR' },
- { action: 'npm/pip/go Packages', description: 'Beim ersten Build oder neuen Dependencies' },
- { action: 'Git Pull/Push', description: 'Code-Synchronisation mit Remote-Repository' },
-]
-
-interface ContainerInfo {
- name: string
- status: string
- ports?: string
-}
-
-interface ModelInfo {
- name: string
- size: string
- modified: string
-}
-
-interface DownloadProgress {
- model: string
- status: string
- completed: number
- total: number
- percent: number
-}
-
-// Modell-Informationen für Beschreibungen und Empfehlungen
-interface ModelDescription {
- name: string
- category: 'vision' | 'text' | 'code' | 'embedding'
- size: string
- description: string
- useCases: string[]
- recommended?: boolean
-}
-
-const MODEL_DATABASE: Record = {
- // Vision-Modelle (Handschrifterkennung)
- 'llama3.2-vision:11b': {
- name: 'Llama 3.2 Vision 11B',
- category: 'vision',
- size: '7.8 GB',
- description: 'Metas multimodales Vision-Modell. Kann Bilder und PDFs analysieren, Text aus Handschrift extrahieren.',
- useCases: ['Handschrifterkennung', 'Bild-Analyse', 'Dokumentenverarbeitung', 'OCR-Aufgaben'],
- recommended: true
- },
- 'llama3.2-vision:90b': {
- name: 'Llama 3.2 Vision 90B',
- category: 'vision',
- size: '55 GB',
- description: 'Größte Version von Llama Vision. Beste Qualität für komplexe Bildanalyse.',
- useCases: ['Komplexe Handschrift', 'Detaillierte Bild-Analyse', 'Mathematische Formeln'],
- },
- 'minicpm-v': {
- name: 'MiniCPM-V',
- category: 'vision',
- size: '5.5 GB',
- description: 'Kompaktes Vision-Modell mit gutem Preis-Leistungs-Verhältnis für OCR.',
- useCases: ['Schnelle OCR', 'Einfache Handschrift', 'Tabellen-Erkennung'],
- recommended: true
- },
- 'llava:13b': {
- name: 'LLaVA 13B',
- category: 'vision',
- size: '8 GB',
- description: 'Large Language-and-Vision Assistant. Gut für Bild-zu-Text Aufgaben.',
- useCases: ['Bildbeschreibung', 'Handschrift', 'Diagramm-Analyse'],
- },
- 'llava:34b': {
- name: 'LLaVA 34B',
- category: 'vision',
- size: '20 GB',
- description: 'Größere LLaVA-Version mit besserer Genauigkeit.',
- useCases: ['Komplexe Dokumente', 'Wissenschaftliche Notation', 'Detailanalyse'],
- },
- 'bakllava': {
- name: 'BakLLaVA',
- category: 'vision',
- size: '4.7 GB',
- description: 'Verbesserte LLaVA-Variante mit Mistral-Basis.',
- useCases: ['Schnelle Bildanalyse', 'Handschrift', 'Formular-Verarbeitung'],
- },
-
- // Text-Modelle (Klausurkorrektur)
- 'qwen2.5:14b': {
- name: 'Qwen 2.5 14B',
- category: 'text',
- size: '9 GB',
- description: 'Alibabas neuestes Sprachmodell. Exzellent für deutsche Texte und Bewertungsaufgaben.',
- useCases: ['Klausurkorrektur', 'Aufsatzbewertung', 'Feedback-Generierung', 'Grammatikprüfung'],
- recommended: true
- },
- 'qwen2.5:7b': {
- name: 'Qwen 2.5 7B',
- category: 'text',
- size: '4.7 GB',
- description: 'Kleinere Qwen-Version, schneller bei ähnlicher Qualität.',
- useCases: ['Schnelle Korrektur', 'Einfache Bewertungen', 'Rechtschreibprüfung'],
- },
- 'qwen2.5:32b': {
- name: 'Qwen 2.5 32B',
- category: 'text',
- size: '19 GB',
- description: 'Große Qwen-Version für komplexe Bewertungsaufgaben.',
- useCases: ['Detaillierte Analyse', 'Abitur-Klausuren', 'Komplexe Argumentation'],
- },
- 'llama3.1:8b': {
- name: 'Llama 3.1 8B',
- category: 'text',
- size: '4.7 GB',
- description: 'Metas schnelles Textmodell. Gute Balance aus Geschwindigkeit und Qualität.',
- useCases: ['Allgemeine Korrektur', 'Schnelles Feedback', 'Zusammenfassungen'],
- },
- 'llama3.1:70b': {
- name: 'Llama 3.1 70B',
- category: 'text',
- size: '40 GB',
- description: 'Großes Llama-Modell für anspruchsvolle Aufgaben.',
- useCases: ['Komplexe Klausuren', 'Tiefgehende Analyse', 'Wissenschaftliche Texte'],
- },
- 'mistral': {
- name: 'Mistral 7B',
- category: 'text',
- size: '4.1 GB',
- description: 'Effizientes europäisches Modell mit guter deutscher Sprachunterstützung.',
- useCases: ['Deutsche Texte', 'Schnelle Verarbeitung', 'Allgemeine Korrektur'],
- },
- 'mixtral:8x7b': {
- name: 'Mixtral 8x7B',
- category: 'text',
- size: '26 GB',
- description: 'Mixture-of-Experts Modell. Kombiniert Geschwindigkeit mit hoher Qualität.',
- useCases: ['Komplexe Korrektur', 'Multi-Aspekt-Bewertung', 'Wissenschaftliche Arbeiten'],
- },
- 'gemma2:9b': {
- name: 'Gemma 2 9B',
- category: 'text',
- size: '5.5 GB',
- description: 'Googles kompaktes Modell. Gut für Instruktionen und Bewertungen.',
- useCases: ['Strukturierte Bewertung', 'Feedback', 'Zusammenfassungen'],
- },
- 'phi3': {
- name: 'Phi-3',
- category: 'text',
- size: '2.3 GB',
- description: 'Microsofts kleines aber leistungsfähiges Modell.',
- useCases: ['Schnelle Checks', 'Einfache Korrektur', 'Ressourcenschonend'],
- },
-}
-
-// Empfohlene Modelle für spezifische Anwendungsfälle
-const RECOMMENDED_MODELS = {
- handwriting: [
- { model: 'llama3.2-vision:11b', reason: 'Beste Balance aus Qualität und Geschwindigkeit für Handschrift' },
- { model: 'minicpm-v', reason: 'Schnell und ressourcenschonend für einfache Handschrift' },
- { model: 'llava:13b', reason: 'Gute Alternative mit bewährter Vision-Architektur' },
- ],
- grading: [
- { model: 'qwen2.5:14b', reason: 'Beste Qualität für deutsche Klausurkorrektur' },
- { model: 'llama3.1:8b', reason: 'Schnell für einfache Bewertungen' },
- { model: 'mistral', reason: 'Europäisches Modell mit guter Sprachqualität' },
- ]
-}
+import type { MacMiniStatus, DownloadProgress } from './types'
+import { API_BASE } from './constants'
+import PowerControls from './_components/PowerControls'
+import InternetStatus from './_components/InternetStatus'
+import DockerSection from './_components/DockerSection'
+import OllamaSection from './_components/OllamaSection'
export default function MacMiniControlPage() {
const [status, setStatus] = useState(null)
@@ -203,57 +27,21 @@ export default function MacMiniControlPage() {
const [message, setMessage] = useState(null)
const [downloadProgress, setDownloadProgress] = useState(null)
const [modelInput, setModelInput] = useState('')
- const [selectedModel, setSelectedModel] = useState(null)
- const [showRecommendations, setShowRecommendations] = useState(false)
const eventSourceRef = useRef(null)
- // Get model info from database
- const getModelInfo = (modelName: string): ModelDescription | null => {
- // Try exact match first
- if (MODEL_DATABASE[modelName]) return MODEL_DATABASE[modelName]
- // Try base name (without tag)
- const baseName = modelName.split(':')[0]
- const matchingKey = Object.keys(MODEL_DATABASE).find(key =>
- key.startsWith(baseName) || key === baseName
- )
- return matchingKey ? MODEL_DATABASE[matchingKey] : null
- }
-
- // Check if model is installed
- const isModelInstalled = (modelName: string): boolean => {
- if (!status?.models) return false
- return status.models.some(m =>
- m.name === modelName || m.name.startsWith(modelName.split(':')[0])
- )
- }
-
- // API Endpoint (Mac Mini Backend or local proxy)
- const API_BASE = 'http://192.168.178.100:8000/api/mac-mini'
-
- // Fetch status
const fetchStatus = useCallback(async () => {
setLoading(true)
setError(null)
-
try {
const response = await fetch(`${API_BASE}/status`)
const data = await response.json()
-
- if (!response.ok) {
- throw new Error(data.detail || `HTTP ${response.status}`)
- }
-
+ if (!response.ok) throw new Error(data.detail || `HTTP ${response.status}`)
setStatus(data)
} catch (err) {
setError(err instanceof Error ? err.message : 'Verbindungsfehler')
setStatus({
- online: false,
- ping: false,
- ssh: false,
- docker: false,
- ollama: false,
- internet: false,
- ip: '192.168.178.100',
+ online: false, ping: false, ssh: false, docker: false,
+ ollama: false, internet: false, ip: '192.168.178.100',
error: 'Verbindung fehlgeschlagen'
})
} finally {
@@ -261,161 +49,81 @@ export default function MacMiniControlPage() {
}
}, [])
- // Initial load
- useEffect(() => {
- fetchStatus()
- }, [fetchStatus])
-
- // Auto-refresh every 30 seconds
+ useEffect(() => { fetchStatus() }, [fetchStatus])
useEffect(() => {
const interval = setInterval(fetchStatus, 30000)
return () => clearInterval(interval)
}, [fetchStatus])
- // Wake on LAN
- const wakeOnLan = async () => {
- setActionLoading('wake')
+ const performAction = async (action: string, endpoint: string, confirmMsg?: string) => {
+ if (confirmMsg && !confirm(confirmMsg)) return
+ setActionLoading(action)
setError(null)
setMessage(null)
-
try {
- const response = await fetch(`${API_BASE}/wake`, { method: 'POST' })
+ const response = await fetch(`${API_BASE}/${endpoint}`, { method: 'POST' })
const data = await response.json()
+ if (!response.ok) throw new Error(data.detail || `${action} fehlgeschlagen`)
+ return data
+ } catch (err) {
+ setError(err instanceof Error ? err.message : `Fehler bei ${action}`)
+ return null
+ } finally {
+ setActionLoading(null)
+ }
+ }
- if (!response.ok) {
- throw new Error(data.detail || 'Wake-on-LAN fehlgeschlagen')
- }
-
+ const wakeOnLan = async () => {
+ const result = await performAction('wake', 'wake')
+ if (result) {
setMessage('Wake-on-LAN Paket gesendet')
setTimeout(fetchStatus, 5000)
setTimeout(fetchStatus, 15000)
- } catch (err) {
- setError(err instanceof Error ? err.message : 'Fehler beim Aufwecken')
- } finally {
- setActionLoading(null)
}
}
- // Restart
const restart = async () => {
- if (!confirm('Mac Mini wirklich neu starten?')) return
-
- setActionLoading('restart')
- setError(null)
- setMessage(null)
-
- try {
- const response = await fetch(`${API_BASE}/restart`, { method: 'POST' })
- const data = await response.json()
-
- if (!response.ok) {
- throw new Error(data.detail || 'Neustart fehlgeschlagen')
- }
-
+ const result = await performAction('restart', 'restart', 'Mac Mini wirklich neu starten?')
+ if (result) {
setMessage('Neustart eingeleitet')
setTimeout(fetchStatus, 30000)
- } catch (err) {
- setError(err instanceof Error ? err.message : 'Fehler beim Neustart')
- } finally {
- setActionLoading(null)
}
}
- // Shutdown
const shutdown = async () => {
- if (!confirm('Mac Mini wirklich herunterfahren?')) return
-
- setActionLoading('shutdown')
- setError(null)
- setMessage(null)
-
- try {
- const response = await fetch(`${API_BASE}/shutdown`, { method: 'POST' })
- const data = await response.json()
-
- if (!response.ok) {
- throw new Error(data.detail || 'Shutdown fehlgeschlagen')
- }
-
+ const result = await performAction('shutdown', 'shutdown', 'Mac Mini wirklich herunterfahren?')
+ if (result) {
setMessage('Shutdown eingeleitet')
setTimeout(fetchStatus, 10000)
- } catch (err) {
- setError(err instanceof Error ? err.message : 'Fehler beim Herunterfahren')
- } finally {
- setActionLoading(null)
}
}
- // Docker Up
const dockerUp = async () => {
- setActionLoading('docker-up')
- setError(null)
- setMessage(null)
-
- try {
- const response = await fetch(`${API_BASE}/docker/up`, { method: 'POST' })
- const data = await response.json()
-
- if (!response.ok) {
- throw new Error(data.detail || 'Docker Start fehlgeschlagen')
- }
-
+ const result = await performAction('docker-up', 'docker/up')
+ if (result) {
setMessage('Docker Container werden gestartet...')
setTimeout(fetchStatus, 5000)
- } catch (err) {
- setError(err instanceof Error ? err.message : 'Fehler beim Docker Start')
- } finally {
- setActionLoading(null)
}
}
- // Docker Down
const dockerDown = async () => {
- if (!confirm('Docker Container wirklich stoppen?')) return
-
- setActionLoading('docker-down')
- setError(null)
- setMessage(null)
-
- try {
- const response = await fetch(`${API_BASE}/docker/down`, { method: 'POST' })
- const data = await response.json()
-
- if (!response.ok) {
- throw new Error(data.detail || 'Docker Stop fehlgeschlagen')
- }
-
+ const result = await performAction('docker-down', 'docker/down', 'Docker Container wirklich stoppen?')
+ if (result) {
setMessage('Docker Container werden gestoppt...')
setTimeout(fetchStatus, 5000)
- } catch (err) {
- setError(err instanceof Error ? err.message : 'Fehler beim Docker Stop')
- } finally {
- setActionLoading(null)
}
}
- // Pull Model with SSE Progress
const pullModel = async () => {
if (!modelInput.trim()) return
-
setActionLoading('pull')
setError(null)
setMessage(null)
- setDownloadProgress({
- model: modelInput,
- status: 'starting',
- completed: 0,
- total: 0,
- percent: 0
- })
+ setDownloadProgress({ model: modelInput, status: 'starting', completed: 0, total: 0, percent: 0 })
try {
- // Close any existing EventSource
- if (eventSourceRef.current) {
- eventSourceRef.current.close()
- }
+ if (eventSourceRef.current) eventSourceRef.current.close()
- // Use fetch with streaming for progress
const response = await fetch(`${API_BASE}/ollama/pull`, {
method: 'POST',
headers: { 'Content-Type': 'application/json' },
@@ -434,19 +142,15 @@ export default function MacMiniControlPage() {
while (true) {
const { done, value } = await reader.read()
if (done) break
-
const text = decoder.decode(value)
const lines = text.split('\n').filter(line => line.trim())
-
for (const line of lines) {
try {
const data = JSON.parse(line)
if (data.status === 'downloading' && data.total) {
setDownloadProgress({
- model: modelInput,
- status: data.status,
- completed: data.completed || 0,
- total: data.total,
+ model: modelInput, status: data.status,
+ completed: data.completed || 0, total: data.total,
percent: Math.round((data.completed || 0) / data.total * 100)
})
} else if (data.status === 'success') {
@@ -457,9 +161,7 @@ export default function MacMiniControlPage() {
} else if (data.error) {
throw new Error(data.error)
}
- } catch (e) {
- // Skip parsing errors for incomplete chunks
- }
+ } catch (e) { /* Skip parsing errors for incomplete chunks */ }
}
}
}
@@ -471,463 +173,37 @@ export default function MacMiniControlPage() {
}
}
- // Format bytes
- const formatBytes = (bytes: number) => {
- if (bytes === 0) return '0 B'
- const k = 1024
- const sizes = ['B', 'KB', 'MB', 'GB', 'TB']
- const i = Math.floor(Math.log(bytes) / Math.log(k))
- return parseFloat((bytes / Math.pow(k, i)).toFixed(2)) + ' ' + sizes[i]
- }
-
- // Status badge styling
- const getStatusBadge = (online: boolean) => {
- return online
- ? 'px-3 py-1 rounded-full text-sm font-semibold bg-green-100 text-green-800'
- : 'px-3 py-1 rounded-full text-sm font-semibold bg-red-100 text-red-800'
- }
-
- const getServiceStatus = (ok: boolean) => {
- return ok
- ? 'flex items-center gap-2 text-green-600'
- : 'flex items-center gap-2 text-red-500'
- }
-
return (
- {/* Power Controls */}
-
-
-
-
🖥️
-
-
Mac Mini Headless
-
IP: {status?.ip || '192.168.178.100'}
-
-
-
- {loading ? 'Laden...' : status?.online ? 'Online' : 'Offline'}
-
-
+
- {/* Power Buttons */}
-
-
- {actionLoading === 'wake' ? '...' : '⚡ Wake on LAN'}
-
-
- {actionLoading === 'restart' ? '...' : '🔄 Neustart'}
-
-
- {actionLoading === 'shutdown' ? '...' : '⏻ Herunterfahren'}
-
-
- {loading ? '...' : '🔍 Status aktualisieren'}
-
+
- {message && {message} }
- {error && {error} }
-
+
- {/* Service Status Grid */}
-
-
-
Ping
-
-
- {status?.ping ? 'Erreichbar' : 'Nicht erreichbar'}
-
-
-
-
SSH
-
-
- {status?.ssh ? 'Verbunden' : 'Getrennt'}
-
-
-
-
Docker
-
-
- {status?.docker ? 'Aktiv' : 'Inaktiv'}
-
-
-
-
Ollama
-
-
- {status?.ollama ? 'Bereit' : 'Nicht bereit'}
-
-
-
-
Uptime
-
- {status?.uptime || '-'}
-
-
-
-
-
- {/* Internet Status Banner */}
-
-
-
-
{status?.internet ? '🌐' : '📴'}
-
-
- Internet: {status?.internet ? 'Verbunden' : 'Offline (Normalbetrieb)'}
-
-
- {status?.internet
- ? 'Mac Mini hat Internet-Zugang. LLM-Downloads und Updates möglich.'
- : 'Mac Mini arbeitet offline. Für bestimmte Aktionen muss Internet aktiviert werden.'}
-
-
-
-
- {status?.internet ? 'Online' : 'Offline'}
-
-
-
- {/* Internet Required Actions - nur anzeigen wenn offline */}
- {!status?.internet && (
-
-
⚠️ Diese Aktionen benötigen Internet:
-
- {INTERNET_REQUIRED_ACTIONS.map((item, idx) => (
-
-
•
-
- {item.action}
- – {item.description}
-
-
- ))}
-
-
- 💡 Tipp: Internet am Router/Switch nur bei Bedarf für den Mac Mini aktivieren.
-
-
- )}
-
-
- {/* Docker Section */}
-
-
-
- 🐳 Docker Container
-
-
-
- {actionLoading === 'docker-up' ? '...' : '▶ Start'}
-
-
- {actionLoading === 'docker-down' ? '...' : '⏹ Stop'}
-
-
-
-
- {status?.containers && status.containers.length > 0 ? (
-
- {status.containers.map((container, idx) => (
-
-
-
- {container.name}
-
-
- {container.ports && (
- {container.ports}
- )}
-
- {container.status}
-
-
-
- ))}
-
- ) : (
-
- {status?.online ? 'Keine Container gefunden' : 'Server nicht erreichbar'}
-
- )}
-
-
- {/* Ollama Section */}
-
-
- 🤖 Ollama LLM Modelle
-
-
- {/* Installed Models */}
- {status?.models && status.models.length > 0 ? (
-
- {status.models.map((model, idx) => {
- const modelInfo = getModelInfo(model.name)
- return (
-
-
-
-
{model.name}
- {modelInfo && (
-
setSelectedModel(model.name)}
- className="text-blue-500 hover:text-blue-700 transition-colors"
- title="Modell-Info anzeigen"
- >
-
-
-
-
- )}
- {modelInfo?.category === 'vision' && (
-
Vision
- )}
-
-
- {model.size}
- {model.modified}
-
-
- )
- })}
-
- ) : (
-
- {status?.ollama ? 'Keine Modelle installiert' : 'Ollama nicht erreichbar'}
-
- )}
-
- {/* Model Info Modal */}
- {selectedModel && (
-
setSelectedModel(null)}>
-
e.stopPropagation()}>
- {(() => {
- const info = getModelInfo(selectedModel)
- if (!info) return
Keine Informationen verfügbar
- return (
- <>
-
-
-
{info.name}
-
-
- {info.category === 'vision' ? '👁️ Vision' : info.category === 'text' ? '📝 Text' : info.category}
-
- {info.size}
-
-
-
setSelectedModel(null)} className="text-slate-400 hover:text-slate-600">
-
-
-
-
-
-
{info.description}
-
-
Geeignet für:
-
- {info.useCases.map((useCase, i) => (
-
- {useCase}
-
- ))}
-
-
- >
- )
- })()}
-
-
- )}
-
- {/* Download New Model */}
-
-
Neues Modell herunterladen
-
- setModelInput(e.target.value)}
- placeholder="z.B. llama3.2, mistral, qwen2.5:14b"
- className="flex-1 px-4 py-2 border border-slate-300 rounded-lg focus:outline-none focus:ring-2 focus:ring-primary-500 focus:border-transparent"
- disabled={actionLoading === 'pull'}
- />
-
- {actionLoading === 'pull' ? 'Lädt...' : 'Herunterladen'}
-
-
-
- {/* Download Progress */}
- {downloadProgress && (
-
-
- {downloadProgress.model}
-
- {formatBytes(downloadProgress.completed)} / {formatBytes(downloadProgress.total)}
-
-
-
-
- {downloadProgress.percent}%
-
-
- )}
-
- {/* Toggle Recommendations */}
-
setShowRecommendations(!showRecommendations)}
- className="mt-4 text-primary-600 hover:text-primary-700 font-medium text-sm flex items-center gap-2"
- >
-
-
-
- {showRecommendations ? 'Empfehlungen ausblenden' : 'Modell-Empfehlungen für Klausurkorrektur & Handschrift anzeigen'}
-
-
-
- {/* Recommendations Section */}
- {showRecommendations && (
-
-
📚 Empfohlene Modelle
-
- {/* Handwriting Recognition */}
-
-
- ✍️ Handschrifterkennung (Vision-Modelle)
-
-
- {RECOMMENDED_MODELS.handwriting.map((rec, idx) => {
- const info = MODEL_DATABASE[rec.model]
- const installed = isModelInstalled(rec.model)
- return (
-
-
-
- {info?.name || rec.model}
- Vision
- {info?.recommended && ⭐ Empfohlen }
- {installed && ✓ Installiert }
-
-
{rec.reason}
-
Größe: {info?.size || 'unbekannt'}
-
- {!installed && (
-
{ setModelInput(rec.model); pullModel() }}
- disabled={actionLoading !== null || !status?.ollama}
- className="ml-4 px-4 py-2 bg-primary-600 text-white text-sm rounded-lg font-medium hover:bg-primary-700 disabled:opacity-50 disabled:cursor-not-allowed transition-colors"
- >
- Installieren
-
- )}
-
- )
- })}
-
-
-
- {/* Grading / Text Analysis */}
-
-
- 📝 Klausurkorrektur (Text-Modelle)
-
-
- {RECOMMENDED_MODELS.grading.map((rec, idx) => {
- const info = MODEL_DATABASE[rec.model]
- const installed = isModelInstalled(rec.model)
- return (
-
-
-
- {info?.name || rec.model}
- Text
- {info?.recommended && ⭐ Empfohlen }
- {installed && ✓ Installiert }
-
-
{rec.reason}
-
Größe: {info?.size || 'unbekannt'}
-
- {!installed && (
-
{ setModelInput(rec.model); pullModel() }}
- disabled={actionLoading !== null || !status?.ollama}
- className="ml-4 px-4 py-2 bg-primary-600 text-white text-sm rounded-lg font-medium hover:bg-primary-700 disabled:opacity-50 disabled:cursor-not-allowed transition-colors"
- >
- Installieren
-
- )}
-
- )
- })}
-
-
-
- {/* Info Box */}
-
-
-
💡
-
-
Tipp: Modell-Kombinationen
-
- Für beste Ergebnisse bei Klausuren mit Handschrift kombiniere ein Vision-Modell (für OCR/Handschrifterkennung)
- mit einem Text-Modell (für Bewertung und Feedback). Beispiel: llama3.2-vision:11b + qwen2.5:14b
-
-
-
-
-
- )}
-
+
{/* Info */}
diff --git a/website/app/admin/mac-mini/types.ts b/website/app/admin/mac-mini/types.ts
new file mode 100644
index 0000000..bd8146e
--- /dev/null
+++ b/website/app/admin/mac-mini/types.ts
@@ -0,0 +1,44 @@
+export interface MacMiniStatus {
+ online: boolean
+ ping: boolean
+ ssh: boolean
+ docker: boolean
+ ollama: boolean
+ internet: boolean
+ ip: string
+ uptime?: string
+ cpu_load?: string
+ memory?: string
+ containers?: ContainerInfo[]
+ models?: ModelInfo[]
+ error?: string
+}
+
+export interface ContainerInfo {
+ name: string
+ status: string
+ ports?: string
+}
+
+export interface ModelInfo {
+ name: string
+ size: string
+ modified: string
+}
+
+export interface DownloadProgress {
+ model: string
+ status: string
+ completed: number
+ total: number
+ percent: number
+}
+
+export interface ModelDescription {
+ name: string
+ category: 'vision' | 'text' | 'code' | 'embedding'
+ size: string
+ description: string
+ useCases: string[]
+ recommended?: boolean
+}
diff --git a/website/app/admin/mail/_components/AISettingsTab.tsx b/website/app/admin/mail/_components/AISettingsTab.tsx
new file mode 100644
index 0000000..b4cc45c
--- /dev/null
+++ b/website/app/admin/mail/_components/AISettingsTab.tsx
@@ -0,0 +1,120 @@
+'use client'
+
+import { useState } from 'react'
+
+export default function AISettingsTab() {
+ const [settings, setSettings] = useState({
+ autoAnalyze: true,
+ autoCreateTasks: true,
+ analysisModel: 'breakpilot-teacher-8b',
+ confidenceThreshold: 0.7,
+ })
+
+ return (
+
+
+
KI-Einstellungen
+
Konfigurieren Sie die automatische E-Mail-Analyse
+
+
+
+ {/* Auto-Analyze */}
+
+
+
Automatische Analyse
+
E-Mails automatisch beim Empfang analysieren
+
+
setSettings({ ...settings, autoAnalyze: !settings.autoAnalyze })}
+ className={`relative inline-flex h-6 w-11 items-center rounded-full transition-colors ${
+ settings.autoAnalyze ? 'bg-primary-600' : 'bg-slate-200'
+ }`}
+ >
+
+
+
+
+ {/* Auto-Create Tasks */}
+
+
+
Aufgaben automatisch erstellen
+
Erkannte Fristen als Aufgaben anlegen
+
+
setSettings({ ...settings, autoCreateTasks: !settings.autoCreateTasks })}
+ className={`relative inline-flex h-6 w-11 items-center rounded-full transition-colors ${
+ settings.autoCreateTasks ? 'bg-primary-600' : 'bg-slate-200'
+ }`}
+ >
+
+
+
+
+ {/* Model Selection */}
+
+ Analyse-Modell
+ setSettings({ ...settings, analysisModel: e.target.value })}
+ className="w-full md:w-64 px-3 py-2 border border-slate-300 rounded-lg focus:ring-2 focus:ring-primary-500"
+ >
+ BreakPilot Teacher 8B (schnell)
+ BreakPilot Teacher 70B (genau)
+ Llama 3.1 8B Instruct
+
+
+
+ {/* Confidence Threshold */}
+
+
+ Konfidenz-Schwelle: {Math.round(settings.confidenceThreshold * 100)}%
+
+
setSettings({ ...settings, confidenceThreshold: parseFloat(e.target.value) })}
+ className="w-full md:w-64"
+ />
+
+ Mindest-Konfidenz für automatische Aufgabenerstellung
+
+
+
+
+ {/* Sender Classification */}
+
+
Bekannte Absender (Niedersachsen)
+
+ {[
+ { domain: '@mk.niedersachsen.de', type: 'Kultusministerium', priority: 'Hoch' },
+ { domain: '@rlsb.de', type: 'RLSB', priority: 'Hoch' },
+ { domain: '@landesschulbehoerde-nds.de', type: 'Landesschulbehörde', priority: 'Hoch' },
+ { domain: '@nibis.de', type: 'NiBiS', priority: 'Mittel' },
+ { domain: '@schultraeger.de', type: 'Schulträger', priority: 'Mittel' },
+ ].map((sender) => (
+
+
{sender.domain}
+
{sender.type}
+
+ {sender.priority}
+
+
+ ))}
+
+
+
+ )
+}
diff --git a/website/app/admin/mail/_components/AccountsTab.tsx b/website/app/admin/mail/_components/AccountsTab.tsx
new file mode 100644
index 0000000..b1e11f5
--- /dev/null
+++ b/website/app/admin/mail/_components/AccountsTab.tsx
@@ -0,0 +1,155 @@
+'use client'
+
+import { useState } from 'react'
+import type { EmailAccount } from '../types'
+import { API_BASE } from '../constants'
+import AddAccountModal from './AddAccountModal'
+
+export default function AccountsTab({
+ accounts,
+ loading,
+ onRefresh
+}: {
+ accounts: EmailAccount[]
+ loading: boolean
+ onRefresh: () => void
+}) {
+ const [showAddModal, setShowAddModal] = useState(false)
+
+ const testConnection = async (accountId: string) => {
+ try {
+ const res = await fetch(`${API_BASE}/api/v1/mail/accounts/${accountId}/test`, {
+ method: 'POST',
+ })
+ if (res.ok) {
+ alert('Verbindung erfolgreich!')
+ } else {
+ alert('Verbindungsfehler')
+ }
+ } catch (err) {
+ alert('Verbindungsfehler')
+ }
+ }
+
+ const statusColors = {
+ active: 'bg-green-100 text-green-800',
+ inactive: 'bg-gray-100 text-gray-800',
+ error: 'bg-red-100 text-red-800',
+ syncing: 'bg-yellow-100 text-yellow-800',
+ }
+
+ const statusLabels = {
+ active: 'Aktiv',
+ inactive: 'Inaktiv',
+ error: 'Fehler',
+ syncing: 'Synchronisiert...',
+ }
+
+ return (
+
+ {/* Header */}
+
+
+
E-Mail-Konten
+
Verwalten Sie die verbundenen E-Mail-Konten
+
+
setShowAddModal(true)}
+ className="px-4 py-2 text-sm font-medium text-white bg-primary-600 rounded-lg hover:bg-primary-700 flex items-center gap-2"
+ >
+
+
+
+ Konto hinzufügen
+
+
+
+ {/* Loading State */}
+ {loading && (
+
+ )}
+
+ {/* Accounts Grid */}
+ {!loading && (
+
+ {accounts.length === 0 ? (
+
+
+
+
+
Keine E-Mail-Konten
+
Fügen Sie Ihr erstes E-Mail-Konto hinzu.
+
+ ) : (
+ accounts.map((account) => (
+
+
+
+
+
+
+ {account.displayName || account.email}
+
+
{account.email}
+
+
+
+
+ {statusLabels[account.status]}
+
+
testConnection(account.id)}
+ className="p-2 text-slate-400 hover:text-slate-600"
+ title="Verbindung testen"
+ >
+
+
+
+
+
+
+
+
+
+
E-Mails
+
{account.emailCount}
+
+
+
Ungelesen
+
{account.unreadCount}
+
+
+
IMAP
+
{account.imapHost}:{account.imapPort}
+
+
+
Letzte Sync
+
+ {account.lastSync
+ ? new Date(account.lastSync).toLocaleString('de-DE')
+ : 'Nie'}
+
+
+
+
+ ))
+ )}
+
+ )}
+
+ {/* Add Account Modal */}
+ {showAddModal && (
+
setShowAddModal(false)} onSuccess={() => { setShowAddModal(false); onRefresh(); }} />
+ )}
+
+ )
+}
diff --git a/website/app/admin/mail/_components/AddAccountModal.tsx b/website/app/admin/mail/_components/AddAccountModal.tsx
new file mode 100644
index 0000000..fa18338
--- /dev/null
+++ b/website/app/admin/mail/_components/AddAccountModal.tsx
@@ -0,0 +1,185 @@
+'use client'
+
+import { useState } from 'react'
+import { API_BASE } from '../constants'
+
+export default function AddAccountModal({
+ onClose,
+ onSuccess
+}: {
+ onClose: () => void
+ onSuccess: () => void
+}) {
+ const [formData, setFormData] = useState({
+ email: '',
+ displayName: '',
+ imapHost: '',
+ imapPort: 993,
+ smtpHost: '',
+ smtpPort: 587,
+ username: '',
+ password: '',
+ })
+ const [submitting, setSubmitting] = useState(false)
+ const [error, setError] = useState
(null)
+
+ const handleSubmit = async (e: React.FormEvent) => {
+ e.preventDefault()
+ setSubmitting(true)
+ setError(null)
+
+ try {
+ const res = await fetch(`${API_BASE}/api/v1/mail/accounts`, {
+ method: 'POST',
+ headers: { 'Content-Type': 'application/json' },
+ body: JSON.stringify({
+ email: formData.email,
+ display_name: formData.displayName,
+ imap_host: formData.imapHost,
+ imap_port: formData.imapPort,
+ smtp_host: formData.smtpHost,
+ smtp_port: formData.smtpPort,
+ username: formData.username,
+ password: formData.password,
+ }),
+ })
+
+ if (res.ok) {
+ onSuccess()
+ } else {
+ const data = await res.json()
+ setError(data.detail || 'Fehler beim Hinzufügen des Kontos')
+ }
+ } catch (err) {
+ setError('Netzwerkfehler')
+ } finally {
+ setSubmitting(false)
+ }
+ }
+
+ return (
+
+
+
+
E-Mail-Konto hinzufügen
+
+
+
+ {error && (
+ {error}
+ )}
+
+
+
+
+
+ Abbrechen
+
+
+ {submitting ? 'Speichern...' : 'Konto hinzufügen'}
+
+
+
+
+
+ )
+}
diff --git a/website/app/admin/mail/_components/AuditLogTab.tsx b/website/app/admin/mail/_components/AuditLogTab.tsx
new file mode 100644
index 0000000..1a8af97
--- /dev/null
+++ b/website/app/admin/mail/_components/AuditLogTab.tsx
@@ -0,0 +1,56 @@
+'use client'
+
+import { useState } from 'react'
+
+export default function AuditLogTab() {
+ const [logs] = useState([
+ { id: '1', action: 'account_created', user: 'admin@breakpilot.de', timestamp: new Date().toISOString(), details: 'Konto schulleitung@example.de hinzugefügt' },
+ { id: '2', action: 'email_analyzed', user: 'system', timestamp: new Date(Date.now() - 3600000).toISOString(), details: '5 E-Mails analysiert' },
+ { id: '3', action: 'task_created', user: 'system', timestamp: new Date(Date.now() - 7200000).toISOString(), details: 'Aufgabe aus Fristenerkennung erstellt' },
+ ])
+
+ const actionLabels: Record = {
+ account_created: 'Konto erstellt',
+ email_analyzed: 'E-Mail analysiert',
+ task_created: 'Aufgabe erstellt',
+ sync_completed: 'Sync abgeschlossen',
+ }
+
+ return (
+
+
+
Audit-Log
+
Alle Aktionen im Mail-System
+
+
+
+
+
+
+ Zeit
+ Aktion
+ Benutzer
+ Details
+
+
+
+ {logs.map((log) => (
+
+
+ {new Date(log.timestamp).toLocaleString('de-DE')}
+
+
+
+ {actionLabels[log.action] || log.action}
+
+
+ {log.user}
+ {log.details}
+
+ ))}
+
+
+
+
+ )
+}
diff --git a/website/app/admin/mail/_components/OverviewTab.tsx b/website/app/admin/mail/_components/OverviewTab.tsx
new file mode 100644
index 0000000..7bb8312
--- /dev/null
+++ b/website/app/admin/mail/_components/OverviewTab.tsx
@@ -0,0 +1,174 @@
+'use client'
+
+import type { MailStats, SyncStatus } from '../types'
+import { API_BASE } from '../constants'
+
+function StatCard({
+ title,
+ value,
+ subtitle,
+ color = 'blue'
+}: {
+ title: string
+ value: number
+ subtitle?: string
+ color?: 'blue' | 'green' | 'yellow' | 'red'
+}) {
+ const colorClasses = {
+ blue: 'text-blue-600',
+ green: 'text-green-600',
+ yellow: 'text-yellow-600',
+ red: 'text-red-600',
+ }
+
+ return (
+
+
{title}
+
{value.toLocaleString()}
+ {subtitle &&
{subtitle}
}
+
+ )
+}
+
+export default function OverviewTab({
+ stats,
+ syncStatus,
+ loading,
+ onRefresh
+}: {
+ stats: MailStats | null
+ syncStatus: SyncStatus | null
+ loading: boolean
+ onRefresh: () => void
+}) {
+ const triggerSync = async () => {
+ try {
+ await fetch(`${API_BASE}/api/v1/mail/sync/all`, {
+ method: 'POST',
+ })
+ onRefresh()
+ } catch (err) {
+ console.error('Failed to trigger sync:', err)
+ }
+ }
+
+ return (
+
+ {/* Header */}
+
+
+
System-Übersicht
+
Status aller E-Mail-Konten und Aufgaben
+
+
+
+ Aktualisieren
+
+
+ {syncStatus?.running ? 'Synchronisiert...' : 'Alle synchronisieren'}
+
+
+
+
+ {/* Loading State */}
+ {loading && (
+
+ )}
+
+ {/* Stats Grid */}
+ {!loading && stats && (
+ <>
+
+
+
+
+ 0 ? 'red' : 'green'}
+ />
+
+
+ {/* Sync Status */}
+
+
Synchronisierung
+
+ {syncStatus?.running ? (
+ <>
+
+
+ Synchronisiere {syncStatus.accountsInProgress.length} Konto(en)...
+
+ >
+ ) : (
+ <>
+
+
Bereit
+ >
+ )}
+ {stats.lastSyncTime && (
+
+ Letzte Sync: {new Date(stats.lastSyncTime).toLocaleString('de-DE')}
+
+ )}
+
+
+ {syncStatus?.errors && syncStatus.errors.length > 0 && (
+
+
Fehler
+
+ {syncStatus.errors.slice(0, 3).map((error, i) => (
+ {error}
+ ))}
+
+
+ )}
+
+
+ {/* AI Stats */}
+
+
KI-Analyse
+
+
+
Analysiert
+
{stats.aiAnalyzedCount}
+
+
+
Analyse-Rate
+
+ {stats.totalEmails > 0
+ ? `${Math.round((stats.aiAnalyzedCount / stats.totalEmails) * 100)}%`
+ : '0%'}
+
+
+
+
+ >
+ )}
+
+ )
+}
diff --git a/website/app/admin/mail/_components/TemplatesTab.tsx b/website/app/admin/mail/_components/TemplatesTab.tsx
new file mode 100644
index 0000000..5e5b624
--- /dev/null
+++ b/website/app/admin/mail/_components/TemplatesTab.tsx
@@ -0,0 +1,55 @@
+'use client'
+
+import { useState } from 'react'
+
+export default function TemplatesTab() {
+ const [templates] = useState([
+ { id: '1', name: 'Eingangsbestätigung', category: 'Standard', usageCount: 45 },
+ { id: '2', name: 'Terminbestätigung', category: 'Termine', usageCount: 23 },
+ { id: '3', name: 'Elternbrief-Vorlage', category: 'Eltern', usageCount: 67 },
+ ])
+
+ return (
+
+
+
+
E-Mail-Vorlagen
+
Verwalten Sie Antwort-Templates
+
+
+
+
+
+ Vorlage erstellen
+
+
+
+
+
+
+
+ Name
+ Kategorie
+ Verwendet
+ Aktionen
+
+
+
+ {templates.map((template) => (
+
+ {template.name}
+
+ {template.category}
+
+ {template.usageCount}x
+
+ Bearbeiten
+
+
+ ))}
+
+
+
+
+ )
+}
diff --git a/website/app/admin/mail/constants.ts b/website/app/admin/mail/constants.ts
new file mode 100644
index 0000000..d419fa0
--- /dev/null
+++ b/website/app/admin/mail/constants.ts
@@ -0,0 +1,53 @@
+import type { TabId } from './types'
+
+// API Base URL for klausur-service
+export const API_BASE = process.env.NEXT_PUBLIC_KLAUSUR_SERVICE_URL || 'http://localhost:8086'
+
+// Tab definitions
+export const tabs: { id: TabId; name: string; icon: JSX.Element }[] = [
+ {
+ id: 'overview',
+ name: 'Übersicht',
+ icon: (
+
+
+
+ ),
+ },
+ {
+ id: 'accounts',
+ name: 'Konten',
+ icon: (
+
+
+
+ ),
+ },
+ {
+ id: 'ai-settings',
+ name: 'KI-Einstellungen',
+ icon: (
+
+
+
+ ),
+ },
+ {
+ id: 'templates',
+ name: 'Vorlagen',
+ icon: (
+
+
+
+ ),
+ },
+ {
+ id: 'logs',
+ name: 'Audit-Log',
+ icon: (
+
+
+
+ ),
+ },
+]
diff --git a/website/app/admin/mail/page.tsx b/website/app/admin/mail/page.tsx
index c7a1fd4..bc59830 100644
--- a/website/app/admin/mail/page.tsx
+++ b/website/app/admin/mail/page.tsx
@@ -11,97 +11,14 @@
import { useState, useEffect, useCallback } from 'react'
import AdminLayout from '@/components/admin/AdminLayout'
+import type { MailStats, SyncStatus, EmailAccount, TabId } from './types'
+import { API_BASE, tabs } from './constants'
+import OverviewTab from './_components/OverviewTab'
+import AccountsTab from './_components/AccountsTab'
+import AISettingsTab from './_components/AISettingsTab'
+import TemplatesTab from './_components/TemplatesTab'
+import AuditLogTab from './_components/AuditLogTab'
-// API Base URL for klausur-service
-const API_BASE = process.env.NEXT_PUBLIC_KLAUSUR_SERVICE_URL || 'http://localhost:8086'
-
-// Types
-interface EmailAccount {
- id: string
- email: string
- displayName: string
- imapHost: string
- imapPort: number
- smtpHost: string
- smtpPort: number
- status: 'active' | 'inactive' | 'error' | 'syncing'
- lastSync: string | null
- emailCount: number
- unreadCount: number
- createdAt: string
-}
-
-interface MailStats {
- totalAccounts: number
- activeAccounts: number
- totalEmails: number
- unreadEmails: number
- totalTasks: number
- pendingTasks: number
- overdueTasks: number
- aiAnalyzedCount: number
- lastSyncTime: string | null
-}
-
-interface SyncStatus {
- running: boolean
- accountsInProgress: string[]
- lastCompleted: string | null
- errors: string[]
-}
-
-// Tab definitions
-type TabId = 'overview' | 'accounts' | 'ai-settings' | 'templates' | 'logs'
-
-const tabs: { id: TabId; name: string; icon: JSX.Element }[] = [
- {
- id: 'overview',
- name: 'Übersicht',
- icon: (
-
-
-
- ),
- },
- {
- id: 'accounts',
- name: 'Konten',
- icon: (
-
-
-
- ),
- },
- {
- id: 'ai-settings',
- name: 'KI-Einstellungen',
- icon: (
-
-
-
- ),
- },
- {
- id: 'templates',
- name: 'Vorlagen',
- icon: (
-
-
-
- ),
- },
- {
- id: 'logs',
- name: 'Audit-Log',
- icon: (
-
-
-
- ),
- },
-]
-
-// Main Component
export default function MailAdminPage() {
const [activeTab, setActiveTab] = useState('overview')
const [stats, setStats] = useState(null)
@@ -241,745 +158,3 @@ export default function MailAdminPage() {
)
}
-
-// ============================================================================
-// Overview Tab
-// ============================================================================
-
-function OverviewTab({
- stats,
- syncStatus,
- loading,
- onRefresh
-}: {
- stats: MailStats | null
- syncStatus: SyncStatus | null
- loading: boolean
- onRefresh: () => void
-}) {
- const triggerSync = async () => {
- try {
- await fetch(`${API_BASE}/api/v1/mail/sync/all`, {
- method: 'POST',
- })
- onRefresh()
- } catch (err) {
- console.error('Failed to trigger sync:', err)
- }
- }
-
- return (
-
- {/* Header */}
-
-
-
System-Übersicht
-
Status aller E-Mail-Konten und Aufgaben
-
-
-
- Aktualisieren
-
-
- {syncStatus?.running ? 'Synchronisiert...' : 'Alle synchronisieren'}
-
-
-
-
- {/* Loading State */}
- {loading && (
-
- )}
-
- {/* Stats Grid */}
- {!loading && stats && (
- <>
-
-
-
-
- 0 ? 'red' : 'green'}
- />
-
-
- {/* Sync Status */}
-
-
Synchronisierung
-
- {syncStatus?.running ? (
- <>
-
-
- Synchronisiere {syncStatus.accountsInProgress.length} Konto(en)...
-
- >
- ) : (
- <>
-
-
Bereit
- >
- )}
- {stats.lastSyncTime && (
-
- Letzte Sync: {new Date(stats.lastSyncTime).toLocaleString('de-DE')}
-
- )}
-
-
- {syncStatus?.errors && syncStatus.errors.length > 0 && (
-
-
Fehler
-
- {syncStatus.errors.slice(0, 3).map((error, i) => (
- {error}
- ))}
-
-
- )}
-
-
- {/* AI Stats */}
-
-
KI-Analyse
-
-
-
Analysiert
-
{stats.aiAnalyzedCount}
-
-
-
Analyse-Rate
-
- {stats.totalEmails > 0
- ? `${Math.round((stats.aiAnalyzedCount / stats.totalEmails) * 100)}%`
- : '0%'}
-
-
-
-
- >
- )}
-
- )
-}
-
-function StatCard({
- title,
- value,
- subtitle,
- color = 'blue'
-}: {
- title: string
- value: number
- subtitle?: string
- color?: 'blue' | 'green' | 'yellow' | 'red'
-}) {
- const colorClasses = {
- blue: 'text-blue-600',
- green: 'text-green-600',
- yellow: 'text-yellow-600',
- red: 'text-red-600',
- }
-
- return (
-
-
{title}
-
{value.toLocaleString()}
- {subtitle &&
{subtitle}
}
-
- )
-}
-
-// ============================================================================
-// Accounts Tab
-// ============================================================================
-
-function AccountsTab({
- accounts,
- loading,
- onRefresh
-}: {
- accounts: EmailAccount[]
- loading: boolean
- onRefresh: () => void
-}) {
- const [showAddModal, setShowAddModal] = useState(false)
-
- const testConnection = async (accountId: string) => {
- try {
- const res = await fetch(`${API_BASE}/api/v1/mail/accounts/${accountId}/test`, {
- method: 'POST',
- })
- if (res.ok) {
- alert('Verbindung erfolgreich!')
- } else {
- alert('Verbindungsfehler')
- }
- } catch (err) {
- alert('Verbindungsfehler')
- }
- }
-
- const statusColors = {
- active: 'bg-green-100 text-green-800',
- inactive: 'bg-gray-100 text-gray-800',
- error: 'bg-red-100 text-red-800',
- syncing: 'bg-yellow-100 text-yellow-800',
- }
-
- const statusLabels = {
- active: 'Aktiv',
- inactive: 'Inaktiv',
- error: 'Fehler',
- syncing: 'Synchronisiert...',
- }
-
- return (
-
- {/* Header */}
-
-
-
E-Mail-Konten
-
Verwalten Sie die verbundenen E-Mail-Konten
-
-
setShowAddModal(true)}
- className="px-4 py-2 text-sm font-medium text-white bg-primary-600 rounded-lg hover:bg-primary-700 flex items-center gap-2"
- >
-
-
-
- Konto hinzufügen
-
-
-
- {/* Loading State */}
- {loading && (
-
- )}
-
- {/* Accounts Grid */}
- {!loading && (
-
- {accounts.length === 0 ? (
-
-
-
-
-
Keine E-Mail-Konten
-
Fügen Sie Ihr erstes E-Mail-Konto hinzu.
-
- ) : (
- accounts.map((account) => (
-
-
-
-
-
-
- {account.displayName || account.email}
-
-
{account.email}
-
-
-
-
- {statusLabels[account.status]}
-
-
testConnection(account.id)}
- className="p-2 text-slate-400 hover:text-slate-600"
- title="Verbindung testen"
- >
-
-
-
-
-
-
-
-
-
-
E-Mails
-
{account.emailCount}
-
-
-
Ungelesen
-
{account.unreadCount}
-
-
-
IMAP
-
{account.imapHost}:{account.imapPort}
-
-
-
Letzte Sync
-
- {account.lastSync
- ? new Date(account.lastSync).toLocaleString('de-DE')
- : 'Nie'}
-
-
-
-
- ))
- )}
-
- )}
-
- {/* Add Account Modal */}
- {showAddModal && (
-
setShowAddModal(false)} onSuccess={() => { setShowAddModal(false); onRefresh(); }} />
- )}
-
- )
-}
-
-function AddAccountModal({
- onClose,
- onSuccess
-}: {
- onClose: () => void
- onSuccess: () => void
-}) {
- const [formData, setFormData] = useState({
- email: '',
- displayName: '',
- imapHost: '',
- imapPort: 993,
- smtpHost: '',
- smtpPort: 587,
- username: '',
- password: '',
- })
- const [submitting, setSubmitting] = useState(false)
- const [error, setError] = useState(null)
-
- const handleSubmit = async (e: React.FormEvent) => {
- e.preventDefault()
- setSubmitting(true)
- setError(null)
-
- try {
- const res = await fetch(`${API_BASE}/api/v1/mail/accounts`, {
- method: 'POST',
- headers: { 'Content-Type': 'application/json' },
- body: JSON.stringify({
- email: formData.email,
- display_name: formData.displayName,
- imap_host: formData.imapHost,
- imap_port: formData.imapPort,
- smtp_host: formData.smtpHost,
- smtp_port: formData.smtpPort,
- username: formData.username,
- password: formData.password,
- }),
- })
-
- if (res.ok) {
- onSuccess()
- } else {
- const data = await res.json()
- setError(data.detail || 'Fehler beim Hinzufügen des Kontos')
- }
- } catch (err) {
- setError('Netzwerkfehler')
- } finally {
- setSubmitting(false)
- }
- }
-
- return (
-
-
-
-
E-Mail-Konto hinzufügen
-
-
-
- {error && (
- {error}
- )}
-
-
-
-
-
- Abbrechen
-
-
- {submitting ? 'Speichern...' : 'Konto hinzufügen'}
-
-
-
-
-
- )
-}
-
-// ============================================================================
-// AI Settings Tab
-// ============================================================================
-
-function AISettingsTab() {
- const [settings, setSettings] = useState({
- autoAnalyze: true,
- autoCreateTasks: true,
- analysisModel: 'breakpilot-teacher-8b',
- confidenceThreshold: 0.7,
- })
-
- return (
-
-
-
KI-Einstellungen
-
Konfigurieren Sie die automatische E-Mail-Analyse
-
-
-
- {/* Auto-Analyze */}
-
-
-
Automatische Analyse
-
E-Mails automatisch beim Empfang analysieren
-
-
setSettings({ ...settings, autoAnalyze: !settings.autoAnalyze })}
- className={`relative inline-flex h-6 w-11 items-center rounded-full transition-colors ${
- settings.autoAnalyze ? 'bg-primary-600' : 'bg-slate-200'
- }`}
- >
-
-
-
-
- {/* Auto-Create Tasks */}
-
-
-
Aufgaben automatisch erstellen
-
Erkannte Fristen als Aufgaben anlegen
-
-
setSettings({ ...settings, autoCreateTasks: !settings.autoCreateTasks })}
- className={`relative inline-flex h-6 w-11 items-center rounded-full transition-colors ${
- settings.autoCreateTasks ? 'bg-primary-600' : 'bg-slate-200'
- }`}
- >
-
-
-
-
- {/* Model Selection */}
-
- Analyse-Modell
- setSettings({ ...settings, analysisModel: e.target.value })}
- className="w-full md:w-64 px-3 py-2 border border-slate-300 rounded-lg focus:ring-2 focus:ring-primary-500"
- >
- BreakPilot Teacher 8B (schnell)
- BreakPilot Teacher 70B (genau)
- Llama 3.1 8B Instruct
-
-
-
- {/* Confidence Threshold */}
-
-
- Konfidenz-Schwelle: {Math.round(settings.confidenceThreshold * 100)}%
-
-
setSettings({ ...settings, confidenceThreshold: parseFloat(e.target.value) })}
- className="w-full md:w-64"
- />
-
- Mindest-Konfidenz für automatische Aufgabenerstellung
-
-
-
-
- {/* Sender Classification */}
-
-
Bekannte Absender (Niedersachsen)
-
- {[
- { domain: '@mk.niedersachsen.de', type: 'Kultusministerium', priority: 'Hoch' },
- { domain: '@rlsb.de', type: 'RLSB', priority: 'Hoch' },
- { domain: '@landesschulbehoerde-nds.de', type: 'Landesschulbehörde', priority: 'Hoch' },
- { domain: '@nibis.de', type: 'NiBiS', priority: 'Mittel' },
- { domain: '@schultraeger.de', type: 'Schulträger', priority: 'Mittel' },
- ].map((sender) => (
-
-
{sender.domain}
-
{sender.type}
-
- {sender.priority}
-
-
- ))}
-
-
-
- )
-}
-
-// ============================================================================
-// Templates Tab
-// ============================================================================
-
-function TemplatesTab() {
- const [templates] = useState([
- { id: '1', name: 'Eingangsbestätigung', category: 'Standard', usageCount: 45 },
- { id: '2', name: 'Terminbestätigung', category: 'Termine', usageCount: 23 },
- { id: '3', name: 'Elternbrief-Vorlage', category: 'Eltern', usageCount: 67 },
- ])
-
- return (
-
-
-
-
E-Mail-Vorlagen
-
Verwalten Sie Antwort-Templates
-
-
-
-
-
- Vorlage erstellen
-
-
-
-
-
-
-
- Name
- Kategorie
- Verwendet
- Aktionen
-
-
-
- {templates.map((template) => (
-
- {template.name}
-
- {template.category}
-
- {template.usageCount}x
-
- Bearbeiten
-
-
- ))}
-
-
-
-
- )
-}
-
-// ============================================================================
-// Audit Log Tab
-// ============================================================================
-
-function AuditLogTab() {
- const [logs] = useState([
- { id: '1', action: 'account_created', user: 'admin@breakpilot.de', timestamp: new Date().toISOString(), details: 'Konto schulleitung@example.de hinzugefügt' },
- { id: '2', action: 'email_analyzed', user: 'system', timestamp: new Date(Date.now() - 3600000).toISOString(), details: '5 E-Mails analysiert' },
- { id: '3', action: 'task_created', user: 'system', timestamp: new Date(Date.now() - 7200000).toISOString(), details: 'Aufgabe aus Fristenerkennung erstellt' },
- ])
-
- const actionLabels: Record = {
- account_created: 'Konto erstellt',
- email_analyzed: 'E-Mail analysiert',
- task_created: 'Aufgabe erstellt',
- sync_completed: 'Sync abgeschlossen',
- }
-
- return (
-
-
-
Audit-Log
-
Alle Aktionen im Mail-System
-
-
-
-
-
-
- Zeit
- Aktion
- Benutzer
- Details
-
-
-
- {logs.map((log) => (
-
-
- {new Date(log.timestamp).toLocaleString('de-DE')}
-
-
-
- {actionLabels[log.action] || log.action}
-
-
- {log.user}
- {log.details}
-
- ))}
-
-
-
-
- )
-}
diff --git a/website/app/admin/mail/types.ts b/website/app/admin/mail/types.ts
new file mode 100644
index 0000000..c0c3d33
--- /dev/null
+++ b/website/app/admin/mail/types.ts
@@ -0,0 +1,35 @@
+export interface EmailAccount {
+ id: string
+ email: string
+ displayName: string
+ imapHost: string
+ imapPort: number
+ smtpHost: string
+ smtpPort: number
+ status: 'active' | 'inactive' | 'error' | 'syncing'
+ lastSync: string | null
+ emailCount: number
+ unreadCount: number
+ createdAt: string
+}
+
+export interface MailStats {
+ totalAccounts: number
+ activeAccounts: number
+ totalEmails: number
+ unreadEmails: number
+ totalTasks: number
+ pendingTasks: number
+ overdueTasks: number
+ aiAnalyzedCount: number
+ lastSyncTime: string | null
+}
+
+export interface SyncStatus {
+ running: boolean
+ accountsInProgress: string[]
+ lastCompleted: string | null
+ errors: string[]
+}
+
+export type TabId = 'overview' | 'accounts' | 'ai-settings' | 'templates' | 'logs'
diff --git a/website/app/admin/ocr-labeling/_components/ExportTab.tsx b/website/app/admin/ocr-labeling/_components/ExportTab.tsx
new file mode 100644
index 0000000..1b5d938
--- /dev/null
+++ b/website/app/admin/ocr-labeling/_components/ExportTab.tsx
@@ -0,0 +1,113 @@
+'use client'
+
+import { useState } from 'react'
+import type { OCRSession, OCRStats } from '../types'
+
+const API_BASE = process.env.NEXT_PUBLIC_KLAUSUR_SERVICE_URL || 'http://localhost:8086'
+
+export default function ExportTab({
+ sessions,
+ selectedSession,
+ setSelectedSession,
+ stats,
+ onError,
+}: {
+ sessions: OCRSession[]
+ selectedSession: string | null
+ setSelectedSession: (id: string | null) => void
+ stats: OCRStats | null
+ onError: (msg: string) => void
+}) {
+ const [exportFormat, setExportFormat] = useState<'generic' | 'trocr' | 'llama_vision'>('generic')
+ const [exporting, setExporting] = useState(false)
+ const [exportResult, setExportResult] = useState(null)
+
+ const handleExport = async () => {
+ setExporting(true)
+ try {
+ const res = await fetch(`${API_BASE}/api/v1/ocr-label/export`, {
+ method: 'POST',
+ headers: { 'Content-Type': 'application/json' },
+ body: JSON.stringify({
+ export_format: exportFormat,
+ session_id: selectedSession,
+ }),
+ })
+
+ if (res.ok) {
+ const data = await res.json()
+ setExportResult(data)
+ } else {
+ onError('Export fehlgeschlagen')
+ }
+ } catch (err) {
+ onError('Netzwerkfehler')
+ } finally {
+ setExporting(false)
+ }
+ }
+
+ return (
+
+
+
Training-Daten exportieren
+
+
+ Export-Format
+ setExportFormat(e.target.value as typeof exportFormat)}
+ className="w-full px-3 py-2 border border-slate-300 rounded-lg focus:ring-2 focus:ring-primary-500"
+ >
+ Generic JSON
+ TrOCR Fine-Tuning
+ Llama Vision Fine-Tuning
+
+
+
+
+ Session (optional)
+ setSelectedSession(e.target.value || null)}
+ className="w-full px-3 py-2 border border-slate-300 rounded-lg focus:ring-2 focus:ring-primary-500"
+ >
+ Alle Sessions
+ {sessions.map((session) => (
+ {session.name}
+ ))}
+
+
+
+
+ {exporting ? 'Exportiere...' : `${stats?.exportable_items || 0} Samples exportieren`}
+
+
+
+
+ {exportResult && (
+
+
Export-Ergebnis
+
+
+ {exportResult.exported_count} Samples erfolgreich exportiert
+
+
+ Batch: {exportResult.batch_id}
+
+
+
+
{JSON.stringify(exportResult.samples?.slice(0, 3), null, 2)}
+ {(exportResult.samples?.length || 0) > 3 && (
+
... und {exportResult.samples.length - 3} weitere
+ )}
+
+
+ )}
+
+ )
+}
diff --git a/website/app/admin/ocr-labeling/_components/LabelingTab.tsx b/website/app/admin/ocr-labeling/_components/LabelingTab.tsx
new file mode 100644
index 0000000..168fcf2
--- /dev/null
+++ b/website/app/admin/ocr-labeling/_components/LabelingTab.tsx
@@ -0,0 +1,192 @@
+'use client'
+
+import type { OCRItem } from '../types'
+
+const API_BASE = process.env.NEXT_PUBLIC_KLAUSUR_SERVICE_URL || 'http://localhost:8086'
+
+export default function LabelingTab({
+ queue,
+ currentItem,
+ currentIndex,
+ correctedText,
+ setCorrectedText,
+ onGoToPrev,
+ onGoToNext,
+ onConfirm,
+ onCorrect,
+ onSkip,
+ onSelectItem,
+}: {
+ queue: OCRItem[]
+ currentItem: OCRItem | null
+ currentIndex: number
+ correctedText: string
+ setCorrectedText: (text: string) => void
+ onGoToPrev: () => void
+ onGoToNext: () => void
+ onConfirm: () => void
+ onCorrect: () => void
+ onSkip: () => void
+ onSelectItem: (item: OCRItem, index: number) => void
+}) {
+ return (
+
+ {/* Left: Image Viewer */}
+
+
+
Bild
+
+
+
+
+
+
+
+ {currentIndex + 1} / {queue.length}
+
+
= queue.length - 1}
+ className="p-2 rounded hover:bg-slate-100 disabled:opacity-50"
+ title="Weiter (Pfeiltaste rechts)"
+ >
+
+
+
+
+
+
+
+ {currentItem ? (
+
+
{
+ const target = e.target as HTMLImageElement
+ target.style.display = 'none'
+ }}
+ />
+
+ ) : (
+
+
Keine Bilder in der Warteschlange
+
+ )}
+
+
+ {/* Right: OCR Text & Actions */}
+
+
+ {/* OCR Result */}
+
+
+
OCR-Ergebnis
+ {currentItem?.ocr_confidence && (
+ 0.8
+ ? 'bg-green-100 text-green-800'
+ : currentItem.ocr_confidence > 0.5
+ ? 'bg-yellow-100 text-yellow-800'
+ : 'bg-red-100 text-red-800'
+ }`}>
+ {Math.round(currentItem.ocr_confidence * 100)}% Konfidenz
+
+ )}
+
+
+ {currentItem?.ocr_text || Kein OCR-Text }
+
+
+
+ {/* Correction Input */}
+
+
Korrektur
+ setCorrectedText(e.target.value)}
+ placeholder="Korrigierter Text..."
+ className="w-full h-32 p-3 border border-slate-200 rounded-lg focus:ring-2 focus:ring-primary-500 focus:border-transparent"
+ />
+
+
+ {/* Actions */}
+
+
+
+
+
+ Korrekt (Enter)
+
+
+
+
+
+ Korrektur speichern
+
+
+
+
+
+ Ueberspringen (S)
+
+
+
+ {/* Keyboard Shortcuts */}
+
+
Tastaturkuerzel:
+
Enter = Bestaetigen | S = Ueberspringen
+
Pfeiltasten = Navigation
+
+
+
+
+ {/* Bottom: Queue Preview */}
+
+
Warteschlange ({queue.length} Items)
+
+ {queue.slice(0, 10).map((item, idx) => (
+
onSelectItem(item, idx)}
+ className={`flex-shrink-0 w-24 h-24 rounded-lg overflow-hidden border-2 ${
+ idx === currentIndex
+ ? 'border-primary-500'
+ : 'border-transparent hover:border-slate-300'
+ }`}
+ >
+
+
+ ))}
+ {queue.length > 10 && (
+
+ +{queue.length - 10} mehr
+
+ )}
+
+
+
+ )
+}
diff --git a/website/app/admin/ocr-labeling/_components/SessionsTab.tsx b/website/app/admin/ocr-labeling/_components/SessionsTab.tsx
new file mode 100644
index 0000000..8a6002a
--- /dev/null
+++ b/website/app/admin/ocr-labeling/_components/SessionsTab.tsx
@@ -0,0 +1,160 @@
+'use client'
+
+import { useState } from 'react'
+import type { OCRSession, CreateSessionRequest, OCRModel } from '../types'
+
+const API_BASE = process.env.NEXT_PUBLIC_KLAUSUR_SERVICE_URL || 'http://localhost:8086'
+
+export default function SessionsTab({
+ sessions,
+ selectedSession,
+ setSelectedSession,
+ onSessionCreated,
+ onError,
+}: {
+ sessions: OCRSession[]
+ selectedSession: string | null
+ setSelectedSession: (id: string | null) => void
+ onSessionCreated: () => void
+ onError: (msg: string) => void
+}) {
+ const [newSession, setNewSession] = useState({
+ name: '',
+ source_type: 'klausur',
+ description: '',
+ ocr_model: 'llama3.2-vision:11b',
+ })
+
+ const createSession = async () => {
+ try {
+ const res = await fetch(`${API_BASE}/api/v1/ocr-label/sessions`, {
+ method: 'POST',
+ headers: { 'Content-Type': 'application/json' },
+ body: JSON.stringify(newSession),
+ })
+ if (res.ok) {
+ setNewSession({ name: '', source_type: 'klausur', description: '', ocr_model: 'llama3.2-vision:11b' })
+ onSessionCreated()
+ } else {
+ onError('Session erstellen fehlgeschlagen')
+ }
+ } catch (err) {
+ onError('Netzwerkfehler')
+ }
+ }
+
+ return (
+
+ {/* Create Session */}
+
+
Neue Session erstellen
+
+
+ Name
+ setNewSession(prev => ({ ...prev, name: e.target.value }))}
+ placeholder="z.B. Mathe Klausur Q1 2025"
+ className="w-full px-3 py-2 border border-slate-300 rounded-lg focus:ring-2 focus:ring-primary-500"
+ />
+
+
+ Typ
+ setNewSession(prev => ({ ...prev, source_type: e.target.value as 'klausur' | 'handwriting_sample' | 'scan' }))}
+ className="w-full px-3 py-2 border border-slate-300 rounded-lg focus:ring-2 focus:ring-primary-500"
+ >
+ Klausur
+ Handschriftprobe
+ Scan
+
+
+
+
OCR Modell
+
setNewSession(prev => ({ ...prev, ocr_model: e.target.value as OCRModel }))}
+ className="w-full px-3 py-2 border border-slate-300 rounded-lg focus:ring-2 focus:ring-primary-500"
+ >
+ llama3.2-vision:11b - Vision LLM (Standard)
+ TrOCR - Microsoft Transformer (schnell)
+ PaddleOCR + LLM (4x schneller)
+ Donut - Document Understanding (strukturiert)
+
+
+ {newSession.ocr_model === 'paddleocr' && 'PaddleOCR erkennt Text schnell, LLM strukturiert die Ergebnisse.'}
+ {newSession.ocr_model === 'donut' && 'Speziell fuer Dokumente mit Tabellen und Formularen.'}
+ {newSession.ocr_model === 'trocr' && 'Schnelles Transformer-Modell fuer gedruckten Text.'}
+ {newSession.ocr_model === 'llama3.2-vision:11b' && 'Beste Qualitaet bei Handschrift, aber langsamer.'}
+
+
+
+ Beschreibung
+ setNewSession(prev => ({ ...prev, description: e.target.value }))}
+ placeholder="Optional..."
+ className="w-full px-3 py-2 border border-slate-300 rounded-lg focus:ring-2 focus:ring-primary-500"
+ />
+
+
+
+ Session erstellen
+
+
+
+ {/* Sessions List */}
+
+
+
Sessions ({sessions.length})
+
+
+ {sessions.map((session) => (
+
setSelectedSession(session.id === selectedSession ? null : session.id)}
+ >
+
+
+
{session.name}
+
+ {session.source_type} | {session.ocr_model}
+
+
+
+
+ {session.labeled_items}/{session.total_items} gelabelt
+
+
+
0 ? (session.labeled_items / session.total_items) * 100 : 0}%`
+ }}
+ />
+
+
+
+ {session.description && (
+
{session.description}
+ )}
+
+ ))}
+ {sessions.length === 0 && (
+
Keine Sessions vorhanden
+ )}
+
+
+
+ )
+}
diff --git a/website/app/admin/ocr-labeling/_components/StatsTab.tsx b/website/app/admin/ocr-labeling/_components/StatsTab.tsx
new file mode 100644
index 0000000..9f64191
--- /dev/null
+++ b/website/app/admin/ocr-labeling/_components/StatsTab.tsx
@@ -0,0 +1,68 @@
+'use client'
+
+import type { OCRStats } from '../types'
+
+export default function StatsTab({ stats }: { stats: OCRStats | null }) {
+ return (
+
+ {/* Global Stats */}
+
+
+
Gesamt Items
+
{stats?.total_items || 0}
+
+
+
Gelabelt
+
{stats?.labeled_items || 0}
+
+
+
Ausstehend
+
{stats?.pending_items || 0}
+
+
+
OCR-Genauigkeit
+
{stats?.accuracy_rate || 0}%
+
+
+
+ {/* Detailed Stats */}
+
+
Details
+
+
+
Bestaetigt
+
{stats?.confirmed_items || 0}
+
+
+
Korrigiert
+
{stats?.corrected_items || 0}
+
+
+
Exportierbar
+
{stats?.exportable_items || 0}
+
+
+
Durchschn. Label-Zeit
+
{stats?.avg_label_time_seconds || 0}s
+
+
+
+
+ {/* Progress Bar */}
+ {stats?.total_items ? (
+
+
Fortschritt
+
+
+ {Math.round((stats.labeled_items / stats.total_items) * 100)}% abgeschlossen
+
+
+ ) : null}
+
+ )
+}
diff --git a/website/app/admin/ocr-labeling/_components/UploadTab.tsx b/website/app/admin/ocr-labeling/_components/UploadTab.tsx
new file mode 100644
index 0000000..af290f8
--- /dev/null
+++ b/website/app/admin/ocr-labeling/_components/UploadTab.tsx
@@ -0,0 +1,152 @@
+'use client'
+
+import { useState, useRef } from 'react'
+import type { OCRSession } from '../types'
+
+const API_BASE = process.env.NEXT_PUBLIC_KLAUSUR_SERVICE_URL || 'http://localhost:8086'
+
+export default function UploadTab({
+ sessions,
+ selectedSession,
+ setSelectedSession,
+ onUploadComplete,
+ onError,
+}: {
+ sessions: OCRSession[]
+ selectedSession: string | null
+ setSelectedSession: (id: string | null) => void
+ onUploadComplete: () => void
+ onError: (msg: string) => void
+}) {
+ const [uploading, setUploading] = useState(false)
+ const [uploadResults, setUploadResults] = useState
([])
+ const fileInputRef = useRef(null)
+
+ const handleUpload = async (files: FileList) => {
+ if (!selectedSession) {
+ onError('Bitte zuerst eine Session auswaehlen')
+ return
+ }
+
+ setUploading(true)
+ const formData = new FormData()
+ Array.from(files).forEach(file => formData.append('files', file))
+ formData.append('run_ocr', 'true')
+
+ try {
+ const res = await fetch(`${API_BASE}/api/v1/ocr-label/sessions/${selectedSession}/upload`, {
+ method: 'POST',
+ body: formData,
+ })
+
+ if (res.ok) {
+ const data = await res.json()
+ setUploadResults(data.items || [])
+ onUploadComplete()
+ } else {
+ onError('Upload fehlgeschlagen')
+ }
+ } catch (err) {
+ onError('Netzwerkfehler beim Upload')
+ } finally {
+ setUploading(false)
+ }
+ }
+
+ return (
+
+ {/* Session Selection */}
+
+
Session auswaehlen
+ setSelectedSession(e.target.value || null)}
+ className="w-full px-3 py-2 border border-slate-300 rounded-lg focus:ring-2 focus:ring-primary-500"
+ >
+ -- Session waehlen --
+ {sessions.map((session) => (
+
+ {session.name} ({session.total_items} Items)
+
+ ))}
+
+
+
+ {/* Upload Area */}
+
+
Bilder hochladen
+
{
+ e.preventDefault()
+ e.currentTarget.classList.add('border-primary-500', 'bg-primary-50')
+ }}
+ onDragLeave={(e) => {
+ e.currentTarget.classList.remove('border-primary-500', 'bg-primary-50')
+ }}
+ onDrop={(e) => {
+ e.preventDefault()
+ e.currentTarget.classList.remove('border-primary-500', 'bg-primary-50')
+ if (e.dataTransfer.files.length > 0) {
+ handleUpload(e.dataTransfer.files)
+ }
+ }}
+ >
+
e.target.files && handleUpload(e.target.files)}
+ className="hidden"
+ disabled={!selectedSession}
+ />
+ {uploading ? (
+
+
+
Hochladen & OCR ausfuehren...
+
+ ) : (
+ <>
+
+
+
+
+ Bilder hierher ziehen oder{' '}
+ fileInputRef.current?.click()}
+ disabled={!selectedSession}
+ className="text-primary-600 hover:underline"
+ >
+ auswaehlen
+
+
+
PNG, JPG (max. 10MB pro Bild)
+ >
+ )}
+
+
+
+ {/* Upload Results */}
+ {uploadResults.length > 0 && (
+
+
Upload-Ergebnisse ({uploadResults.length})
+
+ {uploadResults.map((result) => (
+
+ {result.filename}
+
+ {result.ocr_text ? `OCR OK (${Math.round((result.ocr_confidence || 0) * 100)}%)` : 'Kein OCR'}
+
+
+ ))}
+
+
+ )}
+
+ )
+}
diff --git a/website/app/admin/ocr-labeling/constants.tsx b/website/app/admin/ocr-labeling/constants.tsx
new file mode 100644
index 0000000..1fe77c5
--- /dev/null
+++ b/website/app/admin/ocr-labeling/constants.tsx
@@ -0,0 +1,11 @@
+export const API_BASE = process.env.NEXT_PUBLIC_KLAUSUR_SERVICE_URL || 'http://localhost:8086'
+
+export type TabId = 'labeling' | 'sessions' | 'upload' | 'stats' | 'export'
+
+export const tabs: { id: TabId; name: string; icon: JSX.Element }[] = [
+ { id: 'labeling', name: 'Labeling', icon: },
+ { id: 'sessions', name: 'Sessions', icon: },
+ { id: 'upload', name: 'Upload', icon: },
+ { id: 'stats', name: 'Statistiken', icon: },
+ { id: 'export', name: 'Export', icon: },
+]
diff --git a/website/app/admin/ocr-labeling/page.tsx b/website/app/admin/ocr-labeling/page.tsx
index 69a0e24..210e1f6 100644
--- a/website/app/admin/ocr-labeling/page.tsx
+++ b/website/app/admin/ocr-labeling/page.tsx
@@ -7,70 +7,16 @@
* DSGVO-konform: Alle Verarbeitung lokal auf Mac Mini (Ollama).
*/
-import { useState, useEffect, useCallback, useRef } from 'react'
+import { useState, useEffect, useCallback } from 'react'
import AdminLayout from '@/components/admin/AdminLayout'
-import type {
- OCRSession,
- OCRItem,
- OCRStats,
- TrainingSample,
- CreateSessionRequest,
- OCRModel,
-} from './types'
-
-// API Base URL for klausur-service
-const API_BASE = process.env.NEXT_PUBLIC_KLAUSUR_SERVICE_URL || 'http://localhost:8086'
-
-// Tab definitions
-type TabId = 'labeling' | 'sessions' | 'upload' | 'stats' | 'export'
-
-const tabs: { id: TabId; name: string; icon: JSX.Element }[] = [
- {
- id: 'labeling',
- name: 'Labeling',
- icon: (
-
-
-
- ),
- },
- {
- id: 'sessions',
- name: 'Sessions',
- icon: (
-
-
-
- ),
- },
- {
- id: 'upload',
- name: 'Upload',
- icon: (
-
-
-
- ),
- },
- {
- id: 'stats',
- name: 'Statistiken',
- icon: (
-
-
-
- ),
- },
- {
- id: 'export',
- name: 'Export',
- icon: (
-
-
-
- ),
- },
-]
+import type { OCRSession, OCRItem, OCRStats } from './types'
+import { API_BASE, tabs } from './constants'
+import type { TabId } from './constants'
+import LabelingTab from './_components/LabelingTab'
+import SessionsTab from './_components/SessionsTab'
+import UploadTab from './_components/UploadTab'
+import StatsTab from './_components/StatsTab'
+import ExportTab from './_components/ExportTab'
export default function OCRLabelingPage() {
const [activeTab, setActiveTab] = useState('labeling')
@@ -85,819 +31,65 @@ export default function OCRLabelingPage() {
const [correctedText, setCorrectedText] = useState('')
const [labelStartTime, setLabelStartTime] = useState(null)
- // Fetch sessions
const fetchSessions = useCallback(async () => {
- try {
- const res = await fetch(`${API_BASE}/api/v1/ocr-label/sessions`)
- if (res.ok) {
- const data = await res.json()
- setSessions(data)
- }
- } catch (err) {
- console.error('Failed to fetch sessions:', err)
- }
+ try { const r = await fetch(`${API_BASE}/api/v1/ocr-label/sessions`); if (r.ok) setSessions(await r.json()) }
+ catch (e) { console.error('Failed to fetch sessions:', e) }
}, [])
- // Fetch queue
const fetchQueue = useCallback(async () => {
try {
- const url = selectedSession
- ? `${API_BASE}/api/v1/ocr-label/queue?session_id=${selectedSession}&limit=20`
- : `${API_BASE}/api/v1/ocr-label/queue?limit=20`
- const res = await fetch(url)
- if (res.ok) {
- const data = await res.json()
- setQueue(data)
- if (data.length > 0 && !currentItem) {
- setCurrentItem(data[0])
- setCurrentIndex(0)
- setCorrectedText(data[0].ocr_text || '')
- setLabelStartTime(Date.now())
- }
+ const url = selectedSession ? `${API_BASE}/api/v1/ocr-label/queue?session_id=${selectedSession}&limit=20` : `${API_BASE}/api/v1/ocr-label/queue?limit=20`
+ const r = await fetch(url)
+ if (r.ok) {
+ const data = await r.json(); setQueue(data)
+ if (data.length > 0 && !currentItem) { setCurrentItem(data[0]); setCurrentIndex(0); setCorrectedText(data[0].ocr_text || ''); setLabelStartTime(Date.now()) }
}
- } catch (err) {
- console.error('Failed to fetch queue:', err)
- }
+ } catch (e) { console.error('Failed to fetch queue:', e) }
}, [selectedSession, currentItem])
- // Fetch stats
const fetchStats = useCallback(async () => {
try {
- const url = selectedSession
- ? `${API_BASE}/api/v1/ocr-label/stats?session_id=${selectedSession}`
- : `${API_BASE}/api/v1/ocr-label/stats`
- const res = await fetch(url)
- if (res.ok) {
- const data = await res.json()
- setStats(data)
- }
- } catch (err) {
- console.error('Failed to fetch stats:', err)
- }
+ const url = selectedSession ? `${API_BASE}/api/v1/ocr-label/stats?session_id=${selectedSession}` : `${API_BASE}/api/v1/ocr-label/stats`
+ const r = await fetch(url); if (r.ok) setStats(await r.json())
+ } catch (e) { console.error('Failed to fetch stats:', e) }
}, [selectedSession])
- // Initial data load
- useEffect(() => {
- const loadData = async () => {
- setLoading(true)
- await Promise.all([fetchSessions(), fetchQueue(), fetchStats()])
- setLoading(false)
- }
- loadData()
- }, [fetchSessions, fetchQueue, fetchStats])
+ useEffect(() => { setLoading(true); Promise.all([fetchSessions(), fetchQueue(), fetchStats()]).then(() => setLoading(false)) }, [fetchSessions, fetchQueue, fetchStats])
+ useEffect(() => { setCurrentItem(null); setCurrentIndex(0); fetchQueue(); fetchStats() }, [selectedSession, fetchQueue, fetchStats])
- // Refresh queue when session changes
- useEffect(() => {
- setCurrentItem(null)
- setCurrentIndex(0)
- fetchQueue()
- fetchStats()
- }, [selectedSession, fetchQueue, fetchStats])
+ const getLabelTime = () => labelStartTime ? Math.round((Date.now() - labelStartTime) / 1000) : undefined
+
+ const setItem = (item: OCRItem, idx: number) => { setCurrentIndex(idx); setCurrentItem(item); setCorrectedText(item.ocr_text || ''); setLabelStartTime(Date.now()) }
- // Navigate to next item
const goToNext = () => {
- if (currentIndex < queue.length - 1) {
- const nextIndex = currentIndex + 1
- setCurrentIndex(nextIndex)
- setCurrentItem(queue[nextIndex])
- setCorrectedText(queue[nextIndex].ocr_text || '')
- setLabelStartTime(Date.now())
- } else {
- // Refresh queue
- fetchQueue()
- }
+ if (currentIndex < queue.length - 1) setItem(queue[currentIndex + 1], currentIndex + 1)
+ else fetchQueue()
+ }
+ const goToPrev = () => { if (currentIndex > 0) setItem(queue[currentIndex - 1], currentIndex - 1) }
+
+ const postAction = async (endpoint: string, body: object) => {
+ const r = await fetch(`${API_BASE}/api/v1/ocr-label/${endpoint}`, { method: 'POST', headers: { 'Content-Type': 'application/json' }, body: JSON.stringify(body) })
+ if (r.ok) { setQueue(prev => prev.filter(i => i.id !== currentItem?.id)); goToNext(); fetchStats() }
+ else setError(`${endpoint} fehlgeschlagen`)
}
- // Navigate to previous item
- const goToPrev = () => {
- if (currentIndex > 0) {
- const prevIndex = currentIndex - 1
- setCurrentIndex(prevIndex)
- setCurrentItem(queue[prevIndex])
- setCorrectedText(queue[prevIndex].ocr_text || '')
- setLabelStartTime(Date.now())
- }
- }
+ const confirmItem = () => { if (currentItem) postAction('confirm', { item_id: currentItem.id, label_time_seconds: getLabelTime() }).catch(() => setError('Netzwerkfehler')) }
+ const correctItem = () => { if (currentItem && correctedText.trim()) postAction('correct', { item_id: currentItem.id, ground_truth: correctedText.trim(), label_time_seconds: getLabelTime() }).catch(() => setError('Netzwerkfehler')) }
+ const skipItem = () => { if (currentItem) postAction('skip', { item_id: currentItem.id }).catch(() => setError('Netzwerkfehler')) }
- // Calculate label time
- const getLabelTime = (): number | undefined => {
- if (!labelStartTime) return undefined
- return Math.round((Date.now() - labelStartTime) / 1000)
- }
-
- // Confirm item
- const confirmItem = async () => {
- if (!currentItem) return
-
- try {
- const res = await fetch(`${API_BASE}/api/v1/ocr-label/confirm`, {
- method: 'POST',
- headers: { 'Content-Type': 'application/json' },
- body: JSON.stringify({
- item_id: currentItem.id,
- label_time_seconds: getLabelTime(),
- }),
- })
-
- if (res.ok) {
- // Remove from queue and go to next
- setQueue(prev => prev.filter(item => item.id !== currentItem.id))
- goToNext()
- fetchStats()
- } else {
- setError('Bestaetigung fehlgeschlagen')
- }
- } catch (err) {
- setError('Netzwerkfehler')
- }
- }
-
- // Correct item
- const correctItem = async () => {
- if (!currentItem || !correctedText.trim()) return
-
- try {
- const res = await fetch(`${API_BASE}/api/v1/ocr-label/correct`, {
- method: 'POST',
- headers: { 'Content-Type': 'application/json' },
- body: JSON.stringify({
- item_id: currentItem.id,
- ground_truth: correctedText.trim(),
- label_time_seconds: getLabelTime(),
- }),
- })
-
- if (res.ok) {
- setQueue(prev => prev.filter(item => item.id !== currentItem.id))
- goToNext()
- fetchStats()
- } else {
- setError('Korrektur fehlgeschlagen')
- }
- } catch (err) {
- setError('Netzwerkfehler')
- }
- }
-
- // Skip item
- const skipItem = async () => {
- if (!currentItem) return
-
- try {
- const res = await fetch(`${API_BASE}/api/v1/ocr-label/skip`, {
- method: 'POST',
- headers: { 'Content-Type': 'application/json' },
- body: JSON.stringify({ item_id: currentItem.id }),
- })
-
- if (res.ok) {
- setQueue(prev => prev.filter(item => item.id !== currentItem.id))
- goToNext()
- fetchStats()
- } else {
- setError('Ueberspringen fehlgeschlagen')
- }
- } catch (err) {
- setError('Netzwerkfehler')
- }
- }
-
- // Keyboard shortcuts
useEffect(() => {
- const handleKeyDown = (e: KeyboardEvent) => {
- // Only handle if not in text input
+ const h = (e: KeyboardEvent) => {
if (e.target instanceof HTMLTextAreaElement) return
-
- if (e.key === 'Enter' && !e.shiftKey) {
- e.preventDefault()
- confirmItem()
- } else if (e.key === 'ArrowRight') {
- goToNext()
- } else if (e.key === 'ArrowLeft') {
- goToPrev()
- } else if (e.key === 's' && !e.ctrlKey && !e.metaKey) {
- skipItem()
- }
+ if (e.key === 'Enter' && !e.shiftKey) { e.preventDefault(); confirmItem() }
+ else if (e.key === 'ArrowRight') goToNext()
+ else if (e.key === 'ArrowLeft') goToPrev()
+ else if (e.key === 's' && !e.ctrlKey && !e.metaKey) skipItem()
}
-
- window.addEventListener('keydown', handleKeyDown)
- return () => window.removeEventListener('keydown', handleKeyDown)
+ window.addEventListener('keydown', h); return () => window.removeEventListener('keydown', h)
}, [currentItem, correctedText])
- // Render Labeling Tab
- const renderLabelingTab = () => (
-
- {/* Left: Image Viewer */}
-
-
-
Bild
-
-
-
-
-
-
-
- {currentIndex + 1} / {queue.length}
-
-
= queue.length - 1}
- className="p-2 rounded hover:bg-slate-100 disabled:opacity-50"
- title="Weiter (Pfeiltaste rechts)"
- >
-
-
-
-
-
-
-
- {currentItem ? (
-
-
{
- // Fallback if image fails to load
- const target = e.target as HTMLImageElement
- target.style.display = 'none'
- }}
- />
-
- ) : (
-
-
Keine Bilder in der Warteschlange
-
- )}
-
-
- {/* Right: OCR Text & Actions */}
-
-
- {/* OCR Result */}
-
-
-
OCR-Ergebnis
- {currentItem?.ocr_confidence && (
- 0.8
- ? 'bg-green-100 text-green-800'
- : currentItem.ocr_confidence > 0.5
- ? 'bg-yellow-100 text-yellow-800'
- : 'bg-red-100 text-red-800'
- }`}>
- {Math.round(currentItem.ocr_confidence * 100)}% Konfidenz
-
- )}
-
-
- {currentItem?.ocr_text || Kein OCR-Text }
-
-
-
- {/* Correction Input */}
-
-
Korrektur
- setCorrectedText(e.target.value)}
- placeholder="Korrigierter Text..."
- className="w-full h-32 p-3 border border-slate-200 rounded-lg focus:ring-2 focus:ring-primary-500 focus:border-transparent"
- />
-
-
- {/* Actions */}
-
-
-
-
-
- Korrekt (Enter)
-
-
-
-
-
- Korrektur speichern
-
-
-
-
-
- Ueberspringen (S)
-
-
-
- {/* Keyboard Shortcuts */}
-
-
Tastaturkuerzel:
-
Enter = Bestaetigen | S = Ueberspringen
-
Pfeiltasten = Navigation
-
-
-
-
- {/* Bottom: Queue Preview */}
-
-
Warteschlange ({queue.length} Items)
-
- {queue.slice(0, 10).map((item, idx) => (
-
{
- setCurrentIndex(idx)
- setCurrentItem(item)
- setCorrectedText(item.ocr_text || '')
- setLabelStartTime(Date.now())
- }}
- className={`flex-shrink-0 w-24 h-24 rounded-lg overflow-hidden border-2 ${
- idx === currentIndex
- ? 'border-primary-500'
- : 'border-transparent hover:border-slate-300'
- }`}
- >
-
-
- ))}
- {queue.length > 10 && (
-
- +{queue.length - 10} mehr
-
- )}
-
-
-
- )
-
- // Render Sessions Tab
- const renderSessionsTab = () => {
- const [newSession, setNewSession] = useState({
- name: '',
- source_type: 'klausur',
- description: '',
- ocr_model: 'llama3.2-vision:11b',
- })
-
- const createSession = async () => {
- try {
- const res = await fetch(`${API_BASE}/api/v1/ocr-label/sessions`, {
- method: 'POST',
- headers: { 'Content-Type': 'application/json' },
- body: JSON.stringify(newSession),
- })
-
- if (res.ok) {
- setNewSession({ name: '', source_type: 'klausur', description: '', ocr_model: 'llama3.2-vision:11b' })
- fetchSessions()
- } else {
- setError('Session erstellen fehlgeschlagen')
- }
- } catch (err) {
- setError('Netzwerkfehler')
- }
- }
-
- return (
-
- {/* Create Session */}
-
-
Neue Session erstellen
-
-
- Name
- setNewSession(prev => ({ ...prev, name: e.target.value }))}
- placeholder="z.B. Mathe Klausur Q1 2025"
- className="w-full px-3 py-2 border border-slate-300 rounded-lg focus:ring-2 focus:ring-primary-500"
- />
-
-
- Typ
- setNewSession(prev => ({ ...prev, source_type: e.target.value as 'klausur' | 'handwriting_sample' | 'scan' }))}
- className="w-full px-3 py-2 border border-slate-300 rounded-lg focus:ring-2 focus:ring-primary-500"
- >
- Klausur
- Handschriftprobe
- Scan
-
-
-
-
OCR Modell
-
setNewSession(prev => ({ ...prev, ocr_model: e.target.value as OCRModel }))}
- className="w-full px-3 py-2 border border-slate-300 rounded-lg focus:ring-2 focus:ring-primary-500"
- >
- llama3.2-vision:11b - Vision LLM (Standard)
- TrOCR - Microsoft Transformer (schnell)
- PaddleOCR + LLM (4x schneller)
- Donut - Document Understanding (strukturiert)
-
-
- {newSession.ocr_model === 'paddleocr' && 'PaddleOCR erkennt Text schnell, LLM strukturiert die Ergebnisse.'}
- {newSession.ocr_model === 'donut' && 'Speziell fuer Dokumente mit Tabellen und Formularen.'}
- {newSession.ocr_model === 'trocr' && 'Schnelles Transformer-Modell fuer gedruckten Text.'}
- {newSession.ocr_model === 'llama3.2-vision:11b' && 'Beste Qualitaet bei Handschrift, aber langsamer.'}
-
-
-
- Beschreibung
- setNewSession(prev => ({ ...prev, description: e.target.value }))}
- placeholder="Optional..."
- className="w-full px-3 py-2 border border-slate-300 rounded-lg focus:ring-2 focus:ring-primary-500"
- />
-
-
-
- Session erstellen
-
-
-
- {/* Sessions List */}
-
-
-
Sessions ({sessions.length})
-
-
- {sessions.map((session) => (
-
setSelectedSession(session.id === selectedSession ? null : session.id)}
- >
-
-
-
{session.name}
-
- {session.source_type} | {session.ocr_model}
-
-
-
-
- {session.labeled_items}/{session.total_items} gelabelt
-
-
-
0 ? (session.labeled_items / session.total_items) * 100 : 0}%`
- }}
- />
-
-
-
- {session.description && (
-
{session.description}
- )}
-
- ))}
- {sessions.length === 0 && (
-
Keine Sessions vorhanden
- )}
-
-
-
- )
- }
-
- // Render Upload Tab
- const renderUploadTab = () => {
- const [uploading, setUploading] = useState(false)
- const [uploadResults, setUploadResults] = useState
([])
- const fileInputRef = useRef(null)
-
- const handleUpload = async (files: FileList) => {
- if (!selectedSession) {
- setError('Bitte zuerst eine Session auswaehlen')
- return
- }
-
- setUploading(true)
- const formData = new FormData()
- Array.from(files).forEach(file => formData.append('files', file))
- formData.append('run_ocr', 'true')
-
- try {
- const res = await fetch(`${API_BASE}/api/v1/ocr-label/sessions/${selectedSession}/upload`, {
- method: 'POST',
- body: formData,
- })
-
- if (res.ok) {
- const data = await res.json()
- setUploadResults(data.items || [])
- fetchQueue()
- fetchStats()
- } else {
- setError('Upload fehlgeschlagen')
- }
- } catch (err) {
- setError('Netzwerkfehler beim Upload')
- } finally {
- setUploading(false)
- }
- }
-
- return (
-
- {/* Session Selection */}
-
-
Session auswaehlen
- setSelectedSession(e.target.value || null)}
- className="w-full px-3 py-2 border border-slate-300 rounded-lg focus:ring-2 focus:ring-primary-500"
- >
- -- Session waehlen --
- {sessions.map((session) => (
-
- {session.name} ({session.total_items} Items)
-
- ))}
-
-
-
- {/* Upload Area */}
-
-
Bilder hochladen
-
{
- e.preventDefault()
- e.currentTarget.classList.add('border-primary-500', 'bg-primary-50')
- }}
- onDragLeave={(e) => {
- e.currentTarget.classList.remove('border-primary-500', 'bg-primary-50')
- }}
- onDrop={(e) => {
- e.preventDefault()
- e.currentTarget.classList.remove('border-primary-500', 'bg-primary-50')
- if (e.dataTransfer.files.length > 0) {
- handleUpload(e.dataTransfer.files)
- }
- }}
- >
-
e.target.files && handleUpload(e.target.files)}
- className="hidden"
- disabled={!selectedSession}
- />
- {uploading ? (
-
-
-
Hochladen & OCR ausfuehren...
-
- ) : (
- <>
-
-
-
-
- Bilder hierher ziehen oder{' '}
- fileInputRef.current?.click()}
- disabled={!selectedSession}
- className="text-primary-600 hover:underline"
- >
- auswaehlen
-
-
-
PNG, JPG (max. 10MB pro Bild)
- >
- )}
-
-
-
- {/* Upload Results */}
- {uploadResults.length > 0 && (
-
-
Upload-Ergebnisse ({uploadResults.length})
-
- {uploadResults.map((result) => (
-
- {result.filename}
-
- {result.ocr_text ? `OCR OK (${Math.round((result.ocr_confidence || 0) * 100)}%)` : 'Kein OCR'}
-
-
- ))}
-
-
- )}
-
- )
- }
-
- // Render Stats Tab
- const renderStatsTab = () => (
-
- {/* Global Stats */}
-
-
-
Gesamt Items
-
{stats?.total_items || 0}
-
-
-
Gelabelt
-
{stats?.labeled_items || 0}
-
-
-
Ausstehend
-
{stats?.pending_items || 0}
-
-
-
OCR-Genauigkeit
-
{stats?.accuracy_rate || 0}%
-
-
-
- {/* Detailed Stats */}
-
-
Details
-
-
-
Bestaetigt
-
{stats?.confirmed_items || 0}
-
-
-
Korrigiert
-
{stats?.corrected_items || 0}
-
-
-
Exportierbar
-
{stats?.exportable_items || 0}
-
-
-
Durchschn. Label-Zeit
-
{stats?.avg_label_time_seconds || 0}s
-
-
-
-
- {/* Progress Bar */}
- {stats?.total_items ? (
-
-
Fortschritt
-
-
- {Math.round((stats.labeled_items / stats.total_items) * 100)}% abgeschlossen
-
-
- ) : null}
-
- )
-
- // Render Export Tab
- const renderExportTab = () => {
- const [exportFormat, setExportFormat] = useState<'generic' | 'trocr' | 'llama_vision'>('generic')
- const [exporting, setExporting] = useState(false)
- const [exportResult, setExportResult] = useState(null)
-
- const handleExport = async () => {
- setExporting(true)
- try {
- const res = await fetch(`${API_BASE}/api/v1/ocr-label/export`, {
- method: 'POST',
- headers: { 'Content-Type': 'application/json' },
- body: JSON.stringify({
- export_format: exportFormat,
- session_id: selectedSession,
- }),
- })
-
- if (res.ok) {
- const data = await res.json()
- setExportResult(data)
- } else {
- setError('Export fehlgeschlagen')
- }
- } catch (err) {
- setError('Netzwerkfehler')
- } finally {
- setExporting(false)
- }
- }
-
- return (
-
-
-
Training-Daten exportieren
-
-
- Export-Format
- setExportFormat(e.target.value as typeof exportFormat)}
- className="w-full px-3 py-2 border border-slate-300 rounded-lg focus:ring-2 focus:ring-primary-500"
- >
- Generic JSON
- TrOCR Fine-Tuning
- Llama Vision Fine-Tuning
-
-
-
-
- Session (optional)
- setSelectedSession(e.target.value || null)}
- className="w-full px-3 py-2 border border-slate-300 rounded-lg focus:ring-2 focus:ring-primary-500"
- >
- Alle Sessions
- {sessions.map((session) => (
- {session.name}
- ))}
-
-
-
-
- {exporting ? 'Exportiere...' : `${stats?.exportable_items || 0} Samples exportieren`}
-
-
-
-
- {exportResult && (
-
-
Export-Ergebnis
-
-
- {exportResult.exported_count} Samples erfolgreich exportiert
-
-
- Batch: {exportResult.batch_id}
-
-
-
-
{JSON.stringify(exportResult.samples?.slice(0, 3), null, 2)}
- {(exportResult.samples?.length || 0) > 3 && (
-
... und {exportResult.samples.length - 3} weitere
- )}
-
-
- )}
-
- )
- }
-
return (
-
- {/* Error Toast */}
+
{error && (
{error}
@@ -905,40 +97,25 @@ export default function OCRLabelingPage() {
)}
- {/* Tabs */}
-
-
-
- {tabs.map((tab) => (
- setActiveTab(tab.id)}
- className={`flex items-center gap-2 px-4 py-3 border-b-2 font-medium text-sm transition-colors ${
- activeTab === tab.id
- ? 'border-primary-500 text-primary-600'
- : 'border-transparent text-slate-500 hover:text-slate-700 hover:border-slate-300'
- }`}
- >
- {tab.icon}
- {tab.name}
-
- ))}
-
-
+
+
+ {tabs.map((tab) => (
+ setActiveTab(tab.id)} className={`flex items-center gap-2 px-4 py-3 border-b-2 font-medium text-sm transition-colors ${activeTab === tab.id ? 'border-primary-500 text-primary-600' : 'border-transparent text-slate-500 hover:text-slate-700 hover:border-slate-300'}`}>
+ {tab.icon}{tab.name}
+
+ ))}
+
- {/* Tab Content */}
{loading ? (
-
+
) : (
<>
- {activeTab === 'labeling' && renderLabelingTab()}
- {activeTab === 'sessions' && renderSessionsTab()}
- {activeTab === 'upload' && renderUploadTab()}
- {activeTab === 'stats' && renderStatsTab()}
- {activeTab === 'export' && renderExportTab()}
+ {activeTab === 'labeling' &&
}
+ {activeTab === 'sessions' &&
}
+ {activeTab === 'upload' &&
{ fetchQueue(); fetchStats() }} onError={setError} />}
+ {activeTab === 'stats' && }
+ {activeTab === 'export' && }
>
)}