[split-required] Split remaining 500-680 LOC files (final batch)

website (17 pages + 3 components):
- multiplayer/wizard, middleware/wizard+test-wizard, communication
- builds/wizard, staff-search, voice, sbom/wizard
- foerderantrag, mail/tasks, tools/communication, sbom
- compliance/evidence, uni-crawler, brandbook (already done)
- CollectionsTab, IngestionTab, RiskHeatmap

backend-lehrer (5 files):
- letters_api (641 → 2), certificates_api (636 → 2)
- alerts_agent/db/models (636 → 3)
- llm_gateway/communication_service (614 → 2)
- game/database already done in prior batch

klausur-service (2 files):
- hybrid_vocab_extractor (664 → 2)
- klausur-service/frontend: api.ts (620 → 3), EHUploadWizard (591 → 2)

voice-service (3 files):
- bqas/rag_judge (618 → 3), runner (529 → 2)
- enhanced_task_orchestrator (519 → 2)

studio-v2 (6 files):
- korrektur/[klausurId] (578 → 4), fairness (569 → 2)
- AlertsWizard (552 → 2), OnboardingWizard (513 → 2)
- korrektur/api.ts (506 → 3), geo-lernwelt (501 → 2)

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
Benjamin Admin
2026-04-25 08:56:45 +02:00
parent b4613e26f3
commit 451365a312
115 changed files with 10694 additions and 13839 deletions

View File

@@ -1,425 +1,42 @@
"""
Hybrid OCR + LLM Vocabulary Extractor
Zweistufiger Ansatz fuer optimale Vokabel-Extraktion:
1. PaddleOCR fuer schnelle, praezise Texterkennung mit Bounding-Boxes
2. qwen2.5:14b (via LLM Gateway) fuer semantische Strukturierung
Split into:
- hybrid_vocab_ocr.py: PaddleOCR integration, parsing, row/column detection
- hybrid_vocab_extractor.py (this file): LLM structuring, public API, barrel re-exports
Vorteile gegenueber reinem Vision LLM:
- 4x schneller (~7-15 Sek vs 30-60 Sek pro Seite)
- Hoehere Genauigkeit bei gedrucktem Text (95-99%)
- Weniger Halluzinationen (LLM korrigiert nur, erfindet nicht)
- Position-basierte Spaltenerkennung moeglich
DATENSCHUTZ: Alle Verarbeitung erfolgt lokal (Mac Mini).
All symbols re-exported for backward compatibility.
"""
import os
import io
import json
import logging
import re
from typing import List, Dict, Any, Optional, Tuple
from dataclasses import dataclass
import uuid
from typing import List, Dict, Any, Tuple
import httpx
import numpy as np
from PIL import Image
# OpenCV is optional - only required for actual image processing
try:
import cv2
CV2_AVAILABLE = True
except ImportError:
cv2 = None
CV2_AVAILABLE = False
# Re-export everything from ocr module for backward compatibility
from hybrid_vocab_ocr import (
OCRRegion,
get_paddle_ocr,
preprocess_image,
run_paddle_ocr,
group_regions_by_rows,
detect_columns,
format_ocr_for_llm,
)
logger = logging.getLogger(__name__)
# Configuration - Use Ollama directly (no separate LLM Gateway)
OLLAMA_URL = os.getenv("OLLAMA_URL", "http://host.docker.internal:11434")
LLM_MODEL = os.getenv("LLM_MODEL", "qwen2.5:14b")
# PaddleOCR - Lazy loading
_paddle_ocr = None
def get_paddle_ocr():
"""
Lazy load PaddleOCR to avoid startup delay.
PaddleOCR 3.x API (released May 2025):
- Only 'lang' parameter confirmed valid
- Removed parameters: use_gpu, device, show_log, det, rec, use_onnx
- GPU/CPU selection is automatic
"""
global _paddle_ocr
if _paddle_ocr is None:
try:
from paddleocr import PaddleOCR
import logging as std_logging
# Suppress verbose logging from PaddleOCR and PaddlePaddle
for logger_name in ['ppocr', 'paddle', 'paddleocr', 'root']:
std_logging.getLogger(logger_name).setLevel(std_logging.WARNING)
# PaddleOCR 3.x: Only use 'lang' parameter
# Try German first, then English, then minimal
try:
_paddle_ocr = PaddleOCR(lang="de")
logger.info("PaddleOCR 3.x initialized (lang=de)")
except Exception as e1:
logger.warning(f"PaddleOCR lang=de failed: {e1}")
try:
_paddle_ocr = PaddleOCR(lang="en")
logger.info("PaddleOCR 3.x initialized (lang=en)")
except Exception as e2:
logger.warning(f"PaddleOCR lang=en failed: {e2}")
_paddle_ocr = PaddleOCR()
logger.info("PaddleOCR 3.x initialized (defaults)")
except Exception as e:
logger.error(f"PaddleOCR initialization failed: {e}")
_paddle_ocr = None
return _paddle_ocr
@dataclass
class OCRRegion:
"""Ein erkannter Textbereich mit Position."""
text: str
confidence: float
x1: int
y1: int
x2: int
y2: int
@property
def center_x(self) -> int:
return (self.x1 + self.x2) // 2
@property
def center_y(self) -> int:
return (self.y1 + self.y2) // 2
# =============================================================================
# OCR Pipeline
# =============================================================================
def preprocess_image(img: Image.Image) -> np.ndarray:
"""
Bildvorverarbeitung fuer bessere OCR-Ergebnisse.
- Konvertierung zu RGB
- Optional: Kontrastverstarkung
Raises:
ImportError: If OpenCV is not available
"""
if not CV2_AVAILABLE:
raise ImportError(
"OpenCV (cv2) is required for image preprocessing. "
"Install with: pip install opencv-python-headless"
)
# PIL zu numpy array
img_array = np.array(img)
# Zu RGB konvertieren falls noetig
if len(img_array.shape) == 2:
# Graustufen zu RGB
img_array = cv2.cvtColor(img_array, cv2.COLOR_GRAY2RGB)
elif img_array.shape[2] == 4:
# RGBA zu RGB
img_array = cv2.cvtColor(img_array, cv2.COLOR_RGBA2RGB)
return img_array
def run_paddle_ocr(image_bytes: bytes) -> Tuple[List[OCRRegion], str]:
"""
Fuehrt PaddleOCR auf einem Bild aus.
PaddleOCR 3.x returns results in format:
- result = ocr.ocr(img) returns list of pages
- Each page contains list of text lines
- Each line: [bbox_points, (text, confidence)]
Returns:
Tuple of (list of OCRRegion, raw_text)
"""
ocr = get_paddle_ocr()
if ocr is None:
logger.error("PaddleOCR not available")
return [], ""
try:
# Bild laden und vorverarbeiten
img = Image.open(io.BytesIO(image_bytes))
img_array = preprocess_image(img)
# OCR ausfuehren - PaddleOCR 3.x API
# Note: cls parameter may not be supported in 3.x, try without it
try:
result = ocr.ocr(img_array)
except TypeError:
# Fallback if ocr() doesn't accept the array directly
logger.warning("Trying alternative OCR call method")
result = ocr.ocr(img_array)
if not result:
logger.warning("PaddleOCR returned empty result")
return [], ""
# Handle different result formats
# PaddleOCR 3.x returns list of OCRResult objects (dict-like)
if isinstance(result, dict):
# Direct dict format with 'rec_texts', 'rec_scores', 'dt_polys'
logger.info("Processing PaddleOCR 3.x dict format")
return _parse_paddleocr_v3_dict(result)
elif isinstance(result, list) and len(result) > 0:
first_item = result[0]
if first_item is None:
logger.warning("PaddleOCR returned None for first page")
return [], ""
# PaddleOCR 3.x: list contains OCRResult objects (dict-like)
# Check if first item has 'rec_texts' key (new format)
if hasattr(first_item, 'get') or isinstance(first_item, dict):
# Try to extract dict keys for new 3.x format
item_dict = dict(first_item) if hasattr(first_item, 'items') else first_item
if 'rec_texts' in item_dict or 'texts' in item_dict:
logger.info("Processing PaddleOCR 3.x OCRResult format")
return _parse_paddleocr_v3_dict(item_dict)
# Check if first item is a list (traditional format)
if isinstance(first_item, list):
# Check if it's the traditional line format [[bbox, (text, conf)], ...]
if len(first_item) > 0 and isinstance(first_item[0], (list, tuple)):
logger.info("Processing PaddleOCR traditional list format")
return _parse_paddleocr_list(first_item)
# Unknown format - try to inspect
logger.warning(f"Unknown result format. Type: {type(first_item)}, Keys: {dir(first_item) if hasattr(first_item, '__dir__') else 'N/A'}")
# Try dict conversion as last resort
try:
item_dict = dict(first_item)
if 'rec_texts' in item_dict:
return _parse_paddleocr_v3_dict(item_dict)
except Exception as e:
logger.warning(f"Could not convert to dict: {e}")
return [], ""
else:
logger.warning(f"Unexpected PaddleOCR result type: {type(result)}")
return [], ""
except Exception as e:
logger.error(f"PaddleOCR execution failed: {e}")
import traceback
logger.error(traceback.format_exc())
return [], ""
def _parse_paddleocr_v3_dict(result: dict) -> Tuple[List[OCRRegion], str]:
"""Parse PaddleOCR 3.x dict format result."""
regions = []
all_text_lines = []
texts = result.get('rec_texts', result.get('texts', []))
scores = result.get('rec_scores', result.get('scores', []))
polys = result.get('dt_polys', result.get('boxes', []))
# Also try rec_boxes which gives direct [x1, y1, x2, y2] format
rec_boxes = result.get('rec_boxes', [])
logger.info(f"PaddleOCR 3.x: {len(texts)} texts, {len(scores)} scores, {len(polys)} polys, {len(rec_boxes)} rec_boxes")
for i, (text, score) in enumerate(zip(texts, scores)):
if not text or not str(text).strip():
continue
# Try to get bounding box - prefer rec_boxes if available
x1, y1, x2, y2 = 0, 0, 100, 50 # Default fallback
if i < len(rec_boxes) and rec_boxes[i] is not None:
# rec_boxes format: [x1, y1, x2, y2] or [[x1, y1, x2, y2]]
box = rec_boxes[i]
try:
if hasattr(box, 'flatten'):
box = box.flatten().tolist()
if len(box) >= 4:
x1, y1, x2, y2 = int(box[0]), int(box[1]), int(box[2]), int(box[3])
except Exception as e:
logger.debug(f"Could not parse rec_box: {e}")
elif i < len(polys) and polys[i] is not None:
# dt_polys format: [[x1,y1], [x2,y2], [x3,y3], [x4,y4]] or numpy array
poly = polys[i]
try:
# Convert numpy array to list if needed
if hasattr(poly, 'tolist'):
poly = poly.tolist()
if len(poly) >= 4:
x_coords = [p[0] for p in poly]
y_coords = [p[1] for p in poly]
x1, y1 = int(min(x_coords)), int(min(y_coords))
x2, y2 = int(max(x_coords)), int(max(y_coords))
except Exception as e:
logger.debug(f"Could not parse polygon: {e}")
region = OCRRegion(
text=text.strip(),
confidence=float(score) if score else 0.5,
x1=x1, y1=y1, x2=x2, y2=y2
)
regions.append(region)
all_text_lines.append(text.strip())
regions.sort(key=lambda r: r.y1)
raw_text = "\n".join(all_text_lines)
logger.info(f"PaddleOCR 3.x extracted {len(regions)} text regions")
return regions, raw_text
def _parse_paddleocr_list(page_result: list) -> Tuple[List[OCRRegion], str]:
"""Parse PaddleOCR traditional list format result."""
regions = []
all_text_lines = []
for line in page_result:
if not line or len(line) < 2:
continue
bbox_points = line[0] # [[x1,y1], [x2,y2], [x3,y3], [x4,y4]]
text_info = line[1]
# Handle different text_info formats
if isinstance(text_info, tuple) and len(text_info) >= 2:
text, confidence = text_info[0], text_info[1]
elif isinstance(text_info, str):
text, confidence = text_info, 0.5
else:
continue
if not text or not text.strip():
continue
# Bounding Box extrahieren
x_coords = [p[0] for p in bbox_points]
y_coords = [p[1] for p in bbox_points]
region = OCRRegion(
text=text.strip(),
confidence=float(confidence),
x1=int(min(x_coords)),
y1=int(min(y_coords)),
x2=int(max(x_coords)),
y2=int(max(y_coords))
)
regions.append(region)
all_text_lines.append(text.strip())
# Regionen nach Y-Position sortieren (oben nach unten)
regions.sort(key=lambda r: r.y1)
raw_text = "\n".join(all_text_lines)
logger.info(f"PaddleOCR extracted {len(regions)} text regions")
return regions, raw_text
def group_regions_by_rows(regions: List[OCRRegion], y_tolerance: int = 20) -> List[List[OCRRegion]]:
"""
Gruppiert Textregionen in Zeilen basierend auf Y-Position.
Args:
regions: Liste von OCRRegion
y_tolerance: Max Y-Differenz um zur gleichen Zeile zu gehoeren
Returns:
Liste von Zeilen, jede Zeile ist eine Liste von OCRRegion sortiert nach X
"""
if not regions:
return []
rows = []
current_row = [regions[0]]
current_y = regions[0].center_y
for region in regions[1:]:
if abs(region.center_y - current_y) <= y_tolerance:
# Gleiche Zeile
current_row.append(region)
else:
# Neue Zeile
# Sortiere aktuelle Zeile nach X
current_row.sort(key=lambda r: r.x1)
rows.append(current_row)
current_row = [region]
current_y = region.center_y
# Letzte Zeile nicht vergessen
if current_row:
current_row.sort(key=lambda r: r.x1)
rows.append(current_row)
return rows
def detect_columns(rows: List[List[OCRRegion]]) -> int:
"""
Erkennt die Anzahl der Spalten basierend auf den Textpositionen.
Returns:
Geschaetzte Spaltenanzahl (2 oder 3 fuer Vokabellisten)
"""
if not rows:
return 2
# Zaehle wie viele Elemente pro Zeile
items_per_row = [len(row) for row in rows if len(row) >= 2]
if not items_per_row:
return 2
# Durchschnitt und haeufigster Wert
avg_items = sum(items_per_row) / len(items_per_row)
if avg_items >= 2.5:
return 3 # 3 Spalten: Englisch | Deutsch | Beispiel
else:
return 2 # 2 Spalten: Englisch | Deutsch
def format_ocr_for_llm(regions: List[OCRRegion]) -> str:
"""
Formatiert OCR-Output fuer LLM-Verarbeitung.
Inkludiert Positionsinformationen fuer bessere Strukturerkennung.
"""
rows = group_regions_by_rows(regions)
num_columns = detect_columns(rows)
lines = []
lines.append(f"Erkannte Spalten: {num_columns}")
lines.append("---")
for row in rows:
if len(row) >= 2:
# Tab-separierte Werte fuer LLM
row_text = "\t".join(r.text for r in row)
lines.append(row_text)
elif len(row) == 1:
lines.append(row[0].text)
return "\n".join(lines)
# =============================================================================
# LLM Strukturierung
# =============================================================================
STRUCTURE_PROMPT = """Du erhältst OCR-Output einer Vokabelliste aus einem englischen Schulbuch.
STRUCTURE_PROMPT = """Du erhaeltst OCR-Output einer Vokabelliste aus einem englischen Schulbuch.
Die Zeilen sind Tab-separiert und enthalten typischerweise:
- 2 Spalten: Englisch | Deutsch
- 3 Spalten: Englisch | Deutsch | Beispielsatz
@@ -429,7 +46,7 @@ OCR-Text:
AUFGABE: Strukturiere die Vokabeln als JSON-Array.
AUSGABE-FORMAT (nur JSON, keine Erklärungen):
AUSGABE-FORMAT (nur JSON, keine Erklaerungen):
{{
"vocabulary": [
{{"english": "to improve", "german": "verbessern", "example": "I want to improve my English."}},
@@ -439,50 +56,32 @@ AUSGABE-FORMAT (nur JSON, keine Erklärungen):
REGELN:
1. Erkenne das Spalten-Layout aus den Tab-Trennungen
2. Korrigiere offensichtliche OCR-Fehler kontextuell (z.B. "vereessern" "verbessern", "0" "o")
3. Bei fehlenden Beispielsätzen: "example": null
4. Überspringe Überschriften, Seitenzahlen, Kapitelnummern
2. Korrigiere offensichtliche OCR-Fehler kontextuell (z.B. "vereessern" -> "verbessern", "0" -> "o")
3. Bei fehlenden Beispielsaetzen: "example": null
4. Ueberspringe Ueberschriften, Seitenzahlen, Kapitelnummern
5. Behalte Wortarten bei wenn vorhanden (n, v, adj am Ende des englischen Worts)
6. Gib NUR valides JSON zurück"""
6. Gib NUR valides JSON zurueck"""
async def structure_vocabulary_with_llm(ocr_text: str) -> List[Dict[str, Any]]:
"""
Verwendet Ollama LLM um OCR-Text zu strukturieren.
Args:
ocr_text: Formatierter OCR-Output
Returns:
Liste von Vokabel-Dictionaries
"""
"""Verwendet Ollama LLM um OCR-Text zu strukturieren."""
prompt = STRUCTURE_PROMPT.format(ocr_text=ocr_text)
try:
async with httpx.AsyncClient(timeout=120.0) as client:
# Use Ollama's native /api/chat endpoint
response = await client.post(
f"{OLLAMA_URL}/api/chat",
json={
"model": LLM_MODEL,
"messages": [
{"role": "user", "content": prompt}
],
"messages": [{"role": "user", "content": prompt}],
"stream": False,
"options": {
"temperature": 0.1,
"num_predict": 4096
}
"options": {"temperature": 0.1, "num_predict": 4096}
}
)
response.raise_for_status()
data = response.json()
content = data.get("message", {}).get("content", "")
logger.info(f"Ollama LLM response received: {len(content)} chars")
# JSON parsen
return parse_llm_vocabulary_json(content)
except httpx.TimeoutException:
@@ -499,37 +98,29 @@ async def structure_vocabulary_with_llm(ocr_text: str) -> List[Dict[str, Any]]:
def parse_llm_vocabulary_json(text: str) -> List[Dict[str, Any]]:
"""Robustes JSON-Parsing des LLM-Outputs."""
try:
# JSON im Text finden
start = text.find('{')
end = text.rfind('}') + 1
if start == -1 or end == 0:
logger.warning("No JSON found in LLM response")
return []
json_str = text[start:end]
data = json.loads(json_str)
vocabulary = data.get("vocabulary", [])
# Validierung
valid_entries = []
for entry in vocabulary:
english = entry.get("english", "").strip()
german = entry.get("german", "").strip()
if english and german:
valid_entries.append({
"english": english,
"german": german,
"english": english, "german": german,
"example": entry.get("example")
})
return valid_entries
except json.JSONDecodeError as e:
logger.error(f"JSON parse error: {e}")
# Fallback: Regex extraction
return extract_vocabulary_regex(text)
except Exception as e:
logger.error(f"Vocabulary parsing failed: {e}")
@@ -544,11 +135,8 @@ def extract_vocabulary_regex(text: str) -> List[Dict[str, Any]]:
vocabulary = []
for english, german in matches:
vocabulary.append({
"english": english.strip(),
"german": german.strip(),
"example": None
"english": english.strip(), "german": german.strip(), "example": None
})
logger.info(f"Regex fallback extracted {len(vocabulary)} entries")
return vocabulary
@@ -558,46 +146,29 @@ def extract_vocabulary_regex(text: str) -> List[Dict[str, Any]]:
# =============================================================================
async def extract_vocabulary_hybrid(
image_bytes: bytes,
page_number: int = 0
image_bytes: bytes, page_number: int = 0
) -> Tuple[List[Dict[str, Any]], float, str]:
"""
Hybrid-Extraktion: PaddleOCR + LLM Strukturierung.
Args:
image_bytes: Bild als Bytes
page_number: Seitennummer (0-indexed) fuer Fehlermeldungen
Returns:
Tuple of (vocabulary_list, confidence, error_message)
"""
"""Hybrid-Extraktion: PaddleOCR + LLM Strukturierung."""
try:
# Step 1: PaddleOCR
logger.info(f"Starting hybrid extraction for page {page_number + 1}")
regions, raw_text = run_paddle_ocr(image_bytes)
if not regions:
return [], 0.0, f"Seite {page_number + 1}: Kein Text erkannt (OCR)"
# Step 2: Formatieren fuer LLM
formatted_text = format_ocr_for_llm(regions)
logger.info(f"Formatted OCR text: {len(formatted_text)} chars")
# Step 3: LLM Strukturierung
vocabulary = await structure_vocabulary_with_llm(formatted_text)
if not vocabulary:
# Fallback: Versuche direkte Zeilen-Analyse
vocabulary = extract_from_rows_directly(regions)
if not vocabulary:
return [], 0.0, f"Seite {page_number + 1}: Keine Vokabeln erkannt"
# Durchschnittliche OCR-Confidence
avg_confidence = sum(r.confidence for r in regions) / len(regions) if regions else 0.0
logger.info(f"Hybrid extraction completed: {len(vocabulary)} entries, {avg_confidence:.2f} confidence")
return vocabulary, avg_confidence, ""
except Exception as e:
@@ -608,10 +179,7 @@ async def extract_vocabulary_hybrid(
def extract_from_rows_directly(regions: List[OCRRegion]) -> List[Dict[str, Any]]:
"""
Direkter Fallback: Extrahiere Vokabeln ohne LLM basierend auf Zeilen-Struktur.
Funktioniert nur bei klarem 2-3 Spalten-Layout.
"""
"""Direkter Fallback: Extrahiere Vokabeln ohne LLM."""
rows = group_regions_by_rows(regions)
vocabulary = []
@@ -620,13 +188,9 @@ def extract_from_rows_directly(regions: List[OCRRegion]) -> List[Dict[str, Any]]
english = row[0].text.strip()
german = row[1].text.strip()
example = row[2].text.strip() if len(row) >= 3 else None
# Einfache Validierung
if english and german and len(english) > 1 and len(german) > 1:
vocabulary.append({
"english": english,
"german": german,
"example": example
"english": english, "german": german, "example": example
})
logger.info(f"Direct row extraction: {len(vocabulary)} entries")

View File

@@ -0,0 +1,300 @@
"""
Hybrid Vocab OCR - PaddleOCR integration and result parsing.
Handles:
- PaddleOCR lazy loading and initialization
- Running OCR on image bytes
- Parsing PaddleOCR v3 dict and traditional list formats
- Grouping regions by rows and detecting columns
"""
import io
import logging
from typing import List, Tuple
from dataclasses import dataclass
import numpy as np
from PIL import Image
# OpenCV is optional at import time so this module can be loaded without it.
# preprocess_image() checks CV2_AVAILABLE and raises ImportError when it is
# False, so cv2 is only touched after that guard.
try:
    import cv2
    CV2_AVAILABLE = True
except ImportError:
    cv2 = None
    CV2_AVAILABLE = False

# Module-level logger used by every function in this file.
logger = logging.getLogger(__name__)

# Cache for the PaddleOCR instance; filled in lazily by get_paddle_ocr().
_paddle_ocr = None
@dataclass
class OCRRegion:
    """A recognised text fragment together with its bounding box.

    ``x1``/``y1`` and ``x2``/``y2`` are the two corners of the box as filled
    in by the result parsers of this module.
    """

    text: str
    confidence: float
    x1: int
    y1: int
    x2: int
    y2: int

    @property
    def center_x(self) -> int:
        """Horizontal midpoint of the box (integer floor division)."""
        horizontal_sum = self.x1 + self.x2
        return horizontal_sum // 2

    @property
    def center_y(self) -> int:
        """Vertical midpoint of the box (integer floor division)."""
        vertical_sum = self.y1 + self.y2
        return vertical_sum // 2
def get_paddle_ocr():
    """Return the shared PaddleOCR instance, creating it on first use.

    The constructor is tried with ``lang="de"``, then ``lang="en"``, and
    finally without arguments; the first variant that succeeds is cached in
    the module-level ``_paddle_ocr``.

    Returns:
        The cached PaddleOCR object, or ``None`` when the ``paddleocr``
        package cannot be imported or every constructor variant fails.
        A failure is not cached, so the next call tries again.
    """
    global _paddle_ocr
    if _paddle_ocr is not None:
        return _paddle_ocr

    try:
        from paddleocr import PaddleOCR

        # Quieten only the OCR stack's own loggers. The name 'root' is
        # deliberately not in this list: since Python 3.9
        # logging.getLogger('root') is the real root logger, and forcing it
        # to WARNING would mute INFO output of the whole application
        # (including the "initialized" messages logged below).
        for noisy_name in ("ppocr", "paddle", "paddleocr"):
            logging.getLogger(noisy_name).setLevel(logging.WARNING)

        # Preferred languages first; fall back to library defaults if both
        # language-specific constructions fail.
        for lang in ("de", "en"):
            try:
                _paddle_ocr = PaddleOCR(lang=lang)
                logger.info(f"PaddleOCR 3.x initialized (lang={lang})")
                break
            except Exception as lang_error:
                logger.warning(f"PaddleOCR lang={lang} failed: {lang_error}")
        else:
            _paddle_ocr = PaddleOCR()
            logger.info("PaddleOCR 3.x initialized (defaults)")
    except Exception as init_error:
        # Covers a missing package as well as a failing default constructor.
        logger.error(f"PaddleOCR initialization failed: {init_error}")
        _paddle_ocr = None
    return _paddle_ocr
def preprocess_image(img: Image.Image) -> np.ndarray:
    """Convert a Pillow image into a three-channel RGB array for OCR.

    Args:
        img: Image as loaded by Pillow.

    Returns:
        The pixel data as a numpy array with three colour channels.

    Raises:
        ImportError: If OpenCV (cv2) is not installed.
    """
    if not CV2_AVAILABLE:
        raise ImportError(
            "OpenCV (cv2) is required for image preprocessing. "
            "Install with: pip install opencv-python-headless"
        )

    # Only L, RGB and RGBA map cleanly onto the array handling below.
    # Palette ("P") images would be read as grayscale palette indices, "LA"
    # would pass through with two channels, and CMYK would be mistaken for
    # RGBA, so let Pillow normalise every other mode first. getattr keeps
    # plain array-likes (which have no mode) working as before.
    mode = getattr(img, "mode", None)
    if mode is not None and mode not in ("L", "RGB", "RGBA"):
        img = img.convert("RGB")

    pixels = np.array(img)
    if pixels.ndim == 2:
        # Grayscale -> RGB
        return cv2.cvtColor(pixels, cv2.COLOR_GRAY2RGB)
    if pixels.shape[2] == 4:
        # RGBA -> RGB (alpha channel is dropped)
        return cv2.cvtColor(pixels, cv2.COLOR_RGBA2RGB)
    return pixels
def run_paddle_ocr(image_bytes: bytes) -> Tuple[List[OCRRegion], str]:
    """Run PaddleOCR on an encoded image and normalise the result.

    Only the first entry of a list result (the first page) is evaluated.
    Two result shapes are understood: the 3.x dict-like shape (keys such as
    ``rec_texts``) and the traditional list of ``[bbox, (text, conf)]``
    lines.

    Args:
        image_bytes: Encoded image file content.

    Returns:
        ``(regions, raw_text)``. ``([], "")`` is returned when OCR is not
        available, nothing was recognised, the result shape is unknown, or
        any error occurred (errors are logged, never raised).
    """
    ocr = get_paddle_ocr()
    if ocr is None:
        logger.error("PaddleOCR not available")
        return [], ""

    try:
        with Image.open(io.BytesIO(image_bytes)) as img:
            img_array = preprocess_image(img)

        # A single call: retrying the identical call after a TypeError can
        # only fail the same way, so errors go straight to the handler below.
        result = ocr.ocr(img_array)
        if not result:
            logger.warning("PaddleOCR returned empty result")
            return [], ""

        if isinstance(result, dict):
            logger.info("Processing PaddleOCR 3.x dict format")
            return _parse_paddleocr_v3_dict(result)

        if not isinstance(result, list):
            logger.warning(f"Unexpected PaddleOCR result type: {type(result)}")
            return [], ""

        first_item = result[0]
        if first_item is None:
            logger.warning("PaddleOCR returned None for first page")
            return [], ""

        # 3.x: the list holds dict-like result objects.
        if hasattr(first_item, 'get') or isinstance(first_item, dict):
            item_dict = dict(first_item) if hasattr(first_item, 'items') else first_item
            if 'rec_texts' in item_dict or 'texts' in item_dict:
                logger.info("Processing PaddleOCR 3.x OCRResult format")
                return _parse_paddleocr_v3_dict(item_dict)

        # Traditional: the page is a list of [bbox, (text, conf)] lines.
        if isinstance(first_item, list):
            if len(first_item) > 0 and isinstance(first_item[0], (list, tuple)):
                logger.info("Processing PaddleOCR traditional list format")
                return _parse_paddleocr_list(first_item)

        logger.warning(f"Unknown result format. Type: {type(first_item)}")
        # Last resort: anything dict() can digest. Accept the same two key
        # names as the dict-like branch above.
        try:
            item_dict = dict(first_item)
            if 'rec_texts' in item_dict or 'texts' in item_dict:
                return _parse_paddleocr_v3_dict(item_dict)
        except Exception as e:
            logger.warning(f"Could not convert to dict: {e}")
        return [], ""
    except Exception as e:
        # logger.exception records the message together with the traceback.
        logger.exception(f"PaddleOCR execution failed: {e}")
        return [], ""
def _bbox_from_rec_box(box):
    """Return ``(x1, y1, x2, y2)`` from a rec_boxes entry, or None if unusable."""
    try:
        if hasattr(box, 'flatten'):
            box = box.flatten().tolist()
        if len(box) >= 4:
            return int(box[0]), int(box[1]), int(box[2]), int(box[3])
    except Exception as e:
        logger.debug(f"Could not parse rec_box: {e}")
    return None


def _bbox_from_polygon(poly):
    """Return the bounding box of a point polygon, or None if unusable."""
    try:
        if hasattr(poly, 'tolist'):
            poly = poly.tolist()
        if len(poly) >= 4:
            x_coords = [p[0] for p in poly]
            y_coords = [p[1] for p in poly]
            return (
                int(min(x_coords)), int(min(y_coords)),
                int(max(x_coords)), int(max(y_coords)),
            )
    except Exception as e:
        logger.debug(f"Could not parse polygon: {e}")
    return None


def _parse_paddleocr_v3_dict(result: dict) -> Tuple[List[OCRRegion], str]:
    """Parse a PaddleOCR 3.x dict-style result.

    Reads texts from ``rec_texts`` (or ``texts``), scores from ``rec_scores``
    (or ``scores``) and boxes from ``rec_boxes``, falling back to
    ``dt_polys`` (or ``boxes``).

    Args:
        result: Mapping holding the parallel result sequences.

    Returns:
        ``(regions, raw_text)``: regions sorted by ``y1``; ``raw_text`` is
        the non-empty texts joined by newlines in their original order.
    """
    texts = result.get('rec_texts', result.get('texts', []))
    scores = result.get('rec_scores', result.get('scores', []))
    polys = result.get('dt_polys', result.get('boxes', []))
    rec_boxes = result.get('rec_boxes', [])
    logger.info(f"PaddleOCR 3.x: {len(texts)} texts, {len(scores)} scores, {len(polys)} polys, {len(rec_boxes)} rec_boxes")

    regions = []
    all_text_lines = []
    # Iterate over the texts themselves rather than zip(texts, scores), so a
    # missing or shorter score list no longer silently drops recognised text.
    for i, text in enumerate(texts):
        if not text or not str(text).strip():
            continue
        cleaned = str(text).strip()

        # Only a missing score gets the neutral 0.5; a genuine 0.0 is kept.
        score = scores[i] if i < len(scores) else None
        confidence = 0.5 if score is None else float(score)

        # Prefer rec_boxes, then the detection polygon, then a placeholder.
        bbox = None
        if i < len(rec_boxes) and rec_boxes[i] is not None:
            bbox = _bbox_from_rec_box(rec_boxes[i])
        if bbox is None and i < len(polys) and polys[i] is not None:
            bbox = _bbox_from_polygon(polys[i])
        if bbox is None:
            bbox = (0, 0, 100, 50)
        x1, y1, x2, y2 = bbox

        regions.append(OCRRegion(
            text=cleaned, confidence=confidence,
            x1=x1, y1=y1, x2=x2, y2=y2
        ))
        all_text_lines.append(cleaned)

    regions.sort(key=lambda r: r.y1)
    raw_text = "\n".join(all_text_lines)
    logger.info(f"PaddleOCR 3.x extracted {len(regions)} text regions")
    return regions, raw_text
def _parse_paddleocr_list(page_result: list) -> Tuple[List[OCRRegion], str]:
    """Parse one page of the traditional PaddleOCR list result.

    Each usable line looks like ``[bbox_points, (text, confidence)]`` or
    ``[bbox_points, text]``; ``bbox_points`` is a sequence of ``[x, y]``
    points. Lines that do not fit are skipped individually.

    Args:
        page_result: The list of lines of a single page.

    Returns:
        ``(regions, raw_text)``: regions sorted by ``y1``; ``raw_text`` is
        the kept texts joined by newlines in their original order.
    """
    regions = []
    all_text_lines = []
    for line in page_result:
        if not line or len(line) < 2:
            continue
        bbox_points, text_info = line[0], line[1]

        # The caller accepts list *or* tuple lines, so accept both shapes for
        # the (text, confidence) pair as well.
        if isinstance(text_info, (tuple, list)) and len(text_info) >= 2:
            text, confidence = text_info[0], text_info[1]
        elif isinstance(text_info, str):
            text, confidence = text_info, 0.5
        else:
            continue
        if not isinstance(text, str) or not text.strip():
            continue

        # A single malformed entry must not discard the whole page.
        try:
            x_coords = [p[0] for p in bbox_points]
            y_coords = [p[1] for p in bbox_points]
            region = OCRRegion(
                text=text.strip(), confidence=float(confidence),
                x1=int(min(x_coords)), y1=int(min(y_coords)),
                x2=int(max(x_coords)), y2=int(max(y_coords))
            )
        except (TypeError, ValueError, IndexError) as e:
            logger.debug(f"Skipping malformed OCR line: {e}")
            continue

        regions.append(region)
        all_text_lines.append(text.strip())

    # Top-to-bottom order
    regions.sort(key=lambda r: r.y1)
    raw_text = "\n".join(all_text_lines)
    logger.info(f"PaddleOCR extracted {len(regions)} text regions")
    return regions, raw_text
def group_regions_by_rows(regions: "List[OCRRegion]", y_tolerance: int = 20) -> "List[List[OCRRegion]]":
    """Split regions into text rows using their vertical centres.

    Regions are consumed in the given order. A region joins the current row
    while its ``center_y`` is within ``y_tolerance`` of the ``center_y`` of
    the region that opened that row; otherwise it opens a new row.

    Args:
        regions: Regions in the order they should be scanned.
        y_tolerance: Largest vertical centre distance still counted as the
            same row.

    Returns:
        The rows in scan order, each row sorted by ``x1``.
    """
    if not regions:
        return []

    grouped = []
    anchor_y = None  # center_y of the region that started the open row
    for item in regions:
        if grouped and abs(item.center_y - anchor_y) <= y_tolerance:
            grouped[-1].append(item)
            continue
        grouped.append([item])
        anchor_y = item.center_y

    # Left-to-right order inside every row.
    for cells in grouped:
        cells.sort(key=lambda r: r.x1)
    return grouped
def detect_columns(rows: "List[List[OCRRegion]]") -> int:
    """Estimate whether the rows form a two- or three-column layout.

    Rows with fewer than two entries are ignored. The result is 3 when the
    remaining rows hold at least 2.5 entries on average, otherwise 2; it is
    also 2 when there is nothing to measure.
    """
    if not rows:
        return 2

    multi_cell_counts = [len(cells) for cells in rows if len(cells) > 1]
    if not multi_cell_counts:
        return 2

    mean_cells = sum(multi_cell_counts) / len(multi_cell_counts)
    if mean_cells >= 2.5:
        return 3
    return 2
def format_ocr_for_llm(regions: List[OCRRegion]) -> str:
    """Render OCR regions as the text block that is handed to the LLM.

    The first line states the detected column count, the second is a
    ``---`` separator, and every following line is one row with its cell
    texts joined by tabs.
    """
    grouped_rows = group_regions_by_rows(regions)
    header = f"Erkannte Spalten: {detect_columns(grouped_rows)}"
    # Joining a single-cell row yields just that cell's text.
    body = ["\t".join(cell.text for cell in cells) for cells in grouped_rows if cells]
    return "\n".join([header, "---", *body])

View File

@@ -1,586 +1,137 @@
/**
* BYOEH Upload Wizard Component
*
* 5-step wizard for uploading Erwartungshorizonte with client-side encryption:
* 1. File Selection - Choose PDF file
* 2. Metadata - Title, Subject, Niveau, Year
* 3. Rights Confirmation - Legal acknowledgment (required)
* 4. Encryption - Set passphrase (2x confirmation)
* 5. Summary & Upload - Review and confirm
* 5-step wizard for uploading Erwartungshorizonte with client-side encryption.
* Step content extracted to eh-wizard/EHWizardSteps.tsx.
*/
import { useState, useEffect, useCallback } from 'react'
import {
encryptFile,
generateSalt,
isEncryptionSupported
} from '../services/encryption'
import { encryptFile, generateSalt, isEncryptionSupported } from '../services/encryption'
import { ehApi } from '../services/api'
interface EHMetadata {
title: string
subject: string
niveau: 'eA' | 'gA'
year: number
aufgaben_nummer?: string
}
import { FileStep, MetadataStep, RightsStep, EncryptionStep, SummaryStep } from './eh-wizard/EHWizardSteps'
import type { EHMetadata } from './eh-wizard/EHWizardSteps'
interface EHUploadWizardProps {
onClose: () => void
onComplete?: (ehId: string) => void
onSuccess?: (ehId: string) => void // Legacy alias for onComplete
onSuccess?: (ehId: string) => void
klausurSubject?: string
klausurYear?: number
defaultSubject?: string // Alias for klausurSubject
defaultYear?: number // Alias for klausurYear
klausurId?: string // If provided, automatically link EH to this Klausur
defaultSubject?: string
defaultYear?: number
klausurId?: string
}
type WizardStep = 'file' | 'metadata' | 'rights' | 'encryption' | 'summary'
const WIZARD_STEPS: WizardStep[] = ['file', 'metadata', 'rights', 'encryption', 'summary']
const STEP_LABELS: Record<WizardStep, string> = { file: 'Datei', metadata: 'Metadaten', rights: 'Rechte', encryption: 'Verschluesselung', summary: 'Zusammenfassung' }
const STEP_LABELS: Record<WizardStep, string> = {
file: 'Datei',
metadata: 'Metadaten',
rights: 'Rechte',
encryption: 'Verschluesselung',
summary: 'Zusammenfassung'
}
const SUBJECTS = [
'deutsch', 'englisch', 'mathematik', 'physik', 'chemie', 'biologie',
'geschichte', 'politik', 'erdkunde', 'kunst', 'musik', 'sport',
'informatik', 'latein', 'franzoesisch', 'spanisch'
]
const RIGHTS_TEXT = `Ich bestaetige hiermit, dass:
1. Ich das Urheberrecht oder die notwendigen Nutzungsrechte an diesem
Erwartungshorizont besitze.
2. Breakpilot diesen Erwartungshorizont NICHT fuer KI-Training verwendet,
sondern ausschliesslich fuer RAG-gestuetzte Korrekturvorschlaege
in meinem persoenlichen Arbeitsbereich.
3. Der Inhalt verschluesselt gespeichert wird und Breakpilot-Mitarbeiter
keinen Zugriff auf den Klartext haben.
4. Ich diesen Erwartungshorizont jederzeit loeschen kann.`
function EHUploadWizard({
onClose,
onComplete,
onSuccess,
klausurSubject,
klausurYear,
defaultSubject,
defaultYear,
klausurId
}: EHUploadWizardProps) {
// Resolve aliases
function EHUploadWizard({ onClose, onComplete, onSuccess, klausurSubject, klausurYear, defaultSubject, defaultYear, klausurId }: EHUploadWizardProps) {
const effectiveSubject = klausurSubject || defaultSubject || 'deutsch'
const effectiveYear = klausurYear || defaultYear || new Date().getFullYear()
const handleComplete = onComplete || onSuccess || (() => {})
// Step state
const [currentStep, setCurrentStep] = useState<WizardStep>('file')
// File step
const [selectedFile, setSelectedFile] = useState<File | null>(null)
const [fileError, setFileError] = useState<string | null>(null)
// Metadata step
const [metadata, setMetadata] = useState<EHMetadata>({
title: '',
subject: effectiveSubject,
niveau: 'eA',
year: effectiveYear,
aufgaben_nummer: ''
})
// Rights step
const [metadata, setMetadata] = useState<EHMetadata>({ title: '', subject: effectiveSubject, niveau: 'eA', year: effectiveYear, aufgaben_nummer: '' })
const [rightsConfirmed, setRightsConfirmed] = useState(false)
// Encryption step
const [passphrase, setPassphrase] = useState('')
const [passphraseConfirm, setPassphraseConfirm] = useState('')
const [showPassphrase, setShowPassphrase] = useState(false)
const [passphraseStrength, setPassphraseStrength] = useState<'weak' | 'medium' | 'strong'>('weak')
// Upload state
const [uploading, setUploading] = useState(false)
const [uploadProgress, setUploadProgress] = useState(0)
const [uploadError, setUploadError] = useState<string | null>(null)
// Check encryption support
const encryptionSupported = isEncryptionSupported()
// Calculate passphrase strength
useEffect(() => {
if (passphrase.length < 8) {
setPassphraseStrength('weak')
} else if (passphrase.length < 12 || !/\d/.test(passphrase) || !/[A-Z]/.test(passphrase)) {
setPassphraseStrength('medium')
} else {
setPassphraseStrength('strong')
}
if (passphrase.length < 8) setPassphraseStrength('weak')
else if (passphrase.length < 12 || !/\d/.test(passphrase) || !/[A-Z]/.test(passphrase)) setPassphraseStrength('medium')
else setPassphraseStrength('strong')
}, [passphrase])
// Step navigation
const currentStepIndex = WIZARD_STEPS.indexOf(currentStep)
const isFirstStep = currentStepIndex === 0
const isLastStep = currentStepIndex === WIZARD_STEPS.length - 1
const goNext = useCallback(() => {
if (!isLastStep) {
setCurrentStep(WIZARD_STEPS[currentStepIndex + 1])
}
}, [currentStepIndex, isLastStep])
const goNext = useCallback(() => { if (!isLastStep) setCurrentStep(WIZARD_STEPS[currentStepIndex + 1]) }, [currentStepIndex, isLastStep])
const goBack = useCallback(() => { if (!isFirstStep) setCurrentStep(WIZARD_STEPS[currentStepIndex - 1]) }, [currentStepIndex, isFirstStep])
const goBack = useCallback(() => {
if (!isFirstStep) {
setCurrentStep(WIZARD_STEPS[currentStepIndex - 1])
}
}, [currentStepIndex, isFirstStep])
// Validation
const isStepValid = useCallback((step: WizardStep): boolean => {
switch (step) {
case 'file':
return selectedFile !== null && fileError === null
case 'metadata':
return metadata.title.trim().length > 0 && metadata.subject.length > 0
case 'rights':
return rightsConfirmed
case 'encryption':
return passphrase.length >= 8 && passphrase === passphraseConfirm
case 'summary':
return true
default:
return false
case 'file': return selectedFile !== null && fileError === null
case 'metadata': return metadata.title.trim().length > 0 && metadata.subject.length > 0
case 'rights': return rightsConfirmed
case 'encryption': return passphrase.length >= 8 && passphrase === passphraseConfirm
case 'summary': return true
default: return false
}
}, [selectedFile, fileError, metadata, rightsConfirmed, passphrase, passphraseConfirm])
// File handling
const handleFileSelect = (e: React.ChangeEvent<HTMLInputElement>) => {
const file = e.target.files?.[0]
if (file) {
if (file.type !== 'application/pdf') {
setFileError('Nur PDF-Dateien sind erlaubt')
setSelectedFile(null)
return
}
if (file.size > 50 * 1024 * 1024) { // 50MB limit
setFileError('Datei ist zu gross (max. 50MB)')
setSelectedFile(null)
return
}
setFileError(null)
setSelectedFile(file)
// Auto-fill title from filename
if (!metadata.title) {
const name = file.name.replace(/\.pdf$/i, '').replace(/[_-]/g, ' ')
setMetadata(prev => ({ ...prev, title: name }))
}
if (file.type !== 'application/pdf') { setFileError('Nur PDF-Dateien sind erlaubt'); setSelectedFile(null); return }
if (file.size > 50 * 1024 * 1024) { setFileError('Datei ist zu gross (max. 50MB)'); setSelectedFile(null); return }
setFileError(null); setSelectedFile(file)
if (!metadata.title) { const name = file.name.replace(/\.pdf$/i, '').replace(/[_-]/g, ' '); setMetadata(prev => ({ ...prev, title: name })) }
}
}
// Upload handler
const handleUpload = async () => {
if (!selectedFile || !encryptionSupported) return
setUploading(true)
setUploadProgress(10)
setUploadError(null)
setUploading(true); setUploadProgress(10); setUploadError(null)
try {
// Step 1: Generate salt (used via encrypted.salt below)
generateSalt()
setUploadProgress(20)
// Step 2: Encrypt file client-side
const encrypted = await encryptFile(selectedFile, passphrase)
setUploadProgress(50)
// Step 3: Create form data
generateSalt(); setUploadProgress(20)
const encrypted = await encryptFile(selectedFile, passphrase); setUploadProgress(50)
const formData = new FormData()
const encryptedBlob = new Blob([encrypted.encryptedData], { type: 'application/octet-stream' })
formData.append('file', encryptedBlob, 'encrypted.bin')
const metadataJson = JSON.stringify({
metadata: {
title: metadata.title,
subject: metadata.subject,
niveau: metadata.niveau,
year: metadata.year,
aufgaben_nummer: metadata.aufgaben_nummer || null
},
encryption_key_hash: encrypted.keyHash,
salt: encrypted.salt,
rights_confirmed: true,
original_filename: selectedFile.name
})
formData.append('metadata_json', metadataJson)
formData.append('file', new Blob([encrypted.encryptedData], { type: 'application/octet-stream' }), 'encrypted.bin')
formData.append('metadata_json', JSON.stringify({
metadata: { title: metadata.title, subject: metadata.subject, niveau: metadata.niveau, year: metadata.year, aufgaben_nummer: metadata.aufgaben_nummer || null },
encryption_key_hash: encrypted.keyHash, salt: encrypted.salt, rights_confirmed: true, original_filename: selectedFile.name
}))
setUploadProgress(70)
// Step 4: Upload to server
const response = await ehApi.uploadEH(formData)
setUploadProgress(90)
// Step 5: Link to Klausur if klausurId provided
if (klausurId && response.id) {
try {
await ehApi.linkToKlausur(response.id, klausurId)
} catch (linkError) {
console.warn('Failed to auto-link EH to Klausur:', linkError)
// Don't fail the whole upload if linking fails
}
}
setUploadProgress(100)
// Success!
handleComplete(response.id)
} catch (error) {
console.error('Upload failed:', error)
setUploadError(error instanceof Error ? error.message : 'Upload fehlgeschlagen')
} finally {
setUploading(false)
}
const response = await ehApi.uploadEH(formData); setUploadProgress(90)
if (klausurId && response.id) { try { await ehApi.linkToKlausur(response.id, klausurId) } catch (linkError) { console.warn('Failed to auto-link EH:', linkError) } }
setUploadProgress(100); handleComplete(response.id)
} catch (error) { console.error('Upload failed:', error); setUploadError(error instanceof Error ? error.message : 'Upload fehlgeschlagen') }
finally { setUploading(false) }
}
// Render step content
const renderStepContent = () => {
switch (currentStep) {
case 'file':
return (
<div className="eh-wizard-step">
<h3>Erwartungshorizont hochladen</h3>
<p className="eh-wizard-description">
Waehlen Sie die PDF-Datei Ihres Erwartungshorizonts aus.
Die Datei wird verschluesselt und kann nur von Ihnen entschluesselt werden.
</p>
<div className="eh-file-drop">
<input
type="file"
accept=".pdf"
onChange={handleFileSelect}
id="eh-file-input"
/>
<label htmlFor="eh-file-input" className="eh-file-label">
{selectedFile ? (
<>
<span className="eh-file-icon">&#128196;</span>
<span className="eh-file-name">{selectedFile.name}</span>
<span className="eh-file-size">
({(selectedFile.size / 1024 / 1024).toFixed(2)} MB)
</span>
</>
) : (
<>
<span className="eh-file-icon">&#128194;</span>
<span>PDF-Datei hier ablegen oder klicken</span>
</>
)}
</label>
</div>
{fileError && <p className="eh-error">{fileError}</p>}
{!encryptionSupported && (
<p className="eh-warning">
Ihr Browser unterstuetzt keine Verschluesselung.
Bitte verwenden Sie einen modernen Browser (Chrome, Firefox, Safari, Edge).
</p>
)}
</div>
)
case 'metadata':
return (
<div className="eh-wizard-step">
<h3>Metadaten</h3>
<p className="eh-wizard-description">
Geben Sie Informationen zum Erwartungshorizont ein.
</p>
<div className="eh-form-group">
<label htmlFor="eh-title">Titel *</label>
<input
id="eh-title"
type="text"
value={metadata.title}
onChange={e => setMetadata(prev => ({ ...prev, title: e.target.value }))}
placeholder="z.B. Deutsch Abitur 2024 Aufgabe 1"
/>
</div>
<div className="eh-form-row">
<div className="eh-form-group">
<label htmlFor="eh-subject">Fach *</label>
<select
id="eh-subject"
value={metadata.subject}
onChange={e => setMetadata(prev => ({ ...prev, subject: e.target.value }))}
>
{SUBJECTS.map(s => (
<option key={s} value={s}>
{s.charAt(0).toUpperCase() + s.slice(1)}
</option>
))}
</select>
</div>
<div className="eh-form-group">
<label htmlFor="eh-niveau">Niveau *</label>
<select
id="eh-niveau"
value={metadata.niveau}
onChange={e => setMetadata(prev => ({ ...prev, niveau: e.target.value as 'eA' | 'gA' }))}
>
<option value="eA">Erhoehtes Anforderungsniveau (eA)</option>
<option value="gA">Grundlegendes Anforderungsniveau (gA)</option>
</select>
</div>
</div>
<div className="eh-form-row">
<div className="eh-form-group">
<label htmlFor="eh-year">Jahr *</label>
<input
id="eh-year"
type="number"
min={2000}
max={2050}
value={metadata.year}
onChange={e => setMetadata(prev => ({ ...prev, year: parseInt(e.target.value) }))}
/>
</div>
<div className="eh-form-group">
<label htmlFor="eh-aufgabe">Aufgabennummer</label>
<input
id="eh-aufgabe"
type="text"
value={metadata.aufgaben_nummer || ''}
onChange={e => setMetadata(prev => ({ ...prev, aufgaben_nummer: e.target.value }))}
placeholder="z.B. 1a, 2.1"
/>
</div>
</div>
</div>
)
case 'rights':
return (
<div className="eh-wizard-step">
<h3>Rechte-Bestaetigung</h3>
<p className="eh-wizard-description">
Bitte lesen und bestaetigen Sie die folgenden Bedingungen.
</p>
<div className="eh-rights-box">
<pre>{RIGHTS_TEXT}</pre>
</div>
<div className="eh-checkbox-group">
<input
type="checkbox"
id="eh-rights-confirm"
checked={rightsConfirmed}
onChange={e => setRightsConfirmed(e.target.checked)}
/>
<label htmlFor="eh-rights-confirm">
Ich habe die Bedingungen gelesen und stimme ihnen zu.
</label>
</div>
<div className="eh-info-box">
<strong>Wichtig:</strong> Ihr Erwartungshorizont wird niemals fuer
KI-Training verwendet. Er dient ausschliesslich als Referenz fuer
Ihre persoenlichen Korrekturvorschlaege.
</div>
</div>
)
case 'encryption':
return (
<div className="eh-wizard-step">
<h3>Verschluesselung</h3>
<p className="eh-wizard-description">
Waehlen Sie ein sicheres Passwort fuer Ihren Erwartungshorizont.
Dieses Passwort wird <strong>niemals</strong> an den Server gesendet.
</p>
<div className="eh-form-group">
<label htmlFor="eh-passphrase">Passwort *</label>
<div className="eh-password-input">
<input
id="eh-passphrase"
type={showPassphrase ? 'text' : 'password'}
value={passphrase}
onChange={e => setPassphrase(e.target.value)}
placeholder="Mindestens 8 Zeichen"
/>
<button
type="button"
className="eh-toggle-password"
onClick={() => setShowPassphrase(!showPassphrase)}
>
{showPassphrase ? '&#128065;' : '&#128064;'}
</button>
</div>
<div className={`eh-password-strength eh-strength-${passphraseStrength}`}>
Staerke: {passphraseStrength === 'weak' ? 'Schwach' : passphraseStrength === 'medium' ? 'Mittel' : 'Stark'}
</div>
</div>
<div className="eh-form-group">
<label htmlFor="eh-passphrase-confirm">Passwort bestaetigen *</label>
<input
id="eh-passphrase-confirm"
type={showPassphrase ? 'text' : 'password'}
value={passphraseConfirm}
onChange={e => setPassphraseConfirm(e.target.value)}
placeholder="Passwort wiederholen"
/>
{passphraseConfirm && passphrase !== passphraseConfirm && (
<p className="eh-error">Passwoerter stimmen nicht ueberein</p>
)}
</div>
<div className="eh-warning-box">
<strong>Achtung:</strong> Merken Sie sich dieses Passwort gut!
Ohne das Passwort kann der Erwartungshorizont nicht fuer
Korrekturvorschlaege verwendet werden. Breakpilot kann Ihr
Passwort nicht wiederherstellen.
</div>
</div>
)
case 'summary':
return (
<div className="eh-wizard-step">
<h3>Zusammenfassung</h3>
<p className="eh-wizard-description">
Pruefen Sie Ihre Eingaben und starten Sie den Upload.
</p>
<div className="eh-summary-table">
<div className="eh-summary-row">
<span className="eh-summary-label">Datei:</span>
<span className="eh-summary-value">{selectedFile?.name}</span>
</div>
<div className="eh-summary-row">
<span className="eh-summary-label">Titel:</span>
<span className="eh-summary-value">{metadata.title}</span>
</div>
<div className="eh-summary-row">
<span className="eh-summary-label">Fach:</span>
<span className="eh-summary-value">
{metadata.subject.charAt(0).toUpperCase() + metadata.subject.slice(1)}
</span>
</div>
<div className="eh-summary-row">
<span className="eh-summary-label">Niveau:</span>
<span className="eh-summary-value">{metadata.niveau}</span>
</div>
<div className="eh-summary-row">
<span className="eh-summary-label">Jahr:</span>
<span className="eh-summary-value">{metadata.year}</span>
</div>
<div className="eh-summary-row">
<span className="eh-summary-label">Verschluesselung:</span>
<span className="eh-summary-value">AES-256-GCM</span>
</div>
<div className="eh-summary-row">
<span className="eh-summary-label">Rechte bestaetigt:</span>
<span className="eh-summary-value">Ja</span>
</div>
</div>
{uploading && (
<div className="eh-upload-progress">
<div
className="eh-progress-bar"
style={{ width: `${uploadProgress}%` }}
/>
<span>{uploadProgress}%</span>
</div>
)}
{uploadError && (
<p className="eh-error">{uploadError}</p>
)}
</div>
)
default:
return null
case 'file': return <FileStep selectedFile={selectedFile} fileError={fileError} encryptionSupported={encryptionSupported} onFileSelect={handleFileSelect} />
case 'metadata': return <MetadataStep metadata={metadata} onMetadataChange={setMetadata} />
case 'rights': return <RightsStep rightsConfirmed={rightsConfirmed} onRightsConfirmedChange={setRightsConfirmed} />
case 'encryption': return <EncryptionStep passphrase={passphrase} passphraseConfirm={passphraseConfirm} showPassphrase={showPassphrase} passphraseStrength={passphraseStrength} onPassphraseChange={setPassphrase} onPassphraseConfirmChange={setPassphraseConfirm} onToggleShow={() => setShowPassphrase(!showPassphrase)} />
case 'summary': return <SummaryStep selectedFile={selectedFile} metadata={metadata} uploading={uploading} uploadProgress={uploadProgress} uploadError={uploadError} />
default: return null
}
}
return (
<div className="eh-wizard-overlay">
<div className="eh-wizard-modal">
{/* Header */}
<div className="eh-wizard-header">
<h2>Erwartungshorizont hochladen</h2>
<button className="eh-wizard-close" onClick={onClose}>&times;</button>
</div>
{/* Progress */}
<div className="eh-wizard-progress">
{WIZARD_STEPS.map((step, index) => (
<div
key={step}
className={`eh-progress-step ${
index < currentStepIndex ? 'completed' :
index === currentStepIndex ? 'active' : ''
}`}
>
<div className="eh-progress-dot">
{index < currentStepIndex ? '\u2713' : index + 1}
</div>
<div key={step} className={`eh-progress-step ${index < currentStepIndex ? 'completed' : index === currentStepIndex ? 'active' : ''}`}>
<div className="eh-progress-dot">{index < currentStepIndex ? '\u2713' : index + 1}</div>
<span className="eh-progress-label">{STEP_LABELS[step]}</span>
</div>
))}
</div>
{/* Content */}
<div className="eh-wizard-content">
{renderStepContent()}
</div>
{/* Footer */}
<div className="eh-wizard-content">{renderStepContent()}</div>
<div className="eh-wizard-footer">
<button
className="eh-btn eh-btn-secondary"
onClick={isFirstStep ? onClose : goBack}
disabled={uploading}
>
{isFirstStep ? 'Abbrechen' : 'Zurueck'}
</button>
<button className="eh-btn eh-btn-secondary" onClick={isFirstStep ? onClose : goBack} disabled={uploading}>{isFirstStep ? 'Abbrechen' : 'Zurueck'}</button>
{isLastStep ? (
<button
className="eh-btn eh-btn-primary"
onClick={handleUpload}
disabled={uploading || !isStepValid(currentStep)}
>
{uploading ? 'Wird hochgeladen...' : 'Hochladen'}
</button>
<button className="eh-btn eh-btn-primary" onClick={handleUpload} disabled={uploading || !isStepValid(currentStep)}>{uploading ? 'Wird hochgeladen...' : 'Hochladen'}</button>
) : (
<button
className="eh-btn eh-btn-primary"
onClick={goNext}
disabled={!isStepValid(currentStep)}
>
Weiter
</button>
<button className="eh-btn eh-btn-primary" onClick={goNext} disabled={!isStepValid(currentStep)}>Weiter</button>
)}
</div>
</div>

View File

@@ -0,0 +1,168 @@
/**
* EH Upload Wizard Steps - Individual step content renderers
*/
import { useState } from 'react'
/** Metadata of an Erwartungshorizont (EH) as edited in the wizard's metadata step. */
interface EHMetadata {
title: string
subject: string // lowercase subject key; the select in MetadataStep offers the SUBJECTS entries
niveau: 'eA' | 'gA' // 'eA' = erhoehtes, 'gA' = grundlegendes Anforderungsniveau
year: number
aufgaben_nummer?: string // optional task number, e.g. "1a" or "2.1"
}
// Subject keys offered in the subject <select> of MetadataStep.
// Stored lowercase; the UI capitalizes the first letter for display.
const SUBJECTS = [
'deutsch', 'englisch', 'mathematik', 'physik', 'chemie', 'biologie',
'geschichte', 'politik', 'erdkunde', 'kunst', 'musik', 'sport',
'informatik', 'latein', 'franzoesisch', 'spanisch'
]
// Rights-confirmation text rendered verbatim inside a <pre> by RightsStep.
// This is a runtime string: line breaks and spacing are shown exactly as written here.
const RIGHTS_TEXT = `Ich bestaetige hiermit, dass:
1. Ich das Urheberrecht oder die notwendigen Nutzungsrechte an diesem
Erwartungshorizont besitze.
2. Breakpilot diesen Erwartungshorizont NICHT fuer KI-Training verwendet,
sondern ausschliesslich fuer RAG-gestuetzte Korrekturvorschlaege
in meinem persoenlichen Arbeitsbereich.
3. Der Inhalt verschluesselt gespeichert wird und Breakpilot-Mitarbeiter
keinen Zugriff auf den Klartext haben.
4. Ich diesen Erwartungshorizont jederzeit loeschen kann.`
/**
 * Step 1: file selection.
 *
 * Renders the PDF file input with a label that shows either the chosen file
 * (name and size in MB) or a prompt. Below it, shows the validation error and
 * a warning when the browser lacks encryption support.
 *
 * Purely presentational: the parent owns all state and receives the raw
 * change event through `onFileSelect`.
 */
export function FileStep(props: {
  selectedFile: File | null; fileError: string | null; encryptionSupported: boolean;
  onFileSelect: (e: React.ChangeEvent<HTMLInputElement>) => void
}) {
  const { selectedFile, fileError, encryptionSupported, onFileSelect } = props

  // Label body: file details once a file is chosen, otherwise the prompt.
  let labelContent
  if (selectedFile) {
    const sizeInMb = (selectedFile.size / 1024 / 1024).toFixed(2)
    labelContent = (
      <>
        <span className="eh-file-icon">&#128196;</span>
        <span className="eh-file-name">{selectedFile.name}</span>
        <span className="eh-file-size">({sizeInMb} MB)</span>
      </>
    )
  } else {
    labelContent = (
      <>
        <span className="eh-file-icon">&#128194;</span>
        <span>PDF-Datei hier ablegen oder klicken</span>
      </>
    )
  }

  return (
    <div className="eh-wizard-step">
      <h3>Erwartungshorizont hochladen</h3>
      <p className="eh-wizard-description">Waehlen Sie die PDF-Datei Ihres Erwartungshorizonts aus. Die Datei wird verschluesselt und kann nur von Ihnen entschluesselt werden.</p>
      <div className="eh-file-drop">
        <input type="file" accept=".pdf" onChange={onFileSelect} id="eh-file-input" />
        <label htmlFor="eh-file-input" className="eh-file-label">
          {labelContent}
        </label>
      </div>
      {fileError && <p className="eh-error">{fileError}</p>}
      {!encryptionSupported && <p className="eh-warning">Ihr Browser unterstuetzt keine Verschluesselung. Bitte verwenden Sie einen modernen Browser.</p>}
    </div>
  )
}
/**
 * Step 2: metadata form (title, subject, niveau, year, task number).
 *
 * Controlled form: every change produces a full, updated `EHMetadata` object
 * that is handed to `onMetadataChange`; the component keeps no state itself.
 *
 * @param metadata          Current metadata values shown in the form.
 * @param onMetadataChange  Called with the complete updated metadata object.
 */
export function MetadataStep({ metadata, onMetadataChange }: {
  metadata: EHMetadata; onMetadataChange: (metadata: EHMetadata) => void
}) {
  // Merge a partial change into the current metadata and pass the result up.
  const update = (patch: Partial<EHMetadata>) => onMetadataChange({ ...metadata, ...patch })

  // Clearing the number input makes parseInt return NaN. Passing NaN as a
  // controlled `value` triggers a React warning, so render an empty field instead.
  const yearValue = Number.isNaN(metadata.year) ? '' : metadata.year

  return (
    <div className="eh-wizard-step">
      <h3>Metadaten</h3>
      <p className="eh-wizard-description">Geben Sie Informationen zum Erwartungshorizont ein.</p>
      <div className="eh-form-group">
        <label htmlFor="eh-title">Titel *</label>
        <input id="eh-title" type="text" value={metadata.title} onChange={e => update({ title: e.target.value })} placeholder="z.B. Deutsch Abitur 2024 Aufgabe 1" />
      </div>
      <div className="eh-form-row">
        <div className="eh-form-group">
          <label htmlFor="eh-subject">Fach *</label>
          <select id="eh-subject" value={metadata.subject} onChange={e => update({ subject: e.target.value })}>
            {SUBJECTS.map(s => (<option key={s} value={s}>{s.charAt(0).toUpperCase() + s.slice(1)}</option>))}
          </select>
        </div>
        <div className="eh-form-group">
          <label htmlFor="eh-niveau">Niveau *</label>
          <select id="eh-niveau" value={metadata.niveau} onChange={e => update({ niveau: e.target.value as 'eA' | 'gA' })}>
            <option value="eA">Erhoehtes Anforderungsniveau (eA)</option>
            <option value="gA">Grundlegendes Anforderungsniveau (gA)</option>
          </select>
        </div>
      </div>
      <div className="eh-form-row">
        <div className="eh-form-group">
          <label htmlFor="eh-year">Jahr *</label>
          {/* Explicit radix 10: the year is always a decimal number. */}
          <input id="eh-year" type="number" min={2000} max={2050} value={yearValue} onChange={e => update({ year: parseInt(e.target.value, 10) })} />
        </div>
        <div className="eh-form-group">
          <label htmlFor="eh-aufgabe">Aufgabennummer</label>
          <input id="eh-aufgabe" type="text" value={metadata.aufgaben_nummer || ''} onChange={e => update({ aufgaben_nummer: e.target.value })} placeholder="z.B. 1a, 2.1" />
        </div>
      </div>
    </div>
  )
}
/**
 * Step 3: rights confirmation.
 *
 * Shows the rights text in a <pre> block together with a checkbox. The
 * checkbox state lives in the parent and is reported through
 * `onRightsConfirmedChange`.
 */
export function RightsStep(props: {
  rightsConfirmed: boolean; onRightsConfirmedChange: (confirmed: boolean) => void
}) {
  const { rightsConfirmed, onRightsConfirmedChange } = props

  // Forward only the checked flag; the parent does not need the event.
  const handleToggle = (event: React.ChangeEvent<HTMLInputElement>) => {
    onRightsConfirmedChange(event.target.checked)
  }

  return (
    <div className="eh-wizard-step">
      <h3>Rechte-Bestaetigung</h3>
      <p className="eh-wizard-description">Bitte lesen und bestaetigen Sie die folgenden Bedingungen.</p>
      <div className="eh-rights-box">
        <pre>{RIGHTS_TEXT}</pre>
      </div>
      <div className="eh-checkbox-group">
        <input type="checkbox" id="eh-rights-confirm" checked={rightsConfirmed} onChange={handleToggle} />
        <label htmlFor="eh-rights-confirm">Ich habe die Bedingungen gelesen und stimme ihnen zu.</label>
      </div>
      <div className="eh-info-box"><strong>Wichtig:</strong> Ihr Erwartungshorizont wird niemals fuer KI-Training verwendet. Er dient ausschliesslich als Referenz fuer Ihre persoenlichen Korrekturvorschlaege.</div>
    </div>
  )
}
/**
 * Step 4: passphrase entry for client-side encryption.
 *
 * Renders the passphrase and confirmation inputs, a show/hide toggle, the
 * strength indicator and a mismatch error. All values are owned by the
 * parent; this component only reports changes.
 *
 * @param passphrase                Current passphrase value.
 * @param passphraseConfirm         Current confirmation value.
 * @param showPassphrase            When true, both inputs are shown as plain text.
 * @param passphraseStrength        Strength computed by the parent.
 * @param onPassphraseChange        Called with the new passphrase.
 * @param onPassphraseConfirmChange Called with the new confirmation value.
 * @param onToggleShow              Called when the show/hide button is clicked.
 */
export function EncryptionStep({ passphrase, passphraseConfirm, showPassphrase, passphraseStrength, onPassphraseChange, onPassphraseConfirmChange, onToggleShow }: {
  passphrase: string; passphraseConfirm: string; showPassphrase: boolean;
  passphraseStrength: 'weak' | 'medium' | 'strong';
  onPassphraseChange: (v: string) => void; onPassphraseConfirmChange: (v: string) => void; onToggleShow: () => void
}) {
  const inputType = showPassphrase ? 'text' : 'password'
  const strengthLabel = passphraseStrength === 'weak' ? 'Schwach' : passphraseStrength === 'medium' ? 'Mittel' : 'Stark'
  // HTML entities are NOT decoded inside JS string expressions in JSX (only in
  // JSX text), so '&#128065;' would be displayed literally. Use the real code
  // points as surrogate pairs: U+1F441 (eye) and U+1F440 (eyes).
  const toggleIcon = showPassphrase ? '\uD83D\uDC41' : '\uD83D\uDC40'
  const mismatch = passphraseConfirm && passphrase !== passphraseConfirm

  return (
    <div className="eh-wizard-step">
      <h3>Verschluesselung</h3>
      <p className="eh-wizard-description">Waehlen Sie ein sicheres Passwort fuer Ihren Erwartungshorizont. Dieses Passwort wird <strong>niemals</strong> an den Server gesendet.</p>
      <div className="eh-form-group">
        <label htmlFor="eh-passphrase">Passwort *</label>
        <div className="eh-password-input">
          <input id="eh-passphrase" type={inputType} value={passphrase} onChange={e => onPassphraseChange(e.target.value)} placeholder="Mindestens 8 Zeichen" />
          {/* Emoji-only button: give assistive technology a readable name. */}
          <button type="button" className="eh-toggle-password" onClick={onToggleShow} aria-label={showPassphrase ? 'Passwort verbergen' : 'Passwort anzeigen'}>{toggleIcon}</button>
        </div>
        <div className={`eh-password-strength eh-strength-${passphraseStrength}`}>Staerke: {strengthLabel}</div>
      </div>
      <div className="eh-form-group">
        <label htmlFor="eh-passphrase-confirm">Passwort bestaetigen *</label>
        <input id="eh-passphrase-confirm" type={inputType} value={passphraseConfirm} onChange={e => onPassphraseConfirmChange(e.target.value)} placeholder="Passwort wiederholen" />
        {mismatch && <p className="eh-error">Passwoerter stimmen nicht ueberein</p>}
      </div>
      {/* The passphrase cannot be recovered; keep that statement in the warning. */}
      <div className="eh-warning-box"><strong>Achtung:</strong> Merken Sie sich dieses Passwort gut! Ohne das Passwort kann der Erwartungshorizont nicht fuer Korrekturvorschlaege verwendet werden. Breakpilot kann Ihr Passwort nicht wiederherstellen.</div>
    </div>
  )
}
/**
 * Step 5: summary before upload.
 *
 * Lists the chosen file and metadata as label/value rows. While `uploading`
 * is true it shows the progress bar; any `uploadError` is shown below.
 */
export function SummaryStep(props: {
  selectedFile: File | null; metadata: EHMetadata; uploading: boolean;
  uploadProgress: number; uploadError: string | null
}) {
  const { selectedFile, metadata, uploading, uploadProgress, uploadError } = props

  const subjectLabel = metadata.subject.charAt(0).toUpperCase() + metadata.subject.slice(1)

  // One entry per summary row, in display order.
  const rows = [
    { label: 'Datei:', value: selectedFile?.name },
    { label: 'Titel:', value: metadata.title },
    { label: 'Fach:', value: subjectLabel },
    { label: 'Niveau:', value: metadata.niveau },
    { label: 'Jahr:', value: metadata.year },
    { label: 'Verschluesselung:', value: 'AES-256-GCM' },
    { label: 'Rechte bestaetigt:', value: 'Ja' },
  ]

  return (
    <div className="eh-wizard-step">
      <h3>Zusammenfassung</h3>
      <p className="eh-wizard-description">Pruefen Sie Ihre Eingaben und starten Sie den Upload.</p>
      <div className="eh-summary-table">
        {rows.map(row => (
          <div key={row.label} className="eh-summary-row">
            <span className="eh-summary-label">{row.label}</span>
            <span className="eh-summary-value">{row.value}</span>
          </div>
        ))}
      </div>
      {uploading && (
        <div className="eh-upload-progress">
          <div className="eh-progress-bar" style={{ width: `${uploadProgress}%` }} />
          <span>{uploadProgress}%</span>
        </div>
      )}
      {uploadError && <p className="eh-error">{uploadError}</p>}
    </div>
  )
}
export { SUBJECTS, RIGHTS_TEXT }
export type { EHMetadata }

View File

@@ -0,0 +1,92 @@
/**
* BYOEH (Erwartungshorizont) Types
*
* Split from api.ts for file size compliance.
*/
/**
 * An Erwartungshorizont (EH) record as returned by the `/eh` endpoints.
 * Timestamp fields are typed as strings (format not visible here; presumably ISO 8601 — confirm).
 */
export interface Erwartungshorizont {
id: string
tenant_id: string
teacher_id: string
title: string
subject: string
niveau: 'eA' | 'gA'
year: number
aufgaben_nummer: string | null
// Processing state of the record.
status: 'pending_rights' | 'processing' | 'indexed' | 'error'
chunk_count: number
rights_confirmed: boolean
rights_confirmed_at: string | null
indexed_at: string | null
file_size_bytes: number
original_filename: string
training_allowed: boolean
created_at: string
// null while the record is not deleted (presumably a soft-delete marker — confirm)
deleted_at: string | null
}
/** Result of an EH RAG query (`ehApi.ragQuery`): assembled context plus the source chunks it came from. */
export interface EHRAGResult {
context: string
// One entry per retrieved chunk, with its score and the EH it belongs to.
sources: Array<{
text: string; eh_id: string; eh_title: string;
chunk_index: number; score: number; reranked?: boolean
}>
query: string
// Optional diagnostics about the search; every field may be absent.
search_info?: {
retrieval_time_ms?: number; rerank_time_ms?: number; total_time_ms?: number;
reranked?: boolean; rerank_applied?: boolean; hybrid_search_applied?: boolean;
embedding_model?: string; total_candidates?: number; original_count?: number
}
}
/** One audit-log entry as returned by `ehApi.getAuditLog`; `eh_id` is null when the entry is not tied to a specific EH. */
export interface EHAuditEntry {
id: string; eh_id: string | null; tenant_id: string; user_id: string;
action: string; details: Record<string, unknown> | null; created_at: string
}
/**
 * A key share granting another user access to an EH.
 * NOTE(review): `role` does not include 'fachvorsitz', which EHShareInvitation.role allows —
 * confirm whether a share created from such an invitation can carry that role.
 */
export interface EHKeyShare {
id: string; eh_id: string; user_id: string; passphrase_hint: string;
granted_by: string; granted_at: string;
role: 'second_examiner' | 'third_examiner' | 'supervisor' | 'department_head';
klausur_id: string | null; active: boolean
}
/** Link record connecting an EH to a Klausur, including who created the link and when. */
export interface EHKlausurLink {
id: string; eh_id: string; klausur_id: string;
linked_by: string; linked_at: string
}
/** An EH together with the share that grants access to it (element type of `ehApi.getSharedWithMe`). */
export interface SharedEHInfo { eh: Erwartungshorizont; share: EHKeyShare }
/**
 * An EH linked to a Klausur (element type of `klausurEHApi.getLinkedEH`).
 * `share` may be null (presumably when access does not come from a share, e.g. for the owner — confirm).
 */
export interface LinkedEHInfo {
eh: Erwartungshorizont; link: EHKlausurLink;
is_owner: boolean; share: EHKeyShare | null
}
/**
 * Invitation to share an EH with another user.
 * `accepted_at` / `declined_at` are null until the corresponding action has happened.
 */
export interface EHShareInvitation {
id: string; eh_id: string; inviter_id: string; invitee_id: string;
invitee_email: string;
role: 'second_examiner' | 'third_examiner' | 'supervisor' | 'department_head' | 'fachvorsitz';
klausur_id: string | null; message: string | null;
status: 'pending' | 'accepted' | 'declined' | 'expired' | 'revoked';
expires_at: string; created_at: string;
accepted_at: string | null; declined_at: string | null
}
/** A pending invitation plus a reduced view of the EH it refers to; `eh` may be null. */
export interface PendingInvitationInfo {
invitation: EHShareInvitation
eh: { id: string; title: string; subject: string; niveau: string; year: number } | null
}
/** An invitation sent by the current user plus a reduced view of the EH; `eh` may be null. */
export interface SentInvitationInfo {
invitation: EHShareInvitation
eh: { id: string; title: string; subject: string } | null
}
/** Access overview for one EH: its owner, active and revoked shares, and pending invitations. */
export interface EHAccessChain {
eh_id: string; eh_title: string;
owner: { user_id: string; role: string };
active_shares: EHKeyShare[];
pending_invitations: EHShareInvitation[];
revoked_shares: EHKeyShare[]
}

View File

@@ -0,0 +1,98 @@
/**
* BYOEH (Erwartungshorizont) API
*
* Split from api.ts for file size compliance.
*/
import { apiCall, getAuthToken } from './api'
import type {
Erwartungshorizont, EHRAGResult, EHAuditEntry, EHKeyShare,
SharedEHInfo, LinkedEHInfo, PendingInvitationInfo, SentInvitationInfo,
EHAccessChain,
} from './api-eh-types'
/**
 * Client for the BYOEH (Erwartungshorizont) endpoints.
 *
 * All methods except `uploadEH` delegate to `apiCall`. `uploadEH` uses `fetch`
 * directly because it sends multipart form data.
 */
export const ehApi = {
  /** List EH records, optionally filtered by subject and/or year. */
  listEH: (params?: { subject?: string; year?: number }): Promise<Erwartungshorizont[]> => {
    const query = new URLSearchParams()
    if (params?.subject) query.append('subject', params.subject)
    if (params?.year) query.append('year', params.year.toString())
    const queryStr = query.toString()
    return apiCall(`/eh${queryStr ? `?${queryStr}` : ''}`)
  },

  /** Fetch a single EH by id. */
  getEH: (id: string): Promise<Erwartungshorizont> => apiCall(`/eh/${id}`),

  /**
   * Upload an EH as multipart form data.
   *
   * @throws Error with the server-provided `detail` when available, otherwise `HTTP <status>`.
   */
  uploadEH: async (formData: FormData): Promise<Erwartungshorizont> => {
    const token = getAuthToken()
    const headers: Record<string, string> = {}
    if (token) headers['Authorization'] = `Bearer ${token}`
    // No Content-Type header is set here; fetch is given the FormData body as-is.
    const response = await fetch('/api/v1/eh/upload', { method: 'POST', headers, body: formData })
    if (!response.ok) {
      const payload = await response.json().catch(() => ({ detail: 'Upload failed' }))
      // `detail` is not guaranteed to be a string (it may be a structured error
      // list, and the body may even be `null`). Serialize non-string values so
      // the message is never "[object Object]".
      const detail = payload?.detail
      const message = typeof detail === 'string' ? detail : detail ? JSON.stringify(detail) : ''
      throw new Error(message || `HTTP ${response.status}`)
    }
    return response.json()
  },

  /** Delete an EH. */
  deleteEH: (id: string): Promise<{ status: string; id: string }> =>
    apiCall(`/eh/${id}`, { method: 'DELETE' }),

  /** Trigger indexing of an EH; the passphrase is sent in the request body. */
  indexEH: (id: string, passphrase: string): Promise<{ status: string; id: string; chunk_count: number }> =>
    apiCall(`/eh/${id}/index`, { method: 'POST', body: JSON.stringify({ passphrase }) }),

  /** Run a RAG query. `limit` defaults to 5 and `rerank` to false when omitted. */
  ragQuery: (params: { query_text: string; passphrase: string; subject?: string; limit?: number; rerank?: boolean }): Promise<EHRAGResult> =>
    apiCall('/eh/rag-query', {
      method: 'POST',
      body: JSON.stringify({
        query_text: params.query_text,
        passphrase: params.passphrase,
        subject: params.subject,
        limit: params.limit ?? 5,
        rerank: params.rerank ?? false,
      }),
    }),

  /** Fetch audit-log entries, optionally restricted to one EH and/or limited in number. */
  getAuditLog: (ehId?: string, limit?: number): Promise<EHAuditEntry[]> => {
    const query = new URLSearchParams()
    if (ehId) query.append('eh_id', ehId)
    if (limit) query.append('limit', limit.toString())
    const queryStr = query.toString()
    return apiCall(`/eh/audit-log${queryStr ? `?${queryStr}` : ''}`)
  },

  /** Fetch the rights text and its version from the server. */
  getRightsText: (): Promise<{ text: string; version: string }> => apiCall('/eh/rights-text'),

  /** Fetch the Qdrant collection status. */
  getQdrantStatus: (): Promise<{ name: string; vectors_count: number; points_count: number; status: string }> =>
    apiCall('/eh/qdrant-status'),

  // Key Sharing

  /** Share an EH with another user. */
  shareEH: (ehId: string, params: { user_id: string; role: 'second_examiner' | 'third_examiner' | 'supervisor' | 'department_head'; encrypted_passphrase: string; passphrase_hint?: string; klausur_id?: string }): Promise<{ status: string; share_id: string; eh_id: string; shared_with: string; role: string }> =>
    apiCall(`/eh/${ehId}/share`, { method: 'POST', body: JSON.stringify(params) }),

  /** List the shares of an EH. */
  listShares: (ehId: string): Promise<EHKeyShare[]> => apiCall(`/eh/${ehId}/shares`),

  /** Revoke a share of an EH. */
  revokeShare: (ehId: string, shareId: string): Promise<{ status: string; share_id: string }> =>
    apiCall(`/eh/${ehId}/shares/${shareId}`, { method: 'DELETE' }),

  /** List the EH records shared with the current user. */
  getSharedWithMe: (): Promise<SharedEHInfo[]> => apiCall('/eh/shared-with-me'),

  /** Link an EH to a Klausur. */
  linkToKlausur: (ehId: string, klausurId: string): Promise<{ status: string; link_id: string; eh_id: string; klausur_id: string }> =>
    apiCall(`/eh/${ehId}/link-klausur`, { method: 'POST', body: JSON.stringify({ klausur_id: klausurId }) }),

  /** Remove the link between an EH and a Klausur. */
  unlinkFromKlausur: (ehId: string, klausurId: string): Promise<{ status: string; eh_id: string; klausur_id: string }> =>
    apiCall(`/eh/${ehId}/link-klausur/${klausurId}`, { method: 'DELETE' }),

  // Invitation Flow

  /** Invite a user (by e-mail) to an EH. */
  inviteToEH: (ehId: string, params: { invitee_email: string; invitee_id?: string; role: 'second_examiner' | 'third_examiner' | 'supervisor' | 'department_head' | 'fachvorsitz'; klausur_id?: string; message?: string; expires_in_days?: number }): Promise<{ status: string; invitation_id: string; eh_id: string; invitee_email: string; role: string; expires_at: string; eh_title: string }> =>
    apiCall(`/eh/${ehId}/invite`, { method: 'POST', body: JSON.stringify(params) }),

  /** List pending invitations. */
  getPendingInvitations: (): Promise<PendingInvitationInfo[]> => apiCall('/eh/invitations/pending'),

  /** List sent invitations. */
  getSentInvitations: (): Promise<SentInvitationInfo[]> => apiCall('/eh/invitations/sent'),

  /** Accept an invitation, supplying the encrypted passphrase. */
  acceptInvitation: (invitationId: string, encryptedPassphrase: string): Promise<{ status: string; share_id: string; eh_id: string; role: string; klausur_id: string | null }> =>
    apiCall(`/eh/invitations/${invitationId}/accept`, { method: 'POST', body: JSON.stringify({ encrypted_passphrase: encryptedPassphrase }) }),

  /** Decline an invitation. */
  declineInvitation: (invitationId: string): Promise<{ status: string; invitation_id: string; eh_id: string }> =>
    apiCall(`/eh/invitations/${invitationId}/decline`, { method: 'POST' }),

  /** Revoke an invitation. */
  revokeInvitation: (invitationId: string): Promise<{ status: string; invitation_id: string; eh_id: string }> =>
    apiCall(`/eh/invitations/${invitationId}`, { method: 'DELETE' }),

  /** Fetch the access chain (owner, shares, invitations) of an EH. */
  getAccessChain: (ehId: string): Promise<EHAccessChain> => apiCall(`/eh/${ehId}/access-chain`),
}
/** Klausur-scoped EH endpoints. */
export const klausurEHApi = {
  /** Fetch all EH records linked to the given Klausur. */
  getLinkedEH(klausurId: string): Promise<LinkedEHInfo[]> {
    const path = `/klausuren/${klausurId}/linked-eh`
    return apiCall(path)
  },
}

View File

@@ -1,620 +1,123 @@
// API Types
/**
* Klausur Service API - Core types and Klausur/Student API
*
* Split into:
* - api.ts (this file): Core types, auth, base API, klausurApi, uploadStudentWork
* - api-eh-types.ts: BYOEH type definitions
* - api-eh.ts: ehApi and klausurEHApi
*/
// Re-export EH types and API for backward compatibility
export type {
Erwartungshorizont, EHRAGResult, EHAuditEntry, EHKeyShare, EHKlausurLink,
SharedEHInfo, LinkedEHInfo, EHShareInvitation, PendingInvitationInfo,
SentInvitationInfo, EHAccessChain,
} from './api-eh-types'
export { ehApi, klausurEHApi } from './api-eh'
// ============================================================================
// Core Types
// ============================================================================
/**
 * A single student's submission within a Klausur, as returned by the
 * `/klausuren/{id}/students` and `/students/{id}/...` endpoints.
 *
 * Each member is declared exactly once; declaring a member twice in one
 * interface body is a duplicate-identifier error.
 */
export interface StudentKlausur {
  id: string
  klausur_id: string
  student_name: string
  student_id: string | null
  file_path: string | null
  ocr_text: string | null
  status: string
  /** Per-criterion score and annotations, keyed by criterion name. */
  criteria_scores: Record<string, { score: number; annotations: string[] }>
  /** Written assessment; null while none has been stored. */
  gutachten: {
    einleitung: string
    hauptteil: string
    fazit: string
    staerken: string[]
    schwaechen: string[]
  } | null
  raw_points: number
  grade_points: number
  created_at: string
}
/**
 * A Klausur (exam) together with its student submissions, as returned by the
 * `/klausuren` endpoints.
 *
 * Each member is declared exactly once; declaring a member twice in one
 * interface body is a duplicate-identifier error.
 */
export interface Klausur {
  id: string
  title: string
  subject: string
  modus: 'landes_abitur' | 'vorabitur'
  class_id: string | null
  year: number
  semester: string
  /** Free-form expectation data attached to the Klausur; null when absent. */
  erwartungshorizont: Record<string, unknown> | null
  student_count: number
  students: StudentKlausur[]
  created_at: string
  teacher_id: string
}
/**
 * Grading metadata returned by `klausurApi.getGradeInfo` (`/grade-info`).
 *
 * `thresholds` and `labels` are keyed by number, `criteria` by criterion name.
 * Each member is declared exactly once.
 */
export interface GradeInfo {
  thresholds: Record<number, number>
  labels: Record<number, string>
  criteria: Record<string, { weight: number; label: string }>
}
// ============================================================================
// Auth & Base API
// ============================================================================

/**
 * Resolve the bearer token used for API requests.
 *
 * Lookup order:
 *  1. `authToken` exposed on the parent window (when running inside an iframe)
 *  2. the `auth_token` entry in localStorage
 *
 * @returns the token, or null when neither source provides one
 */
export function getAuthToken(): string | null {
  try {
    // Only consult the parent when this document is actually embedded.
    if (window.parent !== window) {
      const parentToken = (window.parent as unknown as { authToken?: string }).authToken
      if (parentToken) return parentToken
    }
  } catch {
    // Cross-origin access to the parent window was denied; fall back below.
  }
  return localStorage.getItem('auth_token')
}
/**
 * Perform a JSON request against the `/api/v1` backend.
 *
 * Adds a JSON `Content-Type` header (caller-supplied headers take precedence)
 * and, when a token is available, an `Authorization: Bearer ...` header.
 *
 * @param endpoint path below `/api/v1`, including the leading slash
 * @param options  fetch options; `headers` are merged with the defaults
 * @returns the parsed JSON response body
 * @throws Error carrying the response's `detail` field, or `HTTP <status>`
 *         when the response is not ok
 */
export async function apiCall<T>(endpoint: string, options: RequestInit = {}): Promise<T> {
  const token = getAuthToken()
  const headers: Record<string, string> = {
    'Content-Type': 'application/json',
    ...(options.headers as Record<string, string> || {}),
  }
  if (token) headers['Authorization'] = `Bearer ${token}`

  const response = await fetch(`/api/v1${endpoint}`, { ...options, headers })
  if (!response.ok) {
    // The error body may not be JSON; fall back to a generic detail message.
    const error = await response.json().catch(() => ({ detail: 'Request failed' }))
    throw new Error(error.detail || `HTTP ${response.status}`)
  }
  return response.json()
}
// ============================================================================
// Klausuren API
// ============================================================================

/**
 * Client for the Klausur and student endpoints. Every method delegates to
 * `apiCall`, so each one rejects with an Error on a non-ok response.
 *
 * Each method is defined exactly once; a repeated key in an object literal
 * is a duplicate-property error.
 */
export const klausurApi = {
  // --- Klausuren ------------------------------------------------------------
  listKlausuren: (): Promise<Klausur[]> => apiCall('/klausuren'),

  getKlausur: (id: string): Promise<Klausur> => apiCall(`/klausuren/${id}`),

  createKlausur: (data: Partial<Klausur>): Promise<Klausur> =>
    apiCall('/klausuren', { method: 'POST', body: JSON.stringify(data) }),

  updateKlausur: (id: string, data: Partial<Klausur>): Promise<Klausur> =>
    apiCall(`/klausuren/${id}`, { method: 'PUT', body: JSON.stringify(data) }),

  deleteKlausur: (id: string): Promise<{ success: boolean }> =>
    apiCall(`/klausuren/${id}`, { method: 'DELETE' }),

  // --- Students -------------------------------------------------------------
  listStudents: (klausurId: string): Promise<StudentKlausur[]> =>
    apiCall(`/klausuren/${klausurId}/students`),

  deleteStudent: (studentId: string): Promise<{ success: boolean }> =>
    apiCall(`/students/${studentId}`, { method: 'DELETE' }),

  // --- Grading --------------------------------------------------------------
  updateCriteria: (
    studentId: string,
    criterion: string,
    score: number,
    annotations?: string[]
  ): Promise<StudentKlausur> =>
    apiCall(`/students/${studentId}/criteria`, {
      method: 'PUT',
      body: JSON.stringify({ criterion, score, annotations }),
    }),

  updateGutachten: (
    studentId: string,
    gutachten: {
      einleitung: string
      hauptteil: string
      fazit: string
      staerken?: string[]
      schwaechen?: string[]
    }
  ): Promise<StudentKlausur> =>
    apiCall(`/students/${studentId}/gutachten`, {
      method: 'PUT',
      body: JSON.stringify(gutachten),
    }),

  finalizeStudent: (studentId: string): Promise<StudentKlausur> =>
    apiCall(`/students/${studentId}/finalize`, { method: 'POST' }),

  // --- KI-Gutachten generation ----------------------------------------------
  // Omitted options default to: strengths and weaknesses included, 'formal' tone.
  generateGutachten: (
    studentId: string,
    options: {
      include_strengths?: boolean
      include_weaknesses?: boolean
      tone?: 'formal' | 'friendly' | 'constructive'
    } = {}
  ): Promise<{
    einleitung: string
    hauptteil: string
    fazit: string
    staerken: string[]
    schwaechen: string[]
    generated_at: string
    is_ki_generated: boolean
    tone: string
  }> =>
    apiCall(`/students/${studentId}/gutachten/generate`, {
      method: 'POST',
      body: JSON.stringify({
        include_strengths: options.include_strengths ?? true,
        include_weaknesses: options.include_weaknesses ?? true,
        tone: options.tone ?? 'formal',
      }),
    }),

  // --- Fairness analysis ----------------------------------------------------
  getFairnessAnalysis: (klausurId: string): Promise<{
    klausur_id: string
    students_count: number
    graded_count: number
    statistics: {
      average_grade: number
      average_raw_points: number
      min_grade: number
      max_grade: number
      spread: number
      standard_deviation: number
    }
    criteria_breakdown: Record<string, { average: number; min: number; max: number; count: number }>
    outliers: Array<{
      student_id: string
      student_name: string
      grade_points: number
      deviation: number
      direction: 'above' | 'below'
    }>
    fairness_score: number
    warnings: string[]
    recommendation: string
  }> =>
    apiCall(`/klausuren/${klausurId}/fairness`),

  // --- Audit log ------------------------------------------------------------
  getStudentAuditLog: (studentId: string): Promise<Array<{
    id: string
    timestamp: string
    user_id: string
    action: string
    entity_type: string
    entity_id: string
    field: string | null
    old_value: string | null
    new_value: string | null
    details: Record<string, unknown> | null
  }>> =>
    apiCall(`/students/${studentId}/audit-log`),

  // --- Utilities ------------------------------------------------------------
  getGradeInfo: (): Promise<GradeInfo> => apiCall('/grade-info'),
}
// ============================================================================
// File Upload
// ============================================================================

/**
 * Upload a student's work as multipart form data.
 *
 * Does not go through `apiCall`: no `Content-Type` header is set here, so the
 * request is not labelled as JSON the way `apiCall` requests are.
 *
 * @param klausurId   Klausur the submission belongs to
 * @param studentName sent as the `student_name` form field
 * @param file        sent as the `file` form field
 * @returns the parsed JSON response body, typed as StudentKlausur
 * @throws Error carrying the response's `detail` field, or `HTTP <status>`
 *         when the response is not ok
 */
export async function uploadStudentWork(
  klausurId: string,
  studentName: string,
  file: File
): Promise<StudentKlausur> {
  const token = getAuthToken()

  const formData = new FormData()
  formData.append('file', file)
  formData.append('student_name', studentName)

  const headers: Record<string, string> = {}
  if (token) headers['Authorization'] = `Bearer ${token}`

  const response = await fetch(`/api/v1/klausuren/${klausurId}/students`, {
    method: 'POST',
    headers,
    body: formData,
  })
  if (!response.ok) {
    // The error body may not be JSON; fall back to a generic detail message.
    const error = await response.json().catch(() => ({ detail: 'Upload failed' }))
    throw new Error(error.detail || `HTTP ${response.status}`)
  }
  return response.json()
}
// =============================================
// BYOEH (Erwartungshorizont) Types & API
// =============================================
/**
 * An Erwartungshorizont (EH) record as exchanged with the `/eh` endpoints
 * (the result type of `ehApi.listEH`, `ehApi.getEH` and `ehApi.uploadEH`).
 */
export interface Erwartungshorizont {
id: string
tenant_id: string
teacher_id: string
title: string
subject: string
niveau: 'eA' | 'gA'
year: number
aufgaben_nummer: string | null
// Processing state of the record.
status: 'pending_rights' | 'processing' | 'indexed' | 'error'
chunk_count: number
rights_confirmed: boolean
// Timestamps are typed as strings; the nullable ones are null until set.
// NOTE(review): presumably ISO 8601 - confirm against the backend.
rights_confirmed_at: string | null
indexed_at: string | null
file_size_bytes: number
original_filename: string
training_allowed: boolean
created_at: string
// NOTE(review): presumably set by the soft delete behind `ehApi.deleteEH` - confirm.
deleted_at: string | null
}
/**
 * Result of a RAG query against EH content (the result type of
 * `ehApi.ragQuery`).
 */
export interface EHRAGResult {
context: string
// One entry per retrieved chunk, each naming the EH it came from.
sources: Array<{
text: string
eh_id: string
eh_title: string
chunk_index: number
score: number
reranked?: boolean
}>
query: string
// Optional diagnostics about the search; every field inside is optional too.
search_info?: {
retrieval_time_ms?: number
rerank_time_ms?: number
total_time_ms?: number
reranked?: boolean
rerank_applied?: boolean
hybrid_search_applied?: boolean
embedding_model?: string
total_candidates?: number
original_count?: number
}
}
/** One audit-log entry (element type of the `ehApi.getAuditLog` result). */
export interface EHAuditEntry {
id: string
// Nullable: an entry is not necessarily tied to one EH.
eh_id: string | null
tenant_id: string
user_id: string
action: string
details: Record<string, unknown> | null
created_at: string
}
/**
 * A key share granting another user access to an EH (element type of the
 * `ehApi.listShares` result; also embedded in SharedEHInfo, LinkedEHInfo and
 * EHAccessChain).
 *
 * `role` includes 'fachvorsitz' so that it covers every role an invitation
 * can carry (see EHShareInvitation.role); otherwise a share created by
 * accepting such an invitation could not be represented by this type.
 * NOTE(review): confirm the backend actually emits 'fachvorsitz' on shares.
 */
export interface EHKeyShare {
  id: string
  eh_id: string
  user_id: string
  passphrase_hint: string
  granted_by: string
  granted_at: string
  role: 'second_examiner' | 'third_examiner' | 'supervisor' | 'department_head' | 'fachvorsitz'
  klausur_id: string | null
  active: boolean
}
/** Link record connecting an EH to a Klausur (see `LinkedEHInfo.link`). */
export interface EHKlausurLink {
id: string
eh_id: string
klausur_id: string
linked_by: string
linked_at: string
}
/**
 * An EH paired with the key share that grants access to it (element type of
 * the `ehApi.getSharedWithMe` result).
 */
export interface SharedEHInfo {
eh: Erwartungshorizont
share: EHKeyShare
}
/**
 * An EH linked to a Klausur, with the link record and the caller's access
 * information (element type of the `klausurEHApi.getLinkedEH` result).
 */
export interface LinkedEHInfo {
eh: Erwartungshorizont
link: EHKlausurLink
is_owner: boolean
// NOTE(review): presumably null when access comes from ownership rather than a share - confirm.
share: EHKeyShare | null
}
// Invitation types for Invite/Accept/Revoke flow
/**
 * An invitation to share an EH with another user (embedded in
 * PendingInvitationInfo, SentInvitationInfo and EHAccessChain).
 */
export interface EHShareInvitation {
id: string
eh_id: string
inviter_id: string
invitee_id: string
invitee_email: string
role: 'second_examiner' | 'third_examiner' | 'supervisor' | 'department_head' | 'fachvorsitz'
klausur_id: string | null
message: string | null
// Lifecycle state of the invitation.
status: 'pending' | 'accepted' | 'declined' | 'expired' | 'revoked'
expires_at: string
created_at: string
// Nullable timestamps, null until the corresponding action happened.
accepted_at: string | null
declined_at: string | null
}
/**
 * An invitation plus a summary of the EH it refers to (element type of the
 * `ehApi.getPendingInvitations` result).
 */
export interface PendingInvitationInfo {
invitation: EHShareInvitation
// Summary subset of the EH fields; may be null.
eh: {
id: string
title: string
subject: string
niveau: string
year: number
} | null
}
/**
 * An invitation plus a short summary of the EH it refers to (element type of
 * the `ehApi.getSentInvitations` result).
 */
export interface SentInvitationInfo {
invitation: EHShareInvitation
// Summary subset of the EH fields; may be null.
eh: {
id: string
title: string
subject: string
} | null
}
/**
 * Access overview for one EH: its owner, shares and pending invitations
 * (the result type of `ehApi.getAccessChain`).
 */
export interface EHAccessChain {
eh_id: string
eh_title: string
owner: {
user_id: string
role: string
}
// Shares are split into active and revoked lists.
active_shares: EHKeyShare[]
pending_invitations: EHShareInvitation[]
revoked_shares: EHKeyShare[]
}
/**
 * Client for the Erwartungshorizont (EH) endpoints: CRUD, indexing and RAG
 * queries, key sharing, Klausur linking and the invitation flow.
 *
 * Except for `uploadEH` (multipart upload via `fetch`), every method delegates
 * to `apiCall` and therefore rejects with an Error on a non-ok response.
 */
export const ehApi = {
  /** List EH entries, optionally filtered by subject and/or year. */
  listEH: (params?: { subject?: string; year?: number }): Promise<Erwartungshorizont[]> => {
    const search = new URLSearchParams()
    if (params?.subject) search.append('subject', params.subject)
    if (params?.year) search.append('year', params.year.toString())
    const qs = search.toString()
    const suffix = qs ? `?${qs}` : ''
    return apiCall(`/eh${suffix}`)
  },

  /** Fetch a single EH by id. */
  getEH: (id: string): Promise<Erwartungshorizont> => apiCall(`/eh/${id}`),

  /** Upload an EH as FormData; bypasses `apiCall` so no JSON Content-Type is set. */
  uploadEH: async (formData: FormData): Promise<Erwartungshorizont> => {
    const authToken = getAuthToken()
    const requestHeaders: Record<string, string> = {}
    if (authToken) {
      requestHeaders['Authorization'] = `Bearer ${authToken}`
    }

    const res = await fetch('/api/v1/eh/upload', {
      method: 'POST',
      headers: requestHeaders,
      body: formData,
    })
    if (res.ok) {
      return res.json()
    }
    // The error body may not be JSON; fall back to a generic detail message.
    const failure = await res.json().catch(() => ({ detail: 'Upload failed' }))
    throw new Error(failure.detail || `HTTP ${res.status}`)
  },

  /** Delete an EH (soft delete). */
  deleteEH: (id: string): Promise<{ status: string; id: string }> =>
    apiCall(`/eh/${id}`, { method: 'DELETE' }),

  /** Index an EH for RAG; the passphrase is sent in the request body. */
  indexEH: (id: string, passphrase: string): Promise<{ status: string; id: string; chunk_count: number }> => {
    const body = JSON.stringify({ passphrase })
    return apiCall(`/eh/${id}/index`, { method: 'POST', body })
  },

  /** Run a RAG query; `limit` defaults to 5 and `rerank` to false. */
  ragQuery: (params: {
    query_text: string
    passphrase: string
    subject?: string
    limit?: number
    rerank?: boolean
  }): Promise<EHRAGResult> => {
    const payload = {
      query_text: params.query_text,
      passphrase: params.passphrase,
      subject: params.subject,
      limit: params.limit ?? 5,
      rerank: params.rerank ?? false,
    }
    return apiCall('/eh/rag-query', { method: 'POST', body: JSON.stringify(payload) })
  },

  /** Fetch audit-log entries, optionally filtered by EH id and limited in count. */
  getAuditLog: (ehId?: string, limit?: number): Promise<EHAuditEntry[]> => {
    const search = new URLSearchParams()
    if (ehId) search.append('eh_id', ehId)
    if (limit) search.append('limit', limit.toString())
    const qs = search.toString()
    const suffix = qs ? `?${qs}` : ''
    return apiCall(`/eh/audit-log${suffix}`)
  },

  /** Fetch the rights-confirmation text and its version. */
  getRightsText: (): Promise<{ text: string; version: string }> => apiCall('/eh/rights-text'),

  /** Fetch Qdrant status (admin only). */
  getQdrantStatus: (): Promise<{ name: string; vectors_count: number; points_count: number; status: string }> =>
    apiCall('/eh/qdrant-status'),

  // ---------------------------------------------------------------------------
  // Key sharing
  // ---------------------------------------------------------------------------

  /** Share an EH with another examiner. */
  shareEH: (
    ehId: string,
    params: {
      user_id: string
      role: 'second_examiner' | 'third_examiner' | 'supervisor' | 'department_head'
      encrypted_passphrase: string
      passphrase_hint?: string
      klausur_id?: string
    }
  ): Promise<{ status: string; share_id: string; eh_id: string; shared_with: string; role: string }> => {
    const body = JSON.stringify(params)
    return apiCall(`/eh/${ehId}/share`, { method: 'POST', body })
  },

  /** List the shares of an EH (owner only). */
  listShares: (ehId: string): Promise<EHKeyShare[]> => apiCall(`/eh/${ehId}/shares`),

  /** Revoke one share of an EH. */
  revokeShare: (ehId: string, shareId: string): Promise<{ status: string; share_id: string }> =>
    apiCall(`/eh/${ehId}/shares/${shareId}`, { method: 'DELETE' }),

  /** List the EH entries shared with the current user. */
  getSharedWithMe: (): Promise<SharedEHInfo[]> => apiCall('/eh/shared-with-me'),

  /** Link an EH to a Klausur. */
  linkToKlausur: (
    ehId: string,
    klausurId: string
  ): Promise<{ status: string; link_id: string; eh_id: string; klausur_id: string }> => {
    const body = JSON.stringify({ klausur_id: klausurId })
    return apiCall(`/eh/${ehId}/link-klausur`, { method: 'POST', body })
  },

  /** Remove the link between an EH and a Klausur. */
  unlinkFromKlausur: (
    ehId: string,
    klausurId: string
  ): Promise<{ status: string; eh_id: string; klausur_id: string }> =>
    apiCall(`/eh/${ehId}/link-klausur/${klausurId}`, { method: 'DELETE' }),

  // ---------------------------------------------------------------------------
  // Invitation flow (invite / accept / revoke)
  // ---------------------------------------------------------------------------

  /** Send an invitation to share an EH. */
  inviteToEH: (
    ehId: string,
    params: {
      invitee_email: string
      invitee_id?: string
      role: 'second_examiner' | 'third_examiner' | 'supervisor' | 'department_head' | 'fachvorsitz'
      klausur_id?: string
      message?: string
      expires_in_days?: number
    }
  ): Promise<{
    status: string
    invitation_id: string
    eh_id: string
    invitee_email: string
    role: string
    expires_at: string
    eh_title: string
  }> => {
    const body = JSON.stringify(params)
    return apiCall(`/eh/${ehId}/invite`, { method: 'POST', body })
  },

  /** List pending invitations for the current user. */
  getPendingInvitations: (): Promise<PendingInvitationInfo[]> => apiCall('/eh/invitations/pending'),

  /** List invitations sent by the current user. */
  getSentInvitations: (): Promise<SentInvitationInfo[]> => apiCall('/eh/invitations/sent'),

  /** Accept an invitation, supplying the encrypted passphrase. */
  acceptInvitation: (
    invitationId: string,
    encryptedPassphrase: string
  ): Promise<{ status: string; share_id: string; eh_id: string; role: string; klausur_id: string | null }> => {
    const body = JSON.stringify({ encrypted_passphrase: encryptedPassphrase })
    return apiCall(`/eh/invitations/${invitationId}/accept`, { method: 'POST', body })
  },

  /** Decline an invitation. */
  declineInvitation: (invitationId: string): Promise<{ status: string; invitation_id: string; eh_id: string }> =>
    apiCall(`/eh/invitations/${invitationId}/decline`, { method: 'POST' }),

  /** Revoke an invitation (as inviter). */
  revokeInvitation: (invitationId: string): Promise<{ status: string; invitation_id: string; eh_id: string }> =>
    apiCall(`/eh/invitations/${invitationId}`, { method: 'DELETE' }),

  /** Fetch the complete access chain for an EH. */
  getAccessChain: (ehId: string): Promise<EHAccessChain> => apiCall(`/eh/${ehId}/access-chain`),
}
/**
 * Klausur-scoped EH lookups, kept separate from `ehApi` for clarity.
 */
export const klausurEHApi = {
  /** Fetch all EH linked to a Klausur that the user has access to. */
  getLinkedEH: (klausurId: string): Promise<LinkedEHInfo[]> => {
    const endpoint = `/klausuren/${klausurId}/linked-eh`
    return apiCall(endpoint)
  },
}