Files
breakpilot-lehrer/klausur-service/backend/main.py
Benjamin Admin 2e0f8632f8
Some checks failed
CI / go-lint (push) Has been skipped
CI / python-lint (push) Has been skipped
CI / nodejs-lint (push) Has been skipped
CI / test-go-school (push) Successful in 26s
CI / test-go-edu-search (push) Successful in 26s
CI / test-python-klausur (push) Failing after 1m49s
CI / test-python-agent-core (push) Successful in 14s
CI / test-nodejs-website (push) Successful in 15s
feat(klausur): Handschrift entfernen + Klausur-HTR implementiert
Feature 1: Handschrift entfernen via OCR-Pipeline Session
- services/handwriting_detection.py: _detect_pencil() + target_ink Parameter
  ("all" | "colored" | "pencil") für gezielte Tinten-Erkennung
- ocr_pipeline_session_store.py: clean_png + handwriting_removal_meta Spalten
  (idempotentes ALTER TABLE in init_ocr_pipeline_tables)
- ocr_pipeline_api.py: POST /sessions/{id}/remove-handwriting Endpoint
  + "clean" zu valid_types für Image-Serving hinzugefügt

Feature 2: Klausur-HTR (Hochwertige Handschriftenerkennung)
- handwriting_htr_api.py: Neuer Router /api/v1/htr/recognize + /recognize-session
  Primary: qwen2.5vl:32b via Ollama, Fallback: trocr-large-handwritten
- services/trocr_service.py: size Parameter (base | large) für get_trocr_model()
  + run_trocr_ocr() - unterstützt jetzt trocr-large-handwritten
- main.py: HTR Router registriert

Config:
- docker-compose.yml: OLLAMA_HTR_MODEL, HTR_FALLBACK_MODEL
- .env.example: HTR Env-Vars dokumentiert

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-03-03 12:04:26 +01:00

220 lines
7.2 KiB
Python

"""
Klausur-Service - Abitur/Vorabitur Klausurkorrektur Microservice
Eigenstaendiger Service fuer:
- Klausurverwaltung (Abitur/Vorabitur)
- OCR-Verarbeitung handschriftlicher Arbeiten
- KI-gestuetzte Bewertung
- Gutachten-Generierung
- 15-Punkte-Notensystem
- BYOEH (Bring-Your-Own-Expectation-Horizon)
This is the main entry point. All functionality is organized in modular packages:
- models/: Data models and Pydantic schemas
- routes/: API endpoint handlers
- services/: Business logic
- storage.py: In-memory data storage
- config.py: Configuration constants
"""
import os
from contextlib import asynccontextmanager
import asyncpg
from fastapi import FastAPI
from fastapi.middleware.cors import CORSMiddleware
from fastapi.staticfiles import StaticFiles
from fastapi.responses import FileResponse
# Configuration
from config import EH_UPLOAD_DIR, FRONTEND_PATH
# Routes
from routes import api_router
# External module routers (already modular)
from admin_api import router as admin_router
from zeugnis_api import router as zeugnis_router
from training_api import router as training_router
from mail.api import router as mail_router
try:
from trocr_api import router as trocr_router
except ImportError:
trocr_router = None
from vocab_worksheet_api import router as vocab_router, set_db_pool as set_vocab_db_pool, _init_vocab_table, _load_all_sessions, DATABASE_URL as VOCAB_DATABASE_URL
from ocr_pipeline_api import router as ocr_pipeline_router
from ocr_pipeline_session_store import init_ocr_pipeline_tables
try:
from handwriting_htr_api import router as htr_router
except ImportError:
htr_router = None
try:
from dsfa_rag_api import router as dsfa_rag_router, set_db_pool as set_dsfa_db_pool
from dsfa_corpus_ingestion import DSFAQdrantService, DATABASE_URL as DSFA_DATABASE_URL
except ImportError:
dsfa_rag_router = None
set_dsfa_db_pool = None
DSFAQdrantService = None
DSFA_DATABASE_URL = None
# BYOEH Qdrant initialization
from qdrant_service import init_qdrant_collection
# =============================================
# APP SETUP
# =============================================
@asynccontextmanager
async def lifespan(app: FastAPI):
    """Run startup initialization, yield while the app serves, then clean up.

    Startup steps (vocab session pool, OCR pipeline tables, DSFA pool, both
    Qdrant collections, optional LightOnOCR preload) are best-effort: each one
    prints a warning and lets startup continue if it fails. Creating the EH
    upload directory is the only unguarded step.

    Shutdown runs in a ``finally`` block so that any pool that was opened is
    closed even if startup raised after the pool was created or an exception
    was thrown into the generator at ``yield``.

    Args:
        app: The FastAPI application this lifespan is attached to (unused).
    """
    print("Klausur-Service starting...")

    # Declared before the try so the finally block can always inspect them.
    vocab_db_pool = None
    dsfa_db_pool = None

    try:
        # Initialize database pool for Vocab Sessions
        try:
            vocab_db_pool = await asyncpg.create_pool(
                VOCAB_DATABASE_URL, min_size=2, max_size=5
            )
            set_vocab_db_pool(vocab_db_pool)
            await _init_vocab_table()
            await _load_all_sessions()
            print("Vocab sessions database initialized")
        except Exception as e:
            print(f"Warning: Vocab sessions database initialization failed: {e}")

        # Initialize OCR Pipeline session tables
        try:
            await init_ocr_pipeline_tables()
            print("OCR Pipeline session tables initialized")
        except Exception as e:
            print(f"Warning: OCR Pipeline tables initialization failed: {e}")

        # Initialize database pool for DSFA RAG (only if the optional module
        # was importable).
        if DSFA_DATABASE_URL and set_dsfa_db_pool:
            try:
                dsfa_db_pool = await asyncpg.create_pool(
                    DSFA_DATABASE_URL, min_size=2, max_size=10
                )
                set_dsfa_db_pool(dsfa_db_pool)
                # Do not print the connection URL: a DSN may embed the
                # database password, which must not end up in the logs.
                print("DSFA database pool initialized")
            except Exception as e:
                print(f"Warning: DSFA database pool initialization failed: {e}")

        # Initialize Qdrant collection for BYOEH
        try:
            await init_qdrant_collection()
            print("Qdrant BYOEH collection initialized")
        except Exception as e:
            print(f"Warning: Qdrant BYOEH initialization failed: {e}")

        # Initialize Qdrant collection for DSFA RAG
        if DSFAQdrantService:
            try:
                dsfa_qdrant = DSFAQdrantService()
                await dsfa_qdrant.ensure_collection()
                print("Qdrant DSFA corpus collection initialized")
            except Exception as e:
                print(f"Warning: Qdrant DSFA initialization failed: {e}")

        # Ensure EH upload directory exists
        os.makedirs(EH_UPLOAD_DIR, exist_ok=True)

        # Preload LightOnOCR model if OCR_ENGINE=lighton (avoids cold-start
        # on first request)
        ocr_engine_env = os.getenv("OCR_ENGINE", "auto")
        if ocr_engine_env == "lighton":
            try:
                import asyncio
                from services.lighton_ocr_service import get_lighton_model

                # We are inside a coroutine, so the running loop is the
                # correct one; get_event_loop() is deprecated for this use.
                loop = asyncio.get_running_loop()
                print("Preloading LightOnOCR-2-1B at startup (OCR_ENGINE=lighton)...")
                # Model loading is blocking; run it in the default executor
                # so the event loop is not stalled.
                await loop.run_in_executor(None, get_lighton_model)
                print("LightOnOCR-2-1B preloaded")
            except Exception as e:
                print(f"Warning: LightOnOCR preload failed: {e}")

        yield
    finally:
        print("Klausur-Service shutting down...")

        # Nested try/finally: a failure while closing the vocab pool must not
        # prevent the DSFA pool from being closed.
        try:
            # Close Vocab sessions database pool
            if vocab_db_pool:
                await vocab_db_pool.close()
                print("Vocab sessions database pool closed")
        finally:
            # Close DSFA database pool
            if dsfa_db_pool:
                await dsfa_db_pool.close()
                print("DSFA database pool closed")
app = FastAPI(
    title="Klausur-Service",
    description="Abitur/Vorabitur Klausurkorrektur Microservice",
    version="1.0.0",
    lifespan=lifespan,
)

# CORS Middleware
# Allowed origins come from the CORS_ALLOW_ORIGINS environment variable
# (comma-separated list). When it is unset or empty the previous behavior is
# kept: every origin ("*") is allowed.
# NOTE(review): the FastAPI CORS documentation advises against combining a
# wildcard origin with allow_credentials=True; deployments that rely on
# credentials should set CORS_ALLOW_ORIGINS to an explicit list - confirm.
_cors_allow_origins = [
    origin.strip()
    for origin in os.getenv("CORS_ALLOW_ORIGINS", "*").split(",")
    if origin.strip()
] or ["*"]

app.add_middleware(
    CORSMiddleware,
    allow_origins=_cors_allow_origins,
    allow_credentials=True,
    allow_methods=["*"],
    allow_headers=["*"],
)
# =============================================
# INCLUDE ROUTERS
# =============================================
# Routers are registered in a fixed order; optional routers are None when
# their module could not be imported and are skipped in that case.

# Main API routes (modular), followed by the external module routers:
# NiBiS ingestion, Zeugnis rights-aware crawler, training management and the
# unified inbox mail API.
for _router in (
    api_router,
    admin_router,
    zeugnis_router,
    training_router,
    mail_router,
):
    app.include_router(_router)

# TrOCR handwriting OCR (optional)
if trocr_router:
    app.include_router(trocr_router)

# Vocabulary worksheet generator and the step-by-step OCR pipeline
for _router in (vocab_router, ocr_pipeline_router):
    app.include_router(_router)

# Handwriting HTR (Klausur) and DSFA RAG corpus search (both optional)
for _optional_router in (htr_router, dsfa_rag_router):
    if _optional_router:
        app.include_router(_optional_router)
# =============================================
# HEALTH CHECK
# =============================================
@app.get("/health")
async def health():
    """Return a static status payload identifying this service."""
    status_payload = dict(status="healthy", service="klausur-service")
    return status_payload
# =============================================
# SERVE FRONTEND
# =============================================
if os.path.exists(FRONTEND_PATH):
    # Mount the bundled assets only when the directory is really there.
    # StaticFiles checks the directory when it is constructed, so a frontend
    # build without an "assets" folder would otherwise abort the import of
    # this module and take the whole API down with it.
    _assets_dir = f"{FRONTEND_PATH}/assets"
    if os.path.isdir(_assets_dir):
        app.mount("/assets", StaticFiles(directory=_assets_dir), name="assets")
    else:
        print(f"Warning: Frontend assets directory not found: {_assets_dir}")

    @app.get("/")
    async def serve_frontend():
        """Serve the React frontend."""
        return FileResponse(f"{FRONTEND_PATH}/index.html")

    @app.get("/{path:path}")
    async def serve_frontend_routes(path: str):
        """Serve index.html for all non-API routes (SPA routing).

        Args:
            path: The request path without the leading slash.

        Raises:
            HTTPException: 404 for paths starting with ``api/`` or ``health``
                that no earlier route handled.
        """
        # Unmatched API/health paths must stay a 404 instead of returning the
        # SPA shell.
        if path.startswith(("api/", "health")):
            from fastapi import HTTPException

            raise HTTPException(status_code=404)
        return FileResponse(f"{FRONTEND_PATH}/index.html")
if __name__ == "__main__":
    # Direct execution: start uvicorn on all interfaces, port 8086.
    import uvicorn

    server_options = {"host": "0.0.0.0", "port": 8086}
    uvicorn.run(app, **server_options)