This repository has been archived on 2026-02-15. You can view files and clone it. You cannot open issues or pull requests or push a commit.
Files
breakpilot-pwa/klausur-service/backend/main.py
BreakPilot Dev ee0c4b859c feat(klausur-service): Add Tesseract OCR, DSFA RAG, TrOCR, grid detection and vocab session store
New modules:
- tesseract_vocab_extractor.py: Bounding-box OCR with multi-PSM pipeline
- grid_detection_service.py: CV-based grid/table detection for worksheets
- vocab_session_store.py: PostgreSQL persistence for vocab sessions
- trocr_api.py: TrOCR handwriting recognition endpoint
- dsfa_rag_api.py + dsfa_corpus_ingestion.py: DSFA RAG corpus search

Changes:
- Dockerfile: Install tesseract-ocr + deu/eng language packs
- requirements.txt: Add PyMuPDF, pytesseract, Pillow
- main.py: Register new routers, init DB pools + Qdrant collections

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-02-10 00:00:19 +01:00

191 lines
6.0 KiB
Python

"""
Klausur-Service - Abitur/Vorabitur Klausurkorrektur Microservice
Eigenstaendiger Service fuer:
- Klausurverwaltung (Abitur/Vorabitur)
- OCR-Verarbeitung handschriftlicher Arbeiten
- KI-gestuetzte Bewertung
- Gutachten-Generierung
- 15-Punkte-Notensystem
- BYOEH (Bring-Your-Own-Expectation-Horizon)
This is the main entry point. All functionality is organized in modular packages:
- models/: Data models and Pydantic schemas
- routes/: API endpoint handlers
- services/: Business logic
- storage.py: In-memory data storage
- config.py: Configuration constants
"""
import os
from contextlib import asynccontextmanager
import asyncpg
from fastapi import FastAPI
from fastapi.middleware.cors import CORSMiddleware
from fastapi.staticfiles import StaticFiles
from fastapi.responses import FileResponse
# Configuration
from config import EH_UPLOAD_DIR, FRONTEND_PATH
# Routes
from routes import api_router
# External module routers (already modular)
from admin_api import router as admin_router
from zeugnis_api import router as zeugnis_router
from training_api import router as training_router
from mail.api import router as mail_router
try:
from trocr_api import router as trocr_router
except ImportError:
trocr_router = None
from vocab_worksheet_api import router as vocab_router, set_db_pool as set_vocab_db_pool, _init_vocab_table, _load_all_sessions, DATABASE_URL as VOCAB_DATABASE_URL
try:
from dsfa_rag_api import router as dsfa_rag_router, set_db_pool as set_dsfa_db_pool
from dsfa_corpus_ingestion import DSFAQdrantService, DATABASE_URL as DSFA_DATABASE_URL
except ImportError:
dsfa_rag_router = None
set_dsfa_db_pool = None
DSFAQdrantService = None
DSFA_DATABASE_URL = None
# BYOEH Qdrant initialization
from qdrant_service import init_qdrant_collection
# =============================================
# APP SETUP
# =============================================
@asynccontextmanager
async def lifespan(app: FastAPI):
    """Manage startup and shutdown of the service's external resources.

    Startup steps (each database/Qdrant step is best-effort: a failure is
    printed as a warning and startup continues):

    1. Create the asyncpg pool for vocab sessions, hand it to the vocab
       module, then initialise its table and load all sessions.
    2. Create the asyncpg pool for DSFA RAG, if that optional module was
       importable.
    3. Initialise the Qdrant collection for BYOEH.
    4. Ensure the Qdrant collection for the DSFA corpus, if available.
    5. Create the EH upload directory (errors here are NOT swallowed).

    Shutdown closes every pool that was successfully created. The cleanup
    lives in a ``finally`` block so the pools are also released when an
    exception is thrown into the context manager or when startup itself
    fails after a pool has been opened.

    Args:
        app: The FastAPI application (required by the lifespan protocol,
            not used here).

    Yields:
        None, once startup has finished.
    """
    print("Klausur-Service starting...")

    # Declared up front so the ``finally`` block can always inspect them.
    vocab_db_pool = None
    dsfa_db_pool = None

    try:
        # Initialize database pool for Vocab Sessions
        try:
            vocab_db_pool = await asyncpg.create_pool(
                VOCAB_DATABASE_URL, min_size=2, max_size=5
            )
            set_vocab_db_pool(vocab_db_pool)
            await _init_vocab_table()
            await _load_all_sessions()
            print("Vocab sessions database initialized")
        except Exception as e:
            print(f"Warning: Vocab sessions database initialization failed: {e}")

        # Initialize database pool for DSFA RAG (optional module)
        if DSFA_DATABASE_URL and set_dsfa_db_pool:
            try:
                dsfa_db_pool = await asyncpg.create_pool(
                    DSFA_DATABASE_URL, min_size=2, max_size=10
                )
                set_dsfa_db_pool(dsfa_db_pool)
                # The connection URL is deliberately not logged: it may
                # embed the database user name and password.
                print("DSFA database pool initialized")
            except Exception as e:
                print(f"Warning: DSFA database pool initialization failed: {e}")

        # Initialize Qdrant collection for BYOEH
        try:
            await init_qdrant_collection()
            print("Qdrant BYOEH collection initialized")
        except Exception as e:
            print(f"Warning: Qdrant BYOEH initialization failed: {e}")

        # Initialize Qdrant collection for DSFA RAG (optional module)
        if DSFAQdrantService:
            try:
                dsfa_qdrant = DSFAQdrantService()
                await dsfa_qdrant.ensure_collection()
                print("Qdrant DSFA corpus collection initialized")
            except Exception as e:
                print(f"Warning: Qdrant DSFA initialization failed: {e}")

        # Ensure EH upload directory exists
        os.makedirs(EH_UPLOAD_DIR, exist_ok=True)

        yield
    finally:
        print("Klausur-Service shutting down...")
        try:
            # Close Vocab sessions database pool
            if vocab_db_pool is not None:
                await vocab_db_pool.close()
                print("Vocab sessions database pool closed")
        finally:
            # Close DSFA database pool even if closing the vocab pool failed
            if dsfa_db_pool is not None:
                await dsfa_db_pool.close()
                print("DSFA database pool closed")
# The ASGI application; startup/shutdown work is delegated to ``lifespan``.
app = FastAPI(
    title="Klausur-Service",
    description="Abitur/Vorabitur Klausurkorrektur Microservice",
    version="1.0.0",
    lifespan=lifespan,
)

# CORS Middleware
# Allowed origins can be restricted per deployment with the comma-separated
# CORS_ALLOW_ORIGINS environment variable, e.g.
#   CORS_ALLOW_ORIGINS="https://app.example.org,https://admin.example.org"
# When the variable is unset or empty the previous behaviour (any origin) is
# kept so existing deployments are unaffected.
# NOTE(review): a wildcard origin combined with allow_credentials=True lets
# any website issue credentialed requests; set CORS_ALLOW_ORIGINS in
# production.
_cors_allow_origins = [
    origin.strip()
    for origin in os.getenv("CORS_ALLOW_ORIGINS", "*").split(",")
    if origin.strip()
] or ["*"]

app.add_middleware(
    CORSMiddleware,
    allow_origins=_cors_allow_origins,
    allow_credentials=True,
    allow_methods=["*"],
    allow_headers=["*"],
)
# =============================================
# INCLUDE ROUTERS
# =============================================
# Routers are registered in exactly this order. Each entry is
# (router, optional): optional routers come from modules whose import may
# have failed, in which case the name is None and the entry is skipped.
_ROUTER_TABLE = (
    (api_router, False),       # Main API routes (modular)
    (admin_router, False),     # NiBiS Ingestion
    (zeugnis_router, False),   # Zeugnis Rights-Aware Crawler
    (training_router, False),  # Training Management
    (mail_router, False),      # Unified Inbox Mail
    (trocr_router, True),      # TrOCR Handwriting OCR
    (vocab_router, False),     # Vocabulary Worksheet Generator
    (dsfa_rag_router, True),   # DSFA RAG Corpus Search
)

for _router, _optional in _ROUTER_TABLE:
    if _optional and not _router:
        continue
    app.include_router(_router)
# =============================================
# HEALTH CHECK
# =============================================
@app.get("/health")
async def health():
    """Return a static payload signalling that the service is up.

    Returns:
        A dict with the fixed keys ``status`` and ``service``.
    """
    payload = dict(status="healthy", service="klausur-service")
    return payload
# =============================================
# SERVE FRONTEND
# =============================================
if os.path.exists(FRONTEND_PATH):
    # StaticFiles validates its directory when it is constructed, so mounting
    # a missing assets/ folder would abort the import of this module. Only
    # mount it when the folder is really there.
    _assets_dir = f"{FRONTEND_PATH}/assets"
    if os.path.isdir(_assets_dir):
        app.mount("/assets", StaticFiles(directory=_assets_dir), name="assets")

    @app.get("/")
    async def serve_frontend():
        """Serve the React frontend's entry page."""
        return FileResponse(f"{FRONTEND_PATH}/index.html")

    @app.get("/{path:path}")
    async def serve_frontend_routes(path: str):
        """Serve index.html for all non-API routes (SPA routing).

        Args:
            path: The request path without its leading slash.

        Returns:
            The frontend's index.html for any path that is not reserved.

        Raises:
            HTTPException: 404 when ``path`` starts with ``api/`` or
                ``health``; those prefixes are reserved for the backend and
                must not be answered with the SPA shell.
        """
        # Imported locally because the module-level imports do not provide it.
        from fastapi import HTTPException

        if path.startswith(("api/", "health")):
            raise HTTPException(status_code=404)
        return FileResponse(f"{FRONTEND_PATH}/index.html")
if __name__ == "__main__":
    # Direct execution: serve the app with uvicorn on all interfaces, port 8086.
    from uvicorn import run as _serve

    _serve(app, host="0.0.0.0", port=8086)