New modules: - tesseract_vocab_extractor.py: Bounding-box OCR with multi-PSM pipeline - grid_detection_service.py: CV-based grid/table detection for worksheets - vocab_session_store.py: PostgreSQL persistence for vocab sessions - trocr_api.py: TrOCR handwriting recognition endpoint - dsfa_rag_api.py + dsfa_corpus_ingestion.py: DSFA RAG corpus search Changes: - Dockerfile: Install tesseract-ocr + deu/eng language packs - requirements.txt: Add PyMuPDF, pytesseract, Pillow - main.py: Register new routers, init DB pools + Qdrant collections Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
191 lines
6.0 KiB
Python
191 lines
6.0 KiB
Python
"""
|
|
Klausur-Service - Abitur/Vorabitur Klausurkorrektur Microservice
|
|
|
|
Eigenstaendiger Service fuer:
|
|
- Klausurverwaltung (Abitur/Vorabitur)
|
|
- OCR-Verarbeitung handschriftlicher Arbeiten
|
|
- KI-gestuetzte Bewertung
|
|
- Gutachten-Generierung
|
|
- 15-Punkte-Notensystem
|
|
- BYOEH (Bring-Your-Own-Expectation-Horizon)
|
|
|
|
This is the main entry point. All functionality is organized in modular packages:
|
|
- models/: Data models and Pydantic schemas
|
|
- routes/: API endpoint handlers
|
|
- services/: Business logic
|
|
- storage.py: In-memory data storage
|
|
- config.py: Configuration constants
|
|
"""
|
|
|
|
import os
|
|
from contextlib import asynccontextmanager
|
|
|
|
import asyncpg
|
|
from fastapi import FastAPI
|
|
from fastapi.middleware.cors import CORSMiddleware
|
|
from fastapi.staticfiles import StaticFiles
|
|
from fastapi.responses import FileResponse
|
|
|
|
# Configuration
|
|
from config import EH_UPLOAD_DIR, FRONTEND_PATH
|
|
|
|
# Routes
|
|
from routes import api_router
|
|
|
|
# External module routers (already modular)
|
|
from admin_api import router as admin_router
|
|
from zeugnis_api import router as zeugnis_router
|
|
from training_api import router as training_router
|
|
from mail.api import router as mail_router
|
|
try:
|
|
from trocr_api import router as trocr_router
|
|
except ImportError:
|
|
trocr_router = None
|
|
from vocab_worksheet_api import router as vocab_router, set_db_pool as set_vocab_db_pool, _init_vocab_table, _load_all_sessions, DATABASE_URL as VOCAB_DATABASE_URL
|
|
try:
|
|
from dsfa_rag_api import router as dsfa_rag_router, set_db_pool as set_dsfa_db_pool
|
|
from dsfa_corpus_ingestion import DSFAQdrantService, DATABASE_URL as DSFA_DATABASE_URL
|
|
except ImportError:
|
|
dsfa_rag_router = None
|
|
set_dsfa_db_pool = None
|
|
DSFAQdrantService = None
|
|
DSFA_DATABASE_URL = None
|
|
|
|
# BYOEH Qdrant initialization
|
|
from qdrant_service import init_qdrant_collection
|
|
|
|
|
|
# =============================================
|
|
# APP SETUP
|
|
# =============================================
|
|
|
|
@asynccontextmanager
async def lifespan(app: FastAPI):
    """Application lifespan manager for startup and shutdown events.

    Startup runs the following steps in order. Every backend initialization
    is wrapped in its own ``try``/``except`` so that a failure only prints a
    warning and startup continues:

    1. Create the asyncpg pool for vocab sessions, hand it to
       ``set_vocab_db_pool`` and call ``_init_vocab_table`` and
       ``_load_all_sessions``.
    2. If the optional DSFA modules were importable, create the DSFA asyncpg
       pool and hand it to ``set_dsfa_db_pool``.
    3. Call ``init_qdrant_collection`` for BYOEH.
    4. If available, instantiate ``DSFAQdrantService`` and call its
       ``ensure_collection``.
    5. Create ``EH_UPLOAD_DIR`` if it does not exist yet.

    Shutdown closes every pool that was successfully created. The cleanup
    lives in a ``finally`` clause so it also runs when an exception or a
    cancellation is thrown into the generator at ``yield``, or when a later
    startup step raises after a pool has already been opened.

    Args:
        app: The FastAPI application this lifespan is attached to (not used
            inside the body).

    Yields:
        None. Control is handed back to the server while the app is running.
    """
    print("Klausur-Service starting...")

    # Both pools are declared up front so the ``finally`` clause can always
    # inspect them, no matter where startup stopped.
    vocab_db_pool = None
    dsfa_db_pool = None

    try:
        # Initialize database pool for Vocab Sessions
        try:
            vocab_db_pool = await asyncpg.create_pool(VOCAB_DATABASE_URL, min_size=2, max_size=5)
            set_vocab_db_pool(vocab_db_pool)
            await _init_vocab_table()
            await _load_all_sessions()
            print("Vocab sessions database initialized")
        except Exception as e:
            # Best effort: the service keeps starting without vocab persistence.
            print(f"Warning: Vocab sessions database initialization failed: {e}")

        # Initialize database pool for DSFA RAG (only if the optional modules imported)
        if DSFA_DATABASE_URL and set_dsfa_db_pool:
            try:
                dsfa_db_pool = await asyncpg.create_pool(DSFA_DATABASE_URL, min_size=2, max_size=10)
                set_dsfa_db_pool(dsfa_db_pool)
                # The connection URL is deliberately not printed: a database
                # URL may embed credentials, which must not end up in logs.
                print("DSFA database pool initialized")
            except Exception as e:
                print(f"Warning: DSFA database pool initialization failed: {e}")

        # Initialize Qdrant collection for BYOEH
        try:
            await init_qdrant_collection()
            print("Qdrant BYOEH collection initialized")
        except Exception as e:
            print(f"Warning: Qdrant BYOEH initialization failed: {e}")

        # Initialize Qdrant collection for DSFA RAG
        if DSFAQdrantService:
            try:
                dsfa_qdrant = DSFAQdrantService()
                await dsfa_qdrant.ensure_collection()
                print("Qdrant DSFA corpus collection initialized")
            except Exception as e:
                print(f"Warning: Qdrant DSFA initialization failed: {e}")

        # Ensure EH upload directory exists
        os.makedirs(EH_UPLOAD_DIR, exist_ok=True)

        yield
    finally:
        print("Klausur-Service shutting down...")

        try:
            # Close Vocab sessions database pool
            if vocab_db_pool is not None:
                await vocab_db_pool.close()
                print("Vocab sessions database pool closed")
        finally:
            # Close DSFA database pool even if closing the vocab pool raised.
            if dsfa_db_pool is not None:
                await dsfa_db_pool.close()
                print("DSFA database pool closed")
|
|
|
|
|
|
# The ASGI application object. Startup and shutdown work is delegated to the
# `lifespan` context manager defined above.
app = FastAPI(
    lifespan=lifespan,
    version="1.0.0",
    title="Klausur-Service",
    description="Abitur/Vorabitur Klausurkorrektur Microservice",
)
|
|
|
|
# CORS Middleware
#
# Allowed origins can be restricted through the CORS_ALLOW_ORIGINS environment
# variable: a comma-separated list such as
# "https://app.example.org,https://admin.example.org".
# When the variable is unset or contains no entries, the previous behaviour
# (wildcard origin) is kept so existing deployments are unaffected.
#
# NOTE(review): a wildcard origin together with allow_credentials=True is very
# permissive for an API that handles credentials - confirm the intended origins
# and set CORS_ALLOW_ORIGINS in production.
_cors_allow_origins = [
    origin.strip()
    for origin in os.environ.get("CORS_ALLOW_ORIGINS", "").split(",")
    if origin.strip()
] or ["*"]

app.add_middleware(
    CORSMiddleware,
    allow_origins=_cors_allow_origins,
    allow_credentials=True,
    allow_methods=["*"],
    allow_headers=["*"],
)
|
|
|
|
# =============================================
|
|
# INCLUDE ROUTERS
|
|
# =============================================
|
|
|
|
def _include_all_routers(application):
    """Attach every router to *application* in a fixed order.

    Each table entry is ``(router, mandatory)``. Mandatory routers are always
    included. Optional routers are placeholders that are ``None`` when their
    module could not be imported, and are included only when truthy.
    """
    registration_order = (
        (api_router, True),        # Main API routes (modular)
        (admin_router, True),      # NiBiS Ingestion
        (zeugnis_router, True),    # Zeugnis Rights-Aware Crawler
        (training_router, True),   # Training Management
        (mail_router, True),       # Unified Inbox Mail
        (trocr_router, False),     # TrOCR Handwriting OCR (optional)
        (vocab_router, True),      # Vocabulary Worksheet Generator
        (dsfa_rag_router, False),  # DSFA RAG Corpus Search (optional)
    )
    for candidate, mandatory in registration_order:
        if mandatory or candidate:
            application.include_router(candidate)


_include_all_routers(app)
|
|
|
|
|
|
# =============================================
|
|
# HEALTH CHECK
|
|
# =============================================
|
|
|
|
@app.get("/health")
async def health():
    """Return a static payload signalling that the service is up.

    Returns:
        A dict with the keys ``status`` and ``service``.
    """
    payload = dict(status="healthy", service="klausur-service")
    return payload
|
|
|
|
|
|
# =============================================
|
|
# SERVE FRONTEND
|
|
# =============================================
|
|
|
|
if os.path.exists(FRONTEND_PATH):
    # The frontend build is only served when its directory is present.
    app.mount("/assets", StaticFiles(directory=f"{FRONTEND_PATH}/assets"), name="assets")

    @app.get("/")
    async def serve_frontend():
        """Return the React frontend's index.html for the site root."""
        index_html = f"{FRONTEND_PATH}/index.html"
        return FileResponse(index_html)

    @app.get("/{path:path}")
    async def serve_frontend_routes(path: str):
        """Catch-all route for SPA routing.

        Paths starting with ``api/`` or ``health`` are answered with a 404.
        Every other path receives index.html.
        """
        # Guard clause: reserved prefixes must not fall through to the SPA.
        if path.startswith(("api/", "health")):
            from fastapi import HTTPException

            raise HTTPException(status_code=404)

        index_html = f"{FRONTEND_PATH}/index.html"
        return FileResponse(index_html)
|
|
|
|
|
|
if __name__ == "__main__":
    # Direct execution: run the app with uvicorn on all interfaces, port 8086.
    import uvicorn

    bind_options = {"host": "0.0.0.0", "port": 8086}
    uvicorn.run(app, **bind_options)
|