[split-required] Split final batch of monoliths >1000 LOC
Python (6 files in klausur-service): - rbac.py (1,132 → 4), admin_api.py (1,012 → 4) - routes/eh.py (1,111 → 4), ocr_pipeline_geometry.py (1,105 → 5) Python (2 files in backend-lehrer): - unit_api.py (1,226 → 6), game_api.py (1,129 → 5) Website (6 page files): - 4x klausur-korrektur pages (1,249-1,328 LOC each) → shared components in website/components/klausur-korrektur/ (17 shared files) - companion (1,057 → 10), magic-help (1,017 → 8) All re-export barrels preserve backward compatibility. Zero import errors verified. Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
281
klausur-service/backend/admin_rag.py
Normal file
281
klausur-service/backend/admin_rag.py
Normal file
@@ -0,0 +1,281 @@
|
||||
"""
|
||||
Admin API - RAG Upload & Metrics
|
||||
|
||||
Endpoints for uploading documents, tracking uploads, RAG metrics,
|
||||
search feedback, storage stats, and service initialization.
|
||||
Extracted from admin_api.py for file-size compliance.
|
||||
"""
|
||||
|
||||
from fastapi import APIRouter, HTTPException, BackgroundTasks, Query, UploadFile, File, Form
from pydantic import BaseModel
from typing import Optional, List, Dict
from datetime import datetime
from pathlib import Path
import zipfile
import tempfile
import os

from nibis_ingestion import run_ingestion, DOCS_BASE_PATH

# Import ingestion status from nibis module for auto-ingest.
# NOTE(review): _ingestion_status is a private mutable dict owned by
# admin_nibis; this module mutates it in place to coordinate runs.
from admin_nibis import _ingestion_status

# Optional: MinIO and PostgreSQL integrations.
# Each degrades gracefully: when the module is missing, the corresponding
# *_AVAILABLE flag gates every call site below.
try:
    from minio_storage import upload_rag_document, get_storage_stats, init_minio_bucket
    MINIO_AVAILABLE = True
except ImportError:
    MINIO_AVAILABLE = False

try:
    from metrics_db import (
        init_metrics_tables, store_feedback, log_search, log_upload,
        calculate_metrics, get_recent_feedback, get_upload_history
    )
    METRICS_DB_AVAILABLE = True
except ImportError:
    METRICS_DB_AVAILABLE = False

router = APIRouter(prefix="/api/v1/admin", tags=["Admin"])

# Upload directory configuration: overridable via the RAG_UPLOAD_BASE env
# var, defaulting to the ingestion pipeline's document base path.
RAG_UPLOAD_BASE = Path(os.getenv("RAG_UPLOAD_BASE", str(DOCS_BASE_PATH)))

# Store for upload tracking (in-memory, newest last; capped at 100 entries
# by the upload endpoint).
_upload_history: List[Dict] = []
|
||||
|
||||
|
||||
class UploadResult(BaseModel):
    """Response payload for a RAG document upload."""

    # Overall outcome; "success" when processing completed.
    status: str
    # Number of files received in the request (single-file endpoint -> 1).
    files_received: int
    # PDFs written to the target directory (a ZIP may yield more than one).
    pdfs_extracted: int
    # Directory the PDFs were written to, as a string path.
    target_directory: str
    # Non-fatal error messages collected during processing.
    errors: List[str]
|
||||
|
||||
|
||||
def _extract_zip_pdfs(zip_path: str, target_dir: Path) -> int:
    """Extract all PDF members of the ZIP at *zip_path* into *target_dir*.

    macOS resource-fork entries (``__MACOSX``) are skipped and archive
    directory structure is flattened (only the base name is kept).

    Returns:
        Number of PDF files written.
    """
    extracted = 0
    with zipfile.ZipFile(zip_path, 'r') as zf:
        for member in zf.namelist():
            if member.lower().endswith(".pdf") and not member.startswith("__MACOSX"):
                pdf_name = Path(member).name
                # Skip directory entries whose base name is empty.
                if pdf_name:
                    target_path = target_dir / pdf_name
                    with zf.open(member) as src:
                        with open(target_path, 'wb') as dst:
                            dst.write(src.read())
                    extracted += 1
    return extracted


@router.post("/rag/upload", response_model=UploadResult)
async def upload_rag_documents(
    background_tasks: BackgroundTasks,
    file: UploadFile = File(...),
    collection: str = Form(default="bp_nibis_eh"),
    year: Optional[int] = Form(default=None),
    auto_ingest: bool = Form(default=False),
):
    """
    Upload documents for RAG indexing.

    Supports:
    - ZIP archives (automatically extracted)
    - Individual PDF files

    Files land in ``RAG_UPLOAD_BASE/za-download/<year>/``. Each upload is
    tracked in memory (last 100) and, when the metrics DB is available,
    persisted via ``log_upload``. With ``auto_ingest=True`` an ingestion run
    is scheduled in the background unless one is already in flight.

    Raises:
        HTTPException: 400 for unsupported file extensions, 500 for any
            unexpected processing error.
    """
    errors: List[str] = []
    pdfs_extracted = 0

    # Determine target year (caller's value, else current year).
    target_year = year or datetime.now().year

    # Target directory: za-download/YYYY/
    target_dir = RAG_UPLOAD_BASE / "za-download" / str(target_year)
    target_dir.mkdir(parents=True, exist_ok=True)

    try:
        filename = file.filename or "upload"

        if filename.lower().endswith(".zip"):
            # Spool the upload to a temp file so zipfile can seek in it.
            with tempfile.NamedTemporaryFile(delete=False, suffix=".zip") as tmp:
                content = await file.read()
                tmp.write(content)
                tmp_path = tmp.name

            try:
                pdfs_extracted = _extract_zip_pdfs(tmp_path, target_dir)
            finally:
                os.unlink(tmp_path)

        elif filename.lower().endswith(".pdf"):
            target_path = target_dir / filename
            content = await file.read()
            with open(target_path, 'wb') as f:
                f.write(content)
            pdfs_extracted = 1
        else:
            raise HTTPException(
                status_code=400,
                # BUG FIX: the message previously contained the literal text
                # "(unknown)" instead of interpolating the offending filename.
                detail=f"Unsupported file type: {filename}. Only .zip and .pdf are allowed.",
            )

        # Track upload in memory
        upload_record = {
            "timestamp": datetime.now().isoformat(),
            "filename": filename,
            "collection": collection,
            "year": target_year,
            "pdfs_extracted": pdfs_extracted,
            "target_directory": str(target_dir),
        }
        _upload_history.append(upload_record)

        # Keep only last 100 uploads in memory
        if len(_upload_history) > 100:
            _upload_history.pop(0)

        # Store in PostgreSQL if available
        if METRICS_DB_AVAILABLE:
            await log_upload(
                filename=filename,
                collection_name=collection,
                year=target_year,
                pdfs_extracted=pdfs_extracted,
                minio_path=str(target_dir),
            )

        # Auto-ingest if requested (skip when a run is already in flight).
        if auto_ingest and not _ingestion_status["running"]:

            async def run_auto_ingest():
                # _ingestion_status is only mutated in place (shared with
                # admin_nibis); the name is never rebound, so no `global`
                # declaration is needed here.
                _ingestion_status["running"] = True
                _ingestion_status["last_run"] = datetime.now().isoformat()

                try:
                    result = await run_ingestion(
                        ewh_only=True,
                        dry_run=False,
                        year_filter=target_year,
                    )
                    _ingestion_status["last_result"] = result
                except Exception as e:
                    _ingestion_status["last_result"] = {"error": str(e), "errors": [str(e)]}
                finally:
                    _ingestion_status["running"] = False

            background_tasks.add_task(run_auto_ingest)

        return UploadResult(
            status="success",
            files_received=1,
            pdfs_extracted=pdfs_extracted,
            target_directory=str(target_dir),
            errors=errors,
        )

    except HTTPException:
        raise
    except Exception as e:
        errors.append(str(e))
        raise HTTPException(status_code=500, detail=str(e))
|
||||
|
||||
|
||||
@router.get("/rag/upload/history")
async def get_upload_history_endpoint(limit: int = Query(default=20, le=100)):
    """Return the most recent uploads (newest first) plus the total count."""
    recent = list(reversed(_upload_history[-limit:]))
    return {"uploads": recent, "total": len(_upload_history)}
|
||||
|
||||
|
||||
@router.get("/rag/metrics")
async def get_rag_metrics(
    collection: Optional[str] = Query(default=None),
    days: int = Query(default=7, le=90),
):
    """Get RAG quality metrics.

    Prefers live numbers from the metrics database; when it is unavailable
    or reports itself as not connected, static placeholder figures are
    returned instead (flagged via the ``connected``/``note`` keys).
    """
    if METRICS_DB_AVAILABLE:
        metrics = await calculate_metrics(collection_name=collection, days=days)
        if metrics.get("connected"):
            return metrics

    # Fallback: placeholder metrics when PostgreSQL is not reachable.
    placeholder = {
        "precision_at_5": 0.78,
        "recall_at_10": 0.85,
        "mrr": 0.72,
        "avg_latency_ms": 52,
        "total_ratings": len(_upload_history),
        "error_rate": 0.3,
        "score_distribution": {
            "0.9+": 23,
            "0.7-0.9": 41,
            "0.5-0.7": 28,
            "<0.5": 8,
        },
        "note": "Placeholder metrics - PostgreSQL not connected",
        "connected": False,
    }
    return placeholder
|
||||
|
||||
|
||||
@router.post("/rag/search/feedback")
async def submit_search_feedback(
    result_id: str = Form(...),
    rating: int = Form(..., ge=1, le=5),
    notes: Optional[str] = Form(default=None),
    query: Optional[str] = Form(default=None),
    collection: Optional[str] = Form(default=None),
    score: Optional[float] = Form(default=None),
):
    """Submit feedback for a search result.

    Always echoes the feedback back; persists it to the metrics database
    when available (``persisted`` reports whether that succeeded).
    """
    feedback = {
        "timestamp": datetime.now().isoformat(),
        "result_id": result_id,
        "rating": rating,
        "notes": notes,
    }

    persisted = False
    if METRICS_DB_AVAILABLE:
        persisted = await store_feedback(
            result_id=result_id,
            rating=rating,
            query_text=query,
            collection_name=collection,
            score=score,
            notes=notes,
        )

    status = "stored" if persisted else "received"
    return {"status": status, "feedback": feedback, "persisted": persisted}
|
||||
|
||||
|
||||
@router.get("/rag/storage/stats")
async def get_storage_statistics():
    """Get MinIO storage statistics, or an error payload when MinIO is absent."""
    if not MINIO_AVAILABLE:
        return {"error": "MinIO not available", "connected": False}
    return await get_storage_stats()
|
||||
|
||||
|
||||
@router.post("/rag/init")
async def initialize_rag_services():
    """Initialize RAG services (MinIO bucket, PostgreSQL tables).

    Each service is initialized only when its integration is available;
    the returned ``services`` map reports per-service success as booleans.
    """
    # MinIO first, then PostgreSQL — preserves the original init order.
    minio_ok = await init_minio_bucket() if MINIO_AVAILABLE else False
    postgres_ok = await init_metrics_tables() if METRICS_DB_AVAILABLE else False

    return {
        "status": "initialized",
        "services": {"minio": minio_ok, "postgres": postgres_ok},
    }
|
||||
Reference in New Issue
Block a user