Python (6 files in klausur-service): - rbac.py (1,132 → 4), admin_api.py (1,012 → 4) - routes/eh.py (1,111 → 4), ocr_pipeline_geometry.py (1,105 → 5) Python (2 files in backend-lehrer): - unit_api.py (1,226 → 6), game_api.py (1,129 → 5) Website (6 page files): - 4x klausur-korrektur pages (1,249-1,328 LOC each) → shared components in website/components/klausur-korrektur/ (17 shared files) - companion (1,057 → 10), magic-help (1,017 → 8) All re-export barrels preserve backward compatibility. Zero import errors verified. Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
282 lines
8.4 KiB
Python
"""
|
|
Admin API - RAG Upload & Metrics
|
|
|
|
Endpoints for uploading documents, tracking uploads, RAG metrics,
|
|
search feedback, storage stats, and service initialization.
|
|
Extracted from admin_api.py for file-size compliance.
|
|
"""
|
|
|
|
from fastapi import APIRouter, HTTPException, BackgroundTasks, Query, UploadFile, File, Form
|
|
from pydantic import BaseModel
|
|
from typing import Optional, List, Dict
|
|
from datetime import datetime
|
|
from pathlib import Path
|
|
import zipfile
|
|
import tempfile
|
|
import os
|
|
|
|
from nibis_ingestion import run_ingestion, DOCS_BASE_PATH
|
|
|
|
# Import ingestion status from nibis module for auto-ingest
|
|
from admin_nibis import _ingestion_status
|
|
|
|
# Optional: MinIO and PostgreSQL integrations
|
|
try:
|
|
from minio_storage import upload_rag_document, get_storage_stats, init_minio_bucket
|
|
MINIO_AVAILABLE = True
|
|
except ImportError:
|
|
MINIO_AVAILABLE = False
|
|
|
|
try:
|
|
from metrics_db import (
|
|
init_metrics_tables, store_feedback, log_search, log_upload,
|
|
calculate_metrics, get_recent_feedback, get_upload_history
|
|
)
|
|
METRICS_DB_AVAILABLE = True
|
|
except ImportError:
|
|
METRICS_DB_AVAILABLE = False
|
|
|
|
router = APIRouter(prefix="/api/v1/admin", tags=["Admin"])
|
|
|
|
# Upload directory configuration
|
|
RAG_UPLOAD_BASE = Path(os.getenv("RAG_UPLOAD_BASE", str(DOCS_BASE_PATH)))
|
|
|
|
# Store for upload tracking
|
|
_upload_history: List[Dict] = []
|
|
|
|
|
|
class UploadResult(BaseModel):
    """Response body for the POST /rag/upload endpoint."""

    # Overall outcome of the upload ("success" on the happy path).
    status: str
    # Number of files received in the request (the endpoint currently
    # always reports 1, since it accepts a single UploadFile).
    files_received: int
    # Number of PDF files written to the target directory.
    pdfs_extracted: int
    # Filesystem directory the PDFs were stored in.
    target_directory: str
    # Non-fatal error messages collected during processing.
    errors: List[str]
|
|
|
|
|
|
def _extract_zip_pdfs(zip_bytes: bytes, target_dir: Path) -> int:
    """Extract every PDF member of a ZIP archive into *target_dir*.

    Only the basename of each member is used, so archive-internal paths
    (including path-traversal attempts like ``../x.pdf``) are flattened.
    macOS resource-fork entries under ``__MACOSX`` are skipped.

    Returns the number of PDF files written.
    """
    extracted = 0
    # Spool the upload to disk so zipfile can seek in it.
    with tempfile.NamedTemporaryFile(delete=False, suffix=".zip") as tmp:
        tmp.write(zip_bytes)
        tmp_path = tmp.name
    try:
        with zipfile.ZipFile(tmp_path, 'r') as zf:
            for member in zf.namelist():
                if member.lower().endswith(".pdf") and not member.startswith("__MACOSX"):
                    pdf_name = Path(member).name
                    if pdf_name:
                        target_path = target_dir / pdf_name
                        with zf.open(member) as src, open(target_path, 'wb') as dst:
                            dst.write(src.read())
                        extracted += 1
    finally:
        os.unlink(tmp_path)
    return extracted


def _record_upload(filename: str, collection: str, year: int,
                   pdfs_extracted: int, target_dir: Path) -> None:
    """Append an upload record to the in-memory history (capped at 100)."""
    _upload_history.append({
        "timestamp": datetime.now().isoformat(),
        "filename": filename,
        "collection": collection,
        "year": year,
        "pdfs_extracted": pdfs_extracted,
        "target_directory": str(target_dir),
    })
    # Keep only the last 100 uploads in memory.
    if len(_upload_history) > 100:
        _upload_history.pop(0)


@router.post("/rag/upload", response_model=UploadResult)
async def upload_rag_documents(
    background_tasks: BackgroundTasks,
    file: UploadFile = File(...),
    collection: str = Form(default="bp_nibis_eh"),
    year: Optional[int] = Form(default=None),
    auto_ingest: bool = Form(default=False),
):
    """
    Upload documents for RAG indexing.

    Supports:
    - ZIP archives (automatically extracted)
    - Individual PDF files

    Args:
        background_tasks: FastAPI task queue used for optional auto-ingestion.
        file: The uploaded .zip or .pdf file.
        collection: RAG collection name recorded with the upload.
        year: Target year for the za-download/<year>/ directory
            (defaults to the current year).
        auto_ingest: If True and no ingestion run is in progress,
            schedule a background ingestion for the target year.

    Raises:
        HTTPException 400: if the file is neither .zip nor .pdf.
        HTTPException 500: on any unexpected processing error.
    """
    errors: List[str] = []
    pdfs_extracted = 0

    # Determine target year (fall back to the current year).
    target_year = year or datetime.now().year

    # Target directory: za-download/YYYY/
    target_dir = RAG_UPLOAD_BASE / "za-download" / str(target_year)
    target_dir.mkdir(parents=True, exist_ok=True)

    try:
        filename = file.filename or "upload"
        lowered = filename.lower()

        if lowered.endswith(".zip"):
            content = await file.read()
            pdfs_extracted = _extract_zip_pdfs(content, target_dir)

        elif lowered.endswith(".pdf"):
            # SECURITY FIX: use only the basename so a crafted client
            # filename (e.g. "../../evil.pdf") cannot escape target_dir —
            # mirrors what the ZIP branch already does.
            safe_name = Path(filename).name or "upload.pdf"
            content = await file.read()
            (target_dir / safe_name).write_bytes(content)
            pdfs_extracted = 1

        else:
            # BUGFIX: the message previously contained no placeholder and
            # emitted the literal text "(unknown)" instead of the filename.
            raise HTTPException(
                status_code=400,
                detail=f"Unsupported file type: {filename}. Only .zip and .pdf are allowed.",
            )

        # Track upload in memory.
        _record_upload(filename, collection, target_year, pdfs_extracted, target_dir)

        # Store in PostgreSQL if available.
        if METRICS_DB_AVAILABLE:
            await log_upload(
                filename=filename,
                collection_name=collection,
                year=target_year,
                pdfs_extracted=pdfs_extracted,
                minio_path=str(target_dir),
            )

        # Auto-ingest if requested and no run is already active.
        if auto_ingest and not _ingestion_status["running"]:

            async def run_auto_ingest():
                # _ingestion_status is a shared dict imported from
                # admin_nibis; we only mutate its items, so no `global`
                # declaration is needed (the original one was a no-op).
                _ingestion_status["running"] = True
                _ingestion_status["last_run"] = datetime.now().isoformat()
                try:
                    result = await run_ingestion(
                        ewh_only=True,
                        dry_run=False,
                        year_filter=target_year,
                    )
                    _ingestion_status["last_result"] = result
                except Exception as e:
                    _ingestion_status["last_result"] = {"error": str(e), "errors": [str(e)]}
                finally:
                    _ingestion_status["running"] = False

            background_tasks.add_task(run_auto_ingest)

        return UploadResult(
            status="success",
            files_received=1,
            pdfs_extracted=pdfs_extracted,
            target_directory=str(target_dir),
            errors=errors,
        )

    except HTTPException:
        raise
    except Exception as e:
        errors.append(str(e))
        raise HTTPException(status_code=500, detail=str(e))
|
|
|
|
|
|
@router.get("/rag/upload/history")
async def get_upload_history_endpoint(limit: int = Query(default=20, le=100)):
    """Return the most recent uploads, newest first, plus the total count."""
    recent = list(reversed(_upload_history[-limit:]))
    return {"uploads": recent, "total": len(_upload_history)}
|
|
|
|
|
|
@router.get("/rag/metrics")
async def get_rag_metrics(
    collection: Optional[str] = Query(default=None),
    days: int = Query(default=7, le=90),
):
    """Get RAG quality metrics.

    Prefers live metrics from PostgreSQL; falls back to static
    placeholder numbers so the admin dashboard still renders when
    the database is unreachable.
    """
    if METRICS_DB_AVAILABLE:
        live = await calculate_metrics(collection_name=collection, days=days)
        if live.get("connected"):
            return live

    # Fallback: placeholder metrics (marked via "connected": False).
    placeholder = {
        "precision_at_5": 0.78,
        "recall_at_10": 0.85,
        "mrr": 0.72,
        "avg_latency_ms": 52,
        "total_ratings": len(_upload_history),
        "error_rate": 0.3,
        "score_distribution": {
            "0.9+": 23,
            "0.7-0.9": 41,
            "0.5-0.7": 28,
            "<0.5": 8,
        },
        "note": "Placeholder metrics - PostgreSQL not connected",
        "connected": False,
    }
    return placeholder
|
|
|
|
|
|
@router.post("/rag/search/feedback")
async def submit_search_feedback(
    result_id: str = Form(...),
    rating: int = Form(..., ge=1, le=5),
    notes: Optional[str] = Form(default=None),
    query: Optional[str] = Form(default=None),
    collection: Optional[str] = Form(default=None),
    score: Optional[float] = Form(default=None),
):
    """Submit feedback for a search result.

    Persists to PostgreSQL when the metrics DB is available; otherwise
    the feedback is only echoed back ("received", persisted=False).
    """
    record = {
        "timestamp": datetime.now().isoformat(),
        "result_id": result_id,
        "rating": rating,
        "notes": notes,
    }

    persisted = False
    if METRICS_DB_AVAILABLE:
        persisted = await store_feedback(
            result_id=result_id,
            rating=rating,
            query_text=query,
            collection_name=collection,
            score=score,
            notes=notes,
        )

    status = "stored" if persisted else "received"
    return {"status": status, "feedback": record, "persisted": persisted}
|
|
|
|
|
|
@router.get("/rag/storage/stats")
async def get_storage_statistics():
    """Get MinIO storage statistics (error payload when MinIO is absent)."""
    if not MINIO_AVAILABLE:
        return {
            "error": "MinIO not available",
            "connected": False,
        }
    return await get_storage_stats()
|
|
|
|
|
|
@router.post("/rag/init")
async def initialize_rag_services():
    """Initialize RAG services (MinIO bucket, PostgreSQL tables).

    Each service is attempted only if its integration imported
    successfully; the per-service boolean results are returned.
    """
    results = {"minio": False, "postgres": False}

    if MINIO_AVAILABLE:
        results["minio"] = await init_minio_bucket()
    if METRICS_DB_AVAILABLE:
        results["postgres"] = await init_metrics_tables()

    return {"status": "initialized", "services": results}
|