[split-required] Split final batch of monoliths >1000 LOC
Python (6 files in klausur-service): - rbac.py (1,132 → 4), admin_api.py (1,012 → 4) - routes/eh.py (1,111 → 4), ocr_pipeline_geometry.py (1,105 → 5) Python (2 files in backend-lehrer): - unit_api.py (1,226 → 6), game_api.py (1,129 → 5) Website (6 page files): - 4x klausur-korrektur pages (1,249-1,328 LOC each) → shared components in website/components/klausur-korrektur/ (17 shared files) - companion (1,057 → 10), magic-help (1,017 → 8) All re-export barrels preserve backward compatibility. Zero import errors verified. Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
316
klausur-service/backend/admin_nibis.py
Normal file
316
klausur-service/backend/admin_nibis.py
Normal file
@@ -0,0 +1,316 @@
|
||||
"""
|
||||
Admin API - NiBiS Ingestion & Search
|
||||
|
||||
Endpoints for NiBiS data discovery, ingestion, search, and statistics.
|
||||
Extracted from admin_api.py for file-size compliance.
|
||||
"""
|
||||
|
||||
from fastapi import APIRouter, HTTPException, BackgroundTasks, Query
|
||||
from pydantic import BaseModel
|
||||
from typing import Optional, List, Dict
|
||||
from datetime import datetime
|
||||
|
||||
from nibis_ingestion import (
|
||||
run_ingestion,
|
||||
discover_documents,
|
||||
extract_zip_files,
|
||||
DOCS_BASE_PATH,
|
||||
)
|
||||
from qdrant_service import QdrantService, search_nibis_eh, get_qdrant_client
|
||||
from eh_pipeline import generate_single_embedding
|
||||
|
||||
# Router for all NiBiS admin endpoints; mounted under /api/v1/admin.
router = APIRouter(prefix="/api/v1/admin", tags=["Admin"])


# Store for background task status.
# NOTE(review): module-level, in-process only — state is lost on restart and
# is not shared across worker processes; confirm single-worker deployment.
_ingestion_status: Dict = {
    "running": False,     # True while a background ingestion task is executing
    "last_run": None,     # ISO-8601 timestamp of the most recent run start
    "last_result": None,  # result dict from run_ingestion, or error info
}
|
||||
|
||||
|
||||
# =============================================================================
|
||||
# Models
|
||||
# =============================================================================
|
||||
|
||||
class IngestionRequest(BaseModel):
    """Parameters controlling a NiBiS ingestion run (POST /nibis/ingest)."""

    # Only ingest Erwartungshorizonte when True (passed through to run_ingestion).
    ewh_only: bool = True
    # Restrict the run to a single exam year; None = all years.
    year_filter: Optional[int] = None
    # Restrict the run to a single subject; None = all subjects.
    # Matching semantics are defined by run_ingestion — TODO confirm.
    subject_filter: Optional[str] = None
|
||||
|
||||
|
||||
class IngestionStatus(BaseModel):
    """Response shape for GET /nibis/status."""

    # True while a background ingestion task is executing.
    running: bool
    # ISO timestamp of the most recent run start, or None if never run.
    last_run: Optional[str]
    # Counters from the last completed run (None before the first run).
    documents_indexed: Optional[int]
    chunks_created: Optional[int]
    # Up to the first 10 error messages from the last run.
    errors: Optional[List[str]]
|
||||
|
||||
|
||||
class NiBiSSearchRequest(BaseModel):
    """Query parameters for semantic search (POST /nibis/search)."""

    # Free-text query; embedded before searching.
    query: str
    # Optional payload filters forwarded to search_nibis_eh.
    year: Optional[int] = None
    subject: Optional[str] = None
    niveau: Optional[str] = None
    # Maximum number of results to return.
    limit: int = 5
|
||||
|
||||
|
||||
class NiBiSSearchResult(BaseModel):
    """One hit from semantic search over the NiBiS collection."""

    # Point id in the Qdrant collection.
    id: str
    # Similarity score returned by the vector search.
    score: float
    # Chunk text, truncated to 500 characters by the endpoint.
    text: str
    # Payload metadata; any field may be absent from a given point.
    year: Optional[int]
    subject: Optional[str]
    niveau: Optional[str]
    task_number: Optional[int]
|
||||
|
||||
|
||||
class DataSourceStats(BaseModel):
    """Per-source-directory document statistics.

    NOTE(review): not referenced by any endpoint in this module — presumably
    kept for API consumers or a sibling module after the admin_api.py split;
    verify before removing.
    """

    source_dir: str
    year: int
    document_count: int
    subjects: List[str]
|
||||
|
||||
|
||||
# =============================================================================
|
||||
# Endpoints
|
||||
# =============================================================================
|
||||
|
||||
@router.get("/nibis/status", response_model=IngestionStatus)
|
||||
async def get_ingestion_status():
|
||||
"""Get status of NiBiS ingestion pipeline."""
|
||||
last_result = _ingestion_status.get("last_result") or {}
|
||||
return IngestionStatus(
|
||||
running=_ingestion_status["running"],
|
||||
last_run=_ingestion_status.get("last_run"),
|
||||
documents_indexed=last_result.get("documents_indexed"),
|
||||
chunks_created=last_result.get("chunks_created"),
|
||||
errors=(last_result.get("errors") or [])[:10],
|
||||
)
|
||||
|
||||
|
||||
@router.post("/nibis/extract-zips")
|
||||
async def extract_zip_files_endpoint():
|
||||
"""Extract all ZIP files in za-download directories."""
|
||||
try:
|
||||
extracted = extract_zip_files(DOCS_BASE_PATH)
|
||||
return {
|
||||
"status": "success",
|
||||
"extracted_count": len(extracted),
|
||||
"directories": [str(d) for d in extracted],
|
||||
}
|
||||
except Exception as e:
|
||||
raise HTTPException(status_code=500, detail=str(e))
|
||||
|
||||
|
||||
@router.get("/nibis/discover")
|
||||
async def discover_nibis_documents(
|
||||
ewh_only: bool = Query(True, description="Only return Erwartungshorizonte"),
|
||||
year: Optional[int] = Query(None, description="Filter by year"),
|
||||
subject: Optional[str] = Query(None, description="Filter by subject"),
|
||||
):
|
||||
"""
|
||||
Discover available NiBiS documents without indexing.
|
||||
Useful for previewing what will be indexed.
|
||||
"""
|
||||
try:
|
||||
documents = discover_documents(DOCS_BASE_PATH, ewh_only=ewh_only)
|
||||
|
||||
# Apply filters
|
||||
if year:
|
||||
documents = [d for d in documents if d.year == year]
|
||||
if subject:
|
||||
documents = [d for d in documents if subject.lower() in d.subject.lower()]
|
||||
|
||||
# Group by year and subject
|
||||
by_year: Dict[int, int] = {}
|
||||
by_subject: Dict[str, int] = {}
|
||||
for doc in documents:
|
||||
by_year[doc.year] = by_year.get(doc.year, 0) + 1
|
||||
by_subject[doc.subject] = by_subject.get(doc.subject, 0) + 1
|
||||
|
||||
return {
|
||||
"total_documents": len(documents),
|
||||
"by_year": dict(sorted(by_year.items())),
|
||||
"by_subject": dict(sorted(by_subject.items(), key=lambda x: -x[1])),
|
||||
"sample_documents": [
|
||||
{
|
||||
"id": d.id,
|
||||
"filename": d.raw_filename,
|
||||
"year": d.year,
|
||||
"subject": d.subject,
|
||||
"niveau": d.niveau,
|
||||
"doc_type": d.doc_type,
|
||||
}
|
||||
for d in documents[:20]
|
||||
],
|
||||
}
|
||||
except Exception as e:
|
||||
raise HTTPException(status_code=500, detail=str(e))
|
||||
|
||||
|
||||
@router.post("/nibis/ingest")
|
||||
async def start_ingestion(
|
||||
request: IngestionRequest,
|
||||
background_tasks: BackgroundTasks,
|
||||
):
|
||||
"""
|
||||
Start NiBiS data ingestion in background.
|
||||
"""
|
||||
if _ingestion_status["running"]:
|
||||
raise HTTPException(
|
||||
status_code=409,
|
||||
detail="Ingestion already running. Check /nibis/status for progress."
|
||||
)
|
||||
|
||||
async def run_ingestion_task():
|
||||
global _ingestion_status
|
||||
_ingestion_status["running"] = True
|
||||
_ingestion_status["last_run"] = datetime.now().isoformat()
|
||||
|
||||
try:
|
||||
result = await run_ingestion(
|
||||
ewh_only=request.ewh_only,
|
||||
dry_run=False,
|
||||
year_filter=request.year_filter,
|
||||
subject_filter=request.subject_filter,
|
||||
)
|
||||
_ingestion_status["last_result"] = result
|
||||
except Exception as e:
|
||||
_ingestion_status["last_result"] = {"error": str(e), "errors": [str(e)]}
|
||||
finally:
|
||||
_ingestion_status["running"] = False
|
||||
|
||||
background_tasks.add_task(run_ingestion_task)
|
||||
|
||||
return {
|
||||
"status": "started",
|
||||
"message": "Ingestion started in background. Check /nibis/status for progress.",
|
||||
"filters": {
|
||||
"ewh_only": request.ewh_only,
|
||||
"year": request.year_filter,
|
||||
"subject": request.subject_filter,
|
||||
},
|
||||
}
|
||||
|
||||
|
||||
@router.post("/nibis/search", response_model=List[NiBiSSearchResult])
|
||||
async def search_nibis(request: NiBiSSearchRequest):
|
||||
"""
|
||||
Semantic search in NiBiS Erwartungshorizonte.
|
||||
"""
|
||||
try:
|
||||
query_embedding = await generate_single_embedding(request.query)
|
||||
|
||||
if not query_embedding:
|
||||
raise HTTPException(status_code=500, detail="Failed to generate embedding")
|
||||
|
||||
results = await search_nibis_eh(
|
||||
query_embedding=query_embedding,
|
||||
year=request.year,
|
||||
subject=request.subject,
|
||||
niveau=request.niveau,
|
||||
limit=request.limit,
|
||||
)
|
||||
|
||||
return [
|
||||
NiBiSSearchResult(
|
||||
id=r["id"],
|
||||
score=r["score"],
|
||||
text=r.get("text", "")[:500],
|
||||
year=r.get("year"),
|
||||
subject=r.get("subject"),
|
||||
niveau=r.get("niveau"),
|
||||
task_number=r.get("task_number"),
|
||||
)
|
||||
for r in results
|
||||
]
|
||||
except Exception as e:
|
||||
raise HTTPException(status_code=500, detail=str(e))
|
||||
|
||||
|
||||
@router.get("/nibis/collections")
|
||||
async def get_collections_info():
|
||||
"""Get information about all Qdrant collections."""
|
||||
try:
|
||||
client = get_qdrant_client()
|
||||
collections = client.get_collections().collections
|
||||
|
||||
result = []
|
||||
for c in collections:
|
||||
try:
|
||||
info = client.get_collection(c.name)
|
||||
result.append({
|
||||
"name": c.name,
|
||||
"vectors_count": info.vectors_count,
|
||||
"points_count": info.points_count,
|
||||
"status": info.status.value,
|
||||
})
|
||||
except Exception as e:
|
||||
result.append({
|
||||
"name": c.name,
|
||||
"error": str(e),
|
||||
})
|
||||
|
||||
return {"collections": result}
|
||||
except Exception as e:
|
||||
raise HTTPException(status_code=500, detail=str(e))
|
||||
|
||||
|
||||
@router.get("/nibis/stats")
|
||||
async def get_nibis_stats():
|
||||
"""Get detailed statistics about indexed NiBiS data."""
|
||||
try:
|
||||
qdrant = QdrantService()
|
||||
stats = await qdrant.get_stats("bp_nibis_eh")
|
||||
|
||||
if "error" in stats:
|
||||
return {
|
||||
"indexed": False,
|
||||
"message": "NiBiS collection not yet created. Run ingestion first.",
|
||||
}
|
||||
|
||||
client = get_qdrant_client()
|
||||
scroll_result = client.scroll(
|
||||
collection_name="bp_nibis_eh",
|
||||
limit=1000,
|
||||
with_payload=True,
|
||||
with_vectors=False,
|
||||
)
|
||||
|
||||
years = set()
|
||||
subjects = set()
|
||||
niveaus = set()
|
||||
|
||||
for point in scroll_result[0]:
|
||||
if point.payload:
|
||||
if "year" in point.payload:
|
||||
years.add(point.payload["year"])
|
||||
if "subject" in point.payload:
|
||||
subjects.add(point.payload["subject"])
|
||||
if "niveau" in point.payload:
|
||||
niveaus.add(point.payload["niveau"])
|
||||
|
||||
return {
|
||||
"indexed": True,
|
||||
"total_chunks": stats.get("points_count", 0),
|
||||
"years": sorted(list(years)),
|
||||
"subjects": sorted(list(subjects)),
|
||||
"niveaus": sorted(list(niveaus)),
|
||||
}
|
||||
except Exception as e:
|
||||
return {
|
||||
"indexed": False,
|
||||
"error": str(e),
|
||||
}
|
||||
|
||||
|
||||
@router.delete("/nibis/collection")
|
||||
async def delete_nibis_collection():
|
||||
"""Delete the entire NiBiS collection. WARNING: removes all indexed data!"""
|
||||
try:
|
||||
client = get_qdrant_client()
|
||||
client.delete_collection("bp_nibis_eh")
|
||||
return {"status": "deleted", "collection": "bp_nibis_eh"}
|
||||
except Exception as e:
|
||||
raise HTTPException(status_code=500, detail=str(e))
|
||||
Reference in New Issue
Block a user