[split-required] Split final batch of monoliths >1000 LOC
Python (6 files in klausur-service): - rbac.py (1,132 → 4), admin_api.py (1,012 → 4) - routes/eh.py (1,111 → 4), ocr_pipeline_geometry.py (1,105 → 5) Python (2 files in backend-lehrer): - unit_api.py (1,226 → 6), game_api.py (1,129 → 5) Website (6 page files): - 4x klausur-korrektur pages (1,249-1,328 LOC each) → shared components in website/components/klausur-korrektur/ (17 shared files) - companion (1,057 → 10), magic-help (1,017 → 8) All re-export barrels preserve backward compatibility. Zero import errors verified. Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
316
klausur-service/backend/admin_nibis.py
Normal file
316
klausur-service/backend/admin_nibis.py
Normal file
@@ -0,0 +1,316 @@
|
||||
"""
|
||||
Admin API - NiBiS Ingestion & Search
|
||||
|
||||
Endpoints for NiBiS data discovery, ingestion, search, and statistics.
|
||||
Extracted from admin_api.py for file-size compliance.
|
||||
"""
|
||||
|
||||
from fastapi import APIRouter, HTTPException, BackgroundTasks, Query
|
||||
from pydantic import BaseModel
|
||||
from typing import Optional, List, Dict
|
||||
from datetime import datetime
|
||||
|
||||
from nibis_ingestion import (
|
||||
run_ingestion,
|
||||
discover_documents,
|
||||
extract_zip_files,
|
||||
DOCS_BASE_PATH,
|
||||
)
|
||||
from qdrant_service import QdrantService, search_nibis_eh, get_qdrant_client
|
||||
from eh_pipeline import generate_single_embedding
|
||||
|
||||
# Router for all NiBiS admin endpoints; mounted under /api/v1/admin.
router = APIRouter(prefix="/api/v1/admin", tags=["Admin"])


# Store for background task status.
# NOTE(review): module-level, in-process only — state is lost on restart and
# is not shared across worker processes; confirm single-worker deployment.
_ingestion_status: Dict = {
    "running": False,     # True while a background ingestion task is executing
    "last_run": None,     # ISO-8601 timestamp of the most recent run start
    "last_result": None,  # result dict from run_ingestion, or error info
}
|
||||
|
||||
|
||||
# =============================================================================
|
||||
# Models
|
||||
# =============================================================================
|
||||
|
||||
class IngestionRequest(BaseModel):
    """Parameters controlling a NiBiS ingestion run (POST /nibis/ingest)."""

    # Only ingest Erwartungshorizonte when True (passed through to run_ingestion).
    ewh_only: bool = True
    # Restrict the run to a single exam year; None = all years.
    year_filter: Optional[int] = None
    # Restrict the run to a single subject; None = all subjects.
    # Matching semantics are defined by run_ingestion — TODO confirm.
    subject_filter: Optional[str] = None
|
||||
|
||||
|
||||
class IngestionStatus(BaseModel):
    """Response shape for GET /nibis/status."""

    # True while a background ingestion task is executing.
    running: bool
    # ISO timestamp of the most recent run start, or None if never run.
    last_run: Optional[str]
    # Counters from the last completed run (None before the first run).
    documents_indexed: Optional[int]
    chunks_created: Optional[int]
    # Up to the first 10 error messages from the last run.
    errors: Optional[List[str]]
|
||||
|
||||
|
||||
class NiBiSSearchRequest(BaseModel):
    """Query parameters for semantic search (POST /nibis/search)."""

    # Free-text query; embedded before searching.
    query: str
    # Optional payload filters forwarded to search_nibis_eh.
    year: Optional[int] = None
    subject: Optional[str] = None
    niveau: Optional[str] = None
    # Maximum number of results to return.
    limit: int = 5
|
||||
|
||||
|
||||
class NiBiSSearchResult(BaseModel):
    """One hit from semantic search over the NiBiS collection."""

    # Point id in the Qdrant collection.
    id: str
    # Similarity score returned by the vector search.
    score: float
    # Chunk text, truncated to 500 characters by the endpoint.
    text: str
    # Payload metadata; any field may be absent from a given point.
    year: Optional[int]
    subject: Optional[str]
    niveau: Optional[str]
    task_number: Optional[int]
|
||||
|
||||
|
||||
class DataSourceStats(BaseModel):
    """Per-source-directory document statistics.

    NOTE(review): not referenced by any endpoint in this module — presumably
    kept for API consumers or a sibling module after the admin_api.py split;
    verify before removing.
    """

    source_dir: str
    year: int
    document_count: int
    subjects: List[str]
|
||||
|
||||
|
||||
# =============================================================================
|
||||
# Endpoints
|
||||
# =============================================================================
|
||||
|
||||
@router.get("/nibis/status", response_model=IngestionStatus)
|
||||
async def get_ingestion_status():
|
||||
"""Get status of NiBiS ingestion pipeline."""
|
||||
last_result = _ingestion_status.get("last_result") or {}
|
||||
return IngestionStatus(
|
||||
running=_ingestion_status["running"],
|
||||
last_run=_ingestion_status.get("last_run"),
|
||||
documents_indexed=last_result.get("documents_indexed"),
|
||||
chunks_created=last_result.get("chunks_created"),
|
||||
errors=(last_result.get("errors") or [])[:10],
|
||||
)
|
||||
|
||||
|
||||
@router.post("/nibis/extract-zips")
|
||||
async def extract_zip_files_endpoint():
|
||||
"""Extract all ZIP files in za-download directories."""
|
||||
try:
|
||||
extracted = extract_zip_files(DOCS_BASE_PATH)
|
||||
return {
|
||||
"status": "success",
|
||||
"extracted_count": len(extracted),
|
||||
"directories": [str(d) for d in extracted],
|
||||
}
|
||||
except Exception as e:
|
||||
raise HTTPException(status_code=500, detail=str(e))
|
||||
|
||||
|
||||
@router.get("/nibis/discover")
|
||||
async def discover_nibis_documents(
|
||||
ewh_only: bool = Query(True, description="Only return Erwartungshorizonte"),
|
||||
year: Optional[int] = Query(None, description="Filter by year"),
|
||||
subject: Optional[str] = Query(None, description="Filter by subject"),
|
||||
):
|
||||
"""
|
||||
Discover available NiBiS documents without indexing.
|
||||
Useful for previewing what will be indexed.
|
||||
"""
|
||||
try:
|
||||
documents = discover_documents(DOCS_BASE_PATH, ewh_only=ewh_only)
|
||||
|
||||
# Apply filters
|
||||
if year:
|
||||
documents = [d for d in documents if d.year == year]
|
||||
if subject:
|
||||
documents = [d for d in documents if subject.lower() in d.subject.lower()]
|
||||
|
||||
# Group by year and subject
|
||||
by_year: Dict[int, int] = {}
|
||||
by_subject: Dict[str, int] = {}
|
||||
for doc in documents:
|
||||
by_year[doc.year] = by_year.get(doc.year, 0) + 1
|
||||
by_subject[doc.subject] = by_subject.get(doc.subject, 0) + 1
|
||||
|
||||
return {
|
||||
"total_documents": len(documents),
|
||||
"by_year": dict(sorted(by_year.items())),
|
||||
"by_subject": dict(sorted(by_subject.items(), key=lambda x: -x[1])),
|
||||
"sample_documents": [
|
||||
{
|
||||
"id": d.id,
|
||||
"filename": d.raw_filename,
|
||||
"year": d.year,
|
||||
"subject": d.subject,
|
||||
"niveau": d.niveau,
|
||||
"doc_type": d.doc_type,
|
||||
}
|
||||
for d in documents[:20]
|
||||
],
|
||||
}
|
||||
except Exception as e:
|
||||
raise HTTPException(status_code=500, detail=str(e))
|
||||
|
||||
|
||||
@router.post("/nibis/ingest")
|
||||
async def start_ingestion(
|
||||
request: IngestionRequest,
|
||||
background_tasks: BackgroundTasks,
|
||||
):
|
||||
"""
|
||||
Start NiBiS data ingestion in background.
|
||||
"""
|
||||
if _ingestion_status["running"]:
|
||||
raise HTTPException(
|
||||
status_code=409,
|
||||
detail="Ingestion already running. Check /nibis/status for progress."
|
||||
)
|
||||
|
||||
async def run_ingestion_task():
|
||||
global _ingestion_status
|
||||
_ingestion_status["running"] = True
|
||||
_ingestion_status["last_run"] = datetime.now().isoformat()
|
||||
|
||||
try:
|
||||
result = await run_ingestion(
|
||||
ewh_only=request.ewh_only,
|
||||
dry_run=False,
|
||||
year_filter=request.year_filter,
|
||||
subject_filter=request.subject_filter,
|
||||
)
|
||||
_ingestion_status["last_result"] = result
|
||||
except Exception as e:
|
||||
_ingestion_status["last_result"] = {"error": str(e), "errors": [str(e)]}
|
||||
finally:
|
||||
_ingestion_status["running"] = False
|
||||
|
||||
background_tasks.add_task(run_ingestion_task)
|
||||
|
||||
return {
|
||||
"status": "started",
|
||||
"message": "Ingestion started in background. Check /nibis/status for progress.",
|
||||
"filters": {
|
||||
"ewh_only": request.ewh_only,
|
||||
"year": request.year_filter,
|
||||
"subject": request.subject_filter,
|
||||
},
|
||||
}
|
||||
|
||||
|
||||
@router.post("/nibis/search", response_model=List[NiBiSSearchResult])
|
||||
async def search_nibis(request: NiBiSSearchRequest):
|
||||
"""
|
||||
Semantic search in NiBiS Erwartungshorizonte.
|
||||
"""
|
||||
try:
|
||||
query_embedding = await generate_single_embedding(request.query)
|
||||
|
||||
if not query_embedding:
|
||||
raise HTTPException(status_code=500, detail="Failed to generate embedding")
|
||||
|
||||
results = await search_nibis_eh(
|
||||
query_embedding=query_embedding,
|
||||
year=request.year,
|
||||
subject=request.subject,
|
||||
niveau=request.niveau,
|
||||
limit=request.limit,
|
||||
)
|
||||
|
||||
return [
|
||||
NiBiSSearchResult(
|
||||
id=r["id"],
|
||||
score=r["score"],
|
||||
text=r.get("text", "")[:500],
|
||||
year=r.get("year"),
|
||||
subject=r.get("subject"),
|
||||
niveau=r.get("niveau"),
|
||||
task_number=r.get("task_number"),
|
||||
)
|
||||
for r in results
|
||||
]
|
||||
except Exception as e:
|
||||
raise HTTPException(status_code=500, detail=str(e))
|
||||
|
||||
|
||||
@router.get("/nibis/collections")
|
||||
async def get_collections_info():
|
||||
"""Get information about all Qdrant collections."""
|
||||
try:
|
||||
client = get_qdrant_client()
|
||||
collections = client.get_collections().collections
|
||||
|
||||
result = []
|
||||
for c in collections:
|
||||
try:
|
||||
info = client.get_collection(c.name)
|
||||
result.append({
|
||||
"name": c.name,
|
||||
"vectors_count": info.vectors_count,
|
||||
"points_count": info.points_count,
|
||||
"status": info.status.value,
|
||||
})
|
||||
except Exception as e:
|
||||
result.append({
|
||||
"name": c.name,
|
||||
"error": str(e),
|
||||
})
|
||||
|
||||
return {"collections": result}
|
||||
except Exception as e:
|
||||
raise HTTPException(status_code=500, detail=str(e))
|
||||
|
||||
|
||||
@router.get("/nibis/stats")
|
||||
async def get_nibis_stats():
|
||||
"""Get detailed statistics about indexed NiBiS data."""
|
||||
try:
|
||||
qdrant = QdrantService()
|
||||
stats = await qdrant.get_stats("bp_nibis_eh")
|
||||
|
||||
if "error" in stats:
|
||||
return {
|
||||
"indexed": False,
|
||||
"message": "NiBiS collection not yet created. Run ingestion first.",
|
||||
}
|
||||
|
||||
client = get_qdrant_client()
|
||||
scroll_result = client.scroll(
|
||||
collection_name="bp_nibis_eh",
|
||||
limit=1000,
|
||||
with_payload=True,
|
||||
with_vectors=False,
|
||||
)
|
||||
|
||||
years = set()
|
||||
subjects = set()
|
||||
niveaus = set()
|
||||
|
||||
for point in scroll_result[0]:
|
||||
if point.payload:
|
||||
if "year" in point.payload:
|
||||
years.add(point.payload["year"])
|
||||
if "subject" in point.payload:
|
||||
subjects.add(point.payload["subject"])
|
||||
if "niveau" in point.payload:
|
||||
niveaus.add(point.payload["niveau"])
|
||||
|
||||
return {
|
||||
"indexed": True,
|
||||
"total_chunks": stats.get("points_count", 0),
|
||||
"years": sorted(list(years)),
|
||||
"subjects": sorted(list(subjects)),
|
||||
"niveaus": sorted(list(niveaus)),
|
||||
}
|
||||
except Exception as e:
|
||||
return {
|
||||
"indexed": False,
|
||||
"error": str(e),
|
||||
}
|
||||
|
||||
|
||||
@router.delete("/nibis/collection")
|
||||
async def delete_nibis_collection():
|
||||
"""Delete the entire NiBiS collection. WARNING: removes all indexed data!"""
|
||||
try:
|
||||
client = get_qdrant_client()
|
||||
client.delete_collection("bp_nibis_eh")
|
||||
return {"status": "deleted", "collection": "bp_nibis_eh"}
|
||||
except Exception as e:
|
||||
raise HTTPException(status_code=500, detail=str(e))
|
||||
Reference in New Issue
Block a user