[split-required] Split final batch of monoliths >1000 LOC

Python (4 files in klausur-service):
- rbac.py (1,132 → 4), admin_api.py (1,012 → 4)
- routes/eh.py (1,111 → 4), ocr_pipeline_geometry.py (1,105 → 5)

Python (2 files in backend-lehrer):
- unit_api.py (1,226 → 6), game_api.py (1,129 → 5)

Website (6 page files):
- 4x klausur-korrektur pages (1,249-1,328 LOC each) → shared components
  in website/components/klausur-korrektur/ (17 shared files)
- companion (1,057 → 10), magic-help (1,017 → 8)

All re-export barrels preserve backward compatibility.
Zero import errors verified.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
Benjamin Admin
2026-04-24 23:17:30 +02:00
parent b2a0126f14
commit 6811264756
67 changed files with 12270 additions and 13651 deletions

View File

@@ -0,0 +1,455 @@
"""
BYOEH Upload, List, and Core CRUD Routes
Endpoints for uploading, listing, getting, deleting,
indexing, and RAG-querying Erwartungshorizonte.
Extracted from routes/eh.py for file-size compliance.
"""
import os
import uuid
import json
from datetime import datetime, timezone
from typing import Optional
from fastapi import APIRouter, HTTPException, Request, UploadFile, File, Form, BackgroundTasks
from models.enums import EHStatus
from models.eh import (
Erwartungshorizont,
EHRightsConfirmation,
)
from models.requests import (
EHUploadMetadata,
EHRAGQuery,
EHIndexRequest,
)
from services.auth_service import get_current_user
from services.eh_service import log_eh_audit
from config import EH_UPLOAD_DIR, OPENAI_API_KEY, ENVIRONMENT, RIGHTS_CONFIRMATION_TEXT
import storage
# BYOEH imports
from qdrant_service import (
get_collection_info, delete_eh_vectors, search_eh, index_eh_chunks
)
from eh_pipeline import (
decrypt_text, verify_key_hash, process_eh_for_indexing,
generate_single_embedding, EncryptionError, EmbeddingError
)
# Router that every BYOEH endpoint defined in this module registers itself on.
router = APIRouter()
# =============================================
# EH UPLOAD & LIST
# =============================================
@router.post("/api/v1/eh/upload")
async def upload_erwartungshorizont(
    file: UploadFile = File(...),
    metadata_json: str = Form(...),
    request: Request = None,
    background_tasks: BackgroundTasks = None
):
    """
    Accept an encrypted Erwartungshorizont upload and register it.

    The uploaded bytes are written unchanged to a per-tenant, per-EH
    directory together with the salt; the EH record keeps the key hash and
    salt from the metadata. A rights confirmation entry and an audit log
    entry are created for the upload.

    Args:
        file: The uploaded (client-side encrypted) file.
        metadata_json: JSON string parsed into ``EHUploadMetadata``.
        request: Incoming request, used for authentication and client info.
        background_tasks: Accepted but not used by this handler.

    Returns:
        The new EH record as a dict (``to_dict()``).

    Raises:
        HTTPException: 400 if the metadata cannot be parsed/validated or if
            ``rights_confirmed`` is falsy.
    """
    user = get_current_user(request)
    tenant_id = user.get("tenant_id") or user.get("school_id") or user["user_id"]

    # Parse and validate the metadata before touching the filesystem.
    try:
        raw_metadata = json.loads(metadata_json)
        data = EHUploadMetadata(**raw_metadata)
    except Exception as e:
        raise HTTPException(status_code=400, detail=f"Invalid metadata: {e!s}")

    if not data.rights_confirmed:
        raise HTTPException(status_code=400, detail="Rights confirmation required")

    eh_id = str(uuid.uuid4())

    # One directory per tenant and per EH keeps uploads isolated on disk.
    upload_dir = f"{EH_UPLOAD_DIR}/{tenant_id}/{eh_id}"
    os.makedirs(upload_dir, exist_ok=True)

    # Persist the encrypted payload exactly as received.
    encrypted_path = f"{upload_dir}/encrypted.bin"
    blob = await file.read()
    with open(encrypted_path, "wb") as blob_file:
        blob_file.write(blob)

    # The salt lives next to the blob in its own file.
    with open(f"{upload_dir}/salt.txt", "w") as salt_file:
        salt_file.write(data.salt)

    teacher_id = user["user_id"]
    meta = data.metadata
    blob_size = len(blob)

    eh = Erwartungshorizont(
        id=eh_id,
        tenant_id=tenant_id,
        teacher_id=teacher_id,
        title=meta.title,
        subject=meta.subject,
        niveau=meta.niveau,
        year=meta.year,
        aufgaben_nummer=meta.aufgaben_nummer,
        encryption_key_hash=data.encryption_key_hash,
        salt=data.salt,
        encrypted_file_path=encrypted_path,
        file_size_bytes=blob_size,
        original_filename=data.original_filename,
        rights_confirmed=True,
        rights_confirmed_at=datetime.now(timezone.utc),
        status=EHStatus.PENDING_RIGHTS,
        chunk_count=0,
        indexed_at=None,
        error_message=None,
        training_allowed=False,  # ALWAYS FALSE - critical for compliance
        created_at=datetime.now(timezone.utc),
        deleted_at=None
    )
    storage.eh_db[eh_id] = eh

    # Client details are recorded on both the confirmation and the audit entry.
    client_ip = request.client.host if request.client else None
    client_agent = request.headers.get("user-agent")

    confirmation = EHRightsConfirmation(
        id=str(uuid.uuid4()),
        eh_id=eh_id,
        teacher_id=teacher_id,
        confirmation_type="upload",
        confirmation_text=RIGHTS_CONFIRMATION_TEXT,
        ip_address=client_ip,
        user_agent=client_agent,
        confirmed_at=datetime.now(timezone.utc)
    )
    storage.eh_rights_db[confirmation.id] = confirmation

    log_eh_audit(
        tenant_id=tenant_id,
        user_id=teacher_id,
        action="upload",
        eh_id=eh_id,
        details={
            "subject": meta.subject,
            "year": meta.year,
            "file_size": blob_size
        },
        ip_address=client_ip,
        user_agent=client_agent
    )

    return eh.to_dict()
@router.get("/api/v1/eh")
async def list_erwartungshorizonte(
    request: Request,
    subject: Optional[str] = None,
    year: Optional[int] = None
):
    """
    List the non-deleted Erwartungshorizonte of the caller's tenant.

    Args:
        request: Incoming request, used for authentication.
        subject: If truthy, only EHs with this subject are returned.
        year: If truthy, only EHs with this year are returned.

    Returns:
        A list of EH dicts (``to_dict()``).
    """
    user = get_current_user(request)
    tenant_id = user.get("tenant_id") or user.get("school_id") or user["user_id"]

    # Falsy filter values (None, "", 0) mean "do not filter on this field".
    return [
        entry.to_dict()
        for entry in storage.eh_db.values()
        if entry.tenant_id == tenant_id
        and entry.deleted_at is None
        and (not subject or entry.subject == subject)
        and (not year or entry.year == year)
    ]
# =============================================
# SPECIFIC EH ROUTES (must come before {eh_id} catch-all)
# =============================================
@router.get("/api/v1/eh/audit-log")
async def get_eh_audit_log(
    request: Request,
    eh_id: Optional[str] = None,
    limit: int = 100
):
    """
    Return the newest BYOEH audit log entries of the caller's tenant.

    Args:
        request: Incoming request, used for authentication.
        eh_id: If truthy, only entries for this EH are returned.
        limit: Maximum number of entries to return; values below zero are
            treated as zero.

    Returns:
        A list of audit entry dicts, newest first (by ``created_at``).
    """
    user = get_current_user(request)
    tenant_id = user.get("tenant_id") or user.get("school_id") or user["user_id"]

    # A negative limit would make the slice below mean "all but the last N"
    # instead of "at most N", so clamp it.
    limit = max(limit, 0)

    # Restrict to the caller's tenant and, optionally, to a single EH.
    entries = [
        e for e in storage.eh_audit_db
        if e.tenant_id == tenant_id and (not eh_id or e.eh_id == eh_id)
    ]
    entries.sort(key=lambda e: e.created_at, reverse=True)

    return [e.to_dict() for e in entries[:limit]]
@router.get("/api/v1/eh/rights-text")
async def get_rights_confirmation_text():
    """Return the rights confirmation wording and its version label for the UI."""
    payload = dict(text=RIGHTS_CONFIRMATION_TEXT, version="v1.0")
    return payload
@router.get("/api/v1/eh/qdrant-status")
async def get_qdrant_status(request: Request):
    """
    Return the Qdrant collection info.

    Access is limited to users with role ``admin``, except when
    ``ENVIRONMENT`` is ``"development"``, where any authenticated user passes.

    Raises:
        HTTPException: 403 for non-admin callers outside development.
    """
    user = get_current_user(request)

    is_admin = user.get("role") == "admin"
    if not (is_admin or ENVIRONMENT == "development"):
        raise HTTPException(status_code=403, detail="Admin access required")

    collection_info = await get_collection_info()
    return collection_info
@router.get("/api/v1/eh/shared-with-me")
async def list_shared_eh(request: Request):
    """
    List all EHs that have an active key share for the current user.

    Shares pointing at an EH that no longer exists in storage, or that has
    been soft-deleted, are skipped so that this listing agrees with the
    regular list and get endpoints.

    Returns:
        A list of ``{"eh": <EH dict>, "share": <share dict>}`` items.
    """
    user = get_current_user(request)
    user_id = user["user_id"]

    shared_ehs = []
    for eh_id, shares in storage.eh_key_shares_db.items():
        eh = storage.eh_db.get(eh_id)
        # Unknown or soft-deleted EHs must not show up as shared items.
        if eh is None or eh.deleted_at is not None:
            continue
        for share in shares:
            if share.user_id == user_id and share.active:
                shared_ehs.append({
                    "eh": eh.to_dict(),
                    "share": share.to_dict()
                })
    return shared_ehs
# =============================================
# GENERIC EH ROUTES
# =============================================
@router.get("/api/v1/eh/{eh_id}")
async def get_erwartungshorizont(eh_id: str, request: Request):
    """
    Fetch a single Erwartungshorizont by its ID.

    Returns:
        The EH record as a dict (``to_dict()``).

    Raises:
        HTTPException: 404 if the ID is unknown or the EH was soft-deleted,
            403 if the caller is neither the owning teacher nor an admin.
    """
    user = get_current_user(request)

    record = storage.eh_db.get(eh_id)
    if record is None:
        raise HTTPException(status_code=404, detail="Erwartungshorizont not found")

    # The ownership check runs before the deleted check, as in the other handlers.
    is_owner = record.teacher_id == user["user_id"]
    if not is_owner and user.get("role") != "admin":
        raise HTTPException(status_code=403, detail="Access denied")

    if record.deleted_at is not None:
        raise HTTPException(status_code=404, detail="Erwartungshorizont was deleted")

    return record.to_dict()
@router.delete("/api/v1/eh/{eh_id}")
async def delete_erwartungshorizont(eh_id: str, request: Request):
    """
    Soft-delete an Erwartungshorizont and remove its vectors from Qdrant.

    The record stays in storage with ``deleted_at`` set. Repeating the call
    keeps the original deletion timestamp but retries the vector cleanup,
    which is best-effort: a failure is reported on stdout and does not fail
    the request.

    Returns:
        ``{"status": "deleted", "id": eh_id}``.

    Raises:
        HTTPException: 404 if the ID is unknown, 403 if the caller is neither
            the owning teacher nor an admin.
    """
    user = get_current_user(request)

    if eh_id not in storage.eh_db:
        raise HTTPException(status_code=404, detail="Erwartungshorizont not found")

    eh = storage.eh_db[eh_id]
    if eh.teacher_id != user["user_id"] and user.get("role") != "admin":
        raise HTTPException(status_code=403, detail="Access denied")

    # Soft delete; do not overwrite the timestamp of an earlier deletion.
    if eh.deleted_at is None:
        eh.deleted_at = datetime.now(timezone.utc)

    # Delete vectors from Qdrant (best-effort, also retried on repeated calls).
    try:
        deleted_count = await delete_eh_vectors(eh_id)
        print(f"Deleted {deleted_count} vectors for EH {eh_id}")
    except Exception as e:
        print(f"Warning: Failed to delete vectors: {e}")

    # Audit log
    log_eh_audit(
        tenant_id=eh.tenant_id,
        user_id=user["user_id"],
        action="delete",
        eh_id=eh_id,
        ip_address=request.client.host if request.client else None,
        user_agent=request.headers.get("user-agent")
    )

    return {"status": "deleted", "id": eh_id}
@router.post("/api/v1/eh/{eh_id}/index")
async def index_erwartungshorizont(
    eh_id: str,
    data: EHIndexRequest,
    request: Request
):
    """
    Index an Erwartungshorizont for RAG queries.

    The passphrase from the request body is checked against the stored key
    hash, then used to decrypt the stored file and passed on to
    ``process_eh_for_indexing``; the resulting chunks are sent to Qdrant.
    The passphrase is not written to the EH record.

    Returns:
        ``{"status": "indexed", "id": eh_id, "chunk_count": <int>}``.

    Raises:
        HTTPException: 404 if the ID is unknown or the EH was soft-deleted,
            403 if the caller is neither the owning teacher nor an admin,
            401 if the passphrase does not match the key hash,
            400 if decryption fails, 500 for embedding or other failures.
    """
    user = get_current_user(request)

    if eh_id not in storage.eh_db:
        raise HTTPException(status_code=404, detail="Erwartungshorizont not found")

    eh = storage.eh_db[eh_id]
    if eh.teacher_id != user["user_id"] and user.get("role") != "admin":
        raise HTTPException(status_code=403, detail="Access denied")

    # A soft-deleted EH must not get fresh vectors written back into Qdrant.
    if eh.deleted_at is not None:
        raise HTTPException(status_code=404, detail="Erwartungshorizont was deleted")

    # Verify passphrase matches key hash
    if not verify_key_hash(data.passphrase, eh.salt, eh.encryption_key_hash):
        raise HTTPException(status_code=401, detail="Invalid passphrase")

    eh.status = EHStatus.PROCESSING

    try:
        # Read encrypted file
        with open(eh.encrypted_file_path, "rb") as f:
            encrypted_content = f.read()

        # Decrypt the file
        decrypted_text = decrypt_text(
            encrypted_content.decode('utf-8'),
            data.passphrase,
            eh.salt
        )

        # Process for indexing
        chunk_count, chunks_data = await process_eh_for_indexing(
            eh_id=eh_id,
            tenant_id=eh.tenant_id,
            subject=eh.subject,
            text_content=decrypted_text,
            passphrase=data.passphrase,
            salt_hex=eh.salt
        )

        # Index in Qdrant
        await index_eh_chunks(
            eh_id=eh_id,
            tenant_id=eh.tenant_id,
            subject=eh.subject,
            chunks=chunks_data
        )

        # Update EH record
        eh.status = EHStatus.INDEXED
        eh.chunk_count = chunk_count
        eh.indexed_at = datetime.now(timezone.utc)

        # Audit log, with the same client details the other handlers record.
        log_eh_audit(
            tenant_id=eh.tenant_id,
            user_id=user["user_id"],
            action="indexed",
            eh_id=eh_id,
            details={"chunk_count": chunk_count},
            ip_address=request.client.host if request.client else None,
            user_agent=request.headers.get("user-agent")
        )

        return {
            "status": "indexed",
            "id": eh_id,
            "chunk_count": chunk_count
        }

    except EncryptionError as e:
        eh.status = EHStatus.ERROR
        eh.error_message = str(e)
        raise HTTPException(status_code=400, detail=f"Decryption failed: {str(e)}")
    except EmbeddingError as e:
        eh.status = EHStatus.ERROR
        eh.error_message = str(e)
        raise HTTPException(status_code=500, detail=f"Embedding generation failed: {str(e)}")
    except Exception as e:
        # Any other failure also leaves the record in ERROR with the message.
        eh.status = EHStatus.ERROR
        eh.error_message = str(e)
        raise HTTPException(status_code=500, detail=f"Indexing failed: {str(e)}")
@router.post("/api/v1/eh/rag-query")
async def rag_query_eh(data: EHRAGQuery, request: Request):
    """
    Run a RAG query against the caller's Erwartungshorizonte.

    1. Embed the query text and search Qdrant with the caller's tenant ID.
    2. Decrypt the matching chunks with the supplied passphrase.
    3. Return the decrypted text as context for LLM usage.

    Hits whose EH is unknown, soft-deleted, has no encrypted content, or
    cannot be decrypted with the given passphrase are left out.

    Returns:
        A dict with ``context`` (joined chunk texts), ``sources`` (one dict
        per decrypted chunk) and ``query`` (the original query text).

    Raises:
        HTTPException: 500 if no OpenAI API key is configured, if the query
            embedding fails, or on any other error during the query.
    """
    user = get_current_user(request)
    tenant_id = user.get("tenant_id") or user.get("school_id") or user["user_id"]

    if not OPENAI_API_KEY:
        raise HTTPException(status_code=500, detail="OpenAI API key not configured")

    try:
        # Generate embedding for query
        query_embedding = await generate_single_embedding(data.query_text)

        # Search in Qdrant, restricted to the caller's tenant
        results = await search_eh(
            query_embedding=query_embedding,
            tenant_id=tenant_id,
            subject=data.subject,
            limit=data.limit
        )

        # Decrypt matching chunks
        decrypted_chunks = []
        for r in results:
            eh = storage.eh_db.get(r["eh_id"])
            if eh is None or not r.get("encrypted_content"):
                continue
            # Vector cleanup on delete is best-effort, so hits for a
            # soft-deleted EH can still come back; never serve their content.
            if eh.deleted_at is not None:
                continue
            try:
                decrypted = decrypt_text(
                    r["encrypted_content"],
                    data.passphrase,
                    eh.salt
                )
            except EncryptionError:
                # Skip chunks that can't be decrypted (wrong passphrase for different EH)
                continue
            decrypted_chunks.append({
                "text": decrypted,
                "eh_id": r["eh_id"],
                "eh_title": eh.title,
                "chunk_index": r["chunk_index"],
                "score": r["score"]
            })

        # Audit log
        log_eh_audit(
            tenant_id=tenant_id,
            user_id=user["user_id"],
            action="rag_query",
            details={
                "query_length": len(data.query_text),
                "results_count": len(results),
                "decrypted_count": len(decrypted_chunks)
            },
            ip_address=request.client.host if request.client else None,
            user_agent=request.headers.get("user-agent")
        )

        return {
            "context": "\n\n---\n\n".join([c["text"] for c in decrypted_chunks]),
            "sources": decrypted_chunks,
            "query": data.query_text
        }

    except EmbeddingError as e:
        raise HTTPException(status_code=500, detail=f"Query embedding failed: {str(e)}")
    except Exception as e:
        raise HTTPException(status_code=500, detail=f"RAG query failed: {str(e)}")