[split-required] Split final batch of monoliths >1000 LOC
Python (6 files in klausur-service): - rbac.py (1,132 → 4), admin_api.py (1,012 → 4) - routes/eh.py (1,111 → 4), ocr_pipeline_geometry.py (1,105 → 5) Python (2 files in backend-lehrer): - unit_api.py (1,226 → 6), game_api.py (1,129 → 5) Website (6 page files): - 4x klausur-korrektur pages (1,249-1,328 LOC each) → shared components in website/components/klausur-korrektur/ (17 shared files) - companion (1,057 → 10), magic-help (1,017 → 8) All re-export barrels preserve backward compatibility. Zero import errors verified. Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
455
klausur-service/backend/routes/eh_upload.py
Normal file
455
klausur-service/backend/routes/eh_upload.py
Normal file
@@ -0,0 +1,455 @@
|
||||
"""
|
||||
BYOEH Upload, List, and Core CRUD Routes
|
||||
|
||||
Endpoints for uploading, listing, getting, deleting,
|
||||
indexing, and RAG-querying Erwartungshorizonte.
|
||||
Extracted from routes/eh.py for file-size compliance.
|
||||
"""
|
||||
|
||||
import os
|
||||
import uuid
|
||||
import json
|
||||
from datetime import datetime, timezone
|
||||
from typing import Optional
|
||||
|
||||
from fastapi import APIRouter, HTTPException, Request, UploadFile, File, Form, BackgroundTasks
|
||||
|
||||
from models.enums import EHStatus
|
||||
from models.eh import (
|
||||
Erwartungshorizont,
|
||||
EHRightsConfirmation,
|
||||
)
|
||||
from models.requests import (
|
||||
EHUploadMetadata,
|
||||
EHRAGQuery,
|
||||
EHIndexRequest,
|
||||
)
|
||||
from services.auth_service import get_current_user
|
||||
from services.eh_service import log_eh_audit
|
||||
from config import EH_UPLOAD_DIR, OPENAI_API_KEY, ENVIRONMENT, RIGHTS_CONFIRMATION_TEXT
|
||||
import storage
|
||||
|
||||
# BYOEH imports
|
||||
from qdrant_service import (
|
||||
get_collection_info, delete_eh_vectors, search_eh, index_eh_chunks
|
||||
)
|
||||
from eh_pipeline import (
|
||||
decrypt_text, verify_key_hash, process_eh_for_indexing,
|
||||
generate_single_embedding, EncryptionError, EmbeddingError
|
||||
)
|
||||
|
||||
# Router on which every BYOEH endpoint in this module is registered.
router = APIRouter()
|
||||
|
||||
|
||||
# =============================================
|
||||
# EH UPLOAD & LIST
|
||||
# =============================================
|
||||
|
||||
@router.post("/api/v1/eh/upload")
async def upload_erwartungshorizont(
    file: UploadFile = File(...),
    metadata_json: str = Form(...),
    request: Request = None,
    background_tasks: BackgroundTasks = None
):
    """
    Store an uploaded Erwartungshorizont (EH) for the calling user.

    The upload is expected to be encrypted by the client already; this
    handler writes the bytes to disk exactly as received, together with
    the salt and key hash supplied in the metadata. No passphrase is
    part of the request.

    Args:
        file: The uploaded (encrypted) file.
        metadata_json: JSON string that must validate as ``EHUploadMetadata``.
        request: Incoming request, used for authentication and audit data.
        background_tasks: Accepted for the route signature; not used here.

    Returns:
        The ``to_dict()`` representation of the newly created EH record.

    Raises:
        HTTPException: 400 if the metadata cannot be parsed/validated or
            if ``rights_confirmed`` is not set.
    """
    user = get_current_user(request)
    tenant_id = user.get("tenant_id") or user.get("school_id") or user["user_id"]

    # Parse and validate the metadata form field in one step.
    try:
        payload = EHUploadMetadata(**json.loads(metadata_json))
    except Exception as exc:
        raise HTTPException(status_code=400, detail=f"Invalid metadata: {str(exc)}")

    if not payload.rights_confirmed:
        raise HTTPException(status_code=400, detail="Rights confirmation required")

    eh_id = str(uuid.uuid4())

    # One directory per tenant and per EH.
    target_dir = f"{EH_UPLOAD_DIR}/{tenant_id}/{eh_id}"
    os.makedirs(target_dir, exist_ok=True)

    # Persist the encrypted payload as received.
    encrypted_path = f"{target_dir}/encrypted.bin"
    blob = await file.read()
    with open(encrypted_path, "wb") as blob_file:
        blob_file.write(blob)

    # The salt is kept in its own file next to the payload.
    with open(f"{target_dir}/salt.txt", "w") as salt_file:
        salt_file.write(payload.salt)

    size_bytes = len(blob)
    meta = payload.metadata

    record = Erwartungshorizont(
        id=eh_id,
        tenant_id=tenant_id,
        teacher_id=user["user_id"],
        title=meta.title,
        subject=meta.subject,
        niveau=meta.niveau,
        year=meta.year,
        aufgaben_nummer=meta.aufgaben_nummer,
        encryption_key_hash=payload.encryption_key_hash,
        salt=payload.salt,
        encrypted_file_path=encrypted_path,
        file_size_bytes=size_bytes,
        original_filename=payload.original_filename,
        rights_confirmed=True,
        rights_confirmed_at=datetime.now(timezone.utc),
        status=EHStatus.PENDING_RIGHTS,
        chunk_count=0,
        indexed_at=None,
        error_message=None,
        training_allowed=False,  # ALWAYS FALSE - critical for compliance
        created_at=datetime.now(timezone.utc),
        deleted_at=None
    )
    storage.eh_db[eh_id] = record

    # Request details recorded with both the confirmation and the audit entry.
    client_ip = request.client.host if request.client else None
    user_agent = request.headers.get("user-agent")

    confirmation = EHRightsConfirmation(
        id=str(uuid.uuid4()),
        eh_id=eh_id,
        teacher_id=user["user_id"],
        confirmation_type="upload",
        confirmation_text=RIGHTS_CONFIRMATION_TEXT,
        ip_address=client_ip,
        user_agent=user_agent,
        confirmed_at=datetime.now(timezone.utc)
    )
    storage.eh_rights_db[confirmation.id] = confirmation

    audit_details = {
        "subject": meta.subject,
        "year": meta.year,
        "file_size": size_bytes
    }
    log_eh_audit(
        tenant_id=tenant_id,
        user_id=user["user_id"],
        action="upload",
        eh_id=eh_id,
        details=audit_details,
        ip_address=client_ip,
        user_agent=user_agent
    )

    return record.to_dict()
|
||||
|
||||
|
||||
@router.get("/api/v1/eh")
async def list_erwartungshorizonte(
    request: Request,
    subject: Optional[str] = None,
    year: Optional[int] = None
):
    """
    Return the non-deleted Erwartungshorizonte of the caller's tenant.

    Args:
        request: Incoming request, used for authentication.
        subject: If truthy, only records with this subject are returned.
        year: If truthy, only records with this year are returned.

    Returns:
        A list of ``to_dict()`` representations, in storage iteration order.
    """
    user = get_current_user(request)
    tenant_id = user.get("tenant_id") or user.get("school_id") or user["user_id"]

    def _is_listed(candidate) -> bool:
        # Tenant isolation and soft-delete filtering come first.
        if candidate.tenant_id != tenant_id or candidate.deleted_at is not None:
            return False
        # Optional filters only apply when a truthy value was supplied.
        if subject and candidate.subject != subject:
            return False
        if year and candidate.year != year:
            return False
        return True

    return [
        candidate.to_dict()
        for candidate in storage.eh_db.values()
        if _is_listed(candidate)
    ]
|
||||
|
||||
|
||||
# =============================================
|
||||
# SPECIFIC EH ROUTES (must come before {eh_id} catch-all)
|
||||
# =============================================
|
||||
|
||||
@router.get("/api/v1/eh/audit-log")
async def get_eh_audit_log(
    request: Request,
    eh_id: Optional[str] = None,
    limit: int = 100
):
    """
    Return BYOEH audit log entries of the caller's tenant, newest first.

    Args:
        request: Incoming request, used for authentication.
        eh_id: If given, only entries for this EH are returned.
        limit: Maximum number of entries to return. Values below zero are
            treated as zero.

    Returns:
        A list of ``to_dict()`` representations sorted by ``created_at``
        in descending order.
    """
    user = get_current_user(request)
    tenant_id = user.get("tenant_id") or user.get("school_id") or user["user_id"]

    # Filter by tenant
    entries = [e for e in storage.eh_audit_db if e.tenant_id == tenant_id]

    # Filter by EH if specified
    if eh_id:
        entries = [e for e in entries if e.eh_id == eh_id]

    # A negative slice bound would drop entries from the end of the sorted
    # list instead of limiting the result, so clamp it first.
    limit = max(limit, 0)

    # Sort and limit
    entries = sorted(entries, key=lambda e: e.created_at, reverse=True)[:limit]

    return [e.to_dict() for e in entries]
|
||||
|
||||
|
||||
@router.get("/api/v1/eh/rights-text")
async def get_rights_confirmation_text():
    """Return the rights confirmation text and its version label for the UI."""
    return dict(text=RIGHTS_CONFIRMATION_TEXT, version="v1.0")
|
||||
|
||||
|
||||
@router.get("/api/v1/eh/qdrant-status")
async def get_qdrant_status(request: Request):
    """
    Return the Qdrant collection info.

    Access is granted to users with role ``admin``; when ``ENVIRONMENT``
    is ``development`` every authenticated user is allowed.

    Raises:
        HTTPException: 403 if the caller is not permitted.
    """
    user = get_current_user(request)

    allowed = user.get("role") == "admin" or ENVIRONMENT == "development"
    if not allowed:
        raise HTTPException(status_code=403, detail="Admin access required")

    collection_info = await get_collection_info()
    return collection_info
|
||||
|
||||
|
||||
@router.get("/api/v1/eh/shared-with-me")
async def list_shared_eh(request: Request):
    """
    Return every EH for which the caller holds an active key share.

    Returns:
        A list of ``{"eh": ..., "share": ...}`` dicts, one per active share
        whose EH still exists in ``storage.eh_db``.
    """
    current_user_id = get_current_user(request)["user_id"]

    collected = []
    for eh_id, shares in storage.eh_key_shares_db.items():
        for share in shares:
            # Only shares addressed to the caller that are still active.
            if not (share.user_id == current_user_id and share.active):
                continue
            # Shares may reference an EH that is no longer stored.
            if eh_id not in storage.eh_db:
                continue
            collected.append({
                "eh": storage.eh_db[eh_id].to_dict(),
                "share": share.to_dict()
            })

    return collected
|
||||
|
||||
|
||||
# =============================================
|
||||
# GENERIC EH ROUTES
|
||||
# =============================================
|
||||
|
||||
@router.get("/api/v1/eh/{eh_id}")
async def get_erwartungshorizont(eh_id: str, request: Request):
    """
    Return a single Erwartungshorizont.

    Raises:
        HTTPException: 404 if the id is unknown or the record was
            soft-deleted; 403 if the caller is neither the owning teacher
            nor an admin.
    """
    user = get_current_user(request)

    if eh_id not in storage.eh_db:
        raise HTTPException(status_code=404, detail="Erwartungshorizont not found")
    record = storage.eh_db[eh_id]

    # Ownership is checked before the deletion state.
    may_access = record.teacher_id == user["user_id"] or user.get("role") == "admin"
    if not may_access:
        raise HTTPException(status_code=403, detail="Access denied")

    if record.deleted_at is not None:
        raise HTTPException(status_code=404, detail="Erwartungshorizont was deleted")

    return record.to_dict()
|
||||
|
||||
|
||||
@router.delete("/api/v1/eh/{eh_id}")
async def delete_erwartungshorizont(eh_id: str, request: Request):
    """
    Soft-delete an Erwartungshorizont and try to remove its Qdrant vectors.

    The record stays in storage with ``deleted_at`` set. Vector removal is
    best-effort: a failure is printed and does not abort the request.

    Returns:
        ``{"status": "deleted", "id": <eh_id>}``

    Raises:
        HTTPException: 404 if the id is unknown; 403 if the caller is
            neither the owning teacher nor an admin.
    """
    user = get_current_user(request)

    if eh_id not in storage.eh_db:
        raise HTTPException(status_code=404, detail="Erwartungshorizont not found")
    record = storage.eh_db[eh_id]

    may_delete = record.teacher_id == user["user_id"] or user.get("role") == "admin"
    if not may_delete:
        raise HTTPException(status_code=403, detail="Access denied")

    # Mark as deleted; the record itself is kept.
    record.deleted_at = datetime.now(timezone.utc)

    # Vector cleanup must not fail the request.
    try:
        removed = await delete_eh_vectors(eh_id)
        print(f"Deleted {removed} vectors for EH {eh_id}")
    except Exception as exc:
        print(f"Warning: Failed to delete vectors: {exc}")

    client_ip = request.client.host if request.client else None
    log_eh_audit(
        tenant_id=record.tenant_id,
        user_id=user["user_id"],
        action="delete",
        eh_id=eh_id,
        ip_address=client_ip,
        user_agent=request.headers.get("user-agent")
    )

    return {"status": "deleted", "id": eh_id}
|
||||
|
||||
|
||||
@router.post("/api/v1/eh/{eh_id}/index")
async def index_erwartungshorizont(
    eh_id: str,
    data: EHIndexRequest,
    request: Request
):
    """
    Index an Erwartungshorizont for RAG queries.

    The stored encrypted file is read and decrypted with the supplied
    passphrase, passed to ``process_eh_for_indexing`` and the resulting
    chunks are written to Qdrant. The passphrase is only handed to the
    helper functions; this handler does not store it.

    Args:
        eh_id: Id of the EH to index.
        data: Request body carrying the passphrase.
        request: Incoming request, used for authentication and audit data.

    Returns:
        ``{"status": "indexed", "id": <eh_id>, "chunk_count": <int>}``

    Raises:
        HTTPException: 404 if the id is unknown or the record was
            soft-deleted; 403 if the caller is neither the owning teacher
            nor an admin; 401 if the passphrase does not match the stored
            key hash; 400 on decryption failure; 500 on embedding or any
            other indexing failure.
    """
    user = get_current_user(request)

    if eh_id not in storage.eh_db:
        raise HTTPException(status_code=404, detail="Erwartungshorizont not found")

    eh = storage.eh_db[eh_id]
    if eh.teacher_id != user["user_id"] and user.get("role") != "admin":
        raise HTTPException(status_code=403, detail="Access denied")

    # A soft-deleted EH had its vectors removed on delete; indexing it again
    # would recreate them. Reject it the same way the GET route does.
    if eh.deleted_at is not None:
        raise HTTPException(status_code=404, detail="Erwartungshorizont was deleted")

    # Verify passphrase matches key hash
    if not verify_key_hash(data.passphrase, eh.salt, eh.encryption_key_hash):
        raise HTTPException(status_code=401, detail="Invalid passphrase")

    eh.status = EHStatus.PROCESSING

    try:
        # Read encrypted file
        with open(eh.encrypted_file_path, "rb") as f:
            encrypted_content = f.read()

        # Decrypt the file
        decrypted_text = decrypt_text(
            encrypted_content.decode('utf-8'),
            data.passphrase,
            eh.salt
        )

        # Process for indexing
        chunk_count, chunks_data = await process_eh_for_indexing(
            eh_id=eh_id,
            tenant_id=eh.tenant_id,
            subject=eh.subject,
            text_content=decrypted_text,
            passphrase=data.passphrase,
            salt_hex=eh.salt
        )

        # Index in Qdrant
        await index_eh_chunks(
            eh_id=eh_id,
            tenant_id=eh.tenant_id,
            subject=eh.subject,
            chunks=chunks_data
        )

        # Update EH record; drop any message left over from a failed attempt
        eh.status = EHStatus.INDEXED
        eh.chunk_count = chunk_count
        eh.indexed_at = datetime.now(timezone.utc)
        eh.error_message = None

        # Audit log (with request details, like the other audited actions)
        log_eh_audit(
            tenant_id=eh.tenant_id,
            user_id=user["user_id"],
            action="indexed",
            eh_id=eh_id,
            details={"chunk_count": chunk_count},
            ip_address=request.client.host if request.client else None,
            user_agent=request.headers.get("user-agent")
        )

        return {
            "status": "indexed",
            "id": eh_id,
            "chunk_count": chunk_count
        }

    except EncryptionError as e:
        eh.status = EHStatus.ERROR
        eh.error_message = str(e)
        raise HTTPException(status_code=400, detail=f"Decryption failed: {str(e)}") from e
    except EmbeddingError as e:
        eh.status = EHStatus.ERROR
        eh.error_message = str(e)
        raise HTTPException(status_code=500, detail=f"Embedding generation failed: {str(e)}") from e
    except Exception as e:
        # Boundary handler: record the failure on the EH and report it.
        eh.status = EHStatus.ERROR
        eh.error_message = str(e)
        raise HTTPException(status_code=500, detail=f"Indexing failed: {str(e)}") from e
|
||||
|
||||
|
||||
@router.post("/api/v1/eh/rag-query")
async def rag_query_eh(data: EHRAGQuery, request: Request):
    """
    RAG query against the Erwartungshorizonte of the caller's tenant.

    1. Embed the query text and search Qdrant, passing the caller's tenant id
    2. Decrypt the matching chunks with the supplied passphrase
    3. Return the decrypted text as context for LLM usage

    Chunks are skipped when their EH is unknown, soft-deleted, carries no
    encrypted content, or cannot be decrypted with the given passphrase.

    Args:
        data: Request body with query text, passphrase, subject and limit.
        request: Incoming request, used for authentication and audit data.

    Returns:
        A dict with ``context`` (decrypted chunk texts joined by a
        separator), ``sources`` (per-chunk details) and ``query``.

    Raises:
        HTTPException: 500 if no OpenAI API key is configured, if the
            query embedding fails, or on any other failure.
    """
    user = get_current_user(request)
    tenant_id = user.get("tenant_id") or user.get("school_id") or user["user_id"]

    if not OPENAI_API_KEY:
        raise HTTPException(status_code=500, detail="OpenAI API key not configured")

    try:
        # Generate embedding for query
        query_embedding = await generate_single_embedding(data.query_text)

        # Search in Qdrant (tenant-isolated)
        results = await search_eh(
            query_embedding=query_embedding,
            tenant_id=tenant_id,
            subject=data.subject,
            limit=data.limit
        )

        # Decrypt matching chunks
        decrypted_chunks = []
        for r in results:
            eh = storage.eh_db.get(r["eh_id"])
            if not eh or not r.get("encrypted_content"):
                continue
            # Vector removal on delete is best-effort, so hits for a
            # soft-deleted EH can still show up here; never return them.
            if eh.deleted_at is not None:
                continue
            try:
                decrypted = decrypt_text(
                    r["encrypted_content"],
                    data.passphrase,
                    eh.salt
                )
            except EncryptionError:
                # Skip chunks that can't be decrypted (wrong passphrase for different EH)
                continue
            decrypted_chunks.append({
                "text": decrypted,
                "eh_id": r["eh_id"],
                "eh_title": eh.title,
                "chunk_index": r["chunk_index"],
                "score": r["score"]
            })

        # Audit log
        log_eh_audit(
            tenant_id=tenant_id,
            user_id=user["user_id"],
            action="rag_query",
            details={
                "query_length": len(data.query_text),
                "results_count": len(results),
                "decrypted_count": len(decrypted_chunks)
            },
            ip_address=request.client.host if request.client else None,
            user_agent=request.headers.get("user-agent")
        )

        return {
            "context": "\n\n---\n\n".join([c["text"] for c in decrypted_chunks]),
            "sources": decrypted_chunks,
            "query": data.query_text
        }

    except EmbeddingError as e:
        raise HTTPException(status_code=500, detail=f"Query embedding failed: {str(e)}") from e
    except Exception as e:
        # Boundary handler: report any other failure as a 500.
        raise HTTPException(status_code=500, detail=f"RAG query failed: {str(e)}") from e
|
||||
Reference in New Issue
Block a user