"""
BYOEH Upload, List, and Core CRUD Routes

Endpoints for uploading, listing, getting, deleting, indexing, and RAG-querying
Erwartungshorizonte. Extracted from routes/eh.py for file-size compliance.
"""

import json
import logging
import os
import uuid
from datetime import datetime, timezone
from typing import Optional

from fastapi import APIRouter, HTTPException, Request, UploadFile, File, Form, BackgroundTasks

from models.enums import EHStatus
from models.eh import (
    Erwartungshorizont,
    EHRightsConfirmation,
)
from models.requests import (
    EHUploadMetadata,
    EHRAGQuery,
    EHIndexRequest,
)
from services.auth_service import get_current_user
from services.eh_service import log_eh_audit
from config import EH_UPLOAD_DIR, OPENAI_API_KEY, ENVIRONMENT, RIGHTS_CONFIRMATION_TEXT
import storage

# BYOEH imports
from qdrant_service import (
    get_collection_info, delete_eh_vectors, search_eh, index_eh_chunks
)
from eh_pipeline import (
    decrypt_text, verify_key_hash, process_eh_for_indexing,
    generate_single_embedding, EncryptionError, EmbeddingError
)

logger = logging.getLogger(__name__)

router = APIRouter()


# =============================================
# SHARED HELPERS
# =============================================

def _resolve_tenant_id(user: dict) -> str:
    """Return the tenant key for *user*: tenant_id, else school_id, else user_id."""
    return user.get("tenant_id") or user.get("school_id") or user["user_id"]


def _client_info(request: Request) -> dict:
    """Return the ``ip_address`` / ``user_agent`` keyword arguments for *request*."""
    return {
        "ip_address": request.client.host if request.client else None,
        "user_agent": request.headers.get("user-agent"),
    }


def _load_eh_for_user(eh_id: str, user: dict) -> Erwartungshorizont:
    """
    Fetch an EH record and enforce owner-or-admin access.

    Raises:
        HTTPException 404: no record is stored under *eh_id*.
        HTTPException 403: the caller is neither the record's teacher nor an admin.
    """
    eh = storage.eh_db.get(eh_id)
    if eh is None:
        raise HTTPException(status_code=404, detail="Erwartungshorizont not found")
    if eh.teacher_id != user["user_id"] and user.get("role") != "admin":
        raise HTTPException(status_code=403, detail="Access denied")
    return eh


# =============================================
# EH UPLOAD & LIST
# =============================================

@router.post("/api/v1/eh/upload")
async def upload_erwartungshorizont(
    file: UploadFile = File(...),
    metadata_json: str = Form(...),
    request: Request = None,
    background_tasks: BackgroundTasks = None
):
    """
    Upload an encrypted Erwartungshorizont.

    The file MUST be client-side encrypted.
    Server stores only the encrypted blob + key hash (never the passphrase).

    Args:
        file: The encrypted upload; its bytes are written unchanged to disk.
        metadata_json: JSON string that is parsed into ``EHUploadMetadata``.
        request: Incoming request, used for authentication and audit details.
        background_tasks: Accepted for interface compatibility; not used here.

    Returns:
        The newly created EH record as a dict.

    Raises:
        HTTPException 400: metadata cannot be parsed/validated, or the rights
            confirmation flag is not set.
    """
    user = get_current_user(request)
    tenant_id = _resolve_tenant_id(user)

    try:
        data = EHUploadMetadata(**json.loads(metadata_json))
    except Exception as e:
        raise HTTPException(status_code=400, detail=f"Invalid metadata: {str(e)}") from e

    if not data.rights_confirmed:
        raise HTTPException(status_code=400, detail="Rights confirmation required")

    eh_id = str(uuid.uuid4())

    # Create tenant-isolated directory
    upload_dir = f"{EH_UPLOAD_DIR}/{tenant_id}/{eh_id}"
    os.makedirs(upload_dir, exist_ok=True)

    # Save encrypted file
    encrypted_path = f"{upload_dir}/encrypted.bin"
    content = await file.read()
    with open(encrypted_path, "wb") as f:
        f.write(content)

    # Save salt separately
    with open(f"{upload_dir}/salt.txt", "w") as f:
        f.write(data.salt)

    # One timestamp for the record and its rights confirmation so they agree.
    now = datetime.now(timezone.utc)
    client = _client_info(request)

    # Create EH record
    eh = Erwartungshorizont(
        id=eh_id,
        tenant_id=tenant_id,
        teacher_id=user["user_id"],
        title=data.metadata.title,
        subject=data.metadata.subject,
        niveau=data.metadata.niveau,
        year=data.metadata.year,
        aufgaben_nummer=data.metadata.aufgaben_nummer,
        encryption_key_hash=data.encryption_key_hash,
        salt=data.salt,
        encrypted_file_path=encrypted_path,
        file_size_bytes=len(content),
        original_filename=data.original_filename,
        rights_confirmed=True,
        rights_confirmed_at=now,
        status=EHStatus.PENDING_RIGHTS,
        chunk_count=0,
        indexed_at=None,
        error_message=None,
        training_allowed=False,  # ALWAYS FALSE - critical for compliance
        created_at=now,
        deleted_at=None
    )
    storage.eh_db[eh_id] = eh

    # Store rights confirmation
    rights_confirmation = EHRightsConfirmation(
        id=str(uuid.uuid4()),
        eh_id=eh_id,
        teacher_id=user["user_id"],
        confirmation_type="upload",
        confirmation_text=RIGHTS_CONFIRMATION_TEXT,
        confirmed_at=now,
        **client
    )
    storage.eh_rights_db[rights_confirmation.id] = rights_confirmation

    # Audit log
    log_eh_audit(
        tenant_id=tenant_id,
        user_id=user["user_id"],
        action="upload",
        eh_id=eh_id,
        details={
            "subject": data.metadata.subject,
            "year": data.metadata.year,
            "file_size": len(content)
        },
        **client
    )

    return eh.to_dict()


@router.get("/api/v1/eh")
async def list_erwartungshorizonte(
    request: Request,
    subject: Optional[str] = None,
    year: Optional[int] = None
):
    """
    List the non-deleted Erwartungshorizonte of the caller's tenant.

    Args:
        request: Incoming request, used for authentication.
        subject: When given (non-empty), only records with this subject.
        year: When given (non-zero), only records with this year.

    Returns:
        A list of EH records as dicts.
    """
    user = get_current_user(request)
    tenant_id = _resolve_tenant_id(user)

    results = []
    for eh in storage.eh_db.values():
        if eh.tenant_id != tenant_id or eh.deleted_at is not None:
            continue
        if subject and eh.subject != subject:
            continue
        if year and eh.year != year:
            continue
        results.append(eh.to_dict())

    return results


# =============================================
# SPECIFIC EH ROUTES (must come before {eh_id} catch-all)
# =============================================

@router.get("/api/v1/eh/audit-log")
async def get_eh_audit_log(
    request: Request,
    eh_id: Optional[str] = None,
    limit: int = 100
):
    """
    Get BYOEH audit log entries for the caller's tenant, newest first.

    Args:
        request: Incoming request, used for authentication.
        eh_id: When given, only entries for this EH.
        limit: Maximum number of entries; negative values are treated as 0.

    Returns:
        A list of audit entries as dicts.
    """
    user = get_current_user(request)
    tenant_id = _resolve_tenant_id(user)

    # Filter by tenant, and by EH if specified
    entries = [
        e for e in storage.eh_audit_db
        if e.tenant_id == tenant_id and (not eh_id or e.eh_id == eh_id)
    ]

    # Sort and limit. A negative slice bound would drop entries from the end
    # instead of limiting, so clamp it first.
    entries.sort(key=lambda e: e.created_at, reverse=True)
    entries = entries[:max(limit, 0)]

    return [e.to_dict() for e in entries]


@router.get("/api/v1/eh/rights-text")
async def get_rights_confirmation_text():
    """Get the rights confirmation text (and its version label) for display in UI."""
    return {
        "text": RIGHTS_CONFIRMATION_TEXT,
        "version": "v1.0"
    }


@router.get("/api/v1/eh/qdrant-status")
async def get_qdrant_status(request: Request):
    """
    Get Qdrant collection status.

    Restricted to users with the "admin" role, except when ENVIRONMENT is
    "development", where any authenticated user is allowed.

    Raises:
        HTTPException 403: caller is not an admin outside development.
    """
    user = get_current_user(request)
    if user.get("role") != "admin" and ENVIRONMENT != "development":
        raise HTTPException(status_code=403, detail="Admin access required")

    return await get_collection_info()


@router.get("/api/v1/eh/shared-with-me")
async def list_shared_eh(request: Request):
    """
    List all EH shared with the current user.

    Only active shares are considered. Shares pointing at an unknown or
    soft-deleted EH are skipped, matching the other read endpoints.

    Returns:
        A list of ``{"eh": ..., "share": ...}`` dicts.
    """
    user = get_current_user(request)
    user_id = user["user_id"]

    shared_ehs = []
    for eh_id, shares in storage.eh_key_shares_db.items():
        eh = storage.eh_db.get(eh_id)
        if eh is None or eh.deleted_at is not None:
            continue
        for share in shares:
            if share.user_id == user_id and share.active:
                shared_ehs.append({
                    "eh": eh.to_dict(),
                    "share": share.to_dict()
                })

    return shared_ehs


# =============================================
# GENERIC EH ROUTES
# =============================================

@router.get("/api/v1/eh/{eh_id}")
async def get_erwartungshorizont(eh_id: str, request: Request):
    """
    Get a specific Erwartungshorizont by ID.

    Raises:
        HTTPException 404: unknown ID, or the record was soft-deleted.
        HTTPException 403: caller is neither the owning teacher nor an admin.
    """
    user = get_current_user(request)
    eh = _load_eh_for_user(eh_id, user)

    if eh.deleted_at is not None:
        raise HTTPException(status_code=404, detail="Erwartungshorizont was deleted")

    return eh.to_dict()


@router.delete("/api/v1/eh/{eh_id}")
async def delete_erwartungshorizont(eh_id: str, request: Request):
    """
    Soft-delete an Erwartungshorizont and remove vectors from Qdrant.

    Vector removal is best-effort: a failure is logged and the request still
    succeeds.

    Raises:
        HTTPException 404: unknown ID.
        HTTPException 403: caller is neither the owning teacher nor an admin.
    """
    user = get_current_user(request)
    eh = _load_eh_for_user(eh_id, user)

    # Soft delete
    eh.deleted_at = datetime.now(timezone.utc)

    # Delete vectors from Qdrant
    try:
        deleted_count = await delete_eh_vectors(eh_id)
        logger.info("Deleted %s vectors for EH %s", deleted_count, eh_id)
    except Exception as e:
        logger.warning("Failed to delete vectors for EH %s: %s", eh_id, e)

    # Audit log
    log_eh_audit(
        tenant_id=eh.tenant_id,
        user_id=user["user_id"],
        action="delete",
        eh_id=eh_id,
        **_client_info(request)
    )

    return {"status": "deleted", "id": eh_id}


@router.post("/api/v1/eh/{eh_id}/index")
async def index_erwartungshorizont(
    eh_id: str,
    data: EHIndexRequest,
    request: Request
):
    """
    Index an Erwartungshorizont for RAG queries.

    Requires the passphrase to decrypt, chunk, embed, and re-encrypt chunks.
    The passphrase is only used transiently and never stored.

    On failure the record's status is set to ERROR and the error text is kept
    in ``error_message``.

    Raises:
        HTTPException 404: unknown ID, or the record was soft-deleted.
        HTTPException 403: caller is neither the owning teacher nor an admin.
        HTTPException 401: passphrase does not match the stored key hash.
        HTTPException 400: decryption failed.
        HTTPException 500: embedding generation or any other step failed.
    """
    user = get_current_user(request)
    eh = _load_eh_for_user(eh_id, user)

    # Indexing a soft-deleted EH would recreate vectors that deletion removed.
    if eh.deleted_at is not None:
        raise HTTPException(status_code=404, detail="Erwartungshorizont was deleted")

    # Verify passphrase matches key hash
    if not verify_key_hash(data.passphrase, eh.salt, eh.encryption_key_hash):
        raise HTTPException(status_code=401, detail="Invalid passphrase")

    eh.status = EHStatus.PROCESSING

    try:
        # Read encrypted file
        with open(eh.encrypted_file_path, "rb") as f:
            encrypted_content = f.read()

        # Decrypt the file
        decrypted_text = decrypt_text(
            encrypted_content.decode('utf-8'),
            data.passphrase,
            eh.salt
        )

        # Process for indexing
        chunk_count, chunks_data = await process_eh_for_indexing(
            eh_id=eh_id,
            tenant_id=eh.tenant_id,
            subject=eh.subject,
            text_content=decrypted_text,
            passphrase=data.passphrase,
            salt_hex=eh.salt
        )

        # Index in Qdrant
        await index_eh_chunks(
            eh_id=eh_id,
            tenant_id=eh.tenant_id,
            subject=eh.subject,
            chunks=chunks_data
        )

        # Update EH record
        eh.status = EHStatus.INDEXED
        eh.chunk_count = chunk_count
        eh.indexed_at = datetime.now(timezone.utc)

        # Audit log (records client details like the other audit entries)
        log_eh_audit(
            tenant_id=eh.tenant_id,
            user_id=user["user_id"],
            action="indexed",
            eh_id=eh_id,
            details={"chunk_count": chunk_count},
            **_client_info(request)
        )

        return {
            "status": "indexed",
            "id": eh_id,
            "chunk_count": chunk_count
        }

    except EncryptionError as e:
        eh.status = EHStatus.ERROR
        eh.error_message = str(e)
        raise HTTPException(status_code=400, detail=f"Decryption failed: {str(e)}") from e
    except EmbeddingError as e:
        eh.status = EHStatus.ERROR
        eh.error_message = str(e)
        raise HTTPException(status_code=500, detail=f"Embedding generation failed: {str(e)}") from e
    except Exception as e:
        eh.status = EHStatus.ERROR
        eh.error_message = str(e)
        raise HTTPException(status_code=500, detail=f"Indexing failed: {str(e)}") from e


@router.post("/api/v1/eh/rag-query")
async def rag_query_eh(data: EHRAGQuery, request: Request):
    """
    RAG query against teacher's Erwartungshorizonte.

    1. Semantic search in Qdrant (tenant-isolated)
    2. Decrypt relevant chunks on-the-fly
    3. Return context for LLM usage

    Hits whose EH is unknown, soft-deleted, has no encrypted content, or cannot
    be decrypted with the supplied passphrase are left out of the result.

    Returns:
        A dict with ``context`` (decrypted chunks joined by a separator),
        ``sources`` (the decrypted chunks with metadata) and ``query``.

    Raises:
        HTTPException 500: OpenAI API key missing, embedding failed, or any
            other error during the query.
    """
    user = get_current_user(request)
    tenant_id = _resolve_tenant_id(user)

    if not OPENAI_API_KEY:
        raise HTTPException(status_code=500, detail="OpenAI API key not configured")

    try:
        # Generate embedding for query
        query_embedding = await generate_single_embedding(data.query_text)

        # Search in Qdrant (tenant-isolated)
        results = await search_eh(
            query_embedding=query_embedding,
            tenant_id=tenant_id,
            subject=data.subject,
            limit=data.limit
        )

        # Decrypt matching chunks
        decrypted_chunks = []
        for r in results:
            eh = storage.eh_db.get(r["eh_id"])
            if eh is None or not r.get("encrypted_content"):
                continue
            # Vector removal on delete is best-effort, so stale vectors of a
            # soft-deleted EH may still match; never serve their content.
            if eh.deleted_at is not None:
                continue
            try:
                decrypted = decrypt_text(
                    r["encrypted_content"],
                    data.passphrase,
                    eh.salt
                )
            except EncryptionError:
                # Skip chunks that can't be decrypted (wrong passphrase for different EH)
                continue
            decrypted_chunks.append({
                "text": decrypted,
                "eh_id": r["eh_id"],
                "eh_title": eh.title,
                "chunk_index": r["chunk_index"],
                "score": r["score"]
            })

        # Audit log
        log_eh_audit(
            tenant_id=tenant_id,
            user_id=user["user_id"],
            action="rag_query",
            details={
                "query_length": len(data.query_text),
                "results_count": len(results),
                "decrypted_count": len(decrypted_chunks)
            },
            **_client_info(request)
        )

        return {
            "context": "\n\n---\n\n".join([c["text"] for c in decrypted_chunks]),
            "sources": decrypted_chunks,
            "query": data.query_text
        }

    except EmbeddingError as e:
        raise HTTPException(status_code=500, detail=f"Query embedding failed: {str(e)}") from e
    except Exception as e:
        raise HTTPException(status_code=500, detail=f"RAG query failed: {str(e)}") from e