"""
Mobile Upload API for Klausur-Service

Provides chunked upload endpoints for large PDF files (100MB+) from mobile
devices.

GDPR (DSGVO) compliant: data stays local in the WLAN, no external transmission.
"""

import os
import uuid
import shutil
import hashlib
from pathlib import Path
from datetime import datetime, timezone
from typing import Dict, Optional

from fastapi import APIRouter, HTTPException, UploadFile, File, Form
from fastapi.responses import HTMLResponse
from pydantic import BaseModel

# Configuration
UPLOAD_DIR = Path(os.getenv("UPLOAD_DIR", "/app/uploads"))
CHUNK_DIR = Path(os.getenv("CHUNK_DIR", "/app/chunks"))
EH_UPLOAD_DIR = Path(os.getenv("EH_UPLOAD_DIR", "/app/eh-uploads"))

# Chunk size advertised to clients by /init (5 MB).
_CHUNK_SIZE_BYTES = 5 * 1024 * 1024
# Buffer size used when streaming data to disk (1 MB).
_COPY_BUFFER_BYTES = 1024 * 1024
# Name used when the client supplies no usable filename.
_FALLBACK_FILENAME = "upload.pdf"

# Ensure directories exist
UPLOAD_DIR.mkdir(parents=True, exist_ok=True)
CHUNK_DIR.mkdir(parents=True, exist_ok=True)
EH_UPLOAD_DIR.mkdir(parents=True, exist_ok=True)

# In-memory storage for upload sessions (for simplicity).
# In production, use Redis or a database.
_upload_sessions: Dict[str, dict] = {}

router = APIRouter(prefix="/api/v1/upload", tags=["Mobile Upload"])


class InitUploadRequest(BaseModel):
    """Request body for starting a chunked upload session."""

    filename: str
    filesize: int
    chunks: int
    destination: str = "klausur"  # "klausur" or "rag"


class InitUploadResponse(BaseModel):
    """Response returned after a chunked upload session was created."""

    upload_id: str
    chunk_size: int
    total_chunks: int
    message: str


class ChunkUploadResponse(BaseModel):
    """Response returned after a single chunk was stored."""

    upload_id: str
    chunk_index: int
    received: bool
    chunks_received: int
    total_chunks: int


class FinalizeResponse(BaseModel):
    """Response returned after all chunks were combined into the final file."""

    upload_id: str
    filename: str
    filepath: str
    filesize: int
    checksum: str
    message: str


def _get_session(upload_id: str) -> dict:
    """Return the session stored for *upload_id* or raise HTTP 404."""
    session = _upload_sessions.get(upload_id)
    if session is None:
        raise HTTPException(status_code=404, detail="Upload-Session nicht gefunden")
    return session


def _destination_dir(destination: str) -> Path:
    """Map a destination key to its directory ("rag" -> EH uploads, else default)."""
    return EH_UPLOAD_DIR if destination == "rag" else UPLOAD_DIR


def _build_target_path(dest_dir: Path, original_name: Optional[str]) -> Path:
    """
    Build the timestamp-prefixed target path for an uploaded file.

    The client-supplied name is untrusted: it is reduced to its final path
    component so that directory parts or ".." can never influence where the
    file is written. Spaces are replaced by underscores as before.
    """
    # Normalise Windows separators first so Path(...).name also strips
    # "C:\\dir\\file.pdf"-style names; NUL bytes would make open() fail.
    cleaned = (original_name or "").replace("\\", "/").replace("\x00", "")
    safe_filename = Path(cleaned).name.replace(" ", "_")
    if safe_filename in ("", ".", ".."):
        safe_filename = _FALLBACK_FILENAME
    timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
    return dest_dir / f"{timestamp}_{safe_filename}"


def _remove_if_exists(path: Path) -> None:
    """Best-effort removal of a partially written file."""
    try:
        path.unlink()
    except FileNotFoundError:
        pass


@router.post("/init", response_model=InitUploadResponse)
async def init_upload(request: InitUploadRequest):
    """
    Initialize a chunked upload session.

    Returns an upload_id that must be used for subsequent chunk uploads.
    Responds with HTTP 400 when the announced chunk count is below 1.
    """
    # A session without chunks can never make progress and would break the
    # progress calculation in the status endpoint.
    if request.chunks < 1:
        raise HTTPException(
            status_code=400,
            detail=f"Ungueltige Chunk-Anzahl: {request.chunks}"
        )

    upload_id = str(uuid.uuid4())

    # Create session directory
    session_dir = CHUNK_DIR / upload_id
    session_dir.mkdir(parents=True, exist_ok=True)

    # Store session info
    _upload_sessions[upload_id] = {
        "filename": request.filename,
        "filesize": request.filesize,
        "total_chunks": request.chunks,
        "received_chunks": set(),
        "destination": request.destination,
        "session_dir": str(session_dir),
        "created_at": datetime.now(timezone.utc).isoformat(),
    }

    return InitUploadResponse(
        upload_id=upload_id,
        chunk_size=_CHUNK_SIZE_BYTES,
        total_chunks=request.chunks,
        message="Upload-Session erstellt"
    )


@router.post("/chunk", response_model=ChunkUploadResponse)
async def upload_chunk(
    chunk: UploadFile = File(...),
    upload_id: str = Form(...),
    chunk_index: int = Form(...)
):
    """
    Upload a single chunk of a file.

    Chunks are stored temporarily until finalize is called. Responds with
    HTTP 404 for an unknown session and HTTP 400 for an out-of-range index.
    """
    session = _get_session(upload_id)

    if chunk_index < 0 or chunk_index >= session["total_chunks"]:
        raise HTTPException(
            status_code=400,
            detail=f"Ungueltiger Chunk-Index: {chunk_index}"
        )

    # Read the body before touching the disk so a failed read does not leave
    # an empty chunk file behind.
    content = await chunk.read()
    chunk_path = Path(session["session_dir"]) / f"chunk_{chunk_index:05d}"
    with open(chunk_path, "wb") as f:
        f.write(content)

    # Track received chunks (a set, so re-uploads of an index are idempotent)
    session["received_chunks"].add(chunk_index)

    return ChunkUploadResponse(
        upload_id=upload_id,
        chunk_index=chunk_index,
        received=True,
        chunks_received=len(session["received_chunks"]),
        total_chunks=session["total_chunks"]
    )


@router.post("/finalize", response_model=FinalizeResponse)
async def finalize_upload(upload_id: str = Form(...)):
    """
    Finalize the upload by combining all chunks into a single file.

    Validates that all chunks were received and calculates a SHA-256
    checksum. Responds with HTTP 404 for an unknown session, HTTP 400 when
    chunks are still missing and HTTP 500 when a chunk file is absent on disk.
    """
    session = _get_session(upload_id)
    total_chunks = session["total_chunks"]

    # Check if all chunks received
    received_count = len(session["received_chunks"])
    if received_count != total_chunks:
        missing = total_chunks - received_count
        raise HTTPException(
            status_code=400,
            detail=f"Nicht alle Chunks empfangen. Fehlend: {missing}"
        )

    # Verify every chunk file exists *before* creating the output file, so a
    # missing chunk does not leave a truncated file in the destination.
    session_dir = Path(session["session_dir"])
    chunk_paths = [session_dir / f"chunk_{i:05d}" for i in range(total_chunks)]
    for i, chunk_path in enumerate(chunk_paths):
        if not chunk_path.exists():
            raise HTTPException(
                status_code=500,
                detail=f"Chunk {i} nicht gefunden"
            )

    final_path = _build_target_path(
        _destination_dir(session["destination"]), session["filename"]
    )

    # Combine chunks, streaming in bounded pieces instead of loading each
    # chunk fully into memory.
    hasher = hashlib.sha256()
    total_size = 0
    try:
        with open(final_path, "wb") as outfile:
            for chunk_path in chunk_paths:
                with open(chunk_path, "rb") as infile:
                    while True:
                        data = infile.read(_COPY_BUFFER_BYTES)
                        if not data:
                            break
                        outfile.write(data)
                        hasher.update(data)
                        total_size += len(data)
    except OSError:
        # Do not leave a partial file behind; the session is kept so the
        # client may retry finalize.
        _remove_if_exists(final_path)
        raise

    # Clean up chunks
    shutil.rmtree(session["session_dir"], ignore_errors=True)
    _upload_sessions.pop(upload_id, None)

    return FinalizeResponse(
        upload_id=upload_id,
        filename=final_path.name,
        filepath=str(final_path),
        filesize=total_size,
        checksum=hasher.hexdigest(),
        message="Upload erfolgreich abgeschlossen"
    )


@router.post("/simple")
async def simple_upload(
    file: UploadFile = File(...),
    destination: str = Form("klausur")
):
    """
    Simple single-request upload for smaller files (<10MB).

    For larger files, use the chunked upload endpoints.
    """
    final_path = _build_target_path(_destination_dir(destination), file.filename)

    # Calculate checksum while writing
    hasher = hashlib.sha256()
    total_size = 0
    with open(final_path, "wb") as f:
        while True:
            chunk = await file.read(_COPY_BUFFER_BYTES)  # Read 1MB at a time
            if not chunk:
                break
            f.write(chunk)
            hasher.update(chunk)
            total_size += len(chunk)

    return {
        "filename": final_path.name,
        "filepath": str(final_path),
        "filesize": total_size,
        "checksum": hasher.hexdigest(),
        "message": "Upload erfolgreich"
    }


@router.get("/status/{upload_id}")
async def get_upload_status(upload_id: str):
    """
    Get the status of an ongoing upload.

    Responds with HTTP 404 for an unknown session.
    """
    session = _get_session(upload_id)
    total_chunks = session["total_chunks"]
    received = len(session["received_chunks"])

    # Guard against a zero chunk count so this endpoint can never fail with
    # a ZeroDivisionError.
    progress = round(received / total_chunks * 100, 1) if total_chunks else 0.0

    return {
        "upload_id": upload_id,
        "filename": session["filename"],
        "total_chunks": total_chunks,
        "received_chunks": received,
        "progress_percent": progress,
        "destination": session["destination"],
        "created_at": session["created_at"]
    }


@router.delete("/cancel/{upload_id}")
async def cancel_upload(upload_id: str):
    """
    Cancel an ongoing upload and clean up temporary files.

    Responds with HTTP 404 for an unknown session.
    """
    session = _get_session(upload_id)

    # Clean up chunks
    shutil.rmtree(session["session_dir"], ignore_errors=True)
    _upload_sessions.pop(upload_id, None)

    return {"message": "Upload abgebrochen", "upload_id": upload_id}


@router.get("/list")
async def list_uploads(destination: str = "klausur"):
    """
    List all uploaded PDF files in the specified destination.

    Returns at most the 50 most recently modified files.
    """
    dest_dir = _destination_dir(destination)

    files = []
    for f in dest_dir.iterdir():
        if f.is_file() and f.suffix.lower() == ".pdf":
            stat = f.stat()
            files.append({
                "filename": f.name,
                "size": stat.st_size,
                "modified": datetime.fromtimestamp(stat.st_mtime).isoformat(),
            })

    # ISO-8601 strings of the same form sort chronologically.
    files.sort(key=lambda x: x["modified"], reverse=True)

    return {
        "destination": destination,
        "count": len(files),
        "files": files[:50]  # Limit to 50 most recent
    }


@router.get("/mobile", response_class=HTMLResponse)
async def mobile_upload_page():
    """
    Serve the mobile upload page directly from the klausur-service.

    This allows mobile devices to upload without needing the Next.js website.
    """
    # NOTE(review): this literal contains only text, no HTML markup — it looks
    # like the tags were stripped somewhere upstream. Confirm against the
    # deployed template before relying on it; it is reproduced unchanged here.
    html_content = ''' BreakPilot Upload

BreakPilot Upload

DSGVO-konform
PDF-Dateien hochladen
Tippen zum Auswaehlen oder hierher ziehen
Grosse Dateien bis 200 MB werden automatisch in Teilen hochgeladen

Hinweise:

Server: wird ermittelt...
'''
    return HTMLResponse(content=html_content)