klausur-service (11 files): - cv_gutter_repair, ocr_pipeline_regression, upload_api - ocr_pipeline_sessions, smart_spell, nru_worksheet_generator - ocr_pipeline_overlays, mail/aggregator, zeugnis_api - cv_syllable_detect, self_rag backend-lehrer (17 files): - classroom_engine/suggestions, generators/quiz_generator - worksheets_api, llm_gateway/comparison, state_engine_api - classroom/models (→ 4 submodules), services/file_processor - alerts_agent/api/wizard+digests+routes, content_generators/pdf - classroom/routes/sessions, llm_gateway/inference - classroom_engine/analytics, auth/keycloak_auth - alerts_agent/processing/rule_engine, ai_processor/print_versions agent-core (5 files): - brain/memory_store, brain/knowledge_graph, brain/context_manager - orchestrator/supervisor, sessions/session_manager admin-lehrer (5 components): - GridOverlay, StepGridReview, DevOpsPipelineSidebar - DataFlowDiagram, sbom/wizard/page website (2 files): - DependencyMap, lehrer/abitur-archiv Other: nibis_ingestion, grid_detection_service, export-doclayout-onnx Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
321 lines
8.9 KiB
Python
321 lines
8.9 KiB
Python
"""
|
|
Chunked Upload API — init, chunk, finalize, simple upload, status, cancel, list.
|
|
|
|
Extracted from upload_api.py for modularity.
|
|
|
|
DSGVO-konform: Data stays local in WLAN, no external transmission.
|
|
"""
|
|
|
|
import os
|
|
import uuid
|
|
import shutil
|
|
import hashlib
|
|
from pathlib import Path
|
|
from datetime import datetime, timezone
|
|
from typing import Dict, Optional
|
|
|
|
from fastapi import APIRouter, HTTPException, UploadFile, File, Form
|
|
from pydantic import BaseModel
|
|
|
|
# Configuration — all paths overridable via environment for container deploys.
UPLOAD_DIR = Path(os.getenv("UPLOAD_DIR", "/app/uploads"))  # "klausur" (exam) uploads
CHUNK_DIR = Path(os.getenv("CHUNK_DIR", "/app/chunks"))  # temporary per-session chunk storage
EH_UPLOAD_DIR = Path(os.getenv("EH_UPLOAD_DIR", "/app/eh-uploads"))  # "rag" destination uploads

# Ensure directories exist at import time so endpoints can write immediately.
UPLOAD_DIR.mkdir(parents=True, exist_ok=True)
CHUNK_DIR.mkdir(parents=True, exist_ok=True)
EH_UPLOAD_DIR.mkdir(parents=True, exist_ok=True)

# In-memory storage for upload sessions (for simplicity).
# In production, use Redis or database — sessions are lost on process restart
# and are not shared across workers.
_upload_sessions: Dict[str, dict] = {}

router = APIRouter(prefix="/api/v1/upload", tags=["Mobile Upload"])
|
|
|
|
|
|
class InitUploadRequest(BaseModel):
    """Request body for POST /init — declares an upcoming chunked upload."""
    filename: str  # original client-side filename (sanitized server-side on finalize)
    filesize: int  # total file size in bytes
    chunks: int  # number of chunks the client intends to send
    destination: str = "klausur"  # "klausur" or "rag"
|
|
|
|
|
|
class InitUploadResponse(BaseModel):
    """Response for POST /init."""
    upload_id: str  # session key to pass to /chunk, /finalize, /status, /cancel
    chunk_size: int  # advisory chunk size (bytes) clients should use
    total_chunks: int  # echoed back from the request
    message: str  # human-readable status (German)
|
|
|
|
|
|
class ChunkUploadResponse(BaseModel):
    """Response for POST /chunk — progress after accepting one chunk."""
    upload_id: str
    chunk_index: int  # index of the chunk just stored (0-based)
    received: bool  # always True on success; errors are raised as HTTPException
    chunks_received: int  # count of distinct chunk indices received so far
    total_chunks: int
|
|
|
|
|
|
class FinalizeResponse(BaseModel):
    """Response for POST /finalize — details of the assembled file."""
    upload_id: str
    filename: str  # final server-side filename (timestamp-prefixed)
    filepath: str  # absolute path of the assembled file
    filesize: int  # assembled size in bytes
    checksum: str  # SHA-256 hex digest of the assembled file
    message: str  # human-readable status (German)
|
|
|
|
|
|
@router.post("/init", response_model=InitUploadResponse)
async def init_upload(request: InitUploadRequest):
    """
    Initialize a chunked upload session.

    Validates the declared chunk count and file size, creates a per-session
    chunk directory, and registers the session in the in-memory store.

    Returns:
        InitUploadResponse with an upload_id that must be used for
        subsequent chunk uploads.

    Raises:
        HTTPException 400: if chunks is not positive or filesize is negative.
    """
    # Reject nonsensical sessions up front — a zero/negative chunk count would
    # make finalize's completeness check vacuous and break status's progress math.
    if request.chunks <= 0:
        raise HTTPException(status_code=400, detail="Ungueltige Chunk-Anzahl")
    if request.filesize < 0:
        raise HTTPException(status_code=400, detail="Ungueltige Dateigroesse")

    upload_id = str(uuid.uuid4())

    # Create session directory for the incoming chunks
    session_dir = CHUNK_DIR / upload_id
    session_dir.mkdir(parents=True, exist_ok=True)

    # Store session info (in-memory; see module note about Redis/DB)
    _upload_sessions[upload_id] = {
        "filename": request.filename,
        "filesize": request.filesize,
        "total_chunks": request.chunks,
        "received_chunks": set(),
        "destination": request.destination,
        "session_dir": str(session_dir),
        "created_at": datetime.now(timezone.utc).isoformat(),
    }

    return InitUploadResponse(
        upload_id=upload_id,
        chunk_size=5 * 1024 * 1024,  # 5 MB — advisory size for clients
        total_chunks=request.chunks,
        message="Upload-Session erstellt"
    )
|
|
|
|
|
|
@router.post("/chunk", response_model=ChunkUploadResponse)
async def upload_chunk(
    chunk: UploadFile = File(...),
    upload_id: str = Form(...),
    chunk_index: int = Form(...)
):
    """
    Upload a single chunk of a file.

    The chunk is written into the session's temporary directory under a
    zero-padded name so finalize can reassemble the parts in index order.

    Raises:
        HTTPException 404: unknown upload_id.
        HTTPException 400: chunk_index outside [0, total_chunks).
    """
    session = _upload_sessions.get(upload_id)
    if session is None:
        raise HTTPException(status_code=404, detail="Upload-Session nicht gefunden")

    total = session["total_chunks"]
    if not 0 <= chunk_index < total:
        raise HTTPException(
            status_code=400,
            detail=f"Ungueltiger Chunk-Index: {chunk_index}"
        )

    # Persist the chunk; the padded name keeps lexical and numeric order aligned.
    target = Path(session["session_dir"]) / f"chunk_{chunk_index:05d}"
    with open(target, "wb") as out:
        out.write(await chunk.read())

    # Record receipt — a set, so re-uploading the same index stays idempotent.
    session["received_chunks"].add(chunk_index)

    return ChunkUploadResponse(
        upload_id=upload_id,
        chunk_index=chunk_index,
        received=True,
        chunks_received=len(session["received_chunks"]),
        total_chunks=total
    )
|
|
|
|
|
|
@router.post("/finalize", response_model=FinalizeResponse)
async def finalize_upload(upload_id: str = Form(...)):
    """
    Finalize the upload by combining all chunks into a single file.

    Validates that all chunks were received, assembles them in index order
    while computing a SHA-256 checksum in a single pass, then removes the
    chunk directory and the session entry.

    Raises:
        HTTPException 404: unknown upload_id.
        HTTPException 400: not all chunks received.
        HTTPException 500: a tracked chunk file is missing on disk.
    """
    if upload_id not in _upload_sessions:
        raise HTTPException(status_code=404, detail="Upload-Session nicht gefunden")

    session = _upload_sessions[upload_id]

    # Check if all chunks received
    if len(session["received_chunks"]) != session["total_chunks"]:
        missing = session["total_chunks"] - len(session["received_chunks"])
        raise HTTPException(
            status_code=400,
            detail=f"Nicht alle Chunks empfangen. Fehlend: {missing}"
        )

    # Determine destination directory ("rag" -> RAG ingest dir, else klausur)
    if session["destination"] == "rag":
        dest_dir = EH_UPLOAD_DIR
    else:
        dest_dir = UPLOAD_DIR

    # Generate a unique filename. Strip any client-supplied directory
    # components first (path-traversal defense): the raw filename previously
    # went into the path with only spaces replaced, so "../x" could escape
    # dest_dir. Backslashes are normalized so Windows-style paths are
    # stripped on POSIX hosts too.
    timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
    raw_name = session["filename"]
    safe_filename = Path(raw_name.replace("\\", "/")).name.replace(" ", "_") or "upload"
    final_filename = f"{timestamp}_{safe_filename}"
    final_path = dest_dir / final_filename

    # Combine chunks in index order, hashing while writing (single pass)
    hasher = hashlib.sha256()
    total_size = 0

    try:
        with open(final_path, "wb") as outfile:
            for i in range(session["total_chunks"]):
                chunk_path = Path(session["session_dir"]) / f"chunk_{i:05d}"

                if not chunk_path.exists():
                    raise HTTPException(
                        status_code=500,
                        detail=f"Chunk {i} nicht gefunden"
                    )

                with open(chunk_path, "rb") as infile:
                    data = infile.read()
                    outfile.write(data)
                    hasher.update(data)
                    total_size += len(data)
    except HTTPException:
        # Don't leave a partial output file behind on failure; the session
        # and its chunks are kept so the client can retry.
        final_path.unlink(missing_ok=True)
        raise

    # Clean up chunks and drop the session only after a successful assembly
    shutil.rmtree(session["session_dir"], ignore_errors=True)
    del _upload_sessions[upload_id]

    checksum = hasher.hexdigest()

    return FinalizeResponse(
        upload_id=upload_id,
        filename=final_filename,
        filepath=str(final_path),
        filesize=total_size,
        checksum=checksum,
        message="Upload erfolgreich abgeschlossen"
    )
|
|
|
|
|
|
@router.post("/simple")
async def simple_upload(
    file: UploadFile = File(...),
    destination: str = Form("klausur")
):
    """
    Simple single-request upload for smaller files (<10MB).

    For larger files, use the chunked upload endpoints (/init, /chunk,
    /finalize). Streams the body to disk in 1 MB pieces while computing a
    SHA-256 checksum in the same pass.

    Returns:
        dict with the stored filename, absolute path, size in bytes,
        SHA-256 hex checksum, and a human-readable message (German).
    """
    # Determine destination directory ("rag" -> RAG ingest dir, else klausur)
    if destination == "rag":
        dest_dir = EH_UPLOAD_DIR
    else:
        dest_dir = UPLOAD_DIR

    # Generate a unique filename. Strip any client-supplied directory
    # components first (path-traversal defense): previously only spaces were
    # replaced, so a filename containing "../" could escape dest_dir.
    timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
    raw_name = file.filename or "upload.pdf"
    safe_filename = Path(raw_name.replace("\\", "/")).name.replace(" ", "_") or "upload.pdf"
    final_filename = f"{timestamp}_{safe_filename}"
    final_path = dest_dir / final_filename

    # Calculate checksum while writing — avoids re-reading the file
    hasher = hashlib.sha256()
    total_size = 0

    with open(final_path, "wb") as f:
        # Read 1MB at a time; an empty read signals end of stream
        while chunk := await file.read(1024 * 1024):
            f.write(chunk)
            hasher.update(chunk)
            total_size += len(chunk)

    return {
        "filename": final_filename,
        "filepath": str(final_path),
        "filesize": total_size,
        "checksum": hasher.hexdigest(),
        "message": "Upload erfolgreich"
    }
|
|
|
|
|
|
@router.get("/status/{upload_id}")
async def get_upload_status(upload_id: str):
    """
    Get the status of an ongoing upload.

    Returns:
        dict with filename, chunk counts, progress percentage, destination,
        and session creation time.

    Raises:
        HTTPException 404: unknown upload_id.
    """
    if upload_id not in _upload_sessions:
        raise HTTPException(status_code=404, detail="Upload-Session nicht gefunden")

    session = _upload_sessions[upload_id]

    total = session["total_chunks"]
    received = len(session["received_chunks"])
    # Guard the division — a session created with total_chunks == 0 would
    # otherwise raise ZeroDivisionError here.
    progress = round(received / total * 100, 1) if total else 0.0

    return {
        "upload_id": upload_id,
        "filename": session["filename"],
        "total_chunks": total,
        "received_chunks": received,
        "progress_percent": progress,
        "destination": session["destination"],
        "created_at": session["created_at"]
    }
|
|
|
|
|
|
@router.delete("/cancel/{upload_id}")
async def cancel_upload(upload_id: str):
    """
    Cancel an ongoing upload and clean up its temporary chunk files.

    Raises:
        HTTPException 404: unknown upload_id.
    """
    try:
        # Atomically remove the session from the store
        session = _upload_sessions.pop(upload_id)
    except KeyError:
        raise HTTPException(status_code=404, detail="Upload-Session nicht gefunden") from None

    # Best-effort removal of the chunk directory; ignore if already gone
    shutil.rmtree(session["session_dir"], ignore_errors=True)

    return {"message": "Upload abgebrochen", "upload_id": upload_id}
|
|
|
|
|
|
@router.get("/list")
async def list_uploads(destination: str = "klausur"):
    """
    List all uploaded PDF files in the specified destination directory.

    Returns:
        dict with the destination, the total PDF count, and metadata for
        the 50 most recently modified files (newest first).
    """
    dest_dir = EH_UPLOAD_DIR if destination == "rag" else UPLOAD_DIR

    # Collect metadata for every PDF (case-insensitive extension match)
    files = [
        {
            "filename": entry.name,
            "size": (st := entry.stat()).st_size,
            "modified": datetime.fromtimestamp(st.st_mtime).isoformat(),
        }
        for entry in dest_dir.iterdir()
        if entry.is_file() and entry.suffix.lower() == ".pdf"
    ]

    # Newest first; ISO timestamps sort the same as their datetimes
    files.sort(key=lambda item: item["modified"], reverse=True)

    return {
        "destination": destination,
        "count": len(files),
        "files": files[:50]  # Limit to 50 most recent
    }
|