Restructure: Move final 12 root files into packages (klausur-service)
Some checks failed
CI / go-lint (push) Has been skipped
CI / python-lint (push) Has been skipped
CI / nodejs-lint (push) Has been skipped
CI / test-go-school (push) Successful in 28s
CI / test-go-edu-search (push) Successful in 28s
CI / test-python-klausur (push) Failing after 2m23s
CI / test-python-agent-core (push) Successful in 19s
CI / test-nodejs-website (push) Successful in 19s

ocr/spell/  (3): smart_spell, core, text
upload/     (3): api, chunked, mobile
crawler/    (3): github, github_core, github_parsers
+ unified_grid → grid/, tesseract_extractor → ocr/engines/, htr_api → ocr/pipeline/

12 shims added. Only main.py, config.py, storage + RAG files remain at root.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
Benjamin Admin
2026-04-25 23:19:11 +02:00
parent cba877c65a
commit d093a4d388
27 changed files with 3116 additions and 3049 deletions

View File

@@ -0,0 +1,6 @@
"""
Upload package — chunked and mobile upload endpoints.
Moved from backend/ flat modules (upload_api*.py).
Backward-compatible shim files remain at the old locations.
"""

View File

@@ -0,0 +1,29 @@
"""
Mobile Upload API — barrel re-export.
All implementation split into:
chunked (formerly upload_api_chunked) — chunked upload endpoints (init, chunk, finalize, simple, status, cancel, list)
mobile (formerly upload_api_mobile) — mobile HTML upload page
DSGVO-konform: Data stays local in WLAN, no external transmission.
"""
from fastapi import APIRouter
from .chunked import ( # noqa: F401
router as _chunked_router,
UPLOAD_DIR,
CHUNK_DIR,
EH_UPLOAD_DIR,
_upload_sessions,
InitUploadRequest,
InitUploadResponse,
ChunkUploadResponse,
FinalizeResponse,
)
from .mobile import router as _mobile_router # noqa: F401
# Composite router: registers the chunked-upload router first, then the
# mobile-page router, in that order.
router = APIRouter()
for _sub_router in (_chunked_router, _mobile_router):
    router.include_router(_sub_router)
del _sub_router  # do not leave the loop variable behind as a module attribute

View File

@@ -0,0 +1,320 @@
"""
Chunked Upload API — init, chunk, finalize, simple upload, status, cancel, list.
Extracted from upload_api.py for modularity.
DSGVO-konform: Data stays local in WLAN, no external transmission.
"""
import os
import uuid
import shutil
import hashlib
from pathlib import Path
from datetime import datetime, timezone
from typing import Dict, Optional
from fastapi import APIRouter, HTTPException, UploadFile, File, Form
from pydantic import BaseModel
# Storage locations. Each one can be overridden through the environment
# variable of the same name; the defaults point below /app.
UPLOAD_DIR = Path(os.environ.get("UPLOAD_DIR", "/app/uploads"))
CHUNK_DIR = Path(os.environ.get("CHUNK_DIR", "/app/chunks"))
EH_UPLOAD_DIR = Path(os.environ.get("EH_UPLOAD_DIR", "/app/eh-uploads"))

# Create every storage directory at import time so the endpoints can write
# into them without checking first.
for _storage_dir in (UPLOAD_DIR, CHUNK_DIR, EH_UPLOAD_DIR):
    _storage_dir.mkdir(parents=True, exist_ok=True)
del _storage_dir  # keep the module namespace identical to the explicit form

# Upload sessions keyed by upload_id, kept in process memory only (chosen for
# simplicity). A production deployment should use Redis or a database.
_upload_sessions: Dict[str, dict] = {}

router = APIRouter(prefix="/api/v1/upload", tags=["Mobile Upload"])
class InitUploadRequest(BaseModel):
    # JSON body accepted by POST /init to open a chunked upload session.
    filename: str  # client-supplied name; reused when the final file is named
    filesize: int  # size announced by the client; only stored in the session
    chunks: int  # number of chunks the client will send (indices 0..chunks-1)
    destination: str = "klausur"  # "rag" selects EH_UPLOAD_DIR, anything else UPLOAD_DIR
class InitUploadResponse(BaseModel):
    # Response of POST /init.
    upload_id: str  # session identifier to send with every chunk and with finalize
    chunk_size: int  # chunk size in bytes reported by the server
    total_chunks: int  # echo of the chunk count from the request
    message: str  # human-readable status text
class ChunkUploadResponse(BaseModel):
    # Response of POST /chunk, returned after one chunk has been stored.
    upload_id: str  # session the chunk belongs to
    chunk_index: int  # index of the chunk that was just stored
    received: bool  # acknowledgement flag for this chunk
    chunks_received: int  # number of distinct chunk indices stored so far
    total_chunks: int  # number of chunks announced when the session was created
class FinalizeResponse(BaseModel):
    # Response of POST /finalize, describing the assembled file.
    upload_id: str  # session that was finalized
    filename: str  # final file name (timestamp prefix + client file name)
    filepath: str  # full path of the assembled file as a string
    filesize: int  # total number of bytes written to the assembled file
    checksum: str  # SHA-256 hex digest of the assembled file content
    message: str  # human-readable status text
@router.post("/init", response_model=InitUploadResponse)
async def init_upload(request: InitUploadRequest):
    """
    Initialize a chunked upload session.

    Creates a per-session directory below CHUNK_DIR and registers the session
    in the in-memory session table. The returned upload_id must be sent with
    every subsequent chunk and with the finalize call.

    Raises:
        HTTPException 400: the announced chunk count is smaller than 1.
    """
    # A session without chunks can never make progress: the status endpoint
    # divides by the chunk count and finalize would write an empty file.
    # Reject it before anything is created on disk.
    if request.chunks < 1:
        raise HTTPException(
            status_code=400,
            detail=f"Ungueltige Chunk-Anzahl: {request.chunks}"
        )

    upload_id = str(uuid.uuid4())

    # Create session directory
    session_dir = CHUNK_DIR / upload_id
    session_dir.mkdir(parents=True, exist_ok=True)

    # Store session info
    _upload_sessions[upload_id] = {
        "filename": request.filename,
        "filesize": request.filesize,
        "total_chunks": request.chunks,
        "received_chunks": set(),
        "destination": request.destination,
        "session_dir": str(session_dir),
        "created_at": datetime.now(timezone.utc).isoformat(),
    }

    return InitUploadResponse(
        upload_id=upload_id,
        chunk_size=5 * 1024 * 1024,  # 5 MB
        total_chunks=request.chunks,
        message="Upload-Session erstellt"
    )
@router.post("/chunk", response_model=ChunkUploadResponse)
async def upload_chunk(
    chunk: UploadFile = File(...),
    upload_id: str = Form(...),
    chunk_index: int = Form(...)
):
    """
    Upload a single chunk of a file.

    The chunk is stored in the session directory as ``chunk_<index>`` until
    finalize is called. Sending the same index again replaces the stored
    chunk.

    Raises:
        HTTPException 404: no session exists for upload_id.
        HTTPException 400: chunk_index is outside 0..total_chunks-1.
    """
    if upload_id not in _upload_sessions:
        raise HTTPException(status_code=404, detail="Upload-Session nicht gefunden")

    session = _upload_sessions[upload_id]

    if not 0 <= chunk_index < session["total_chunks"]:
        raise HTTPException(
            status_code=400,
            detail=f"Ungueltiger Chunk-Index: {chunk_index}"
        )

    # Read the complete body BEFORE opening the target file. Opening first
    # truncates an already stored chunk, so a failed read during a re-upload
    # would leave an empty chunk that is still listed as received.
    content = await chunk.read()

    # Save chunk
    chunk_path = Path(session["session_dir"]) / f"chunk_{chunk_index:05d}"
    with open(chunk_path, "wb") as f:
        f.write(content)

    # Track received chunks (a set, so repeated indices are counted once)
    session["received_chunks"].add(chunk_index)

    return ChunkUploadResponse(
        upload_id=upload_id,
        chunk_index=chunk_index,
        received=True,
        chunks_received=len(session["received_chunks"]),
        total_chunks=session["total_chunks"]
    )
@router.post("/finalize", response_model=FinalizeResponse)
async def finalize_upload(upload_id: str = Form(...)):
    """
    Finalize the upload by combining all chunks into a single file.

    Checks that every announced chunk was received and is present on disk,
    concatenates the chunks in index order into the destination directory,
    computes the SHA-256 checksum of the result, and removes the session.

    Raises:
        HTTPException 404: no session exists for upload_id.
        HTTPException 400: not all chunks have been received yet.
        HTTPException 500: a received chunk is missing on disk.
    """
    if upload_id not in _upload_sessions:
        raise HTTPException(status_code=404, detail="Upload-Session nicht gefunden")

    session = _upload_sessions[upload_id]
    total_chunks = session["total_chunks"]
    received = len(session["received_chunks"])

    # Check if all chunks received
    if received != total_chunks:
        missing = total_chunks - received
        raise HTTPException(
            status_code=400,
            detail=f"Nicht alle Chunks empfangen. Fehlend: {missing}"
        )

    # Verify every chunk file up front, so a missing chunk is reported before
    # any output file is created instead of leaving a partial file behind.
    session_dir = Path(session["session_dir"])
    chunk_paths = [session_dir / f"chunk_{i:05d}" for i in range(total_chunks)]
    for i, chunk_path in enumerate(chunk_paths):
        if not chunk_path.exists():
            raise HTTPException(
                status_code=500,
                detail=f"Chunk {i} nicht gefunden"
            )

    # Determine destination directory
    dest_dir = EH_UPLOAD_DIR if session["destination"] == "rag" else UPLOAD_DIR

    # Build the final file name. The client-supplied name is reduced to its
    # last path component (both "/" and "\" count as separators); otherwise a
    # name containing a separator points into a non-existent subdirectory of
    # dest_dir and the write fails.
    timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
    base_name = os.path.basename(session["filename"].replace("\\", "/"))
    safe_filename = base_name.replace(" ", "_") or "upload"
    final_filename = f"{timestamp}_{safe_filename}"
    final_path = dest_dir / final_filename

    # Combine chunks
    hasher = hashlib.sha256()
    total_size = 0
    try:
        with open(final_path, "wb") as outfile:
            for chunk_path in chunk_paths:
                with open(chunk_path, "rb") as infile:
                    data = infile.read()
                outfile.write(data)
                hasher.update(data)
                total_size += len(data)
    except OSError:
        # Do not leave a truncated file in the destination directory. The
        # session and its chunks are kept, so the client can retry finalize.
        if final_path.exists():
            final_path.unlink()
        raise

    # Clean up chunks
    shutil.rmtree(session["session_dir"], ignore_errors=True)
    del _upload_sessions[upload_id]

    return FinalizeResponse(
        upload_id=upload_id,
        filename=final_filename,
        filepath=str(final_path),
        filesize=total_size,
        checksum=hasher.hexdigest(),
        message="Upload erfolgreich abgeschlossen"
    )
@router.post("/simple")
async def simple_upload(
    file: UploadFile = File(...),
    destination: str = Form("klausur")
):
    """
    Simple single-request upload for smaller files (<10MB).

    For larger files, use the chunked upload endpoints. The file is streamed
    to the destination directory in 1 MB pieces while its SHA-256 checksum
    is computed.

    Returns:
        A dict with the stored file name, its path, its size in bytes, the
        checksum, and a status message.
    """
    # Determine destination directory
    dest_dir = EH_UPLOAD_DIR if destination == "rag" else UPLOAD_DIR

    # Build the final file name. The client-supplied name is reduced to its
    # last path component (both "/" and "\" count as separators); otherwise a
    # name containing a separator points into a non-existent subdirectory of
    # dest_dir and the write fails.
    timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
    base_name = os.path.basename((file.filename or "").replace("\\", "/"))
    safe_filename = base_name.replace(" ", "_") or "upload.pdf"
    final_filename = f"{timestamp}_{safe_filename}"
    final_path = dest_dir / final_filename

    # Calculate checksum while writing
    hasher = hashlib.sha256()
    total_size = 0

    with open(final_path, "wb") as f:
        while True:
            chunk = await file.read(1024 * 1024)  # Read 1MB at a time
            if not chunk:
                break
            f.write(chunk)
            hasher.update(chunk)
            total_size += len(chunk)

    return {
        "filename": final_filename,
        "filepath": str(final_path),
        "filesize": total_size,
        "checksum": hasher.hexdigest(),
        "message": "Upload erfolgreich"
    }
@router.get("/status/{upload_id}")
async def get_upload_status(upload_id: str):
    """
    Get the status of an ongoing upload.

    Returns:
        A dict with the session's file name, announced and received chunk
        counts, the progress in percent (one decimal place), the
        destination, and the creation timestamp.

    Raises:
        HTTPException 404: no session exists for upload_id.
    """
    if upload_id not in _upload_sessions:
        raise HTTPException(status_code=404, detail="Upload-Session nicht gefunden")

    session = _upload_sessions[upload_id]
    total_chunks = session["total_chunks"]
    received_chunks = len(session["received_chunks"])

    # A session announced with zero chunks must not crash the endpoint with
    # a ZeroDivisionError; report 0.0 percent for it instead.
    if total_chunks > 0:
        progress_percent = round(received_chunks / total_chunks * 100, 1)
    else:
        progress_percent = 0.0

    return {
        "upload_id": upload_id,
        "filename": session["filename"],
        "total_chunks": total_chunks,
        "received_chunks": received_chunks,
        "progress_percent": progress_percent,
        "destination": session["destination"],
        "created_at": session["created_at"]
    }
@router.delete("/cancel/{upload_id}")
async def cancel_upload(upload_id: str):
    """
    Abort a running upload session and discard its temporary chunk files.

    Raises:
        HTTPException 404: no session exists for upload_id.
    """
    # Session entries are always dicts, so None reliably means "unknown id".
    cancelled = _upload_sessions.pop(upload_id, None)
    if cancelled is None:
        raise HTTPException(status_code=404, detail="Upload-Session nicht gefunden")

    # Best-effort removal of the chunk directory; errors are ignored.
    shutil.rmtree(cancelled["session_dir"], ignore_errors=True)

    return {"message": "Upload abgebrochen", "upload_id": upload_id}
@router.get("/list")
async def list_uploads(destination: str = "klausur"):
    """
    Return the PDF files stored in the requested destination directory.

    "rag" selects EH_UPLOAD_DIR, any other value selects UPLOAD_DIR. The
    result is ordered newest first by modification time. "count" is the
    number of all matching files, while "files" holds at most the first 50.
    """
    dest_dir = EH_UPLOAD_DIR if destination == "rag" else UPLOAD_DIR

    # Pair each PDF with its stat result so stat() runs once per file.
    pdf_entries = (
        (entry, entry.stat())
        for entry in dest_dir.iterdir()
        if entry.is_file() and entry.suffix.lower() == ".pdf"
    )

    files = sorted(
        (
            {
                "filename": entry.name,
                "size": info.st_size,
                "modified": datetime.fromtimestamp(info.st_mtime).isoformat(),
            }
            for entry, info in pdf_entries
        ),
        key=lambda item: item["modified"],
        reverse=True,
    )

    return {
        "destination": destination,
        "count": len(files),
        "files": files[:50]  # Limit to 50 most recent
    }

View File

@@ -0,0 +1,292 @@
"""
Mobile Upload HTML Page — serves the mobile upload UI directly from klausur-service.
Extracted from upload_api.py for modularity.
DSGVO-konform: Data stays local in WLAN, no external transmission.
"""
from fastapi import APIRouter
from fastapi.responses import HTMLResponse
# Router for the mobile upload page; its routes are served under /api/v1/upload.
router = APIRouter(prefix="/api/v1/upload", tags=["Mobile Upload"])
@router.get("/mobile", response_class=HTMLResponse)
async def mobile_upload_page():
    """
    Serve the mobile upload page directly from the klausur-service.

    This allows mobile devices to upload without needing the Next.js website.
    The page is a self-contained HTML document (inline CSS and JavaScript)
    that talks to the /api/v1/upload endpoints on the origin it was loaded
    from: files larger than 10 MB go through init/chunk/finalize in 5 MB
    chunks, smaller files through the simple endpoint.

    Returns:
        HTMLResponse containing the complete page.
    """
    # NOTE: the script builds the file list through innerHTML. File names and
    # error messages are passed through escapeHtml() first, so a crafted file
    # name cannot inject markup into the page.
    html_content = '''<!DOCTYPE html>
<html lang="de">
<head>
<meta charset="UTF-8">
<meta name="viewport" content="width=device-width, initial-scale=1.0, maximum-scale=1.0, user-scalable=no">
<meta name="apple-mobile-web-app-capable" content="yes">
<title>BreakPilot Upload</title>
<style>
* { margin: 0; padding: 0; box-sizing: border-box; }
body {
font-family: -apple-system, BlinkMacSystemFont, 'Segoe UI', Roboto, sans-serif;
background: linear-gradient(135deg, #1e293b 0%, #0f172a 100%);
color: white;
min-height: 100vh;
padding: 16px;
}
.header {
display: flex;
justify-content: space-between;
align-items: center;
padding: 16px;
border-bottom: 1px solid #334155;
margin-bottom: 24px;
}
.header h1 { font-size: 20px; color: #60a5fa; }
.badge { font-size: 10px; background: #1e293b; padding: 4px 8px; border-radius: 4px; color: #94a3b8; }
.destination-selector {
display: flex;
gap: 8px;
margin-bottom: 24px;
}
.dest-btn {
flex: 1;
padding: 14px;
border: none;
border-radius: 10px;
font-size: 14px;
font-weight: 600;
cursor: pointer;
transition: all 0.2s;
}
.dest-btn.active-klausur { background: #2563eb; color: white; box-shadow: 0 4px 15px rgba(37, 99, 235, 0.3); }
.dest-btn.active-rag { background: #7c3aed; color: white; box-shadow: 0 4px 15px rgba(124, 58, 237, 0.3); }
.dest-btn:not(.active-klausur):not(.active-rag) { background: #1e293b; color: #94a3b8; }
.upload-zone {
border: 2px dashed #475569;
border-radius: 16px;
padding: 40px 20px;
text-align: center;
margin-bottom: 24px;
transition: all 0.2s;
position: relative;
}
.upload-zone.dragover { border-color: #60a5fa; background: rgba(96, 165, 250, 0.1); transform: scale(1.02); }
.upload-zone input[type="file"] {
position: absolute;
inset: 0;
opacity: 0;
cursor: pointer;
}
.upload-icon {
width: 64px;
height: 64px;
background: #334155;
border-radius: 50%;
display: flex;
align-items: center;
justify-content: center;
margin: 0 auto 16px;
font-size: 28px;
}
.upload-title { font-size: 18px; margin-bottom: 8px; }
.upload-subtitle { font-size: 14px; color: #94a3b8; margin-bottom: 16px; }
.upload-hint { font-size: 12px; color: #64748b; }
.file-list { margin-bottom: 24px; }
.file-item {
background: #1e293b;
border-radius: 12px;
padding: 16px;
margin-bottom: 12px;
}
.file-item.error { border: 2px solid rgba(239, 68, 68, 0.5); }
.file-item.complete { border: 2px solid rgba(34, 197, 94, 0.3); }
.file-header { display: flex; justify-content: space-between; align-items: flex-start; margin-bottom: 8px; }
.file-name { font-weight: 500; word-break: break-all; }
.file-size { font-size: 14px; color: #94a3b8; }
.remove-btn { background: none; border: none; color: #94a3b8; font-size: 20px; cursor: pointer; padding: 4px; }
.progress-bar { height: 6px; background: #334155; border-radius: 3px; overflow: hidden; margin-top: 12px; }
.progress-fill { height: 100%; background: linear-gradient(90deg, #3b82f6, #60a5fa); transition: width 0.3s; }
.progress-text { font-size: 12px; color: #94a3b8; margin-top: 4px; }
.status-complete { display: flex; align-items: center; gap: 8px; color: #22c55e; font-size: 14px; margin-top: 12px; }
.status-error { display: flex; align-items: center; gap: 8px; color: #ef4444; font-size: 14px; margin-top: 12px; }
.info-box {
background: rgba(30, 41, 59, 0.5);
border-radius: 12px;
padding: 16px;
font-size: 14px;
color: #94a3b8;
}
.info-box h3 { color: #cbd5e1; margin-bottom: 8px; font-size: 14px; }
.info-box ul { padding-left: 20px; }
.info-box li { margin-bottom: 4px; }
.server-info { text-align: center; font-size: 12px; color: #64748b; margin-top: 16px; }
.stats { display: flex; justify-content: space-between; font-size: 14px; color: #94a3b8; padding: 0 8px; margin-bottom: 12px; }
</style>
</head>
<body>
<header class="header">
<h1>BreakPilot Upload</h1>
<span class="badge">DSGVO-konform</span>
</header>
<div class="destination-selector">
<button class="dest-btn active-klausur" id="btn-klausur" onclick="setDestination('klausur')">Klausuren</button>
<button class="dest-btn" id="btn-rag" onclick="setDestination('rag')">Erwartungshorizonte</button>
</div>
<div class="upload-zone" id="upload-zone">
<input type="file" accept=".pdf" multiple onchange="handleFiles(this.files)">
<div class="upload-icon">&#x2601;</div>
<div class="upload-title">PDF-Dateien hochladen</div>
<div class="upload-subtitle">Tippen zum Auswaehlen oder hierher ziehen</div>
<div class="upload-hint">Grosse Dateien bis 200 MB werden automatisch in Teilen hochgeladen</div>
</div>
<div class="stats" id="stats" style="display: none;">
<span id="completed-count">0 von 0 fertig</span>
<span id="total-size">0 B gesamt</span>
</div>
<div class="file-list" id="file-list"></div>
<div class="info-box">
<h3>Hinweise:</h3>
<ul>
<li>Die Dateien werden lokal im WLAN uebertragen</li>
<li>Keine Daten werden ins Internet gesendet</li>
<li>Unterstuetzte Formate: PDF</li>
</ul>
</div>
<div class="server-info" id="server-info">Server: wird ermittelt...</div>
<script>
const CHUNK_SIZE = 5 * 1024 * 1024;
let destination = 'klausur';
let files = [];
const serverUrl = window.location.origin;
document.getElementById('server-info').textContent = 'Server: ' + serverUrl;
function setDestination(dest) {
destination = dest;
document.querySelectorAll('.dest-btn').forEach(btn => {
btn.classList.remove('active-klausur', 'active-rag');
});
if (dest === 'klausur') {
document.getElementById('btn-klausur').classList.add('active-klausur');
} else {
document.getElementById('btn-rag').classList.add('active-rag');
}
}
function formatSize(bytes) {
if (bytes === 0) return '0 B';
const k = 1024;
const sizes = ['B', 'KB', 'MB', 'GB'];
const i = Math.floor(Math.log(bytes) / Math.log(k));
return parseFloat((bytes / Math.pow(k, i)).toFixed(1)) + ' ' + sizes[i];
}
function updateStats() {
const completed = files.filter(f => f.status === 'complete').length;
const total = files.reduce((sum, f) => sum + f.size, 0);
document.getElementById('completed-count').textContent = completed + ' von ' + files.length + ' fertig';
document.getElementById('total-size').textContent = formatSize(total) + ' gesamt';
document.getElementById('stats').style.display = files.length > 0 ? 'flex' : 'none';
}
// Escape text for use as HTML element content
function escapeHtml(value) {
const holder = document.createElement('div');
holder.textContent = String(value);
return holder.innerHTML;
}
function renderFiles() {
const list = document.getElementById('file-list');
list.innerHTML = files.map(f => {
let statusHtml = '';
if (f.status === 'uploading' || f.status === 'pending') {
statusHtml = '<div class="progress-bar"><div class="progress-fill" style="width: ' + f.progress + '%"></div></div><div class="progress-text">' + f.progress + '% hochgeladen</div>';
} else if (f.status === 'complete') {
statusHtml = '<div class="status-complete">&#x2713; Erfolgreich hochgeladen</div>';
} else if (f.status === 'error') {
statusHtml = '<div class="status-error">&#x26A0; ' + escapeHtml(f.error || 'Fehler beim Hochladen') + '</div>';
}
return '<div class="file-item ' + f.status + '"><div class="file-header"><div><div class="file-name">' + escapeHtml(f.name) + '</div><div class="file-size">' + formatSize(f.size) + '</div></div><button class="remove-btn" onclick="removeFile(\\'' + f.id + '\\')">&times;</button></div>' + statusHtml + '</div>';
}).join('');
updateStats();
}
function removeFile(id) {
files = files.filter(f => f.id !== id);
renderFiles();
}
async function uploadFile(file, fileId) {
const updateProgress = (progress) => {
const f = files.find(f => f.id === fileId);
if (f) { f.progress = progress; renderFiles(); }
};
const setStatus = (status, error) => {
const f = files.find(f => f.id === fileId);
if (f) { f.status = status; if (error) f.error = error; renderFiles(); }
};
try {
setStatus('uploading');
if (file.size > 10 * 1024 * 1024) {
// Chunked upload
const totalChunks = Math.ceil(file.size / CHUNK_SIZE);
const initRes = await fetch(serverUrl + '/api/v1/upload/init', {
method: 'POST',
headers: { 'Content-Type': 'application/json' },
body: JSON.stringify({ filename: file.name, filesize: file.size, chunks: totalChunks, destination: destination })
});
if (!initRes.ok) throw new Error('Konnte Upload nicht starten');
const { upload_id } = await initRes.json();
for (let i = 0; i < totalChunks; i++) {
const start = i * CHUNK_SIZE;
const end = Math.min(start + CHUNK_SIZE, file.size);
const chunk = file.slice(start, end);
const formData = new FormData();
formData.append('chunk', chunk);
formData.append('upload_id', upload_id);
formData.append('chunk_index', i.toString());
const chunkRes = await fetch(serverUrl + '/api/v1/upload/chunk', { method: 'POST', body: formData });
if (!chunkRes.ok) throw new Error('Fehler bei Teil ' + (i + 1));
updateProgress(Math.round(((i + 1) / totalChunks) * 100));
}
const finalizeForm = new FormData();
finalizeForm.append('upload_id', upload_id);
const finalRes = await fetch(serverUrl + '/api/v1/upload/finalize', { method: 'POST', body: finalizeForm });
if (!finalRes.ok) throw new Error('Fehler beim Abschliessen');
} else {
// Simple upload
const formData = new FormData();
formData.append('file', file);
formData.append('destination', destination);
const res = await fetch(serverUrl + '/api/v1/upload/simple', { method: 'POST', body: formData });
if (!res.ok) throw new Error('Upload fehlgeschlagen');
updateProgress(100);
}
setStatus('complete');
} catch (e) {
setStatus('error', e.message);
}
}
function handleFiles(fileList) {
const newFiles = Array.from(fileList).filter(f => f.type === 'application/pdf');
newFiles.forEach(file => {
const id = Math.random().toString(36).substr(2, 9);
files.push({ id, name: file.name, size: file.size, progress: 0, status: 'pending', file });
renderFiles();
uploadFile(file, id);
});
}
// Drag & Drop
const zone = document.getElementById('upload-zone');
zone.addEventListener('dragover', e => { e.preventDefault(); zone.classList.add('dragover'); });
zone.addEventListener('dragleave', e => { e.preventDefault(); zone.classList.remove('dragover'); });
zone.addEventListener('drop', e => { e.preventDefault(); zone.classList.remove('dragover'); handleFiles(e.dataTransfer.files); });
</script>
</body>
</html>'''
    return HTMLResponse(content=html_content)