A previous `git pull --rebase origin main` dropped 177 local commits,
losing 3400+ files across admin-v2, backend, studio-v2, website,
klausur-service, and many other services. The partial restore attempt
(660295e2) only recovered some files.
This commit restores all missing files from pre-rebase ref 98933f5e
while preserving post-rebase additions (night-scheduler, night-mode UI,
NightModeWidget dashboard integration).
Restored features include:
- AI Module Sidebar (FAB), OCR Labeling, OCR Compare
- GPU Dashboard, RAG Pipeline, Magic Help
- Klausur-Korrektur (8 files), Abitur-Archiv (5+ files)
- Companion, Zeugnisse-Crawler, Screen Flow
- Full backend, studio-v2, website, klausur-service
- All compliance SDKs, agent-core, voice-service
- CI/CD configs, documentation, scripts
Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
198 lines
5.9 KiB
Python
198 lines
5.9 KiB
Python
"""
|
|
Storage Service for Klausur Documents.
|
|
|
|
PRIVACY BY DESIGN:
|
|
- Documents stored with doc_token as identifier (not student names)
|
|
- Organized by session_id/doc_token for teacher isolation
|
|
- Auto-cleanup when retention period expires
|
|
"""
|
|
import os
|
|
import io
|
|
import logging
|
|
from typing import Optional, BinaryIO
|
|
from pathlib import Path
|
|
from minio import Minio
|
|
from minio.error import S3Error
|
|
|
|
# Module-level logger, named after this module via __name__.
logger = logging.getLogger(__name__)
|
|
|
|
|
|
class KlausurStorageService:
    """
    MinIO/S3 Storage Service for exam documents.

    Structure:
        klausur-exams/
            {session_id}/
                {doc_token}.{ext}
                {doc_token}_redacted.{ext}  # After header redaction

    Connection settings are read from environment variables when the
    service is constructed. The MinIO client itself is only created on
    first access to ``client``.
    """

    # Content types for the extensions this service knows about; any other
    # extension is uploaded with the generic binary content type.
    _CONTENT_TYPES = {
        "png": "image/png",
        "jpg": "image/jpeg",
        "jpeg": "image/jpeg",
        "pdf": "application/pdf",
    }
    _DEFAULT_CONTENT_TYPE = "application/octet-stream"

    def __init__(self):
        """Read connection settings from the environment (no network I/O)."""
        self.endpoint = os.getenv("MINIO_ENDPOINT", "minio:9000")
        # NOTE(review): the fallback credentials below look like development
        # defaults - confirm that real deployments always set
        # MINIO_ROOT_USER / MINIO_ROOT_PASSWORD.
        self.access_key = os.getenv("MINIO_ROOT_USER", "breakpilot_dev")
        self.secret_key = os.getenv("MINIO_ROOT_PASSWORD", "breakpilot_dev_123")
        self.secure = os.getenv("MINIO_SECURE", "false").lower() == "true"
        self.bucket_name = os.getenv("KLAUSUR_BUCKET", "klausur-exams")

        # Created lazily by the ``client`` property.
        self._client: Optional["Minio"] = None

    @property
    def client(self) -> "Minio":
        """Return the MinIO client, creating it on first access.

        The first access also runs the bucket check in ``_ensure_bucket``.
        """
        if self._client is None:
            self._client = Minio(
                self.endpoint,
                access_key=self.access_key,
                secret_key=self.secret_key,
                secure=self.secure,
            )
            self._ensure_bucket()
        return self._client

    def _ensure_bucket(self):
        """Create the bucket if it doesn't exist.

        An ``S3Error`` raised by the check is logged as a warning and not
        re-raised, so a failed check does not prevent the client from being
        handed out.
        """
        try:
            if not self._client.bucket_exists(self.bucket_name):
                self._client.make_bucket(self.bucket_name)
                logger.info("Created Klausur bucket: %s", self.bucket_name)
        except S3Error as e:
            logger.warning("MinIO bucket check failed: %s", e)

    @staticmethod
    def _object_name(
        session_id: str,
        doc_token: str,
        file_extension: str,
        is_redacted: bool = False,
    ) -> str:
        """Build the object key ``{session_id}/{doc_token}[_redacted].{ext}``."""
        suffix = "_redacted" if is_redacted else ""
        return f"{session_id}/{doc_token}{suffix}.{file_extension}"

    def upload_document(
        self,
        session_id: str,
        doc_token: str,
        file_data: bytes,
        file_extension: str = "png",
        is_redacted: bool = False
    ) -> str:
        """
        Upload exam document to storage.

        Args:
            session_id: Exam session ID
            doc_token: Pseudonymized document token
            file_data: Document binary data
            file_extension: File extension (png, jpg, pdf)
            is_redacted: Whether this is the redacted version

        Returns:
            Object path in storage

        Raises:
            S3Error: If the upload fails (logged, then re-raised).
        """
        object_name = self._object_name(
            session_id, doc_token, file_extension, is_redacted
        )

        # The extension lookup is case-insensitive; the object key keeps the
        # extension exactly as the caller passed it.
        content_type = self._CONTENT_TYPES.get(
            file_extension.lower(), self._DEFAULT_CONTENT_TYPE
        )

        try:
            self.client.put_object(
                bucket_name=self.bucket_name,
                object_name=object_name,
                data=io.BytesIO(file_data),
                length=len(file_data),
                content_type=content_type,
            )
            logger.info("Uploaded document: %s", object_name)
            return object_name

        except S3Error as e:
            logger.error("Failed to upload document: %s", e)
            raise

    def get_document(
        self,
        session_id: str,
        doc_token: str,
        file_extension: str = "png",
        is_redacted: bool = False
    ) -> Optional[bytes]:
        """
        Download exam document from storage.

        Args:
            session_id: Exam session ID
            doc_token: Pseudonymized document token
            file_extension: File extension
            is_redacted: Whether to get the redacted version

        Returns:
            Document binary data or None if not found

        Raises:
            S3Error: For any storage error other than ``NoSuchKey``
                (logged, then re-raised).
        """
        object_name = self._object_name(
            session_id, doc_token, file_extension, is_redacted
        )

        try:
            response = self.client.get_object(self.bucket_name, object_name)
            try:
                return response.read()
            finally:
                # Always hand the connection back, even when read() fails;
                # otherwise the response would stay open on the error path.
                response.close()
                response.release_conn()

        except S3Error as e:
            if e.code == "NoSuchKey":
                logger.warning("Document not found: %s", object_name)
                return None
            logger.error("Failed to get document: %s", e)
            raise

    def delete_session_documents(self, session_id: str) -> int:
        """
        Delete all documents for a session.

        Every object whose key starts with ``{session_id}/`` is removed,
        including keys nested more deeply under that prefix.

        Args:
            session_id: Exam session ID

        Returns:
            Number of deleted objects

        Raises:
            S3Error: If listing or removal fails (logged, then re-raised).
                Objects removed before the failure stay removed.
        """
        deleted_count = 0
        prefix = f"{session_id}/"

        try:
            # recursive=True so that nested keys are returned as objects and
            # removed too, instead of being reported as a sub-prefix entry.
            objects = self.client.list_objects(
                self.bucket_name, prefix=prefix, recursive=True
            )
            for obj in objects:
                self.client.remove_object(self.bucket_name, obj.object_name)
                deleted_count += 1
                logger.debug("Deleted: %s", obj.object_name)

            logger.info(
                "Deleted %s documents for session %s", deleted_count, session_id
            )
            return deleted_count

        except S3Error as e:
            logger.error("Failed to delete session documents: %s", e)
            raise

    def document_exists(
        self,
        session_id: str,
        doc_token: str,
        file_extension: str = "png",
        is_redacted: bool = False
    ) -> bool:
        """Check if document exists in storage.

        Args:
            session_id: Exam session ID
            doc_token: Pseudonymized document token
            file_extension: File extension
            is_redacted: Whether to look for the redacted version

        Returns:
            True if the object could be stat'ed, False otherwise. Any
            ``S3Error`` yields False; errors other than ``NoSuchKey`` are
            additionally logged as a warning so that storage problems are
            not mistaken for a missing document without a trace.
        """
        object_name = self._object_name(
            session_id, doc_token, file_extension, is_redacted
        )
        try:
            self.client.stat_object(self.bucket_name, object_name)
            return True
        except S3Error as e:
            if e.code != "NoSuchKey":
                logger.warning(
                    "Could not check document %s: %s", object_name, e
                )
            return False
|
|
|
|
|
|
# Singleton instance
|
|
# Cached instance handed out by get_storage_service(); stays None until that
# function is called for the first time.
_storage_service: Optional[KlausurStorageService] = None
|
|
|
|
|
|
def get_storage_service() -> KlausurStorageService:
    """Return the shared storage service, building it on the first call."""
    global _storage_service
    # Fast path: an instance was already created by an earlier call.
    if _storage_service is not None:
        return _storage_service
    _storage_service = KlausurStorageService()
    return _storage_service
|