This repository has been archived on 2026-02-15. You can view files and clone it. You cannot open issues or pull requests or push a commit.
Files
breakpilot-pwa/backend/klausur/services/storage_service.py
Benjamin Admin 21a844cb8a fix: Restore all files lost during destructive rebase
A previous `git pull --rebase origin main` dropped 177 local commits,
losing 3400+ files across admin-v2, backend, studio-v2, website,
klausur-service, and many other services. The partial restore attempt
(660295e2) only recovered some files.

This commit restores all missing files from pre-rebase ref 98933f5e
while preserving post-rebase additions (night-scheduler, night-mode UI,
NightModeWidget dashboard integration).

Restored features include:
- AI Module Sidebar (FAB), OCR Labeling, OCR Compare
- GPU Dashboard, RAG Pipeline, Magic Help
- Klausur-Korrektur (8 files), Abitur-Archiv (5+ files)
- Companion, Zeugnisse-Crawler, Screen Flow
- Full backend, studio-v2, website, klausur-service
- All compliance SDKs, agent-core, voice-service
- CI/CD configs, documentation, scripts

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-02-09 09:51:32 +01:00

198 lines
5.9 KiB
Python

"""
Storage Service for Klausur Documents.
PRIVACY BY DESIGN:
- Documents stored with doc_token as identifier (not student names)
- Organized by session_id/doc_token for teacher isolation
- Auto-cleanup when retention period expires
"""
import os
import io
import logging
from typing import Optional, BinaryIO
from pathlib import Path
from minio import Minio
from minio.error import S3Error
logger = logging.getLogger(__name__)
class KlausurStorageService:
    """
    MinIO/S3 storage service for exam documents.

    PRIVACY BY DESIGN: documents are addressed by a pseudonymized
    ``doc_token`` (never student names) and grouped per ``session_id``
    so an entire session can be wiped when its retention period expires.

    Bucket layout::

        klausur-exams/
            {session_id}/
                {doc_token}.{ext}
                {doc_token}_redacted.{ext}   # after header redaction
    """

    # MIME types for the supported upload formats; anything else falls
    # back to application/octet-stream.
    _CONTENT_TYPES = {
        "png": "image/png",
        "jpg": "image/jpeg",
        "jpeg": "image/jpeg",
        "pdf": "application/pdf",
    }

    def __init__(self):
        # Connection settings come from the environment; the defaults
        # match the local dev MinIO service.
        self.endpoint = os.getenv("MINIO_ENDPOINT", "minio:9000")
        self.access_key = os.getenv("MINIO_ROOT_USER", "breakpilot_dev")
        self.secret_key = os.getenv("MINIO_ROOT_PASSWORD", "breakpilot_dev_123")
        self.secure = os.getenv("MINIO_SECURE", "false").lower() == "true"
        self.bucket_name = os.getenv("KLAUSUR_BUCKET", "klausur-exams")
        # Created lazily on first access so importing this module never
        # opens a network connection.
        self._client: Optional[Minio] = None

    @property
    def client(self) -> Minio:
        """Lazily initialize and return the MinIO client.

        Also ensures the target bucket exists on first initialization.
        """
        if self._client is None:
            self._client = Minio(
                self.endpoint,
                access_key=self.access_key,
                secret_key=self.secret_key,
                secure=self.secure,
            )
            self._ensure_bucket()
        return self._client

    def _ensure_bucket(self):
        """Create the bucket if it doesn't exist (best-effort).

        Failures are only logged: the bucket may be provisioned
        externally, and individual operations will surface real errors.
        """
        try:
            if not self._client.bucket_exists(self.bucket_name):
                self._client.make_bucket(self.bucket_name)
                logger.info(f"Created Klausur bucket: {self.bucket_name}")
        except S3Error as e:
            logger.warning(f"MinIO bucket check failed: {e}")

    @staticmethod
    def _object_name(
        session_id: str,
        doc_token: str,
        file_extension: str,
        is_redacted: bool,
    ) -> str:
        """Build the canonical object path: {session}/{token}[_redacted].{ext}."""
        suffix = "_redacted" if is_redacted else ""
        return f"{session_id}/{doc_token}{suffix}.{file_extension}"

    def upload_document(
        self,
        session_id: str,
        doc_token: str,
        file_data: bytes,
        file_extension: str = "png",
        is_redacted: bool = False
    ) -> str:
        """
        Upload exam document to storage.

        Args:
            session_id: Exam session ID
            doc_token: Pseudonymized document token
            file_data: Document binary data
            file_extension: File extension (png, jpg, pdf)
            is_redacted: Whether this is the redacted version

        Returns:
            Object path in storage

        Raises:
            S3Error: If the upload fails.
        """
        object_name = self._object_name(
            session_id, doc_token, file_extension, is_redacted
        )
        content_type = self._CONTENT_TYPES.get(
            file_extension.lower(), "application/octet-stream"
        )
        try:
            self.client.put_object(
                bucket_name=self.bucket_name,
                object_name=object_name,
                data=io.BytesIO(file_data),
                length=len(file_data),
                content_type=content_type
            )
            logger.info(f"Uploaded document: {object_name}")
            return object_name
        except S3Error as e:
            logger.error(f"Failed to upload document: {e}")
            raise

    def get_document(
        self,
        session_id: str,
        doc_token: str,
        file_extension: str = "png",
        is_redacted: bool = False
    ) -> Optional[bytes]:
        """
        Download exam document from storage.

        Args:
            session_id: Exam session ID
            doc_token: Pseudonymized document token
            file_extension: File extension
            is_redacted: Whether to get the redacted version

        Returns:
            Document binary data or None if not found

        Raises:
            S3Error: For any storage failure other than a missing object.
        """
        object_name = self._object_name(
            session_id, doc_token, file_extension, is_redacted
        )
        try:
            response = self.client.get_object(self.bucket_name, object_name)
            # Fully drain and release the connection so the urllib3 pool
            # used by minio is not exhausted.
            data = response.read()
            response.close()
            response.release_conn()
            return data
        except S3Error as e:
            if e.code == "NoSuchKey":
                logger.warning(f"Document not found: {object_name}")
                return None
            logger.error(f"Failed to get document: {e}")
            raise

    def delete_session_documents(self, session_id: str) -> int:
        """
        Delete all documents for a session.

        Used by the retention auto-cleanup: removing the whole
        ``{session_id}/`` prefix erases every (redacted and original)
        document of that session.

        Args:
            session_id: Exam session ID

        Returns:
            Number of deleted objects

        Raises:
            S3Error: If listing or deletion fails.
        """
        deleted_count = 0
        prefix = f"{session_id}/"
        try:
            objects = self.client.list_objects(self.bucket_name, prefix=prefix)
            for obj in objects:
                self.client.remove_object(self.bucket_name, obj.object_name)
                deleted_count += 1
                logger.debug(f"Deleted: {obj.object_name}")
            logger.info(f"Deleted {deleted_count} documents for session {session_id}")
            return deleted_count
        except S3Error as e:
            logger.error(f"Failed to delete session documents: {e}")
            raise

    def document_exists(
        self,
        session_id: str,
        doc_token: str,
        file_extension: str = "png",
        is_redacted: bool = False
    ) -> bool:
        """Check if a document exists in storage.

        Args:
            session_id: Exam session ID
            doc_token: Pseudonymized document token
            file_extension: File extension
            is_redacted: Whether to check for the redacted version
                (defaults to False, preserving the previous behavior).

        Returns:
            True if the object exists, False otherwise. Unexpected
            storage errors also yield False (original contract) but are
            now logged so they are distinguishable from a missing object.
        """
        object_name = self._object_name(
            session_id, doc_token, file_extension, is_redacted
        )
        try:
            self.client.stat_object(self.bucket_name, object_name)
            return True
        except S3Error as e:
            if e.code != "NoSuchKey":
                logger.warning(f"stat_object failed for {object_name}: {e}")
            return False
# Process-wide singleton; created on first use by get_storage_service().
_storage_service: Optional[KlausurStorageService] = None


def get_storage_service() -> KlausurStorageService:
    """Return the shared KlausurStorageService, creating it on first call."""
    global _storage_service
    if _storage_service is not None:
        return _storage_service
    _storage_service = KlausurStorageService()
    return _storage_service