fix: Restore all files lost during destructive rebase
A previous `git pull --rebase origin main` dropped 177 local commits,
losing 3400+ files across admin-v2, backend, studio-v2, website,
klausur-service, and many other services. The partial restore attempt
(660295e2) only recovered some files.
This commit restores all missing files from pre-rebase ref 98933f5e
while preserving post-rebase additions (night-scheduler, night-mode UI,
NightModeWidget dashboard integration).
Restored features include:
- AI Module Sidebar (FAB), OCR Labeling, OCR Compare
- GPU Dashboard, RAG Pipeline, Magic Help
- Klausur-Korrektur (8 files), Abitur-Archiv (5+ files)
- Companion, Zeugnisse-Crawler, Screen Flow
- Full backend, studio-v2, website, klausur-service
- All compliance SDKs, agent-core, voice-service
- CI/CD configs, documentation, scripts
Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
backend/klausur/services/storage_service.py (new file, 197 lines)
@@ -0,0 +1,197 @@
|
||||
"""
|
||||
Storage Service for Klausur Documents.
|
||||
|
||||
PRIVACY BY DESIGN:
|
||||
- Documents stored with doc_token as identifier (not student names)
|
||||
- Organized by session_id/doc_token for teacher isolation
|
||||
- Auto-cleanup when retention period expires
|
||||
"""
|
||||
import os
|
||||
import io
|
||||
import logging
|
||||
from typing import Optional, BinaryIO
|
||||
from pathlib import Path
|
||||
from minio import Minio
|
||||
from minio.error import S3Error
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
class KlausurStorageService:
    """
    MinIO/S3 storage service for exam documents.

    PRIVACY BY DESIGN:
    - Documents are stored under pseudonymized doc_tokens (not student names).
    - Objects are grouped by session_id so a teacher's session can be
      isolated and purged as a unit.
    - Designed for auto-cleanup when the retention period expires
      (see delete_session_documents).

    Bucket layout:
        klausur-exams/
            {session_id}/
                {doc_token}.{ext}
                {doc_token}_redacted.{ext}   # after header redaction
    """

    # Known extensions -> MIME types; anything else falls back to
    # application/octet-stream in upload_document.
    _CONTENT_TYPES = {
        "png": "image/png",
        "jpg": "image/jpeg",
        "jpeg": "image/jpeg",
        "pdf": "application/pdf",
    }

    def __init__(self):
        # Connection settings come from the environment; the defaults match
        # the local dev docker-compose MinIO instance.
        self.endpoint = os.getenv("MINIO_ENDPOINT", "minio:9000")
        self.access_key = os.getenv("MINIO_ROOT_USER", "breakpilot_dev")
        self.secret_key = os.getenv("MINIO_ROOT_PASSWORD", "breakpilot_dev_123")
        self.secure = os.getenv("MINIO_SECURE", "false").lower() == "true"
        self.bucket_name = os.getenv("KLAUSUR_BUCKET", "klausur-exams")

        # Created lazily on first access via the `client` property.
        self._client: Optional[Minio] = None

    @staticmethod
    def _object_name(
        session_id: str,
        doc_token: str,
        file_extension: str,
        is_redacted: bool,
    ) -> str:
        """Build the canonical object path for a document.

        Single source of truth for the storage layout so upload, download
        and existence checks can never drift apart.
        """
        suffix = "_redacted" if is_redacted else ""
        return f"{session_id}/{doc_token}{suffix}.{file_extension}"

    @property
    def client(self) -> Minio:
        """Lazy-init MinIO client; ensures the bucket exists on first use."""
        if self._client is None:
            self._client = Minio(
                self.endpoint,
                access_key=self.access_key,
                secret_key=self.secret_key,
                secure=self.secure
            )
            self._ensure_bucket()
        return self._client

    def _ensure_bucket(self):
        """Create bucket if it doesn't exist.

        Best-effort: a failed check is logged as a warning rather than
        raised, so a transient MinIO hiccup doesn't break client creation.
        """
        try:
            if not self._client.bucket_exists(self.bucket_name):
                self._client.make_bucket(self.bucket_name)
                logger.info(f"Created Klausur bucket: {self.bucket_name}")
        except S3Error as e:
            logger.warning(f"MinIO bucket check failed: {e}")

    def upload_document(
        self,
        session_id: str,
        doc_token: str,
        file_data: bytes,
        file_extension: str = "png",
        is_redacted: bool = False
    ) -> str:
        """
        Upload exam document to storage.

        Args:
            session_id: Exam session ID
            doc_token: Pseudonymized document token
            file_data: Document binary data
            file_extension: File extension (png, jpg, pdf)
            is_redacted: Whether this is the redacted version

        Returns:
            Object path in storage

        Raises:
            S3Error: If the upload fails.
        """
        object_name = self._object_name(
            session_id, doc_token, file_extension, is_redacted
        )
        content_type = self._CONTENT_TYPES.get(
            file_extension.lower(), "application/octet-stream"
        )

        try:
            self.client.put_object(
                bucket_name=self.bucket_name,
                object_name=object_name,
                data=io.BytesIO(file_data),
                length=len(file_data),
                content_type=content_type
            )
            logger.info(f"Uploaded document: {object_name}")
            return object_name

        except S3Error as e:
            logger.error(f"Failed to upload document: {e}")
            raise

    def get_document(
        self,
        session_id: str,
        doc_token: str,
        file_extension: str = "png",
        is_redacted: bool = False
    ) -> Optional[bytes]:
        """
        Download exam document from storage.

        Args:
            session_id: Exam session ID
            doc_token: Pseudonymized document token
            file_extension: File extension
            is_redacted: Whether to get the redacted version

        Returns:
            Document binary data or None if not found

        Raises:
            S3Error: For storage failures other than a missing object.
        """
        object_name = self._object_name(
            session_id, doc_token, file_extension, is_redacted
        )

        try:
            response = self.client.get_object(self.bucket_name, object_name)
            # Release the connection even if read() raises, otherwise the
            # urllib3 pool slot leaks.
            try:
                return response.read()
            finally:
                response.close()
                response.release_conn()

        except S3Error as e:
            if e.code == "NoSuchKey":
                logger.warning(f"Document not found: {object_name}")
                return None
            logger.error(f"Failed to get document: {e}")
            raise

    def delete_session_documents(self, session_id: str) -> int:
        """
        Delete all documents for a session.

        Args:
            session_id: Exam session ID

        Returns:
            Number of deleted objects

        Raises:
            S3Error: If listing or deletion fails (a partial delete may
                have already happened).
        """
        deleted_count = 0
        prefix = f"{session_id}/"

        try:
            objects = self.client.list_objects(self.bucket_name, prefix=prefix)
            for obj in objects:
                self.client.remove_object(self.bucket_name, obj.object_name)
                deleted_count += 1
                logger.debug(f"Deleted: {obj.object_name}")

            logger.info(f"Deleted {deleted_count} documents for session {session_id}")
            return deleted_count

        except S3Error as e:
            logger.error(f"Failed to delete session documents: {e}")
            raise

    def document_exists(
        self,
        session_id: str,
        doc_token: str,
        file_extension: str = "png",
        is_redacted: bool = False
    ) -> bool:
        """Check if document exists in storage.

        Args:
            session_id: Exam session ID
            doc_token: Pseudonymized document token
            file_extension: File extension
            is_redacted: Whether to check for the redacted version
                (new, defaults to False for backward compatibility)

        Returns:
            True if the object exists; False if missing or if the stat
            call fails for any reason (best-effort probe).
        """
        object_name = self._object_name(
            session_id, doc_token, file_extension, is_redacted
        )
        try:
            self.client.stat_object(self.bucket_name, object_name)
            return True
        except S3Error:
            return False
|
||||
|
||||
|
||||
# Module-level singleton; created on first call to get_storage_service().
_storage_service: Optional[KlausurStorageService] = None


def get_storage_service() -> KlausurStorageService:
    """Return the process-wide storage service, creating it on first use."""
    global _storage_service
    if _storage_service is not None:
        return _storage_service
    _storage_service = KlausurStorageService()
    return _storage_service
|
||||
Reference in New Issue
Block a user