""" Storage Service for Klausur Documents. PRIVACY BY DESIGN: - Documents stored with doc_token as identifier (not student names) - Organized by session_id/doc_token for teacher isolation - Auto-cleanup when retention period expires """ import os import io import logging from typing import Optional, BinaryIO from pathlib import Path from minio import Minio from minio.error import S3Error logger = logging.getLogger(__name__) class KlausurStorageService: """ MinIO/S3 Storage Service for exam documents. Structure: klausur-exams/ {session_id}/ {doc_token}.{ext} {doc_token}_redacted.{ext} # After header redaction """ def __init__(self): self.endpoint = os.getenv("MINIO_ENDPOINT", "minio:9000") self.access_key = os.getenv("MINIO_ROOT_USER", "breakpilot_dev") self.secret_key = os.getenv("MINIO_ROOT_PASSWORD", "breakpilot_dev_123") self.secure = os.getenv("MINIO_SECURE", "false").lower() == "true" self.bucket_name = os.getenv("KLAUSUR_BUCKET", "klausur-exams") self._client: Optional[Minio] = None @property def client(self) -> Minio: """Lazy-init MinIO client.""" if self._client is None: self._client = Minio( self.endpoint, access_key=self.access_key, secret_key=self.secret_key, secure=self.secure ) self._ensure_bucket() return self._client def _ensure_bucket(self): """Create bucket if it doesn't exist.""" try: if not self._client.bucket_exists(self.bucket_name): self._client.make_bucket(self.bucket_name) logger.info(f"Created Klausur bucket: {self.bucket_name}") except S3Error as e: logger.warning(f"MinIO bucket check failed: {e}") def upload_document( self, session_id: str, doc_token: str, file_data: bytes, file_extension: str = "png", is_redacted: bool = False ) -> str: """ Upload exam document to storage. Args: session_id: Exam session ID doc_token: Pseudonymized document token file_data: Document binary data file_extension: File extension (png, jpg, pdf) is_redacted: Whether this is the redacted version Returns: Object path in storage """ suffix = "_redacted" if is_redacted else "" object_name = f"{session_id}/{doc_token}{suffix}.{file_extension}" # Determine content type content_types = { "png": "image/png", "jpg": "image/jpeg", "jpeg": "image/jpeg", "pdf": "application/pdf", } content_type = content_types.get(file_extension.lower(), "application/octet-stream") try: self.client.put_object( bucket_name=self.bucket_name, object_name=object_name, data=io.BytesIO(file_data), length=len(file_data), content_type=content_type ) logger.info(f"Uploaded document: {object_name}") return object_name except S3Error as e: logger.error(f"Failed to upload document: {e}") raise def get_document( self, session_id: str, doc_token: str, file_extension: str = "png", is_redacted: bool = False ) -> Optional[bytes]: """ Download exam document from storage. Args: session_id: Exam session ID doc_token: Pseudonymized document token file_extension: File extension is_redacted: Whether to get the redacted version Returns: Document binary data or None if not found """ suffix = "_redacted" if is_redacted else "" object_name = f"{session_id}/{doc_token}{suffix}.{file_extension}" try: response = self.client.get_object(self.bucket_name, object_name) data = response.read() response.close() response.release_conn() return data except S3Error as e: if e.code == "NoSuchKey": logger.warning(f"Document not found: {object_name}") return None logger.error(f"Failed to get document: {e}") raise def delete_session_documents(self, session_id: str) -> int: """ Delete all documents for a session. Args: session_id: Exam session ID Returns: Number of deleted objects """ deleted_count = 0 prefix = f"{session_id}/" try: objects = self.client.list_objects(self.bucket_name, prefix=prefix) for obj in objects: self.client.remove_object(self.bucket_name, obj.object_name) deleted_count += 1 logger.debug(f"Deleted: {obj.object_name}") logger.info(f"Deleted {deleted_count} documents for session {session_id}") return deleted_count except S3Error as e: logger.error(f"Failed to delete session documents: {e}") raise def document_exists( self, session_id: str, doc_token: str, file_extension: str = "png" ) -> bool: """Check if document exists in storage.""" object_name = f"{session_id}/{doc_token}.{file_extension}" try: self.client.stat_object(self.bucket_name, object_name) return True except S3Error: return False # Singleton instance _storage_service: Optional[KlausurStorageService] = None def get_storage_service() -> KlausurStorageService: """Get or create the storage service singleton.""" global _storage_service if _storage_service is None: _storage_service = KlausurStorageService() return _storage_service