This repository has been archived on 2026-02-15. You can view files and clone it. You cannot open issues or pull requests or push a commit.
Files
breakpilot-pwa/backend/klausur/services/storage_service.py
Benjamin Admin 21a844cb8a fix: Restore all files lost during destructive rebase
A previous `git pull --rebase origin main` dropped 177 local commits,
losing 3400+ files across admin-v2, backend, studio-v2, website,
klausur-service, and many other services. The partial restore attempt
(660295e2) only recovered some files.

This commit restores all missing files from pre-rebase ref 98933f5e
while preserving post-rebase additions (night-scheduler, night-mode UI,
NightModeWidget dashboard integration).

Restored features include:
- AI Module Sidebar (FAB), OCR Labeling, OCR Compare
- GPU Dashboard, RAG Pipeline, Magic Help
- Klausur-Korrektur (8 files), Abitur-Archiv (5+ files)
- Companion, Zeugnisse-Crawler, Screen Flow
- Full backend, studio-v2, website, klausur-service
- All compliance SDKs, agent-core, voice-service
- CI/CD configs, documentation, scripts

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-02-09 09:51:32 +01:00

198 lines
5.9 KiB
Python

"""
Storage Service for Klausur Documents.
PRIVACY BY DESIGN:
- Documents stored with doc_token as identifier (not student names)
- Organized by session_id/doc_token for teacher isolation
- Auto-cleanup when retention period expires
"""
import os
import io
import logging
from typing import Optional, BinaryIO
from pathlib import Path
from minio import Minio
from minio.error import S3Error
logger = logging.getLogger(__name__)
class KlausurStorageService:
    """
    MinIO/S3 storage service for exam documents.

    PRIVACY BY DESIGN: documents are addressed by a pseudonymized
    ``doc_token`` (never student names) and grouped per ``session_id``
    so an entire session can be wiped when its retention period expires.

    Bucket layout::

        klausur-exams/
            {session_id}/
                {doc_token}.{ext}
                {doc_token}_redacted.{ext}   # after header redaction
    """

    # MIME types for the supported upload formats; anything else falls
    # back to application/octet-stream.
    _CONTENT_TYPES = {
        "png": "image/png",
        "jpg": "image/jpeg",
        "jpeg": "image/jpeg",
        "pdf": "application/pdf",
    }

    def __init__(self):
        # Connection settings come from the environment; the defaults
        # match the local dev MinIO service.
        self.endpoint = os.getenv("MINIO_ENDPOINT", "minio:9000")
        self.access_key = os.getenv("MINIO_ROOT_USER", "breakpilot_dev")
        self.secret_key = os.getenv("MINIO_ROOT_PASSWORD", "breakpilot_dev_123")
        self.secure = os.getenv("MINIO_SECURE", "false").lower() == "true"
        self.bucket_name = os.getenv("KLAUSUR_BUCKET", "klausur-exams")
        # Created lazily on first access so importing this module never
        # opens a network connection.
        self._client: Optional[Minio] = None

    @property
    def client(self) -> Minio:
        """Lazily initialize and return the MinIO client.

        Also ensures the target bucket exists on first initialization.
        """
        if self._client is None:
            self._client = Minio(
                self.endpoint,
                access_key=self.access_key,
                secret_key=self.secret_key,
                secure=self.secure,
            )
            self._ensure_bucket()
        return self._client

    def _ensure_bucket(self):
        """Create the bucket if it doesn't exist (best-effort).

        Failures are only logged: the bucket may be provisioned
        externally, and individual operations will surface real errors.
        """
        try:
            if not self._client.bucket_exists(self.bucket_name):
                self._client.make_bucket(self.bucket_name)
                logger.info(f"Created Klausur bucket: {self.bucket_name}")
        except S3Error as e:
            logger.warning(f"MinIO bucket check failed: {e}")

    @staticmethod
    def _object_name(
        session_id: str,
        doc_token: str,
        file_extension: str,
        is_redacted: bool,
    ) -> str:
        """Build the canonical object path: {session}/{token}[_redacted].{ext}."""
        suffix = "_redacted" if is_redacted else ""
        return f"{session_id}/{doc_token}{suffix}.{file_extension}"

    def upload_document(
        self,
        session_id: str,
        doc_token: str,
        file_data: bytes,
        file_extension: str = "png",
        is_redacted: bool = False
    ) -> str:
        """
        Upload exam document to storage.

        Args:
            session_id: Exam session ID
            doc_token: Pseudonymized document token
            file_data: Document binary data
            file_extension: File extension (png, jpg, pdf)
            is_redacted: Whether this is the redacted version

        Returns:
            Object path in storage

        Raises:
            S3Error: If the upload fails.
        """
        object_name = self._object_name(
            session_id, doc_token, file_extension, is_redacted
        )
        content_type = self._CONTENT_TYPES.get(
            file_extension.lower(), "application/octet-stream"
        )
        try:
            self.client.put_object(
                bucket_name=self.bucket_name,
                object_name=object_name,
                data=io.BytesIO(file_data),
                length=len(file_data),
                content_type=content_type
            )
            logger.info(f"Uploaded document: {object_name}")
            return object_name
        except S3Error as e:
            logger.error(f"Failed to upload document: {e}")
            raise

    def get_document(
        self,
        session_id: str,
        doc_token: str,
        file_extension: str = "png",
        is_redacted: bool = False
    ) -> Optional[bytes]:
        """
        Download exam document from storage.

        Args:
            session_id: Exam session ID
            doc_token: Pseudonymized document token
            file_extension: File extension
            is_redacted: Whether to get the redacted version

        Returns:
            Document binary data or None if not found

        Raises:
            S3Error: For any storage failure other than a missing object.
        """
        object_name = self._object_name(
            session_id, doc_token, file_extension, is_redacted
        )
        try:
            response = self.client.get_object(self.bucket_name, object_name)
            # Fully drain and release the connection so the urllib3 pool
            # used by minio is not exhausted.
            data = response.read()
            response.close()
            response.release_conn()
            return data
        except S3Error as e:
            if e.code == "NoSuchKey":
                logger.warning(f"Document not found: {object_name}")
                return None
            logger.error(f"Failed to get document: {e}")
            raise

    def delete_session_documents(self, session_id: str) -> int:
        """
        Delete all documents for a session.

        Used by the retention auto-cleanup: removing the whole
        ``{session_id}/`` prefix erases every (redacted and original)
        document of that session.

        Args:
            session_id: Exam session ID

        Returns:
            Number of deleted objects

        Raises:
            S3Error: If listing or deletion fails.
        """
        deleted_count = 0
        prefix = f"{session_id}/"
        try:
            objects = self.client.list_objects(self.bucket_name, prefix=prefix)
            for obj in objects:
                self.client.remove_object(self.bucket_name, obj.object_name)
                deleted_count += 1
                logger.debug(f"Deleted: {obj.object_name}")
            logger.info(f"Deleted {deleted_count} documents for session {session_id}")
            return deleted_count
        except S3Error as e:
            logger.error(f"Failed to delete session documents: {e}")
            raise

    def document_exists(
        self,
        session_id: str,
        doc_token: str,
        file_extension: str = "png",
        is_redacted: bool = False
    ) -> bool:
        """Check if a document exists in storage.

        Args:
            session_id: Exam session ID
            doc_token: Pseudonymized document token
            file_extension: File extension
            is_redacted: Whether to check for the redacted version
                (defaults to False, preserving the previous behavior).

        Returns:
            True if the object exists, False otherwise. Unexpected
            storage errors also yield False (original contract) but are
            now logged so they are distinguishable from a missing object.
        """
        object_name = self._object_name(
            session_id, doc_token, file_extension, is_redacted
        )
        try:
            self.client.stat_object(self.bucket_name, object_name)
            return True
        except S3Error as e:
            if e.code != "NoSuchKey":
                logger.warning(f"stat_object failed for {object_name}: {e}")
            return False
# Process-wide singleton; created on first use by get_storage_service().
_storage_service: Optional[KlausurStorageService] = None


def get_storage_service() -> KlausurStorageService:
    """Return the shared KlausurStorageService, creating it on first call."""
    global _storage_service
    if _storage_service is not None:
        return _storage_service
    _storage_service = KlausurStorageService()
    return _storage_service