A previous `git pull --rebase origin main` dropped 177 local commits,
losing 3400+ files across admin-v2, backend, studio-v2, website,
klausur-service, and many other services. The partial restore attempt
(660295e2) only recovered some files.
This commit restores all missing files from pre-rebase ref 98933f5e
while preserving post-rebase additions (night-scheduler, night-mode UI,
NightModeWidget dashboard integration).
Restored features include:
- AI Module Sidebar (FAB), OCR Labeling, OCR Compare
- GPU Dashboard, RAG Pipeline, Magic Help
- Klausur-Korrektur (8 files), Abitur-Archiv (5+ files)
- Companion, Zeugnisse-Crawler, Screen Flow
- Full backend, studio-v2, website, klausur-service
- All compliance SDKs, agent-core, voice-service
- CI/CD configs, documentation, scripts
Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
378 lines
12 KiB
Python
"""
|
|
Repository for Klausurkorrektur Module.
|
|
|
|
All queries are filtered by teacher_id to ensure complete namespace isolation.
|
|
No cross-teacher data access is possible.
|
|
"""
|
|
from datetime import datetime, timedelta
|
|
from typing import Optional, List
|
|
from sqlalchemy.orm import Session
|
|
from sqlalchemy import and_, func
|
|
|
|
from .db_models import (
|
|
ExamSession, PseudonymizedDocument, QRBatchJob,
|
|
SessionStatus, DocumentStatus
|
|
)
|
|
|
|
|
|
class KlausurRepository:
|
|
"""
|
|
Repository for exam correction data.
|
|
|
|
PRIVACY DESIGN:
|
|
- All queries MUST include teacher_id filter
|
|
- No method allows access to other teachers' data
|
|
- Bulk operations are scoped to teacher namespace
|
|
"""
|
|
|
|
def __init__(self, db: Session):
    """Initialize the repository with an active SQLAlchemy session.

    Args:
        db: SQLAlchemy ``Session`` used for all queries and commits.
    """
    self.db = db
|
|
|
|
# ==================== Session Operations ====================

def create_session(
    self,
    teacher_id: str,
    name: str,
    subject: str = "",
    class_name: str = "",
    total_points: int = 100,
    rubric: str = "",
    questions: Optional[List[dict]] = None,
    retention_days: int = 30
) -> ExamSession:
    """Create and persist a new exam correction session.

    The session is owned by ``teacher_id``, starts in CREATED status,
    and gets a retention deadline ``retention_days`` days from now.
    """
    retention_deadline = datetime.utcnow() + timedelta(days=retention_days)
    new_session = ExamSession(
        teacher_id=teacher_id,
        name=name,
        subject=subject,
        class_name=class_name,
        total_points=total_points,
        rubric=rubric,
        questions=questions if questions else [],
        status=SessionStatus.CREATED,
        retention_until=retention_deadline,
    )
    self.db.add(new_session)
    self.db.commit()
    self.db.refresh(new_session)
    return new_session
|
|
|
|
def get_session(
    self,
    session_id: str,
    teacher_id: str
) -> Optional[ExamSession]:
    """Fetch one non-deleted session, scoped to the owning teacher.

    Returns None when the session is missing, soft-deleted, or owned
    by a different teacher.
    """
    scope = and_(
        ExamSession.id == session_id,
        ExamSession.teacher_id == teacher_id,
        ExamSession.status != SessionStatus.DELETED,
    )
    return self.db.query(ExamSession).filter(scope).first()
|
|
|
|
def list_sessions(
    self,
    teacher_id: str,
    include_archived: bool = False,
    limit: int = 50,
    offset: int = 0
) -> List[ExamSession]:
    """Return the teacher's sessions, newest first, with pagination.

    Soft-deleted sessions are always excluded; archived ones only
    when ``include_archived`` is False.
    """
    query = self.db.query(ExamSession).filter(
        and_(
            ExamSession.teacher_id == teacher_id,
            ExamSession.status != SessionStatus.DELETED,
        )
    )
    if not include_archived:
        query = query.filter(ExamSession.status != SessionStatus.ARCHIVED)

    query = query.order_by(ExamSession.created_at.desc())
    return query.offset(offset).limit(limit).all()
|
|
|
|
def update_session_status(
    self,
    session_id: str,
    teacher_id: str,
    status: SessionStatus
) -> Optional[ExamSession]:
    """Set a session's status (teacher-scoped).

    Stamps ``completed_at`` when the new status is COMPLETED.
    Returns the refreshed session, or None if it was not found.
    """
    target = self.get_session(session_id, teacher_id)
    if target is None:
        return None

    target.status = status
    if status == SessionStatus.COMPLETED:
        target.completed_at = datetime.utcnow()
    self.db.commit()
    self.db.refresh(target)
    return target
|
|
|
|
def update_session_identity_map(
    self,
    session_id: str,
    teacher_id: str,
    encrypted_map: bytes,
    iv: str
) -> Optional[ExamSession]:
    """Store the encrypted identity map and its IV (teacher-scoped).

    Returns the refreshed session, or None if it was not found.
    """
    target = self.get_session(session_id, teacher_id)
    if target is None:
        return None

    target.encrypted_identity_map = encrypted_map
    target.identity_map_iv = iv
    self.db.commit()
    self.db.refresh(target)
    return target
|
|
|
|
def delete_session(
    self,
    session_id: str,
    teacher_id: str,
    hard_delete: bool = False
) -> bool:
    """Delete a session (teacher-scoped).

    Args:
        session_id: ID of the session to delete.
        teacher_id: Owner; other teachers' sessions are never touched.
        hard_delete: If True, the row is removed outright; otherwise the
            session is soft-deleted (status=DELETED) and hidden from all
            subsequent queries.

    Returns:
        True if a session was found and deleted, False otherwise.
    """
    session = self.get_session(session_id, teacher_id)
    if not session:
        return False

    if hard_delete:
        self.db.delete(session)
    else:
        session.status = SessionStatus.DELETED
        # Privacy fix: purge the encrypted identity map on soft delete,
        # matching cleanup_expired_sessions(). A DELETED session is never
        # surfaced again, so the re-identification data must not linger.
        session.encrypted_identity_map = None
        session.identity_map_iv = None
    self.db.commit()
    return True
|
|
|
|
# ==================== Document Operations ====================

def create_document(
    self,
    session_id: str,
    teacher_id: str,
    doc_token: Optional[str] = None,
    page_number: int = 1,
    total_pages: int = 1
) -> Optional[PseudonymizedDocument]:
    """Add a pseudonymized document to a session the teacher owns.

    Increments the session's ``document_count``. Returns None when the
    session does not exist or belongs to another teacher.
    """
    owning_session = self.get_session(session_id, teacher_id)
    if owning_session is None:
        return None

    document = PseudonymizedDocument(
        session_id=session_id,
        page_number=page_number,
        total_pages=total_pages,
        status=DocumentStatus.UPLOADED,
    )
    # Only override the token when the caller supplied one; otherwise
    # the model's own default applies.
    if doc_token:
        document.doc_token = doc_token

    self.db.add(document)
    owning_session.document_count += 1
    self.db.commit()
    self.db.refresh(document)
    return document
|
|
|
|
def get_document(
    self,
    doc_token: str,
    teacher_id: str
) -> Optional[PseudonymizedDocument]:
    """Look up a document by token, enforcing teacher scope via its session.

    Documents whose parent session is soft-deleted are not returned.
    """
    scope = and_(
        PseudonymizedDocument.doc_token == doc_token,
        ExamSession.teacher_id == teacher_id,
        ExamSession.status != SessionStatus.DELETED,
    )
    query = self.db.query(PseudonymizedDocument).join(ExamSession)
    return query.filter(scope).first()
|
|
|
|
def list_documents(
    self,
    session_id: str,
    teacher_id: str
) -> List[PseudonymizedDocument]:
    """Return a session's documents in creation order (teacher-scoped).

    Returns an empty list when the session is missing or not owned by
    the teacher.
    """
    if self.get_session(session_id, teacher_id) is None:
        return []

    return (
        self.db.query(PseudonymizedDocument)
        .filter(PseudonymizedDocument.session_id == session_id)
        .order_by(PseudonymizedDocument.created_at)
        .all()
    )
|
|
|
|
def update_document_ocr(
    self,
    doc_token: str,
    teacher_id: str,
    ocr_text: str,
    confidence: int = 0
) -> Optional[PseudonymizedDocument]:
    """Attach OCR output to a document and mark it OCR_COMPLETED.

    Returns the refreshed document, or None if it was not found.
    """
    document = self.get_document(doc_token, teacher_id)
    if document is None:
        return None

    document.ocr_text = ocr_text
    document.ocr_confidence = confidence
    document.status = DocumentStatus.OCR_COMPLETED
    self.db.commit()
    self.db.refresh(document)
    return document
|
|
|
|
def update_document_ai_result(
    self,
    doc_token: str,
    teacher_id: str,
    feedback: str,
    score: Optional[int] = None,
    grade: Optional[str] = None,
    details: Optional[dict] = None
) -> Optional[PseudonymizedDocument]:
    """Store AI correction results and roll up session progress.

    Marks the document COMPLETED, bumps the owning session's
    ``processed_count``, and closes the session out (COMPLETED +
    timestamp) once every document has been processed.
    """
    document = self.get_document(doc_token, teacher_id)
    if document is None:
        return None

    document.ai_feedback = feedback
    document.ai_score = score
    document.ai_grade = grade
    document.ai_details = details if details else {}
    document.status = DocumentStatus.COMPLETED
    document.processing_completed_at = datetime.utcnow()

    owning_session = document.session
    owning_session.processed_count += 1
    # Finish the session when the last document completes.
    if owning_session.processed_count >= owning_session.document_count:
        owning_session.status = SessionStatus.COMPLETED
        owning_session.completed_at = datetime.utcnow()

    self.db.commit()
    self.db.refresh(document)
    return document
|
|
|
|
def update_document_status(
    self,
    doc_token: str,
    teacher_id: str,
    status: DocumentStatus,
    error: Optional[str] = None
) -> Optional[PseudonymizedDocument]:
    """Transition a document's processing status (teacher-scoped).

    Records ``error`` when provided, and stamps
    ``processing_started_at`` on entering a processing state.
    Returns the refreshed document, or None if it was not found.
    """
    document = self.get_document(doc_token, teacher_id)
    if document is None:
        return None

    document.status = status
    if error:
        document.processing_error = error
    if status in (DocumentStatus.OCR_PROCESSING, DocumentStatus.AI_PROCESSING):
        document.processing_started_at = datetime.utcnow()
    self.db.commit()
    self.db.refresh(document)
    return document
|
|
|
|
# ==================== QR Batch Operations ====================

def create_qr_batch(
    self,
    session_id: str,
    teacher_id: str,
    student_count: int,
    generated_tokens: List[str]
) -> Optional[QRBatchJob]:
    """Record a QR-code batch job for a session the teacher owns.

    Returns None when the session does not exist or belongs to another
    teacher.
    """
    if self.get_session(session_id, teacher_id) is None:
        return None

    job = QRBatchJob(
        session_id=session_id,
        teacher_id=teacher_id,
        student_count=student_count,
        generated_tokens=generated_tokens,
    )
    self.db.add(job)
    self.db.commit()
    self.db.refresh(job)
    return job
|
|
|
|
def get_qr_batch(
    self,
    batch_id: str,
    teacher_id: str
) -> Optional[QRBatchJob]:
    """Fetch a QR batch job by ID, restricted to its owning teacher."""
    return (
        self.db.query(QRBatchJob)
        .filter(
            and_(
                QRBatchJob.id == batch_id,
                QRBatchJob.teacher_id == teacher_id,
            )
        )
        .first()
    )
|
|
|
|
# ==================== Statistics (Anonymized) ====================

def get_session_stats(
    self,
    session_id: str,
    teacher_id: str
) -> dict:
    """Return anonymized statistics for a session (teacher-scoped).

    Includes document counts, a per-status breakdown, and score
    aggregates (average/min/max) over documents that have an AI score.
    Returns {} when the session is missing or not owned by the teacher.
    """
    session = self.get_session(session_id, teacher_id)
    if not session:
        return {}

    # Count documents by status
    status_counts = self.db.query(
        PseudonymizedDocument.status,
        func.count(PseudonymizedDocument.doc_token)
    ).filter(
        PseudonymizedDocument.session_id == session_id
    ).group_by(PseudonymizedDocument.status).all()

    # Score statistics (anonymized)
    score_stats = self.db.query(
        func.avg(PseudonymizedDocument.ai_score),
        func.min(PseudonymizedDocument.ai_score),
        func.max(PseudonymizedDocument.ai_score)
    ).filter(
        and_(
            PseudonymizedDocument.session_id == session_id,
            PseudonymizedDocument.ai_score.isnot(None)
        )
    ).first()

    # BUG FIX: compare against None explicitly — an average score of 0
    # is falsy and was previously reported as None.
    avg_score = score_stats[0]
    return {
        "session_id": session_id,
        "total_documents": session.document_count,
        "processed_documents": session.processed_count,
        "status_breakdown": {s.value: c for s, c in status_counts},
        "score_average": float(avg_score) if avg_score is not None else None,
        "score_min": score_stats[1],
        "score_max": score_stats[2]
    }
|
|
|
|
# ==================== Data Retention ====================

def cleanup_expired_sessions(self) -> int:
    """Soft-delete every session whose retention date has passed.

    Also wipes each expired session's encrypted identity map and IV.
    Returns the number of sessions expired by this call.
    """
    cutoff = datetime.utcnow()
    expired_sessions = self.db.query(ExamSession).filter(
        and_(
            ExamSession.retention_until < cutoff,
            ExamSession.status != SessionStatus.DELETED,
        )
    ).all()

    for expired in expired_sessions:
        expired.status = SessionStatus.DELETED
        # Clear sensitive data
        expired.encrypted_identity_map = None
        expired.identity_map_iv = None

    self.db.commit()
    return len(expired_sessions)
|