""" Repository for Klausurkorrektur Module. All queries are filtered by teacher_id to ensure complete namespace isolation. No cross-teacher data access is possible. """ from datetime import datetime, timedelta from typing import Optional, List from sqlalchemy.orm import Session from sqlalchemy import and_, func from .db_models import ( ExamSession, PseudonymizedDocument, QRBatchJob, SessionStatus, DocumentStatus ) class KlausurRepository: """ Repository for exam correction data. PRIVACY DESIGN: - All queries MUST include teacher_id filter - No method allows access to other teachers' data - Bulk operations are scoped to teacher namespace """ def __init__(self, db: Session): self.db = db # ==================== Session Operations ==================== def create_session( self, teacher_id: str, name: str, subject: str = "", class_name: str = "", total_points: int = 100, rubric: str = "", questions: Optional[List[dict]] = None, retention_days: int = 30 ) -> ExamSession: """Create a new exam correction session.""" session = ExamSession( teacher_id=teacher_id, name=name, subject=subject, class_name=class_name, total_points=total_points, rubric=rubric, questions=questions or [], status=SessionStatus.CREATED, retention_until=datetime.utcnow() + timedelta(days=retention_days) ) self.db.add(session) self.db.commit() self.db.refresh(session) return session def get_session( self, session_id: str, teacher_id: str ) -> Optional[ExamSession]: """Get a session by ID (teacher-scoped).""" return self.db.query(ExamSession).filter( and_( ExamSession.id == session_id, ExamSession.teacher_id == teacher_id, ExamSession.status != SessionStatus.DELETED ) ).first() def list_sessions( self, teacher_id: str, include_archived: bool = False, limit: int = 50, offset: int = 0 ) -> List[ExamSession]: """List all sessions for a teacher.""" query = self.db.query(ExamSession).filter( and_( ExamSession.teacher_id == teacher_id, ExamSession.status != SessionStatus.DELETED ) ) if not include_archived: query = query.filter(ExamSession.status != SessionStatus.ARCHIVED) return query.order_by(ExamSession.created_at.desc()).offset(offset).limit(limit).all() def update_session_status( self, session_id: str, teacher_id: str, status: SessionStatus ) -> Optional[ExamSession]: """Update session status.""" session = self.get_session(session_id, teacher_id) if session: session.status = status if status == SessionStatus.COMPLETED: session.completed_at = datetime.utcnow() self.db.commit() self.db.refresh(session) return session def update_session_identity_map( self, session_id: str, teacher_id: str, encrypted_map: bytes, iv: str ) -> Optional[ExamSession]: """Store encrypted identity map (teacher-scoped).""" session = self.get_session(session_id, teacher_id) if session: session.encrypted_identity_map = encrypted_map session.identity_map_iv = iv self.db.commit() self.db.refresh(session) return session def delete_session( self, session_id: str, teacher_id: str, hard_delete: bool = False ) -> bool: """Delete a session (soft or hard delete).""" session = self.get_session(session_id, teacher_id) if not session: return False if hard_delete: self.db.delete(session) else: session.status = SessionStatus.DELETED self.db.commit() return True # ==================== Document Operations ==================== def create_document( self, session_id: str, teacher_id: str, doc_token: Optional[str] = None, page_number: int = 1, total_pages: int = 1 ) -> Optional[PseudonymizedDocument]: """Create a new pseudonymized document.""" # Verify session belongs to teacher session = self.get_session(session_id, teacher_id) if not session: return None doc = PseudonymizedDocument( session_id=session_id, page_number=page_number, total_pages=total_pages, status=DocumentStatus.UPLOADED ) if doc_token: doc.doc_token = doc_token self.db.add(doc) # Update session document count session.document_count += 1 self.db.commit() self.db.refresh(doc) return doc def get_document( self, doc_token: str, teacher_id: str ) -> Optional[PseudonymizedDocument]: """Get a document by token (teacher-scoped via session).""" return self.db.query(PseudonymizedDocument).join( ExamSession ).filter( and_( PseudonymizedDocument.doc_token == doc_token, ExamSession.teacher_id == teacher_id, ExamSession.status != SessionStatus.DELETED ) ).first() def list_documents( self, session_id: str, teacher_id: str ) -> List[PseudonymizedDocument]: """List all documents in a session (teacher-scoped).""" # Verify session belongs to teacher session = self.get_session(session_id, teacher_id) if not session: return [] return self.db.query(PseudonymizedDocument).filter( PseudonymizedDocument.session_id == session_id ).order_by(PseudonymizedDocument.created_at).all() def update_document_ocr( self, doc_token: str, teacher_id: str, ocr_text: str, confidence: int = 0 ) -> Optional[PseudonymizedDocument]: """Update document with OCR results.""" doc = self.get_document(doc_token, teacher_id) if doc: doc.ocr_text = ocr_text doc.ocr_confidence = confidence doc.status = DocumentStatus.OCR_COMPLETED self.db.commit() self.db.refresh(doc) return doc def update_document_ai_result( self, doc_token: str, teacher_id: str, feedback: str, score: Optional[int] = None, grade: Optional[str] = None, details: Optional[dict] = None ) -> Optional[PseudonymizedDocument]: """Update document with AI correction results.""" doc = self.get_document(doc_token, teacher_id) if doc: doc.ai_feedback = feedback doc.ai_score = score doc.ai_grade = grade doc.ai_details = details or {} doc.status = DocumentStatus.COMPLETED doc.processing_completed_at = datetime.utcnow() # Update session processed count session = doc.session session.processed_count += 1 # Check if all documents are processed if session.processed_count >= session.document_count: session.status = SessionStatus.COMPLETED session.completed_at = datetime.utcnow() self.db.commit() self.db.refresh(doc) return doc def update_document_status( self, doc_token: str, teacher_id: str, status: DocumentStatus, error: Optional[str] = None ) -> Optional[PseudonymizedDocument]: """Update document processing status.""" doc = self.get_document(doc_token, teacher_id) if doc: doc.status = status if error: doc.processing_error = error if status in [DocumentStatus.OCR_PROCESSING, DocumentStatus.AI_PROCESSING]: doc.processing_started_at = datetime.utcnow() self.db.commit() self.db.refresh(doc) return doc # ==================== QR Batch Operations ==================== def create_qr_batch( self, session_id: str, teacher_id: str, student_count: int, generated_tokens: List[str] ) -> Optional[QRBatchJob]: """Create a QR code batch job.""" # Verify session belongs to teacher session = self.get_session(session_id, teacher_id) if not session: return None batch = QRBatchJob( session_id=session_id, teacher_id=teacher_id, student_count=student_count, generated_tokens=generated_tokens ) self.db.add(batch) self.db.commit() self.db.refresh(batch) return batch def get_qr_batch( self, batch_id: str, teacher_id: str ) -> Optional[QRBatchJob]: """Get a QR batch by ID (teacher-scoped).""" return self.db.query(QRBatchJob).filter( and_( QRBatchJob.id == batch_id, QRBatchJob.teacher_id == teacher_id ) ).first() # ==================== Statistics (Anonymized) ==================== def get_session_stats( self, session_id: str, teacher_id: str ) -> dict: """Get anonymized statistics for a session.""" session = self.get_session(session_id, teacher_id) if not session: return {} # Count documents by status status_counts = self.db.query( PseudonymizedDocument.status, func.count(PseudonymizedDocument.doc_token) ).filter( PseudonymizedDocument.session_id == session_id ).group_by(PseudonymizedDocument.status).all() # Score statistics (anonymized) score_stats = self.db.query( func.avg(PseudonymizedDocument.ai_score), func.min(PseudonymizedDocument.ai_score), func.max(PseudonymizedDocument.ai_score) ).filter( and_( PseudonymizedDocument.session_id == session_id, PseudonymizedDocument.ai_score.isnot(None) ) ).first() return { "session_id": session_id, "total_documents": session.document_count, "processed_documents": session.processed_count, "status_breakdown": {s.value: c for s, c in status_counts}, "score_average": float(score_stats[0]) if score_stats[0] else None, "score_min": score_stats[1], "score_max": score_stats[2] } # ==================== Data Retention ==================== def cleanup_expired_sessions(self) -> int: """Delete sessions past their retention date. Returns count deleted.""" now = datetime.utcnow() expired = self.db.query(ExamSession).filter( and_( ExamSession.retention_until < now, ExamSession.status != SessionStatus.DELETED ) ).all() count = len(expired) for session in expired: session.status = SessionStatus.DELETED # Clear sensitive data session.encrypted_identity_map = None session.identity_map_iv = None self.db.commit() return count