"""
Tests for KlausurRepository.

Verifies:
- Teacher isolation (critical for privacy)
- CRUD operations
- Data retention cleanup

All tests run against a ``MagicMock`` standing in for the SQLAlchemy
session, so they check how the repository drives the session (which
calls it makes, which attributes it sets), not real SQL behaviour.
"""

from datetime import datetime, timedelta
from unittest.mock import MagicMock, patch

import pytest
from sqlalchemy.orm import Session

from klausur.repository import KlausurRepository
from klausur.db_models import (
    ExamSession, PseudonymizedDocument, QRBatchJob,
    SessionStatus, DocumentStatus
)


@pytest.fixture
def mock_db():
    """Return a mock database session restricted to the ``Session`` API."""
    return MagicMock(spec=Session)


@pytest.fixture
def repo(mock_db):
    """Return a ``KlausurRepository`` wired to the mock database session."""
    return KlausurRepository(mock_db)


class TestTeacherIsolation:
    """Tests for teacher namespace isolation (CRITICAL for privacy)."""

    def test_get_session_requires_teacher_id(self, repo, mock_db):
        """Getting a session must go through a filtered ExamSession query."""
        # Make the query chainable: query(...).filter(...).first() -> None
        mock_query = MagicMock()
        mock_db.query.return_value = mock_query
        mock_query.filter.return_value = mock_query
        mock_query.first.return_value = None

        result = repo.get_session("session-123", "teacher-A")

        # The lookup must target ExamSession and apply a filter
        # (teacher isolation); the mocked "no row" result is passed through.
        mock_db.query.assert_called_with(ExamSession)
        mock_query.filter.assert_called()
        assert result is None

    def test_list_sessions_only_returns_teacher_sessions(self, repo, mock_db):
        """Listing sessions must filter by teacher_id."""
        mock_query = MagicMock()
        mock_db.query.return_value = mock_query
        mock_query.filter.return_value = mock_query
        mock_query.order_by.return_value = mock_query
        mock_query.offset.return_value = mock_query
        mock_query.limit.return_value = mock_query
        mock_query.all.return_value = []

        repo.list_sessions("teacher-A")

        # Verify the query targets ExamSession and is actually filtered;
        # an unfiltered listing would expose other teachers' sessions.
        mock_db.query.assert_called_with(ExamSession)
        mock_query.filter.assert_called()

    def test_get_document_verifies_teacher_ownership(self, repo, mock_db):
        """Getting a document must verify teacher owns the session."""
        mock_query = MagicMock()
        mock_db.query.return_value = mock_query
        mock_query.join.return_value = mock_query
        mock_query.filter.return_value = mock_query
        mock_query.first.return_value = None

        repo.get_document("doc-token-123", "teacher-A")

        # Must join with ExamSession to verify teacher_id
        mock_query.join.assert_called()

    def test_different_teachers_cannot_see_each_others_sessions(self, repo, mock_db):
        """Teacher A cannot access Teacher B's sessions."""
        # Scenario: "session-123" belongs to teacher-B. The mocked query
        # yields no row, which is what the teacher filter should produce
        # when teacher-A asks for it.
        mock_query = MagicMock()
        mock_db.query.return_value = mock_query
        mock_query.filter.return_value = mock_query
        mock_query.first.return_value = None

        # Teacher A tries to access
        result = repo.get_session("session-123", "teacher-A")

        # The None must come from a filtered query, not an unfiltered lookup.
        mock_query.filter.assert_called()
        assert result is None


class TestSessionOperations:
    """Tests for session CRUD operations."""

    def test_create_session_sets_teacher_id(self, repo, mock_db):
        """Creating a session must set the teacher_id."""
        repo.create_session(
            teacher_id="teacher-123",
            name="Mathe Klausur",
            subject="Mathematik"
        )

        # Verify session was added with teacher_id
        mock_db.add.assert_called_once()
        added_session = mock_db.add.call_args[0][0]
        assert added_session.teacher_id == "teacher-123"
        assert added_session.name == "Mathe Klausur"

    def test_create_session_sets_retention_date(self, repo, mock_db):
        """Sessions must have a retention date for auto-deletion."""
        repo.create_session(
            teacher_id="teacher-123",
            name="Test",
            retention_days=30
        )

        added_session = mock_db.add.call_args[0][0]
        assert added_session.retention_until is not None

        # Should be approximately 30 days in the future.
        # NOTE(review): presumably the repository stores naive UTC
        # timestamps, hence the naive utcnow() here - confirm before
        # switching either side to timezone-aware datetimes.
        expected = datetime.utcnow() + timedelta(days=30)
        diff = abs((added_session.retention_until - expected).total_seconds())
        assert diff < 60  # Within 1 minute

    def test_delete_session_soft_delete_by_default(self, repo, mock_db):
        """Deleting should soft-delete by default."""
        mock_session = MagicMock(spec=ExamSession)
        mock_session.status = SessionStatus.CREATED

        mock_query = MagicMock()
        mock_db.query.return_value = mock_query
        mock_query.filter.return_value = mock_query
        mock_query.first.return_value = mock_session

        repo.delete_session("session-123", "teacher-A")

        # Should set status to DELETED, not actually delete
        assert mock_session.status == SessionStatus.DELETED
        mock_db.delete.assert_not_called()

    def test_delete_session_hard_delete_when_requested(self, repo, mock_db):
        """Hard delete should actually delete the record."""
        mock_session = MagicMock(spec=ExamSession)

        mock_query = MagicMock()
        mock_db.query.return_value = mock_query
        mock_query.filter.return_value = mock_query
        mock_query.first.return_value = mock_session

        repo.delete_session("session-123", "teacher-A", hard_delete=True)

        mock_db.delete.assert_called_once_with(mock_session)


class TestDocumentOperations:
    """Tests for document CRUD operations."""

    def test_create_document_requires_valid_session(self, repo, mock_db):
        """Creating a document requires a valid session owned by teacher."""
        # Session not found (wrong teacher or doesn't exist)
        mock_query = MagicMock()
        mock_db.query.return_value = mock_query
        mock_query.filter.return_value = mock_query
        mock_query.first.return_value = None

        result = repo.create_document(
            session_id="session-123",
            teacher_id="teacher-A"
        )

        assert result is None

    def test_update_document_ocr_changes_status(self, repo, mock_db):
        """Updating OCR results should update document status."""
        mock_doc = MagicMock(spec=PseudonymizedDocument)
        mock_doc.status = DocumentStatus.UPLOADED

        # Stub get_document so the update works on our mock document
        with patch.object(repo, "get_document", return_value=mock_doc):
            repo.update_document_ocr(
                doc_token="doc-123",
                teacher_id="teacher-A",
                ocr_text="Student answer text",
                confidence=95
            )

        assert mock_doc.ocr_text == "Student answer text"
        assert mock_doc.ocr_confidence == 95
        assert mock_doc.status == DocumentStatus.OCR_COMPLETED


class TestDataRetention:
    """Tests for data retention and cleanup."""

    def test_cleanup_expired_sessions(self, repo, mock_db):
        """Cleanup should mark expired sessions as deleted."""
        # Create a session whose retention date passed one day ago
        expired_session = MagicMock(spec=ExamSession)
        expired_session.retention_until = datetime.utcnow() - timedelta(days=1)
        expired_session.status = SessionStatus.COMPLETED
        expired_session.encrypted_identity_map = b"encrypted_data"

        mock_query = MagicMock()
        mock_db.query.return_value = mock_query
        mock_query.filter.return_value = mock_query
        mock_query.all.return_value = [expired_session]

        count = repo.cleanup_expired_sessions()

        assert count == 1
        assert expired_session.status == SessionStatus.DELETED
        # Identity map should be cleared
        assert expired_session.encrypted_identity_map is None


class TestStatistics:
    """Tests for anonymized statistics."""

    def test_get_session_stats_returns_anonymized_data(self, repo, mock_db):
        """Statistics should not contain any PII."""
        mock_session = MagicMock(spec=ExamSession)
        mock_session.document_count = 25
        mock_session.processed_count = 20

        mock_query = MagicMock()
        mock_db.query.return_value = mock_query
        mock_query.filter.return_value = mock_query
        # first() is called twice: once for status counts and once for
        # score stats. Return a tuple for score_stats that can be subscripted.
        mock_query.first.return_value = (85.0, 60, 100)  # avg, min, max scores
        mock_query.group_by.return_value = mock_query
        mock_query.all.return_value = []

        with patch.object(repo, "get_session", return_value=mock_session):
            stats = repo.get_session_stats("session-123", "teacher-A")

        # Stats should contain only aggregate data, no PII
        assert "session_id" in stats
        assert "total_documents" in stats
        # Should NOT contain student names or tokens
        assert "student_names" not in stats
        assert "doc_tokens" not in stats