""" Tests for PseudonymizationService. Verifies that: - doc_tokens are cryptographically random - QR codes are generated correctly - Header redaction works as expected - No personal data leaks through pseudonymization """ import pytest import uuid from unittest.mock import patch, MagicMock from klausur.services.pseudonymizer import ( PseudonymizationService, get_pseudonymizer, RedactionResult, QRDetectionResult, ) class TestDocTokenGeneration: """Tests for doc_token generation.""" def test_generate_doc_token_returns_valid_uuid(self): """doc_token should be a valid UUID4.""" service = PseudonymizationService() token = service.generate_doc_token() # Should be a valid UUID parsed = uuid.UUID(token) assert parsed.version == 4 def test_generate_doc_token_is_unique(self): """Each generated token should be unique.""" service = PseudonymizationService() tokens = [service.generate_doc_token() for _ in range(1000)] # All tokens should be unique assert len(set(tokens)) == 1000 def test_generate_batch_tokens_correct_count(self): """Batch generation should return correct number of tokens.""" service = PseudonymizationService() tokens = service.generate_batch_tokens(25) assert len(tokens) == 25 assert len(set(tokens)) == 25 # All unique def test_token_no_correlation_to_index(self): """Tokens should not correlate to their generation order.""" service = PseudonymizationService() # Generate multiple batches batch1 = service.generate_batch_tokens(10) batch2 = service.generate_batch_tokens(10) # No overlap between batches assert not set(batch1).intersection(set(batch2)) class TestQRCodeGeneration: """Tests for QR code generation.""" def test_generate_qr_code_returns_bytes(self): """QR code generation should return PNG bytes.""" service = PseudonymizationService() token = service.generate_doc_token() try: qr_bytes = service.generate_qr_code(token) assert isinstance(qr_bytes, bytes) # PNG magic bytes assert qr_bytes[:8] == b'\x89PNG\r\n\x1a\n' except RuntimeError: pytest.skip("qrcode library not installed") def test_generate_qr_code_custom_size(self): """QR code should respect custom size.""" service = PseudonymizationService() token = service.generate_doc_token() try: # Generate with different sizes small = service.generate_qr_code(token, size=100) large = service.generate_qr_code(token, size=400) # Both should be valid PNG assert small[:8] == b'\x89PNG\r\n\x1a\n' assert large[:8] == b'\x89PNG\r\n\x1a\n' # Large should be bigger assert len(large) > len(small) except RuntimeError: pytest.skip("qrcode library not installed") class TestHeaderRedaction: """Tests for header redaction.""" def test_redact_header_returns_redaction_result(self): """Redaction should return proper RedactionResult.""" service = PseudonymizationService() # Create a simple test image (1x1 white pixel PNG) # This is a minimal valid PNG test_png = ( b'\x89PNG\r\n\x1a\n' # PNG signature b'\x00\x00\x00\rIHDR' # IHDR chunk b'\x00\x00\x00\x01' # Width: 1 b'\x00\x00\x00\x01' # Height: 1 b'\x08\x02' # Bit depth: 8, Color type: RGB b'\x00\x00\x00' # Compression, Filter, Interlace b'\x90wS\xde' # CRC b'\x00\x00\x00\x0cIDATx\x9cc\xf8\x0f\x00\x00\x01\x01\x00\x05\x18\xd8N' # IDAT b'\x00\x00\x00\x00IEND\xaeB`\x82' # IEND ) result = service.redact_header(test_png) assert isinstance(result, RedactionResult) assert isinstance(result.redacted_image, bytes) def test_redact_header_with_invalid_image_returns_original(self): """Invalid images should return original bytes with redaction_applied=False.""" service = PseudonymizationService() invalid_data = b'not an image' result = service.redact_header(invalid_data) assert result.redacted_image == invalid_data assert result.redaction_applied is False class TestQRDetection: """Tests for QR code detection.""" def test_detect_qr_code_no_qr_returns_none(self): """Image without QR should return None token.""" service = PseudonymizationService() # Empty/invalid image result = service.detect_qr_code(b'not an image with qr') assert result.doc_token is None assert result.confidence == 0.0 class TestSingleton: """Tests for singleton pattern.""" def test_get_pseudonymizer_returns_same_instance(self): """Singleton should return same instance.""" instance1 = get_pseudonymizer() instance2 = get_pseudonymizer() assert instance1 is instance2 def test_pseudonymizer_is_service_instance(self): """Singleton should be PseudonymizationService.""" instance = get_pseudonymizer() assert isinstance(instance, PseudonymizationService) class TestPrivacyGuarantees: """Tests verifying privacy guarantees.""" def test_token_cannot_be_reversed_to_name(self): """Tokens should have no mathematical relationship to any input.""" service = PseudonymizationService() # Generate tokens for "students" student_names = ["Max Mustermann", "Anna Schmidt", "Tim Mueller"] tokens = service.generate_batch_tokens(len(student_names)) # Tokens should not contain any part of names for token in tokens: for name in student_names: assert name.lower() not in token.lower() for part in name.split(): assert part.lower() not in token.lower() def test_token_generation_is_not_deterministic(self): """Same input should not produce same token.""" service = PseudonymizationService() # Even with "same student count", tokens should differ batch1 = service.generate_batch_tokens(5) batch2 = service.generate_batch_tokens(5) # No tokens should match assert not set(batch1).intersection(set(batch2)) def test_token_entropy(self): """Tokens should have sufficient entropy.""" service = PseudonymizationService() tokens = service.generate_batch_tokens(100) # Each token should be 36 chars (UUID format: 8-4-4-4-12) for token in tokens: assert len(token) == 36 assert token.count('-') == 4 # Check character distribution (rough entropy check) all_chars = ''.join(t.replace('-', '') for t in tokens) unique_chars = set(all_chars) # Should use all hex digits (0-9, a-f) assert len(unique_chars) >= 10