fix: Restore all files lost during destructive rebase
A previous `git pull --rebase origin main` dropped 177 local commits,
losing 3400+ files across admin-v2, backend, studio-v2, website,
klausur-service, and many other services. The partial restore attempt
(660295e2) only recovered some files.
This commit restores all missing files from pre-rebase ref 98933f5e
while preserving post-rebase additions (night-scheduler, night-mode UI,
NightModeWidget dashboard integration).
Restored features include:
- AI Module Sidebar (FAB), OCR Labeling, OCR Compare
- GPU Dashboard, RAG Pipeline, Magic Help
- Klausur-Korrektur (8 files), Abitur-Archiv (5+ files)
- Companion, Zeugnisse-Crawler, Screen Flow
- Full backend, studio-v2, website, klausur-service
- All compliance SDKs, agent-core, voice-service
- CI/CD configs, documentation, scripts
Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
209
backend/klausur/tests/test_pseudonymizer.py
Normal file
209
backend/klausur/tests/test_pseudonymizer.py
Normal file
@@ -0,0 +1,209 @@
|
||||
"""
|
||||
Tests for PseudonymizationService.
|
||||
|
||||
Verifies that:
|
||||
- doc_tokens are cryptographically random
|
||||
- QR codes are generated correctly
|
||||
- Header redaction works as expected
|
||||
- No personal data leaks through pseudonymization
|
||||
"""
|
||||
import pytest
|
||||
import uuid
|
||||
from unittest.mock import patch, MagicMock
|
||||
|
||||
from klausur.services.pseudonymizer import (
|
||||
PseudonymizationService,
|
||||
get_pseudonymizer,
|
||||
RedactionResult,
|
||||
QRDetectionResult,
|
||||
)
|
||||
|
||||
|
||||
class TestDocTokenGeneration:
    """Verify that doc_tokens are well-formed, unique, and order-independent."""

    def test_generate_doc_token_returns_valid_uuid(self):
        """A freshly generated doc_token must parse as a version-4 UUID."""
        svc = PseudonymizationService()
        token = svc.generate_doc_token()

        # uuid.UUID() raises ValueError for malformed input, failing the test.
        assert uuid.UUID(token).version == 4

    def test_generate_doc_token_is_unique(self):
        """1000 consecutive tokens must contain no duplicates."""
        svc = PseudonymizationService()
        seen = {svc.generate_doc_token() for _ in range(1000)}

        assert len(seen) == 1000

    def test_generate_batch_tokens_correct_count(self):
        """Batch generation returns exactly the requested number of tokens."""
        svc = PseudonymizationService()
        batch = svc.generate_batch_tokens(25)

        assert len(batch) == 25
        assert len(set(batch)) == 25  # and they are all distinct

    def test_token_no_correlation_to_index(self):
        """Separate batches must be disjoint: order of generation leaks nothing."""
        svc = PseudonymizationService()

        first = svc.generate_batch_tokens(10)
        second = svc.generate_batch_tokens(10)

        assert set(first).isdisjoint(second)
class TestQRCodeGeneration:
    """Verify QR code rendering of doc_tokens (PNG output, size handling)."""

    def test_generate_qr_code_returns_bytes(self):
        """QR generation yields raw PNG bytes for a token."""
        svc = PseudonymizationService()
        token = svc.generate_doc_token()

        try:
            payload = svc.generate_qr_code(token)
            assert isinstance(payload, bytes)
            # Every PNG file opens with this fixed 8-byte signature.
            assert payload.startswith(b'\x89PNG\r\n\x1a\n')
        except RuntimeError:
            # The service raises RuntimeError when the optional dependency is absent.
            pytest.skip("qrcode library not installed")

    def test_generate_qr_code_custom_size(self):
        """The size parameter controls the rendered image dimensions."""
        svc = PseudonymizationService()
        token = svc.generate_doc_token()

        try:
            compact = svc.generate_qr_code(token, size=100)
            roomy = svc.generate_qr_code(token, size=400)

            # Both renders must still be valid PNG payloads.
            assert compact.startswith(b'\x89PNG\r\n\x1a\n')
            assert roomy.startswith(b'\x89PNG\r\n\x1a\n')

            # A larger render should produce a larger file.
            assert len(roomy) > len(compact)
        except RuntimeError:
            pytest.skip("qrcode library not installed")
class TestHeaderRedaction:
    """Verify header redaction on valid and invalid image inputs."""

    def test_redact_header_returns_redaction_result(self):
        """Redacting a valid image yields a RedactionResult with image bytes."""
        svc = PseudonymizationService()

        # Hand-assembled minimal 1x1 RGB PNG: signature, IHDR, IDAT, IEND.
        minimal_png = b''.join((
            b'\x89PNG\r\n\x1a\n',                # PNG signature
            b'\x00\x00\x00\rIHDR',               # IHDR chunk header
            b'\x00\x00\x00\x01',                 # width = 1
            b'\x00\x00\x00\x01',                 # height = 1
            b'\x08\x02',                         # bit depth 8, color type RGB
            b'\x00\x00\x00',                     # compression / filter / interlace
            b'\x90wS\xde',                       # IHDR CRC
            b'\x00\x00\x00\x0cIDATx\x9cc\xf8\x0f\x00\x00\x01\x01\x00\x05\x18\xd8N',  # IDAT
            b'\x00\x00\x00\x00IEND\xaeB`\x82',   # IEND
        ))

        outcome = svc.redact_header(minimal_png)

        assert isinstance(outcome, RedactionResult)
        assert isinstance(outcome.redacted_image, bytes)

    def test_redact_header_with_invalid_image_returns_original(self):
        """Undecodable input comes back unchanged, flagged as not redacted."""
        svc = PseudonymizationService()
        garbage = b'not an image'

        outcome = svc.redact_header(garbage)

        assert outcome.redacted_image == garbage
        assert outcome.redaction_applied is False
class TestQRDetection:
    """Verify QR code detection on inputs that contain no QR code."""

    def test_detect_qr_code_no_qr_returns_none(self):
        """When no QR code is present, token is None and confidence is zero."""
        svc = PseudonymizationService()

        outcome = svc.detect_qr_code(b'not an image with qr')

        assert outcome.doc_token is None
        assert outcome.confidence == 0.0
class TestSingleton:
    """Verify the module-level singleton accessor."""

    def test_get_pseudonymizer_returns_same_instance(self):
        """Repeated calls must hand back the identical object."""
        first = get_pseudonymizer()
        second = get_pseudonymizer()

        assert first is second

    def test_pseudonymizer_is_service_instance(self):
        """The singleton must be a PseudonymizationService."""
        assert isinstance(get_pseudonymizer(), PseudonymizationService)
class TestPrivacyGuarantees:
    """Verify that tokens leak no personal data and carry real entropy."""

    def test_token_cannot_be_reversed_to_name(self):
        """No token may contain any student name, whole or in part."""
        svc = PseudonymizationService()

        names = ["Max Mustermann", "Anna Schmidt", "Tim Mueller"]
        batch = svc.generate_batch_tokens(len(names))

        for token in batch:
            lowered = token.lower()
            for full_name in names:
                assert full_name.lower() not in lowered
                # Check each name fragment individually as well.
                for fragment in full_name.split():
                    assert fragment.lower() not in lowered

    def test_token_generation_is_not_deterministic(self):
        """Identically-sized batches must never repeat a token."""
        svc = PseudonymizationService()

        first = svc.generate_batch_tokens(5)
        second = svc.generate_batch_tokens(5)

        assert set(first).isdisjoint(second)

    def test_token_entropy(self):
        """Tokens have canonical UUID shape and a spread of hex digits."""
        svc = PseudonymizationService()
        batch = svc.generate_batch_tokens(100)

        # Canonical UUID text form: 36 chars grouped 8-4-4-4-12.
        for token in batch:
            assert len(token) == 36
            assert token.count('-') == 4

        # Rough entropy check: across 100 tokens all hex digits should appear.
        hex_stream = ''.join(token.replace('-', '') for token in batch)
        assert len(set(hex_stream)) >= 10
|
||||
Reference in New Issue
Block a user