fix: Restore all files lost during destructive rebase
A previous `git pull --rebase origin main` dropped 177 local commits,
losing 3400+ files across admin-v2, backend, studio-v2, website,
klausur-service, and many other services. The partial restore attempt
(660295e2) only recovered some files.
This commit restores all missing files from pre-rebase ref 98933f5e
while preserving post-rebase additions (night-scheduler, night-mode UI,
NightModeWidget dashboard integration).
Restored features include:
- AI Module Sidebar (FAB), OCR Labeling, OCR Compare
- GPU Dashboard, RAG Pipeline, Magic Help
- Klausur-Korrektur (8 files), Abitur-Archiv (5+ files)
- Companion, Zeugnisse-Crawler, Screen Flow
- Full backend, studio-v2, website, klausur-service
- All compliance SDKs, agent-core, voice-service
- CI/CD configs, documentation, scripts
Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
209
backend/klausur/tests/test_pseudonymizer.py
Normal file
209
backend/klausur/tests/test_pseudonymizer.py
Normal file
@@ -0,0 +1,209 @@
|
||||
"""
|
||||
Tests for PseudonymizationService.
|
||||
|
||||
Verifies that:
|
||||
- doc_tokens are cryptographically random
|
||||
- QR codes are generated correctly
|
||||
- Header redaction works as expected
|
||||
- No personal data leaks through pseudonymization
|
||||
"""
|
||||
import pytest
|
||||
import uuid
|
||||
from unittest.mock import patch, MagicMock
|
||||
|
||||
from klausur.services.pseudonymizer import (
|
||||
PseudonymizationService,
|
||||
get_pseudonymizer,
|
||||
RedactionResult,
|
||||
QRDetectionResult,
|
||||
)
|
||||
|
||||
|
||||
class TestDocTokenGeneration:
    """Verify that doc_tokens are well-formed, unique, and order-independent."""

    def test_generate_doc_token_returns_valid_uuid(self):
        """A freshly generated doc_token must parse as a version-4 UUID."""
        svc = PseudonymizationService()
        token = svc.generate_doc_token()

        # uuid.UUID() raises ValueError for malformed input, failing the test.
        assert uuid.UUID(token).version == 4

    def test_generate_doc_token_is_unique(self):
        """1000 consecutive tokens must contain no duplicates."""
        svc = PseudonymizationService()
        seen = {svc.generate_doc_token() for _ in range(1000)}

        assert len(seen) == 1000

    def test_generate_batch_tokens_correct_count(self):
        """Batch generation returns exactly the requested number of tokens."""
        svc = PseudonymizationService()
        batch = svc.generate_batch_tokens(25)

        assert len(batch) == 25
        assert len(set(batch)) == 25  # and they are all distinct

    def test_token_no_correlation_to_index(self):
        """Separate batches must be disjoint: order of generation leaks nothing."""
        svc = PseudonymizationService()

        first = svc.generate_batch_tokens(10)
        second = svc.generate_batch_tokens(10)

        assert set(first).isdisjoint(second)
class TestQRCodeGeneration:
    """Verify QR code rendering of doc_tokens (PNG output, size handling)."""

    def test_generate_qr_code_returns_bytes(self):
        """QR generation yields raw PNG bytes for a token."""
        svc = PseudonymizationService()
        token = svc.generate_doc_token()

        try:
            payload = svc.generate_qr_code(token)
            assert isinstance(payload, bytes)
            # Every PNG file opens with this fixed 8-byte signature.
            assert payload.startswith(b'\x89PNG\r\n\x1a\n')
        except RuntimeError:
            # The service raises RuntimeError when the optional dependency is absent.
            pytest.skip("qrcode library not installed")

    def test_generate_qr_code_custom_size(self):
        """The size parameter controls the rendered image dimensions."""
        svc = PseudonymizationService()
        token = svc.generate_doc_token()

        try:
            compact = svc.generate_qr_code(token, size=100)
            roomy = svc.generate_qr_code(token, size=400)

            # Both renders must still be valid PNG payloads.
            assert compact.startswith(b'\x89PNG\r\n\x1a\n')
            assert roomy.startswith(b'\x89PNG\r\n\x1a\n')

            # A larger render should produce a larger file.
            assert len(roomy) > len(compact)
        except RuntimeError:
            pytest.skip("qrcode library not installed")
class TestHeaderRedaction:
    """Verify header redaction on valid and invalid image inputs."""

    def test_redact_header_returns_redaction_result(self):
        """Redacting a valid image yields a RedactionResult with image bytes."""
        svc = PseudonymizationService()

        # Hand-assembled minimal 1x1 RGB PNG: signature, IHDR, IDAT, IEND.
        minimal_png = b''.join((
            b'\x89PNG\r\n\x1a\n',                # PNG signature
            b'\x00\x00\x00\rIHDR',               # IHDR chunk header
            b'\x00\x00\x00\x01',                 # width = 1
            b'\x00\x00\x00\x01',                 # height = 1
            b'\x08\x02',                         # bit depth 8, color type RGB
            b'\x00\x00\x00',                     # compression / filter / interlace
            b'\x90wS\xde',                       # IHDR CRC
            b'\x00\x00\x00\x0cIDATx\x9cc\xf8\x0f\x00\x00\x01\x01\x00\x05\x18\xd8N',  # IDAT
            b'\x00\x00\x00\x00IEND\xaeB`\x82',   # IEND
        ))

        outcome = svc.redact_header(minimal_png)

        assert isinstance(outcome, RedactionResult)
        assert isinstance(outcome.redacted_image, bytes)

    def test_redact_header_with_invalid_image_returns_original(self):
        """Undecodable input comes back unchanged, flagged as not redacted."""
        svc = PseudonymizationService()
        garbage = b'not an image'

        outcome = svc.redact_header(garbage)

        assert outcome.redacted_image == garbage
        assert outcome.redaction_applied is False
class TestQRDetection:
    """Verify QR code detection on inputs that contain no QR code."""

    def test_detect_qr_code_no_qr_returns_none(self):
        """When no QR code is present, token is None and confidence is zero."""
        svc = PseudonymizationService()

        outcome = svc.detect_qr_code(b'not an image with qr')

        assert outcome.doc_token is None
        assert outcome.confidence == 0.0
class TestSingleton:
    """Verify the module-level singleton accessor."""

    def test_get_pseudonymizer_returns_same_instance(self):
        """Repeated calls must hand back the identical object."""
        first = get_pseudonymizer()
        second = get_pseudonymizer()

        assert first is second

    def test_pseudonymizer_is_service_instance(self):
        """The singleton must be a PseudonymizationService."""
        assert isinstance(get_pseudonymizer(), PseudonymizationService)
class TestPrivacyGuarantees:
    """Verify that tokens leak no personal data and carry real entropy."""

    def test_token_cannot_be_reversed_to_name(self):
        """No token may contain any student name, whole or in part."""
        svc = PseudonymizationService()

        names = ["Max Mustermann", "Anna Schmidt", "Tim Mueller"]
        batch = svc.generate_batch_tokens(len(names))

        for token in batch:
            lowered = token.lower()
            for full_name in names:
                assert full_name.lower() not in lowered
                # Check each name fragment individually as well.
                for fragment in full_name.split():
                    assert fragment.lower() not in lowered

    def test_token_generation_is_not_deterministic(self):
        """Identically-sized batches must never repeat a token."""
        svc = PseudonymizationService()

        first = svc.generate_batch_tokens(5)
        second = svc.generate_batch_tokens(5)

        assert set(first).isdisjoint(second)

    def test_token_entropy(self):
        """Tokens have canonical UUID shape and a spread of hex digits."""
        svc = PseudonymizationService()
        batch = svc.generate_batch_tokens(100)

        # Canonical UUID text form: 36 chars grouped 8-4-4-4-12.
        for token in batch:
            assert len(token) == 36
            assert token.count('-') == 4

        # Rough entropy check: across 100 tokens all hex digits should appear.
        hex_stream = ''.join(token.replace('-', '') for token in batch)
        assert len(set(hex_stream)) >= 10
|
||||
Reference in New Issue
Block a user