Some checks failed
Tests / Go Tests (push) Has been cancelled
Tests / Python Tests (push) Has been cancelled
Tests / Integration Tests (push) Has been cancelled
Tests / Go Lint (push) Has been cancelled
Tests / Python Lint (push) Has been cancelled
Tests / Security Scan (push) Has been cancelled
Tests / All Checks Passed (push) Has been cancelled
Security Scanning / Secret Scanning (push) Has been cancelled
Security Scanning / Dependency Vulnerability Scan (push) Has been cancelled
Security Scanning / Go Security Scan (push) Has been cancelled
Security Scanning / Python Security Scan (push) Has been cancelled
Security Scanning / Node.js Security Scan (push) Has been cancelled
Security Scanning / Docker Image Security (push) Has been cancelled
Security Scanning / Security Summary (push) Has been cancelled
CI/CD Pipeline / Go Tests (push) Has been cancelled
CI/CD Pipeline / Python Tests (push) Has been cancelled
CI/CD Pipeline / Website Tests (push) Has been cancelled
CI/CD Pipeline / Linting (push) Has been cancelled
CI/CD Pipeline / Security Scan (push) Has been cancelled
CI/CD Pipeline / Docker Build & Push (push) Has been cancelled
CI/CD Pipeline / Integration Tests (push) Has been cancelled
CI/CD Pipeline / Deploy to Staging (push) Has been cancelled
CI/CD Pipeline / Deploy to Production (push) Has been cancelled
CI/CD Pipeline / CI Summary (push) Has been cancelled
ci/woodpecker/manual/build-ci-image Pipeline was successful
ci/woodpecker/manual/main Pipeline failed
All services: admin-v2, studio-v2, website, ai-compliance-sdk, consent-service, klausur-service, voice-service, and infrastructure. Large PDFs and compiled binaries excluded via .gitignore.
210 lines
7.0 KiB
Python
210 lines
7.0 KiB
Python
"""
|
|
Tests for PseudonymizationService.
|
|
|
|
Verifies that:
|
|
- doc_tokens are cryptographically random
|
|
- QR codes are generated correctly
|
|
- Header redaction works as expected
|
|
- No personal data leaks through pseudonymization
|
|
"""
|
|
import pytest
|
|
import uuid
|
|
from unittest.mock import patch, MagicMock
|
|
|
|
from klausur.services.pseudonymizer import (
|
|
PseudonymizationService,
|
|
get_pseudonymizer,
|
|
RedactionResult,
|
|
QRDetectionResult,
|
|
)
|
|
|
|
|
|
class TestDocTokenGeneration:
    """Tests for doc_token generation."""

    def test_generate_doc_token_returns_valid_uuid(self):
        """doc_token should parse as a version-4 UUID."""
        service = PseudonymizationService()
        token = service.generate_doc_token()

        # uuid.UUID raises ValueError on malformed input, so a successful
        # parse doubles as the format check.
        parsed = uuid.UUID(token)
        assert parsed.version == 4

    def test_generate_doc_token_is_unique(self):
        """Each generated token should be unique."""
        service = PseudonymizationService()
        tokens = [service.generate_doc_token() for _ in range(1000)]

        # A set drops duplicates, so equal sizes mean no collisions.
        assert len(set(tokens)) == 1000

    def test_generate_batch_tokens_correct_count(self):
        """Batch generation should return the requested number of tokens."""
        service = PseudonymizationService()
        tokens = service.generate_batch_tokens(25)

        assert len(tokens) == 25
        assert len(set(tokens)) == 25  # All unique

    def test_token_no_correlation_to_index(self):
        """Tokens should not correlate to their generation order."""
        service = PseudonymizationService()

        # Generate multiple batches
        batch1 = service.generate_batch_tokens(10)
        batch2 = service.generate_batch_tokens(10)

        # No overlap between batches
        assert set(batch1).isdisjoint(batch2)

        # Disjointness alone would also hold for counter-derived tokens
        # (e.g. base + index). Such a sequence has a single constant
        # difference between neighbours; random tokens do not.
        for batch in (batch1, batch2):
            values = [uuid.UUID(token).int for token in batch]
            strides = {later - earlier for earlier, later in zip(values, values[1:])}
            assert len(strides) > 1
|
|
|
|
|
|
class TestQRCodeGeneration:
    """Tests for QR code generation."""

    # The eight-byte signature every PNG stream starts with.
    _PNG_SIGNATURE = b'\x89PNG\r\n\x1a\n'

    def test_generate_qr_code_returns_bytes(self):
        """A generated QR code comes back as PNG-encoded bytes."""
        pseudonymizer = PseudonymizationService()
        doc_token = pseudonymizer.generate_doc_token()

        # The test treats RuntimeError as "qrcode dependency missing" and
        # skips rather than fails in that case.
        try:
            png_data = pseudonymizer.generate_qr_code(doc_token)
        except RuntimeError:
            pytest.skip("qrcode library not installed")

        assert isinstance(png_data, bytes)
        assert png_data[:8] == self._PNG_SIGNATURE

    def test_generate_qr_code_custom_size(self):
        """A larger requested size yields a larger PNG payload."""
        pseudonymizer = PseudonymizationService()
        doc_token = pseudonymizer.generate_doc_token()

        # Render the same token at two sizes; skip when rendering is
        # unavailable (signalled by RuntimeError).
        try:
            small_png = pseudonymizer.generate_qr_code(doc_token, size=100)
            large_png = pseudonymizer.generate_qr_code(doc_token, size=400)
        except RuntimeError:
            pytest.skip("qrcode library not installed")

        # Both renderings must be PNG streams.
        assert small_png[:8] == self._PNG_SIGNATURE
        assert large_png[:8] == self._PNG_SIGNATURE

        # Encoded byte length is used as the proxy for image size.
        assert len(large_png) > len(small_png)
|
|
|
|
|
|
class TestHeaderRedaction:
|
|
"""Tests for header redaction."""
|
|
|
|
def test_redact_header_returns_redaction_result(self):
|
|
"""Redaction should return proper RedactionResult."""
|
|
service = PseudonymizationService()
|
|
|
|
# Create a simple test image (1x1 white pixel PNG)
|
|
# This is a minimal valid PNG
|
|
test_png = (
|
|
b'\x89PNG\r\n\x1a\n' # PNG signature
|
|
b'\x00\x00\x00\rIHDR' # IHDR chunk
|
|
b'\x00\x00\x00\x01' # Width: 1
|
|
b'\x00\x00\x00\x01' # Height: 1
|
|
b'\x08\x02' # Bit depth: 8, Color type: RGB
|
|
b'\x00\x00\x00' # Compression, Filter, Interlace
|
|
b'\x90wS\xde' # CRC
|
|
b'\x00\x00\x00\x0cIDATx\x9cc\xf8\x0f\x00\x00\x01\x01\x00\x05\x18\xd8N' # IDAT
|
|
b'\x00\x00\x00\x00IEND\xaeB`\x82' # IEND
|
|
)
|
|
|
|
result = service.redact_header(test_png)
|
|
|
|
assert isinstance(result, RedactionResult)
|
|
assert isinstance(result.redacted_image, bytes)
|
|
|
|
def test_redact_header_with_invalid_image_returns_original(self):
|
|
"""Invalid images should return original bytes with redaction_applied=False."""
|
|
service = PseudonymizationService()
|
|
|
|
invalid_data = b'not an image'
|
|
result = service.redact_header(invalid_data)
|
|
|
|
assert result.redacted_image == invalid_data
|
|
assert result.redaction_applied is False
|
|
|
|
|
|
class TestQRDetection:
    """Tests for QR code detection."""

    def test_detect_qr_code_no_qr_returns_none(self):
        """Input without a QR code yields no token and zero confidence."""
        pseudonymizer = PseudonymizationService()

        # Bytes that are not an image at all, so no QR code can be present.
        garbage = b'not an image with qr'
        detection = pseudonymizer.detect_qr_code(garbage)

        assert detection.doc_token is None
        assert detection.confidence == 0.0
|
|
|
|
|
|
class TestSingleton:
    """Tests for singleton pattern."""

    def test_get_pseudonymizer_returns_same_instance(self):
        """Two consecutive lookups hand back the very same object."""
        first = get_pseudonymizer()
        second = get_pseudonymizer()

        # Identity, not equality: both names must refer to one object.
        assert first is second

    def test_pseudonymizer_is_service_instance(self):
        """The shared object is a PseudonymizationService."""
        shared = get_pseudonymizer()

        assert isinstance(shared, PseudonymizationService)
|
|
|
|
|
|
class TestPrivacyGuarantees:
    """Tests verifying privacy guarantees."""

    def test_token_cannot_be_reversed_to_name(self):
        """Tokens should have no mathematical relationship to any input."""
        service = PseudonymizationService()

        # Generate tokens for "students"
        student_names = ["Max Mustermann", "Anna Schmidt", "Tim Mueller"]
        tokens = service.generate_batch_tokens(len(student_names))

        # Tokens should not contain any part of names
        for token in tokens:
            lowered_token = token.lower()
            for name in student_names:
                assert name.lower() not in lowered_token
                for part in name.split():
                    assert part.lower() not in lowered_token

    def test_token_generation_is_not_deterministic(self):
        """Same input should not produce same token."""
        service = PseudonymizationService()

        # Even with "same student count", tokens should differ
        batch1 = service.generate_batch_tokens(5)
        batch2 = service.generate_batch_tokens(5)

        # No tokens should match
        assert set(batch1).isdisjoint(batch2)

    def test_token_entropy(self):
        """Tokens should have sufficient entropy."""
        service = PseudonymizationService()
        tokens = service.generate_batch_tokens(100)

        hex_alphabet = set("0123456789abcdef")
        seen_chars = set()

        # Each token should be 36 chars (UUID format: 8-4-4-4-12)
        for token in tokens:
            assert len(token) == 36
            assert token.count('-') == 4
            # Lower-case so the check does not depend on hex letter casing.
            seen_chars.update(token.replace('-', '').lower())

        # Rough entropy check: across 100 tokens every hex digit must occur
        # and nothing but hex digits may occur. A threshold of 10 would let
        # a decimal-only generator through.
        assert seen_chars == hex_alphabet
|