This repository has been archived on 2026-02-15. You can view files and clone it. You cannot open issues or pull requests or push a commit.
Files
breakpilot-pwa/backend/klausur/tests/test_pseudonymizer.py
BreakPilot Dev 19855efacc
Some checks failed
Tests / Go Tests (push) Has been cancelled
Tests / Python Tests (push) Has been cancelled
Tests / Integration Tests (push) Has been cancelled
Tests / Go Lint (push) Has been cancelled
Tests / Python Lint (push) Has been cancelled
Tests / Security Scan (push) Has been cancelled
Tests / All Checks Passed (push) Has been cancelled
Security Scanning / Secret Scanning (push) Has been cancelled
Security Scanning / Dependency Vulnerability Scan (push) Has been cancelled
Security Scanning / Go Security Scan (push) Has been cancelled
Security Scanning / Python Security Scan (push) Has been cancelled
Security Scanning / Node.js Security Scan (push) Has been cancelled
Security Scanning / Docker Image Security (push) Has been cancelled
Security Scanning / Security Summary (push) Has been cancelled
CI/CD Pipeline / Go Tests (push) Has been cancelled
CI/CD Pipeline / Python Tests (push) Has been cancelled
CI/CD Pipeline / Website Tests (push) Has been cancelled
CI/CD Pipeline / Linting (push) Has been cancelled
CI/CD Pipeline / Security Scan (push) Has been cancelled
CI/CD Pipeline / Docker Build & Push (push) Has been cancelled
CI/CD Pipeline / Integration Tests (push) Has been cancelled
CI/CD Pipeline / Deploy to Staging (push) Has been cancelled
CI/CD Pipeline / Deploy to Production (push) Has been cancelled
CI/CD Pipeline / CI Summary (push) Has been cancelled
ci/woodpecker/manual/build-ci-image Pipeline was successful
ci/woodpecker/manual/main Pipeline failed
feat: BreakPilot PWA - Full codebase (clean push without large binaries)
All services: admin-v2, studio-v2, website, ai-compliance-sdk,
consent-service, klausur-service, voice-service, and infrastructure.
Large PDFs and compiled binaries excluded via .gitignore.
2026-02-11 13:25:58 +01:00

210 lines
7.0 KiB
Python

"""
Tests for PseudonymizationService.
Verifies that:
- doc_tokens are cryptographically random
- QR codes are generated correctly
- Header redaction works as expected
- No personal data leaks through pseudonymization
"""
import pytest
import uuid
from unittest.mock import patch, MagicMock
from klausur.services.pseudonymizer import (
PseudonymizationService,
get_pseudonymizer,
RedactionResult,
QRDetectionResult,
)
class TestDocTokenGeneration:
    """Checks on how the service mints doc_tokens."""

    def test_generate_doc_token_returns_valid_uuid(self):
        """A freshly generated doc_token parses as a version-4 UUID."""
        pseudonymizer = PseudonymizationService()
        # uuid.UUID raises ValueError when the string is not a UUID at all.
        as_uuid = uuid.UUID(pseudonymizer.generate_doc_token())
        assert as_uuid.version == 4

    def test_generate_doc_token_is_unique(self):
        """1000 consecutively generated tokens contain no duplicates."""
        pseudonymizer = PseudonymizationService()
        distinct = {pseudonymizer.generate_doc_token() for _ in range(1000)}
        assert len(distinct) == 1000

    def test_generate_batch_tokens_correct_count(self):
        """A batch of 25 yields exactly 25 tokens, all different."""
        pseudonymizer = PseudonymizationService()
        batch = pseudonymizer.generate_batch_tokens(25)
        assert len(batch) == 25
        assert len(set(batch)) == 25

    def test_token_no_correlation_to_index(self):
        """Two separately generated batches share no token.

        If tokens were derived from their position in a batch, the two
        batches of equal size would collide.
        """
        pseudonymizer = PseudonymizationService()
        first_batch = pseudonymizer.generate_batch_tokens(10)
        second_batch = pseudonymizer.generate_batch_tokens(10)
        assert set(first_batch).isdisjoint(second_batch)
class TestQRCodeGeneration:
    """Checks on the PNG output of QR code generation."""

    # Every PNG file starts with this fixed 8-byte signature.
    _PNG_SIGNATURE = b'\x89PNG\r\n\x1a\n'

    def test_generate_qr_code_returns_bytes(self):
        """generate_qr_code returns bytes that begin with the PNG signature.

        The test is skipped when generation raises RuntimeError.
        """
        pseudonymizer = PseudonymizationService()
        doc_token = pseudonymizer.generate_doc_token()
        try:
            png = pseudonymizer.generate_qr_code(doc_token)
        except RuntimeError:
            pytest.skip("qrcode library not installed")
        assert isinstance(png, bytes)
        assert png[:8] == self._PNG_SIGNATURE

    def test_generate_qr_code_custom_size(self):
        """A size=400 code is a PNG with more bytes than a size=100 code.

        The test is skipped when generation raises RuntimeError.
        """
        pseudonymizer = PseudonymizationService()
        doc_token = pseudonymizer.generate_doc_token()
        try:
            png_100 = pseudonymizer.generate_qr_code(doc_token, size=100)
            png_400 = pseudonymizer.generate_qr_code(doc_token, size=400)
        except RuntimeError:
            pytest.skip("qrcode library not installed")
        # Both outputs must carry the PNG signature.
        assert png_100[:8] == self._PNG_SIGNATURE
        assert png_400[:8] == self._PNG_SIGNATURE
        # The larger request must produce the longer byte string.
        assert len(png_400) > len(png_100)
class TestHeaderRedaction:
    """Tests for header redaction."""

    @staticmethod
    def _make_white_pixel_png() -> bytes:
        """Return a well-formed 1x1 white RGB PNG.

        The image is assembled chunk by chunk so that every length field
        and CRC is computed rather than typed by hand; a hand-written
        literal is easy to truncate, which silently turns the "valid
        image" fixture into an invalid one.
        """
        import struct
        import zlib

        def chunk(kind: bytes, payload: bytes) -> bytes:
            # PNG chunk layout: 4-byte big-endian payload length, 4-byte
            # type, payload, then a CRC-32 computed over type + payload.
            checksum = zlib.crc32(kind + payload) & 0xFFFFFFFF
            return (
                struct.pack(">I", len(payload))
                + kind
                + payload
                + struct.pack(">I", checksum)
            )

        # IHDR: width 1, height 1, bit depth 8, colour type 2 (RGB),
        # compression 0, filter 0, interlace 0.
        header = struct.pack(">IIBBBBB", 1, 1, 8, 2, 0, 0, 0)
        # One scanline: filter-type byte 0 followed by one white pixel.
        scanline = b"\x00\xff\xff\xff"
        return (
            b"\x89PNG\r\n\x1a\n"
            + chunk(b"IHDR", header)
            + chunk(b"IDAT", zlib.compress(scanline))
            + chunk(b"IEND", b"")
        )

    def test_redact_header_returns_redaction_result(self):
        """Redacting a valid PNG returns a RedactionResult holding bytes."""
        service = PseudonymizationService()
        test_png = self._make_white_pixel_png()
        result = service.redact_header(test_png)
        assert isinstance(result, RedactionResult)
        assert isinstance(result.redacted_image, bytes)

    def test_redact_header_with_invalid_image_returns_original(self):
        """Undecodable input comes back unchanged, flagged as not redacted."""
        service = PseudonymizationService()
        invalid_data = b'not an image'
        result = service.redact_header(invalid_data)
        assert result.redacted_image == invalid_data
        assert result.redaction_applied is False
class TestQRDetection:
    """Checks on QR code detection."""

    def test_detect_qr_code_no_qr_returns_none(self):
        """Bytes that are not an image yield no token and zero confidence."""
        not_an_image = b'not an image with qr'
        detection = PseudonymizationService().detect_qr_code(not_an_image)
        assert detection.doc_token is None
        assert detection.confidence == 0.0
class TestSingleton:
    """Checks on the get_pseudonymizer accessor."""

    def test_get_pseudonymizer_returns_same_instance(self):
        """Two calls to get_pseudonymizer hand back the identical object."""
        first, second = get_pseudonymizer(), get_pseudonymizer()
        assert first is second

    def test_pseudonymizer_is_service_instance(self):
        """The shared object is a PseudonymizationService."""
        assert isinstance(get_pseudonymizer(), PseudonymizationService)
class TestPrivacyGuarantees:
    """Tests verifying privacy guarantees."""

    def test_token_cannot_be_reversed_to_name(self):
        """No token contains a student's full name or any word of it.

        Token generation takes only a count, so names cannot flow into the
        tokens; this check guards against a name fragment appearing in a
        token by any route. Comparison is case-insensitive.
        """
        service = PseudonymizationService()
        student_names = ["Max Mustermann", "Anna Schmidt", "Tim Mueller"]
        tokens = service.generate_batch_tokens(len(student_names))
        for token in tokens:
            lowered_token = token.lower()
            for name in student_names:
                assert name.lower() not in lowered_token
                for part in name.split():
                    assert part.lower() not in lowered_token

    def test_token_generation_is_not_deterministic(self):
        """Two batches requested with the same count share no token."""
        service = PseudonymizationService()
        batch1 = service.generate_batch_tokens(5)
        batch2 = service.generate_batch_tokens(5)
        assert set(batch1).isdisjoint(batch2)

    def test_token_entropy(self):
        """Tokens have the canonical UUID shape and use every hex digit."""
        service = PseudonymizationService()
        tokens = service.generate_batch_tokens(100)
        # Canonical UUID text is 36 characters: 8-4-4-4-12 with 4 hyphens.
        for token in tokens:
            assert len(token) == 36
            assert token.count('-') == 4
        # Rough entropy check: across 100 tokens (3200 hex characters) all
        # 16 hex digits must occur and nothing else may. The previous
        # threshold of 10 distinct characters would also have accepted a
        # decimal-only alphabet, contradicting the stated intent.
        all_chars = ''.join(t.replace('-', '') for t in tokens).lower()
        assert set(all_chars) == set('0123456789abcdef')