fix: Restore all files lost during destructive rebase

A previous `git pull --rebase origin main` dropped 177 local commits,
losing 3400+ files across admin-v2, backend, studio-v2, website,
klausur-service, and many other services. The partial restore attempt
(660295e2) only recovered some files.

This commit restores all missing files from pre-rebase ref 98933f5e
while preserving post-rebase additions (night-scheduler, night-mode UI,
NightModeWidget dashboard integration).

Restored features include:
- AI Module Sidebar (FAB), OCR Labeling, OCR Compare
- GPU Dashboard, RAG Pipeline, Magic Help
- Klausur-Korrektur (8 files), Abitur-Archiv (5+ files)
- Companion, Zeugnisse-Crawler, Screen Flow
- Full backend, studio-v2, website, klausur-service
- All compliance SDKs, agent-core, voice-service
- CI/CD configs, documentation, scripts

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
Benjamin Admin
2026-02-09 09:51:32 +01:00
parent f7487ee240
commit bfdaf63ba9
2009 changed files with 749983 additions and 1731 deletions

View File

@@ -0,0 +1,9 @@
"""
Tests for Klausurkorrektur Module.
Tests cover:
- Database models and repository
- Pseudonymization service
- API routes
- Privacy guarantees
"""

View File

@@ -0,0 +1,455 @@
"""
Tests for Magic Onboarding functionality.
Tests cover:
- OnboardingSession lifecycle
- Student detection and confirmation
- Roster parsing
- School resolution
- Module linking
"""
import pytest
from unittest.mock import AsyncMock, MagicMock, patch
from datetime import datetime
# Import models
from klausur.db_models import (
OnboardingSession, DetectedStudent, ModuleLink,
OnboardingStatus, ModuleLinkType
)
# Import services
from klausur.services.roster_parser import RosterParser, RosterEntry, NameMatch
from klausur.services.school_resolver import SchoolResolver, BUNDESLAENDER, SCHULFORMEN
from klausur.services.module_linker import (
ModuleLinker, CorrectionResult, MeetingUrgency, ParentMeetingSuggestion
)
# =============================================================================
# ROSTER PARSER TESTS
# =============================================================================
class TestRosterParser:
    """Unit tests for the RosterParser matching service and its dataclasses."""

    def test_match_first_names_exact_match(self):
        """Detected names identical to roster first names resolve as exact matches."""
        parser = RosterParser()
        roster = [
            RosterEntry(first_name=first, last_name=last)
            for first, last in (
                ("Max", "Mueller"),
                ("Anna", "Schmidt"),
                ("Tim", "Weber"),
            )
        ]

        matches = parser.match_first_names(["Max", "Anna", "Tim"], roster)

        # One result is expected per detected name.
        assert len(matches) == 3

        # Inspect the result belonging to "Max" in detail.
        hit = next(m for m in matches if m.detected_name == "Max")
        assert hit.matched_entry is not None
        assert hit.matched_entry.last_name == "Mueller"
        assert hit.match_type == "exact"
        assert hit.confidence == 1.0

    def test_match_first_names_fuzzy_match(self):
        """A short form ("Max") may be matched against a longer roster name."""
        parser = RosterParser()
        roster = [
            RosterEntry(first_name="Maximilian", last_name="Mueller"),
            RosterEntry(first_name="Anna-Lena", last_name="Schmidt"),
        ]

        matches = parser.match_first_names(["Max"], roster)
        assert len(matches) == 1

        only_match = matches[0]
        # NOTE: the match type is only checked when the parser actually found
        # an entry; an unmatched result is tolerated by this test.
        if only_match.matched_entry is None:
            return
        assert only_match.match_type in ["first_name", "fuzzy"]

    def test_match_first_names_no_match(self):
        """Names absent from the roster come back without a matched entry."""
        parser = RosterParser()
        roster = [RosterEntry(first_name="Max", last_name="Mueller")]

        matches = parser.match_first_names(["Sophie", "Lisa"], roster)

        # Every detected name still yields a result, flagged as unmatched.
        assert len(matches) == 2
        for unmatched in matches:
            assert unmatched.matched_entry is None
            assert unmatched.match_type == "none"

    def test_roster_entry_creation(self):
        """RosterEntry stores the fields it was constructed with."""
        fields = {
            "first_name": "Max",
            "last_name": "Mueller",
            "student_number": "12345",
            "parent_email": "eltern@example.com",
            "parent_phone": "+49123456789",
        }
        entry = RosterEntry(**fields)

        assert entry.first_name == "Max"
        assert entry.last_name == "Mueller"
        assert entry.parent_email == "eltern@example.com"

    def test_name_match_dataclass(self):
        """NameMatch stores the detected name, roster entry and confidence."""
        roster_entry = RosterEntry(first_name="Max", last_name="Mueller")
        name_match = NameMatch(
            detected_name="Max",
            matched_entry=roster_entry,
            confidence=1.0,
            match_type="exact",
        )

        assert name_match.detected_name == "Max"
        assert name_match.matched_entry.last_name == "Mueller"
        assert name_match.confidence == 1.0
# =============================================================================
# SCHOOL RESOLVER TESTS
# =============================================================================
class TestSchoolResolver:
    """Unit tests for SchoolResolver and its lookup tables."""

    def test_bundeslaender_completeness(self):
        """BUNDESLAENDER maps all 16 state codes to state names."""
        assert len(BUNDESLAENDER) == 16

        # Spot-check a few codes (Niedersachsen, Bayern, Berlin).
        for code in ("NI", "BY", "BE"):
            assert code in BUNDESLAENDER

        # Values are the full state names.
        assert BUNDESLAENDER["NI"] == "Niedersachsen"

    def test_schulformen_have_grades(self):
        """Every Schulform entry carries a non-empty list of grades."""
        for info in SCHULFORMEN.values():
            assert "grades" in info
            grades = info["grades"]
            assert isinstance(grades, list)
            assert len(grades) > 0

    def test_detect_grade_from_class_name(self):
        """Class names in several notations resolve to the expected grade."""
        resolver = SchoolResolver()
        expectations = (
            ("3a", 3),
            ("10b", 10),
            ("Q1", 11),
            ("Q2", 12),
            ("12", 12),
        )
        for class_name, grade in expectations:
            assert resolver.detect_grade_from_class_name(class_name) == grade

    def test_detect_grade_returns_none_for_invalid(self):
        """Inputs without a recognizable grade yield None."""
        resolver = SchoolResolver()
        for class_name in ("abc", ""):
            assert resolver.detect_grade_from_class_name(class_name) is None

    def test_local_storage_initialization(self):
        """A fresh resolver starts with empty school and class stores."""
        fresh = SchoolResolver()
        assert fresh._local_schools == {}
        assert fresh._local_classes == {}
# =============================================================================
# MODULE LINKER TESTS
# =============================================================================
class TestModuleLinker:
    """Unit tests for ModuleLinker suggestions and grade statistics."""

    def test_suggest_elternabend_for_weak_students(self):
        """Failing grades produce one parent-meeting suggestion each."""
        linker = ModuleLinker()
        results = [
            CorrectionResult(
                doc_token=token, score=points, max_score=100,
                grade=grade, feedback=""
            )
            for token, points, grade in (
                ("token1", 25, "5"),
                ("token2", 85, "2"),
                ("token3", 30, "5-"),
                ("token4", 20, "6"),
            )
        ]

        suggestions = linker.suggest_elternabend(
            results, subject="Mathematik", threshold_grade="4"
        )

        # Grades 5, 5- and 6 are at or below the threshold; grade 2 is not.
        assert len(suggestions) == 3

        # Suggestions are keyed by pseudonymized doc_token (privacy).
        weak_tokens = ["token1", "token3", "token4"]
        for suggestion in suggestions:
            assert suggestion.doc_token in weak_tokens

    def test_suggest_elternabend_empty_for_good_class(self):
        """A class with grades 1-3 yields no suggestions."""
        linker = ModuleLinker()
        results = [
            CorrectionResult(
                doc_token=token, score=points, max_score=100,
                grade=grade, feedback=""
            )
            for token, points, grade in (
                ("token1", 95, "1"),
                ("token2", 85, "2"),
                ("token3", 78, "3"),
            )
        ]

        suggestions = linker.suggest_elternabend(
            results, subject="Deutsch", threshold_grade="4"
        )

        assert len(suggestions) == 0

    def test_calculate_grade_statistics(self):
        """Statistics report count, distribution and pass/fail totals."""
        linker = ModuleLinker()
        graded = (
            ("t1", 95, "1"),
            ("t2", 85, "2"),
            ("t3", 85, "2"),
            ("t4", 75, "3"),
            ("t5", 55, "4"),
            ("t6", 25, "5"),
        )
        results = [
            CorrectionResult(doc_token=token, score=points, max_score=100, grade=grade, feedback="")
            for token, points, grade in graded
        ]

        stats = linker.calculate_grade_statistics(results)

        assert isinstance(stats, dict)
        assert stats["count"] == 6

        # Per-grade distribution.
        distribution = stats["distribution"]
        assert distribution.get("1", 0) == 1
        assert distribution.get("2", 0) == 2
        assert distribution.get("3", 0) == 1

        # Grades 1-4 count as passing, grade 5 as failing.
        assert stats["passing_count"] == 5
        assert stats["failing_count"] == 1

    def test_calculate_statistics_empty_results(self):
        """An empty result list produces an empty statistics dict."""
        stats = ModuleLinker().calculate_grade_statistics([])
        assert stats == {}

    def test_correction_result_creation(self):
        """CorrectionResult stores the values passed to its constructor."""
        payload = dict(
            doc_token="abc-123",
            score=87,
            max_score=100,
            grade="2+",
            feedback="Gut geloest",
            question_results=[{"aufgabe": 1, "punkte": 10}],
        )
        result = CorrectionResult(**payload)

        assert result.doc_token == "abc-123"
        assert result.score == 87
        assert result.grade == "2+"
# =============================================================================
# DB MODEL TESTS
# =============================================================================
class TestOnboardingModels:
    """Unit tests for the Magic Onboarding enums and model reprs."""

    def test_onboarding_status_enum_values(self):
        """Each OnboardingStatus member exposes its expected string value."""
        expected = (
            (OnboardingStatus.ANALYZING, "analyzing"),
            (OnboardingStatus.CONFIRMING, "confirming"),
            (OnboardingStatus.PROCESSING, "processing"),
            (OnboardingStatus.LINKING, "linking"),
            (OnboardingStatus.COMPLETE, "complete"),
        )
        for member, raw_value in expected:
            assert member.value == raw_value

    def test_module_link_type_enum_values(self):
        """Each ModuleLinkType member exposes its expected string value."""
        expected = (
            (ModuleLinkType.NOTENBUCH, "notenbuch"),
            (ModuleLinkType.ELTERNABEND, "elternabend"),
            (ModuleLinkType.ZEUGNIS, "zeugnis"),
            (ModuleLinkType.CALENDAR, "calendar"),
            (ModuleLinkType.KLASSENBUCH, "klassenbuch"),
        )
        for member, raw_value in expected:
            assert member.value == raw_value

    def test_onboarding_session_repr(self):
        """repr(OnboardingSession) mentions id prefix, class and status."""
        text = repr(
            OnboardingSession(
                id="12345678-1234-1234-1234-123456789abc",
                teacher_id="teacher-1",
                detected_class="3a",
                status=OnboardingStatus.ANALYZING,
            )
        )
        for fragment in ("12345678", "3a", "analyzing"):
            assert fragment in text

    def test_detected_student_repr(self):
        """repr(DetectedStudent) mentions the detected first name."""
        text = repr(
            DetectedStudent(
                id="12345678-1234-1234-1234-123456789abc",
                detected_first_name="Max",
            )
        )
        assert "Max" in text

    def test_module_link_repr(self):
        """repr(ModuleLink) mentions the link type and target module."""
        text = repr(
            ModuleLink(
                id="12345678-1234-1234-1234-123456789abc",
                klausur_session_id="session-1",
                link_type=ModuleLinkType.NOTENBUCH,
                target_module="school",
            )
        )
        for fragment in ("notenbuch", "school"):
            assert fragment in text
# =============================================================================
# PRIVACY TESTS
# =============================================================================
class TestPrivacyInMagicOnboarding:
    """Tests ensuring privacy is maintained in Magic Onboarding."""

    def test_detected_student_no_full_last_name_in_detection(self):
        """Detection stores only a last-name hint; the confirmed name stays unset."""
        student = DetectedStudent(
            id="12345678-1234-1234-1234-123456789abc",
            detected_first_name="Max",
            detected_last_name_hint="M."  # Only initial/hint, not full name
        )

        # The detection phase should only have hints
        assert student.detected_last_name_hint == "M."
        # Full name is only set after teacher confirmation
        assert student.confirmed_last_name is None

    def test_module_link_uses_doc_tokens_not_names(self):
        """Parent-meeting suggestions reference pseudonymized doc_tokens."""
        linker = ModuleLinker()

        # Results should only contain doc_tokens, not student names
        results = [
            CorrectionResult(
                doc_token="uuid-token-1", score=45, max_score=100,
                grade="4", feedback=""
            ),
        ]

        suggestions = linker.suggest_elternabend(
            results, subject="Deutsch", threshold_grade="4"
        )

        # A grade equal to the threshold is expected to trigger a suggestion.
        # Without this check the loop below would pass vacuously on an empty
        # result and the privacy assertion would never run.
        assert len(suggestions) == 1

        # Suggestions reference doc_tokens, not names
        for suggestion in suggestions:
            assert hasattr(suggestion, 'doc_token')
            # Verify doc_token is the pseudonymized one
            assert suggestion.doc_token == "uuid-token-1"
# =============================================================================
# INTEGRATION FLOW TESTS
# =============================================================================
class TestMagicOnboardingFlow:
    """Flow-level tests covering status order, grading scale and urgency."""

    def test_onboarding_status_progression(self):
        """The enum's definition order mirrors the onboarding workflow."""
        statuses = list(OnboardingStatus)
        expected_order = (
            OnboardingStatus.ANALYZING,
            OnboardingStatus.CONFIRMING,
            OnboardingStatus.PROCESSING,
            OnboardingStatus.LINKING,
            OnboardingStatus.COMPLETE,
        )
        # Compare position by position against the expected sequence.
        for position, expected in enumerate(expected_order):
            assert statuses[position] == expected

    def test_grade_conversion_german_scale(self):
        """On the German 1-6 scale, grades 1-4 pass and 5-6 fail."""
        linker = ModuleLinker()
        graded = (
            ("t1", 95, "1"),
            ("t2", 80, "2"),
            ("t3", 65, "3"),
            ("t4", 50, "4"),
            ("t5", 30, "5"),
            ("t6", 15, "6"),
        )
        results = [
            CorrectionResult(doc_token=token, score=points, max_score=100, grade=grade, feedback="")
            for token, points, grade in graded
        ]

        stats = linker.calculate_grade_statistics(results)

        # Four passing results (1-4) and two failing ones (5, 6).
        assert stats["passing_count"] == 4
        assert stats["failing_count"] == 2

    def test_meeting_urgency_levels(self):
        """Every suggestion carries an urgency; grade 6 is HIGH."""
        linker = ModuleLinker()
        graded = (
            ("t1", 55, "4"),
            ("t2", 30, "5"),
            ("t3", 15, "6"),
        )
        results = [
            CorrectionResult(doc_token=token, score=points, max_score=100, grade=grade, feedback="")
            for token, points, grade in graded
        ]

        suggestions = linker.suggest_elternabend(
            results, subject="Mathe", threshold_grade="4"
        )

        # One urgency value per input result.
        urgencies = [entry.urgency for entry in suggestions]
        assert len(urgencies) == 3

        # The worst grade must be flagged with the highest urgency.
        worst = next(entry for entry in suggestions if entry.grade == "6")
        assert worst.urgency == MeetingUrgency.HIGH

View File

@@ -0,0 +1,209 @@
"""
Tests for PseudonymizationService.
Verifies that:
- doc_tokens are cryptographically random
- QR codes are generated correctly
- Header redaction works as expected
- No personal data leaks through pseudonymization
"""
import pytest
import uuid
from unittest.mock import patch, MagicMock
from klausur.services.pseudonymizer import (
PseudonymizationService,
get_pseudonymizer,
RedactionResult,
QRDetectionResult,
)
class TestDocTokenGeneration:
    """Unit tests for doc_token creation by PseudonymizationService."""

    def test_generate_doc_token_returns_valid_uuid(self):
        """A generated token parses as a version-4 UUID."""
        token = PseudonymizationService().generate_doc_token()

        # uuid.UUID raises for malformed input, so parsing doubles as validation.
        assert uuid.UUID(token).version == 4

    def test_generate_doc_token_is_unique(self):
        """1000 consecutively generated tokens contain no duplicates."""
        service = PseudonymizationService()
        seen = set()
        for _ in range(1000):
            seen.add(service.generate_doc_token())

        assert len(seen) == 1000

    def test_generate_batch_tokens_correct_count(self):
        """A batch has the requested size and only distinct tokens."""
        batch = PseudonymizationService().generate_batch_tokens(25)

        assert len(batch) == 25
        assert len(set(batch)) == 25  # no duplicates within the batch

    def test_token_no_correlation_to_index(self):
        """Two separately generated batches share no token."""
        service = PseudonymizationService()
        first_batch = service.generate_batch_tokens(10)
        second_batch = service.generate_batch_tokens(10)

        # The batches must be disjoint.
        assert set(first_batch).isdisjoint(second_batch)
class TestQRCodeGeneration:
    """Unit tests for QR code rendering; skipped when rendering raises RuntimeError."""

    def test_generate_qr_code_returns_bytes(self):
        """The rendered QR code is a bytes object starting with the PNG signature."""
        service = PseudonymizationService()
        token = service.generate_doc_token()

        try:
            qr_bytes = service.generate_qr_code(token)
        except RuntimeError:
            pytest.skip("qrcode library not installed")
        else:
            assert isinstance(qr_bytes, bytes)
            # First eight bytes are the PNG magic number.
            assert qr_bytes[:8] == b'\x89PNG\r\n\x1a\n'

    def test_generate_qr_code_custom_size(self):
        """A larger requested size produces a larger PNG payload."""
        service = PseudonymizationService()
        token = service.generate_doc_token()

        try:
            small = service.generate_qr_code(token, size=100)
            large = service.generate_qr_code(token, size=400)
        except RuntimeError:
            pytest.skip("qrcode library not installed")
        else:
            # Both renderings must be PNG images.
            for image in (small, large):
                assert image[:8] == b'\x89PNG\r\n\x1a\n'
            # The bigger image is expected to need more bytes.
            assert len(large) > len(small)
class TestHeaderRedaction:
    """Unit tests for PseudonymizationService.redact_header."""

    def test_redact_header_returns_redaction_result(self):
        """redact_header returns a RedactionResult whose image is bytes."""
        service = PseudonymizationService()

        # Hand-assembled 1x1 PNG, chunk by chunk.
        # NOTE(review): the IDAT chunk appears to carry fewer bytes than its
        # declared length (0x0c) plus CRC - confirm this fixture really decodes.
        # The assertions below hold whether or not it does.
        png_parts = [
            b'\x89PNG\r\n\x1a\n',      # PNG signature
            b'\x00\x00\x00\rIHDR',     # IHDR chunk
            b'\x00\x00\x00\x01',       # Width: 1
            b'\x00\x00\x00\x01',       # Height: 1
            b'\x08\x02',               # Bit depth: 8, Color type: RGB
            b'\x00\x00\x00',           # Compression, Filter, Interlace
            b'\x90wS\xde',             # CRC
            b'\x00\x00\x00\x0cIDATx\x9cc\xf8\x0f\x00\x00\x01\x01\x00\x05\x18\xd8N',  # IDAT
            b'\x00\x00\x00\x00IEND\xaeB`\x82',  # IEND
        ]
        test_png = b''.join(png_parts)

        outcome = service.redact_header(test_png)

        assert isinstance(outcome, RedactionResult)
        assert isinstance(outcome.redacted_image, bytes)

    def test_redact_header_with_invalid_image_returns_original(self):
        """Undecodable input is returned unchanged with redaction_applied False."""
        garbage = b'not an image'

        outcome = PseudonymizationService().redact_header(garbage)

        assert outcome.redacted_image == garbage
        assert outcome.redaction_applied is False
class TestQRDetection:
    """Unit tests for PseudonymizationService.detect_qr_code."""

    def test_detect_qr_code_no_qr_returns_none(self):
        """Input that is not a decodable image yields no token and zero confidence."""
        not_an_image = b'not an image with qr'

        detection = PseudonymizationService().detect_qr_code(not_an_image)

        assert detection.doc_token is None
        assert detection.confidence == 0.0
class TestSingleton:
    """Unit tests for the get_pseudonymizer accessor."""

    def test_get_pseudonymizer_returns_same_instance(self):
        """Repeated calls hand back the identical object."""
        first = get_pseudonymizer()
        second = get_pseudonymizer()
        assert first is second

    def test_pseudonymizer_is_service_instance(self):
        """The shared object is a PseudonymizationService."""
        assert isinstance(get_pseudonymizer(), PseudonymizationService)
class TestPrivacyGuarantees:
    """Tests verifying privacy-related properties of generated tokens."""

    def test_token_cannot_be_reversed_to_name(self):
        """Generated tokens contain no fragment of the sample student names.

        Token generation takes only a count, so names are never an input;
        this is a sanity check that no name text shows up in the output.
        """
        service = PseudonymizationService()

        # Generate tokens for "students"
        student_names = ["Max Mustermann", "Anna Schmidt", "Tim Mueller"]
        tokens = service.generate_batch_tokens(len(student_names))

        # Tokens should not contain any part of names
        for token in tokens:
            lowered_token = token.lower()
            for name in student_names:
                assert name.lower() not in lowered_token
                for part in name.split():
                    assert part.lower() not in lowered_token

    def test_token_generation_is_not_deterministic(self):
        """Two batches requested with the same count share no token."""
        service = PseudonymizationService()

        # Even with "same student count", tokens should differ
        batch1 = service.generate_batch_tokens(5)
        batch2 = service.generate_batch_tokens(5)

        # No tokens should match
        assert set(batch1).isdisjoint(batch2)

    def test_token_entropy(self):
        """Tokens are UUID-shaped and together use the whole hex alphabet."""
        service = PseudonymizationService()
        tokens = service.generate_batch_tokens(100)

        # Each token should be 36 chars (UUID format: 8-4-4-4-12)
        for token in tokens:
            assert len(token) == 36
            assert token.count('-') == 4

        # Rough entropy check: 100 tokens give 3200 hex characters, so every
        # hex digit is expected to appear. The previous ">= 10" bound would
        # also have accepted output made of decimal digits only.
        # Case is normalized so the hex casing does not matter.
        all_chars = ''.join(t.replace('-', '') for t in tokens).lower()
        assert set(all_chars) == set("0123456789abcdef")

View File

@@ -0,0 +1,248 @@
"""
Tests for KlausurRepository.
Verifies:
- Teacher isolation (critical for privacy)
- CRUD operations
- Data retention cleanup
"""
import pytest
from datetime import datetime, timedelta
from unittest.mock import MagicMock, patch
from sqlalchemy.orm import Session
from klausur.repository import KlausurRepository
from klausur.db_models import (
ExamSession, PseudonymizedDocument, QRBatchJob,
SessionStatus, DocumentStatus
)
@pytest.fixture
def mock_db():
    """Provide a MagicMock constrained to the SQLAlchemy Session interface."""
    session_double = MagicMock(spec=Session)
    return session_double
@pytest.fixture
def repo(mock_db):
    """Provide a KlausurRepository wired to the mocked database session."""
    repository = KlausurRepository(mock_db)
    return repository
class TestTeacherIsolation:
    """Tests for teacher namespace isolation (CRITICAL for privacy).

    The database session is a MagicMock, so these tests can only verify
    which query methods the repository invokes. They cannot prove that the
    filter predicate itself compares against the right teacher_id.
    """

    def test_get_session_requires_teacher_id(self, repo, mock_db):
        """get_session queries ExamSession and applies a filter."""
        # Make every chained query call return the same mock.
        mock_query = MagicMock()
        mock_db.query.return_value = mock_query
        mock_query.filter.return_value = mock_query
        mock_query.first.return_value = None

        repo.get_session("session-123", "teacher-A")

        # Verify filter was called (teacher isolation)
        mock_db.query.assert_called_with(ExamSession)
        mock_query.filter.assert_called()

    def test_list_sessions_only_returns_teacher_sessions(self, repo, mock_db):
        """list_sessions queries ExamSession and applies a filter."""
        mock_query = MagicMock()
        mock_db.query.return_value = mock_query
        # Each chained call yields the same mock so the chain ends in all().
        for chained in ("filter", "order_by", "offset", "limit"):
            getattr(mock_query, chained).return_value = mock_query
        mock_query.all.return_value = []

        repo.list_sessions("teacher-A")

        mock_db.query.assert_called_with(ExamSession)
        # Without this check an unfiltered listing would satisfy the test.
        mock_query.filter.assert_called()

    def test_get_document_verifies_teacher_ownership(self, repo, mock_db):
        """get_document joins another table before filtering."""
        mock_query = MagicMock()
        mock_db.query.return_value = mock_query
        mock_query.join.return_value = mock_query
        mock_query.filter.return_value = mock_query
        mock_query.first.return_value = None

        repo.get_document("doc-token-123", "teacher-A")

        # Must join with ExamSession to verify teacher_id
        mock_query.join.assert_called()

    def test_different_teachers_cannot_see_each_others_sessions(self, repo, mock_db):
        """get_session returns None when the filtered query finds no row.

        The mocked query is told to return None, standing in for a filter
        that excludes a session owned by teacher-B. The test checks that the
        repository passes that None through to the caller.
        """
        mock_query = MagicMock()
        mock_db.query.return_value = mock_query
        mock_query.filter.return_value = mock_query
        # Return None because filter should exclude teacher-B's session
        mock_query.first.return_value = None

        # Teacher A tries to access
        result = repo.get_session("session-123", "teacher-A")

        assert result is None
class TestSessionOperations:
    """Unit tests for creating and deleting exam sessions."""

    def test_create_session_sets_teacher_id(self, repo, mock_db):
        """The object handed to db.add carries the teacher id and name."""
        repo.create_session(
            teacher_id="teacher-123",
            name="Mathe Klausur",
            subject="Mathematik",
        )

        # Exactly one object is added; inspect its attributes.
        mock_db.add.assert_called_once()
        positional_args = mock_db.add.call_args[0]
        created = positional_args[0]
        assert created.teacher_id == "teacher-123"
        assert created.name == "Mathe Klausur"

    def test_create_session_sets_retention_date(self, repo, mock_db):
        """retention_until lands roughly retention_days in the future."""
        repo.create_session(
            teacher_id="teacher-123",
            name="Test",
            retention_days=30,
        )

        created = mock_db.add.call_args[0][0]
        assert created.retention_until is not None

        # Allow up to one minute of drift around "now + 30 days".
        target = datetime.utcnow() + timedelta(days=30)
        drift_seconds = abs((created.retention_until - target).total_seconds())
        assert drift_seconds < 60

    def test_delete_session_soft_delete_by_default(self, repo, mock_db):
        """Default deletion flips the status and leaves the row in place."""
        stored = MagicMock(spec=ExamSession)
        stored.status = SessionStatus.CREATED

        query = MagicMock()
        query.filter.return_value = query
        query.first.return_value = stored
        mock_db.query.return_value = query

        repo.delete_session("session-123", "teacher-A")

        # Status changes to DELETED; db.delete is never invoked.
        assert stored.status == SessionStatus.DELETED
        mock_db.delete.assert_not_called()

    def test_delete_session_hard_delete_when_requested(self, repo, mock_db):
        """hard_delete=True removes the row through db.delete."""
        stored = MagicMock(spec=ExamSession)

        query = MagicMock()
        query.filter.return_value = query
        query.first.return_value = stored
        mock_db.query.return_value = query

        repo.delete_session("session-123", "teacher-A", hard_delete=True)

        mock_db.delete.assert_called_once_with(stored)
class TestDocumentOperations:
    """Unit tests for creating and updating pseudonymized documents."""

    def test_create_document_requires_valid_session(self, repo, mock_db):
        """No document is created when the session lookup finds nothing."""
        # Simulate a missing session (wrong teacher or unknown id).
        query = MagicMock()
        query.filter.return_value = query
        query.first.return_value = None
        mock_db.query.return_value = query

        created = repo.create_document(
            session_id="session-123",
            teacher_id="teacher-A",
        )

        assert created is None

    def test_update_document_ocr_changes_status(self, repo, mock_db):
        """Storing OCR output records text, confidence and the new status."""
        document = MagicMock(spec=PseudonymizedDocument)
        document.status = DocumentStatus.UPLOADED

        # Bypass the real lookup and hand the repository our document.
        with patch.object(repo, 'get_document', return_value=document):
            repo.update_document_ocr(
                doc_token="doc-123",
                teacher_id="teacher-A",
                ocr_text="Student answer text",
                confidence=95,
            )

        assert document.ocr_text == "Student answer text"
        assert document.ocr_confidence == 95
        assert document.status == DocumentStatus.OCR_COMPLETED
class TestDataRetention:
    """Unit tests for retention-based cleanup."""

    def test_cleanup_expired_sessions(self, repo, mock_db):
        """An expired session is marked deleted and loses its identity map."""
        # A completed session whose retention ended yesterday.
        yesterday = datetime.utcnow() - timedelta(days=1)
        stale = MagicMock(spec=ExamSession)
        stale.retention_until = yesterday
        stale.status = SessionStatus.COMPLETED
        stale.encrypted_identity_map = b"encrypted_data"

        query = MagicMock()
        query.filter.return_value = query
        query.all.return_value = [stale]
        mock_db.query.return_value = query

        cleaned = repo.cleanup_expired_sessions()

        assert cleaned == 1
        assert stale.status == SessionStatus.DELETED
        # The encrypted identity map must be wiped as well.
        assert stale.encrypted_identity_map is None
class TestStatistics:
    """Unit tests for aggregate session statistics."""

    def test_get_session_stats_returns_anonymized_data(self, repo, mock_db):
        """The stats dict exposes aggregate keys and no per-student keys."""
        session = MagicMock(spec=ExamSession)
        session.document_count = 25
        session.processed_count = 20

        query = MagicMock()
        mock_db.query.return_value = query
        query.filter.return_value = query
        query.group_by.return_value = query
        # first() feeds the score statistics, which are indexed like a
        # tuple: (avg, min, max).
        query.first.return_value = (85.0, 60, 100)
        query.all.return_value = []

        with patch.object(repo, 'get_session', return_value=session):
            stats = repo.get_session_stats("session-123", "teacher-A")

        # Aggregate keys must be present ...
        for required_key in ("session_id", "total_documents"):
            assert required_key in stats
        # ... while keys that could carry names or tokens must be absent.
        for forbidden_key in ("student_names", "doc_tokens"):
            assert forbidden_key not in stats

View File

@@ -0,0 +1,346 @@
"""
Tests for Klausur API Routes.
Verifies:
- API endpoint behavior
- Request validation
- Response format
- Privacy guarantees at API level
"""
import pytest
from unittest.mock import MagicMock, patch, AsyncMock
from fastapi.testclient import TestClient
from fastapi import FastAPI
from klausur.routes import router
from klausur.db_models import SessionStatus, DocumentStatus
@pytest.fixture
def app():
    """Build a bare FastAPI application with the router mounted under /api."""
    application = FastAPI()
    application.include_router(router, prefix="/api")
    return application
@pytest.fixture
def client(app):
    """Wrap the test application in a TestClient."""
    test_client = TestClient(app)
    return test_client
class TestSessionEndpoints:
    """HTTP-level tests for the session endpoints.

    NOTE(review): these tests replace ``klausur.routes.get_db`` with
    ``patch``. If the routes obtain the session through ``Depends(get_db)``,
    patching the module attribute may not swap the dependency that FastAPI
    captured - confirm, and consider ``app.dependency_overrides``.
    """

    @patch('klausur.routes.KlausurRepository')
    @patch('klausur.routes.get_db')
    def test_create_session_returns_201(self, mock_get_db, mock_repo_class, client):
        """POSTing a valid session answers 201 and echoes name and status."""
        # The patched get_db yields a single mocked database session.
        mock_get_db.return_value = iter([MagicMock()])

        repository = MagicMock()
        mock_repo_class.return_value = repository

        # Stand-in for the session the repository would create.
        session_fields = {
            "id": "session-123",
            "name": "Test Klausur",
            "subject": "Mathe",
            "class_name": "10a",
            "total_points": 100,
            "status": SessionStatus.CREATED,
            "document_count": 0,
            "processed_count": 0,
            "created_at": "2024-01-15T10:00:00",
            "completed_at": None,
            "retention_until": "2024-02-15T10:00:00",
        }
        created = MagicMock()
        for attribute, value in session_fields.items():
            setattr(created, attribute, value)
        repository.create_session.return_value = created

        payload = {
            "name": "Test Klausur",
            "subject": "Mathe",
            "class_name": "10a"
        }
        response = client.post("/api/klausur/sessions", json=payload)

        assert response.status_code == 201
        body = response.json()
        assert body["name"] == "Test Klausur"
        assert body["status"] == "created"

    @patch('klausur.routes.KlausurRepository')
    @patch('klausur.routes.get_db')
    def test_create_session_validates_name(self, mock_get_db, mock_repo_class, client):
        """An empty session name is rejected with a validation error."""
        payload = {
            "name": "",  # empty on purpose
            "subject": "Mathe"
        }
        response = client.post("/api/klausur/sessions", json=payload)

        assert response.status_code == 422  # request validation failed

    @patch('klausur.routes.KlausurRepository')
    @patch('klausur.routes.get_db')
    def test_list_sessions_returns_array(self, mock_get_db, mock_repo_class, client):
        """The listing endpoint answers 200 with a "sessions" list."""
        mock_get_db.return_value = iter([MagicMock()])

        repository = MagicMock()
        repository.list_sessions.return_value = []
        mock_repo_class.return_value = repository

        response = client.get("/api/klausur/sessions")

        assert response.status_code == 200
        body = response.json()
        assert "sessions" in body
        assert isinstance(body["sessions"], list)

    @patch('klausur.routes.KlausurRepository')
    @patch('klausur.routes.get_db')
    def test_get_session_404_when_not_found(self, mock_get_db, mock_repo_class, client):
        """Requesting an unknown session id answers 404."""
        mock_get_db.return_value = iter([MagicMock()])

        repository = MagicMock()
        repository.get_session.return_value = None
        mock_repo_class.return_value = repository

        response = client.get("/api/klausur/sessions/nonexistent-123")

        assert response.status_code == 404
class TestQREndpoints:
    """HTTP-level tests for the QR batch endpoint."""

    @patch('klausur.routes.KlausurRepository')
    @patch('klausur.routes.get_pseudonymizer')
    @patch('klausur.routes.get_db')
    def test_generate_qr_batch_creates_tokens(
        self, mock_get_db, mock_get_pseudonymizer, mock_repo_class, client
    ):
        """A batch request for five students returns five generated tokens."""
        mock_get_db.return_value = iter([MagicMock()])

        # Repository double: the session exists and a batch can be stored.
        repository = MagicMock()
        repository.get_session.return_value = MagicMock()
        stored_batch = MagicMock()
        stored_batch.id = "batch-123"
        stored_batch.student_count = 5
        repository.create_qr_batch.return_value = stored_batch
        mock_repo_class.return_value = repository

        # Pseudonymizer double handing out five fixed tokens.
        pseudonymizer = MagicMock()
        pseudonymizer.generate_batch_tokens.return_value = [
            "token-1", "token-2", "token-3", "token-4", "token-5"
        ]
        mock_get_pseudonymizer.return_value = pseudonymizer

        payload = {
            "student_count": 5
        }
        response = client.post("/api/klausur/sessions/session-123/qr-batch", json=payload)

        assert response.status_code == 200
        body = response.json()
        assert len(body["generated_tokens"]) == 5

    @patch('klausur.routes.KlausurRepository')
    @patch('klausur.routes.get_db')
    def test_qr_batch_validates_student_count(self, mock_get_db, mock_repo_class, client):
        """A student count above the allowed maximum is rejected with 422."""
        payload = {
            "student_count": 200  # exceeds the maximum of 100
        }
        response = client.post("/api/klausur/sessions/session-123/qr-batch", json=payload)

        assert response.status_code == 422
class TestUploadEndpoints:
    """Exercise the document upload endpoint of the Klausur API."""

    @patch('klausur.routes.KlausurRepository')
    @patch('klausur.routes.get_pseudonymizer')
    @patch('klausur.routes.get_db')
    def test_upload_applies_redaction_by_default(
        self, mock_get_db, mock_get_pseudonymizer, mock_repo_class, client
    ):
        """Uploading a file with no extra options triggers header redaction once."""
        mock_get_db.return_value = iter([MagicMock()])

        # Document record the repository stand-in returns after creation.
        stored_doc = MagicMock(
            doc_token="doc-token-123",
            session_id="session-123",
            status=DocumentStatus.UPLOADED,
            page_number=1,
            total_pages=1,
            ocr_confidence=0,
            ai_score=None,
            ai_grade=None,
            ai_feedback=None,
            created_at="2024-01-15T10:00:00",
            processing_completed_at=None,
        )
        repo = MagicMock()
        repo.get_session.return_value = MagicMock()
        repo.create_document.return_value = stored_doc
        mock_repo_class.return_value = repo

        # Pseudonymizer stand-in: QR detection finds no token, a fresh token is
        # generated, and header redaction reports that it was applied.
        redaction_outcome = MagicMock(
            redaction_applied=True,
            redacted_image=b"redacted",
            redacted_height=300,
        )
        pseudonymizer = MagicMock()
        pseudonymizer.detect_qr_code.return_value = MagicMock(doc_token=None)
        pseudonymizer.generate_doc_token.return_value = "doc-token-123"
        pseudonymizer.smart_redact_header.return_value = redaction_outcome
        mock_get_pseudonymizer.return_value = pseudonymizer

        # Minimal multipart upload; the response itself is not inspected here.
        fake_png = ("test.png", b"fake image data", "image/png")
        client.post(
            "/api/klausur/sessions/session-123/upload",
            files={"file": fake_png},
        )

        # The only expectation: redaction ran exactly once.
        pseudonymizer.smart_redact_header.assert_called_once()
class TestResultsEndpoints:
    """Exercise the correction results endpoint of the Klausur API."""

    @patch('klausur.routes.KlausurRepository')
    @patch('klausur.routes.get_db')
    def test_results_only_return_pseudonymized_data(
        self, mock_get_db, mock_repo_class, client
    ):
        """Result entries expose ``doc_token`` and carry no name fields."""
        mock_get_db.return_value = iter([MagicMock()])

        # One completed, graded document identified only by its token.
        graded_doc = MagicMock(
            doc_token="anonymous-token-123",
            status=DocumentStatus.COMPLETED,
            ai_score=85,
            ai_grade="2+",
            ai_feedback="Good work",
            ai_details={},
        )
        repo = MagicMock()
        repo.get_session.return_value = MagicMock(total_points=100)
        repo.list_documents.return_value = [graded_doc]
        mock_repo_class.return_value = repo

        fetched = client.get("/api/klausur/sessions/session-123/results")

        assert fetched.status_code == 200
        results = fetched.json()

        # Exactly one entry, keyed by token and free of any name field.
        assert len(results) == 1
        entry = results[0]
        assert "doc_token" in entry
        assert "student_name" not in entry
        assert "name" not in entry
class TestIdentityMapEndpoints:
    """Exercise the identity map (vault) endpoints of the Klausur API."""

    @patch('klausur.routes.KlausurRepository')
    @patch('klausur.routes.get_db')
    def test_store_identity_map_accepts_encrypted_data(
        self, mock_get_db, mock_repo_class, client
    ):
        """Posting a base64 payload plus IV is answered with 204."""
        import base64

        mock_get_db.return_value = iter([MagicMock()])

        # Repository stand-in whose identity-map update returns a session object.
        repo = MagicMock()
        repo.update_session_identity_map.return_value = MagicMock()
        mock_repo_class.return_value = repo

        # Placeholder ciphertext: base64 of a fixed byte string.
        ciphertext = base64.b64encode(b"encrypted identity map").decode()
        vault_payload = {"encrypted_data": ciphertext, "iv": "base64iv=="}

        stored = client.post(
            "/api/klausur/sessions/session-123/identity-map",
            json=vault_payload,
        )

        assert stored.status_code == 204

    @patch('klausur.routes.KlausurRepository')
    @patch('klausur.routes.get_db')
    def test_get_identity_map_returns_encrypted_blob(
        self, mock_get_db, mock_repo_class, client
    ):
        """Fetching the identity map yields ``encrypted_data`` and ``iv`` keys."""
        mock_get_db.return_value = iter([MagicMock()])

        # Session stand-in that already carries an encrypted map and its IV.
        vault_session = MagicMock(
            encrypted_identity_map=b"encrypted data",
            identity_map_iv="ivvalue",
        )
        repo = MagicMock()
        repo.get_session.return_value = vault_session
        mock_repo_class.return_value = repo

        fetched = client.get("/api/klausur/sessions/session-123/identity-map")

        assert fetched.status_code == 200
        body = fetched.json()
        assert "encrypted_data" in body
        assert "iv" in body
class TestPrivacyAtAPILevel:
    """Schema-level checks that API models carry no student-identifying fields."""

    def test_no_student_names_in_any_response_schema(self):
        """No response model declares a field named after a student."""
        from klausur.routes import (
            SessionResponse, DocumentResponse, CorrectionResultResponse
        )

        # Exact (case-insensitive) field names that would indicate PII.
        # A plain "name" field is allowed: it is used for the session/exam
        # name, e.g. "Mathe Klausur".
        forbidden = ["student_name", "schueler_name", "student", "pupil", "schueler"]

        # Gather the field names of every response model before checking any.
        response_models = (SessionResponse, DocumentResponse, CorrectionResultResponse)
        all_fields = [
            field_name
            for model in response_models
            for field_name in model.model_fields
        ]

        for field in all_fields:
            assert field.lower() not in forbidden, f"Field '{field}' may contain PII"

    def test_identity_map_request_requires_encryption(self):
        """The identity-map request schema has ``encrypted_data`` and no name lists."""
        from klausur.routes import IdentityMapUpdate

        schema_fields = set(IdentityMapUpdate.model_fields)

        # Only the encrypted payload field may exist, never plain-text names.
        assert "encrypted_data" in schema_fields
        assert "names" not in schema_fields
        assert "student_names" not in schema_fields