""" SQLAlchemy Database Models for Klausurkorrektur Module. Privacy-by-Design: No personal data (student names) is stored in these models. Only pseudonymized doc_tokens are used to reference exam documents. """ from datetime import datetime from sqlalchemy import ( Column, String, Integer, DateTime, JSON, Boolean, Text, Enum as SQLEnum, ForeignKey, LargeBinary ) from sqlalchemy.orm import relationship import enum import uuid from .database import Base class SessionStatus(str, enum.Enum): """Status of an exam correction session.""" CREATED = "created" # Session created, awaiting uploads UPLOADING = "uploading" # Documents being uploaded PROCESSING = "processing" # OCR and AI correction in progress COMPLETED = "completed" # All documents processed ARCHIVED = "archived" # Session archived (data retention) DELETED = "deleted" # Soft delete class OnboardingStatus(str, enum.Enum): """Status of a magic onboarding session.""" ANALYZING = "analyzing" # Local LLM extracting headers CONFIRMING = "confirming" # User confirming detected data PROCESSING = "processing" # Cloud LLM correcting exams LINKING = "linking" # Creating module links COMPLETE = "complete" # Onboarding finished class ModuleLinkType(str, enum.Enum): """Type of cross-module link.""" NOTENBUCH = "notenbuch" # Link to grade book ELTERNABEND = "elternabend" # Link to parent meetings ZEUGNIS = "zeugnis" # Link to certificates CALENDAR = "calendar" # Link to calendar events KLASSENBUCH = "klassenbuch" # Link to class book class DocumentStatus(str, enum.Enum): """Status of a single pseudonymized document.""" UPLOADED = "uploaded" # Document uploaded, awaiting OCR OCR_PROCESSING = "ocr_processing" # OCR in progress OCR_COMPLETED = "ocr_completed" # OCR done, awaiting AI correction AI_PROCESSING = "ai_processing" # AI correction in progress COMPLETED = "completed" # Fully processed FAILED = "failed" # Processing failed class ExamSession(Base): """ Exam Correction Session. Groups multiple pseudonymized documents for a single exam correction task. No personal data is stored - teacher_id is the only identifying info. """ __tablename__ = 'klausur_sessions' # Primary Key id = Column(String(36), primary_key=True, default=lambda: str(uuid.uuid4())) # Teacher isolation (mandatory) teacher_id = Column(String(100), nullable=False, index=True) # Session metadata name = Column(String(200), nullable=False) # e.g., "Mathe 10a - Klausur 1" subject = Column(String(100), default="") class_name = Column(String(100), default="") # e.g., "10a" # Exam configuration total_points = Column(Integer, default=100) rubric = Column(Text, default="") # Bewertungskriterien questions = Column(JSON, default=list) # [{question, points, rubric}] # Status status = Column( SQLEnum(SessionStatus), default=SessionStatus.CREATED, nullable=False, index=True ) # Statistics (anonymized) document_count = Column(Integer, default=0) processed_count = Column(Integer, default=0) # Encrypted identity map (only teacher can decrypt) # This is stored encrypted with teacher's password encrypted_identity_map = Column(LargeBinary, nullable=True) identity_map_iv = Column(String(64), nullable=True) # IV for AES decryption # Timestamps created_at = Column(DateTime, default=datetime.utcnow) updated_at = Column(DateTime, default=datetime.utcnow, onupdate=datetime.utcnow) completed_at = Column(DateTime, nullable=True) # Data retention: auto-delete after this date retention_until = Column(DateTime, nullable=True) # Magic Onboarding: Link to school class (optional) linked_school_class_id = Column(String(36), nullable=True) linked_subject_id = Column(String(36), nullable=True) # Relationship to documents documents = relationship( "PseudonymizedDocument", back_populates="session", cascade="all, delete-orphan" ) def __repr__(self): return f"" class PseudonymizedDocument(Base): """ Pseudonymized Exam Document. PRIVACY DESIGN: - doc_token is a 128-bit random UUID, NOT derivable from student identity - No student name or personal info is stored here - Identity mapping is stored encrypted in ExamSession.encrypted_identity_map - The backend CANNOT de-pseudonymize documents Only the teacher (with their encryption key) can map doc_token -> student name. """ __tablename__ = 'klausur_documents' # Primary Key: The pseudonymization token doc_token = Column(String(36), primary_key=True, default=lambda: str(uuid.uuid4())) # Session relationship session_id = Column(String(36), ForeignKey('klausur_sessions.id'), nullable=False, index=True) # Processing status status = Column( SQLEnum(DocumentStatus), default=DocumentStatus.UPLOADED, nullable=False, index=True ) # Page info page_number = Column(Integer, default=1) total_pages = Column(Integer, default=1) # OCR result (redacted - no header/name visible) ocr_text = Column(Text, default="") ocr_confidence = Column(Integer, default=0) # 0-100 # AI correction result (pseudonymized) ai_feedback = Column(Text, default="") ai_score = Column(Integer, nullable=True) # Points achieved ai_grade = Column(String(10), nullable=True) # e.g., "2+" or "B" ai_details = Column(JSON, default=dict) # Per-question scores # Processing metadata processing_started_at = Column(DateTime, nullable=True) processing_completed_at = Column(DateTime, nullable=True) processing_error = Column(Text, nullable=True) # Timestamps created_at = Column(DateTime, default=datetime.utcnow) updated_at = Column(DateTime, default=datetime.utcnow, onupdate=datetime.utcnow) # Relationship session = relationship("ExamSession", back_populates="documents") def __repr__(self): return f"" class QRBatchJob(Base): """ QR Code Generation Batch Job. Tracks generation of QR overlay sheets for printing. The generated PDF contains QR codes with doc_tokens. """ __tablename__ = 'klausur_qr_batches' id = Column(String(36), primary_key=True, default=lambda: str(uuid.uuid4())) # Session relationship session_id = Column(String(36), ForeignKey('klausur_sessions.id'), nullable=False, index=True) teacher_id = Column(String(100), nullable=False, index=True) # Batch info student_count = Column(Integer, nullable=False) generated_tokens = Column(JSON, default=list) # List of generated doc_tokens # Generated PDF (stored as path reference, not in DB) pdf_path = Column(String(500), nullable=True) # Timestamps created_at = Column(DateTime, default=datetime.utcnow) downloaded_at = Column(DateTime, nullable=True) def __repr__(self): return f"" class OnboardingSession(Base): """ Magic Onboarding Session. Tracks the automatic class/student detection and setup process. Temporary data structure - merged into ExamSession after confirmation. """ __tablename__ = 'klausur_onboarding_sessions' id = Column(String(36), primary_key=True, default=lambda: str(uuid.uuid4())) # Links klausur_session_id = Column(String(36), ForeignKey('klausur_sessions.id'), nullable=True) teacher_id = Column(String(100), nullable=False, index=True) # Detected metadata (from local LLM) detected_class = Column(String(100), nullable=True) detected_subject = Column(String(100), nullable=True) detected_date = Column(DateTime, nullable=True) detected_student_count = Column(Integer, default=0) detection_confidence = Column(Integer, default=0) # 0-100 # Confirmed data (after user review) confirmed_class = Column(String(100), nullable=True) confirmed_subject = Column(String(100), nullable=True) # Linked school entities (after confirmation) linked_school_id = Column(String(36), nullable=True) linked_class_id = Column(String(36), nullable=True) # School context bundesland = Column(String(50), nullable=True) schulform = Column(String(50), nullable=True) school_name = Column(String(200), nullable=True) # Status status = Column( SQLEnum(OnboardingStatus), default=OnboardingStatus.ANALYZING, nullable=False, index=True ) # Progress tracking analysis_completed_at = Column(DateTime, nullable=True) confirmation_completed_at = Column(DateTime, nullable=True) processing_started_at = Column(DateTime, nullable=True) processing_completed_at = Column(DateTime, nullable=True) # Timestamps created_at = Column(DateTime, default=datetime.utcnow) updated_at = Column(DateTime, default=datetime.utcnow, onupdate=datetime.utcnow) # Relationships detected_students = relationship( "DetectedStudent", back_populates="onboarding_session", cascade="all, delete-orphan" ) def __repr__(self): return f"" class DetectedStudent(Base): """ Student detected during Magic Onboarding. Temporary storage for detected student data before confirmation. After confirmation, students are created in the School Service. """ __tablename__ = 'klausur_detected_students' id = Column(String(36), primary_key=True, default=lambda: str(uuid.uuid4())) # Onboarding session onboarding_session_id = Column( String(36), ForeignKey('klausur_onboarding_sessions.id'), nullable=False, index=True ) # Detected data (from exam header) detected_first_name = Column(String(100), nullable=True) detected_last_name_hint = Column(String(100), nullable=True) # Partial, e.g. "M." # Confirmed data (after roster matching) confirmed_first_name = Column(String(100), nullable=True) confirmed_last_name = Column(String(100), nullable=True) # Matched to School Service student matched_student_id = Column(String(36), nullable=True) # Parent contact (extracted from roster) parent_email = Column(String(200), nullable=True) parent_phone = Column(String(50), nullable=True) # Link to pseudonymized document doc_token = Column(String(36), nullable=True) # Confidence confidence = Column(Integer, default=0) # 0-100 # Timestamps created_at = Column(DateTime, default=datetime.utcnow) # Relationship onboarding_session = relationship("OnboardingSession", back_populates="detected_students") def __repr__(self): name = self.confirmed_first_name or self.detected_first_name or "?" return f"" class ModuleLink(Base): """ Cross-module link from Klausur to other BreakPilot modules. Tracks connections to: Notenbuch, Elternabend, Zeugnis, Calendar """ __tablename__ = 'klausur_module_links' id = Column(String(36), primary_key=True, default=lambda: str(uuid.uuid4())) # Source klausur_session_id = Column( String(36), ForeignKey('klausur_sessions.id'), nullable=False, index=True ) # Link type link_type = Column( SQLEnum(ModuleLinkType), nullable=False, index=True ) # Target target_module = Column(String(50), nullable=False) # school, calendar, etc. target_entity_id = Column(String(36), nullable=True) target_url = Column(String(500), nullable=True) # Link metadata link_metadata = Column(JSON, default=dict) # Timestamps created_at = Column(DateTime, default=datetime.utcnow) def __repr__(self): return f" {self.target_module}>" # Export all models __all__ = [ "SessionStatus", "DocumentStatus", "OnboardingStatus", "ModuleLinkType", "ExamSession", "PseudonymizedDocument", "QRBatchJob", "OnboardingSession", "DetectedStudent", "ModuleLink", ]