""" OCR Labeling - Pydantic Models and Constants Extracted from ocr_labeling_api.py to keep files under 500 LOC. """ import os from pydantic import BaseModel from typing import Optional, Dict from datetime import datetime # Local storage path (fallback if MinIO not available) LOCAL_STORAGE_PATH = os.getenv("OCR_STORAGE_PATH", "/app/ocr-labeling") # ============================================================================= # Pydantic Models # ============================================================================= class SessionCreate(BaseModel): name: str source_type: str = "klausur" # klausur, handwriting_sample, scan description: Optional[str] = None ocr_model: Optional[str] = "llama3.2-vision:11b" class SessionResponse(BaseModel): id: str name: str source_type: str description: Optional[str] ocr_model: Optional[str] total_items: int labeled_items: int confirmed_items: int corrected_items: int skipped_items: int created_at: datetime class ItemResponse(BaseModel): id: str session_id: str session_name: str image_path: str image_url: Optional[str] ocr_text: Optional[str] ocr_confidence: Optional[float] ground_truth: Optional[str] status: str metadata: Optional[Dict] created_at: datetime class ConfirmRequest(BaseModel): item_id: str label_time_seconds: Optional[int] = None class CorrectRequest(BaseModel): item_id: str ground_truth: str label_time_seconds: Optional[int] = None class SkipRequest(BaseModel): item_id: str class ExportRequest(BaseModel): export_format: str = "generic" # generic, trocr, llama_vision session_id: Optional[str] = None batch_id: Optional[str] = None class StatsResponse(BaseModel): total_sessions: Optional[int] = None total_items: int labeled_items: int confirmed_items: int corrected_items: int pending_items: int exportable_items: Optional[int] = None accuracy_rate: float avg_label_time_seconds: Optional[float] = None