Some checks failed
CI / go-lint (push) Has been skipped
CI / python-lint (push) Has been skipped
CI / nodejs-lint (push) Has been skipped
CI / test-go-school (push) Successful in 29s
CI / test-go-edu-search (push) Successful in 29s
CI / test-python-klausur (push) Failing after 2m25s
CI / test-python-agent-core (push) Successful in 19s
CI / test-nodejs-website (push) Successful in 20s
Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
87 lines
2.0 KiB
Python
87 lines
2.0 KiB
Python
"""
OCR Labeling - Pydantic Models and Constants

Extracted from ocr_labeling_api.py to keep files under 500 LOC.
"""
|
|
|
|
import os
|
|
from pydantic import BaseModel
|
|
from typing import Optional, Dict
|
|
from datetime import datetime
|
|
|
|
|
|
# Local storage path (fallback if MinIO not available)
|
|
# Resolved once at import time: the OCR_STORAGE_PATH environment variable wins,
# otherwise the hard-coded "/app/ocr-labeling" path is used.
LOCAL_STORAGE_PATH = os.environ.get("OCR_STORAGE_PATH", "/app/ocr-labeling")
|
|
|
|
|
|
# =============================================================================
|
|
# Pydantic Models
|
|
# =============================================================================
|
|
|
|
class SessionCreate(BaseModel):
    # Fields supplied when a labeling session is created (presumably the
    # request body of the create endpoint -- the caller is outside this file).
    # Only `name` is mandatory; every other field carries a default.

    name: str

    # Documented values: klausur, handwriting_sample, scan.
    # The annotation is a plain `str`, so this model itself does not restrict
    # the value to that list.
    source_type: str = "klausur"

    description: Optional[str] = None

    # Falls back to the "llama3.2-vision:11b" identifier when omitted; an
    # explicit None is also accepted by the annotation.
    ocr_model: Optional[str] = "llama3.2-vision:11b"
|
|
|
|
|
|
class SessionResponse(BaseModel):
    # Session record together with its integer item counters.
    #
    # NOTE(review): `description` and `ocr_model` are Optional but declare no
    # default. Pydantic v1 treats that as "defaults to None"; Pydantic v2
    # treats it as "required, but may be None". Confirm which behaviour the
    # callers rely on before changing either field.

    # --- identity / descriptive fields ---
    id: str
    name: str
    source_type: str
    description: Optional[str]
    ocr_model: Optional[str]

    # --- item counters (all required) ---
    total_items: int
    labeled_items: int
    confirmed_items: int
    corrected_items: int
    skipped_items: int

    created_at: datetime
|
|
|
|
|
|
class ItemResponse(BaseModel):
    # A single labeling item, including the id and name of the session it
    # belongs to.
    #
    # NOTE(review): the Optional fields below (`image_url`, `ocr_text`,
    # `ocr_confidence`, `ground_truth`, `metadata`) declare no default.
    # Pydantic v1 treats that as "defaults to None"; Pydantic v2 treats it as
    # "required, but may be None". Confirm which behaviour is intended.

    # --- identity ---
    id: str
    session_id: str
    session_name: str

    # --- image location ---
    image_path: str
    image_url: Optional[str]

    # --- OCR output and label ---
    ocr_text: Optional[str]
    ocr_confidence: Optional[float]
    ground_truth: Optional[str]
    status: str

    # Bare `Dict`: key and value types are not constrained by this model.
    metadata: Optional[Dict]

    created_at: datetime
|
|
|
|
|
|
class ConfirmRequest(BaseModel):
    # Payload naming the item to confirm (presumably "the OCR text is
    # correct" -- verify against the endpoint that consumes this model).

    item_id: str

    # May be omitted; defaults to None. Whole seconds, per the field name and
    # the `int` annotation.
    label_time_seconds: Optional[int] = None
|
|
|
|
|
|
class CorrectRequest(BaseModel):
    # Payload naming an item and the ground-truth text to record for it.
    # Both `item_id` and `ground_truth` are required.

    item_id: str
    ground_truth: str

    # May be omitted; defaults to None. Whole seconds, per the field name and
    # the `int` annotation.
    label_time_seconds: Optional[int] = None
|
|
|
|
|
|
class SkipRequest(BaseModel):
    # Payload naming the item to skip; it carries nothing but the item id.

    item_id: str
|
|
|
|
|
|
class ExportRequest(BaseModel):
    # Export parameters. Every field has a default, so an empty payload
    # validates.

    # Documented values: generic, trocr, llama_vision.
    # The annotation is a plain `str`, so this model itself does not restrict
    # the value to that list.
    export_format: str = "generic"

    # Both ids are optional and default to None. Presumably they narrow or
    # tag the export -- verify against the endpoint that consumes this model.
    session_id: Optional[str] = None
    batch_id: Optional[str] = None
|
|
|
|
|
|
class StatsResponse(BaseModel):
    # Aggregate labeling statistics. Fields written with `= None` may be left
    # out by the producer; all other fields are required.

    total_sessions: Optional[int] = None

    # --- required item counters ---
    total_items: int
    labeled_items: int
    confirmed_items: int
    corrected_items: int
    pending_items: int

    exportable_items: Optional[int] = None

    # Required float. NOTE(review): whether this is a 0-1 fraction or a
    # 0-100 percentage is not visible in this file -- confirm with the
    # producer.
    accuracy_rate: float

    avg_label_time_seconds: Optional[float] = None
|