backend-lehrer (11 files):
- llm_gateway/routes/schools.py (867 → 5), recording_api.py (848 → 6)
- messenger_api.py (840 → 5), print_generator.py (824 → 5)
- unit_analytics_api.py (751 → 5), classroom/routes/context.py (726 → 4)
- llm_gateway/routes/edu_search_seeds.py (710 → 4)

klausur-service (12 files):
- ocr_labeling_api.py (845 → 4), metrics_db.py (833 → 4)
- legal_corpus_api.py (790 → 4), page_crop.py (758 → 3)
- mail/ai_service.py (747 → 4), github_crawler.py (767 → 3)
- trocr_service.py (730 → 4), full_compliance_pipeline.py (723 → 4)
- dsfa_rag_api.py (715 → 4), ocr_pipeline_auto.py (705 → 4)

website (6 pages):
- audit-checklist (867 → 8), content (806 → 6)
- screen-flow (790 → 4), scraper (789 → 5)
- zeugnisse (776 → 5), modules (745 → 4)

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
87 lines
2.0 KiB
Python
87 lines
2.0 KiB
Python
"""
OCR Labeling - Pydantic Models and Constants

Extracted from ocr_labeling_api.py to keep files under 500 LOC.
"""
|
|
|
|
import os
|
|
from pydantic import BaseModel
|
|
from typing import Optional, Dict
|
|
from datetime import datetime
|
|
|
|
|
|
# Directory used for local storage when MinIO is not available.
# Read from the OCR_STORAGE_PATH environment variable at import time,
# falling back to the path below when the variable is unset.
LOCAL_STORAGE_PATH = os.environ.get("OCR_STORAGE_PATH", "/app/ocr-labeling")
|
|
|
|
|
|
# =============================================================================
|
|
# Pydantic Models
|
|
# =============================================================================
|
|
|
|
class SessionCreate(BaseModel):
    # Input model describing a new labeling session.
    # NOTE(review): purpose inferred from the class name; the endpoint that
    # consumes this model is not part of this file.

    # Required: no default is declared.
    name: str

    # Values noted by the original author: klausur, handwriting_sample, scan.
    # The type is a plain str, so nothing here restricts it to that set.
    source_type: str = "klausur"

    # May be omitted; defaults to None.
    description: Optional[str] = None

    # Model identifier string; defaults to the value below when not supplied.
    ocr_model: Optional[str] = "llama3.2-vision:11b"
|
|
|
|
|
|
class SessionResponse(BaseModel):
    # Output model for a labeling session together with its item counters.
    # NOTE(review): purpose inferred from the class name; confirm against the
    # route that returns it.

    # --- identity / descriptive fields ---
    id: str
    name: str
    source_type: str
    # Nullable, and declared without a default value.
    description: Optional[str]
    # Nullable, and declared without a default value.
    ocr_model: Optional[str]

    # --- per-session counters (all plain ints, no defaults) ---
    total_items: int
    labeled_items: int
    confirmed_items: int
    corrected_items: int
    skipped_items: int

    # --- timestamps ---
    created_at: datetime
|
|
|
|
|
|
class ItemResponse(BaseModel):
    # Output model for a single labeling item.
    # NOTE(review): purpose inferred from the class name; confirm against the
    # route that returns it.

    # --- identity and owning session ---
    id: str
    session_id: str
    session_name: str

    # --- image location ---
    image_path: str
    # Nullable, declared without a default value.
    image_url: Optional[str]

    # --- OCR output and label (each nullable, no defaults declared) ---
    ocr_text: Optional[str]
    ocr_confidence: Optional[float]
    ground_truth: Optional[str]

    # Free-form string; the set of valid values is not defined in this file.
    status: str

    # Untyped dictionary (bare Dict), nullable.
    metadata: Optional[Dict]

    created_at: datetime
|
|
|
|
|
|
class ConfirmRequest(BaseModel):
    # Input model carrying the item to confirm.
    # NOTE(review): purpose inferred from the class name.

    # Required identifier of the item.
    item_id: str

    # Optional whole-number duration; None when not supplied.
    label_time_seconds: Optional[int] = None
|
|
|
|
|
|
class CorrectRequest(BaseModel):
    # Input model carrying an item id plus the replacement ground-truth text.
    # NOTE(review): purpose inferred from the class name.

    # Required identifier of the item.
    item_id: str

    # Required text value; no default is declared.
    ground_truth: str

    # Optional whole-number duration; None when not supplied.
    label_time_seconds: Optional[int] = None
|
|
|
|
|
|
class SkipRequest(BaseModel):
    # Input model whose only field is the item id.
    # NOTE(review): purpose inferred from the class name.

    # Required identifier of the item.
    item_id: str
|
|
|
|
|
|
class ExportRequest(BaseModel):
    # Input model selecting an export format and, optionally, a session
    # and/or batch id.
    # NOTE(review): how session_id and batch_id are used is decided by the
    # caller and is not visible in this file.

    # Values noted by the original author: generic, trocr, llama_vision.
    # The type is a plain str, so nothing here restricts it to that set.
    export_format: str = "generic"

    # Both identifiers are optional and default to None.
    session_id: Optional[str] = None
    batch_id: Optional[str] = None
|
|
|
|
|
|
class StatsResponse(BaseModel):
    # Output model holding aggregate labeling counters and rates.
    # NOTE(review): how each figure is computed is not shown in this file.

    # Optional; None when not supplied.
    total_sessions: Optional[int] = None

    # --- required integer counters ---
    total_items: int
    labeled_items: int
    confirmed_items: int
    corrected_items: int
    pending_items: int

    # Optional; None when not supplied.
    exportable_items: Optional[int] = None

    # Required float. NOTE(review): scale (0-1 vs. percent) is not stated
    # here; confirm with the producer.
    accuracy_rate: float

    # Optional; None when not supplied.
    avg_label_time_seconds: Optional[float] = None
|