[split-required] Split 700-870 LOC files across all services
backend-lehrer (11 files):
- llm_gateway/routes/schools.py (867 → 5), recording_api.py (848 → 6)
- messenger_api.py (840 → 5), print_generator.py (824 → 5)
- unit_analytics_api.py (751 → 5), classroom/routes/context.py (726 → 4)
- llm_gateway/routes/edu_search_seeds.py (710 → 4)

klausur-service (12 files):
- ocr_labeling_api.py (845 → 4), metrics_db.py (833 → 4)
- legal_corpus_api.py (790 → 4), page_crop.py (758 → 3)
- mail/ai_service.py (747 → 4), github_crawler.py (767 → 3)
- trocr_service.py (730 → 4), full_compliance_pipeline.py (723 → 4)
- dsfa_rag_api.py (715 → 4), ocr_pipeline_auto.py (705 → 4)

website (6 pages):
- audit-checklist (867 → 8), content (806 → 6)
- screen-flow (790 → 4), scraper (789 → 5)
- zeugnisse (776 → 5), modules (745 → 4)

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
86
klausur-service/backend/ocr_labeling_models.py
Normal file
86
klausur-service/backend/ocr_labeling_models.py
Normal file
@@ -0,0 +1,86 @@
|
||||
"""
|
||||
OCR Labeling - Pydantic Models and Constants
|
||||
|
||||
Extracted from ocr_labeling_api.py to keep files under 500 LOC.
|
||||
"""
|
||||
|
||||
import os
|
||||
from pydantic import BaseModel
|
||||
from typing import Optional, Dict
|
||||
from datetime import datetime
|
||||
|
||||
|
||||
# Filesystem location for OCR labeling data, used as the fallback when MinIO
# is not available. Can be overridden via the OCR_STORAGE_PATH environment
# variable; otherwise the path below is used.
LOCAL_STORAGE_PATH = os.environ.get("OCR_STORAGE_PATH", "/app/ocr-labeling")
|
||||
|
||||
|
||||
# =============================================================================
|
||||
# Pydantic Models
|
||||
# =============================================================================
|
||||
|
||||
class SessionCreate(BaseModel):
    """Fields accepted when creating an OCR labeling session.

    Only ``name`` has no default; every other field falls back to the value
    declared below. NOTE(review): presumably used as a request body by the
    labeling API routes, which are not part of this module.
    """

    # Session name; must always be supplied.
    name: str

    # Kind of source material. Known values: klausur, handwriting_sample, scan
    # (not enforced here; any string is accepted).
    source_type: str = "klausur"

    # Optional free-text description.
    description: Optional[str] = None

    # Identifier of the OCR model; may be explicitly set to None.
    ocr_model: Optional[str] = "llama3.2-vision:11b"
|
||||
|
||||
|
||||
class SessionResponse(BaseModel):
    """Serialized view of a labeling session including its item counters.

    NOTE(review): ``description`` and ``ocr_model`` are ``Optional`` without a
    default. Under pydantic v1 that implies a default of ``None``; under
    pydantic v2 the fields are required (but may be ``None``). Confirm which
    major version this service pins.
    """

    # --- identity / configuration -------------------------------------------
    id: str
    name: str
    source_type: str
    description: Optional[str]
    ocr_model: Optional[str]

    # --- per-session item counters ------------------------------------------
    total_items: int
    labeled_items: int
    confirmed_items: int
    corrected_items: int
    skipped_items: int

    # --- bookkeeping --------------------------------------------------------
    created_at: datetime
|
||||
|
||||
|
||||
class ItemResponse(BaseModel):
    """Serialized view of a single labeling item and its OCR result.

    NOTE(review): the ``Optional`` fields below carry no default. Under
    pydantic v1 they default to ``None``; under pydantic v2 they are required
    (but nullable). Confirm which major version this service pins.
    """

    # --- identity and owning session ----------------------------------------
    id: str
    session_id: str
    session_name: str

    # --- image location -----------------------------------------------------
    image_path: str
    image_url: Optional[str]

    # --- OCR output and label -----------------------------------------------
    ocr_text: Optional[str]
    ocr_confidence: Optional[float]
    ground_truth: Optional[str]

    # --- state / extras -----------------------------------------------------
    status: str
    metadata: Optional[Dict]  # untyped dict; key/value schema is not defined here
    created_at: datetime
|
||||
|
||||
|
||||
class ConfirmRequest(BaseModel):
    """Payload identifying an item to confirm.

    NOTE(review): presumably confirms the existing OCR text as correct; the
    handling route is not part of this module.
    """

    # Identifier of the item being confirmed.
    item_id: str

    # Time spent labeling, in whole seconds; omitted when not measured.
    label_time_seconds: Optional[int] = None
|
||||
|
||||
|
||||
class CorrectRequest(BaseModel):
    """Payload carrying a corrected ground-truth text for an item."""

    # Identifier of the item being corrected.
    item_id: str

    # The corrected text; required.
    ground_truth: str

    # Time spent labeling, in whole seconds; omitted when not measured.
    label_time_seconds: Optional[int] = None
|
||||
|
||||
|
||||
class SkipRequest(BaseModel):
    """Payload identifying an item to skip."""

    # Identifier of the item being skipped.
    item_id: str
|
||||
|
||||
|
||||
class ExportRequest(BaseModel):
    """Options for exporting labeled data; every field has a default."""

    # Target format. Known values: generic, trocr, llama_vision
    # (not enforced here; any string is accepted).
    export_format: str = "generic"

    # Optional session identifier.
    # NOTE(review): presumably restricts the export to that session; confirm in the export route.
    session_id: Optional[str] = None

    # Optional batch identifier; its exact meaning is defined by the export route.
    batch_id: Optional[str] = None
|
||||
|
||||
|
||||
class StatsResponse(BaseModel):
    """Aggregated labeling statistics.

    The three ``Optional`` fields default to ``None`` and may be left out;
    all remaining counters and ``accuracy_rate`` are required.
    """

    # Number of sessions covered.
    # NOTE(review): presumably None for single-session stats; confirm with callers.
    total_sessions: Optional[int] = None

    # --- item counters ------------------------------------------------------
    total_items: int
    labeled_items: int
    confirmed_items: int
    corrected_items: int
    pending_items: int
    exportable_items: Optional[int] = None

    # --- derived metrics ----------------------------------------------------
    # NOTE(review): scale (0-1 vs. percent) is not defined in this module.
    accuracy_rate: float
    avg_label_time_seconds: Optional[float] = None
|
||||
Reference in New Issue
Block a user