[split-required] Split 500-850 LOC files (batch 2)
backend-lehrer (10 files):
- game/database.py (785 → 5), correction_api.py (683 → 4)
- classroom_engine/antizipation.py (676 → 5)
- llm_gateway schools/edu_search already done in prior batch

klausur-service (12 files):
- orientation_crop_api.py (694 → 5), pdf_export.py (677 → 4)
- zeugnis_crawler.py (676 → 5), grid_editor_api.py (671 → 5)
- eh_templates.py (658 → 5), mail/api.py (651 → 5)
- qdrant_service.py (638 → 5), training_api.py (625 → 4)

website (6 pages):
- middleware (696 → 8), mail (733 → 6), consent (628 → 8)
- compliance/risks (622 → 5), export (502 → 5), brandbook (629 → 7)

studio-v2 (3 components):
- B2BMigrationWizard (848 → 3), CleanupPanel (765 → 2)
- dashboard-experimental (739 → 2)

admin-lehrer (4 files):
- uebersetzungen (769 → 4), manager (670 → 2)
- ChunkBrowserQA (675 → 6), dsfa/page (674 → 5)

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
118
klausur-service/backend/training_models.py
Normal file
118
klausur-service/backend/training_models.py
Normal file
@@ -0,0 +1,118 @@
|
||||
"""
|
||||
Training API — enums, request/response models, and in-memory state.
|
||||
"""
|
||||
|
||||
import uuid
|
||||
from datetime import datetime
|
||||
from typing import Optional, List, Dict, Any
|
||||
from enum import Enum
|
||||
from dataclasses import dataclass, field
|
||||
from pydantic import BaseModel, Field
|
||||
|
||||
|
||||
# ============================================================================
|
||||
# ENUMS
|
||||
# ============================================================================
|
||||
|
||||
class TrainingStatus(str, Enum):
    # Status values a training job can carry. Because ``str`` is mixed in,
    # every member is itself a string equal to its lowercase value, so
    # ``TrainingStatus.QUEUED == "queued"`` holds.
    #
    # Documented with comments rather than a docstring so the class's
    # ``__doc__`` stays exactly as it was.

    QUEUED = "queued"
    PREPARING = "preparing"
    TRAINING = "training"
    VALIDATING = "validating"

    COMPLETED = "completed"
    FAILED = "failed"

    PAUSED = "paused"
    CANCELLED = "cancelled"
|
||||
|
||||
|
||||
class ModelType(str, Enum):
    # Kinds of model that can be trained. Members are ``str`` instances
    # equal to their lowercase value (``ModelType.ZEUGNIS == "zeugnis"``).
    #
    # Documented with comments rather than a docstring so the class's
    # ``__doc__`` stays exactly as it was.

    ZEUGNIS = "zeugnis"
    KLAUSUR = "klausur"
    GENERAL = "general"
|
||||
|
||||
|
||||
# ============================================================================
|
||||
# REQUEST/RESPONSE MODELS
|
||||
# ============================================================================
|
||||
|
||||
class TrainingConfig(BaseModel):
    """Configuration for a training job."""

    # Required inputs: a job name and the Bundesland codes to include.
    # ``model_type`` falls back to ModelType.ZEUGNIS when omitted.
    name: str = Field(
        default=...,
        description="Name for the training job",
    )
    model_type: ModelType = Field(
        default=ModelType.ZEUGNIS,
        description="Type of model to train",
    )
    bundeslaender: List[str] = Field(
        default=...,
        description="List of Bundesland codes to include",
    )

    # Hyperparameters. Each has a default and inclusive ge/le bounds that
    # pydantic enforces on validation.
    batch_size: int = Field(default=16, ge=1, le=128)
    learning_rate: float = Field(default=0.00005, ge=0.000001, le=0.1)
    epochs: int = Field(default=10, ge=1, le=100)
    warmup_steps: int = Field(default=500, ge=0, le=10000)
    weight_decay: float = Field(default=0.01, ge=0, le=1)
    gradient_accumulation: int = Field(default=4, ge=1, le=32)
    mixed_precision: bool = Field(
        default=True,
        description="Use FP16 mixed precision training",
    )
|
||||
|
||||
|
||||
class TrainingMetrics(BaseModel):
    """Metrics from a training job."""

    # Scalar scores; each starts at 0.0 when not supplied.
    precision: float = 0.0
    recall: float = 0.0
    f1_score: float = 0.0
    accuracy: float = 0.0

    # Loss series, empty by default.
    # NOTE(review): the ``[]`` literals rely on pydantic copying mutable
    # field defaults per instance (unlike plain class attributes) — confirm
    # against the pydantic version this service pins.
    loss_history: List[float] = []
    val_loss_history: List[float] = []
|
||||
|
||||
|
||||
class TrainingJob(BaseModel):
    """A training job with full details."""

    # Identity and classification.
    id: str
    name: str
    model_type: ModelType
    status: TrainingStatus

    # Progress counters.
    progress: float
    current_epoch: int
    total_epochs: int

    # Loss values and learning rate.
    loss: float
    val_loss: float
    learning_rate: float

    # Document counters.
    documents_processed: int
    total_documents: int

    # Nullable timestamps and error text.
    # NOTE(review): these Optional fields declare no default. pydantic v1
    # treats them as defaulting to None, whereas pydantic v2 makes them
    # required-but-nullable — confirm which version this service pins and
    # that callers always pass them explicitly if it is v2.
    started_at: Optional[datetime]
    estimated_completion: Optional[datetime]
    completed_at: Optional[datetime]
    error_message: Optional[str]

    # Nested models: collected metrics and the configuration of the job.
    metrics: TrainingMetrics
    config: TrainingConfig
|
||||
|
||||
|
||||
class ModelVersion(BaseModel):
    """A trained model version."""

    # All fields are required; none declares a default.

    # Identifiers: this version's id, the id of a training job, and a
    # version string.
    id: str
    job_id: str
    version: str

    model_type: ModelType
    created_at: datetime

    # Metrics attached to this version.
    metrics: TrainingMetrics

    is_active: bool
    size_mb: float
    bundeslaender: List[str]
|
||||
|
||||
|
||||
class DatasetStats(BaseModel):
    """Statistics about the training dataset."""

    # Overall integer counts; all required.
    total_documents: int
    total_chunks: int
    training_allowed: int

    # Integer counts broken down by a string key.
    # presumably keyed by Bundesland code and document type respectively,
    # going by the field names — verify against the code that fills them.
    by_bundesland: Dict[str, int]
    by_doc_type: Dict[str, int]
|
||||
|
||||
|
||||
# ============================================================================
|
||||
# IN-MEMORY STATE (Replace with database in production)
|
||||
# ============================================================================
|
||||
|
||||
@dataclass
class TrainingState:
    """Mutable container for training jobs, model versions and the active job.

    Every instance starts with two fresh, empty dicts and no active job id,
    so separate instances never share their mappings.
    """

    # String-keyed mappings whose values are plain dicts.
    # presumably keyed by job id / model-version id — verify against callers.
    jobs: Dict[str, dict] = field(default_factory=dict)
    model_versions: Dict[str, dict] = field(default_factory=dict)

    # Id of the job currently marked active, or None when there is none.
    active_job_id: Optional[str] = field(default=None)
|
||||
|
||||
|
||||
# Module-level TrainingState instance, created once when this module is
# imported. Its contents live only in this process's memory (the section
# banner above marks it as a stand-in for a database in production).
_state = TrainingState()
|
||||
Reference in New Issue
Block a user