import type { SystemInfoConfig } from './types' export const trainingConfig: SystemInfoConfig = { title: 'Training System-Info', description: 'ML Model Training und Fine-Tuning Dashboard.', version: '1.0', architecture: { layers: [ { title: 'Training UI', components: ['Experiment Tracker', 'Metrics Dashboard', 'Model Registry'], color: '#3b82f6' }, { title: 'Training Pipeline', components: ['Data Loader', 'Trainer', 'Evaluator'], color: '#8b5cf6' }, { title: 'Compute', components: ['GPU Cluster', 'Job Scheduler', 'Resource Manager'], color: '#10b981' }, { title: 'Storage', components: ['Dataset Store', 'Model Artifacts', 'Checkpoints'], color: '#f59e0b' }, ], }, features: [ { name: 'Experiment Tracking', status: 'active', description: 'Versuchsprotokollierung' }, { name: 'Hyperparameter Tuning', status: 'active', description: 'Automatische Optimierung' }, { name: 'Model Versioning', status: 'active', description: 'Modellversionierung' }, { name: 'Distributed Training', status: 'planned', description: 'Multi-GPU Training' }, ], roadmap: [ { phase: 'Phase 1: MLOps (Q1)', priority: 'high', items: ['Experiment Tracking', 'Model Registry', 'Artifact Storage', 'Reproducibility'] }, { phase: 'Phase 2: Automation (Q2)', priority: 'medium', items: ['Auto-Tuning', 'Pipeline Orchestration', 'A/B Deployment', 'Rollback'] }, { phase: 'Phase 3: Scale (Q3)', priority: 'medium', items: ['Distributed Training', 'Cost Optimization', 'Resource Scheduling', 'Multi-Cloud'] }, ], technicalDetails: [ { component: 'ML', technology: 'PyTorch', version: '2.x', description: 'Training Framework' }, { component: 'Tracking', technology: 'MLflow/W&B', description: 'Experiment Tracking' }, { component: 'GPU', technology: 'vast.ai', description: 'Cloud GPUs' }, { component: 'Storage', technology: 'MinIO', description: 'Artifact Store' }, ], auditInfo: [ { category: 'Training Pipeline', items: [ { label: 'Aktive Jobs', value: 'Tracking', status: 'ok' }, { label: 'GPU Verfuegbarkeit', value: 'vast.ai', status: 'ok' }, { label: 'Model Registry', value: 'MLflow', status: 'ok' }, { label: 'Experiment Tracking', value: 'Aktiviert', status: 'ok' }, ], }, { category: 'Ressourcen', items: [ { label: 'GPU Budget', value: 'Konfiguriert', status: 'ok' }, { label: 'Storage', value: 'MinIO', status: 'ok' }, { label: 'Compute Limits', value: 'Definiert', status: 'ok' }, { label: 'Auto-Shutdown', value: 'Aktiviert', status: 'ok' }, ], }, { category: 'Compliance', items: [ { label: 'Datensatz-Herkunft', value: 'Dokumentiert', status: 'ok' }, { label: 'Model Cards', value: 'Vorhanden', status: 'ok' }, { label: 'Bias-Pruefung', value: 'Geplant', status: 'warning' }, { label: 'Audit Trail', value: 'Aktiviert', status: 'ok' }, ], }, ], fullDocumentation: `
Das Training-Modul ermoeglicht das Training und Fine-Tuning von ML-Modellen mit Experiment-Tracking, Model Registry und GPU-Management.
Training Dashboard → Training Pipeline → GPU Cluster (vast.ai) + Storage (MinIO)
| Endpoint | Methode | Beschreibung |
|---|---|---|
| /api/training/experiments | GET | Experimente listen |
| /api/training/experiments | POST | Neues Experiment |
| /api/training/experiments/{id} | GET | Experiment-Details |
| /api/training/models | GET | Model Registry |
| /api/training/runs/{id}/stop | POST | Training stoppen |