import type { SystemInfoConfig } from './types'

/**
 * Static system-info content for the ML training module.
 *
 * Bundles everything this module exposes about the training system in one
 * object: headline metadata, the architecture layers, the feature list with
 * per-feature status, the roadmap phases, the technology stack, audit
 * check results grouped by category, and a long-form documentation text.
 *
 * All user-facing strings are German with ASCII-transliterated umlauts
 * ("ue", "oe"); keep that convention when editing.
 *
 * NOTE(review): how `fullDocumentation` is rendered is not visible from this
 * file. The endpoint table uses " | " as a plain-text column separator so
 * path, HTTP method and description stay distinguishable — confirm against
 * the consuming component.
 */
export const trainingConfig: SystemInfoConfig = {
  title: 'Training System-Info',
  description: 'ML Model Training und Fine-Tuning Dashboard.',
  version: '1.0',

  // Architecture layers, listed from the UI down to storage.
  architecture: {
    layers: [
      {
        title: 'Training UI',
        components: ['Experiment Tracker', 'Metrics Dashboard', 'Model Registry'],
        color: '#3b82f6',
      },
      {
        title: 'Training Pipeline',
        components: ['Data Loader', 'Trainer', 'Evaluator'],
        color: '#8b5cf6',
      },
      {
        title: 'Compute',
        components: ['GPU Cluster', 'Job Scheduler', 'Resource Manager'],
        color: '#10b981',
      },
      {
        title: 'Storage',
        components: ['Dataset Store', 'Model Artifacts', 'Checkpoints'],
        color: '#f59e0b',
      },
    ],
  },

  // Feature list; only "Distributed Training" is still marked as planned.
  features: [
    { name: 'Experiment Tracking', status: 'active', description: 'Versuchsprotokollierung' },
    { name: 'Hyperparameter Tuning', status: 'active', description: 'Automatische Optimierung' },
    { name: 'Model Versioning', status: 'active', description: 'Modellversionierung' },
    { name: 'Distributed Training', status: 'planned', description: 'Multi-GPU Training' },
  ],

  // Roadmap phases in chronological order (Q1 to Q3).
  roadmap: [
    {
      phase: 'Phase 1: MLOps (Q1)',
      priority: 'high',
      items: ['Experiment Tracking', 'Model Registry', 'Artifact Storage', 'Reproducibility'],
    },
    {
      phase: 'Phase 2: Automation (Q2)',
      priority: 'medium',
      items: ['Auto-Tuning', 'Pipeline Orchestration', 'A/B Deployment', 'Rollback'],
    },
    {
      phase: 'Phase 3: Scale (Q3)',
      priority: 'medium',
      items: ['Distributed Training', 'Cost Optimization', 'Resource Scheduling', 'Multi-Cloud'],
    },
  ],

  // Technology stack; only the PyTorch entry carries a version.
  technicalDetails: [
    { component: 'ML', technology: 'PyTorch', version: '2.x', description: 'Training Framework' },
    { component: 'Tracking', technology: 'MLflow/W&B', description: 'Experiment Tracking' },
    { component: 'GPU', technology: 'vast.ai', description: 'Cloud GPUs' },
    { component: 'Storage', technology: 'MinIO', description: 'Artifact Store' },
  ],

  // Audit checks grouped by category; the bias check is the single warning.
  auditInfo: [
    {
      category: 'Training Pipeline',
      items: [
        { label: 'Aktive Jobs', value: 'Tracking', status: 'ok' },
        { label: 'GPU Verfuegbarkeit', value: 'vast.ai', status: 'ok' },
        { label: 'Model Registry', value: 'MLflow', status: 'ok' },
        { label: 'Experiment Tracking', value: 'Aktiviert', status: 'ok' },
      ],
    },
    {
      category: 'Ressourcen',
      items: [
        { label: 'GPU Budget', value: 'Konfiguriert', status: 'ok' },
        { label: 'Storage', value: 'MinIO', status: 'ok' },
        { label: 'Compute Limits', value: 'Definiert', status: 'ok' },
        { label: 'Auto-Shutdown', value: 'Aktiviert', status: 'ok' },
      ],
    },
    {
      category: 'Compliance',
      items: [
        { label: 'Datensatz-Herkunft', value: 'Dokumentiert', status: 'ok' },
        { label: 'Model Cards', value: 'Vorhanden', status: 'ok' },
        { label: 'Bias-Pruefung', value: 'Geplant', status: 'warning' },
        { label: 'Audit Trail', value: 'Aktiviert', status: 'ok' },
      ],
    },
  ],

  // Long-form documentation. Whitespace inside the template literal is part
  // of the runtime string, so its lines are intentionally not indented.
  fullDocumentation: `

ML Training Dashboard

1. Uebersicht

Das Training-Modul ermoeglicht das Training und Fine-Tuning von ML-Modellen mit Experiment-Tracking, Model Registry und GPU-Management.

2. Training-Architektur

Training Dashboard → Training Pipeline → GPU Cluster (vast.ai) + Storage (MinIO)

3. API Endpoints

Endpoint | Methode | Beschreibung
/api/training/experiments | GET | Experimente listen
/api/training/experiments | POST | Neues Experiment
/api/training/experiments/{id} | GET | Experiment-Details
/api/training/models | GET | Model Registry
/api/training/runs/{id}/stop | POST | Training stoppen

4. Best Practices

  1. Immer Baseline-Experiment als Referenz
  2. Checkpoints alle N Schritte speichern
  3. Early Stopping bei Overfitting
  4. Model Card fuer jedes produktive Modell
  5. Reproduzierbarkeit durch Seed-Setting
`,
}