// breakpilot-lehrer/website/components/admin/system-info-configs/training-config.ts

import type { SystemInfoConfig } from './types'

/**
 * Long-form documentation for the training module, written as an HTML
 * fragment: headings, a preformatted architecture line, an API endpoint
 * table and an ordered best-practices list. The text content is German.
 *
 * NOTE(review): presumably rendered as raw HTML by whatever consumes
 * `fullDocumentation` — confirm before adding any dynamic content here.
 */
const TRAINING_DOCUMENTATION_HTML = `
<h2>ML Training Dashboard</h2>

<h3>1. Uebersicht</h3>
<p>Das Training-Modul ermoeglicht das Training und Fine-Tuning von ML-Modellen mit Experiment-Tracking, Model Registry und GPU-Management.</p>

<h3>2. Training-Architektur</h3>
<pre>
Training Dashboard → Training Pipeline → GPU Cluster (vast.ai) + Storage (MinIO)
</pre>

<h3>3. API Endpoints</h3>
<table>
  <tr><th>Endpoint</th><th>Methode</th><th>Beschreibung</th></tr>
  <tr><td>/api/training/experiments</td><td>GET</td><td>Experimente listen</td></tr>
  <tr><td>/api/training/experiments</td><td>POST</td><td>Neues Experiment</td></tr>
  <tr><td>/api/training/experiments/{id}</td><td>GET</td><td>Experiment-Details</td></tr>
  <tr><td>/api/training/models</td><td>GET</td><td>Model Registry</td></tr>
  <tr><td>/api/training/runs/{id}/stop</td><td>POST</td><td>Training stoppen</td></tr>
</table>

<h3>4. Best Practices</h3>
<ol>
  <li>Immer Baseline-Experiment als Referenz</li>
  <li>Checkpoints alle N Schritte speichern</li>
  <li>Early Stopping bei Overfitting</li>
  <li>Model Card fuer jedes produktive Modell</li>
  <li>Reproduzierbarkeit durch Seed-Setting</li>
</ol>
`

/**
 * Static system-info configuration for the ML training area.
 *
 * The object is pure data: an architecture layer list, feature flags, a
 * phased roadmap, a technology table, grouped audit entries and a long-form
 * HTML documentation string. All display strings are German and are kept
 * verbatim.
 */
export const trainingConfig: SystemInfoConfig = {
  title: 'Training System-Info',
  description: 'ML Model Training und Fine-Tuning Dashboard.',
  version: '1.0',

  // Architecture layers, listed from the UI down to storage.
  architecture: {
    layers: [
      {
        title: 'Training UI',
        components: ['Experiment Tracker', 'Metrics Dashboard', 'Model Registry'],
        color: '#3b82f6',
      },
      {
        title: 'Training Pipeline',
        components: ['Data Loader', 'Trainer', 'Evaluator'],
        color: '#8b5cf6',
      },
      {
        title: 'Compute',
        components: ['GPU Cluster', 'Job Scheduler', 'Resource Manager'],
        color: '#10b981',
      },
      {
        title: 'Storage',
        components: ['Dataset Store', 'Model Artifacts', 'Checkpoints'],
        color: '#f59e0b',
      },
    ],
  },

  // Feature list; only distributed training is still marked as planned.
  features: [
    {
      name: 'Experiment Tracking',
      status: 'active',
      description: 'Versuchsprotokollierung',
    },
    {
      name: 'Hyperparameter Tuning',
      status: 'active',
      description: 'Automatische Optimierung',
    },
    {
      name: 'Model Versioning',
      status: 'active',
      description: 'Modellversionierung',
    },
    {
      name: 'Distributed Training',
      status: 'planned',
      description: 'Multi-GPU Training',
    },
  ],

  // Roadmap phases, one per quarter.
  roadmap: [
    {
      phase: 'Phase 1: MLOps (Q1)',
      priority: 'high',
      items: ['Experiment Tracking', 'Model Registry', 'Artifact Storage', 'Reproducibility'],
    },
    {
      phase: 'Phase 2: Automation (Q2)',
      priority: 'medium',
      items: ['Auto-Tuning', 'Pipeline Orchestration', 'A/B Deployment', 'Rollback'],
    },
    {
      phase: 'Phase 3: Scale (Q3)',
      priority: 'medium',
      items: ['Distributed Training', 'Cost Optimization', 'Resource Scheduling', 'Multi-Cloud'],
    },
  ],

  // Technology table; only the PyTorch row carries a version.
  technicalDetails: [
    {
      component: 'ML',
      technology: 'PyTorch',
      version: '2.x',
      description: 'Training Framework',
    },
    {
      component: 'Tracking',
      technology: 'MLflow/W&B',
      description: 'Experiment Tracking',
    },
    {
      component: 'GPU',
      technology: 'vast.ai',
      description: 'Cloud GPUs',
    },
    {
      component: 'Storage',
      technology: 'MinIO',
      description: 'Artifact Store',
    },
  ],

  // Audit entries grouped by category.
  auditInfo: [
    {
      category: 'Training Pipeline',
      items: [
        { label: 'Aktive Jobs', value: 'Tracking', status: 'ok' },
        { label: 'GPU Verfuegbarkeit', value: 'vast.ai', status: 'ok' },
        { label: 'Model Registry', value: 'MLflow', status: 'ok' },
        { label: 'Experiment Tracking', value: 'Aktiviert', status: 'ok' },
      ],
    },
    {
      category: 'Ressourcen',
      items: [
        { label: 'GPU Budget', value: 'Konfiguriert', status: 'ok' },
        { label: 'Storage', value: 'MinIO', status: 'ok' },
        { label: 'Compute Limits', value: 'Definiert', status: 'ok' },
        { label: 'Auto-Shutdown', value: 'Aktiviert', status: 'ok' },
      ],
    },
    {
      category: 'Compliance',
      items: [
        { label: 'Datensatz-Herkunft', value: 'Dokumentiert', status: 'ok' },
        { label: 'Model Cards', value: 'Vorhanden', status: 'ok' },
        // The only entry in this config that is not 'ok': bias review is still planned.
        { label: 'Bias-Pruefung', value: 'Geplant', status: 'warning' },
        { label: 'Audit Trail', value: 'Aktiviert', status: 'ok' },
      ],
    },
  ],

  fullDocumentation: TRAINING_DOCUMENTATION_HTML,
}