// NOTE(review): the lines below appear to be commit-message / file-viewer residue, kept as comments.
// Services: Admin-Lehrer, Backend-Lehrer, Studio v2, Website, Klausur-Service, School-Service,
// Voice-Service, Geo-Service, BreakPilot Drive, Agent-Core
// Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
// 92 lines | 4.1 KiB | TypeScript
import type { SystemInfoConfig } from './types'
/**
 * Static system-info configuration for the ML training dashboard.
 *
 * Bundles the display metadata (title, description, version), the layered
 * architecture overview, feature list, roadmap, technology table, audit
 * checklist, and a long-form HTML documentation string.
 *
 * User-facing strings are German and must stay byte-for-byte as written.
 * NOTE(review): presumably rendered by a shared system-info page — confirm
 * against the consumer of `SystemInfoConfig`.
 */
export const trainingConfig: SystemInfoConfig = {
  title: 'Training System-Info',
  description: 'ML Model Training und Fine-Tuning Dashboard.',
  version: '1.0',

  // Architecture layers, listed from the UI down to storage.
  architecture: {
    layers: [
      { title: 'Training UI', components: ['Experiment Tracker', 'Metrics Dashboard', 'Model Registry'], color: '#3b82f6' },
      { title: 'Training Pipeline', components: ['Data Loader', 'Trainer', 'Evaluator'], color: '#8b5cf6' },
      { title: 'Compute', components: ['GPU Cluster', 'Job Scheduler', 'Resource Manager'], color: '#10b981' },
      { title: 'Storage', components: ['Dataset Store', 'Model Artifacts', 'Checkpoints'], color: '#f59e0b' },
    ],
  },

  // Feature list with a status flag per feature ('active' or 'planned').
  features: [
    { name: 'Experiment Tracking', status: 'active', description: 'Versuchsprotokollierung' },
    { name: 'Hyperparameter Tuning', status: 'active', description: 'Automatische Optimierung' },
    { name: 'Model Versioning', status: 'active', description: 'Modellversionierung' },
    { name: 'Distributed Training', status: 'planned', description: 'Multi-GPU Training' },
  ],

  // Roadmap phases, one per quarter, each with a priority and work items.
  roadmap: [
    { phase: 'Phase 1: MLOps (Q1)', priority: 'high', items: ['Experiment Tracking', 'Model Registry', 'Artifact Storage', 'Reproducibility'] },
    { phase: 'Phase 2: Automation (Q2)', priority: 'medium', items: ['Auto-Tuning', 'Pipeline Orchestration', 'A/B Deployment', 'Rollback'] },
    { phase: 'Phase 3: Scale (Q3)', priority: 'medium', items: ['Distributed Training', 'Cost Optimization', 'Resource Scheduling', 'Multi-Cloud'] },
  ],

  // Technology table; `version` is given only for the PyTorch entry.
  technicalDetails: [
    { component: 'ML', technology: 'PyTorch', version: '2.x', description: 'Training Framework' },
    { component: 'Tracking', technology: 'MLflow/W&B', description: 'Experiment Tracking' },
    { component: 'GPU', technology: 'vast.ai', description: 'Cloud GPUs' },
    { component: 'Storage', technology: 'MinIO', description: 'Artifact Store' },
  ],

  // Audit checklist grouped by category; every item carries a status flag.
  auditInfo: [
    {
      category: 'Training Pipeline',
      items: [
        { label: 'Aktive Jobs', value: 'Tracking', status: 'ok' },
        { label: 'GPU Verfuegbarkeit', value: 'vast.ai', status: 'ok' },
        { label: 'Model Registry', value: 'MLflow', status: 'ok' },
        { label: 'Experiment Tracking', value: 'Aktiviert', status: 'ok' },
      ],
    },
    {
      category: 'Ressourcen',
      items: [
        { label: 'GPU Budget', value: 'Konfiguriert', status: 'ok' },
        { label: 'Storage', value: 'MinIO', status: 'ok' },
        { label: 'Compute Limits', value: 'Definiert', status: 'ok' },
        { label: 'Auto-Shutdown', value: 'Aktiviert', status: 'ok' },
      ],
    },
    {
      category: 'Compliance',
      items: [
        { label: 'Datensatz-Herkunft', value: 'Dokumentiert', status: 'ok' },
        { label: 'Model Cards', value: 'Vorhanden', status: 'ok' },
        // The only non-'ok' entry: bias review is still planned.
        { label: 'Bias-Pruefung', value: 'Geplant', status: 'warning' },
        { label: 'Audit Trail', value: 'Aktiviert', status: 'ok' },
      ],
    },
  ],

  // Long-form HTML documentation. Kept at column 0 so no leading whitespace
  // leaks into the <pre> block.
  fullDocumentation: `
<h2>ML Training Dashboard</h2>

<h3>1. Uebersicht</h3>
<p>Das Training-Modul ermoeglicht das Training und Fine-Tuning von ML-Modellen mit Experiment-Tracking, Model Registry und GPU-Management.</p>

<h3>2. Training-Architektur</h3>
<pre>
Training Dashboard → Training Pipeline → GPU Cluster (vast.ai) + Storage (MinIO)
</pre>

<h3>3. API Endpoints</h3>
<table>
<tr><th>Endpoint</th><th>Methode</th><th>Beschreibung</th></tr>
<tr><td>/api/training/experiments</td><td>GET</td><td>Experimente listen</td></tr>
<tr><td>/api/training/experiments</td><td>POST</td><td>Neues Experiment</td></tr>
<tr><td>/api/training/experiments/{id}</td><td>GET</td><td>Experiment-Details</td></tr>
<tr><td>/api/training/models</td><td>GET</td><td>Model Registry</td></tr>
<tr><td>/api/training/runs/{id}/stop</td><td>POST</td><td>Training stoppen</td></tr>
</table>

<h3>4. Best Practices</h3>
<ol>
<li>Immer Baseline-Experiment als Referenz</li>
<li>Checkpoints alle N Schritte speichern</li>
<li>Early Stopping bei Overfitting</li>
<li>Model Card fuer jedes produktive Modell</li>
<li>Reproduzierbarkeit durch Seed-Setting</li>
</ol>
`,
}