import type { SystemInfoConfig } from './types'

/**
 * Long-form documentation text for the LLM comparison module.
 *
 * The whole literal is a runtime value: the leading and trailing blank lines,
 * the column separators inside the tables and the spacing of the box-drawing
 * diagrams are all part of the string. Do not re-indent or reflow it.
 */
const llmCompareDocumentation = `

LLM Provider Vergleich & Benchmarking

1. Uebersicht

Das LLM-Vergleichsmodul ermoeglicht den direkten Vergleich verschiedener KI-Provider hinsichtlich Qualitaet, Latenz und Kosten. Es dient der Auswahl des optimalen Modells fuer spezifische Use Cases.

2. Unterstuetzte Provider

Provider	Modelle	Staerken	Preisbereich
OpenAI	GPT-4o, GPT-4o-mini, o1	Allrounder, Coding	$0.15-15/1M Token
Anthropic	Claude 3.5 Sonnet/Haiku	Lange Kontexte, Safety	$0.25-15/1M Token
Google	Gemini 2.0 Flash/Pro	Multimodal, Speed	$0.075-5/1M Token
Ollama	Llama 3, Mistral, Phi	Lokal, Datenschutz	Nur Hardware-Kosten

3. Vergleichs-Architektur

┌────────────────────────────────────────────────────────────────┐
│                        Frontend UI                              │
│  ┌──────────────┐  ┌──────────────┐  ┌──────────────────────┐ │
│  │ Prompt Input │  │ Provider Sel │  │ Results Comparison   │ │
│  └──────────────┘  └──────────────┘  └──────────────────────┘ │
└────────────────────────────────────────────────────────────────┘
                              │
                              v
┌────────────────────────────────────────────────────────────────┐
│                    Backend Orchestrator                         │
│  ┌──────────────────────────────────────────────────────────┐ │
│  │  Parallel Request Handler  |  Response Aggregator        │ │
│  └──────────────────────────────────────────────────────────┘ │
└────────────────────────────────────────────────────────────────┘
          │              │              │              │
          v              v              v              v
     ┌─────────┐   ┌─────────┐   ┌─────────┐   ┌─────────┐
     │ OpenAI  │   │Anthropic│   │ Google  │   │ Ollama  │
     └─────────┘   └─────────┘   └─────────┘   └─────────┘

4. Metriken

Metrik	Beschreibung	Messung
TTFB	Time to First Byte	Millisekunden
Total Latency	Gesamtantwortzeit	Millisekunden
Tokens/Sekunde	Generierungsgeschwindigkeit	Output Tokens/s
Kosten	Gesamtkosten	USD
Qualitaet	Manuelle/Auto Bewertung	1-5 Sterne

5. API Endpoints

Endpoint	Methode	Beschreibung
/api/llm/compare	POST	Parallelen Vergleich starten
/api/llm/providers	GET	Verfuegbare Provider listen
/api/llm/stats	GET	Nutzungsstatistiken
/api/llm/benchmark	POST	Benchmark-Suite ausfuehren

6. Benchmark-Suite

Vordefinierte Tests fuer verschiedene Use Cases:

Summarization: Textzusammenfassung verschiedener Laengen
QA: Frage-Antwort auf Dokumenten
Coding: Code-Generierung und -Erklaerung
Classification: Textkategorisierung
Translation: Mehrsprachige Uebersetzung

7. Kostenmanagement

Budgetkontrolle
     │
     ├── Monatliches Limit pro Provider
     ├── Echtzeit-Kostentracking
     ├── Alerts bei 80%/90%/100%
     └── Auto-Fallback bei Limit

8. Datenschutz-Konfiguration

Einstellung	Optionen	Default
Prompt-Logging	Ein/Aus/Anonymisiert	Anonymisiert
Response-Speicherung	Ein/Aus/24h	24h
Metriken-Retention	30/90/365 Tage	90 Tage
PII-Filter	Ein/Aus	Ein (geplant)

9. Fehlerbehandlung

Timeout: 30 Sekunden default, konfigurierbar
Rate Limit: Automatisches Retry mit Backoff
Provider Down: Skip und Warnung
API Error: Fehlerdetails in Response

10. Best Practices

Immer mindestens 3 Provider fuer aussagekraeftigen Vergleich
Benchmark-Suite fuer reproduzierbare Ergebnisse nutzen
Kosten und Qualitaet gemeinsam bewerten
Lokale Modelle fuer sensible Daten bevorzugen

`

/**
 * System-info configuration for the LLM comparison module.
 *
 * Static data only: a title/description/version header, the architecture
 * layers, the feature list, the roadmap phases, per-provider technical
 * details, audit checklists and the long-form documentation text.
 *
 * All display strings are German with umlauts transliterated (ae/oe/ue);
 * keep that spelling when adding entries so the texts stay consistent.
 */
export const llmCompareConfig: SystemInfoConfig = {
  title: 'LLM Vergleich System-Info',
  description: 'Vergleich und Benchmarking verschiedener KI-Provider und Modelle.',
  version: '1.0',

  // Architecture layers, listed from the UI down to logging.
  architecture: {
    layers: [
      {
        title: 'Vergleichs-UI',
        components: ['Provider-Auswahl', 'Prompt-Editor', 'Ergebnis-Vergleich'],
        color: '#3b82f6',
      },
      {
        title: 'Provider Adapters',
        components: ['OpenAI', 'Anthropic', 'Google', 'Local'],
        color: '#8b5cf6',
      },
      {
        title: 'Evaluation Engine',
        components: ['Latenz-Messung', 'Qualitaets-Scoring', 'Cost Calculator'],
        color: '#10b981',
      },
      {
        title: 'Logging',
        components: ['Request Logs', 'Token Tracking', 'Error Logs'],
        color: '#f59e0b',
      },
    ],
  },

  // Feature list; entries are marked either 'active' or 'planned'.
  features: [
    {
      name: 'Multi-Provider Vergleich',
      status: 'active',
      description: 'Parallele Anfragen an mehrere LLMs',
    },
    {
      name: 'Latenz-Tracking',
      status: 'active',
      description: 'Echtzeit-Performance-Messung',
    },
    {
      name: 'Kosten-Kalkulation',
      status: 'active',
      description: 'Token-basierte Kostenberechnung',
    },
    {
      name: 'Qualitaets-Bewertung',
      status: 'planned',
      description: 'Automatisches Scoring der Antworten',
    },
    {
      name: 'A/B Testing',
      status: 'planned',
      description: 'Statistische Signifikanz-Tests',
    },
  ],

  // Roadmap phases in chronological order (Q1 to Q3).
  roadmap: [
    {
      phase: 'Phase 1: Provider (Q1)',
      priority: 'high',
      items: [
        'Mistral Integration',
        'Llama 3 Integration',
        'Gemini Pro Integration',
        'Rate Limiting',
      ],
    },
    {
      phase: 'Phase 2: Evaluation (Q2)',
      priority: 'high',
      items: [
        'Automatisches Scoring',
        'Benchmark-Suite',
        'Domain-spezifische Tests',
        'Halluzinations-Erkennung',
      ],
    },
    {
      phase: 'Phase 3: Optimierung (Q3)',
      priority: 'medium',
      items: [
        'Prompt-Optimierung',
        'Modell-Routing',
        'Fallback-Strategien',
        'Caching',
      ],
    },
  ],

  // One entry per provider, in the same order as the 'Provider Adapters' layer.
  technicalDetails: [
    { component: 'OpenAI', technology: 'GPT-4o / o1', description: 'Haupt-Provider' },
    { component: 'Anthropic', technology: 'Claude 3.5', description: 'Alternative' },
    { component: 'Google', technology: 'Gemini 2.0', description: 'Multimodal' },
    { component: 'Local', technology: 'Ollama', description: 'Self-hosted' },
  ],

  // Audit checklists; 'PII Detection' is the only item not in status 'ok'.
  auditInfo: [
    {
      category: 'Provider-Status',
      items: [
        { label: 'OpenAI', value: 'Aktiv', status: 'ok' },
        { label: 'Anthropic', value: 'Aktiv', status: 'ok' },
        { label: 'Google Gemini', value: 'Aktiv', status: 'ok' },
        { label: 'Ollama (Local)', value: 'Verfuegbar', status: 'ok' },
      ],
    },
    {
      category: 'Kosten & Limits',
      items: [
        { label: 'Monatliches Budget', value: 'Konfigurierbar', status: 'ok' },
        { label: 'Rate Limiting', value: 'Pro Provider', status: 'ok' },
        { label: 'Token Tracking', value: 'Aktiviert', status: 'ok' },
        { label: 'Cost Alerts', value: 'E-Mail', status: 'ok' },
      ],
    },
    {
      category: 'Datenschutz',
      items: [
        { label: 'Prompt-Logging', value: 'Optional', status: 'ok' },
        { label: 'PII Detection', value: 'Geplant', status: 'warning' },
        { label: 'Data Residency', value: 'EU verfuegbar', status: 'ok' },
        { label: 'Audit-Log', value: 'Aktiviert', status: 'ok' },
      ],
    },
  ],

  fullDocumentation: llmCompareDocumentation,
}