Files
breakpilot-lehrer/website/components/admin/system-info-configs/rag-config.ts
Benjamin Boenisch 5a31f52310 Initial commit: breakpilot-lehrer - Lehrer KI Platform
Services: Admin-Lehrer, Backend-Lehrer, Studio v2, Website,
Klausur-Service, School-Service, Voice-Service, Geo-Service,
BreakPilot Drive, Agent-Core

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-02-11 23:47:26 +01:00

242 lines
12 KiB
TypeScript

import type { SystemInfoConfig } from './types'
// System-info configuration for the RAG (Retrieval Augmented Generation)
// admin panel: architecture overview, feature status, roadmap, tech stack,
// privacy notes, audit checklists, and the full HTML documentation page.
export const ragConfig: SystemInfoConfig = {
  title: 'RAG System-Info',
  description: 'Retrieval Augmented Generation fuer intelligente Dokumentensuche.',
  version: '2.5',

  // Layered architecture diagram; `color` is the hex accent for each layer card.
  architecture: {
    layers: [
      {
        title: 'RAG UI',
        components: ['Chat Interface', 'Document Viewer', 'Source Citations'],
        color: '#3b82f6',
      },
      {
        title: 'RAG Pipeline',
        components: ['Query Processor', 'Retriever', 'Generator'],
        color: '#8b5cf6',
      },
      {
        title: 'Vector Store',
        components: ['Qdrant', 'Embedding Service', 'Index Manager'],
        color: '#10b981',
      },
      {
        title: 'Document Store',
        components: ['MinIO', 'PostgreSQL', 'Chunk Cache'],
        color: '#f59e0b',
      },
    ],
  },

  // Feature list rendered with a status badge ('active' | 'planned' used here).
  features: [
    { name: 'Semantic Search', status: 'active', description: 'Bedeutungsbasierte Suche' },
    { name: 'Source Citations', status: 'active', description: 'Quellenangaben in Antworten' },
    { name: 'Multi-Document', status: 'active', description: 'Suche ueber mehrere Dokumente' },
    { name: 'Hybrid Search', status: 'active', description: 'Kombination semantisch + keyword' },
    { name: 'Conversation Memory', status: 'planned', description: 'Kontextbewahrung' },
  ],

  // Quarterly roadmap, ordered Q1 -> Q3.
  roadmap: [
    {
      phase: 'Phase 1: Quality (Q1)',
      priority: 'high',
      items: ['Relevanz-Tuning', 'Chunk Optimization', 'Re-Ranking', 'Answer Quality'],
    },
    {
      phase: 'Phase 2: Scale (Q2)',
      priority: 'high',
      items: ['Index Sharding', 'Cache Layer', 'Batch Processing', 'Multi-Tenant'],
    },
    {
      phase: 'Phase 3: Intelligence (Q3)',
      priority: 'medium',
      items: ['Query Expansion', 'Self-Reflection', 'Multi-Hop QA', 'Knowledge Graph'],
    },
  ],

  // Technology stack; `version` is only present where a version is pinned.
  technicalDetails: [
    { component: 'Vector DB', technology: 'Qdrant', version: '1.12+', description: 'Similarity Search' },
    { component: 'Embeddings', technology: 'OpenAI', description: 'text-embedding-3-large' },
    { component: 'LLM', technology: 'GPT-4o / Claude', description: 'Answer Generation' },
    { component: 'Chunking', technology: 'LangChain', description: 'Document Processing' },
  ],

  privacyNotes: [
    'Dokumente werden lokal verarbeitet',
    'Embeddings enthalten keine Klartextdaten',
    'LLM-Anfragen sind tenant-isoliert',
  ],

  // Audit dashboard: three categories with four labelled checks each.
  auditInfo: [
    {
      category: 'RAG Pipeline',
      items: [
        { label: 'Indexierte Dokumente', value: 'Tracking', status: 'ok' },
        { label: 'Vector DB Status', value: 'Qdrant Online', status: 'ok' },
        { label: 'Embedding Model', value: 'text-embedding-3', status: 'ok' },
        { label: 'LLM Provider', value: 'Multi-Provider', status: 'ok' },
      ],
    },
    {
      category: 'Qualitaet',
      items: [
        { label: 'Retrieval Precision', value: 'Tracking', status: 'ok' },
        { label: 'Answer Accuracy', value: 'Evaluation', status: 'ok' },
        { label: 'Halluzinations-Rate', value: 'Monitoring', status: 'ok' },
        { label: 'Citation Accuracy', value: '> 95%', status: 'ok' },
      ],
    },
    {
      category: 'Performance',
      items: [
        { label: 'Avg. Latency', value: '< 3s', status: 'ok' },
        { label: 'Cache Hit Rate', value: '> 30%', status: 'ok' },
        { label: 'Concurrent Users', value: 'Skalierbar', status: 'ok' },
        { label: 'Cost per Query', value: 'Tracking', status: 'ok' },
      ],
    },
  ],

  // Full documentation page as raw HTML. NOTE: whitespace inside this template
  // literal is runtime data — the lines below are intentionally not indented.
  fullDocumentation: `
<h2>RAG System - Retrieval Augmented Generation</h2>
<h3>1. Uebersicht</h3>
<p>Das RAG-System kombiniert Dokumenten-Retrieval mit LLM-Generation fuer praezise, quellenbasierte Antworten auf Nutzeranfragen.</p>
<h3>2. RAG-Pipeline</h3>
<pre>
User Query
v
┌───────────────────────────────────────────────────────────────┐
│ Query Processing │
│ ├── Query Analysis (Intent, Entities) │
│ ├── Query Expansion (Synonyme, Reformulierung) │
│ └── Embedding Generation │
└────────────────────────────┬──────────────────────────────────┘
v
┌───────────────────────────────────────────────────────────────┐
│ Retrieval │
│ ┌─────────────────┐ ┌─────────────────┐ │
│ │ Vector Search │ │ Keyword Search │ │
│ │ (Qdrant) │ │ (PostgreSQL) │ │
│ └────────┬────────┘ └────────┬────────┘ │
│ │ │ │
│ └──────────┬───────────┘ │
│ v │
│ ┌─────────────────────┐ │
│ │ Hybrid Fusion │ │
│ │ (RRF Algorithm) │ │
│ └─────────────────────┘ │
└────────────────────────┬──────────────────────────────────────┘
v
┌───────────────────────────────────────────────────────────────┐
│ Re-Ranking │
│ ├── Cross-Encoder Scoring │
│ ├── Metadata Filtering │
│ └── Diversity Sampling │
└────────────────────────┬──────────────────────────────────────┘
v
┌───────────────────────────────────────────────────────────────┐
│ Generation │
│ ├── Context Assembly │
│ ├── Prompt Engineering │
│ ├── LLM Call (GPT-4o / Claude) │
│ └── Citation Extraction │
└────────────────────────┬──────────────────────────────────────┘
v
┌───────────────────────────────────────────────────────────────┐
│ Response │
│ ├── Answer Text │
│ ├── Source Citations │
│ └── Confidence Score │
└───────────────────────────────────────────────────────────────┘
</pre>
<h3>3. Chunking-Strategie</h3>
<table>
<tr><th>Parameter</th><th>Wert</th><th>Grund</th></tr>
<tr><td>Chunk Size</td><td>512 Tokens</td><td>Optimal fuer Retrieval</td></tr>
<tr><td>Overlap</td><td>50 Tokens</td><td>Kontext-Erhalt</td></tr>
<tr><td>Splitter</td><td>Semantic</td><td>Natuerliche Grenzen</td></tr>
<tr><td>Metadata</td><td>Document, Page, Section</td><td>Zitation</td></tr>
</table>
<h3>4. API Endpoints</h3>
<table>
<tr><th>Endpoint</th><th>Methode</th><th>Beschreibung</th></tr>
<tr><td>/api/rag/query</td><td>POST</td><td>Frage stellen</td></tr>
<tr><td>/api/rag/documents</td><td>GET</td><td>Dokumente listen</td></tr>
<tr><td>/api/rag/documents</td><td>POST</td><td>Dokument indexieren</td></tr>
<tr><td>/api/rag/collections</td><td>GET</td><td>Sammlungen</td></tr>
<tr><td>/api/rag/evaluate</td><td>POST</td><td>Antwort bewerten</td></tr>
</table>
<h3>5. Prompt-Template</h3>
<pre>
System:
Du bist ein hilfreicher Assistent fuer Bildungsinhalte.
Beantworte Fragen basierend auf den gegebenen Dokumenten.
Zitiere immer die Quellen in [1], [2] Format.
Wenn du dir nicht sicher bist, sage es.
Context:
[1] {chunk_1_text}
[2] {chunk_2_text}
[3] {chunk_3_text}
User:
{user_question}
Regeln:
- Nur Informationen aus dem Kontext verwenden
- Bei Unsicherheit "Ich weiss nicht" sagen
- Quellen mit [n] zitieren
- Praezise und strukturiert antworten
</pre>
<h3>6. Qualitaets-Metriken</h3>
<table>
<tr><th>Metrik</th><th>Beschreibung</th><th>Ziel</th></tr>
<tr><td>Precision@K</td><td>Relevante unter Top-K</td><td>> 80%</td></tr>
<tr><td>Recall@K</td><td>Gefundene Relevante</td><td>> 70%</td></tr>
<tr><td>NDCG</td><td>Ranking-Qualitaet</td><td>> 0.85</td></tr>
<tr><td>Answer Relevance</td><td>LLM-Evaluation</td><td>> 4/5</td></tr>
<tr><td>Faithfulness</td><td>Fakten-Treue</td><td>> 95%</td></tr>
</table>
<h3>7. Caching</h3>
<pre>
Cache-Strategie:
├── Query Cache
│ ├── Key: Query Embedding Hash
│ ├── TTL: 24 Stunden
│ └── Invalidierung: Bei Index-Update
├── Retrieval Cache
│ ├── Key: Query + Collection
│ ├── TTL: 1 Stunde
│ └── Top-K Results
└── Answer Cache
├── Key: Query Hash + Context Hash
├── TTL: 24 Stunden
└── Full Response
</pre>
<h3>8. Multi-Tenant</h3>
<ul>
<li><strong>Collection per Tenant:</strong> Isolierte Vektoren</li>
<li><strong>Access Control:</strong> RBAC auf Collection-Ebene</li>
<li><strong>Query Routing:</strong> Automatisch zu Tenant-Collection</li>
<li><strong>Audit-Log:</strong> Pro Tenant</li>
</ul>
<h3>9. Evaluation-Pipeline</h3>
<pre>
Test-Datensatz
├── Frage-Antwort-Paare (Golden Set)
v
┌───────────────────────────────────────┐
│ RAG Pipeline │
└─────────────────┬─────────────────────┘
v
┌───────────────────────────────────────┐
│ Evaluation Metrics │
│ ├── Retrieval: Precision, Recall │
│ ├── Generation: BLEU, ROUGE │
│ ├── LLM-as-Judge: Relevance, Correct │
│ └── Human Eval: Subset │
└─────────────────┬─────────────────────┘
v
┌───────────────────────────────────────┐
│ Report & Regression Detection │
└───────────────────────────────────────┘
</pre>
<h3>10. Optimierungs-Roadmap</h3>
<ol>
<li><strong>Hybrid Search:</strong> Kombination Vektor + Keyword (✓)</li>
<li><strong>Re-Ranking:</strong> Cross-Encoder (✓)</li>
<li><strong>Query Expansion:</strong> LLM-basiert (In Arbeit)</li>
<li><strong>Multi-Hop QA:</strong> Verkettete Abfragen (Geplant)</li>
<li><strong>Self-Reflection:</strong> Antwort-Pruefung (Geplant)</li>
</ol>
`,
}