// RAG System-Info configuration (static data; shape defined in ./types).
import type { SystemInfoConfig } from './types'
export const ragConfig: SystemInfoConfig = {
|
|
title: 'RAG System-Info',
|
|
description: 'Retrieval Augmented Generation fuer intelligente Dokumentensuche.',
|
|
version: '2.5',
|
|
architecture: {
|
|
layers: [
|
|
{ title: 'RAG UI', components: ['Chat Interface', 'Document Viewer', 'Source Citations'], color: '#3b82f6' },
|
|
{ title: 'RAG Pipeline', components: ['Query Processor', 'Retriever', 'Generator'], color: '#8b5cf6' },
|
|
{ title: 'Vector Store', components: ['Qdrant', 'Embedding Service', 'Index Manager'], color: '#10b981' },
|
|
{ title: 'Document Store', components: ['MinIO', 'PostgreSQL', 'Chunk Cache'], color: '#f59e0b' },
|
|
],
|
|
},
|
|
features: [
|
|
{ name: 'Semantic Search', status: 'active', description: 'Bedeutungsbasierte Suche' },
|
|
{ name: 'Source Citations', status: 'active', description: 'Quellenangaben in Antworten' },
|
|
{ name: 'Multi-Document', status: 'active', description: 'Suche ueber mehrere Dokumente' },
|
|
{ name: 'Hybrid Search', status: 'active', description: 'Kombination semantisch + keyword' },
|
|
{ name: 'Conversation Memory', status: 'planned', description: 'Kontextbewahrung' },
|
|
],
|
|
roadmap: [
|
|
{ phase: 'Phase 1: Quality (Q1)', priority: 'high', items: ['Relevanz-Tuning', 'Chunk Optimization', 'Re-Ranking', 'Answer Quality'] },
|
|
{ phase: 'Phase 2: Scale (Q2)', priority: 'high', items: ['Index Sharding', 'Cache Layer', 'Batch Processing', 'Multi-Tenant'] },
|
|
{ phase: 'Phase 3: Intelligence (Q3)', priority: 'medium', items: ['Query Expansion', 'Self-Reflection', 'Multi-Hop QA', 'Knowledge Graph'] },
|
|
],
|
|
technicalDetails: [
|
|
{ component: 'Vector DB', technology: 'Qdrant', version: '1.12+', description: 'Similarity Search' },
|
|
{ component: 'Embeddings', technology: 'OpenAI', description: 'text-embedding-3-large' },
|
|
{ component: 'LLM', technology: 'GPT-4o / Claude', description: 'Answer Generation' },
|
|
{ component: 'Chunking', technology: 'LangChain', description: 'Document Processing' },
|
|
],
|
|
privacyNotes: [
|
|
'Dokumente werden lokal verarbeitet',
|
|
'Embeddings enthalten keine Klartextdaten',
|
|
'LLM-Anfragen sind tenant-isoliert',
|
|
],
|
|
auditInfo: [
|
|
{
|
|
category: 'RAG Pipeline',
|
|
items: [
|
|
{ label: 'Indexierte Dokumente', value: 'Tracking', status: 'ok' },
|
|
{ label: 'Vector DB Status', value: 'Qdrant Online', status: 'ok' },
|
|
{ label: 'Embedding Model', value: 'text-embedding-3', status: 'ok' },
|
|
{ label: 'LLM Provider', value: 'Multi-Provider', status: 'ok' },
|
|
],
|
|
},
|
|
{
|
|
category: 'Qualitaet',
|
|
items: [
|
|
{ label: 'Retrieval Precision', value: 'Tracking', status: 'ok' },
|
|
{ label: 'Answer Accuracy', value: 'Evaluation', status: 'ok' },
|
|
{ label: 'Halluzinations-Rate', value: 'Monitoring', status: 'ok' },
|
|
{ label: 'Citation Accuracy', value: '> 95%', status: 'ok' },
|
|
],
|
|
},
|
|
{
|
|
category: 'Performance',
|
|
items: [
|
|
{ label: 'Avg. Latency', value: '< 3s', status: 'ok' },
|
|
{ label: 'Cache Hit Rate', value: '> 30%', status: 'ok' },
|
|
{ label: 'Concurrent Users', value: 'Skalierbar', status: 'ok' },
|
|
{ label: 'Cost per Query', value: 'Tracking', status: 'ok' },
|
|
],
|
|
},
|
|
],
|
|
fullDocumentation: `
|
|
<h2>RAG System - Retrieval Augmented Generation</h2>
|
|
|
|
<h3>1. Uebersicht</h3>
|
|
<p>Das RAG-System kombiniert Dokumenten-Retrieval mit LLM-Generation fuer praezise, quellenbasierte Antworten auf Nutzeranfragen.</p>
|
|
|
|
<h3>2. RAG-Pipeline</h3>
|
|
<pre>
|
|
User Query
|
|
│
|
|
v
|
|
┌───────────────────────────────────────────────────────────────┐
|
|
│ Query Processing │
|
|
│ ├── Query Analysis (Intent, Entities) │
|
|
│ ├── Query Expansion (Synonyme, Reformulierung) │
|
|
│ └── Embedding Generation │
|
|
└────────────────────────────┬──────────────────────────────────┘
|
|
│
|
|
v
|
|
┌───────────────────────────────────────────────────────────────┐
|
|
│ Retrieval │
|
|
│ ┌─────────────────┐ ┌─────────────────┐ │
|
|
│ │ Vector Search │ │ Keyword Search │ │
|
|
│ │ (Qdrant) │ │ (PostgreSQL) │ │
|
|
│ └────────┬────────┘ └────────┬────────┘ │
|
|
│ │ │ │
|
|
│ └──────────┬───────────┘ │
|
|
│ v │
|
|
│ ┌─────────────────────┐ │
|
|
│ │ Hybrid Fusion │ │
|
|
│ │ (RRF Algorithm) │ │
|
|
│ └─────────────────────┘ │
|
|
└────────────────────────┬──────────────────────────────────────┘
|
|
│
|
|
v
|
|
┌───────────────────────────────────────────────────────────────┐
|
|
│ Re-Ranking │
|
|
│ ├── Cross-Encoder Scoring │
|
|
│ ├── Metadata Filtering │
|
|
│ └── Diversity Sampling │
|
|
└────────────────────────┬──────────────────────────────────────┘
|
|
│
|
|
v
|
|
┌───────────────────────────────────────────────────────────────┐
|
|
│ Generation │
|
|
│ ├── Context Assembly │
|
|
│ ├── Prompt Engineering │
|
|
│ ├── LLM Call (GPT-4o / Claude) │
|
|
│ └── Citation Extraction │
|
|
└────────────────────────┬──────────────────────────────────────┘
|
|
│
|
|
v
|
|
┌───────────────────────────────────────────────────────────────┐
|
|
│ Response │
|
|
│ ├── Answer Text │
|
|
│ ├── Source Citations │
|
|
│ └── Confidence Score │
|
|
└───────────────────────────────────────────────────────────────┘
|
|
</pre>
|
|
|
|
<h3>3. Chunking-Strategie</h3>
|
|
<table>
|
|
<tr><th>Parameter</th><th>Wert</th><th>Grund</th></tr>
|
|
<tr><td>Chunk Size</td><td>512 Tokens</td><td>Optimal fuer Retrieval</td></tr>
|
|
<tr><td>Overlap</td><td>50 Tokens</td><td>Kontext-Erhalt</td></tr>
|
|
<tr><td>Splitter</td><td>Semantic</td><td>Natuerliche Grenzen</td></tr>
|
|
<tr><td>Metadata</td><td>Document, Page, Section</td><td>Zitation</td></tr>
|
|
</table>
|
|
|
|
<h3>4. API Endpoints</h3>
|
|
<table>
|
|
<tr><th>Endpoint</th><th>Methode</th><th>Beschreibung</th></tr>
|
|
<tr><td>/api/rag/query</td><td>POST</td><td>Frage stellen</td></tr>
|
|
<tr><td>/api/rag/documents</td><td>GET</td><td>Dokumente listen</td></tr>
|
|
<tr><td>/api/rag/documents</td><td>POST</td><td>Dokument indexieren</td></tr>
|
|
<tr><td>/api/rag/collections</td><td>GET</td><td>Sammlungen</td></tr>
|
|
<tr><td>/api/rag/evaluate</td><td>POST</td><td>Antwort bewerten</td></tr>
|
|
</table>
|
|
|
|
<h3>5. Prompt-Template</h3>
|
|
<pre>
|
|
System:
|
|
Du bist ein hilfreicher Assistent fuer Bildungsinhalte.
|
|
Beantworte Fragen basierend auf den gegebenen Dokumenten.
|
|
Zitiere immer die Quellen in [1], [2] Format.
|
|
Wenn du dir nicht sicher bist, sage es.
|
|
|
|
Context:
|
|
[1] {chunk_1_text}
|
|
[2] {chunk_2_text}
|
|
[3] {chunk_3_text}
|
|
|
|
User:
|
|
{user_question}
|
|
|
|
Regeln:
|
|
- Nur Informationen aus dem Kontext verwenden
|
|
- Bei Unsicherheit "Ich weiss nicht" sagen
|
|
- Quellen mit [n] zitieren
|
|
- Praezise und strukturiert antworten
|
|
</pre>
|
|
|
|
<h3>6. Qualitaets-Metriken</h3>
|
|
<table>
|
|
<tr><th>Metrik</th><th>Beschreibung</th><th>Ziel</th></tr>
|
|
<tr><td>Precision@K</td><td>Relevante unter Top-K</td><td>> 80%</td></tr>
|
|
<tr><td>Recall@K</td><td>Gefundene Relevante</td><td>> 70%</td></tr>
|
|
<tr><td>NDCG</td><td>Ranking-Qualitaet</td><td>> 0.85</td></tr>
|
|
<tr><td>Answer Relevance</td><td>LLM-Evaluation</td><td>> 4/5</td></tr>
|
|
<tr><td>Faithfulness</td><td>Fakten-Treue</td><td>> 95%</td></tr>
|
|
</table>
|
|
|
|
<h3>7. Caching</h3>
|
|
<pre>
|
|
Cache-Strategie:
|
|
├── Query Cache
|
|
│ ├── Key: Query Embedding Hash
|
|
│ ├── TTL: 24 Stunden
|
|
│ └── Invalidierung: Bei Index-Update
|
|
│
|
|
├── Retrieval Cache
|
|
│ ├── Key: Query + Collection
|
|
│ ├── TTL: 1 Stunde
|
|
│ └── Top-K Results
|
|
│
|
|
└── Answer Cache
|
|
├── Key: Query Hash + Context Hash
|
|
├── TTL: 24 Stunden
|
|
└── Full Response
|
|
</pre>
|
|
|
|
<h3>8. Multi-Tenant</h3>
|
|
<ul>
|
|
<li><strong>Collection per Tenant:</strong> Isolierte Vektoren</li>
|
|
<li><strong>Access Control:</strong> RBAC auf Collection-Ebene</li>
|
|
<li><strong>Query Routing:</strong> Automatisch zu Tenant-Collection</li>
|
|
<li><strong>Audit-Log:</strong> Pro Tenant</li>
|
|
</ul>
|
|
|
|
<h3>9. Evaluation-Pipeline</h3>
|
|
<pre>
|
|
Test-Datensatz
|
|
│
|
|
├── Frage-Antwort-Paare (Golden Set)
|
|
│
|
|
v
|
|
┌───────────────────────────────────────┐
|
|
│ RAG Pipeline │
|
|
└─────────────────┬─────────────────────┘
|
|
│
|
|
v
|
|
┌───────────────────────────────────────┐
|
|
│ Evaluation Metrics │
|
|
│ ├── Retrieval: Precision, Recall │
|
|
│ ├── Generation: BLEU, ROUGE │
|
|
│ ├── LLM-as-Judge: Relevance, Correct │
|
|
│ └── Human Eval: Subset │
|
|
└─────────────────┬─────────────────────┘
|
|
│
|
|
v
|
|
┌───────────────────────────────────────┐
|
|
│ Report & Regression Detection │
|
|
└───────────────────────────────────────┘
|
|
</pre>
|
|
|
|
<h3>10. Optimierungs-Roadmap</h3>
|
|
<ol>
|
|
<li><strong>Hybrid Search:</strong> Kombination Vektor + Keyword (✓)</li>
|
|
<li><strong>Re-Ranking:</strong> Cross-Encoder (✓)</li>
|
|
<li><strong>Query Expansion:</strong> LLM-basiert (In Arbeit)</li>
|
|
<li><strong>Multi-Hop QA:</strong> Verkettete Abfragen (Geplant)</li>
|
|
<li><strong>Self-Reflection:</strong> Antwort-Pruefung (Geplant)</li>
|
|
</ol>
|
|
`,
|
|
}
|