"""
|
|
LLM Comparison Route - Vergleicht Antworten verschiedener LLM Backends.
|
|
|
|
Dieses Modul ermoeglicht:
|
|
- Parallele Anfragen an OpenAI, Claude, Self-hosted+Tavily, Self-hosted+EduSearch
|
|
- Speichern von Vergleichsergebnissen fuer QA
|
|
- Parameter-Tuning fuer Self-hosted Modelle
|
|
"""

import asyncio
import logging
import time
import uuid
from datetime import datetime, timezone
from typing import Optional

from pydantic import BaseModel, Field
from fastapi import APIRouter, HTTPException, Depends

from ..middleware.auth import verify_api_key

logger = logging.getLogger(__name__)

router = APIRouter(prefix="/comparison", tags=["LLM Comparison"])


class ComparisonRequest(BaseModel):
    """Request for an LLM comparison."""
    prompt: str = Field(..., description="User prompt (e.g. a teacher's question)")
    system_prompt: Optional[str] = Field(None, description="Optional system prompt")
    enable_openai: bool = Field(True, description="Enable OpenAI/ChatGPT")
    enable_claude: bool = Field(True, description="Enable Claude")
    enable_selfhosted_tavily: bool = Field(True, description="Enable self-hosted + Tavily")
    enable_selfhosted_edusearch: bool = Field(True, description="Enable self-hosted + EduSearch")

    # Parameters for self-hosted models
    selfhosted_model: str = Field("llama3.2:3b", description="Self-hosted model")
    temperature: float = Field(0.7, ge=0.0, le=2.0, description="Temperature")
    top_p: float = Field(0.9, ge=0.0, le=1.0, description="Top-p sampling")
    max_tokens: int = Field(2048, ge=1, le=8192, description="Max tokens")

    # Search parameters
    search_results_count: int = Field(5, ge=1, le=20, description="Number of search results")
    edu_search_filters: Optional[dict] = Field(None, description="Filters for EduSearch")


class LLMResponse(BaseModel):
    """Response from a single LLM."""
    provider: str
    model: str
    response: str
    latency_ms: int
    tokens_used: Optional[int] = None
    search_results: Optional[list] = None
    error: Optional[str] = None
    timestamp: datetime = Field(default_factory=lambda: datetime.now(timezone.utc))


class ComparisonResponse(BaseModel):
    """Aggregate response of a comparison run."""
    comparison_id: str
    prompt: str
    system_prompt: Optional[str]
    responses: list[LLMResponse]
    created_at: datetime = Field(default_factory=lambda: datetime.now(timezone.utc))


class SavedComparison(BaseModel):
    """Stored comparison for QA."""
    comparison_id: str
    prompt: str
    system_prompt: Optional[str]
    responses: list[LLMResponse]
    notes: Optional[str] = None
    rating: Optional[dict] = None  # e.g. {"openai": 4, "claude": 5, ...}
    created_at: datetime
    created_by: Optional[str] = None


# In-memory storage (in production: a database)
_comparisons_store: dict[str, SavedComparison] = {}
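# NOTE: both stores are process-local; entries are lost on restart and are
# not shared across multiple workers.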
_system_prompts_store: dict[str, dict] = {
    "default": {
        "id": "default",
        "name": "Standard Lehrer-Assistent",
        "prompt": """Du bist ein hilfreicher Assistent fuer Lehrkraefte in Deutschland.
Deine Aufgaben:
- Hilfe bei der Unterrichtsplanung
- Erklaerung von Fachinhalten
- Erstellung von Arbeitsblaettern und Pruefungen
- Beratung zu paedagogischen Methoden

Antworte immer auf Deutsch und beachte den deutschen Lehrplankontext.""",
        "created_at": datetime.now(timezone.utc).isoformat(),
    },
    "curriculum": {
        "id": "curriculum",
        "name": "Lehrplan-Experte",
        "prompt": """Du bist ein Experte fuer deutsche Lehrplaene und Bildungsstandards.
Du kennst:
- Lehrplaene aller 16 Bundeslaender
- KMK Bildungsstandards
- Kompetenzorientierung im deutschen Bildungssystem

Beziehe dich immer auf konkrete Lehrplanvorgaben wenn moeglich.""",
        "created_at": datetime.now(timezone.utc).isoformat(),
    },
    "worksheet": {
        "id": "worksheet",
        "name": "Arbeitsblatt-Generator",
        "prompt": """Du bist ein spezialisierter Assistent fuer die Erstellung von Arbeitsblaettern.
Erstelle didaktisch sinnvolle Aufgaben mit:
- Klaren Arbeitsanweisungen
- Differenzierungsmoeglichkeiten
- Loesungshinweisen

Format: Markdown mit klarer Struktur.""",
        "created_at": datetime.now(timezone.utc).isoformat(),
    },
}


async def _call_openai(prompt: str, system_prompt: Optional[str]) -> LLMResponse:
    """Calls OpenAI ChatGPT."""
    import os
    import httpx

    start_time = time.time()
    api_key = os.getenv("OPENAI_API_KEY")

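    # A missing key is reported as a structured error response instead of an
    # exception, so one unconfigured provider does not abort the comparison.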
    if not api_key:
        return LLMResponse(
            provider="openai",
            model="gpt-4o-mini",
            response="",
            latency_ms=0,
            error="OPENAI_API_KEY not configured",
        )

    messages = []
    if system_prompt:
        messages.append({"role": "system", "content": system_prompt})
    messages.append({"role": "user", "content": prompt})

    try:
        async with httpx.AsyncClient(timeout=60.0) as client:
            response = await client.post(
                "https://api.openai.com/v1/chat/completions",
                headers={
                    "Authorization": f"Bearer {api_key}",
                    "Content-Type": "application/json",
                },
                json={
                    "model": "gpt-4o-mini",
                    "messages": messages,
                    # Hosted baselines use fixed sampling; the request's tuning
                    # parameters only apply to the self-hosted models.
                    "temperature": 0.7,
                    "max_tokens": 2048,
                },
            )
            response.raise_for_status()
            data = response.json()

        latency_ms = int((time.time() - start_time) * 1000)
        content = data["choices"][0]["message"]["content"]
        tokens = data.get("usage", {}).get("total_tokens")

        return LLMResponse(
            provider="openai",
            model="gpt-4o-mini",
            response=content,
            latency_ms=latency_ms,
            tokens_used=tokens,
        )
    except Exception as e:
        return LLMResponse(
            provider="openai",
            model="gpt-4o-mini",
            response="",
            latency_ms=int((time.time() - start_time) * 1000),
            error=str(e),
        )


async def _call_claude(prompt: str, system_prompt: Optional[str]) -> LLMResponse:
    """Calls Anthropic Claude."""
    import os

    start_time = time.time()
    api_key = os.getenv("ANTHROPIC_API_KEY")

    if not api_key:
        return LLMResponse(
            provider="claude",
            model="claude-3-5-sonnet-20241022",
            response="",
            latency_ms=0,
            error="ANTHROPIC_API_KEY not configured",
        )

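    # anthropic is imported lazily inside the try block, so a missing package
    # surfaces as a structured error rather than breaking module import.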
    try:
        import anthropic

        client = anthropic.AsyncAnthropic(api_key=api_key)

        response = await client.messages.create(
            model="claude-3-5-sonnet-20241022",
            max_tokens=2048,
            system=system_prompt or "",
            messages=[{"role": "user", "content": prompt}],
        )

        latency_ms = int((time.time() - start_time) * 1000)
        content = response.content[0].text if response.content else ""
        tokens = response.usage.input_tokens + response.usage.output_tokens

        return LLMResponse(
            provider="claude",
            model="claude-3-5-sonnet-20241022",
            response=content,
            latency_ms=latency_ms,
            tokens_used=tokens,
        )
    except Exception as e:
        return LLMResponse(
            provider="claude",
            model="claude-3-5-sonnet-20241022",
            response="",
            latency_ms=int((time.time() - start_time) * 1000),
            error=str(e),
        )


async def _search_tavily(query: str, count: int = 5) -> list[dict]:
    """Searches with the Tavily API."""
    import os
    import httpx

    api_key = os.getenv("TAVILY_API_KEY")
    if not api_key:
        return []

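    # Results are restricted to German education sources via the domain
    # allowlist below; widen it if broader coverage is needed.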
    try:
        async with httpx.AsyncClient(timeout=30.0) as client:
            response = await client.post(
                "https://api.tavily.com/search",
                json={
                    "api_key": api_key,
                    "query": query,
                    "max_results": count,
                    "include_domains": [
                        "kmk.org", "bildungsserver.de", "bpb.de",
                        "bayern.de", "nrw.de", "berlin.de",
                    ],
                },
            )
            response.raise_for_status()
            data = response.json()
            return data.get("results", [])
    except Exception as e:
        logger.error(f"Tavily search error: {e}")
        return []


async def _search_edusearch(query: str, count: int = 5, filters: Optional[dict] = None) -> list[dict]:
    """Searches with the EduSearch API."""
    import os
    import httpx

    edu_search_url = os.getenv("EDU_SEARCH_URL", "http://edu-search-service:8084")

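    # Failures degrade to an empty result list, so the self-hosted call still
    # runs, just without retrieval context.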
    try:
        async with httpx.AsyncClient(timeout=30.0) as client:
            payload = {
                "q": query,
                "limit": count,
                "mode": "keyword",
            }
            if filters:
                payload["filters"] = filters

            response = await client.post(
                f"{edu_search_url}/v1/search",
                json=payload,
            )
            response.raise_for_status()
            data = response.json()

            # Normalize results to the shared {title, url, content, score} shape
            results = []
            for r in data.get("results", []):
                results.append({
                    "title": r.get("title", ""),
                    "url": r.get("url", ""),
                    "content": r.get("snippet", ""),
                    "score": r.get("scores", {}).get("final", 0),
                })
            return results
    except Exception as e:
        logger.error(f"EduSearch error: {e}")
        return []


async def _call_selfhosted_with_search(
    prompt: str,
    system_prompt: Optional[str],
    search_provider: str,
    search_results: list[dict],
    model: str,
    temperature: float,
    top_p: float,
    max_tokens: int,
) -> LLMResponse:
    """Calls the self-hosted LLM, augmented with search results."""
    import os
    import httpx

    start_time = time.time()
    ollama_url = os.getenv("OLLAMA_URL", "http://localhost:11434")

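    # Retrieval augmentation: the search hits are injected into the system
    # prompt as a numbered source list the model is asked to cite as [n].
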
    # Build the context block from the search results
    context_parts = []
    for i, result in enumerate(search_results, 1):
        context_parts.append(f"[{i}] {result.get('title', 'Untitled')}")
        context_parts.append(f"  URL: {result.get('url', '')}")
        context_parts.append(f"  {result.get('content', '')[:500]}")  # cap snippet length
        context_parts.append("")

    search_context = "\n".join(context_parts)

    # Extend the system prompt with the search results
    augmented_system = f"""{system_prompt or ''}

Du hast Zugriff auf folgende Suchergebnisse aus {"Tavily" if search_provider == "tavily" else "EduSearch (deutsche Bildungsquellen)"}:

{search_context}

Nutze diese Quellen um deine Antwort zu unterstuetzen. Zitiere relevante Quellen mit [Nummer]."""

    messages = [
        {"role": "system", "content": augmented_system},
        {"role": "user", "content": prompt},
    ]

    try:
        async with httpx.AsyncClient(timeout=120.0) as client:
            response = await client.post(
                f"{ollama_url}/api/chat",
                json={
                    "model": model,
                    "messages": messages,
                    "stream": False,
                    "options": {
                        "temperature": temperature,
                        "top_p": top_p,
                        "num_predict": max_tokens,
                    },
                },
            )
            response.raise_for_status()
            data = response.json()

        latency_ms = int((time.time() - start_time) * 1000)
        content = data.get("message", {}).get("content", "")
        tokens = data.get("prompt_eval_count", 0) + data.get("eval_count", 0)

        return LLMResponse(
            provider=f"selfhosted_{search_provider}",
            model=model,
            response=content,
            latency_ms=latency_ms,
            tokens_used=tokens,
            search_results=search_results,
        )
    except Exception as e:
        return LLMResponse(
            provider=f"selfhosted_{search_provider}",
            model=model,
            response="",
            latency_ms=int((time.time() - start_time) * 1000),
            error=str(e),
            search_results=search_results,
        )
@router.post("/run", response_model=ComparisonResponse)
|
|
async def run_comparison(
|
|
request: ComparisonRequest,
|
|
_: str = Depends(verify_api_key),
|
|
):
|
|
"""
|
|
Fuehrt LLM-Vergleich durch.
|
|
|
|
Sendet den Prompt parallel an alle aktivierten Provider und
|
|
sammelt die Antworten.
|
|
"""
|
|
comparison_id = f"cmp-{uuid.uuid4().hex[:12]}"
|
|
tasks = []
|
|
|
|
# System Prompt vorbereiten
|
|
system_prompt = request.system_prompt
|
|
|
|
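    # The search calls below run sequentially before the fan-out; only the
    # LLM requests themselves are awaited in parallel via asyncio.gather.
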
    # OpenAI
    if request.enable_openai:
        tasks.append(("openai", _call_openai(request.prompt, system_prompt)))

    # Claude
    if request.enable_claude:
        tasks.append(("claude", _call_claude(request.prompt, system_prompt)))

    # Self-hosted + Tavily
    if request.enable_selfhosted_tavily:
        tavily_results = await _search_tavily(request.prompt, request.search_results_count)
        tasks.append((
            "selfhosted_tavily",
            _call_selfhosted_with_search(
                request.prompt,
                system_prompt,
                "tavily",
                tavily_results,
                request.selfhosted_model,
                request.temperature,
                request.top_p,
                request.max_tokens,
            )
        ))

    # Self-hosted + EduSearch
    if request.enable_selfhosted_edusearch:
        edu_results = await _search_edusearch(
            request.prompt,
            request.search_results_count,
            request.edu_search_filters,
        )
        tasks.append((
            "selfhosted_edusearch",
            _call_selfhosted_with_search(
                request.prompt,
                system_prompt,
                "edusearch",
                edu_results,
                request.selfhosted_model,
                request.temperature,
                request.top_p,
                request.max_tokens,
            )
        ))

    # Parallel execution
    responses = []
    if tasks:
        results = await asyncio.gather(*[t[1] for t in tasks], return_exceptions=True)
        for (name, _coro), result in zip(tasks, results):
            if isinstance(result, Exception):
                responses.append(LLMResponse(
                    provider=name,
                    model="unknown",
                    response="",
                    latency_ms=0,
                    error=str(result),
                ))
            else:
                responses.append(result)

    return ComparisonResponse(
        comparison_id=comparison_id,
        prompt=request.prompt,
        system_prompt=system_prompt,
        responses=responses,
    )


@router.post("/save/{comparison_id}")
async def save_comparison(
    comparison_id: str,
    comparison: ComparisonResponse,
    notes: Optional[str] = None,
    rating: Optional[dict] = None,
    _: str = Depends(verify_api_key),
):
    """Saves a comparison for later analysis."""
    saved = SavedComparison(
        comparison_id=comparison_id,
        prompt=comparison.prompt,
        system_prompt=comparison.system_prompt,
        responses=comparison.responses,
        notes=notes,
        rating=rating,
        created_at=comparison.created_at,
    )
    _comparisons_store[comparison_id] = saved
    return {"status": "saved", "comparison_id": comparison_id}


@router.get("/history")
async def get_comparison_history(
    limit: int = 50,
    _: str = Depends(verify_api_key),
):
    """Returns saved comparisons, newest first."""
    comparisons = list(_comparisons_store.values())
    comparisons.sort(key=lambda x: x.created_at, reverse=True)
    return {"comparisons": comparisons[:limit]}


@router.get("/history/{comparison_id}")
async def get_comparison(
    comparison_id: str,
    _: str = Depends(verify_api_key),
):
    """Returns a specific comparison."""
    if comparison_id not in _comparisons_store:
        raise HTTPException(status_code=404, detail="Comparison not found")
    return _comparisons_store[comparison_id]


# System prompt management

@router.get("/prompts")
async def list_system_prompts(
    _: str = Depends(verify_api_key),
):
    """Lists all stored system prompts."""
    return {"prompts": list(_system_prompts_store.values())}


@router.post("/prompts")
async def create_system_prompt(
    name: str,
    prompt: str,
    _: str = Depends(verify_api_key),
):
    """Creates a new system prompt (name and prompt are query parameters)."""
    prompt_id = f"sp-{uuid.uuid4().hex[:8]}"
    _system_prompts_store[prompt_id] = {
        "id": prompt_id,
        "name": name,
        "prompt": prompt,
        "created_at": datetime.now(timezone.utc).isoformat(),
    }
    return {"status": "created", "prompt_id": prompt_id}


@router.put("/prompts/{prompt_id}")
async def update_system_prompt(
    prompt_id: str,
    name: str,
    prompt: str,
    _: str = Depends(verify_api_key),
):
    """Updates a system prompt."""
    if prompt_id not in _system_prompts_store:
        raise HTTPException(status_code=404, detail="System prompt not found")

    _system_prompts_store[prompt_id].update({
        "name": name,
        "prompt": prompt,
        "updated_at": datetime.now(timezone.utc).isoformat(),
    })
    return {"status": "updated", "prompt_id": prompt_id}


@router.delete("/prompts/{prompt_id}")
async def delete_system_prompt(
    prompt_id: str,
    _: str = Depends(verify_api_key),
):
    """Deletes a system prompt."""
    if prompt_id not in _system_prompts_store:
        raise HTTPException(status_code=404, detail="System prompt not found")
    if prompt_id in ["default", "curriculum", "worksheet"]:
        raise HTTPException(status_code=400, detail="Built-in prompts cannot be deleted")

    del _system_prompts_store[prompt_id]
    return {"status": "deleted", "prompt_id": prompt_id}


@router.get("/prompts/{prompt_id}")
async def get_system_prompt(
    prompt_id: str,
    _: str = Depends(verify_api_key),
):
    """Returns a single system prompt."""
    if prompt_id not in _system_prompts_store:
        raise HTTPException(status_code=404, detail="System prompt not found")
    return _system_prompts_store[prompt_id]