Files
breakpilot-lehrer/backend-lehrer/llm_gateway/routes/comparison.py
Benjamin Boenisch 5a31f52310 Initial commit: breakpilot-lehrer - Lehrer KI Platform
Services: Admin-Lehrer, Backend-Lehrer, Studio v2, Website,
Klausur-Service, School-Service, Voice-Service, Geo-Service,
BreakPilot Drive, Agent-Core

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-02-11 23:47:26 +01:00

585 lines
18 KiB
Python

"""
LLM Comparison Route - Vergleicht Antworten verschiedener LLM Backends.
Dieses Modul ermoeglicht:
- Parallele Anfragen an OpenAI, Claude, Self-hosted+Tavily, Self-hosted+EduSearch
- Speichern von Vergleichsergebnissen fuer QA
- Parameter-Tuning fuer Self-hosted Modelle
"""
import asyncio
import logging
import time
import uuid
from datetime import datetime, timezone
from typing import Optional
from pydantic import BaseModel, Field
from fastapi import APIRouter, HTTPException, Depends
from ..models.chat import ChatMessage
from ..middleware.auth import verify_api_key
logger = logging.getLogger(__name__)
router = APIRouter(prefix="/comparison", tags=["LLM Comparison"])
class ComparisonRequest(BaseModel):
    """Request payload for one LLM comparison run.

    Selects which providers to query and carries the sampling/search
    parameters. Note: temperature/top_p/max_tokens are applied to the
    self-hosted (Ollama) calls only — the OpenAI and Claude helpers use
    their own fixed parameters.
    """
    prompt: str = Field(..., description="User prompt (z.B. Lehrer-Frage)")
    system_prompt: Optional[str] = Field(None, description="Optionaler System Prompt")
    # Provider toggles — all enabled by default.
    enable_openai: bool = Field(True, description="OpenAI/ChatGPT aktivieren")
    enable_claude: bool = Field(True, description="Claude aktivieren")
    enable_selfhosted_tavily: bool = Field(True, description="Self-hosted + Tavily aktivieren")
    enable_selfhosted_edusearch: bool = Field(True, description="Self-hosted + EduSearch aktivieren")
    # Parameters for the self-hosted models
    selfhosted_model: str = Field("llama3.2:3b", description="Self-hosted Modell")
    temperature: float = Field(0.7, ge=0.0, le=2.0, description="Temperature")
    top_p: float = Field(0.9, ge=0.0, le=1.0, description="Top-p Sampling")
    max_tokens: int = Field(2048, ge=1, le=8192, description="Max Tokens")
    # Search parameters (Tavily and EduSearch lookups)
    search_results_count: int = Field(5, ge=1, le=20, description="Anzahl Suchergebnisse")
    edu_search_filters: Optional[dict] = Field(None, description="Filter fuer EduSearch")
class LLMResponse(BaseModel):
    """Answer of a single LLM provider within one comparison run."""
    provider: str  # e.g. "openai", "claude", "selfhosted_tavily", "selfhosted_edusearch"
    model: str
    response: str  # empty string when the provider call failed
    latency_ms: int
    tokens_used: Optional[int] = None
    search_results: Optional[list] = None  # raw hits fed to the self-hosted model, if any
    error: Optional[str] = None  # set instead of raising on provider failure
    # Fix: datetime.utcnow is deprecated (Python 3.12) and produces a naive
    # timestamp; use an aware UTC timestamp, consistent with the
    # datetime.now(timezone.utc) usage elsewhere in this module.
    timestamp: datetime = Field(default_factory=lambda: datetime.now(timezone.utc))
class ComparisonResponse(BaseModel):
    """Aggregated result of a comparison run across all enabled providers."""
    comparison_id: str
    prompt: str
    system_prompt: Optional[str]
    responses: list[LLMResponse]
    # Fix: replace deprecated, naive datetime.utcnow with an aware UTC
    # timestamp, matching datetime.now(timezone.utc) used elsewhere here.
    created_at: datetime = Field(default_factory=lambda: datetime.now(timezone.utc))
class SavedComparison(BaseModel):
    """A comparison persisted (in memory) for later QA analysis."""
    comparison_id: str
    prompt: str
    system_prompt: Optional[str]
    responses: list[LLMResponse]
    notes: Optional[str] = None
    # Per-provider rating, e.g. {"openai": 4, "claude": 5, ...}
    rating: Optional[dict] = None
    created_at: datetime
    created_by: Optional[str] = None
# In-memory storage (in production: use a database). State is lost on
# restart and is not shared across worker processes.
_comparisons_store: dict[str, SavedComparison] = {}

# Built-in system prompts. The ids "default", "curriculum" and "worksheet"
# are protected against deletion (see delete_system_prompt).
_system_prompts_store: dict[str, dict] = {
    "default": {
        "id": "default",
        "name": "Standard Lehrer-Assistent",
        "prompt": """Du bist ein hilfreicher Assistent fuer Lehrkraefte in Deutschland.
Deine Aufgaben:
- Hilfe bei der Unterrichtsplanung
- Erklaerung von Fachinhalten
- Erstellung von Arbeitsblaettern und Pruefungen
- Beratung zu paedagogischen Methoden
Antworte immer auf Deutsch und beachte den deutschen Lehrplankontext.""",
        "created_at": datetime.now(timezone.utc).isoformat(),
    },
    "curriculum": {
        "id": "curriculum",
        "name": "Lehrplan-Experte",
        "prompt": """Du bist ein Experte fuer deutsche Lehrplaene und Bildungsstandards.
Du kennst:
- Lehrplaene aller 16 Bundeslaender
- KMK Bildungsstandards
- Kompetenzorientierung im deutschen Bildungssystem
Beziehe dich immer auf konkrete Lehrplanvorgaben wenn moeglich.""",
        "created_at": datetime.now(timezone.utc).isoformat(),
    },
    "worksheet": {
        "id": "worksheet",
        "name": "Arbeitsblatt-Generator",
        "prompt": """Du bist ein spezialisierter Assistent fuer die Erstellung von Arbeitsblaettern.
Erstelle didaktisch sinnvolle Aufgaben mit:
- Klaren Arbeitsanweisungen
- Differenzierungsmoeglichkeiten
- Loesungshinweisen
Format: Markdown mit klarer Struktur.""",
        "created_at": datetime.now(timezone.utc).isoformat(),
    },
}
async def _call_openai(
    prompt: str,
    system_prompt: Optional[str],
    *,
    model: str = "gpt-4o-mini",
    temperature: float = 0.7,
    max_tokens: int = 2048,
) -> LLMResponse:
    """Query the OpenAI chat completions API.

    Args:
        prompt: User message.
        system_prompt: Optional system message prepended to the conversation.
        model: OpenAI model name (keyword-only; default unchanged).
        temperature: Sampling temperature (was hard-coded to 0.7).
        max_tokens: Response token cap (was hard-coded to 2048).

    Returns:
        LLMResponse; failures are reported via ``error`` instead of raising,
        so one broken provider never aborts a comparison run.
    """
    import os
    start_time = time.time()
    api_key = os.getenv("OPENAI_API_KEY")
    if not api_key:
        return LLMResponse(
            provider="openai",
            model=model,
            response="",
            latency_ms=0,
            error="OPENAI_API_KEY nicht konfiguriert",
        )
    import httpx  # deferred: only needed once we actually make the request
    messages = []
    if system_prompt:
        messages.append({"role": "system", "content": system_prompt})
    messages.append({"role": "user", "content": prompt})
    try:
        async with httpx.AsyncClient(timeout=60.0) as client:
            response = await client.post(
                "https://api.openai.com/v1/chat/completions",
                headers={
                    "Authorization": f"Bearer {api_key}",
                    "Content-Type": "application/json",
                },
                json={
                    "model": model,
                    "messages": messages,
                    "temperature": temperature,
                    "max_tokens": max_tokens,
                },
            )
            response.raise_for_status()
        data = response.json()
        latency_ms = int((time.time() - start_time) * 1000)
        content = data["choices"][0]["message"]["content"]
        tokens = data.get("usage", {}).get("total_tokens")
        return LLMResponse(
            provider="openai",
            model=model,
            response=content,
            latency_ms=latency_ms,
            tokens_used=tokens,
        )
    except Exception as e:
        return LLMResponse(
            provider="openai",
            model=model,
            response="",
            latency_ms=int((time.time() - start_time) * 1000),
            error=str(e),
        )
async def _call_claude(
    prompt: str,
    system_prompt: Optional[str],
    *,
    model: str = "claude-3-5-sonnet-20241022",
    max_tokens: int = 2048,
) -> LLMResponse:
    """Query Anthropic Claude via the ``anthropic`` SDK.

    Args:
        prompt: User message.
        system_prompt: Optional system prompt; an empty string is sent when None.
        model: Anthropic model name (keyword-only; default unchanged).
        max_tokens: Response token cap (was hard-coded to 2048).

    Returns:
        LLMResponse; failures (including a missing ``anthropic`` package)
        are captured in ``error`` instead of raising.
    """
    import os
    start_time = time.time()
    api_key = os.getenv("ANTHROPIC_API_KEY")
    if not api_key:
        return LLMResponse(
            provider="claude",
            model=model,
            response="",
            latency_ms=0,
            error="ANTHROPIC_API_KEY nicht konfiguriert",
        )
    try:
        import anthropic
        client = anthropic.AsyncAnthropic(api_key=api_key)
        result = await client.messages.create(
            model=model,
            max_tokens=max_tokens,
            system=system_prompt or "",
            messages=[{"role": "user", "content": prompt}],
        )
        latency_ms = int((time.time() - start_time) * 1000)
        content = result.content[0].text if result.content else ""
        tokens = result.usage.input_tokens + result.usage.output_tokens
        return LLMResponse(
            provider="claude",
            model=model,
            response=content,
            latency_ms=latency_ms,
            tokens_used=tokens,
        )
    except Exception as e:
        return LLMResponse(
            provider="claude",
            model=model,
            response="",
            latency_ms=int((time.time() - start_time) * 1000),
            error=str(e),
        )
async def _search_tavily(query: str, count: int = 5) -> list[dict]:
    """Search the web via the Tavily API, restricted to German education domains.

    Args:
        query: Search query (typically the user's prompt).
        count: Maximum number of results to request.

    Returns:
        Tavily's raw result dicts; ``[]`` when no API key is configured or
        the request fails (errors are logged, never raised).
    """
    import os
    api_key = os.getenv("TAVILY_API_KEY")
    if not api_key:
        # No key configured: skip the search entirely.
        return []
    # Deferred import so the unconfigured path has no hard httpx dependency.
    import httpx
    try:
        async with httpx.AsyncClient(timeout=30.0) as client:
            response = await client.post(
                "https://api.tavily.com/search",
                json={
                    "api_key": api_key,
                    "query": query,
                    "max_results": count,
                    # Restrict hits to curated German education/government sources.
                    "include_domains": [
                        "kmk.org", "bildungsserver.de", "bpb.de",
                        "bayern.de", "nrw.de", "berlin.de",
                    ],
                },
            )
            response.raise_for_status()
        data = response.json()
        return data.get("results", [])
    except Exception as e:
        logger.error(f"Tavily search error: {e}")
        return []
async def _search_edusearch(query: str, count: int = 5, filters: Optional[dict] = None) -> list[dict]:
    """Query the EduSearch service and normalize its hits.

    Args:
        query: Keyword query forwarded as-is.
        count: Maximum number of hits to request.
        filters: Optional EduSearch filter object, passed through when set.

    Returns:
        A list of ``{title, url, content, score}`` dicts; ``[]`` on any
        error (the error is logged, never raised).
    """
    import os
    import httpx
    base_url = os.getenv("EDU_SEARCH_URL", "http://edu-search-service:8084")
    try:
        request_body = {
            "q": query,
            "limit": count,
            "mode": "keyword",
        }
        if filters:
            request_body["filters"] = filters
        async with httpx.AsyncClient(timeout=30.0) as client:
            resp = await client.post(
                f"{base_url}/v1/search",
                json=request_body,
            )
            resp.raise_for_status()
        payload = resp.json()
        # Normalize the raw hits into the common search-result shape.
        return [
            {
                "title": hit.get("title", ""),
                "url": hit.get("url", ""),
                "content": hit.get("snippet", ""),
                "score": hit.get("scores", {}).get("final", 0),
            }
            for hit in payload.get("results", [])
        ]
    except Exception as e:
        logger.error(f"EduSearch error: {e}")
        return []
async def _call_selfhosted_with_search(
    prompt: str,
    system_prompt: Optional[str],
    search_provider: str,
    search_results: list[dict],
    model: str,
    temperature: float,
    top_p: float,
    max_tokens: int,
) -> LLMResponse:
    """Query a self-hosted (Ollama) model, grounding it with search results.

    The search hits are rendered into a numbered context block appended to
    the system prompt so the model can cite sources as [n].

    Args:
        search_provider: "tavily" or "edusearch"; used for labelling the
            context and for the response's ``provider`` field.
        search_results: Hits from _search_tavily/_search_edusearch.
        model, temperature, top_p, max_tokens: Forwarded to Ollama's options.

    Returns:
        LLMResponse (search_results echoed back); failures are reported via
        ``error`` instead of raising.
    """
    import os
    import httpx
    start_time = time.time()
    ollama_url = os.getenv("OLLAMA_URL", "http://localhost:11434")
    # Render the numbered search context; content is truncated to 500 chars
    # per hit to keep the prompt bounded.
    context_parts = []
    for i, result in enumerate(search_results, 1):
        context_parts.append(f"[{i}] {result.get('title', 'Untitled')}")
        context_parts.append(f" URL: {result.get('url', '')}")
        context_parts.append(f" {result.get('content', '')[:500]}")
        context_parts.append("")
    search_context = "\n".join(context_parts)
    # Fix: the provider label used to be a nested same-quote expression inside
    # the f-string, which is a SyntaxError before Python 3.12 (PEP 701).
    # Hoisting it keeps the resulting string byte-identical.
    source_label = "Tavily" if search_provider == "tavily" else "EduSearch (deutsche Bildungsquellen)"
    augmented_system = f"""{system_prompt or ''}
Du hast Zugriff auf folgende Suchergebnisse aus {source_label}:
{search_context}
Nutze diese Quellen um deine Antwort zu unterstuetzen. Zitiere relevante Quellen mit [Nummer]."""
    messages = [
        {"role": "system", "content": augmented_system},
        {"role": "user", "content": prompt},
    ]
    try:
        async with httpx.AsyncClient(timeout=120.0) as client:
            response = await client.post(
                f"{ollama_url}/api/chat",
                json={
                    "model": model,
                    "messages": messages,
                    "stream": False,
                    "options": {
                        "temperature": temperature,
                        "top_p": top_p,
                        "num_predict": max_tokens,
                    },
                },
            )
            response.raise_for_status()
        data = response.json()
        latency_ms = int((time.time() - start_time) * 1000)
        content = data.get("message", {}).get("content", "")
        tokens = data.get("prompt_eval_count", 0) + data.get("eval_count", 0)
        return LLMResponse(
            provider=f"selfhosted_{search_provider}",
            model=model,
            response=content,
            latency_ms=latency_ms,
            tokens_used=tokens,
            search_results=search_results,
        )
    except Exception as e:
        return LLMResponse(
            provider=f"selfhosted_{search_provider}",
            model=model,
            response="",
            latency_ms=int((time.time() - start_time) * 1000),
            error=str(e),
            search_results=search_results,
        )
@router.post("/run", response_model=ComparisonResponse)
async def run_comparison(
    request: ComparisonRequest,
    _: str = Depends(verify_api_key),
):
    """
    Run an LLM comparison.

    Sends the prompt to all enabled providers and collects the answers.
    Improvement: the Tavily/EduSearch lookups previously ran sequentially
    before any LLM was called; each search is now bundled with its
    self-hosted LLM call into a pipeline coroutine so all providers
    (including both searches) execute concurrently.
    """
    comparison_id = f"cmp-{uuid.uuid4().hex[:12]}"
    system_prompt = request.system_prompt

    async def _tavily_pipeline() -> LLMResponse:
        # Search first, then ground the self-hosted model with the hits.
        hits = await _search_tavily(request.prompt, request.search_results_count)
        return await _call_selfhosted_with_search(
            request.prompt,
            system_prompt,
            "tavily",
            hits,
            request.selfhosted_model,
            request.temperature,
            request.top_p,
            request.max_tokens,
        )

    async def _edusearch_pipeline() -> LLMResponse:
        hits = await _search_edusearch(
            request.prompt,
            request.search_results_count,
            request.edu_search_filters,
        )
        return await _call_selfhosted_with_search(
            request.prompt,
            system_prompt,
            "edusearch",
            hits,
            request.selfhosted_model,
            request.temperature,
            request.top_p,
            request.max_tokens,
        )

    # (provider name, coroutine) pairs for every enabled provider.
    tasks = []
    if request.enable_openai:
        tasks.append(("openai", _call_openai(request.prompt, system_prompt)))
    if request.enable_claude:
        tasks.append(("claude", _call_claude(request.prompt, system_prompt)))
    if request.enable_selfhosted_tavily:
        tasks.append(("selfhosted_tavily", _tavily_pipeline()))
    if request.enable_selfhosted_edusearch:
        tasks.append(("selfhosted_edusearch", _edusearch_pipeline()))

    responses = []
    if tasks:
        results = await asyncio.gather(*(coro for _name, coro in tasks), return_exceptions=True)
        for (name, _coro), result in zip(tasks, results):
            if isinstance(result, Exception):
                # Defensive: the provider helpers capture their own errors,
                # but a gather-level exception must not kill the whole run.
                responses.append(LLMResponse(
                    provider=name,
                    model="unknown",
                    response="",
                    latency_ms=0,
                    error=str(result),
                ))
            else:
                responses.append(result)
    return ComparisonResponse(
        comparison_id=comparison_id,
        prompt=request.prompt,
        system_prompt=system_prompt,
        responses=responses,
    )
@router.post("/save/{comparison_id}")
async def save_comparison(
    comparison_id: str,
    comparison: ComparisonResponse,
    # NOTE(review): `notes` binds as a query parameter, while `rating` (a dict)
    # is read from the request body by FastAPI — confirm clients send them there.
    notes: Optional[str] = None,
    rating: Optional[dict] = None,
    _: str = Depends(verify_api_key),
):
    """Persist a comparison (in memory) for later QA analysis.

    The path ``comparison_id`` is used as the storage key as-is; it is not
    validated against ``comparison.comparison_id``, and an existing entry
    with the same id is overwritten.
    """
    saved = SavedComparison(
        comparison_id=comparison_id,
        prompt=comparison.prompt,
        system_prompt=comparison.system_prompt,
        responses=comparison.responses,
        notes=notes,
        rating=rating,
        created_at=comparison.created_at,
    )
    _comparisons_store[comparison_id] = saved
    return {"status": "saved", "comparison_id": comparison_id}
@router.get("/history")
async def get_comparison_history(
    limit: int = 50,
    _: str = Depends(verify_api_key),
):
    """Return up to ``limit`` saved comparisons, newest first."""
    newest_first = sorted(
        _comparisons_store.values(),
        key=lambda saved: saved.created_at,
        reverse=True,
    )
    return {"comparisons": newest_first[:limit]}
@router.get("/history/{comparison_id}")
async def get_comparison(
    comparison_id: str,
    _: str = Depends(verify_api_key),
):
    """Return a single saved comparison by id (404 if unknown)."""
    saved = _comparisons_store.get(comparison_id)
    if saved is None:
        raise HTTPException(status_code=404, detail="Vergleich nicht gefunden")
    return saved
# System Prompt Management
@router.get("/prompts")
async def list_system_prompts(
    _: str = Depends(verify_api_key),
):
    """Return every stored system prompt."""
    all_prompts = [*_system_prompts_store.values()]
    return {"prompts": all_prompts}
@router.post("/prompts")
async def create_system_prompt(
    name: str,
    prompt: str,
    _: str = Depends(verify_api_key),
):
    """Store a new system prompt and return its generated id."""
    new_id = f"sp-{uuid.uuid4().hex[:8]}"
    record = {
        "id": new_id,
        "name": name,
        "prompt": prompt,
        "created_at": datetime.now(timezone.utc).isoformat(),
    }
    _system_prompts_store[new_id] = record
    return {"status": "created", "prompt_id": new_id}
@router.put("/prompts/{prompt_id}")
async def update_system_prompt(
    prompt_id: str,
    name: str,
    prompt: str,
    _: str = Depends(verify_api_key),
):
    """Overwrite name and text of an existing system prompt (404 if unknown)."""
    if prompt_id not in _system_prompts_store:
        raise HTTPException(status_code=404, detail="System Prompt nicht gefunden")
    entry = _system_prompts_store[prompt_id]
    entry["name"] = name
    entry["prompt"] = prompt
    entry["updated_at"] = datetime.now(timezone.utc).isoformat()
    return {"status": "updated", "prompt_id": prompt_id}
@router.delete("/prompts/{prompt_id}")
async def delete_system_prompt(
    prompt_id: str,
    _: str = Depends(verify_api_key),
):
    """Delete a stored system prompt; the built-in prompts are protected."""
    if prompt_id not in _system_prompts_store:
        raise HTTPException(status_code=404, detail="System Prompt nicht gefunden")
    # The three seeded prompts must survive.
    if prompt_id in ("default", "curriculum", "worksheet"):
        raise HTTPException(status_code=400, detail="Standard-Prompts koennen nicht geloescht werden")
    _system_prompts_store.pop(prompt_id)
    return {"status": "deleted", "prompt_id": prompt_id}
@router.get("/prompts/{prompt_id}")
async def get_system_prompt(
    prompt_id: str,
    _: str = Depends(verify_api_key),
):
    """Return a single system prompt by id (404 if unknown)."""
    try:
        return _system_prompts_store[prompt_id]
    except KeyError:
        raise HTTPException(status_code=404, detail="System Prompt nicht gefunden")