""" LLM Comparison Route - Vergleicht Antworten verschiedener LLM Backends. Dieses Modul ermoeglicht: - Parallele Anfragen an OpenAI, Claude, Self-hosted+Tavily, Self-hosted+EduSearch - Speichern von Vergleichsergebnissen fuer QA - Parameter-Tuning fuer Self-hosted Modelle """ import asyncio import logging import time import uuid from datetime import datetime, timezone from typing import Optional from pydantic import BaseModel, Field from fastapi import APIRouter, HTTPException, Depends from ..models.chat import ChatMessage from ..middleware.auth import verify_api_key logger = logging.getLogger(__name__) router = APIRouter(prefix="/comparison", tags=["LLM Comparison"]) class ComparisonRequest(BaseModel): """Request fuer LLM-Vergleich.""" prompt: str = Field(..., description="User prompt (z.B. Lehrer-Frage)") system_prompt: Optional[str] = Field(None, description="Optionaler System Prompt") enable_openai: bool = Field(True, description="OpenAI/ChatGPT aktivieren") enable_claude: bool = Field(True, description="Claude aktivieren") enable_selfhosted_tavily: bool = Field(True, description="Self-hosted + Tavily aktivieren") enable_selfhosted_edusearch: bool = Field(True, description="Self-hosted + EduSearch aktivieren") # Parameter fuer Self-hosted Modelle selfhosted_model: str = Field("llama3.2:3b", description="Self-hosted Modell") temperature: float = Field(0.7, ge=0.0, le=2.0, description="Temperature") top_p: float = Field(0.9, ge=0.0, le=1.0, description="Top-p Sampling") max_tokens: int = Field(2048, ge=1, le=8192, description="Max Tokens") # Search Parameter search_results_count: int = Field(5, ge=1, le=20, description="Anzahl Suchergebnisse") edu_search_filters: Optional[dict] = Field(None, description="Filter fuer EduSearch") class LLMResponse(BaseModel): """Antwort eines einzelnen LLM.""" provider: str model: str response: str latency_ms: int tokens_used: Optional[int] = None search_results: Optional[list] = None error: Optional[str] = None timestamp: datetime = Field(default_factory=datetime.utcnow) class ComparisonResponse(BaseModel): """Gesamt-Antwort des Vergleichs.""" comparison_id: str prompt: str system_prompt: Optional[str] responses: list[LLMResponse] created_at: datetime = Field(default_factory=datetime.utcnow) class SavedComparison(BaseModel): """Gespeicherter Vergleich fuer QA.""" comparison_id: str prompt: str system_prompt: Optional[str] responses: list[LLMResponse] notes: Optional[str] = None rating: Optional[dict] = None # {"openai": 4, "claude": 5, ...} created_at: datetime created_by: Optional[str] = None # In-Memory Storage (in Production: Database) _comparisons_store: dict[str, SavedComparison] = {} _system_prompts_store: dict[str, dict] = { "default": { "id": "default", "name": "Standard Lehrer-Assistent", "prompt": """Du bist ein hilfreicher Assistent fuer Lehrkraefte in Deutschland. Deine Aufgaben: - Hilfe bei der Unterrichtsplanung - Erklaerung von Fachinhalten - Erstellung von Arbeitsblaettern und Pruefungen - Beratung zu paedagogischen Methoden Antworte immer auf Deutsch und beachte den deutschen Lehrplankontext.""", "created_at": datetime.now(timezone.utc).isoformat(), }, "curriculum": { "id": "curriculum", "name": "Lehrplan-Experte", "prompt": """Du bist ein Experte fuer deutsche Lehrplaene und Bildungsstandards. Du kennst: - Lehrplaene aller 16 Bundeslaender - KMK Bildungsstandards - Kompetenzorientierung im deutschen Bildungssystem Beziehe dich immer auf konkrete Lehrplanvorgaben wenn moeglich.""", "created_at": datetime.now(timezone.utc).isoformat(), }, "worksheet": { "id": "worksheet", "name": "Arbeitsblatt-Generator", "prompt": """Du bist ein spezialisierter Assistent fuer die Erstellung von Arbeitsblaettern. Erstelle didaktisch sinnvolle Aufgaben mit: - Klaren Arbeitsanweisungen - Differenzierungsmoeglichkeiten - Loesungshinweisen Format: Markdown mit klarer Struktur.""", "created_at": datetime.now(timezone.utc).isoformat(), }, } async def _call_openai(prompt: str, system_prompt: Optional[str]) -> LLMResponse: """Ruft OpenAI ChatGPT auf.""" import os import httpx start_time = time.time() api_key = os.getenv("OPENAI_API_KEY") if not api_key: return LLMResponse( provider="openai", model="gpt-4o-mini", response="", latency_ms=0, error="OPENAI_API_KEY nicht konfiguriert" ) messages = [] if system_prompt: messages.append({"role": "system", "content": system_prompt}) messages.append({"role": "user", "content": prompt}) try: async with httpx.AsyncClient(timeout=60.0) as client: response = await client.post( "https://api.openai.com/v1/chat/completions", headers={ "Authorization": f"Bearer {api_key}", "Content-Type": "application/json", }, json={ "model": "gpt-4o-mini", "messages": messages, "temperature": 0.7, "max_tokens": 2048, }, ) response.raise_for_status() data = response.json() latency_ms = int((time.time() - start_time) * 1000) content = data["choices"][0]["message"]["content"] tokens = data.get("usage", {}).get("total_tokens") return LLMResponse( provider="openai", model="gpt-4o-mini", response=content, latency_ms=latency_ms, tokens_used=tokens, ) except Exception as e: return LLMResponse( provider="openai", model="gpt-4o-mini", response="", latency_ms=int((time.time() - start_time) * 1000), error=str(e), ) async def _call_claude(prompt: str, system_prompt: Optional[str]) -> LLMResponse: """Ruft Anthropic Claude auf.""" import os start_time = time.time() api_key = os.getenv("ANTHROPIC_API_KEY") if not api_key: return LLMResponse( provider="claude", model="claude-3-5-sonnet-20241022", response="", latency_ms=0, error="ANTHROPIC_API_KEY nicht konfiguriert" ) try: import anthropic client = anthropic.AsyncAnthropic(api_key=api_key) response = await client.messages.create( model="claude-3-5-sonnet-20241022", max_tokens=2048, system=system_prompt or "", messages=[{"role": "user", "content": prompt}], ) latency_ms = int((time.time() - start_time) * 1000) content = response.content[0].text if response.content else "" tokens = response.usage.input_tokens + response.usage.output_tokens return LLMResponse( provider="claude", model="claude-3-5-sonnet-20241022", response=content, latency_ms=latency_ms, tokens_used=tokens, ) except Exception as e: return LLMResponse( provider="claude", model="claude-3-5-sonnet-20241022", response="", latency_ms=int((time.time() - start_time) * 1000), error=str(e), ) async def _search_tavily(query: str, count: int = 5) -> list[dict]: """Sucht mit Tavily API.""" import os import httpx api_key = os.getenv("TAVILY_API_KEY") if not api_key: return [] try: async with httpx.AsyncClient(timeout=30.0) as client: response = await client.post( "https://api.tavily.com/search", json={ "api_key": api_key, "query": query, "max_results": count, "include_domains": [ "kmk.org", "bildungsserver.de", "bpb.de", "bayern.de", "nrw.de", "berlin.de", ], }, ) response.raise_for_status() data = response.json() return data.get("results", []) except Exception as e: logger.error(f"Tavily search error: {e}") return [] async def _search_edusearch(query: str, count: int = 5, filters: Optional[dict] = None) -> list[dict]: """Sucht mit EduSearch API.""" import os import httpx edu_search_url = os.getenv("EDU_SEARCH_URL", "http://edu-search-service:8084") try: async with httpx.AsyncClient(timeout=30.0) as client: payload = { "q": query, "limit": count, "mode": "keyword", } if filters: payload["filters"] = filters response = await client.post( f"{edu_search_url}/v1/search", json=payload, ) response.raise_for_status() data = response.json() # Formatiere Ergebnisse results = [] for r in data.get("results", []): results.append({ "title": r.get("title", ""), "url": r.get("url", ""), "content": r.get("snippet", ""), "score": r.get("scores", {}).get("final", 0), }) return results except Exception as e: logger.error(f"EduSearch error: {e}") return [] async def _call_selfhosted_with_search( prompt: str, system_prompt: Optional[str], search_provider: str, search_results: list[dict], model: str, temperature: float, top_p: float, max_tokens: int, ) -> LLMResponse: """Ruft Self-hosted LLM mit Suchergebnissen auf.""" import os import httpx start_time = time.time() ollama_url = os.getenv("OLLAMA_URL", "http://localhost:11434") # Baue Kontext aus Suchergebnissen context_parts = [] for i, result in enumerate(search_results, 1): context_parts.append(f"[{i}] {result.get('title', 'Untitled')}") context_parts.append(f" URL: {result.get('url', '')}") context_parts.append(f" {result.get('content', '')[:500]}") context_parts.append("") search_context = "\n".join(context_parts) # Erweitere System Prompt mit Suchergebnissen augmented_system = f"""{system_prompt or ''} Du hast Zugriff auf folgende Suchergebnisse aus {"Tavily" if search_provider == "tavily" else "EduSearch (deutsche Bildungsquellen)"}: {search_context} Nutze diese Quellen um deine Antwort zu unterstuetzen. Zitiere relevante Quellen mit [Nummer].""" messages = [ {"role": "system", "content": augmented_system}, {"role": "user", "content": prompt}, ] try: async with httpx.AsyncClient(timeout=120.0) as client: response = await client.post( f"{ollama_url}/api/chat", json={ "model": model, "messages": messages, "stream": False, "options": { "temperature": temperature, "top_p": top_p, "num_predict": max_tokens, }, }, ) response.raise_for_status() data = response.json() latency_ms = int((time.time() - start_time) * 1000) content = data.get("message", {}).get("content", "") tokens = data.get("prompt_eval_count", 0) + data.get("eval_count", 0) return LLMResponse( provider=f"selfhosted_{search_provider}", model=model, response=content, latency_ms=latency_ms, tokens_used=tokens, search_results=search_results, ) except Exception as e: return LLMResponse( provider=f"selfhosted_{search_provider}", model=model, response="", latency_ms=int((time.time() - start_time) * 1000), error=str(e), search_results=search_results, ) @router.post("/run", response_model=ComparisonResponse) async def run_comparison( request: ComparisonRequest, _: str = Depends(verify_api_key), ): """ Fuehrt LLM-Vergleich durch. Sendet den Prompt parallel an alle aktivierten Provider und sammelt die Antworten. """ comparison_id = f"cmp-{uuid.uuid4().hex[:12]}" tasks = [] # System Prompt vorbereiten system_prompt = request.system_prompt # OpenAI if request.enable_openai: tasks.append(("openai", _call_openai(request.prompt, system_prompt))) # Claude if request.enable_claude: tasks.append(("claude", _call_claude(request.prompt, system_prompt))) # Self-hosted + Tavily if request.enable_selfhosted_tavily: tavily_results = await _search_tavily(request.prompt, request.search_results_count) tasks.append(( "selfhosted_tavily", _call_selfhosted_with_search( request.prompt, system_prompt, "tavily", tavily_results, request.selfhosted_model, request.temperature, request.top_p, request.max_tokens, ) )) # Self-hosted + EduSearch if request.enable_selfhosted_edusearch: edu_results = await _search_edusearch( request.prompt, request.search_results_count, request.edu_search_filters, ) tasks.append(( "selfhosted_edusearch", _call_selfhosted_with_search( request.prompt, system_prompt, "edusearch", edu_results, request.selfhosted_model, request.temperature, request.top_p, request.max_tokens, ) )) # Parallele Ausfuehrung responses = [] if tasks: results = await asyncio.gather(*[t[1] for t in tasks], return_exceptions=True) for (name, _), result in zip(tasks, results): if isinstance(result, Exception): responses.append(LLMResponse( provider=name, model="unknown", response="", latency_ms=0, error=str(result), )) else: responses.append(result) return ComparisonResponse( comparison_id=comparison_id, prompt=request.prompt, system_prompt=system_prompt, responses=responses, ) @router.post("/save/{comparison_id}") async def save_comparison( comparison_id: str, comparison: ComparisonResponse, notes: Optional[str] = None, rating: Optional[dict] = None, _: str = Depends(verify_api_key), ): """Speichert einen Vergleich fuer spaetere Analyse.""" saved = SavedComparison( comparison_id=comparison_id, prompt=comparison.prompt, system_prompt=comparison.system_prompt, responses=comparison.responses, notes=notes, rating=rating, created_at=comparison.created_at, ) _comparisons_store[comparison_id] = saved return {"status": "saved", "comparison_id": comparison_id} @router.get("/history") async def get_comparison_history( limit: int = 50, _: str = Depends(verify_api_key), ): """Gibt gespeicherte Vergleiche zurueck.""" comparisons = list(_comparisons_store.values()) comparisons.sort(key=lambda x: x.created_at, reverse=True) return {"comparisons": comparisons[:limit]} @router.get("/history/{comparison_id}") async def get_comparison( comparison_id: str, _: str = Depends(verify_api_key), ): """Gibt einen bestimmten Vergleich zurueck.""" if comparison_id not in _comparisons_store: raise HTTPException(status_code=404, detail="Vergleich nicht gefunden") return _comparisons_store[comparison_id] # System Prompt Management @router.get("/prompts") async def list_system_prompts( _: str = Depends(verify_api_key), ): """Listet alle gespeicherten System Prompts.""" return {"prompts": list(_system_prompts_store.values())} @router.post("/prompts") async def create_system_prompt( name: str, prompt: str, _: str = Depends(verify_api_key), ): """Erstellt einen neuen System Prompt.""" prompt_id = f"sp-{uuid.uuid4().hex[:8]}" _system_prompts_store[prompt_id] = { "id": prompt_id, "name": name, "prompt": prompt, "created_at": datetime.now(timezone.utc).isoformat(), } return {"status": "created", "prompt_id": prompt_id} @router.put("/prompts/{prompt_id}") async def update_system_prompt( prompt_id: str, name: str, prompt: str, _: str = Depends(verify_api_key), ): """Aktualisiert einen System Prompt.""" if prompt_id not in _system_prompts_store: raise HTTPException(status_code=404, detail="System Prompt nicht gefunden") _system_prompts_store[prompt_id].update({ "name": name, "prompt": prompt, "updated_at": datetime.now(timezone.utc).isoformat(), }) return {"status": "updated", "prompt_id": prompt_id} @router.delete("/prompts/{prompt_id}") async def delete_system_prompt( prompt_id: str, _: str = Depends(verify_api_key), ): """Loescht einen System Prompt.""" if prompt_id not in _system_prompts_store: raise HTTPException(status_code=404, detail="System Prompt nicht gefunden") if prompt_id in ["default", "curriculum", "worksheet"]: raise HTTPException(status_code=400, detail="Standard-Prompts koennen nicht geloescht werden") del _system_prompts_store[prompt_id] return {"status": "deleted", "prompt_id": prompt_id} @router.get("/prompts/{prompt_id}") async def get_system_prompt( prompt_id: str, _: str = Depends(verify_api_key), ): """Gibt einen System Prompt zurueck.""" if prompt_id not in _system_prompts_store: raise HTTPException(status_code=404, detail="System Prompt nicht gefunden") return _system_prompts_store[prompt_id]