""" LLM Comparison - Provider-Aufrufe (OpenAI, Claude, Self-hosted, Search). """ import logging import time from typing import Optional from .comparison_models import LLMResponse logger = logging.getLogger(__name__) async def call_openai(prompt: str, system_prompt: Optional[str]) -> LLMResponse: """Ruft OpenAI ChatGPT auf.""" import os import httpx start_time = time.time() api_key = os.getenv("OPENAI_API_KEY") if not api_key: return LLMResponse( provider="openai", model="gpt-4o-mini", response="", latency_ms=0, error="OPENAI_API_KEY nicht konfiguriert" ) messages = [] if system_prompt: messages.append({"role": "system", "content": system_prompt}) messages.append({"role": "user", "content": prompt}) try: async with httpx.AsyncClient(timeout=60.0) as client: response = await client.post( "https://api.openai.com/v1/chat/completions", headers={ "Authorization": f"Bearer {api_key}", "Content-Type": "application/json", }, json={ "model": "gpt-4o-mini", "messages": messages, "temperature": 0.7, "max_tokens": 2048, }, ) response.raise_for_status() data = response.json() latency_ms = int((time.time() - start_time) * 1000) content = data["choices"][0]["message"]["content"] tokens = data.get("usage", {}).get("total_tokens") return LLMResponse( provider="openai", model="gpt-4o-mini", response=content, latency_ms=latency_ms, tokens_used=tokens, ) except Exception as e: return LLMResponse( provider="openai", model="gpt-4o-mini", response="", latency_ms=int((time.time() - start_time) * 1000), error=str(e), ) async def call_claude(prompt: str, system_prompt: Optional[str]) -> LLMResponse: """Ruft Anthropic Claude auf.""" import os start_time = time.time() api_key = os.getenv("ANTHROPIC_API_KEY") if not api_key: return LLMResponse( provider="claude", model="claude-3-5-sonnet-20241022", response="", latency_ms=0, error="ANTHROPIC_API_KEY nicht konfiguriert" ) try: import anthropic client = anthropic.AsyncAnthropic(api_key=api_key) response = await client.messages.create( model="claude-3-5-sonnet-20241022", max_tokens=2048, system=system_prompt or "", messages=[{"role": "user", "content": prompt}], ) latency_ms = int((time.time() - start_time) * 1000) content = response.content[0].text if response.content else "" tokens = response.usage.input_tokens + response.usage.output_tokens return LLMResponse( provider="claude", model="claude-3-5-sonnet-20241022", response=content, latency_ms=latency_ms, tokens_used=tokens, ) except Exception as e: return LLMResponse( provider="claude", model="claude-3-5-sonnet-20241022", response="", latency_ms=int((time.time() - start_time) * 1000), error=str(e), ) async def search_tavily(query: str, count: int = 5) -> list[dict]: """Sucht mit Tavily API.""" import os import httpx api_key = os.getenv("TAVILY_API_KEY") if not api_key: return [] try: async with httpx.AsyncClient(timeout=30.0) as client: response = await client.post( "https://api.tavily.com/search", json={ "api_key": api_key, "query": query, "max_results": count, "include_domains": [ "kmk.org", "bildungsserver.de", "bpb.de", "bayern.de", "nrw.de", "berlin.de", ], }, ) response.raise_for_status() data = response.json() return data.get("results", []) except Exception as e: logger.error(f"Tavily search error: {e}") return [] async def search_edusearch(query: str, count: int = 5, filters: Optional[dict] = None) -> list[dict]: """Sucht mit EduSearch API.""" import os import httpx edu_search_url = os.getenv("EDU_SEARCH_URL", "http://edu-search-service:8084") try: async with httpx.AsyncClient(timeout=30.0) as client: payload = { "q": query, "limit": count, "mode": "keyword", } if filters: payload["filters"] = filters response = await client.post( f"{edu_search_url}/v1/search", json=payload, ) response.raise_for_status() data = response.json() results = [] for r in data.get("results", []): results.append({ "title": r.get("title", ""), "url": r.get("url", ""), "content": r.get("snippet", ""), "score": r.get("scores", {}).get("final", 0), }) return results except Exception as e: logger.error(f"EduSearch error: {e}") return [] async def call_selfhosted_with_search( prompt: str, system_prompt: Optional[str], search_provider: str, search_results: list[dict], model: str, temperature: float, top_p: float, max_tokens: int, ) -> LLMResponse: """Ruft Self-hosted LLM mit Suchergebnissen auf.""" import os import httpx start_time = time.time() ollama_url = os.getenv("OLLAMA_URL", "http://localhost:11434") # Baue Kontext aus Suchergebnissen context_parts = [] for i, result in enumerate(search_results, 1): context_parts.append(f"[{i}] {result.get('title', 'Untitled')}") context_parts.append(f" URL: {result.get('url', '')}") context_parts.append(f" {result.get('content', '')[:500]}") context_parts.append("") search_context = "\n".join(context_parts) augmented_system = f"""{system_prompt or ''} Du hast Zugriff auf folgende Suchergebnisse aus {"Tavily" if search_provider == "tavily" else "EduSearch (deutsche Bildungsquellen)"}: {search_context} Nutze diese Quellen um deine Antwort zu unterstuetzen. Zitiere relevante Quellen mit [Nummer].""" messages = [ {"role": "system", "content": augmented_system}, {"role": "user", "content": prompt}, ] try: async with httpx.AsyncClient(timeout=120.0) as client: response = await client.post( f"{ollama_url}/api/chat", json={ "model": model, "messages": messages, "stream": False, "options": { "temperature": temperature, "top_p": top_p, "num_predict": max_tokens, }, }, ) response.raise_for_status() data = response.json() latency_ms = int((time.time() - start_time) * 1000) content = data.get("message", {}).get("content", "") tokens = data.get("prompt_eval_count", 0) + data.get("eval_count", 0) return LLMResponse( provider=f"selfhosted_{search_provider}", model=model, response=content, latency_ms=latency_ms, tokens_used=tokens, search_results=search_results, ) except Exception as e: return LLMResponse( provider=f"selfhosted_{search_provider}", model=model, response="", latency_ms=int((time.time() - start_time) * 1000), error=str(e), search_results=search_results, )