Files
breakpilot-lehrer/backend-lehrer/llm_gateway/routes/comparison_providers.py
Benjamin Admin bd4b956e3c [split-required] Split final 43 files (500-668 LOC) to complete refactoring
klausur-service (11 files):
- cv_gutter_repair, ocr_pipeline_regression, upload_api
- ocr_pipeline_sessions, smart_spell, nru_worksheet_generator
- ocr_pipeline_overlays, mail/aggregator, zeugnis_api
- cv_syllable_detect, self_rag

backend-lehrer (17 files):
- classroom_engine/suggestions, generators/quiz_generator
- worksheets_api, llm_gateway/comparison, state_engine_api
- classroom/models (→ 4 submodules), services/file_processor
- alerts_agent/api/wizard+digests+routes, content_generators/pdf
- classroom/routes/sessions, llm_gateway/inference
- classroom_engine/analytics, auth/keycloak_auth
- alerts_agent/processing/rule_engine, ai_processor/print_versions

agent-core (5 files):
- brain/memory_store, brain/knowledge_graph, brain/context_manager
- orchestrator/supervisor, sessions/session_manager

admin-lehrer (5 components):
- GridOverlay, StepGridReview, DevOpsPipelineSidebar
- DataFlowDiagram, sbom/wizard/page

website (2 files):
- DependencyMap, lehrer/abitur-archiv

Other: nibis_ingestion, grid_detection_service, export-doclayout-onnx

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
2026-04-25 09:41:42 +02:00

271 lines
8.2 KiB
Python

"""
LLM Comparison - Provider-Aufrufe (OpenAI, Claude, Self-hosted, Search).
"""
import logging
import time
from typing import Optional
from .comparison_models import LLMResponse
logger = logging.getLogger(__name__)
async def call_openai(prompt: str, system_prompt: Optional[str]) -> LLMResponse:
"""Ruft OpenAI ChatGPT auf."""
import os
import httpx
start_time = time.time()
api_key = os.getenv("OPENAI_API_KEY")
if not api_key:
return LLMResponse(
provider="openai",
model="gpt-4o-mini",
response="",
latency_ms=0,
error="OPENAI_API_KEY nicht konfiguriert"
)
messages = []
if system_prompt:
messages.append({"role": "system", "content": system_prompt})
messages.append({"role": "user", "content": prompt})
try:
async with httpx.AsyncClient(timeout=60.0) as client:
response = await client.post(
"https://api.openai.com/v1/chat/completions",
headers={
"Authorization": f"Bearer {api_key}",
"Content-Type": "application/json",
},
json={
"model": "gpt-4o-mini",
"messages": messages,
"temperature": 0.7,
"max_tokens": 2048,
},
)
response.raise_for_status()
data = response.json()
latency_ms = int((time.time() - start_time) * 1000)
content = data["choices"][0]["message"]["content"]
tokens = data.get("usage", {}).get("total_tokens")
return LLMResponse(
provider="openai",
model="gpt-4o-mini",
response=content,
latency_ms=latency_ms,
tokens_used=tokens,
)
except Exception as e:
return LLMResponse(
provider="openai",
model="gpt-4o-mini",
response="",
latency_ms=int((time.time() - start_time) * 1000),
error=str(e),
)
async def call_claude(prompt: str, system_prompt: Optional[str]) -> LLMResponse:
"""Ruft Anthropic Claude auf."""
import os
start_time = time.time()
api_key = os.getenv("ANTHROPIC_API_KEY")
if not api_key:
return LLMResponse(
provider="claude",
model="claude-3-5-sonnet-20241022",
response="",
latency_ms=0,
error="ANTHROPIC_API_KEY nicht konfiguriert"
)
try:
import anthropic
client = anthropic.AsyncAnthropic(api_key=api_key)
response = await client.messages.create(
model="claude-3-5-sonnet-20241022",
max_tokens=2048,
system=system_prompt or "",
messages=[{"role": "user", "content": prompt}],
)
latency_ms = int((time.time() - start_time) * 1000)
content = response.content[0].text if response.content else ""
tokens = response.usage.input_tokens + response.usage.output_tokens
return LLMResponse(
provider="claude",
model="claude-3-5-sonnet-20241022",
response=content,
latency_ms=latency_ms,
tokens_used=tokens,
)
except Exception as e:
return LLMResponse(
provider="claude",
model="claude-3-5-sonnet-20241022",
response="",
latency_ms=int((time.time() - start_time) * 1000),
error=str(e),
)
async def search_tavily(query: str, count: int = 5) -> list[dict]:
"""Sucht mit Tavily API."""
import os
import httpx
api_key = os.getenv("TAVILY_API_KEY")
if not api_key:
return []
try:
async with httpx.AsyncClient(timeout=30.0) as client:
response = await client.post(
"https://api.tavily.com/search",
json={
"api_key": api_key,
"query": query,
"max_results": count,
"include_domains": [
"kmk.org", "bildungsserver.de", "bpb.de",
"bayern.de", "nrw.de", "berlin.de",
],
},
)
response.raise_for_status()
data = response.json()
return data.get("results", [])
except Exception as e:
logger.error(f"Tavily search error: {e}")
return []
async def search_edusearch(query: str, count: int = 5, filters: Optional[dict] = None) -> list[dict]:
"""Sucht mit EduSearch API."""
import os
import httpx
edu_search_url = os.getenv("EDU_SEARCH_URL", "http://edu-search-service:8084")
try:
async with httpx.AsyncClient(timeout=30.0) as client:
payload = {
"q": query,
"limit": count,
"mode": "keyword",
}
if filters:
payload["filters"] = filters
response = await client.post(
f"{edu_search_url}/v1/search",
json=payload,
)
response.raise_for_status()
data = response.json()
results = []
for r in data.get("results", []):
results.append({
"title": r.get("title", ""),
"url": r.get("url", ""),
"content": r.get("snippet", ""),
"score": r.get("scores", {}).get("final", 0),
})
return results
except Exception as e:
logger.error(f"EduSearch error: {e}")
return []
async def call_selfhosted_with_search(
prompt: str,
system_prompt: Optional[str],
search_provider: str,
search_results: list[dict],
model: str,
temperature: float,
top_p: float,
max_tokens: int,
) -> LLMResponse:
"""Ruft Self-hosted LLM mit Suchergebnissen auf."""
import os
import httpx
start_time = time.time()
ollama_url = os.getenv("OLLAMA_URL", "http://localhost:11434")
# Baue Kontext aus Suchergebnissen
context_parts = []
for i, result in enumerate(search_results, 1):
context_parts.append(f"[{i}] {result.get('title', 'Untitled')}")
context_parts.append(f" URL: {result.get('url', '')}")
context_parts.append(f" {result.get('content', '')[:500]}")
context_parts.append("")
search_context = "\n".join(context_parts)
augmented_system = f"""{system_prompt or ''}
Du hast Zugriff auf folgende Suchergebnisse aus {"Tavily" if search_provider == "tavily" else "EduSearch (deutsche Bildungsquellen)"}:
{search_context}
Nutze diese Quellen um deine Antwort zu unterstuetzen. Zitiere relevante Quellen mit [Nummer]."""
messages = [
{"role": "system", "content": augmented_system},
{"role": "user", "content": prompt},
]
try:
async with httpx.AsyncClient(timeout=120.0) as client:
response = await client.post(
f"{ollama_url}/api/chat",
json={
"model": model,
"messages": messages,
"stream": False,
"options": {
"temperature": temperature,
"top_p": top_p,
"num_predict": max_tokens,
},
},
)
response.raise_for_status()
data = response.json()
latency_ms = int((time.time() - start_time) * 1000)
content = data.get("message", {}).get("content", "")
tokens = data.get("prompt_eval_count", 0) + data.get("eval_count", 0)
return LLMResponse(
provider=f"selfhosted_{search_provider}",
model=model,
response=content,
latency_ms=latency_ms,
tokens_used=tokens,
search_results=search_results,
)
except Exception as e:
return LLMResponse(
provider=f"selfhosted_{search_provider}",
model=model,
response="",
latency_ms=int((time.time() - start_time) * 1000),
error=str(e),
search_results=search_results,
)