"""
|
|
LLM Comparison - Provider-Aufrufe (OpenAI, Claude, Self-hosted, Search).
|
|
"""
|
|
|
|
import logging
|
|
import time
|
|
from typing import Optional
|
|
|
|
from .comparison_models import LLMResponse
|
|
|
|
logger = logging.getLogger(__name__)
|
|
|
|
|
|
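
# ``LLMResponse`` is defined in ``comparison_models``. Based on how it is used
# in this module, it presumably carries roughly the following fields; this is
# an assumed sketch for orientation, not the actual definition:
#
#     class LLMResponse(BaseModel):
#         provider: str
#         model: str
#         response: str
#         latency_ms: int
#         tokens_used: Optional[int] = None
#         error: Optional[str] = None
#         search_results: Optional[list[dict]] = None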


async def call_openai(prompt: str, system_prompt: Optional[str]) -> LLMResponse:
    """Calls OpenAI ChatGPT."""
    import os
    import httpx

    start_time = time.time()
    api_key = os.getenv("OPENAI_API_KEY")

    if not api_key:
        return LLMResponse(
            provider="openai",
            model="gpt-4o-mini",
            response="",
            latency_ms=0,
            error="OPENAI_API_KEY nicht konfiguriert",
        )

    messages = []
    if system_prompt:
        messages.append({"role": "system", "content": system_prompt})
    messages.append({"role": "user", "content": prompt})

    try:
        async with httpx.AsyncClient(timeout=60.0) as client:
            response = await client.post(
                "https://api.openai.com/v1/chat/completions",
                headers={
                    "Authorization": f"Bearer {api_key}",
                    "Content-Type": "application/json",
                },
                json={
                    "model": "gpt-4o-mini",
                    "messages": messages,
                    "temperature": 0.7,
                    "max_tokens": 2048,
                },
            )
            response.raise_for_status()
            data = response.json()

        latency_ms = int((time.time() - start_time) * 1000)
        content = data["choices"][0]["message"]["content"]
        tokens = data.get("usage", {}).get("total_tokens")

        return LLMResponse(
            provider="openai",
            model="gpt-4o-mini",
            response=content,
            latency_ms=latency_ms,
            tokens_used=tokens,
        )
    except Exception as e:
        logger.error(f"OpenAI call failed: {e}")
        return LLMResponse(
            provider="openai",
            model="gpt-4o-mini",
            response="",
            latency_ms=int((time.time() - start_time) * 1000),
            error=str(e),
        )


async def call_claude(prompt: str, system_prompt: Optional[str]) -> LLMResponse:
    """Calls Anthropic Claude."""
    import os

    start_time = time.time()
    api_key = os.getenv("ANTHROPIC_API_KEY")

    if not api_key:
        return LLMResponse(
            provider="claude",
            model="claude-3-5-sonnet-20241022",
            response="",
            latency_ms=0,
            error="ANTHROPIC_API_KEY nicht konfiguriert",
        )

    try:
        import anthropic

        client = anthropic.AsyncAnthropic(api_key=api_key)

        response = await client.messages.create(
            model="claude-3-5-sonnet-20241022",
            max_tokens=2048,
            system=system_prompt or "",
            messages=[{"role": "user", "content": prompt}],
        )

        latency_ms = int((time.time() - start_time) * 1000)
        content = response.content[0].text if response.content else ""
        tokens = response.usage.input_tokens + response.usage.output_tokens

        return LLMResponse(
            provider="claude",
            model="claude-3-5-sonnet-20241022",
            response=content,
            latency_ms=latency_ms,
            tokens_used=tokens,
        )
    except Exception as e:
        logger.error(f"Claude call failed: {e}")
        return LLMResponse(
            provider="claude",
            model="claude-3-5-sonnet-20241022",
            response="",
            latency_ms=int((time.time() - start_time) * 1000),
            error=str(e),
        )
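
# The two hosted providers are typically queried side by side for comparison.
# A minimal usage sketch (illustrative only, from within an async context; the
# actual orchestration elsewhere in the gateway may differ):
#
#     import asyncio
#
#     openai_resp, claude_resp = await asyncio.gather(
#         call_openai(prompt, system_prompt),
#         call_claude(prompt, system_prompt),
#     )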


async def search_tavily(query: str, count: int = 5) -> list[dict]:
    """Searches with the Tavily API."""
    import os
    import httpx

    api_key = os.getenv("TAVILY_API_KEY")
    if not api_key:
        return []

    try:
        async with httpx.AsyncClient(timeout=30.0) as client:
            response = await client.post(
                "https://api.tavily.com/search",
                json={
                    "api_key": api_key,
                    "query": query,
                    "max_results": count,
                    "include_domains": [
                        "kmk.org", "bildungsserver.de", "bpb.de",
                        "bayern.de", "nrw.de", "berlin.de",
                    ],
                },
            )
            response.raise_for_status()
            data = response.json()
            return data.get("results", [])
    except Exception as e:
        logger.error(f"Tavily search error: {e}")
        return []
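
# Tavily results are returned unmodified. The context builder in
# call_selfhosted_with_search below expects each result dict to carry "title",
# "url" and "content" keys, which matches Tavily's response format.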


async def search_edusearch(query: str, count: int = 5, filters: Optional[dict] = None) -> list[dict]:
    """Searches with the EduSearch API."""
    import os
    import httpx

    edu_search_url = os.getenv("EDU_SEARCH_URL", "http://edu-search-service:8084")

    try:
        async with httpx.AsyncClient(timeout=30.0) as client:
            payload = {
                "q": query,
                "limit": count,
                "mode": "keyword",
            }
            if filters:
                payload["filters"] = filters

            response = await client.post(
                f"{edu_search_url}/v1/search",
                json=payload,
            )
            response.raise_for_status()
            data = response.json()

            results = []
            for r in data.get("results", []):
                results.append({
                    "title": r.get("title", ""),
                    "url": r.get("url", ""),
                    "content": r.get("snippet", ""),
                    "score": r.get("scores", {}).get("final", 0),
                })
            return results
    except Exception as e:
        logger.error(f"EduSearch error: {e}")
        return []
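
# EduSearch hits are normalized to the same "title"/"url"/"content" shape that
# search_tavily returns, so either provider's results can be handed to
# call_selfhosted_with_search. Illustrative call (the query is an example value):
#
#     results = await search_edusearch("Satz des Pythagoras Klasse 9", count=3)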


async def call_selfhosted_with_search(
    prompt: str,
    system_prompt: Optional[str],
    search_provider: str,
    search_results: list[dict],
    model: str,
    temperature: float,
    top_p: float,
    max_tokens: int,
) -> LLMResponse:
    """Calls the self-hosted LLM with search results."""
    import os
    import httpx

    start_time = time.time()
    ollama_url = os.getenv("OLLAMA_URL", "http://localhost:11434")

    # Build the context block from the search results
    context_parts = []
    for i, result in enumerate(search_results, 1):
        context_parts.append(f"[{i}] {result.get('title', 'Untitled')}")
        context_parts.append(f" URL: {result.get('url', '')}")
        context_parts.append(f" {result.get('content', '')[:500]}")
        context_parts.append("")

    search_context = "\n".join(context_parts)

    augmented_system = f"""{system_prompt or ''}

Du hast Zugriff auf folgende Suchergebnisse aus {"Tavily" if search_provider == "tavily" else "EduSearch (deutsche Bildungsquellen)"}:

{search_context}

Nutze diese Quellen, um deine Antwort zu unterstuetzen. Zitiere relevante Quellen mit [Nummer]."""

    messages = [
        {"role": "system", "content": augmented_system},
        {"role": "user", "content": prompt},
    ]

    try:
        async with httpx.AsyncClient(timeout=120.0) as client:
            response = await client.post(
                f"{ollama_url}/api/chat",
                json={
                    "model": model,
                    "messages": messages,
                    "stream": False,
                    "options": {
                        "temperature": temperature,
                        "top_p": top_p,
                        "num_predict": max_tokens,
                    },
                },
            )
            response.raise_for_status()
            data = response.json()

        latency_ms = int((time.time() - start_time) * 1000)
        content = data.get("message", {}).get("content", "")
        tokens = data.get("prompt_eval_count", 0) + data.get("eval_count", 0)

        return LLMResponse(
            provider=f"selfhosted_{search_provider}",
            model=model,
            response=content,
            latency_ms=latency_ms,
            tokens_used=tokens,
            search_results=search_results,
        )
    except Exception as e:
        logger.error(f"Self-hosted LLM call failed: {e}")
        return LLMResponse(
            provider=f"selfhosted_{search_provider}",
            model=model,
            response="",
            latency_ms=int((time.time() - start_time) * 1000),
            error=str(e),
            search_results=search_results,
        )
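

if __name__ == "__main__":  # pragma: no cover
    # Minimal end-to-end sketch of the search-augmented self-hosted path.
    # The query, system prompt, model name and sampling parameters below are
    # example values, not project defaults.
    import asyncio

    async def _demo() -> None:
        query = "Photosynthese Klasse 7"
        results = await search_edusearch(query, count=3)
        resp = await call_selfhosted_with_search(
            prompt=query,
            system_prompt="Du bist ein hilfreicher Assistent fuer Lehrkraefte.",
            search_provider="edusearch",
            search_results=results,
            model="llama3.1:8b",
            temperature=0.7,
            top_p=0.9,
            max_tokens=1024,
        )
        print(resp)

    asyncio.run(_demo())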