Services: Admin-Lehrer, Backend-Lehrer, Studio v2, Website, Klausur-Service, School-Service, Voice-Service, Geo-Service, BreakPilot Drive, Agent-Core Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
123 lines
3.7 KiB
Python
123 lines
3.7 KiB
Python
"""
|
|
LLM Gateway Konfiguration
|
|
|
|
Lädt Einstellungen aus Umgebungsvariablen.
|
|
"""
|
|
|
|
import os
from typing import Optional
from dataclasses import dataclass, field


@dataclass
class LLMBackendConfig:
    """Configuration for a single LLM backend (e.g. Ollama, vLLM, Anthropic)."""

    name: str  # backend identifier, e.g. "ollama", "vllm", "anthropic"
    base_url: str  # base URL of the backend's HTTP API
    api_key: Optional[str] = None  # optional — local backends may not need one
    default_model: str = ""  # model used when the caller does not specify one
    timeout: int = 120  # request timeout passed to the HTTP client (presumably seconds — verify)
    enabled: bool = True  # allows a configured backend to be switched off


@dataclass
class GatewayConfig:
    """Main configuration for the LLM Gateway.

    Instances are normally built from environment variables via
    ``load_config()``; see that function for the variable names
    and defaults.
    """

    # Server
    host: str = "0.0.0.0"  # bind address; 0.0.0.0 listens on all interfaces
    port: int = 8002
    debug: bool = False

    # Auth
    jwt_secret: str = ""
    api_keys: list[str] = field(default_factory=list)

    # Rate limiting
    rate_limit_requests_per_minute: int = 60
    rate_limit_tokens_per_minute: int = 100000

    # Backends — None means "not configured"
    ollama: Optional[LLMBackendConfig] = None
    vllm: Optional[LLMBackendConfig] = None
    anthropic: Optional[LLMBackendConfig] = None

    # Default backend priority (order in which backends are tried)
    backend_priority: list[str] = field(default_factory=lambda: ["ollama", "vllm", "anthropic"])

    # Playbooks
    playbooks_enabled: bool = True

    # Logging
    log_level: str = "INFO"
    audit_logging: bool = True


def load_config() -> GatewayConfig:
    """Build a :class:`GatewayConfig` from environment variables.

    Backends are only attached when their trigger variable is present:
    vLLM requires ``VLLM_BASE_URL``, Anthropic requires ``ANTHROPIC_API_KEY``.
    Ollama has a localhost default URL, so it is configured unless
    ``OLLAMA_BASE_URL`` is explicitly set to an empty string.

    Returns:
        A fully populated GatewayConfig instance.
    """

    def _env_bool(name: str, default: str) -> bool:
        # Shared parsing rule for boolean flags: only the literal string
        # "true" (case-insensitive) counts as True; anything else is False.
        return os.getenv(name, default).lower() == "true"

    def _csv(value: str) -> list[str]:
        # Split a comma-separated env value, stripping whitespace and
        # dropping empty entries so "a, b," yields ["a", "b"].
        return [item.strip() for item in value.split(",") if item.strip()]

    config = GatewayConfig(
        host=os.getenv("LLM_GATEWAY_HOST", "0.0.0.0"),
        port=int(os.getenv("LLM_GATEWAY_PORT", "8002")),
        debug=_env_bool("LLM_GATEWAY_DEBUG", "false"),
        jwt_secret=os.getenv("JWT_SECRET", ""),
        # Fix: read LLM_API_KEYS once and parse it like backend_priority
        # (strip whitespace, drop empties) instead of a raw split that
        # kept padding and produced bogus empty keys.
        api_keys=_csv(os.getenv("LLM_API_KEYS", "")),
        rate_limit_requests_per_minute=int(os.getenv("LLM_RATE_LIMIT_RPM", "60")),
        rate_limit_tokens_per_minute=int(os.getenv("LLM_RATE_LIMIT_TPM", "100000")),
        log_level=os.getenv("LLM_LOG_LEVEL", "INFO"),
        audit_logging=_env_bool("LLM_AUDIT_LOGGING", "true"),
    )

    # Ollama backend (local) — has a default URL, so it is normally configured.
    ollama_url = os.getenv("OLLAMA_BASE_URL", "http://localhost:11434")
    if ollama_url:
        config.ollama = LLMBackendConfig(
            name="ollama",
            base_url=ollama_url,
            default_model=os.getenv("OLLAMA_DEFAULT_MODEL", "llama3.1:8b"),
            timeout=int(os.getenv("OLLAMA_TIMEOUT", "120")),
            enabled=_env_bool("OLLAMA_ENABLED", "true"),
        )

    # vLLM backend (remote, e.g. vast.ai) — only when a URL is provided.
    vllm_url = os.getenv("VLLM_BASE_URL")
    if vllm_url:
        config.vllm = LLMBackendConfig(
            name="vllm",
            base_url=vllm_url,
            api_key=os.getenv("VLLM_API_KEY"),
            default_model=os.getenv("VLLM_DEFAULT_MODEL", "meta-llama/Meta-Llama-3.1-8B-Instruct"),
            timeout=int(os.getenv("VLLM_TIMEOUT", "120")),
            enabled=_env_bool("VLLM_ENABLED", "true"),
        )

    # Anthropic backend (Claude API fallback) — only when an API key is set.
    anthropic_key = os.getenv("ANTHROPIC_API_KEY")
    if anthropic_key:
        config.anthropic = LLMBackendConfig(
            name="anthropic",
            base_url="https://api.anthropic.com",
            api_key=anthropic_key,
            default_model=os.getenv("ANTHROPIC_DEFAULT_MODEL", "claude-3-5-sonnet-20241022"),
            timeout=int(os.getenv("ANTHROPIC_TIMEOUT", "120")),
            enabled=_env_bool("ANTHROPIC_ENABLED", "true"),
        )

    # Backend priority — robustness: empty entries from malformed input
    # (e.g. "ollama,,vllm") are dropped rather than kept as "".
    priority = os.getenv("LLM_BACKEND_PRIORITY", "ollama,vllm,anthropic")
    config.backend_priority = _csv(priority)

    return config


# Process-wide configuration singleton; populated lazily by get_config().
_config: Optional[GatewayConfig] = None


def get_config() -> GatewayConfig:
    """Return the process-wide configuration, loading it on first access."""
    global _config
    # Fast path: already initialized.
    if _config is not None:
        return _config
    _config = load_config()
    return _config