Files
breakpilot-lehrer/backend-lehrer/llm_gateway/config.py
Benjamin Boenisch 5a31f52310 Initial commit: breakpilot-lehrer - Lehrer KI Platform
Services: Admin-Lehrer, Backend-Lehrer, Studio v2, Website,
Klausur-Service, School-Service, Voice-Service, Geo-Service,
BreakPilot Drive, Agent-Core

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-02-11 23:47:26 +01:00

123 lines
3.7 KiB
Python

"""
LLM Gateway Konfiguration
Lädt Einstellungen aus Umgebungsvariablen.
"""
import os
from typing import Optional
from dataclasses import dataclass, field
@dataclass
class LLMBackendConfig:
    """Configuration for a single LLM backend.

    Instances are created by ``load_config`` from environment variables;
    one per backend (Ollama, vLLM, Anthropic).
    """
    # Backend identifier, e.g. "ollama", "vllm" or "anthropic".
    name: str
    # Base URL of the backend's HTTP API.
    base_url: str
    # API key for the backend; None for unauthenticated backends (e.g. local Ollama).
    api_key: Optional[str] = None
    # Model used when a request does not specify one.
    default_model: str = ""
    # Request timeout in seconds.
    timeout: int = 120
    # Whether this backend may be used at all.
    enabled: bool = True
@dataclass
class GatewayConfig:
    """Top-level configuration for the LLM gateway.

    Populated from environment variables by ``load_config``; all fields
    fall back to the defaults shown here when the variables are unset.
    """
    # Server
    host: str = "0.0.0.0"
    port: int = 8002
    debug: bool = False
    # Auth
    jwt_secret: str = ""
    # Static API keys accepted by the gateway (parsed from LLM_API_KEYS).
    api_keys: list[str] = field(default_factory=list)
    # Rate limiting
    rate_limit_requests_per_minute: int = 60
    rate_limit_tokens_per_minute: int = 100000
    # Backends; each is None until load_config() configures it.
    ollama: Optional[LLMBackendConfig] = None
    vllm: Optional[LLMBackendConfig] = None
    anthropic: Optional[LLMBackendConfig] = None
    # Order in which backends are tried (first available wins).
    backend_priority: list[str] = field(default_factory=lambda: ["ollama", "vllm", "anthropic"])
    # Playbooks
    playbooks_enabled: bool = True
    # Logging
    log_level: str = "INFO"
    audit_logging: bool = True
def _env_bool(name: str, default: str) -> bool:
    """Read a boolean environment variable; the string "true" (any case) is True."""
    return os.getenv(name, default).lower() == "true"


def load_config() -> GatewayConfig:
    """Build a :class:`GatewayConfig` from environment variables.

    Returns:
        A fully populated config. The Ollama backend is configured by
        default (its base URL has a localhost fallback); vLLM and
        Anthropic are only configured when ``VLLM_BASE_URL`` /
        ``ANTHROPIC_API_KEY`` are set.

    Raises:
        ValueError: if a numeric environment variable (port, timeouts,
            rate limits) does not parse as an int.
    """
    raw_api_keys = os.getenv("LLM_API_KEYS", "")
    config = GatewayConfig(
        host=os.getenv("LLM_GATEWAY_HOST", "0.0.0.0"),
        port=int(os.getenv("LLM_GATEWAY_PORT", "8002")),
        debug=_env_bool("LLM_GATEWAY_DEBUG", "false"),
        jwt_secret=os.getenv("JWT_SECRET", ""),
        # Strip whitespace and drop empty entries so "key1, key2," parses
        # to ["key1", "key2"] — untrimmed keys would never match a request.
        api_keys=[k.strip() for k in raw_api_keys.split(",") if k.strip()],
        rate_limit_requests_per_minute=int(os.getenv("LLM_RATE_LIMIT_RPM", "60")),
        rate_limit_tokens_per_minute=int(os.getenv("LLM_RATE_LIMIT_TPM", "100000")),
        log_level=os.getenv("LLM_LOG_LEVEL", "INFO"),
        audit_logging=_env_bool("LLM_AUDIT_LOGGING", "true"),
    )

    # Ollama backend (local) — always configured, since the base URL defaults
    # to the standard local Ollama endpoint.
    ollama_url = os.getenv("OLLAMA_BASE_URL", "http://localhost:11434")
    if ollama_url:
        config.ollama = LLMBackendConfig(
            name="ollama",
            base_url=ollama_url,
            default_model=os.getenv("OLLAMA_DEFAULT_MODEL", "llama3.1:8b"),
            timeout=int(os.getenv("OLLAMA_TIMEOUT", "120")),
            enabled=_env_bool("OLLAMA_ENABLED", "true"),
        )

    # vLLM backend (remote, e.g. vast.ai) — only when a base URL is provided.
    vllm_url = os.getenv("VLLM_BASE_URL")
    if vllm_url:
        config.vllm = LLMBackendConfig(
            name="vllm",
            base_url=vllm_url,
            api_key=os.getenv("VLLM_API_KEY"),
            default_model=os.getenv("VLLM_DEFAULT_MODEL", "meta-llama/Meta-Llama-3.1-8B-Instruct"),
            timeout=int(os.getenv("VLLM_TIMEOUT", "120")),
            enabled=_env_bool("VLLM_ENABLED", "true"),
        )

    # Anthropic backend (Claude API fallback) — only when an API key is set.
    anthropic_key = os.getenv("ANTHROPIC_API_KEY")
    if anthropic_key:
        config.anthropic = LLMBackendConfig(
            name="anthropic",
            base_url="https://api.anthropic.com",
            api_key=anthropic_key,
            default_model=os.getenv("ANTHROPIC_DEFAULT_MODEL", "claude-3-5-sonnet-20241022"),
            timeout=int(os.getenv("ANTHROPIC_TIMEOUT", "120")),
            enabled=_env_bool("ANTHROPIC_ENABLED", "true"),
        )

    # Backend priority: comma-separated names. Strip whitespace and drop empty
    # entries so a trailing comma cannot produce an unknown "" backend name.
    priority = os.getenv("LLM_BACKEND_PRIORITY", "ollama,vllm,anthropic")
    config.backend_priority = [b.strip() for b in priority.split(",") if b.strip()]

    return config
# Global configuration singleton; populated lazily by get_config().
_config: Optional[GatewayConfig] = None
def get_config() -> GatewayConfig:
    """Return the process-wide GatewayConfig, creating it lazily on first call."""
    global _config
    cached = _config
    if cached is None:
        # First access: load from environment variables and memoize.
        cached = load_config()
        _config = cached
    return cached