This repository has been archived on 2026-02-15. You can view files and clone it, but you cannot open issues, create pull requests, or push commits.
Files
breakpilot-pwa/backend/llm_gateway/config.py
Benjamin Admin 21a844cb8a fix: Restore all files lost during destructive rebase
A previous `git pull --rebase origin main` dropped 177 local commits,
losing 3400+ files across admin-v2, backend, studio-v2, website,
klausur-service, and many other services. The partial restore attempt
(660295e2) only recovered some files.

This commit restores all missing files from pre-rebase ref 98933f5e
while preserving post-rebase additions (night-scheduler, night-mode UI,
NightModeWidget dashboard integration).

Restored features include:
- AI Module Sidebar (FAB), OCR Labeling, OCR Compare
- GPU Dashboard, RAG Pipeline, Magic Help
- Klausur-Korrektur (8 files), Abitur-Archiv (5+ files)
- Companion, Zeugnisse-Crawler, Screen Flow
- Full backend, studio-v2, website, klausur-service
- All compliance SDKs, agent-core, voice-service
- CI/CD configs, documentation, scripts

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-02-09 09:51:32 +01:00

123 lines
3.7 KiB
Python

"""
LLM Gateway Konfiguration
Lädt Einstellungen aus Umgebungsvariablen.
"""
import os
from typing import Optional
from dataclasses import dataclass, field
@dataclass
class LLMBackendConfig:
    """Configuration for a single LLM backend (e.g. Ollama, vLLM, Anthropic)."""
    # Backend identifier used for lookup/priority, e.g. "ollama", "vllm", "anthropic".
    name: str
    # Base URL of the backend's HTTP API.
    base_url: str
    # API key for authenticated backends; None for local/unauthenticated ones (e.g. Ollama).
    api_key: Optional[str] = None
    # Model used when a request does not specify one explicitly.
    default_model: str = ""
    # Request timeout in seconds.
    timeout: int = 120
    # Whether this backend may be used at all (can be disabled via env, see load_config).
    enabled: bool = True
@dataclass
class GatewayConfig:
    """Top-level configuration for the LLM gateway service."""
    # --- Server ---
    # Bind address; "0.0.0.0" listens on all interfaces.
    host: str = "0.0.0.0"
    port: int = 8002
    debug: bool = False
    # --- Auth ---
    # Secret used to sign/verify JWTs (empty means unset — presumably auth is
    # then rejected or disabled by the consumer; verify against the auth layer).
    jwt_secret: str = ""
    # Static API keys accepted by the gateway.
    api_keys: list[str] = field(default_factory=list)
    # --- Rate limiting ---
    rate_limit_requests_per_minute: int = 60
    rate_limit_tokens_per_minute: int = 100000
    # --- Backends (None when not configured via environment) ---
    ollama: Optional[LLMBackendConfig] = None
    vllm: Optional[LLMBackendConfig] = None
    anthropic: Optional[LLMBackendConfig] = None
    # Order in which backends are tried; overridden by LLM_BACKEND_PRIORITY.
    backend_priority: list[str] = field(default_factory=lambda: ["ollama", "vllm", "anthropic"])
    # --- Playbooks ---
    playbooks_enabled: bool = True
    # --- Logging ---
    log_level: str = "INFO"
    audit_logging: bool = True
def load_config() -> GatewayConfig:
    """Build a :class:`GatewayConfig` from environment variables.

    Returns:
        A fully populated ``GatewayConfig``. Backend entries (``ollama``,
        ``vllm``, ``anthropic``) are only set when their respective
        environment variables are present.

    Parsing fixes over the original:
    - ``LLM_API_KEYS`` is read once, entries are whitespace-stripped, and
      empty entries are dropped, so ``" a, b ,"`` yields ``["a", "b"]``
      instead of keys with embedded spaces.
    - ``LLM_BACKEND_PRIORITY`` likewise drops empty entries, so a trailing
      comma cannot insert an empty backend name into the priority list.
    """
    raw_api_keys = os.getenv("LLM_API_KEYS", "")
    config = GatewayConfig(
        host=os.getenv("LLM_GATEWAY_HOST", "0.0.0.0"),
        port=int(os.getenv("LLM_GATEWAY_PORT", "8002")),
        debug=os.getenv("LLM_GATEWAY_DEBUG", "false").lower() == "true",
        jwt_secret=os.getenv("JWT_SECRET", ""),
        # Strip whitespace and drop empty entries (handles spaces around
        # commas and trailing commas).
        api_keys=[k.strip() for k in raw_api_keys.split(",") if k.strip()],
        rate_limit_requests_per_minute=int(os.getenv("LLM_RATE_LIMIT_RPM", "60")),
        rate_limit_tokens_per_minute=int(os.getenv("LLM_RATE_LIMIT_TPM", "100000")),
        log_level=os.getenv("LLM_LOG_LEVEL", "INFO"),
        audit_logging=os.getenv("LLM_AUDIT_LOGGING", "true").lower() == "true",
    )

    # Ollama backend (local). A non-empty default URL is supplied, so this is
    # configured unless OLLAMA_BASE_URL is explicitly set to an empty string.
    ollama_url = os.getenv("OLLAMA_BASE_URL", "http://localhost:11434")
    if ollama_url:
        config.ollama = LLMBackendConfig(
            name="ollama",
            base_url=ollama_url,
            default_model=os.getenv("OLLAMA_DEFAULT_MODEL", "llama3.1:8b"),
            timeout=int(os.getenv("OLLAMA_TIMEOUT", "120")),
            enabled=os.getenv("OLLAMA_ENABLED", "true").lower() == "true",
        )

    # vLLM backend (remote, e.g. vast.ai) — only configured when a URL is set.
    vllm_url = os.getenv("VLLM_BASE_URL")
    if vllm_url:
        config.vllm = LLMBackendConfig(
            name="vllm",
            base_url=vllm_url,
            api_key=os.getenv("VLLM_API_KEY"),
            default_model=os.getenv("VLLM_DEFAULT_MODEL", "meta-llama/Meta-Llama-3.1-8B-Instruct"),
            timeout=int(os.getenv("VLLM_TIMEOUT", "120")),
            enabled=os.getenv("VLLM_ENABLED", "true").lower() == "true",
        )

    # Anthropic backend (Claude API fallback) — only configured when a key is set.
    anthropic_key = os.getenv("ANTHROPIC_API_KEY")
    if anthropic_key:
        config.anthropic = LLMBackendConfig(
            name="anthropic",
            base_url="https://api.anthropic.com",
            api_key=anthropic_key,
            default_model=os.getenv("ANTHROPIC_DEFAULT_MODEL", "claude-3-5-sonnet-20241022"),
            timeout=int(os.getenv("ANTHROPIC_TIMEOUT", "120")),
            enabled=os.getenv("ANTHROPIC_ENABLED", "true").lower() == "true",
        )

    # Backend priority: CSV list; strip entries and drop empties.
    priority = os.getenv("LLM_BACKEND_PRIORITY", "ollama,vllm,anthropic")
    config.backend_priority = [b.strip() for b in priority.split(",") if b.strip()]
    return config
# Process-wide configuration singleton; populated lazily on first access.
_config: Optional[GatewayConfig] = None


def get_config() -> GatewayConfig:
    """Return the global gateway configuration, loading it on first use."""
    global _config
    if _config is not None:
        return _config
    _config = load_config()
    return _config