"""
LLM Gateway configuration.

Loads settings from environment variables.
"""

import os
from typing import Optional
from dataclasses import dataclass, field


@dataclass
class LLMBackendConfig:
    """Configuration for a single LLM backend."""

    name: str
    base_url: str
    api_key: Optional[str] = None
    default_model: str = ""
    timeout: int = 120
    enabled: bool = True


@dataclass
class GatewayConfig:
    """Main configuration for the LLM gateway."""

    # Server
    host: str = "0.0.0.0"
    port: int = 8002
    debug: bool = False

    # Auth
    jwt_secret: str = ""
    api_keys: list[str] = field(default_factory=list)

    # Rate limiting
    rate_limit_requests_per_minute: int = 60
    rate_limit_tokens_per_minute: int = 100000

    # Backends (None means the backend is not configured)
    ollama: Optional[LLMBackendConfig] = None
    vllm: Optional[LLMBackendConfig] = None
    anthropic: Optional[LLMBackendConfig] = None

    # Default backend priority
    backend_priority: list[str] = field(
        default_factory=lambda: ["ollama", "vllm", "anthropic"]
    )

    # Playbooks
    playbooks_enabled: bool = True

    # Logging
    log_level: str = "INFO"
    audit_logging: bool = True


def _env_flag(name: str, default: str) -> bool:
    """Return True only if the variable (or *default*) equals "true", ignoring case."""
    return os.getenv(name, default).lower() == "true"


def _split_csv(raw: str) -> list[str]:
    """Split a comma-separated string, stripping whitespace and dropping empty items."""
    stripped = (item.strip() for item in raw.split(","))
    return [item for item in stripped if item]


def load_config() -> GatewayConfig:
    """Build a GatewayConfig from environment variables.

    Comma-separated variables (``LLM_API_KEYS``, ``LLM_BACKEND_PRIORITY``) are
    stripped of surrounding whitespace and empty entries are discarded, so a
    trailing comma or padded value never produces an empty or padded item.

    Returns:
        A freshly constructed GatewayConfig.

    Raises:
        ValueError: If a numeric variable (port, rate limits, timeouts) is set
            to something ``int()`` cannot parse.
    """
    config = GatewayConfig(
        host=os.getenv("LLM_GATEWAY_HOST", "0.0.0.0"),
        port=int(os.getenv("LLM_GATEWAY_PORT", "8002")),
        debug=_env_flag("LLM_GATEWAY_DEBUG", "false"),
        jwt_secret=os.getenv("JWT_SECRET", ""),
        # Filtering empties matters here: an empty string must never end up
        # in the list of accepted API keys.
        api_keys=_split_csv(os.getenv("LLM_API_KEYS", "")),
        rate_limit_requests_per_minute=int(os.getenv("LLM_RATE_LIMIT_RPM", "60")),
        rate_limit_tokens_per_minute=int(os.getenv("LLM_RATE_LIMIT_TPM", "100000")),
        log_level=os.getenv("LLM_LOG_LEVEL", "INFO"),
        audit_logging=_env_flag("LLM_AUDIT_LOGGING", "true"),
    )

    # Ollama backend (local). It has a default URL, so it is configured
    # unless OLLAMA_BASE_URL is explicitly set to an empty string.
    ollama_url = os.getenv("OLLAMA_BASE_URL", "http://localhost:11434")
    if ollama_url:
        config.ollama = LLMBackendConfig(
            name="ollama",
            base_url=ollama_url,
            default_model=os.getenv("OLLAMA_DEFAULT_MODEL", "llama3.1:8b"),
            timeout=int(os.getenv("OLLAMA_TIMEOUT", "120")),
            enabled=_env_flag("OLLAMA_ENABLED", "true"),
        )

    # vLLM backend (remote, e.g. vast.ai); only configured when a URL is given.
    vllm_url = os.getenv("VLLM_BASE_URL")
    if vllm_url:
        config.vllm = LLMBackendConfig(
            name="vllm",
            base_url=vllm_url,
            api_key=os.getenv("VLLM_API_KEY"),
            default_model=os.getenv(
                "VLLM_DEFAULT_MODEL", "meta-llama/Meta-Llama-3.1-8B-Instruct"
            ),
            timeout=int(os.getenv("VLLM_TIMEOUT", "120")),
            enabled=_env_flag("VLLM_ENABLED", "true"),
        )

    # Anthropic backend (Claude API fallback); only configured when a key is given.
    anthropic_key = os.getenv("ANTHROPIC_API_KEY")
    if anthropic_key:
        config.anthropic = LLMBackendConfig(
            name="anthropic",
            base_url="https://api.anthropic.com",
            api_key=anthropic_key,
            default_model=os.getenv(
                "ANTHROPIC_DEFAULT_MODEL", "claude-3-5-sonnet-20241022"
            ),
            timeout=int(os.getenv("ANTHROPIC_TIMEOUT", "120")),
            enabled=_env_flag("ANTHROPIC_ENABLED", "true"),
        )

    # Backend priority: blank entries (e.g. from a trailing comma) are dropped
    # so the list only ever contains real backend names.
    config.backend_priority = _split_csv(
        os.getenv("LLM_BACKEND_PRIORITY", "ollama,vllm,anthropic")
    )

    return config


# Global configuration (singleton), created lazily by get_config().
_config: Optional[GatewayConfig] = None


def get_config() -> GatewayConfig:
    """Return the global configuration, loading it on first use."""
    global _config
    if _config is None:
        _config = load_config()
    return _config