This repository has been archived on 2026-02-15. You can view files and clone it, but you cannot open issues, create pull requests, or push commits.
Files
breakpilot-pwa/backend/llm_gateway/config.py
Benjamin Admin 21a844cb8a fix: Restore all files lost during destructive rebase
A previous `git pull --rebase origin main` dropped 177 local commits,
losing 3400+ files across admin-v2, backend, studio-v2, website,
klausur-service, and many other services. The partial restore attempt
(660295e2) only recovered some files.

This commit restores all missing files from pre-rebase ref 98933f5e
while preserving post-rebase additions (night-scheduler, night-mode UI,
NightModeWidget dashboard integration).

Restored features include:
- AI Module Sidebar (FAB), OCR Labeling, OCR Compare
- GPU Dashboard, RAG Pipeline, Magic Help
- Klausur-Korrektur (8 files), Abitur-Archiv (5+ files)
- Companion, Zeugnisse-Crawler, Screen Flow
- Full backend, studio-v2, website, klausur-service
- All compliance SDKs, agent-core, voice-service
- CI/CD configs, documentation, scripts

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-02-09 09:51:32 +01:00

123 lines
3.7 KiB
Python

"""
LLM Gateway Konfiguration
Lädt Einstellungen aus Umgebungsvariablen.
"""
import os
from typing import Optional
from dataclasses import dataclass, field
@dataclass
class LLMBackendConfig:
    """Configuration for a single LLM backend (e.g. Ollama, vLLM, Anthropic)."""
    # Backend identifier used for lookup/priority, e.g. "ollama", "vllm", "anthropic".
    name: str
    # Base URL of the backend's HTTP API.
    base_url: str
    # API key for authenticated backends; None for local/unauthenticated ones (e.g. Ollama).
    api_key: Optional[str] = None
    # Model used when a request does not specify one explicitly.
    default_model: str = ""
    # Request timeout in seconds.
    timeout: int = 120
    # Whether this backend may be used at all (can be disabled via env, see load_config).
    enabled: bool = True
@dataclass
class GatewayConfig:
    """Top-level configuration for the LLM gateway service."""
    # --- Server ---
    # Bind address; "0.0.0.0" listens on all interfaces.
    host: str = "0.0.0.0"
    port: int = 8002
    debug: bool = False
    # --- Auth ---
    # Secret used to sign/verify JWTs (empty means unset — presumably auth is
    # then rejected or disabled by the consumer; verify against the auth layer).
    jwt_secret: str = ""
    # Static API keys accepted by the gateway.
    api_keys: list[str] = field(default_factory=list)
    # --- Rate limiting ---
    rate_limit_requests_per_minute: int = 60
    rate_limit_tokens_per_minute: int = 100000
    # --- Backends (None when not configured via environment) ---
    ollama: Optional[LLMBackendConfig] = None
    vllm: Optional[LLMBackendConfig] = None
    anthropic: Optional[LLMBackendConfig] = None
    # Order in which backends are tried; overridden by LLM_BACKEND_PRIORITY.
    backend_priority: list[str] = field(default_factory=lambda: ["ollama", "vllm", "anthropic"])
    # --- Playbooks ---
    playbooks_enabled: bool = True
    # --- Logging ---
    log_level: str = "INFO"
    audit_logging: bool = True
def load_config() -> GatewayConfig:
    """Build a :class:`GatewayConfig` from environment variables.

    Returns:
        A fully populated ``GatewayConfig``. Backend entries (``ollama``,
        ``vllm``, ``anthropic``) are only set when their respective
        environment variables are present.

    Parsing fixes over the original:
    - ``LLM_API_KEYS`` is read once, entries are whitespace-stripped, and
      empty entries are dropped, so ``" a, b ,"`` yields ``["a", "b"]``
      instead of keys with embedded spaces.
    - ``LLM_BACKEND_PRIORITY`` likewise drops empty entries, so a trailing
      comma cannot insert an empty backend name into the priority list.
    """
    raw_api_keys = os.getenv("LLM_API_KEYS", "")
    config = GatewayConfig(
        host=os.getenv("LLM_GATEWAY_HOST", "0.0.0.0"),
        port=int(os.getenv("LLM_GATEWAY_PORT", "8002")),
        debug=os.getenv("LLM_GATEWAY_DEBUG", "false").lower() == "true",
        jwt_secret=os.getenv("JWT_SECRET", ""),
        # Strip whitespace and drop empty entries (handles spaces around
        # commas and trailing commas).
        api_keys=[k.strip() for k in raw_api_keys.split(",") if k.strip()],
        rate_limit_requests_per_minute=int(os.getenv("LLM_RATE_LIMIT_RPM", "60")),
        rate_limit_tokens_per_minute=int(os.getenv("LLM_RATE_LIMIT_TPM", "100000")),
        log_level=os.getenv("LLM_LOG_LEVEL", "INFO"),
        audit_logging=os.getenv("LLM_AUDIT_LOGGING", "true").lower() == "true",
    )

    # Ollama backend (local). A non-empty default URL is supplied, so this is
    # configured unless OLLAMA_BASE_URL is explicitly set to an empty string.
    ollama_url = os.getenv("OLLAMA_BASE_URL", "http://localhost:11434")
    if ollama_url:
        config.ollama = LLMBackendConfig(
            name="ollama",
            base_url=ollama_url,
            default_model=os.getenv("OLLAMA_DEFAULT_MODEL", "llama3.1:8b"),
            timeout=int(os.getenv("OLLAMA_TIMEOUT", "120")),
            enabled=os.getenv("OLLAMA_ENABLED", "true").lower() == "true",
        )

    # vLLM backend (remote, e.g. vast.ai) — only configured when a URL is set.
    vllm_url = os.getenv("VLLM_BASE_URL")
    if vllm_url:
        config.vllm = LLMBackendConfig(
            name="vllm",
            base_url=vllm_url,
            api_key=os.getenv("VLLM_API_KEY"),
            default_model=os.getenv("VLLM_DEFAULT_MODEL", "meta-llama/Meta-Llama-3.1-8B-Instruct"),
            timeout=int(os.getenv("VLLM_TIMEOUT", "120")),
            enabled=os.getenv("VLLM_ENABLED", "true").lower() == "true",
        )

    # Anthropic backend (Claude API fallback) — only configured when a key is set.
    anthropic_key = os.getenv("ANTHROPIC_API_KEY")
    if anthropic_key:
        config.anthropic = LLMBackendConfig(
            name="anthropic",
            base_url="https://api.anthropic.com",
            api_key=anthropic_key,
            default_model=os.getenv("ANTHROPIC_DEFAULT_MODEL", "claude-3-5-sonnet-20241022"),
            timeout=int(os.getenv("ANTHROPIC_TIMEOUT", "120")),
            enabled=os.getenv("ANTHROPIC_ENABLED", "true").lower() == "true",
        )

    # Backend priority: CSV list; strip entries and drop empties.
    priority = os.getenv("LLM_BACKEND_PRIORITY", "ollama,vllm,anthropic")
    config.backend_priority = [b.strip() for b in priority.split(",") if b.strip()]
    return config
# Process-wide configuration singleton; populated lazily on first access.
_config: Optional[GatewayConfig] = None


def get_config() -> GatewayConfig:
    """Return the global gateway configuration, loading it on first use."""
    global _config
    if _config is not None:
        return _config
    _config = load_config()
    return _config