A previous `git pull --rebase origin main` dropped 177 local commits,
losing 3400+ files across admin-v2, backend, studio-v2, website,
klausur-service, and many other services. The partial restore attempt
(660295e2) only recovered some files.
This commit restores all missing files from pre-rebase ref 98933f5e
while preserving post-rebase additions (night-scheduler, night-mode UI,
NightModeWidget dashboard integration).
Restored features include:
- AI Module Sidebar (FAB), OCR Labeling, OCR Compare
- GPU Dashboard, RAG Pipeline, Magic Help
- Klausur-Korrektur (8 files), Abitur-Archiv (5+ files)
- Companion, Zeugnisse-Crawler, Screen Flow
- Full backend, studio-v2, website, klausur-service
- All compliance SDKs, agent-core, voice-service
- CI/CD configs, documentation, scripts
Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
196 lines · 7.1 KiB · Python
"""
|
|
Tests für Inference Service.
|
|
"""
|
|
|
|
import pytest
|
|
from unittest.mock import AsyncMock, patch, MagicMock
|
|
from llm_gateway.services.inference import (
|
|
InferenceService,
|
|
InferenceResult,
|
|
get_inference_service,
|
|
)
|
|
from llm_gateway.models.chat import (
|
|
ChatCompletionRequest,
|
|
ChatMessage,
|
|
Usage,
|
|
)
|
|
|
|
|
|
class TestInferenceServiceModelMapping:
    """Tests for mapping public model aliases onto concrete backend models."""

    def setup_method(self):
        """Provide a fresh InferenceService instance for every test."""
        self.service = InferenceService()

    @staticmethod
    def _enable_ollama_only(cfg, priority):
        """Configure the mocked config so that Ollama is the sole enabled backend."""
        cfg.ollama = MagicMock()
        cfg.ollama.name = "ollama"
        cfg.ollama.enabled = True
        cfg.vllm = None
        cfg.anthropic = None
        cfg.backend_priority = priority

    def test_map_breakpilot_model_to_ollama(self):
        """The BreakPilot 8B alias resolves to the Ollama llama3.1 model."""
        with patch.object(self.service, 'config') as cfg:
            self._enable_ollama_only(cfg, ["ollama", "vllm", "anthropic"])

            mapped, backend = self.service._map_model_to_backend("breakpilot-teacher-8b")
            assert mapped == "llama3.1:8b"
            assert backend.name == "ollama"

    def test_map_breakpilot_70b_model(self):
        """The BreakPilot 70B alias maps to a 70B backend model."""
        with patch.object(self.service, 'config') as cfg:
            self._enable_ollama_only(cfg, ["ollama"])

            mapped, _backend = self.service._map_model_to_backend("breakpilot-teacher-70b")
            assert "70b" in mapped.lower()

    def test_map_claude_model_to_anthropic(self):
        """Claude aliases resolve to the Anthropic backend."""
        with patch.object(self.service, 'config') as cfg:
            cfg.ollama = None
            cfg.vllm = None
            cfg.anthropic = MagicMock()
            cfg.anthropic.name = "anthropic"
            cfg.anthropic.enabled = True
            cfg.anthropic.default_model = "claude-3-5-sonnet-20241022"
            cfg.backend_priority = ["anthropic"]

            mapped, backend = self.service._map_model_to_backend("claude-3-5-sonnet")
            assert backend.name == "anthropic"
            assert "claude" in mapped.lower()

    def test_map_model_no_backend_available(self):
        """Mapping raises ValueError when no backend is configured at all."""
        with patch.object(self.service, 'config') as cfg:
            cfg.ollama = None
            cfg.vllm = None
            cfg.anthropic = None
            cfg.backend_priority = []

            with pytest.raises(ValueError, match="No LLM backend available"):
                self.service._map_model_to_backend("breakpilot-teacher-8b")
|
|
|
|
|
|
class TestInferenceServiceBackendSelection:
    """Tests for picking a backend according to the configured priority list."""

    def setup_method(self):
        """Provide a fresh InferenceService instance for every test."""
        self.service = InferenceService()

    def test_get_available_backend_priority(self):
        """When several backends are enabled, the first priority entry wins."""
        with patch.object(self.service, 'config') as cfg:
            # Both Ollama and vLLM are configured and enabled.
            cfg.ollama = MagicMock()
            cfg.ollama.enabled = True
            cfg.vllm = MagicMock()
            cfg.vllm.enabled = True
            cfg.anthropic = None
            cfg.backend_priority = ["vllm", "ollama"]

            chosen = self.service._get_available_backend()
            # vLLM is listed first in the priority order.
            assert chosen == cfg.vllm

    def test_get_available_backend_fallback(self):
        """A disabled primary backend falls through to the next enabled one."""
        with patch.object(self.service, 'config') as cfg:
            cfg.ollama = MagicMock()
            cfg.ollama.enabled = True
            cfg.vllm = MagicMock()
            cfg.vllm.enabled = False  # primary is switched off
            cfg.anthropic = None
            cfg.backend_priority = ["vllm", "ollama"]

            chosen = self.service._get_available_backend()
            # Ollama acts as the fallback.
            assert chosen == cfg.ollama

    def test_get_available_backend_none_available(self):
        """Selection yields None when no backend is configured."""
        with patch.object(self.service, 'config') as cfg:
            cfg.ollama = None
            cfg.vllm = None
            cfg.anthropic = None
            cfg.backend_priority = ["ollama", "vllm", "anthropic"]

            assert self.service._get_available_backend() is None
|
|
|
|
|
|
class TestInferenceResult:
    """Tests for the InferenceResult dataclass."""

    def test_inference_result_creation(self):
        """All explicitly supplied fields are stored as given."""
        usage = Usage(prompt_tokens=10, completion_tokens=5, total_tokens=15)
        result = InferenceResult(
            content="Hello, world!",
            model="llama3.1:8b",
            backend="ollama",
            usage=usage,
            finish_reason="stop",
        )
        assert result.content == "Hello, world!"
        assert result.model == "llama3.1:8b"
        assert result.backend == "ollama"
        assert result.usage.total_tokens == 15

    def test_inference_result_defaults(self):
        """Optional fields fall back to their declared defaults."""
        result = InferenceResult(content="Test", model="test", backend="test")
        assert result.usage is None
        assert result.finish_reason == "stop"
|
|
|
|
|
|
class TestInferenceServiceComplete:
    """Tests for the complete() method."""

    @pytest.mark.asyncio
    async def test_complete_calls_correct_backend(self):
        """complete() dispatches to the backend chosen by the model mapping.

        Fix: patch `_call_ollama` with an explicit AsyncMock instead of
        relying on `patch.object`'s implicit async-def detection.  The mock
        must be awaitable because complete() presumably awaits the backend
        call (complete() itself is async — TODO confirm against the service
        implementation).  This also puts the previously unused AsyncMock
        import to work.
        """
        service = InferenceService()

        request = ChatCompletionRequest(
            model="breakpilot-teacher-8b",
            messages=[ChatMessage(role="user", content="Hello")],
        )

        # Mock both the model-to-backend mapping and the backend call itself.
        with patch.object(service, '_map_model_to_backend') as mock_map, \
             patch.object(service, '_call_ollama', new_callable=AsyncMock) as mock_call:
            mock_backend = MagicMock()
            mock_backend.name = "ollama"
            mock_map.return_value = ("llama3.1:8b", mock_backend)
            mock_call.return_value = InferenceResult(
                content="Hello!",
                model="llama3.1:8b",
                backend="ollama",
            )

            response = await service.complete(request)

            mock_call.assert_called_once()
            assert response.choices[0].message.content == "Hello!"
|
|
|
|
|
|
class TestGetInferenceServiceSingleton:
    """Tests for the module-level singleton accessor."""

    def test_singleton_returns_same_instance(self):
        """Repeated calls to get_inference_service yield one shared object."""
        first = get_inference_service()
        second = get_inference_service()
        assert first is second
|