"""Regression tests for the OVH (gpt-oss-120b) tier of the LLM cascade. gpt-oss-120b is a reasoning model: it spends output tokens on chain-of-thought before the answer. Two bugs this pins: 1. A small max_tokens (deep_check passed 400) length-caps it mid-reasoning → content=null → the tier silently returns nothing. _call_ovh must floor the budget so reasoning + the JSON answer fit. 2. When length-capped, the JSON can land in reasoning_content, not content → _call_ovh must fall back to reasoning_content. """ import pytest from unittest.mock import AsyncMock, MagicMock, patch from compliance.services import llm_cascade def _resp(data): r = MagicMock() r.raise_for_status = MagicMock() r.json = MagicMock(return_value=data) return r def _client(resp): inst = AsyncMock() inst.post.return_value = resp inst.__aenter__ = AsyncMock(return_value=inst) inst.__aexit__ = AsyncMock(return_value=False) return inst class TestCallOvhReasoning: @pytest.mark.asyncio async def test_reasoning_content_used_when_content_null(self, monkeypatch): monkeypatch.setenv("OVH_LLM_URL", "https://llm.example.com") monkeypatch.setenv("OVH_LLM_MODEL", "gpt-oss-120b") monkeypatch.setenv("OVH_LLM_KEY", "k") resp = _resp({"choices": [{"message": { "content": None, "reasoning_content": '{"erfuellt": true, "confidence": 0.9}'}}]}) with patch("httpx.AsyncClient", return_value=_client(resp)): out = await llm_cascade._call_ovh("sys", "user", max_tokens=400) assert '"erfuellt": true' in out @pytest.mark.asyncio async def test_small_budget_is_floored(self, monkeypatch): monkeypatch.setenv("OVH_LLM_URL", "https://llm.example.com") monkeypatch.setenv("OVH_LLM_MODEL", "gpt-oss-120b") inst = _client(_resp({"choices": [{"message": {"content": "{}"}}]})) with patch("httpx.AsyncClient", return_value=inst): await llm_cascade._call_ovh("sys", "user", max_tokens=400) assert inst.post.call_args.kwargs["json"]["max_tokens"] >= 2000 @pytest.mark.asyncio async def test_large_budget_is_preserved(self, monkeypatch): monkeypatch.setenv("OVH_LLM_URL", "https://llm.example.com") monkeypatch.setenv("OVH_LLM_MODEL", "gpt-oss-120b") inst = _client(_resp({"choices": [{"message": {"content": "{}"}}]})) with patch("httpx.AsyncClient", return_value=inst): await llm_cascade._call_ovh("sys", "user", max_tokens=6000) assert inst.post.call_args.kwargs["json"]["max_tokens"] == 6000 @pytest.mark.asyncio async def test_content_preferred_when_present(self, monkeypatch): monkeypatch.setenv("OVH_LLM_URL", "https://llm.example.com") monkeypatch.setenv("OVH_LLM_MODEL", "gpt-oss-120b") resp = _resp({"choices": [{"message": { "content": '{"erfuellt": false}', "reasoning_content": "noise"}}]}) with patch("httpx.AsyncClient", return_value=_client(resp)): out = await llm_cascade._call_ovh("sys", "user") assert out == '{"erfuellt": false}' @pytest.mark.asyncio async def test_unconfigured_returns_empty(self, monkeypatch): monkeypatch.delenv("OVH_LLM_URL", raising=False) monkeypatch.delenv("OVH_LLM_MODEL", raising=False) assert await llm_cascade._call_ovh("sys", "user") == ""