Files
breakpilot-compliance/backend-compliance/tests/test_llm_cascade_ovh.py
T
Benjamin Admin 067118b12d
CI / detect-changes (push) Successful in 8s
CI / branch-name (push) Has been skipped
CI / guardrail-integrity (push) Has been skipped
CI / secret-scan (push) Has been skipped
CI / loc-budget (push) Successful in 20s
CI / go-lint (push) Has been skipped
CI / python-lint (push) Has been skipped
CI / nodejs-lint (push) Has been skipped
CI / nodejs-build (push) Has been skipped
CI / test-go (push) Has been skipped
CI / iace-gt-coverage (push) Has been skipped
CI / test-python-backend (push) Successful in 25s
CI / test-python-document-crawler (push) Has been skipped
CI / test-python-dsms-gateway (push) Has been skipped
CI / dep-audit (push) Has been skipped
CI / sbom-scan (push) Has been skipped
CI / build-sha-integrity (push) Successful in 6s
CI / validate-canonical-controls (push) Successful in 5s
fix(cascade): give OVH/gpt-oss reasoning headroom so Tier-2 isn't silently dead
gpt-oss-120b is a reasoning model: it spends output tokens on chain-of-thought
before the answer. deep_check called _call_ovh with max_tokens=400, which
length-capped it mid-reasoning -> content=null -> the OVH tier returned nothing
and the cascade always skipped Tier-2. Floor the OVH budget to >=2000, fall back
to reasoning_content when content is null, and raise the client timeout to 90s
for the slower reasoning path.

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
2026-06-22 17:37:48 +02:00

78 lines
3.3 KiB
Python

"""Regression tests for the OVH (gpt-oss-120b) tier of the LLM cascade.
gpt-oss-120b is a reasoning model: it spends output tokens on chain-of-thought
before the answer. This module pins two bugs:

1. A small max_tokens (deep_check passed 400) length-caps it mid-reasoning →
   content=null → the tier silently returns nothing. _call_ovh must floor the
   budget so reasoning + the JSON answer fit.
2. When length-capped, the JSON can land in reasoning_content, not content →
   _call_ovh must fall back to reasoning_content.
"""
import pytest
from unittest.mock import AsyncMock, MagicMock, patch
from compliance.services import llm_cascade
def _resp(data):
r = MagicMock()
r.raise_for_status = MagicMock()
r.json = MagicMock(return_value=data)
return r
def _client(resp):
inst = AsyncMock()
inst.post.return_value = resp
inst.__aenter__ = AsyncMock(return_value=inst)
inst.__aexit__ = AsyncMock(return_value=False)
return inst
class TestCallOvhReasoning:
    """Regression tests for ``llm_cascade._call_ovh``.

    Tests that reach the HTTP layer patch ``httpx.AsyncClient`` with a mock
    client built by ``_client``, so the call under test talks to the mock.
    """

    @pytest.fixture
    def ovh_env(self, monkeypatch):
        """Set the OVH environment variables for a configured tier.

        All three variables are set for every configured test so that no
        test depends on what the ambient environment happens to contain.
        """
        monkeypatch.setenv("OVH_LLM_URL", "https://llm.example.com")
        monkeypatch.setenv("OVH_LLM_MODEL", "gpt-oss-120b")
        monkeypatch.setenv("OVH_LLM_KEY", "k")

    @staticmethod
    def _sent_max_tokens(client):
        """Return ``max_tokens`` from the JSON body of the last ``post``."""
        return client.post.call_args.kwargs["json"]["max_tokens"]

    @pytest.mark.asyncio
    async def test_reasoning_content_used_when_content_null(self, ovh_env):
        """A null ``content`` must fall back to ``reasoning_content``."""
        resp = _resp({"choices": [{"message": {
            "content": None,
            "reasoning_content": '{"erfuellt": true, "confidence": 0.9}',
        }}]})
        with patch("httpx.AsyncClient", return_value=_client(resp)):
            out = await llm_cascade._call_ovh("sys", "user", max_tokens=400)
        assert '"erfuellt": true' in out

    @pytest.mark.asyncio
    async def test_small_budget_is_floored(self, ovh_env):
        """A requested budget of 400 is sent as at least 2000 tokens."""
        inst = _client(_resp({"choices": [{"message": {"content": "{}"}}]}))
        with patch("httpx.AsyncClient", return_value=inst):
            await llm_cascade._call_ovh("sys", "user", max_tokens=400)
        assert self._sent_max_tokens(inst) >= 2000

    @pytest.mark.asyncio
    async def test_large_budget_is_preserved(self, ovh_env):
        """A requested budget of 6000 is sent unchanged."""
        inst = _client(_resp({"choices": [{"message": {"content": "{}"}}]}))
        with patch("httpx.AsyncClient", return_value=inst):
            await llm_cascade._call_ovh("sys", "user", max_tokens=6000)
        assert self._sent_max_tokens(inst) == 6000

    @pytest.mark.asyncio
    async def test_content_preferred_when_present(self, ovh_env):
        """A non-null ``content`` wins over ``reasoning_content``."""
        resp = _resp({"choices": [{"message": {
            "content": '{"erfuellt": false}',
            "reasoning_content": "noise",
        }}]})
        with patch("httpx.AsyncClient", return_value=_client(resp)):
            out = await llm_cascade._call_ovh("sys", "user")
        assert out == '{"erfuellt": false}'

    @pytest.mark.asyncio
    async def test_unconfigured_returns_empty(self, monkeypatch):
        """With the OVH variables removed, the call returns an empty string."""
        # Clear every OVH variable, including the key, so the outcome does
        # not depend on the environment the test runner was started in.
        for var in ("OVH_LLM_URL", "OVH_LLM_MODEL", "OVH_LLM_KEY"):
            monkeypatch.delenv(var, raising=False)
        assert await llm_cascade._call_ovh("sys", "user") == ""