# breakpilot-compliance/backend-compliance/tests/test_cookie_sufficiency_judge.py

"""Layer-3 cookie sufficiency-judge: only embedding/boost-RESCUED passes are
re-judged by Haiku; keyword passes are untouched; a FEHLT verdict un-passes."""
import pytest
from unittest.mock import AsyncMock, patch

from compliance.services.specialist_agents.cookie_policy._sufficiency_judge import (
    judge_rescued,
)

# Dotted path handed to ``unittest.mock.patch`` in the tests of this module;
# each test swaps this attribute for an ``AsyncMock`` returning a canned
# JSON verdict string.
_ANTHROPIC = "compliance.services.llm_cascade._call_anthropic"


def _r(cid, source, passed=True):
    return {"control_id": cid, "source": source, "passed": passed,
            "label": cid, "_pass_criteria": ["konkrete Angabe nötig"]}


@pytest.mark.asyncio
async def test_rescued_unpassed_when_judge_fehlt():
    """A passed "keyword+embedding" control is un-passed on a negative verdict.

    The mocked LLM call answers ``"erfuellt": false``; the test expects
    ``judge_rescued`` to return 1, to flip ``passed`` to ``False`` and to
    leave ``"+llm_failed"`` in the control's ``source``.
    """
    verdict = '{"erfuellt": false, "confidence": 0.9, "begruendung": "fehlt"}'
    judge = AsyncMock(return_value=verdict)
    controls = [_r("A", "keyword+embedding")]

    with patch(_ANTHROPIC, new=judge):
        outcome = await judge_rescued("text", controls)

    assert outcome == 1
    control = controls[0]
    assert control["passed"] is False
    assert "+llm_failed" in control["source"]


@pytest.mark.asyncio
async def test_rescued_kept_when_judge_erfuellt():
    """A passed "keyword+embedding" control stays passed on a positive verdict.

    The mocked LLM call answers ``"erfuellt": true``; the test expects
    ``judge_rescued`` to return 0 and ``passed`` to remain ``True``.
    """
    verdict = '{"erfuellt": true, "confidence": 0.9}'
    judge = AsyncMock(return_value=verdict)
    controls = [_r("A", "keyword+embedding")]

    with patch(_ANTHROPIC, new=judge):
        outcome = await judge_rescued("text", controls)

    assert outcome == 0
    assert controls[0]["passed"] is True


@pytest.mark.asyncio
async def test_keyword_pass_not_judged():
    """Controls that passed deterministically (keyword only) are NOT sent to the judge.

    Even though the mocked LLM would answer negatively, the control must
    stay passed, the return value must be 0 and the mock must never be called.
    """
    judge = AsyncMock(return_value='{"erfuellt": false}')
    controls = [_r("A", "keyword")]

    with patch(_ANTHROPIC, new=judge):
        outcome = await judge_rescued("text", controls)

    assert outcome == 0
    assert controls[0]["passed"] is True
    judge.assert_not_called()


@pytest.mark.asyncio
async def test_boost_rescue_is_judged():
    """A passed "keyword+regex_boost" control is judged and un-passed on a negative verdict.

    The two conditions are asserted separately so a failure shows which
    one did not hold.
    """
    judge = AsyncMock(return_value='{"erfuellt": false}')
    controls = [_r("A", "keyword+regex_boost")]

    with patch(_ANTHROPIC, new=judge):
        outcome = await judge_rescued("text", controls)

    assert outcome == 1
    assert controls[0]["passed"] is False


@pytest.mark.asyncio
async def test_failed_controls_ignored():
    """Controls that did not pass (failed) are not this layer's concern.

    A control created with ``passed=False`` must not trigger an LLM call,
    and ``judge_rescued`` must return 0.
    """
    judge = AsyncMock(return_value='{"erfuellt": false}')
    controls = [_r("A", "keyword+embedding", passed=False)]

    with patch(_ANTHROPIC, new=judge):
        outcome = await judge_rescued("text", controls)

    assert outcome == 0
    judge.assert_not_called()