Files
breakpilot-compliance/backend-compliance/tests/test_checker_router.py
T
Benjamin Admin 3e3644f83d feat(checkers): platform router + Haiku sufficiency tier; cookie is first consumer
Generalise "Embedding finds, Claude decides" into the shared Pruefer-Library:
- router.route_and_check dispatches control -> sensor_classification -> Checker.
- build_spec reads sensor_classification (CONTENT/LLM -> judge=haiku, the
  validated sufficiency tier; the Qwen-first cascade is disproven for sufficiency).
- LLMChecker gains a Haiku-direct tier (reuses the validated deep_check prompt).
- Cookie Layer-3 now routes through route_and_check instead of bespoke code, so
  cookie is the first real router consumer -- proves the architecture end-to-end.

Reproduces the validated result via the shared path: FN 159->14, recall
0.13->0.92, precision 0.89 (bespoke path: FN 12, recall 0.93, precision 0.90 --
the difference is within Haiku noise).
Tests: 10/10 (router dispatch + build_spec + haiku tier + cookie rewire).

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
2026-06-22 17:37:48 +02:00

52 lines
2.2 KiB
Python

"""Prüfer-Router: build_spec aus sensor_classification + method-agnostischer
Dispatch. CONTENT/LLM -> Haiku-Sufficiency-Tier (validiert), unbekannte
decision_methods -> fail-safe present=None."""
import pytest
from unittest.mock import AsyncMock, patch
from compliance.services.checkers.base import DocContext
from compliance.services.checkers.router import build_spec, route_and_check
# Dotted import path of the Anthropic call. The Haiku-tier tests in this module
# hand it to patch() and substitute an AsyncMock that returns a canned string.
_ANTHROPIC = "compliance.services.llm_cascade._call_anthropic"
def test_build_spec_content_llm_uses_haiku():
    """A CONTENT/LLM classification is expected to select the Haiku judge.

    Also checks that both methods are carried onto the spec and that the
    supplied criteria come back as the spec's paraphrases.
    """
    classification = {"verification_method": "CONTENT", "decision_method": "LLM"}
    spec = build_spec("X", classification, label="L", criteria=["a", "b"])

    assert spec.verification_method == "CONTENT"
    assert spec.decision_method == "LLM"
    assert spec.extra.get("judge") == "haiku"
    assert spec.paraphrases == ["a", "b"]
def test_build_spec_embedding_no_haiku():
    """An EMBEDDING decision method is expected to leave `extra` without a judge."""
    spec = build_spec(
        "X",
        {"verification_method": "CONTENT", "decision_method": "EMBEDDING"},
    )
    judge = spec.extra.get("judge")
    assert judge is None
@pytest.mark.asyncio
async def test_route_unknown_decision_is_failsafe():
    """A BEHAVIOR/PLAYWRIGHT spec is expected to come back undecided.

    The result must have `present is None` and a source containing
    "no_checker".
    """
    classification = {
        "verification_method": "BEHAVIOR",
        "decision_method": "PLAYWRIGHT",
    }
    spec = build_spec("X", classification)
    document = DocContext(text="x" * 200)

    result = await route_and_check(spec, document)

    # Two separate asserts so a failure names the condition that broke.
    assert result.present is None
    assert "no_checker" in result.source
@pytest.mark.asyncio
async def test_route_content_llm_haiku_fehlt():
    """A mocked "erfuellt": false reply must surface as `present is False`.

    The Anthropic call is replaced by an AsyncMock; the test also checks that
    the result's source is "haiku" and that the mock was called at least once.
    """
    spec = build_spec(
        "X",
        {"verification_method": "CONTENT", "decision_method": "LLM"},
        label="Speicherdauer",
        criteria=["Höchstdauer pro Kategorie"],
    )
    canned_reply = '{"erfuellt": false, "confidence": 0.9, "begruendung": "fehlt"}'
    anthropic_stub = AsyncMock(return_value=canned_reply)
    document = DocContext(text="Wir nutzen Cookies. " * 30)

    with patch(_ANTHROPIC, new=anthropic_stub):
        result = await route_and_check(spec, document)

    assert result.present is False
    assert result.source == "haiku"
    assert anthropic_stub.call_count >= 1
@pytest.mark.asyncio
async def test_route_content_llm_haiku_erfuellt():
    """A mocked "erfuellt": true reply must surface as `present is True`."""
    spec = build_spec(
        "X",
        {"verification_method": "CONTENT", "decision_method": "LLM"},
        label="L",
        criteria=["x"],
    )
    anthropic_stub = AsyncMock(return_value='{"erfuellt": true, "confidence": 0.8}')
    document = DocContext(text="text " * 40)

    with patch(_ANTHROPIC, new=anthropic_stub):
        result = await route_and_check(spec, document)

    assert result.present is True