feat(checkers): platform router + Haiku sufficiency tier; cookie is first consumer
Generalise "Embedding finds, Claude decides" into the shared Pruefer-Library:
- router.route_and_check dispatches control -> sensor_classification -> Checker.
- build_spec reads sensor_classification (CONTENT/LLM -> judge=haiku, the validated sufficiency tier; the Qwen-first cascade is disproven for sufficiency).
- LLMChecker gains a Haiku-direct tier (reuses the validated deep_check prompt).
- Cookie Layer-3 now routes through route_and_check instead of bespoke code, so cookie is the first real router consumer -- proves the architecture end-to-end.

Reproduces the validated result via the shared path: FN 159->14, recall 0.13->0.92, precision 0.89 (vs bespoke 12/0.93/0.90 -- within Haiku noise).

Tests: 10/10 (router dispatch + build_spec + haiku tier + cookie rewire).

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
This commit is contained in:
@@ -0,0 +1,51 @@
|
||||
"""Prüfer-Router: build_spec aus sensor_classification + method-agnostischer
|
||||
Dispatch. CONTENT/LLM -> Haiku-Sufficiency-Tier (validiert), unbekannte
|
||||
decision_methods -> fail-safe present=None."""
|
||||
import pytest
|
||||
from unittest.mock import AsyncMock, patch
|
||||
|
||||
from compliance.services.checkers.base import DocContext
|
||||
from compliance.services.checkers.router import build_spec, route_and_check
|
||||
|
||||
# Dotted path handed to unittest.mock.patch so the tests below can replace
# the Anthropic call with an AsyncMock returning a canned JSON verdict.
_ANTHROPIC = "compliance.services.llm_cascade._call_anthropic"
|
||||
|
||||
|
||||
def test_build_spec_content_llm_uses_haiku():
    """A CONTENT/LLM classification produces a spec judged by "haiku"."""
    classification = {
        "verification_method": "CONTENT",
        "decision_method": "LLM",
    }
    spec = build_spec("X", classification, label="L", criteria=["a", "b"])

    # Both classification fields are carried over onto the spec.
    assert spec.verification_method == "CONTENT"
    assert spec.decision_method == "LLM"
    # The judge is recorded in `extra`; the criteria show up as paraphrases.
    assert spec.extra.get("judge") == "haiku"
    assert spec.paraphrases == ["a", "b"]
|
||||
|
||||
|
||||
def test_build_spec_embedding_no_haiku():
    """A CONTENT/EMBEDDING classification leaves the judge entry unset."""
    classification = {
        "verification_method": "CONTENT",
        "decision_method": "EMBEDDING",
    }
    spec = build_spec("X", classification)

    assert spec.extra.get("judge") is None
|
||||
|
||||
|
||||
@pytest.mark.asyncio
async def test_route_unknown_decision_is_failsafe():
    """Routing a BEHAVIOR/PLAYWRIGHT spec returns the fail-safe result.

    The expected outcome is an undecided result (``present is None``) whose
    source mentions "no_checker".
    """
    classification = {
        "verification_method": "BEHAVIOR",
        "decision_method": "PLAYWRIGHT",
    }
    spec = build_spec("X", classification)

    result = await route_and_check(spec, DocContext(text="x" * 200))

    assert result.present is None
    assert "no_checker" in result.source
|
||||
|
||||
|
||||
@pytest.mark.asyncio
async def test_route_content_llm_haiku_fehlt():
    """A mocked "erfuellt": false verdict is reported as present=False.

    The Anthropic call is replaced by an AsyncMock; the result must name
    "haiku" as its source and the mock must have been called at least once.
    """
    classification = {
        "verification_method": "CONTENT",
        "decision_method": "LLM",
    }
    spec = build_spec(
        "X",
        classification,
        label="Speicherdauer",
        criteria=["Höchstdauer pro Kategorie"],
    )
    verdict = '{"erfuellt": false, "confidence": 0.9, "begruendung": "fehlt"}'
    anthropic_mock = AsyncMock(return_value=verdict)

    with patch(_ANTHROPIC, new=anthropic_mock):
        result = await route_and_check(
            spec, DocContext(text="Wir nutzen Cookies. " * 30)
        )

    assert result.present is False
    assert result.source == "haiku"
    assert anthropic_mock.call_count >= 1
|
||||
|
||||
|
||||
@pytest.mark.asyncio
async def test_route_content_llm_haiku_erfuellt():
    """A mocked "erfuellt": true verdict is reported as present=True."""
    classification = {
        "verification_method": "CONTENT",
        "decision_method": "LLM",
    }
    spec = build_spec("X", classification, label="L", criteria=["x"])
    anthropic_mock = AsyncMock(
        return_value='{"erfuellt": true, "confidence": 0.8}'
    )

    with patch(_ANTHROPIC, new=anthropic_mock):
        result = await route_and_check(spec, DocContext(text="text " * 40))

    assert result.present is True
|
||||
@@ -8,6 +8,7 @@ from compliance.services.specialist_agents.cookie_policy._sufficiency_judge impo
|
||||
)
|
||||
|
||||
_ANTHROPIC = "compliance.services.llm_cascade._call_anthropic"
|
||||
_DOC = "Volltext der Cookie-Richtlinie mit ausreichend Inhalt. " * 4
|
||||
|
||||
|
||||
def _r(cid, source, passed=True):
|
||||
@@ -20,7 +21,7 @@ async def test_rescued_unpassed_when_judge_fehlt():
|
||||
results = [_r("A", "keyword+embedding")]
|
||||
fake = AsyncMock(return_value='{"erfuellt": false, "confidence": 0.9, "begruendung": "fehlt"}')
|
||||
with patch(_ANTHROPIC, new=fake):
|
||||
n = await judge_rescued("text", results)
|
||||
n = await judge_rescued(_DOC, results)
|
||||
assert n == 1
|
||||
assert results[0]["passed"] is False
|
||||
assert "+llm_failed" in results[0]["source"]
|
||||
@@ -31,7 +32,7 @@ async def test_rescued_kept_when_judge_erfuellt():
|
||||
results = [_r("A", "keyword+embedding")]
|
||||
fake = AsyncMock(return_value='{"erfuellt": true, "confidence": 0.9}')
|
||||
with patch(_ANTHROPIC, new=fake):
|
||||
n = await judge_rescued("text", results)
|
||||
n = await judge_rescued(_DOC, results)
|
||||
assert n == 0
|
||||
assert results[0]["passed"] is True
|
||||
|
||||
@@ -42,7 +43,7 @@ async def test_keyword_pass_not_judged():
|
||||
results = [_r("A", "keyword")]
|
||||
fake = AsyncMock(return_value='{"erfuellt": false}')
|
||||
with patch(_ANTHROPIC, new=fake):
|
||||
n = await judge_rescued("text", results)
|
||||
n = await judge_rescued(_DOC, results)
|
||||
assert n == 0
|
||||
assert results[0]["passed"] is True
|
||||
assert fake.call_count == 0
|
||||
@@ -53,7 +54,7 @@ async def test_boost_rescue_is_judged():
|
||||
results = [_r("A", "keyword+regex_boost")]
|
||||
fake = AsyncMock(return_value='{"erfuellt": false}')
|
||||
with patch(_ANTHROPIC, new=fake):
|
||||
n = await judge_rescued("text", results)
|
||||
n = await judge_rescued(_DOC, results)
|
||||
assert n == 1 and results[0]["passed"] is False
|
||||
|
||||
|
||||
@@ -63,5 +64,5 @@ async def test_failed_controls_ignored():
|
||||
results = [_r("A", "keyword+embedding", passed=False)]
|
||||
fake = AsyncMock(return_value='{"erfuellt": false}')
|
||||
with patch(_ANTHROPIC, new=fake):
|
||||
n = await judge_rescued("text", results)
|
||||
n = await judge_rescued(_DOC, results)
|
||||
assert n == 0 and fake.call_count == 0
|
||||
|
||||
Reference in New Issue
Block a user