feat(checkers): platform router + Haiku sufficiency tier; cookie is first consumer
Generalise "Embedding finds, Claude decides" into the shared Pruefer-Library:
- router.route_and_check dispatches control -> sensor_classification -> Checker.
- build_spec reads sensor_classification (CONTENT/LLM -> judge=haiku, the validated sufficiency tier; the Qwen-first cascade is disproven for sufficiency).
- LLMChecker gains a Haiku-direct tier (reuses the validated deep_check prompt).
- Cookie Layer-3 now routes through route_and_check instead of bespoke code, so cookie is the first real router consumer -- proves the architecture end-to-end.

Reproduces the validated result via the shared path: FN 159->14, recall 0.13->0.92, precision 0.89 (vs bespoke 12/0.93/0.90 -- within Haiku noise).

Tests: 10/10 (router dispatch + build_spec + haiku tier + cookie rewire).

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
This commit is contained in:
@@ -45,6 +45,11 @@ class LLMChecker:
|
||||
text = doc.text or ""
|
||||
if len(text) < 50:
|
||||
return CheckResult(present=None, source="llm")
|
||||
# decision_method=LLM mit judge='haiku': Sufficiency-Pfad (validiert
|
||||
# P0.89/R0.91). Der Qwen-first-Cascade ist als Sufficiency-Judge
|
||||
# widerlegt -> hier Haiku direkt, kriteriengeführte Subsumtion.
|
||||
if (ctrl.extra or {}).get("judge") == "haiku":
|
||||
return await self._haiku(ctrl, text)
|
||||
secs = _sections(text)
|
||||
if ctrl.topic_regex:
|
||||
rel = [s for s in secs if re.search(ctrl.topic_regex, s, re.I)][:6] or secs[:6]
|
||||
@@ -71,3 +76,31 @@ class LLMChecker:
|
||||
except Exception as e:
|
||||
logger.info("llm checker fail %s: %s", ctrl.control_id, str(e)[:80])
|
||||
return CheckResult(present=None, source="error")
|
||||
|
||||
async def _haiku(self, ctrl: ControlSpec, text: str) -> CheckResult:
    """Judge sufficiency of *text* for *ctrl* with a direct Anthropic call.

    Criteria-driven: the criteria passed to the judge are
    ``ctrl.paraphrases``; when that is empty, the control's label (or,
    failing that, its control_id) is used as the single criterion. The
    system prompt, user-prompt builder and response parser are reused from
    the ``deep_check`` module.

    The model is asked at most twice; the second call happens only when
    the first reply could not be parsed into a truthy object.

    Returns:
        * ``CheckResult(present=<bool>, ..., source="haiku")`` when a reply
          was parsed (evidence is cut to 120 characters),
        * ``CheckResult(present=None, source="haiku")`` when neither reply
          could be parsed,
        * ``CheckResult(present=None, source="error")`` when anything in
          the process raised; the failure is logged at INFO level.
    """
    try:
        # Imported lazily inside the try block, so an import failure is
        # reported as an "error" result rather than propagating.
        from compliance.services.llm_cascade import _call_anthropic
        from compliance.services.specialist_agents.dse.deep_check import (
            _JUDGE_SYS,
            _build_user,
            _parse as _parse_judge,
        )

        title = ctrl.label or ctrl.control_id
        criteria = ctrl.paraphrases or [title]
        prompt = _build_user(text, title, criteria)

        verdict = None
        attempts_left = 2
        while attempts_left and not verdict:
            attempts_left -= 1
            reply = await _call_anthropic(_JUDGE_SYS, prompt, max_tokens=400)
            verdict = _parse_judge(reply)

        if verdict:
            reasoning = verdict.get("begruendung") or ""
            score = verdict.get("confidence") or 0.0
            return CheckResult(
                present=bool(verdict.get("erfuellt")),
                evidence=reasoning[:120],
                confidence=float(score),
                source="haiku",
            )

        # Both attempts produced nothing parseable: undecided, not an error.
        return CheckResult(present=None, source="haiku")
    except Exception as exc:
        logger.info("llm haiku checker fail %s: %s", ctrl.control_id, str(exc)[:80])
        return CheckResult(present=None, source="error")
|
||||
|
||||
Reference in New Issue
Block a user