feat(checkers): platform router + Haiku sufficiency tier; cookie is first consumer
Generalise "Embedding finds, Claude decides" into the shared Pruefer-Library:

- router.route_and_check dispatches control -> sensor_classification -> Checker.
- build_spec reads sensor_classification (CONTENT/LLM -> judge=haiku, the validated sufficiency tier; the Qwen-first cascade is disproven for sufficiency).
- LLMChecker gains a Haiku-direct tier (reuses the validated deep_check prompt).
- Cookie Layer-3 now routes through route_and_check instead of bespoke code, so cookie is the first real router consumer -- this proves the architecture end-to-end.

Reproduces the validated result via the shared path: FN 159->14, recall 0.13->0.92, precision 0.89 (vs. the bespoke path's FN/recall/precision of 12/0.93/0.90 -- within Haiku noise).

Tests: 10/10 (router dispatch + build_spec + haiku tier + cookie rewire).

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
This commit is contained in:
+12
-14
@@ -32,32 +32,30 @@ async def judge_rescued(text: str, results: list[dict[str, Any]]) -> int:
|
||||
Nimmt passed zurueck, wenn der Judge die Pflicht als NICHT erfuellt sieht.
|
||||
Gibt die Anzahl zurueckgenommener (korrigierter) Rescues zurueck.
|
||||
"""
|
||||
from compliance.services.llm_cascade import _call_anthropic
|
||||
from compliance.services.specialist_agents.dse.deep_check import (
|
||||
_JUDGE_SYS, _build_user, _parse,
|
||||
)
|
||||
# Über den gemeinsamen Prüfer-Router (kein Cookie-Sonderfall mehr):
|
||||
# CONTENT/LLM → build_spec setzt judge='haiku' → LLMChecker (validierter
|
||||
# Sufficiency-Judge). Damit ist Cookie der erste echte Router-Consumer.
|
||||
from compliance.services.checkers.base import DocContext
|
||||
from compliance.services.checkers.router import build_spec, route_and_check
|
||||
|
||||
candidates = [r for r in results if _is_rescued(r)]
|
||||
if not candidates:
|
||||
return 0
|
||||
doc = DocContext(text=text)
|
||||
sc = {"verification_method": "CONTENT", "decision_method": "LLM"}
|
||||
corrected = 0
|
||||
for r in candidates:
|
||||
crit = r.get("_pass_criteria") or [r.get("label") or r.get("hint") or ""]
|
||||
if not isinstance(crit, list):
|
||||
crit = [str(crit)]
|
||||
title = r.get("label") or r.get("hint") or r.get("control_id") or ""
|
||||
user = _build_user(text, title, crit)
|
||||
verdict = None
|
||||
for _ in range(2): # retry on transient/malformed
|
||||
p = _parse(await _call_anthropic(_JUDGE_SYS, user, max_tokens=400))
|
||||
if p:
|
||||
verdict = p
|
||||
break
|
||||
if verdict is not None and verdict.get("erfuellt") is False:
|
||||
label = r.get("label") or r.get("hint") or r.get("control_id") or ""
|
||||
spec = build_spec(r.get("control_id") or "", sc, label=label, criteria=crit)
|
||||
res = await route_and_check(spec, doc)
|
||||
if res.present is False:
|
||||
r["passed"] = False
|
||||
r["source"] = (r.get("source") or "") + "+llm_failed"
|
||||
r["matched_text"] = "[layer-3 sufficiency-judge: nicht erfuellt]"
|
||||
r["_judge_reason"] = (verdict.get("begruendung") or "")[:200]
|
||||
r["_judge_reason"] = (res.evidence or "")[:200]
|
||||
corrected += 1
|
||||
if corrected:
|
||||
logger.info("cookie layer-3 sufficiency-judge: %d/%d rescues zurueckgenommen",
|
||||
|
||||
Reference in New Issue
Block a user