0ce4794767
Adds the semantic judgement layer on top of the slice-1 detector + GT wall. DEV-TIME, propose-only — nothing mutates the library or runtime. - CandidateJudge interface with two implementations: HeuristicJudge (deterministic default/fallback, used in tests) and LLMJudge (offline, over the shared llm.ProviderRegistry via the LLMCompleter adapter). LLMJudge degrades to "uncertain" on any transport/parse error — it can never break a run. - BuildJudgePrompt: the ISO 12100 same-vs-distinct prompt, unit-tested deterministically even though the call is not. - RenderProposalQueue: markdown human-review queue with a suggested action per candidate (supersede / keep both / needs review). On real warewashing output the heuristic punts to "uncertain — needs the LLM judge" for exactly the two recall-safe near-dupes (HP807/HP033 update, HP101/HP096 winding-vs-friction), making the LLM's role explicit. All 3 GTs unaffected (read-only). Live qwen wiring + a CLI/file queue are slice 3. Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
48 lines
1.9 KiB
Go
48 lines
1.9 KiB
Go
package iace
|
|
|
|
import (
|
|
"fmt"
|
|
"strings"
|
|
)
|
|
|
|
// RenderProposalQueue turns judged dedup proposals into the human-review queue
|
|
// (markdown). Deterministic. Nothing here applies a change — every entry is a
|
|
// suggestion for a human to confirm, edit, commit, and pin with a GT case.
|
|
func RenderProposalQueue(machine string, proposals []JudgedProposal) string {
|
|
var b strings.Builder
|
|
fmt.Fprintf(&b, "# Dedup proposal queue — %s\n\n", machine)
|
|
fmt.Fprintf(&b, "%d candidate(s) survived the deterministic GT wall. Propose-only — nothing is applied automatically.\n\n", len(proposals))
|
|
|
|
for i, p := range proposals {
|
|
c := p.Candidate
|
|
fmt.Fprintf(&b, "## %d. keep %s ⊃ drop %s [%s → %s (%s)]\n",
|
|
i+1, c.KeepPattern, c.DropPattern, p.Judge, p.Verdict, p.Confidence)
|
|
fmt.Fprintf(&b, "- category %s · score %.2f (measures %.0f%%, zone %.0f%%, scenario %.0f%%)\n",
|
|
c.Category, c.Score, c.MeasureJaccard*100, c.ZoneJaccard*100, c.ScenarioJaccard*100)
|
|
fmt.Fprintf(&b, "- GT recall %.1f%% → %.1f%% when %s is dropped (wall: %s)\n",
|
|
p.Screen.RecallBefore*100, p.Screen.RecallAfter*100, c.DropPattern, wallNote(p.Screen))
|
|
fmt.Fprintf(&b, "- keep: %s\n- drop: %s\n", c.KeepHazardName, c.DropName)
|
|
fmt.Fprintf(&b, "- judge rationale: %s\n", p.Rationale)
|
|
fmt.Fprintf(&b, "- suggested action: %s\n\n", suggestedAction(p))
|
|
}
|
|
return b.String()
|
|
}
|
|
|
|
func wallNote(s ScreenResult) string {
|
|
if s.DistinctGT {
|
|
return fmt.Sprintf("distinct GT %s vs %s", s.KeepGT, s.DropGT)
|
|
}
|
|
return "recall-safe"
|
|
}
|
|
|
|
func suggestedAction(p JudgedProposal) string {
|
|
switch p.Verdict {
|
|
case VerdictDuplicate:
|
|
return fmt.Sprintf("add %s to a supersession set, then a human confirms + commits + pins a GT case", p.Candidate.DropPattern)
|
|
case VerdictDistinct:
|
|
return "keep both — judge considers them distinct hazards"
|
|
default:
|
|
return "needs human (or higher-confidence LLM) review — no automatic action"
|
|
}
|
|
}
|