feat(ai-sdk): offline dedup-candidate proposer + deterministic GT wall (P2 slice 1)
First thin slice of the offline library-improvement proposer. DEV-TIME ONLY, propose-only — it never mutates the pattern library or the runtime.

- FindDedupCandidates (proposer_dedup.go): structural near-duplicate detection over the fired patterns (category + measure/zone/scenario overlap). Bakes in the P1 lesson: only same-category pairs compare, and pairs with different operational states are never proposed (normal-operation vs maintenance are legitimately distinct, e.g. HP011 vs HP077).
- ScreenSupersession (proposer_screen.go): the wall. A proposal is safe only if (1) dropping the hazard does not reduce GT recall AND (2) keep/drop do not credit DIFFERENT GT entries. Check 2 catches distinct hazards that merely share measures (HP2201 hot surface GT 1.3 vs HP2202 hot ware GT 1.4) which recall alone would wave through.

On real warewashing output: 3 candidates -> 1 BLOCKED (distinct GT), 2 RECALL-SAFE for human/LLM review (the update + winding/friction near-dupes). Nothing auto-applied. All 3 GTs unaffected (read-only). The LLM judgement and a CLI/file queue are slice 2.

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
This commit is contained in:
@@ -0,0 +1,61 @@
|
||||
package iace
|
||||
|
||||
import "github.com/google/uuid"
|
||||
|
||||
// ScreenResult is the deterministic ground-truth (GT) verdict for one proposed
// supersession, as produced by ScreenSupersession.
type ScreenResult struct {
	// RecallBefore is the coverage score measured on the full hazard set.
	RecallBefore float64 `json:"recall_before"`
	// RecallAfter is the coverage score measured once the drop hazard and
	// its mitigations have been removed.
	RecallAfter float64 `json:"recall_after"`
	// KeepGT is the GT entry the keeper credits; empty if it credits none.
	KeepGT string `json:"keep_gt,omitempty"`
	// DropGT is the GT entry the drop credits; empty if it credits none.
	DropGT string `json:"drop_gt,omitempty"`
	// DistinctGT reports that keep and drop credit DIFFERENT GT entries,
	// which marks them as distinct hazards.
	DistinctGT bool `json:"distinct_gt"`
	// Safe reports that recall was preserved AND the hazards are not distinct.
	Safe bool `json:"safe"`
}
|
||||
|
||||
// ScreenSupersession is the WALL between "propose" and "decide". A proposal is
|
||||
// safe only if BOTH deterministic checks pass:
|
||||
//
|
||||
// 1. RECALL is not reduced when the drop-hazard (and its mitigations) is removed
|
||||
// — otherwise the drop is load-bearing for GT coverage.
|
||||
// 2. The two hazards do NOT credit DIFFERENT ground-truth entries. Recall alone
|
||||
// is necessary but not sufficient: two genuinely distinct hazards that share
|
||||
// the same measures (e.g. hot boiler surface vs hot ware on unloading) keep
|
||||
// recall at 100% when one is dropped, yet must NOT be merged. If keep and
|
||||
// drop each match a different GT entry, they are distinct.
|
||||
//
|
||||
// Whatever survives both is still only RECALL-SAFE — a candidate for a human (and
|
||||
// in slice 2, an LLM) to confirm semantically. Deterministic; reuses
|
||||
// CompareBenchmark; touches neither the library nor the runtime.
|
||||
func ScreenSupersession(gt *GroundTruth, hazards []Hazard, mits []Mitigation, keepHazardName, dropHazardName string) ScreenResult {
|
||||
before := CompareBenchmark(gt, hazards, mits)
|
||||
|
||||
gtOf := map[string]string{}
|
||||
for _, p := range before.MatchedPairs {
|
||||
gtOf[p.EngineHazard.Name] = p.GTEntry.Nr
|
||||
}
|
||||
keepGT, dropGT := gtOf[keepHazardName], gtOf[dropHazardName]
|
||||
distinct := keepGT != "" && dropGT != "" && keepGT != dropGT
|
||||
|
||||
kept := make([]Hazard, 0, len(hazards))
|
||||
dropped := map[uuid.UUID]bool{}
|
||||
for _, h := range hazards {
|
||||
if h.Name == dropHazardName {
|
||||
dropped[h.ID] = true
|
||||
continue
|
||||
}
|
||||
kept = append(kept, h)
|
||||
}
|
||||
keptMits := make([]Mitigation, 0, len(mits))
|
||||
for _, m := range mits {
|
||||
if !dropped[m.HazardID] {
|
||||
keptMits = append(keptMits, m)
|
||||
}
|
||||
}
|
||||
after := CompareBenchmark(gt, kept, keptMits)
|
||||
|
||||
return ScreenResult{
|
||||
RecallBefore: before.CoverageScore, RecallAfter: after.CoverageScore,
|
||||
KeepGT: keepGT, DropGT: dropGT, DistinctGT: distinct,
|
||||
Safe: after.CoverageScore >= before.CoverageScore && !distinct,
|
||||
}
|
||||
}
|
||||
Reference in New Issue
Block a user