8674b2cd9a
First thin slice of the offline library-improvement proposer. DEV-TIME ONLY, propose-only — it never mutates the pattern library or the runtime. - FindDedupCandidates (proposer_dedup.go): structural near-duplicate detection over the fired patterns (category + measure/zone/scenario overlap). Bakes in the P1 lesson: only same-category pairs compare, and pairs with different operational states are never proposed (normal-operation vs maintenance are legitimately distinct, e.g. HP011 vs HP077). - ScreenSupersession (proposer_screen.go): the wall. A proposal is safe only if (1) dropping the hazard does not reduce GT recall AND (2) keep/drop do not credit DIFFERENT GT entries. Check 2 catches distinct hazards that merely share measures (HP2201 hot surface GT 1.3 vs HP2202 hot ware GT 1.4) which recall alone would wave through. On real warewashing output: 3 candidates -> 1 BLOCKED (distinct GT), 2 RECALL-SAFE for human/LLM review (the update + winding/friction near-dupes). Nothing auto-applied. All 3 GTs unaffected (read-only). The LLM judgement and a CLI/file queue are slice 2. Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
62 lines
2.5 KiB
Go
62 lines
2.5 KiB
Go
package iace
|
|
|
|
import "github.com/google/uuid"
|
|
|
|
// ScreenResult records the deterministic ground-truth (GT) verdict computed for
// a single proposed supersession, i.e. one "keep" hazard standing in for one
// "drop" hazard.
type ScreenResult struct {
	// RecallBefore is the benchmark coverage score with every hazard present.
	RecallBefore float64 `json:"recall_before"`
	// RecallAfter is the benchmark coverage score once the drop hazard is removed.
	RecallAfter float64 `json:"recall_after"`

	// KeepGT is the GT entry the keeper credits; empty if it credits none.
	KeepGT string `json:"keep_gt,omitempty"`
	// DropGT is the GT entry the drop credits; empty if it credits none.
	DropGT string `json:"drop_gt,omitempty"`

	// DistinctGT reports that keep and drop credit DIFFERENT GT entries, which
	// marks them as distinct hazards.
	DistinctGT bool `json:"distinct_gt"`
	// Safe reports that recall was preserved AND the pair is not distinct.
	Safe bool `json:"safe"`
}
|
|
|
|
// ScreenSupersession is the WALL between "propose" and "decide". A proposal is
|
|
// safe only if BOTH deterministic checks pass:
|
|
//
|
|
// 1. RECALL is not reduced when the drop-hazard (and its mitigations) is removed
|
|
// — otherwise the drop is load-bearing for GT coverage.
|
|
// 2. The two hazards do NOT credit DIFFERENT ground-truth entries. Recall alone
|
|
// is necessary but not sufficient: two genuinely distinct hazards that share
|
|
// the same measures (e.g. hot boiler surface vs hot ware on unloading) keep
|
|
// recall at 100% when one is dropped, yet must NOT be merged. If keep and
|
|
// drop each match a different GT entry, they are distinct.
|
|
//
|
|
// Whatever survives both is still only RECALL-SAFE — a candidate for a human (and
|
|
// in slice 2, an LLM) to confirm semantically. Deterministic; reuses
|
|
// CompareBenchmark; touches neither the library nor the runtime.
|
|
func ScreenSupersession(gt *GroundTruth, hazards []Hazard, mits []Mitigation, keepHazardName, dropHazardName string) ScreenResult {
|
|
before := CompareBenchmark(gt, hazards, mits)
|
|
|
|
gtOf := map[string]string{}
|
|
for _, p := range before.MatchedPairs {
|
|
gtOf[p.EngineHazard.Name] = p.GTEntry.Nr
|
|
}
|
|
keepGT, dropGT := gtOf[keepHazardName], gtOf[dropHazardName]
|
|
distinct := keepGT != "" && dropGT != "" && keepGT != dropGT
|
|
|
|
kept := make([]Hazard, 0, len(hazards))
|
|
dropped := map[uuid.UUID]bool{}
|
|
for _, h := range hazards {
|
|
if h.Name == dropHazardName {
|
|
dropped[h.ID] = true
|
|
continue
|
|
}
|
|
kept = append(kept, h)
|
|
}
|
|
keptMits := make([]Mitigation, 0, len(mits))
|
|
for _, m := range mits {
|
|
if !dropped[m.HazardID] {
|
|
keptMits = append(keptMits, m)
|
|
}
|
|
}
|
|
after := CompareBenchmark(gt, kept, keptMits)
|
|
|
|
return ScreenResult{
|
|
RecallBefore: before.CoverageScore, RecallAfter: after.CoverageScore,
|
|
KeepGT: keepGT, DropGT: dropGT, DistinctGT: distinct,
|
|
Safe: after.CoverageScore >= before.CoverageScore && !distinct,
|
|
}
|
|
}
|