package iace

import "github.com/google/uuid"

// ScreenResult is the deterministic GT verdict for one proposed supersession.
// "GT" abbreviates ground truth. The value is produced by ScreenSupersession
// and serialises to JSON with the field names given in the struct tags.
type ScreenResult struct {
	// RecallBefore is the benchmark coverage score computed over the full,
	// unmodified hazard and mitigation lists.
	RecallBefore float64 `json:"recall_before"`
	// RecallAfter is the benchmark coverage score computed after the drop
	// hazard and the mitigations attached to it have been removed.
	RecallAfter float64 `json:"recall_after"`
	// KeepGT is the GT entry the keeper credits (if any). It is empty, and
	// omitted from JSON, when the keeper has no matched GT entry.
	KeepGT string `json:"keep_gt,omitempty"`
	// DropGT is the GT entry the drop credits (if any). It is empty, and
	// omitted from JSON, when the drop has no matched GT entry.
	DropGT string `json:"drop_gt,omitempty"`
	// DistinctGT reports that keep and drop credit DIFFERENT GT entries, which
	// marks them as distinct hazards. It is false whenever either side has no
	// GT entry.
	DistinctGT bool `json:"distinct_gt"`
	// Safe reports that recall is preserved (RecallAfter >= RecallBefore) AND
	// the two hazards are not distinct.
	Safe bool `json:"safe"`
}

// ScreenSupersession is the WALL between "propose" and "decide". A proposal is
// safe only if BOTH deterministic checks pass:
//
//  1. RECALL is not reduced when the drop-hazard (and its mitigations) is removed
//     — otherwise the drop is load-bearing for GT coverage.
//  2. The two hazards do NOT credit DIFFERENT ground-truth entries. Recall alone
//     is necessary but not sufficient: two genuinely distinct hazards that share
//     the same measures (e.g. hot boiler surface vs hot ware on unloading) keep
//     recall at 100% when one is dropped, yet must NOT be merged. If keep and
//     drop each match a different GT entry, they are distinct.
//
// Whatever survives both is still only RECALL-SAFE — a candidate for a human (and
// in slice 2, an LLM) to confirm semantically. Deterministic; reuses
// CompareBenchmark; touches neither the library nor the runtime.
func ScreenSupersession(gt *GroundTruth, hazards []Hazard, mits []Mitigation, keepHazardName, dropHazardName string) ScreenResult { before := CompareBenchmark(gt, hazards, mits) gtOf := map[string]string{} for _, p := range before.MatchedPairs { gtOf[p.EngineHazard.Name] = p.GTEntry.Nr } keepGT, dropGT := gtOf[keepHazardName], gtOf[dropHazardName] distinct := keepGT != "" && dropGT != "" && keepGT != dropGT kept := make([]Hazard, 0, len(hazards)) dropped := map[uuid.UUID]bool{} for _, h := range hazards { if h.Name == dropHazardName { dropped[h.ID] = true continue } kept = append(kept, h) } keptMits := make([]Mitigation, 0, len(mits)) for _, m := range mits { if !dropped[m.HazardID] { keptMits = append(keptMits, m) } } after := CompareBenchmark(gt, kept, keptMits) return ScreenResult{ RecallBefore: before.CoverageScore, RecallAfter: after.CoverageScore, KeepGT: keepGT, DropGT: dropGT, DistinctGT: distinct, Safe: after.CoverageScore >= before.CoverageScore && !distinct, } }