feat(iace): add distance dimension to GT benchmark
CompareBenchmark now also compares the engine's numeric dimensions (gaps in mm, speeds in mm/s) against the professional's GT measures. It parses distance tokens from both sides (aware of German thousands/decimal separators) and reports matched, gt_only (gaps), and engine_only values plus an agreement percentage. The result surfaces as result.distances on the existing benchmark endpoint. The comparison is deterministic and uses no LLM. On the GT-derived seed sessions it mainly guards against drift; its real value is for new sessions. A real-GT test pins that the engine covers the Bremse (250 mm/s, 250/850 mm) and Kistenhub (25/120 mm, 150/75 mm/s) headline dimensions.

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
This commit is contained in:
@@ -149,6 +149,8 @@ func CompareBenchmark(gt *GroundTruth, hazards []Hazard, mitigations []Mitigatio
|
||||
coverage = float64(len(matched)) / float64(len(gt.Entries))
|
||||
}
|
||||
|
||||
dist := CompareSessionDistances(gt, mitigations)
|
||||
|
||||
return &BenchmarkResult{
|
||||
CoverageScore: coverage,
|
||||
MeasureCoverage: measCov,
|
||||
@@ -159,6 +161,7 @@ func CompareBenchmark(gt *GroundTruth, hazards []Hazard, mitigations []Mitigatio
|
||||
ExtraInEngine: extra,
|
||||
CategoryBreakdown: breakdown,
|
||||
RiskRankPairs: rankPairs,
|
||||
Distances: &dist,
|
||||
}
|
||||
}
|
||||
|
||||
@@ -439,9 +442,9 @@ func buildRiskRankPairs(matched []HazardMatchPair) []RiskRankPair {
|
||||
|
||||
// Sort by GT risk descending to get GT rank
|
||||
type ranked struct {
|
||||
idx int
|
||||
gtRisk int
|
||||
name string
|
||||
idx int
|
||||
gtRisk int
|
||||
name string
|
||||
}
|
||||
items := make([]ranked, len(matched))
|
||||
for i, m := range matched {
|
||||
|
||||
Reference in New Issue
Block a user