feat(iace): add distance dimension to GT benchmark

CompareBenchmark now also compares the engine's numeric dimensions (mm gaps, mm/s speeds) against the professional's GT measures: it parses distance tokens from both sides (German thousands/decimal aware) and reports matched, gt_only (gaps), and engine_only values plus an agreement percentage. The result surfaces as result.distances on the existing benchmark endpoint. Deterministic, no LLM. On the GT-derived seed sessions it mainly guards against drift; its real value is on new sessions. A real-GT test pins that the engine covers the Bremse (250 mm/s, 250/850 mm) and Kistenhub (25/120 mm, 150/75 mm/s) headline dimensions. Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
2026-06-11 14:59:47 +02:00
parent b63f49344a
commit 0d7194ef89
4 changed files with 252 additions and 22 deletions
@@ -149,6 +149,8 @@ func CompareBenchmark(gt *GroundTruth, hazards []Hazard, mitigations []Mitigatio
 		coverage = float64(len(matched)) / float64(len(gt.Entries))
 	}

+	dist := CompareSessionDistances(gt, mitigations)
+
 	return &BenchmarkResult{
 		CoverageScore:     coverage,
 		MeasureCoverage:   measCov,
@@ -159,6 +161,7 @@ func CompareBenchmark(gt *GroundTruth, hazards []Hazard, mitigations []Mitigatio
 		ExtraInEngine:     extra,
 		CategoryBreakdown: breakdown,
 		RiskRankPairs:     rankPairs,
+		Distances:         &dist,
 	}
 }

@@ -439,9 +442,9 @@ func buildRiskRankPairs(matched []HazardMatchPair) []RiskRankPair {

 	// Sort by GT risk descending to get GT rank
 	type ranked struct {
-		idx     int
-		gtRisk  int
-		name    string
+		idx    int
+		gtRisk int
+		name   string
 	}
 	items := make([]ranked, len(matched))
 	for i, m := range matched {