feat(iace): add distance dimension to GT benchmark

CompareBenchmark now also compares the engine's numeric dimensions (mm gaps, mm/s speeds) against the professional's GT measures: parses distance tokens from both sides (German thousands/decimal aware), reports matched / gt_only (gaps) / engine_only + an agreement %. Surfaces as result.distances on the existing benchmark endpoint. Deterministic, no LLM. On the GT-derived seed sessions it mainly guards DRIFT; its real value is new sessions. Real-GT test pins that the engine covers the Bremse (250 mm/s, 250/850 mm) and Kistenhub (25/120 mm, 150/75 mm/s) headline dimensions. Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
2026-06-11 14:59:47 +02:00
parent b63f49344a
commit 0d7194ef89
4 changed files with 252 additions and 22 deletions
@@ -0,0 +1,137 @@
+package iace
+
+import (
+	"regexp"
+	"strconv"
+	"strings"
+)
+
+// Distance benchmark dimension: does the engine suggest the same numeric
+// dimensions (mm gaps, mm/s speeds) as the professional (GT) for a session?
+// The engine measures are partly GT-derived, so on the seed sessions this
+// mainly guards DRIFT; its real value is NEW sessions, where the engine has not
+// been fitted to the assessor. Pure + deterministic (no LLM) — parses prose.
+
+// DistanceToken is one numeric dimension (a value plus its unit) parsed from
+// measure text.
+type DistanceToken struct {
+	// Value is the parsed number after separator normalization.
+	Value float64 `json:"value"`
+	Unit  string  `json:"unit"` // "mm" | "mm/s"
+	// Raw is the matched source text (number and unit), whitespace-trimmed.
+	Raw   string  `json:"raw"`
+}
+
+// DistanceComparison reports engine vs GT dimensional coverage for one session.
+type DistanceComparison struct {
+	// GTCount is the number of distinct dimensions found on the GT side.
+	GTCount      int             `json:"gt_count"`
+	// MatchedCount is how many of those GT dimensions the engine also has.
+	MatchedCount int             `json:"matched_count"`
+	// AgreementPct is MatchedCount / GTCount * 100; it stays 0 when GTCount is 0.
+	AgreementPct float64         `json:"agreement_pct"`
+	// Matched lists the GT tokens for which an engine counterpart was found.
+	Matched      []DistanceToken `json:"matched"`
+	GTOnly       []DistanceToken `json:"gt_only"`     // professional's dimensions with no engine counterpart (gaps)
+	EngineOnly   []DistanceToken `json:"engine_only"` // engine dimensions with no GT counterpart
+}
+
+// distanceRe matches a number followed by "mm" or "mm/s". The number is either
+// German-grouped ("1.600", "1.600,5") or plain with an optional decimal part
+// ("250", "2,5", "2.5"). Group 1 is the number, group 2 is "/s" for speeds.
+// The grouped alternative must start with a non-zero digit and may carry a
+// decimal comma, so "1.600,5 mm" is captured whole instead of being picked up
+// mid-number as "600,5 mm".
+var distanceRe = regexp.MustCompile(`([1-9]\d{0,2}(?:\.\d{3})+(?:,\d+)?|\d+(?:[,.]\d+)?)\s*mm(/s)?`)
+
+// thousandsRe recognizes a German-grouped number: a leading group of one to
+// three digits that does not start with 0, one or more ".ddd" groups, and an
+// optional decimal comma part. "0.500" is deliberately NOT grouped (a leading
+// zero group makes no sense) and is therefore read as the decimal 0.5.
+var thousandsRe = regexp.MustCompile(`^[1-9]\d{0,2}(\.\d{3})+(?:,\d+)?$`)
+
+// normalizeNumber converts a number written with German separators into a
+// float64: dots are dropped when they are thousands separators, and a comma is
+// treated as the decimal separator.
+//
+// It returns 0 when the text cannot be represented as a finite float64
+// (malformed input or out of range). Returning 0 rather than ±Inf matters
+// because callers skip zero values, and an infinite value cannot be encoded
+// as JSON.
+func normalizeNumber(s string) float64 {
+	if thousandsRe.MatchString(s) {
+		s = strings.ReplaceAll(s, ".", "") // German thousands separator
+	}
+	s = strings.ReplaceAll(s, ",", ".") // German decimal separator
+	v, err := strconv.ParseFloat(s, 64)
+	if err != nil {
+		// ParseFloat yields ±Inf on range errors; do not leak that upward.
+		return 0
+	}
+	return v
+}
+
+// extractDistanceTokens scans every text for "<number> mm" / "<number> mm/s"
+// mentions and returns each distinct (unit, value) pair once, in order of
+// first appearance. Values are considered the same when they agree after
+// formatting to one decimal place; mentions whose number normalizes to 0 are
+// skipped.
+func extractDistanceTokens(texts []string) []DistanceToken {
+	var tokens []DistanceToken
+	known := make(map[string]bool)
+	for _, text := range texts {
+		matches := distanceRe.FindAllStringSubmatch(text, -1)
+		for _, groups := range matches {
+			value := normalizeNumber(groups[1])
+			if value == 0 {
+				continue
+			}
+			unit := "mm"
+			if groups[2] == "/s" {
+				unit += "/s"
+			}
+			// Unit plus the value at one decimal place identifies a dimension.
+			id := unit + ":" + strconv.FormatFloat(value, 'f', 1, 64)
+			if !known[id] {
+				known[id] = true
+				tokens = append(tokens, DistanceToken{
+					Value: value,
+					Unit:  unit,
+					Raw:   strings.TrimSpace(groups[0]),
+				})
+			}
+		}
+	}
+	return tokens
+}
+
+// tokensMatch reports whether two tokens describe the same dimension: the
+// units must be equal and the values must differ by less than 0.05.
+func tokensMatch(a, b DistanceToken) bool {
+	if a.Unit == b.Unit {
+		diff := a.Value - b.Value
+		return diff > -0.05 && diff < 0.05
+	}
+	return false
+}
+
+// CompareDistances extracts the dimensions from the professional's texts
+// (gtTexts) and from the engine's texts (engineTexts), pairs them up, and
+// reports the coverage together with the leftovers on both sides.
+//
+// Each GT token is paired with the first engine token that matches it and has
+// not been paired yet. The three result slices are always non-nil, so they
+// encode as JSON arrays even when empty. AgreementPct is left at 0 when the
+// GT side has no dimensions.
+func CompareDistances(gtTexts, engineTexts []string) DistanceComparison {
+	gtTokens := extractDistanceTokens(gtTexts)
+	engTokens := extractDistanceTokens(engineTexts)
+
+	matched := []DistanceToken{}
+	gtOnly := []DistanceToken{}
+	engineOnly := []DistanceToken{}
+
+	// used[i] marks engine tokens that already serve as someone's counterpart.
+	used := make([]bool, len(engTokens))
+	for _, g := range gtTokens {
+		hit := -1
+		for i := range engTokens {
+			if used[i] {
+				continue
+			}
+			if tokensMatch(g, engTokens[i]) {
+				hit = i
+				break
+			}
+		}
+		if hit < 0 {
+			gtOnly = append(gtOnly, g)
+			continue
+		}
+		used[hit] = true
+		matched = append(matched, g)
+	}
+
+	for i, taken := range used {
+		if !taken {
+			engineOnly = append(engineOnly, engTokens[i])
+		}
+	}
+
+	out := DistanceComparison{
+		GTCount:      len(gtTokens),
+		MatchedCount: len(matched),
+		Matched:      matched,
+		GTOnly:       gtOnly,
+		EngineOnly:   engineOnly,
+	}
+	if out.GTCount > 0 {
+		out.AgreementPct = float64(out.MatchedCount) / float64(out.GTCount) * 100
+	}
+	return out
+}
+
+// CompareSessionDistances is the entry point used by the benchmark. It
+// gathers the prose to compare: for every GT entry its measures plus its
+// comment when non-empty, and for every mitigation its name and description.
+// It then delegates to CompareDistances. A nil gt contributes no GT text.
+func CompareSessionDistances(gt *GroundTruth, mitigations []Mitigation) DistanceComparison {
+	var professional []string
+	if gt != nil {
+		for _, entry := range gt.Entries {
+			professional = append(professional, entry.Measures...)
+			if entry.Comment == "" {
+				continue
+			}
+			professional = append(professional, entry.Comment)
+		}
+	}
+
+	var engine []string
+	for i := range mitigations {
+		engine = append(engine, mitigations[i].Name)
+		engine = append(engine, mitigations[i].Description)
+	}
+
+	return CompareDistances(professional, engine)
+}