feat(iace): add distance dimension to GT benchmark

CompareBenchmark now also compares the engine's numeric dimensions (mm gaps, mm/s speeds) against the professional's GT measures: parses distance tokens from both sides (German thousands/decimal aware), reports matched / gt_only (gaps) / engine_only + an agreement %. Surfaces as result.distances on the existing benchmark endpoint. Deterministic, no LLM. On the GT-derived seed sessions it mainly guards DRIFT; its real value is new sessions. Real-GT test pins that the engine covers the Bremse (250 mm/s, 250/850 mm) and Kistenhub (25/120 mm, 150/75 mm/s) headline dimensions. Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
2026-06-11 14:59:47 +02:00
parent b63f49344a
commit 0d7194ef89
4 changed files with 252 additions and 22 deletions
@@ -149,6 +149,8 @@ func CompareBenchmark(gt *GroundTruth, hazards []Hazard, mitigations []Mitigatio
 		coverage = float64(len(matched)) / float64(len(gt.Entries))
 	}
 	dist := CompareSessionDistances(gt, mitigations)
 	return &BenchmarkResult{
 		CoverageScore:     coverage,
 		MeasureCoverage:   measCov,
@@ -159,6 +161,7 @@ func CompareBenchmark(gt *GroundTruth, hazards []Hazard, mitigations []Mitigatio
 		ExtraInEngine:     extra,
 		CategoryBreakdown: breakdown,
 		RiskRankPairs:     rankPairs,
 		Distances:         &dist,
 	}
 }
@@ -439,9 +442,9 @@ func buildRiskRankPairs(matched []HazardMatchPair) []RiskRankPair {
 	// Sort by GT risk descending to get GT rank
 	type ranked struct {
-		idx     int
+		idx    int
-		gtRisk  int
+		gtRisk int
-		name    string
+		name   string
 	}
 	items := make([]ranked, len(matched))
 	for i, m := range matched {
@@ -16,23 +16,23 @@ type GroundTruth struct {
 // GroundTruthEntry represents a single hazard from a professional risk assessment.
 type GroundTruthEntry struct {
-	Nr                  string           `json:"nr"`
+	Nr                    string            `json:"nr"`
-	HazardGroup         string           `json:"hazard_group"`
+	HazardGroup           string            `json:"hazard_group"`
-	HazardGroupApplicable bool           `json:"hazard_group_applicable"`
+	HazardGroupApplicable bool              `json:"hazard_group_applicable"`
-	HazardSubgroup      string           `json:"hazard_subgroup"`
+	HazardSubgroup        string            `json:"hazard_subgroup"`
-	HazardType          string           `json:"hazard_type"`
+	HazardType            string            `json:"hazard_type"`
-	HazardCause         string           `json:"hazard_cause"`
+	HazardCause           string            `json:"hazard_cause"`
-	LifecyclePhases     []string         `json:"lifecycle_phases"`
+	LifecyclePhases       []string          `json:"lifecycle_phases"`
-	ComponentZone       string           `json:"component_zone"`
+	ComponentZone         string            `json:"component_zone"`
-	RiskIn              GTRisk           `json:"risk_in"`
+	RiskIn                GTRisk            `json:"risk_in"`
-	PLr                 *GTPLr           `json:"plr,omitempty"`
+	PLr                   *GTPLr            `json:"plr,omitempty"`
-	Measures            []string         `json:"measures"`
+	Measures              []string          `json:"measures"`
-	MeasureType         string           `json:"measure_type"`
+	MeasureType           string            `json:"measure_type"`
-	RiskOut             GTRisk           `json:"risk_out"`
+	RiskOut               GTRisk            `json:"risk_out"`
-	NormReferences      []string         `json:"norm_references"`
+	NormReferences        []string          `json:"norm_references"`
-	Sufficient          bool             `json:"sufficient"`
+	Sufficient            bool              `json:"sufficient"`
-	Comment             string           `json:"comment,omitempty"`
+	Comment               string            `json:"comment,omitempty"`
-	ReductionSteps      []GTReductionStep `json:"reduction_steps,omitempty"`
+	ReductionSteps        []GTReductionStep `json:"reduction_steps,omitempty"`
 }
 // GTRisk represents the EN 62061 additive risk: R = (F + W + P) * S.
@@ -81,8 +81,10 @@ type BenchmarkResult struct {
 	CategoryBreakdown []CategoryScore    `json:"category_breakdown"`
 	RiskRankPairs     []RiskRankPair     `json:"risk_rank_pairs"`
 	// Risk-number comparison (tool vs professional) per matched hazard + aggregate.
-	RiskComparison    []RiskComparisonPair `json:"risk_comparison,omitempty"`
+	RiskComparison []RiskComparisonPair `json:"risk_comparison,omitempty"`
-	RiskAgreement     RiskAgreement        `json:"risk_agreement"`
+	RiskAgreement  RiskAgreement        `json:"risk_agreement"`
 	// Dimensional comparison: do the engine's mm/mm-s values match the GT's?
 	Distances *DistanceComparison `json:"distances,omitempty"`
 }
 // HazardMatchPair links a GT entry to an engine hazard.
@@ -0,0 +1,137 @@
 package iace
 import (
 	"regexp"
 	"strconv"
 	"strings"
 )
 // Distance benchmark dimension: does the engine suggest the same numeric
 // dimensions (mm gaps, mm/s speeds) as the professional (GT) for a session?
 // The engine measures are partly GT-derived, so on the seed sessions this
 // mainly guards DRIFT; its real value is NEW sessions, where the engine has not
 // been fitted to the assessor. Pure + deterministic (no LLM) — parses prose.
 // DistanceToken is one numeric dimension parsed from measure text.
 type DistanceToken struct {
 	// Value is the parsed number after separator normalisation.
 	Value float64 `json:"value"`
 	// Unit is "mm" for lengths and "mm/s" for speeds.
 	Unit  string  `json:"unit"` // "mm" | "mm/s"
 	// Raw is the matched source text (number plus unit), whitespace-trimmed.
 	Raw   string  `json:"raw"`
 }
 // DistanceComparison reports engine vs GT dimensional coverage for one session.
 // As filled in by CompareDistances, the three token slices are always non-nil.
 type DistanceComparison struct {
 	// GTCount is the number of distinct dimensions found on the GT side.
 	GTCount      int             `json:"gt_count"`
 	// MatchedCount is how many GT dimensions have an engine counterpart.
 	MatchedCount int             `json:"matched_count"`
 	// AgreementPct is MatchedCount / GTCount * 100; it stays 0 when GTCount is 0.
 	AgreementPct float64         `json:"agreement_pct"`
 	// Matched holds the GT-side token of every matched pair.
 	Matched      []DistanceToken `json:"matched"`
 	GTOnly       []DistanceToken `json:"gt_only"`     // GT (assessor) dimensions with no engine counterpart (gaps)
 	EngineOnly   []DistanceToken `json:"engine_only"` // engine dimensions with no GT counterpart
 }
 // distanceRe matches a number followed by a millimetre unit ("mm" or "mm/s").
 // Group 1 is the number, group 2 is "/s" when the unit is a speed.
 // Accepted number forms: German thousands grouping ("1.600"), thousands
 // grouping with a decimal comma ("1.600,5"), and plain numbers with an
 // optional "," or "." decimal part ("2,5", "0.5", "850").
 var distanceRe = regexp.MustCompile(`(\d{1,3}(?:\.\d{3})+(?:,\d+)?|\d+(?:[,.]\d+)?)\s*mm(/s)?`)
 // thousandsRe recognises German thousands grouping, optionally followed by a
 // decimal comma. A grouped number never starts with 0, so "0.250" is left to be
 // read as the decimal 0.25 rather than as 250.
 var thousandsRe = regexp.MustCompile(`^[1-9]\d{0,2}(?:\.\d{3})+(?:,\d+)?$`)
 // normalizeNumber converts a number literal captured by distanceRe into a
 // float64. Thousands separators are dropped and a decimal comma becomes a
 // decimal point. It returns 0 when the literal cannot be parsed (including
 // values out of float64 range), so callers can treat 0 as "no usable number".
 func normalizeNumber(s string) float64 {
 	if thousandsRe.MatchString(s) {
 		s = strings.ReplaceAll(s, ".", "") // German thousands separator
 	}
 	s = strings.ReplaceAll(s, ",", ".") // German decimal separator
 	v, err := strconv.ParseFloat(s, 64)
 	if err != nil {
 		// Do not leak ±Inf (range error) or a partial value to the caller.
 		return 0
 	}
 	return v
 }
 // extractDistanceTokens pulls the distinct (value,unit) dimensions out of prose.
 //
 // Every distanceRe match in every text becomes a candidate. A candidate is
 // dropped when:
 //   - the unit is directly followed by an exponent marker ("mm²", "mm3",
 //     "mm^2", "mm/s²"): that is an area, volume or acceleration, not a
 //     length or speed;
 //   - the number normalises to 0 (unparseable or a literal zero);
 //   - the same (unit, value rounded to one decimal) was already collected.
 //
 // Tokens are returned in first-seen order; the result is nil when nothing
 // matched.
 func extractDistanceTokens(texts []string) []DistanceToken {
 	seen := map[string]bool{}
 	var out []DistanceToken
 	for _, t := range texts {
 		// loc holds byte offsets: [0:2] whole match, [2:4] number, [4:6] "/s".
 		for _, loc := range distanceRe.FindAllStringSubmatchIndex(t, -1) {
 			if hasUnitExponent(t[loc[1]:]) {
 				continue
 			}
 			unit := "mm"
 			if loc[4] >= 0 {
 				unit = "mm/s"
 			}
 			val := normalizeNumber(t[loc[2]:loc[3]])
 			if val == 0 {
 				continue
 			}
 			key := unit + ":" + strconv.FormatFloat(val, 'f', 1, 64)
 			if seen[key] {
 				continue
 			}
 			seen[key] = true
 			out = append(out, DistanceToken{Value: val, Unit: unit, Raw: strings.TrimSpace(t[loc[0]:loc[1]])})
 		}
 	}
 	return out
 }
 // hasUnitExponent reports whether rest (the text right after a matched unit)
 // starts with an exponent marker, which turns the unit into mm², mm³ or mm/s².
 func hasUnitExponent(rest string) bool {
 	if rest == "" {
 		return false
 	}
 	switch rest[0] {
 	case '^', '2', '3':
 		return true
 	}
 	return strings.HasPrefix(rest, "²") || strings.HasPrefix(rest, "³")
 }
 // tokensMatch reports whether two tokens describe the same dimension: the
 // units must be identical and the values must differ by less than 0.05.
 func tokensMatch(a, b DistanceToken) bool {
 	if a.Unit == b.Unit {
 		delta := a.Value - b.Value
 		// Equivalent to |delta| < 0.05 without a sign flip.
 		return delta < 0.05 && delta > -0.05
 	}
 	return false
 }
 // CompareDistances parses the dimensions out of the professional's texts
 // (gtTexts) and the engine's texts (engineTexts) and pairs them up. Each engine
 // token can satisfy at most one GT token. The result lists the matched GT
 // tokens, the GT tokens without a counterpart, the engine tokens without a
 // counterpart, and the share of GT tokens that were matched (in percent).
 func CompareDistances(gtTexts, engineTexts []string) DistanceComparison {
 	gtTokens := extractDistanceTokens(gtTexts)
 	engTokens := extractDistanceTokens(engineTexts)
 	// Start from empty (non-nil) slices so the JSON output shows [] not null.
 	out := DistanceComparison{
 		GTCount:    len(gtTokens),
 		Matched:    []DistanceToken{},
 		GTOnly:     []DistanceToken{},
 		EngineOnly: []DistanceToken{},
 	}
 	used := make([]bool, len(engTokens))
 	for _, want := range gtTokens {
 		hit := -1
 		for idx := range engTokens {
 			if used[idx] || !tokensMatch(want, engTokens[idx]) {
 				continue
 			}
 			hit = idx
 			break
 		}
 		if hit < 0 {
 			out.GTOnly = append(out.GTOnly, want)
 			continue
 		}
 		used[hit] = true
 		out.MatchedCount++
 		out.Matched = append(out.Matched, want)
 	}
 	for idx, tok := range engTokens {
 		if used[idx] {
 			continue
 		}
 		out.EngineOnly = append(out.EngineOnly, tok)
 	}
 	// With no GT dimensions the percentage is left at its zero value.
 	if out.GTCount > 0 {
 		out.AgreementPct = float64(out.MatchedCount) / float64(out.GTCount) * 100
 	}
 	return out
 }
 // CompareSessionDistances is the benchmark-facing entry point. It gathers the
 // GT prose (every entry's measures plus its non-empty comment) and the engine
 // prose (every mitigation's name and description) and hands both to
 // CompareDistances. A nil gt contributes no GT text.
 func CompareSessionDistances(gt *GroundTruth, mitigations []Mitigation) DistanceComparison {
 	var professional []string
 	if gt != nil {
 		for _, entry := range gt.Entries {
 			professional = append(professional, entry.Measures...)
 			if len(entry.Comment) > 0 {
 				professional = append(professional, entry.Comment)
 			}
 		}
 	}
 	engine := make([]string, 0, 2*len(mitigations))
 	for _, mit := range mitigations {
 		engine = append(engine, mit.Name)
 		engine = append(engine, mit.Description)
 	}
 	return CompareDistances(professional, engine)
 }
@@ -0,0 +1,88 @@
 package iace
 import (
 	"encoding/json"
 	"os"
 	"strconv"
 	"testing"
 )
 // TestExtractDistanceTokens_Normalisation checks that German thousands and
 // decimal separators, speeds, and units written without a space are all parsed
 // into the expected (unit, value) tokens.
 func TestExtractDistanceTokens_Normalisation(t *testing.T) {
 	inputs := []string{
 		"Abstand >= 25 mm und max. 250 mm/s",
 		"Hand-Speed 1.600 mm/s", // German thousands → 1600
 		"Querschnitt 2,5 mm",    // German decimal → 2.5
 		"850mm ohne Leerzeichen",
 	}
 	toks := extractDistanceTokens(inputs)
 	seen := make(map[string]bool, len(toks))
 	for _, tk := range toks {
 		key := tk.Unit + ":" + strconv.FormatFloat(tk.Value, 'f', 1, 64)
 		seen[key] = true
 	}
 	expected := []string{"mm:25.0", "mm/s:250.0", "mm/s:1600.0", "mm:2.5", "mm:850.0"}
 	for _, want := range expected {
 		if seen[want] {
 			continue
 		}
 		t.Errorf("expected token %s, got %+v", want, toks)
 	}
 }
 // TestCompareDistances_MatchesAndGaps feeds four GT dimensions and three
 // engine dimensions and expects three matches with 150 mm/s left as the gap.
 func TestCompareDistances_MatchesAndGaps(t *testing.T) {
 	cmp := CompareDistances(
 		[]string{"Abstand >= 25 mm", "max. 250 mm/s", "min. 850 mm", "<= 150 mm/s"},
 		[]string{"Spalt 25 mm Fingerschutz", "Teach 250 mm/s", "850 mm Tunnel"},
 	)
 	if !(cmp.GTCount == 4 && cmp.MatchedCount == 3) {
 		t.Fatalf("expected 3/4 matched, got %d/%d", cmp.MatchedCount, cmp.GTCount)
 	}
 	gapOK := len(cmp.GTOnly) == 1 && cmp.GTOnly[0].Value == 150
 	if !gapOK {
 		t.Errorf("expected 150 mm/s as the gap, got %+v", cmp.GTOnly)
 	}
 }
 // TestCompareSessionDistances_RealGT loads the real ground-truth sessions from
 // testdata and checks that the protective-measure library covers each
 // session's headline dimensions (the engine measures were authored from these
 // sessions). Only the GT measures are compared here, not the comments.
 func TestCompareSessionDistances_RealGT(t *testing.T) {
 	var engTexts []string
 	for _, m := range GetProtectiveMeasureLibrary() {
 		engTexts = append(engTexts, m.Name, m.Description)
 	}
 	type sessionCase struct {
 		file string
 		must []DistanceToken
 	}
 	cases := []sessionCase{
 		{
 			file: "testdata/ground_truth_bremse.json",
 			must: []DistanceToken{
 				{Value: 250, Unit: "mm/s"}, {Value: 250, Unit: "mm"}, {Value: 850, Unit: "mm"},
 			},
 		},
 		{
 			file: "testdata/ground_truth_kistenhub.json",
 			must: []DistanceToken{
 				{Value: 25, Unit: "mm"}, {Value: 120, Unit: "mm"},
 				{Value: 150, Unit: "mm/s"}, {Value: 75, Unit: "mm/s"}, // filled by M603/M605
 			},
 		},
 	}
 	// covered reports whether want appears among the matched tokens.
 	covered := func(matched []DistanceToken, want DistanceToken) bool {
 		for _, m := range matched {
 			if m.Unit == want.Unit && m.Value == want.Value {
 				return true
 			}
 		}
 		return false
 	}
 	for _, tc := range cases {
 		raw, err := os.ReadFile(tc.file)
 		if err != nil {
 			t.Fatalf("read %s: %v", tc.file, err)
 		}
 		var gt GroundTruth
 		if err := json.Unmarshal(raw, &gt); err != nil {
 			t.Fatalf("parse %s: %v", tc.file, err)
 		}
 		var gtTexts []string
 		for _, e := range gt.Entries {
 			gtTexts = append(gtTexts, e.Measures...)
 		}
 		cmp := CompareDistances(gtTexts, engTexts)
 		for _, want := range tc.must {
 			if covered(cmp.Matched, want) {
 				continue
 			}
 			t.Errorf("%s: engine should cover %.0f %s but it is a gap", tc.file, want.Value, want.Unit)
 		}
 	}
 }