breakpilot-compliance/ai-compliance-sdk/internal/iace/gt_kistenhub_test.go

package iace

import (
	"encoding/json"
	"fmt"
	"os"
	"path/filepath"
	"sort"
	"testing"

	"github.com/google/uuid"
)

// TestKistenhub_GTCoverage benchmarks the current pattern engine and measure
// library against the Kistenhubgeraet ground truth (37 entries) and logs the
// recall/precision split. Everything runs in memory; no database is needed.
//
// How the match input is composed:
//   - Component C014 (Hubwerk) contributes the lift-relevant tags
//     (crush_point, gravity_risk, person_under_load).
//   - Energy sources EN01 (electric) and EN03 (potential/gravity) satisfy the
//     RequiredEnergyTags ("gravitational") of HP2100-2102.
//   - MachineTypes {lift, hoist, scissor_lift, elevator} gate the lift-bridge
//     patterns.
//
// No hard coverage threshold is asserted. The metrics are only logged, so run
// the test with `go test -v` to read them and use it as a reproducible
// benchmark whenever the lift-bridge library changes.
func TestKistenhub_GTCoverage(t *testing.T) {
	// Load and decode the ground truth fixture.
	gtFile := filepath.Join("testdata", "ground_truth_kistenhub.json")
	data, readErr := os.ReadFile(gtFile)
	if readErr != nil {
		t.Fatalf("read GT: %v", readErr)
	}
	truth := new(GroundTruth)
	if parseErr := json.Unmarshal(data, truth); parseErr != nil {
		t.Fatalf("parse GT: %v", parseErr)
	}
	t.Logf("Loaded %d GT entries from %s", len(truth.Entries), gtFile)

	// Describe the machine under test to the pattern engine.
	input := MatchInput{
		MachineTypes:        []string{"lift", "hoist", "scissor_lift", "elevator"},
		ComponentLibraryIDs: []string{"C014"},
		EnergySourceIDs:     []string{"EN01", "EN03"},
		HumanRoles:          []string{"operator", "maintenance_tech"},
		OperationalStates:   []string{"normal_operation", "maintenance", "manual_operation"},
		LifecyclePhases: []string{
			"normal_operation", "maintenance", "cleaning",
			"setup", "transport", "manual_operation",
		},
		CustomTags: []string{
			"lift", "hoist", "scissor_lift", "manual_lift",
			"mobile_machine", "hand_operated",
		},
	}

	engine := NewPatternEngine()
	out := engine.Match(input)
	t.Logf("Pattern engine matched %d patterns", len(out.MatchedPatterns))

	// Convert the engine output to benchmark shapes and compare against the GT.
	hazards, mitigations := patternsToHazardsAndMitigations(out)
	result := CompareBenchmark(truth, hazards, mitigations)

	hits := len(result.MatchedPairs)
	missing := result.MissingFromEngine

	// Precision stays at zero when the engine produced no hazards at all.
	var precision float64
	if result.TotalEngine > 0 {
		precision = float64(hits) / float64(result.TotalEngine)
	}

	t.Logf("=== Kistenhub-GT Benchmark Result ===")
	t.Logf("Hazard Coverage:    %.1f%% (%d/%d, %d missing)",
		result.CoverageScore*100, hits, result.TotalGT, len(missing))
	t.Logf("Measure Coverage:   %.1f%%", result.MeasureCoverage*100)
	t.Logf("Engine Hazards:     %d (%d extra)", result.TotalEngine, len(result.ExtraInEngine))
	t.Logf("Precision:          %.1f%%", precision*100)

	t.Logf("\n--- Category breakdown ---")
	for _, entry := range result.CategoryBreakdown {
		t.Logf("  %-50s %d/%d (%.0f%%)", entry.Category, entry.MatchCount, entry.GTCount, entry.Coverage*100)
	}

	if len(missing) > 0 {
		t.Logf("\n--- Missing from engine (%d) ---", len(missing))
		for _, gap := range missing {
			t.Logf("  GT %s [%s]: %q — %q",
				gap.Nr, abbrev(gap.HazardGroup, 25), abbrev(gap.HazardType, 30), abbrev(gap.HazardCause, 60))
		}
	}

	// Track which of the lift-bridge patterns and measures actually fired.
	liftPatterns := map[string]bool{"HP2100": false, "HP2101": false, "HP2102": false}
	liftMeasures := map[string]bool{"M600": false, "M601": false, "M602": false, "M603": false, "M604": false}

	// markFired flips an entry to true only for IDs that are being tracked, so
	// unrelated IDs never add keys to the map.
	markFired := func(tracked map[string]bool, id string) {
		if _, watched := tracked[id]; watched {
			tracked[id] = true
		}
	}
	for _, matched := range out.MatchedPatterns {
		markFired(liftPatterns, matched.PatternID)
	}
	for _, suggested := range out.SuggestedMeasures {
		markFired(liftMeasures, suggested.MeasureID)
	}

	t.Logf("\n--- Lift-Bridge verification (SHA c771d8e from 2026-05-22) ---")
	t.Logf("HP2100-2102 fired:  %s", formatPresence(liftPatterns))
	t.Logf("M600-M604 fired:    %s", formatPresence(liftMeasures))

	// Only a warning: the test is a benchmark and deliberately does not fail here.
	if countTrue(liftPatterns) == 0 {
		t.Log("WARNING: none of the lift-bridge patterns fired — check tag composition")
	}
}

// patternsToHazardsAndMitigations turns a pattern match output into the
// Hazard and Mitigation values that CompareBenchmark consumes. It mirrors what
// iace_handler_init.go does in production, minus the DB writes.
//
// Each matched pattern becomes one Hazard with a freshly generated ID. Each
// suggested measure becomes one Mitigation per source pattern that produced a
// hazard; source patterns without a hazard are skipped.
func patternsToHazardsAndMitigations(out *MatchOutput) ([]Hazard, []Mitigation) {
	matched := out.MatchedPatterns
	hazards := make([]Hazard, 0, len(matched))
	hazardByPattern := make(map[string]uuid.UUID, len(matched))

	for _, p := range matched {
		// Only the first category and first lifecycle phase are carried over.
		var category, phase string
		if len(p.HazardCats) > 0 {
			category = p.HazardCats[0]
		}
		if len(p.ApplicableLifecycles) > 0 {
			phase = p.ApplicableLifecycles[0]
		}

		hz := Hazard{
			ID:             uuid.New(),
			Name:           p.ScenarioDE,
			Category:       category,
			Description:    p.ScenarioDE,
			Scenario:       p.ScenarioDE,
			TriggerEvent:   p.TriggerDE,
			PossibleHarm:   p.HarmDE,
			AffectedPerson: p.AffectedDE,
			HazardousZone:  p.ZoneDE,
			LifecyclePhase: phase,
		}
		// Fall back to the pattern name when no scenario text is available.
		if hz.Name == "" {
			hz.Name = p.PatternName
		}

		hazards = append(hazards, hz)
		hazardByPattern[p.PatternID] = hz.ID
	}

	// Index the measure library by ID so measure names can be resolved.
	nameByMeasure := make(map[string]string)
	for _, lib := range GetProtectiveMeasureLibrary() {
		nameByMeasure[lib.ID] = lib.Name
	}

	var mitigations []Mitigation
	for _, suggestion := range out.SuggestedMeasures {
		// Unknown or unnamed measures are labelled with their raw ID.
		label := suggestion.MeasureID
		if resolved := nameByMeasure[suggestion.MeasureID]; resolved != "" {
			label = resolved
		}
		for _, source := range suggestion.SourcePatterns {
			if hazardID, found := hazardByPattern[source]; found {
				mitigations = append(mitigations, Mitigation{
					ID:       uuid.New(),
					HazardID: hazardID,
					Name:     label,
				})
			}
		}
	}
	return hazards, mitigations
}

// abbrev shortens s to at most max characters for log output. Strings that
// already fit are returned unchanged; longer ones are cut to max-1 characters
// and terminated with an ellipsis ("…").
//
// Length is counted in runes rather than bytes, so multi-byte UTF-8 text
// (for example German umlauts) is never split in the middle of a character.
// A non-positive max yields the empty string instead of panicking on a
// negative slice bound.
func abbrev(s string, max int) string {
	if max <= 0 {
		return ""
	}
	// Fast path: a string of at most max bytes cannot contain more than max runes.
	if len(s) <= max {
		return s
	}
	runes := []rune(s)
	if len(runes) <= max {
		return s
	}
	return string(runes[:max-1]) + "…"
}

// formatPresence renders m as a single line of "<mark><key> " entries in
// ascending key order, where the mark is "✓" for true and "✗" for false.
// Every entry, including the last one, is followed by a space; an empty map
// yields the empty string.
func formatPresence(m map[string]bool) string {
	ids := make([]string, 0, len(m))
	for id := range m {
		ids = append(ids, id)
	}
	// Map iteration order is random; sort for stable log output.
	sort.Strings(ids)

	symbol := map[bool]string{false: "✗", true: "✓"}
	var line string
	for i := range ids {
		id := ids[i]
		line = fmt.Sprintf("%s%s%s ", line, symbol[m[id]], id)
	}
	return line
}

// countTrue reports how many entries of m hold the value true. A nil or empty
// map yields 0.
func countTrue(m map[string]bool) int {
	total := 0
	for key := range m {
		if !m[key] {
			continue
		}
		total++
	}
	return total
}