003eafa75d
scenarioSimilarity now uses synonym-set cross-matching: if GT says "durchschlaegt" and Engine says "schleuder", the synonym set recognizes them as related. Added significantWordOverlap fallback when no action words found. Extended action terms: schlauch/druck/kuehlschmierstoff, pumpe/bettspuel, potential/bezugspotential, stoerung/emv. Moved extractActionWords to benchmark_synonyms.go (458+119 lines). Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
459 lines
12 KiB
Go
459 lines
12 KiB
Go
package iace
|
||
|
||
import (
|
||
"sort"
|
||
"strings"
|
||
)
|
||
|
||
// ============================================================================
|
||
// Fuzzy matching: Ground Truth entries ↔ Engine hazards
|
||
// ============================================================================
|
||
|
||
// matchThreshold is the minimum fuzzyMatchScore a Ground Truth/hazard pair
// must reach before CompareBenchmark keeps it as a match candidate.
const matchThreshold = 0.20
|
||
|
||
// categoryMap, synonymSets, wrongMachineTerms → benchmark_synonyms.go
|
||
|
||
// CompareBenchmark runs the full comparison between Ground Truth and engine output.
|
||
func CompareBenchmark(gt *GroundTruth, hazards []Hazard, mitigations []Mitigation) *BenchmarkResult {
|
||
if gt == nil || len(gt.Entries) == 0 {
|
||
return &BenchmarkResult{}
|
||
}
|
||
|
||
// Build mitigation names per hazard
|
||
mitNamesByHazard := make(map[string][]string)
|
||
for _, m := range mitigations {
|
||
mitNamesByHazard[m.HazardID.String()] = append(mitNamesByHazard[m.HazardID.String()], m.Name)
|
||
}
|
||
|
||
engineSummaries := make([]HazardSummary, len(hazards))
|
||
for i, h := range hazards {
|
||
engineSummaries[i] = HazardSummary{
|
||
ID: h.ID.String(),
|
||
Name: h.Name,
|
||
Category: h.Category,
|
||
Zone: h.HazardousZone,
|
||
Description: h.Description,
|
||
Scenario: h.Scenario,
|
||
PossibleHarm: h.PossibleHarm,
|
||
TriggerEvent: h.TriggerEvent,
|
||
AffectedPerson: h.AffectedPerson,
|
||
LifecyclePhase: h.LifecyclePhase,
|
||
Mitigations: mitNamesByHazard[h.ID.String()],
|
||
}
|
||
}
|
||
|
||
// Build score matrix: gt[i] × engine[j]
|
||
type scoredPair struct {
|
||
gtIdx, engIdx int
|
||
score float64
|
||
reason string
|
||
}
|
||
var pairs []scoredPair
|
||
for i := range gt.Entries {
|
||
for j := range hazards {
|
||
score, reason := fuzzyMatchScore(>.Entries[i], &hazards[j])
|
||
if score >= matchThreshold {
|
||
pairs = append(pairs, scoredPair{i, j, score, reason})
|
||
}
|
||
}
|
||
}
|
||
|
||
// Greedy assignment: sort by score, but prioritize high-specificity matches
|
||
// (matches where both category AND zone overlap) over generic ones
|
||
sort.Slice(pairs, func(a, b int) bool {
|
||
// First: prioritize matches with zone overlap (more specific)
|
||
aHasZone := pairs[a].reason != "" && (strings.Contains(pairs[a].reason, "Zone") || strings.Contains(pairs[a].reason, "Keywords+Zone"))
|
||
bHasZone := pairs[b].reason != "" && (strings.Contains(pairs[b].reason, "Zone") || strings.Contains(pairs[b].reason, "Keywords+Zone"))
|
||
if aHasZone != bHasZone {
|
||
return aHasZone
|
||
}
|
||
return pairs[a].score > pairs[b].score
|
||
})
|
||
usedGT := make(map[int]bool)
|
||
usedEng := make(map[int]bool)
|
||
var matched []HazardMatchPair
|
||
|
||
for _, p := range pairs {
|
||
if usedGT[p.gtIdx] || usedEng[p.engIdx] {
|
||
continue
|
||
}
|
||
usedGT[p.gtIdx] = true
|
||
usedEng[p.engIdx] = true
|
||
matched = append(matched, HazardMatchPair{
|
||
GTEntry: gt.Entries[p.gtIdx],
|
||
EngineHazard: engineSummaries[p.engIdx],
|
||
MatchScore: p.score,
|
||
MatchReason: p.reason,
|
||
})
|
||
}
|
||
|
||
// Collect unmatched
|
||
var missing []GroundTruthEntry
|
||
for i, e := range gt.Entries {
|
||
if !usedGT[i] {
|
||
missing = append(missing, e)
|
||
}
|
||
}
|
||
var extra []HazardSummary
|
||
for i, s := range engineSummaries {
|
||
if !usedEng[i] {
|
||
extra = append(extra, s)
|
||
}
|
||
}
|
||
|
||
// Category breakdown
|
||
catGT := map[string]int{}
|
||
catMatch := map[string]int{}
|
||
for _, e := range gt.Entries {
|
||
cat := normalizeCategoryDE(e.HazardGroup)
|
||
catGT[cat]++
|
||
}
|
||
for _, m := range matched {
|
||
cat := normalizeCategoryDE(m.GTEntry.HazardGroup)
|
||
catMatch[cat]++
|
||
}
|
||
var breakdown []CategoryScore
|
||
for cat, total := range catGT {
|
||
cov := 0.0
|
||
if total > 0 {
|
||
cov = float64(catMatch[cat]) / float64(total)
|
||
}
|
||
breakdown = append(breakdown, CategoryScore{
|
||
Category: cat, GTCount: total, MatchCount: catMatch[cat], Coverage: cov,
|
||
})
|
||
}
|
||
sort.Slice(breakdown, func(i, j int) bool { return breakdown[i].GTCount > breakdown[j].GTCount })
|
||
|
||
// Measure coverage (simplified: count GT entries where at least 1 measure keyword matches)
|
||
measMatched := 0
|
||
for _, m := range matched {
|
||
if measureOverlap(m.GTEntry.Measures, mitigations) {
|
||
measMatched++
|
||
}
|
||
}
|
||
measCov := 0.0
|
||
if len(matched) > 0 {
|
||
measCov = float64(measMatched) / float64(len(matched))
|
||
}
|
||
|
||
// Risk rank comparison
|
||
rankPairs := buildRiskRankPairs(matched)
|
||
|
||
coverage := 0.0
|
||
if len(gt.Entries) > 0 {
|
||
coverage = float64(len(matched)) / float64(len(gt.Entries))
|
||
}
|
||
|
||
return &BenchmarkResult{
|
||
CoverageScore: coverage,
|
||
MeasureCoverage: measCov,
|
||
TotalGT: len(gt.Entries),
|
||
TotalEngine: len(hazards),
|
||
MatchedPairs: matched,
|
||
MissingFromEngine: missing,
|
||
ExtraInEngine: extra,
|
||
CategoryBreakdown: breakdown,
|
||
RiskRankPairs: rankPairs,
|
||
}
|
||
}
|
||
|
||
// fuzzyMatchScore computes a 0-1 similarity between a GT entry and an engine hazard.
|
||
// 4 signals: category (0.2), keywords (0.2), zone (0.3), scenario similarity (0.3).
|
||
func fuzzyMatchScore(gt *GroundTruthEntry, h *Hazard) (float64, string) {
|
||
var score float64
|
||
var reasons []string
|
||
|
||
// 1. Category match (weight 0.2)
|
||
catScore := categoryMatchScore(gt.HazardGroup, h.Category)
|
||
score += 0.2 * catScore
|
||
if catScore > 0 {
|
||
reasons = append(reasons, "Kategorie")
|
||
}
|
||
|
||
// 2. Keyword/synonym match on hazard TYPE (weight 0.2)
|
||
kwScore := keywordMatchScore(gt.HazardType, gt.HazardCause, h.Name, h.Description, h.Scenario)
|
||
score += 0.2 * kwScore
|
||
if kwScore > 0 {
|
||
reasons = append(reasons, "Keywords")
|
||
}
|
||
|
||
// 3. Component/zone match (weight 0.3)
|
||
zoneScore := zoneMatchScore(gt.ComponentZone, gt.HazardSubgroup, h.HazardousZone, h.MachineModule)
|
||
score += 0.3 * zoneScore
|
||
if zoneScore > 0 {
|
||
reasons = append(reasons, "Zone")
|
||
}
|
||
|
||
// 4. Scenario similarity (weight 0.3) — compares the actual event description
|
||
scenScore := scenarioSimilarity(gt.HazardCause, h.Scenario, h.Name)
|
||
score += 0.3 * scenScore
|
||
if scenScore > 0 {
|
||
reasons = append(reasons, "Szenario")
|
||
}
|
||
|
||
// Penalty: wrong machine term
|
||
if hasWrongMachineTerm(h.Name, h.Scenario, gt.HazardCause, gt.ComponentZone) {
|
||
score *= 0.3
|
||
reasons = append(reasons, "Strafabzug:FremdMaschine")
|
||
}
|
||
|
||
// Penalty: no keyword AND no scenario overlap → unreliable
|
||
if kwScore == 0 && scenScore == 0 && zoneScore < 0.5 {
|
||
score *= 0.4
|
||
reasons = append(reasons, "Strafabzug:KeinInhalt")
|
||
}
|
||
|
||
return score, strings.Join(reasons, "+")
|
||
}
|
||
|
||
// scenarioSimilarity compares the GT cause description with the engine scenario.
|
||
// Uses action words + synonym-set cross-matching for robust comparison.
|
||
func scenarioSimilarity(gtCause, engScenario, engName string) float64 {
|
||
gtText := normalizeDE(gtCause)
|
||
engText := normalizeDE(engScenario + " " + engName)
|
||
|
||
gtActions := extractActionWords(gtText)
|
||
engActions := extractActionWords(engText)
|
||
|
||
if len(gtActions) == 0 {
|
||
// Fallback: use significant word overlap
|
||
return significantWordOverlap(gtText, engText)
|
||
}
|
||
|
||
matched := 0
|
||
for _, ga := range gtActions {
|
||
// Direct match
|
||
directFound := false
|
||
for _, ea := range engActions {
|
||
if ga == ea || strings.HasPrefix(ea, ga) || strings.HasPrefix(ga, ea) {
|
||
directFound = true
|
||
break
|
||
}
|
||
}
|
||
if directFound {
|
||
matched++
|
||
continue
|
||
}
|
||
// Synonym-set match: if GT action and any engine action are in the same synonym set
|
||
for _, synSet := range synonymSets {
|
||
gaInSet := false
|
||
for _, syn := range synSet {
|
||
if strings.Contains(ga, syn) || strings.Contains(syn, ga) {
|
||
gaInSet = true
|
||
break
|
||
}
|
||
}
|
||
if !gaInSet {
|
||
continue
|
||
}
|
||
// Check if any engine action is in this same set
|
||
for _, ea := range engActions {
|
||
for _, syn := range synSet {
|
||
if strings.Contains(ea, syn) || strings.Contains(syn, ea) {
|
||
matched++
|
||
goto nextAction
|
||
}
|
||
}
|
||
}
|
||
// Also check full engine text for synonym hit
|
||
for _, syn := range synSet {
|
||
if strings.Contains(engText, syn) {
|
||
matched++
|
||
goto nextAction
|
||
}
|
||
}
|
||
}
|
||
nextAction:
|
||
}
|
||
return float64(matched) / float64(len(gtActions))
|
||
}
|
||
|
||
// significantWordOverlap is a fallback when no action words are found.
|
||
func significantWordOverlap(gtText, engText string) float64 {
|
||
gtWords := extractSignificantWords(gtText)
|
||
if len(gtWords) == 0 {
|
||
return 0
|
||
}
|
||
matched := 0
|
||
for _, w := range gtWords {
|
||
if strings.Contains(engText, w) {
|
||
matched++
|
||
}
|
||
}
|
||
return float64(matched) / float64(len(gtWords))
|
||
}
|
||
|
||
func hasWrongMachineTerm(engName, engScenario, gtCause, gtZone string) bool {
|
||
engText := normalizeDE(engName + " " + engScenario)
|
||
gtText := normalizeDE(gtCause + " " + gtZone)
|
||
for _, term := range wrongMachineTerms {
|
||
if strings.Contains(engText, term) && !strings.Contains(gtText, term) {
|
||
return true
|
||
}
|
||
}
|
||
return false
|
||
}
|
||
|
||
func categoryMatchScore(gtGroup, engCategory string) float64 {
|
||
normalized := normalizeDE(gtGroup)
|
||
prefixes, ok := categoryMap[normalized]
|
||
if !ok {
|
||
return 0
|
||
}
|
||
engLower := strings.ToLower(engCategory)
|
||
for _, p := range prefixes {
|
||
if strings.Contains(engLower, p) {
|
||
return 1.0
|
||
}
|
||
}
|
||
return 0
|
||
}
|
||
|
||
func keywordMatchScore(gtType, gtCause, engName, engDesc, engScenario string) float64 {
|
||
gtText := normalizeDE(gtType + " " + gtCause)
|
||
engText := normalizeDE(engName + " " + engDesc + " " + engScenario)
|
||
|
||
matchedSets := 0
|
||
totalRelevant := 0
|
||
|
||
for _, synSet := range synonymSets {
|
||
gtHas := false
|
||
engHas := false
|
||
for _, syn := range synSet {
|
||
if strings.Contains(gtText, syn) {
|
||
gtHas = true
|
||
}
|
||
if strings.Contains(engText, syn) {
|
||
engHas = true
|
||
}
|
||
}
|
||
if gtHas {
|
||
totalRelevant++
|
||
if engHas {
|
||
matchedSets++
|
||
}
|
||
}
|
||
}
|
||
|
||
if totalRelevant == 0 {
|
||
return 0
|
||
}
|
||
return float64(matchedSets) / float64(totalRelevant)
|
||
}
|
||
|
||
func zoneMatchScore(gtZone, gtSubgroup, engZone, engModule string) float64 {
|
||
gtText := normalizeDE(gtZone + " " + gtSubgroup)
|
||
engText := normalizeDE(engZone + " " + engModule)
|
||
|
||
if gtText == "" || engText == "" {
|
||
return 0
|
||
}
|
||
|
||
// Check for significant word overlap
|
||
gtWords := extractSignificantWords(gtText)
|
||
engWords := extractSignificantWords(engText)
|
||
|
||
if len(gtWords) == 0 {
|
||
return 0
|
||
}
|
||
|
||
matched := 0
|
||
for _, gw := range gtWords {
|
||
for _, ew := range engWords {
|
||
if strings.Contains(ew, gw) || strings.Contains(gw, ew) {
|
||
matched++
|
||
break
|
||
}
|
||
}
|
||
}
|
||
return float64(matched) / float64(len(gtWords))
|
||
}
|
||
|
||
// extractSignificantWords splits text on whitespace and returns the tokens
// that carry meaning for matching, in their original order.
//
// Punctuation glued to the edges of a token (e.g. "spindel," or "(werkzeug)")
// is trimmed first; otherwise such tokens would slip past the stop-word filter
// and fail the substring comparisons done by callers. Tokens shorter than
// three bytes and German stop words are dropped. The callers in this file pass
// text already processed by normalizeDE, so stop words are listed in their
// lower-case, transliterated form. Returns nil when nothing remains.
func extractSignificantWords(text string) []string {
	// Characters stripped from both ends of a token; inner characters such as
	// the hyphen in "not-halt" are left alone.
	const edgePunct = ".,;:!?()[]{}\"'„“”«»-/"

	var sig []string
	for _, w := range strings.Fields(text) {
		w = strings.Trim(w, edgePunct)
		if len(w) < 3 {
			continue
		}
		// A switch avoids rebuilding a lookup map on every call; this function
		// is invoked inside nested loops.
		switch w {
		case "der", "die", "das", "und", "oder",
			"von", "in", "an", "am", "im",
			"zu", "bei", "mit", "des", "den",
			"dem", "ein", "eine", "einer", "einem",
			"fuer", "auf", "aus", "um", "nach",
			"ueber", "unter", "vor", "durch":
			continue
		}
		sig = append(sig, w)
	}
	return sig
}
|
||
|
||
// NormalizeDEPublic is the exported version of normalizeDE for use outside this package.
|
||
func NormalizeDEPublic(s string) string { return normalizeDE(s) }
|
||
|
||
// deTransliterator rewrites German umlauts and sharp s to their ASCII
// digraphs in a single pass. Besides the precomposed code points it also
// covers the decomposed form (base letter followed by U+0308 COMBINING
// DIAERESIS), which the precomposed replacements alone would miss.
var deTransliterator = strings.NewReplacer(
	"\u00e4", "ae", // ä
	"\u00f6", "oe", // ö
	"\u00fc", "ue", // ü
	"\u00df", "ss", // ß
	"a\u0308", "ae",
	"o\u0308", "oe",
	"u\u0308", "ue",
)

// normalizeDE trims surrounding whitespace, lowercases s and replaces umlauts
// and sharp s with ASCII digraphs (ae, oe, ue, ss).
//
// NOTE(review): the previous comment stated this is the same normalization as
// in narrative_parser. The decomposed-umlaut handling added here goes beyond
// the four precomposed replacements — confirm whether narrative_parser should
// follow.
func normalizeDE(s string) string {
	return deTransliterator.Replace(strings.ToLower(strings.TrimSpace(s)))
}
|
||
|
||
func normalizeCategoryDE(group string) string {
|
||
n := normalizeDE(group)
|
||
// Shorten for display
|
||
n = strings.TrimPrefix(n, "gefaehrdungen durch ")
|
||
n = strings.TrimPrefix(n, "gefaehrdungen im zusammenhang mit ")
|
||
return n
|
||
}
|
||
|
||
func measureOverlap(gtMeasures []string, mitigations []Mitigation) bool {
|
||
for _, gm := range gtMeasures {
|
||
gmNorm := normalizeDE(gm)
|
||
for _, m := range mitigations {
|
||
mNorm := normalizeDE(m.Name + " " + m.Description)
|
||
// Check if any significant word from GT measure appears in engine mitigation
|
||
words := extractSignificantWords(gmNorm)
|
||
for _, w := range words {
|
||
if strings.Contains(mNorm, w) {
|
||
return true
|
||
}
|
||
}
|
||
}
|
||
}
|
||
return false
|
||
}
|
||
|
||
func buildRiskRankPairs(matched []HazardMatchPair) []RiskRankPair {
|
||
if len(matched) == 0 {
|
||
return nil
|
||
}
|
||
|
||
// Sort by GT risk descending to get GT rank
|
||
type ranked struct {
|
||
idx int
|
||
gtRisk int
|
||
name string
|
||
}
|
||
items := make([]ranked, len(matched))
|
||
for i, m := range matched {
|
||
items[i] = ranked{i, m.GTEntry.RiskIn.R, m.GTEntry.HazardType}
|
||
}
|
||
sort.Slice(items, func(a, b int) bool { return items[a].gtRisk > items[b].gtRisk })
|
||
|
||
pairs := make([]RiskRankPair, len(items))
|
||
for rank, item := range items {
|
||
pairs[rank] = RiskRankPair{
|
||
GTRank: rank + 1,
|
||
EngineRank: 0, // Engine has no assessment yet for auto-generated hazards
|
||
HazardName: item.name,
|
||
GTRiskScore: item.gtRisk,
|
||
EngineRisk: 0,
|
||
}
|
||
}
|
||
return pairs
|
||
}
|