Compare commits

...

3 Commits

Author SHA1 Message Date
Benjamin Admin 733d2bcc7b feat(iace): per-category hazard caps for precision improvement
Build + Deploy / build-admin-compliance (push) Successful in 12s
Build + Deploy / build-backend-compliance (push) Successful in 11s
Build + Deploy / build-ai-sdk (push) Successful in 40s
Build + Deploy / build-developer-portal (push) Successful in 10s
Build + Deploy / build-tts (push) Successful in 10s
Build + Deploy / build-document-crawler (push) Successful in 10s
Build + Deploy / build-dsms-gateway (push) Successful in 10s
Build + Deploy / build-dsms-node (push) Successful in 11s
CI / branch-name (push) Has been skipped
CI / guardrail-integrity (push) Has been skipped
CI / loc-budget (push) Failing after 13s
CI / secret-scan (push) Has been skipped
CI / go-lint (push) Has been skipped
CI / python-lint (push) Has been skipped
CI / nodejs-lint (push) Has been skipped
CI / nodejs-build (push) Successful in 2m33s
CI / dep-audit (push) Has been skipped
CI / sbom-scan (push) Has been skipped
CI / test-go (push) Successful in 46s
CI / test-python-backend (push) Successful in 39s
CI / test-python-document-crawler (push) Successful in 28s
CI / test-python-dsms-gateway (push) Successful in 22s
CI / validate-canonical-controls (push) Successful in 15s
Build + Deploy / trigger-orca (push) Successful in 2m15s
Add categoryHazardCap() with ISO 12100-proportional limits:
- mechanical: 3x components (min 15, max 60)
- electrical: 1x components (min 8, max 20)
- secondary (thermal, noise, material): 4-8
- software/IT/organizational: 2-5 (minimal for machinery assessment)

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
2026-05-13 10:00:45 +02:00
Benjamin Admin 977e63f372 fix(iace): extend fuzzy matcher synonyms for electrical/EMV coverage
Add synonym sets for isolation/grounding, creepage/surface, EMV/radiation
to improve matching of GT entries 2.5, 2.6, and 6.1.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
2026-05-13 09:59:12 +02:00
Benjamin Admin be2ac762bd feat(iace): narrative vocabulary overlap filter replaces blacklist
Replace machine-specific term blacklist with generic vocabulary overlap:
- Extract significant words (>=5 chars, not generic safety terms) from
  pattern zone/scenario
- If pattern has specific words but NONE appear in narrative → filter
- genericSafetyTerms whitelist with ~50 terms that appear in all assessments
- Truly generic approach: works for any machine type without maintenance

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
2026-05-13 09:55:25 +02:00
3 changed files with 123 additions and 39 deletions
@@ -159,6 +159,7 @@ func (h *IACEHandler) InitializeProject(c *gin.Context) {
created := 0
seenCatZone := make(map[string]bool)
catCount := make(map[string]int)
for _, mp := range matchOutput.MatchedPatterns {
// Narrative relevance filter: skip patterns whose zone/scenario
// mentions machine-specific terms that don't appear in our components
@@ -167,6 +168,12 @@ func (h *IACEHandler) InitializeProject(c *gin.Context) {
}
for _, cat := range mp.HazardCats {
// Per-category cap: limit hazards per category based on relevance
maxForCat := categoryHazardCap(cat, len(comps))
if catCount[cat] >= maxForCat {
continue
}
// Dedup by category + normalized zone
zoneKey := normalizeZoneKey(mp.ZoneDE)
if zoneKey == "" {
@@ -212,6 +219,7 @@ func (h *IACEHandler) InitializeProject(c *gin.Context) {
})
if cerr == nil {
created++
catCount[cat]++
hazardIDsByCategory[cat] = hz.ID
}
}
@@ -199,53 +199,126 @@ func containsSubstring(haystack, needle string) bool {
)
}
// machineSpecificTerms are words in a pattern's zone/scenario that indicate
// the pattern is specific to a particular machine type. If a pattern contains
// such a term but the machine narrative does NOT, the pattern is irrelevant.
var machineSpecificTerms = []string{
"extruder", "spinnmaschine", "spielplatz", "aufzug", "elevator",
"kran", "crane", "bagger", "excavator", "traktor", "tractor",
"harvester", "druckmaschine", "printing", "webstuhl", "weaving",
"ofen", "furnace", "kessel", "boiler", "walzwerk", "rolling",
"zentrifuge", "centrifuge", "autoklav", "autoclave", "saege",
"kreissaege", "circular_saw", "hobel", "fraese", "drehmaschine",
"lathe", "schleifmaschine", "grinder", "stanze", "stanzpresse",
"infusion", "beatmung", "ventilator", "patient",
"lebensmittel", "food", "pharma", "verpackung", "packaging",
"seilnetz", "kletterseil", "schaukel", "rutsche",
"gabelstapler", "forklift", "flurfoerder",
// genericSafetyTerms is the set of words that show up in almost every risk
// assessment regardless of machine type. Because they carry no information
// about which machine a pattern was written for, they must NOT be used to
// decide whether a pattern is machine-specific.
//
// Keys are lower-case with umlauts transliterated ("oeffnung", "gehaeuse");
// lookups presumably need to use the same normalized form — confirm against
// the normalizer used by the caller.
var genericSafetyTerms = map[string]bool{
	// Machine, plant and work area
	"maschine":       true,
	"anlage":         true,
	"bereich":        true,
	"gesamte":        true,
	"arbeitsplatz":   true,
	"gefahrbereich":  true,
	"gefahrstelle":   true,
	"gefahrenstelle": true,
	"boden":          true,

	// People
	"person":   true,
	"werker":   true,
	"bediener": true,

	// Control and safeguarding
	"steuerung":         true,
	"schutzeinrichtung": true,
	"sicherheit":        true,
	"schutz":            true,

	// Operating phases
	"betrieb":        true,
	"wartung":        true,
	"instandhaltung": true,
	"reinigung":      true,

	// Parts, movement and access
	"bewegung":    true,
	"beweglich":   true,
	"feststehend": true,
	"teil":        true,
	"teile":       true,
	"oeffnung":    true,
	"zugang":      true,

	// Generic hazard and injury wording
	"gefahr":     true,
	"verletzung": true,
	"quetsch":    true,
	"scher":      true,
	"schneid":    true,
	"stoss":      true,
	"schlag":     true,
	"einzug":     true,
	"brand":      true,

	// Drives
	"motor":   true,
	"antrieb": true,
	"achse":   true,
	"achsen":  true,

	// Wiring, enclosures and surfaces
	"kabel":         true,
	"leitung":       true,
	"schaltschrank": true,
	"spannung":      true,
	"gehaeuse":      true,
	"oberflaeche":   true,
	"leitfaehig":    true,

	// Hazard domain adjectives
	"elektrisch": true,
	"mechanisch": true,

	// Operator interface
	"bedienfeld": true,
	"display":    true,
	"anzeige":    true,

	// Energy and process quantities
	"energie":    true,
	"druck":      true,
	"temperatur": true,
}
// isPatternRelevant checks whether a pattern match is relevant to the actual
// machine described in the narrative. A pattern is considered irrelevant if its
// zone or scenario contains machine-specific terms that don't appear in the
// narrative or component list.
// machine described in the narrative. Uses narrative vocabulary overlap:
// if the pattern's zone/scenario contains machine-specific words (not generic
// safety terms) and NONE of them appear in the narrative → irrelevant.
func isPatternRelevant(mp iace.PatternMatch, narrative string, compNames []string) bool {
patternText := iace.NormalizeDEPublic(mp.ZoneDE + " " + mp.ScenarioDE + " " + mp.PatternName)
patternText := iace.NormalizeDEPublic(mp.ZoneDE + " " + mp.ScenarioDE)
narrativeNorm := iace.NormalizeDEPublic(narrative)
// Check if pattern mentions machine-specific terms absent from narrative
for _, term := range machineSpecificTerms {
if !strings.Contains(patternText, term) {
// Extract machine-specific words from pattern (not generic safety terms)
patternWords := strings.Fields(patternText)
var specificWords []string
for _, w := range patternWords {
// Clean punctuation
w = strings.Trim(w, ".,;:!?()/-")
if len(w) < 5 || genericSafetyTerms[w] {
continue
}
// Pattern mentions this machine-specific term — check if machine has it
if strings.Contains(narrativeNorm, term) {
continue // Machine has this term, pattern is relevant
specificWords = append(specificWords, w)
}
// If pattern has no specific words, it's generic → always relevant
if len(specificWords) == 0 {
return true
}
// Check if at least one specific word appears in the narrative or components
for _, sw := range specificWords {
if strings.Contains(narrativeNorm, sw) {
return true
}
// Also check component names
found := false
for _, cn := range compNames {
if strings.Contains(cn, term) {
found = true
break
if strings.Contains(cn, sw) {
return true
}
}
if !found {
return false // Pattern mentions a machine type we don't have
}
}
return true
// No specific word found in narrative → pattern is for a different machine
return false
}
// categoryHazardCap reports the maximum number of hazards to generate for a
// single hazard category.
//
// Parameters:
//   - cat: hazard category identifier, e.g. "mechanical_hazard".
//   - componentCount: number of machine components; only the mechanical and
//     electrical limits depend on it.
//
// The limits follow typical ISO 12100 risk assessment proportions:
//   - mechanical: three per component, clamped to the range 15..60
//   - electrical: one per component, clamped to the range 8..20
//   - other physical categories: fixed limits between 3 and 8
//   - software/IT/organizational categories: fixed limits between 2 and 5,
//     since these are usually covered by other standards (e.g. IEC 62443)
//     rather than by an ISO 12100 machinery risk assessment
//   - any category not listed: 3
func categoryHazardCap(cat string, componentCount int) int {
	switch cat {
	// Core machinery categories scale with machine complexity.
	case "mechanical_hazard":
		// Typically 1-3 hazards per component (crushing, shearing, impact, ...).
		limit := componentCount * 3
		switch {
		case limit < 15:
			return 15
		case limit > 60:
			return 60
		}
		return limit
	case "electrical_hazard":
		// Typically 8-15 for a standard machine.
		switch {
		case componentCount < 8:
			return 8
		case componentCount > 20:
			return 20
		}
		return componentCount

	// Fixed limits, grouped by value.
	case "pneumatic_hydraulic":
		return 8
	case "thermal_hazard", "material_environmental":
		return 6
	case "safety_function_failure":
		return 5
	case "noise_vibration",
		"ergonomic", "ergonomic_hazard",
		"fire_explosion",
		"maintenance_hazard":
		return 4
	case "radiation_hazard", "emc_hazard",
		"software_fault", "configuration_error", "hmi_error":
		return 3
	case "mode_confusion":
		return 2
	}
	// Unknown categories get the same small limit as the minor ones above.
	return 3
}
// normalizeZoneKey reduces a zone string to its core components for better dedup.
@@ -31,12 +31,12 @@ var synonymSets = [][]string{
{"schneid", "cut", "schnitt"},
{"stoss", "schlag", "impact", "treff", "aufprall"},
{"einzug", "fang", "erfass", "entangle", "wickel"},
{"elektrisch", "stromschlag", "electric", "beruehr", "spannungsfuehr"},
{"brand", "feuer", "fire", "kabelbrand", "kurzschluss"},
{"verbrenn", "burn", "heiss", "thermisch"},
{"laerm", "noise", "gehoer", "schall"},
{"elektrisch", "stromschlag", "electric", "beruehr", "spannungsfuehr", "koerperdurchstroemung"},
{"brand", "feuer", "fire", "kabelbrand", "kurzschluss", "ueberlast", "ueberstrom"},
{"verbrenn", "burn", "heiss", "thermisch", "lichtbogen"},
{"laerm", "noise", "gehoer", "schall", "dezibel"},
{"vibration", "schwing"},
{"ergonom", "haltung", "handhabung", "bedien"},
{"ergonom", "haltung", "handhabung", "bedien", "bewegungsapparat"},
{"kuehlschmierstoff", "kss", "aerosol", "coolant"},
{"pneumat", "druckluft", "compressed"},
{"hydraul", "druck", "pressure"},
@@ -48,6 +48,9 @@ var synonymSets = [][]string{
{"stolper", "rutsch", "slip", "trip"},
{"leckage", "austreten", "leak"},
{"einstich", "puncture", "spritz"},
{"isolat", "kriechstrom", "schutzleiter", "erdung", "indirekt"},
{"luft", "kriechstreck", "beruehrer", "oberflaeche", "leitfaehig"},
{"emv", "strahlung", "radiation", "elektromagnet", "stoereinfluss"},
}
// CompareBenchmark runs the full comparison between Ground Truth and engine output.