Files
breakpilot-compliance/ai-compliance-sdk/internal/api/handlers/iace_handler_init_helpers.go
T
Benjamin Admin 9797234ff6 fix(iace): add abbreviations + action words to genericSafetyTerms
KSS, EMV, ESD, DCS, PLR, SIL, HMI, SPS, RCD, LOTO, PSA are
abbreviations that should NOT trigger the relevance filter.
bersten, platzen, abspringen, spritzen, einatmen, ausrutschen,
herabfallen, durchschlaegen, wegschleudern are action words that
appear in many patterns and don't indicate a specific machine.

Fixes: HP1633-HP1675 (KSS patterns) were filtered out because
"kss" was not in the narrative but also not in genericSafetyTerms.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
2026-05-15 16:05:20 +02:00

391 lines
13 KiB
Go

package handlers
import (
"encoding/json"
"strings"
"github.com/breakpilot/ai-compliance-sdk/internal/iace"
"github.com/google/uuid"
)
// extractNarrativeFromMetadata concatenates the free-text fields of the
// "limits_form" object inside the project metadata into one narrative string.
//
// Fields are emitted in the fixed order listed in textFields below; every
// non-empty string value is followed by a blank line ("\n\n"). Values that are
// missing, empty, or not JSON strings are skipped. The function returns "" when
// metadata is nil, is not a JSON object, has no "limits_form" key, or when
// "limits_form" itself cannot be decoded as a JSON object.
func extractNarrativeFromMetadata(metadata json.RawMessage) string {
	if metadata == nil {
		return ""
	}

	var meta map[string]json.RawMessage
	if err := json.Unmarshal(metadata, &meta); err != nil {
		return ""
	}

	limitsRaw, ok := meta["limits_form"]
	if !ok {
		return ""
	}

	var limits map[string]interface{}
	if err := json.Unmarshal(limitsRaw, &limits); err != nil {
		return ""
	}

	textFields := []string{
		"general_description", "intended_purpose", "foreseeable_misuse",
		"space_limits", "time_limits", "environmental_conditions",
		"energy_sources", "materials_processed", "operating_modes",
		"maintenance_requirements", "personnel_requirements",
		"interfaces_description", "control_system_description",
		"safety_functions_description",
	}

	// A builder avoids re-copying the accumulated text for every field.
	var narrative strings.Builder
	for _, field := range textFields {
		// The comma-ok assertion also covers absent keys (nil interface).
		text, isString := limits[field].(string)
		if !isString || text == "" {
			continue
		}
		narrative.WriteString(text)
		narrative.WriteString("\n\n")
	}
	return narrative.String()
}
// patternCatToMeasureCat translates a pattern hazard category into the
// measure category it belongs to. Several hazard categories may share one
// measure category; any category not listed here maps to "general".
func patternCatToMeasureCat(patternCat string) string {
	switch patternCat {
	case "mechanical_hazard", "maintenance_hazard":
		return "mechanical"
	case "electrical_hazard", "emc_hazard":
		return "electrical"
	case "thermal_hazard", "fire_explosion":
		return "thermal"
	case "noise_vibration":
		return "noise_vibration"
	case "pneumatic_hydraulic":
		return "pneumatic_hydraulic"
	case "material_environmental", "radiation_hazard", "chemical_risk":
		return "material_environmental"
	case "ergonomic", "ergonomic_hazard":
		return "ergonomic"
	case "software_fault", "sensor_fault", "configuration_error",
		"update_failure", "hmi_error", "mode_confusion":
		return "software_control"
	case "safety_function_failure":
		return "safety_function"
	case "unauthorized_access", "communication_failure",
		"firmware_corruption", "logging_audit_failure":
		return "cyber_network"
	case "ai_misclassification", "false_classification", "model_drift",
		"data_poisoning", "sensor_spoofing", "unintended_bias":
		return "ai_specific"
	}
	return "general"
}
// deriveComponentType picks a component type based on the component's tags.
// Tags are inspected in slice order and the first tag that is recognised
// decides the result; unrecognised tags are skipped. When no tag matches
// (including an empty or nil slice) the component is treated as mechanical.
func deriveComponentType(tags []string) iace.ComponentType {
	for _, tag := range tags {
		switch tag {
		case "software", "has_software":
			return iace.ComponentTypeSoftware
		case "firmware", "has_firmware":
			return iace.ComponentTypeFirmware
		case "has_ai", "ai_model":
			return iace.ComponentTypeAIModel
		case "hmi", "display", "touchscreen":
			return iace.ComponentTypeHMI
		case "sensor", "camera":
			return iace.ComponentTypeSensor
		case "electric_motor", "electric_drive":
			return iace.ComponentTypeElectrical
		case "networked", "ethernet", "wifi":
			return iace.ComponentTypeNetwork
		case "hydraulic", "pneumatic":
			return iace.ComponentTypeActuator
		}
	}
	return iace.ComponentTypeMechanical
}
// extractOperationalStatesFromMetadata returns the string list stored under
// the top-level "operational_states" key of the project metadata (the
// selection made in the Betriebszustand UI).
//
// It returns nil when metadata is nil, is not a JSON object, lacks the key,
// or when the value cannot be decoded as an array of strings.
func extractOperationalStatesFromMetadata(metadata json.RawMessage) []string {
	if metadata == nil {
		return nil
	}

	var topLevel map[string]json.RawMessage
	if json.Unmarshal(metadata, &topLevel) != nil {
		return nil
	}

	statesRaw, present := topLevel["operational_states"]
	if !present {
		return nil
	}

	var selected []string
	if json.Unmarshal(statesRaw, &selected) != nil {
		// Discard anything that was partially decoded before the error.
		return nil
	}
	return selected
}
// mergeStringSlices returns the entries of a followed by the entries of b,
// keeping only the first occurrence of each string. Relative order of first
// occurrences is preserved. The result is nil when both inputs are empty.
func mergeStringSlices(a, b []string) []string {
	known := make(map[string]bool, len(a)+len(b))
	var merged []string
	for _, source := range [][]string{a, b} {
		for _, item := range source {
			if known[item] {
				continue
			}
			known[item] = true
			merged = append(merged, item)
		}
	}
	return merged
}
// extractIndustrySectorsFromMetadata reads the "industry_sectors" list from
// the "limits_form" object of the project metadata and translates each UI
// label into its machine-type identifiers.
//
// Labels are matched exactly against the table below; unknown labels are
// ignored. Identifiers appear in the order their labels were listed, without
// duplicates. The result is nil when the metadata is nil or malformed, when
// either key is missing, or when no label produced an identifier.
func extractIndustrySectorsFromMetadata(metadata json.RawMessage) []string {
	if metadata == nil {
		return nil
	}

	var (
		meta    map[string]json.RawMessage
		limits  map[string]json.RawMessage
		sectors []string
	)
	if json.Unmarshal(metadata, &meta) != nil {
		return nil
	}
	limitsRaw, hasLimits := meta["limits_form"]
	if !hasLimits || json.Unmarshal(limitsRaw, &limits) != nil {
		return nil
	}
	sectorsRaw, hasSectors := limits["industry_sectors"]
	if !hasSectors || json.Unmarshal(sectorsRaw, &sectors) != nil {
		return nil
	}

	// UI label -> machine-type identifiers.
	machineTypesByLabel := map[string][]string{
		"Allgemeiner Maschinenbau":         {"general_industry"},
		"Automobil / Zulieferer":           {"automotive"},
		"Robotik / Cobot":                  {"robotics_cobot", "cobot"},
		"Medizintechnik":                   {"medical_device", "infusion_pump", "ventilator", "patient_monitor"},
		"Lebensmittel / Getraenke":         {"food_processing"},
		"Verpackung":                       {"packaging"},
		"Pharma / Chemie":                  {"chemical", "pharmaceutical"},
		"Bau / Baumaschinen":               {"construction", "crane", "excavator"},
		"Forst / Holzbearbeitung":          {"forestry", "woodworking", "circular_saw"},
		"Aufzuege / Foerdertechnik":        {"elevator", "lift", "escalator", "conveyor"},
		"Textil":                           {"textile", "spinning", "weaving", "finishing"},
		"Landmaschinen":                    {"agricultural", "tractor", "harvester"},
		"Druck / Papier":                   {"printing"},
		"Metall / CNC":                     {"cnc", "metalworking", "lathe", "milling"},
		"Schweissen / Oberflaechentechnik": {"welding", "surface_treatment"},
	}

	var machineTypes []string
	emitted := make(map[string]struct{})
	for _, label := range sectors {
		for _, machineType := range machineTypesByLabel[label] {
			if _, dup := emitted[machineType]; dup {
				continue
			}
			emitted[machineType] = struct{}{}
			machineTypes = append(machineTypes, machineType)
		}
	}
	return machineTypes
}
// containsSubstring reports whether needle occurs in haystack when both are
// compared in lower case. No normalization beyond lower-casing is applied.
// An empty needle is contained in every haystack.
func containsSubstring(haystack, needle string) bool {
	lowerHaystack := strings.ToLower(haystack)
	lowerNeedle := strings.ToLower(needle)
	return strings.Contains(lowerHaystack, lowerNeedle)
}
// genericSafetyTerms is the set of words that appear in almost all risk
// assessments and therefore must NOT be used to decide whether a pattern is
// specific to a particular machine.
//
// isPatternRelevant looks words up by exact match, so stem-like entries such
// as "quetsch" only match that exact token. It also skips words shorter than
// five bytes before consulting this set, so the short entries here have no
// effect on that particular filter.
var genericSafetyTerms = func() map[string]bool {
	terms := []string{
		// General plant, workplace and personnel vocabulary
		"maschine", "anlage", "bereich", "gesamte",
		"arbeitsplatz", "gefahrbereich", "gefahrstelle",
		"gefahrenstelle", "person", "werker", "bediener",
		"steuerung", "schutzeinrichtung", "sicherheit",
		"betrieb", "wartung", "instandhaltung", "reinigung",
		"bewegung", "beweglich", "feststehend", "teil",
		"teile", "oeffnung", "zugang", "gefahr",
		"verletzung", "quetsch", "scher", "schneid",
		"stoss", "schlag", "einzug", "brand",
		"motor", "antrieb", "achse", "achsen",
		"kabel", "leitung", "schaltschrank", "spannung",
		"schutz", "gehaeuse", "oberflaeche", "boden",
		"leitfaehig", "elektrisch", "mechanisch",
		"bedienfeld", "display", "anzeige",
		"energie", "druck", "temperatur",

		// Abbreviations and synonyms that should not trigger relevance filter
		"kss", "emv", "esd", "dcs", "plr", "sil",
		"hmi", "sps", "rcd", "loto", "psa",

		// Common action words
		"bersten", "platzen", "abspringen", "spritzen",
		"einatmen", "ausrutschen", "herabfallen",
		"durchschlaegen", "wegschleudern",

		// Common structural terms that don't indicate a specific machine
		"gesamter", "gesamtes", "bereichs", "stelle",
		"innen", "aussen", "transport", "seite",
		"front", "rueck", "ober", "unter",
		"fuehrung", "lager", "verschleiss", "welle",
		"getriebe", "kette", "riemen", "feder",
		"spindel", "werkzeug", "werkstueck", "flucht",
	}

	set := make(map[string]bool, len(terms))
	for _, term := range terms {
		set[term] = true
	}
	return set
}()
// isPatternRelevant decides whether a matched pattern plausibly concerns the
// machine described by the narrative, based on vocabulary overlap.
//
// The pattern's zone, scenario and name are normalized and split into words.
// Words are stripped of surrounding punctuation; words shorter than five bytes
// and words listed in genericSafetyTerms are ignored. Every remaining word is
// considered machine-specific:
//   - if there are no machine-specific words, the pattern is generic and
//     always relevant;
//   - if any machine-specific word occurs as a substring of the normalized
//     narrative or of one of compNames, the pattern is relevant;
//   - otherwise the pattern is assumed to describe a different machine.
//
// NOTE(review): compNames are compared as passed in, without normalization
// here — presumably callers supply them already normalized; verify.
func isPatternRelevant(mp iace.PatternMatch, narrative string, compNames []string) bool {
	normalizedPattern := iace.NormalizeDEPublic(mp.ZoneDE + " " + mp.ScenarioDE + " " + mp.PatternName)
	normalizedNarrative := iace.NormalizeDEPublic(narrative)

	sawSpecificWord := false
	for _, token := range strings.Fields(normalizedPattern) {
		token = strings.Trim(token, ".,;:!?()/-")
		if len(token) < 5 || genericSafetyTerms[token] {
			continue
		}
		sawSpecificWord = true

		if strings.Contains(normalizedNarrative, token) {
			return true
		}
		for _, name := range compNames {
			if strings.Contains(name, token) {
				return true
			}
		}
	}

	// Reaching this point means no specific word matched: the pattern is only
	// relevant if it had no specific words at all.
	return !sawSpecificWord
}
// categoryHazardCap returns the maximum number of hazards to generate for a
// hazard category. Per the original author's notes the caps follow typical
// ISO 12100 risk assessment proportions:
//   - mechanical hazards scale with the component count (3 per component,
//     clamped to the range 15..60);
//   - electrical hazards scale with the component count (1 per component,
//     clamped to the range 8..20);
//   - secondary physical categories use small fixed caps;
//   - software/IT/organizational categories are kept minimal.
//
// Any category without an explicit cap, including unknown ones, is limited to 3.
func categoryHazardCap(cat string, componentCount int) int {
	switch cat {
	case "mechanical_hazard":
		switch limit := componentCount * 3; {
		case limit < 15:
			return 15
		case limit > 60:
			return 60
		default:
			return limit
		}
	case "electrical_hazard":
		switch {
		case componentCount < 8:
			return 8
		case componentCount > 20:
			return 20
		default:
			return componentCount
		}
	case "pneumatic_hydraulic":
		return 8
	case "thermal_hazard", "material_environmental":
		return 6
	case "safety_function_failure":
		return 5
	case "noise_vibration", "ergonomic", "ergonomic_hazard",
		"fire_explosion", "maintenance_hazard":
		return 4
	case "mode_confusion":
		return 2
	}
	// radiation_hazard, emc_hazard, software_fault, configuration_error,
	// hmi_error and every unlisted category share the smallest default.
	return 3
}
// normalizeZoneKey reduces a zone description to a compact key used for
// deduplication.
//
// The zone is normalized, common punctuation is replaced by spaces, and the
// text is split into words. Words shorter than four bytes, German filler
// words, and repeated words are dropped. The first three remaining words, in
// their original order, are joined with "_". If no word survives, the
// normalized, punctuation-free text is returned instead. An empty zone yields
// an empty key.
//
// Words are neither sorted nor stemmed, so zones that differ in word order or
// inflection produce different keys.
func normalizeZoneKey(zone string) string {
	if zone == "" {
		return ""
	}

	punctuation := strings.NewReplacer(
		",", " ", "/", " ", "(", " ", ")", " ",
		"-", " ", ".", " ", ":", " ", ";", " ",
	)
	cleaned := punctuation.Replace(iace.NormalizeDEPublic(zone))

	fillers := map[string]bool{
		"der": true, "die": true, "das": true, "und": true, "oder": true,
		"von": true, "des": true, "den": true, "dem": true, "ein": true,
		"eine": true, "fuer": true, "bei": true, "mit": true, "nach": true,
		"alle": true, "aller": true, "allem": true, "sowie": true,
		"insbesondere": true, "bereich": true, "gesamte": true, "gesamter": true,
		"innerhalb": true, "ausserhalb": true, "umgebung": true,
	}

	const maxKeyWords = 3
	keyWords := make([]string, 0, maxKeyWords)
	taken := make(map[string]bool)
	for _, word := range strings.Fields(cleaned) {
		if len(word) < 4 || fillers[word] || taken[word] {
			continue
		}
		taken[word] = true
		keyWords = append(keyWords, word)
		if len(keyWords) == maxKeyWords {
			// Only the first three significant words contribute to the key.
			break
		}
	}

	if len(keyWords) == 0 {
		return cleaned
	}
	return strings.Join(keyWords, "_")
}
// findHazardsForMeasureByCategory returns the hazard IDs associated with a
// measure category.
//
// An exact key match in hazardsByCategory wins. Otherwise the function falls
// back to a prefix match: a hazard category qualifies when both it and
// measureCat are at least four bytes long and share the same first four
// bytes. Only one category's IDs are returned. When several categories
// qualify, the lexicographically smallest key is chosen so that the result
// does not depend on Go's unspecified map iteration order. Returns nil when
// nothing matches.
func findHazardsForMeasureByCategory(measureCat string, hazardsByCategory map[string][]uuid.UUID) []uuid.UUID {
	if ids, ok := hazardsByCategory[measureCat]; ok {
		return ids
	}

	const prefixLen = 4
	if len(measureCat) < prefixLen {
		return nil
	}
	prefix := measureCat[:prefixLen]

	chosen, found := "", false
	for cat := range hazardsByCategory {
		if len(cat) < prefixLen || cat[:prefixLen] != prefix {
			continue
		}
		// Keep the smallest matching key for a deterministic choice.
		if !found || cat < chosen {
			chosen, found = cat, true
		}
	}
	if !found {
		return nil
	}
	return hazardsByCategory[chosen]
}