// extractNarrativeFromMetadata concatenates the free-text answers stored under
// the "limits_form" key of the project metadata into one narrative string.
//
// Fields are emitted in a fixed order and every non-empty string value is
// followed by a blank line ("\n\n"). Fields that are missing, empty, or not
// JSON strings are skipped. The result is "" when metadata is nil, cannot be
// decoded as a JSON object, has no "limits_form" entry, or that entry cannot
// be decoded as a JSON object.
func extractNarrativeFromMetadata(metadata json.RawMessage) string {
	if metadata == nil {
		return ""
	}

	var topLevel map[string]json.RawMessage
	if json.Unmarshal(metadata, &topLevel) != nil {
		return ""
	}
	formRaw, found := topLevel["limits_form"]
	if !found {
		return ""
	}
	var form map[string]interface{}
	if json.Unmarshal(formRaw, &form) != nil {
		return ""
	}

	// The order of this list is the order of the paragraphs in the narrative.
	narrativeFields := [...]string{
		"general_description",
		"intended_purpose",
		"foreseeable_misuse",
		"space_limits",
		"time_limits",
		"environmental_conditions",
		"energy_sources",
		"materials_processed",
		"operating_modes",
		"maintenance_requirements",
		"personnel_requirements",
		"interfaces_description",
		"control_system_description",
		"safety_functions_description",
	}

	var narrative strings.Builder
	for _, name := range narrativeFields {
		// A missing key yields a nil interface, for which the assertion
		// simply reports false.
		text, isString := form[name].(string)
		if !isString || text == "" {
			continue
		}
		narrative.WriteString(text)
		narrative.WriteString("\n\n")
	}
	return narrative.String()
}
// patternCatToMeasureCat maps a pattern hazard category to the measure
// category used for it. Several hazard categories share one measure category.
// The comparison is exact (case-sensitive); any category that is not listed
// maps to "general".
//
// A switch is used instead of a map literal so that no lookup table has to be
// built on every call.
func patternCatToMeasureCat(patternCat string) string {
	switch patternCat {
	case "mechanical_hazard", "maintenance_hazard":
		return "mechanical"
	case "electrical_hazard", "emc_hazard":
		return "electrical"
	case "thermal_hazard", "fire_explosion":
		return "thermal"
	case "noise_vibration":
		return "noise_vibration"
	case "pneumatic_hydraulic":
		return "pneumatic_hydraulic"
	case "material_environmental", "radiation_hazard", "chemical_risk":
		return "material_environmental"
	case "ergonomic", "ergonomic_hazard":
		return "ergonomic"
	case "software_fault", "sensor_fault", "configuration_error",
		"update_failure", "hmi_error", "mode_confusion":
		return "software_control"
	case "safety_function_failure":
		return "safety_function"
	case "unauthorized_access", "communication_failure",
		"firmware_corruption", "logging_audit_failure":
		return "cyber_network"
	case "ai_misclassification", "false_classification", "model_drift",
		"data_poisoning", "sensor_spoofing", "unintended_bias":
		return "ai_specific"
	default:
		return "general"
	}
}
func deriveComponentType(tags []string) iace.ComponentType { for _, t := range tags { switch { case t == "software" || t == "has_software": return iace.ComponentTypeSoftware case t == "firmware" || t == "has_firmware": return iace.ComponentTypeFirmware case t == "has_ai" || t == "ai_model": return iace.ComponentTypeAIModel case t == "hmi" || t == "display" || t == "touchscreen": return iace.ComponentTypeHMI case t == "sensor" || t == "camera": return iace.ComponentTypeSensor case t == "electric_motor" || t == "electric_drive": return iace.ComponentTypeElectrical case t == "networked" || t == "ethernet" || t == "wifi": return iace.ComponentTypeNetwork case t == "hydraulic" || t == "pneumatic": return iace.ComponentTypeActuator } } return iace.ComponentTypeMechanical } // extractOperationalStatesFromMetadata reads the explicit operational_states // selection that the user set via the Betriebszustand-UI. func extractOperationalStatesFromMetadata(metadata json.RawMessage) []string { if metadata == nil { return nil } var meta map[string]json.RawMessage if err := json.Unmarshal(metadata, &meta); err != nil { return nil } raw, ok := meta["operational_states"] if !ok { return nil } var states []string if err := json.Unmarshal(raw, &states); err != nil { return nil } return states } // mergeStringSlices merges two string slices, deduplicating entries. func mergeStringSlices(a, b []string) []string { seen := make(map[string]bool, len(a)+len(b)) var result []string for _, s := range a { if !seen[s] { seen[s] = true result = append(result, s) } } for _, s := range b { if !seen[s] { seen[s] = true result = append(result, s) } } return result } // extractIndustrySectorsFromMetadata reads the industry_sectors selection // from project metadata and maps them to MachineTypes for pattern filtering. 
// extractIndustrySectorsFromMetadata reads the "industry_sectors" list nested
// under "limits_form" in the project metadata and translates each German UI
// label into its machine-type identifiers (used, per the original author's
// note, for pattern filtering).
//
// Labels without a mapping are ignored. Identifiers appear in the order of
// the selected sectors and each identifier is returned at most once. The
// result is nil when metadata is nil, any of the JSON levels cannot be
// decoded, a key is missing, or no selected label has a mapping.
func extractIndustrySectorsFromMetadata(metadata json.RawMessage) []string {
	if metadata == nil {
		return nil
	}

	var topLevel map[string]json.RawMessage
	if err := json.Unmarshal(metadata, &topLevel); err != nil {
		return nil
	}
	formRaw, ok := topLevel["limits_form"]
	if !ok {
		return nil
	}

	var form map[string]json.RawMessage
	if err := json.Unmarshal(formRaw, &form); err != nil {
		return nil
	}
	sectorsRaw, ok := form["industry_sectors"]
	if !ok {
		return nil
	}

	var sectors []string
	// The destination must be passed by address; the source text had this
	// argument corrupted to "§ors" (a mis-decoded "&sect" entity).
	if err := json.Unmarshal(sectorsRaw, &sectors); err != nil {
		return nil
	}

	// Keys are the exact labels shown in the UI; lookups are case-sensitive.
	machineTypesByLabel := map[string][]string{
		"Allgemeiner Maschinenbau":         {"general_industry"},
		"Automobil / Zulieferer":           {"automotive"},
		"Robotik / Cobot":                  {"robotics_cobot", "cobot"},
		"Medizintechnik":                   {"medical_device", "infusion_pump", "ventilator", "patient_monitor"},
		"Lebensmittel / Getraenke":         {"food_processing"},
		"Verpackung":                       {"packaging"},
		"Pharma / Chemie":                  {"chemical", "pharmaceutical"},
		"Bau / Baumaschinen":               {"construction", "crane", "excavator"},
		"Forst / Holzbearbeitung":          {"forestry", "woodworking", "circular_saw"},
		"Aufzuege / Foerdertechnik":        {"elevator", "lift", "escalator", "conveyor"},
		"Textil":                           {"textile", "spinning", "weaving", "finishing"},
		"Landmaschinen":                    {"agricultural", "tractor", "harvester"},
		"Druck / Papier":                   {"printing"},
		"Metall / CNC":                     {"cnc", "metalworking", "lathe", "milling"},
		"Schweissen / Oberflaechentechnik": {"welding", "surface_treatment"},
	}

	var machineTypes []string
	seen := make(map[string]bool)
	for _, label := range sectors {
		for _, mt := range machineTypesByLabel[label] {
			if seen[mt] {
				continue
			}
			seen[mt] = true
			machineTypes = append(machineTypes, mt)
		}
	}
	return machineTypes
}

// containsSubstring reports whether needle occurs in haystack, ignoring case.
// Both strings are lower-cased with strings.ToLower before the comparison; no
// other normalization (such as umlaut transliteration) is applied. An empty
// needle always matches.
func containsSubstring(haystack, needle string) bool {
	return strings.Contains(strings.ToLower(haystack), strings.ToLower(needle))
}
// genericSafetyTerms is a set of words that, according to the original author,
// appear in almost all risk assessments and therefore must NOT be used to
// decide whether a pattern is specific to a particular machine.
//
// The keys are lower-case German words with umlauts written out in ASCII
// ("oe", "ae", "ue"). isPatternRelevant looks words up here by exact key after
// normalizing the pattern text and trimming punctuation.
//
// NOTE(review): several keys are word stems ("quetsch", "scher", "schneid").
// Because the lookup is an exact match, they only take effect if
// iace.NormalizeDEPublic reduces words to these stems — confirm.
// NOTE(review): isPatternRelevant skips words shorter than five bytes before
// consulting this set, so the four-letter keys ("teil", "ober") are never
// looked up by that function.
var genericSafetyTerms = map[string]bool{
	"maschine": true, "anlage": true, "bereich": true, "gesamte": true,
	"arbeitsplatz": true, "gefahrbereich": true, "gefahrstelle": true,
	"gefahrenstelle": true, "person": true, "werker": true, "bediener": true,
	"steuerung": true, "schutzeinrichtung": true, "sicherheit": true,
	"betrieb": true, "wartung": true, "instandhaltung": true, "reinigung": true,
	"bewegung": true, "beweglich": true, "feststehend": true, "teil": true,
	"teile": true, "oeffnung": true, "zugang": true, "gefahr": true,
	"verletzung": true, "quetsch": true, "scher": true, "schneid": true,
	"stoss": true, "schlag": true, "einzug": true, "brand": true,
	"motor": true, "antrieb": true, "achse": true, "achsen": true,
	"kabel": true, "leitung": true, "schaltschrank": true, "spannung": true,
	"schutz": true, "gehaeuse": true, "oberflaeche": true, "boden": true,
	"leitfaehig": true, "elektrisch": true, "mechanisch": true,
	"bedienfeld": true, "display": true, "anzeige": true, "energie": true,
	"druck": true, "temperatur": true,
	// Common structural terms that don't indicate a specific machine
	"gesamter": true, "gesamtes": true, "bereichs": true, "stelle": true,
	"innen": true, "aussen": true, "transport": true, "seite": true,
	"front": true, "rueck": true, "ober": true, "unter": true,
	"fuehrung": true, "lager": true, "verschleiss": true, "welle": true,
	"getriebe": true, "kette": true, "riemen": true, "feder": true,
	"spindel": true, "werkzeug": true, "werkstueck": true, "flucht": true,
}
func isPatternRelevant(mp iace.PatternMatch, narrative string, compNames []string) bool { patternText := iace.NormalizeDEPublic(mp.ZoneDE + " " + mp.ScenarioDE + " " + mp.PatternName) narrativeNorm := iace.NormalizeDEPublic(narrative) // Extract machine-specific words from pattern (not generic safety terms) patternWords := strings.Fields(patternText) var specificWords []string for _, w := range patternWords { // Clean punctuation w = strings.Trim(w, ".,;:!?()/-") if len(w) < 5 || genericSafetyTerms[w] { continue } specificWords = append(specificWords, w) } // If pattern has no specific words, it's generic → always relevant if len(specificWords) == 0 { return true } // Check if at least one specific word appears in the narrative or components for _, sw := range specificWords { if strings.Contains(narrativeNorm, sw) { return true } for _, cn := range compNames { if strings.Contains(cn, sw) { return true } } } // No specific word found in narrative → pattern is for a different machine return false } // categoryHazardCap returns the maximum number of hazards to generate per category. // Caps are based on typical ISO 12100 risk assessment proportions: // - Core physical categories (mechanical, electrical): scale with component count // - Secondary categories (thermal, noise, material): smaller fixed caps // - Software/IT/organizational categories: minimal (these are usually covered by // other standards like IEC 62443, not ISO 12100 machinery risk assessment) func categoryHazardCap(cat string, componentCount int) int { // Core machinery hazard categories — scale with complexity switch cat { case "mechanical_hazard": // Typically 1-3 hazards per component (quetschen, scheren, stoss...) 
cap := componentCount * 3 if cap < 15 { cap = 15 } if cap > 60 { cap = 60 } return cap case "electrical_hazard": // Typically 8-15 for a standard machine cap := componentCount if cap < 8 { cap = 8 } if cap > 20 { cap = 20 } return cap case "pneumatic_hydraulic": return 8 case "thermal_hazard": return 6 case "noise_vibration": return 4 case "material_environmental": return 6 case "ergonomic", "ergonomic_hazard": return 4 case "fire_explosion": return 4 case "radiation_hazard", "emc_hazard": return 3 // Software/IT/organizational — minimal for machinery assessment case "safety_function_failure": return 5 case "software_fault": return 3 case "configuration_error": return 3 case "hmi_error": return 3 case "maintenance_hazard": return 4 case "mode_confusion": return 2 default: return 3 } } // normalizeZoneKey reduces a zone string to its core components for better dedup. // E.g. "Schaltschrank, Sammelschiene" and "Schaltschrank-Innenraum, Sammelschienen" // should dedup to the same key. func normalizeZoneKey(zone string) string { if zone == "" { return "" } norm := iace.NormalizeDEPublic(zone) // Remove filler words and punctuation for _, r := range []string{",", "/", "(", ")", "-", ".", ":", ";"} { norm = strings.ReplaceAll(norm, r, " ") } // Extract significant words (>3 chars), sort for stable key words := strings.Fields(norm) var sig []string seen := make(map[string]bool) stopWords := map[string]bool{ "der": true, "die": true, "das": true, "und": true, "oder": true, "von": true, "des": true, "den": true, "dem": true, "ein": true, "eine": true, "fuer": true, "bei": true, "mit": true, "nach": true, "alle": true, "aller": true, "allem": true, "sowie": true, "insbesondere": true, "bereich": true, "gesamte": true, "gesamter": true, "innerhalb": true, "ausserhalb": true, "umgebung": true, } for _, w := range words { if len(w) < 4 || stopWords[w] || seen[w] { continue } seen[w] = true sig = append(sig, w) } if len(sig) == 0 { return norm } // Take first 3 significant words 
as key (enough for dedup) if len(sig) > 3 { sig = sig[:3] } return strings.Join(sig, "_") } // findHazardForMeasureByCategory finds a matching hazard for a measure. func findHazardForMeasureByCategory(measureCat string, hazardsByCategory map[string]uuid.UUID) uuid.UUID { if id, ok := hazardsByCategory[measureCat]; ok { return id } for cat, id := range hazardsByCategory { if len(measureCat) > 3 && len(cat) > 3 && cat[:4] == measureCat[:4] { return id } } for _, id := range hazardsByCategory { return id } return uuid.Nil }