afb3f83f30
Engine precision (stop foreign-machine patterns leaking into a project):
- Wire project.MachineType into the engine machine-type gate (empty input no
longer fires every machine class — press/cnc/excavator/crane/medical...).
- Capability-domain gating extended by 7 domains (outdoor, ventilation,
machining, bulk, palletizer, playground, fitness) so domain-specific hazards
only fire when the narrative names that domain; emitted via keyword_dictionary.
- Relevance backstop moved into iace (single gating contract, testable), and its
dominant false-anchor class removed (a long pattern word no longer matches a
short common token; prepositions/leitung added to the generic stoplist).
- New guard tests: TestCrossDomainPrecision (full pipeline, 0 foreign per GT) and
TestPatternReachability now asserts 0 dead patterns. Both GTs keep coverage 1.0.
Reachability fix: the 51 dead patterns required electrical/pneumatic/hydraulic
tags nothing produced — renamed to the canonical electrical_energy/
pneumatic_pressure/hydraulic_pressure/hydraulic_part.
Component review (negation is best-effort + expert-correctable):
- Parser surfaces negated components (ComponentMatch.Negated) instead of dropping
them; negated components contribute no tags/energy → no phantom hazards.
- presence_status (vorhanden|nicht_vorhanden|geloescht) + ce_marked on components;
only `vorhanden` components feed matching. CE+safety-relevant flags the PL/SIL obligation.
- Force re-seed preserves the expert's component decisions instead of wiping them.
- Tag-based component→hazard assignment (was: all on the first component).
- Negation-aware narrative parsing ("keine Pneumatik" no longer extracts it).
Local-dev DB: ai-sdk sets search_path=compliance,core,public; reconcile migrations
152-156 bring the consolidated local iace tables to the current schema + add the
presence_status/ce_marked columns. Machine-type vocabulary endpoint for the form.
[migration-approved]
Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
456 lines
16 KiB
Go
456 lines
16 KiB
Go
package handlers
|
|
|
|
import (
|
|
"encoding/json"
|
|
"sort"
|
|
"strings"
|
|
|
|
"github.com/breakpilot/ai-compliance-sdk/internal/iace"
|
|
"github.com/google/uuid"
|
|
)
|
|
|
|
// withUniversalLifecycles returns the given lifecycle phases with empty
// entries and duplicates removed, followed by the four phases that occur on
// virtually every machine: normal_operation, setup, maintenance and cleaning.
// This way the hazards of those phases are derived even when the limits form
// does not list them explicitly.
//
// Phases already present in parsed keep their original position; missing
// universal phases are appended in the fixed order above. The result is never
// nil.
func withUniversalLifecycles(parsed []string) []string {
	universal := [...]string{"normal_operation", "setup", "maintenance", "cleaning"}

	known := make(map[string]bool, len(parsed)+len(universal))
	phases := make([]string, 0, len(parsed)+len(universal))

	// add appends a phase once; empty names and repeats are ignored.
	add := func(phase string) {
		if phase == "" || known[phase] {
			return
		}
		known[phase] = true
		phases = append(phases, phase)
	}

	for _, phase := range parsed {
		add(phase)
	}
	for _, phase := range universal {
		add(phase)
	}
	return phases
}
|
|
|
|
// extractNarrativeFromMetadata concatenates the textual content of the
// "limits_form" object inside the project metadata into one narrative string.
//
// Every field of the form is read rather than a whitelist, because the form
// schema evolves; noise fields such as serial number or year are harmless as
// long as the downstream parser only reacts to recognised keywords. Fields are
// visited in sorted key order so the output is deterministic.
//
//   - A string field that is not blank is written followed by a blank line.
//   - An array field contributes each non-empty string element followed by
//     ", ", and then a blank line (also when the array had no usable element).
//   - Any other value type (numbers, booleans, nested objects, null) is skipped.
//
// It returns "" when metadata is empty, is not a JSON object, has no
// "limits_form" key, or when "limits_form" is not a JSON object.
func extractNarrativeFromMetadata(metadata json.RawMessage) string {
	if len(metadata) == 0 {
		return ""
	}

	var top map[string]json.RawMessage
	if json.Unmarshal(metadata, &top) != nil {
		return ""
	}
	rawForm, present := top["limits_form"]
	if !present {
		return ""
	}
	var form map[string]interface{}
	if json.Unmarshal(rawForm, &form) != nil {
		return ""
	}

	// Map iteration order is random; sort the field names first.
	fieldNames := make([]string, 0, len(form))
	for name := range form {
		fieldNames = append(fieldNames, name)
	}
	sort.Strings(fieldNames)

	var narrative strings.Builder
	for _, name := range fieldNames {
		if text, isText := form[name].(string); isText {
			if strings.TrimSpace(text) != "" {
				narrative.WriteString(text)
				narrative.WriteString("\n\n")
			}
			continue
		}

		items, isList := form[name].([]interface{})
		if !isList {
			continue
		}
		for _, item := range items {
			if text, isText := item.(string); isText && text != "" {
				narrative.WriteString(text)
				narrative.WriteString(", ")
			}
		}
		narrative.WriteString("\n\n")
	}
	return narrative.String()
}
|
|
|
|
// acceptableMeasureCategories returns the set of measure HazardCategory values
// that are semantically applicable to a hazard of the given pattern category.
//
// The answer is a set rather than a single value because many pattern
// categories accept measures from several related measure-library categories.
// A safety_function_failure hazard, for example, is sensibly mitigated by
// software_control measures (watchdogs, plausibility checks, self-tests) and
// not only by the (almost empty) safety_function category.
//
// "general" is always part of the returned set. An unknown pattern category
// yields a set containing only "general".
func acceptableMeasureCategories(patternCat string) map[string]bool {
	var pools []string

	switch patternCat {
	case "mechanical_hazard", "maintenance_hazard":
		pools = []string{"mechanical"}

	case "electrical_hazard":
		pools = []string{"electrical"}

	// high_temperature is a legacy synonym of the thermal category.
	case "thermal_hazard", "fire_explosion", "chemical_risk", "high_temperature":
		pools = []string{"thermal", "material_environmental"}

	// ISO 12100 Annex B lists noise (no. 4) and vibration (no. 5) as two
	// top-level groups. The combined noise_vibration alias is kept for
	// backwards compatibility, and noise_source / vibration_source are
	// legacy synonyms. All of them resolve to the same measure pool, since
	// the library does not separate noise from vibration measures.
	case "noise_hazard", "vibration_hazard", "noise_vibration", "noise_source", "vibration_source":
		pools = []string{"noise_vibration", "ergonomic"}

	case "pneumatic_hydraulic":
		pools = []string{"pneumatic_hydraulic", "mechanical"}

	// material_environmental_hazard is a legacy synonym.
	case "material_environmental", "radiation_hazard", "material_environmental_hazard":
		pools = []string{"material_environmental"}

	case "ergonomic", "ergonomic_hazard":
		pools = []string{"ergonomic"}

	case "emc_hazard":
		pools = []string{"electrical", "software_control"}

	case "safety_function_failure":
		pools = []string{"safety_function", "software_control"}

	case "software_fault", "sensor_fault", "configuration_error",
		"update_failure", "hmi_error", "mode_confusion":
		pools = []string{"software_control"}

	case "unauthorized_access", "communication_failure",
		"firmware_corruption", "logging_audit_failure":
		pools = []string{"cyber_network", "software_control"}

	case "ai_misclassification", "false_classification", "model_drift",
		"data_poisoning", "sensor_spoofing", "unintended_bias":
		pools = []string{"ai_specific", "software_control"}

	// CRA / DIN EN 40000-1-2 cyber-resilience patterns (HP1910+): the
	// umbrella category for manufacturer-side obligations such as SBOM,
	// signed updates, CVD policy, patch SLA, hardening docs and incident
	// notification. Besides the dedicated pool, the broader cyber_network
	// and software_control pools apply too (e.g. "intrusion detection",
	// "audit logging").
	case "cyber_resilience":
		pools = []string{"cyber_resilience", "cyber_network", "software_control"}
	}

	accepted := map[string]bool{"general": true}
	for _, pool := range pools {
		accepted[pool] = true
	}
	return accepted
}
|
|
|
|
// isCategoryCompatible reports whether a measure whose HazardCategory is
// measureCat may be attached to a hazard whose acceptable measure categories
// are the keys of accepted.
//
// An empty measureCat is always compatible (legacy entries without a
// category). Any other value is compatible only if it is present in accepted;
// "general" is therefore compatible only when the caller has put it into the
// set.
//
// This guard keeps patterns from silently inheriting nonsense mitigations,
// e.g. HP1651 "robot restart while person in cell" inheriting M054 "Sichere
// thermische Auslegung", a thermal-design measure that was used as a generic
// default in roughly 100 mechanical patterns.
func isCategoryCompatible(measureCat string, accepted map[string]bool) bool {
	return measureCat == "" || accepted[measureCat]
}
|
|
|
|
// keysOf returns the keys of a string-bool set in ascending order. It is used
// for diagnostic log messages that report which measure categories were
// accepted for a hazard, so the order must be stable between calls.
//
// Keys are returned regardless of their boolean value. The result is never
// nil; an empty or nil set yields an empty slice.
func keysOf(s map[string]bool) []string {
	out := make([]string, 0, len(s))
	for k := range s {
		out = append(out, k)
	}
	// Map iteration order is random; sort to honour the documented contract.
	sort.Strings(out)
	return out
}
|
|
|
|
// patternCatToMeasureCat maps a pattern hazard category to its primary measure
// category. Unknown pattern categories map to "general".
//
// The mapping covers the same pattern categories as
// acceptableMeasureCategories, including the ISO 12100 noise/vibration split
// (noise_hazard, vibration_hazard) and the legacy synonyms (noise_source,
// vibration_source, high_temperature, material_environmental_hazard). Those
// used to be missing here and fell through to "general".
func patternCatToMeasureCat(patternCat string) string {
	switch patternCat {
	case "mechanical_hazard", "maintenance_hazard":
		return "mechanical"
	case "electrical_hazard", "emc_hazard":
		return "electrical"
	case "thermal_hazard", "fire_explosion", "high_temperature":
		return "thermal"
	case "noise_vibration", "noise_hazard", "vibration_hazard",
		"noise_source", "vibration_source":
		return "noise_vibration"
	case "pneumatic_hydraulic":
		return "pneumatic_hydraulic"
	case "material_environmental", "material_environmental_hazard",
		"radiation_hazard", "chemical_risk":
		return "material_environmental"
	case "ergonomic", "ergonomic_hazard":
		return "ergonomic"
	case "safety_function_failure":
		return "safety_function"
	case "software_fault", "sensor_fault", "configuration_error",
		"update_failure", "hmi_error", "mode_confusion":
		return "software_control"
	case "unauthorized_access", "communication_failure",
		"firmware_corruption", "logging_audit_failure":
		return "cyber_network"
	case "ai_misclassification", "false_classification", "model_drift",
		"data_poisoning", "sensor_spoofing", "unintended_bias":
		return "ai_specific"
	case "cyber_resilience":
		return "cyber_resilience"
	default:
		return "general"
	}
}
|
|
|
|
// deriveComponentType guesses the component type from its tags.
|
|
func deriveComponentType(tags []string) iace.ComponentType {
|
|
for _, t := range tags {
|
|
switch {
|
|
case t == "software" || t == "has_software":
|
|
return iace.ComponentTypeSoftware
|
|
case t == "firmware" || t == "has_firmware":
|
|
return iace.ComponentTypeFirmware
|
|
case t == "has_ai" || t == "ai_model":
|
|
return iace.ComponentTypeAIModel
|
|
case t == "hmi" || t == "display" || t == "touchscreen":
|
|
return iace.ComponentTypeHMI
|
|
case t == "sensor" || t == "camera":
|
|
return iace.ComponentTypeSensor
|
|
case t == "electric_motor" || t == "electric_drive":
|
|
return iace.ComponentTypeElectrical
|
|
case t == "networked" || t == "ethernet" || t == "wifi":
|
|
return iace.ComponentTypeNetwork
|
|
case t == "hydraulic" || t == "pneumatic":
|
|
return iace.ComponentTypeActuator
|
|
}
|
|
}
|
|
return iace.ComponentTypeMechanical
|
|
}
|
|
|
|
// extractOperationalStatesFromMetadata returns the string array stored under
// the top-level "operational_states" key of the project metadata, i.e. the
// explicit selection made in the operational-state (Betriebszustand) UI.
//
// It returns nil when metadata is empty, is not a JSON object, lacks the key,
// or when the value cannot be decoded as an array of strings.
func extractOperationalStatesFromMetadata(metadata json.RawMessage) []string {
	if len(metadata) == 0 {
		return nil
	}

	var top map[string]json.RawMessage
	if json.Unmarshal(metadata, &top) != nil {
		return nil
	}

	rawStates, present := top["operational_states"]
	if !present {
		return nil
	}

	var selected []string
	if json.Unmarshal(rawStates, &selected) != nil {
		// A partially decoded slice is discarded on purpose.
		return nil
	}
	return selected
}
|
|
|
|
// mergeStringSlices returns the elements of a followed by the elements of b,
// keeping only the first occurrence of each string. Relative order is
// preserved. Empty strings are treated like any other value. The result is
// nil when both inputs are empty.
func mergeStringSlices(a, b []string) []string {
	taken := make(map[string]bool, len(a)+len(b))
	var merged []string

	for _, part := range [2][]string{a, b} {
		for _, item := range part {
			if taken[item] {
				continue
			}
			taken[item] = true
			merged = append(merged, item)
		}
	}
	return merged
}
|
|
|
|
// extractIndustrySectorsFromMetadata reads the "industry_sectors" selection
// from the "limits_form" object of the project metadata and translates each
// selected UI label into the machine-type identifiers used for pattern
// filtering.
//
// Labels are matched literally against the table below; labels that are not
// in the table are ignored. Machine types are returned in the order in which
// their sectors were selected, without duplicates.
//
// It returns nil when metadata is empty or malformed, when "limits_form" or
// "industry_sectors" is missing, when "industry_sectors" is not an array of
// strings, or when no selected label is known.
func extractIndustrySectorsFromMetadata(metadata json.RawMessage) []string {
	if metadata == nil {
		return nil
	}
	var meta map[string]json.RawMessage
	if err := json.Unmarshal(metadata, &meta); err != nil {
		return nil
	}
	limitsRaw, ok := meta["limits_form"]
	if !ok {
		return nil
	}
	var limits map[string]json.RawMessage
	if err := json.Unmarshal(limitsRaw, &limits); err != nil {
		return nil
	}
	sectorsRaw, ok := limits["industry_sectors"]
	if !ok {
		return nil
	}
	var sectors []string
	if err := json.Unmarshal(sectorsRaw, &sectors); err != nil {
		return nil
	}

	// UI label -> machine types the pattern engine filters on.
	labelMap := map[string][]string{
		"Allgemeiner Maschinenbau":         {"general_industry"},
		"Automobil / Zulieferer":           {"automotive"},
		"Robotik / Cobot":                  {"robotics_cobot", "cobot"},
		"Medizintechnik":                   {"medical_device", "infusion_pump", "ventilator", "patient_monitor"},
		"Lebensmittel / Getraenke":         {"food_processing"},
		"Verpackung":                       {"packaging"},
		"Pharma / Chemie":                  {"chemical", "pharmaceutical"},
		"Bau / Baumaschinen":               {"construction", "crane", "excavator"},
		"Forst / Holzbearbeitung":          {"forestry", "woodworking", "circular_saw"},
		"Aufzuege / Foerdertechnik":        {"elevator", "lift", "escalator", "conveyor"},
		"Textil":                           {"textile", "spinning", "weaving", "finishing"},
		"Landmaschinen":                    {"agricultural", "tractor", "harvester"},
		"Druck / Papier":                   {"printing"},
		"Metall / CNC":                     {"cnc", "metalworking", "lathe", "milling"},
		"Schweissen / Oberflaechentechnik": {"welding", "surface_treatment"},
	}

	var result []string
	seen := make(map[string]bool)
	for _, sector := range sectors {
		// Unknown labels yield a nil slice here and are skipped.
		for _, mt := range labelMap[sector] {
			if !seen[mt] {
				seen[mt] = true
				result = append(result, mt)
			}
		}
	}
	return result
}
|
|
|
|
// containsSubstring reports whether needle occurs in haystack, ignoring case.
// Both strings are lowercased before the comparison; no other normalisation
// is applied. An empty needle always matches.
func containsSubstring(haystack, needle string) bool {
	lowerHaystack := strings.ToLower(haystack)
	lowerNeedle := strings.ToLower(needle)
	return strings.Contains(lowerHaystack, lowerNeedle)
}
|
|
|
|
// categoryHazardCap returns the maximum number of hazards to generate for one
// hazard category.
//
// The caps follow the typical proportions of an ISO 12100 risk assessment:
//   - mechanical_hazard scales with machine complexity: three per component,
//     clamped to the range 15..60 (roughly 1-3 hazards per component, e.g.
//     crushing, shearing, impact).
//   - electrical_hazard: one per component, clamped to the range 8..20.
//   - Secondary physical categories get small fixed caps.
//   - Software/IT/organisational categories get minimal caps, since they are
//     usually covered by other standards such as IEC 62443.
//
// Any category not listed explicitly is capped at 3.
func categoryHazardCap(cat string, componentCount int) int {
	var scaled, floor, ceiling int

	switch cat {
	// Core machinery categories: scale with the component count, then clamp.
	case "mechanical_hazard":
		scaled, floor, ceiling = componentCount*3, 15, 60
	case "electrical_hazard":
		scaled, floor, ceiling = componentCount, 8, 20

	// Secondary physical categories: fixed caps.
	case "pneumatic_hydraulic":
		return 8
	case "thermal_hazard", "material_environmental":
		return 6
	case "noise_vibration", "ergonomic", "ergonomic_hazard", "fire_explosion":
		return 4
	case "radiation_hazard", "emc_hazard":
		return 3

	// Software/IT/organisational categories: minimal caps.
	case "safety_function_failure":
		return 5
	case "maintenance_hazard":
		return 4
	case "software_fault", "configuration_error", "hmi_error":
		return 3
	case "mode_confusion":
		return 2

	default:
		return 3
	}

	if scaled < floor {
		return floor
	}
	if scaled > ceiling {
		return ceiling
	}
	return scaled
}
|
|
|
|
// normalizeZoneKey reduces a zone string to its core components for better dedup.
|
|
// E.g. "Schaltschrank, Sammelschiene" and "Schaltschrank-Innenraum, Sammelschienen"
|
|
// should dedup to the same key.
|
|
func normalizeZoneKey(zone string) string {
|
|
if zone == "" {
|
|
return ""
|
|
}
|
|
norm := iace.NormalizeDEPublic(zone)
|
|
// Remove filler words and punctuation
|
|
for _, r := range []string{",", "/", "(", ")", "-", ".", ":", ";"} {
|
|
norm = strings.ReplaceAll(norm, r, " ")
|
|
}
|
|
// Extract significant words (>3 chars), sort for stable key
|
|
words := strings.Fields(norm)
|
|
var sig []string
|
|
seen := make(map[string]bool)
|
|
stopWords := map[string]bool{
|
|
"der": true, "die": true, "das": true, "und": true, "oder": true,
|
|
"von": true, "des": true, "den": true, "dem": true, "ein": true,
|
|
"eine": true, "fuer": true, "bei": true, "mit": true, "nach": true,
|
|
"alle": true, "aller": true, "allem": true, "sowie": true,
|
|
"insbesondere": true, "bereich": true, "gesamte": true, "gesamter": true,
|
|
"innerhalb": true, "ausserhalb": true, "umgebung": true,
|
|
}
|
|
for _, w := range words {
|
|
if len(w) < 4 || stopWords[w] || seen[w] {
|
|
continue
|
|
}
|
|
seen[w] = true
|
|
sig = append(sig, w)
|
|
}
|
|
if len(sig) == 0 {
|
|
return norm
|
|
}
|
|
// Take first 3 significant words as key (enough for dedup)
|
|
if len(sig) > 3 {
|
|
sig = sig[:3]
|
|
}
|
|
return strings.Join(sig, "_")
|
|
}
|
|
|
|
// findHazardsForMeasureByCategory finds all hazards matching a measure's category.
|
|
func findHazardsForMeasureByCategory(measureCat string, hazardsByCategory map[string][]uuid.UUID) []uuid.UUID {
|
|
if ids, ok := hazardsByCategory[measureCat]; ok {
|
|
return ids
|
|
}
|
|
for cat, ids := range hazardsByCategory {
|
|
if len(measureCat) > 3 && len(cat) > 3 && cat[:4] == measureCat[:4] {
|
|
return ids
|
|
}
|
|
}
|
|
return nil
|
|
}
|