Files
breakpilot-compliance/ai-compliance-sdk/internal/api/handlers/iace_handler_init_helpers.go
T
Benjamin Admin afb3f83f30 feat(iace): cross-domain precision overhaul + component review + schema reconcile
Engine precision (stop foreign-machine patterns leaking into a project):
- Wire project.MachineType into the engine machine-type gate (empty input no
  longer fires every machine class — press/cnc/excavator/crane/medical...).
- Capability-domain gating extended by 7 domains (outdoor, ventilation,
  machining, bulk, palletizer, playground, fitness) so domain-specific hazards
  only fire when the narrative names that domain; emitted via keyword_dictionary.
- Relevance backstop moved into iace (single gating contract, testable), and its
  dominant false-anchor class removed (a long pattern word no longer matches a
  short common token; prepositions/leitung added to the generic stoplist).
- New guard tests: TestCrossDomainPrecision (full pipeline, 0 foreign per GT) and
  TestPatternReachability now asserts 0 dead patterns. Both GTs keep coverage 1.0.

Reachability fix: the 51 dead patterns required electrical/pneumatic/hydraulic
tags nothing produced — renamed to the canonical electrical_energy/
pneumatic_pressure/hydraulic_pressure/hydraulic_part.

Component review (negation is best-effort + expert-correctable):
- Parser surfaces negated components (ComponentMatch.Negated) instead of dropping
  them; negated contribute no tags/energy → no phantom hazards.
- presence_status (vorhanden|nicht_vorhanden|geloescht) + ce_marked on components;
  only `vorhanden` feed matching. CE+safety-relevant flags the PL/SIL obligation.
- Force re-seed preserves the expert's component decisions instead of wiping them.
- Tag-based component→hazard assignment (was: all on the first component).
- Negation-aware narrative parsing ("keine Pneumatik" no longer extracts it).

Local-dev DB: ai-sdk sets search_path=compliance,core,public; reconcile migrations
152-156 bring the consolidated local iace tables to the current schema + add the
presence_status/ce_marked columns. Machine-type vocabulary endpoint for the form.

[migration-approved]

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
2026-06-10 17:15:55 +02:00

456 lines
16 KiB
Go

package handlers
import (
"encoding/json"
"sort"
"strings"
"github.com/breakpilot/ai-compliance-sdk/internal/iace"
"github.com/google/uuid"
)
// withUniversalLifecycles returns the given lifecycle phases with empty
// entries and duplicates removed, followed by the four phases that occur on
// virtually every machine: normal_operation, setup, maintenance and cleaning.
// Appending them unconditionally means their hazards are derived even when the
// limits form does not list them explicitly.
//
// The order of the input is preserved, and a universal phase that is already
// present is not repeated. The input slice itself is not modified.
func withUniversalLifecycles(parsed []string) []string {
	universal := [...]string{"normal_operation", "setup", "maintenance", "cleaning"}

	known := make(map[string]bool, len(parsed)+len(universal))
	phases := make([]string, 0, len(parsed)+len(universal))

	// add appends a phase exactly once and ignores empty names.
	add := func(phase string) {
		if phase == "" || known[phase] {
			return
		}
		known[phase] = true
		phases = append(phases, phase)
	}

	for _, phase := range parsed {
		add(phase)
	}
	for _, phase := range universal {
		add(phase)
	}
	return phases
}
// extractNarrativeFromMetadata concatenates the textual content of the
// "limits_form" object inside the project metadata into one narrative string.
//
// Every field of the form is read rather than a whitelist of names, because
// the form schema evolves and each field (intended use, foreseeable misuse,
// machine limits, interfaces, ...) is a potential hazard source. Fields are
// visited in sorted key order so the output is deterministic.
//
//   - A string field that is not blank is written followed by a blank line.
//   - An array field contributes each non-empty string element followed by
//     ", ", and is always terminated by a blank line (even when no element
//     qualified).
//   - Fields of any other JSON type are ignored.
//
// An empty string is returned when metadata is nil, is not a JSON object,
// has no "limits_form" key, or when "limits_form" is not a JSON object.
func extractNarrativeFromMetadata(metadata json.RawMessage) string {
	if metadata == nil {
		return ""
	}
	var top map[string]json.RawMessage
	if json.Unmarshal(metadata, &top) != nil {
		return ""
	}
	formRaw, present := top["limits_form"]
	if !present {
		return ""
	}
	var form map[string]interface{}
	if json.Unmarshal(formRaw, &form) != nil {
		return ""
	}

	// Map iteration order is random; sort the field names first.
	names := make([]string, 0, len(form))
	for name := range form {
		names = append(names, name)
	}
	sort.Strings(names)

	var narrative strings.Builder
	for _, name := range names {
		if text, isText := form[name].(string); isText {
			if strings.TrimSpace(text) != "" {
				narrative.WriteString(text)
				narrative.WriteString("\n\n")
			}
			continue
		}

		items, isList := form[name].([]interface{})
		if !isList {
			continue
		}
		for _, item := range items {
			if text, isText := item.(string); isText && text != "" {
				narrative.WriteString(text)
				narrative.WriteString(", ")
			}
		}
		narrative.WriteString("\n\n")
	}
	return narrative.String()
}
// acceptableMeasureCategories returns the set of measure HazardCategory values
// that are semantically applicable to a hazard with the given pattern
// category.
//
// The result is a *set*, not a single value: many pattern categories accept
// measures from several conceptually related measure-library categories. For
// example, a safety_function_failure hazard is sensibly mitigated by
// software_control measures such as watchdogs, plausibility checks or
// self-tests, not only by the (almost empty) safety_function category.
//
// "general" is always part of the returned set. An unknown pattern category
// yields a set containing only "general".
func acceptableMeasureCategories(patternCat string) map[string]bool {
	var pools []string
	switch patternCat {
	case "mechanical_hazard", "maintenance_hazard":
		pools = []string{"mechanical"}

	case "electrical_hazard":
		pools = []string{"electrical"}

	// high_temperature is a legacy edge-case category treated as a synonym
	// of the thermal group so existing patterns keep their measure pool.
	case "thermal_hazard", "fire_explosion", "chemical_risk", "high_temperature":
		pools = []string{"thermal", "material_environmental"}

	// ISO 12100 Annex B splits No. 4 (noise) and No. 5 (vibration) into two
	// top-level groups. The legacy combined alias noise_vibration is kept
	// for backwards compatibility, and noise_source / vibration_source are
	// legacy synonyms. All of them resolve to the same measure pool because
	// the measure library does not separate noise from vibration measures.
	case "noise_hazard", "vibration_hazard", "noise_vibration",
		"noise_source", "vibration_source":
		pools = []string{"noise_vibration", "ergonomic"}

	case "pneumatic_hydraulic":
		pools = []string{"pneumatic_hydraulic", "mechanical"}

	// material_environmental_hazard is a legacy synonym.
	case "material_environmental", "radiation_hazard", "material_environmental_hazard":
		pools = []string{"material_environmental"}

	case "ergonomic", "ergonomic_hazard":
		pools = []string{"ergonomic"}

	case "emc_hazard":
		pools = []string{"electrical", "software_control"}

	case "safety_function_failure":
		pools = []string{"safety_function", "software_control"}

	case "software_fault", "sensor_fault", "configuration_error",
		"update_failure", "hmi_error", "mode_confusion":
		pools = []string{"software_control"}

	case "unauthorized_access", "communication_failure",
		"firmware_corruption", "logging_audit_failure":
		pools = []string{"cyber_network", "software_control"}

	case "ai_misclassification", "false_classification", "model_drift",
		"data_poisoning", "sensor_spoofing", "unintended_bias":
		pools = []string{"ai_specific", "software_control"}

	// CRA / DIN EN 40000-1-2 cyber-resilience patterns (HP1910+).
	// cyber_resilience is the umbrella category for patterns that fire on
	// manufacturer-side obligations: SBOM, signed updates, CVD policy,
	// patch SLA, hardening docs, incident notification. Besides the
	// dedicated pool, the broader cyber_network and software_control pools
	// are accepted because existing measures such as "intrusion detection"
	// or "audit logging" apply here too.
	case "cyber_resilience":
		pools = []string{"cyber_resilience", "cyber_network", "software_control"}
	}

	accepted := make(map[string]bool, len(pools)+1)
	accepted["general"] = true
	for _, pool := range pools {
		accepted[pool] = true
	}
	return accepted
}
// isCategoryCompatible reports whether a measure whose HazardCategory is
// measureCat may be applied to a hazard whose acceptable measure categories
// are given by accepted.
//
// A measure without a category (empty string, legacy entries) is always
// compatible. Every other category, including "general", is compatible only
// if it is present in accepted; acceptableMeasureCategories pre-seeds
// "general" into the sets it builds.
//
// Without this guard, patterns silently inherit nonsense mitigations (e.g.
// HP1651 "robot restart while person in cell" inheriting M054 "Sichere
// thermische Auslegung", a thermal-design measure used as generic default in
// ~100 mechanical patterns). The Fachmann benchmark rejects such mismatches.
func isCategoryCompatible(measureCat string, accepted map[string]bool) bool {
	return measureCat == "" || accepted[measureCat]
}
// keysOf returns the keys of a string-bool set in ascending order. It is used
// for diagnostic log messages that report which measure categories were
// accepted for a hazard.
//
// Every key is returned regardless of its boolean value. The result is never
// nil; an empty or nil set yields an empty slice.
func keysOf(s map[string]bool) []string {
	out := make([]string, 0, len(s))
	for k := range s {
		out = append(out, k)
	}
	// Map iteration order is randomised; sort so log output is stable and
	// matches the documented contract.
	sort.Strings(out)
	return out
}
// patternCatToMeasureCat maps a pattern hazard category to the single measure
// category associated with it. Pattern categories that are not listed resolve
// to "general".
func patternCatToMeasureCat(patternCat string) string {
	switch patternCat {
	case "mechanical_hazard", "maintenance_hazard":
		return "mechanical"
	case "electrical_hazard", "emc_hazard":
		return "electrical"
	case "thermal_hazard", "fire_explosion":
		return "thermal"
	case "noise_vibration":
		return "noise_vibration"
	case "pneumatic_hydraulic":
		return "pneumatic_hydraulic"
	case "material_environmental", "radiation_hazard", "chemical_risk":
		return "material_environmental"
	case "ergonomic", "ergonomic_hazard":
		return "ergonomic"
	case "safety_function_failure":
		return "safety_function"
	case "software_fault", "sensor_fault", "configuration_error",
		"update_failure", "hmi_error", "mode_confusion":
		return "software_control"
	case "unauthorized_access", "communication_failure",
		"firmware_corruption", "logging_audit_failure":
		return "cyber_network"
	case "ai_misclassification", "false_classification", "model_drift",
		"data_poisoning", "sensor_spoofing", "unintended_bias":
		return "ai_specific"
	case "cyber_resilience":
		return "cyber_resilience"
	default:
		return "general"
	}
}
// deriveComponentType guesses a component's type from its tags.
//
// Tags are examined in slice order and the first tag that is recognised
// decides the type, so earlier tags take precedence over later ones. When no
// tag is recognised (including an empty or nil slice) the component is
// classified as mechanical.
func deriveComponentType(tags []string) iace.ComponentType {
	for _, tag := range tags {
		switch tag {
		case "software", "has_software":
			return iace.ComponentTypeSoftware
		case "firmware", "has_firmware":
			return iace.ComponentTypeFirmware
		case "has_ai", "ai_model":
			return iace.ComponentTypeAIModel
		case "hmi", "display", "touchscreen":
			return iace.ComponentTypeHMI
		case "sensor", "camera":
			return iace.ComponentTypeSensor
		case "electric_motor", "electric_drive":
			return iace.ComponentTypeElectrical
		case "networked", "ethernet", "wifi":
			return iace.ComponentTypeNetwork
		case "hydraulic", "pneumatic":
			return iace.ComponentTypeActuator
		}
	}
	return iace.ComponentTypeMechanical
}
// extractOperationalStatesFromMetadata returns the top-level
// "operational_states" string array stored in the project metadata, i.e. the
// explicit selection the user made via the Betriebszustand UI.
//
// nil is returned when metadata is nil, is not a JSON object, lacks the key,
// or when the value cannot be decoded as an array of strings.
func extractOperationalStatesFromMetadata(metadata json.RawMessage) []string {
	if metadata == nil {
		return nil
	}

	var top map[string]json.RawMessage
	if json.Unmarshal(metadata, &top) != nil {
		return nil
	}

	selection, present := top["operational_states"]
	if !present {
		return nil
	}

	var states []string
	if json.Unmarshal(selection, &states) != nil {
		// Discard any partially decoded result on a type mismatch.
		return nil
	}
	return states
}
// mergeStringSlices returns the entries of a followed by the entries of b,
// keeping only the first occurrence of each string. Relative order is
// preserved and neither input is modified. The result is nil when both inputs
// are empty.
func mergeStringSlices(a, b []string) []string {
	taken := make(map[string]bool, len(a)+len(b))
	var merged []string
	for _, source := range [2][]string{a, b} {
		for _, entry := range source {
			if taken[entry] {
				continue
			}
			taken[entry] = true
			merged = append(merged, entry)
		}
	}
	return merged
}
// extractIndustrySectorsFromMetadata reads the "industry_sectors" selection
// stored under "limits_form" in the project metadata and translates each
// German UI label into the machine-type identifiers used for pattern
// filtering.
//
// Labels are matched exactly; labels that are not recognised contribute
// nothing. Identifiers are returned in the order of the selected sectors and
// each identifier appears at most once. nil is returned when the metadata is
// nil or malformed, when either key is missing, when the selection is not an
// array of strings, or when no selected label is recognised.
func extractIndustrySectorsFromMetadata(metadata json.RawMessage) []string {
	if metadata == nil {
		return nil
	}

	var top map[string]json.RawMessage
	if json.Unmarshal(metadata, &top) != nil {
		return nil
	}
	formRaw, hasForm := top["limits_form"]
	if !hasForm {
		return nil
	}

	var form map[string]json.RawMessage
	if json.Unmarshal(formRaw, &form) != nil {
		return nil
	}
	selectionRaw, hasSelection := form["industry_sectors"]
	if !hasSelection {
		return nil
	}

	var selected []string
	if json.Unmarshal(selectionRaw, &selected) != nil {
		return nil
	}

	// UI label -> machine types covered by that sector.
	machineTypesByLabel := map[string][]string{
		"Allgemeiner Maschinenbau":         {"general_industry"},
		"Automobil / Zulieferer":           {"automotive"},
		"Robotik / Cobot":                  {"robotics_cobot", "cobot"},
		"Medizintechnik":                   {"medical_device", "infusion_pump", "ventilator", "patient_monitor"},
		"Lebensmittel / Getraenke":         {"food_processing"},
		"Verpackung":                       {"packaging"},
		"Pharma / Chemie":                  {"chemical", "pharmaceutical"},
		"Bau / Baumaschinen":               {"construction", "crane", "excavator"},
		"Forst / Holzbearbeitung":          {"forestry", "woodworking", "circular_saw"},
		"Aufzuege / Foerdertechnik":        {"elevator", "lift", "escalator", "conveyor"},
		"Textil":                           {"textile", "spinning", "weaving", "finishing"},
		"Landmaschinen":                    {"agricultural", "tractor", "harvester"},
		"Druck / Papier":                   {"printing"},
		"Metall / CNC":                     {"cnc", "metalworking", "lathe", "milling"},
		"Schweissen / Oberflaechentechnik": {"welding", "surface_treatment"},
	}

	var machineTypes []string
	emitted := make(map[string]bool)
	for _, label := range selected {
		for _, machineType := range machineTypesByLabel[label] {
			if emitted[machineType] {
				continue
			}
			emitted[machineType] = true
			machineTypes = append(machineTypes, machineType)
		}
	}
	return machineTypes
}
// containsSubstring reports whether needle occurs in haystack, ignoring
// letter case. Both strings are lower-cased before the comparison; no other
// normalisation is applied. An empty needle is contained in every haystack.
func containsSubstring(haystack, needle string) bool {
	lowerHaystack := strings.ToLower(haystack)
	lowerNeedle := strings.ToLower(needle)
	return strings.Contains(lowerHaystack, lowerNeedle)
}
// categoryHazardCap returns the maximum number of hazards to generate for the
// given hazard category.
//
// The caps follow typical ISO 12100 risk-assessment proportions:
//   - Core physical categories scale with the number of components:
//     mechanical_hazard allows three per component, clamped to [15, 60];
//     electrical_hazard allows one per component, clamped to [8, 20].
//   - Secondary categories (thermal, noise, material, ...) get small fixed caps.
//   - Software/IT/organisational categories get minimal caps, because these
//     are usually covered by other standards such as IEC 62443 rather than by
//     an ISO 12100 machinery risk assessment.
//
// Any category that is not listed is capped at 3. componentCount is only
// consulted for the two scaling categories.
func categoryHazardCap(cat string, componentCount int) int {
	switch cat {
	// Core machinery hazard categories: scale with complexity.
	case "mechanical_hazard":
		// Typically 1-3 hazards per component (crushing, shearing, impact...).
		limit := componentCount * 3
		switch {
		case limit < 15:
			return 15
		case limit > 60:
			return 60
		}
		return limit

	case "electrical_hazard":
		// Typically 8-15 for a standard machine.
		limit := componentCount
		switch {
		case limit < 8:
			return 8
		case limit > 20:
			return 20
		}
		return limit

	// Fixed caps, grouped by value.
	case "pneumatic_hydraulic":
		return 8
	case "thermal_hazard", "material_environmental":
		return 6
	case "safety_function_failure":
		return 5
	case "noise_vibration", "ergonomic", "ergonomic_hazard",
		"fire_explosion", "maintenance_hazard":
		return 4
	case "radiation_hazard", "emc_hazard",
		"software_fault", "configuration_error", "hmi_error":
		return 3
	case "mode_confusion":
		return 2
	}

	// Every other category.
	return 3
}
// normalizeZoneKey reduces a free-text zone description to a short key so
// that differently worded descriptions of the same zone can be de-duplicated.
//
// The zone is first passed through iace.NormalizeDEPublic (presumably
// lower-casing and transliterating German text; confirm against the iace
// package), then common punctuation is replaced by spaces. From the
// remaining words, those shorter than four bytes, German filler words and
// repeats are dropped. The first three surviving words, in order of
// appearance (no sorting is applied), are joined with "_".
//
// An empty zone yields an empty key. When no significant word survives, the
// normalised, punctuation-free text itself is returned.
func normalizeZoneKey(zone string) string {
	if zone == "" {
		return ""
	}

	// isFiller reports whether a word carries no zone-specific meaning.
	isFiller := func(word string) bool {
		switch word {
		case "der", "die", "das", "und", "oder",
			"von", "des", "den", "dem", "ein",
			"eine", "fuer", "bei", "mit", "nach",
			"alle", "aller", "allem", "sowie",
			"insbesondere", "bereich", "gesamte", "gesamter",
			"innerhalb", "ausserhalb", "umgebung":
			return true
		}
		return false
	}

	cleaned := iace.NormalizeDEPublic(zone)
	for _, punct := range [...]string{",", "/", "(", ")", "-", ".", ":", ";"} {
		cleaned = strings.ReplaceAll(cleaned, punct, " ")
	}

	// Three significant words are enough to tell zones apart.
	const maxKeyWords = 3
	picked := make([]string, 0, maxKeyWords)
	used := make(map[string]bool)
	for _, word := range strings.Fields(cleaned) {
		if len(word) <= 3 || isFiller(word) || used[word] {
			continue
		}
		used[word] = true
		picked = append(picked, word)
		if len(picked) == maxKeyWords {
			break
		}
	}

	if len(picked) == 0 {
		return cleaned
	}
	return strings.Join(picked, "_")
}
// findHazardsForMeasureByCategory returns the hazard IDs that a measure of
// category measureCat should be linked to.
//
// An exact category match wins. Otherwise the function falls back to hazard
// categories that share the first four bytes with measureCat (both names
// must be at least four bytes long), e.g. "mechanical" matches
// "mechanical_hazard". When several hazard categories share that prefix, the
// lexicographically smallest one is chosen, so the result does not depend on
// Go's randomised map iteration order. nil is returned when nothing matches.
//
// The returned slice is the one stored in hazardsByCategory, not a copy.
func findHazardsForMeasureByCategory(measureCat string, hazardsByCategory map[string][]uuid.UUID) []uuid.UUID {
	if ids, ok := hazardsByCategory[measureCat]; ok {
		return ids
	}
	if len(measureCat) < 4 {
		return nil
	}

	prefix := measureCat[:4]
	best, found := "", false
	for cat := range hazardsByCategory {
		if len(cat) < 4 || cat[:4] != prefix {
			continue
		}
		// Keep the smallest matching name for a deterministic choice.
		if !found || cat < best {
			best, found = cat, true
		}
	}
	if !found {
		return nil
	}
	return hazardsByCategory[best]
}