Files
breakpilot-compliance/ai-compliance-sdk/internal/api/handlers/iace_handler_init_helpers.go
T
Benjamin Admin 1bd892afbf
Build + Deploy / build-backend-compliance (push) Successful in 3m14s
Build + Deploy / build-ai-sdk (push) Successful in 1m18s
Build + Deploy / build-developer-portal (push) Successful in 1m8s
CI / loc-budget (push) Failing after 19s
CI / secret-scan (push) Has been skipped
CI / go-lint (push) Has been skipped
CI / python-lint (push) Has been skipped
CI / nodejs-lint (push) Has been skipped
Build + Deploy / build-admin-compliance (push) Successful in 1m56s
Build + Deploy / build-tts (push) Successful in 1m35s
Build + Deploy / build-document-crawler (push) Successful in 47s
Build + Deploy / build-dsms-gateway (push) Successful in 35s
Build + Deploy / build-dsms-node (push) Successful in 19s
CI / branch-name (push) Has been skipped
CI / guardrail-integrity (push) Has been skipped
CI / test-go (push) Successful in 44s
CI / test-python-backend (push) Successful in 38s
CI / test-python-document-crawler (push) Successful in 26s
CI / nodejs-build (push) Successful in 2m28s
CI / dep-audit (push) Has been skipped
CI / sbom-scan (push) Has been skipped
CI / test-python-dsms-gateway (push) Successful in 21s
CI / validate-canonical-controls (push) Successful in 13s
Build + Deploy / trigger-orca (push) Successful in 2m54s
feat(iace): narrative relevance filter + zone normalization for precision
- isPatternRelevant() filters patterns whose zone/scenario mentions
  machine-specific terms (extruder, stanzpresse, spielplatz, etc.)
  absent from the actual machine narrative
- normalizeZoneKey() clusters similar zones for smarter dedup
  (e.g. "Schaltschrank, Sammelschiene" = "Schaltschrank-Innenraum")
- machineSpecificTerms list with 40+ terms for generic filtering

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
2026-05-13 09:51:00 +02:00

307 lines
10 KiB
Go

package handlers
import (
"encoding/json"
"strings"
"github.com/breakpilot/ai-compliance-sdk/internal/iace"
"github.com/google/uuid"
)
// extractNarrativeFromMetadata concatenates the free-text answers stored under
// the "limits_form" key of the project metadata into one narrative string.
//
// Only a fixed list of fields is read, always in the same order. Every value
// that is a non-empty JSON string is appended, followed by a blank line
// ("\n\n"); fields that are missing, empty or not strings are skipped.
//
// An empty string is returned when metadata is nil, cannot be decoded as a
// JSON object, has no "limits_form" key, or when "limits_form" itself cannot
// be decoded as a JSON object.
func extractNarrativeFromMetadata(metadata json.RawMessage) string {
	if metadata == nil {
		return ""
	}

	var meta map[string]json.RawMessage
	if err := json.Unmarshal(metadata, &meta); err != nil {
		return ""
	}
	limitsRaw, ok := meta["limits_form"]
	if !ok {
		return ""
	}

	var limits map[string]interface{}
	if err := json.Unmarshal(limitsRaw, &limits); err != nil {
		return ""
	}

	// The order of this list defines the order of the paragraphs in the result.
	textFields := [...]string{
		"general_description", "intended_purpose", "foreseeable_misuse",
		"space_limits", "time_limits", "environmental_conditions",
		"energy_sources", "materials_processed", "operating_modes",
		"maintenance_requirements", "personnel_requirements",
		"interfaces_description", "control_system_description",
		"safety_functions_description",
	}

	// A Builder avoids re-copying the accumulated text on every append.
	var narrative strings.Builder
	for _, field := range textFields {
		// A missing key or a non-string value yields "" and is skipped.
		text, _ := limits[field].(string)
		if text == "" {
			continue
		}
		narrative.WriteString(text)
		narrative.WriteString("\n\n")
	}
	return narrative.String()
}
// patternCatToMeasureCat translates a pattern hazard category into the
// measure category used for it. Several hazard categories share one measure
// category; any category without an explicit mapping yields "general".
func patternCatToMeasureCat(patternCat string) string {
	switch patternCat {
	case "mechanical_hazard", "maintenance_hazard":
		return "mechanical"
	case "electrical_hazard", "emc_hazard":
		return "electrical"
	case "thermal_hazard", "fire_explosion":
		return "thermal"
	case "noise_vibration":
		return "noise_vibration"
	case "pneumatic_hydraulic":
		return "pneumatic_hydraulic"
	case "material_environmental", "radiation_hazard", "chemical_risk":
		return "material_environmental"
	case "ergonomic", "ergonomic_hazard":
		return "ergonomic"
	case "software_fault", "sensor_fault", "configuration_error",
		"update_failure", "hmi_error", "mode_confusion":
		return "software_control"
	case "safety_function_failure":
		return "safety_function"
	case "unauthorized_access", "communication_failure",
		"firmware_corruption", "logging_audit_failure":
		return "cyber_network"
	case "ai_misclassification", "false_classification", "model_drift",
		"data_poisoning", "sensor_spoofing", "unintended_bias":
		return "ai_specific"
	default:
		return "general"
	}
}
// deriveComponentType picks a component type based on the component's tags.
//
// Tags are inspected in slice order and the first tag that has a known
// mapping decides the result, so the order of tags matters when a component
// carries several recognised tags. Matching is exact and case-sensitive.
// If no tag is recognised the component is treated as mechanical.
func deriveComponentType(tags []string) iace.ComponentType {
	for _, tag := range tags {
		switch tag {
		case "software", "has_software":
			return iace.ComponentTypeSoftware
		case "firmware", "has_firmware":
			return iace.ComponentTypeFirmware
		case "has_ai", "ai_model":
			return iace.ComponentTypeAIModel
		case "hmi", "display", "touchscreen":
			return iace.ComponentTypeHMI
		case "sensor", "camera":
			return iace.ComponentTypeSensor
		case "electric_motor", "electric_drive":
			return iace.ComponentTypeElectrical
		case "networked", "ethernet", "wifi":
			return iace.ComponentTypeNetwork
		case "hydraulic", "pneumatic":
			return iace.ComponentTypeActuator
		}
	}
	return iace.ComponentTypeMechanical
}
// extractOperationalStatesFromMetadata returns the list stored under the
// top-level "operational_states" key of the project metadata (the explicit
// selection made in the operational-state UI).
//
// nil is returned when metadata is nil, cannot be decoded as a JSON object,
// lacks the key, or when the value is not a JSON array of strings.
func extractOperationalStatesFromMetadata(metadata json.RawMessage) []string {
	if metadata == nil {
		return nil
	}

	var topLevel map[string]json.RawMessage
	if json.Unmarshal(metadata, &topLevel) != nil {
		return nil
	}

	statesRaw, present := topLevel["operational_states"]
	if !present {
		return nil
	}

	var selected []string
	if json.Unmarshal(statesRaw, &selected) != nil {
		// Do not hand back a partially decoded list.
		return nil
	}
	return selected
}
// mergeStringSlices returns the union of a and b without duplicates.
// Entries keep the order of their first occurrence, with all of a considered
// before b. The result is nil when both inputs are empty.
func mergeStringSlices(a, b []string) []string {
	known := make(map[string]struct{}, len(a)+len(b))
	var merged []string
	for _, src := range [][]string{a, b} {
		for _, item := range src {
			if _, dup := known[item]; dup {
				continue
			}
			known[item] = struct{}{}
			merged = append(merged, item)
		}
	}
	return merged
}
// extractIndustrySectorsFromMetadata reads the "industry_sectors" list nested
// under "limits_form" in the project metadata and translates each sector
// label into the machine-type identifiers used for pattern filtering.
//
// Labels are matched exactly; unknown labels contribute nothing. The result
// contains every machine type once, in the order the sectors were listed.
// nil is returned when metadata is nil, when any level cannot be decoded,
// when a key is missing, or when no label maps to a machine type.
func extractIndustrySectorsFromMetadata(metadata json.RawMessage) []string {
	if metadata == nil {
		return nil
	}

	// child decodes obj as a JSON object and returns the raw value at key.
	child := func(obj json.RawMessage, key string) (json.RawMessage, bool) {
		var fields map[string]json.RawMessage
		if json.Unmarshal(obj, &fields) != nil {
			return nil, false
		}
		value, present := fields[key]
		return value, present
	}

	formRaw, found := child(metadata, "limits_form")
	if !found {
		return nil
	}
	listRaw, found := child(formRaw, "industry_sectors")
	if !found {
		return nil
	}

	var labels []string
	if json.Unmarshal(listRaw, &labels) != nil {
		return nil
	}

	// UI label -> machine types covered by that sector.
	machineTypesByLabel := map[string][]string{
		"Allgemeiner Maschinenbau":         {"general_industry"},
		"Automobil / Zulieferer":           {"automotive"},
		"Robotik / Cobot":                  {"robotics_cobot", "cobot"},
		"Medizintechnik":                   {"medical_device", "infusion_pump", "ventilator", "patient_monitor"},
		"Lebensmittel / Getraenke":         {"food_processing"},
		"Verpackung":                       {"packaging"},
		"Pharma / Chemie":                  {"chemical", "pharmaceutical"},
		"Bau / Baumaschinen":               {"construction", "crane", "excavator"},
		"Forst / Holzbearbeitung":          {"forestry", "woodworking", "circular_saw"},
		"Aufzuege / Foerdertechnik":        {"elevator", "lift", "escalator", "conveyor"},
		"Textil":                           {"textile", "spinning", "weaving", "finishing"},
		"Landmaschinen":                    {"agricultural", "tractor", "harvester"},
		"Druck / Papier":                   {"printing"},
		"Metall / CNC":                     {"cnc", "metalworking", "lathe", "milling"},
		"Schweissen / Oberflaechentechnik": {"welding", "surface_treatment"},
	}

	var machineTypes []string
	emitted := make(map[string]bool)
	for _, label := range labels {
		mapped, known := machineTypesByLabel[label]
		if !known {
			continue
		}
		for _, machineType := range mapped {
			if emitted[machineType] {
				continue
			}
			emitted[machineType] = true
			machineTypes = append(machineTypes, machineType)
		}
	}
	return machineTypes
}
// containsSubstring reports whether needle occurs in haystack, ignoring case.
// Both strings are lower-cased before the comparison; no other normalization
// is applied. An empty needle always matches.
func containsSubstring(haystack, needle string) bool {
	lowerHaystack := strings.ToLower(haystack)
	lowerNeedle := strings.ToLower(needle)
	return strings.Contains(lowerHaystack, lowerNeedle)
}
// machineSpecificTerms lists words that tie a pattern to one particular kind
// of machine or application (e.g. extruder, crane, playground equipment,
// medical devices). isPatternRelevant rejects a pattern whose zone, scenario
// or name contains one of these terms when the term is absent from the
// machine narrative and from the component names.
//
// All entries are lower-case ASCII, with German umlauts written out
// ("saege", "fraese"), and German and English variants are listed side by side.
//
// NOTE(review): isPatternRelevant looks these up with strings.Contains, i.e.
// as plain substrings. Short entries can therefore also hit inside unrelated
// words (for example "kran" in "Erkrankung" or "stanze" in "Substanzen") —
// confirm this is acceptable before adding further short terms.
var machineSpecificTerms = []string{
"extruder", "spinnmaschine", "spielplatz", "aufzug", "elevator",
"kran", "crane", "bagger", "excavator", "traktor", "tractor",
"harvester", "druckmaschine", "printing", "webstuhl", "weaving",
"ofen", "furnace", "kessel", "boiler", "walzwerk", "rolling",
"zentrifuge", "centrifuge", "autoklav", "autoclave", "saege",
"kreissaege", "circular_saw", "hobel", "fraese", "drehmaschine",
"lathe", "schleifmaschine", "grinder", "stanze", "stanzpresse",
"infusion", "beatmung", "ventilator", "patient",
"lebensmittel", "food", "pharma", "verpackung", "packaging",
"seilnetz", "kletterseil", "schaukel", "rutsche",
"gabelstapler", "forklift", "flurfoerder",
}
// isPatternRelevant reports whether a pattern match applies to the machine
// described by narrative and compNames.
//
// The pattern's ZoneDE, ScenarioDE and PatternName are joined and passed
// through iace.NormalizeDEPublic, as is the narrative. For every entry of
// machineSpecificTerms that occurs in the normalized pattern text, the same
// term must also occur in the normalized narrative or in at least one
// component name; otherwise the pattern is rejected. A pattern that mentions
// none of the terms is always relevant.
//
// Component names are checked both as given and after iace.NormalizeDEPublic.
// The terms are lower-case ASCII, so comparing only the raw name would miss a
// component such as "Extruder" even though the narrative and the pattern text
// are normalized before the same comparison. Keeping the raw check as well
// means every name that matched before still matches.
// NOTE(review): assumes iace.NormalizeDEPublic is safe to apply to names that
// the caller may already have normalized — confirm against its implementation.
func isPatternRelevant(mp iace.PatternMatch, narrative string, compNames []string) bool {
	patternText := iace.NormalizeDEPublic(mp.ZoneDE + " " + mp.ScenarioDE + " " + mp.PatternName)
	narrativeNorm := iace.NormalizeDEPublic(narrative)

	// Normalized component names are only needed when a term is missing from
	// the narrative, so they are computed on first use and then reused.
	var compNorm []string
	componentMentions := func(term string) bool {
		if compNorm == nil {
			compNorm = make([]string, len(compNames))
			for i, cn := range compNames {
				compNorm[i] = iace.NormalizeDEPublic(cn)
			}
		}
		for i, cn := range compNames {
			if strings.Contains(cn, term) || strings.Contains(compNorm[i], term) {
				return true
			}
		}
		return false
	}

	for _, term := range machineSpecificTerms {
		if !strings.Contains(patternText, term) {
			continue // pattern does not depend on this machine type
		}
		if strings.Contains(narrativeNorm, term) {
			continue // the machine description mentions it
		}
		if !componentMentions(term) {
			// The pattern targets a machine type this project does not have.
			return false
		}
	}
	return true
}
// normalizeZoneKey condenses a zone description into a short key so that
// similarly worded zones can be deduplicated.
//
// The zone is passed through iace.NormalizeDEPublic, the punctuation
// characters , / ( ) - . : ; are turned into spaces, and the text is split
// into words. Words shorter than four bytes, stop words and repeated words
// are dropped. The first three remaining words, in their original order, are
// joined with "_". If no word remains, the cleaned text itself is returned;
// an empty zone yields an empty key.
//
// NOTE(review): words are neither sorted nor stemmed, so zones that differ in
// word order or inflection (e.g. "Sammelschiene" vs. "Sammelschienen") still
// produce different keys.
func normalizeZoneKey(zone string) string {
	if zone == "" {
		return ""
	}

	// Punctuation acts as a word separator, just like whitespace.
	punctToSpace := strings.NewReplacer(
		",", " ", "/", " ", "(", " ", ")", " ",
		"-", " ", ".", " ", ":", " ", ";", " ",
	)
	cleaned := punctToSpace.Replace(iace.NormalizeDEPublic(zone))

	// Filler words that carry no information about the location.
	ignored := map[string]bool{
		"der": true, "die": true, "das": true, "und": true, "oder": true,
		"von": true, "des": true, "den": true, "dem": true, "ein": true,
		"eine": true, "fuer": true, "bei": true, "mit": true, "nach": true,
		"alle": true, "aller": true, "allem": true, "sowie": true,
		"insbesondere": true, "bereich": true, "gesamte": true, "gesamter": true,
		"innerhalb": true, "ausserhalb": true, "umgebung": true,
	}

	const maxKeyWords = 3

	var keyWords []string
	taken := make(map[string]bool)
	for _, word := range strings.Fields(cleaned) {
		if len(word) < 4 || ignored[word] || taken[word] {
			continue
		}
		taken[word] = true
		keyWords = append(keyWords, word)
	}

	if len(keyWords) == 0 {
		// Nothing significant found: fall back to the cleaned text.
		return cleaned
	}
	if len(keyWords) > maxKeyWords {
		keyWords = keyWords[:maxKeyWords]
	}
	return strings.Join(keyWords, "_")
}
// findHazardForMeasureByCategory selects the hazard a measure of category
// measureCat should be attached to.
//
// Lookup order:
//  1. the hazard registered under exactly measureCat;
//  2. a hazard whose category shares its first four bytes with measureCat
//     (only when both names are at least four bytes long);
//  3. any hazard in the map.
//
// uuid.Nil is returned when the map is empty.
//
// Go's map iteration order is unspecified, so steps 2 and 3 pick the
// candidate with the lexicographically smallest category name. This makes
// the result reproducible across calls and runs; the chosen hazard is always
// one that an arbitrary iteration order could have produced as well.
func findHazardForMeasureByCategory(measureCat string, hazardsByCategory map[string]uuid.UUID) uuid.UUID {
	if id, ok := hazardsByCategory[measureCat]; ok {
		return id
	}

	var (
		prefixCat, anyCat     string
		prefixFound, anyFound bool
	)
	for cat := range hazardsByCategory {
		// Track the smallest key overall for the last-resort fallback.
		if !anyFound || cat < anyCat {
			anyCat, anyFound = cat, true
		}
		// Track the smallest key that shares the four-byte prefix.
		if len(measureCat) > 3 && len(cat) > 3 && cat[:4] == measureCat[:4] {
			if !prefixFound || cat < prefixCat {
				prefixCat, prefixFound = cat, true
			}
		}
	}

	switch {
	case prefixFound:
		return hazardsByCategory[prefixCat]
	case anyFound:
		return hazardsByCategory[anyCat]
	}
	return uuid.Nil
}