cca714755a
Build + Deploy / build-admin-compliance (push) Successful in 10s
Build + Deploy / build-backend-compliance (push) Successful in 11s
Build + Deploy / build-ai-sdk (push) Successful in 40s
Build + Deploy / build-developer-portal (push) Successful in 10s
Build + Deploy / build-document-crawler (push) Successful in 11s
Build + Deploy / build-dsms-gateway (push) Successful in 12s
Build + Deploy / build-dsms-node (push) Successful in 11s
CI / branch-name (push) Has been skipped
CI / guardrail-integrity (push) Has been skipped
CI / loc-budget (push) Failing after 16s
CI / secret-scan (push) Has been skipped
Build + Deploy / build-tts (push) Successful in 11s
CI / nodejs-build (push) Successful in 2m44s
CI / dep-audit (push) Has been skipped
CI / sbom-scan (push) Has been skipped
CI / test-go (push) Failing after 43s
CI / test-python-dsms-gateway (push) Successful in 22s
CI / validate-canonical-controls (push) Successful in 19s
CI / go-lint (push) Has been skipped
CI / python-lint (push) Has been skipped
CI / nodejs-lint (push) Has been skipped
CI / test-python-backend (push) Successful in 40s
CI / test-python-document-crawler (push) Successful in 25s
Build + Deploy / trigger-orca (push) Successful in 2m48s
Relevance filter: now checks PatternName in addition to ZoneDE+ScenarioDE, catches "Spielplatz", "Umreifungsband", "Fahrtreppe" etc. in pattern names. Added more generic safety terms to whitelist (welle, getriebe, kette, etc.) Matcher: rebalanced weights (category 0.3, keywords 0.3, zone 0.4) to prioritize zone/component specificity. Added wrong-machine penalty (0.3x) when engine hazard mentions machine-specific terms absent from GT context (e.g. "Kollision zweier Roboter" for a single-robot GT entry). Fixes 18 problematic matches: 8 wrong-machine, 9 zone-mismatch, 1 category. Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
387 lines
13 KiB
Go
387 lines
13 KiB
Go
package handlers
|
|
|
|
import (
|
|
"encoding/json"
|
|
"strings"
|
|
|
|
"github.com/breakpilot/ai-compliance-sdk/internal/iace"
|
|
"github.com/google/uuid"
|
|
)
|
|
|
|
// extractNarrativeFromMetadata builds one combined narrative text from the
// "limits_form" object stored in the project metadata.
//
// The known free-text fields are visited in a fixed order. Every field whose
// value is a non-empty JSON string is appended to the result, followed by a
// blank line ("\n\n"). Missing fields, non-string values and empty strings are
// skipped.
//
// An empty string is returned when metadata is nil, cannot be decoded as a
// JSON object, has no "limits_form" key, or "limits_form" itself cannot be
// decoded as a JSON object.
func extractNarrativeFromMetadata(metadata json.RawMessage) string {
	if metadata == nil {
		return ""
	}
	var meta map[string]json.RawMessage
	if err := json.Unmarshal(metadata, &meta); err != nil {
		return ""
	}
	limitsRaw, ok := meta["limits_form"]
	if !ok {
		return ""
	}
	var limits map[string]interface{}
	if err := json.Unmarshal(limitsRaw, &limits); err != nil {
		return ""
	}

	// The order of this list defines the order of the paragraphs in the result.
	textFields := []string{
		"general_description", "intended_purpose", "foreseeable_misuse",
		"space_limits", "time_limits", "environmental_conditions",
		"energy_sources", "materials_processed", "operating_modes",
		"maintenance_requirements", "personnel_requirements",
		"interfaces_description", "control_system_description",
		"safety_functions_description",
	}

	// A Builder avoids re-copying the accumulated text on every append.
	var b strings.Builder
	for _, field := range textFields {
		// The assertion fails for missing keys (nil interface) as well as for
		// non-string values, so both cases are skipped here.
		if s, ok := limits[field].(string); ok && s != "" {
			b.WriteString(s)
			b.WriteString("\n\n")
		}
	}
	return b.String()
}
|
|
|
|
// patternCatToMeasureCat maps a pattern hazard category to the measure
// category used for it. Several pattern categories share one measure
// category. Any category that is not listed maps to "general".
//
// A switch is used instead of a map literal so that no lookup table has to be
// built on every call.
func patternCatToMeasureCat(patternCat string) string {
	switch patternCat {
	case "mechanical_hazard", "maintenance_hazard":
		return "mechanical"
	case "electrical_hazard", "emc_hazard":
		return "electrical"
	case "thermal_hazard", "fire_explosion":
		return "thermal"
	case "noise_vibration":
		return "noise_vibration"
	case "pneumatic_hydraulic":
		return "pneumatic_hydraulic"
	case "material_environmental", "radiation_hazard", "chemical_risk":
		return "material_environmental"
	case "ergonomic", "ergonomic_hazard":
		return "ergonomic"
	case "software_fault", "sensor_fault", "configuration_error",
		"update_failure", "hmi_error", "mode_confusion":
		return "software_control"
	case "safety_function_failure":
		return "safety_function"
	case "unauthorized_access", "communication_failure",
		"firmware_corruption", "logging_audit_failure":
		return "cyber_network"
	case "ai_misclassification", "false_classification", "model_drift",
		"data_poisoning", "sensor_spoofing", "unintended_bias":
		return "ai_specific"
	default:
		return "general"
	}
}
|
|
|
|
// deriveComponentType guesses the component type from its tags.
|
|
func deriveComponentType(tags []string) iace.ComponentType {
|
|
for _, t := range tags {
|
|
switch {
|
|
case t == "software" || t == "has_software":
|
|
return iace.ComponentTypeSoftware
|
|
case t == "firmware" || t == "has_firmware":
|
|
return iace.ComponentTypeFirmware
|
|
case t == "has_ai" || t == "ai_model":
|
|
return iace.ComponentTypeAIModel
|
|
case t == "hmi" || t == "display" || t == "touchscreen":
|
|
return iace.ComponentTypeHMI
|
|
case t == "sensor" || t == "camera":
|
|
return iace.ComponentTypeSensor
|
|
case t == "electric_motor" || t == "electric_drive":
|
|
return iace.ComponentTypeElectrical
|
|
case t == "networked" || t == "ethernet" || t == "wifi":
|
|
return iace.ComponentTypeNetwork
|
|
case t == "hydraulic" || t == "pneumatic":
|
|
return iace.ComponentTypeActuator
|
|
}
|
|
}
|
|
return iace.ComponentTypeMechanical
|
|
}
|
|
|
|
// extractOperationalStatesFromMetadata returns the explicit
// "operational_states" selection stored in the project metadata (set by the
// user via the Betriebszustand UI).
//
// nil is returned when metadata is nil, is not a JSON object, does not contain
// the "operational_states" key, or that key does not hold a JSON array of
// strings.
func extractOperationalStatesFromMetadata(metadata json.RawMessage) []string {
	if metadata == nil {
		return nil
	}

	var fields map[string]json.RawMessage
	if json.Unmarshal(metadata, &fields) != nil {
		return nil
	}

	encoded, present := fields["operational_states"]
	if !present {
		return nil
	}

	var selected []string
	if json.Unmarshal(encoded, &selected) != nil {
		// Discard anything that was partially decoded before the error.
		return nil
	}
	return selected
}
|
|
|
|
// mergeStringSlices concatenates a and b while dropping duplicates.
//
// The first occurrence of every string is kept, so the result lists the
// distinct entries of a in their original order followed by the entries of b
// that were not already present. The result is nil when both inputs are empty.
func mergeStringSlices(a, b []string) []string {
	known := make(map[string]bool, len(a)+len(b))
	var merged []string
	for _, src := range [][]string{a, b} {
		for _, item := range src {
			if known[item] {
				continue
			}
			known[item] = true
			merged = append(merged, item)
		}
	}
	return merged
}
|
|
|
|
// extractIndustrySectorsFromMetadata reads the "industry_sectors" selection
// from the "limits_form" object of the project metadata and translates the
// selected sector labels into machine-type identifiers for pattern filtering.
//
// Each known label expands to one or more identifiers. Labels that are not in
// the table are ignored. Identifiers are de-duplicated and returned in the
// order in which they are first produced.
//
// nil is returned when metadata is nil, cannot be decoded, lacks
// "limits_form" or "industry_sectors", or when "industry_sectors" is not a
// JSON array of strings. nil is also returned when no selected label is known.
func extractIndustrySectorsFromMetadata(metadata json.RawMessage) []string {
	if metadata == nil {
		return nil
	}
	var meta map[string]json.RawMessage
	if err := json.Unmarshal(metadata, &meta); err != nil {
		return nil
	}
	limitsRaw, ok := meta["limits_form"]
	if !ok {
		return nil
	}
	var limits map[string]json.RawMessage
	if err := json.Unmarshal(limitsRaw, &limits); err != nil {
		return nil
	}
	sectorsRaw, ok := limits["industry_sectors"]
	if !ok {
		return nil
	}
	var sectors []string
	// The destination must be passed by address so Unmarshal can fill it.
	if err := json.Unmarshal(sectorsRaw, &sectors); err != nil {
		return nil
	}

	// Sector label -> machine-type identifiers. The keys are runtime data and
	// must match the stored labels exactly.
	labelMap := map[string][]string{
		"Allgemeiner Maschinenbau":         {"general_industry"},
		"Automobil / Zulieferer":           {"automotive"},
		"Robotik / Cobot":                  {"robotics_cobot", "cobot"},
		"Medizintechnik":                   {"medical_device", "infusion_pump", "ventilator", "patient_monitor"},
		"Lebensmittel / Getraenke":         {"food_processing"},
		"Verpackung":                       {"packaging"},
		"Pharma / Chemie":                  {"chemical", "pharmaceutical"},
		"Bau / Baumaschinen":               {"construction", "crane", "excavator"},
		"Forst / Holzbearbeitung":          {"forestry", "woodworking", "circular_saw"},
		"Aufzuege / Foerdertechnik":        {"elevator", "lift", "escalator", "conveyor"},
		"Textil":                           {"textile", "spinning", "weaving", "finishing"},
		"Landmaschinen":                    {"agricultural", "tractor", "harvester"},
		"Druck / Papier":                   {"printing"},
		"Metall / CNC":                     {"cnc", "metalworking", "lathe", "milling"},
		"Schweissen / Oberflaechentechnik": {"welding", "surface_treatment"},
	}

	var result []string
	seen := make(map[string]bool)
	for _, sector := range sectors {
		// Unknown labels yield a nil slice here and contribute nothing.
		for _, mt := range labelMap[sector] {
			if !seen[mt] {
				seen[mt] = true
				result = append(result, mt)
			}
		}
	}
	return result
}
|
|
|
|
// containsSubstring reports whether needle occurs in haystack when both are
// compared in lower case. No normalisation beyond lower-casing is applied.
// An empty needle matches every haystack.
func containsSubstring(haystack, needle string) bool {
	lowerHaystack := strings.ToLower(haystack)
	lowerNeedle := strings.ToLower(needle)
	return strings.Contains(lowerHaystack, lowerNeedle)
}
|
|
|
|
// genericSafetyTerms is the set of words that show up in almost every risk
// assessment. Because they say nothing about which machine is being assessed,
// they must NOT be used to decide machine-specificity.
//
// The set is keyed by the word; every listed word maps to true and every
// other word yields false on lookup.
//
// NOTE(review): isPatternRelevant skips words shorter than five bytes before
// it consults this set, so the short entries (e.g. "teil", "ober") have no
// effect on that caller.
var genericSafetyTerms = func() map[string]bool {
	words := []string{
		// General machine, area and people vocabulary.
		"maschine", "anlage", "bereich", "gesamte",
		"arbeitsplatz", "gefahrbereich", "gefahrstelle",
		"gefahrenstelle", "person", "werker", "bediener",
		"steuerung", "schutzeinrichtung", "sicherheit",
		"betrieb", "wartung", "instandhaltung", "reinigung",
		"bewegung", "beweglich", "feststehend", "teil",
		"teile", "oeffnung", "zugang", "gefahr",
		// Hazard and injury stems.
		"verletzung", "quetsch", "scher", "schneid",
		"stoss", "schlag", "einzug", "brand",
		// Common drive, electrical and enclosure parts.
		"motor", "antrieb", "achse", "achsen",
		"kabel", "leitung", "schaltschrank", "spannung",
		"schutz", "gehaeuse", "oberflaeche", "boden",
		"leitfaehig", "elektrisch", "mechanisch",
		"bedienfeld", "display", "anzeige",
		"energie", "druck", "temperatur",
		// Common structural terms that don't indicate a specific machine.
		"gesamter", "gesamtes", "bereichs", "stelle",
		"innen", "aussen", "transport", "seite",
		"front", "rueck", "ober", "unter",
		"fuehrung", "lager", "verschleiss", "welle",
		"getriebe", "kette", "riemen", "feder",
		"spindel", "werkzeug", "werkstueck", "flucht",
	}
	set := make(map[string]bool, len(words))
	for _, w := range words {
		set[w] = true
	}
	return set
}()
|
|
|
|
// isPatternRelevant checks whether a pattern match is relevant to the actual
|
|
// machine described in the narrative. Uses narrative vocabulary overlap:
|
|
// if the pattern's zone/scenario contains machine-specific words (not generic
|
|
// safety terms) and NONE of them appear in the narrative → irrelevant.
|
|
func isPatternRelevant(mp iace.PatternMatch, narrative string, compNames []string) bool {
|
|
patternText := iace.NormalizeDEPublic(mp.ZoneDE + " " + mp.ScenarioDE + " " + mp.PatternName)
|
|
narrativeNorm := iace.NormalizeDEPublic(narrative)
|
|
|
|
// Extract machine-specific words from pattern (not generic safety terms)
|
|
patternWords := strings.Fields(patternText)
|
|
var specificWords []string
|
|
for _, w := range patternWords {
|
|
// Clean punctuation
|
|
w = strings.Trim(w, ".,;:!?()/-")
|
|
if len(w) < 5 || genericSafetyTerms[w] {
|
|
continue
|
|
}
|
|
specificWords = append(specificWords, w)
|
|
}
|
|
|
|
// If pattern has no specific words, it's generic → always relevant
|
|
if len(specificWords) == 0 {
|
|
return true
|
|
}
|
|
|
|
// Check if at least one specific word appears in the narrative or components
|
|
for _, sw := range specificWords {
|
|
if strings.Contains(narrativeNorm, sw) {
|
|
return true
|
|
}
|
|
for _, cn := range compNames {
|
|
if strings.Contains(cn, sw) {
|
|
return true
|
|
}
|
|
}
|
|
}
|
|
|
|
// No specific word found in narrative → pattern is for a different machine
|
|
return false
|
|
}
|
|
|
|
// categoryHazardCap returns the maximum number of hazards to generate for a
// hazard category.
//
// The limits follow typical ISO 12100 risk assessment proportions:
//   - core physical categories scale with the number of components:
//     "mechanical_hazard" allows 3 per component, bounded to 15..60, and
//     "electrical_hazard" allows 1 per component, bounded to 8..20;
//   - secondary physical categories (thermal, noise, material, ...) have
//     small fixed limits;
//   - software/IT/organisational categories are kept minimal, as these are
//     usually covered by other standards (e.g. IEC 62443) rather than by a
//     machinery risk assessment.
//
// Any category that is not listed gets a limit of 3.
func categoryHazardCap(cat string, componentCount int) int {
	// clamp bounds v to the inclusive range [lo, hi].
	clamp := func(v, lo, hi int) int {
		if v < lo {
			return lo
		}
		if v > hi {
			return hi
		}
		return v
	}

	switch cat {
	// Core machinery hazard categories — scale with complexity.
	case "mechanical_hazard":
		// Typically 1-3 hazards per component (crushing, shearing, impact...).
		return clamp(componentCount*3, 15, 60)
	case "electrical_hazard":
		return clamp(componentCount, 8, 20)

	// Secondary physical categories — fixed limits.
	case "pneumatic_hydraulic":
		return 8
	case "thermal_hazard", "material_environmental":
		return 6
	case "noise_vibration", "ergonomic", "ergonomic_hazard", "fire_explosion", "maintenance_hazard":
		return 4
	case "radiation_hazard", "emc_hazard":
		return 3

	// Software/IT/organisational — minimal for a machinery assessment.
	case "safety_function_failure":
		return 5
	case "software_fault", "configuration_error", "hmi_error":
		return 3
	case "mode_confusion":
		return 2

	default:
		return 3
	}
}
|
|
|
|
// normalizeZoneKey reduces a zone string to its core components for better dedup.
|
|
// E.g. "Schaltschrank, Sammelschiene" and "Schaltschrank-Innenraum, Sammelschienen"
|
|
// should dedup to the same key.
|
|
func normalizeZoneKey(zone string) string {
|
|
if zone == "" {
|
|
return ""
|
|
}
|
|
norm := iace.NormalizeDEPublic(zone)
|
|
// Remove filler words and punctuation
|
|
for _, r := range []string{",", "/", "(", ")", "-", ".", ":", ";"} {
|
|
norm = strings.ReplaceAll(norm, r, " ")
|
|
}
|
|
// Extract significant words (>3 chars), sort for stable key
|
|
words := strings.Fields(norm)
|
|
var sig []string
|
|
seen := make(map[string]bool)
|
|
stopWords := map[string]bool{
|
|
"der": true, "die": true, "das": true, "und": true, "oder": true,
|
|
"von": true, "des": true, "den": true, "dem": true, "ein": true,
|
|
"eine": true, "fuer": true, "bei": true, "mit": true, "nach": true,
|
|
"alle": true, "aller": true, "allem": true, "sowie": true,
|
|
"insbesondere": true, "bereich": true, "gesamte": true, "gesamter": true,
|
|
"innerhalb": true, "ausserhalb": true, "umgebung": true,
|
|
}
|
|
for _, w := range words {
|
|
if len(w) < 4 || stopWords[w] || seen[w] {
|
|
continue
|
|
}
|
|
seen[w] = true
|
|
sig = append(sig, w)
|
|
}
|
|
if len(sig) == 0 {
|
|
return norm
|
|
}
|
|
// Take first 3 significant words as key (enough for dedup)
|
|
if len(sig) > 3 {
|
|
sig = sig[:3]
|
|
}
|
|
return strings.Join(sig, "_")
|
|
}
|
|
|
|
// findHazardForMeasureByCategory finds a matching hazard for a measure.
|
|
func findHazardForMeasureByCategory(measureCat string, hazardsByCategory map[string]uuid.UUID) uuid.UUID {
|
|
if id, ok := hazardsByCategory[measureCat]; ok {
|
|
return id
|
|
}
|
|
for cat, id := range hazardsByCategory {
|
|
if len(measureCat) > 3 && len(cat) > 3 && cat[:4] == measureCat[:4] {
|
|
return id
|
|
}
|
|
}
|
|
for _, id := range hazardsByCategory {
|
|
return id
|
|
}
|
|
return uuid.Nil
|
|
}
|