// breakpilot-compliance/ai-compliance-sdk/internal/api/handlers/iace_handler_init_helpers.go

package handlers

import (
	"encoding/json"
	"strings"

	"github.com/breakpilot/ai-compliance-sdk/internal/iace"
	"github.com/google/uuid"
)

// extractNarrativeFromMetadata concatenates the free-text answers stored under
// the "limits_form" key of the project metadata into one narrative string.
//
// Fields are emitted in a fixed order. Every value that is a non-empty JSON
// string is appended followed by a blank line ("\n\n"); missing, empty and
// non-string values are skipped. The empty string is returned when metadata is
// nil, cannot be decoded as a JSON object, has no "limits_form" key, or when
// "limits_form" itself cannot be decoded as a JSON object.
func extractNarrativeFromMetadata(metadata json.RawMessage) string {
	if metadata == nil {
		return ""
	}

	var envelope map[string]json.RawMessage
	if json.Unmarshal(metadata, &envelope) != nil {
		return ""
	}
	formRaw, found := envelope["limits_form"]
	if !found {
		return ""
	}
	var form map[string]interface{}
	if json.Unmarshal(formRaw, &form) != nil {
		return ""
	}

	// The slice order defines the paragraph order of the resulting narrative.
	orderedKeys := []string{
		"general_description",
		"intended_purpose",
		"foreseeable_misuse",
		"space_limits",
		"time_limits",
		"environmental_conditions",
		"energy_sources",
		"materials_processed",
		"operating_modes",
		"maintenance_requirements",
		"personnel_requirements",
		"interfaces_description",
		"control_system_description",
		"safety_functions_description",
	}

	var narrative []byte
	for _, key := range orderedKeys {
		// A missing key yields a nil interface, which fails the assertion.
		text, isString := form[key].(string)
		if !isString || text == "" {
			continue
		}
		narrative = append(narrative, text...)
		narrative = append(narrative, "\n\n"...)
	}
	return string(narrative)
}

// patternCatToMeasureCat translates a pattern hazard category into the measure
// category used for it. Several hazard categories share one measure category.
// Any category not listed below maps to "general".
func patternCatToMeasureCat(patternCat string) string {
	switch patternCat {
	case "mechanical_hazard", "maintenance_hazard":
		return "mechanical"
	case "electrical_hazard", "emc_hazard":
		return "electrical"
	case "thermal_hazard", "fire_explosion":
		return "thermal"
	case "noise_vibration":
		return "noise_vibration"
	case "pneumatic_hydraulic":
		return "pneumatic_hydraulic"
	case "material_environmental", "radiation_hazard", "chemical_risk":
		return "material_environmental"
	case "ergonomic", "ergonomic_hazard":
		return "ergonomic"
	case "safety_function_failure":
		return "safety_function"
	case "software_fault", "sensor_fault", "configuration_error",
		"update_failure", "hmi_error", "mode_confusion":
		return "software_control"
	case "unauthorized_access", "communication_failure",
		"firmware_corruption", "logging_audit_failure":
		return "cyber_network"
	case "ai_misclassification", "false_classification", "model_drift",
		"data_poisoning", "sensor_spoofing", "unintended_bias":
		return "ai_specific"
	default:
		return "general"
	}
}

// deriveComponentType infers a component type from the component's tags.
//
// Tags are inspected in slice order and the first tag that matches one of the
// known keywords decides the result; later tags are ignored. Matching is exact
// and case-sensitive. When no tag matches (including a nil or empty slice) the
// component is classified as iace.ComponentTypeMechanical.
func deriveComponentType(tags []string) iace.ComponentType {
	for _, tag := range tags {
		switch tag {
		case "software", "has_software":
			return iace.ComponentTypeSoftware
		case "firmware", "has_firmware":
			return iace.ComponentTypeFirmware
		case "has_ai", "ai_model":
			return iace.ComponentTypeAIModel
		case "hmi", "display", "touchscreen":
			return iace.ComponentTypeHMI
		case "sensor", "camera":
			return iace.ComponentTypeSensor
		case "electric_motor", "electric_drive":
			return iace.ComponentTypeElectrical
		case "networked", "ethernet", "wifi":
			return iace.ComponentTypeNetwork
		case "hydraulic", "pneumatic":
			return iace.ComponentTypeActuator
		}
	}
	return iace.ComponentTypeMechanical
}

// extractOperationalStatesFromMetadata returns the string array stored under
// the top-level "operational_states" key of the project metadata (the explicit
// selection made in the operational-state UI).
//
// It returns nil when metadata is nil, cannot be decoded as a JSON object,
// lacks the key, or when the value cannot be decoded as an array of strings.
func extractOperationalStatesFromMetadata(metadata json.RawMessage) []string {
	if metadata == nil {
		return nil
	}

	var topLevel map[string]json.RawMessage
	if json.Unmarshal(metadata, &topLevel) != nil {
		return nil
	}

	if statesRaw, present := topLevel["operational_states"]; present {
		var selected []string
		if json.Unmarshal(statesRaw, &selected) == nil {
			return selected
		}
	}
	// Key absent or value malformed: report "no selection" rather than a
	// partially decoded slice.
	return nil
}

// mergeStringSlices returns the entries of a followed by the entries of b,
// keeping only the first occurrence of each string. Relative order of first
// occurrences is preserved. The inputs are not modified; the result is nil
// when both inputs are empty.
func mergeStringSlices(a, b []string) []string {
	known := make(map[string]bool, len(a)+len(b))
	var merged []string
	for _, source := range [2][]string{a, b} {
		for _, item := range source {
			if known[item] {
				continue
			}
			known[item] = true
			merged = append(merged, item)
		}
	}
	return merged
}

// extractIndustrySectorsFromMetadata reads the "industry_sectors" labels stored
// inside the "limits_form" object of the project metadata and translates them
// into machine-type identifiers (used for pattern filtering).
//
// Each known label expands to one or more machine types; unknown labels are
// ignored. The result keeps the order in which machine types are first
// produced and contains no duplicates. It is nil when metadata is nil, when
// any JSON level cannot be decoded, when a key is missing, or when no label
// matches.
func extractIndustrySectorsFromMetadata(metadata json.RawMessage) []string {
	if metadata == nil {
		return nil
	}

	// Peel the JSON one level at a time: metadata → limits_form → industry_sectors.
	var root map[string]json.RawMessage
	if json.Unmarshal(metadata, &root) != nil {
		return nil
	}
	formRaw, hasForm := root["limits_form"]
	if !hasForm {
		return nil
	}

	var form map[string]json.RawMessage
	if json.Unmarshal(formRaw, &form) != nil {
		return nil
	}
	labelsRaw, hasLabels := form["industry_sectors"]
	if !hasLabels {
		return nil
	}

	var labels []string
	if json.Unmarshal(labelsRaw, &labels) != nil {
		return nil
	}

	// Keys are the exact UI labels; lookups are case- and whitespace-sensitive.
	machineTypesByLabel := map[string][]string{
		"Allgemeiner Maschinenbau":         {"general_industry"},
		"Automobil / Zulieferer":           {"automotive"},
		"Robotik / Cobot":                  {"robotics_cobot", "cobot"},
		"Medizintechnik":                   {"medical_device", "infusion_pump", "ventilator", "patient_monitor"},
		"Lebensmittel / Getraenke":         {"food_processing"},
		"Verpackung":                       {"packaging"},
		"Pharma / Chemie":                  {"chemical", "pharmaceutical"},
		"Bau / Baumaschinen":               {"construction", "crane", "excavator"},
		"Forst / Holzbearbeitung":          {"forestry", "woodworking", "circular_saw"},
		"Aufzuege / Foerdertechnik":        {"elevator", "lift", "escalator", "conveyor"},
		"Textil":                           {"textile", "spinning", "weaving", "finishing"},
		"Landmaschinen":                    {"agricultural", "tractor", "harvester"},
		"Druck / Papier":                   {"printing"},
		"Metall / CNC":                     {"cnc", "metalworking", "lathe", "milling"},
		"Schweissen / Oberflaechentechnik": {"welding", "surface_treatment"},
	}

	var machineTypes []string
	emitted := make(map[string]bool)
	for _, label := range labels {
		for _, machineType := range machineTypesByLabel[label] {
			if emitted[machineType] {
				continue
			}
			emitted[machineType] = true
			machineTypes = append(machineTypes, machineType)
		}
	}
	return machineTypes
}

// containsSubstring reports whether needle occurs anywhere in haystack,
// ignoring case. Both arguments are lower-cased with strings.ToLower before
// the comparison; no further normalization (such as umlaut folding or
// whitespace trimming) is performed. An empty needle always matches.
func containsSubstring(haystack, needle string) bool {
	lowerHaystack := strings.ToLower(haystack)
	lowerNeedle := strings.ToLower(needle)
	return strings.Contains(lowerHaystack, lowerNeedle)
}

// genericSafetyTerms is the set of normalized, lower-case German words that
// show up in almost every risk assessment and therefore must NOT be treated as
// evidence that a pattern targets a specific machine.
//
// isPatternRelevant looks words up here by exact match, so inflected forms
// have to be listed individually (e.g. "achse" and "achsen").
var genericSafetyTerms = func() map[string]bool {
	terms := []string{
		"maschine", "anlage", "bereich", "gesamte",
		"arbeitsplatz", "gefahrbereich", "gefahrstelle",
		"gefahrenstelle", "person", "werker", "bediener",
		"steuerung", "schutzeinrichtung", "sicherheit",
		"betrieb", "wartung", "instandhaltung", "reinigung",
		"bewegung", "beweglich", "feststehend", "teil",
		"teile", "oeffnung", "zugang", "gefahr",
		"verletzung", "quetsch", "scher", "schneid",
		"stoss", "schlag", "einzug", "brand",
		"motor", "antrieb", "achse", "achsen",
		"kabel", "leitung", "schaltschrank", "spannung",
		"schutz", "gehaeuse", "oberflaeche", "boden",
		"leitfaehig", "elektrisch", "mechanisch",
		"bedienfeld", "display", "anzeige",
		"energie", "druck", "temperatur",
		// Common structural terms that don't indicate a specific machine.
		"gesamter", "gesamtes", "bereichs", "stelle",
		"innen", "aussen", "transport", "seite",
		"front", "rueck", "ober", "unter",
		"fuehrung", "lager", "verschleiss", "welle",
		"getriebe", "kette", "riemen", "feder",
		"spindel", "werkzeug", "werkstueck", "flucht",
	}
	set := make(map[string]bool, len(terms))
	for _, term := range terms {
		set[term] = true
	}
	return set
}()

// isPatternRelevant decides whether a pattern match applies to the machine
// described by narrative, using vocabulary overlap.
//
// The pattern's zone, scenario and name are normalized with
// iace.NormalizeDEPublic and split on whitespace. After trimming surrounding
// punctuation, a word counts as machine-specific when it is at least five
// bytes long and is not listed in genericSafetyTerms. The result is:
//
//   - true  if the pattern contains no machine-specific word (generic pattern),
//   - true  if any machine-specific word is a substring of the normalized
//     narrative or of any entry in compNames,
//   - false otherwise (the pattern appears to describe a different machine).
//
// NOTE(review): compNames are compared as passed in and are not normalized
// here, while the pattern words are; presumably callers supply already
// normalized names — confirm.
func isPatternRelevant(mp iace.PatternMatch, narrative string, compNames []string) bool {
	patternNorm := iace.NormalizeDEPublic(mp.ZoneDE + " " + mp.ScenarioDE + " " + mp.PatternName)
	narrativeNorm := iace.NormalizeDEPublic(narrative)

	sawSpecificWord := false
	for _, token := range strings.Fields(patternNorm) {
		// Strip punctuation clinging to either end of the token.
		word := strings.Trim(token, ".,;:!?()/-")
		if len(word) < 5 || genericSafetyTerms[word] {
			continue // too short or generic: says nothing about the machine
		}
		sawSpecificWord = true

		if strings.Contains(narrativeNorm, word) {
			return true
		}
		for _, name := range compNames {
			if strings.Contains(name, word) {
				return true
			}
		}
	}

	// No hit: a pattern without any specific word is generic and stays
	// relevant; one with specific words that all missed is rejected.
	return !sawSpecificWord
}

// categoryHazardCap returns the maximum number of hazards to generate for the
// hazard category cat on a machine with componentCount components.
//
// Per the original design notes the caps follow typical ISO 12100 risk
// assessment proportions:
//   - "mechanical_hazard": three per component, clamped to the range [15, 60].
//   - "electrical_hazard": one per component, clamped to the range [8, 20].
//   - secondary physical categories (thermal, noise, material, ...): small
//     fixed caps.
//   - software/IT/organizational categories: minimal fixed caps, since these
//     are usually covered by other standards (e.g. IEC 62443).
//
// Any category not listed gets a cap of 3.
func categoryHazardCap(cat string, componentCount int) int {
	// clamp limits value to the inclusive range [lo, hi].
	clamp := func(value, lo, hi int) int {
		switch {
		case value < lo:
			return lo
		case value > hi:
			return hi
		}
		return value
	}

	switch cat {
	// Core machinery categories scale with machine complexity.
	case "mechanical_hazard":
		return clamp(componentCount*3, 15, 60)
	case "electrical_hazard":
		return clamp(componentCount, 8, 20)

	// Fixed caps, grouped by value.
	case "pneumatic_hydraulic":
		return 8
	case "thermal_hazard", "material_environmental":
		return 6
	case "safety_function_failure":
		return 5
	case "noise_vibration", "ergonomic", "ergonomic_hazard",
		"fire_explosion", "maintenance_hazard":
		return 4
	case "radiation_hazard", "emc_hazard", "software_fault",
		"configuration_error", "hmi_error":
		return 3
	case "mode_confusion":
		return 2
	}
	return 3
}

// normalizeZoneKey condenses a zone description into a short key used for
// deduplicating zones.
//
// The zone is normalized with iace.NormalizeDEPublic, the punctuation
// characters , / ( ) - . : ; are replaced by spaces, and the text is split on
// whitespace. Words shorter than four bytes, filler words and repeated words
// are dropped. The key is the first three remaining words, in their original
// order, joined with "_". If no word remains, the punctuation-stripped
// normalized text is returned unchanged. An empty zone yields "".
//
// NOTE(review): words are not sorted and no stemming is done here, so zone
// strings whose leading significant words differ (for example
// "Schaltschrank, Sammelschiene" vs. "Schaltschrank-Innenraum,
// Sammelschienen") produce different keys unless iace.NormalizeDEPublic
// already folds them — confirm this matches the intended dedup behavior.
func normalizeZoneKey(zone string) string {
	if zone == "" {
		return ""
	}

	// Turn separator punctuation into spaces so it also splits words.
	separators := strings.NewReplacer(
		",", " ", "/", " ", "(", " ", ")", " ",
		"-", " ", ".", " ", ":", " ", ";", " ",
	)
	cleaned := separators.Replace(iace.NormalizeDEPublic(zone))

	fillers := map[string]bool{
		"der": true, "die": true, "das": true, "und": true, "oder": true,
		"von": true, "des": true, "den": true, "dem": true, "ein": true,
		"eine": true, "fuer": true, "bei": true, "mit": true, "nach": true,
		"alle": true, "aller": true, "allem": true, "sowie": true,
		"insbesondere": true, "bereich": true, "gesamte": true, "gesamter": true,
		"innerhalb": true, "ausserhalb": true, "umgebung": true,
	}

	// Three significant words are enough to tell zones apart.
	const maxKeyWords = 3
	keyWords := make([]string, 0, maxKeyWords)
	taken := make(map[string]bool)
	for _, word := range strings.Fields(cleaned) {
		if len(word) < 4 || fillers[word] || taken[word] {
			continue
		}
		taken[word] = true
		keyWords = append(keyWords, word)
		if len(keyWords) == maxKeyWords {
			break
		}
	}

	if len(keyWords) == 0 {
		return cleaned
	}
	return strings.Join(keyWords, "_")
}

// findHazardForMeasureByCategory picks the hazard a measure of category
// measureCat should be attached to, given one hazard ID per category.
//
// Resolution order:
//  1. the hazard registered under exactly measureCat;
//  2. a hazard whose category shares its first four bytes with measureCat
//     (both strings must be at least four bytes long);
//  3. any hazard in the map;
//  4. uuid.Nil when the map is empty.
//
// Go randomizes map iteration order, so steps 2 and 3 always choose the
// candidate with the lexicographically smallest category name. This makes the
// result reproducible across calls and runs; it is always one of the
// candidates a plain map iteration could have returned.
func findHazardForMeasureByCategory(measureCat string, hazardsByCategory map[string]uuid.UUID) uuid.UUID {
	if id, ok := hazardsByCategory[measureCat]; ok {
		return id
	}

	const prefixLen = 4
	var (
		prefixCat, anyCat     string
		prefixFound, anyFound bool
	)
	for cat := range hazardsByCategory {
		// Track the smallest key overall for the last-resort fallback.
		if !anyFound || cat < anyCat {
			anyCat, anyFound = cat, true
		}
		// Track the smallest key that shares the four-byte prefix.
		if len(measureCat) >= prefixLen && len(cat) >= prefixLen &&
			cat[:prefixLen] == measureCat[:prefixLen] {
			if !prefixFound || cat < prefixCat {
				prefixCat, prefixFound = cat, true
			}
		}
	}

	switch {
	case prefixFound:
		return hazardsByCategory[prefixCat]
	case anyFound:
		return hazardsByCategory[anyCat]
	default:
		return uuid.Nil
	}
}