package iace

import (
	"regexp"
	"strconv"
	"strings"
)

// ComponentMatch represents a component detected from narrative text.
type ComponentMatch struct {
	// LibraryID is the ID of the component library entry that was matched.
	LibraryID string `json:"library_id"`
	// NameDE is the NameDE field copied from the component library entry.
	NameDE    string `json:"name_de"`
	MatchedOn string `json:"matched_on"` // The keyword that triggered the match
	// Tags are the tags copied from the component library entry.
	Tags []string `json:"tags"`
	// Confidence is the per-component confidence; ParseNarrative assigns a
	// fixed 0.8 to every keyword match.
	Confidence float64 `json:"confidence"`
}

// EnergyMatch represents an energy source detected from narrative text.
type EnergyMatch struct {
	// SourceID is the ID of the detected energy source (e.g. "EN01").
	SourceID string `json:"source_id"`
	// NameDE is a display name. ParseNarrative does not resolve it; its inline
	// comment says callers are expected to enrich it.
	NameDE string `json:"name_de"`
	// MatchedOn is the keyword or raw technical specification that triggered
	// the match.
	MatchedOn string `json:"matched_on"`
	// NOTE(review): nothing in the visible part of this file assigns Value or
	// Severity — presumably they are filled in by a caller; confirm.
	Value    string `json:"value,omitempty"` // e.g., "20000 kN", "400 V"
	Severity int    `json:"severity"`        // Derived severity 1-5
}

// TechSpec represents an extracted technical specification.
type TechSpec struct {
	// Value is the parsed numeric value.
	Value float64 `json:"value"`
	// Unit is the unit exactly as it appeared in the text.
	Unit string `json:"unit"`
	// Raw is the full matched text, number and unit included.
	Raw string `json:"raw"`
}

// ParseResult contains all entities extracted from a machine narrative.
type ParseResult struct {
	Components      []ComponentMatch `json:"components"`
	EnergySources   []EnergyMatch    `json:"energy_sources"`
	LifecyclePhases []string         `json:"lifecycle_phases"`
	Roles           []string         `json:"roles"`
	CustomTags      []string         `json:"custom_tags"`
	TechSpecs       []TechSpec       `json:"tech_specs"`
	// Confidence is the overall confidence in [0, 1]; ParseNarrative derives
	// it from the number of matched components.
	Confidence float64 `json:"confidence"`
}

// techSpecPattern matches numeric values with engineering units.
//
// Capture group 1 is the number: one digit followed by any run of digits,
// '.' and ','. Capture group 2 is the unit, taken from a fixed list and
// optionally separated from the number by whitespace. The match is
// case-sensitive and is not anchored to word boundaries, so a unit is also
// recognized when it is the prefix of a longer word.
var techSpecPattern = regexp.MustCompile(`(\d[\d.,]*)\s*(kN|Tonnen|tonnen|kJ|kW|MW|V|kV|Hz|°C|bar|mm|m³/h|/min|U/min|rpm|m/s)`)

// lifecycleKeywords maps German text patterns to lifecycle phase IDs.
var lifecycleKeywords = map[string]string{ "betrieb": "normal_operation", "normalbetrieb": "normal_operation", "automatikbetrieb": "auto_operation", "einricht": "setup", "umruest": "changeover", "werkzeugwechsel": "changeover", "wartung": "maintenance", "instandhalt": "maintenance", "instandsetz": "repair", "reinig": "cleaning", "transport": "transport", "montage": "assembly", "installation": "assembly", "inbetriebnahme": "commissioning", "ausserbetriebnahme": "decommissioning", "demontage": "disposal", "entsorgung": "disposal", "reparatur": "repair", "stoerungsbeseitig": "fault_clearing", "stoerung": "fault_clearing", "fehlersuche": "fault_clearing", "klemm": "fault_clearing", "blockier": "fault_clearing", "stau": "fault_clearing", } // roleKeywords maps German text patterns to role IDs. var roleKeywords = map[string]string{ "bedienpersonal": "operator", "bediener": "operator", "werker": "operator", "einrichter": "setup_personnel", "instandhalt": "maintenance_tech", "wartungspersonal": "maintenance_tech", "elektrofachkraft":"electrical_tech", "besucher": "visitor", "fremdfirma": "contractor", "reinigungspersonal": "cleaning_staff", "aufsichtsperson": "supervisor", "programmierer": "programmer", "auszubildend": "trainee", "leiharbeiter": "temp_worker", } // ParseNarrative extracts components, energy sources, lifecycle phases, // roles, and tags from a machine description text. Fully deterministic, // no LLM required. func ParseNarrative(text string) ParseResult { result := ParseResult{} if text == "" { return result } // Normalize text lower := strings.ToLower(text) lower = strings.ReplaceAll(lower, "ä", "ae") lower = strings.ReplaceAll(lower, "ö", "oe") lower = strings.ReplaceAll(lower, "ü", "ue") lower = strings.ReplaceAll(lower, "ß", "ss") // 1. Extract technical specifications result.TechSpecs = extractTechSpecs(text) // 2. 
Match keywords → components + energy + tags dictionary := GetKeywordDictionary() compLib := GetComponentLibrary() compMap := make(map[string]ComponentLibraryEntry) for _, c := range compLib { compMap[c.ID] = c } seenComponents := make(map[string]bool) seenEnergy := make(map[string]bool) tagSet := make(map[string]bool) for _, entry := range dictionary { for _, kw := range entry.Keywords { kwNorm := strings.ToLower(kw) kwNorm = strings.ReplaceAll(kwNorm, "ä", "ae") kwNorm = strings.ReplaceAll(kwNorm, "ö", "oe") kwNorm = strings.ReplaceAll(kwNorm, "ü", "ue") kwNorm = strings.ReplaceAll(kwNorm, "ß", "ss") if strings.Contains(lower, kwNorm) { // Add components for _, cid := range entry.ComponentIDs { if !seenComponents[cid] { seenComponents[cid] = true comp := compMap[cid] result.Components = append(result.Components, ComponentMatch{ LibraryID: cid, NameDE: comp.NameDE, MatchedOn: kw, Tags: comp.Tags, Confidence: 0.8, }) // Add component tags for _, t := range comp.Tags { tagSet[t] = true } } } // Add energy sources for _, eid := range entry.EnergyIDs { if !seenEnergy[eid] { seenEnergy[eid] = true result.EnergySources = append(result.EnergySources, EnergyMatch{ SourceID: eid, NameDE: eid, // Will be enriched by caller MatchedOn: kw, }) } } // Add extra tags for _, t := range entry.ExtraTags { tagSet[t] = true } break // First keyword match is enough per entry } } } // 3. Derive energy from tech specs for _, spec := range result.TechSpecs { deriveEnergyFromSpec(spec, &result, seenEnergy, tagSet) } // 4. Extract lifecycle phases phaseSet := make(map[string]bool) for kw, phase := range lifecycleKeywords { kwNorm := strings.ReplaceAll(kw, "ä", "ae") kwNorm = strings.ReplaceAll(kwNorm, "ö", "oe") kwNorm = strings.ReplaceAll(kwNorm, "ü", "ue") if strings.Contains(lower, kwNorm) { if !phaseSet[phase] { phaseSet[phase] = true result.LifecyclePhases = append(result.LifecyclePhases, phase) } } } // 5. 
Extract roles roleSet := make(map[string]bool) for kw, role := range roleKeywords { if strings.Contains(lower, kw) { if !roleSet[role] { roleSet[role] = true result.Roles = append(result.Roles, role) } } } // 6. Collect all tags for t := range tagSet { result.CustomTags = append(result.CustomTags, t) } // 7. Calculate overall confidence if len(result.Components) > 0 { result.Confidence = float64(len(result.Components)) / 15.0 // Normalize to ~1.0 for 15 components if result.Confidence > 1.0 { result.Confidence = 1.0 } } return result } // extractTechSpecs finds numeric values with engineering units in the text. func extractTechSpecs(text string) []TechSpec { matches := techSpecPattern.FindAllStringSubmatch(text, -1) var specs []TechSpec for _, m := range matches { valStr := strings.ReplaceAll(m[1], ".", "") valStr = strings.ReplaceAll(valStr, ",", ".") val, err := strconv.ParseFloat(valStr, 64) if err != nil { continue } specs = append(specs, TechSpec{ Value: val, Unit: m[2], Raw: m[0], }) } return specs } // deriveEnergyFromSpec maps technical values to energy sources and severity tags. 
func deriveEnergyFromSpec(spec TechSpec, result *ParseResult, seen map[string]bool, tags map[string]bool) { switch { case (spec.Unit == "kN" || spec.Unit == "Tonnen" || spec.Unit == "tonnen") && spec.Value > 100: addEnergy(result, seen, "EN01", spec.Raw) tags["high_force"] = true if spec.Value > 1000 { tags["crush_point"] = true } case (spec.Unit == "V" || spec.Unit == "kV"): if spec.Value >= 400 || spec.Unit == "kV" { addEnergy(result, seen, "EN05", spec.Raw) tags["high_voltage"] = true } else if spec.Value >= 50 { addEnergy(result, seen, "EN05", spec.Raw) tags["electrical_part"] = true } case spec.Unit == "°C" && spec.Value > 60: addEnergy(result, seen, "EN06", spec.Raw) tags["high_temperature"] = true if spec.Value > 100 { tags["thermal_accumulation"] = true } case spec.Unit == "bar" && spec.Value > 10: addEnergy(result, seen, "EN07", spec.Raw) tags["high_pressure"] = true case (spec.Unit == "kW" || spec.Unit == "MW") && spec.Value > 1: addEnergy(result, seen, "EN02", spec.Raw) tags["rotating_part"] = true case (spec.Unit == "/min" || spec.Unit == "U/min" || spec.Unit == "rpm") && spec.Value > 100: addEnergy(result, seen, "EN02", spec.Raw) tags["rotating_part"] = true if spec.Value > 500 { tags["high_speed"] = true } case spec.Unit == "kJ" && spec.Value > 10: addEnergy(result, seen, "EN03", spec.Raw) tags["stored_energy"] = true } } func addEnergy(result *ParseResult, seen map[string]bool, id, matchedOn string) { if !seen[id] { seen[id] = true result.EnergySources = append(result.EnergySources, EnergyMatch{ SourceID: id, MatchedOn: matchedOn, }) } }