package iace

import "strings"

// Negation-aware keyword matching for the narrative parser.
//
// The limits form often states what a machine does NOT have, e.g.
// "Keine pneumatischen oder hydraulischen Schnittstellen. Hubantrieb ueber
// Kette." Naive substring matching wrongly extracted hydraulic/pneumatic
// components (and their energy sources), which then generated phantom hazards
// the assessor never raised.
//
// Only DETERMINER negations ("keine/kein/ohne/weder ") are suppressed: they
// directly negate the noun phrase that follows and are therefore safe. Plain
// "nicht" is intentionally excluded because it modifies verbs/adjectives and
// over-negates ("Schutz nicht erforderlich, Zylinder vorhanden").
//
// negationDeterminers is the set of tokens that open a negation scope. It holds
// the inflected German forms of "kein", plus "keinerlei", "weder" and "ohne",
// and the English equivalents "no" and "without". Lookups are exact-match, so
// the entries are lower-case like the normalised text they are compared with.
var negationDeterminers = map[string]bool{
	"keine": true, "kein": true, "keinen": true, "keiner": true, "keinem": true,
	"keines": true, "keinerlei": true, "weder": true, "ohne": true,
	"no": true, "without": true,
}

// Tokens that end a negation's scope: a contrast ("aber") or a positive-presence
// cue ("mit ... vorhanden"). After one of these, a following keyword is positive.
// The set contains German and English ("but", "with") entries.
var negationScopeEnders = map[string]bool{
	"aber": true, "jedoch": true, "sondern": true, "doch": true,
	"mit": true, "vorhanden": true, "verbaut": true, "vorgesehen": true,
	"installiert": true, "ausgestattet": true, "but": true, "with": true,
}

// A negation determiner only reaches a keyword a few tokens away (a short list
// like "keine A, B oder C"). Beyond this span we assume the keyword is unrelated.
const negationMaxTokenSpan = 8

// keywordIsNegated reports whether the keyword occurrence starting at byte index
// idx in the (already normalised) text sits inside the scope of a determiner
// negation. It walks back to the start of the current sentence, then scans the
// preceding tokens right-to-left for a determiner negation, stopping at any
// scope-ender or after negationMaxTokenSpan tokens.
func keywordIsNegated(text string, idx int) bool { start := 0 for i := idx - 1; i >= 0; i-- { c := text[i] if c == '.' || c == '\n' || c == ';' || c == '!' || c == '?' || c == ':' { start = i + 1 break } } tokens := strings.Fields(text[start:idx]) for d := 0; d < len(tokens) && d < negationMaxTokenSpan; d++ { w := strings.Trim(tokens[len(tokens)-1-d], ",.;:()-") if negationScopeEnders[w] { return false } if negationDeterminers[w] { return true } } return false } // hasUnnegatedOccurrence reports whether kw appears in text at least once outside // a negation scope. A term that is ONLY ever negated must not create components, // energy sources or tags. func hasUnnegatedOccurrence(text, kw string) bool { from := 0 for { rel := strings.Index(text[from:], kw) if rel < 0 { return false } abs := from + rel if !keywordIsNegated(text, abs) { return true } from = abs + len(kw) } }