package ucca

import "strings"

// KnowledgeSpace is the CHIP-level knowledge domain used by the clarity gate's
// concentration signal + the user-facing context chips. It is deliberately RICHER
// than the 4 authority domains in authority.go (data_protection/cyber/ai/
// product_safety), which drive the EU-primary/subsidiarity rerank. The clarity
// gate must reflect the FULL corpus breadth (arbeitsschutz, arbeitsrecht,
// wirtschaftsrecht, finanz, ...) so a broad query surfaces as broad. Kept separate
// + additive so the tuned authority rerank stays untouched. Corpus-grounded from
// the 463 real regulation codes (0.3% fall through to "sonstiges").

// knowledgeSpaceExact matches short/ambiguous codes by EXACT string (substring
// would misfire on 2-3 char codes like "OR"/"AO"/"BGB"). Keys are upper case
// because KnowledgeSpaceOf upper-cases and trims the code before the lookup.
var knowledgeSpaceExact = map[string]string{
	// wirtschaftsrecht
	"HGB": "wirtschaftsrecht", "BGB": "wirtschaftsrecht", "AO": "wirtschaftsrecht", "OR": "wirtschaftsrecht",
	"ABGB": "wirtschaftsrecht", "UGB": "wirtschaftsrecht", "IFRS": "wirtschaftsrecht", "BAO": "wirtschaftsrecht",
	"GMBHG": "wirtschaftsrecht", "AKTG": "wirtschaftsrecht", "INSO": "wirtschaftsrecht", "USTG": "wirtschaftsrecht",
	"GOBD": "wirtschaftsrecht", "EGBGB": "wirtschaftsrecht", "GEWO": "wirtschaftsrecht", "URHG": "wirtschaftsrecht",
	// datenschutz
	"DPF": "datenschutz", "TKG": "datenschutz", "TMG": "datenschutz", "DDG": "datenschutz",
	"DSG": "datenschutz", "DSV": "datenschutz", "DSM": "datenschutz", "SCC": "datenschutz",
	"EPRIVACY": "datenschutz", "SCHREMS II": "datenschutz", "CH_REVDSG": "datenschutz", "PLANET49": "datenschutz",
	"GOOGLE FONTS": "datenschutz",
	// digitale_dienste
	"DSA": "digitale_dienste", "DMA": "digitale_dienste", "DGA": "digitale_dienste", "EHDS": "digitale_dienste",
	"EIDAS": "digitale_dienste", "EIDAS 2.0": "digitale_dienste", "DATA ACT": "digitale_dienste", "DATAACT": "digitale_dienste",
	"DIGITAL CONTENT": "digitale_dienste",
	// produktsicherheit
	"MVO": "produktsicherheit", "MACHINERY": "produktsicherheit", "MASCHVO": "produktsicherheit", "MASCHINENVO": "produktsicherheit",
	"GPSR": "produktsicherheit", "PID": "produktsicherheit", "EAA": "produktsicherheit", "BFSG": "produktsicherheit",
	"ELEKTROG": "produktsicherheit", "VERPACKG": "produktsicherheit", "BATTVO": "produktsicherheit", "BATTDG": "produktsicherheit",
	"EU MDR": "produktsicherheit",
	// finanz
	"DORA": "finanz", "PSD2": "finanz", "MICA": "finanz", "AMLR": "finanz",
	"VAIT": "finanz", "BAIT": "finanz", "GWG": "finanz",
	// verbraucherschutz
	"UWG": "verbraucherschutz", "UCPD": "verbraucherschutz", "VSBG": "verbraucherschutz", "PANGV": "verbraucherschutz",
	"DL-INFOV": "verbraucherschutz", "OMNIBUS": "verbraucherschutz", "UWG AT": "verbraucherschutz", "PRODHAFTG": "verbraucherschutz",
	"PRODUKTHAFTUNGS-RL": "verbraucherschutz",
	// arbeitsrecht
	"ARG": "arbeitsrecht",
}

// KnowledgeSpaceLabel maps a knowledge-space id to a user-facing chip label.
// It has one entry for every id that knowledgeSpaceExact uses as a value.
var KnowledgeSpaceLabel = map[string]string{
	"datenschutz":       "Datenschutz",
	"cyber":             "Cybersecurity",
	"ki":                "KI",
	"produktsicherheit": "Produktsicherheit",
	"arbeitsschutz":     "Arbeitsschutz",
	"arbeitsrecht":      "Arbeitsrecht",
	"wirtschaftsrecht":  "Wirtschaftsrecht",
	"finanz":            "Finanzregulierung",
	"digitale_dienste":  "Digitale Dienste",
	"verbraucherschutz": "Verbraucherschutz",
	"lieferkette":       "Lieferkette/Nachhaltigkeit",
	"hinweisgeber":      "Hinweisgeberschutz",
	"sonstiges":         "Sonstiges",
}

// KnowledgeSpaceOf maps a regulation_code to a knowledge space. Robust to code
// variants (MVO/MASCHVO/MASCHINENVO -> produktsicherheit; DSK SDM -> datenschutz).
// Returns "" for empty/untagged codes (not a knowledge space).
// NOTE(review): SDM B51 is meant to resolve to datenschutz as well; confirm the
// matcher has a rule for codes that start with "SDM" rather than "DSK".
func KnowledgeSpaceOf(code string) string { c := strings.ToUpper(strings.TrimSpace(code)) if c == "" || c == "NONE" { return "" } if d, ok := knowledgeSpaceExact[c]; ok { return d } has := func(subs ...string) bool { for _, s := range subs { if strings.Contains(c, s) { return true } } return false } pre := func(subs ...string) bool { for _, s := range subs { if strings.HasPrefix(c, s) { return true } } return false } switch { case pre("TRGS", "TRBS", "ASR", "OSHA") || has("ARBSCHG", "GEFAHRSTOFF"): return "arbeitsschutz" case has("AI ACT", "KI-VO", "KI VERORDNUNG", "GPAI", "AI RMF", "HLEG AI", "GENAI", "OECD AI", "AI PRINCIPLES", "OH KI", "KI BEHOERDEN", "KI SICHERHEIT", "POS KI"): return "ki" case pre("DSGVO", "BDSG", "TDDDG", "DSK", "EDPB", "WP24", "WP25", "WP26", "DSFA", "BFDI", "BAYLDA", "BAYLFB", "EDPS") || has("DATENSCHUTZ", "LOESCHKONZEPT", "LOESCHUNG", "VVT", "TELEMEDIEN", "EU US DPF", "BESCHAEFTIGTENDATEN"): return "datenschutz" case has("CRA", "NIS2", "NISG", "BSIG", "BSI-TR", "BSI_KRITIS", "KRITIS", "ENISA", "NIST", "OWASP", "EUCSA", "EUCC", "CISA", "CYCLONEDX", "SPDX", "SLSA", "OPENTELEMETRY", "CVSS", "SECURE BY DESIGN"): return "cyber" case has("MACHINERY", "MASCH", "BLUE GUIDE", "FDA HFE"): return "produktsicherheit" case has("LKSG", "CSDDD", "CSRD", "TAXONOMY"): return "lieferkette" case has("HINSCHG", "GESCHGEHG"): return "hinweisgeber" case pre("BAG ", "BAG_") || has("ARBVG", "AZG", "ARBZG", "BETRVG", "KSCHG", "MUSCHG", "AGG", "MILOG", "TZBFG", "NACHWG", "BURLG", "611A", "PAY TRANSPARENCY", "ANGG", "MUTTERSCHUTZ"): return "arbeitsrecht" case has("ECOMMERCE", "ECG", "MEDIENG", "VERBRAUCHERRECHTE", "DIGITAL CONTENT"): return "verbraucherschutz" case pre("EUGH", "BVERFG", "BVGE", "BGH", "OGH") || has("EU TAXONOMY"): return "wirtschaftsrecht" default: return "sonstiges" } } // ScopeResults implements G1 scope-gating: when the query names a regulation, its // knowledge space's hits LEAD the result set (the L2 answer + [n] citations are // built on this 
order, so scoped answers cite the named regulation instead of the // embedding-majority domain). Non-scoped hits backfill to keep topK. Stable within // each partition. Returns results unchanged when scope is "". func ScopeResults(results []LegalSearchResult, scope string, topK int) []LegalSearchResult { if scope == "" { return results } scoped := make([]LegalSearchResult, 0, len(results)) rest := make([]LegalSearchResult, 0, len(results)) for _, r := range results { if KnowledgeSpaceOf(r.RegulationCode) == scope { scoped = append(scoped, r) } else { rest = append(rest, r) } } out := append(scoped, rest...) if topK > 0 && len(out) > topK { out = out[:topK] } return out } // FilterByKnowledgeSpace returns ONLY the results in the given knowledge space — // a HARD scope with no off-domain backfill. Used by E5 context scoping: when the // user explicitly chose a domain chip, off-domain regelwerke (MDR/UStG/eIDAS) must // not reappear in the evidence. Falls back to the input when the domain has no hits // (never strand the answer). Caps topK. func FilterByKnowledgeSpace(results []LegalSearchResult, scope string, topK int) []LegalSearchResult { if scope == "" { return results } out := make([]LegalSearchResult, 0, len(results)) for _, r := range results { if KnowledgeSpaceOf(r.RegulationCode) == scope { out = append(out, r) } } if len(out) == 0 { return results } if topK > 0 && len(out) > topK { out = out[:topK] } return out }