149 lines
7.0 KiB
Go
149 lines
7.0 KiB
Go
package ucca
|
|
|
|
import "strings"
|
|
|
|
// KnowledgeSpace is the CHIP-level knowledge domain used by the clarity gate's
|
|
// concentration signal + the user-facing context chips. It is deliberately RICHER
|
|
// than the 4 authority domains in authority.go (data_protection/cyber/ai/
|
|
// product_safety), which drive the EU-primary/subsidiarity rerank. The clarity
|
|
// gate must reflect the FULL corpus breadth (arbeitsschutz, arbeitsrecht,
|
|
// wirtschaftsrecht, finanz, ...) so a broad query surfaces as broad. Kept separate
|
|
// + additive so the tuned authority rerank stays untouched. Corpus-grounded from
|
|
// the 463 real regulation codes (0.3% fall through to "sonstiges").
|
|
|
|
// knowledgeSpaceExact resolves codes by whole-string equality only. It holds the
// short or ambiguous identifiers (2-3 character codes such as "OR", "AO", "BGB")
// for which a substring test would misfire. Keys are upper-case regulation
// codes; values are knowledge-space ids.
//
// The table is written grouped by knowledge space and flattened into a
// code -> space map once at package initialisation.
var knowledgeSpaceExact = func() map[string]string {
	groups := []struct {
		space string
		codes []string
	}{
		{"wirtschaftsrecht", []string{
			"HGB", "BGB", "AO", "OR", "ABGB", "UGB", "IFRS", "BAO",
			"GMBHG", "AKTG", "INSO", "USTG", "GOBD", "EGBGB", "GEWO", "URHG",
		}},
		{"datenschutz", []string{
			"DPF", "TKG", "TMG", "DDG", "DSG", "DSV", "DSM", "SCC", "EPRIVACY",
			"SCHREMS II", "CH_REVDSG", "PLANET49", "GOOGLE FONTS",
		}},
		{"digitale_dienste", []string{
			"DSA", "DMA", "DGA", "EHDS", "EIDAS", "EIDAS 2.0",
			"DATA ACT", "DATAACT", "DIGITAL CONTENT",
		}},
		{"produktsicherheit", []string{
			"MVO", "MACHINERY", "MASCHVO", "MASCHINENVO", "GPSR", "PID", "EAA",
			"BFSG", "ELEKTROG", "VERPACKG", "BATTVO", "BATTDG", "EU MDR",
		}},
		{"finanz", []string{
			"DORA", "PSD2", "MICA", "AMLR", "VAIT", "BAIT", "GWG",
		}},
		{"verbraucherschutz", []string{
			"UWG", "UCPD", "VSBG", "PANGV", "DL-INFOV", "OMNIBUS", "UWG AT",
			"PRODHAFTG", "PRODUKTHAFTUNGS-RL",
		}},
		{"arbeitsrecht", []string{
			"ARG",
		}},
	}

	total := 0
	for _, g := range groups {
		total += len(g.codes)
	}

	exact := make(map[string]string, total)
	for _, g := range groups {
		for _, code := range g.codes {
			exact[code] = g.space
		}
	}
	return exact
}()
|
|
|
|
// KnowledgeSpaceLabel translates a knowledge-space id into the text shown on the
// user-facing chip. Ids without an entry yield the empty string on lookup.
var KnowledgeSpaceLabel = map[string]string{
	"datenschutz":       "Datenschutz",
	"cyber":             "Cybersecurity",
	"ki":                "KI",
	"produktsicherheit": "Produktsicherheit",
	"arbeitsschutz":     "Arbeitsschutz",
	"arbeitsrecht":      "Arbeitsrecht",
	"wirtschaftsrecht":  "Wirtschaftsrecht",
	"finanz":            "Finanzregulierung",
	"digitale_dienste":  "Digitale Dienste",
	"verbraucherschutz": "Verbraucherschutz",
	"lieferkette":       "Lieferkette/Nachhaltigkeit",
	"hinweisgeber":      "Hinweisgeberschutz",
	"sonstiges":         "Sonstiges",
}
|
|
|
|
// KnowledgeSpaceOf maps a regulation_code to a knowledge space. Robust to code
|
|
// variants (MVO/MASCHVO/MASCHINENVO -> produktsicherheit; DSK SDM / SDM B51 ->
|
|
// datenschutz). Returns "" for empty/untagged codes (not a knowledge space).
|
|
func KnowledgeSpaceOf(code string) string {
|
|
c := strings.ToUpper(strings.TrimSpace(code))
|
|
if c == "" || c == "NONE" {
|
|
return ""
|
|
}
|
|
if d, ok := knowledgeSpaceExact[c]; ok {
|
|
return d
|
|
}
|
|
has := func(subs ...string) bool {
|
|
for _, s := range subs {
|
|
if strings.Contains(c, s) {
|
|
return true
|
|
}
|
|
}
|
|
return false
|
|
}
|
|
pre := func(subs ...string) bool {
|
|
for _, s := range subs {
|
|
if strings.HasPrefix(c, s) {
|
|
return true
|
|
}
|
|
}
|
|
return false
|
|
}
|
|
switch {
|
|
case pre("TRGS", "TRBS", "ASR", "OSHA") || has("ARBSCHG", "GEFAHRSTOFF"):
|
|
return "arbeitsschutz"
|
|
case has("AI ACT", "KI-VO", "KI VERORDNUNG", "GPAI", "AI RMF", "HLEG AI", "GENAI", "OECD AI", "AI PRINCIPLES", "OH KI", "KI BEHOERDEN", "KI SICHERHEIT", "POS KI"):
|
|
return "ki"
|
|
case pre("DSGVO", "BDSG", "TDDDG", "DSK", "EDPB", "WP24", "WP25", "WP26", "DSFA", "BFDI", "BAYLDA", "BAYLFB", "EDPS") || has("DATENSCHUTZ", "LOESCHKONZEPT", "LOESCHUNG", "VVT", "TELEMEDIEN", "EU US DPF", "BESCHAEFTIGTENDATEN"):
|
|
return "datenschutz"
|
|
case has("CRA", "NIS2", "NISG", "BSIG", "BSI-TR", "BSI_KRITIS", "KRITIS", "ENISA", "NIST", "OWASP", "EUCSA", "EUCC", "CISA", "CYCLONEDX", "SPDX", "SLSA", "OPENTELEMETRY", "CVSS", "SECURE BY DESIGN"):
|
|
return "cyber"
|
|
case has("MACHINERY", "MASCH", "BLUE GUIDE", "FDA HFE"):
|
|
return "produktsicherheit"
|
|
case has("LKSG", "CSDDD", "CSRD", "TAXONOMY"):
|
|
return "lieferkette"
|
|
case has("HINSCHG", "GESCHGEHG"):
|
|
return "hinweisgeber"
|
|
case pre("BAG ", "BAG_") || has("ARBVG", "AZG", "ARBZG", "BETRVG", "KSCHG", "MUSCHG", "AGG", "MILOG", "TZBFG", "NACHWG", "BURLG", "611A", "PAY TRANSPARENCY", "ANGG", "MUTTERSCHUTZ"):
|
|
return "arbeitsrecht"
|
|
case has("ECOMMERCE", "ECG", "MEDIENG", "VERBRAUCHERRECHTE", "DIGITAL CONTENT"):
|
|
return "verbraucherschutz"
|
|
case pre("EUGH", "BVERFG", "BVGE", "BGH", "OGH") || has("EU TAXONOMY"):
|
|
return "wirtschaftsrecht"
|
|
default:
|
|
return "sonstiges"
|
|
}
|
|
}
|
|
|
|
// ScopeResults implements G1 scope-gating: when the query names a regulation, its
|
|
// knowledge space's hits LEAD the result set (the L2 answer + [n] citations are
|
|
// built on this order, so scoped answers cite the named regulation instead of the
|
|
// embedding-majority domain). Non-scoped hits backfill to keep topK. Stable within
|
|
// each partition. Returns results unchanged when scope is "".
|
|
func ScopeResults(results []LegalSearchResult, scope string, topK int) []LegalSearchResult {
|
|
if scope == "" {
|
|
return results
|
|
}
|
|
scoped := make([]LegalSearchResult, 0, len(results))
|
|
rest := make([]LegalSearchResult, 0, len(results))
|
|
for _, r := range results {
|
|
if KnowledgeSpaceOf(r.RegulationCode) == scope {
|
|
scoped = append(scoped, r)
|
|
} else {
|
|
rest = append(rest, r)
|
|
}
|
|
}
|
|
out := append(scoped, rest...)
|
|
if topK > 0 && len(out) > topK {
|
|
out = out[:topK]
|
|
}
|
|
return out
|
|
}
|
|
|
|
// FilterByKnowledgeSpace returns ONLY the results in the given knowledge space —
|
|
// a HARD scope with no off-domain backfill. Used by E5 context scoping: when the
|
|
// user explicitly chose a domain chip, off-domain regelwerke (MDR/UStG/eIDAS) must
|
|
// not reappear in the evidence. Falls back to the input when the domain has no hits
|
|
// (never strand the answer). Caps topK.
|
|
func FilterByKnowledgeSpace(results []LegalSearchResult, scope string, topK int) []LegalSearchResult {
|
|
if scope == "" {
|
|
return results
|
|
}
|
|
out := make([]LegalSearchResult, 0, len(results))
|
|
for _, r := range results {
|
|
if KnowledgeSpaceOf(r.RegulationCode) == scope {
|
|
out = append(out, r)
|
|
}
|
|
}
|
|
if len(out) == 0 {
|
|
return results
|
|
}
|
|
if topK > 0 && len(out) > topK {
|
|
out = out[:topK]
|
|
}
|
|
return out
|
|
}
|