package ucca import ( "sort" "strings" ) // Legal Concept Ontology — the fachliche IP bridge for the Concept->Norm recall // injector. The words users type ("Datenschutzerklärung", "Cookie Banner") are // rarely identical to the article titles that actually govern them (Art. 12/13/14 // DSGVO, § 25 TDDDG). Embedding similarity misses this leap, so these bridges are // curated: concept keyword -> load-bearing norm_ids. This is NOT a fallback to // hardcoding — it is domain knowledge that surfaces the normatively load-bearing // units within the (already correctly retrieved) documents. type conceptNorm struct { keywords []string normIDs []string } var legalConceptOntology = []conceptNorm{ {[]string{"datenschutzerklärung", "datenschutzerklaerung", "privacy policy", "datenschutzhinweise", "datenschutzinformation"}, []string{"EU-DSGVO-Art12", "EU-DSGVO-Art13", "EU-DSGVO-Art14"}}, {[]string{"cookie banner", "cookie-banner", "cookies", "cookie", "tracking"}, []string{"DE-TDDDG-§25", "EU-DSGVO-Art6", "EU-DSGVO-Art7"}}, {[]string{"dsfa", "folgenabschätzung", "folgenabschaetzung", "datenschutz-folgenabschätzung"}, []string{"EU-DSGVO-Art35", "EU-DSGVO-Art36"}}, {[]string{"auskunft", "auskunftsrecht", "auskunftsersuchen"}, []string{"EU-DSGVO-Art15"}}, {[]string{"löschung", "loeschung", "vergessenwerden", "recht auf vergessen"}, []string{"EU-DSGVO-Art17"}}, {[]string{"datenübertragbarkeit", "datenuebertragbarkeit", "portabilität", "portabilitaet"}, []string{"EU-DSGVO-Art20"}}, {[]string{"widerspruch", "widerspruchsrecht"}, []string{"EU-DSGVO-Art21"}}, {[]string{"datenpanne", "datenschutzverletzung", "data breach", "verletzung des schutzes"}, []string{"EU-DSGVO-Art33", "EU-DSGVO-Art34"}}, // E4-Quick-Curation (2026-07-01): resolved abbreviations (E2) pull their core norms. {[]string{"technische und organisatorische maßnahmen", "technische und organisatorische massnahmen"}, []string{"EU-DSGVO-Art32", "EU-DSGVO-Art25", "EU-DSGVO-Art5"}}, {[]string{"verzeichnis von verarbeitungstätigkeiten", "verzeichnis von verarbeitungstaetigkeiten", "verarbeitungsverzeichnis"}, []string{"EU-DSGVO-Art30"}}, {[]string{"auftragsverarbeitungsvertrag", "auftragsverarbeitung", "auftragsverarbeiter"}, []string{"EU-DSGVO-Art28"}}, {[]string{"datenschutzbeauftragt"}, []string{"EU-DSGVO-Art37", "EU-DSGVO-Art38", "EU-DSGVO-Art39"}}, } // ConceptNorms returns the load-bearing norm_ids for the concepts named in the // query (dedup, order-preserving). Empty if no concept is named. func ConceptNorms(query string) []string { q := strings.ToLower(query) seen := map[string]bool{} out := []string{} for _, cn := range legalConceptOntology { for _, kw := range cn.keywords { if strings.Contains(q, kw) { for _, nid := range cn.normIDs { if !seen[nid] { seen[nid] = true out = append(out, nid) } } break } } } return out } // InjectConceptNorms merges concept-injected norm units into the results so the // load-bearing norms are VISIBLE in the evidence set. Dedups by citation_unit // (skips norms already retrieved), then re-sorts by score — the injected units // carry a just-below-top score so they surface high WITHOUT displacing the top // document hit (inject, don't blindly dominate). Caps at topK. func InjectConceptNorms(results, injected []LegalSearchResult, topK int) []LegalSearchResult { if len(injected) == 0 { return results } present := map[string]bool{} for _, r := range results { if r.CitationUnit != "" { present[r.CitationUnit] = true } } merged := append([]LegalSearchResult{}, results...) for _, in := range injected { if in.CitationUnit != "" && !present[in.CitationUnit] { merged = append(merged, in) present[in.CitationUnit] = true } } sort.SliceStable(merged, func(i, j int) bool { return merged[i].Score > merged[j].Score }) if topK > 0 && len(merged) > topK { merged = merged[:topK] } return merged }