package ucca

import (
	"sort"
	"strings"
)

// Re-ranking coefficients (validated in the offline golden harness; Phase A — conservative).
// authorityScore adds or subtracts them from a result's raw semantic score.
const (
	authorityCoef     = 0.40 // * weight/100
	jurisdictionGain  = 0.05 // binding/guidance from DE or EU
	foreignPenalty    = 0.60 // foreign law on a DE/EU question (demoted, not removed)
	unknownPenalty    = 0.08 // source class "unknown"
	domainMatchGain   = 0.15 // chunk domain equals the query domain
	offDomainPenalty  = 0.10 // off-domain binding (demoted, not removed)
	scopePenalty      = 0.25 // BDSG Teil 3 (law enforcement) on a general DP question
	topicGain         = 0.18 // amplifier only
	supersededPenalty = 0.50 // superseded legacy source (pre-eu-v1): demoted, not hidden
	intentLiftGain    = 0.10 // epsilon a qualifying interpretative source is lifted ABOVE the best binding
	intentLiftMargin  = 0.05 // ...only if that source is semantically competitive with binding
)

// guidanceIntentSignals mark a query that EXPLICITLY asks for an interpretation /
// recommendation by a guidance body, rather than for the binding obligation. Only
// then may a (semantically competitive) guideline outrank the binding norm.
// Entries are written in lower case; queries are lower-cased before comparison.
var guidanceIntentSignals = []string{
	"edpb", "europäischer datenschutzausschuss", "europaeischer datenschutzausschuss",
	"dsk", "enisa", "bsi", "leitlinie", "guideline", "orientierungshilfe", "auslegung",
	"empfiehlt", "empfehlung", "sagt", "laut",
}

// controlIntentSignals mark a query that asks HOW to implement / which controls or
// measures fit — rather than WHAT the binding obligation is. Only then may a
// (semantically competitive) technical_standard outrank the binding norm.
var controlIntentSignals = []string{ "control", "controls", "maßnahme", "massnahme", "schutzmaßnahme", "best practice", "best-practice", "umsetzen", "implementier", "absicher", "härt", "haert", "hardening", "nist", "owasp", "grundschutz", "ccm", "iso 27001", "isms", } func queryMatchesAny(query string, signals []string) bool { q := strings.ToLower(query) for _, sig := range signals { if strings.Contains(q, sig) { return true } } return false } // queryWantsGuidance reports whether the query explicitly asks for guidance/interpretation. func queryWantsGuidance(query string) bool { return queryMatchesAny(query, guidanceIntentSignals) } // queryWantsControls reports whether the query asks for implementation controls/measures. func queryWantsControls(query string) bool { return queryMatchesAny(query, controlIntentSignals) } // bestBindingSemantic returns the highest RAW semantic score among binding-law // results (0 if none / no intent). Used as the guard threshold so an off-topic // interpretative source cannot ride the intent boost. func bestBindingSemantic(results []LegalSearchResult, wantsIntent bool) float64 { if !wantsIntent { return 0 } best := 0.0 for _, r := range results { if r.SourceClass == "binding_law" && r.Score > best { best = r.Score } } return best } // authorityScore computes the normative relevance of a result for a query. It augments the // semantic score with authority/jurisdiction/domain/scope/topic signals. Exposed for tests. func authorityScore(query string, r LegalSearchResult, qDomain string, qForeign bool) float64 { info := classifyAuthority(r) score := r.Score + authorityCoef*float64(info.weight)/100.0 if r.Superseded { // Alt-Quelle (pre-eu-v1): Default-Fragen sollen die eu-v1-Norm sehen. Demoted, // nicht entfernt — fuer Historie/Uebergangsfragen bleibt sie auffindbar. 
score -= supersededPenalty } if info.jurisdiction == "CH" && !qForeign { score -= foreignPenalty // Fremdrecht bei DE/EU-Frage: demoted, nicht geloescht } else { score += jurisdictionGain } if info.sourceClass == "unknown" { score -= unknownPenalty } if qDomain != "" { switch cd := chunkDomain(r); { case cd == qDomain: score += domainMatchGain case cd != "": score -= offDomainPenalty // off-domain binding: demoted, nicht geloescht } } if qDomain == "data_protection" && scopeClass(r) == "law_enforcement" { score -= scopePenalty } if resultMatchesTopic(query, r) { score += topicGain // Verstaerker, kein Override } return score } // rerankByAuthority re-orders results so binding law from the matching jurisdiction/domain // ranks above guidance, foreign and off-domain law — WITHOUT dropping anything (guidance is // kept as interpretation context). The computed score is written back to Score so downstream // merges (e.g. the multi-collection advisor) preserve this order. Pure + deterministic. func rerankByAuthority(query string, results []LegalSearchResult) []LegalSearchResult { if len(results) < 2 { return results } qDomain := queryDomain(query) qForeign := queryIsForeign(query) wantsGuidance := queryWantsGuidance(query) wantsControls := queryWantsControls(query) bestBindingSem := bestBindingSemantic(results, wantsGuidance || wantsControls) out := make([]LegalSearchResult, len(results)) copy(out, results) for i := range out { out[i].Score = authorityScore(query, out[i], qDomain, qForeign) } // Explicit interpretation intent → a competitive guideline may outrank binding; // explicit implementation intent → a competitive technical_standard may. Both lift // ABOVE the best binding FINAL, so a pure norm question (neither intent) is untouched. 
if wantsGuidance { liftAboveBinding(out, results, bestBindingSem, "supervisory_guidance") } if wantsControls { liftAboveBinding(out, results, bestBindingSem, "technical_standard") } sort.SliceStable(out, func(a, b int) bool { return out[a].Score > out[b].Score }) return out } // liftAboveBinding lifts a semantically-competitive interpretative source (the given // sourceClass — supervisory_guidance or technical_standard) just ABOVE the best binding // hit, ordered by semantic, so an EXPLICIT guidance/implementation question can return // that source Top-1. A pure norm question (no intent → not called) keeps binding on top. // Sources below the semantic margin are left untouched, so an off-topic source can never // ride the override — and the lift is from the binding FINAL score, so authority/topic/ // domain bonuses cannot edge it out. func liftAboveBinding(out, raw []LegalSearchResult, bestBindingSem float64, sourceClass string) { bestBindingFinal := 0.0 for i := range out { if out[i].SourceClass == "binding_law" && out[i].Score > bestBindingFinal { bestBindingFinal = out[i].Score } } for i := range out { if out[i].SourceClass != sourceClass || raw[i].Score < bestBindingSem-intentLiftMargin { continue } lifted := bestBindingFinal + intentLiftGain + (raw[i].Score - bestBindingSem) if lifted > out[i].Score { out[i].Score = lifted } } }