package ucca

import (
	"sort"
	"strings"
	"unicode"
)

// Re-ranking coefficients (validated in the offline golden harness; Phase A — conservative).
const (
	authorityCoef        = 0.40 // * weight/100
	jurisdictionGain     = 0.05 // binding/guidance from DE or EU
	foreignPenalty       = 0.60 // foreign law on a DE/EU question (demoted, not removed)
	unknownPenalty       = 0.08
	domainMatchGain      = 0.15
	offDomainPenalty     = 0.10 // off-domain binding (demoted, not removed)
	scopePenalty         = 0.25 // BDSG Part 3 (law enforcement) on a general DP question
	topicGain            = 0.18 // amplifier only
	supersededPenalty    = 0.50 // superseded legacy source (pre-eu-v1): demoted, not hidden
	guidanceIntentGain   = 0.10 // epsilon by which a qualifying guideline is lifted ABOVE the best binding hit
	guidanceIntentMargin = 0.05 // ...only if the guideline is semantically competitive with binding
)

// guidanceIntentSignals mark a query that EXPLICITLY asks for an interpretation /
// recommendation by a guidance body, rather than for the binding obligation. Only
// then may a (semantically competitive) guideline outrank the binding norm.
//
// Entries must be lowercase. Entries of at most five bytes (acronyms and function
// words such as "bsi" or "laut") must be single words and are matched against whole
// words of the query; longer entries are stems/phrases and are matched as substrings
// so that inflected forms ("leitlinien", "empfehlungen") still hit.
var guidanceIntentSignals = []string{
	"edpb", "europäischer datenschutzausschuss", "europaeischer datenschutzausschuss",
	"dsk", "enisa", "bsi", "leitlinie", "guideline", "orientierungshilfe",
	"auslegung", "empfiehlt", "empfehlung", "sagt", "laut",
}

// queryWantsGuidance reports whether the query explicitly asks for guidance/interpretation.
//
// The query is lowercased and compared against guidanceIntentSignals. Short signals
// only count when they appear as a complete word: a plain substring test would fire
// on "bsi" inside "website", "sagt" inside "untersagt" or "laut" inside "lautet" and
// misclassify ordinary obligation questions as guidance requests.
func queryWantsGuidance(query string) bool {
	// Signals up to this length (in bytes) are matched as whole words only.
	const maxWholeWordLen = 5

	q := strings.ToLower(query)

	// Words are maximal runs of letters/digits, so "EDPB-Leitlinien" or a quoted
	// „BSI“ still yield the bare tokens "edpb" / "bsi".
	words := strings.FieldsFunc(q, func(r rune) bool {
		return !unicode.IsLetter(r) && !unicode.IsDigit(r)
	})
	hasWord := func(w string) bool {
		for _, tok := range words {
			if tok == w {
				return true
			}
		}
		return false
	}

	for _, sig := range guidanceIntentSignals {
		if len(sig) <= maxWholeWordLen {
			if hasWord(sig) {
				return true
			}
			continue
		}
		if strings.Contains(q, sig) {
			return true
		}
	}
	return false
}

// bestBindingSemantic returns the highest RAW semantic score among binding-law
// results (0 if none / intent not requested). Used as the guard threshold so an
// off-topic guideline cannot ride the interpretation-intent boost.
func bestBindingSemantic(results []LegalSearchResult, wantsGuidance bool) float64 { if !wantsGuidance { return 0 } best := 0.0 for _, r := range results { if r.SourceClass == "binding_law" && r.Score > best { best = r.Score } } return best } // authorityScore computes the normative relevance of a result for a query. It augments the // semantic score with authority/jurisdiction/domain/scope/topic signals. Exposed for tests. func authorityScore(query string, r LegalSearchResult, qDomain string, qForeign bool) float64 { info := classifyAuthority(r) score := r.Score + authorityCoef*float64(info.weight)/100.0 if r.Superseded { // Alt-Quelle (pre-eu-v1): Default-Fragen sollen die eu-v1-Norm sehen. Demoted, // nicht entfernt — fuer Historie/Uebergangsfragen bleibt sie auffindbar. score -= supersededPenalty } if info.jurisdiction == "CH" && !qForeign { score -= foreignPenalty // Fremdrecht bei DE/EU-Frage: demoted, nicht geloescht } else { score += jurisdictionGain } if info.sourceClass == "unknown" { score -= unknownPenalty } if qDomain != "" { switch cd := chunkDomain(r); { case cd == qDomain: score += domainMatchGain case cd != "": score -= offDomainPenalty // off-domain binding: demoted, nicht geloescht } } if qDomain == "data_protection" && scopeClass(r) == "law_enforcement" { score -= scopePenalty } if resultMatchesTopic(query, r) { score += topicGain // Verstaerker, kein Override } return score } // rerankByAuthority re-orders results so binding law from the matching jurisdiction/domain // ranks above guidance, foreign and off-domain law — WITHOUT dropping anything (guidance is // kept as interpretation context). The computed score is written back to Score so downstream // merges (e.g. the multi-collection advisor) preserve this order. Pure + deterministic. 
func rerankByAuthority(query string, results []LegalSearchResult) []LegalSearchResult { if len(results) < 2 { return results } qDomain := queryDomain(query) qForeign := queryIsForeign(query) wantsGuidance := queryWantsGuidance(query) bestBindingSem := bestBindingSemantic(results, wantsGuidance) out := make([]LegalSearchResult, len(results)) copy(out, results) for i := range out { out[i].Score = authorityScore(query, out[i], qDomain, qForeign) } if wantsGuidance { applyGuidanceIntent(out, results, bestBindingSem) } sort.SliceStable(out, func(a, b int) bool { return out[a].Score > out[b].Score }) return out } // applyGuidanceIntent lifts semantically-competitive guidance just ABOVE the best // binding hit (ordered by semantic), so an EXPLICIT interpretation question can // return guidance Top-1. Obligation questions (no intent → not called) keep // binding on top. Guidance below the semantic margin is left untouched, so an // off-topic guideline can never ride the override — and the lift is computed from // the binding FINAL score, so authority/topic/domain bonuses cannot edge it out. func applyGuidanceIntent(out, raw []LegalSearchResult, bestBindingSem float64) { bestBindingFinal := 0.0 for i := range out { if out[i].SourceClass == "binding_law" && out[i].Score > bestBindingFinal { bestBindingFinal = out[i].Score } } for i := range out { if out[i].SourceClass != "supervisory_guidance" || raw[i].Score < bestBindingSem-guidanceIntentMargin { continue } lifted := bestBindingFinal + guidanceIntentGain + (raw[i].Score - bestBindingSem) if lifted > out[i].Score { out[i].Score = lifted } } }