breakpilot-compliance/ai-compliance-sdk/internal/ucca/authority_rerank.go

package ucca

import (
	"sort"
	"strings"
	"unicode"
)

// Re-ranking coefficients (validated in the offline golden harness; Phase A — conservative).
// All values are added to or subtracted from a result's score by authorityScore and
// applyGuidanceIntent.
const (
	authorityCoef        = 0.40 // * weight/100, added on top of the semantic score
	jurisdictionGain     = 0.05 // binding/guidance from DE or EU (applied to every result not demoted as foreign)
	foreignPenalty       = 0.60 // foreign law on a DE/EU question (demoted, not removed)
	unknownPenalty       = 0.08 // classified source class is "unknown"
	domainMatchGain      = 0.15 // chunk domain equals the query domain
	offDomainPenalty     = 0.10 // off-domain binding (demoted, not removed)
	scopePenalty         = 0.25 // BDSG Teil 3 (law enforcement) on a general DP question
	topicGain            = 0.18 // amplifier only
	supersededPenalty    = 0.50 // superseded legacy source (pre-eu-v1): demoted, not hidden
	guidanceIntentGain   = 0.10 // epsilon a qualifying guideline is lifted ABOVE the best binding hit
	guidanceIntentMargin = 0.05 // ...only if the guideline is semantically competitive with binding
)

// guidanceIntentSignals mark a query that EXPLICITLY asks for an interpretation /
// recommendation by a guidance body, rather than for the binding obligation. Only
// then may a (semantically competitive) guideline outrank the binding norm.
var guidanceIntentSignals = []string{
	"edpb", "europäischer datenschutzausschuss", "europaeischer datenschutzausschuss",
	"dsk", "enisa", "bsi", "leitlinie", "guideline", "orientierungshilfe",
	"auslegung", "empfiehlt", "empfehlung", "sagt", "laut",
}

// guidanceIntentWholeWord names the signals that only count when they appear as a
// standalone word. They are short enough to occur inside unrelated words ("bsi" in
// "website", "sagt" in "untersagt", "laut" in "lautet"), where a plain substring
// match would misread an obligation question as a request for guidance.
var guidanceIntentWholeWord = map[string]bool{
	"edpb":  true,
	"dsk":   true,
	"enisa": true,
	"bsi":   true,
	"sagt":  true,
	"laut":  true,
}

// queryWantsGuidance reports whether the query explicitly asks for guidance/interpretation.
//
// Matching is case-insensitive. Signals listed in guidanceIntentWholeWord must match a
// complete word of the query (words are maximal runs of letters and digits, so
// "EDPB-Leitlinien" yields the word "edpb"). All other signals match as substrings so
// that plurals and compounds ("Leitlinien", "Handlungsempfehlung") are still recognised.
func queryWantsGuidance(query string) bool {
	q := strings.ToLower(query)

	// Whole-word signals: split on anything that is neither a letter nor a digit.
	words := strings.FieldsFunc(q, func(r rune) bool {
		return !unicode.IsLetter(r) && !unicode.IsDigit(r)
	})
	for _, w := range words {
		if guidanceIntentWholeWord[w] {
			return true
		}
	}

	// Stem signals: substring match, skipping those already handled above.
	for _, sig := range guidanceIntentSignals {
		if guidanceIntentWholeWord[sig] {
			continue
		}
		if strings.Contains(q, sig) {
			return true
		}
	}
	return false
}

// bestBindingSemantic yields the largest Score found among results whose
// SourceClass is "binding_law". It is meant to be called with the RAW (not yet
// re-ranked) results, so the value serves as the semantic guard threshold for the
// interpretation-intent boost. The result is 0 when wantsGuidance is false, when
// there is no binding-law result, or when no binding score exceeds 0.
func bestBindingSemantic(results []LegalSearchResult, wantsGuidance bool) float64 {
	top := 0.0
	if wantsGuidance {
		for i := range results {
			if results[i].SourceClass != "binding_law" {
				continue
			}
			if s := results[i].Score; s > top {
				top = s
			}
		}
	}
	return top
}

// authorityScore computes the normative relevance of a result for a query: the
// semantic score r.Score plus/minus authority, jurisdiction, domain, scope and
// topic adjustments. Exposed for tests.
//
// query is only used for the topic match; qDomain is the query's domain ("" means
// no domain adjustment is made) and qForeign tells whether the query itself targets
// foreign law. The adjustments are applied in a fixed order and are purely additive.
func authorityScore(query string, r LegalSearchResult, qDomain string, qForeign bool) float64 {
	auth := classifyAuthority(r)
	total := r.Score + authorityCoef*float64(auth.weight)/100.0

	// Legacy source (pre-eu-v1): default questions should see the eu-v1 norm.
	// Demoted, not removed, so it stays findable for history/transition questions.
	if r.Superseded {
		total -= supersededPenalty
	}

	// Foreign law (jurisdiction "CH") on a DE/EU question is demoted, not deleted;
	// every other result receives the jurisdiction gain.
	foreignOnDomestic := !qForeign && auth.jurisdiction == "CH"
	switch {
	case foreignOnDomestic:
		total -= foreignPenalty
	default:
		total += jurisdictionGain
	}

	if auth.sourceClass == "unknown" {
		total -= unknownPenalty
	}

	// Domain adjustment only when the query has a domain; chunks without a domain
	// are left neutral.
	if qDomain != "" {
		if domain := chunkDomain(r); domain == qDomain {
			total += domainMatchGain
		} else if domain != "" {
			total -= offDomainPenalty // off-domain binding: demoted, not deleted
		}
	}

	// Law-enforcement-scoped material on a general data-protection question.
	if qDomain == "data_protection" && scopeClass(r) == "law_enforcement" {
		total -= scopePenalty
	}

	// Topic match is an amplifier, not an override.
	if resultMatchesTopic(query, r) {
		total += topicGain
	}
	return total
}

// rerankByAuthority returns the results ordered by descending authorityScore, so
// that binding law from the matching jurisdiction/domain ranks above guidance,
// foreign and off-domain law — WITHOUT dropping anything (guidance is kept as
// interpretation context). Each computed score is written back to Score on a copy
// of the input so downstream merges (e.g. the multi-collection advisor) preserve
// this order; the input slice itself is not modified. The sort is stable, so ties
// keep their incoming order.
//
// When the query explicitly asks for guidance, applyGuidanceIntent may additionally
// lift competitive guidance above the best binding hit before sorting.
//
// NOTE(review): inputs with fewer than two results are returned as-is, i.e. their
// Score stays the raw semantic score — confirm downstream merges expect that.
func rerankByAuthority(query string, results []LegalSearchResult) []LegalSearchResult {
	if len(results) <= 1 {
		return results
	}
	qDomain, qForeign := queryDomain(query), queryIsForeign(query)

	// Score into a fresh slice; results keeps the raw semantic scores.
	ranked := make([]LegalSearchResult, len(results))
	for i, res := range results {
		res.Score = authorityScore(query, res, qDomain, qForeign)
		ranked[i] = res
	}

	if queryWantsGuidance(query) {
		applyGuidanceIntent(ranked, results, bestBindingSemantic(results, true))
	}

	sort.SliceStable(ranked, func(i, j int) bool {
		return ranked[j].Score < ranked[i].Score
	})
	return ranked
}

// applyGuidanceIntent raises the score of semantically competitive guidance to just
// ABOVE the best binding hit, so an EXPLICIT interpretation question can return
// guidance as Top-1. It mutates out in place.
//
// out holds the re-ranked (final) scores, raw the untouched semantic scores at the
// same indices, and bestBindingSem the best raw semantic score among binding law.
// Only "supervisory_guidance" entries whose raw score is at least
// bestBindingSem-guidanceIntentMargin are considered; anything below that margin is
// left untouched. The lift starts from the best binding FINAL score plus
// guidanceIntentGain and keeps each guideline's semantic distance to the best
// binding hit, so lifted guidelines stay ordered by their raw semantic score. A
// score is only ever raised, never lowered.
func applyGuidanceIntent(out, raw []LegalSearchResult, bestBindingSem float64) {
	// Highest final score among binding law (0 if none exceeds 0).
	var bindingCeiling float64
	for _, r := range out {
		if r.SourceClass == "binding_law" && r.Score > bindingCeiling {
			bindingCeiling = r.Score
		}
	}

	semFloor := bestBindingSem - guidanceIntentMargin
	for i := range out {
		if out[i].SourceClass != "supervisory_guidance" {
			continue
		}
		sem := raw[i].Score
		if sem < semFloor {
			continue // not semantically competitive with binding law
		}
		if lifted := bindingCeiling + guidanceIntentGain + (sem - bestBindingSem); lifted > out[i].Score {
			out[i].Score = lifted
		}
	}
}