e24a551ee4
CI / detect-changes (push) Successful in 8s
CI / branch-name (push) Has been skipped
CI / guardrail-integrity (push) Has been skipped
CI / secret-scan (push) Has been skipped
CI / dep-audit (push) Has been skipped
CI / sbom-scan (push) Has been skipped
CI / build-sha-integrity (push) Successful in 4s
CI / validate-canonical-controls (push) Successful in 2s
CI / loc-budget (push) Successful in 18s
CI / go-lint (push) Has been skipped
CI / python-lint (push) Has been skipped
CI / nodejs-lint (push) Has been skipped
CI / nodejs-build (push) Has been skipped
CI / test-go (push) Successful in 57s
CI / iace-gt-coverage (push) Successful in 15s
CI / test-python-backend (push) Has been skipped
CI / test-python-document-crawler (push) Has been skipped
CI / test-python-dsms-gateway (push) Has been skipped
146 lines
5.3 KiB
Go
146 lines
5.3 KiB
Go
package ucca
|
|
|
|
import (
	"sort"
	"strings"
	"unicode"
)
|
|
|
|
// Re-ranking coefficients (validated in the offline golden harness; Phase A — conservative).
//
// Every value except the two guidanceIntent* constants is added to or subtracted from a
// result's semantic score in authorityScore. The guidanceIntent* constants are used by
// applyGuidanceIntent only.
const (
	// authorityCoef scales the authority weight; the bonus is authorityCoef * weight/100.
	authorityCoef = 0.40

	// jurisdictionGain is meant for binding law / guidance from DE or EU. In authorityScore
	// it is granted to every result that does not receive foreignPenalty.
	jurisdictionGain = 0.05

	// foreignPenalty demotes (does not remove) foreign law on a DE/EU question. In
	// authorityScore it applies to jurisdiction "CH" when the query is not a foreign-law query.
	foreignPenalty = 0.60

	// unknownPenalty is subtracted when the classified source class is "unknown".
	unknownPenalty = 0.08

	// domainMatchGain is added when the chunk's domain equals the query's domain.
	domainMatchGain = 0.15

	// offDomainPenalty demotes (does not remove) a chunk whose non-empty domain differs
	// from the query's domain.
	offDomainPenalty = 0.10

	// scopePenalty demotes BDSG Part 3 (law enforcement) content on a general
	// data-protection question.
	scopePenalty = 0.25

	// topicGain rewards a topic match. It is an amplifier only, never an override.
	topicGain = 0.18

	// supersededPenalty demotes a superseded legacy source (pre-eu-v1). Such sources are
	// demoted, not hidden.
	supersededPenalty = 0.50

	// guidanceIntentGain is the epsilon by which a qualifying guideline is lifted ABOVE
	// the best binding hit.
	guidanceIntentGain = 0.10

	// guidanceIntentMargin bounds how far below the best binding hit's raw semantic score
	// a guideline may be and still count as semantically competitive.
	guidanceIntentMargin = 0.05
)
|
|
|
|
// guidanceIntentSignals mark a query that EXPLICITLY asks for an interpretation /
// recommendation by a guidance body, rather than for the binding obligation. Only
// then may a (semantically competitive) guideline outrank the binding norm.
//
// All entries are lowercase; queryWantsGuidance lowercases the query before matching.
var guidanceIntentSignals = []string{
	"edpb", "europäischer datenschutzausschuss", "europaeischer datenschutzausschuss",
	"dsk", "enisa", "bsi", "leitlinie", "guideline", "orientierungshilfe",
	"auslegung", "empfiehlt", "empfehlung", "sagt", "laut",
}

// guidanceSignalNeedsWholeWord reports whether sig is an acronym or short function word
// that also occurs inside unrelated words ("bsi" in "website", "sagt" in "untersagt",
// "laut" in "lautet") and must therefore match a complete query word. Keep this list in
// sync when adding short entries to guidanceIntentSignals.
func guidanceSignalNeedsWholeWord(sig string) bool {
	switch sig {
	case "edpb", "dsk", "enisa", "bsi", "sagt", "laut":
		return true
	}
	return false
}

// queryWantsGuidance reports whether the query explicitly asks for guidance/interpretation.
//
// Matching is case-insensitive. Short signals (see guidanceSignalNeedsWholeWord) must
// equal a whole word of the query, so that "Website" or "untersagt" do not count as
// guidance intent. The longer stems are matched as substrings so that plurals and
// compounds ("Leitlinien", "Handlungsempfehlungen") still qualify.
func queryWantsGuidance(query string) bool {
	q := strings.ToLower(query)
	// Split on anything that is neither a letter nor a digit, so "BSI-Grundschutz" and
	// "(EDPB)" still yield the bare acronym as a word.
	words := strings.FieldsFunc(q, func(r rune) bool {
		return !unicode.IsLetter(r) && !unicode.IsDigit(r)
	})
	for _, sig := range guidanceIntentSignals {
		if !guidanceSignalNeedsWholeWord(sig) {
			if strings.Contains(q, sig) {
				return true
			}
			continue
		}
		for _, w := range words {
			if w == sig {
				return true
			}
		}
	}
	return false
}
|
|
|
|
// bestBindingSemantic returns the highest RAW semantic score among binding-law
|
|
// results (0 if none / intent not requested). Used as the guard threshold so an
|
|
// off-topic guideline cannot ride the interpretation-intent boost.
|
|
func bestBindingSemantic(results []LegalSearchResult, wantsGuidance bool) float64 {
|
|
if !wantsGuidance {
|
|
return 0
|
|
}
|
|
best := 0.0
|
|
for _, r := range results {
|
|
if r.SourceClass == "binding_law" && r.Score > best {
|
|
best = r.Score
|
|
}
|
|
}
|
|
return best
|
|
}
|
|
|
|
// authorityScore computes the normative relevance of a result for a query. It augments the
|
|
// semantic score with authority/jurisdiction/domain/scope/topic signals. Exposed for tests.
|
|
func authorityScore(query string, r LegalSearchResult, qDomain string, qForeign bool) float64 {
|
|
info := classifyAuthority(r)
|
|
score := r.Score + authorityCoef*float64(info.weight)/100.0
|
|
|
|
if r.Superseded {
|
|
// Alt-Quelle (pre-eu-v1): Default-Fragen sollen die eu-v1-Norm sehen. Demoted,
|
|
// nicht entfernt — fuer Historie/Uebergangsfragen bleibt sie auffindbar.
|
|
score -= supersededPenalty
|
|
}
|
|
|
|
if info.jurisdiction == "CH" && !qForeign {
|
|
score -= foreignPenalty // Fremdrecht bei DE/EU-Frage: demoted, nicht geloescht
|
|
} else {
|
|
score += jurisdictionGain
|
|
}
|
|
if info.sourceClass == "unknown" {
|
|
score -= unknownPenalty
|
|
}
|
|
if qDomain != "" {
|
|
switch cd := chunkDomain(r); {
|
|
case cd == qDomain:
|
|
score += domainMatchGain
|
|
case cd != "":
|
|
score -= offDomainPenalty // off-domain binding: demoted, nicht geloescht
|
|
}
|
|
}
|
|
if qDomain == "data_protection" && scopeClass(r) == "law_enforcement" {
|
|
score -= scopePenalty
|
|
}
|
|
if resultMatchesTopic(query, r) {
|
|
score += topicGain // Verstaerker, kein Override
|
|
}
|
|
return score
|
|
}
|
|
|
|
// rerankByAuthority re-orders results so binding law from the matching jurisdiction/domain
|
|
// ranks above guidance, foreign and off-domain law — WITHOUT dropping anything (guidance is
|
|
// kept as interpretation context). The computed score is written back to Score so downstream
|
|
// merges (e.g. the multi-collection advisor) preserve this order. Pure + deterministic.
|
|
func rerankByAuthority(query string, results []LegalSearchResult) []LegalSearchResult {
|
|
if len(results) < 2 {
|
|
return results
|
|
}
|
|
qDomain := queryDomain(query)
|
|
qForeign := queryIsForeign(query)
|
|
wantsGuidance := queryWantsGuidance(query)
|
|
bestBindingSem := bestBindingSemantic(results, wantsGuidance)
|
|
|
|
out := make([]LegalSearchResult, len(results))
|
|
copy(out, results)
|
|
for i := range out {
|
|
out[i].Score = authorityScore(query, out[i], qDomain, qForeign)
|
|
}
|
|
if wantsGuidance {
|
|
applyGuidanceIntent(out, results, bestBindingSem)
|
|
}
|
|
sort.SliceStable(out, func(a, b int) bool {
|
|
return out[a].Score > out[b].Score
|
|
})
|
|
return out
|
|
}
|
|
|
|
// applyGuidanceIntent lifts semantically-competitive guidance just ABOVE the best
|
|
// binding hit (ordered by semantic), so an EXPLICIT interpretation question can
|
|
// return guidance Top-1. Obligation questions (no intent → not called) keep
|
|
// binding on top. Guidance below the semantic margin is left untouched, so an
|
|
// off-topic guideline can never ride the override — and the lift is computed from
|
|
// the binding FINAL score, so authority/topic/domain bonuses cannot edge it out.
|
|
func applyGuidanceIntent(out, raw []LegalSearchResult, bestBindingSem float64) {
|
|
bestBindingFinal := 0.0
|
|
for i := range out {
|
|
if out[i].SourceClass == "binding_law" && out[i].Score > bestBindingFinal {
|
|
bestBindingFinal = out[i].Score
|
|
}
|
|
}
|
|
for i := range out {
|
|
if out[i].SourceClass != "supervisory_guidance" || raw[i].Score < bestBindingSem-guidanceIntentMargin {
|
|
continue
|
|
}
|
|
lifted := bestBindingFinal + guidanceIntentGain + (raw[i].Score - bestBindingSem)
|
|
if lifted > out[i].Score {
|
|
out[i].Score = lifted
|
|
}
|
|
}
|
|
}
|