Files
breakpilot-compliance/ai-compliance-sdk/internal/ucca/authority_rerank.go
T
Benjamin Admin a5b675d999
CI / detect-changes (pull_request) Successful in 6s
CI / branch-name (pull_request) Successful in 1s
CI / guardrail-integrity (pull_request) Successful in 6s
CI / secret-scan (pull_request) Successful in 7s
CI / dep-audit (pull_request) Failing after 56s
CI / sbom-scan (pull_request) Failing after 57s
CI / build-sha-integrity (pull_request) Successful in 10s
CI / validate-canonical-controls (pull_request) Successful in 7s
CI / loc-budget (pull_request) Successful in 20s
CI / go-lint (pull_request) Successful in 48s
CI / python-lint (pull_request) Failing after 15s
CI / nodejs-lint (pull_request) Failing after 1m12s
CI / nodejs-build (pull_request) Successful in 3m20s
CI / test-go (pull_request) Successful in 56s
CI / iace-gt-coverage (pull_request) Successful in 15s
CI / test-python-backend (pull_request) Successful in 27s
CI / test-python-document-crawler (pull_request) Successful in 13s
CI / test-python-dsms-gateway (pull_request) Successful in 9s
feat(ai-sdk): control-intent — technical_standard may win implementation questions
Mirror the guidance interpretation-intent (PR #35) for control frameworks: when a
query explicitly asks HOW to implement / which controls or measures fit (control,
maßnahme, umsetzen, härten, nist, owasp, grundschutz, ...), a semantically
competitive technical_standard is lifted just above the best binding hit — so
"Welche Controls passen zu Security Updates?" can return NIST/OWASP Top-1, while
"Welche Anforderungen bestehen an Security Updates?" keeps CRA (binding) Top-1.

Generalize applyGuidanceIntent -> liftAboveBinding(sourceClass) and reuse it for
both supervisory_guidance (interpretation intent) and technical_standard
(implementation intent). Same semantic guard keeps off-topic sources demoted.
Rename the shared coefficients guidanceIntent* -> intentLift*.

Tested: queryWantsControls detection + 3 rerank cases (standard wins on control
question, binding stays on norm question, off-topic standard blocked by guard).

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
2026-06-24 11:49:49 +02:00

169 lines
6.6 KiB
Go

package ucca
import (
"sort"
"strings"
)
// Re-ranking coefficients (validated in the offline golden harness; Phase A — conservative).
// authorityScore adds or subtracts them from a result's semantic score; the two
// intentLift* values are used by liftAboveBinding only.
const (
authorityCoef = 0.40 // authority bonus: multiplied by the classified weight/100
jurisdictionGain = 0.05 // meant for binding/guidance from DE or EU; added to every result that does not take foreignPenalty
foreignPenalty = 0.60 // foreign law on a DE/EU question (demoted, not removed)
unknownPenalty = 0.08 // subtracted when the classified source class is "unknown"
domainMatchGain = 0.15 // added when the chunk's domain equals the query's domain
offDomainPenalty = 0.10 // off-domain binding (demoted, not removed); applies when the chunk has a different, non-empty domain
scopePenalty = 0.25 // BDSG Part 3 (law enforcement) on a general data-protection question
topicGain = 0.18 // amplifier only
supersededPenalty = 0.50 // superseded legacy source (pre-eu-v1): demoted, not hidden
intentLiftGain = 0.10 // epsilon a qualifying interpretative source is lifted ABOVE the best binding
intentLiftMargin = 0.05 // ...only if that source is semantically competitive with binding
)
// guidanceIntentSignals mark a query that EXPLICITLY asks for an interpretation /
// recommendation by a guidance body, rather than for the binding obligation. Only
// then may a (semantically competitive) guideline outrank the binding norm.
//
// The entries are compared against the lower-cased query by queryMatchesAny, so
// every entry must itself be written in lower case. Terms with umlauts are listed
// in both the native and the transliterated ("ae") spelling.
//
// NOTE(review): very short entries such as "bsi" or "sagt" also occur inside
// unrelated words ("website", "untersagt"); whether those trigger depends on how
// queryMatchesAny matches — confirm when adding further short entries.
var guidanceIntentSignals = []string{
"edpb", "europäischer datenschutzausschuss", "europaeischer datenschutzausschuss",
"dsk", "enisa", "bsi", "leitlinie", "guideline", "orientierungshilfe",
"auslegung", "empfiehlt", "empfehlung", "sagt", "laut",
}
// controlIntentSignals mark a query that asks HOW to implement / which controls or
// measures fit — rather than WHAT the binding obligation is. Only then may a
// (semantically competitive) technical_standard outrank the binding norm.
//
// The entries are compared against the lower-cased query by queryMatchesAny, so
// every entry must itself be written in lower case. Several entries are word stems
// ("implementier", "absicher", "härt") so that inflected forms match as well.
//
// NOTE(review): "control" is a prefix of "controls", so the second entry adds
// nothing under substring matching; "control" is also contained in "controller"
// (the GDPR role) — confirm that such queries should count as control intent.
var controlIntentSignals = []string{
"control", "controls", "maßnahme", "massnahme", "schutzmaßnahme",
"best practice", "best-practice", "umsetzen", "implementier", "absicher",
"härt", "haert", "hardening", "nist", "owasp", "grundschutz",
"ccm", "iso 27001", "isms",
}
// shortSignalMaxLen is the byte length up to which a pure-ASCII signal is treated
// as an acronym / short word that has to match a whole word of the query.
const shortSignalMaxLen = 4

// queryMatchesAny reports whether the query contains at least one of the given
// signals, ignoring the case of the query. Signals are expected in lower case.
//
// Long signals and stems ("maßnahme", "implementier", "härt") match anywhere in
// the query, so inflected forms and German compounds ("Sicherheitsmaßnahmen") are
// detected. Short pure-ASCII signals ("bsi", "nist", "sagt", ...) must equal a whole
// word of the query: as plain substrings they also fire inside unrelated words —
// "website" contains "bsi", "administrator" contains "nist", "untersagt" contains
// "sagt" — and would wrongly switch on an intent. Empty signals never match.
func queryMatchesAny(query string, signals []string) bool {
	q := strings.ToLower(query)

	var (
		words     []string
		tokenised bool // the query is split into words at most once, and only on demand
	)
	for _, sig := range signals {
		if sig == "" {
			continue
		}
		if !isShortSignal(sig) {
			if strings.Contains(q, sig) {
				return true
			}
			continue
		}
		if !tokenised {
			words = strings.FieldsFunc(q, func(r rune) bool { return !isSignalLetter(r) })
			tokenised = true
		}
		for _, w := range words {
			if w == sig {
				return true
			}
		}
	}
	return false
}

// isShortSignal reports whether sig is a short, purely lower-case ASCII signal
// that must be matched as a whole word rather than as a substring.
func isShortSignal(sig string) bool {
	if len(sig) > shortSignalMaxLen {
		return false
	}
	for i := 0; i < len(sig); i++ {
		if sig[i] < 'a' || sig[i] > 'z' {
			return false
		}
	}
	return true
}

// isSignalLetter reports whether r belongs to a word when splitting a lower-cased
// query: ASCII letters plus the accented Latin letters U+00C0..U+024F (without the
// × and ÷ signs). Digits, punctuation, typographic quotes and dashes separate words,
// so "NIST800-53", "BSI-Grundschutz" and „BSI“ all yield the bare acronym.
func isSignalLetter(r rune) bool {
	switch {
	case r >= 'a' && r <= 'z', r >= 'A' && r <= 'Z':
		return true
	case r == 0xD7 || r == 0xF7:
		return false
	default:
		return r >= 0xC0 && r <= 0x24F
	}
}
// queryWantsGuidance tells whether the query carries an explicit guidance /
// interpretation signal, i.e. matches an entry of guidanceIntentSignals.
func queryWantsGuidance(query string) bool {
	return queryMatchesAny(query, guidanceIntentSignals)
}
// queryWantsControls tells whether the query carries an implementation signal
// (controls / measures), i.e. matches an entry of controlIntentSignals.
func queryWantsControls(query string) bool {
	return queryMatchesAny(query, controlIntentSignals)
}
// bestBindingSemantic yields the guard threshold for the intent lift: the largest
// RAW semantic score found on a binding_law result. It yields 0 when no intent is
// present (wantsIntent is false) or when no binding result scores above zero. The
// threshold keeps an off-topic interpretative source from riding the intent boost.
func bestBindingSemantic(results []LegalSearchResult, wantsIntent bool) float64 {
	var top float64
	if wantsIntent {
		for i := range results {
			if results[i].SourceClass != "binding_law" {
				continue
			}
			if s := results[i].Score; s > top {
				top = s
			}
		}
	}
	return top
}
// authorityScore derives the normative relevance of result r for the query: it
// starts from the semantic score and layers authority, jurisdiction, domain, scope
// and topic adjustments on top. qDomain is the query's domain ("" when unknown),
// qForeign says whether the query itself targets foreign law. Exposed for tests.
func authorityScore(query string, r LegalSearchResult, qDomain string, qForeign bool) float64 {
	info := classifyAuthority(r)
	total := r.Score + authorityCoef*float64(info.weight)/100.0

	// Superseded legacy source (pre-eu-v1): default questions should surface the
	// eu-v1 norm. Demoted, not removed — it stays findable for history or
	// transition questions.
	if r.Superseded {
		total -= supersededPenalty
	}

	// Foreign (CH) law on a DE/EU question is demoted, not deleted; every other
	// result receives the jurisdiction gain.
	foreignOnDomestic := !qForeign && info.jurisdiction == "CH"
	if foreignOnDomestic {
		total -= foreignPenalty
	} else {
		total += jurisdictionGain
	}

	if info.sourceClass == "unknown" {
		total -= unknownPenalty
	}

	// Domain fit is only judged when the query has a domain; a chunk without a
	// domain is neither rewarded nor penalised.
	if qDomain != "" {
		if cd := chunkDomain(r); cd == qDomain {
			total += domainMatchGain
		} else if cd != "" {
			total -= offDomainPenalty // off-domain binding: demoted, not deleted
		}
	}

	if qDomain == "data_protection" && scopeClass(r) == "law_enforcement" {
		total -= scopePenalty
	}

	// Topic match amplifies the score, it never overrides the other signals.
	if resultMatchesTopic(query, r) {
		total += topicGain
	}
	return total
}
// rerankByAuthority returns the results ordered by authorityScore (highest first),
// so binding law from the matching jurisdiction/domain ranks above guidance, foreign
// and off-domain law. Nothing is dropped — guidance stays available as interpretation
// context. The computed score replaces Score in the returned copies so downstream
// merges (e.g. the multi-collection advisor) keep this order; the input slice is
// left unmodified. Ties keep their input order (stable sort).
//
// Fewer than two results are handed back as-is: the very same slice, scores untouched.
// NOTE(review): a lone result therefore keeps its raw semantic Score — confirm that
// merges across collections tolerate a mix of raw and re-ranked scores.
func rerankByAuthority(query string, results []LegalSearchResult) []LegalSearchResult {
	if len(results) <= 1 {
		return results
	}

	domain, foreign := queryDomain(query), queryIsForeign(query)
	guidanceAsked := queryWantsGuidance(query)
	controlsAsked := queryWantsControls(query)
	// The guard threshold is taken from the RAW scores, before they are overwritten.
	semGuard := bestBindingSemantic(results, guidanceAsked || controlsAsked)

	ranked := make([]LegalSearchResult, len(results))
	for i, r := range results {
		r.Score = authorityScore(query, r, domain, foreign)
		ranked[i] = r
	}

	// An explicit interpretation question lets a competitive guideline outrank
	// binding law; an explicit implementation question does the same for a
	// competitive technical_standard. Both lifts start from the best binding FINAL
	// score, and a pure norm question (neither intent) is not touched at all.
	if guidanceAsked {
		liftAboveBinding(ranked, results, semGuard, "supervisory_guidance")
	}
	if controlsAsked {
		liftAboveBinding(ranked, results, semGuard, "technical_standard")
	}

	sort.SliceStable(ranked, func(i, j int) bool {
		return ranked[j].Score < ranked[i].Score
	})
	return ranked
}
// liftAboveBinding lifts semantically competitive sources of the given sourceClass
// (supervisory_guidance or technical_standard) just ABOVE the best binding hit, so
// an EXPLICIT guidance/implementation question can return such a source Top-1. A
// pure norm question (no intent → not called) keeps binding on top.
//
// out holds the results with their FINAL (authority) scores and is modified in
// place; raw holds the same results, index-aligned, with their RAW semantic scores.
// bestBindingSem is the best raw semantic score among binding results.
//
// A source qualifies when its raw score is at most intentLiftMargin below
// bestBindingSem; sources below that margin are left untouched, so an off-topic
// source can never ride the override. Qualifying sources are placed at the best
// binding FINAL score plus intentLiftGain plus their semantic lead, which keeps them
// ordered by semantic score and ensures authority/topic/domain bonuses on the
// binding side cannot edge them out. A score is only ever raised, never lowered.
//
// Without any binding_law result there is nothing to lift above, so the scores stay
// as they are: lifting relative to a phantom baseline of 0 would cancel the
// superseded/foreign/off-domain demotions of the lifted sources.
func liftAboveBinding(out, raw []LegalSearchResult, bestBindingSem float64, sourceClass string) {
	// Seed the maximum from the first binding hit rather than from 0, so that a
	// result set whose binding scores are all negative still gets the true baseline.
	bestBindingFinal, haveBinding := 0.0, false
	for i := range out {
		if out[i].SourceClass != "binding_law" {
			continue
		}
		if !haveBinding || out[i].Score > bestBindingFinal {
			bestBindingFinal, haveBinding = out[i].Score, true
		}
	}
	if !haveBinding {
		return
	}

	for i := range out {
		if out[i].SourceClass != sourceClass || raw[i].Score < bestBindingSem-intentLiftMargin {
			continue
		}
		lifted := bestBindingFinal + intentLiftGain + (raw[i].Score - bestBindingSem)
		if lifted > out[i].Score {
			out[i].Score = lifted
		}
	}
}