Compare commits

..

1 Commits

Author SHA1 Message Date
Benjamin Admin 24499a25e9 fix(ai-sdk): make interpretation-intent override reliably win
CI / detect-changes (pull_request) Successful in 5s
CI / branch-name (pull_request) Successful in 1s
CI / guardrail-integrity (pull_request) Successful in 4s
CI / secret-scan (pull_request) Successful in 7s
CI / dep-audit (pull_request) Failing after 53s
CI / sbom-scan (pull_request) Failing after 51s
CI / build-sha-integrity (pull_request) Successful in 5s
CI / validate-canonical-controls (pull_request) Successful in 3s
CI / loc-budget (pull_request) Successful in 15s
CI / go-lint (pull_request) Successful in 53s
CI / python-lint (pull_request) Failing after 15s
CI / nodejs-lint (pull_request) Failing after 1m13s
CI / nodejs-build (pull_request) Successful in 3m8s
CI / test-go (pull_request) Successful in 58s
CI / iace-gt-coverage (pull_request) Successful in 15s
CI / test-python-backend (pull_request) Successful in 27s
CI / test-python-document-crawler (pull_request) Successful in 19s
CI / test-python-dsms-gateway (pull_request) Successful in 13s
PR #34's fixed +0.25 guidance gain proved too small in live use: for the query
"Was empfiehlt der EDPB zum DSB?" the binding Art. 37 (1.381) still edged out the
boosted EDPB guidance (1.348), because the live authority score gives the binding
article a topic/domain bonus that the (partly English) guidance chunk does not
receive.

Replace the fixed gain with a deterministic lift: a semantically competitive
guideline (raw semantic score >= best_binding_semantic - 0.05) is lifted just ABOVE
the best binding FINAL score (lifted guidelines stay ordered by their raw semantic
score), so authority/topic/domain bonuses can no longer edge it out. Obligation
questions (no intent signal) are untouched — binding stays Top-1; off-topic
guidance stays demoted.

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
2026-06-24 11:21:59 +02:00
4 changed files with 34 additions and 149 deletions
+3 -13
View File
@@ -9,8 +9,8 @@ import (
// authorityInfo is the normative classification of a search result, used internally // authorityInfo is the normative classification of a search result, used internally
// for re-ranking only (Phase 1 changes ordering, not the response contract). // for re-ranking only (Phase 1 changes ordering, not the response contract).
type authorityInfo struct { type authorityInfo struct {
weight int // 100 binding, 80 technical_standard, 70 guidance, 0 foreign, 50 unknown weight int // 100 binding_law, 70 guidance, 0 foreign_law, 50 unknown
sourceClass string // binding_law | technical_standard | supervisory_guidance | foreign_law | unknown sourceClass string // binding_law | supervisory_guidance | foreign_law | unknown
jurisdiction string // DE | EU | CH jurisdiction string // DE | EU | CH
} }
@@ -18,13 +18,7 @@ var (
guidanceMarkers = []string{ guidanceMarkers = []string{
"DSK", "EDPB", "BfDI", "BFDI", "BayLfD", "Baylfb", "ENISA", "BSI", "EUCC", "DSK", "EDPB", "BfDI", "BFDI", "BayLfD", "Baylfb", "ENISA", "BSI", "EUCC",
"Standards Mapping", "Kpnr", "Orientierungshilfe", "Handreichung", "Beschluss", "Standards Mapping", "Kpnr", "Orientierungshilfe", "Handreichung", "Beschluss",
"Leitlinie", "Guidance", "Empfehlung", "OECD", "CISA", "Blue Guide", "Leitlinie", "Guidance", "Empfehlung", "NIST", "OECD", "CISA", "Blue Guide",
}
// Technical standards / control frameworks (best-practice controls). Checked BEFORE
// guidanceMarkers so a "BSI Grundschutz" chunk classifies as a standard, not BSI guidance.
standardMarkers = []string{
"NIST", "OWASP", "Grundschutz", "ISO 27001", "ISO/IEC 27001",
"CSA CCM", "Cloud Controls Matrix", "CIS Benchmark", "CIS Control",
} }
foreignMarkers = []string{"RevDSG", "fedlex", "(CH)"} foreignMarkers = []string{"RevDSG", "fedlex", "(CH)"}
deMarkers = []string{"BDSG", "DSK", "BfDI", "BFDI", "BayLfD", "Baylfb", "BSI"} deMarkers = []string{"BDSG", "DSK", "BfDI", "BFDI", "BayLfD", "Baylfb", "BSI"}
@@ -54,8 +48,6 @@ func classifyAuthority(r LegalSearchResult) authorityInfo {
switch { switch {
case containsAny(hay, foreignMarkers): case containsAny(hay, foreignMarkers):
return authorityInfo{weight: 0, sourceClass: "foreign_law", jurisdiction: "CH"} return authorityInfo{weight: 0, sourceClass: "foreign_law", jurisdiction: "CH"}
case r.Category == "standard" || containsAny(hay, standardMarkers):
return authorityInfo{weight: 80, sourceClass: "technical_standard", jurisdiction: jur}
case r.Category == "guidance" || containsAny(hay, guidanceMarkers): case r.Category == "guidance" || containsAny(hay, guidanceMarkers):
return authorityInfo{weight: 70, sourceClass: "supervisory_guidance", jurisdiction: jur} return authorityInfo{weight: 70, sourceClass: "supervisory_guidance", jurisdiction: jur}
case r.Category == "regulation" || r.Category == "eu_recht" || normPattern.MatchString(r.ArticleLabel): case r.Category == "regulation" || r.Category == "eu_recht" || normPattern.MatchString(r.ArticleLabel):
@@ -69,8 +61,6 @@ func sourceClassFromWeight(w int) string {
switch { switch {
case w >= 100: case w >= 100:
return "binding_law" return "binding_law"
case w >= 80:
return "technical_standard"
case w >= 70: case w >= 70:
return "supervisory_guidance" return "supervisory_guidance"
case w <= 0: case w <= 0:
@@ -16,8 +16,8 @@ const (
scopePenalty = 0.25 // BDSG Teil 3 (law enforcement) on a general DP question scopePenalty = 0.25 // BDSG Teil 3 (law enforcement) on a general DP question
topicGain = 0.18 // amplifier only topicGain = 0.18 // amplifier only
supersededPenalty = 0.50 // superseded Alt-Quelle (pre-eu-v1): demoted, nicht versteckt supersededPenalty = 0.50 // superseded Alt-Quelle (pre-eu-v1): demoted, nicht versteckt
intentLiftGain = 0.10 // epsilon a qualifying interpretative source is lifted ABOVE the best binding guidanceIntentGain = 0.10 // epsilon a qualifying guideline is lifted ABOVE the best binding hit
intentLiftMargin = 0.05 // ...only if that source is semantically competitive with binding guidanceIntentMargin = 0.05 // ...only if the guideline is semantically competitive with binding
) )
// guidanceIntentSignals mark a query that EXPLICITLY asks for an interpretation / // guidanceIntentSignals mark a query that EXPLICITLY asks for an interpretation /
@@ -29,19 +29,10 @@ var guidanceIntentSignals = []string{
"auslegung", "empfiehlt", "empfehlung", "sagt", "laut", "auslegung", "empfiehlt", "empfehlung", "sagt", "laut",
} }
// controlIntentSignals mark a query that asks HOW to implement / which controls or // queryWantsGuidance reports whether the query explicitly asks for guidance/interpretation.
// measures fit — rather than WHAT the binding obligation is. Only then may a func queryWantsGuidance(query string) bool {
// (semantically competitive) technical_standard outrank the binding norm.
var controlIntentSignals = []string{
"control", "controls", "maßnahme", "massnahme", "schutzmaßnahme",
"best practice", "best-practice", "umsetzen", "implementier", "absicher",
"härt", "haert", "hardening", "nist", "owasp", "grundschutz",
"ccm", "iso 27001", "isms",
}
func queryMatchesAny(query string, signals []string) bool {
q := strings.ToLower(query) q := strings.ToLower(query)
for _, sig := range signals { for _, sig := range guidanceIntentSignals {
if strings.Contains(q, sig) { if strings.Contains(q, sig) {
return true return true
} }
@@ -49,22 +40,16 @@ func queryMatchesAny(query string, signals []string) bool {
return false return false
} }
// queryWantsGuidance reports whether the query explicitly asks for guidance/interpretation.
func queryWantsGuidance(query string) bool { return queryMatchesAny(query, guidanceIntentSignals) }
// queryWantsControls reports whether the query asks for implementation controls/measures.
func queryWantsControls(query string) bool { return queryMatchesAny(query, controlIntentSignals) }
// bestBindingSemantic returns the highest RAW semantic score among binding-law // bestBindingSemantic returns the highest RAW semantic score among binding-law
// results (0 if none / no intent). Used as the guard threshold so an off-topic // results (0 if none / intent not requested). Used as the guard threshold so an
// interpretative source cannot ride the intent boost. // off-topic guideline cannot ride the interpretation-intent boost.
func bestBindingSemantic(results []LegalSearchResult, wantsIntent bool) float64 { func bestBindingSemantic(results []LegalSearchResult, wantsGuidance bool) float64 {
if !wantsIntent { if !wantsGuidance {
return 0 return 0
} }
best := 0.0 best := 0.0
for _, r := range results { for _, r := range results {
if classifyAuthority(r).sourceClass == "binding_law" && r.Score > best { if r.SourceClass == "binding_law" && r.Score > best {
best = r.Score best = r.Score
} }
} }
@@ -119,22 +104,15 @@ func rerankByAuthority(query string, results []LegalSearchResult) []LegalSearchR
qDomain := queryDomain(query) qDomain := queryDomain(query)
qForeign := queryIsForeign(query) qForeign := queryIsForeign(query)
wantsGuidance := queryWantsGuidance(query) wantsGuidance := queryWantsGuidance(query)
wantsControls := queryWantsControls(query) bestBindingSem := bestBindingSemantic(results, wantsGuidance)
bestBindingSem := bestBindingSemantic(results, wantsGuidance || wantsControls)
out := make([]LegalSearchResult, len(results)) out := make([]LegalSearchResult, len(results))
copy(out, results) copy(out, results)
for i := range out { for i := range out {
out[i].Score = authorityScore(query, out[i], qDomain, qForeign) out[i].Score = authorityScore(query, out[i], qDomain, qForeign)
} }
// Explicit interpretation intent → a competitive guideline may outrank binding;
// explicit implementation intent → a competitive technical_standard may. Both lift
// ABOVE the best binding FINAL, so a pure norm question (neither intent) is untouched.
if wantsGuidance { if wantsGuidance {
liftAboveBinding(out, results, bestBindingSem, "supervisory_guidance") applyGuidanceIntent(out, results, bestBindingSem)
}
if wantsControls {
liftAboveBinding(out, results, bestBindingSem, "technical_standard")
} }
sort.SliceStable(out, func(a, b int) bool { sort.SliceStable(out, func(a, b int) bool {
return out[a].Score > out[b].Score return out[a].Score > out[b].Score
@@ -142,27 +120,24 @@ func rerankByAuthority(query string, results []LegalSearchResult) []LegalSearchR
return out return out
} }
// liftAboveBinding lifts a semantically-competitive interpretative source (the given // applyGuidanceIntent lifts semantically-competitive guidance just ABOVE the best
// sourceClass — supervisory_guidance or technical_standard) just ABOVE the best binding // binding hit (ordered by semantic), so an EXPLICIT interpretation question can
// hit, ordered by semantic, so an EXPLICIT guidance/implementation question can return // return guidance Top-1. Obligation questions (no intent → not called) keep
// that source Top-1. A pure norm question (no intent → not called) keeps binding on top. // binding on top. Guidance below the semantic margin is left untouched, so an
// Sources below the semantic margin are left untouched, so an off-topic source can never // off-topic guideline can never ride the override — and the lift is computed from
// ride the override — and the lift is from the binding FINAL score, so authority/topic/ // the binding FINAL score, so authority/topic/domain bonuses cannot edge it out.
// domain bonuses cannot edge it out. func applyGuidanceIntent(out, raw []LegalSearchResult, bestBindingSem float64) {
func liftAboveBinding(out, raw []LegalSearchResult, bestBindingSem float64, sourceClass string) {
bestBindingFinal := 0.0 bestBindingFinal := 0.0
for i := range out { for i := range out {
if classifyAuthority(out[i]).sourceClass == "binding_law" && out[i].Score > bestBindingFinal { if out[i].SourceClass == "binding_law" && out[i].Score > bestBindingFinal {
bestBindingFinal = out[i].Score bestBindingFinal = out[i].Score
} }
} }
for i := range out { for i := range out {
// Classify (not raw payload) so the untagged legacy corpus — e.g. NIST ingested if out[i].SourceClass != "supervisory_guidance" || raw[i].Score < bestBindingSem-guidanceIntentMargin {
// before source_class tagging — is still recognized as its interpretative class.
if classifyAuthority(out[i]).sourceClass != sourceClass || raw[i].Score < bestBindingSem-intentLiftMargin {
continue continue
} }
lifted := bestBindingFinal + intentLiftGain + (raw[i].Score - bestBindingSem) lifted := bestBindingFinal + guidanceIntentGain + (raw[i].Score - bestBindingSem)
if lifted > out[i].Score { if lifted > out[i].Score {
out[i].Score = lifted out[i].Score = lifted
} }
@@ -14,10 +14,6 @@ func TestClassifyAuthority(t *testing.T) {
{"tagged guidance DE", LegalSearchResult{AuthorityWeight: 70, SourceClass: "supervisory_guidance", Jurisdiction: "DE"}, 70, "supervisory_guidance", "DE"}, {"tagged guidance DE", LegalSearchResult{AuthorityWeight: 70, SourceClass: "supervisory_guidance", Jurisdiction: "DE"}, 70, "supervisory_guidance", "DE"},
{"tagged foreign CH", LegalSearchResult{AuthorityWeight: 0, SourceClass: "foreign_law", Jurisdiction: "CH"}, 0, "foreign_law", "CH"}, {"tagged foreign CH", LegalSearchResult{AuthorityWeight: 0, SourceClass: "foreign_law", Jurisdiction: "CH"}, 0, "foreign_law", "CH"},
{"untagged ENISA guidance", LegalSearchResult{RegulationShort: "ENISA", ArticleLabel: "ENISA CRA Standards Mapping"}, 70, "supervisory_guidance", "EU"}, {"untagged ENISA guidance", LegalSearchResult{RegulationShort: "ENISA", ArticleLabel: "ENISA CRA Standards Mapping"}, 70, "supervisory_guidance", "EU"},
{"untagged NIST standard", LegalSearchResult{RegulationShort: "NIST SP 800-82r3", ArticleLabel: "AU-8"}, 80, "technical_standard", "EU"},
{"BSI Grundschutz standard beats BSI guidance", LegalSearchResult{RegulationShort: "BSI Grundschutz", ArticleLabel: "BSI Grundschutz Baustein"}, 80, "technical_standard", "DE"},
{"weight-only 85 TRGS standard", LegalSearchResult{AuthorityWeight: 85, RegulationShort: "TRGS 529"}, 85, "technical_standard", "EU"},
{"tagged technical_standard", LegalSearchResult{AuthorityWeight: 80, SourceClass: "technical_standard", Jurisdiction: "EU"}, 80, "technical_standard", "EU"},
{"untagged CRA binding", LegalSearchResult{RegulationShort: "CRA", ArticleLabel: "Art. 13 CRA", Category: "regulation"}, 100, "binding_law", "EU"}, {"untagged CRA binding", LegalSearchResult{RegulationShort: "CRA", ArticleLabel: "Art. 13 CRA", Category: "regulation"}, 100, "binding_law", "EU"},
{"untagged BDSG binding DE", LegalSearchResult{RegulationShort: "BDSG", ArticleLabel: "§ 38 BDSG"}, 100, "binding_law", "DE"}, {"untagged BDSG binding DE", LegalSearchResult{RegulationShort: "BDSG", ArticleLabel: "§ 38 BDSG"}, 100, "binding_law", "DE"},
{"untagged RevDSG foreign", LegalSearchResult{RegulationShort: "RevDSG", ArticleLabel: "RevDSG (CH)"}, 0, "foreign_law", "CH"}, {"untagged RevDSG foreign", LegalSearchResult{RegulationShort: "RevDSG", ArticleLabel: "RevDSG (CH)"}, 0, "foreign_law", "CH"},
@@ -70,79 +70,3 @@ func TestRerank_OffTopicGuidance_BlockedByGuard(t *testing.T) {
t.Errorf("off-topic guidance must not win even with intent, got %s", out[0].SourceClass) t.Errorf("off-topic guidance must not win even with intent, got %s", out[0].SourceClass)
} }
} }
func TestQueryWantsControls(t *testing.T) {
wants := []string{
"Welche Controls passen zu Security Updates?",
"Welche Maßnahmen sollten wir umsetzen?",
"Wie härten wir den Server ab?",
"Gibt es NIST-Controls dafür?",
"OWASP Best Practice für Logging?",
"BSI Grundschutz Bausteine",
}
plain := []string{
"Welche Anforderungen bestehen an Security Updates?",
"Ab wann braucht man einen Datenschutzbeauftragten?",
}
for _, q := range wants {
if !queryWantsControls(q) {
t.Errorf("should detect control/implementation intent: %q", q)
}
}
for _, q := range plain {
if queryWantsControls(q) {
t.Errorf("should NOT detect control intent (norm question): %q", q)
}
}
}
func TestRerank_ControlQuestion_StandardMayWin(t *testing.T) {
// Explicit implementation intent + standard semantically competitive → standard wins.
results := []LegalSearchResult{
intentRes("NIST SP 800-82", "technical_standard", 0.62, 80),
intentRes("CRA", "binding_law", 0.58, 100),
}
out := rerankByAuthority("Welche Controls passen zu Security Updates?", results)
if out[0].SourceClass != "technical_standard" {
t.Errorf("control question: technical_standard should win Top-1, got %s", out[0].SourceClass)
}
}
func TestRerank_NormQuestion_BindingOverStandard(t *testing.T) {
// "Anforderungen" → no control intent → binding stays Top-1 over the standard.
results := []LegalSearchResult{
intentRes("NIST SP 800-82", "technical_standard", 0.62, 80),
intentRes("CRA", "binding_law", 0.58, 100),
}
out := rerankByAuthority("Welche Anforderungen bestehen an Security Updates?", results)
if out[0].SourceClass != "binding_law" {
t.Errorf("norm question: binding must stay Top-1 over standard, got %s", out[0].SourceClass)
}
}
func TestRerank_OffTopicStandard_BlockedByGuard(t *testing.T) {
// Control intent present, but the standard is semantically far below binding →
// the margin guard keeps binding Top-1 (no off-topic standard override).
results := []LegalSearchResult{
intentRes("NIST SP 800-82", "technical_standard", 0.40, 80),
intentRes("CRA", "binding_law", 0.58, 100),
}
out := rerankByAuthority("Welche Controls passen zu Security Updates?", results)
if out[0].SourceClass != "binding_law" {
t.Errorf("off-topic standard must not win even with control intent, got %s", out[0].SourceClass)
}
}
func TestRerank_ControlQuestion_UntaggedNISTLifted(t *testing.T) {
// The existing NIST corpus is UNtagged (no source_class). It must still be classified
// technical_standard via markers and lifted on a control question — the whole reason
// the lift path classifies instead of trusting the raw payload field.
results := []LegalSearchResult{
{RegulationShort: "NIST SP 800-82r3", ArticleLabel: "AU-8", Score: 0.62},
{RegulationShort: "CRA", ArticleLabel: "Art. 13 CRA", Category: "regulation", Score: 0.58},
}
out := rerankByAuthority("Welche Controls passen zu Security Updates?", results)
if out[0].RegulationShort != "NIST SP 800-82r3" {
t.Errorf("untagged NIST should be lifted Top-1 on a control question, got %q", out[0].RegulationShort)
}
}