feat(ai-sdk): demote superseded pre-eu-v1 sources in authority rerank
CI / detect-changes (pull_request) Successful in 18s
CI / branch-name (pull_request) Successful in 1s
CI / guardrail-integrity (pull_request) Successful in 14s
CI / secret-scan (pull_request) Successful in 16s
CI / dep-audit (pull_request) Failing after 1m3s
CI / sbom-scan (pull_request) Failing after 1m8s
CI / build-sha-integrity (pull_request) Successful in 16s
CI / validate-canonical-controls (pull_request) Successful in 14s
CI / loc-budget (pull_request) Successful in 24s
CI / go-lint (pull_request) Successful in 57s
CI / python-lint (pull_request) Failing after 20s
CI / nodejs-lint (pull_request) Failing after 1m13s
CI / nodejs-build (pull_request) Successful in 3m9s
CI / test-go (pull_request) Successful in 1m3s
CI / iace-gt-coverage (pull_request) Successful in 26s
CI / test-python-backend (pull_request) Successful in 36s
CI / test-python-document-crawler (pull_request) Successful in 20s
CI / test-python-dsms-gateway (pull_request) Successful in 18s
CI / detect-changes (pull_request) Successful in 18s
CI / branch-name (pull_request) Successful in 1s
CI / guardrail-integrity (pull_request) Successful in 14s
CI / secret-scan (pull_request) Successful in 16s
CI / dep-audit (pull_request) Failing after 1m3s
CI / sbom-scan (pull_request) Failing after 1m8s
CI / build-sha-integrity (pull_request) Successful in 16s
CI / validate-canonical-controls (pull_request) Successful in 14s
CI / loc-budget (pull_request) Successful in 24s
CI / go-lint (pull_request) Successful in 57s
CI / python-lint (pull_request) Failing after 20s
CI / nodejs-lint (pull_request) Failing after 1m13s
CI / nodejs-build (pull_request) Successful in 3m9s
CI / test-go (pull_request) Successful in 1m3s
CI / iace-gt-coverage (pull_request) Successful in 26s
CI / test-python-backend (pull_request) Successful in 36s
CI / test-python-document-crawler (pull_request) Successful in 20s
CI / test-python-dsms-gateway (pull_request) Successful in 18s
The old pre-eu-v1 corpus chunks (un-annotated CRA/AI Act/DORA/NIS2/DSGVO duplicates + the old Machinery Directive and its guide) are tagged status=superseded / use_for_primary=false in the vector store. Honor that in the rerank: a superseded result takes a fixed penalty so the eu-v1 norm wins default questions, while the old source stays in the pool (demoted, not hidden) and remains findable for history / transition questions. Verified on dev: "CRA Sicherheitsupdates" now returns CRA Anhang I (eu-v1) at #1 instead of an un-annotated old chunk; MaschinenVO outranks the old Machinery Directive/guide; superseded chunks remain retrievable lower down. Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
This commit is contained in:
@@ -12,6 +12,7 @@ const (
|
|||||||
offDomainPenalty = 0.10 // off-domain binding (demoted, not removed)
|
offDomainPenalty = 0.10 // off-domain binding (demoted, not removed)
|
||||||
scopePenalty = 0.25 // BDSG Teil 3 (law enforcement) on a general DP question
|
scopePenalty = 0.25 // BDSG Teil 3 (law enforcement) on a general DP question
|
||||||
topicGain = 0.18 // amplifier only
|
topicGain = 0.18 // amplifier only
|
||||||
|
supersededPenalty = 0.50 // superseded Alt-Quelle (pre-eu-v1): demoted, nicht versteckt
|
||||||
)
|
)
|
||||||
|
|
||||||
// authorityScore computes the normative relevance of a result for a query. It augments the
|
// authorityScore computes the normative relevance of a result for a query. It augments the
|
||||||
@@ -20,6 +21,12 @@ func authorityScore(query string, r LegalSearchResult, qDomain string, qForeign
|
|||||||
info := classifyAuthority(r)
|
info := classifyAuthority(r)
|
||||||
score := r.Score + authorityCoef*float64(info.weight)/100.0
|
score := r.Score + authorityCoef*float64(info.weight)/100.0
|
||||||
|
|
||||||
|
if r.Superseded {
|
||||||
|
// Alt-Quelle (pre-eu-v1): Default-Fragen sollen die eu-v1-Norm sehen. Demoted,
|
||||||
|
// nicht entfernt — fuer Historie/Uebergangsfragen bleibt sie auffindbar.
|
||||||
|
score -= supersededPenalty
|
||||||
|
}
|
||||||
|
|
||||||
if info.jurisdiction == "CH" && !qForeign {
|
if info.jurisdiction == "CH" && !qForeign {
|
||||||
score -= foreignPenalty // Fremdrecht bei DE/EU-Frage: demoted, nicht geloescht
|
score -= foreignPenalty // Fremdrecht bei DE/EU-Frage: demoted, nicht geloescht
|
||||||
} else {
|
} else {
|
||||||
|
|||||||
@@ -148,6 +148,7 @@ func (c *LegalRAGClient) searchInternal(ctx context.Context, collection string,
|
|||||||
CitationUnit: getString(hit.Payload, "citation_unit"),
|
CitationUnit: getString(hit.Payload, "citation_unit"),
|
||||||
ReferencesOut: getStringSlice(hit.Payload, "references_out"),
|
ReferencesOut: getStringSlice(hit.Payload, "references_out"),
|
||||||
ReferencesIn: getStringSlice(hit.Payload, "references_in"),
|
ReferencesIn: getStringSlice(hit.Payload, "references_in"),
|
||||||
|
Superseded: getString(hit.Payload, "status") == "superseded",
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@@ -0,0 +1,30 @@
|
|||||||
|
package ucca
|
||||||
|
|
||||||
|
import "testing"
|
||||||
|
|
||||||
|
// A superseded alt-source must rank below the same result when it is NOT
|
||||||
|
// superseded (the eu-v1 norm), but only demoted — the penalty is finite, so it
|
||||||
|
// stays in the pool and remains findable for history/transition questions.
|
||||||
|
func TestAuthorityScore_SupersededIsDemotedNotRemoved(t *testing.T) {
|
||||||
|
fresh := LegalSearchResult{
|
||||||
|
Score: 0.65, SourceClass: "binding_law", AuthorityWeight: 100,
|
||||||
|
Jurisdiction: "EU", RegulationShort: "CRA", Article: "13",
|
||||||
|
}
|
||||||
|
old := fresh
|
||||||
|
old.Superseded = true
|
||||||
|
|
||||||
|
sFresh := authorityScore("CRA Sicherheitsupdates Hersteller", fresh, "", false)
|
||||||
|
sOld := authorityScore("CRA Sicherheitsupdates Hersteller", old, "", false)
|
||||||
|
|
||||||
|
if sOld >= sFresh {
|
||||||
|
t.Errorf("superseded must score lower: fresh=%.3f superseded=%.3f", sFresh, sOld)
|
||||||
|
}
|
||||||
|
gap := sFresh - sOld
|
||||||
|
if gap < supersededPenalty-0.001 || gap > supersededPenalty+0.001 {
|
||||||
|
t.Errorf("demotion should equal supersededPenalty (%.2f), got %.3f", supersededPenalty, gap)
|
||||||
|
}
|
||||||
|
// Still a positive, finite score → present in the pool, not hidden.
|
||||||
|
if sOld <= -1 {
|
||||||
|
t.Errorf("superseded score collapsed (%.3f) — must remain findable", sOld)
|
||||||
|
}
|
||||||
|
}
|
||||||
@@ -33,6 +33,10 @@ type LegalSearchResult struct {
|
|||||||
CitationUnit string `json:"-"`
|
CitationUnit string `json:"-"`
|
||||||
ReferencesOut []string `json:"-"`
|
ReferencesOut []string `json:"-"`
|
||||||
ReferencesIn []string `json:"-"`
|
ReferencesIn []string `json:"-"`
|
||||||
|
|
||||||
|
// Supersede-Status (status="superseded", use_for_primary=false) — Alt-Quelle,
|
||||||
|
// die fuer Default-Fragen demoted wird (nicht versteckt; fuer Historie auffindbar).
|
||||||
|
Superseded bool `json:"-"`
|
||||||
}
|
}
|
||||||
|
|
||||||
// LegalAssessment is the auditable explanation layer over a ranked result set:
|
// LegalAssessment is the auditable explanation layer over a ranked result set:
|
||||||
|
|||||||
Reference in New Issue
Block a user