feat(ai-sdk): authority-aware re-ranking for legal RAG retrieval (Phase 1)
CI / detect-changes (pull_request) Successful in 16s
CI / branch-name (pull_request) Successful in 2s
CI / guardrail-integrity (pull_request) Successful in 5s
CI / secret-scan (pull_request) Successful in 6s
CI / dep-audit (pull_request) Failing after 1m1s
CI / sbom-scan (pull_request) Failing after 1m4s
CI / build-sha-integrity (pull_request) Successful in 14s
CI / validate-canonical-controls (pull_request) Successful in 13s
CI / test-go (pull_request) Successful in 1m2s
CI / loc-budget (pull_request) Successful in 24s
CI / go-lint (pull_request) Failing after 20s
CI / python-lint (pull_request) Failing after 23s
CI / nodejs-lint (pull_request) Failing after 1m10s
CI / nodejs-build (pull_request) Successful in 3m26s
CI / iace-gt-coverage (pull_request) Successful in 16s
CI / test-python-backend (pull_request) Successful in 27s
CI / test-python-document-crawler (pull_request) Successful in 13s
CI / test-python-dsms-gateway (pull_request) Successful in 9s
CI / detect-changes (pull_request) Successful in 16s
CI / branch-name (pull_request) Successful in 2s
CI / guardrail-integrity (pull_request) Successful in 5s
CI / secret-scan (pull_request) Successful in 6s
CI / dep-audit (pull_request) Failing after 1m1s
CI / sbom-scan (pull_request) Failing after 1m4s
CI / build-sha-integrity (pull_request) Successful in 14s
CI / validate-canonical-controls (pull_request) Successful in 13s
CI / test-go (pull_request) Successful in 1m2s
CI / loc-budget (pull_request) Successful in 24s
CI / go-lint (pull_request) Failing after 20s
CI / python-lint (pull_request) Failing after 23s
CI / nodejs-lint (pull_request) Failing after 1m10s
CI / nodejs-build (pull_request) Successful in 3m26s
CI / iace-gt-coverage (pull_request) Successful in 16s
CI / test-python-backend (pull_request) Successful in 27s
CI / test-python-document-crawler (pull_request) Successful in 13s
CI / test-python-dsms-gateway (pull_request) Successful in 9s
Re-orders /sdk/v1/rag/search results so that binding law from the matching jurisdiction and domain ranks above guidance, foreign law, and off-domain law — without dropping anything (guidance stays as interpretation context). Internal-only: the response schema is unchanged (json:"-" fields), so every consumer benefits without a contract change.

- authority.go: classifyAuthority / queryDomain / chunkDomain / scopeClass / topic ontology. A tagged payload (authority_weight/source_class/jurisdiction) wins; the untagged corpus gets a deterministic fallback via category + name markers.
- authority_rerank.go: rerankByAuthority. final = semantic + authority + jurisdiction + domain + scope + topic; the authority score is written back to Score so the multi-collection advisor merge preserves the order.
- legal_rag_client: stratified retrieval — the binding-law pool AUGMENTS the semantic pool (mergeDedupHits), then re-rank.
- legal_rag_http: searchBinding (source_class filter) + shared doPointsSearch.
- Table-driven tests for authority/domain/scope/topic + rerank acceptance + a stratified-binding integration test. go test -race green.

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
This commit is contained in:
@@ -93,6 +93,13 @@ func (c *LegalRAGClient) searchInternal(ctx context.Context, collection string,
|
||||
hits = denseHits
|
||||
}
|
||||
|
||||
// Stratified: den binding_law-Pool ERGAENZEN (nicht ersetzen), damit die Pflichtquelle
|
||||
// immer Kandidat ist — Guidance bleibt als Auslegungskontext erhalten. Best-effort:
|
||||
// Fehler beim Binding-Query degradieren still auf den semantischen Pool.
|
||||
if bindingHits, bErr := c.searchBinding(ctx, collection, embedding, topK); bErr == nil {
|
||||
hits = mergeDedupHits(hits, bindingHits)
|
||||
}
|
||||
|
||||
results := make([]LegalSearchResult, len(hits))
|
||||
for i, hit := range hits {
|
||||
// Legal-Metadaten nach rag_reingest_spec.md §2: bevorzugt die normalisierten Felder
|
||||
@@ -121,12 +128,41 @@ func (c *LegalRAGClient) searchInternal(ctx context.Context, collection string,
|
||||
Pages: getIntSlice(hit.Payload, "pages"),
|
||||
SourceURL: getString(hit.Payload, "source"),
|
||||
Score: hit.Score,
|
||||
AuthorityWeight: getInt(hit.Payload, "authority_weight"),
|
||||
SourceClass: getString(hit.Payload, "source_class"),
|
||||
Jurisdiction: getString(hit.Payload, "jurisdiction"),
|
||||
}
|
||||
}
|
||||
|
||||
// Authority-aware Re-Ranking: bindendes Recht der passenden Jurisdiktion/Domaene nach
|
||||
// oben, Guidance/Fremdrecht/Off-Domain runter (nichts wird geloescht). Reihenfolge only,
|
||||
// Response-Schema unveraendert. Score traegt den Authority-Score, damit nachgelagerte
|
||||
// Multi-Collection-Merges (Advisor) die Ordnung bewahren.
|
||||
results = rerankByAuthority(query, results)
|
||||
if topK > 0 && len(results) > topK {
|
||||
results = results[:topK]
|
||||
}
|
||||
|
||||
return results, nil
|
||||
}
|
||||
|
||||
// mergeDedupHits concatenates two hit lists, keeping the first occurrence of each point ID.
|
||||
func mergeDedupHits(primary, extra []qdrantSearchHit) []qdrantSearchHit {
|
||||
seen := make(map[string]bool, len(primary)+len(extra))
|
||||
out := make([]qdrantSearchHit, 0, len(primary)+len(extra))
|
||||
for _, list := range [][]qdrantSearchHit{primary, extra} {
|
||||
for _, h := range list {
|
||||
id := fmt.Sprint(h.ID)
|
||||
if seen[id] {
|
||||
continue
|
||||
}
|
||||
seen[id] = true
|
||||
out = append(out, h)
|
||||
}
|
||||
}
|
||||
return out
|
||||
}
|
||||
|
||||
// FormatLegalContextForPrompt formats the legal context for inclusion in an LLM prompt.
|
||||
func (c *LegalRAGClient) FormatLegalContextForPrompt(lc *LegalContext) string {
|
||||
if lc == nil || len(lc.Results) == 0 {
|
||||
|
||||
Reference in New Issue
Block a user