feat(ai-sdk): legal-corpus coverage + Phase-2 citation-graph assessment (#33)
CI / detect-changes (push) Successful in 8s
CI / branch-name (push) Has been skipped
CI / guardrail-integrity (push) Has been skipped
CI / secret-scan (push) Has been skipped
CI / dep-audit (push) Has been skipped
CI / build-sha-integrity (push) Successful in 6s
CI / sbom-scan (push) Has been skipped
CI / validate-canonical-controls (push) Successful in 6s
CI / go-lint (push) Has been skipped
CI / loc-budget (push) Successful in 19s
CI / python-lint (push) Has been skipped
CI / nodejs-lint (push) Has been skipped
CI / nodejs-build (push) Successful in 3m1s
CI / test-go (push) Successful in 59s
CI / iace-gt-coverage (push) Successful in 22s
CI / test-python-backend (push) Has been skipped
CI / test-python-document-crawler (push) Has been skipped
CI / test-python-dsms-gateway (push) Has been skipped

This commit was merged in pull request #33.
This commit is contained in:
2026-06-24 06:37:22 +00:00
parent b83c3e6e00
commit 230dc05287
14 changed files with 941 additions and 14 deletions
@@ -20,6 +20,7 @@ type LegalRAGClient struct {
httpClient *http.Client
textIndexEnsured map[string]bool
hybridEnabled bool
graphEnabled bool
}
// NewLegalRAGClient creates a new Legal RAG client using Ollama bge-m3 embeddings.
@@ -38,6 +39,11 @@ func NewLegalRAGClient() *LegalRAGClient {
}
hybridEnabled := os.Getenv("RAG_HYBRID_SEARCH") != "false"
// Graph-Expansion ist OPT-IN: kein gemessener Rang-Nutzen ggue. der Binding-Augmentation,
// +1 Qdrant-Call/Suche, Flutungsrisiko ueber Reverse-Kanten. Bleibt als Recall-Sicherheitsnetz
// fuer spaetere Luecken (RAG_GRAPH_EXPANSION=true). Die Graph-Kanten werden in der Response
// zur Begruendung/Vollstaendigkeit genutzt, nicht zur Pool-Expansion (Default).
graphEnabled := os.Getenv("RAG_GRAPH_EXPANSION") == "true"
return &LegalRAGClient{
qdrantURL: qdrantURL,
@@ -47,6 +53,7 @@ func NewLegalRAGClient() *LegalRAGClient {
collection: "bp_compliance_ce",
textIndexEnsured: make(map[string]bool),
hybridEnabled: hybridEnabled,
graphEnabled: graphEnabled,
httpClient: &http.Client{
Timeout: 60 * time.Second,
},
@@ -100,6 +107,13 @@ func (c *LegalRAGClient) searchInternal(ctx context.Context, collection string,
hits = mergeDedupHits(hits, bindingHits)
}
// Graph-Augmentation: verbundene Normen (references_out/in) der Top-Hits ueber die
// praezise Zitations-Kante in den Pool ziehen — z.B. Art. 13 CRA zieht Anhang I (die
// eigentliche Pflichtquelle). Pool-Augmentation only; Re-Rank + topK bleiben.
if c.graphEnabled {
hits = c.expandViaGraph(ctx, collection, hits)
}
results := make([]LegalSearchResult, len(hits))
for i, hit := range hits {
// Legal-Metadaten nach rag_reingest_spec.md §2: bevorzugt die normalisierten Felder
@@ -131,6 +145,10 @@ func (c *LegalRAGClient) searchInternal(ctx context.Context, collection string,
AuthorityWeight: getInt(hit.Payload, "authority_weight"),
SourceClass: getString(hit.Payload, "source_class"),
Jurisdiction: getString(hit.Payload, "jurisdiction"),
CitationUnit: getString(hit.Payload, "citation_unit"),
ReferencesOut: getStringSlice(hit.Payload, "references_out"),
ReferencesIn: getStringSlice(hit.Payload, "references_in"),
Superseded: getString(hit.Payload, "status") == "superseded",
}
}