feat(ai-sdk): citation-graph assessment + opt-in graph expansion (Phase 2)

Add an `assessment` object to the legal RAG search response: primary norm, connected norms (from the citation graph references_out/in of the primary), cross_regime, human_review_flag, a norm-level winner_margin and a short reasoning string. The margin is computed over DISTINCT norms, so a long article split into several chunks no longer fabricates uncertainty. The per-result schema stays frozen — graph fields are internal (json:"-"). Also wire optional citation-graph expansion (RAG_GRAPH_EXPANSION=true, default off): top hits pull their referenced norms into the candidate pool via the precise edge (e.g. Art. 13 CRA -> Anhang I). Measured to add no rank gain over the existing binding-law augmentation, with +1 Qdrant call per search and reverse-edge fan-out risk, so it ships off-by-default as a recall safety net. The graph EXPLAINS retrieval (assessment), it does not expand it by default. Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
2026-06-23 19:48:01 +02:00
parent 4c99773fa1
commit 989d9f6f91
7 changed files with 539 additions and 3 deletions
@@ -0,0 +1,134 @@
+package ucca
+
+import (
+	"fmt"
+	"strings"
+)
+
+// Tuning values used by Assess and assessReasoning.
+const (
+	assessConnectedCap    = 12   // upper bound on connected norms kept in the assessment; longer lists are truncated
+	assessCrossRegimeTopN = 5    // how many leading distinct norms are inspected when deciding "cross regime"
+	assessReviewMargin    = 0.05 // a winner margin below this value triggers the human-review recommendation
+)
+
+// Assess builds the auditable explanation layer over a ranked result set:
+// primary norm, the norms it connects to (citation graph), cross-regime, a
+// human-review flag, the winner margin and a short reasoning string. Pure →
+// unit-testable. It EXPLAINS the ranking, it does not change it. Returns nil for
+// an empty result set.
+func Assess(results []LegalSearchResult) *LegalAssessment {
+	if len(results) == 0 {
+		return nil
+	}
+	// Norm-level view: collapse multiple chunks of the same article/annex so the
+	// margin and cross-regime are judged between DISTINCT norms, not near-identical
+	// chunks of one norm (which would make every winner margin ~0).
+	norms := distinctNorms(results)
+	p := norms[0]
+
+	primary := primaryLabel(p)
+	connected := dedupStrings(p.ReferencesOut, p.ReferencesIn, p.CitationUnit)
+	if len(connected) > assessConnectedCap {
+		connected = connected[:assessConnectedCap]
+	}
+
+	window := norms
+	if len(window) > assessCrossRegimeTopN {
+		window = window[:assessCrossRegimeTopN]
+	}
+	regimes := make(map[string]bool)
+	for _, r := range window {
+		if r.RegulationShort != "" {
+			regimes[r.RegulationShort] = true
+		}
+	}
+	crossRegime := len(regimes) > 1
+
+	margin := 0.0
+	if len(norms) > 1 {
+		margin = norms[0].Score - norms[1].Score
+	}
+
+	primaryBinding := p.SourceClass == "binding_law"
+	humanReview := margin < assessReviewMargin || crossRegime || !primaryBinding
+
+	return &LegalAssessment{
+		PrimaryNorm:       primary,
+		PrimaryRegulation: p.RegulationShort,
+		ConnectedNorms:    connected,
+		CrossRegime:       crossRegime,
+		HumanReviewFlag:   humanReview,
+		WinnerMargin:      margin,
+		ScoreReasoning:    assessReasoning(p, margin, crossRegime, primaryBinding),
+	}
+}
+
+// primaryLabel picks the display name of a result: its CitationUnit when set,
+// otherwise its ArticleLabel, otherwise "<RegulationShort> <Article>" with
+// surrounding whitespace trimmed.
+func primaryLabel(p LegalSearchResult) string {
+	switch {
+	case p.CitationUnit != "":
+		return p.CitationUnit
+	case p.ArticleLabel != "":
+		return p.ArticleLabel
+	default:
+		return strings.TrimSpace(p.RegulationShort + " " + p.Article)
+	}
+}
+
+// assessReasoning renders a short, human-readable justification (German).
+func assessReasoning(p LegalSearchResult, margin float64, crossRegime, primaryBinding bool) string {
+	label := primaryLabel(p)
+	parts := make([]string, 0, 4)
+	if primaryBinding {
+		parts = append(parts, fmt.Sprintf("Primärtreffer %s: bindendes Recht (Autorität %d).", label, p.AuthorityWeight))
+	} else {
+		parts = append(parts, fmt.Sprintf("Primärtreffer %s ist keine bindende Norm (Leitlinie/Standard) — Quelle prüfen.", label))
+	}
+	if margin > 0 {
+		parts = append(parts, fmt.Sprintf("Vorsprung %.2f vor #2.", margin))
+	}
+	if margin < assessReviewMargin {
+		parts = append(parts, "Knapper Vorsprung — Alternativtreffer prüfen.")
+	}
+	if crossRegime {
+		parts = append(parts, "Mehrere Regime betroffen — Querbezug prüfen.")
+	}
+	return strings.Join(parts, " ")
+}
+
+// distinctNorms returns results with repeated norms removed, keeping only the
+// first occurrence of each identity in input order. The identity of a result
+// is its CitationUnit, or its ArticleLabel when CitationUnit is empty. Results
+// where both are empty have no identity and are all kept.
+//
+// NOTE(review): if ArticleLabel does not include the regulation, norms from
+// different regulations could share an identity here — verify against the
+// data that fills this field.
+func distinctNorms(results []LegalSearchResult) []LegalSearchResult {
+	unique := make([]LegalSearchResult, 0, len(results))
+	taken := make(map[string]bool, len(results))
+	for i := range results {
+		id := results[i].CitationUnit
+		if id == "" {
+			id = results[i].ArticleLabel
+		}
+		if id == "" {
+			// Nothing to match on: keep unconditionally.
+			unique = append(unique, results[i])
+			continue
+		}
+		if !taken[id] {
+			taken[id] = true
+			unique = append(unique, results[i])
+		}
+	}
+	return unique
+}
+
+// dedupStrings merges out followed by in into one list without repeats. Empty
+// strings and the exclude value are skipped, and every remaining value appears
+// once at the position of its first occurrence. The result is never nil.
+func dedupStrings(out, in []string, exclude string) []string {
+	merged := make([]string, 0, len(out)+len(in))
+	// Pre-marking exclude as known keeps it out of the result.
+	known := map[string]bool{exclude: true}
+
+	collect := func(values []string) {
+		for _, v := range values {
+			if v == "" || known[v] {
+				continue
+			}
+			known[v] = true
+			merged = append(merged, v)
+		}
+	}
+	collect(out)
+	collect(in)
+	return merged
+}