feat(ai-sdk): EvidenceType-Schicht — autoritative Fußnoten/Tabellen/Figuren surfacen

Router-Schicht Intent→KnowledgeSpace→EvidenceType→Collection→Merge→Authority (User-
Entscheidung A generalisiert). Neuer EvidenceType{TEXT,FOOTNOTE,TABLE,FIGURE} +
classifyEvidence (aus is_footnote/is_table/is_figure-Payload). RetrieveEvidence() zieht
die autoritative typisierte Evidence GEZIELT aus der KB-Slice (top-20, in-scope) statt
sie im Breit-Basis-Text-Merge zu verlieren; /retrieve liefert footnotes[]/tables[]/
figures[]. Kein perColl-Blindanstieg. Dieselbe Infra trägt C8 (FIGURE) ohne Router-Umbau.

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
This commit is contained in:
Benjamin Admin
2026-07-01 09:30:42 +02:00
parent 6f0c1cf30d
commit a606000a20
4 changed files with 132 additions and 20 deletions
@@ -0,0 +1,68 @@
package ucca
import "context"
// EvidenceType names the KIND of evidence a retrieved unit represents. It is orthogonal to the
// collection the unit lives in: footnotes, tables and figures are evidence types, not collections.
//
// Router layering: Intent -> Knowledge Space -> EvidenceType -> Collection -> Merge -> Authority.
// Non-text evidence from the authoritative knowledge space (the KB slice) is surfaced SEPARATELY
// from the merged text top-K so that fine-grained evidence is not outranked by broad-base text.
//
// FOOTNOTE is populated today, TABLE is already present from C6/C9, and FIGURE arrives with C8.
// New evidence types travel the same path, so the router does not need to be rebuilt for them.
type EvidenceType string

// The evidence kinds the router distinguishes. The string values are the type's wire/lookup form.
const (
	// EvidenceText is ordinary text, i.e. a unit without any is_* marker.
	EvidenceText = EvidenceType("text")
	// EvidenceFootnote marks a unit flagged as a footnote.
	EvidenceFootnote = EvidenceType("footnote")
	// EvidenceTable marks a unit flagged as a table.
	EvidenceTable = EvidenceType("table")
	// EvidenceFigure marks a unit flagged as a figure.
	EvidenceFigure = EvidenceType("figure")
)
// classifyEvidence maps a search result to its EvidenceType by inspecting the boolean evidence
// markers on the result. The markers are checked in the order footnote, figure, table and the
// first one that is set wins; a result with no marker set is classified as plain text.
// (In practice a unit is expected to carry at most one marker, so the order rarely matters.)
func classifyEvidence(r LegalSearchResult) EvidenceType {
	if r.IsFootnote {
		return EvidenceFootnote
	}
	if r.IsFigure {
		return EvidenceFigure
	}
	if r.IsTable {
		return EvidenceTable
	}
	// No evidence marker set: ordinary text.
	return EvidenceText
}
// Budgets for the authoritative-KB evidence pass.
const (
	// evidenceRetrievalTopK is how many hits the evidence pass requests from the KB slice.
	// The pass is deliberately targeted (the authoritative slice within the recognized knowledge
	// space) and is NOT a blanket top-K increase of the merged result set: the gains came from
	// BETTER-targeted evidence, not from MORE evidence.
	evidenceRetrievalTopK = 20

	// maxEvidencePerType is the upper bound on surfaced results for each evidence type.
	maxEvidencePerType = 6
)
// RetrieveEvidence is the "Evidence Type" router layer (Option A). For a query that lies in the
// KB knowledge space it searches the KB slice collection and returns the non-text hits
// (footnotes/tables/figures) grouped by EvidenceType, so that this authoritative evidence is
// surfaced separately instead of being lost in the broad-base text merge.
//
// The search requests evidenceRetrievalTopK hits. Hits are kept in the order the search returned
// them, and each evidence type holds at most maxEvidencePerType entries; further hits of a full
// type are dropped. Text hits are never included here — text flows through the normal Retrieve()
// merge (the LLM context + the sources list).
//
// The returned map is never nil. It is empty when KB scope routing is disabled, no KB slice
// collection is configured, the query is out of scope, or the search itself fails (the error is
// not reported to the caller).
func (c *LegalRAGClient) RetrieveEvidence(ctx context.Context, query string) map[EvidenceType][]LegalSearchResult {
	grouped := make(map[EvidenceType][]LegalSearchResult)

	// Routing must be switched on and a slice configured before the query scope is even checked.
	routable := c.kbScopeRoutingEnabled && c.kbSliceCollection != ""
	if !routable || !inKBScope(query) {
		return grouped
	}

	results, err := c.searchInternal(ctx, c.kbSliceCollection, query, nil, evidenceRetrievalTopK)
	if err != nil {
		// Best effort: a failed evidence pass yields no typed evidence.
		return grouped
	}

	for _, res := range results {
		kind := classifyEvidence(res)
		// Keep only non-text evidence, and only while this type still has room.
		if kind != EvidenceText && len(grouped[kind]) < maxEvidencePerType {
			grouped[kind] = append(grouped[kind], res)
		}
	}
	return grouped
}
@@ -38,13 +38,16 @@ type LegalSearchResult struct {
// die fuer Default-Fragen demoted wird (nicht versteckt; fuer Historie auffindbar).
Superseded bool `json:"-"`
// C-FN Fußnoten-Evidence — intern (json:"-", kein Pro-Result-Contract-Change),
// aus dem Qdrant-Payload befuellt; der /retrieve-Handler baut daraus das Top-Level
// footnotes[] fuer den Advisor-Evidence-Workspace (Frontend RawFootnote).
// Evidence-Type-Marker — intern (json:"-", kein Pro-Result-Contract-Change), aus dem
// Qdrant-Payload befuellt. classifyEvidence() leitet daraus den EvidenceType ab; der
// Router surfacet nicht-Text-Evidence (Fußnote/Tabelle/Figur) getrennt vom Text-Merge,
// damit feingranulare Evidence nicht von Breit-Basis-Text ueberrankt wird.
IsFootnote bool `json:"-"`
FootnoteLabel string `json:"-"`
FootnoteVerbatim string `json:"-"`
RefCitationUnit string `json:"-"`
IsTable bool `json:"-"` // C6/C9: is_table (liniiert + borderless)
IsFigure bool `json:"-"` // C8: is_figure (noch nicht befuellt bis C8)
}
// LegalAssessment is the auditable explanation layer over a ranked result set:
@@ -200,6 +200,8 @@ func hitsToResults(hits []qdrantSearchHit) []LegalSearchResult {
FootnoteLabel: getString(hit.Payload, "footnote_label"),
FootnoteVerbatim: getString(hit.Payload, "footnote_verbatim"),
RefCitationUnit: getString(hit.Payload, "ref_citation_unit"),
IsTable: getBool(hit.Payload, "is_table"),
IsFigure: getBool(hit.Payload, "is_figure"),
}
}
return results