From a606000a20b5b09cc6596670663e63cf590bd9cc Mon Sep 17 00:00:00 2001 From: Benjamin Admin Date: Wed, 1 Jul 2026 09:30:42 +0200 Subject: [PATCH] =?UTF-8?q?feat(ai-sdk):=20EvidenceType-Schicht=20?= =?UTF-8?q?=E2=80=94=20autoritative=20Fu=C3=9Fnoten/Tabellen/Figuren=20sur?= =?UTF-8?q?facen?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Router-Schicht Intent→KnowledgeSpace→EvidenceType→Collection→Merge→Authority (User-Entscheidung A generalisiert). Neuer EvidenceType{TEXT,FOOTNOTE,TABLE,FIGURE} + classifyEvidence (aus is_footnote/is_table/is_figure-Payload). RetrieveEvidence() zieht die autoritative typisierte Evidence GEZIELT aus der KB-Slice (top-20, in-scope) statt sie im Breit-Basis-Text-Merge zu verlieren; /retrieve liefert footnotes[]/tables[]/figures[]. Kein perColl-Blindanstieg. Dieselbe Infra trägt C8 (FIGURE) ohne Router-Umbau. Co-Authored-By: Claude Opus 4.7 --- .../internal/api/handlers/rag_handlers.go | 73 ++++++++++++++----- .../internal/ucca/evidence_type.go | 68 +++++++++++++++++ .../internal/ucca/legal_rag_types.go | 9 ++- .../internal/ucca/multi_regulation.go | 2 + 4 files changed, 132 insertions(+), 20 deletions(-) create mode 100644 ai-compliance-sdk/internal/ucca/evidence_type.go diff --git a/ai-compliance-sdk/internal/api/handlers/rag_handlers.go b/ai-compliance-sdk/internal/api/handlers/rag_handlers.go index e8613b7a..1b7d6519 100644 --- a/ai-compliance-sdk/internal/api/handlers/rag_handlers.go +++ b/ai-compliance-sdk/internal/api/handlers/rag_handlers.go @@ -111,15 +111,27 @@ func (h *RAGHandlers) Retrieve(c *gin.Context) { return } - // C-FN: Fußnoten-Hits als separates footnotes[] herausziehen (Frontend RawFootnote-Shape), - // damit der Advisor-Evidence-Workspace sie im dedizierten Fußnoten-Bereich rendert. Die Hits - // bleiben zusätzlich in results[] (LLM-Kontext). figures[] = C8-Platzhalter (leer bis C8). 
- footnotes := make([]gin.H, 0) - for _, r := range results { - if !r.IsFootnote { - continue - } - footnotes = append(footnotes, gin.H{ + // Evidence-Type-Schicht: die autoritative typisierte Evidence (Fußnoten/Tabellen/Figuren) aus + // dem KB-Wissensraum SEPARAT surfacen, statt sie im Breit-Basis-Text-Merge zu verlieren. + // results[] bleibt der Text-Kontext fürs LLM + die Quellen-Liste. + ev := h.ragClient.RetrieveEvidence(c.Request.Context(), req.Query) + + c.JSON(http.StatusOK, gin.H{ + "query": req.Query, + "results": results, + "count": len(results), + "assessment": ucca.Assess(results), + "footnotes": footnotesFromEvidence(ev[ucca.EvidenceFootnote]), + "tables": tablesFromEvidence(ev[ucca.EvidenceTable]), + "figures": figuresFromEvidence(ev[ucca.EvidenceFigure]), + }) +} + +// footnotesFromEvidence maps FOOTNOTE evidence to the Evidence-Workspace RawFootnote shape. +func footnotesFromEvidence(rs []ucca.LegalSearchResult) []gin.H { + out := make([]gin.H, 0, len(rs)) + for _, r := range rs { + out = append(out, gin.H{ "id": r.CitationUnit, "ref": r.CitationUnit, "number": r.FootnoteLabel, @@ -130,15 +142,42 @@ func (h *RAGHandlers) Retrieve(c *gin.Context) { "text": r.FootnoteVerbatim, }) } + return out +} - c.JSON(http.StatusOK, gin.H{ - "query": req.Query, - "results": results, - "count": len(results), - "assessment": ucca.Assess(results), - "footnotes": footnotes, - "figures": []gin.H{}, - }) +// tablesFromEvidence maps TABLE evidence (C6/C9). Key is present so the same Evidence-Type path +// carries tables the moment the UI adds a table section. 
+func tablesFromEvidence(rs []ucca.LegalSearchResult) []gin.H { + out := make([]gin.H, 0, len(rs)) + for _, r := range rs { + out = append(out, gin.H{ + "id": r.CitationUnit, + "caption": r.ArticleLabel, + "regulation_code": r.RegulationCode, + "regulation_short": r.RegulationShort, + "regulation_name": r.RegulationName, + "section": r.RefCitationUnit, + "text": r.Text, + }) + } + return out +} + +// figuresFromEvidence maps FIGURE evidence (C8). Empty until C8 populates figure units; image_url/ +// caption/vision_summary get added here when C8 lands — same path, no router change. +func figuresFromEvidence(rs []ucca.LegalSearchResult) []gin.H { + out := make([]gin.H, 0, len(rs)) + for _, r := range rs { + out = append(out, gin.H{ + "figure_id": r.CitationUnit, + "caption": r.ArticleLabel, + "regulation_code": r.RegulationCode, + "regulation_short": r.RegulationShort, + "regulation_name": r.RegulationName, + "section": r.RefCitationUnit, + }) + } + return out } // ListRegulations returns the list of available regulations in the corpus. diff --git a/ai-compliance-sdk/internal/ucca/evidence_type.go b/ai-compliance-sdk/internal/ucca/evidence_type.go new file mode 100644 index 00000000..77f797d7 --- /dev/null +++ b/ai-compliance-sdk/internal/ucca/evidence_type.go @@ -0,0 +1,68 @@ +package ucca + +import "context" + +// EvidenceType classifies a retrieved unit by WHAT KIND of evidence it is, independent of its +// collection. Footnotes/tables/figures are Evidence Types, not collections. The Authority Router +// surfaces non-text evidence from the authoritative knowledge space (the KB slice) SEPARATELY from +// the merged text top-K, so fine-grained evidence isn't outranked by broad-base text. +// +// The layer this introduces: Intent -> Knowledge Space -> EvidenceType -> Collection -> Merge -> +// Authority. 
Today FOOTNOTE is populated; FIGURE arrives with C8 and TABLE is already present from +// C6/C9 — no router rebuild needed, the same path carries every new evidence type. +type EvidenceType string + +const ( + EvidenceText EvidenceType = "text" + EvidenceFootnote EvidenceType = "footnote" + EvidenceTable EvidenceType = "table" + EvidenceFigure EvidenceType = "figure" +) + +// classifyEvidence derives the EvidenceType from a result's payload markers. Precedence +// footnote > figure > table > text (a unit carries at most one is_* marker in practice). +func classifyEvidence(r LegalSearchResult) EvidenceType { + switch { + case r.IsFootnote: + return EvidenceFootnote + case r.IsFigure: + return EvidenceFigure + case r.IsTable: + return EvidenceTable + default: + return EvidenceText + } +} + +// evidenceRetrievalTopK is the budget for the authoritative-KB evidence pass. Deliberately targeted +// (the authoritative slice within the recognized knowledge space), NOT a blanket top-K increase of +// the merged result set — the successes came from BETTER-targeted evidence, not MORE evidence. +const evidenceRetrievalTopK = 20 + +// maxEvidencePerType caps each surfaced evidence type. +const maxEvidencePerType = 6 + +// RetrieveEvidence returns the authoritative typed evidence (footnotes/tables/figures) for an +// in-scope query, pulled from the KB slice and grouped by EvidenceType. This is the "Evidence Type" +// router layer (Option A): when the query is in the KB knowledge space, the authoritative evidence +// within that space is surfaced separately so it isn't lost in the broad-base text merge. Returns an +// empty map when out of scope or KB routing is disabled. Text evidence is NOT returned here — it +// flows through the normal Retrieve() merge (the LLM context + the sources list). 
+func (c *LegalRAGClient) RetrieveEvidence(ctx context.Context, query string) map[EvidenceType][]LegalSearchResult { + ev := map[EvidenceType][]LegalSearchResult{} + if !c.kbScopeRoutingEnabled || c.kbSliceCollection == "" || !inKBScope(query) { + return ev + } + hits, err := c.searchInternal(ctx, c.kbSliceCollection, query, nil, evidenceRetrievalTopK) + if err != nil { + return ev + } + for _, h := range hits { + t := classifyEvidence(h) + if t == EvidenceText || len(ev[t]) >= maxEvidencePerType { + continue + } + ev[t] = append(ev[t], h) + } + return ev +} diff --git a/ai-compliance-sdk/internal/ucca/legal_rag_types.go b/ai-compliance-sdk/internal/ucca/legal_rag_types.go index 16bd8f2a..8a4a18fb 100644 --- a/ai-compliance-sdk/internal/ucca/legal_rag_types.go +++ b/ai-compliance-sdk/internal/ucca/legal_rag_types.go @@ -38,13 +38,16 @@ type LegalSearchResult struct { // die fuer Default-Fragen demoted wird (nicht versteckt; fuer Historie auffindbar). Superseded bool `json:"-"` - // C-FN Fußnoten-Evidence — intern (json:"-", kein Pro-Result-Contract-Change), - // aus dem Qdrant-Payload befuellt; der /retrieve-Handler baut daraus das Top-Level - // footnotes[] fuer den Advisor-Evidence-Workspace (Frontend RawFootnote). + // Evidence-Type-Marker — intern (json:"-", kein Pro-Result-Contract-Change), aus dem + // Qdrant-Payload befuellt. classifyEvidence() leitet daraus den EvidenceType ab; der + // Router surfacet nicht-Text-Evidence (Fußnote/Tabelle/Figur) getrennt vom Text-Merge, + // damit feingranulare Evidence nicht von Breit-Basis-Text ueberrankt wird. 
IsFootnote bool `json:"-"` FootnoteLabel string `json:"-"` FootnoteVerbatim string `json:"-"` RefCitationUnit string `json:"-"` + IsTable bool `json:"-"` // C6/C9: is_table (liniiert + borderless) + IsFigure bool `json:"-"` // C8: is_figure (noch nicht befuellt bis C8) } // LegalAssessment is the auditable explanation layer over a ranked result set: diff --git a/ai-compliance-sdk/internal/ucca/multi_regulation.go b/ai-compliance-sdk/internal/ucca/multi_regulation.go index df9d7cc7..0d5ae068 100644 --- a/ai-compliance-sdk/internal/ucca/multi_regulation.go +++ b/ai-compliance-sdk/internal/ucca/multi_regulation.go @@ -200,6 +200,8 @@ func hitsToResults(hits []qdrantSearchHit) []LegalSearchResult { FootnoteLabel: getString(hit.Payload, "footnote_label"), FootnoteVerbatim: getString(hit.Payload, "footnote_verbatim"), RefCitationUnit: getString(hit.Payload, "ref_citation_unit"), + IsTable: getBool(hit.Payload, "is_table"), + IsFigure: getBool(hit.Payload, "is_figure"), } } return results