Compare commits
3 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
| a606000a20 | |||
| 6f0c1cf30d | |||
| f0120b237e |
@@ -111,14 +111,75 @@ func (h *RAGHandlers) Retrieve(c *gin.Context) {
|
|||||||
return
|
return
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Evidence-Type-Schicht: die autoritative typisierte Evidence (Fußnoten/Tabellen/Figuren) aus
|
||||||
|
// dem KB-Wissensraum SEPARAT surfacen, statt sie im Breit-Basis-Text-Merge zu verlieren.
|
||||||
|
// results[] bleibt der Text-Kontext fürs LLM + die Quellen-Liste.
|
||||||
|
ev := h.ragClient.RetrieveEvidence(c.Request.Context(), req.Query)
|
||||||
|
|
||||||
c.JSON(http.StatusOK, gin.H{
|
c.JSON(http.StatusOK, gin.H{
|
||||||
"query": req.Query,
|
"query": req.Query,
|
||||||
"results": results,
|
"results": results,
|
||||||
"count": len(results),
|
"count": len(results),
|
||||||
"assessment": ucca.Assess(results),
|
"assessment": ucca.Assess(results),
|
||||||
|
"footnotes": footnotesFromEvidence(ev[ucca.EvidenceFootnote]),
|
||||||
|
"tables": tablesFromEvidence(ev[ucca.EvidenceTable]),
|
||||||
|
"figures": figuresFromEvidence(ev[ucca.EvidenceFigure]),
|
||||||
})
|
})
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// footnotesFromEvidence maps FOOTNOTE evidence to the Evidence-Workspace RawFootnote shape.
|
||||||
|
func footnotesFromEvidence(rs []ucca.LegalSearchResult) []gin.H {
|
||||||
|
out := make([]gin.H, 0, len(rs))
|
||||||
|
for _, r := range rs {
|
||||||
|
out = append(out, gin.H{
|
||||||
|
"id": r.CitationUnit,
|
||||||
|
"ref": r.CitationUnit,
|
||||||
|
"number": r.FootnoteLabel,
|
||||||
|
"regulation_code": r.RegulationCode,
|
||||||
|
"regulation_short": r.RegulationShort,
|
||||||
|
"regulation_name": r.RegulationName,
|
||||||
|
"section": r.RefCitationUnit,
|
||||||
|
"text": r.FootnoteVerbatim,
|
||||||
|
})
|
||||||
|
}
|
||||||
|
return out
|
||||||
|
}
|
||||||
|
|
||||||
|
// tablesFromEvidence maps TABLE evidence (C6/C9). Key is present so the same Evidence-Type path
|
||||||
|
// carries tables the moment the UI adds a table section.
|
||||||
|
func tablesFromEvidence(rs []ucca.LegalSearchResult) []gin.H {
|
||||||
|
out := make([]gin.H, 0, len(rs))
|
||||||
|
for _, r := range rs {
|
||||||
|
out = append(out, gin.H{
|
||||||
|
"id": r.CitationUnit,
|
||||||
|
"caption": r.ArticleLabel,
|
||||||
|
"regulation_code": r.RegulationCode,
|
||||||
|
"regulation_short": r.RegulationShort,
|
||||||
|
"regulation_name": r.RegulationName,
|
||||||
|
"section": r.RefCitationUnit,
|
||||||
|
"text": r.Text,
|
||||||
|
})
|
||||||
|
}
|
||||||
|
return out
|
||||||
|
}
|
||||||
|
|
||||||
|
// figuresFromEvidence maps FIGURE evidence (C8). Empty until C8 populates figure units; image_url/
|
||||||
|
// caption/vision_summary get added here when C8 lands — same path, no router change.
|
||||||
|
func figuresFromEvidence(rs []ucca.LegalSearchResult) []gin.H {
|
||||||
|
out := make([]gin.H, 0, len(rs))
|
||||||
|
for _, r := range rs {
|
||||||
|
out = append(out, gin.H{
|
||||||
|
"figure_id": r.CitationUnit,
|
||||||
|
"caption": r.ArticleLabel,
|
||||||
|
"regulation_code": r.RegulationCode,
|
||||||
|
"regulation_short": r.RegulationShort,
|
||||||
|
"regulation_name": r.RegulationName,
|
||||||
|
"section": r.RefCitationUnit,
|
||||||
|
})
|
||||||
|
}
|
||||||
|
return out
|
||||||
|
}
|
||||||
|
|
||||||
// ListRegulations returns the list of available regulations in the corpus.
|
// ListRegulations returns the list of available regulations in the corpus.
|
||||||
// GET /sdk/v1/rag/regulations
|
// GET /sdk/v1/rag/regulations
|
||||||
func (h *RAGHandlers) ListRegulations(c *gin.Context) {
|
func (h *RAGHandlers) ListRegulations(c *gin.Context) {
|
||||||
|
|||||||
@@ -0,0 +1,68 @@
|
|||||||
|
package ucca
|
||||||
|
|
||||||
|
import "context"
|
||||||
|
|
||||||
|
// EvidenceType says WHAT KIND of evidence a retrieved unit is, independent of
// the collection it came from: footnotes, tables and figures are evidence
// types, not collections.
//
// The Authority Router surfaces non-text evidence from the authoritative
// knowledge space (the KB slice) separately from the merged text top-K, so that
// fine-grained evidence is not outranked by broad-base text. The resulting
// layering is:
//
//	Intent -> Knowledge Space -> EvidenceType -> Collection -> Merge -> Authority
//
// FOOTNOTE is populated today, TABLE is already present from C6/C9, and FIGURE
// arrives with C8. New evidence types travel the same path, so the router does
// not need to be rebuilt for them.
type EvidenceType string

const (
	// EvidenceText is ordinary text evidence.
	EvidenceText EvidenceType = "text"
	// EvidenceFootnote is footnote evidence.
	EvidenceFootnote EvidenceType = "footnote"
	// EvidenceTable is table evidence (C6/C9).
	EvidenceTable EvidenceType = "table"
	// EvidenceFigure is figure evidence (C8).
	EvidenceFigure EvidenceType = "figure"
)
|
||||||
|
|
||||||
|
// classifyEvidence derives the EvidenceType from a result's payload markers. Precedence
|
||||||
|
// footnote > figure > table > text (a unit carries at most one is_* marker in practice).
|
||||||
|
func classifyEvidence(r LegalSearchResult) EvidenceType {
|
||||||
|
switch {
|
||||||
|
case r.IsFootnote:
|
||||||
|
return EvidenceFootnote
|
||||||
|
case r.IsFigure:
|
||||||
|
return EvidenceFigure
|
||||||
|
case r.IsTable:
|
||||||
|
return EvidenceTable
|
||||||
|
default:
|
||||||
|
return EvidenceText
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Budgets for the authoritative-KB evidence pass.
const (
	// evidenceRetrievalTopK is how many hits the evidence pass requests from
	// the KB slice. It is deliberately a targeted budget for the authoritative
	// slice within the recognized knowledge space, NOT a blanket top-K increase
	// of the merged result set: the gains came from better-targeted evidence,
	// not from more evidence.
	evidenceRetrievalTopK = 20

	// maxEvidencePerType is the upper bound on results surfaced per evidence
	// type.
	maxEvidencePerType = 6
)
|
||||||
|
|
||||||
|
// RetrieveEvidence returns the authoritative typed evidence (footnotes/tables/figures) for an
|
||||||
|
// in-scope query, pulled from the KB slice and grouped by EvidenceType. This is the "Evidence Type"
|
||||||
|
// router layer (Option A): when the query is in the KB knowledge space, the authoritative evidence
|
||||||
|
// within that space is surfaced separately so it isn't lost in the broad-base text merge. Returns an
|
||||||
|
// empty map when out of scope or KB routing is disabled. Text evidence is NOT returned here — it
|
||||||
|
// flows through the normal Retrieve() merge (the LLM context + the sources list).
|
||||||
|
func (c *LegalRAGClient) RetrieveEvidence(ctx context.Context, query string) map[EvidenceType][]LegalSearchResult {
|
||||||
|
ev := map[EvidenceType][]LegalSearchResult{}
|
||||||
|
if !c.kbScopeRoutingEnabled || c.kbSliceCollection == "" || !inKBScope(query) {
|
||||||
|
return ev
|
||||||
|
}
|
||||||
|
hits, err := c.searchInternal(ctx, c.kbSliceCollection, query, nil, evidenceRetrievalTopK)
|
||||||
|
if err != nil {
|
||||||
|
return ev
|
||||||
|
}
|
||||||
|
for _, h := range hits {
|
||||||
|
t := classifyEvidence(h)
|
||||||
|
if t == EvidenceText || len(ev[t]) >= maxEvidencePerType {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
ev[t] = append(ev[t], h)
|
||||||
|
}
|
||||||
|
return ev
|
||||||
|
}
|
||||||
@@ -37,6 +37,17 @@ type LegalSearchResult struct {
|
|||||||
// Supersede-Status (status="superseded", use_for_primary=false) — Alt-Quelle,
|
// Supersede-Status (status="superseded", use_for_primary=false) — Alt-Quelle,
|
||||||
// die fuer Default-Fragen demoted wird (nicht versteckt; fuer Historie auffindbar).
|
// die fuer Default-Fragen demoted wird (nicht versteckt; fuer Historie auffindbar).
|
||||||
Superseded bool `json:"-"`
|
Superseded bool `json:"-"`
|
||||||
|
|
||||||
|
// Evidence-Type-Marker — intern (json:"-", kein Pro-Result-Contract-Change), aus dem
|
||||||
|
// Qdrant-Payload befuellt. classifyEvidence() leitet daraus den EvidenceType ab; der
|
||||||
|
// Router surfacet nicht-Text-Evidence (Fußnote/Tabelle/Figur) getrennt vom Text-Merge,
|
||||||
|
// damit feingranulare Evidence nicht von Breit-Basis-Text ueberrankt wird.
|
||||||
|
IsFootnote bool `json:"-"`
|
||||||
|
FootnoteLabel string `json:"-"`
|
||||||
|
FootnoteVerbatim string `json:"-"`
|
||||||
|
RefCitationUnit string `json:"-"`
|
||||||
|
IsTable bool `json:"-"` // C6/C9: is_table (liniiert + borderless)
|
||||||
|
IsFigure bool `json:"-"` // C8: is_figure (noch nicht befuellt bis C8)
|
||||||
}
|
}
|
||||||
|
|
||||||
// LegalAssessment is the auditable explanation layer over a ranked result set:
|
// LegalAssessment is the auditable explanation layer over a ranked result set:
|
||||||
|
|||||||
@@ -195,6 +195,13 @@ func hitsToResults(hits []qdrantSearchHit) []LegalSearchResult {
|
|||||||
ReferencesOut: getStringSlice(hit.Payload, "references_out"),
|
ReferencesOut: getStringSlice(hit.Payload, "references_out"),
|
||||||
ReferencesIn: getStringSlice(hit.Payload, "references_in"),
|
ReferencesIn: getStringSlice(hit.Payload, "references_in"),
|
||||||
Superseded: getString(hit.Payload, "status") == "superseded",
|
Superseded: getString(hit.Payload, "status") == "superseded",
|
||||||
|
|
||||||
|
IsFootnote: getBool(hit.Payload, "is_footnote"),
|
||||||
|
FootnoteLabel: getString(hit.Payload, "footnote_label"),
|
||||||
|
FootnoteVerbatim: getString(hit.Payload, "footnote_verbatim"),
|
||||||
|
RefCitationUnit: getString(hit.Payload, "ref_citation_unit"),
|
||||||
|
IsTable: getBool(hit.Payload, "is_table"),
|
||||||
|
IsFigure: getBool(hit.Payload, "is_figure"),
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
return results
|
return results
|
||||||
|
|||||||
Reference in New Issue
Block a user