From 491df4e1b0607e26d206625dd06e8511b5fe26b5 Mon Sep 17 00:00:00 2001 From: Benjamin Admin Date: Sat, 28 Feb 2026 09:35:52 +0100 Subject: [PATCH] feat: add Chunk-Browser tab to RAG page - New 'Chunk-Browser' tab for sequential chunk browsing - Qdrant scroll API proxy (scroll + collection-count actions) - Pagination with prev/next through all chunks in a collection - Text search filter with highlighting - Click to expand chunk and see all metadata - 'In Chunks suchen' button now navigates to Chunk-Browser with correct collection Co-Authored-By: Claude Opus 4.6 --- admin-lehrer/app/(admin)/ai/rag/page.tsx | 254 ++++++++++++++++++++- admin-lehrer/app/api/legal-corpus/route.ts | 66 ++++++ 2 files changed, 317 insertions(+), 3 deletions(-) diff --git a/admin-lehrer/app/(admin)/ai/rag/page.tsx b/admin-lehrer/app/(admin)/ai/rag/page.tsx index d783504..29e7c08 100644 --- a/admin-lehrer/app/(admin)/ai/rag/page.tsx +++ b/admin-lehrer/app/(admin)/ai/rag/page.tsx @@ -73,7 +73,7 @@ interface DsfaCorpusStatus { type RegulationCategory = 'regulations' | 'dsfa' | 'nibis' | 'templates' // Tab definitions -type TabId = 'overview' | 'regulations' | 'map' | 'search' | 'data' | 'ingestion' | 'pipeline' +type TabId = 'overview' | 'regulations' | 'map' | 'search' | 'chunks' | 'data' | 'ingestion' | 'pipeline' // Custom document type interface CustomDocument { @@ -1850,6 +1850,18 @@ export default function RAGPage() { const [autoRefresh, setAutoRefresh] = useState(true) const [elapsedTime, setElapsedTime] = useState('') + // Chunk browser state + const [chunkCollection, setChunkCollection] = useState('bp_compliance_gesetze') + const [chunkData, setChunkData] = useState[]>([]) + const [chunkOffset, setChunkOffset] = useState(null) + const [chunkHistory, setChunkHistory] = useState<(string | null)[]>([]) + const [chunkLoading, setChunkLoading] = useState(false) + const [chunkTextSearch, setChunkTextSearch] = useState('') + const [chunkTotalCount, setChunkTotalCount] = useState(0) + const 
[chunkCurrentPage, setChunkCurrentPage] = useState(0) + const [chunkNextOffset, setChunkNextOffset] = useState(null) + const [expandedChunk, setExpandedChunk] = useState(null) + // DSFA corpus state const [dsfaSources, setDsfaSources] = useState([]) const [dsfaStatus, setDsfaStatus] = useState(null) @@ -2095,6 +2107,69 @@ export default function RAGPage() { return () => clearInterval(interval) }, [pipelineState?.started_at, pipelineState?.status]) + const loadChunks = async (offset: string | null = null, newCollection?: string) => { + const col = newCollection || chunkCollection + setChunkLoading(true) + try { + const params = new URLSearchParams({ + action: 'scroll', + collection: col, + limit: '20', + }) + if (offset) params.append('offset', offset) + if (chunkTextSearch.trim()) params.append('text_search', chunkTextSearch.trim()) + + const res = await fetch(`${API_PROXY}?${params}`) + if (res.ok) { + const data = await res.json() + setChunkData(data.chunks || []) + setChunkNextOffset(data.next_offset || null) + setExpandedChunk(null) + } + } catch (error) { + console.error('Chunk scroll failed:', error) + } finally { + setChunkLoading(false) + } + } + + const loadChunkCount = async (col: string) => { + try { + const res = await fetch(`${API_PROXY}?action=collection-count&collection=${encodeURIComponent(col)}`) + if (res.ok) { + const data = await res.json() + setChunkTotalCount(data.count || 0) + } + } catch { /* ignore */ } + } + + const handleChunkCollectionChange = (col: string) => { + setChunkCollection(col) + setChunkOffset(null) + setChunkHistory([]) + setChunkCurrentPage(0) + loadChunkCount(col) + loadChunks(null, col) + } + + const handleChunkNext = () => { + if (!chunkNextOffset) return + setChunkHistory((prev) => [...prev, chunkOffset]) + setChunkOffset(chunkNextOffset) + setChunkCurrentPage((p) => p + 1) + loadChunks(chunkNextOffset) + } + + const handleChunkPrev = () => { + if (chunkHistory.length === 0) return + const prev = [...chunkHistory] + 
const prevOffset = prev.pop() ?? null + setChunkHistory(prev) + setChunkOffset(prevOffset) + setChunkCurrentPage((p) => Math.max(0, p - 1)) + loadChunks(prevOffset) + } + const handleSearch = async () => { if (!searchQuery.trim()) return @@ -2180,6 +2255,7 @@ export default function RAGPage() { { id: 'regulations' as TabId, name: 'Regulierungen', icon: 'πŸ“œ' }, { id: 'map' as TabId, name: 'Landkarte', icon: 'πŸ—ΊοΈ' }, { id: 'search' as TabId, name: 'Suche', icon: 'πŸ”' }, + { id: 'chunks' as TabId, name: 'Chunk-Browser', icon: '🧩' }, { id: 'data' as TabId, name: 'Daten', icon: 'πŸ“' }, { id: 'ingestion' as TabId, name: 'Ingestion', icon: 'βš™οΈ' }, { id: 'pipeline' as TabId, name: 'Pipeline', icon: 'πŸ”„' }, @@ -2535,8 +2611,11 @@ export default function RAGPage() { + + +
+ {chunkTotalCount > 0 && {chunkTotalCount.toLocaleString()} Chunks total} +
+ + + + {/* Pagination */} + {chunkData.length > 0 && ( +
+ + + Seite {chunkCurrentPage + 1} — {chunkData.length} Chunks angezeigt + + +
+ )} + + {/* Chunk List */} + {chunkLoading ? ( +
Chunks werden geladen...
+ ) : chunkData.length === 0 ? ( +
+ Collection waehlen und "Laden" klicken um Chunks anzuzeigen. +
+ ) : ( +
+ {chunkData.map((chunk, i) => { + const text = String(chunk.text || chunk.content || chunk.chunk_text || '') + const source = String(chunk.source_name || chunk.regulation_code || chunk.guideline_name || chunk.regulation_short || '') + const isExpanded = expandedChunk === i + const highlightTerm = chunkTextSearch.trim().toLowerCase() + + const renderHighlighted = (str: string) => { + if (!highlightTerm) return str + const idx = str.toLowerCase().indexOf(highlightTerm) + if (idx === -1) return str + return ( + <> + {str.slice(0, idx)} + {str.slice(idx, idx + highlightTerm.length)} + {str.slice(idx + highlightTerm.length)} + + ) + } + + return ( +
setExpandedChunk(isExpanded ? null : i)} + > +
+
+ #{chunkCurrentPage * 20 + i + 1} + {source && ( + {source} + )} + {chunk.article && ( + Art. {String(chunk.article)} + )} + {chunk.language && ( + {String(chunk.language).toUpperCase()} + )} +
+

+ {renderHighlighted(text)} +

+
+ {isExpanded && ( +
+
+ {Object.entries(chunk).filter(([k]) => !['text', 'content', 'chunk_text', 'id'].includes(k)).map(([k, v]) => ( +
+ {k}:{' '} + {String(v)} +
+ ))} +
+
+ )} +
+ ) + })} +
+ )} + + {/* Bottom Pagination */} + {chunkData.length > 0 && ( +
+ + + Seite {chunkCurrentPage + 1} + + +
+ )} + + )} + {activeTab === 'data' && (
{/* Upload Document */} diff --git a/admin-lehrer/app/api/legal-corpus/route.ts b/admin-lehrer/app/api/legal-corpus/route.ts index 4d0a2c8..98b45cd 100644 --- a/admin-lehrer/app/api/legal-corpus/route.ts +++ b/admin-lehrer/app/api/legal-corpus/route.ts @@ -66,6 +66,72 @@ export async function GET(request: NextRequest) { url += `/traceability?chunk_id=${encodeURIComponent(chunkId || '')}&regulation=${encodeURIComponent(regulation || '')}` break } + case 'scroll': { + const collection = searchParams.get('collection') || 'bp_compliance_gesetze' + const limit = parseInt(searchParams.get('limit') || '20', 10) + const offsetParam = searchParams.get('offset') + const filterKey = searchParams.get('filter_key') + const filterValue = searchParams.get('filter_value') + const textSearch = searchParams.get('text_search') + + const scrollBody: Record = { + limit: Math.min(limit, 100), + with_payload: true, + with_vector: false, + } + if (offsetParam) { + scrollBody.offset = offsetParam + } + if (filterKey && filterValue) { + scrollBody.filter = { + must: [{ key: filterKey, match: { value: filterValue } }], + } + } + + const scrollRes = await fetch(`${QDRANT_URL}/collections/${encodeURIComponent(collection)}/points/scroll`, { + method: 'POST', + headers: { 'Content-Type': 'application/json' }, + body: JSON.stringify(scrollBody), + cache: 'no-store', + }) + if (!scrollRes.ok) { + return NextResponse.json({ error: 'Qdrant scroll failed' }, { status: scrollRes.status }) + } + const scrollData = await scrollRes.json() + const points = (scrollData.result?.points || []).map((p: { id: string; payload?: Record }) => ({ + id: p.id, + ...p.payload, + })) + + // Client-side text search filter + let filtered = points + if (textSearch && textSearch.trim()) { + const term = textSearch.toLowerCase() + filtered = points.filter((p: Record) => { + const text = String(p.text || p.content || p.chunk_text || '') + return text.toLowerCase().includes(term) + }) + } + + return NextResponse.json({ + chunks: 
filtered, + next_offset: scrollData.result?.next_page_offset || null, + total_in_page: points.length, + }) + } + case 'collection-count': { + const col = searchParams.get('collection') || 'bp_compliance_gesetze' + const countRes = await fetch(`${QDRANT_URL}/collections/${encodeURIComponent(col)}`, { + cache: 'no-store', + }) + if (!countRes.ok) { + return NextResponse.json({ count: 0 }) + } + const countData = await countRes.json() + return NextResponse.json({ + count: countData.result?.points_count || 0, + }) + } default: return NextResponse.json({ error: 'Unknown action' }, { status: 400 }) }