feat: add Chunk-Browser tab to RAG page
- New 'Chunk-Browser' tab for sequential chunk browsing
- Qdrant scroll API proxy (scroll + collection-count actions)
- Pagination with prev/next through all chunks in a collection
- Text search filter with highlighting
- Click to expand chunk and see all metadata
- 'In Chunks suchen' button now navigates to Chunk-Browser with correct collection

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -73,7 +73,7 @@ interface DsfaCorpusStatus {
|
|||||||
type RegulationCategory = 'regulations' | 'dsfa' | 'nibis' | 'templates'
|
type RegulationCategory = 'regulations' | 'dsfa' | 'nibis' | 'templates'
|
||||||
|
|
||||||
// Tab definitions
|
// Tab definitions
|
||||||
type TabId = 'overview' | 'regulations' | 'map' | 'search' | 'data' | 'ingestion' | 'pipeline'
|
type TabId = 'overview' | 'regulations' | 'map' | 'search' | 'chunks' | 'data' | 'ingestion' | 'pipeline'
|
||||||
|
|
||||||
// Custom document type
|
// Custom document type
|
||||||
interface CustomDocument {
|
interface CustomDocument {
|
||||||
@@ -1850,6 +1850,18 @@ export default function RAGPage() {
|
|||||||
const [autoRefresh, setAutoRefresh] = useState(true)
|
const [autoRefresh, setAutoRefresh] = useState(true)
|
||||||
const [elapsedTime, setElapsedTime] = useState<string>('')
|
const [elapsedTime, setElapsedTime] = useState<string>('')
|
||||||
|
|
||||||
|
// Chunk browser state
|
||||||
|
const [chunkCollection, setChunkCollection] = useState('bp_compliance_gesetze')
|
||||||
|
const [chunkData, setChunkData] = useState<Record<string, unknown>[]>([])
|
||||||
|
const [chunkOffset, setChunkOffset] = useState<string | null>(null)
|
||||||
|
const [chunkHistory, setChunkHistory] = useState<(string | null)[]>([])
|
||||||
|
const [chunkLoading, setChunkLoading] = useState(false)
|
||||||
|
const [chunkTextSearch, setChunkTextSearch] = useState('')
|
||||||
|
const [chunkTotalCount, setChunkTotalCount] = useState(0)
|
||||||
|
const [chunkCurrentPage, setChunkCurrentPage] = useState(0)
|
||||||
|
const [chunkNextOffset, setChunkNextOffset] = useState<string | null>(null)
|
||||||
|
const [expandedChunk, setExpandedChunk] = useState<number | null>(null)
|
||||||
|
|
||||||
// DSFA corpus state
|
// DSFA corpus state
|
||||||
const [dsfaSources, setDsfaSources] = useState<DsfaSource[]>([])
|
const [dsfaSources, setDsfaSources] = useState<DsfaSource[]>([])
|
||||||
const [dsfaStatus, setDsfaStatus] = useState<DsfaCorpusStatus | null>(null)
|
const [dsfaStatus, setDsfaStatus] = useState<DsfaCorpusStatus | null>(null)
|
||||||
@@ -2095,6 +2107,69 @@ export default function RAGPage() {
|
|||||||
return () => clearInterval(interval)
|
return () => clearInterval(interval)
|
||||||
}, [pipelineState?.started_at, pipelineState?.status])
|
}, [pipelineState?.started_at, pipelineState?.status])
|
||||||
|
|
||||||
|
/**
 * Loads one page of chunks from the scroll proxy into the chunk-browser state.
 *
 * @param offset - Scroll offset to start from; `null` requests the first page.
 * @param newCollection - Collection to read instead of the `chunkCollection`
 *   state value. Callers that have just called `setChunkCollection` should pass
 *   the new name here, because this closure still sees the previous state.
 */
const loadChunks = async (offset: string | null = null, newCollection?: string) => {
  const col = newCollection || chunkCollection
  setChunkLoading(true)
  try {
    const params = new URLSearchParams({
      action: 'scroll',
      collection: col,
      limit: '20',
    })
    if (offset) params.append('offset', offset)

    // NOTE(review): chunkTextSearch is read from this render's closure. A value
    // set via setChunkTextSearch in the same event handler is not visible yet.
    const term = chunkTextSearch.trim()
    if (term) params.append('text_search', term)

    const res = await fetch(`${API_PROXY}?${params}`)
    if (!res.ok) {
      // Surface proxy/Qdrant failures instead of silently keeping the old page.
      console.error('Chunk scroll failed:', res.status, res.statusText)
      return
    }

    const data = await res.json()
    setChunkData(Array.isArray(data.chunks) ? data.chunks : [])

    // The offset state is typed string | null. Normalise whatever the proxy
    // returns (presumably a string or numeric point ID - confirm against the
    // proxy) so a numeric 0 is not mistaken for "no further page".
    const next = data.next_offset
    setChunkNextOffset(next != null && next !== '' ? String(next) : null)
    setExpandedChunk(null)
  } catch (error) {
    console.error('Chunk scroll failed:', error)
  } finally {
    // Runs for the early return above as well, so the spinner always clears.
    setChunkLoading(false)
  }
}
|
||||||
|
|
||||||
|
/**
 * Fetches the total number of chunks in a collection and stores it in
 * `chunkTotalCount`.
 *
 * On any failure the count is reset to 0 so the UI never keeps showing the
 * total of a previously selected collection.
 *
 * @param col - Name of the collection to count.
 */
const loadChunkCount = async (col: string) => {
  try {
    const res = await fetch(`${API_PROXY}?action=collection-count&collection=${encodeURIComponent(col)}`)
    if (!res.ok) {
      console.error('Chunk count failed:', res.status, res.statusText)
      setChunkTotalCount(0)
      return
    }
    const data = await res.json()
    // Number(...) || 0 maps missing / non-numeric values to 0.
    setChunkTotalCount(Number(data.count) || 0)
  } catch (error) {
    // The count is informational only: log and fall back rather than throw.
    console.error('Chunk count failed:', error)
    setChunkTotalCount(0)
  }
}
|
||||||
|
|
||||||
|
/**
 * Switches the chunk browser to another collection: pagination is rewound to
 * the first page, then the collection's count and first page are requested.
 *
 * @param col - Name of the collection to browse.
 */
const handleChunkCollectionChange = (col: string) => {
  // Rewind pagination; these state updates are independent of each other.
  setChunkCurrentPage(0)
  setChunkHistory([])
  setChunkOffset(null)
  setChunkCollection(col)

  // `col` is passed explicitly because the chunkCollection state seen by the
  // loaders in this render still holds the previous collection.
  loadChunkCount(col)
  loadChunks(null, col)
}
|
||||||
|
|
||||||
|
/**
 * Advances to the next page of chunks.
 *
 * Does nothing when there is no further page, or while a page request is
 * still in flight. Without the second check a fast double click would push
 * the same offset onto the history twice and advance the page counter twice
 * for a single page load.
 */
const handleChunkNext = () => {
  if (!chunkNextOffset || chunkLoading) return
  // Remember where the current page started so "Zurueck" can return to it.
  setChunkHistory((prev) => [...prev, chunkOffset])
  setChunkOffset(chunkNextOffset)
  setChunkCurrentPage((p) => p + 1)
  loadChunks(chunkNextOffset)
}
|
||||||
|
|
||||||
|
/**
 * Steps back to the previous page using the offsets recorded in
 * `chunkHistory`.
 *
 * Does nothing on the first page, or while a page request is still in
 * flight, so rapid clicks cannot pop several history entries against stale
 * state.
 */
const handleChunkPrev = () => {
  if (chunkHistory.length === 0 || chunkLoading) return
  // The last history entry is the offset the previous page started at;
  // `null` stands for the first page.
  const prevOffset = chunkHistory[chunkHistory.length - 1] ?? null
  const remaining = chunkHistory.slice(0, -1)
  setChunkHistory(remaining)
  setChunkOffset(prevOffset)
  setChunkCurrentPage((p) => Math.max(0, p - 1))
  loadChunks(prevOffset)
}
|
||||||
|
|
||||||
const handleSearch = async () => {
|
const handleSearch = async () => {
|
||||||
if (!searchQuery.trim()) return
|
if (!searchQuery.trim()) return
|
||||||
|
|
||||||
@@ -2180,6 +2255,7 @@ export default function RAGPage() {
|
|||||||
{ id: 'regulations' as TabId, name: 'Regulierungen', icon: '📜' },
|
{ id: 'regulations' as TabId, name: 'Regulierungen', icon: '📜' },
|
||||||
{ id: 'map' as TabId, name: 'Landkarte', icon: '🗺️' },
|
{ id: 'map' as TabId, name: 'Landkarte', icon: '🗺️' },
|
||||||
{ id: 'search' as TabId, name: 'Suche', icon: '🔍' },
|
{ id: 'search' as TabId, name: 'Suche', icon: '🔍' },
|
||||||
|
{ id: 'chunks' as TabId, name: 'Chunk-Browser', icon: '🧩' },
|
||||||
{ id: 'data' as TabId, name: 'Daten', icon: '📁' },
|
{ id: 'data' as TabId, name: 'Daten', icon: '📁' },
|
||||||
{ id: 'ingestion' as TabId, name: 'Ingestion', icon: '⚙️' },
|
{ id: 'ingestion' as TabId, name: 'Ingestion', icon: '⚙️' },
|
||||||
{ id: 'pipeline' as TabId, name: 'Pipeline', icon: '🔄' },
|
{ id: 'pipeline' as TabId, name: 'Pipeline', icon: '🔄' },
|
||||||
@@ -2535,8 +2611,11 @@ export default function RAGPage() {
|
|||||||
<button
|
<button
|
||||||
onClick={(e) => {
|
onClick={(e) => {
|
||||||
e.stopPropagation()
|
e.stopPropagation()
|
||||||
setSearchQuery(reg.name)
|
const ragEntry = REGULATIONS_IN_RAG[reg.code as keyof typeof REGULATIONS_IN_RAG]
|
||||||
setActiveTab('search')
|
const col = ragEntry?.collection || 'bp_compliance_gesetze'
|
||||||
|
setChunkTextSearch(reg.name)
|
||||||
|
handleChunkCollectionChange(col)
|
||||||
|
setActiveTab('chunks')
|
||||||
}}
|
}}
|
||||||
className="text-teal-600 hover:text-teal-700 font-medium"
|
className="text-teal-600 hover:text-teal-700 font-medium"
|
||||||
>
|
>
|
||||||
@@ -3183,6 +3262,175 @@ export default function RAGPage() {
|
|||||||
</div>
|
</div>
|
||||||
)}
|
)}
|
||||||
|
|
||||||
|
{activeTab === 'chunks' && (
|
||||||
|
<div className="space-y-6">
|
||||||
|
{/* Collection Selector + Controls */}
|
||||||
|
<div className="bg-white rounded-xl border border-slate-200 p-6">
|
||||||
|
<h3 className="font-semibold text-slate-900 mb-4">Chunk-Browser</h3>
|
||||||
|
<div className="flex flex-wrap gap-4 items-end">
|
||||||
|
<div>
|
||||||
|
<label className="block text-sm font-medium text-slate-700 mb-1">Collection</label>
|
||||||
|
<select
|
||||||
|
value={chunkCollection}
|
||||||
|
onChange={(e) => handleChunkCollectionChange(e.target.value)}
|
||||||
|
className="px-3 py-2 border rounded-lg text-sm focus:ring-2 focus:ring-teal-500"
|
||||||
|
>
|
||||||
|
<option value="bp_compliance_gesetze">bp_compliance_gesetze</option>
|
||||||
|
<option value="bp_compliance_ce">bp_compliance_ce</option>
|
||||||
|
<option value="bp_compliance_datenschutz">bp_compliance_datenschutz</option>
|
||||||
|
<option value="bp_dsfa_corpus">bp_dsfa_corpus</option>
|
||||||
|
<option value="bp_compliance_recht">bp_compliance_recht</option>
|
||||||
|
<option value="bp_legal_templates">bp_legal_templates</option>
|
||||||
|
<option value="bp_compliance_gdpr">bp_compliance_gdpr</option>
|
||||||
|
<option value="bp_compliance_schulrecht">bp_compliance_schulrecht</option>
|
||||||
|
<option value="bp_dsfa_templates">bp_dsfa_templates</option>
|
||||||
|
<option value="bp_dsfa_risks">bp_dsfa_risks</option>
|
||||||
|
</select>
|
||||||
|
</div>
|
||||||
|
<div className="flex-1 min-w-[200px]">
|
||||||
|
<label className="block text-sm font-medium text-slate-700 mb-1">Textsuche (filtert geladene Seite)</label>
|
||||||
|
<div className="flex gap-2">
|
||||||
|
<input
|
||||||
|
type="text"
|
||||||
|
value={chunkTextSearch}
|
||||||
|
onChange={(e) => setChunkTextSearch(e.target.value)}
|
||||||
|
onKeyDown={(e) => {
  if (e.key !== 'Enter') return
  // Enter starts a fresh scroll from the first page, so the pagination
  // state is reset here exactly as the "Laden" button does. Otherwise
  // the page counter and history would still describe the old position.
  setChunkOffset(null)
  setChunkHistory([])
  setChunkCurrentPage(0)
  loadChunks(null)
}}
|
||||||
|
placeholder="z.B. DSGVO, IFRS, Maschinenverordnung..."
|
||||||
|
className="flex-1 px-3 py-2 border rounded-lg text-sm focus:ring-2 focus:ring-teal-500"
|
||||||
|
/>
|
||||||
|
<button
|
||||||
|
onClick={() => { setChunkOffset(null); setChunkHistory([]); setChunkCurrentPage(0); loadChunks(null) }}
|
||||||
|
className="px-4 py-2 bg-teal-600 text-white text-sm rounded-lg hover:bg-teal-700"
|
||||||
|
>
|
||||||
|
Laden
|
||||||
|
</button>
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
<div className="text-sm text-slate-500">
|
||||||
|
{chunkTotalCount > 0 && <span>{chunkTotalCount.toLocaleString()} Chunks total</span>}
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
|
||||||
|
{/* Pagination */}
|
||||||
|
{chunkData.length > 0 && (
|
||||||
|
<div className="flex items-center justify-between">
|
||||||
|
<button
|
||||||
|
onClick={handleChunkPrev}
|
||||||
|
disabled={chunkCurrentPage === 0}
|
||||||
|
className="px-4 py-2 text-sm border rounded-lg bg-white hover:bg-slate-50 disabled:opacity-30"
|
||||||
|
>
|
||||||
|
Zurueck
|
||||||
|
</button>
|
||||||
|
<span className="text-sm text-slate-600">
|
||||||
|
Seite {chunkCurrentPage + 1} — {chunkData.length} Chunks angezeigt
|
||||||
|
</span>
|
||||||
|
<button
|
||||||
|
onClick={handleChunkNext}
|
||||||
|
disabled={!chunkNextOffset}
|
||||||
|
className="px-4 py-2 text-sm border rounded-lg bg-white hover:bg-slate-50 disabled:opacity-30"
|
||||||
|
>
|
||||||
|
Weiter
|
||||||
|
</button>
|
||||||
|
</div>
|
||||||
|
)}
|
||||||
|
|
||||||
|
{/* Chunk List */}
|
||||||
|
{chunkLoading ? (
|
||||||
|
<div className="text-center py-12 text-slate-500">Chunks werden geladen...</div>
|
||||||
|
) : chunkData.length === 0 ? (
|
||||||
|
<div className="text-center py-12 text-slate-400">
|
||||||
|
Collection waehlen und "Laden" klicken um Chunks anzuzeigen.
|
||||||
|
</div>
|
||||||
|
) : (
|
||||||
|
<div className="space-y-2">
|
||||||
|
{chunkData.map((chunk, i) => {
|
||||||
|
const text = String(chunk.text || chunk.content || chunk.chunk_text || '')
|
||||||
|
const source = String(chunk.source_name || chunk.regulation_code || chunk.guideline_name || chunk.regulation_short || '')
|
||||||
|
const isExpanded = expandedChunk === i
|
||||||
|
const highlightTerm = chunkTextSearch.trim().toLowerCase()
|
||||||
|
|
||||||
|
/**
 * Renders `str` with every case-insensitive occurrence of `highlightTerm`
 * wrapped in a <mark>. Returns the plain string when there is no term or no
 * match.
 */
const renderHighlighted = (str: string) => {
  if (!highlightTerm) return str

  const lowered = str.toLowerCase()
  // Lower-casing can change the length of a few characters; match indices
  // would then no longer line up with `str`, so skip highlighting.
  if (lowered.length !== str.length) return str

  const segments: { text: string; hit: boolean }[] = []
  let cursor = 0
  let idx = lowered.indexOf(highlightTerm, cursor)
  while (idx !== -1) {
    if (idx > cursor) segments.push({ text: str.slice(cursor, idx), hit: false })
    segments.push({ text: str.slice(idx, idx + highlightTerm.length), hit: true })
    // highlightTerm is non-empty here, so the cursor always advances.
    cursor = idx + highlightTerm.length
    idx = lowered.indexOf(highlightTerm, cursor)
  }

  if (segments.length === 0) return str
  if (cursor < str.length) segments.push({ text: str.slice(cursor), hit: false })

  return (
    <>
      {segments.map((seg, n) =>
        seg.hit ? (
          <mark key={n} className="bg-yellow-200 px-0.5 rounded">{seg.text}</mark>
        ) : (
          seg.text
        )
      )}
    </>
  )
}
|
||||||
|
|
||||||
|
return (
|
||||||
|
<div
|
||||||
|
key={String(chunk.id || i)}
|
||||||
|
className={`bg-white rounded-lg border transition-all cursor-pointer ${
|
||||||
|
isExpanded ? 'border-teal-300 shadow-md' : 'border-slate-200 hover:border-slate-300'
|
||||||
|
}`}
|
||||||
|
onClick={() => setExpandedChunk(isExpanded ? null : i)}
|
||||||
|
>
|
||||||
|
<div className="px-4 py-3">
|
||||||
|
<div className="flex items-center gap-2 mb-1">
|
||||||
|
<span className="text-xs font-mono text-slate-400">#{chunkCurrentPage * 20 + i + 1}</span>
|
||||||
|
{source && (
|
||||||
|
<span className="px-2 py-0.5 text-xs rounded bg-slate-100 text-slate-600">{source}</span>
|
||||||
|
)}
|
||||||
|
{chunk.article && (
|
||||||
|
<span className="text-xs text-slate-500">Art. {String(chunk.article)}</span>
|
||||||
|
)}
|
||||||
|
{chunk.language && (
|
||||||
|
<span className="text-xs text-slate-400 ml-auto">{String(chunk.language).toUpperCase()}</span>
|
||||||
|
)}
|
||||||
|
</div>
|
||||||
|
<p className={`text-sm text-slate-700 ${isExpanded ? '' : 'line-clamp-3'}`}>
|
||||||
|
{renderHighlighted(text)}
|
||||||
|
</p>
|
||||||
|
</div>
|
||||||
|
{isExpanded && (
|
||||||
|
<div className="px-4 py-3 border-t border-slate-100 bg-slate-50 text-xs text-slate-500 space-y-1">
|
||||||
|
<div className="grid grid-cols-2 md:grid-cols-4 gap-2">
|
||||||
|
{Object.entries(chunk).filter(([k]) => !['text', 'content', 'chunk_text', 'id'].includes(k)).map(([k, v]) => (
|
||||||
|
<div key={k}>
|
||||||
|
<span className="font-medium text-slate-600">{k}:</span>{' '}
|
||||||
|
<span>{String(v)}</span>
|
||||||
|
</div>
|
||||||
|
))}
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
)}
|
||||||
|
</div>
|
||||||
|
)
|
||||||
|
})}
|
||||||
|
</div>
|
||||||
|
)}
|
||||||
|
|
||||||
|
{/* Bottom Pagination */}
|
||||||
|
{chunkData.length > 0 && (
|
||||||
|
<div className="flex items-center justify-between">
|
||||||
|
<button
|
||||||
|
onClick={handleChunkPrev}
|
||||||
|
disabled={chunkCurrentPage === 0}
|
||||||
|
className="px-4 py-2 text-sm border rounded-lg bg-white hover:bg-slate-50 disabled:opacity-30"
|
||||||
|
>
|
||||||
|
Zurueck
|
||||||
|
</button>
|
||||||
|
<span className="text-sm text-slate-600">
|
||||||
|
Seite {chunkCurrentPage + 1}
|
||||||
|
</span>
|
||||||
|
<button
|
||||||
|
onClick={handleChunkNext}
|
||||||
|
disabled={!chunkNextOffset}
|
||||||
|
className="px-4 py-2 text-sm border rounded-lg bg-white hover:bg-slate-50 disabled:opacity-30"
|
||||||
|
>
|
||||||
|
Weiter
|
||||||
|
</button>
|
||||||
|
</div>
|
||||||
|
)}
|
||||||
|
</div>
|
||||||
|
)}
|
||||||
|
|
||||||
{activeTab === 'data' && (
|
{activeTab === 'data' && (
|
||||||
<div className="space-y-6">
|
<div className="space-y-6">
|
||||||
{/* Upload Document */}
|
{/* Upload Document */}
|
||||||
|
|||||||
@@ -66,6 +66,72 @@ export async function GET(request: NextRequest) {
|
|||||||
url += `/traceability?chunk_id=${encodeURIComponent(chunkId || '')}&regulation=${encodeURIComponent(regulation || '')}`
|
url += `/traceability?chunk_id=${encodeURIComponent(chunkId || '')}&regulation=${encodeURIComponent(regulation || '')}`
|
||||||
break
|
break
|
||||||
}
|
}
|
||||||
|
case 'scroll': {
  // Proxies Qdrant's points/scroll endpoint for sequential browsing.
  // Query params: collection, limit (clamped to 1..100, default 20), offset,
  // filter_key + filter_value (exact payload match), text_search.
  const collection = searchParams.get('collection') || 'bp_compliance_gesetze'
  const offsetParam = searchParams.get('offset')
  const filterKey = searchParams.get('filter_key')
  const filterValue = searchParams.get('filter_value')
  const textSearch = searchParams.get('text_search')

  // parseInt yields NaN for non-numeric input, and NaN would serialise to
  // `null` in the request body. Fall back to the default and keep the value
  // inside a sane range.
  const requestedLimit = parseInt(searchParams.get('limit') || '20', 10)
  const limit = Number.isFinite(requestedLimit)
    ? Math.min(Math.max(requestedLimit, 1), 100)
    : 20

  const scrollBody: Record<string, unknown> = {
    limit,
    with_payload: true,
    with_vector: false,
  }
  if (offsetParam) {
    // Query params are always strings, but Qdrant point IDs are either
    // unsigned integers or UUID strings. An all-digit offset must therefore
    // be sent as a number. Values beyond the safe-integer range are left
    // untouched rather than silently rounded.
    const numericOffset = /^\d+$/.test(offsetParam) ? Number(offsetParam) : NaN
    scrollBody.offset = Number.isSafeInteger(numericOffset) ? numericOffset : offsetParam
  }
  if (filterKey && filterValue) {
    scrollBody.filter = {
      must: [{ key: filterKey, match: { value: filterValue } }],
    }
  }

  const scrollRes = await fetch(`${QDRANT_URL}/collections/${encodeURIComponent(collection)}/points/scroll`, {
    method: 'POST',
    headers: { 'Content-Type': 'application/json' },
    body: JSON.stringify(scrollBody),
    cache: 'no-store',
  })
  if (!scrollRes.ok) {
    return NextResponse.json({ error: 'Qdrant scroll failed' }, { status: scrollRes.status })
  }
  const scrollData = await scrollRes.json()

  // Flatten each point to { id, ...payload } for the UI.
  const points = (scrollData.result?.points || []).map((p: { id: string | number; payload?: Record<string, unknown> }) => ({
    id: p.id,
    ...p.payload,
  }))

  // Post-filter inside the proxy: only the points of THIS page are checked,
  // so a page can contain fewer than `limit` chunks (or none) while
  // next_offset still points at further pages.
  let filtered = points
  const term = textSearch ? textSearch.trim().toLowerCase() : ''
  if (term) {
    filtered = points.filter((p: Record<string, unknown>) => {
      const text = String(p.text || p.content || p.chunk_text || '')
      return text.toLowerCase().includes(term)
    })
  }

  return NextResponse.json({
    chunks: filtered,
    // `??` keeps a numeric offset of 0, which `||` would turn into null.
    next_offset: scrollData.result?.next_page_offset ?? null,
    total_in_page: points.length,
  })
}
|
||||||
|
case 'collection-count': {
  // Reports how many points a collection holds, read from `points_count`
  // in Qdrant's collection-info response.
  const collectionName = searchParams.get('collection') || 'bp_compliance_gesetze'
  const infoUrl = `${QDRANT_URL}/collections/${encodeURIComponent(collectionName)}`
  const infoRes = await fetch(infoUrl, { cache: 'no-store' })

  if (infoRes.ok) {
    const info = await infoRes.json()
    return NextResponse.json({
      count: info.result?.points_count || 0,
    })
  }

  // Best effort: a failed lookup is reported as a zero count, not an error.
  return NextResponse.json({ count: 0 })
}
|
||||||
default:
|
default:
|
||||||
return NextResponse.json({ error: 'Unknown action' }, { status: 400 })
|
return NextResponse.json({ error: 'Unknown action' }, { status: 400 })
|
||||||
}
|
}
|
||||||
|
|||||||
Reference in New Issue
Block a user