'use client' import React, { useState, useEffect, useCallback, useRef } from 'react' import { RAG_PDF_MAPPING } from './rag-pdf-mapping' import { REGULATIONS_IN_RAG, REGULATION_INFO } from '../rag-constants' interface ChunkBrowserQAProps { apiProxy: string } type RegGroupKey = 'eu_regulation' | 'eu_directive' | 'de_law' | 'at_law' | 'ch_law' | 'national_law' | 'bsi_standard' | 'eu_guideline' | 'international_standard' | 'other' const GROUP_LABELS: Record = { eu_regulation: 'EU Verordnungen', eu_directive: 'EU Richtlinien', de_law: 'DE Gesetze', at_law: 'AT Gesetze', ch_law: 'CH Gesetze', national_law: 'Nationale Gesetze (EU)', bsi_standard: 'BSI Standards', eu_guideline: 'EDPB / Guidelines', international_standard: 'Internationale Standards', other: 'Sonstige', } const GROUP_ORDER: RegGroupKey[] = [ 'eu_regulation', 'eu_directive', 'de_law', 'at_law', 'ch_law', 'national_law', 'bsi_standard', 'eu_guideline', 'international_standard', 'other', ] const COLLECTIONS = [ 'bp_compliance_gesetze', 'bp_compliance_ce', 'bp_compliance_datenschutz', ] export function ChunkBrowserQA({ apiProxy }: ChunkBrowserQAProps) { // Filter-Sidebar const [selectedRegulation, setSelectedRegulation] = useState(null) const [regulationCounts, setRegulationCounts] = useState>({}) const [filterSearch, setFilterSearch] = useState('') const [countsLoading, setCountsLoading] = useState(false) // Dokument-Chunks (sequenziell) const [docChunks, setDocChunks] = useState[]>([]) const [docChunkIndex, setDocChunkIndex] = useState(0) const [docTotalChunks, setDocTotalChunks] = useState(0) const [docLoading, setDocLoading] = useState(false) const docChunksRef = useRef(docChunks) docChunksRef.current = docChunks // Split-View const [splitViewActive, setSplitViewActive] = useState(true) const [chunksPerPage, setChunksPerPage] = useState(6) // Collection const [collection, setCollection] = useState('bp_compliance_gesetze') // Sidebar collapsed groups const [collapsedGroups, setCollapsedGroups] = useState>(new Set()) // Build grouped regulations for sidebar const regulationsInCollection = Object.entries(REGULATIONS_IN_RAG) .filter(([, info]) => info.collection === collection) .map(([code]) => code) const groupedRegulations = React.useMemo(() => { const groups: Record = { eu_regulation: [], eu_directive: [], de_law: [], at_law: [], ch_law: [], national_law: [], bsi_standard: [], eu_guideline: [], international_standard: [], other: [], } for (const code of regulationsInCollection) { const reg = REGULATION_INFO.find(r => r.code === code) const type = (reg?.type || 'other') as RegGroupKey const groupKey = type in groups ? type : 'other' groups[groupKey].push({ code, name: reg?.name || code, type: reg?.type || 'unknown', }) } return groups }, [regulationsInCollection.join(',')]) // Load regulation counts for current collection const loadRegulationCounts = useCallback(async (col: string) => { const entries = Object.entries(REGULATIONS_IN_RAG) .filter(([, info]) => info.collection === col && info.qdrant_id) if (entries.length === 0) return // Build qdrant_id -> our_code mapping const qdrantIdToCode: Record = {} for (const [code, info] of entries) { if (!qdrantIdToCode[info.qdrant_id]) qdrantIdToCode[info.qdrant_id] = [] qdrantIdToCode[info.qdrant_id].push(code) } const uniqueQdrantIds = Object.keys(qdrantIdToCode) setCountsLoading(true) try { const params = new URLSearchParams({ action: 'regulation-counts-batch', collection: col, qdrant_ids: uniqueQdrantIds.join(','), }) const res = await fetch(`${apiProxy}?${params}`) if (res.ok) { const data = await res.json() // Map qdrant_id counts back to our codes const mapped: Record = {} for (const [qid, count] of Object.entries(data.counts as Record)) { const codes = qdrantIdToCode[qid] || [] for (const code of codes) { mapped[code] = count } } setRegulationCounts(prev => ({ ...prev, ...mapped })) } } catch (error) { console.error('Failed to load regulation counts:', error) } finally { setCountsLoading(false) } }, [apiProxy]) // Load all chunks for a regulation (paginated scroll) const loadDocumentChunks = useCallback(async (regulationCode: string) => { const ragInfo = REGULATIONS_IN_RAG[regulationCode] if (!ragInfo || !ragInfo.qdrant_id) return setDocLoading(true) setDocChunks([]) setDocChunkIndex(0) setDocTotalChunks(0) const allChunks: Record[] = [] let offset: string | null = null try { // Paginated scroll, 100 at a time let safety = 0 do { const params = new URLSearchParams({ action: 'scroll', collection: ragInfo.collection, limit: '100', filter_key: 'regulation_id', filter_value: ragInfo.qdrant_id, }) if (offset) params.append('offset', offset) const res = await fetch(`${apiProxy}?${params}`) if (!res.ok) break const data = await res.json() const chunks = data.chunks || [] allChunks.push(...chunks) offset = data.next_offset || null safety++ } while (offset && safety < 200) // safety limit ~20k chunks // Sort by chunk_index allChunks.sort((a, b) => { const ai = Number(a.chunk_index ?? a.chunk_id ?? 0) const bi = Number(b.chunk_index ?? b.chunk_id ?? 0) return ai - bi }) setDocChunks(allChunks) setDocTotalChunks(allChunks.length) setDocChunkIndex(0) } catch (error) { console.error('Failed to load document chunks:', error) } finally { setDocLoading(false) } }, [apiProxy]) // Initial load useEffect(() => { loadRegulationCounts(collection) }, [collection, loadRegulationCounts]) // Current chunk const currentChunk = docChunks[docChunkIndex] || null const prevChunk = docChunkIndex > 0 ? docChunks[docChunkIndex - 1] : null const nextChunk = docChunkIndex < docChunks.length - 1 ? docChunks[docChunkIndex + 1] : null // PDF page estimation const estimatePdfPage = (chunkIndex: number): number => { const mapping = selectedRegulation ? RAG_PDF_MAPPING[selectedRegulation] : null const cpp = mapping?.chunksPerPage || chunksPerPage return Math.floor(chunkIndex / cpp) + 1 } const pdfPage = currentChunk ? estimatePdfPage(docChunkIndex) : 1 const pdfMapping = selectedRegulation ? RAG_PDF_MAPPING[selectedRegulation] : null const pdfUrl = pdfMapping ? `/rag-originals/${pdfMapping.filename}#page=${pdfPage}` : null // Handlers const handleSelectRegulation = (code: string) => { setSelectedRegulation(code) loadDocumentChunks(code) } const handleCollectionChange = (col: string) => { setCollection(col) setSelectedRegulation(null) setDocChunks([]) setDocChunkIndex(0) setDocTotalChunks(0) setRegulationCounts({}) } const handlePrev = () => { if (docChunkIndex > 0) setDocChunkIndex(i => i - 1) } const handleNext = () => { if (docChunkIndex < docChunks.length - 1) setDocChunkIndex(i => i + 1) } const handleKeyDown = useCallback((e: KeyboardEvent) => { if (e.key === 'ArrowLeft' || e.key === 'ArrowUp') { e.preventDefault() setDocChunkIndex(i => Math.max(0, i - 1)) } else if (e.key === 'ArrowRight' || e.key === 'ArrowDown') { e.preventDefault() setDocChunkIndex(i => Math.min(docChunksRef.current.length - 1, i + 1)) } }, []) useEffect(() => { if (selectedRegulation && docChunks.length > 0) { window.addEventListener('keydown', handleKeyDown) return () => window.removeEventListener('keydown', handleKeyDown) } }, [selectedRegulation, docChunks.length, handleKeyDown]) const toggleGroup = (group: string) => { setCollapsedGroups(prev => { const next = new Set(prev) if (next.has(group)) next.delete(group) else next.add(group) return next }) } // Get text content from a chunk const getChunkText = (chunk: Record | null): string => { if (!chunk) return '' return String(chunk.text || chunk.content || chunk.chunk_text || '') } // Overlap extraction const getOverlapPrev = (): string => { if (!prevChunk) return '' const text = getChunkText(prevChunk) return text.length > 150 ? '...' + text.slice(-150) : text } const getOverlapNext = (): string => { if (!nextChunk) return '' const text = getChunkText(nextChunk) return text.length > 150 ? text.slice(0, 150) + '...' : text } // Filter sidebar items const filteredRegulations = React.useMemo(() => { if (!filterSearch.trim()) return groupedRegulations const term = filterSearch.toLowerCase() const filtered: typeof groupedRegulations = { eu_regulation: [], eu_directive: [], de_law: [], at_law: [], ch_law: [], national_law: [], bsi_standard: [], eu_guideline: [], international_standard: [], other: [], } for (const [group, items] of Object.entries(groupedRegulations)) { filtered[group as RegGroupKey] = items.filter( r => r.code.toLowerCase().includes(term) || r.name.toLowerCase().includes(term) ) } return filtered }, [groupedRegulations, filterSearch]) // Regulation name lookup const getRegName = (code: string): string => { const reg = REGULATION_INFO.find(r => r.code === code) return reg?.name || code } return (
{/* Header bar */}
{selectedRegulation && ( <>
QA-Modus: {selectedRegulation} — {getRegName(selectedRegulation)}
Chunk {docChunkIndex + 1} / {docTotalChunks}
)}
{/* Main content: Sidebar + Content */}
{/* Sidebar */}
setFilterSearch(e.target.value)} placeholder="Suche..." className="w-full px-2 py-1.5 border rounded-lg text-sm focus:ring-2 focus:ring-teal-500" /> {countsLoading && (
Counts werden geladen...
)}
{GROUP_ORDER.map(group => { const items = filteredRegulations[group] if (items.length === 0) return null const isCollapsed = collapsedGroups.has(group) return (
{!isCollapsed && items.map(reg => { const count = regulationCounts[reg.code] ?? REGULATIONS_IN_RAG[reg.code]?.chunks ?? 0 const isSelected = selectedRegulation === reg.code return ( ) })}
) })}
{/* Content area */} {!selectedRegulation ? (
🔍

Waehle ein Dokument in der Sidebar, um die QA-Ansicht zu starten.

Pfeiltasten navigieren zwischen Chunks.

) : docLoading ? (

Chunks werden geladen...

{selectedRegulation}: {REGULATIONS_IN_RAG[selectedRegulation]?.chunks || '?'} Chunks erwartet

) : (
{/* Chunk-Text Panel */}
Chunk-Text Index: {docChunkIndex} / {docTotalChunks - 1}
{/* Overlap from previous chunk */} {prevChunk && (
↑ Overlap (vorheriger Chunk #{docChunkIndex - 1})

{getOverlapPrev()}

)} {/* Current chunk text */} {currentChunk && (
{getChunkText(currentChunk)}
)} {/* Overlap from next chunk */} {nextChunk && (
↓ Overlap (naechster Chunk #{docChunkIndex + 1})

{getOverlapNext()}

)} {/* Metadata */} {currentChunk && (
Metadaten
{Object.entries(currentChunk) .filter(([k]) => !['text', 'content', 'chunk_text', 'id'].includes(k)) .map(([k, v]) => (
{k}: {String(v)}
))}
)}
{/* Bottom nav */}
{ const v = parseInt(e.target.value, 10) if (!isNaN(v) && v >= 0 && v < docTotalChunks) setDocChunkIndex(v) }} className="w-20 px-2 py-1 border rounded text-xs text-center" /> / {docTotalChunks - 1}
{/* PDF-Viewer Panel */} {splitViewActive && (
Original-PDF Seite ~{pdfPage} {pdfMapping?.totalPages ? ` / ${pdfMapping.totalPages}` : ''}
{pdfUrl ? (