'use client'
import React, { useState, useEffect, useCallback, useRef } from 'react'
import { RAG_PDF_MAPPING } from './rag-pdf-mapping'
import { REGULATIONS_IN_RAG, REGULATION_INFO } from '../rag-constants'

/** Props accepted by the `ChunkBrowserQA` component. */
interface ChunkBrowserQAProps {
  /**
   * URL the component fetches from: a query string (`?action=...`) is appended
   * to it for the regulation-count and chunk-scroll requests.
   */
  apiProxy: string
}

/**
 * Keys of the groups that regulations are bucketed into for the sidebar.
 * A regulation is assigned by the `type` of its `REGULATION_INFO` entry and
 * falls back to 'other' when that type is missing or is not one of these keys.
 */
type RegGroupKey =
  'eu_regulation' | 'eu_directive' | 'de_law' | 'at_law' | 'ch_law' |
  'national_law' | 'bsi_standard' | 'eu_guideline' | 'international_standard' | 'other'

/**
 * Human-readable (German) label for each group key.
 * Presumably rendered as the sidebar group headings; the usage is not visible
 * in this part of the file, so confirm before relying on it.
 *
 * NOTE(review): `Record` appears here without type arguments, but TypeScript's
 * `Record` utility type requires two. The arguments were probably lost
 * (likely `RegGroupKey` keys with `string` values); confirm against the
 * original file.
 */
const GROUP_LABELS: Record = {
  eu_regulation: 'EU Verordnungen',
  eu_directive: 'EU Richtlinien',
  de_law: 'DE Gesetze',
  at_law: 'AT Gesetze',
  ch_law: 'CH Gesetze',
  national_law: 'Nationale Gesetze (EU)',
  bsi_standard: 'BSI Standards',
  eu_guideline: 'EDPB / Guidelines',
  international_standard: 'Internationale Standards',
  other: 'Sonstige',
}

/**
 * Order in which the component iterates the groups when building the sidebar;
 * groups that have no items after filtering are skipped during that iteration.
 */
const GROUP_ORDER: RegGroupKey[] = [
  'eu_regulation',
  'eu_directive',
  'de_law',
  'at_law',
  'ch_law',
  'national_law',
  'bsi_standard',
  'eu_guideline',
  'international_standard',
  'other',
]

/**
 * Collection names. The component's `collection` state defaults to
 * 'bp_compliance_ce' and regulations are filtered by `info.collection`;
 * where this list itself is consumed is not visible in this part of the file.
 */
const COLLECTIONS = [
  'bp_compliance_gesetze',
  'bp_compliance_ce',
  'bp_compliance_datenschutz',
  'bp_dsfa_corpus',
  'bp_compliance_recht',
  'bp_legal_templates',
  'bp_nibis_eh',
]

export function ChunkBrowserQA({ apiProxy }: ChunkBrowserQAProps) { // Filter-Sidebar const [selectedRegulation, setSelectedRegulation] = useState(null) const [regulationCounts, setRegulationCounts] = useState>({}) const [filterSearch, setFilterSearch] = useState('') const [countsLoading, setCountsLoading] = useState(false) // Dokument-Chunks (sequenziell) const [docChunks, setDocChunks] = useState[]>([]) const [docChunkIndex, setDocChunkIndex] = useState(0) const [docTotalChunks, setDocTotalChunks] = useState(0) const [docLoading, setDocLoading] = useState(false) const docChunksRef = useRef(docChunks) docChunksRef.current = docChunks // Split-View const [splitViewActive, setSplitViewActive] = useState(true) const [chunksPerPage, setChunksPerPage] = useState(6) const [fullscreen, setFullscreen] = useState(false) // Collection — default to 
bp_compliance_ce where we have PDFs downloaded const [collection, setCollection] = useState('bp_compliance_ce') // PDF existence check const [pdfExists, setPdfExists] = useState(null) // Sidebar collapsed groups const [collapsedGroups, setCollapsedGroups] = useState>(new Set()) // Build grouped regulations for sidebar const regulationsInCollection = Object.entries(REGULATIONS_IN_RAG) .filter(([, info]) => info.collection === collection) .map(([code]) => code) const groupedRegulations = React.useMemo(() => { const groups: Record = { eu_regulation: [], eu_directive: [], de_law: [], at_law: [], ch_law: [], national_law: [], bsi_standard: [], eu_guideline: [], international_standard: [], other: [], } for (const code of regulationsInCollection) { const reg = REGULATION_INFO.find(r => r.code === code) const type = (reg?.type || 'other') as RegGroupKey const groupKey = type in groups ? type : 'other' groups[groupKey].push({ code, name: reg?.name || code, type: reg?.type || 'unknown', }) } return groups }, [regulationsInCollection.join(',')]) // Load regulation counts for current collection const loadRegulationCounts = useCallback(async (col: string) => { const entries = Object.entries(REGULATIONS_IN_RAG) .filter(([, info]) => info.collection === col && info.qdrant_id) if (entries.length === 0) return // Build qdrant_id -> our_code mapping const qdrantIdToCode: Record = {} for (const [code, info] of entries) { if (!qdrantIdToCode[info.qdrant_id]) qdrantIdToCode[info.qdrant_id] = [] qdrantIdToCode[info.qdrant_id].push(code) } const uniqueQdrantIds = Object.keys(qdrantIdToCode) setCountsLoading(true) try { const params = new URLSearchParams({ action: 'regulation-counts-batch', collection: col, qdrant_ids: uniqueQdrantIds.join(','), }) const res = await fetch(`${apiProxy}?${params}`) if (res.ok) { const data = await res.json() // Map qdrant_id counts back to our codes const mapped: Record = {} for (const [qid, count] of Object.entries(data.counts as Record)) { const codes = 
qdrantIdToCode[qid] || [] for (const code of codes) { mapped[code] = count } } setRegulationCounts(prev => ({ ...prev, ...mapped })) } } catch (error) { console.error('Failed to load regulation counts:', error) } finally { setCountsLoading(false) } }, [apiProxy]) // Load all chunks for a regulation (paginated scroll) const loadDocumentChunks = useCallback(async (regulationCode: string) => { const ragInfo = REGULATIONS_IN_RAG[regulationCode] if (!ragInfo || !ragInfo.qdrant_id) return setDocLoading(true) setDocChunks([]) setDocChunkIndex(0) setDocTotalChunks(0) const allChunks: Record[] = [] let offset: string | null = null try { let safety = 0 do { const params = new URLSearchParams({ action: 'scroll', collection: ragInfo.collection, limit: '100', filter_key: 'regulation_id', filter_value: ragInfo.qdrant_id, }) if (offset) params.append('offset', offset) const res = await fetch(`${apiProxy}?${params}`) if (!res.ok) break const data = await res.json() const chunks = data.chunks || [] allChunks.push(...chunks) offset = data.next_offset || null safety++ } while (offset && safety < 200) // Sort by chunk_index allChunks.sort((a, b) => { const ai = Number(a.chunk_index ?? a.chunk_id ?? 0) const bi = Number(b.chunk_index ?? b.chunk_id ?? 0) return ai - bi }) setDocChunks(allChunks) setDocTotalChunks(allChunks.length) setDocChunkIndex(0) } catch (error) { console.error('Failed to load document chunks:', error) } finally { setDocLoading(false) } }, [apiProxy]) // Initial load useEffect(() => { loadRegulationCounts(collection) }, [collection, loadRegulationCounts]) // Current chunk const currentChunk = docChunks[docChunkIndex] || null const prevChunk = docChunkIndex > 0 ? docChunks[docChunkIndex - 1] : null const nextChunk = docChunkIndex < docChunks.length - 1 ? 
docChunks[docChunkIndex + 1] : null // PDF page estimation — use pages metadata if available const estimatePdfPage = (chunk: Record | null, chunkIdx: number): number => { if (chunk) { // Try pages array from payload (e.g. [7] or [7,8]) const pages = chunk.pages as number[] | undefined if (Array.isArray(pages) && pages.length > 0) return pages[0] // Try page field const page = chunk.page as number | undefined if (typeof page === 'number' && page > 0) return page } const mapping = selectedRegulation ? RAG_PDF_MAPPING[selectedRegulation] : null const cpp = mapping?.chunksPerPage || chunksPerPage return Math.floor(chunkIdx / cpp) + 1 } const pdfPage = estimatePdfPage(currentChunk, docChunkIndex) const pdfMapping = selectedRegulation ? RAG_PDF_MAPPING[selectedRegulation] : null const pdfUrl = pdfMapping ? `/rag-originals/${pdfMapping.filename}#page=${pdfPage}` : null // Check PDF existence when regulation changes useEffect(() => { if (!selectedRegulation) { setPdfExists(null); return } const mapping = RAG_PDF_MAPPING[selectedRegulation] if (!mapping) { setPdfExists(false); return } const url = `/rag-originals/${mapping.filename}` fetch(url, { method: 'HEAD' }) .then(res => setPdfExists(res.ok)) .catch(() => setPdfExists(false)) }, [selectedRegulation]) // Handlers const handleSelectRegulation = (code: string) => { setSelectedRegulation(code) loadDocumentChunks(code) } const handleCollectionChange = (col: string) => { setCollection(col) setSelectedRegulation(null) setDocChunks([]) setDocChunkIndex(0) setDocTotalChunks(0) setRegulationCounts({}) } const handlePrev = () => { if (docChunkIndex > 0) setDocChunkIndex(i => i - 1) } const handleNext = () => { if (docChunkIndex < docChunks.length - 1) setDocChunkIndex(i => i + 1) } const handleKeyDown = useCallback((e: KeyboardEvent) => { if (e.key === 'Escape' && fullscreen) { e.preventDefault() setFullscreen(false) } else if (e.key === 'ArrowLeft' || e.key === 'ArrowUp') { e.preventDefault() setDocChunkIndex(i => Math.max(0, i 
- 1)) } else if (e.key === 'ArrowRight' || e.key === 'ArrowDown') { e.preventDefault() setDocChunkIndex(i => Math.min(docChunksRef.current.length - 1, i + 1)) } }, [fullscreen]) useEffect(() => { if (fullscreen || (selectedRegulation && docChunks.length > 0)) { window.addEventListener('keydown', handleKeyDown) return () => window.removeEventListener('keydown', handleKeyDown) } }, [selectedRegulation, docChunks.length, handleKeyDown, fullscreen]) const toggleGroup = (group: string) => { setCollapsedGroups(prev => { const next = new Set(prev) if (next.has(group)) next.delete(group) else next.add(group) return next }) } // Get text content from a chunk const getChunkText = (chunk: Record | null): string => { if (!chunk) return '' return String(chunk.chunk_text || chunk.text || chunk.content || '') } // Extract structural metadata for prominent display const getStructuralInfo = (chunk: Record | null): { article?: string; section?: string; pages?: string } => { if (!chunk) return {} const result: { article?: string; section?: string; pages?: string } = {} // Article / paragraph const article = chunk.article || chunk.artikel || chunk.paragraph || chunk.section_title if (article) result.article = String(article) // Section const section = chunk.section || chunk.chapter || chunk.abschnitt || chunk.kapitel if (section) result.section = String(section) // Pages const pages = chunk.pages as number[] | undefined if (Array.isArray(pages) && pages.length > 0) { result.pages = pages.length === 1 ? `S. ${pages[0]}` : `S. ${pages[0]}-${pages[pages.length - 1]}` } else if (chunk.page) { result.pages = `S. ${chunk.page}` } return result } // Overlap extraction const getOverlapPrev = (): string => { if (!prevChunk) return '' const text = getChunkText(prevChunk) return text.length > 150 ? '...' + text.slice(-150) : text } const getOverlapNext = (): string => { if (!nextChunk) return '' const text = getChunkText(nextChunk) return text.length > 150 ? text.slice(0, 150) + '...' 
: text } // Filter sidebar items const filteredRegulations = React.useMemo(() => { if (!filterSearch.trim()) return groupedRegulations const term = filterSearch.toLowerCase() const filtered: typeof groupedRegulations = { eu_regulation: [], eu_directive: [], de_law: [], at_law: [], ch_law: [], national_law: [], bsi_standard: [], eu_guideline: [], international_standard: [], other: [], } for (const [group, items] of Object.entries(groupedRegulations)) { filtered[group as RegGroupKey] = items.filter( r => r.code.toLowerCase().includes(term) || r.name.toLowerCase().includes(term) ) } return filtered }, [groupedRegulations, filterSearch]) // Regulation name lookup const getRegName = (code: string): string => { const reg = REGULATION_INFO.find(r => r.code === code) return reg?.name || code } // Important metadata keys to show prominently const STRUCTURAL_KEYS = new Set([ 'article', 'artikel', 'paragraph', 'section_title', 'section', 'chapter', 'abschnitt', 'kapitel', 'pages', 'page', ]) const HIDDEN_KEYS = new Set([ 'text', 'content', 'chunk_text', 'id', 'embedding', ]) const structInfo = getStructuralInfo(currentChunk) return (
{/* Header bar — fixed height */}
{selectedRegulation && ( <>
{selectedRegulation} — {getRegName(selectedRegulation)} {structInfo.article && ( {structInfo.article} )} {structInfo.pages && ( {structInfo.pages} )}
{docChunkIndex + 1} / {docTotalChunks} { const v = parseInt(e.target.value, 10) if (!isNaN(v) && v >= 1 && v <= docTotalChunks) setDocChunkIndex(v - 1) }} className="w-16 px-2 py-1 border rounded text-xs text-center" title="Springe zu Chunk Nr." />
)}
{/* Main content: Sidebar + Content — fills remaining height */}
{/* Sidebar — scrollable */}
setFilterSearch(e.target.value)} placeholder="Suche..." className="w-full px-2 py-1.5 border rounded-lg text-sm focus:ring-2 focus:ring-teal-500" /> {countsLoading && (
Counts laden...
)}
{GROUP_ORDER.map(group => { const items = filteredRegulations[group] if (items.length === 0) return null const isCollapsed = collapsedGroups.has(group) return (
{!isCollapsed && items.map(reg => { const count = regulationCounts[reg.code] ?? 0 const isSelected = selectedRegulation === reg.code return ( ) })}
) })}
{/* Content area — fills remaining width and height */} {!selectedRegulation ? (
🔍

Dokument in der Sidebar auswaehlen, um QA zu starten.

Pfeiltasten: Chunk vor/zurueck

) : docLoading ? (

Chunks werden geladen...

{selectedRegulation}: {REGULATIONS_IN_RAG[selectedRegulation]?.chunks.toLocaleString() || '?'} Chunks erwartet

) : (
{/* Chunk-Text Panel — fixed height, internal scroll */}
{/* Panel header */}
Chunk-Text
{structInfo.article && ( {structInfo.article} )} {structInfo.section && ( {structInfo.section} )} #{docChunkIndex} / {docTotalChunks - 1}
{/* Scrollable content */}
{/* Overlap from previous chunk */} {prevChunk && (
↑ Ende vorheriger Chunk #{docChunkIndex - 1}

{getOverlapPrev()}

)} {/* Current chunk text */} {currentChunk ? (
{getChunkText(currentChunk)}
) : (
Kein Chunk-Text vorhanden.
)} {/* Overlap from next chunk */} {nextChunk && (
↓ Anfang naechster Chunk #{docChunkIndex + 1}

{getOverlapNext()}

)} {/* Metadata */} {currentChunk && (
Metadaten
{Object.entries(currentChunk) .filter(([k]) => !HIDDEN_KEYS.has(k)) .sort(([a], [b]) => { // Structural keys first const aStruct = STRUCTURAL_KEYS.has(a) ? 0 : 1 const bStruct = STRUCTURAL_KEYS.has(b) ? 0 : 1 return aStruct - bStruct || a.localeCompare(b) }) .map(([k, v]) => (
{k}: {Array.isArray(v) ? v.join(', ') : String(v)}
))}
{/* Chunk quality indicator */}
Chunk-Laenge: {getChunkText(currentChunk).length} Zeichen {getChunkText(currentChunk).length < 50 && ( ⚠ Sehr kurz )} {getChunkText(currentChunk).length > 2000 && ( ⚠ Sehr lang )}
)}
{/* PDF-Viewer Panel */} {splitViewActive && (
Original-PDF
Seite ~{pdfPage} {pdfMapping?.totalPages ? ` / ${pdfMapping.totalPages}` : ''} {pdfUrl && ( Oeffnen ↗ )}
{pdfUrl && pdfExists ? (