diff --git a/admin-lehrer/app/(admin)/ai/rag/components/ChunkBrowserQA.tsx b/admin-lehrer/app/(admin)/ai/rag/components/ChunkBrowserQA.tsx index f2e0099..854a8f3 100644 --- a/admin-lehrer/app/(admin)/ai/rag/components/ChunkBrowserQA.tsx +++ b/admin-lehrer/app/(admin)/ai/rag/components/ChunkBrowserQA.tsx @@ -137,7 +137,6 @@ export function ChunkBrowserQA({ apiProxy }: ChunkBrowserQAProps) { let offset: string | null = null try { - // Paginated scroll, 100 at a time let safety = 0 do { const params = new URLSearchParams({ @@ -157,7 +156,7 @@ export function ChunkBrowserQA({ apiProxy }: ChunkBrowserQAProps) { allChunks.push(...chunks) offset = data.next_offset || null safety++ - } while (offset && safety < 200) // safety limit ~20k chunks + } while (offset && safety < 200) // Sort by chunk_index allChunks.sort((a, b) => { @@ -186,14 +185,22 @@ export function ChunkBrowserQA({ apiProxy }: ChunkBrowserQAProps) { const prevChunk = docChunkIndex > 0 ? docChunks[docChunkIndex - 1] : null const nextChunk = docChunkIndex < docChunks.length - 1 ? docChunks[docChunkIndex + 1] : null - // PDF page estimation - const estimatePdfPage = (chunkIndex: number): number => { + // PDF page estimation — use pages metadata if available + const estimatePdfPage = (chunk: Record | null, chunkIdx: number): number => { + if (chunk) { + // Try pages array from payload (e.g. [7] or [7,8]) + const pages = chunk.pages as number[] | undefined + if (Array.isArray(pages) && pages.length > 0) return pages[0] + // Try page field + const page = chunk.page as number | undefined + if (typeof page === 'number' && page > 0) return page + } const mapping = selectedRegulation ? RAG_PDF_MAPPING[selectedRegulation] : null const cpp = mapping?.chunksPerPage || chunksPerPage - return Math.floor(chunkIndex / cpp) + 1 + return Math.floor(chunkIdx / cpp) + 1 } - const pdfPage = currentChunk ? estimatePdfPage(docChunkIndex) : 1 + const pdfPage = estimatePdfPage(currentChunk, docChunkIndex) const pdfMapping = selectedRegulation ? RAG_PDF_MAPPING[selectedRegulation] : null const pdfUrl = pdfMapping ? `/rag-originals/${pdfMapping.filename}#page=${pdfPage}` : null @@ -249,7 +256,27 @@ export function ChunkBrowserQA({ apiProxy }: ChunkBrowserQAProps) { // Get text content from a chunk const getChunkText = (chunk: Record | null): string => { if (!chunk) return '' - return String(chunk.text || chunk.content || chunk.chunk_text || '') + return String(chunk.chunk_text || chunk.text || chunk.content || '') + } + + // Extract structural metadata for prominent display + const getStructuralInfo = (chunk: Record | null): { article?: string; section?: string; pages?: string } => { + if (!chunk) return {} + const result: { article?: string; section?: string; pages?: string } = {} + // Article / paragraph + const article = chunk.article || chunk.artikel || chunk.paragraph || chunk.section_title + if (article) result.article = String(article) + // Section + const section = chunk.section || chunk.chapter || chunk.abschnitt || chunk.kapitel + if (section) result.section = String(section) + // Pages + const pages = chunk.pages as number[] | undefined + if (Array.isArray(pages) && pages.length > 0) { + result.pages = pages.length === 1 ? `S. ${pages[0]}` : `S. ${pages[0]}-${pages[pages.length - 1]}` + } else if (chunk.page) { + result.pages = `S. ${chunk.page}` + } + return result } // Overlap extraction @@ -287,10 +314,21 @@ export function ChunkBrowserQA({ apiProxy }: ChunkBrowserQAProps) { return reg?.name || code } + // Important metadata keys to show prominently + const STRUCTURAL_KEYS = new Set([ + 'article', 'artikel', 'paragraph', 'section_title', 'section', 'chapter', + 'abschnitt', 'kapitel', 'pages', 'page', + ]) + const HIDDEN_KEYS = new Set([ + 'text', 'content', 'chunk_text', 'id', 'embedding', + ]) + + const structInfo = getStructuralInfo(currentChunk) + return ( -
- {/* Header bar */} -
+
+ {/* Header bar — fixed height */} +
@@ -309,27 +347,49 @@ export function ChunkBrowserQA({ apiProxy }: ChunkBrowserQAProps) { <>
- QA-Modus: {selectedRegulation} — {getRegName(selectedRegulation)} + {selectedRegulation} — {getRegName(selectedRegulation)} + {structInfo.article && ( + + {structInfo.article} + + )} + {structInfo.pages && ( + + {structInfo.pages} + + )}
- - Chunk {docChunkIndex + 1} / {docTotalChunks} - + + {docChunkIndex + 1} / {docTotalChunks} + + { + const v = parseInt(e.target.value, 10) + if (!isNaN(v) && v >= 1 && v <= docTotalChunks) setDocChunkIndex(v - 1) + }} + className="w-16 px-2 py-1 border rounded text-xs text-center" + title="Springe zu Chunk Nr." + />
@@ -356,11 +416,11 @@ export function ChunkBrowserQA({ apiProxy }: ChunkBrowserQAProps) {
- {/* Main content: Sidebar + Content */} -
- {/* Sidebar */} -
-
+ {/* Main content: Sidebar + Content — fills remaining height */} +
+ {/* Sidebar — scrollable */} +
+
{countsLoading && ( -
Counts werden geladen...
+
Counts laden...
)}
-
+
{GROUP_ORDER.map(group => { const items = filteredRegulations[group] if (items.length === 0) return null @@ -381,13 +441,13 @@ export function ChunkBrowserQA({ apiProxy }: ChunkBrowserQAProps) {
{!isCollapsed && items.map(reg => { - const count = regulationCounts[reg.code] ?? REGULATIONS_IN_RAG[reg.code]?.chunks ?? 0 + const count = regulationCounts[reg.code] ?? 0 const isSelected = selectedRegulation === reg.code return ( @@ -410,13 +470,13 @@ export function ChunkBrowserQA({ apiProxy }: ChunkBrowserQAProps) {
- {/* Content area */} + {/* Content area — fills remaining width and height */} {!selectedRegulation ? (
🔍
-

Waehle ein Dokument in der Sidebar, um die QA-Ansicht zu starten.

-

Pfeiltasten navigieren zwischen Chunks.

+

Dokument in der Sidebar auswaehlen, um QA zu starten.

+

Pfeiltasten: Chunk vor/zurueck

) : docLoading ? ( @@ -425,40 +485,57 @@ export function ChunkBrowserQA({ apiProxy }: ChunkBrowserQAProps) {

Chunks werden geladen...

- {selectedRegulation}: {REGULATIONS_IN_RAG[selectedRegulation]?.chunks || '?'} Chunks erwartet + {selectedRegulation}: {REGULATIONS_IN_RAG[selectedRegulation]?.chunks.toLocaleString() || '?'} Chunks erwartet

) : ( -
- {/* Chunk-Text Panel */} -
-
+
+ {/* Chunk-Text Panel — fixed height, internal scroll */} +
+ {/* Panel header */} +
Chunk-Text - - Index: {docChunkIndex} / {docTotalChunks - 1} - +
+ {structInfo.article && ( + + {structInfo.article} + + )} + {structInfo.section && ( + + {structInfo.section} + + )} + + #{docChunkIndex} / {docTotalChunks - 1} + +
-
+ + {/* Scrollable content */} +
{/* Overlap from previous chunk */} {prevChunk && (
-
↑ Overlap (vorheriger Chunk #{docChunkIndex - 1})
+
↑ Ende vorheriger Chunk #{docChunkIndex - 1}

{getOverlapPrev()}

)} {/* Current chunk text */} - {currentChunk && ( + {currentChunk ? (
{getChunkText(currentChunk)}
+ ) : ( +
Kein Chunk-Text vorhanden.
)} {/* Overlap from next chunk */} {nextChunk && (
-
↓ Overlap (naechster Chunk #{docChunkIndex + 1})
+
↓ Anfang naechster Chunk #{docChunkIndex + 1}

{getOverlapNext()}

)} @@ -469,62 +546,62 @@ export function ChunkBrowserQA({ apiProxy }: ChunkBrowserQAProps) {
Metadaten
{Object.entries(currentChunk) - .filter(([k]) => !['text', 'content', 'chunk_text', 'id'].includes(k)) + .filter(([k]) => !HIDDEN_KEYS.has(k)) + .sort(([a], [b]) => { + // Structural keys first + const aStruct = STRUCTURAL_KEYS.has(a) ? 0 : 1 + const bStruct = STRUCTURAL_KEYS.has(b) ? 0 : 1 + return aStruct - bStruct || a.localeCompare(b) + }) .map(([k, v]) => ( -
- {k}: - {String(v)} +
+ {k}: + + {Array.isArray(v) ? v.join(', ') : String(v)} +
))}
+ {/* Chunk quality indicator */} +
+
+ Chunk-Laenge: {getChunkText(currentChunk).length} Zeichen + {getChunkText(currentChunk).length < 50 && ( + ⚠ Sehr kurz + )} + {getChunkText(currentChunk).length > 2000 && ( + ⚠ Sehr lang + )} +
+
)}
- - {/* Bottom nav */} -
- -
- { - const v = parseInt(e.target.value, 10) - if (!isNaN(v) && v >= 0 && v < docTotalChunks) setDocChunkIndex(v) - }} - className="w-20 px-2 py-1 border rounded text-xs text-center" - /> - / {docTotalChunks - 1} -
- -
{/* PDF-Viewer Panel */} {splitViewActive && ( -
-
+
+
Original-PDF - - Seite ~{pdfPage} - {pdfMapping?.totalPages ? ` / ${pdfMapping.totalPages}` : ''} - +
+ + Seite ~{pdfPage} + {pdfMapping?.totalPages ? ` / ${pdfMapping.totalPages}` : ''} + + {pdfUrl && ( + + Oeffnen ↗ + + )} +
-
+
{pdfUrl ? (