feat(rag): add QA Split-View Chunk-Browser for ingestion verification

New ChunkBrowserQA component replaces inline chunk browser with:
- Document sidebar with live chunk counts per regulation (batched Qdrant count API)
- Sequential chunk navigation with arrow keys (1/N through all chunks of a document)
- Overlap display showing previous/next chunk boundaries (amber-highlighted)
- Split-view with original PDF via iframe (estimated page from chunk index)
- Adjustable chunks-per-page ratio for PDF page estimation

Extracts REGULATIONS_IN_RAG and REGULATION_INFO to shared rag-constants.ts.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
Benjamin Admin
2026-02-28 17:46:11 +01:00
parent 984dfab975
commit 8c42fefa77
6 changed files with 1377 additions and 355 deletions

View File

@@ -11,6 +11,8 @@ import React, { useState, useEffect, useCallback } from 'react'
import Link from 'next/link'
import { PagePurpose } from '@/components/common/PagePurpose'
import { AIModuleSidebarResponsive } from '@/components/ai/AIModuleSidebar'
import { REGULATIONS_IN_RAG } from './rag-constants'
import { ChunkBrowserQA } from './components/ChunkBrowserQA'
// API uses local proxy route to klausur-service
const API_PROXY = '/api/legal-corpus'
@@ -1374,116 +1376,7 @@ const REGULATION_LICENSES: Record<string, { license: string; licenseNote: string
DMA: { license: 'PUBLIC_DOMAIN', licenseNote: 'EU-Verordnung — amtliches Werk' },
}
// Regulations that are currently ingested in RAG (Qdrant collections).
// Maps each regulation code to the Qdrant collection that holds it and a hard-coded chunk count.
// Updated: 2026-02-27 — update this table whenever new documents are ingested!
const REGULATIONS_IN_RAG: Record<string, { collection: string; chunks: number }> = {
// EU regulations/directives (bp_compliance_ce: 7,341 total)
GDPR: { collection: 'bp_compliance_ce', chunks: 1842 },
EPRIVACY: { collection: 'bp_compliance_ce', chunks: 156 },
SCC: { collection: 'bp_compliance_ce', chunks: 89 },
SCC_FULL_TEXT: { collection: 'bp_compliance_ce', chunks: 154 },
AIACT: { collection: 'bp_compliance_ce', chunks: 1245 },
CRA: { collection: 'bp_compliance_ce', chunks: 687 },
NIS2: { collection: 'bp_compliance_ce', chunks: 534 },
DGA: { collection: 'bp_compliance_ce', chunks: 312 },
DSA: { collection: 'bp_compliance_ce', chunks: 978 },
PLD: { collection: 'bp_compliance_ce', chunks: 124 },
E_COMMERCE_RL: { collection: 'bp_compliance_ce', chunks: 198 },
VERBRAUCHERRECHTE_RL: { collection: 'bp_compliance_ce', chunks: 245 },
DIGITALE_INHALTE_RL: { collection: 'bp_compliance_ce', chunks: 187 },
DMA: { collection: 'bp_compliance_ce', chunks: 590 },
// German laws (bp_compliance_gesetze: 33,929 total)
TDDDG: { collection: 'bp_compliance_gesetze', chunks: 215 },
BDSG_FULL: { collection: 'bp_compliance_gesetze', chunks: 487 },
DE_DDG: { collection: 'bp_compliance_gesetze', chunks: 198 },
DE_BGB_AGB: { collection: 'bp_compliance_gesetze', chunks: 4250 },
DE_EGBGB: { collection: 'bp_compliance_gesetze', chunks: 312 },
DE_HGB_RET: { collection: 'bp_compliance_gesetze', chunks: 6840 },
DE_AO_RET: { collection: 'bp_compliance_gesetze', chunks: 5620 },
// BSI standards (bp_compliance_gesetze)
'BSI-TR-03161-1': { collection: 'bp_compliance_gesetze', chunks: 425 },
'BSI-TR-03161-2': { collection: 'bp_compliance_gesetze', chunks: 380 },
'BSI-TR-03161-3': { collection: 'bp_compliance_gesetze', chunks: 345 },
// National data protection laws (bp_compliance_gesetze)
AT_DSG: { collection: 'bp_compliance_gesetze', chunks: 287 },
CH_DSG: { collection: 'bp_compliance_gesetze', chunks: 156 },
ES_LOPDGDD: { collection: 'bp_compliance_gesetze', chunks: 1245 },
IT_CODICE_PRIVACY: { collection: 'bp_compliance_gesetze', chunks: 198 },
NL_UAVG: { collection: 'bp_compliance_gesetze', chunks: 1320 },
FR_CNIL_GUIDE: { collection: 'bp_compliance_gesetze', chunks: 1450 },
IE_DPA_2018: { collection: 'bp_compliance_gesetze', chunks: 534 },
UK_DPA_2018: { collection: 'bp_compliance_gesetze', chunks: 1680 },
UK_GDPR: { collection: 'bp_compliance_gesetze', chunks: 890 },
NO_PERSONOPPLYSNINGSLOVEN: { collection: 'bp_compliance_gesetze', chunks: 245 },
SE_DATASKYDDSLAG: { collection: 'bp_compliance_gesetze', chunks: 167 },
PL_UODO: { collection: 'bp_compliance_gesetze', chunks: 198 },
CZ_ZOU: { collection: 'bp_compliance_gesetze', chunks: 1120 },
HU_INFOTV: { collection: 'bp_compliance_gesetze', chunks: 1345 },
// EDPB guidelines (bp_compliance_datenschutz)
EDPB_GUIDELINES_5_2020: { collection: 'bp_compliance_datenschutz', chunks: 245 },
EDPB_GUIDELINES_7_2020: { collection: 'bp_compliance_datenschutz', chunks: 347 },
// === New regulations (2026-02-28) ===
// EU CE regulations (bp_compliance_ce)
DPF: { collection: 'bp_compliance_ce', chunks: 1232 },
EUCSA: { collection: 'bp_compliance_ce', chunks: 558 },
DATAACT: { collection: 'bp_compliance_ce', chunks: 809 },
DORA: { collection: 'bp_compliance_ce', chunks: 823 },
PSD2: { collection: 'bp_compliance_ce', chunks: 796 },
AMLR: { collection: 'bp_compliance_ce', chunks: 1182 },
MiCA: { collection: 'bp_compliance_ce', chunks: 1640 },
EHDS: { collection: 'bp_compliance_ce', chunks: 1212 },
EAA: { collection: 'bp_compliance_ce', chunks: 433 },
DSM: { collection: 'bp_compliance_ce', chunks: 416 },
GPSR: { collection: 'bp_compliance_ce', chunks: 509 },
// German laws (bp_compliance_gesetze)
DE_UWG: { collection: 'bp_compliance_gesetze', chunks: 1 },
DE_TKG: { collection: 'bp_compliance_gesetze', chunks: 1631 },
DE_PANGV: { collection: 'bp_compliance_gesetze', chunks: 1 },
DE_DLINFOV: { collection: 'bp_compliance_gesetze', chunks: 21 },
DE_BETRVG: { collection: 'bp_compliance_gesetze', chunks: 498 },
DE_GESCHGEHG: { collection: 'bp_compliance_gesetze', chunks: 63 },
DE_BSIG: { collection: 'bp_compliance_gesetze', chunks: 1 },
DE_USTG_RET: { collection: 'bp_compliance_gesetze', chunks: 1071 },
// AT laws (bp_compliance_gesetze)
AT_DSG_FULL: { collection: 'bp_compliance_gesetze', chunks: 6 },
LI_DSG: { collection: 'bp_compliance_gesetze', chunks: 2 },
AT_ECG: { collection: 'bp_compliance_gesetze', chunks: 120 },
AT_TKG: { collection: 'bp_compliance_gesetze', chunks: 2174 },
AT_KSCHG: { collection: 'bp_compliance_gesetze', chunks: 402 },
AT_FAGG: { collection: 'bp_compliance_gesetze', chunks: 2 },
AT_UGB_RET: { collection: 'bp_compliance_gesetze', chunks: 2828 },
AT_BAO_RET: { collection: 'bp_compliance_gesetze', chunks: 2246 },
AT_MEDIENG: { collection: 'bp_compliance_gesetze', chunks: 571 },
AT_ABGB_AGB: { collection: 'bp_compliance_gesetze', chunks: 2521 },
AT_UWG: { collection: 'bp_compliance_gesetze', chunks: 403 },
// CH laws (bp_compliance_gesetze)
CH_DSV: { collection: 'bp_compliance_gesetze', chunks: 5 },
CH_OR_AGB: { collection: 'bp_compliance_gesetze', chunks: 5 },
CH_UWG: { collection: 'bp_compliance_gesetze', chunks: 5 },
CH_FMG: { collection: 'bp_compliance_gesetze', chunks: 5 },
CH_GEBUV: { collection: 'bp_compliance_gesetze', chunks: 5 },
CH_ZERTES: { collection: 'bp_compliance_gesetze', chunks: 5 },
CH_ZGB_PERS: { collection: 'bp_compliance_gesetze', chunks: 5 },
// Further EU countries (bp_compliance_gesetze)
BE_DPA_LAW: { collection: 'bp_compliance_gesetze', chunks: 3 },
FI_TIETOSUOJALAKI: { collection: 'bp_compliance_gesetze', chunks: 2 },
DK_DATABESKYTTELSESLOVEN: { collection: 'bp_compliance_gesetze', chunks: 2 },
LU_DPA_LAW: { collection: 'bp_compliance_gesetze', chunks: 2 },
// === Industry compliance (2026-02-28) ===
// NOTE(review): every entry from here on records chunks: 0 — presumably registered but not yet ingested; confirm.
// EU CE regulations (bp_compliance_ce)
MACHINERY_REG: { collection: 'bp_compliance_ce', chunks: 0 },
BLUE_GUIDE: { collection: 'bp_compliance_ce', chunks: 0 },
// Frameworks/guidance (bp_compliance_datenschutz)
ENISA_SECURE_BY_DESIGN: { collection: 'bp_compliance_datenschutz', chunks: 0 },
ENISA_SUPPLY_CHAIN: { collection: 'bp_compliance_datenschutz', chunks: 0 },
NIST_SSDF: { collection: 'bp_compliance_datenschutz', chunks: 0 },
NIST_CSF_2: { collection: 'bp_compliance_datenschutz', chunks: 0 },
OECD_AI_PRINCIPLES: { collection: 'bp_compliance_datenschutz', chunks: 0 },
// EU-IFRS / EFRAG (2026-02-28)
EU_IFRS_DE: { collection: 'bp_compliance_ce', chunks: 0 },
EU_IFRS_EN: { collection: 'bp_compliance_ce', chunks: 0 },
EFRAG_ENDORSEMENT: { collection: 'bp_compliance_datenschutz', chunks: 0 },
}
// REGULATIONS_IN_RAG is imported from ./rag-constants.ts
/**
 * Helper: check whether a regulation code has its own entry in REGULATIONS_IN_RAG.
 *
 * Uses an own-property check instead of the `in` operator: `in` also matches
 * names inherited from Object.prototype, so codes such as 'toString' or
 * 'constructor' would otherwise be reported as ingested.
 *
 * @param code - Regulation code to look up (e.g. 'GDPR').
 * @returns true only when `code` is a key defined directly on REGULATIONS_IN_RAG.
 */
const isInRag = (code: string): boolean =>
  Object.prototype.hasOwnProperty.call(REGULATIONS_IN_RAG, code)
@@ -1850,17 +1743,7 @@ export default function RAGPage() {
const [autoRefresh, setAutoRefresh] = useState(true)
const [elapsedTime, setElapsedTime] = useState<string>('')
// Chunk browser state
const [chunkCollection, setChunkCollection] = useState('bp_compliance_gesetze')
const [chunkData, setChunkData] = useState<Record<string, unknown>[]>([])
const [chunkOffset, setChunkOffset] = useState<string | null>(null)
const [chunkHistory, setChunkHistory] = useState<(string | null)[]>([])
const [chunkLoading, setChunkLoading] = useState(false)
const [chunkTextSearch, setChunkTextSearch] = useState('')
const [chunkTotalCount, setChunkTotalCount] = useState(0)
const [chunkCurrentPage, setChunkCurrentPage] = useState(0)
const [chunkNextOffset, setChunkNextOffset] = useState<string | null>(null)
const [expandedChunk, setExpandedChunk] = useState<number | null>(null)
// Chunk browser state is now in ChunkBrowserQA component
// DSFA corpus state
const [dsfaSources, setDsfaSources] = useState<DsfaSource[]>([])
@@ -2107,68 +1990,7 @@ export default function RAGPage() {
return () => clearInterval(interval)
}, [pipelineState?.started_at, pipelineState?.status])
/**
 * Fetch one page of chunks (limit 20) from the proxy's `scroll` action and
 * store the result in the chunk-browser state.
 *
 * @param offset - Scroll offset to resume from; null/empty starts at the beginning.
 * @param newCollection - Collection to read; falls back to `chunkCollection` when omitted.
 *
 * Failures never throw: network errors and non-OK HTTP responses are logged and
 * the previously loaded chunks are left in place. The loading flag is always
 * cleared again.
 *
 * NOTE(review): `chunkTextSearch` is read from the enclosing scope. If a caller
 * updates it via its setter immediately before calling this function, the new
 * value is presumably not visible yet — confirm against callers.
 */
const loadChunks = async (offset: string | null = null, newCollection?: string) => {
  const col = newCollection || chunkCollection
  setChunkLoading(true)
  try {
    const params = new URLSearchParams({
      action: 'scroll',
      collection: col,
      limit: '20',
    })
    if (offset) params.append('offset', offset)
    const searchTerm = chunkTextSearch.trim()
    if (searchTerm) params.append('text_search', searchTerm)

    const res = await fetch(`${API_PROXY}?${params}`)
    if (!res.ok) {
      // Surface HTTP failures instead of silently keeping stale data on screen.
      console.error(`Chunk scroll failed: HTTP ${res.status}`)
      return
    }

    const data = await res.json()
    // Guard against a non-array payload: the chunk list is rendered with .map().
    setChunkData(Array.isArray(data.chunks) ? data.chunks : [])
    setChunkNextOffset(data.next_offset || null)
    setExpandedChunk(null)
  } catch (error) {
    console.error('Chunk scroll failed:', error)
  } finally {
    setChunkLoading(false)
  }
}
/**
 * Query the proxy's `collection-count` action for `col` and store the result
 * as the total chunk count. Best-effort: a non-OK response or any thrown
 * error leaves the current count untouched.
 */
const loadChunkCount = async (col: string) => {
  try {
    const query = `action=collection-count&collection=${encodeURIComponent(col)}`
    const response = await fetch(`${API_PROXY}?${query}`)
    if (!response.ok) return

    const payload = await response.json()
    // A missing or falsy count is shown as 0.
    setChunkTotalCount(payload.count || 0)
  } catch {
    // Intentionally ignored.
  }
}
/**
 * Switch the chunk browser to another collection: select it, rewind the
 * pagination state to the first page, then reload the total count and the
 * first page of chunks for that collection.
 */
const handleChunkCollectionChange = (col: string) => {
  // Select the collection and reset pagination.
  setChunkCollection(col)
  setChunkOffset(null)
  setChunkHistory([])
  setChunkCurrentPage(0)

  // `col` is passed explicitly to both loaders rather than read back from state.
  void loadChunkCount(col)
  void loadChunks(null, col)
}
/**
 * Advance to the next page of chunks. Does nothing when there is no next
 * offset; otherwise remembers the current offset in the history stack so that
 * "previous" can return to it, then loads the next page.
 */
const handleChunkNext = () => {
  const target = chunkNextOffset
  if (target) {
    setChunkHistory((stack) => stack.concat([chunkOffset]))
    setChunkOffset(target)
    setChunkCurrentPage((page) => page + 1)
    loadChunks(target)
  }
}
/**
 * Go back one page of chunks. Does nothing when the history stack is empty;
 * otherwise takes the most recent offset off the stack (null stands for the
 * first page), makes it current and reloads that page.
 */
const handleChunkPrev = () => {
  const depth = chunkHistory.length
  if (depth === 0) return

  const target = chunkHistory[depth - 1] ?? null
  setChunkHistory(chunkHistory.slice(0, depth - 1))
  setChunkOffset(target)
  // Never let the page counter drop below zero.
  setChunkCurrentPage((page) => Math.max(0, page - 1))
  loadChunks(target)
}
// Chunk browser functions are now in ChunkBrowserQA component
const handleSearch = async () => {
if (!searchQuery.trim()) return
@@ -2611,10 +2433,6 @@ export default function RAGPage() {
<button
onClick={(e) => {
e.stopPropagation()
const ragEntry = REGULATIONS_IN_RAG[reg.code as keyof typeof REGULATIONS_IN_RAG]
const col = ragEntry?.collection || 'bp_compliance_gesetze'
setChunkTextSearch(reg.name)
handleChunkCollectionChange(col)
setActiveTab('chunks')
}}
className="text-teal-600 hover:text-teal-700 font-medium"
@@ -3263,172 +3081,7 @@ export default function RAGPage() {
)}
{activeTab === 'chunks' && (
<div className="space-y-6">
{/* Collection Selector + Controls */}
<div className="bg-white rounded-xl border border-slate-200 p-6">
<h3 className="font-semibold text-slate-900 mb-4">Chunk-Browser</h3>
<div className="flex flex-wrap gap-4 items-end">
<div>
<label className="block text-sm font-medium text-slate-700 mb-1">Collection</label>
<select
value={chunkCollection}
onChange={(e) => handleChunkCollectionChange(e.target.value)}
className="px-3 py-2 border rounded-lg text-sm focus:ring-2 focus:ring-teal-500"
>
<option value="bp_compliance_gesetze">bp_compliance_gesetze</option>
<option value="bp_compliance_ce">bp_compliance_ce</option>
<option value="bp_compliance_datenschutz">bp_compliance_datenschutz</option>
<option value="bp_dsfa_corpus">bp_dsfa_corpus</option>
<option value="bp_compliance_recht">bp_compliance_recht</option>
<option value="bp_legal_templates">bp_legal_templates</option>
<option value="bp_compliance_gdpr">bp_compliance_gdpr</option>
<option value="bp_compliance_schulrecht">bp_compliance_schulrecht</option>
<option value="bp_dsfa_templates">bp_dsfa_templates</option>
<option value="bp_dsfa_risks">bp_dsfa_risks</option>
</select>
</div>
<div className="flex-1 min-w-[200px]">
<label className="block text-sm font-medium text-slate-700 mb-1">Textsuche (filtert geladene Seite)</label>
<div className="flex gap-2">
<input
type="text"
value={chunkTextSearch}
onChange={(e) => setChunkTextSearch(e.target.value)}
onKeyDown={(e) => { if (e.key === 'Enter') loadChunks(null) }}
placeholder="z.B. DSGVO, IFRS, Maschinenverordnung..."
className="flex-1 px-3 py-2 border rounded-lg text-sm focus:ring-2 focus:ring-teal-500"
/>
<button
onClick={() => { setChunkOffset(null); setChunkHistory([]); setChunkCurrentPage(0); loadChunks(null) }}
className="px-4 py-2 bg-teal-600 text-white text-sm rounded-lg hover:bg-teal-700"
>
Laden
</button>
</div>
</div>
<div className="text-sm text-slate-500">
{chunkTotalCount > 0 && <span>{chunkTotalCount.toLocaleString()} Chunks total</span>}
</div>
</div>
</div>
{/* Pagination */}
{chunkData.length > 0 && (
<div className="flex items-center justify-between">
<button
onClick={handleChunkPrev}
disabled={chunkCurrentPage === 0}
className="px-4 py-2 text-sm border rounded-lg bg-white hover:bg-slate-50 disabled:opacity-30"
>
Zurueck
</button>
<span className="text-sm text-slate-600">
Seite {chunkCurrentPage + 1} {chunkData.length} Chunks angezeigt
</span>
<button
onClick={handleChunkNext}
disabled={!chunkNextOffset}
className="px-4 py-2 text-sm border rounded-lg bg-white hover:bg-slate-50 disabled:opacity-30"
>
Weiter
</button>
</div>
)}
{/* Chunk List */}
{chunkLoading ? (
<div className="text-center py-12 text-slate-500">Chunks werden geladen...</div>
) : chunkData.length === 0 ? (
<div className="text-center py-12 text-slate-400">
Collection waehlen und &quot;Laden&quot; klicken um Chunks anzuzeigen.
</div>
) : (
<div className="space-y-2">
{chunkData.map((chunk, i) => {
const text = String(chunk.text || chunk.content || chunk.chunk_text || '')
const source = String(chunk.source_name || chunk.regulation_code || chunk.guideline_name || chunk.regulation_short || '')
const isExpanded = expandedChunk === i
const highlightTerm = chunkTextSearch.trim().toLowerCase()
// Wrap the first case-insensitive occurrence of highlightTerm in a <mark>.
// Returns the string unchanged when the term is empty or does not occur.
const renderHighlighted = (str: string) => {
  const matchStart = highlightTerm ? str.toLowerCase().indexOf(highlightTerm) : -1
  if (matchStart === -1) return str

  const matchEnd = matchStart + highlightTerm.length
  return (
    <>
      {str.slice(0, matchStart)}
      <mark className="bg-yellow-200 px-0.5 rounded">{str.slice(matchStart, matchEnd)}</mark>
      {str.slice(matchEnd)}
    </>
  )
}
return (
<div
key={String(chunk.id || i)}
className={`bg-white rounded-lg border transition-all cursor-pointer ${
isExpanded ? 'border-teal-300 shadow-md' : 'border-slate-200 hover:border-slate-300'
}`}
onClick={() => setExpandedChunk(isExpanded ? null : i)}
>
<div className="px-4 py-3">
<div className="flex items-center gap-2 mb-1">
<span className="text-xs font-mono text-slate-400">#{chunkCurrentPage * 20 + i + 1}</span>
{source && (
<span className="px-2 py-0.5 text-xs rounded bg-slate-100 text-slate-600">{source}</span>
)}
{chunk.article && (
<span className="text-xs text-slate-500">Art. {String(chunk.article)}</span>
)}
{chunk.language && (
<span className="text-xs text-slate-400 ml-auto">{String(chunk.language).toUpperCase()}</span>
)}
</div>
<p className={`text-sm text-slate-700 ${isExpanded ? '' : 'line-clamp-3'}`}>
{renderHighlighted(text)}
</p>
</div>
{isExpanded && (
<div className="px-4 py-3 border-t border-slate-100 bg-slate-50 text-xs text-slate-500 space-y-1">
<div className="grid grid-cols-2 md:grid-cols-4 gap-2">
{Object.entries(chunk).filter(([k]) => !['text', 'content', 'chunk_text', 'id'].includes(k)).map(([k, v]) => (
<div key={k}>
<span className="font-medium text-slate-600">{k}:</span>{' '}
<span>{String(v)}</span>
</div>
))}
</div>
</div>
)}
</div>
)
})}
</div>
)}
{/* Bottom Pagination */}
{chunkData.length > 0 && (
<div className="flex items-center justify-between">
<button
onClick={handleChunkPrev}
disabled={chunkCurrentPage === 0}
className="px-4 py-2 text-sm border rounded-lg bg-white hover:bg-slate-50 disabled:opacity-30"
>
Zurueck
</button>
<span className="text-sm text-slate-600">
Seite {chunkCurrentPage + 1}
</span>
<button
onClick={handleChunkNext}
disabled={!chunkNextOffset}
className="px-4 py-2 text-sm border rounded-lg bg-white hover:bg-slate-50 disabled:opacity-30"
>
Weiter
</button>
</div>
)}
</div>
<ChunkBrowserQA apiProxy={API_PROXY} />
)}
{activeTab === 'data' && (