feat(rag): add QA Split-View Chunk-Browser for ingestion verification

New ChunkBrowserQA component replaces inline chunk browser with:
- Document sidebar with live chunk counts per regulation (batched Qdrant count API)
- Sequential chunk navigation with arrow keys (1/N through all chunks of a document)
- Overlap display showing previous/next chunk boundaries (amber-highlighted)
- Split-view with original PDF via iframe (estimated page from chunk index)
- Adjustable chunks-per-page ratio for PDF page estimation

Extracts REGULATIONS_IN_RAG and REGULATION_INFO to shared rag-constants.ts.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
Benjamin Admin
2026-02-28 17:46:11 +01:00
parent 984dfab975
commit 8c42fefa77
6 changed files with 1377 additions and 355 deletions

View File

@@ -0,0 +1,552 @@
'use client'
import React, { useState, useEffect, useCallback, useRef } from 'react'
import { RAG_PDF_MAPPING } from './rag-pdf-mapping'
import { REGULATIONS_IN_RAG, REGULATION_INFO } from '../rag-constants'
/** Props of the ChunkBrowserQA component. */
interface ChunkBrowserQAProps {
/** Base URL of the API proxy; requests are issued as `${apiProxy}?<query parameters>`. */
apiProxy: string
}
/** Key of a sidebar group; regulations whose type is not one of these fall into 'other'. */
type RegGroupKey =
  | 'eu_regulation'
  | 'eu_directive'
  | 'de_law'
  | 'at_law'
  | 'ch_law'
  | 'national_law'
  | 'bsi_standard'
  | 'eu_guideline'
  | 'international_standard'
  | 'other'

/**
 * Sidebar heading (German UI text) for every group.
 * The entry order below is also the order in which groups are rendered,
 * because GROUP_ORDER is derived from it.
 */
const GROUP_LABELS: Record<RegGroupKey, string> = {
  eu_regulation: 'EU Verordnungen',
  eu_directive: 'EU Richtlinien',
  de_law: 'DE Gesetze',
  at_law: 'AT Gesetze',
  ch_law: 'CH Gesetze',
  national_law: 'Nationale Gesetze (EU)',
  bsi_standard: 'BSI Standards',
  eu_guideline: 'EDPB / Guidelines',
  international_standard: 'Internationale Standards',
  other: 'Sonstige',
}

/** Render order of the sidebar groups (string keys keep their insertion order). */
const GROUP_ORDER: RegGroupKey[] = Object.keys(GROUP_LABELS) as RegGroupKey[]
/** Collection names offered in the collection dropdown of the chunk browser. */
const COLLECTIONS = [
'bp_compliance_gesetze',
'bp_compliance_ce',
'bp_compliance_datenschutz',
]
/** Number of characters of a neighbouring chunk shown as overlap context. */
const OVERLAP_CHARS = 150
/** Number of chunks requested per scroll call. */
const SCROLL_PAGE_SIZE = 100
/** Upper bound on scroll calls per document (~20k chunks); guards against endless pagination. */
const MAX_SCROLL_PAGES = 200
/** Payload keys that are not repeated in the metadata grid (chunk text variants and the id). */
const HIDDEN_META_KEYS = ['text', 'content', 'chunk_text', 'id']

type SidebarRegulation = { code: string; name: string; type: string }
type GroupedRegulations = Record<RegGroupKey, SidebarRegulation[]>

/** Creates a fresh grouping object with an empty list for every sidebar group. */
function createEmptyGroups(): GroupedRegulations {
  return {
    eu_regulation: [], eu_directive: [], de_law: [], at_law: [], ch_law: [],
    national_law: [], bsi_standard: [], eu_guideline: [], international_standard: [], other: [],
  }
}

/** Returns the text of a chunk, taken from the first non-empty of `text`, `content`, `chunk_text`. */
function getChunkText(chunk: Record<string, unknown> | null): string {
  if (!chunk) return ''
  return String(chunk.text || chunk.content || chunk.chunk_text || '')
}

/** Renders a metadata value; objects and arrays are serialised instead of showing "[object Object]". */
function formatMetaValue(value: unknown): string {
  if (value === null || typeof value !== 'object') return String(value)
  try {
    return JSON.stringify(value) ?? String(value)
  } catch {
    // e.g. circular structures: fall back to the default string conversion
    return String(value)
  }
}

/** True when a key event originates from an element that handles arrow keys itself. */
function isEditableTarget(target: EventTarget | null): boolean {
  if (!(target instanceof HTMLElement)) return false
  const tag = target.tagName
  return tag === 'INPUT' || tag === 'TEXTAREA' || tag === 'SELECT' || target.isContentEditable
}

/**
 * QA split-view chunk browser.
 *
 * Shows a sidebar with all regulations of the selected collection (with chunk
 * counts), loads every chunk of the selected regulation, lets the user step
 * through the chunks sequentially (buttons, index input, arrow keys) and
 * optionally displays the original PDF next to the chunk at an estimated page.
 *
 * @param apiProxy Base URL of the API proxy used for the count and scroll requests.
 */
export function ChunkBrowserQA({ apiProxy }: ChunkBrowserQAProps) {
  // Sidebar filter
  const [selectedRegulation, setSelectedRegulation] = useState<string | null>(null)
  const [regulationCounts, setRegulationCounts] = useState<Record<string, number>>({})
  const [filterSearch, setFilterSearch] = useState('')
  const [countsLoading, setCountsLoading] = useState(false)

  // Chunks of the selected document (sequential navigation)
  const [docChunks, setDocChunks] = useState<Record<string, unknown>[]>([])
  const [docChunkIndex, setDocChunkIndex] = useState(0)
  const [docTotalChunks, setDocTotalChunks] = useState(0)
  const [docLoading, setDocLoading] = useState(false)
  // Mirror of docChunks so the stable keyboard handler always sees the current length.
  const docChunksRef = useRef(docChunks)
  docChunksRef.current = docChunks
  // Incremented for every document load (and on collection change); a load whose
  // token is no longer current has been superseded and must not touch state.
  const loadTokenRef = useRef(0)

  // Split view
  const [splitViewActive, setSplitViewActive] = useState(true)
  const [chunksPerPage, setChunksPerPage] = useState(6)

  // Collection
  const [collection, setCollection] = useState('bp_compliance_gesetze')

  // Collapsed sidebar groups
  const [collapsedGroups, setCollapsedGroups] = useState<Set<string>>(new Set())

  // Regulations of the current collection, grouped by regulation type for the sidebar.
  // REGULATIONS_IN_RAG and REGULATION_INFO are module constants, so `collection` is the only input.
  const groupedRegulations = React.useMemo(() => {
    const groups = createEmptyGroups()
    for (const [code, info] of Object.entries(REGULATIONS_IN_RAG)) {
      if (info.collection !== collection) continue
      const reg = REGULATION_INFO.find(r => r.code === code)
      const type = (reg?.type || 'other') as RegGroupKey
      const groupKey = type in groups ? type : 'other'
      groups[groupKey].push({
        code,
        name: reg?.name || code,
        type: reg?.type || 'unknown',
      })
    }
    return groups
  }, [collection])

  // Load live chunk counts for all regulations of a collection (single batched request)
  const loadRegulationCounts = useCallback(async (col: string) => {
    const codes = Object.entries(REGULATIONS_IN_RAG)
      .filter(([, info]) => info.collection === col)
      .map(([code]) => code)
    if (codes.length === 0) return
    setCountsLoading(true)
    try {
      const params = new URLSearchParams({
        action: 'regulation-counts-batch',
        collection: col,
        codes: codes.join(','),
      })
      const res = await fetch(`${apiProxy}?${params}`)
      if (res.ok) {
        const data = await res.json()
        setRegulationCounts(prev => ({ ...prev, ...data.counts }))
      }
    } catch (error) {
      console.error('Failed to load regulation counts:', error)
    } finally {
      setCountsLoading(false)
    }
  }, [apiProxy])

  // Load all chunks of a regulation via paginated scroll requests
  const loadDocumentChunks = useCallback(async (regulationCode: string) => {
    const ragInfo = REGULATIONS_IN_RAG[regulationCode]
    if (!ragInfo) return

    const token = ++loadTokenRef.current
    const isCurrent = () => loadTokenRef.current === token

    setDocLoading(true)
    setDocChunks([])
    setDocChunkIndex(0)
    setDocTotalChunks(0)

    const allChunks: Record<string, unknown>[] = []
    let offset: string | null = null
    try {
      let pages = 0
      do {
        const params = new URLSearchParams({
          action: 'scroll',
          collection: ragInfo.collection,
          limit: String(SCROLL_PAGE_SIZE),
          filter_key: 'regulation_code',
          filter_value: regulationCode,
        })
        if (offset) params.append('offset', offset)
        const res = await fetch(`${apiProxy}?${params}`)
        // Another document was selected meanwhile: drop this result.
        if (!isCurrent()) return
        if (!res.ok) {
          // Keep what has been loaded so far, but make the truncation visible in the console.
          console.error(`Chunk scroll for ${regulationCode} failed with status ${res.status}`)
          break
        }
        const data = await res.json()
        if (!isCurrent()) return
        const chunks = data.chunks || []
        allChunks.push(...chunks)
        offset = data.next_offset || null
        pages++
      } while (offset && pages < MAX_SCROLL_PAGES)

      // Sort by chunk_index (falling back to chunk_id)
      allChunks.sort((a, b) => {
        const ai = Number(a.chunk_index ?? a.chunk_id ?? 0)
        const bi = Number(b.chunk_index ?? b.chunk_id ?? 0)
        return ai - bi
      })
      setDocChunks(allChunks)
      setDocTotalChunks(allChunks.length)
      setDocChunkIndex(0)
    } catch (error) {
      if (isCurrent()) console.error('Failed to load document chunks:', error)
    } finally {
      // Only the most recent load owns the loading flag.
      if (isCurrent()) setDocLoading(false)
    }
  }, [apiProxy])

  // Load counts initially and whenever the collection changes
  useEffect(() => {
    loadRegulationCounts(collection)
  }, [collection, loadRegulationCounts])

  // Current chunk and its neighbours
  const currentChunk = docChunks[docChunkIndex] || null
  const prevChunk = docChunkIndex > 0 ? docChunks[docChunkIndex - 1] : null
  const nextChunk = docChunkIndex < docChunks.length - 1 ? docChunks[docChunkIndex + 1] : null

  // PDF page estimation
  const pdfMapping = selectedRegulation ? RAG_PDF_MAPPING[selectedRegulation] : null
  const estimatePdfPage = (chunkIndex: number): number => {
    // A per-document ratio from the mapping takes precedence over the UI setting.
    const cpp = pdfMapping?.chunksPerPage || chunksPerPage
    const estimate = Math.floor(chunkIndex / cpp) + 1
    // Never point past the end of the document when its length is known.
    const total = pdfMapping?.totalPages
    return total && total > 0 ? Math.min(estimate, total) : estimate
  }
  const pdfPage = currentChunk ? estimatePdfPage(docChunkIndex) : 1
  const pdfUrl = pdfMapping ? `/rag-originals/${pdfMapping.filename}#page=${pdfPage}` : null

  // Handlers
  const handleSelectRegulation = (code: string) => {
    setSelectedRegulation(code)
    loadDocumentChunks(code)
  }

  const handleCollectionChange = (col: string) => {
    // Invalidate a document load that may still be running for the old collection.
    loadTokenRef.current++
    setDocLoading(false)
    setCollection(col)
    setSelectedRegulation(null)
    setDocChunks([])
    setDocChunkIndex(0)
    setDocTotalChunks(0)
    setRegulationCounts({})
  }

  // Both navigation handlers clamp inside the functional update so the index
  // can never leave [0, length - 1], matching the keyboard handler.
  const handlePrev = () => {
    setDocChunkIndex(i => Math.max(0, i - 1))
  }

  const handleNext = () => {
    setDocChunkIndex(i => Math.min(Math.max(0, docChunksRef.current.length - 1), i + 1))
  }

  const handleKeyDown = useCallback((e: KeyboardEvent) => {
    // Leave arrow keys alone while the user types in the search box, edits the
    // index input or operates a dropdown, and do not shadow modifier shortcuts.
    if (e.altKey || e.ctrlKey || e.metaKey || isEditableTarget(e.target)) return
    if (e.key === 'ArrowLeft' || e.key === 'ArrowUp') {
      e.preventDefault()
      setDocChunkIndex(i => Math.max(0, i - 1))
    } else if (e.key === 'ArrowRight' || e.key === 'ArrowDown') {
      e.preventDefault()
      setDocChunkIndex(i => Math.min(Math.max(0, docChunksRef.current.length - 1), i + 1))
    }
  }, [])

  // Arrow-key navigation is only active while a document with chunks is shown
  useEffect(() => {
    if (!selectedRegulation || docChunks.length === 0) return undefined
    window.addEventListener('keydown', handleKeyDown)
    return () => window.removeEventListener('keydown', handleKeyDown)
  }, [selectedRegulation, docChunks.length, handleKeyDown])

  const toggleGroup = (group: string) => {
    setCollapsedGroups(prev => {
      const next = new Set(prev)
      if (next.has(group)) next.delete(group)
      else next.add(group)
      return next
    })
  }

  // Overlap context: tail of the previous chunk / head of the next chunk
  const getOverlapPrev = (): string => {
    if (!prevChunk) return ''
    const text = getChunkText(prevChunk)
    return text.length > OVERLAP_CHARS ? '...' + text.slice(-OVERLAP_CHARS) : text
  }

  const getOverlapNext = (): string => {
    if (!nextChunk) return ''
    const text = getChunkText(nextChunk)
    return text.length > OVERLAP_CHARS ? text.slice(0, OVERLAP_CHARS) + '...' : text
  }

  // Sidebar items filtered by the search term (matches code or name, case-insensitive)
  const filteredRegulations = React.useMemo(() => {
    if (!filterSearch.trim()) return groupedRegulations
    const term = filterSearch.toLowerCase()
    const filtered = createEmptyGroups()
    for (const [group, items] of Object.entries(groupedRegulations)) {
      filtered[group as RegGroupKey] = items.filter(
        r => r.code.toLowerCase().includes(term) || r.name.toLowerCase().includes(term)
      )
    }
    return filtered
  }, [groupedRegulations, filterSearch])

  // Regulation name lookup (falls back to the code)
  const getRegName = (code: string): string => {
    const reg = REGULATION_INFO.find(r => r.code === code)
    return reg?.name || code
  }

  return (
    <div className="space-y-4">
      {/* Header bar */}
      <div className="bg-white rounded-xl border border-slate-200 p-4">
        <div className="flex flex-wrap items-center gap-4">
          <div>
            <label className="block text-xs font-medium text-slate-500 mb-1">Collection</label>
            <select
              value={collection}
              onChange={(e) => handleCollectionChange(e.target.value)}
              className="px-3 py-1.5 border rounded-lg text-sm focus:ring-2 focus:ring-teal-500"
            >
              {COLLECTIONS.map(c => (
                <option key={c} value={c}>{c}</option>
              ))}
            </select>
          </div>
          {selectedRegulation && (
            <>
              <div className="flex items-center gap-2">
                <span className="text-sm font-semibold text-slate-900">
                  QA-Modus: {selectedRegulation} {getRegName(selectedRegulation)}
                </span>
              </div>
              <div className="flex items-center gap-2 ml-auto">
                <span className="text-sm text-slate-600">
                  Chunk {docTotalChunks === 0 ? 0 : docChunkIndex + 1} / {docTotalChunks}
                </span>
                <button
                  onClick={handlePrev}
                  disabled={docChunkIndex === 0}
                  className="px-3 py-1 text-sm border rounded-lg bg-white hover:bg-slate-50 disabled:opacity-30"
                >
                  &#9664; Prev
                </button>
                <button
                  onClick={handleNext}
                  disabled={docChunkIndex >= docChunks.length - 1}
                  className="px-3 py-1 text-sm border rounded-lg bg-white hover:bg-slate-50 disabled:opacity-30"
                >
                  Next &#9654;
                </button>
              </div>
              <div className="flex items-center gap-2">
                <label className="text-xs text-slate-500">Chunks/Seite:</label>
                <select
                  value={chunksPerPage}
                  onChange={(e) => setChunksPerPage(Number(e.target.value))}
                  className="px-2 py-1 border rounded text-xs"
                >
                  {[3, 4, 5, 6, 8, 10, 12, 15, 20].map(n => (
                    <option key={n} value={n}>{n}</option>
                  ))}
                </select>
                <button
                  onClick={() => setSplitViewActive(!splitViewActive)}
                  className={`px-3 py-1 text-xs rounded-lg border ${
                    splitViewActive ? 'bg-teal-50 border-teal-300 text-teal-700' : 'bg-slate-50 border-slate-300 text-slate-600'
                  }`}
                >
                  {splitViewActive ? 'Split-View an' : 'Split-View aus'}
                </button>
              </div>
            </>
          )}
        </div>
      </div>
      {/* Main content: Sidebar + Content */}
      <div className="flex gap-4" style={{ minHeight: '70vh' }}>
        {/* Sidebar */}
        <div className="w-64 flex-shrink-0 bg-white rounded-xl border border-slate-200 overflow-hidden flex flex-col">
          <div className="p-3 border-b border-slate-100">
            <input
              type="text"
              value={filterSearch}
              onChange={(e) => setFilterSearch(e.target.value)}
              placeholder="Suche..."
              className="w-full px-2 py-1.5 border rounded-lg text-sm focus:ring-2 focus:ring-teal-500"
            />
            {countsLoading && (
              <div className="text-xs text-slate-400 mt-1">Counts werden geladen...</div>
            )}
          </div>
          <div className="flex-1 overflow-y-auto">
            {GROUP_ORDER.map(group => {
              const items = filteredRegulations[group]
              if (items.length === 0) return null
              const isCollapsed = collapsedGroups.has(group)
              return (
                <div key={group}>
                  <button
                    onClick={() => toggleGroup(group)}
                    className="w-full px-3 py-1.5 text-left text-xs font-semibold text-slate-500 bg-slate-50 hover:bg-slate-100 flex items-center justify-between"
                  >
                    <span>{GROUP_LABELS[group]}</span>
                    <span className="text-slate-400">{isCollapsed ? '+' : '-'}</span>
                  </button>
                  {!isCollapsed && items.map(reg => {
                    const count = regulationCounts[reg.code] ?? REGULATIONS_IN_RAG[reg.code]?.chunks ?? 0
                    const isSelected = selectedRegulation === reg.code
                    return (
                      <button
                        key={reg.code}
                        onClick={() => handleSelectRegulation(reg.code)}
                        className={`w-full px-3 py-1.5 text-left text-sm flex items-center justify-between hover:bg-teal-50 transition-colors ${
                          isSelected ? 'bg-teal-100 text-teal-900 font-medium' : 'text-slate-700'
                        }`}
                      >
                        <span className="truncate">{reg.code}</span>
                        <span className={`text-xs tabular-nums ${count > 0 ? 'text-slate-500' : 'text-slate-300'}`}>
                          {count > 0 ? count.toLocaleString() : '—'}
                        </span>
                      </button>
                    )
                  })}
                </div>
              )
            })}
          </div>
        </div>
        {/* Content area */}
        {!selectedRegulation ? (
          <div className="flex-1 flex items-center justify-center bg-white rounded-xl border border-slate-200">
            <div className="text-center text-slate-400 space-y-2">
              <div className="text-4xl">&#128269;</div>
              <p className="text-sm">Waehle ein Dokument in der Sidebar, um die QA-Ansicht zu starten.</p>
              <p className="text-xs text-slate-300">Pfeiltasten navigieren zwischen Chunks.</p>
            </div>
          </div>
        ) : docLoading ? (
          <div className="flex-1 flex items-center justify-center bg-white rounded-xl border border-slate-200">
            <div className="text-center text-slate-500 space-y-2">
              <div className="animate-spin text-3xl">&#9881;</div>
              <p className="text-sm">Chunks werden geladen...</p>
              <p className="text-xs text-slate-400">
                {selectedRegulation}: {REGULATIONS_IN_RAG[selectedRegulation]?.chunks || '?'} Chunks erwartet
              </p>
            </div>
          </div>
        ) : docChunks.length === 0 ? (
          // Nothing was loaded (no chunks ingested or the request failed): show an explicit
          // empty state instead of an empty panel with "Index: 0 / -1".
          <div className="flex-1 flex items-center justify-center bg-white rounded-xl border border-slate-200">
            <div className="text-center text-slate-400 space-y-2">
              <div className="text-3xl">&#128196;</div>
              <p className="text-sm">Keine Chunks fuer {selectedRegulation} gefunden.</p>
            </div>
          </div>
        ) : (
          <div className={`flex-1 grid gap-4 ${splitViewActive ? 'grid-cols-2' : 'grid-cols-1'}`}>
            {/* Chunk-Text Panel */}
            <div className="bg-white rounded-xl border border-slate-200 overflow-hidden flex flex-col">
              <div className="px-4 py-2 bg-slate-50 border-b border-slate-100 flex items-center justify-between">
                <span className="text-sm font-medium text-slate-700">Chunk-Text</span>
                <span className="text-xs text-slate-400">
                  Index: {docChunkIndex} / {docTotalChunks - 1}
                </span>
              </div>
              <div className="flex-1 overflow-y-auto p-4 space-y-3">
                {/* Overlap from previous chunk */}
                {prevChunk && (
                  <div className="text-xs text-slate-400 bg-amber-50 border-l-2 border-amber-300 px-3 py-2 rounded-r">
                    <div className="font-medium text-amber-600 mb-1">&#8593; Overlap (vorheriger Chunk #{docChunkIndex - 1})</div>
                    <p className="whitespace-pre-wrap break-words leading-relaxed">{getOverlapPrev()}</p>
                  </div>
                )}
                {/* Current chunk text */}
                {currentChunk && (
                  <div className="text-sm text-slate-800 whitespace-pre-wrap break-words leading-relaxed border-l-2 border-teal-400 pl-3">
                    {getChunkText(currentChunk)}
                  </div>
                )}
                {/* Overlap from next chunk */}
                {nextChunk && (
                  <div className="text-xs text-slate-400 bg-amber-50 border-l-2 border-amber-300 px-3 py-2 rounded-r">
                    <div className="font-medium text-amber-600 mb-1">&#8595; Overlap (naechster Chunk #{docChunkIndex + 1})</div>
                    <p className="whitespace-pre-wrap break-words leading-relaxed">{getOverlapNext()}</p>
                  </div>
                )}
                {/* Metadata */}
                {currentChunk && (
                  <div className="mt-4 pt-3 border-t border-slate-100">
                    <div className="text-xs font-medium text-slate-500 mb-2">Metadaten</div>
                    <div className="grid grid-cols-2 gap-x-4 gap-y-1 text-xs">
                      {Object.entries(currentChunk)
                        .filter(([k]) => !HIDDEN_META_KEYS.includes(k))
                        .map(([k, v]) => (
                          <div key={k} className="flex gap-1">
                            <span className="font-medium text-slate-500">{k}:</span>
                            <span className="text-slate-700 truncate">{formatMetaValue(v)}</span>
                          </div>
                        ))}
                    </div>
                  </div>
                )}
              </div>
              {/* Bottom nav */}
              <div className="px-4 py-2 border-t border-slate-100 bg-slate-50 flex items-center justify-between">
                <button
                  onClick={handlePrev}
                  disabled={docChunkIndex === 0}
                  className="px-3 py-1 text-xs border rounded bg-white hover:bg-slate-50 disabled:opacity-30"
                >
                  &#9664; Zurueck
                </button>
                <div className="flex items-center gap-3">
                  <input
                    type="number"
                    min={0}
                    max={docTotalChunks - 1}
                    value={docChunkIndex}
                    onChange={(e) => {
                      const v = parseInt(e.target.value, 10)
                      if (!isNaN(v) && v >= 0 && v < docTotalChunks) setDocChunkIndex(v)
                    }}
                    className="w-20 px-2 py-1 border rounded text-xs text-center"
                  />
                  <span className="text-xs text-slate-400">/ {docTotalChunks - 1}</span>
                </div>
                <button
                  onClick={handleNext}
                  disabled={docChunkIndex >= docChunks.length - 1}
                  className="px-3 py-1 text-xs border rounded bg-white hover:bg-slate-50 disabled:opacity-30"
                >
                  Weiter &#9654;
                </button>
              </div>
            </div>
            {/* PDF-Viewer Panel */}
            {splitViewActive && (
              <div className="bg-white rounded-xl border border-slate-200 overflow-hidden flex flex-col">
                <div className="px-4 py-2 bg-slate-50 border-b border-slate-100 flex items-center justify-between">
                  <span className="text-sm font-medium text-slate-700">Original-PDF</span>
                  <span className="text-xs text-slate-400">
                    Seite ~{pdfPage}
                    {pdfMapping?.totalPages ? ` / ${pdfMapping.totalPages}` : ''}
                  </span>
                </div>
                <div className="flex-1 relative">
                  {pdfUrl ? (
                    // The key forces a remount when the page changes, so the viewer jumps to the new page.
                    <iframe
                      key={`${selectedRegulation}-${pdfPage}`}
                      src={pdfUrl}
                      className="absolute inset-0 w-full h-full border-0"
                      title="Original PDF"
                    />
                  ) : (
                    <div className="flex items-center justify-center h-full text-slate-400 text-sm">
                      <div className="text-center space-y-2">
                        <div className="text-3xl">&#128196;</div>
                        <p>Kein PDF-Mapping fuer {selectedRegulation}.</p>
                        <p className="text-xs">Bitte rag-pdf-mapping.ts ergaenzen und PDF in ~/rag-originals/ ablegen.</p>
                      </div>
                    </div>
                  )}
                </div>
                {pdfUrl && (
                  <div className="px-4 py-2 border-t border-slate-100 bg-slate-50 flex items-center justify-between">
                    <span className="text-xs text-slate-500">
                      {pdfMapping?.filename}
                    </span>
                    <a
                      href={pdfUrl.split('#')[0]}
                      target="_blank"
                      rel="noopener noreferrer"
                      className="text-xs text-teal-600 hover:text-teal-800"
                    >
                      PDF oeffnen &#8599;
                    </a>
                  </div>
                )}
              </div>
            )}
          </div>
        )}
      </div>
    </div>
  )
}

View File

@@ -0,0 +1,110 @@
/** Metadata about the original PDF that belongs to one regulation. */
export interface RagPdfMapping {
/** File name of the PDF. */
filename: string
/** Total number of pages of the PDF, if known. */
totalPages?: number
/** Document-specific chunks-per-page ratio, if known. */
chunksPerPage?: number
/** Language code of the PDF, e.g. 'de' or 'en' (presumably ISO 639-1 — confirm). */
language: string
}
/**
 * PDF metadata per regulation, keyed by regulation code.
 * Only some entries carry `totalPages`; none currently sets `chunksPerPage`.
 */
export const RAG_PDF_MAPPING: Record<string, RagPdfMapping> = {
// EU regulations
GDPR: { filename: 'GDPR_DE.pdf', language: 'de', totalPages: 88 },
EPRIVACY: { filename: 'EPRIVACY_DE.pdf', language: 'de' },
SCC: { filename: 'SCC_DE.pdf', language: 'de' },
SCC_FULL_TEXT: { filename: 'SCC_FULL_TEXT_DE.pdf', language: 'de' },
AIACT: { filename: 'AIACT_DE.pdf', language: 'de', totalPages: 144 },
CRA: { filename: 'CRA_DE.pdf', language: 'de' },
NIS2: { filename: 'NIS2_DE.pdf', language: 'de' },
DGA: { filename: 'DGA_DE.pdf', language: 'de' },
DSA: { filename: 'DSA_DE.pdf', language: 'de' },
PLD: { filename: 'PLD_DE.pdf', language: 'de' },
E_COMMERCE_RL: { filename: 'E_COMMERCE_RL_DE.pdf', language: 'de' },
VERBRAUCHERRECHTE_RL: { filename: 'VERBRAUCHERRECHTE_RL_DE.pdf', language: 'de' },
DIGITALE_INHALTE_RL: { filename: 'DIGITALE_INHALTE_RL_DE.pdf', language: 'de' },
DMA: { filename: 'DMA_DE.pdf', language: 'de' },
DPF: { filename: 'DPF_DE.pdf', language: 'de' },
EUCSA: { filename: 'EUCSA_DE.pdf', language: 'de' },
DATAACT: { filename: 'DATAACT_DE.pdf', language: 'de' },
DORA: { filename: 'DORA_DE.pdf', language: 'de' },
PSD2: { filename: 'PSD2_DE.pdf', language: 'de' },
AMLR: { filename: 'AMLR_DE.pdf', language: 'de' },
MiCA: { filename: 'MiCA_DE.pdf', language: 'de' },
EHDS: { filename: 'EHDS_DE.pdf', language: 'de' },
EAA: { filename: 'EAA_DE.pdf', language: 'de' },
DSM: { filename: 'DSM_DE.pdf', language: 'de' },
GPSR: { filename: 'GPSR_DE.pdf', language: 'de' },
MACHINERY_REG: { filename: 'MACHINERY_REG_DE.pdf', language: 'de' },
BLUE_GUIDE: { filename: 'BLUE_GUIDE_DE.pdf', language: 'de' },
// German laws
TDDDG: { filename: 'TDDDG_DE.pdf', language: 'de' },
BDSG_FULL: { filename: 'BDSG_FULL_DE.pdf', language: 'de' },
DE_DDG: { filename: 'DE_DDG.pdf', language: 'de' },
DE_BGB_AGB: { filename: 'DE_BGB_AGB.pdf', language: 'de' },
DE_EGBGB: { filename: 'DE_EGBGB.pdf', language: 'de' },
DE_HGB_RET: { filename: 'DE_HGB_RET.pdf', language: 'de' },
DE_AO_RET: { filename: 'DE_AO_RET.pdf', language: 'de' },
DE_UWG: { filename: 'DE_UWG.pdf', language: 'de' },
DE_TKG: { filename: 'DE_TKG.pdf', language: 'de' },
DE_PANGV: { filename: 'DE_PANGV.pdf', language: 'de' },
DE_DLINFOV: { filename: 'DE_DLINFOV.pdf', language: 'de' },
DE_BETRVG: { filename: 'DE_BETRVG.pdf', language: 'de' },
DE_GESCHGEHG: { filename: 'DE_GESCHGEHG.pdf', language: 'de' },
DE_BSIG: { filename: 'DE_BSIG.pdf', language: 'de' },
DE_USTG_RET: { filename: 'DE_USTG_RET.pdf', language: 'de' },
// BSI standards
'BSI-TR-03161-1': { filename: 'BSI-TR-03161-1.pdf', language: 'de' },
'BSI-TR-03161-2': { filename: 'BSI-TR-03161-2.pdf', language: 'de' },
'BSI-TR-03161-3': { filename: 'BSI-TR-03161-3.pdf', language: 'de' },
// Austrian laws
AT_DSG: { filename: 'AT_DSG.pdf', language: 'de' },
AT_DSG_FULL: { filename: 'AT_DSG_FULL.pdf', language: 'de' },
AT_ECG: { filename: 'AT_ECG.pdf', language: 'de' },
AT_TKG: { filename: 'AT_TKG.pdf', language: 'de' },
AT_KSCHG: { filename: 'AT_KSCHG.pdf', language: 'de' },
AT_FAGG: { filename: 'AT_FAGG.pdf', language: 'de' },
AT_UGB_RET: { filename: 'AT_UGB_RET.pdf', language: 'de' },
AT_BAO_RET: { filename: 'AT_BAO_RET.pdf', language: 'de' },
AT_MEDIENG: { filename: 'AT_MEDIENG.pdf', language: 'de' },
AT_ABGB_AGB: { filename: 'AT_ABGB_AGB.pdf', language: 'de' },
AT_UWG: { filename: 'AT_UWG.pdf', language: 'de' },
// Swiss laws
CH_DSG: { filename: 'CH_DSG.pdf', language: 'de' },
CH_DSV: { filename: 'CH_DSV.pdf', language: 'de' },
CH_OR_AGB: { filename: 'CH_OR_AGB.pdf', language: 'de' },
CH_UWG: { filename: 'CH_UWG.pdf', language: 'de' },
CH_FMG: { filename: 'CH_FMG.pdf', language: 'de' },
CH_GEBUV: { filename: 'CH_GEBUV.pdf', language: 'de' },
CH_ZERTES: { filename: 'CH_ZERTES.pdf', language: 'de' },
CH_ZGB_PERS: { filename: 'CH_ZGB_PERS.pdf', language: 'de' },
// Liechtenstein
LI_DSG: { filename: 'LI_DSG.pdf', language: 'de' },
// National data protection laws (other EU)
ES_LOPDGDD: { filename: 'ES_LOPDGDD.pdf', language: 'es' },
IT_CODICE_PRIVACY: { filename: 'IT_CODICE_PRIVACY.pdf', language: 'it' },
NL_UAVG: { filename: 'NL_UAVG.pdf', language: 'nl' },
FR_CNIL_GUIDE: { filename: 'FR_CNIL_GUIDE.pdf', language: 'fr' },
IE_DPA_2018: { filename: 'IE_DPA_2018.pdf', language: 'en' },
UK_DPA_2018: { filename: 'UK_DPA_2018.pdf', language: 'en' },
UK_GDPR: { filename: 'UK_GDPR.pdf', language: 'en' },
NO_PERSONOPPLYSNINGSLOVEN: { filename: 'NO_PERSONOPPLYSNINGSLOVEN.pdf', language: 'no' },
SE_DATASKYDDSLAG: { filename: 'SE_DATASKYDDSLAG.pdf', language: 'sv' },
PL_UODO: { filename: 'PL_UODO.pdf', language: 'pl' },
CZ_ZOU: { filename: 'CZ_ZOU.pdf', language: 'cs' },
HU_INFOTV: { filename: 'HU_INFOTV.pdf', language: 'hu' },
BE_DPA_LAW: { filename: 'BE_DPA_LAW.pdf', language: 'nl' },
FI_TIETOSUOJALAKI: { filename: 'FI_TIETOSUOJALAKI.pdf', language: 'fi' },
DK_DATABESKYTTELSESLOVEN: { filename: 'DK_DATABESKYTTELSESLOVEN.pdf', language: 'da' },
LU_DPA_LAW: { filename: 'LU_DPA_LAW.pdf', language: 'fr' },
// EDPB guidelines
EDPB_GUIDELINES_5_2020: { filename: 'EDPB_GUIDELINES_5_2020.pdf', language: 'en' },
EDPB_GUIDELINES_7_2020: { filename: 'EDPB_GUIDELINES_7_2020.pdf', language: 'en' },
// Frameworks
ENISA_SECURE_BY_DESIGN: { filename: 'ENISA_SECURE_BY_DESIGN.pdf', language: 'en' },
ENISA_SUPPLY_CHAIN: { filename: 'ENISA_SUPPLY_CHAIN.pdf', language: 'en' },
NIST_SSDF: { filename: 'NIST_SSDF.pdf', language: 'en' },
NIST_CSF_2: { filename: 'NIST_CSF_2.pdf', language: 'en' },
OECD_AI_PRINCIPLES: { filename: 'OECD_AI_PRINCIPLES.pdf', language: 'en' },
// EU-IFRS / EFRAG
EU_IFRS_DE: { filename: 'EU_IFRS_DE.pdf', language: 'de' },
EU_IFRS_EN: { filename: 'EU_IFRS_EN.pdf', language: 'en' },
EFRAG_ENDORSEMENT: { filename: 'EFRAG_ENDORSEMENT.pdf', language: 'en' },
}

View File

@@ -11,6 +11,8 @@ import React, { useState, useEffect, useCallback } from 'react'
import Link from 'next/link'
import { PagePurpose } from '@/components/common/PagePurpose'
import { AIModuleSidebarResponsive } from '@/components/ai/AIModuleSidebar'
import { REGULATIONS_IN_RAG } from './rag-constants'
import { ChunkBrowserQA } from './components/ChunkBrowserQA'
// API uses local proxy route to klausur-service
const API_PROXY = '/api/legal-corpus'
@@ -1374,116 +1376,7 @@ const REGULATION_LICENSES: Record<string, { license: string; licenseNote: string
DMA: { license: 'PUBLIC_DOMAIN', licenseNote: 'EU-Verordnung — amtliches Werk' },
}
// Regulations that are currently ingested in RAG (Qdrant collections).
// Maps each regulation code to its collection and a recorded chunk count.
// Updated: 2026-02-27 — update whenever new documents are ingested!
const REGULATIONS_IN_RAG: Record<string, { collection: string; chunks: number }> = {
// EU regulations/directives (bp_compliance_ce: 7,341 total)
GDPR: { collection: 'bp_compliance_ce', chunks: 1842 },
EPRIVACY: { collection: 'bp_compliance_ce', chunks: 156 },
SCC: { collection: 'bp_compliance_ce', chunks: 89 },
SCC_FULL_TEXT: { collection: 'bp_compliance_ce', chunks: 154 },
AIACT: { collection: 'bp_compliance_ce', chunks: 1245 },
CRA: { collection: 'bp_compliance_ce', chunks: 687 },
NIS2: { collection: 'bp_compliance_ce', chunks: 534 },
DGA: { collection: 'bp_compliance_ce', chunks: 312 },
DSA: { collection: 'bp_compliance_ce', chunks: 978 },
PLD: { collection: 'bp_compliance_ce', chunks: 124 },
E_COMMERCE_RL: { collection: 'bp_compliance_ce', chunks: 198 },
VERBRAUCHERRECHTE_RL: { collection: 'bp_compliance_ce', chunks: 245 },
DIGITALE_INHALTE_RL: { collection: 'bp_compliance_ce', chunks: 187 },
DMA: { collection: 'bp_compliance_ce', chunks: 590 },
// German laws (bp_compliance_gesetze: 33,929 total)
TDDDG: { collection: 'bp_compliance_gesetze', chunks: 215 },
BDSG_FULL: { collection: 'bp_compliance_gesetze', chunks: 487 },
DE_DDG: { collection: 'bp_compliance_gesetze', chunks: 198 },
DE_BGB_AGB: { collection: 'bp_compliance_gesetze', chunks: 4250 },
DE_EGBGB: { collection: 'bp_compliance_gesetze', chunks: 312 },
DE_HGB_RET: { collection: 'bp_compliance_gesetze', chunks: 6840 },
DE_AO_RET: { collection: 'bp_compliance_gesetze', chunks: 5620 },
// BSI standards (bp_compliance_gesetze)
'BSI-TR-03161-1': { collection: 'bp_compliance_gesetze', chunks: 425 },
'BSI-TR-03161-2': { collection: 'bp_compliance_gesetze', chunks: 380 },
'BSI-TR-03161-3': { collection: 'bp_compliance_gesetze', chunks: 345 },
// National data protection laws (bp_compliance_gesetze)
AT_DSG: { collection: 'bp_compliance_gesetze', chunks: 287 },
CH_DSG: { collection: 'bp_compliance_gesetze', chunks: 156 },
ES_LOPDGDD: { collection: 'bp_compliance_gesetze', chunks: 1245 },
IT_CODICE_PRIVACY: { collection: 'bp_compliance_gesetze', chunks: 198 },
NL_UAVG: { collection: 'bp_compliance_gesetze', chunks: 1320 },
FR_CNIL_GUIDE: { collection: 'bp_compliance_gesetze', chunks: 1450 },
IE_DPA_2018: { collection: 'bp_compliance_gesetze', chunks: 534 },
UK_DPA_2018: { collection: 'bp_compliance_gesetze', chunks: 1680 },
UK_GDPR: { collection: 'bp_compliance_gesetze', chunks: 890 },
NO_PERSONOPPLYSNINGSLOVEN: { collection: 'bp_compliance_gesetze', chunks: 245 },
SE_DATASKYDDSLAG: { collection: 'bp_compliance_gesetze', chunks: 167 },
PL_UODO: { collection: 'bp_compliance_gesetze', chunks: 198 },
CZ_ZOU: { collection: 'bp_compliance_gesetze', chunks: 1120 },
HU_INFOTV: { collection: 'bp_compliance_gesetze', chunks: 1345 },
// EDPB guidelines (bp_compliance_datenschutz)
EDPB_GUIDELINES_5_2020: { collection: 'bp_compliance_datenschutz', chunks: 245 },
EDPB_GUIDELINES_7_2020: { collection: 'bp_compliance_datenschutz', chunks: 347 },
// === New regulations (2026-02-28) ===
// EU CE regulations (bp_compliance_ce)
DPF: { collection: 'bp_compliance_ce', chunks: 1232 },
EUCSA: { collection: 'bp_compliance_ce', chunks: 558 },
DATAACT: { collection: 'bp_compliance_ce', chunks: 809 },
DORA: { collection: 'bp_compliance_ce', chunks: 823 },
PSD2: { collection: 'bp_compliance_ce', chunks: 796 },
AMLR: { collection: 'bp_compliance_ce', chunks: 1182 },
MiCA: { collection: 'bp_compliance_ce', chunks: 1640 },
EHDS: { collection: 'bp_compliance_ce', chunks: 1212 },
EAA: { collection: 'bp_compliance_ce', chunks: 433 },
DSM: { collection: 'bp_compliance_ce', chunks: 416 },
GPSR: { collection: 'bp_compliance_ce', chunks: 509 },
// German laws (bp_compliance_gesetze)
DE_UWG: { collection: 'bp_compliance_gesetze', chunks: 1 },
DE_TKG: { collection: 'bp_compliance_gesetze', chunks: 1631 },
DE_PANGV: { collection: 'bp_compliance_gesetze', chunks: 1 },
DE_DLINFOV: { collection: 'bp_compliance_gesetze', chunks: 21 },
DE_BETRVG: { collection: 'bp_compliance_gesetze', chunks: 498 },
DE_GESCHGEHG: { collection: 'bp_compliance_gesetze', chunks: 63 },
DE_BSIG: { collection: 'bp_compliance_gesetze', chunks: 1 },
DE_USTG_RET: { collection: 'bp_compliance_gesetze', chunks: 1071 },
// Austrian laws (bp_compliance_gesetze)
AT_DSG_FULL: { collection: 'bp_compliance_gesetze', chunks: 6 },
LI_DSG: { collection: 'bp_compliance_gesetze', chunks: 2 },
AT_ECG: { collection: 'bp_compliance_gesetze', chunks: 120 },
AT_TKG: { collection: 'bp_compliance_gesetze', chunks: 2174 },
AT_KSCHG: { collection: 'bp_compliance_gesetze', chunks: 402 },
AT_FAGG: { collection: 'bp_compliance_gesetze', chunks: 2 },
AT_UGB_RET: { collection: 'bp_compliance_gesetze', chunks: 2828 },
AT_BAO_RET: { collection: 'bp_compliance_gesetze', chunks: 2246 },
AT_MEDIENG: { collection: 'bp_compliance_gesetze', chunks: 571 },
AT_ABGB_AGB: { collection: 'bp_compliance_gesetze', chunks: 2521 },
AT_UWG: { collection: 'bp_compliance_gesetze', chunks: 403 },
// Swiss laws (bp_compliance_gesetze)
CH_DSV: { collection: 'bp_compliance_gesetze', chunks: 5 },
CH_OR_AGB: { collection: 'bp_compliance_gesetze', chunks: 5 },
CH_UWG: { collection: 'bp_compliance_gesetze', chunks: 5 },
CH_FMG: { collection: 'bp_compliance_gesetze', chunks: 5 },
CH_GEBUV: { collection: 'bp_compliance_gesetze', chunks: 5 },
CH_ZERTES: { collection: 'bp_compliance_gesetze', chunks: 5 },
CH_ZGB_PERS: { collection: 'bp_compliance_gesetze', chunks: 5 },
// Further EU countries (bp_compliance_gesetze)
BE_DPA_LAW: { collection: 'bp_compliance_gesetze', chunks: 3 },
FI_TIETOSUOJALAKI: { collection: 'bp_compliance_gesetze', chunks: 2 },
DK_DATABESKYTTELSESLOVEN: { collection: 'bp_compliance_gesetze', chunks: 2 },
LU_DPA_LAW: { collection: 'bp_compliance_gesetze', chunks: 2 },
// === Industry compliance (2026-02-28) ===
// EU CE regulations (bp_compliance_ce)
MACHINERY_REG: { collection: 'bp_compliance_ce', chunks: 0 },
BLUE_GUIDE: { collection: 'bp_compliance_ce', chunks: 0 },
// Frameworks/Guidance (bp_compliance_datenschutz)
ENISA_SECURE_BY_DESIGN: { collection: 'bp_compliance_datenschutz', chunks: 0 },
ENISA_SUPPLY_CHAIN: { collection: 'bp_compliance_datenschutz', chunks: 0 },
NIST_SSDF: { collection: 'bp_compliance_datenschutz', chunks: 0 },
NIST_CSF_2: { collection: 'bp_compliance_datenschutz', chunks: 0 },
OECD_AI_PRINCIPLES: { collection: 'bp_compliance_datenschutz', chunks: 0 },
// EU-IFRS / EFRAG (2026-02-28)
EU_IFRS_DE: { collection: 'bp_compliance_ce', chunks: 0 },
EU_IFRS_EN: { collection: 'bp_compliance_ce', chunks: 0 },
EFRAG_ENDORSEMENT: { collection: 'bp_compliance_datenschutz', chunks: 0 },
}
// REGULATIONS_IN_RAG is imported from ./rag-constants.ts
// Helper: check whether a regulation code is ingested in RAG.
// Uses an own-property check because the `in` operator also matches inherited
// keys, which would report codes such as 'toString' or 'constructor' as ingested.
const isInRag = (code: string): boolean =>
  Object.prototype.hasOwnProperty.call(REGULATIONS_IN_RAG, code)
@@ -1850,17 +1743,7 @@ export default function RAGPage() {
const [autoRefresh, setAutoRefresh] = useState(true)
const [elapsedTime, setElapsedTime] = useState<string>('')
// Chunk browser state
const [chunkCollection, setChunkCollection] = useState('bp_compliance_gesetze')
const [chunkData, setChunkData] = useState<Record<string, unknown>[]>([])
const [chunkOffset, setChunkOffset] = useState<string | null>(null)
const [chunkHistory, setChunkHistory] = useState<(string | null)[]>([])
const [chunkLoading, setChunkLoading] = useState(false)
const [chunkTextSearch, setChunkTextSearch] = useState('')
const [chunkTotalCount, setChunkTotalCount] = useState(0)
const [chunkCurrentPage, setChunkCurrentPage] = useState(0)
const [chunkNextOffset, setChunkNextOffset] = useState<string | null>(null)
const [expandedChunk, setExpandedChunk] = useState<number | null>(null)
// Chunk browser state is now in ChunkBrowserQA component
// DSFA corpus state
const [dsfaSources, setDsfaSources] = useState<DsfaSource[]>([])
@@ -2107,68 +1990,7 @@ export default function RAGPage() {
return () => clearInterval(interval)
}, [pipelineState?.started_at, pipelineState?.status])
// Load one page (20 entries) of chunks through the API proxy's `scroll` action.
//   offset        - scroll offset to resume from; null/empty starts at the beginning
//   newCollection - collection to read; falls back to the chunkCollection state
// The trimmed chunkTextSearch value, when non-empty, is sent as `text_search`.
// On an OK response the chunk list and next offset are replaced and any
// expanded row is collapsed; a non-OK response leaves the current state as is.
const loadChunks = async (offset: string | null = null, newCollection?: string) => {
  const targetCollection = newCollection || chunkCollection
  setChunkLoading(true)
  try {
    const query = new URLSearchParams({
      action: 'scroll',
      collection: targetCollection,
      limit: '20',
    })
    if (offset) {
      query.append('offset', offset)
    }
    const searchTerm = chunkTextSearch.trim()
    if (searchTerm) {
      query.append('text_search', searchTerm)
    }

    const response = await fetch(`${API_PROXY}?${query}`)
    if (!response.ok) return

    const payload = await response.json()
    setChunkData(payload.chunks || [])
    setChunkNextOffset(payload.next_offset || null)
    setExpandedChunk(null)
  } catch (error) {
    console.error('Chunk scroll failed:', error)
  } finally {
    // Runs on success, early return and failure alike, so the loading flag always clears.
    setChunkLoading(false)
  }
}
// Fetch the total number of chunks in `col` via the proxy's `collection-count`
// action and store it in chunkTotalCount (0 when the response carries no count).
// Best effort: network errors and non-OK responses leave the previous value untouched.
const loadChunkCount = async (col: string) => {
  try {
    const countUrl = `${API_PROXY}?action=collection-count&collection=${encodeURIComponent(col)}`
    const response = await fetch(countUrl)
    if (!response.ok) return
    const payload = await response.json()
    setChunkTotalCount(payload.count || 0)
  } catch {
    /* ignore */
  }
}
// Switch the chunk browser to another collection: store the selection, reset
// the paging state (offset, back-history, page index), then request the
// collection's total count and its first page of chunks.
const handleChunkCollectionChange = (col: string) => {
setChunkCollection(col)
setChunkOffset(null)
setChunkHistory([])
setChunkCurrentPage(0)
loadChunkCount(col)
// `col` is passed explicitly; without it loadChunks falls back to the
// chunkCollection value captured in its closure.
loadChunks(null, col)
}
// Advance to the next page of chunks. Does nothing when there is no next
// offset. The offset of the page being left is appended to the history so
// that handleChunkPrev can return to it.
const handleChunkNext = () => {
  const upcomingOffset = chunkNextOffset
  if (!upcomingOffset) {
    return
  }
  const leavingOffset = chunkOffset
  setChunkHistory((trail) => trail.concat([leavingOffset]))
  setChunkOffset(upcomingOffset)
  setChunkCurrentPage((page) => page + 1)
  loadChunks(upcomingOffset)
}
// Go back one page of chunks. Does nothing when the history is empty.
// The most recent history entry becomes the current offset (null means the
// first page); the history state is replaced by a copy without that entry.
const handleChunkPrev = () => {
  const depth = chunkHistory.length
  if (depth === 0) {
    return
  }
  const remainingHistory = chunkHistory.slice(0, -1)
  const targetOffset = chunkHistory[depth - 1] ?? null
  setChunkHistory(remainingHistory)
  setChunkOffset(targetOffset)
  // Clamp at 0 so the page index can never go negative.
  setChunkCurrentPage((page) => Math.max(0, page - 1))
  loadChunks(targetOffset)
}
// Chunk browser functions are now in ChunkBrowserQA component
const handleSearch = async () => {
if (!searchQuery.trim()) return
@@ -2611,10 +2433,6 @@ export default function RAGPage() {
<button
onClick={(e) => {
e.stopPropagation()
const ragEntry = REGULATIONS_IN_RAG[reg.code as keyof typeof REGULATIONS_IN_RAG]
const col = ragEntry?.collection || 'bp_compliance_gesetze'
setChunkTextSearch(reg.name)
handleChunkCollectionChange(col)
setActiveTab('chunks')
}}
className="text-teal-600 hover:text-teal-700 font-medium"
@@ -3263,172 +3081,7 @@ export default function RAGPage() {
)}
{activeTab === 'chunks' && (
<div className="space-y-6">
{/* Collection Selector + Controls */}
<div className="bg-white rounded-xl border border-slate-200 p-6">
<h3 className="font-semibold text-slate-900 mb-4">Chunk-Browser</h3>
<div className="flex flex-wrap gap-4 items-end">
<div>
<label className="block text-sm font-medium text-slate-700 mb-1">Collection</label>
<select
value={chunkCollection}
onChange={(e) => handleChunkCollectionChange(e.target.value)}
className="px-3 py-2 border rounded-lg text-sm focus:ring-2 focus:ring-teal-500"
>
<option value="bp_compliance_gesetze">bp_compliance_gesetze</option>
<option value="bp_compliance_ce">bp_compliance_ce</option>
<option value="bp_compliance_datenschutz">bp_compliance_datenschutz</option>
<option value="bp_dsfa_corpus">bp_dsfa_corpus</option>
<option value="bp_compliance_recht">bp_compliance_recht</option>
<option value="bp_legal_templates">bp_legal_templates</option>
<option value="bp_compliance_gdpr">bp_compliance_gdpr</option>
<option value="bp_compliance_schulrecht">bp_compliance_schulrecht</option>
<option value="bp_dsfa_templates">bp_dsfa_templates</option>
<option value="bp_dsfa_risks">bp_dsfa_risks</option>
</select>
</div>
<div className="flex-1 min-w-[200px]">
<label className="block text-sm font-medium text-slate-700 mb-1">Textsuche (filtert geladene Seite)</label>
<div className="flex gap-2">
<input
type="text"
value={chunkTextSearch}
onChange={(e) => setChunkTextSearch(e.target.value)}
onKeyDown={(e) => { if (e.key === 'Enter') loadChunks(null) }}
placeholder="z.B. DSGVO, IFRS, Maschinenverordnung..."
className="flex-1 px-3 py-2 border rounded-lg text-sm focus:ring-2 focus:ring-teal-500"
/>
<button
onClick={() => { setChunkOffset(null); setChunkHistory([]); setChunkCurrentPage(0); loadChunks(null) }}
className="px-4 py-2 bg-teal-600 text-white text-sm rounded-lg hover:bg-teal-700"
>
Laden
</button>
</div>
</div>
<div className="text-sm text-slate-500">
{chunkTotalCount > 0 && <span>{chunkTotalCount.toLocaleString()} Chunks total</span>}
</div>
</div>
</div>
{/* Pagination */}
{chunkData.length > 0 && (
<div className="flex items-center justify-between">
<button
onClick={handleChunkPrev}
disabled={chunkCurrentPage === 0}
className="px-4 py-2 text-sm border rounded-lg bg-white hover:bg-slate-50 disabled:opacity-30"
>
Zurueck
</button>
<span className="text-sm text-slate-600">
Seite {chunkCurrentPage + 1} {chunkData.length} Chunks angezeigt
</span>
<button
onClick={handleChunkNext}
disabled={!chunkNextOffset}
className="px-4 py-2 text-sm border rounded-lg bg-white hover:bg-slate-50 disabled:opacity-30"
>
Weiter
</button>
</div>
)}
{/* Chunk List */}
{chunkLoading ? (
<div className="text-center py-12 text-slate-500">Chunks werden geladen...</div>
) : chunkData.length === 0 ? (
<div className="text-center py-12 text-slate-400">
Collection waehlen und &quot;Laden&quot; klicken um Chunks anzuzeigen.
</div>
) : (
<div className="space-y-2">
{chunkData.map((chunk, i) => {
const text = String(chunk.text || chunk.content || chunk.chunk_text || '')
const source = String(chunk.source_name || chunk.regulation_code || chunk.guideline_name || chunk.regulation_short || '')
const isExpanded = expandedChunk === i
const highlightTerm = chunkTextSearch.trim().toLowerCase()
// Wrap the first case-insensitive occurrence of highlightTerm in `str` with a
// <mark>. Returns the plain string when there is no search term or no match.
const renderHighlighted = (str: string) => {
  const matchStart = highlightTerm ? str.toLowerCase().indexOf(highlightTerm) : -1
  if (matchStart === -1) return str

  const matchEnd = matchStart + highlightTerm.length
  const before = str.slice(0, matchStart)
  const matched = str.slice(matchStart, matchEnd)
  const after = str.slice(matchEnd)
  return (
    <>
      {before}
      <mark className="bg-yellow-200 px-0.5 rounded">{matched}</mark>
      {after}
    </>
  )
}
return (
<div
key={String(chunk.id || i)}
className={`bg-white rounded-lg border transition-all cursor-pointer ${
isExpanded ? 'border-teal-300 shadow-md' : 'border-slate-200 hover:border-slate-300'
}`}
onClick={() => setExpandedChunk(isExpanded ? null : i)}
>
<div className="px-4 py-3">
<div className="flex items-center gap-2 mb-1">
<span className="text-xs font-mono text-slate-400">#{chunkCurrentPage * 20 + i + 1}</span>
{source && (
<span className="px-2 py-0.5 text-xs rounded bg-slate-100 text-slate-600">{source}</span>
)}
{chunk.article && (
<span className="text-xs text-slate-500">Art. {String(chunk.article)}</span>
)}
{chunk.language && (
<span className="text-xs text-slate-400 ml-auto">{String(chunk.language).toUpperCase()}</span>
)}
</div>
<p className={`text-sm text-slate-700 ${isExpanded ? '' : 'line-clamp-3'}`}>
{renderHighlighted(text)}
</p>
</div>
{isExpanded && (
<div className="px-4 py-3 border-t border-slate-100 bg-slate-50 text-xs text-slate-500 space-y-1">
<div className="grid grid-cols-2 md:grid-cols-4 gap-2">
{Object.entries(chunk).filter(([k]) => !['text', 'content', 'chunk_text', 'id'].includes(k)).map(([k, v]) => (
<div key={k}>
<span className="font-medium text-slate-600">{k}:</span>{' '}
<span>{String(v)}</span>
</div>
))}
</div>
</div>
)}
</div>
)
})}
</div>
)}
{/* Bottom Pagination */}
{chunkData.length > 0 && (
<div className="flex items-center justify-between">
<button
onClick={handleChunkPrev}
disabled={chunkCurrentPage === 0}
className="px-4 py-2 text-sm border rounded-lg bg-white hover:bg-slate-50 disabled:opacity-30"
>
Zurueck
</button>
<span className="text-sm text-slate-600">
Seite {chunkCurrentPage + 1}
</span>
<button
onClick={handleChunkNext}
disabled={!chunkNextOffset}
className="px-4 py-2 text-sm border rounded-lg bg-white hover:bg-slate-50 disabled:opacity-30"
>
Weiter
</button>
</div>
)}
</div>
<ChunkBrowserQA apiProxy={API_PROXY} />
)}
{activeTab === 'data' && (

View File

@@ -0,0 +1,222 @@
/**
* Shared RAG constants used by both page.tsx and ChunkBrowserQA.
* REGULATIONS_IN_RAG maps regulation codes to their Qdrant collection and chunk count.
* REGULATION_INFO provides minimal metadata (code, name, type) for all regulations.
*/
/**
 * Registry of regulations known to the RAG: each key is a regulation code,
 * each value names the Qdrant collection assigned to that regulation and a
 * static chunk count.
 *
 * NOTE(review): several entries carry `chunks: 0`; presumably these are
 * registered but not yet ingested — confirm against the ingestion pipeline.
 */
export const REGULATIONS_IN_RAG: Record<string, { collection: string; chunks: number }> = {
// EU regulations/directives (bp_compliance_ce)
GDPR: { collection: 'bp_compliance_ce', chunks: 1842 },
EPRIVACY: { collection: 'bp_compliance_ce', chunks: 156 },
SCC: { collection: 'bp_compliance_ce', chunks: 89 },
SCC_FULL_TEXT: { collection: 'bp_compliance_ce', chunks: 154 },
AIACT: { collection: 'bp_compliance_ce', chunks: 1245 },
CRA: { collection: 'bp_compliance_ce', chunks: 687 },
NIS2: { collection: 'bp_compliance_ce', chunks: 534 },
DGA: { collection: 'bp_compliance_ce', chunks: 312 },
DSA: { collection: 'bp_compliance_ce', chunks: 978 },
PLD: { collection: 'bp_compliance_ce', chunks: 124 },
E_COMMERCE_RL: { collection: 'bp_compliance_ce', chunks: 198 },
VERBRAUCHERRECHTE_RL: { collection: 'bp_compliance_ce', chunks: 245 },
DIGITALE_INHALTE_RL: { collection: 'bp_compliance_ce', chunks: 187 },
DMA: { collection: 'bp_compliance_ce', chunks: 590 },
DPF: { collection: 'bp_compliance_ce', chunks: 1232 },
EUCSA: { collection: 'bp_compliance_ce', chunks: 558 },
DATAACT: { collection: 'bp_compliance_ce', chunks: 809 },
DORA: { collection: 'bp_compliance_ce', chunks: 823 },
PSD2: { collection: 'bp_compliance_ce', chunks: 796 },
AMLR: { collection: 'bp_compliance_ce', chunks: 1182 },
MiCA: { collection: 'bp_compliance_ce', chunks: 1640 },
EHDS: { collection: 'bp_compliance_ce', chunks: 1212 },
EAA: { collection: 'bp_compliance_ce', chunks: 433 },
DSM: { collection: 'bp_compliance_ce', chunks: 416 },
GPSR: { collection: 'bp_compliance_ce', chunks: 509 },
MACHINERY_REG: { collection: 'bp_compliance_ce', chunks: 0 },
BLUE_GUIDE: { collection: 'bp_compliance_ce', chunks: 0 },
EU_IFRS_DE: { collection: 'bp_compliance_ce', chunks: 0 },
EU_IFRS_EN: { collection: 'bp_compliance_ce', chunks: 0 },
// German laws (bp_compliance_gesetze)
TDDDG: { collection: 'bp_compliance_gesetze', chunks: 215 },
BDSG_FULL: { collection: 'bp_compliance_gesetze', chunks: 487 },
DE_DDG: { collection: 'bp_compliance_gesetze', chunks: 198 },
DE_BGB_AGB: { collection: 'bp_compliance_gesetze', chunks: 4250 },
DE_EGBGB: { collection: 'bp_compliance_gesetze', chunks: 312 },
DE_HGB_RET: { collection: 'bp_compliance_gesetze', chunks: 6840 },
DE_AO_RET: { collection: 'bp_compliance_gesetze', chunks: 5620 },
DE_UWG: { collection: 'bp_compliance_gesetze', chunks: 1 },
DE_TKG: { collection: 'bp_compliance_gesetze', chunks: 1631 },
DE_PANGV: { collection: 'bp_compliance_gesetze', chunks: 1 },
DE_DLINFOV: { collection: 'bp_compliance_gesetze', chunks: 21 },
DE_BETRVG: { collection: 'bp_compliance_gesetze', chunks: 498 },
DE_GESCHGEHG: { collection: 'bp_compliance_gesetze', chunks: 63 },
DE_BSIG: { collection: 'bp_compliance_gesetze', chunks: 1 },
DE_USTG_RET: { collection: 'bp_compliance_gesetze', chunks: 1071 },
// BSI standards (bp_compliance_gesetze)
'BSI-TR-03161-1': { collection: 'bp_compliance_gesetze', chunks: 425 },
'BSI-TR-03161-2': { collection: 'bp_compliance_gesetze', chunks: 380 },
'BSI-TR-03161-3': { collection: 'bp_compliance_gesetze', chunks: 345 },
// Austrian laws (bp_compliance_gesetze)
AT_DSG: { collection: 'bp_compliance_gesetze', chunks: 287 },
AT_DSG_FULL: { collection: 'bp_compliance_gesetze', chunks: 6 },
AT_ECG: { collection: 'bp_compliance_gesetze', chunks: 120 },
AT_TKG: { collection: 'bp_compliance_gesetze', chunks: 2174 },
AT_KSCHG: { collection: 'bp_compliance_gesetze', chunks: 402 },
AT_FAGG: { collection: 'bp_compliance_gesetze', chunks: 2 },
AT_UGB_RET: { collection: 'bp_compliance_gesetze', chunks: 2828 },
AT_BAO_RET: { collection: 'bp_compliance_gesetze', chunks: 2246 },
AT_MEDIENG: { collection: 'bp_compliance_gesetze', chunks: 571 },
AT_ABGB_AGB: { collection: 'bp_compliance_gesetze', chunks: 2521 },
AT_UWG: { collection: 'bp_compliance_gesetze', chunks: 403 },
// Swiss laws (bp_compliance_gesetze)
CH_DSG: { collection: 'bp_compliance_gesetze', chunks: 156 },
CH_DSV: { collection: 'bp_compliance_gesetze', chunks: 5 },
CH_OR_AGB: { collection: 'bp_compliance_gesetze', chunks: 5 },
CH_UWG: { collection: 'bp_compliance_gesetze', chunks: 5 },
CH_FMG: { collection: 'bp_compliance_gesetze', chunks: 5 },
CH_GEBUV: { collection: 'bp_compliance_gesetze', chunks: 5 },
CH_ZERTES: { collection: 'bp_compliance_gesetze', chunks: 5 },
CH_ZGB_PERS: { collection: 'bp_compliance_gesetze', chunks: 5 },
// Liechtenstein
LI_DSG: { collection: 'bp_compliance_gesetze', chunks: 2 },
// National data protection laws (other EU)
ES_LOPDGDD: { collection: 'bp_compliance_gesetze', chunks: 1245 },
IT_CODICE_PRIVACY: { collection: 'bp_compliance_gesetze', chunks: 198 },
NL_UAVG: { collection: 'bp_compliance_gesetze', chunks: 1320 },
FR_CNIL_GUIDE: { collection: 'bp_compliance_gesetze', chunks: 1450 },
IE_DPA_2018: { collection: 'bp_compliance_gesetze', chunks: 534 },
UK_DPA_2018: { collection: 'bp_compliance_gesetze', chunks: 1680 },
UK_GDPR: { collection: 'bp_compliance_gesetze', chunks: 890 },
NO_PERSONOPPLYSNINGSLOVEN: { collection: 'bp_compliance_gesetze', chunks: 245 },
SE_DATASKYDDSLAG: { collection: 'bp_compliance_gesetze', chunks: 167 },
PL_UODO: { collection: 'bp_compliance_gesetze', chunks: 198 },
CZ_ZOU: { collection: 'bp_compliance_gesetze', chunks: 1120 },
HU_INFOTV: { collection: 'bp_compliance_gesetze', chunks: 1345 },
BE_DPA_LAW: { collection: 'bp_compliance_gesetze', chunks: 3 },
FI_TIETOSUOJALAKI: { collection: 'bp_compliance_gesetze', chunks: 2 },
DK_DATABESKYTTELSESLOVEN: { collection: 'bp_compliance_gesetze', chunks: 2 },
LU_DPA_LAW: { collection: 'bp_compliance_gesetze', chunks: 2 },
// EDPB guidelines (bp_compliance_datenschutz)
EDPB_GUIDELINES_5_2020: { collection: 'bp_compliance_datenschutz', chunks: 245 },
EDPB_GUIDELINES_7_2020: { collection: 'bp_compliance_datenschutz', chunks: 347 },
// Frameworks (bp_compliance_datenschutz)
ENISA_SECURE_BY_DESIGN: { collection: 'bp_compliance_datenschutz', chunks: 0 },
ENISA_SUPPLY_CHAIN: { collection: 'bp_compliance_datenschutz', chunks: 0 },
NIST_SSDF: { collection: 'bp_compliance_datenschutz', chunks: 0 },
NIST_CSF_2: { collection: 'bp_compliance_datenschutz', chunks: 0 },
OECD_AI_PRINCIPLES: { collection: 'bp_compliance_datenschutz', chunks: 0 },
EFRAG_ENDORSEMENT: { collection: 'bp_compliance_datenschutz', chunks: 0 },
}
/**
 * Minimal regulation info for sidebar display.
 * Full REGULATIONS array with descriptions remains in page.tsx.
 */
export interface RegulationInfo {
// Regulation code, e.g. 'GDPR' or 'BSI-TR-03161-1'.
code: string
// Short display name, e.g. 'DSGVO'.
name: string
// Group key of the regulation, e.g. 'eu_regulation', 'de_law', 'national_law'.
type: string
}
/**
 * Code, short display name and group type for each regulation.
 * Every `type` value used below is one of the RegGroupKey literals declared
 * in ChunkBrowserQA (eu_regulation, eu_directive, de_law, at_law, ch_law,
 * national_law, bsi_standard, eu_guideline, international_standard).
 */
export const REGULATION_INFO: RegulationInfo[] = [
// EU regulations and directives
{ code: 'GDPR', name: 'DSGVO', type: 'eu_regulation' },
{ code: 'EPRIVACY', name: 'ePrivacy-Richtlinie', type: 'eu_directive' },
{ code: 'SCC', name: 'Standardvertragsklauseln', type: 'eu_regulation' },
{ code: 'SCC_FULL_TEXT', name: 'SCC Volltext', type: 'eu_regulation' },
{ code: 'DPF', name: 'EU-US Data Privacy Framework', type: 'eu_regulation' },
{ code: 'AIACT', name: 'EU AI Act', type: 'eu_regulation' },
{ code: 'CRA', name: 'Cyber Resilience Act', type: 'eu_regulation' },
{ code: 'NIS2', name: 'NIS2-Richtlinie', type: 'eu_directive' },
{ code: 'EUCSA', name: 'EU Cybersecurity Act', type: 'eu_regulation' },
{ code: 'DATAACT', name: 'Data Act', type: 'eu_regulation' },
{ code: 'DGA', name: 'Data Governance Act', type: 'eu_regulation' },
{ code: 'DSA', name: 'Digital Services Act', type: 'eu_regulation' },
{ code: 'DMA', name: 'Digital Markets Act', type: 'eu_regulation' },
{ code: 'EAA', name: 'European Accessibility Act', type: 'eu_directive' },
{ code: 'DSM', name: 'DSM-Urheberrechtsrichtlinie', type: 'eu_directive' },
{ code: 'PLD', name: 'Produkthaftungsrichtlinie', type: 'eu_directive' },
{ code: 'GPSR', name: 'General Product Safety', type: 'eu_regulation' },
{ code: 'E_COMMERCE_RL', name: 'E-Commerce-Richtlinie', type: 'eu_directive' },
{ code: 'VERBRAUCHERRECHTE_RL', name: 'Verbraucherrechte-RL', type: 'eu_directive' },
{ code: 'DIGITALE_INHALTE_RL', name: 'Digitale-Inhalte-RL', type: 'eu_directive' },
// Financial
{ code: 'DORA', name: 'DORA', type: 'eu_regulation' },
{ code: 'PSD2', name: 'PSD2', type: 'eu_directive' },
{ code: 'AMLR', name: 'AML-Verordnung', type: 'eu_regulation' },
{ code: 'MiCA', name: 'MiCA', type: 'eu_regulation' },
{ code: 'EHDS', name: 'EHDS', type: 'eu_regulation' },
// Industrial compliance / CE
{ code: 'MACHINERY_REG', name: 'Maschinenverordnung', type: 'eu_regulation' },
{ code: 'BLUE_GUIDE', name: 'Blue Guide', type: 'eu_regulation' },
// EU-IFRS
{ code: 'EU_IFRS_DE', name: 'EU-IFRS (DE)', type: 'eu_regulation' },
{ code: 'EU_IFRS_EN', name: 'EU-IFRS (EN)', type: 'eu_regulation' },
// German laws
{ code: 'TDDDG', name: 'TDDDG', type: 'de_law' },
{ code: 'BDSG_FULL', name: 'BDSG', type: 'de_law' },
{ code: 'DE_DDG', name: 'DDG', type: 'de_law' },
{ code: 'DE_BGB_AGB', name: 'BGB/AGB', type: 'de_law' },
{ code: 'DE_EGBGB', name: 'EGBGB', type: 'de_law' },
{ code: 'DE_HGB_RET', name: 'HGB', type: 'de_law' },
{ code: 'DE_AO_RET', name: 'AO', type: 'de_law' },
{ code: 'DE_UWG', name: 'UWG', type: 'de_law' },
{ code: 'DE_TKG', name: 'TKG', type: 'de_law' },
{ code: 'DE_PANGV', name: 'PAngV', type: 'de_law' },
{ code: 'DE_DLINFOV', name: 'DL-InfoV', type: 'de_law' },
{ code: 'DE_BETRVG', name: 'BetrVG', type: 'de_law' },
{ code: 'DE_GESCHGEHG', name: 'GeschGehG', type: 'de_law' },
{ code: 'DE_BSIG', name: 'BSIG', type: 'de_law' },
{ code: 'DE_USTG_RET', name: 'UStG', type: 'de_law' },
// BSI standards
{ code: 'BSI-TR-03161-1', name: 'BSI-TR Teil 1', type: 'bsi_standard' },
{ code: 'BSI-TR-03161-2', name: 'BSI-TR Teil 2', type: 'bsi_standard' },
{ code: 'BSI-TR-03161-3', name: 'BSI-TR Teil 3', type: 'bsi_standard' },
// Austria
{ code: 'AT_DSG', name: 'DSG Oesterreich', type: 'at_law' },
{ code: 'AT_DSG_FULL', name: 'DSG Volltext', type: 'at_law' },
{ code: 'AT_ECG', name: 'ECG', type: 'at_law' },
{ code: 'AT_TKG', name: 'TKG AT', type: 'at_law' },
{ code: 'AT_KSCHG', name: 'KSchG', type: 'at_law' },
{ code: 'AT_FAGG', name: 'FAGG', type: 'at_law' },
{ code: 'AT_UGB_RET', name: 'UGB', type: 'at_law' },
{ code: 'AT_BAO_RET', name: 'BAO', type: 'at_law' },
{ code: 'AT_MEDIENG', name: 'MedienG', type: 'at_law' },
{ code: 'AT_ABGB_AGB', name: 'ABGB/AGB', type: 'at_law' },
{ code: 'AT_UWG', name: 'UWG AT', type: 'at_law' },
// Switzerland
{ code: 'CH_DSG', name: 'DSG Schweiz', type: 'ch_law' },
{ code: 'CH_DSV', name: 'DSV', type: 'ch_law' },
{ code: 'CH_OR_AGB', name: 'OR/AGB', type: 'ch_law' },
{ code: 'CH_UWG', name: 'UWG CH', type: 'ch_law' },
{ code: 'CH_FMG', name: 'FMG', type: 'ch_law' },
{ code: 'CH_GEBUV', name: 'GeBuV', type: 'ch_law' },
{ code: 'CH_ZERTES', name: 'ZertES', type: 'ch_law' },
{ code: 'CH_ZGB_PERS', name: 'ZGB', type: 'ch_law' },
// Liechtenstein
{ code: 'LI_DSG', name: 'DSG Liechtenstein', type: 'national_law' },
// Other national laws
{ code: 'ES_LOPDGDD', name: 'LOPDGDD Spanien', type: 'national_law' },
{ code: 'IT_CODICE_PRIVACY', name: 'Codice Privacy Italien', type: 'national_law' },
{ code: 'NL_UAVG', name: 'UAVG Niederlande', type: 'national_law' },
{ code: 'FR_CNIL_GUIDE', name: 'CNIL Guide RGPD', type: 'national_law' },
{ code: 'IE_DPA_2018', name: 'DPA 2018 Ireland', type: 'national_law' },
{ code: 'UK_DPA_2018', name: 'DPA 2018 UK', type: 'national_law' },
{ code: 'UK_GDPR', name: 'UK GDPR', type: 'national_law' },
{ code: 'NO_PERSONOPPLYSNINGSLOVEN', name: 'Personopplysningsloven', type: 'national_law' },
{ code: 'SE_DATASKYDDSLAG', name: 'Dataskyddslag Schweden', type: 'national_law' },
{ code: 'PL_UODO', name: 'UODO Polen', type: 'national_law' },
{ code: 'CZ_ZOU', name: 'Zakon Tschechien', type: 'national_law' },
{ code: 'HU_INFOTV', name: 'Infotv. Ungarn', type: 'national_law' },
{ code: 'BE_DPA_LAW', name: 'Datenschutzgesetz Belgien', type: 'national_law' },
{ code: 'FI_TIETOSUOJALAKI', name: 'Tietosuojalaki Finnland', type: 'national_law' },
{ code: 'DK_DATABESKYTTELSESLOVEN', name: 'Databeskyttelsesloven DK', type: 'national_law' },
{ code: 'LU_DPA_LAW', name: 'Datenschutzgesetz Luxemburg', type: 'national_law' },
// EDPB guidelines
{ code: 'EDPB_GUIDELINES_5_2020', name: 'EDPB GL Einwilligung', type: 'eu_guideline' },
{ code: 'EDPB_GUIDELINES_7_2020', name: 'EDPB GL C/P Konzepte', type: 'eu_guideline' },
// Frameworks
{ code: 'ENISA_SECURE_BY_DESIGN', name: 'ENISA Secure by Design', type: 'international_standard' },
{ code: 'ENISA_SUPPLY_CHAIN', name: 'ENISA Supply Chain', type: 'international_standard' },
{ code: 'NIST_SSDF', name: 'NIST SSDF', type: 'international_standard' },
{ code: 'NIST_CSF_2', name: 'NIST CSF 2.0', type: 'international_standard' },
{ code: 'OECD_AI_PRINCIPLES', name: 'OECD AI Principles', type: 'international_standard' },
{ code: 'EFRAG_ENDORSEMENT', name: 'EFRAG Endorsement', type: 'eu_guideline' },
]

View File

@@ -119,6 +119,32 @@ export async function GET(request: NextRequest) {
total_in_page: points.length,
})
}
case 'regulation-counts-batch': {
  // For each code in the comma-separated `codes` parameter, ask Qdrant for
  // the exact number of points in `collection` whose `regulation_code`
  // payload equals that code. Responds with { counts: { [code]: number } }.
  const col = searchParams.get('collection') || 'bp_compliance_gesetze'
  const rawCodes = searchParams.get('codes') || ''
  const codes = rawCodes.split(',').filter(Boolean)
  const results: Record<string, number> = {}

  // Count a single code. A failed or non-OK request leaves the code out of
  // `results` instead of failing the whole batch.
  const countOne = async (code: string): Promise<void> => {
    try {
      const res = await fetch(`${QDRANT_URL}/collections/${encodeURIComponent(col)}/points/count`, {
        method: 'POST',
        headers: { 'Content-Type': 'application/json' },
        body: JSON.stringify({
          filter: { must: [{ key: 'regulation_code', match: { value: code } }] },
          exact: true,
        }),
        cache: 'no-store',
      })
      if (!res.ok) return
      const data = await res.json()
      results[code] = data.result?.count || 0
    } catch {
      /* skip failed counts */
    }
  }

  // Work through the codes in slices of 10 so that at most 10 count
  // requests are in flight at once.
  for (let start = 0; start < codes.length; start += 10) {
    const slice = codes.slice(start, start + 10)
    await Promise.all(slice.map((code) => countOne(code)))
  }
  return NextResponse.json({ counts: results })
}
case 'collection-count': {
const col = searchParams.get('collection') || 'bp_compliance_gesetze'
const countRes = await fetch(`${QDRANT_URL}/collections/${encodeURIComponent(col)}`, {