diff --git a/admin-lehrer/app/(admin)/ai/rag-pipeline/dsfa/_components/DSFABadges.tsx b/admin-lehrer/app/(admin)/ai/rag-pipeline/dsfa/_components/DSFABadges.tsx new file mode 100644 index 0000000..d8dd507 --- /dev/null +++ b/admin-lehrer/app/(admin)/ai/rag-pipeline/dsfa/_components/DSFABadges.tsx @@ -0,0 +1,61 @@ +'use client' + +import React from 'react' +import { Scale, CheckCircle, Clock, AlertCircle } from 'lucide-react' +import { + DSFALicenseCode, + DSFA_LICENSE_LABELS, + DSFA_DOCUMENT_TYPE_LABELS, +} from '@/lib/sdk/types' + +export function LicenseBadge({ licenseCode }: { licenseCode: DSFALicenseCode }) { + const colorMap: Record = { + 'DL-DE-BY-2.0': 'bg-blue-100 text-blue-700 border-blue-200', + 'DL-DE-ZERO-2.0': 'bg-gray-100 text-gray-700 border-gray-200', + 'CC-BY-4.0': 'bg-green-100 text-green-700 border-green-200', + 'EDPB-LICENSE': 'bg-purple-100 text-purple-700 border-purple-200', + 'PUBLIC_DOMAIN': 'bg-gray-100 text-gray-600 border-gray-200', + 'PROPRIETARY': 'bg-amber-100 text-amber-700 border-amber-200', + } + + return ( + + + {DSFA_LICENSE_LABELS[licenseCode] || licenseCode} + + ) +} + +export function DocumentTypeBadge({ type }: { type?: string }) { + if (!type) return null + + const colorMap: Record = { + guideline: 'bg-indigo-100 text-indigo-700', + checklist: 'bg-emerald-100 text-emerald-700', + regulation: 'bg-red-100 text-red-700', + template: 'bg-orange-100 text-orange-700', + } + + return ( + + {DSFA_DOCUMENT_TYPE_LABELS[type as keyof typeof DSFA_DOCUMENT_TYPE_LABELS] || type} + + ) +} + +export function StatusIndicator({ status }: { status: string }) { + const statusConfig: Record = { + green: { color: 'text-green-500', icon: , label: 'Aktiv' }, + yellow: { color: 'text-yellow-500', icon: , label: 'Ausstehend' }, + red: { color: 'text-red-500', icon: , label: 'Fehler' }, + } + + const config = statusConfig[status] || statusConfig.yellow + + return ( + + {config.icon} + {config.label} + + ) +} diff --git 
a/admin-lehrer/app/(admin)/ai/rag-pipeline/dsfa/_components/SourceCard.tsx b/admin-lehrer/app/(admin)/ai/rag-pipeline/dsfa/_components/SourceCard.tsx new file mode 100644 index 0000000..3333e67 --- /dev/null +++ b/admin-lehrer/app/(admin)/ai/rag-pipeline/dsfa/_components/SourceCard.tsx @@ -0,0 +1,137 @@ +'use client' + +import { useState } from 'react' +import { + RefreshCw, + ChevronDown, + ChevronUp, + ExternalLink, +} from 'lucide-react' +import { DSFASource, DSFASourceStats } from '@/lib/sdk/types' +import { LicenseBadge, DocumentTypeBadge } from './DSFABadges' + +interface SourceCardProps { + source: DSFASource + stats?: DSFASourceStats + onIngest: () => void + isIngesting: boolean +} + +export function SourceCard({ source, stats, onIngest, isIngesting }: SourceCardProps) { + const [isExpanded, setIsExpanded] = useState(false) + + return ( +
+
+
+
+
+ + {source.sourceCode} + + +
+

+ {source.name} +

+ {source.organization && ( +

+ {source.organization} +

+ )} +
+ +
+ +
+ + {stats && ( + <> + + {stats.documentCount} Dok. + + + {stats.chunkCount} Chunks + + + )} +
+ + {source.attributionRequired && ( +
+ Attribution: {source.attributionText} +
+ )} +
+ + {isExpanded && ( +
+
+ {source.sourceUrl && ( + <> +
Quelle:
+
+ + Link + +
+ + )} + {source.licenseUrl && ( + <> +
Lizenz-URL:
+
+ + {source.licenseName} + +
+ + )} +
Sprache:
+
{source.language}
+ {stats?.lastIndexedAt && ( + <> +
Zuletzt indexiert:
+
{new Date(stats.lastIndexedAt).toLocaleString('de-DE')}
+ + )} +
+ +
+ +
+
+ )} +
+ ) +} diff --git a/admin-lehrer/app/(admin)/ai/rag-pipeline/dsfa/_components/StatsOverview.tsx b/admin-lehrer/app/(admin)/ai/rag-pipeline/dsfa/_components/StatsOverview.tsx new file mode 100644 index 0000000..c360e56 --- /dev/null +++ b/admin-lehrer/app/(admin)/ai/rag-pipeline/dsfa/_components/StatsOverview.tsx @@ -0,0 +1,59 @@ +'use client' + +import { Database } from 'lucide-react' +import { DSFACorpusStats } from '@/lib/sdk/types' +import { StatusIndicator } from './DSFABadges' + +interface StatsOverviewProps { + stats: DSFACorpusStats +} + +export function StatsOverview({ stats }: StatsOverviewProps) { + return ( +
+
+

+ + Corpus-Statistik +

+ +
+ +
+
+

+ {stats.totalSources} +

+

Quellen

+
+
+

+ {stats.totalDocuments} +

+

Dokumente

+
+
+

+ {stats.totalChunks.toLocaleString()} +

+

Chunks

+
+
+

+ {stats.qdrantPointsCount.toLocaleString()} +

+

Vektoren

+
+
+ +
+

+ Collection:{' '} + + {stats.qdrantCollection} + +

+
+
+ ) +} diff --git a/admin-lehrer/app/(admin)/ai/rag-pipeline/dsfa/_components/dsfa-api.ts b/admin-lehrer/app/(admin)/ai/rag-pipeline/dsfa/_components/dsfa-api.ts new file mode 100644 index 0000000..7318dea --- /dev/null +++ b/admin-lehrer/app/(admin)/ai/rag-pipeline/dsfa/_components/dsfa-api.ts @@ -0,0 +1,137 @@ +/** + * DSFA API functions and mock data. + */ + +import { + DSFASource, + DSFACorpusStats, +} from '@/lib/sdk/types' + +export const API_BASE = process.env.NEXT_PUBLIC_KLAUSUR_SERVICE_URL || 'http://localhost:8086' + +export const MOCK_SOURCES: DSFASource[] = [ + { + id: '1', + sourceCode: 'WP248', + name: 'WP248 rev.01 - Leitlinien zur DSFA', + fullName: 'Leitlinien zur Datenschutz-Folgenabschaetzung', + organization: 'Artikel-29-Datenschutzgruppe / EDPB', + sourceUrl: 'https://ec.europa.eu/newsroom/article29/items/611236/en', + licenseCode: 'EDPB-LICENSE', + licenseName: 'EDPB Document License', + attributionRequired: true, + attributionText: 'Quelle: WP248 rev.01, Artikel-29-Datenschutzgruppe (2017)', + documentType: 'guideline', + language: 'de', + }, + { + id: '2', + sourceCode: 'DSK_KP5', + name: 'Kurzpapier Nr. 5 - DSFA nach Art. 35 DS-GVO', + organization: 'Datenschutzkonferenz (DSK)', + sourceUrl: 'https://www.datenschutzkonferenz-online.de/media/kp/dsk_kpnr_5.pdf', + licenseCode: 'DL-DE-BY-2.0', + licenseName: 'Datenlizenz DE \u2013 Namensnennung 2.0', + licenseUrl: 'https://www.govdata.de/dl-de/by-2-0', + attributionRequired: true, + attributionText: 'Quelle: DSK Kurzpapier Nr. 5 (Stand: 2018)', + documentType: 'guideline', + language: 'de', + }, + { + id: '3', + sourceCode: 'BFDI_MUSS_PUBLIC', + name: 'BfDI DSFA-Liste (oeffentlicher Bereich)', + organization: 'BfDI', + sourceUrl: 'https://www.bfdi.bund.de', + licenseCode: 'DL-DE-ZERO-2.0', + licenseName: 'Datenlizenz DE \u2013 Zero 2.0', + attributionRequired: false, + attributionText: 'Quelle: BfDI, Liste gem. Art. 35 Abs. 
4 DSGVO', + documentType: 'checklist', + language: 'de', + }, + { + id: '4', + sourceCode: 'NI_MUSS_PRIVATE', + name: 'LfD NI DSFA-Liste (nicht-oeffentlich)', + organization: 'LfD Niedersachsen', + sourceUrl: 'https://www.lfd.niedersachsen.de/download/131098', + licenseCode: 'DL-DE-BY-2.0', + licenseName: 'Datenlizenz DE \u2013 Namensnennung 2.0', + attributionRequired: true, + attributionText: 'Quelle: LfD Niedersachsen, DSFA-Muss-Liste', + documentType: 'checklist', + language: 'de', + }, +] +/** Static corpus statistics; fetchStats returns this object whenever the stats request fails. */ +export const MOCK_STATS: DSFACorpusStats = { + sources: [ + { + sourceId: '1', + sourceCode: 'WP248', + name: 'WP248 rev.01', + organization: 'EDPB', + licenseCode: 'EDPB-LICENSE', + documentType: 'guideline', + documentCount: 1, + chunkCount: 50, + lastIndexedAt: '2026-02-09T10:00:00Z', + }, + { + sourceId: '2', + sourceCode: 'DSK_KP5', + name: 'DSK Kurzpapier Nr. 5', + organization: 'DSK', + licenseCode: 'DL-DE-BY-2.0', + documentType: 'guideline', + documentCount: 1, + chunkCount: 35, + lastIndexedAt: '2026-02-09T10:00:00Z', + }, + ], + totalSources: 45, + totalDocuments: 45, + totalChunks: 850, + qdrantCollection: 'bp_dsfa_corpus', + qdrantPointsCount: 850, + qdrantStatus: 'green', +} +/** GETs /api/v1/dsfa-rag/sources below API_BASE and returns the parsed JSON body (not validated against DSFASource). Never rejects. NOTE(review): every failure (network error, non-OK status, unparsable body) is swallowed and MOCK_SOURCES is returned, so callers cannot tell mock data from live data. */ +export async function fetchSources(): Promise<DSFASource[]> { + try { + const response = await fetch(`${API_BASE}/api/v1/dsfa-rag/sources`) + if (!response.ok) throw new Error('Failed to fetch sources') + return await response.json() + } catch { + return MOCK_SOURCES + } +} +/** GETs /api/v1/dsfa-rag/stats below API_BASE and returns the parsed JSON body (not validated against DSFACorpusStats). Never rejects. NOTE(review): every failure is swallowed and MOCK_STATS is returned instead, exactly as in fetchSources. */ +export async function fetchStats(): Promise<DSFACorpusStats> { + try { + const response = await fetch(`${API_BASE}/api/v1/dsfa-rag/stats`) + if (!response.ok) throw new Error('Failed to fetch stats') + return await response.json() + } catch { + return MOCK_STATS + } +} +/** POSTs (without a body) to /api/v1/dsfa-rag/init below API_BASE and returns the parsed JSON body. @throws Error when the response status is not OK; fetch and JSON parsing failures also propagate, there is no mock fallback here. */ +export async function initializeCorpus(): Promise<{ sources_registered: number }> { + const response = await fetch(`${API_BASE}/api/v1/dsfa-rag/init`, { + method: 'POST', + }) + if (!response.ok) throw new Error('Failed to initialize corpus') + return await response.json() +} 
+/** POSTs an empty JSON object to /api/v1/dsfa-rag/sources/{sourceCode}/ingest below API_BASE. The source code is percent-encoded so reserved characters (slash, question mark, hash) cannot alter the request path. Resolves with no value; the response body is ignored. @throws Error when the response status is not OK; fetch failures propagate as well. */ +export async function triggerIngestion(sourceCode: string): Promise<void> { + const response = await fetch(`${API_BASE}/api/v1/dsfa-rag/sources/${encodeURIComponent(sourceCode)}/ingest`, { + method: 'POST', + headers: { 'Content-Type': 'application/json' }, + body: JSON.stringify({}), + }) + if (!response.ok) throw new Error('Failed to trigger ingestion') +} diff --git a/admin-lehrer/app/(admin)/ai/rag-pipeline/dsfa/page.tsx b/admin-lehrer/app/(admin)/ai/rag-pipeline/dsfa/page.tsx index aedbf66..abb5c8e 100644 --- a/admin-lehrer/app/(admin)/ai/rag-pipeline/dsfa/page.tsx +++ b/admin-lehrer/app/(admin)/ai/rag-pipeline/dsfa/page.tsx @@ -4,11 +4,6 @@ * DSFA Document Manager * * Manages DSFA-related sources and documents for the RAG pipeline. - * Features: - * - View all registered DSFA sources with license info - * - Upload new documents - * - Trigger re-indexing - * - View corpus statistics */ import { useState, useEffect } from 'react' @@ -19,411 +14,24 @@ import { Upload, FileText, Database, - Scale, - ExternalLink, - ChevronDown, - ChevronUp, Search, Filter, - CheckCircle, - Clock, AlertCircle, - BookOpen + BookOpen, } from 'lucide-react' +import { DSFASource, DSFACorpusStats, DSFASourceStats } from '@/lib/sdk/types' + import { - DSFASource, - DSFACorpusStats, - DSFASourceStats, - DSFALicenseCode, - DSFA_LICENSE_LABELS, - DSFA_DOCUMENT_TYPE_LABELS -} from '@/lib/sdk/types' - -// ============================================================================ -// TYPES -// ============================================================================ - -interface APIError { - message: string - status?: number -} - -// ============================================================================ -// API FUNCTIONS -// ============================================================================ - -const API_BASE = process.env.NEXT_PUBLIC_KLAUSUR_SERVICE_URL || 'http://localhost:8086' - -async function fetchSources(): Promise { - try { - const response = await 
fetch(`${API_BASE}/api/v1/dsfa-rag/sources`) - if (!response.ok) throw new Error('Failed to fetch sources') - return await response.json() - } catch { - // Return mock data for demo - return MOCK_SOURCES - } -} - -async function fetchStats(): Promise { - try { - const response = await fetch(`${API_BASE}/api/v1/dsfa-rag/stats`) - if (!response.ok) throw new Error('Failed to fetch stats') - return await response.json() - } catch { - return MOCK_STATS - } -} - -async function initializeCorpus(): Promise<{ sources_registered: number }> { - const response = await fetch(`${API_BASE}/api/v1/dsfa-rag/init`, { - method: 'POST', - }) - if (!response.ok) throw new Error('Failed to initialize corpus') - return await response.json() -} - -async function triggerIngestion(sourceCode: string): Promise { - const response = await fetch(`${API_BASE}/api/v1/dsfa-rag/sources/${sourceCode}/ingest`, { - method: 'POST', - headers: { 'Content-Type': 'application/json' }, - body: JSON.stringify({}), - }) - if (!response.ok) throw new Error('Failed to trigger ingestion') -} - -// ============================================================================ -// MOCK DATA -// ============================================================================ - -const MOCK_SOURCES: DSFASource[] = [ - { - id: '1', - sourceCode: 'WP248', - name: 'WP248 rev.01 - Leitlinien zur DSFA', - fullName: 'Leitlinien zur Datenschutz-Folgenabschaetzung', - organization: 'Artikel-29-Datenschutzgruppe / EDPB', - sourceUrl: 'https://ec.europa.eu/newsroom/article29/items/611236/en', - licenseCode: 'EDPB-LICENSE', - licenseName: 'EDPB Document License', - attributionRequired: true, - attributionText: 'Quelle: WP248 rev.01, Artikel-29-Datenschutzgruppe (2017)', - documentType: 'guideline', - language: 'de', - }, - { - id: '2', - sourceCode: 'DSK_KP5', - name: 'Kurzpapier Nr. 5 - DSFA nach Art. 
35 DS-GVO', - organization: 'Datenschutzkonferenz (DSK)', - sourceUrl: 'https://www.datenschutzkonferenz-online.de/media/kp/dsk_kpnr_5.pdf', - licenseCode: 'DL-DE-BY-2.0', - licenseName: 'Datenlizenz DE – Namensnennung 2.0', - licenseUrl: 'https://www.govdata.de/dl-de/by-2-0', - attributionRequired: true, - attributionText: 'Quelle: DSK Kurzpapier Nr. 5 (Stand: 2018)', - documentType: 'guideline', - language: 'de', - }, - { - id: '3', - sourceCode: 'BFDI_MUSS_PUBLIC', - name: 'BfDI DSFA-Liste (oeffentlicher Bereich)', - organization: 'BfDI', - sourceUrl: 'https://www.bfdi.bund.de', - licenseCode: 'DL-DE-ZERO-2.0', - licenseName: 'Datenlizenz DE – Zero 2.0', - attributionRequired: false, - attributionText: 'Quelle: BfDI, Liste gem. Art. 35 Abs. 4 DSGVO', - documentType: 'checklist', - language: 'de', - }, - { - id: '4', - sourceCode: 'NI_MUSS_PRIVATE', - name: 'LfD NI DSFA-Liste (nicht-oeffentlich)', - organization: 'LfD Niedersachsen', - sourceUrl: 'https://www.lfd.niedersachsen.de/download/131098', - licenseCode: 'DL-DE-BY-2.0', - licenseName: 'Datenlizenz DE – Namensnennung 2.0', - attributionRequired: true, - attributionText: 'Quelle: LfD Niedersachsen, DSFA-Muss-Liste', - documentType: 'checklist', - language: 'de', - }, -] - -const MOCK_STATS: DSFACorpusStats = { - sources: [ - { - sourceId: '1', - sourceCode: 'WP248', - name: 'WP248 rev.01', - organization: 'EDPB', - licenseCode: 'EDPB-LICENSE', - documentType: 'guideline', - documentCount: 1, - chunkCount: 50, - lastIndexedAt: '2026-02-09T10:00:00Z', - }, - { - sourceId: '2', - sourceCode: 'DSK_KP5', - name: 'DSK Kurzpapier Nr. 
5', - organization: 'DSK', - licenseCode: 'DL-DE-BY-2.0', - documentType: 'guideline', - documentCount: 1, - chunkCount: 35, - lastIndexedAt: '2026-02-09T10:00:00Z', - }, - ], - totalSources: 45, - totalDocuments: 45, - totalChunks: 850, - qdrantCollection: 'bp_dsfa_corpus', - qdrantPointsCount: 850, - qdrantStatus: 'green', -} - -// ============================================================================ -// COMPONENTS -// ============================================================================ - -function LicenseBadge({ licenseCode }: { licenseCode: DSFALicenseCode }) { - const colorMap: Record = { - 'DL-DE-BY-2.0': 'bg-blue-100 text-blue-700 border-blue-200', - 'DL-DE-ZERO-2.0': 'bg-gray-100 text-gray-700 border-gray-200', - 'CC-BY-4.0': 'bg-green-100 text-green-700 border-green-200', - 'EDPB-LICENSE': 'bg-purple-100 text-purple-700 border-purple-200', - 'PUBLIC_DOMAIN': 'bg-gray-100 text-gray-600 border-gray-200', - 'PROPRIETARY': 'bg-amber-100 text-amber-700 border-amber-200', - } - - return ( - - - {DSFA_LICENSE_LABELS[licenseCode] || licenseCode} - - ) -} - -function DocumentTypeBadge({ type }: { type?: string }) { - if (!type) return null - - const colorMap: Record = { - guideline: 'bg-indigo-100 text-indigo-700', - checklist: 'bg-emerald-100 text-emerald-700', - regulation: 'bg-red-100 text-red-700', - template: 'bg-orange-100 text-orange-700', - } - - return ( - - {DSFA_DOCUMENT_TYPE_LABELS[type as keyof typeof DSFA_DOCUMENT_TYPE_LABELS] || type} - - ) -} - -function StatusIndicator({ status }: { status: string }) { - const statusConfig: Record = { - green: { color: 'text-green-500', icon: , label: 'Aktiv' }, - yellow: { color: 'text-yellow-500', icon: , label: 'Ausstehend' }, - red: { color: 'text-red-500', icon: , label: 'Fehler' }, - } - - const config = statusConfig[status] || statusConfig.yellow - - return ( - - {config.icon} - {config.label} - - ) -} - -function SourceCard({ - source, - stats, - onIngest, - isIngesting -}: { - source: 
DSFASource - stats?: DSFASourceStats - onIngest: () => void - isIngesting: boolean -}) { - const [isExpanded, setIsExpanded] = useState(false) - - return ( -
-
-
-
-
- - {source.sourceCode} - - -
-

- {source.name} -

- {source.organization && ( -

- {source.organization} -

- )} -
- -
- -
- - {stats && ( - <> - - {stats.documentCount} Dok. - - - {stats.chunkCount} Chunks - - - )} -
- - {source.attributionRequired && ( -
- Attribution: {source.attributionText} -
- )} -
- - {isExpanded && ( -
-
- {source.sourceUrl && ( - <> -
Quelle:
-
- - Link - -
- - )} - {source.licenseUrl && ( - <> -
Lizenz-URL:
-
- - {source.licenseName} - -
- - )} -
Sprache:
-
{source.language}
- {stats?.lastIndexedAt && ( - <> -
Zuletzt indexiert:
-
{new Date(stats.lastIndexedAt).toLocaleString('de-DE')}
- - )} -
- -
- -
-
- )} -
- ) -} - -function StatsOverview({ stats }: { stats: DSFACorpusStats }) { - return ( -
-
-

- - Corpus-Statistik -

- -
- -
-
-

- {stats.totalSources} -

-

Quellen

-
-
-

- {stats.totalDocuments} -

-

Dokumente

-
-
-

- {stats.totalChunks.toLocaleString()} -

-

Chunks

-
-
-

- {stats.qdrantPointsCount.toLocaleString()} -

-

Vektoren

-
-
- -
-

- Collection:{' '} - - {stats.qdrantCollection} - -

-
-
- ) -} - -// ============================================================================ -// MAIN PAGE -// ============================================================================ + fetchSources, + fetchStats, + initializeCorpus, + triggerIngestion, + MOCK_SOURCES, + MOCK_STATS, +} from './_components/dsfa-api' +import { LicenseBadge } from './_components/DSFABadges' +import { SourceCard } from './_components/SourceCard' +import { StatsOverview } from './_components/StatsOverview' export default function DSFADocumentManagerPage() { const [sources, setSources] = useState([]) @@ -461,7 +69,6 @@ export default function DSFADocumentManagerPage() { setIsInitializing(true) try { await initializeCorpus() - // Reload data const [sourcesData, statsData] = await Promise.all([ fetchSources(), fetchStats(), @@ -479,7 +86,6 @@ export default function DSFADocumentManagerPage() { setIngestingSource(sourceCode) try { await triggerIngestion(sourceCode) - // Reload stats const statsData = await fetchStats() setStats(statsData) } catch (err) { @@ -501,7 +107,6 @@ export default function DSFADocumentManagerPage() { return matchesSearch && matchesType }) - // Get stats by source code const getStatsForSource = (sourceCode: string): DSFASourceStats | undefined => { return stats?.sources.find(s => s.sourceCode === sourceCode) } diff --git a/admin-lehrer/app/(admin)/ai/rag/components/ChunkBrowserConstants.ts b/admin-lehrer/app/(admin)/ai/rag/components/ChunkBrowserConstants.ts new file mode 100644 index 0000000..62e91a5 --- /dev/null +++ b/admin-lehrer/app/(admin)/ai/rag/components/ChunkBrowserConstants.ts @@ -0,0 +1,52 @@ +/** + * Constants and types for ChunkBrowserQA component. 
+ */ +/** Identifier of a regulation group; GROUP_LABELS holds one label per member of this union. */ +export type RegGroupKey = + | 'eu_regulation' + | 'eu_directive' + | 'de_law' + | 'at_law' + | 'ch_law' + | 'national_law' + | 'bsi_standard' + | 'eu_guideline' + | 'international_standard' + | 'other' +/** Display label (German UI text) for every RegGroupKey; typing it as a Record over the union makes a missing or misspelled key a compile error. */ +export const GROUP_LABELS: Record<RegGroupKey, string> = { + eu_regulation: 'EU Verordnungen', + eu_directive: 'EU Richtlinien', + de_law: 'DE Gesetze', + at_law: 'AT Gesetze', + ch_law: 'CH Gesetze', + national_law: 'Nationale Gesetze (EU)', + bsi_standard: 'BSI Standards', + eu_guideline: 'EDPB / Guidelines', + international_standard: 'Internationale Standards', + other: 'Sonstige', +} +/** Every RegGroupKey exactly once, as an ordered list. NOTE(review): presumably the order in which groups are rendered; confirm against the consumer. */ +export const GROUP_ORDER: RegGroupKey[] = [ + 'eu_regulation', 'eu_directive', 'de_law', 'at_law', 'ch_law', + 'national_law', 'bsi_standard', 'eu_guideline', 'international_standard', 'other', +] +/** Collection names offered by the chunk browser. NOTE(review): presumably vector-store collection names; confirm against the backend. */ +export const COLLECTIONS = [ + 'bp_compliance_gesetze', + 'bp_compliance_ce', + 'bp_compliance_datenschutz', + 'bp_dsfa_corpus', + 'bp_compliance_recht', + 'bp_legal_templates', + 'bp_nibis_eh', +] +/** Chunk metadata keys that ChunkBrowserContent sorts ahead of all other keys in its metadata listing. */ +export const STRUCTURAL_KEYS = new Set([ + 'article', 'artikel', 'paragraph', 'section_title', 'section', 'chapter', + 'abschnitt', 'kapitel', 'pages', 'page', +]) +/** Chunk metadata keys that ChunkBrowserContent filters out of its metadata listing. */ +export const HIDDEN_KEYS = new Set([ + 'text', 'content', 'chunk_text', 'id', 'embedding', +]) diff --git a/admin-lehrer/app/(admin)/ai/rag/components/ChunkBrowserContent.tsx b/admin-lehrer/app/(admin)/ai/rag/components/ChunkBrowserContent.tsx new file mode 100644 index 0000000..ec7f373 --- /dev/null +++ b/admin-lehrer/app/(admin)/ai/rag/components/ChunkBrowserContent.tsx @@ -0,0 +1,234 @@ +'use client' + +import React from 'react' +import { STRUCTURAL_KEYS, HIDDEN_KEYS } from './ChunkBrowserConstants' +import { getChunkText, getStructuralInfo } from './ChunkBrowserHelpers' +import { RAG_PDF_MAPPING } from './rag-pdf-mapping' +import { REGULATIONS_IN_RAG } from '../rag-constants' + +interface ChunkBrowserContentProps { + selectedRegulation: string | null + docLoading: boolean + docChunks: Record[] + docChunkIndex: number + 
docTotalChunks: number + splitViewActive: boolean + chunksPerPage: number + pdfExists: boolean | null +} + +export function ChunkBrowserContent({ + selectedRegulation, + docLoading, + docChunks, + docChunkIndex, + docTotalChunks, + splitViewActive, + chunksPerPage, + pdfExists, +}: ChunkBrowserContentProps) { + const currentChunk = docChunks[docChunkIndex] || null + const prevChunk = docChunkIndex > 0 ? docChunks[docChunkIndex - 1] : null + const nextChunk = docChunkIndex < docChunks.length - 1 ? docChunks[docChunkIndex + 1] : null + + const structInfo = getStructuralInfo(currentChunk) + + // PDF page estimation + const estimatePdfPage = (chunk: Record | null, chunkIdx: number): number => { + if (chunk) { + const pages = chunk.pages as number[] | undefined + if (Array.isArray(pages) && pages.length > 0) return pages[0] + const page = chunk.page as number | undefined + if (typeof page === 'number' && page > 0) return page + } + const mapping = selectedRegulation ? RAG_PDF_MAPPING[selectedRegulation] : null + const cpp = mapping?.chunksPerPage || chunksPerPage + return Math.floor(chunkIdx / cpp) + 1 + } + + const pdfPage = estimatePdfPage(currentChunk, docChunkIndex) + const pdfMapping = selectedRegulation ? RAG_PDF_MAPPING[selectedRegulation] : null + const pdfUrl = pdfMapping ? `/rag-originals/${pdfMapping.filename}#page=${pdfPage}` : null + + // Overlap extraction + const getOverlapPrev = (): string => { + if (!prevChunk) return '' + const text = getChunkText(prevChunk) + return text.length > 150 ? '...' + text.slice(-150) : text + } + + const getOverlapNext = (): string => { + if (!nextChunk) return '' + const text = getChunkText(nextChunk) + return text.length > 150 ? text.slice(0, 150) + '...' : text + } + + if (!selectedRegulation) { + return ( +
+
+
🔍
+

Dokument in der Sidebar auswaehlen, um QA zu starten.

+

Pfeiltasten: Chunk vor/zurueck

+
+
+ ) + } + + if (docLoading) { + return ( +
+
+
+

Chunks werden geladen...

+

+ {selectedRegulation}: {REGULATIONS_IN_RAG[selectedRegulation]?.chunks.toLocaleString() || '?'} Chunks erwartet +

+
+
+ ) + } + + return ( +
+ {/* Chunk-Text Panel */} +
+
+ Chunk-Text +
+ {structInfo.article && ( + + {structInfo.article} + + )} + {structInfo.section && ( + + {structInfo.section} + + )} + + #{docChunkIndex} / {docTotalChunks - 1} + +
+
+ +
+ {/* Overlap from previous chunk */} + {prevChunk && ( +
+
↑ Ende vorheriger Chunk #{docChunkIndex - 1}
+

{getOverlapPrev()}

+
+ )} + + {/* Current chunk text */} + {currentChunk ? ( +
+ {getChunkText(currentChunk)} +
+ ) : ( +
Kein Chunk-Text vorhanden.
+ )} + + {/* Overlap from next chunk */} + {nextChunk && ( +
+
↓ Anfang naechster Chunk #{docChunkIndex + 1}
+

{getOverlapNext()}

+
+ )} + + {/* Metadata */} + {currentChunk && ( +
+
Metadaten
+
+ {Object.entries(currentChunk) + .filter(([k]) => !HIDDEN_KEYS.has(k)) + .sort(([a], [b]) => { + const aStruct = STRUCTURAL_KEYS.has(a) ? 0 : 1 + const bStruct = STRUCTURAL_KEYS.has(b) ? 0 : 1 + return aStruct - bStruct || a.localeCompare(b) + }) + .map(([k, v]) => ( +
+ {k}: + + {Array.isArray(v) ? v.join(', ') : String(v)} + +
+ ))} +
+
+
+ Chunk-Laenge: {getChunkText(currentChunk).length} Zeichen + {getChunkText(currentChunk).length < 50 && ( + ⚠ Sehr kurz + )} + {getChunkText(currentChunk).length > 2000 && ( + ⚠ Sehr lang + )} +
+
+
+ )} +
+
+ + {/* PDF-Viewer Panel */} + {splitViewActive && ( +
+
+ Original-PDF +
+ + Seite ~{pdfPage} + {pdfMapping?.totalPages ? ` / ${pdfMapping.totalPages}` : ''} + + {pdfUrl && ( + + Oeffnen ↗ + + )} +
+
+
+ {pdfUrl && pdfExists ? ( +