/**
 * OcrPipelinePage - client-side page component that drives the multi-step OCR
 * pipeline (orientation, deskew, dewarp, crop, columns, rows, words, LLM
 * review, reconstruction, ground truth) against the backend mounted under
 * KLAUSUR_API ('/klausur-api').
 *
 * NOTE(review): in this copy of the file the source is collapsed onto a few
 * very long lines. As written, each `//` line comment lexically swallows the
 * code that follows it on the same physical line, and the JSX element tags
 * and generic type arguments appear to be missing (renderStep has a bare
 * `return` per case; `Record = {` and `useState(null)` carry no type
 * arguments). Presumably an extraction artifact - restore from the formatted
 * original before building. TODO confirm against version control.
 *
 * State kept by the component (all via useState):
 *  - currentStep: 0-based index into the UI step list.
 *  - sessionId / sessionName / activeCategory: the currently opened session.
 *  - sessions / loadingSessions: the session list and its loading flag.
 *  - editingName / editNameValue / editingCategory: inline-edit state for a
 *    row of the session list.
 *  - docTypeResult: result of document type detection, including skip_steps.
 *  - subSessions / parentSessionId: sub-session bookkeeping.
 *  - steps: PIPELINE_STEPS plus a status; initially the first step is
 *    'active' and every other step is 'pending'.
 *
 * Behaviour of the handlers, as far as the visible code shows:
 *  - loadSessions: GET .../sessions; stores data.sessions (or []) and always
 *    clears loadingSessions. Failures are only logged.
 *  - openSession(sid, keepSubSessions?): GET .../sessions/{sid}; returns
 *    silently on a non-ok response. Sets id, name (name, else filename, else
 *    '') and category. A session with sub_sessions becomes the parent; a
 *    session with parent_session_id keeps the existing sub-session list;
 *    otherwise the sub-session state is cleared unless keepSubSessions is
 *    set. The backend current_step is 1-indexed and is mapped to the
 *    0-indexed UI step with max(0, dbStep - 1). Sub-sessions additionally
 *    skip 'orientation', 'deskew', 'dewarp' and 'crop' and start no earlier
 *    than UI step 4.
 *  - deleteSession / renameSession / updateCategory: send DELETE or PUT and
 *    then mirror the change in local state. renameSession and updateCategory
 *    leave edit mode even when the request throws.
 *    NOTE(review): none of these (nor deleteAllSessions) checks res.ok, so
 *    local state is updated even when the server answers with an error
 *    status; only thrown errors reach the catch block.
 *    NOTE(review): deleteSession resets the step state for the active session
 *    but leaves sessionName and activeCategory untouched.
 *  - deleteAllSessions: asks for confirmation, sends DELETE on the
 *    collection, then resets all session and step state.
 *  - handleStepClick(index): navigates only to a step at or before
 *    currentStep, or to a step whose status is 'completed'.
 *  - goToStep(step): sets currentStep and derives every status purely from
 *    the index (completed / active / pending).
 *    NOTE(review): this overwrites any 'skipped' status.
 *  - handleNext: on the last step, a sub-session is marked completed
 *    (current_step 10) and the parent session is reopened; a main session
 *    resets to the session list and reloads it. Otherwise advances to the
 *    next step whose id is not in docTypeResult.skip_steps, clamped to the
 *    last step, marking skipped steps in between.
 *  - handleOrientationComplete(sid): stores the new session id, reloads the
 *    list and advances.
 *  - handleCropNext: POST .../detect-type, stores the result and marks its
 *    skip_steps as 'skipped', then advances even if detection failed.
 *    NOTE(review): handleNext reads docTypeResult from the current render's
 *    closure, so the value passed to setDocTypeResult just before appears not
 *    to be visible to it; the next step may be activated although it was just
 *    marked 'skipped'. Verify.
 *  - handleDocTypeChange(newDocType): no-op without an existing result.
 *    'full_text' skips 'columns' and 'rows' and selects pipeline 'full_page';
 *    any other type selects 'cell_first' with no skips. Steps that were
 *    'skipped' and no longer are become 'pending'.
 *  - handleNewSession: clears session, doc type and sub-session state and
 *    resets the steps.
 *  - handleSessionChange(id): openSession(id, true).
 *  - handleBoxSessionsCreated(subs): stores subs and makes the current
 *    session the parent.
 *  - reprocessFromStep(uiStep): asks for confirmation, POSTs
 *    { from_step: uiStep + 1 } to .../reprocess and, on an ok response, calls
 *    goToStep(uiStep).
 *    NOTE(review): goToStep is recreated on every render, so listing it as a
 *    dependency means this callback is never actually memoized.
 *  - stepNames: maps the 1-indexed backend step number to its German label;
 *    these are user-facing strings.
 */
'use client' import { useCallback, useEffect, useState } from 'react' import { PagePurpose } from '@/components/common/PagePurpose' import { PipelineStepper } from '@/components/ocr-pipeline/PipelineStepper' import { StepOrientation } from '@/components/ocr-pipeline/StepOrientation' import { StepCrop } from '@/components/ocr-pipeline/StepCrop' import { StepDeskew } from '@/components/ocr-pipeline/StepDeskew' import { StepDewarp } from '@/components/ocr-pipeline/StepDewarp' import { StepColumnDetection } from '@/components/ocr-pipeline/StepColumnDetection' import { StepRowDetection } from '@/components/ocr-pipeline/StepRowDetection' import { StepWordRecognition } from '@/components/ocr-pipeline/StepWordRecognition' import { StepLlmReview } from '@/components/ocr-pipeline/StepLlmReview' import { StepReconstruction } from '@/components/ocr-pipeline/StepReconstruction' import { StepGroundTruth } from '@/components/ocr-pipeline/StepGroundTruth' import { BoxSessionTabs } from '@/components/ocr-pipeline/BoxSessionTabs' import { PIPELINE_STEPS, DOCUMENT_CATEGORIES, type PipelineStep, type SessionListItem, type DocumentTypeResult, type DocumentCategory, type SubSession } from './types' const KLAUSUR_API = '/klausur-api' export default function OcrPipelinePage() { const [currentStep, setCurrentStep] = useState(0) const [sessionId, setSessionId] = useState(null) const [sessionName, setSessionName] = useState('') const [sessions, setSessions] = useState([]) const [loadingSessions, setLoadingSessions] = useState(true) const [editingName, setEditingName] = useState(null) const [editNameValue, setEditNameValue] = useState('') const [editingCategory, setEditingCategory] = useState(null) const [docTypeResult, setDocTypeResult] = useState(null) const [activeCategory, setActiveCategory] = useState(undefined) const [subSessions, setSubSessions] = useState([]) const [parentSessionId, setParentSessionId] = useState(null) const [steps, setSteps] = useState( PIPELINE_STEPS.map((s, i) => 
({ ...s, status: i === 0 ? 'active' : 'pending', })), ) // Load session list on mount useEffect(() => { loadSessions() }, []) const loadSessions = async () => { setLoadingSessions(true) try { const res = await fetch(`${KLAUSUR_API}/api/v1/ocr-pipeline/sessions`) if (res.ok) { const data = await res.json() setSessions(data.sessions || []) } } catch (e) { console.error('Failed to load sessions:', e) } finally { setLoadingSessions(false) } } const openSession = useCallback(async (sid: string, keepSubSessions?: boolean) => { try { const res = await fetch(`${KLAUSUR_API}/api/v1/ocr-pipeline/sessions/${sid}`) if (!res.ok) return const data = await res.json() setSessionId(sid) setSessionName(data.name || data.filename || '') setActiveCategory(data.document_category || undefined) // Sub-session handling if (data.sub_sessions && data.sub_sessions.length > 0) { setSubSessions(data.sub_sessions) setParentSessionId(sid) } else if (data.parent_session_id) { // This is a sub-session — keep parent info but don't reset sub-session list setParentSessionId(data.parent_session_id) } else if (!keepSubSessions) { setSubSessions([]) setParentSessionId(null) } // Restore doc type result if available const savedDocType: DocumentTypeResult | null = data.doc_type_result || null setDocTypeResult(savedDocType) // Determine which step to jump to based on current_step const dbStep = data.current_step || 1 // DB steps: 1=start, 2=orientation, 3=deskew, 4=dewarp, 5=crop, 6=columns, ... // UI steps are 0-indexed: 0=orientation, 1=deskew, 2=dewarp, 3=crop, 4=columns, ... 
let uiStep = Math.max(0, dbStep - 1) const skipSteps = [...(savedDocType?.skip_steps || [])] // Sub-sessions: image is already cropped, skip pre-processing steps // Jump directly to columns (UI step 4) unless already further ahead const isSubSession = !!data.parent_session_id const SUB_SESSION_SKIP = ['orientation', 'deskew', 'dewarp', 'crop'] if (isSubSession) { for (const s of SUB_SESSION_SKIP) { if (!skipSteps.includes(s)) skipSteps.push(s) } if (uiStep < 4) uiStep = 4 // columns step } setSteps( PIPELINE_STEPS.map((s, i) => ({ ...s, status: skipSteps.includes(s.id) ? 'skipped' : i < uiStep ? 'completed' : i === uiStep ? 'active' : 'pending', })), ) setCurrentStep(uiStep) } catch (e) { console.error('Failed to open session:', e) } }, []) const deleteSession = useCallback(async (sid: string) => { try { await fetch(`${KLAUSUR_API}/api/v1/ocr-pipeline/sessions/${sid}`, { method: 'DELETE' }) setSessions((prev) => prev.filter((s) => s.id !== sid)) if (sessionId === sid) { setSessionId(null) setCurrentStep(0) setDocTypeResult(null) setSubSessions([]) setParentSessionId(null) setSteps(PIPELINE_STEPS.map((s, i) => ({ ...s, status: i === 0 ? 'active' : 'pending' }))) } } catch (e) { console.error('Failed to delete session:', e) } }, [sessionId]) const renameSession = useCallback(async (sid: string, newName: string) => { try { await fetch(`${KLAUSUR_API}/api/v1/ocr-pipeline/sessions/${sid}`, { method: 'PUT', headers: { 'Content-Type': 'application/json' }, body: JSON.stringify({ name: newName }), }) setSessions((prev) => prev.map((s) => (s.id === sid ? 
{ ...s, name: newName } : s))) if (sessionId === sid) setSessionName(newName) } catch (e) { console.error('Failed to rename session:', e) } setEditingName(null) }, [sessionId]) const updateCategory = useCallback(async (sid: string, category: DocumentCategory) => { try { await fetch(`${KLAUSUR_API}/api/v1/ocr-pipeline/sessions/${sid}`, { method: 'PUT', headers: { 'Content-Type': 'application/json' }, body: JSON.stringify({ document_category: category }), }) setSessions((prev) => prev.map((s) => (s.id === sid ? { ...s, document_category: category } : s))) if (sessionId === sid) setActiveCategory(category) } catch (e) { console.error('Failed to update category:', e) } setEditingCategory(null) }, [sessionId]) const deleteAllSessions = useCallback(async () => { if (!confirm('Alle Sessions loeschen? Dies kann nicht rueckgaengig gemacht werden.')) return try { await fetch(`${KLAUSUR_API}/api/v1/ocr-pipeline/sessions`, { method: 'DELETE' }) setSessions([]) setSessionId(null) setCurrentStep(0) setDocTypeResult(null) setActiveCategory(undefined) setSubSessions([]) setParentSessionId(null) setSteps(PIPELINE_STEPS.map((s, i) => ({ ...s, status: i === 0 ? 'active' : 'pending' }))) } catch (e) { console.error('Failed to delete all sessions:', e) } }, []) const handleStepClick = (index: number) => { if (index <= currentStep || steps[index].status === 'completed') { setCurrentStep(index) } } const goToStep = (step: number) => { setCurrentStep(step) setSteps((prev) => prev.map((s, i) => ({ ...s, status: i < step ? 'completed' : i === step ? 'active' : 'pending', })), ) } const handleNext = () => { if (currentStep >= steps.length - 1) { // Last step completed if (parentSessionId && sessionId !== parentSessionId) { // Sub-session completed — update its status and stay in tab view setSubSessions((prev) => prev.map((s) => s.id === sessionId ? 
{ ...s, status: 'completed', current_step: 10 } : s) ) // Switch back to parent handleSessionChange(parentSessionId) return } // Main session: return to session list setSteps(PIPELINE_STEPS.map((s, i) => ({ ...s, status: i === 0 ? 'active' : 'pending' }))) setCurrentStep(0) setSessionId(null) setSubSessions([]) setParentSessionId(null) loadSessions() return } // Find the next non-skipped step const skipSteps = docTypeResult?.skip_steps || [] let nextStep = currentStep + 1 while (nextStep < steps.length && skipSteps.includes(PIPELINE_STEPS[nextStep]?.id)) { nextStep++ } if (nextStep >= steps.length) nextStep = steps.length - 1 setSteps((prev) => prev.map((s, i) => { if (i === currentStep) return { ...s, status: 'completed' } if (i === nextStep) return { ...s, status: 'active' } // Mark skipped steps between current and next if (i > currentStep && i < nextStep && skipSteps.includes(PIPELINE_STEPS[i]?.id)) { return { ...s, status: 'skipped' } } return s }), ) setCurrentStep(nextStep) } const handleOrientationComplete = (sid: string) => { setSessionId(sid) // Reload session list to show the new session loadSessions() handleNext() } const handleCropNext = async () => { // Auto-detect document type after crop (last image-processing step), then advance if (sessionId) { try { const res = await fetch( `${KLAUSUR_API}/api/v1/ocr-pipeline/sessions/${sessionId}/detect-type`, { method: 'POST' }, ) if (res.ok) { const data: DocumentTypeResult = await res.json() setDocTypeResult(data) // Mark skipped steps immediately const skipSteps = data.skip_steps || [] if (skipSteps.length > 0) { setSteps((prev) => prev.map((s) => skipSteps.includes(s.id) ? 
{ ...s, status: 'skipped' } : s, ), ) } } } catch (e) { console.error('Doc type detection failed:', e) // Not critical — continue without it } } handleNext() } const handleDocTypeChange = (newDocType: DocumentTypeResult['doc_type']) => { if (!docTypeResult) return // Build new skip_steps based on doc type let skipSteps: string[] = [] if (newDocType === 'full_text') { skipSteps = ['columns', 'rows'] } // vocab_table and generic_table: no skips const updated: DocumentTypeResult = { ...docTypeResult, doc_type: newDocType, skip_steps: skipSteps, pipeline: newDocType === 'full_text' ? 'full_page' : 'cell_first', } setDocTypeResult(updated) // Update step statuses setSteps((prev) => prev.map((s) => { if (skipSteps.includes(s.id)) return { ...s, status: 'skipped' as const } if (s.status === 'skipped') return { ...s, status: 'pending' as const } return s }), ) } const handleNewSession = () => { setSessionId(null) setSessionName('') setCurrentStep(0) setDocTypeResult(null) setSubSessions([]) setParentSessionId(null) setSteps(PIPELINE_STEPS.map((s, i) => ({ ...s, status: i === 0 ? 'active' : 'pending' }))) } const handleSessionChange = useCallback((newSessionId: string) => { openSession(newSessionId, true) }, [openSession]) const handleBoxSessionsCreated = useCallback((subs: SubSession[]) => { setSubSessions(subs) if (sessionId) setParentSessionId(sessionId) }, [sessionId]) const stepNames: Record = { 1: 'Orientierung', 2: 'Begradigung', 3: 'Entzerrung', 4: 'Zuschneiden', 5: 'Spalten', 6: 'Zeilen', 7: 'Woerter', 8: 'Korrektur', 9: 'Rekonstruktion', 10: 'Validierung', } const reprocessFromStep = useCallback(async (uiStep: number) => { if (!sessionId) return const dbStep = uiStep + 1 // UI is 0-indexed, DB is 1-indexed if (!confirm(`Ab Schritt ${dbStep} (${stepNames[dbStep] || '?'}) neu verarbeiten? 
Nachfolgende Daten werden geloescht.`)) return try { const res = await fetch(`${KLAUSUR_API}/api/v1/ocr-pipeline/sessions/${sessionId}/reprocess`, { method: 'POST', headers: { 'Content-Type': 'application/json' }, body: JSON.stringify({ from_step: dbStep }), }) if (!res.ok) { const data = await res.json().catch(() => ({})) console.error('Reprocess failed:', data.detail || res.status) return } // Reset UI steps goToStep(uiStep) } catch (e) { console.error('Reprocess error:', e) } // eslint-disable-next-line react-hooks/exhaustive-deps }, [sessionId, goToStep]) const renderStep = () => { switch (currentStep) { case 0: return case 1: return case 2: return case 3: return case 4: return case 5: return case 6: return case 7: return case 8: return case 9: return default: return null } } return (
{/* Session List */}

Sessions ({sessions.length})

{sessions.length > 0 && ( )}
{loadingSessions ? (
Lade Sessions...
) : sessions.length === 0 ? (
Noch keine Sessions vorhanden.
) : (
{sessions.map((s) => { const catInfo = DOCUMENT_CATEGORIES.find(c => c.value === s.document_category) return (
{/* Thumbnail */}
openSession(s.id)} > {/* eslint-disable-next-line @next/next/no-img-element */} { (e.target as HTMLImageElement).style.display = 'none' }} />
{/* Info */}
openSession(s.id)}> {editingName === s.id ? ( setEditNameValue(e.target.value)} onBlur={() => renameSession(s.id, editNameValue)} onKeyDown={(e) => { if (e.key === 'Enter') renameSession(s.id, editNameValue) if (e.key === 'Escape') setEditingName(null) }} onClick={(e) => e.stopPropagation()} className="w-full px-1 py-0.5 text-sm border rounded dark:bg-gray-700 dark:border-gray-600" /> ) : (
{s.name || s.filename}
)} {/* ID row */}
{new Date(s.created_at).toLocaleDateString('de-DE', { day: '2-digit', month: '2-digit', year: '2-digit', hour: '2-digit', minute: '2-digit' })} Schritt {s.current_step}: {stepNames[s.current_step] || '?'}
{/* Badges */}
e.stopPropagation()}> {/* Category Badge */} {/* Doc Type Badge (read-only) */} {s.doc_type && ( {s.doc_type} )}
{/* Action buttons */}
{/* Category dropdown (inline) */} {editingCategory === s.id && (
e.stopPropagation()} > {DOCUMENT_CATEGORIES.map((cat) => ( ))}
)}
) })}
)}
{/* Active session info */} {sessionId && sessionName && (
Aktive Session: {sessionName} {activeCategory && (() => { const cat = DOCUMENT_CATEGORIES.find(c => c.value === activeCategory) return cat ? {cat.icon} {cat.label} : null })()} {docTypeResult && ( {docTypeResult.doc_type} )}
)} {subSessions.length > 0 && parentSessionId && sessionId && ( )}
{renderStep()}
) }