Page-split now creates independent sessions (no parent_session_id), parent marked as status='split' and hidden from list. Navigation uses useSearchParams for URL-based step tracking (browser back/forward works). page.tsx reduced from 684 to 443 lines via usePipelineNavigation hook. Box sub-sessions (column detection) remain unchanged. Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
444 lines
19 KiB
TypeScript
444 lines
19 KiB
TypeScript
'use client'
|
|
|
|
import { Suspense, useCallback, useEffect, useState } from 'react'
|
|
import { PagePurpose } from '@/components/common/PagePurpose'
|
|
import { PipelineStepper } from '@/components/ocr-pipeline/PipelineStepper'
|
|
import { StepOrientation } from '@/components/ocr-pipeline/StepOrientation'
|
|
import { StepCrop } from '@/components/ocr-pipeline/StepCrop'
|
|
import { StepDeskew } from '@/components/ocr-pipeline/StepDeskew'
|
|
import { StepDewarp } from '@/components/ocr-pipeline/StepDewarp'
|
|
import { StepStructureDetection } from '@/components/ocr-pipeline/StepStructureDetection'
|
|
import { StepColumnDetection } from '@/components/ocr-pipeline/StepColumnDetection'
|
|
import { StepRowDetection } from '@/components/ocr-pipeline/StepRowDetection'
|
|
import { StepWordRecognition } from '@/components/ocr-pipeline/StepWordRecognition'
|
|
import { StepLlmReview } from '@/components/ocr-pipeline/StepLlmReview'
|
|
import { StepReconstruction } from '@/components/ocr-pipeline/StepReconstruction'
|
|
import { StepGroundTruth } from '@/components/ocr-pipeline/StepGroundTruth'
|
|
import { DOCUMENT_CATEGORIES, type SessionListItem, type DocumentTypeResult, type DocumentCategory, type SubSession } from './types'
|
|
import { usePipelineNavigation } from './usePipelineNavigation'
|
|
|
|
// Path prefix for every backend request made in this file; all fetch and thumbnail URLs below start with it.
const KLAUSUR_API = '/klausur-api'
|
|
|
|
/**
 * German display labels for the pipeline steps, keyed by step number (1–11).
 * Looked up with a session's `current_step` when rendering the session list.
 */
const STEP_NAMES: Record<number, string> = {
  1: 'Orientierung',
  2: 'Begradigung',
  3: 'Entzerrung',
  4: 'Zuschneiden',
  5: 'Spalten',
  6: 'Zeilen',
  7: 'Woerter',
  8: 'Struktur',
  9: 'Korrektur',
  10: 'Rekonstruktion',
  11: 'Validierung',
}
|
|
|
|
/**
 * Builds the API URL of a single pipeline session.
 * The id is percent-encoded because it is not guaranteed to be URL-safe
 * (the active id is supplied by the navigation hook, not produced here).
 *
 * @param sid    Session id.
 * @param suffix Optional path/query appended after the id, e.g. `/thumbnail?size=96`.
 */
function sessionUrl(sid: string, suffix = ''): string {
  return `${KLAUSUR_API}/api/v1/ocr-pipeline/sessions/${encodeURIComponent(sid)}${suffix}`
}

/**
 * Main body of the OCR pipeline page.
 *
 * Renders, top to bottom: the page-purpose header, the session list
 * (open / rename / categorise / delete / copy id), an info row for the active
 * session, the pipeline stepper and the component of the current step.
 * Which session and step are active is owned by `usePipelineNavigation`;
 * this component only holds list and inline-editing state.
 */
function OcrPipelineContent() {
  const nav = usePipelineNavigation()
  const [sessions, setSessions] = useState<SessionListItem[]>([])
  const [loadingSessions, setLoadingSessions] = useState(true)
  const [editingName, setEditingName] = useState<string | null>(null)
  const [editNameValue, setEditNameValue] = useState('')
  const [editingCategory, setEditingCategory] = useState<string | null>(null)
  const [sessionName, setSessionName] = useState('')
  const [activeCategory, setActiveCategory] = useState<DocumentCategory | undefined>(undefined)
  // Id of the session whose "copy id" button currently shows the confirmation label.
  const [copiedId, setCopiedId] = useState<string | null>(null)

  /** Fetches the session list; on failure the previous list is kept. */
  const loadSessions = useCallback(async () => {
    setLoadingSessions(true)
    try {
      const res = await fetch(`${KLAUSUR_API}/api/v1/ocr-pipeline/sessions`)
      if (res.ok) {
        const data = await res.json()
        setSessions(Array.isArray(data?.sessions) ? data.sessions : [])
      } else {
        console.error('Failed to load sessions:', res.status)
      }
    } catch (e) {
      console.error('Failed to load sessions:', e)
    } finally {
      setLoadingSessions(false)
    }
  }, [])

  useEffect(() => { void loadSessions() }, [loadSessions])

  // Sync session name and category when nav.sessionId changes.
  useEffect(() => {
    const sid = nav.sessionId
    if (!sid) {
      setSessionName('')
      setActiveCategory(undefined)
      return undefined
    }

    // Guards against a slow response for a previous session overwriting the
    // name/category of the session that is active by the time it arrives.
    let cancelled = false
    const load = async () => {
      try {
        const res = await fetch(sessionUrl(sid))
        if (cancelled || !res.ok) return
        const data = await res.json()
        if (cancelled) return
        setSessionName(data.name || data.filename || '')
        setActiveCategory(data.document_category || undefined)
      } catch (e) {
        // Best effort: the info row simply stays as it was.
        if (!cancelled) console.error('Failed to load session details:', e)
      }
    }
    void load()
    return () => { cancelled = true }
  }, [nav.sessionId])

  const openSession = useCallback((sid: string) => {
    nav.goToSession(sid)
  }, [nav])

  /** Deletes one session; the list is only updated once the server confirmed it. */
  const deleteSession = useCallback(async (sid: string) => {
    try {
      const res = await fetch(sessionUrl(sid), { method: 'DELETE' })
      // fetch() resolves on HTTP errors too. A 404 means the session is already gone.
      if (!res.ok && res.status !== 404) throw new Error(`HTTP ${res.status}`)
      setSessions(prev => prev.filter(s => s.id !== sid))
      if (nav.sessionId === sid) nav.goToSessionList()
    } catch (e) {
      console.error('Failed to delete session:', e)
    }
  }, [nav])

  /** Persists a new session name, then closes the inline editor whether or not it succeeded. */
  const renameSession = useCallback(async (sid: string, newName: string) => {
    try {
      const res = await fetch(sessionUrl(sid), {
        method: 'PUT',
        headers: { 'Content-Type': 'application/json' },
        body: JSON.stringify({ name: newName }),
      })
      if (!res.ok) throw new Error(`HTTP ${res.status}`)
      setSessions(prev => prev.map(s => (s.id === sid ? { ...s, name: newName } : s)))
      if (nav.sessionId === sid) setSessionName(newName)
    } catch (e) {
      console.error('Failed to rename session:', e)
    }
    setEditingName(null)
  }, [nav.sessionId])

  /** Persists a new document category, then closes the dropdown whether or not it succeeded. */
  const updateCategory = useCallback(async (sid: string, category: DocumentCategory) => {
    try {
      const res = await fetch(sessionUrl(sid), {
        method: 'PUT',
        headers: { 'Content-Type': 'application/json' },
        body: JSON.stringify({ document_category: category }),
      })
      if (!res.ok) throw new Error(`HTTP ${res.status}`)
      setSessions(prev => prev.map(s => (s.id === sid ? { ...s, document_category: category } : s)))
      if (nav.sessionId === sid) setActiveCategory(category)
    } catch (e) {
      console.error('Failed to update category:', e)
    }
    setEditingCategory(null)
  }, [nav.sessionId])

  /** Deletes every session after user confirmation; the list is cleared only on server success. */
  const deleteAllSessions = useCallback(async () => {
    if (!confirm('Alle Sessions loeschen? Dies kann nicht rueckgaengig gemacht werden.')) return
    try {
      const res = await fetch(`${KLAUSUR_API}/api/v1/ocr-pipeline/sessions`, { method: 'DELETE' })
      if (!res.ok) throw new Error(`HTTP ${res.status}`)
      setSessions([])
      nav.goToSessionList()
    } catch (e) {
      console.error('Failed to delete all sessions:', e)
    }
  }, [nav])

  /**
   * Copies a session id to the clipboard and shows "Kopiert!" on that row for 1.5 s.
   * The label only appears if the copy actually succeeded.
   */
  const copySessionId = useCallback(async (sid: string) => {
    try {
      // Throws when navigator.clipboard is unavailable; rejects when the write fails.
      await navigator.clipboard.writeText(sid)
      setCopiedId(sid)
      // Only reset if no other row was copied in the meantime.
      setTimeout(() => setCopiedId(prev => (prev === sid ? null : prev)), 1500)
    } catch (e) {
      console.error('Failed to copy session id:', e)
    }
  }, [])

  /** Allows jumping to the current step, any earlier step, or any step already completed. */
  const handleStepClick = (index: number) => {
    const target = nav.steps[index]
    if (!target) return // out-of-range index: nothing to navigate to
    if (index <= nav.currentStepIndex || target.status === 'completed') {
      nav.goToStep(index)
    }
  }

  // Orientation: after upload, refresh the list and open the new session.
  // NOTE(review): the step the session opens at is decided by nav.goToSession — confirm it is deskew.
  const handleOrientationComplete = useCallback(async (sid: string) => {
    void loadSessions()
    nav.goToSession(sid)
  }, [nav, loadSessions])

  // Crop: detect doc type, then advance. Detection failure does not block advancing.
  const handleCropNext = useCallback(async () => {
    if (nav.sessionId) {
      try {
        const res = await fetch(sessionUrl(nav.sessionId, '/detect-type'), { method: 'POST' })
        if (res.ok) {
          const data: DocumentTypeResult = await res.json()
          nav.setDocType(data)
        }
      } catch (e) {
        console.error('Doc type detection failed:', e)
      }
    }
    nav.goToNextStep()
  }, [nav])

  /** Manual doc-type override: 'full_text' skips columns/rows and uses the 'full_page' pipeline. */
  const handleDocTypeChange = (newDocType: DocumentTypeResult['doc_type']) => {
    if (!nav.docTypeResult) return
    const isFullText = newDocType === 'full_text'
    nav.setDocType({
      ...nav.docTypeResult,
      doc_type: newDocType,
      skip_steps: isFullText ? ['columns', 'rows'] : [],
      pipeline: isFullText ? 'full_page' : 'cell_first',
    })
  }

  // Box sub-sessions (column detection) — still supported
  const handleBoxSessionsCreated = useCallback((_subs: SubSession[]) => {
    // Box sub-sessions are tracked by the backend; no client-side state needed anymore
  }, [])

  /** Returns the component for the current step index (0–10), or null for any other index. */
  const renderStep = () => {
    const sid = nav.sessionId
    switch (nav.currentStepIndex) {
      case 0:
        return (
          <StepOrientation
            key={sid}
            sessionId={sid}
            onNext={handleOrientationComplete}
            onSessionList={() => { void loadSessions(); nav.goToSessionList() }}
          />
        )
      case 1:
        return <StepDeskew key={sid} sessionId={sid} onNext={nav.goToNextStep} />
      case 2:
        return <StepDewarp key={sid} sessionId={sid} onNext={nav.goToNextStep} />
      case 3:
        return <StepCrop key={sid} sessionId={sid} onNext={handleCropNext} />
      case 4:
        return <StepColumnDetection sessionId={sid} onNext={nav.goToNextStep} onBoxSessionsCreated={handleBoxSessionsCreated} />
      case 5:
        return <StepRowDetection sessionId={sid} onNext={nav.goToNextStep} />
      case 6:
        return <StepWordRecognition sessionId={sid} onNext={nav.goToNextStep} goToStep={nav.goToStep} />
      case 7:
        return <StepStructureDetection sessionId={sid} onNext={nav.goToNextStep} />
      case 8:
        return <StepLlmReview sessionId={sid} onNext={nav.goToNextStep} />
      case 9:
        return <StepReconstruction sessionId={sid} onNext={nav.goToNextStep} />
      case 10:
        return <StepGroundTruth sessionId={sid} onNext={nav.goToNextStep} />
      default:
        return null
    }
  }

  // Category badge data for the active-session info row (undefined if none/unknown).
  const activeCategoryInfo = activeCategory
    ? DOCUMENT_CATEGORIES.find(c => c.value === activeCategory)
    : undefined

  return (
    <div className="space-y-6">
      <PagePurpose
        title="OCR Pipeline"
        purpose="Schrittweise Seitenrekonstruktion: Scan begradigen, Spalten erkennen, Woerter lokalisieren und die Seite Wort fuer Wort nachbauen. Ziel: 10 Vokabelseiten fehlerfrei rekonstruieren."
        audience={['Entwickler', 'Data Scientists']}
        architecture={{
          services: ['klausur-service (FastAPI)', 'OpenCV', 'Tesseract'],
          databases: ['PostgreSQL Sessions'],
        }}
        relatedPages={[
          { name: 'OCR Vergleich', href: '/ai/ocr-compare', description: 'Methoden-Vergleich' },
          { name: 'OCR-Labeling', href: '/ai/ocr-labeling', description: 'Trainingsdaten' },
        ]}
        defaultCollapsed
      />

      {/* Session List */}
      <div className="bg-white dark:bg-gray-800 rounded-xl border border-gray-200 dark:border-gray-700 p-4">
        <div className="flex items-center justify-between mb-3">
          <h3 className="text-sm font-medium text-gray-700 dark:text-gray-300">
            Sessions ({sessions.length})
          </h3>
          <div className="flex gap-2">
            {sessions.length > 0 && (
              <button
                onClick={deleteAllSessions}
                className="text-xs px-3 py-1.5 text-red-600 hover:bg-red-50 dark:hover:bg-red-900/20 rounded-lg transition-colors"
                title="Alle Sessions loeschen"
              >
                Alle loeschen
              </button>
            )}
            <button
              onClick={() => nav.goToSessionList()}
              className="text-xs px-3 py-1.5 bg-teal-600 text-white rounded-lg hover:bg-teal-700 transition-colors"
            >
              + Neue Session
            </button>
          </div>
        </div>

        {loadingSessions ? (
          <div className="text-sm text-gray-400 py-2">Lade Sessions...</div>
        ) : sessions.length === 0 ? (
          <div className="text-sm text-gray-400 py-2">Noch keine Sessions vorhanden.</div>
        ) : (
          <div className="space-y-1.5 max-h-[320px] overflow-y-auto">
            {sessions.map((s) => {
              const catInfo = DOCUMENT_CATEGORIES.find(c => c.value === s.document_category)
              return (
                <div
                  key={s.id}
                  className={`relative flex items-start gap-3 px-3 py-2.5 rounded-lg text-sm transition-colors cursor-pointer ${
                    nav.sessionId === s.id
                      ? 'bg-teal-50 dark:bg-teal-900/30 border border-teal-200 dark:border-teal-700'
                      : 'hover:bg-gray-50 dark:hover:bg-gray-700/50'
                  }`}
                >
                  {/* Thumbnail */}
                  <div
                    className="flex-shrink-0 w-12 h-12 rounded-md overflow-hidden bg-gray-100 dark:bg-gray-700"
                    onClick={() => openSession(s.id)}
                  >
                    {/* eslint-disable-next-line @next/next/no-img-element */}
                    <img
                      src={sessionUrl(s.id, '/thumbnail?size=96')}
                      alt=""
                      className="w-full h-full object-cover"
                      loading="lazy"
                      onError={(e) => { e.currentTarget.style.display = 'none' }}
                    />
                  </div>

                  {/* Info */}
                  <div className="flex-1 min-w-0" onClick={() => openSession(s.id)}>
                    {editingName === s.id ? (
                      <input
                        autoFocus
                        value={editNameValue}
                        onChange={(e) => setEditNameValue(e.target.value)}
                        onBlur={() => renameSession(s.id, editNameValue)}
                        onKeyDown={(e) => {
                          if (e.key === 'Enter') renameSession(s.id, editNameValue)
                          if (e.key === 'Escape') setEditingName(null)
                        }}
                        onClick={(e) => e.stopPropagation()}
                        className="w-full px-1 py-0.5 text-sm border rounded dark:bg-gray-700 dark:border-gray-600"
                      />
                    ) : (
                      <div className="truncate font-medium text-gray-700 dark:text-gray-300">
                        {s.name || s.filename}
                      </div>
                    )}
                    {/* ID row */}
                    <button
                      onClick={(e) => {
                        e.stopPropagation()
                        void copySessionId(s.id)
                      }}
                      className="text-[10px] font-mono text-gray-400 hover:text-teal-500 transition-colors"
                      title={`Volle ID: ${s.id} — Klick zum Kopieren`}
                    >
                      {copiedId === s.id ? 'Kopiert!' : `ID: ${s.id.slice(0, 8)}`}
                    </button>
                    <div className="text-xs text-gray-400 flex gap-2 mt-0.5">
                      <span>{new Date(s.created_at).toLocaleDateString('de-DE', { day: '2-digit', month: '2-digit', year: '2-digit', hour: '2-digit', minute: '2-digit' })}</span>
                      <span>Schritt {s.current_step}: {STEP_NAMES[s.current_step] || '?'}</span>
                    </div>
                  </div>

                  {/* Badges */}
                  <div className="flex flex-col gap-1 items-end flex-shrink-0" onClick={(e) => e.stopPropagation()}>
                    <button
                      onClick={() => setEditingCategory(editingCategory === s.id ? null : s.id)}
                      className={`text-[10px] px-1.5 py-0.5 rounded-full border transition-colors ${
                        catInfo
                          ? 'bg-teal-50 dark:bg-teal-900/30 border-teal-200 dark:border-teal-700 text-teal-700 dark:text-teal-300'
                          : 'bg-gray-50 dark:bg-gray-700 border-gray-200 dark:border-gray-600 text-gray-400 hover:text-gray-600 dark:hover:text-gray-300'
                      }`}
                      title="Kategorie setzen"
                    >
                      {catInfo ? `${catInfo.icon} ${catInfo.label}` : '+ Kategorie'}
                    </button>
                    {s.doc_type && (
                      <span className="text-[10px] px-1.5 py-0.5 rounded-full bg-gray-100 dark:bg-gray-700 text-gray-500 dark:text-gray-400 border border-gray-200 dark:border-gray-600">
                        {s.doc_type}
                      </span>
                    )}
                  </div>

                  {/* Action buttons */}
                  <div className="flex flex-col gap-0.5 flex-shrink-0">
                    <button
                      onClick={(e) => {
                        e.stopPropagation()
                        setEditNameValue(s.name || s.filename)
                        setEditingName(s.id)
                      }}
                      className="p-1 text-gray-400 hover:text-gray-600 dark:hover:text-gray-300"
                      title="Umbenennen"
                    >
                      <svg className="w-3.5 h-3.5" fill="none" viewBox="0 0 24 24" stroke="currentColor" strokeWidth={2}>
                        <path strokeLinecap="round" strokeLinejoin="round" d="M15.232 5.232l3.536 3.536m-2.036-5.036a2.5 2.5 0 113.536 3.536L6.5 21.036H3v-3.572L16.732 3.732z" />
                      </svg>
                    </button>
                    <button
                      onClick={(e) => {
                        e.stopPropagation()
                        if (confirm('Session loeschen?')) deleteSession(s.id)
                      }}
                      className="p-1 text-gray-400 hover:text-red-500"
                      title="Loeschen"
                    >
                      <svg className="w-3.5 h-3.5" fill="none" viewBox="0 0 24 24" stroke="currentColor" strokeWidth={2}>
                        <path strokeLinecap="round" strokeLinejoin="round" d="M19 7l-.867 12.142A2 2 0 0116.138 21H7.862a2 2 0 01-1.995-1.858L5 7m5 4v6m4-6v6m1-10V4a1 1 0 00-1-1h-4a1 1 0 00-1 1v3M4 7h16" />
                      </svg>
                    </button>
                  </div>

                  {/* Category dropdown */}
                  {editingCategory === s.id && (
                    <div
                      className="absolute right-0 top-full mt-1 z-20 bg-white dark:bg-gray-800 border border-gray-200 dark:border-gray-700 rounded-lg shadow-lg p-2 grid grid-cols-2 gap-1 w-64"
                      onClick={(e) => e.stopPropagation()}
                    >
                      {DOCUMENT_CATEGORIES.map((cat) => (
                        <button
                          key={cat.value}
                          onClick={() => updateCategory(s.id, cat.value)}
                          className={`text-xs px-2 py-1.5 rounded-md text-left transition-colors ${
                            s.document_category === cat.value
                              ? 'bg-teal-100 dark:bg-teal-900/40 text-teal-700 dark:text-teal-300'
                              : 'hover:bg-gray-100 dark:hover:bg-gray-700 text-gray-600 dark:text-gray-400'
                          }`}
                        >
                          {cat.icon} {cat.label}
                        </button>
                      ))}
                    </div>
                  )}
                </div>
              )
            })}
          </div>
        )}
      </div>

      {/* Active session info */}
      {nav.sessionId && sessionName && (
        <div className="flex items-center gap-3 text-sm text-gray-500 dark:text-gray-400">
          <span>Aktive Session: <span className="font-medium text-gray-700 dark:text-gray-300">{sessionName}</span></span>
          {activeCategoryInfo && (
            <span className="text-xs px-2 py-0.5 rounded-full bg-teal-50 dark:bg-teal-900/30 border border-teal-200 dark:border-teal-700 text-teal-700 dark:text-teal-300">{activeCategoryInfo.icon} {activeCategoryInfo.label}</span>
          )}
          {nav.docTypeResult && (
            <span className="text-xs px-2 py-0.5 rounded-full bg-gray-100 dark:bg-gray-700 text-gray-500 dark:text-gray-400 border border-gray-200 dark:border-gray-600">
              {nav.docTypeResult.doc_type}
            </span>
          )}
        </div>
      )}

      <PipelineStepper
        steps={nav.steps}
        currentStep={nav.currentStepIndex}
        onStepClick={handleStepClick}
        onReprocess={nav.sessionId ? nav.reprocessFromStep : undefined}
        docTypeResult={nav.docTypeResult}
        onDocTypeChange={handleDocTypeChange}
      />

      <div className="min-h-[400px]">{renderStep()}</div>
    </div>
  )
}
|
|
|
|
/**
 * Page entry point: renders the pipeline content inside a Suspense boundary
 * and shows a short loading message while that content is suspended.
 * NOTE(review): the boundary is presumably required by the URL-based
 * navigation hook used inside the content component — confirm.
 */
export default function OcrPipelinePage() {
  const loadingFallback = <div className="p-8 text-gray-400">Lade Pipeline...</div>

  return (
    <Suspense fallback={loadingFallback}>
      <OcrPipelineContent />
    </Suspense>
  )
}
|