From f931091b5783efb943ad15bfe8c2fbae828d2f7f Mon Sep 17 00:00:00 2001 From: Benjamin Admin Date: Tue, 24 Mar 2026 17:05:33 +0100 Subject: [PATCH] refactor: independent sessions for page-split + URL-based pipeline navigation Page-split now creates independent sessions (no parent_session_id), parent marked as status='split' and hidden from list. Navigation uses useSearchParams for URL-based step tracking (browser back/forward works). page.tsx reduced from 684 to 443 lines via usePipelineNavigation hook. Box sub-sessions (column detection) remain unchanged. Co-Authored-By: Claude Opus 4.6 --- .../app/(admin)/ai/ocr-overlay/page.tsx | 4 +- .../app/(admin)/ai/ocr-pipeline/page.tsx | 463 +++++------------- .../app/(admin)/ai/ocr-pipeline/types.ts | 3 +- .../ai/ocr-pipeline/usePipelineNavigation.ts | 225 +++++++++ .../ocr-pipeline/BoxSessionTabs.tsx | 5 +- .../ocr-pipeline/StepOrientation.tsx | 43 +- .../backend/ocr_pipeline_session_store.py | 2 +- .../backend/ocr_pipeline_sessions.py | 4 +- .../backend/orientation_crop_api.py | 10 +- 9 files changed, 368 insertions(+), 391 deletions(-) create mode 100644 admin-lehrer/app/(admin)/ai/ocr-pipeline/usePipelineNavigation.ts diff --git a/admin-lehrer/app/(admin)/ai/ocr-overlay/page.tsx b/admin-lehrer/app/(admin)/ai/ocr-overlay/page.tsx index 52b0df2..8db122e 100644 --- a/admin-lehrer/app/(admin)/ai/ocr-overlay/page.tsx +++ b/admin-lehrer/app/(admin)/ai/ocr-overlay/page.tsx @@ -383,7 +383,7 @@ export default function OcrOverlayPage() { if (mode === 'paddle-direct' || mode === 'kombi') { switch (currentStep) { case 0: - return + return { loadSessions(); setSessionId(null) }} /> case 1: return case 2: @@ -421,7 +421,7 @@ export default function OcrOverlayPage() { } switch (currentStep) { case 0: - return + return { loadSessions(); setSessionId(null) }} /> case 1: return case 2: diff --git a/admin-lehrer/app/(admin)/ai/ocr-pipeline/page.tsx b/admin-lehrer/app/(admin)/ai/ocr-pipeline/page.tsx index a573ef1..9214a09 100644 --- a/admin-lehrer/app/(admin)/ai/ocr-pipeline/page.tsx +++ b/admin-lehrer/app/(admin)/ai/ocr-pipeline/page.tsx @@ -1,6 +1,6 @@ 'use client' -import { useCallback, useEffect, useState } from 'react' +import { Suspense, useCallback, useEffect, useState } from 'react' import { PagePurpose } from '@/components/common/PagePurpose' import { PipelineStepper } from '@/components/ocr-pipeline/PipelineStepper' import { StepOrientation } from '@/components/ocr-pipeline/StepOrientation' @@ -14,37 +14,28 @@ import { StepWordRecognition } from '@/components/ocr-pipeline/StepWordRecogniti import { StepLlmReview } from '@/components/ocr-pipeline/StepLlmReview' import { StepReconstruction } from '@/components/ocr-pipeline/StepReconstruction' import { StepGroundTruth } from '@/components/ocr-pipeline/StepGroundTruth' -import { BoxSessionTabs } from '@/components/ocr-pipeline/BoxSessionTabs' -import { PIPELINE_STEPS, DOCUMENT_CATEGORIES, type PipelineStep, type SessionListItem, type DocumentTypeResult, type DocumentCategory, type SubSession } from './types' +import { DOCUMENT_CATEGORIES, type SessionListItem, type DocumentTypeResult, type DocumentCategory, type SubSession } from './types' +import { usePipelineNavigation } from './usePipelineNavigation' const KLAUSUR_API = '/klausur-api' -export default function OcrPipelinePage() { - const [currentStep, setCurrentStep] = useState(0) - const [sessionId, setSessionId] = useState(null) - const [sessionName, setSessionName] = useState('') +const STEP_NAMES: Record = { + 1: 'Orientierung', 2: 'Begradigung', 3: 'Entzerrung', 4: 'Zuschneiden', + 5: 'Spalten', 6: 'Zeilen', 7: 'Woerter', 8: 'Struktur', + 9: 'Korrektur', 10: 'Rekonstruktion', 11: 'Validierung', +} + +function OcrPipelineContent() { + const nav = usePipelineNavigation() const [sessions, setSessions] = useState([]) const [loadingSessions, setLoadingSessions] = useState(true) const [editingName, setEditingName] = useState(null) const [editNameValue, setEditNameValue] = useState('') const [editingCategory, setEditingCategory] = useState(null) - const [docTypeResult, setDocTypeResult] = useState(null) + const [sessionName, setSessionName] = useState('') const [activeCategory, setActiveCategory] = useState(undefined) - const [subSessions, setSubSessions] = useState([]) - const [parentSessionId, setParentSessionId] = useState(null) - const [steps, setSteps] = useState( - PIPELINE_STEPS.map((s, i) => ({ - ...s, - status: i === 0 ? 'active' : 'pending', - })), - ) - // Load session list on mount - useEffect(() => { - loadSessions() - }, []) - - const loadSessions = async () => { + const loadSessions = useCallback(async () => { setLoadingSessions(true) try { const res = await fetch(`${KLAUSUR_API}/api/v1/ocr-pipeline/sessions`) @@ -57,103 +48,42 @@ export default function OcrPipelinePage() { } finally { setLoadingSessions(false) } - } - - const openSession = useCallback(async (sid: string, keepSubSessions?: boolean) => { - try { - const res = await fetch(`${KLAUSUR_API}/api/v1/ocr-pipeline/sessions/${sid}`) - if (!res.ok) return - const data = await res.json() - - setSessionId(sid) - setSessionName(data.name || data.filename || '') - setActiveCategory(data.document_category || undefined) - - // Sub-session handling - if (data.sub_sessions && data.sub_sessions.length > 0) { - setSubSessions(data.sub_sessions) - setParentSessionId(sid) - // Parent has sub-sessions — open the first incomplete one (or most advanced if all done) - const incomplete = data.sub_sessions.find( - (s: SubSession) => !s.current_step || s.current_step < 10, - ) - const target = incomplete || [...data.sub_sessions].sort( - (a: SubSession, b: SubSession) => (b.current_step || 0) - (a.current_step || 0), - )[0] - if (target) { - openSession(target.id, true) - return - } - } else if (data.parent_session_id) { - // This is a sub-session — keep parent info but don't reset sub-session list - setParentSessionId(data.parent_session_id) - } else if (!keepSubSessions) { - setSubSessions([]) - setParentSessionId(null) - } - - // Restore doc type result if available - const savedDocType: DocumentTypeResult | null = data.doc_type_result || null - setDocTypeResult(savedDocType) - - // Determine which step to jump to based on current_step - const dbStep = data.current_step || 1 - // DB steps: 1=start, 2=orientation, 3=deskew, 4=dewarp, 5=crop, 6=columns, ... - // UI steps are 0-indexed: 0=orientation, 1=deskew, 2=dewarp, 3=crop, 4=columns, ... - let uiStep = Math.max(0, dbStep - 1) - const skipSteps = [...(savedDocType?.skip_steps || [])] - - // Sub-session handling depends on how they were created: - // - Crop-based (current_step >= 5): image already cropped, skip all pre-processing - // - Page-split (current_step 2): orientation done on parent, skip only orientation - // - Page-split from original (current_step 1): needs full pipeline - const isSubSession = !!data.parent_session_id - if (isSubSession) { - if (dbStep >= 5) { - // Crop-based sub-sessions: image already cropped - const SUB_SESSION_SKIP = ['orientation', 'deskew', 'dewarp', 'crop'] - for (const s of SUB_SESSION_SKIP) { - if (!skipSteps.includes(s)) skipSteps.push(s) - } - if (uiStep < 4) uiStep = 4 // columns step (index 4) - } else if (dbStep >= 2) { - // Page-split sub-session: parent orientation applied, skip only orientation - if (!skipSteps.includes('orientation')) skipSteps.push('orientation') - if (uiStep < 1) uiStep = 1 // advance past skipped orientation to deskew - } - // dbStep === 1: page-split from original image, needs full pipeline - } - - setSteps( - PIPELINE_STEPS.map((s, i) => ({ - ...s, - status: skipSteps.includes(s.id) - ? 'skipped' - : i < uiStep ? 'completed' : i === uiStep ? 'active' : 'pending', - })), - ) - setCurrentStep(uiStep) - } catch (e) { - console.error('Failed to open session:', e) - } }, []) + useEffect(() => { loadSessions() }, [loadSessions]) + + // Sync session name when nav.sessionId changes + useEffect(() => { + if (!nav.sessionId) { + setSessionName('') + setActiveCategory(undefined) + return + } + const load = async () => { + try { + const res = await fetch(`${KLAUSUR_API}/api/v1/ocr-pipeline/sessions/${nav.sessionId}`) + if (!res.ok) return + const data = await res.json() + setSessionName(data.name || data.filename || '') + setActiveCategory(data.document_category || undefined) + } catch { /* ignore */ } + } + load() + }, [nav.sessionId]) + + const openSession = useCallback((sid: string) => { + nav.goToSession(sid) + }, [nav]) + const deleteSession = useCallback(async (sid: string) => { try { await fetch(`${KLAUSUR_API}/api/v1/ocr-pipeline/sessions/${sid}`, { method: 'DELETE' }) - setSessions((prev) => prev.filter((s) => s.id !== sid)) - if (sessionId === sid) { - setSessionId(null) - setCurrentStep(0) - setDocTypeResult(null) - setSubSessions([]) - setParentSessionId(null) - setSteps(PIPELINE_STEPS.map((s, i) => ({ ...s, status: i === 0 ? 'active' : 'pending' }))) - } + setSessions(prev => prev.filter(s => s.id !== sid)) + if (nav.sessionId === sid) nav.goToSessionList() } catch (e) { console.error('Failed to delete session:', e) } - }, [sessionId]) + }, [nav]) const renameSession = useCallback(async (sid: string, newName: string) => { try { @@ -162,13 +92,13 @@ export default function OcrPipelinePage() { headers: { 'Content-Type': 'application/json' }, body: JSON.stringify({ name: newName }), }) - setSessions((prev) => prev.map((s) => (s.id === sid ? { ...s, name: newName } : s))) - if (sessionId === sid) setSessionName(newName) + setSessions(prev => prev.map(s => (s.id === sid ? { ...s, name: newName } : s))) + if (nav.sessionId === sid) setSessionName(newName) } catch (e) { console.error('Failed to rename session:', e) } setEditingName(null) - }, [sessionId]) + }, [nav.sessionId]) const updateCategory = useCallback(async (sid: string, category: DocumentCategory) => { try { @@ -177,275 +107,107 @@ export default function OcrPipelinePage() { headers: { 'Content-Type': 'application/json' }, body: JSON.stringify({ document_category: category }), }) - setSessions((prev) => prev.map((s) => (s.id === sid ? { ...s, document_category: category } : s))) - if (sessionId === sid) setActiveCategory(category) + setSessions(prev => prev.map(s => (s.id === sid ? { ...s, document_category: category } : s))) + if (nav.sessionId === sid) setActiveCategory(category) } catch (e) { console.error('Failed to update category:', e) } setEditingCategory(null) - }, [sessionId]) + }, [nav.sessionId]) const deleteAllSessions = useCallback(async () => { if (!confirm('Alle Sessions loeschen? Dies kann nicht rueckgaengig gemacht werden.')) return try { await fetch(`${KLAUSUR_API}/api/v1/ocr-pipeline/sessions`, { method: 'DELETE' }) setSessions([]) - setSessionId(null) - setCurrentStep(0) - setDocTypeResult(null) - setActiveCategory(undefined) - setSubSessions([]) - setParentSessionId(null) - setSteps(PIPELINE_STEPS.map((s, i) => ({ ...s, status: i === 0 ? 'active' : 'pending' }))) + nav.goToSessionList() } catch (e) { console.error('Failed to delete all sessions:', e) } - }, []) + }, [nav]) const handleStepClick = (index: number) => { - if (index <= currentStep || steps[index].status === 'completed') { - setCurrentStep(index) + if (index <= nav.currentStepIndex || nav.steps[index].status === 'completed') { + nav.goToStep(index) } } - const goToStep = (step: number) => { - setCurrentStep(step) - setSteps((prev) => - prev.map((s, i) => ({ - ...s, - status: i < step ? 'completed' : i === step ? 'active' : 'pending', - })), - ) - } - - const handleNext = () => { - if (currentStep >= steps.length - 1) { - // Last step completed - if (parentSessionId && sessionId !== parentSessionId) { - // Sub-session completed — mark it and find next incomplete one - const updatedSubs = subSessions.map((s) => - s.id === sessionId ? { ...s, status: 'completed' as const, current_step: 10 } : s, - ) - setSubSessions(updatedSubs) - - // Find next incomplete sub-session - const nextIncomplete = updatedSubs.find( - (s) => s.id !== sessionId && (!s.current_step || s.current_step < 10), - ) - if (nextIncomplete) { - // Open next incomplete sub-session - openSession(nextIncomplete.id, true) - } else { - // All sub-sessions done — return to session list - setSteps(PIPELINE_STEPS.map((s, i) => ({ ...s, status: i === 0 ? 'active' : 'pending' }))) - setCurrentStep(0) - setSessionId(null) - setSubSessions([]) - setParentSessionId(null) - loadSessions() - } - return - } - // Main session: return to session list - setSteps(PIPELINE_STEPS.map((s, i) => ({ ...s, status: i === 0 ? 'active' : 'pending' }))) - setCurrentStep(0) - setSessionId(null) - setSubSessions([]) - setParentSessionId(null) - loadSessions() - return - } - - // Find the next non-skipped step - const skipSteps = docTypeResult?.skip_steps || [] - let nextStep = currentStep + 1 - while (nextStep < steps.length && skipSteps.includes(PIPELINE_STEPS[nextStep]?.id)) { - nextStep++ - } - if (nextStep >= steps.length) nextStep = steps.length - 1 - - setSteps((prev) => - prev.map((s, i) => { - if (i === currentStep) return { ...s, status: 'completed' } - if (i === nextStep) return { ...s, status: 'active' } - // Mark skipped steps between current and next - if (i > currentStep && i < nextStep && skipSteps.includes(PIPELINE_STEPS[i]?.id)) { - return { ...s, status: 'skipped' } - } - return s - }), - ) - setCurrentStep(nextStep) - } - - const handleOrientationComplete = async (sid: string) => { - setSessionId(sid) + // Orientation: after upload, navigate to session at deskew step + const handleOrientationComplete = useCallback(async (sid: string) => { loadSessions() + // Navigate directly to deskew step (index 1) for this session + nav.goToSession(sid) + }, [nav, loadSessions]) - // Check for page-split sub-sessions directly from API - // (React state may not be committed yet due to batching) - try { - const res = await fetch(`${KLAUSUR_API}/api/v1/ocr-pipeline/sessions/${sid}`) - if (res.ok) { - const data = await res.json() - if (data.sub_sessions?.length > 0) { - const subs: SubSession[] = data.sub_sessions.map((s: SubSession) => ({ - id: s.id, - name: s.name, - box_index: s.box_index, - current_step: s.current_step, - })) - setSubSessions(subs) - setParentSessionId(sid) - openSession(subs[0].id, true) - return - } - } - } catch (e) { - console.error('Failed to check for sub-sessions:', e) - } - - handleNext() - } - - const handleCropNext = async () => { - // Auto-detect document type after crop (last image-processing step), then advance - if (sessionId) { + // Crop: detect doc type then advance + const handleCropNext = useCallback(async () => { + if (nav.sessionId) { try { const res = await fetch( - `${KLAUSUR_API}/api/v1/ocr-pipeline/sessions/${sessionId}/detect-type`, + `${KLAUSUR_API}/api/v1/ocr-pipeline/sessions/${nav.sessionId}/detect-type`, { method: 'POST' }, ) if (res.ok) { const data: DocumentTypeResult = await res.json() - setDocTypeResult(data) - - // Mark skipped steps immediately - const skipSteps = data.skip_steps || [] - if (skipSteps.length > 0) { - setSteps((prev) => - prev.map((s) => - skipSteps.includes(s.id) ? { ...s, status: 'skipped' } : s, - ), - ) - } + nav.setDocType(data) } } catch (e) { console.error('Doc type detection failed:', e) - // Not critical — continue without it } } - handleNext() - } + nav.goToNextStep() + }, [nav]) const handleDocTypeChange = (newDocType: DocumentTypeResult['doc_type']) => { - if (!docTypeResult) return - - // Build new skip_steps based on doc type + if (!nav.docTypeResult) return let skipSteps: string[] = [] - if (newDocType === 'full_text') { - skipSteps = ['columns', 'rows'] - } - // vocab_table and generic_table: no skips + if (newDocType === 'full_text') skipSteps = ['columns', 'rows'] - const updated: DocumentTypeResult = { - ...docTypeResult, + nav.setDocType({ + ...nav.docTypeResult, doc_type: newDocType, skip_steps: skipSteps, pipeline: newDocType === 'full_text' ? 'full_page' : 'cell_first', - } - setDocTypeResult(updated) - - // Update step statuses - setSteps((prev) => - prev.map((s) => { - if (skipSteps.includes(s.id)) return { ...s, status: 'skipped' as const } - if (s.status === 'skipped') return { ...s, status: 'pending' as const } - return s - }), - ) + }) } - const handleNewSession = () => { - setSessionId(null) - setSessionName('') - setCurrentStep(0) - setDocTypeResult(null) - setSubSessions([]) - setParentSessionId(null) - setSteps(PIPELINE_STEPS.map((s, i) => ({ ...s, status: i === 0 ? 'active' : 'pending' }))) - } - - const handleSessionChange = useCallback((newSessionId: string) => { - openSession(newSessionId, true) - }, [openSession]) - - const handleBoxSessionsCreated = useCallback((subs: SubSession[]) => { - setSubSessions(subs) - if (sessionId) setParentSessionId(sessionId) - }, [sessionId]) - - const stepNames: Record = { - 1: 'Orientierung', - 2: 'Begradigung', - 3: 'Entzerrung', - 4: 'Zuschneiden', - 5: 'Spalten', - 6: 'Zeilen', - 7: 'Woerter', - 8: 'Struktur', - 9: 'Korrektur', - 10: 'Rekonstruktion', - 11: 'Validierung', - } - - const reprocessFromStep = useCallback(async (uiStep: number) => { - if (!sessionId) return - const dbStep = uiStep + 1 // UI is 0-indexed, DB is 1-indexed - if (!confirm(`Ab Schritt ${dbStep} (${stepNames[dbStep] || '?'}) neu verarbeiten? Nachfolgende Daten werden geloescht.`)) return - try { - const res = await fetch(`${KLAUSUR_API}/api/v1/ocr-pipeline/sessions/${sessionId}/reprocess`, { - method: 'POST', - headers: { 'Content-Type': 'application/json' }, - body: JSON.stringify({ from_step: dbStep }), - }) - if (!res.ok) { - const data = await res.json().catch(() => ({})) - console.error('Reprocess failed:', data.detail || res.status) - return - } - // Reset UI steps - goToStep(uiStep) - } catch (e) { - console.error('Reprocess error:', e) - } - // eslint-disable-next-line react-hooks/exhaustive-deps - }, [sessionId, goToStep]) + // Box sub-sessions (column detection) — still supported + const handleBoxSessionsCreated = useCallback((_subs: SubSession[]) => { + // Box sub-sessions are tracked by the backend; no client-side state needed anymore + }, []) const renderStep = () => { - switch (currentStep) { + const sid = nav.sessionId + switch (nav.currentStepIndex) { case 0: - return + return ( + { loadSessions(); nav.goToSessionList() }} + /> + ) case 1: - return + return case 2: - return + return case 3: - return + return case 4: - return + return case 5: - return + return case 6: - return + return case 7: - return + return case 8: - return + return case 9: - return + return case 10: - return + return default: return null } @@ -485,7 +247,7 @@ export default function OcrPipelinePage() { )} - {/* Doc Type Badge (read-only) */} {s.doc_type && ( {s.doc_type} @@ -616,7 +376,7 @@ export default function OcrPipelinePage() { - {/* Category dropdown (inline) */} + {/* Category dropdown */} {editingCategory === s.id && (
{/* Active session info */} - {sessionId && sessionName && ( + {nav.sessionId && sessionName && (
Aktive Session: {sessionName} {activeCategory && (() => { const cat = DOCUMENT_CATEGORIES.find(c => c.value === activeCategory) return cat ? {cat.icon} {cat.label} : null })()} - {docTypeResult && ( + {nav.docTypeResult && ( - {docTypeResult.doc_type} + {nav.docTypeResult.doc_type} )}
)} - {subSessions.length > 0 && parentSessionId && sessionId && ( - - )} -
{renderStep()}
) } + +export default function OcrPipelinePage() { + return ( + Lade Pipeline...}> + + + ) +} diff --git a/admin-lehrer/app/(admin)/ai/ocr-pipeline/types.ts b/admin-lehrer/app/(admin)/ai/ocr-pipeline/types.ts index c68f3bd..aa8c057 100644 --- a/admin-lehrer/app/(admin)/ai/ocr-pipeline/types.ts +++ b/admin-lehrer/app/(admin)/ai/ocr-pipeline/types.ts @@ -35,10 +35,9 @@ export interface SessionListItem { doc_type?: string created_at: string updated_at?: string - parent_session_id?: string | null - box_index?: number | null } +/** Box sub-session (from column detection zone_type='box') */ export interface SubSession { id: string name: string diff --git a/admin-lehrer/app/(admin)/ai/ocr-pipeline/usePipelineNavigation.ts b/admin-lehrer/app/(admin)/ai/ocr-pipeline/usePipelineNavigation.ts new file mode 100644 index 0000000..f1f6e13 --- /dev/null +++ b/admin-lehrer/app/(admin)/ai/ocr-pipeline/usePipelineNavigation.ts @@ -0,0 +1,225 @@ +'use client' + +import { useCallback, useEffect, useState } from 'react' +import { useRouter, useSearchParams } from 'next/navigation' +import { PIPELINE_STEPS, type PipelineStep, type PipelineStepStatus, type DocumentTypeResult } from './types' + +const KLAUSUR_API = '/klausur-api' + +export interface PipelineNav { + sessionId: string | null + currentStepIndex: number + currentStepId: string + steps: PipelineStep[] + docTypeResult: DocumentTypeResult | null + + goToNextStep: () => void + goToStep: (index: number) => void + goToSession: (sessionId: string) => void + goToSessionList: () => void + setDocType: (result: DocumentTypeResult) => void + reprocessFromStep: (uiStep: number) => Promise +} + +const STEP_NAMES: Record = { + 1: 'Orientierung', 2: 'Begradigung', 3: 'Entzerrung', 4: 'Zuschneiden', + 5: 'Spalten', 6: 'Zeilen', 7: 'Woerter', 8: 'Struktur', + 9: 'Korrektur', 10: 'Rekonstruktion', 11: 'Validierung', +} + +function buildSteps(uiStep: number, skipSteps: string[]): PipelineStep[] { + return PIPELINE_STEPS.map((s, i) => ({ + ...s, + status: ( + skipSteps.includes(s.id) ? 'skipped' + : i < uiStep ? 'completed' + : i === uiStep ? 'active' + : 'pending' + ) as PipelineStepStatus, + })) +} + +export function usePipelineNavigation(): PipelineNav { + const router = useRouter() + const searchParams = useSearchParams() + + const paramSession = searchParams.get('session') + const paramStep = searchParams.get('step') + + const [sessionId, setSessionId] = useState(paramSession) + const [currentStepIndex, setCurrentStepIndex] = useState(0) + const [docTypeResult, setDocTypeResult] = useState(null) + const [steps, setSteps] = useState(buildSteps(0, [])) + const [loaded, setLoaded] = useState(false) + + // Load session info when session param changes + useEffect(() => { + if (!paramSession) { + setSessionId(null) + setCurrentStepIndex(0) + setDocTypeResult(null) + setSteps(buildSteps(0, [])) + setLoaded(true) + return + } + + const load = async () => { + try { + const res = await fetch(`${KLAUSUR_API}/api/v1/ocr-pipeline/sessions/${paramSession}`) + if (!res.ok) return + const data = await res.json() + + setSessionId(paramSession) + + const savedDocType: DocumentTypeResult | null = data.doc_type_result || null + setDocTypeResult(savedDocType) + + const dbStep = data.current_step || 1 + let uiStep = Math.max(0, dbStep - 1) + const skipSteps = [...(savedDocType?.skip_steps || [])] + + // Box sub-sessions (from column detection) skip pre-processing + const isBoxSubSession = !!data.parent_session_id + if (isBoxSubSession && dbStep >= 5) { + const SUB_SESSION_SKIP = ['orientation', 'deskew', 'dewarp', 'crop'] + for (const s of SUB_SESSION_SKIP) { + if (!skipSteps.includes(s)) skipSteps.push(s) + } + if (uiStep < 4) uiStep = 4 + } + + // If URL has a step param, use that instead + if (paramStep) { + const stepIdx = PIPELINE_STEPS.findIndex(s => s.id === paramStep) + if (stepIdx >= 0) uiStep = stepIdx + } + + setCurrentStepIndex(uiStep) + setSteps(buildSteps(uiStep, skipSteps)) + } catch (e) { + console.error('Failed to load session:', e) + } finally { + setLoaded(true) + } + } + + load() + }, [paramSession, paramStep]) + + const updateUrl = useCallback((sid: string | null, stepIdx?: number) => { + if (!sid) { + router.push('/ai/ocr-pipeline') + return + } + const stepId = stepIdx !== undefined ? PIPELINE_STEPS[stepIdx]?.id : undefined + const params = new URLSearchParams() + params.set('session', sid) + if (stepId) params.set('step', stepId) + router.push(`/ai/ocr-pipeline?${params.toString()}`) + }, [router]) + + const goToNextStep = useCallback(() => { + if (currentStepIndex >= steps.length - 1) { + // Last step — return to session list + setSessionId(null) + setCurrentStepIndex(0) + setDocTypeResult(null) + setSteps(buildSteps(0, [])) + router.push('/ai/ocr-pipeline') + return + } + + const skipSteps = docTypeResult?.skip_steps || [] + let nextStep = currentStepIndex + 1 + while (nextStep < steps.length && skipSteps.includes(PIPELINE_STEPS[nextStep]?.id)) { + nextStep++ + } + if (nextStep >= steps.length) nextStep = steps.length - 1 + + setSteps(prev => + prev.map((s, i) => { + if (i === currentStepIndex) return { ...s, status: 'completed' as PipelineStepStatus } + if (i === nextStep) return { ...s, status: 'active' as PipelineStepStatus } + if (i > currentStepIndex && i < nextStep && skipSteps.includes(PIPELINE_STEPS[i]?.id)) { + return { ...s, status: 'skipped' as PipelineStepStatus } + } + return s + }), + ) + setCurrentStepIndex(nextStep) + if (sessionId) updateUrl(sessionId, nextStep) + }, [currentStepIndex, steps.length, docTypeResult, sessionId, updateUrl, router]) + + const goToStep = useCallback((index: number) => { + setCurrentStepIndex(index) + setSteps(prev => + prev.map((s, i) => ({ + ...s, + status: s.status === 'skipped' ? 'skipped' + : i < index ? 'completed' + : i === index ? 'active' + : 'pending' as PipelineStepStatus, + })), + ) + if (sessionId) updateUrl(sessionId, index) + }, [sessionId, updateUrl]) + + const goToSession = useCallback((sid: string) => { + updateUrl(sid) + }, [updateUrl]) + + const goToSessionList = useCallback(() => { + setSessionId(null) + setCurrentStepIndex(0) + setDocTypeResult(null) + setSteps(buildSteps(0, [])) + router.push('/ai/ocr-pipeline') + }, [router]) + + const setDocType = useCallback((result: DocumentTypeResult) => { + setDocTypeResult(result) + const skipSteps = result.skip_steps || [] + if (skipSteps.length > 0) { + setSteps(prev => + prev.map(s => + skipSteps.includes(s.id) ? { ...s, status: 'skipped' as PipelineStepStatus } : s, + ), + ) + } + }, []) + + const reprocessFromStep = useCallback(async (uiStep: number) => { + if (!sessionId) return + const dbStep = uiStep + 1 + if (!confirm(`Ab Schritt ${dbStep} (${STEP_NAMES[dbStep] || '?'}) neu verarbeiten? Nachfolgende Daten werden geloescht.`)) return + try { + const res = await fetch(`${KLAUSUR_API}/api/v1/ocr-pipeline/sessions/${sessionId}/reprocess`, { + method: 'POST', + headers: { 'Content-Type': 'application/json' }, + body: JSON.stringify({ from_step: dbStep }), + }) + if (!res.ok) { + const data = await res.json().catch(() => ({})) + console.error('Reprocess failed:', data.detail || res.status) + return + } + goToStep(uiStep) + } catch (e) { + console.error('Reprocess error:', e) + } + }, [sessionId, goToStep]) + + return { + sessionId, + currentStepIndex, + currentStepId: PIPELINE_STEPS[currentStepIndex]?.id || 'orientation', + steps, + docTypeResult, + goToNextStep, + goToStep, + goToSession, + goToSessionList, + setDocType, + reprocessFromStep, + } +} diff --git a/admin-lehrer/components/ocr-pipeline/BoxSessionTabs.tsx b/admin-lehrer/components/ocr-pipeline/BoxSessionTabs.tsx index f4322e3..2c7af16 100644 --- a/admin-lehrer/components/ocr-pipeline/BoxSessionTabs.tsx +++ b/admin-lehrer/components/ocr-pipeline/BoxSessionTabs.tsx @@ -21,6 +21,7 @@ function getStatusIcon(sub: SubSession): string { return STATUS_ICONS.pending } +/** Tabs for box sub-sessions (from column detection zone_type='box'). */ export function BoxSessionTabs({ parentSessionId, subSessions, activeSessionId, onSessionChange }: BoxSessionTabsProps) { if (subSessions.length === 0) return null @@ -28,7 +29,6 @@ export function BoxSessionTabs({ parentSessionId, subSessions, activeSessionId, return (
- {/* Main session tab */} ) })} diff --git a/admin-lehrer/components/ocr-pipeline/StepOrientation.tsx b/admin-lehrer/components/ocr-pipeline/StepOrientation.tsx index 9bf0515..9d731c4 100644 --- a/admin-lehrer/components/ocr-pipeline/StepOrientation.tsx +++ b/admin-lehrer/components/ocr-pipeline/StepOrientation.tsx @@ -1,7 +1,7 @@ 'use client' import { useCallback, useEffect, useState } from 'react' -import type { OrientationResult, SessionInfo, SubSession } from '@/app/(admin)/ai/ocr-pipeline/types' +import type { OrientationResult, SessionInfo } from '@/app/(admin)/ai/ocr-pipeline/types' import { ImageCompareView } from './ImageCompareView' const KLAUSUR_API = '/klausur-api' @@ -17,10 +17,10 @@ interface PageSplitResult { interface StepOrientationProps { sessionId?: string | null onNext: (sessionId: string) => void - onSubSessionsCreated?: (subs: SubSession[]) => void + onSessionList?: () => void } -export function StepOrientation({ sessionId: existingSessionId, onNext, onSubSessionsCreated }: StepOrientationProps) { +export function StepOrientation({ sessionId: existingSessionId, onNext, onSessionList }: StepOrientationProps) { const [session, setSession] = useState(null) const [orientationResult, setOrientationResult] = useState(null) const [pageSplitResult, setPageSplitResult] = useState(null) @@ -112,16 +112,6 @@ export function StepOrientation({ sessionId: existingSessionId, onNext, onSubSes if (splitRes.ok) { const splitData: PageSplitResult = await splitRes.json() setPageSplitResult(splitData) - if (splitData.multi_page && splitData.sub_sessions && onSubSessionsCreated) { - onSubSessionsCreated( - splitData.sub_sessions.map((s) => ({ - id: s.id, - name: s.name, - box_index: s.page_index, - current_step: splitData.used_original ? 1 : 2, - })) - ) - } } } catch (e) { console.error('Page-split detection failed:', e) @@ -133,7 +123,7 @@ export function StepOrientation({ sessionId: existingSessionId, onNext, onSubSes setUploading(false) setDetecting(false) } - }, [sessionName, onSubSessionsCreated]) + }, [sessionName]) const handleDrop = useCallback((e: React.DragEvent) => { e.preventDefault() @@ -264,10 +254,10 @@ export function StepOrientation({ sessionId: existingSessionId, onNext, onSubSes {pageSplitResult?.multi_page && (
- Doppelseite erkannt — {pageSplitResult.page_count} Seiten + Doppelseite erkannt — {pageSplitResult.page_count} unabhaengige Sessions erstellt

- Jede Seite wird einzeln durch die Pipeline (Begradigung, Entzerrung, Zuschnitt, ...) verarbeitet. + Jede Seite wird als eigene Session durch die Pipeline verarbeitet. {pageSplitResult.used_original && ' (Seitentrennung auf dem Originalbild, da die Orientierung die Doppelseite gedreht hat.)'}

@@ -286,12 +276,21 @@ export function StepOrientation({ sessionId: existingSessionId, onNext, onSubSes {/* Next button */} {orientationResult && (
- + {pageSplitResult?.multi_page ? ( + + ) : ( + + )}
)} diff --git a/klausur-service/backend/ocr_pipeline_session_store.py b/klausur-service/backend/ocr_pipeline_session_store.py index 1fed396..05e0fb5 100644 --- a/klausur-service/backend/ocr_pipeline_session_store.py +++ b/klausur-service/backend/ocr_pipeline_session_store.py @@ -238,7 +238,7 @@ async def list_sessions_db( """ pool = await get_pool() async with pool.acquire() as conn: - where = "" if include_sub_sessions else "WHERE parent_session_id IS NULL" + where = "" if include_sub_sessions else "WHERE parent_session_id IS NULL AND (status IS NULL OR status != 'split')" rows = await conn.fetch(f""" SELECT id, name, filename, status, current_step, document_category, doc_type, diff --git a/klausur-service/backend/ocr_pipeline_sessions.py b/klausur-service/backend/ocr_pipeline_sessions.py index 89d055b..99372eb 100644 --- a/klausur-service/backend/ocr_pipeline_sessions.py +++ b/klausur-service/backend/ocr_pipeline_sessions.py @@ -191,12 +191,12 @@ async def get_session_info(session_id: str): if session.get("ground_truth"): result["ground_truth"] = session["ground_truth"] - # Sub-session info + # Box sub-session info (zone_type='box' from column detection — NOT page-split) if session.get("parent_session_id"): result["parent_session_id"] = session["parent_session_id"] result["box_index"] = session.get("box_index") else: - # Check for sub-sessions + # Check for box sub-sessions (column detection creates these) subs = await get_sub_sessions(session_id) if subs: result["sub_sessions"] = [ diff --git a/klausur-service/backend/orientation_crop_api.py b/klausur-service/backend/orientation_crop_api.py index a537137..9409070 100644 --- a/klausur-service/backend/orientation_crop_api.py +++ b/klausur-service/backend/orientation_crop_api.py @@ -238,8 +238,8 @@ async def detect_page_split(session_id: str): "duration_seconds": round(duration, 2), } - # Mark parent session as split (store info in crop_result for backward compat) - await update_session_db(session_id, crop_result=split_info) + # Mark parent session as split and hidden from session list + await update_session_db(session_id, crop_result=split_info, status='split') cached["crop_result"] = split_info await _append_pipeline_log(session_id, "page_split", { @@ -346,6 +346,7 @@ async def auto_crop(session_id: str): cropped_png=png_buf.tobytes() if ok else b"", crop_result=crop_info, current_step=5, + status='split', ) logger.info( @@ -461,8 +462,6 @@ async def _create_page_sub_sessions( name=sub_name, filename=parent_filename, original_png=page_png, - parent_session_id=parent_session_id, - box_index=pi, ) # Pre-populate: set cropped = original (already cropped) @@ -540,8 +539,6 @@ async def _create_page_sub_sessions_full( name=sub_name, filename=parent_filename, original_png=page_png, - parent_session_id=parent_session_id, - box_index=pi, ) # start_step=2 → ready for deskew (orientation already done on spread) @@ -553,7 +550,6 @@ async def _create_page_sub_sessions_full( "id": sub_id, "filename": parent_filename, "name": sub_name, - "parent_session_id": parent_session_id, "original_bgr": page_bgr, "oriented_bgr": None, "cropped_bgr": None,