Add double-page split support to OCR Overlay (Kombi 7 Schritte)
Some checks failed
CI / go-lint (push) Has been skipped
CI / python-lint (push) Has been skipped
CI / nodejs-lint (push) Has been skipped
CI / test-go-school (push) Successful in 24s
CI / test-go-edu-search (push) Successful in 25s
CI / test-python-klausur (push) Failing after 2m5s
CI / test-python-agent-core (push) Successful in 16s
CI / test-nodejs-website (push) Successful in 16s

Page-split detection was implemented only in the regular pipeline page,
not in the OCR Overlay page, where the user actually tests in Kombi
mode. The overlay page now has full sub-session support:

- openSession: handles sub_sessions, parent_session_id, skip logic
  for page-split vs crop-based sub-sessions, preserves current mode
- handleOrientationComplete: async, fetches API to detect sub-sessions
- BoxSessionTabs: shown between stepper and step content
- handleNext: returns to parent after sub-session completion
- handleSessionChange/handleBoxSessionsCreated: session switching

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
Benjamin Admin
2026-03-24 11:48:26 +01:00
parent 14fd8e0b1e
commit 49a36364a8

View File

@@ -15,7 +15,9 @@ import { OverlayReconstruction } from '@/components/ocr-overlay/OverlayReconstru
import { PaddleDirectStep } from '@/components/ocr-overlay/PaddleDirectStep'
import { GridEditor } from '@/components/grid-editor/GridEditor'
import { StepGridReview } from '@/components/ocr-pipeline/StepGridReview'
import { BoxSessionTabs } from '@/components/ocr-pipeline/BoxSessionTabs'
import { OVERLAY_PIPELINE_STEPS, PADDLE_DIRECT_STEPS, KOMBI_STEPS, DOCUMENT_CATEGORIES, dbStepToOverlayUi, type PipelineStep, type SessionListItem, type DocumentCategory } from './types'
import type { SubSession } from '../ocr-pipeline/types'
const KLAUSUR_API = '/klausur-api'
@@ -31,6 +33,8 @@ export default function OcrOverlayPage() {
const [editingCategory, setEditingCategory] = useState<string | null>(null)
const [activeCategory, setActiveCategory] = useState<DocumentCategory | undefined>(undefined)
const [editingActiveCategory, setEditingActiveCategory] = useState(false)
const [subSessions, setSubSessions] = useState<SubSession[]>([])
const [parentSessionId, setParentSessionId] = useState<string | null>(null)
const [isGroundTruth, setIsGroundTruth] = useState(false)
const [gtSaving, setGtSaving] = useState(false)
const [gtMessage, setGtMessage] = useState('')
@@ -65,7 +69,7 @@ export default function OcrOverlayPage() {
}
}
const openSession = useCallback(async (sid: string) => {
const openSession = useCallback(async (sid: string, keepSubSessions?: boolean) => {
try {
const res = await fetch(`${KLAUSUR_API}/api/v1/ocr-pipeline/sessions/${sid}`)
if (!res.ok) return
@@ -77,48 +81,78 @@ export default function OcrOverlayPage() {
setIsGroundTruth(!!data.ground_truth?.build_grid_reference)
setGtMessage('')
// Check if this session was processed with paddle_direct, kombi, or rapid_kombi
// Sub-session handling
if (data.sub_sessions && data.sub_sessions.length > 0) {
setSubSessions(data.sub_sessions)
setParentSessionId(sid)
} else if (data.parent_session_id) {
setParentSessionId(data.parent_session_id)
} else if (!keepSubSessions) {
setSubSessions([])
setParentSessionId(null)
}
const isSubSession = !!data.parent_session_id
// Mode detection for root sessions with word_result
const ocrEngine = data.word_result?.ocr_engine
const isPaddleDirect = ocrEngine === 'paddle_direct'
const isKombi = ocrEngine === 'kombi' || ocrEngine === 'rapid_kombi'
if (isPaddleDirect || isKombi) {
const m = isKombi ? 'kombi' : 'paddle-direct'
const baseSteps = isKombi ? KOMBI_STEPS : PADDLE_DIRECT_STEPS
setMode(m)
let activeMode = mode // keep current mode for sub-sessions
if (!isSubSession && (isPaddleDirect || isKombi)) {
activeMode = isKombi ? 'kombi' : 'paddle-direct'
setMode(activeMode)
} else if (!isSubSession && !ocrEngine) {
// Unprocessed root session: keep the user's selected mode
activeMode = mode
}
// For Kombi: if grid_editor_result exists, jump to grid editor step (6)
// If structure_result exists, jump to grid editor (6)
// If word_result exists, jump to structure step (5)
const baseSteps = activeMode === 'kombi' ? KOMBI_STEPS
: activeMode === 'paddle-direct' ? PADDLE_DIRECT_STEPS
: OVERLAY_PIPELINE_STEPS
// Determine UI step
let uiStep: number
const skipIds: string[] = []
if (!isSubSession && (isPaddleDirect || isKombi)) {
const hasGrid = isKombi && data.grid_editor_result
const hasStructure = isKombi && data.structure_result
const hasWords = isKombi && data.word_result
const activeStep = hasGrid ? 6 : hasStructure ? 6 : hasWords ? 5 : 4
setSteps(
baseSteps.map((s, i) => ({
...s,
status: i < activeStep ? 'completed' : i === activeStep ? 'active' : 'pending',
})),
)
setCurrentStep(activeStep)
uiStep = hasGrid ? 6 : hasStructure ? 6 : data.word_result ? 5 : 4
if (isPaddleDirect) uiStep = data.word_result ? 4 : 4
} else {
setMode('pipeline')
// Map DB step to overlay UI step
const dbStep = data.current_step || 1
const uiStep = dbStepToOverlayUi(dbStep)
if (dbStep <= 2) uiStep = 0
else if (dbStep === 3) uiStep = 1
else if (dbStep === 4) uiStep = 2
else if (dbStep === 5) uiStep = 3
else uiStep = 4
setSteps(
OVERLAY_PIPELINE_STEPS.map((s, i) => ({
...s,
status: i < uiStep ? 'completed' : i === uiStep ? 'active' : 'pending',
})),
)
setCurrentStep(uiStep)
// Sub-session skip logic
if (isSubSession) {
if (dbStep >= 5) {
skipIds.push('orientation', 'deskew', 'dewarp', 'crop')
if (uiStep < 4) uiStep = 4
} else if (dbStep >= 2) {
skipIds.push('orientation')
}
}
}
setSteps(
baseSteps.map((s, i) => ({
...s,
status: skipIds.includes(s.id)
? 'skipped'
: i < uiStep ? 'completed' : i === uiStep ? 'active' : 'pending',
})),
)
setCurrentStep(uiStep)
} catch (e) {
console.error('Failed to open session:', e)
}
}, [])
}, [mode])
// Handle deep-link: ?session=xxx&mode=kombi (from GT Queue page)
useEffect(() => {
@@ -143,6 +177,8 @@ export default function OcrOverlayPage() {
if (sessionId === sid) {
setSessionId(null)
setCurrentStep(0)
setSubSessions([])
setParentSessionId(null)
const baseSteps = mode === 'kombi' ? KOMBI_STEPS : mode === 'paddle-direct' ? PADDLE_DIRECT_STEPS : OVERLAY_PIPELINE_STEPS
setSteps(baseSteps.map((s, i) => ({ ...s, status: i === 0 ? 'active' : 'pending' })))
}
@@ -199,11 +235,21 @@ export default function OcrOverlayPage() {
const handleNext = () => {
if (currentStep >= steps.length - 1) {
// Sub-session completed — switch back to parent
if (parentSessionId && sessionId !== parentSessionId) {
setSubSessions((prev) =>
prev.map((s) => s.id === sessionId ? { ...s, status: 'completed', current_step: 10 } : s)
)
handleSessionChange(parentSessionId)
return
}
// Last step completed — return to session list
const baseSteps = mode === 'kombi' ? KOMBI_STEPS : mode === 'paddle-direct' ? PADDLE_DIRECT_STEPS : OVERLAY_PIPELINE_STEPS
setSteps(baseSteps.map((s, i) => ({ ...s, status: i === 0 ? 'active' : 'pending' })))
setCurrentStep(0)
setSessionId(null)
setSubSessions([])
setParentSessionId(null)
loadSessions()
return
}
@@ -219,16 +265,50 @@ export default function OcrOverlayPage() {
setCurrentStep(nextStep)
}
const handleOrientationComplete = (sid: string) => {
/**
 * Called when the orientation step reports completion for session `sid`.
 *
 * Stores `sid` as the active session and triggers loadSessions(), then fetches
 * the session from the API to see whether it has sub-sessions. If it does, the
 * sub-sessions are stored, `sid` becomes the parent session and the first
 * sub-session is opened instead of advancing. In every other case — no
 * sub-sessions, a non-OK response, or a thrown error — the flow falls through
 * to handleNext().
 */
const handleOrientationComplete = async (sid: string) => {
setSessionId(sid)
loadSessions()
// Check for page-split sub-sessions directly from API
try {
const res = await fetch(`${KLAUSUR_API}/api/v1/ocr-pipeline/sessions/${sid}`)
if (res.ok) {
const data = await res.json()
if (data.sub_sessions?.length > 0) {
// Only id, name, box_index and current_step are copied; any other fields
// on the API objects are dropped here.
// NOTE(review): other code paths store the raw sub_sessions array and set a
// `status` field — confirm that omitting it here is intended.
const subs: SubSession[] = data.sub_sessions.map((s: SubSession) => ({
id: s.id,
name: s.name,
box_index: s.box_index,
current_step: s.current_step,
}))
setSubSessions(subs)
setParentSessionId(sid)
// Second argument is openSession's keepSubSessions flag. The call is not
// awaited, so this handler returns before the sub-session has loaded.
openSession(subs[0].id, true)
return
}
}
} catch (e) {
// Failure to detect sub-sessions is logged only; the normal flow continues below.
console.error('Failed to check for sub-sessions:', e)
}
// No sub-sessions detected (or detection failed): advance as usual.
handleNext()
}
/**
 * Receives the sub-sessions created by a step component and stores them.
 * When a session is currently active, it is recorded as their parent.
 */
const handleBoxSessionsCreated = useCallback(
  (subs: SubSession[]) => {
    setSubSessions(subs)
    // Without an active session there is nothing to register as the parent.
    if (!sessionId) return
    setParentSessionId(sessionId)
  },
  [sessionId],
)
/**
 * Switches the view to the session with the given id.
 * Passes `true` as openSession's keepSubSessions argument.
 */
const handleSessionChange = useCallback(
  (newSessionId: string) => {
    openSession(newSessionId, true)
  },
  [openSession],
)
/**
 * Clears the active session, its name and any sub-session context, and
 * resets the stepper to the first step of the currently selected mode.
 */
const handleNewSession = () => {
  // Drop the active session and everything tied to it.
  setSessionId(null)
  setSessionName('')
  setCurrentStep(0)
  setSubSessions([])
  setParentSessionId(null)

  // Choose the step template that belongs to the current mode.
  const stepTemplate = (() => {
    switch (mode) {
      case 'kombi':
        return KOMBI_STEPS
      case 'paddle-direct':
        return PADDLE_DIRECT_STEPS
      default:
        return OVERLAY_PIPELINE_STEPS
    }
  })()

  // Only the first step is active; all later ones are pending.
  setSteps(
    stepTemplate.map((step, index) => ({
      ...step,
      status: index === 0 ? 'active' : 'pending',
    })),
  )
}
@@ -302,7 +382,7 @@ export default function OcrOverlayPage() {
if (mode === 'paddle-direct' || mode === 'kombi') {
switch (currentStep) {
case 0:
return <StepOrientation sessionId={sessionId} onNext={handleOrientationComplete} />
return <StepOrientation sessionId={sessionId} onNext={handleOrientationComplete} onSubSessionsCreated={handleBoxSessionsCreated} />
case 1:
return <StepDeskew sessionId={sessionId} onNext={handleNext} />
case 2:
@@ -340,7 +420,7 @@ export default function OcrOverlayPage() {
}
switch (currentStep) {
case 0:
return <StepOrientation sessionId={sessionId} onNext={handleOrientationComplete} />
return <StepOrientation sessionId={sessionId} onNext={handleOrientationComplete} onSubSessionsCreated={handleBoxSessionsCreated} />
case 1:
return <StepDeskew sessionId={sessionId} onNext={handleNext} />
case 2:
@@ -635,6 +715,15 @@ export default function OcrOverlayPage() {
onReprocess={mode === 'pipeline' && sessionId != null ? reprocessFromStep : undefined}
/>
{subSessions.length > 0 && parentSessionId && sessionId && (
<BoxSessionTabs
parentSessionId={parentSessionId}
subSessions={subSessions}
activeSessionId={sessionId}
onSessionChange={handleSessionChange}
/>
)}
<div className="min-h-[400px]">{renderStep()}</div>
{/* Ground Truth button bar — visible on last step */}