Add double-page spread detection to frontend pipeline
Some checks failed
CI / go-lint (push) Has been skipped
CI / python-lint (push) Has been skipped
CI / nodejs-lint (push) Has been skipped
CI / test-go-school (push) Successful in 36s
CI / test-go-edu-search (push) Successful in 34s
CI / test-python-klausur (push) Failing after 2m0s
CI / test-python-agent-core (push) Successful in 17s
CI / test-nodejs-website (push) Successful in 18s

After orientation detection, the frontend now automatically calls the
page-split endpoint. When a double-page book spread is detected, two
sub-sessions are created and each goes through the full pipeline
(deskew/dewarp/crop) independently — essential because each page of a
spread tilts differently due to the spine.

Frontend changes:
- StepOrientation: calls POST /page-split after orientation, shows
  split info ("Doppelseite erkannt"), notifies parent of sub-sessions
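- page.tsx: distinguishes page-split sub-sessions (current_step < 5)
  from crop-based sub-sessions (current_step >= 5). Crop-based subs
  skip orientation/deskew/dewarp/crop and jump to the columns step.
  Page-split subs created from the oriented image (current_step 2)
  skip only orientation, not deskew/dewarp/crop; page-split subs
  created from the original image (current_step 1) skip nothing and
  run the full pipeline.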
- page.tsx: handleOrientationComplete opens first sub-session when
  page-split was detected

Backend changes (orientation_crop_api.py):
- page-split endpoint falls back to original image when orientation
  rotated a landscape spread to portrait
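- new start_step parameter for page sub-session creation: 1 if the
  split was made on the original image (each page still needs its own
  orientation), 2 if it was made on the oriented image (ready for deskew)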

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
Benjamin Admin
2026-03-24 11:09:44 +01:00
parent 40815dafd1
commit 247b79674d
3 changed files with 115 additions and 19 deletions

View File

@@ -92,15 +92,24 @@ export default function OcrPipelinePage() {
let uiStep = Math.max(0, dbStep - 1)
const skipSteps = [...(savedDocType?.skip_steps || [])]
// Sub-sessions: image is already cropped, skip pre-processing steps
// Jump directly to columns (UI step 4) unless already further ahead
// Sub-session handling depends on how they were created:
// - Crop-based (current_step >= 5): image already cropped, skip all pre-processing
// - Page-split (current_step 2): orientation done on parent, skip only orientation
// - Page-split from original (current_step 1): needs full pipeline
const isSubSession = !!data.parent_session_id
const SUB_SESSION_SKIP = ['orientation', 'deskew', 'dewarp', 'crop']
if (isSubSession) {
for (const s of SUB_SESSION_SKIP) {
if (!skipSteps.includes(s)) skipSteps.push(s)
if (dbStep >= 5) {
// Crop-based sub-sessions: image already cropped
const SUB_SESSION_SKIP = ['orientation', 'deskew', 'dewarp', 'crop']
for (const s of SUB_SESSION_SKIP) {
if (!skipSteps.includes(s)) skipSteps.push(s)
}
if (uiStep < 4) uiStep = 4 // columns step (index 4)
} else if (dbStep >= 2) {
// Page-split sub-session: parent orientation applied, skip only orientation
if (!skipSteps.includes('orientation')) skipSteps.push('orientation')
}
if (uiStep < 4) uiStep = 4 // columns step (index 4)
// dbStep === 1: page-split from original image, needs full pipeline
}
setSteps(
@@ -245,6 +254,13 @@ export default function OcrPipelinePage() {
setSessionId(sid)
// Reload session list to show the new session
loadSessions()
// If page-split created sub-sessions, open the first one
if (subSessions.length > 0) {
openSession(subSessions[0].id, true)
return
}
handleNext()
}
@@ -365,7 +381,7 @@ export default function OcrPipelinePage() {
const renderStep = () => {
switch (currentStep) {
case 0:
return <StepOrientation sessionId={sessionId} onNext={handleOrientationComplete} />
return <StepOrientation sessionId={sessionId} onNext={handleOrientationComplete} onSubSessionsCreated={handleBoxSessionsCreated} />
case 1:
return <StepDeskew sessionId={sessionId} onNext={handleNext} />
case 2:

View File

@@ -1,19 +1,29 @@
'use client'
import { useCallback, useEffect, useState } from 'react'
import type { OrientationResult, SessionInfo } from '@/app/(admin)/ai/ocr-pipeline/types'
import type { OrientationResult, SessionInfo, SubSession } from '@/app/(admin)/ai/ocr-pipeline/types'
import { ImageCompareView } from './ImageCompareView'
const KLAUSUR_API = '/klausur-api'
/**
 * Shape of the JSON body that StepOrientation reads from the
 * POST `.../sessions/{id}/page-split` response.
 * Every field except `multi_page` is optional in this type.
 */
interface PageSplitResult {
/** True when the backend detected a multi-page (double-page) spread. */
multi_page: boolean
/** Number of detected pages; rendered in the "Doppelseite erkannt" info box. */
page_count?: number
/** One entry per page sub-session; `page_index` is forwarded to the parent as `box_index`. */
sub_sessions?: { id: string; name: string; page_index: number }[]
/**
 * True when the spread was only found on the original (pre-orientation) image.
 * The frontend then reports `current_step` 1 instead of 2 for the sub-sessions.
 */
used_original?: boolean
/** Detection time in seconds as reported by the backend. */
duration_seconds?: number
}
/** Props accepted by the StepOrientation component. */
interface StepOrientationProps {
/** Id of an already existing session, if any (received by the component as `existingSessionId`). */
sessionId?: string | null
/** Invoked with the current session id when the user clicks the next button. */
onNext: (sessionId: string) => void
/**
 * Optional. Invoked with the page sub-sessions when the page-split call
 * reports a multi-page spread that includes sub-sessions.
 */
onSubSessionsCreated?: (subs: SubSession[]) => void
}
export function StepOrientation({ sessionId: existingSessionId, onNext }: StepOrientationProps) {
export function StepOrientation({ sessionId: existingSessionId, onNext, onSubSessionsCreated }: StepOrientationProps) {
const [session, setSession] = useState<SessionInfo | null>(null)
const [orientationResult, setOrientationResult] = useState<OrientationResult | null>(null)
const [pageSplitResult, setPageSplitResult] = useState<PageSplitResult | null>(null)
const [uploading, setUploading] = useState(false)
const [detecting, setDetecting] = useState(false)
const [error, setError] = useState<string | null>(null)
@@ -92,13 +102,38 @@ export function StepOrientation({ sessionId: existingSessionId, onNext }: StepOr
corrected: orientData.corrected,
duration_seconds: orientData.duration_seconds,
})
// Auto-trigger page-split detection (double-page book spreads)
try {
const splitRes = await fetch(
`${KLAUSUR_API}/api/v1/ocr-pipeline/sessions/${data.session_id}/page-split`,
{ method: 'POST' },
)
if (splitRes.ok) {
const splitData: PageSplitResult = await splitRes.json()
setPageSplitResult(splitData)
if (splitData.multi_page && splitData.sub_sessions && onSubSessionsCreated) {
onSubSessionsCreated(
splitData.sub_sessions.map((s) => ({
id: s.id,
name: s.name,
box_index: s.page_index,
current_step: splitData.used_original ? 1 : 2,
}))
)
}
}
} catch (e) {
console.error('Page-split detection failed:', e)
// Not critical — continue as single page
}
} catch (e) {
setError(e instanceof Error ? e.message : 'Unbekannter Fehler')
} finally {
setUploading(false)
setDetecting(false)
}
}, [sessionName])
}, [sessionName, onSubSessionsCreated])
const handleDrop = useCallback((e: React.DragEvent) => {
e.preventDefault()
@@ -225,6 +260,29 @@ export function StepOrientation({ sessionId: existingSessionId, onNext }: StepOr
</div>
)}
{/* Page-split result */}
{pageSplitResult?.multi_page && (
<div className="bg-blue-50 dark:bg-blue-900/20 rounded-lg border border-blue-200 dark:border-blue-700 p-4">
<div className="text-sm font-medium text-blue-700 dark:text-blue-300">
Doppelseite erkannt {pageSplitResult.page_count} Seiten
</div>
<p className="text-xs text-blue-600 dark:text-blue-400 mt-1">
Jede Seite wird einzeln durch die Pipeline (Begradigung, Entzerrung, Zuschnitt, ...) verarbeitet.
{pageSplitResult.used_original && ' (Seitentrennung auf dem Originalbild, da die Orientierung die Doppelseite gedreht hat.)'}
</p>
<div className="flex gap-2 mt-2">
{pageSplitResult.sub_sessions?.map((s) => (
<span
key={s.id}
className="text-xs px-2 py-1 rounded-md bg-blue-100 dark:bg-blue-800/40 text-blue-700 dark:text-blue-300"
>
{s.name}
</span>
))}
</div>
</div>
)}
{/* Next button */}
{orientationResult && (
<div className="flex justify-end">
@@ -232,7 +290,7 @@ export function StepOrientation({ sessionId: existingSessionId, onNext }: StepOr
onClick={() => onNext(session.session_id)}
className="px-6 py-2 bg-teal-600 text-white rounded-lg hover:bg-teal-700 font-medium transition-colors"
>
Weiter &rarr;
{pageSplitResult?.multi_page ? 'Seiten verarbeiten' : 'Weiter'} &rarr;
</button>
</div>
)}

View File

@@ -191,6 +191,23 @@ async def detect_page_split(session_id: str):
t0 = time.time()
page_splits = detect_page_splits(img_bgr)
used_original = False
if not page_splits or len(page_splits) < 2:
# Orientation may have rotated a landscape double-page spread to
# portrait. Try the original (pre-orientation) image as fallback.
orig_bgr = cached.get("original_bgr")
if orig_bgr is not None and orig_bgr is not img_bgr:
page_splits_orig = detect_page_splits(orig_bgr)
if page_splits_orig and len(page_splits_orig) >= 2:
logger.info(
"OCR Pipeline: page-split session %s: spread detected on "
"ORIGINAL (orientation rotated it away)",
session_id,
)
img_bgr = orig_bgr
page_splits = page_splits_orig
used_original = True
if not page_splits or len(page_splits) < 2:
duration = time.time() - t0
@@ -204,9 +221,12 @@ async def detect_page_split(session_id: str):
"duration_seconds": round(duration, 2),
}
# Multi-page spread detected — create sub-sessions for full pipeline
# Multi-page spread detected — create sub-sessions for full pipeline.
# start_step=2 means "ready for deskew" (orientation already applied).
# start_step=1 means "needs orientation too" (split from original image).
start_step = 1 if used_original else 2
sub_sessions = await _create_page_sub_sessions_full(
session_id, cached, img_bgr, page_splits,
session_id, cached, img_bgr, page_splits, start_step=start_step,
)
duration = time.time() - t0
@@ -214,6 +234,7 @@ async def detect_page_split(session_id: str):
"multi_page": True,
"page_count": len(page_splits),
"page_splits": page_splits,
"used_original": used_original,
"duration_seconds": round(duration, 2),
}
@@ -475,13 +496,14 @@ async def _create_page_sub_sessions_full(
parent_cached: dict,
full_img_bgr: np.ndarray,
page_splits: List[Dict[str, Any]],
start_step: int = 2,
) -> List[Dict[str, Any]]:
"""Create sub-sessions for each page with RAW regions for full pipeline processing.
Unlike ``_create_page_sub_sessions`` (used by the crop step), these
sub-sessions store the *uncropped* page region and start at
``current_step=2`` (ready for deskew). Each page therefore goes through
its own deskew → dewarp → crop → columns → rows → words → grid pipeline,
``start_step`` (default 2 = ready for deskew; 1 if orientation still
needed). Each page goes through its own pipeline independently,
which is essential for book spreads where each page has a different tilt.
"""
# Idempotent: reuse existing sub-sessions
@@ -522,9 +544,9 @@ async def _create_page_sub_sessions_full(
box_index=pi,
)
# Start at step 2 (deskew) — orientation was already applied to the
# whole spread before splitting.
await update_session_db(sub_id, current_step=2)
# start_step=2 → ready for deskew (orientation already done on spread)
# start_step=1 → needs its own orientation (split from original image)
await update_session_db(sub_id, current_step=start_step)
# Cache the BGR so the pipeline can start immediately
_cache[sub_id] = {
@@ -542,7 +564,7 @@ async def _create_page_sub_sessions_full(
"deskew_result": None,
"dewarp_result": None,
"ground_truth": {},
"current_step": 2,
"current_step": start_step,
}
rh, rw = page_bgr.shape[:2]