Add double-page spread detection to frontend pipeline
Some checks failed
CI / go-lint (push) Has been skipped
CI / python-lint (push) Has been skipped
CI / nodejs-lint (push) Has been skipped
CI / test-go-school (push) Successful in 36s
CI / test-go-edu-search (push) Successful in 34s
CI / test-python-klausur (push) Failing after 2m0s
CI / test-python-agent-core (push) Successful in 17s
CI / test-nodejs-website (push) Successful in 18s
Some checks failed
CI / go-lint (push) Has been skipped
CI / python-lint (push) Has been skipped
CI / nodejs-lint (push) Has been skipped
CI / test-go-school (push) Successful in 36s
CI / test-go-edu-search (push) Successful in 34s
CI / test-python-klausur (push) Failing after 2m0s
CI / test-python-agent-core (push) Successful in 17s
CI / test-nodejs-website (push) Successful in 18s
After orientation detection, the frontend now automatically calls the
page-split endpoint. When a double-page book spread is detected, two
sub-sessions are created and each goes through the full pipeline
(deskew/dewarp/crop) independently — essential because each page of a
spread tilts differently due to the spine.
Frontend changes:
- StepOrientation: calls POST /page-split after orientation, shows
split info ("Doppelseite erkannt"), notifies parent of sub-sessions
- page.tsx: distinguishes page-split sub-sessions (current_step < 5)
from crop-based sub-sessions (current_step >= 5). Page-split subs
only skip orientation, not deskew/dewarp/crop.
- page.tsx: handleOrientationComplete opens first sub-session when
page-split was detected
Backend changes (orientation_crop_api.py):
- page-split endpoint falls back to original image when orientation
rotated a landscape spread to portrait
- start_step parameter: 1 if split from original, 2 if from oriented
Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -92,15 +92,24 @@ export default function OcrPipelinePage() {
|
||||
let uiStep = Math.max(0, dbStep - 1)
|
||||
const skipSteps = [...(savedDocType?.skip_steps || [])]
|
||||
|
||||
// Sub-sessions: image is already cropped, skip pre-processing steps
|
||||
// Jump directly to columns (UI step 4) unless already further ahead
|
||||
// Sub-session handling depends on how they were created:
|
||||
// - Crop-based (current_step >= 5): image already cropped, skip all pre-processing
|
||||
// - Page-split (current_step 2): orientation done on parent, skip only orientation
|
||||
// - Page-split from original (current_step 1): needs full pipeline
|
||||
const isSubSession = !!data.parent_session_id
|
||||
const SUB_SESSION_SKIP = ['orientation', 'deskew', 'dewarp', 'crop']
|
||||
if (isSubSession) {
|
||||
for (const s of SUB_SESSION_SKIP) {
|
||||
if (!skipSteps.includes(s)) skipSteps.push(s)
|
||||
if (dbStep >= 5) {
|
||||
// Crop-based sub-sessions: image already cropped
|
||||
const SUB_SESSION_SKIP = ['orientation', 'deskew', 'dewarp', 'crop']
|
||||
for (const s of SUB_SESSION_SKIP) {
|
||||
if (!skipSteps.includes(s)) skipSteps.push(s)
|
||||
}
|
||||
if (uiStep < 4) uiStep = 4 // columns step (index 4)
|
||||
} else if (dbStep >= 2) {
|
||||
// Page-split sub-session: parent orientation applied, skip only orientation
|
||||
if (!skipSteps.includes('orientation')) skipSteps.push('orientation')
|
||||
}
|
||||
if (uiStep < 4) uiStep = 4 // columns step (index 4)
|
||||
// dbStep === 1: page-split from original image, needs full pipeline
|
||||
}
|
||||
|
||||
setSteps(
|
||||
@@ -245,6 +254,13 @@ export default function OcrPipelinePage() {
|
||||
setSessionId(sid)
|
||||
// Reload session list to show the new session
|
||||
loadSessions()
|
||||
|
||||
// If page-split created sub-sessions, open the first one
|
||||
if (subSessions.length > 0) {
|
||||
openSession(subSessions[0].id, true)
|
||||
return
|
||||
}
|
||||
|
||||
handleNext()
|
||||
}
|
||||
|
||||
@@ -365,7 +381,7 @@ export default function OcrPipelinePage() {
|
||||
const renderStep = () => {
|
||||
switch (currentStep) {
|
||||
case 0:
|
||||
return <StepOrientation sessionId={sessionId} onNext={handleOrientationComplete} />
|
||||
return <StepOrientation sessionId={sessionId} onNext={handleOrientationComplete} onSubSessionsCreated={handleBoxSessionsCreated} />
|
||||
case 1:
|
||||
return <StepDeskew sessionId={sessionId} onNext={handleNext} />
|
||||
case 2:
|
||||
|
||||
@@ -1,19 +1,29 @@
|
||||
'use client'
|
||||
|
||||
import { useCallback, useEffect, useState } from 'react'
|
||||
import type { OrientationResult, SessionInfo } from '@/app/(admin)/ai/ocr-pipeline/types'
|
||||
import type { OrientationResult, SessionInfo, SubSession } from '@/app/(admin)/ai/ocr-pipeline/types'
|
||||
import { ImageCompareView } from './ImageCompareView'
|
||||
|
||||
const KLAUSUR_API = '/klausur-api'
|
||||
|
||||
interface PageSplitResult {
|
||||
multi_page: boolean
|
||||
page_count?: number
|
||||
sub_sessions?: { id: string; name: string; page_index: number }[]
|
||||
used_original?: boolean
|
||||
duration_seconds?: number
|
||||
}
|
||||
|
||||
interface StepOrientationProps {
|
||||
sessionId?: string | null
|
||||
onNext: (sessionId: string) => void
|
||||
onSubSessionsCreated?: (subs: SubSession[]) => void
|
||||
}
|
||||
|
||||
export function StepOrientation({ sessionId: existingSessionId, onNext }: StepOrientationProps) {
|
||||
export function StepOrientation({ sessionId: existingSessionId, onNext, onSubSessionsCreated }: StepOrientationProps) {
|
||||
const [session, setSession] = useState<SessionInfo | null>(null)
|
||||
const [orientationResult, setOrientationResult] = useState<OrientationResult | null>(null)
|
||||
const [pageSplitResult, setPageSplitResult] = useState<PageSplitResult | null>(null)
|
||||
const [uploading, setUploading] = useState(false)
|
||||
const [detecting, setDetecting] = useState(false)
|
||||
const [error, setError] = useState<string | null>(null)
|
||||
@@ -92,13 +102,38 @@ export function StepOrientation({ sessionId: existingSessionId, onNext }: StepOr
|
||||
corrected: orientData.corrected,
|
||||
duration_seconds: orientData.duration_seconds,
|
||||
})
|
||||
|
||||
// Auto-trigger page-split detection (double-page book spreads)
|
||||
try {
|
||||
const splitRes = await fetch(
|
||||
`${KLAUSUR_API}/api/v1/ocr-pipeline/sessions/${data.session_id}/page-split`,
|
||||
{ method: 'POST' },
|
||||
)
|
||||
if (splitRes.ok) {
|
||||
const splitData: PageSplitResult = await splitRes.json()
|
||||
setPageSplitResult(splitData)
|
||||
if (splitData.multi_page && splitData.sub_sessions && onSubSessionsCreated) {
|
||||
onSubSessionsCreated(
|
||||
splitData.sub_sessions.map((s) => ({
|
||||
id: s.id,
|
||||
name: s.name,
|
||||
box_index: s.page_index,
|
||||
current_step: splitData.used_original ? 1 : 2,
|
||||
}))
|
||||
)
|
||||
}
|
||||
}
|
||||
} catch (e) {
|
||||
console.error('Page-split detection failed:', e)
|
||||
// Not critical — continue as single page
|
||||
}
|
||||
} catch (e) {
|
||||
setError(e instanceof Error ? e.message : 'Unbekannter Fehler')
|
||||
} finally {
|
||||
setUploading(false)
|
||||
setDetecting(false)
|
||||
}
|
||||
}, [sessionName])
|
||||
}, [sessionName, onSubSessionsCreated])
|
||||
|
||||
const handleDrop = useCallback((e: React.DragEvent) => {
|
||||
e.preventDefault()
|
||||
@@ -225,6 +260,29 @@ export function StepOrientation({ sessionId: existingSessionId, onNext }: StepOr
|
||||
</div>
|
||||
)}
|
||||
|
||||
{/* Page-split result */}
|
||||
{pageSplitResult?.multi_page && (
|
||||
<div className="bg-blue-50 dark:bg-blue-900/20 rounded-lg border border-blue-200 dark:border-blue-700 p-4">
|
||||
<div className="text-sm font-medium text-blue-700 dark:text-blue-300">
|
||||
Doppelseite erkannt — {pageSplitResult.page_count} Seiten
|
||||
</div>
|
||||
<p className="text-xs text-blue-600 dark:text-blue-400 mt-1">
|
||||
Jede Seite wird einzeln durch die Pipeline (Begradigung, Entzerrung, Zuschnitt, ...) verarbeitet.
|
||||
{pageSplitResult.used_original && ' (Seitentrennung auf dem Originalbild, da die Orientierung die Doppelseite gedreht hat.)'}
|
||||
</p>
|
||||
<div className="flex gap-2 mt-2">
|
||||
{pageSplitResult.sub_sessions?.map((s) => (
|
||||
<span
|
||||
key={s.id}
|
||||
className="text-xs px-2 py-1 rounded-md bg-blue-100 dark:bg-blue-800/40 text-blue-700 dark:text-blue-300"
|
||||
>
|
||||
{s.name}
|
||||
</span>
|
||||
))}
|
||||
</div>
|
||||
</div>
|
||||
)}
|
||||
|
||||
{/* Next button */}
|
||||
{orientationResult && (
|
||||
<div className="flex justify-end">
|
||||
@@ -232,7 +290,7 @@ export function StepOrientation({ sessionId: existingSessionId, onNext }: StepOr
|
||||
onClick={() => onNext(session.session_id)}
|
||||
className="px-6 py-2 bg-teal-600 text-white rounded-lg hover:bg-teal-700 font-medium transition-colors"
|
||||
>
|
||||
Weiter →
|
||||
{pageSplitResult?.multi_page ? 'Seiten verarbeiten' : 'Weiter'} →
|
||||
</button>
|
||||
</div>
|
||||
)}
|
||||
|
||||
@@ -191,6 +191,23 @@ async def detect_page_split(session_id: str):
|
||||
|
||||
t0 = time.time()
|
||||
page_splits = detect_page_splits(img_bgr)
|
||||
used_original = False
|
||||
|
||||
if not page_splits or len(page_splits) < 2:
|
||||
# Orientation may have rotated a landscape double-page spread to
|
||||
# portrait. Try the original (pre-orientation) image as fallback.
|
||||
orig_bgr = cached.get("original_bgr")
|
||||
if orig_bgr is not None and orig_bgr is not img_bgr:
|
||||
page_splits_orig = detect_page_splits(orig_bgr)
|
||||
if page_splits_orig and len(page_splits_orig) >= 2:
|
||||
logger.info(
|
||||
"OCR Pipeline: page-split session %s: spread detected on "
|
||||
"ORIGINAL (orientation rotated it away)",
|
||||
session_id,
|
||||
)
|
||||
img_bgr = orig_bgr
|
||||
page_splits = page_splits_orig
|
||||
used_original = True
|
||||
|
||||
if not page_splits or len(page_splits) < 2:
|
||||
duration = time.time() - t0
|
||||
@@ -204,9 +221,12 @@ async def detect_page_split(session_id: str):
|
||||
"duration_seconds": round(duration, 2),
|
||||
}
|
||||
|
||||
# Multi-page spread detected — create sub-sessions for full pipeline
|
||||
# Multi-page spread detected — create sub-sessions for full pipeline.
|
||||
# start_step=2 means "ready for deskew" (orientation already applied).
|
||||
# start_step=1 means "needs orientation too" (split from original image).
|
||||
start_step = 1 if used_original else 2
|
||||
sub_sessions = await _create_page_sub_sessions_full(
|
||||
session_id, cached, img_bgr, page_splits,
|
||||
session_id, cached, img_bgr, page_splits, start_step=start_step,
|
||||
)
|
||||
duration = time.time() - t0
|
||||
|
||||
@@ -214,6 +234,7 @@ async def detect_page_split(session_id: str):
|
||||
"multi_page": True,
|
||||
"page_count": len(page_splits),
|
||||
"page_splits": page_splits,
|
||||
"used_original": used_original,
|
||||
"duration_seconds": round(duration, 2),
|
||||
}
|
||||
|
||||
@@ -475,13 +496,14 @@ async def _create_page_sub_sessions_full(
|
||||
parent_cached: dict,
|
||||
full_img_bgr: np.ndarray,
|
||||
page_splits: List[Dict[str, Any]],
|
||||
start_step: int = 2,
|
||||
) -> List[Dict[str, Any]]:
|
||||
"""Create sub-sessions for each page with RAW regions for full pipeline processing.
|
||||
|
||||
Unlike ``_create_page_sub_sessions`` (used by the crop step), these
|
||||
sub-sessions store the *uncropped* page region and start at
|
||||
``current_step=2`` (ready for deskew). Each page therefore goes through
|
||||
its own deskew → dewarp → crop → columns → rows → words → grid pipeline,
|
||||
``start_step`` (default 2 = ready for deskew; 1 if orientation still
|
||||
needed). Each page goes through its own pipeline independently,
|
||||
which is essential for book spreads where each page has a different tilt.
|
||||
"""
|
||||
# Idempotent: reuse existing sub-sessions
|
||||
@@ -522,9 +544,9 @@ async def _create_page_sub_sessions_full(
|
||||
box_index=pi,
|
||||
)
|
||||
|
||||
# Start at step 2 (deskew) — orientation was already applied to the
|
||||
# whole spread before splitting.
|
||||
await update_session_db(sub_id, current_step=2)
|
||||
# start_step=2 → ready for deskew (orientation already done on spread)
|
||||
# start_step=1 → needs its own orientation (split from original image)
|
||||
await update_session_db(sub_id, current_step=start_step)
|
||||
|
||||
# Cache the BGR so the pipeline can start immediately
|
||||
_cache[sub_id] = {
|
||||
@@ -542,7 +564,7 @@ async def _create_page_sub_sessions_full(
|
||||
"deskew_result": None,
|
||||
"dewarp_result": None,
|
||||
"ground_truth": {},
|
||||
"current_step": 2,
|
||||
"current_step": start_step,
|
||||
}
|
||||
|
||||
rh, rw = page_bgr.shape[:2]
|
||||
|
||||
Reference in New Issue
Block a user