From 247b79674d6b7b1d6bb52f9309b407df7ff28690 Mon Sep 17 00:00:00 2001 From: Benjamin Admin Date: Tue, 24 Mar 2026 11:09:44 +0100 Subject: [PATCH] Add double-page spread detection to frontend pipeline MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit After orientation detection, the frontend now automatically calls the page-split endpoint. When a double-page book spread is detected, two sub-sessions are created and each goes through the full pipeline (deskew/dewarp/crop) independently — essential because each page of a spread tilts differently due to the spine. Frontend changes: - StepOrientation: calls POST /page-split after orientation, shows split info ("Doppelseite erkannt"), notifies parent of sub-sessions - page.tsx: distinguishes page-split sub-sessions (current_step < 5) from crop-based sub-sessions (current_step >= 5). Page-split subs only skip orientation, not deskew/dewarp/crop. - page.tsx: handleOrientationComplete opens first sub-session when page-split was detected Backend changes (orientation_crop_api.py): - page-split endpoint falls back to original image when orientation rotated a landscape spread to portrait - start_step parameter: 1 if split from original, 2 if from oriented Co-Authored-By: Claude Opus 4.6 --- .../app/(admin)/ai/ocr-pipeline/page.tsx | 30 +++++++-- .../ocr-pipeline/StepOrientation.tsx | 66 +++++++++++++++++-- .../backend/orientation_crop_api.py | 38 ++++++++--- 3 files changed, 115 insertions(+), 19 deletions(-) diff --git a/admin-lehrer/app/(admin)/ai/ocr-pipeline/page.tsx b/admin-lehrer/app/(admin)/ai/ocr-pipeline/page.tsx index 378159b..b02f409 100644 --- a/admin-lehrer/app/(admin)/ai/ocr-pipeline/page.tsx +++ b/admin-lehrer/app/(admin)/ai/ocr-pipeline/page.tsx @@ -92,15 +92,24 @@ export default function OcrPipelinePage() { let uiStep = Math.max(0, dbStep - 1) const skipSteps = [...(savedDocType?.skip_steps || [])] - // Sub-sessions: image is already cropped, skip pre-processing steps - // Jump directly to columns (UI step 4) unless already further ahead + // Sub-session handling depends on how they were created: + // - Crop-based (current_step >= 5): image already cropped, skip all pre-processing + // - Page-split (current_step 2): orientation done on parent, skip only orientation + // - Page-split from original (current_step 1): needs full pipeline const isSubSession = !!data.parent_session_id - const SUB_SESSION_SKIP = ['orientation', 'deskew', 'dewarp', 'crop'] if (isSubSession) { - for (const s of SUB_SESSION_SKIP) { - if (!skipSteps.includes(s)) skipSteps.push(s) + if (dbStep >= 5) { + // Crop-based sub-sessions: image already cropped + const SUB_SESSION_SKIP = ['orientation', 'deskew', 'dewarp', 'crop'] + for (const s of SUB_SESSION_SKIP) { + if (!skipSteps.includes(s)) skipSteps.push(s) + } + if (uiStep < 4) uiStep = 4 // columns step (index 4) + } else if (dbStep >= 2) { + // Page-split sub-session: parent orientation applied, skip only orientation + if (!skipSteps.includes('orientation')) skipSteps.push('orientation') } - if (uiStep < 4) uiStep = 4 // columns step (index 4) + // dbStep === 1: page-split from original image, needs full pipeline } setSteps( @@ -245,6 +254,13 @@ export default function OcrPipelinePage() { setSessionId(sid) // Reload session list to show the new session loadSessions() + + // If page-split created sub-sessions, open the first one + if (subSessions.length > 0) { + openSession(subSessions[0].id, true) + return + } + handleNext() } @@ -365,7 +381,7 @@ export default function OcrPipelinePage() { const renderStep = () => { switch (currentStep) { case 0: - return + return case 1: return case 2: diff --git a/admin-lehrer/components/ocr-pipeline/StepOrientation.tsx b/admin-lehrer/components/ocr-pipeline/StepOrientation.tsx index 89239c7..9bf0515 100644 --- a/admin-lehrer/components/ocr-pipeline/StepOrientation.tsx +++ b/admin-lehrer/components/ocr-pipeline/StepOrientation.tsx @@ -1,19 +1,29 @@ 'use client' import { useCallback, useEffect, useState } from 'react' -import type { OrientationResult, SessionInfo } from '@/app/(admin)/ai/ocr-pipeline/types' +import type { OrientationResult, SessionInfo, SubSession } from '@/app/(admin)/ai/ocr-pipeline/types' import { ImageCompareView } from './ImageCompareView' const KLAUSUR_API = '/klausur-api' +interface PageSplitResult { + multi_page: boolean + page_count?: number + sub_sessions?: { id: string; name: string; page_index: number }[] + used_original?: boolean + duration_seconds?: number +} + interface StepOrientationProps { sessionId?: string | null onNext: (sessionId: string) => void + onSubSessionsCreated?: (subs: SubSession[]) => void } -export function StepOrientation({ sessionId: existingSessionId, onNext }: StepOrientationProps) { +export function StepOrientation({ sessionId: existingSessionId, onNext, onSubSessionsCreated }: StepOrientationProps) { const [session, setSession] = useState(null) const [orientationResult, setOrientationResult] = useState(null) + const [pageSplitResult, setPageSplitResult] = useState(null) const [uploading, setUploading] = useState(false) const [detecting, setDetecting] = useState(false) const [error, setError] = useState(null) @@ -92,13 +102,38 @@ export function StepOrientation({ sessionId: existingSessionId, onNext }: StepOr corrected: orientData.corrected, duration_seconds: orientData.duration_seconds, }) + + // Auto-trigger page-split detection (double-page book spreads) + try { + const splitRes = await fetch( + `${KLAUSUR_API}/api/v1/ocr-pipeline/sessions/${data.session_id}/page-split`, + { method: 'POST' }, + ) + if (splitRes.ok) { + const splitData: PageSplitResult = await splitRes.json() + setPageSplitResult(splitData) + if (splitData.multi_page && splitData.sub_sessions && onSubSessionsCreated) { + onSubSessionsCreated( + splitData.sub_sessions.map((s) => ({ + id: s.id, + name: s.name, + box_index: s.page_index, + current_step: splitData.used_original ? 1 : 2, + })) + ) + } + } + } catch (e) { + console.error('Page-split detection failed:', e) + // Not critical — continue as single page + } } catch (e) { setError(e instanceof Error ? e.message : 'Unbekannter Fehler') } finally { setUploading(false) setDetecting(false) } - }, [sessionName]) + }, [sessionName, onSubSessionsCreated]) const handleDrop = useCallback((e: React.DragEvent) => { e.preventDefault() @@ -225,6 +260,29 @@ export function StepOrientation({ sessionId: existingSessionId, onNext }: StepOr )} + {/* Page-split result */} + {pageSplitResult?.multi_page && ( +
+
+ Doppelseite erkannt — {pageSplitResult.page_count} Seiten +
+

+ Jede Seite wird einzeln durch die Pipeline (Begradigung, Entzerrung, Zuschnitt, ...) verarbeitet. + {pageSplitResult.used_original && ' (Seitentrennung auf dem Originalbild, da die Orientierung die Doppelseite gedreht hat.)'} +

+
+ {pageSplitResult.sub_sessions?.map((s) => ( + + {s.name} + + ))} +
+
+ )} + {/* Next button */} {orientationResult && (
@@ -232,7 +290,7 @@ export function StepOrientation({ sessionId: existingSessionId, onNext }: StepOr onClick={() => onNext(session.session_id)} className="px-6 py-2 bg-teal-600 text-white rounded-lg hover:bg-teal-700 font-medium transition-colors" > - Weiter → + {pageSplitResult?.multi_page ? 'Seiten verarbeiten' : 'Weiter'} →
)} diff --git a/klausur-service/backend/orientation_crop_api.py b/klausur-service/backend/orientation_crop_api.py index 0d758b9..a537137 100644 --- a/klausur-service/backend/orientation_crop_api.py +++ b/klausur-service/backend/orientation_crop_api.py @@ -191,6 +191,23 @@ async def detect_page_split(session_id: str): t0 = time.time() page_splits = detect_page_splits(img_bgr) + used_original = False + + if not page_splits or len(page_splits) < 2: + # Orientation may have rotated a landscape double-page spread to + # portrait. Try the original (pre-orientation) image as fallback. + orig_bgr = cached.get("original_bgr") + if orig_bgr is not None and orig_bgr is not img_bgr: + page_splits_orig = detect_page_splits(orig_bgr) + if page_splits_orig and len(page_splits_orig) >= 2: + logger.info( + "OCR Pipeline: page-split session %s: spread detected on " + "ORIGINAL (orientation rotated it away)", + session_id, + ) + img_bgr = orig_bgr + page_splits = page_splits_orig + used_original = True if not page_splits or len(page_splits) < 2: duration = time.time() - t0 @@ -204,9 +221,12 @@ async def detect_page_split(session_id: str): "duration_seconds": round(duration, 2), } - # Multi-page spread detected — create sub-sessions for full pipeline + # Multi-page spread detected — create sub-sessions for full pipeline. + # start_step=2 means "ready for deskew" (orientation already applied). + # start_step=1 means "needs orientation too" (split from original image). + start_step = 1 if used_original else 2 sub_sessions = await _create_page_sub_sessions_full( - session_id, cached, img_bgr, page_splits, + session_id, cached, img_bgr, page_splits, start_step=start_step, ) duration = time.time() - t0 @@ -214,6 +234,7 @@ async def detect_page_split(session_id: str): "multi_page": True, "page_count": len(page_splits), "page_splits": page_splits, + "used_original": used_original, "duration_seconds": round(duration, 2), } @@ -475,13 +496,14 @@ async def _create_page_sub_sessions_full( parent_cached: dict, full_img_bgr: np.ndarray, page_splits: List[Dict[str, Any]], + start_step: int = 2, ) -> List[Dict[str, Any]]: """Create sub-sessions for each page with RAW regions for full pipeline processing. Unlike ``_create_page_sub_sessions`` (used by the crop step), these sub-sessions store the *uncropped* page region and start at - ``current_step=2`` (ready for deskew). Each page therefore goes through - its own deskew → dewarp → crop → columns → rows → words → grid pipeline, + ``start_step`` (default 2 = ready for deskew; 1 if orientation still + needed). Each page goes through its own pipeline independently, which is essential for book spreads where each page has a different tilt. """ # Idempotent: reuse existing sub-sessions @@ -522,9 +544,9 @@ async def _create_page_sub_sessions_full( box_index=pi, ) - # Start at step 2 (deskew) — orientation was already applied to the - # whole spread before splitting. - await update_session_db(sub_id, current_step=2) + # start_step=2 → ready for deskew (orientation already done on spread) + # start_step=1 → needs its own orientation (split from original image) + await update_session_db(sub_id, current_step=start_step) # Cache the BGR so the pipeline can start immediately _cache[sub_id] = { @@ -542,7 +564,7 @@ async def _create_page_sub_sessions_full( "deskew_result": None, "dewarp_result": None, "ground_truth": {}, - "current_step": 2, + "current_step": start_step, } rh, rw = page_bgr.shape[:2]