Add double-page spread detection to frontend pipeline

After orientation detection, the frontend now automatically calls the page-split endpoint. When a double-page book spread is detected, two sub-sessions are created and each goes through the full pipeline (deskew/dewarp/crop) independently — essential because each page of a spread tilts differently due to the spine.

Frontend changes:
- StepOrientation: calls POST /page-split after orientation, shows split info ("Doppelseite erkannt"), notifies parent of sub-sessions
- page.tsx: distinguishes page-split sub-sessions (current_step < 5) from crop-based sub-sessions (current_step >= 5). Page-split subs only skip orientation, not deskew/dewarp/crop.
- page.tsx: handleOrientationComplete opens first sub-session when page-split was detected

Backend changes (orientation_crop_api.py):
- page-split endpoint falls back to original image when orientation rotated a landscape spread to portrait
- start_step parameter: 1 (needs orientation too) if split from the original image, 2 (ready for deskew) if split from the oriented image

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-03-24 11:09:44 +01:00
parent 40815dafd1
commit 247b79674d
3 changed files with 115 additions and 19 deletions
@@ -191,6 +191,23 @@ async def detect_page_split(session_id: str):

    t0 = time.time()
    page_splits = detect_page_splits(img_bgr)
+    used_original = False
+
+    if not page_splits or len(page_splits) < 2:
+        # Orientation may have rotated a landscape double-page spread to
+        # portrait.  Try the original (pre-orientation) image as fallback.
+        orig_bgr = cached.get("original_bgr")
+        if orig_bgr is not None and orig_bgr is not img_bgr:
+            page_splits_orig = detect_page_splits(orig_bgr)
+            if page_splits_orig and len(page_splits_orig) >= 2:
+                logger.info(
+                    "OCR Pipeline: page-split session %s: spread detected on "
+                    "ORIGINAL (orientation rotated it away)",
+                    session_id,
+                )
+                img_bgr = orig_bgr
+                page_splits = page_splits_orig
+                used_original = True

    if not page_splits or len(page_splits) < 2:
        duration = time.time() - t0
@@ -204,9 +221,12 @@ async def detect_page_split(session_id: str):
            "duration_seconds": round(duration, 2),
        }

-    # Multi-page spread detected — create sub-sessions for full pipeline
+    # Multi-page spread detected — create sub-sessions for full pipeline.
+    # start_step=2 means "ready for deskew" (orientation already applied).
+    # start_step=1 means "needs orientation too" (split from original image).
+    start_step = 1 if used_original else 2
    sub_sessions = await _create_page_sub_sessions_full(
-        session_id, cached, img_bgr, page_splits,
+        session_id, cached, img_bgr, page_splits, start_step=start_step,
    )
    duration = time.time() - t0

@@ -214,6 +234,7 @@ async def detect_page_split(session_id: str):
        "multi_page": True,
        "page_count": len(page_splits),
        "page_splits": page_splits,
+        "used_original": used_original,
        "duration_seconds": round(duration, 2),
    }

@@ -475,13 +496,14 @@ async def _create_page_sub_sessions_full(
    parent_cached: dict,
    full_img_bgr: np.ndarray,
    page_splits: List[Dict[str, Any]],
+    start_step: int = 2,
 ) -> List[Dict[str, Any]]:
    """Create sub-sessions for each page with RAW regions for full pipeline processing.

    Unlike ``_create_page_sub_sessions`` (used by the crop step), these
    sub-sessions store the *uncropped* page region and start at
-    ``current_step=2`` (ready for deskew).  Each page therefore goes through
-    its own deskew → dewarp → crop → columns → rows → words → grid pipeline,
+    ``start_step`` (default 2 = ready for deskew; 1 if orientation still
+    needed).  Each page goes through its own pipeline independently,
    which is essential for book spreads where each page has a different tilt.
    """
    # Idempotent: reuse existing sub-sessions
@@ -522,9 +544,9 @@ async def _create_page_sub_sessions_full(
            box_index=pi,
        )

-        # Start at step 2 (deskew) — orientation was already applied to the
-        # whole spread before splitting.
-        await update_session_db(sub_id, current_step=2)
+        # start_step=2 → ready for deskew (orientation already done on spread)
+        # start_step=1 → needs its own orientation (split from original image)
+        await update_session_db(sub_id, current_step=start_step)

        # Cache the BGR so the pipeline can start immediately
        _cache[sub_id] = {
@@ -542,7 +564,7 @@ async def _create_page_sub_sessions_full(
            "deskew_result": None,
            "dewarp_result": None,
            "ground_truth": {},
-            "current_step": 2,
+            "current_step": start_step,
        }

        rh, rw = page_bgr.shape[:2]