Add double-page spread detection to frontend pipeline
Some checks failed
CI / go-lint (push) Has been skipped
CI / python-lint (push) Has been skipped
CI / nodejs-lint (push) Has been skipped
CI / test-go-school (push) Successful in 36s
CI / test-go-edu-search (push) Successful in 34s
CI / test-python-klausur (push) Failing after 2m0s
CI / test-python-agent-core (push) Successful in 17s
CI / test-nodejs-website (push) Successful in 18s

After orientation detection, the frontend now automatically calls the
page-split endpoint. When a double-page book spread is detected, two
sub-sessions are created and each goes through the full pipeline
(deskew/dewarp/crop) independently — essential because each page of a
spread tilts differently due to the spine.

Frontend changes:
- StepOrientation: calls POST /page-split after orientation, shows
  split info ("Doppelseite erkannt"), notifies parent of sub-sessions
- page.tsx: distinguishes page-split sub-sessions (current_step < 5)
  from crop-based sub-sessions (current_step >= 5). Page-split
  sub-sessions only skip orientation, not deskew/dewarp/crop.
- page.tsx: handleOrientationComplete opens first sub-session when
  page-split was detected

Backend changes (orientation_crop_api.py):
- page-split endpoint falls back to the original (pre-orientation) image
  when no spread is found on the oriented image, e.g. because
  orientation rotated a landscape spread to portrait
- start_step parameter: 1 if split from the original image (sub-sessions
  still need orientation), 2 if split from the oriented image (ready
  for deskew)

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
Benjamin Admin
2026-03-24 11:09:44 +01:00
parent 40815dafd1
commit 247b79674d
3 changed files with 115 additions and 19 deletions

View File

@@ -191,6 +191,23 @@ async def detect_page_split(session_id: str):
t0 = time.time()
page_splits = detect_page_splits(img_bgr)
used_original = False
if not page_splits or len(page_splits) < 2:
# Orientation may have rotated a landscape double-page spread to
# portrait. Try the original (pre-orientation) image as fallback.
orig_bgr = cached.get("original_bgr")
if orig_bgr is not None and orig_bgr is not img_bgr:
page_splits_orig = detect_page_splits(orig_bgr)
if page_splits_orig and len(page_splits_orig) >= 2:
logger.info(
"OCR Pipeline: page-split session %s: spread detected on "
"ORIGINAL (orientation rotated it away)",
session_id,
)
img_bgr = orig_bgr
page_splits = page_splits_orig
used_original = True
if not page_splits or len(page_splits) < 2:
duration = time.time() - t0
@@ -204,9 +221,12 @@ async def detect_page_split(session_id: str):
"duration_seconds": round(duration, 2),
}
# Multi-page spread detected — create sub-sessions for full pipeline
# Multi-page spread detected — create sub-sessions for full pipeline.
# start_step=2 means "ready for deskew" (orientation already applied).
# start_step=1 means "needs orientation too" (split from original image).
start_step = 1 if used_original else 2
sub_sessions = await _create_page_sub_sessions_full(
session_id, cached, img_bgr, page_splits,
session_id, cached, img_bgr, page_splits, start_step=start_step,
)
duration = time.time() - t0
@@ -214,6 +234,7 @@ async def detect_page_split(session_id: str):
"multi_page": True,
"page_count": len(page_splits),
"page_splits": page_splits,
"used_original": used_original,
"duration_seconds": round(duration, 2),
}
@@ -475,13 +496,14 @@ async def _create_page_sub_sessions_full(
parent_cached: dict,
full_img_bgr: np.ndarray,
page_splits: List[Dict[str, Any]],
start_step: int = 2,
) -> List[Dict[str, Any]]:
"""Create sub-sessions for each page with RAW regions for full pipeline processing.
Unlike ``_create_page_sub_sessions`` (used by the crop step), these
sub-sessions store the *uncropped* page region and start at
``current_step=2`` (ready for deskew). Each page therefore goes through
its own deskew → dewarp → crop → columns → rows → words → grid pipeline,
``start_step`` (default 2 = ready for deskew; 1 if orientation still
needed). Each page goes through its own pipeline independently,
which is essential for book spreads where each page has a different tilt.
"""
# Idempotent: reuse existing sub-sessions
@@ -522,9 +544,9 @@ async def _create_page_sub_sessions_full(
box_index=pi,
)
# Start at step 2 (deskew) — orientation was already applied to the
# whole spread before splitting.
await update_session_db(sub_id, current_step=2)
# start_step=2 → ready for deskew (orientation already done on spread)
# start_step=1 → needs its own orientation (split from original image)
await update_session_db(sub_id, current_step=start_step)
# Cache the BGR so the pipeline can start immediately
_cache[sub_id] = {
@@ -542,7 +564,7 @@ async def _create_page_sub_sessions_full(
"deskew_result": None,
"dewarp_result": None,
"ground_truth": {},
"current_step": 2,
"current_step": start_step,
}
rh, rw = page_bgr.shape[:2]