Add double-page spread detection to frontend pipeline
Some checks failed
CI / go-lint (push) Has been skipped
CI / python-lint (push) Has been skipped
CI / nodejs-lint (push) Has been skipped
CI / test-go-school (push) Successful in 36s
CI / test-go-edu-search (push) Successful in 34s
CI / test-python-klausur (push) Failing after 2m0s
CI / test-python-agent-core (push) Successful in 17s
CI / test-nodejs-website (push) Successful in 18s
Some checks failed
CI / go-lint (push) Has been skipped
CI / python-lint (push) Has been skipped
CI / nodejs-lint (push) Has been skipped
CI / test-go-school (push) Successful in 36s
CI / test-go-edu-search (push) Successful in 34s
CI / test-python-klausur (push) Failing after 2m0s
CI / test-python-agent-core (push) Successful in 17s
CI / test-nodejs-website (push) Successful in 18s
After orientation detection, the frontend now automatically calls the
page-split endpoint. When a double-page book spread is detected, two
sub-sessions are created and each goes through the full pipeline
(deskew/dewarp/crop) independently — essential because each page of a
spread tilts differently due to the spine.
Frontend changes:
- StepOrientation: calls POST /page-split after orientation, shows
split info ("Doppelseite erkannt"), notifies parent of sub-sessions
- page.tsx: distinguishes page-split sub-sessions (current_step < 5)
from crop-based sub-sessions (current_step >= 5). Page-split subs
only skip orientation, not deskew/dewarp/crop.
- page.tsx: handleOrientationComplete opens first sub-session when
page-split was detected
Backend changes (orientation_crop_api.py):
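- page-split endpoint falls back to the original (pre-orientation) image
when no spread is found on the oriented image, e.g. because orientation
rotated a landscape spread to portrait
- start_step parameter: 1 if split from the original image (sub-sessions
still need orientation), 2 if split from the oriented image (ready for
deskew)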
Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -191,6 +191,23 @@ async def detect_page_split(session_id: str):
|
||||
|
||||
t0 = time.time()
|
||||
page_splits = detect_page_splits(img_bgr)
|
||||
used_original = False
|
||||
|
||||
if not page_splits or len(page_splits) < 2:
|
||||
# Orientation may have rotated a landscape double-page spread to
|
||||
# portrait. Try the original (pre-orientation) image as fallback.
|
||||
orig_bgr = cached.get("original_bgr")
|
||||
if orig_bgr is not None and orig_bgr is not img_bgr:
|
||||
page_splits_orig = detect_page_splits(orig_bgr)
|
||||
if page_splits_orig and len(page_splits_orig) >= 2:
|
||||
logger.info(
|
||||
"OCR Pipeline: page-split session %s: spread detected on "
|
||||
"ORIGINAL (orientation rotated it away)",
|
||||
session_id,
|
||||
)
|
||||
img_bgr = orig_bgr
|
||||
page_splits = page_splits_orig
|
||||
used_original = True
|
||||
|
||||
if not page_splits or len(page_splits) < 2:
|
||||
duration = time.time() - t0
|
||||
@@ -204,9 +221,12 @@ async def detect_page_split(session_id: str):
|
||||
"duration_seconds": round(duration, 2),
|
||||
}
|
||||
|
||||
# Multi-page spread detected — create sub-sessions for full pipeline
|
||||
# Multi-page spread detected — create sub-sessions for full pipeline.
|
||||
# start_step=2 means "ready for deskew" (orientation already applied).
|
||||
# start_step=1 means "needs orientation too" (split from original image).
|
||||
start_step = 1 if used_original else 2
|
||||
sub_sessions = await _create_page_sub_sessions_full(
|
||||
session_id, cached, img_bgr, page_splits,
|
||||
session_id, cached, img_bgr, page_splits, start_step=start_step,
|
||||
)
|
||||
duration = time.time() - t0
|
||||
|
||||
@@ -214,6 +234,7 @@ async def detect_page_split(session_id: str):
|
||||
"multi_page": True,
|
||||
"page_count": len(page_splits),
|
||||
"page_splits": page_splits,
|
||||
"used_original": used_original,
|
||||
"duration_seconds": round(duration, 2),
|
||||
}
|
||||
|
||||
@@ -475,13 +496,14 @@ async def _create_page_sub_sessions_full(
|
||||
parent_cached: dict,
|
||||
full_img_bgr: np.ndarray,
|
||||
page_splits: List[Dict[str, Any]],
|
||||
start_step: int = 2,
|
||||
) -> List[Dict[str, Any]]:
|
||||
"""Create sub-sessions for each page with RAW regions for full pipeline processing.
|
||||
|
||||
Unlike ``_create_page_sub_sessions`` (used by the crop step), these
|
||||
sub-sessions store the *uncropped* page region and start at
|
||||
``current_step=2`` (ready for deskew). Each page therefore goes through
|
||||
its own deskew → dewarp → crop → columns → rows → words → grid pipeline,
|
||||
``start_step`` (default 2 = ready for deskew; 1 if orientation still
|
||||
needed). Each page goes through its own pipeline independently,
|
||||
which is essential for book spreads where each page has a different tilt.
|
||||
"""
|
||||
# Idempotent: reuse existing sub-sessions
|
||||
@@ -522,9 +544,9 @@ async def _create_page_sub_sessions_full(
|
||||
box_index=pi,
|
||||
)
|
||||
|
||||
# Start at step 2 (deskew) — orientation was already applied to the
|
||||
# whole spread before splitting.
|
||||
await update_session_db(sub_id, current_step=2)
|
||||
# start_step=2 → ready for deskew (orientation already done on spread)
|
||||
# start_step=1 → needs its own orientation (split from original image)
|
||||
await update_session_db(sub_id, current_step=start_step)
|
||||
|
||||
# Cache the BGR so the pipeline can start immediately
|
||||
_cache[sub_id] = {
|
||||
@@ -542,7 +564,7 @@ async def _create_page_sub_sessions_full(
|
||||
"deskew_result": None,
|
||||
"dewarp_result": None,
|
||||
"ground_truth": {},
|
||||
"current_step": 2,
|
||||
"current_step": start_step,
|
||||
}
|
||||
|
||||
rh, rw = page_bgr.shape[:2]
|
||||
|
||||
Reference in New Issue
Block a user