+
+ Doppelseite erkannt — {pageSplitResult.page_count} Seiten
+
+
+ Jede Seite wird einzeln durch die Pipeline (Begradigung, Entzerrung, Zuschnitt, ...) verarbeitet.
+ {pageSplitResult.used_original && ' (Seitentrennung auf dem Originalbild, da die Orientierung die Doppelseite gedreht hat.)'}
+
+
+ {pageSplitResult.sub_sessions?.map((s) => (
+
+ {s.name}
+
+ ))}
+
+
+ )}
+
{/* Next button */}
{orientationResult && (
@@ -232,7 +290,7 @@ export function StepOrientation({ sessionId: existingSessionId, onNext }: StepOr
onClick={() => onNext(session.session_id)}
className="px-6 py-2 bg-teal-600 text-white rounded-lg hover:bg-teal-700 font-medium transition-colors"
>
- Weiter →
+ {pageSplitResult?.multi_page ? 'Seiten verarbeiten' : 'Weiter'} →
)}
diff --git a/klausur-service/backend/orientation_crop_api.py b/klausur-service/backend/orientation_crop_api.py
index 0d758b9..a537137 100644
--- a/klausur-service/backend/orientation_crop_api.py
+++ b/klausur-service/backend/orientation_crop_api.py
@@ -191,6 +191,23 @@ async def detect_page_split(session_id: str):
t0 = time.time()
page_splits = detect_page_splits(img_bgr)
+ used_original = False
+
+ if not page_splits or len(page_splits) < 2:
+ # Orientation may have rotated a landscape double-page spread to
+ # portrait. Try the original (pre-orientation) image as a fallback.
+ orig_bgr = cached.get("original_bgr")
+ if orig_bgr is not None and orig_bgr is not img_bgr:
+ page_splits_orig = detect_page_splits(orig_bgr)
+ if page_splits_orig and len(page_splits_orig) >= 2:
+ logger.info(
+ "OCR Pipeline: page-split session %s: spread detected on "
+ "ORIGINAL (orientation rotated it away)",
+ session_id,
+ )
+ img_bgr = orig_bgr
+ page_splits = page_splits_orig
+ used_original = True
if not page_splits or len(page_splits) < 2:
duration = time.time() - t0
@@ -204,9 +221,12 @@ async def detect_page_split(session_id: str):
"duration_seconds": round(duration, 2),
}
- # Multi-page spread detected — create sub-sessions for full pipeline
+ # Multi-page spread detected — create sub-sessions for full pipeline.
+ # start_step=2 means "ready for deskew" (orientation already applied).
+ # start_step=1 means "needs orientation too" (split from original image).
+ start_step = 1 if used_original else 2
sub_sessions = await _create_page_sub_sessions_full(
- session_id, cached, img_bgr, page_splits,
+ session_id, cached, img_bgr, page_splits, start_step=start_step,
)
duration = time.time() - t0
@@ -214,6 +234,7 @@ async def detect_page_split(session_id: str):
"multi_page": True,
"page_count": len(page_splits),
"page_splits": page_splits,
+ "used_original": used_original,
"duration_seconds": round(duration, 2),
}
@@ -475,13 +496,14 @@ async def _create_page_sub_sessions_full(
parent_cached: dict,
full_img_bgr: np.ndarray,
page_splits: List[Dict[str, Any]],
+ start_step: int = 2,
) -> List[Dict[str, Any]]:
"""Create sub-sessions for each page with RAW regions for full pipeline processing.
Unlike ``_create_page_sub_sessions`` (used by the crop step), these
sub-sessions store the *uncropped* page region and start at
- ``current_step=2`` (ready for deskew). Each page therefore goes through
- its own deskew → dewarp → crop → columns → rows → words → grid pipeline,
+ ``start_step`` (default 2 = ready for deskew; 1 if orientation is still
+ needed). Each page goes through its own pipeline independently,
which is essential for book spreads where each page has a different tilt.
"""
# Idempotent: reuse existing sub-sessions
@@ -522,9 +544,9 @@ async def _create_page_sub_sessions_full(
box_index=pi,
)
- # Start at step 2 (deskew) — orientation was already applied to the
- # whole spread before splitting.
- await update_session_db(sub_id, current_step=2)
+ # start_step=2 → ready for deskew (orientation already done on spread)
+ # start_step=1 → needs its own orientation (split from original image)
+ await update_session_db(sub_id, current_step=start_step)
# Cache the BGR so the pipeline can start immediately
_cache[sub_id] = {
@@ -542,7 +564,7 @@ async def _create_page_sub_sessions_full(
"deskew_result": None,
"dewarp_result": None,
"ground_truth": {},
- "current_step": 2,
+ "current_step": start_step,
}
rh, rw = page_bgr.shape[:2]