"""
Sub-session creation for multi-page spreads.

Used by both the page-split and crop steps when a double-page scan is detected.
"""

import logging
import uuid as uuid_mod
from typing import Any, Dict, List

import cv2
import numpy as np

from page_crop import detect_and_crop_page
from ocr_pipeline_session_store import (
    create_session_db,
    get_sub_sessions,
    update_session_db,
)
from orientation_crop_helpers import get_cache_ref

logger = logging.getLogger(__name__)


def _summarize_existing(existing: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
    """Reduce stored sub-session rows to ``id`` / ``name`` / ``page_index``.

    ``page_index`` is taken from the row's ``box_index`` and falls back to the
    row's position in ``existing`` when that key is absent.
    """
    return [
        {"id": row["id"], "name": row["name"], "page_index": row.get("box_index", pos)}
        for pos, row in enumerate(existing)
    ]


def _extract_page_region(full_img_bgr: np.ndarray, page: Dict[str, Any]) -> np.ndarray:
    """Return a copy of the ``page`` rectangle, clamped to the image bounds.

    Clamping matters for negative ``x``/``y``: a plain NumPy slice would treat
    them as offsets from the far edge and silently return the wrong pixels.
    The result may be empty (``.size == 0``) when the rectangle lies entirely
    outside the image; callers are expected to check for that.
    """
    img_h, img_w = full_img_bgr.shape[:2]
    x0 = min(max(page["x"], 0), img_w)
    y0 = min(max(page["y"], 0), img_h)
    x1 = min(max(page["x"] + page["width"], x0), img_w)
    y1 = min(max(page["y"] + page["height"], y0), img_h)
    return full_img_bgr[y0:y1, x0:x1].copy()


def _encode_png(image_bgr: np.ndarray, label: str) -> bytes:
    """PNG-encode ``image_bgr``; return ``b""`` (and log) if encoding fails.

    The empty-bytes fallback keeps the best-effort behaviour of storing the
    session anyway, but the failure is no longer silent.
    """
    ok, png_buf = cv2.imencode(".png", image_bgr)
    if not ok:
        logger.warning("PNG encoding failed for %s; storing empty image bytes", label)
        return b""
    return png_buf.tobytes()


async def create_page_sub_sessions(
    parent_session_id: str,
    parent_cached: dict,
    full_img_bgr: np.ndarray,
    page_splits: List[Dict[str, Any]],
) -> List[Dict[str, Any]]:
    """Create one pre-cropped sub-session per detected page of a spread.

    Each page rectangle is cut out of ``full_img_bgr``, passed through
    ``detect_and_crop_page`` and stored as a new session whose original and
    cropped images are both the cropped page.

    Args:
        parent_session_id: Session the spread belongs to; used to look up
            already-created sub-sessions and to link the new ones.
        parent_cached: Parent session data; ``name`` and ``filename`` are read
            (defaults ``"Scan"`` / ``"scan.png"``).
        full_img_bgr: The full spread image.
        page_splits: One dict per page with ``page_index``, ``x``, ``y``,
            ``width`` and ``height``.

    Returns:
        If sub-sessions already exist for the parent: their
        ``id`` / ``name`` / ``page_index`` summaries. Otherwise one dict per
        created sub-session with ``id``, ``name``, ``page_index``,
        ``source_rect``, ``cropped_size`` and ``detected_format``. Pages whose
        rectangle does not intersect the image are skipped.
    """
    # Idempotent: hand back what is already stored instead of duplicating it.
    existing = await get_sub_sessions(parent_session_id)
    if existing:
        return _summarize_existing(existing)

    parent_name = parent_cached.get("name", "Scan")
    parent_filename = parent_cached.get("filename", "scan.png")

    sub_sessions: List[Dict[str, Any]] = []
    for page in page_splits:
        pi = page["page_index"]
        px, pw = page["x"], page["width"]

        page_bgr = _extract_page_region(full_img_bgr, page)
        if page_bgr.size == 0:
            logger.warning(
                "Skipping page %s of session %s: split rectangle %s is outside the image",
                pi, parent_session_id, page,
            )
            continue

        # Crop each page individually (remove its own borders).
        cropped_page, page_crop_info = detect_and_crop_page(page_bgr)

        sub_id = str(uuid_mod.uuid4())
        sub_name = f"{parent_name} — Seite {pi + 1}"
        page_png = _encode_png(cropped_page, sub_name)

        # Link the sub-session to its parent so the get_sub_sessions() lookup
        # above finds it on a repeat call (otherwise every call would create
        # duplicates).
        # NOTE(review): assumes create_session_db accepts parent_session_id /
        # box_index keyword arguments — confirm against the session store.
        await create_session_db(
            session_id=sub_id,
            name=sub_name,
            filename=parent_filename,
            original_png=page_png,
            parent_session_id=parent_session_id,
            box_index=pi,
        )

        # Pre-populate: cropped image == original image (already cropped).
        await update_session_db(
            sub_id,
            cropped_png=page_png,
            crop_result=page_crop_info,
            current_step=5,
        )

        ch, cw = cropped_page.shape[:2]
        sub_sessions.append({
            "id": sub_id,
            "name": sub_name,
            "page_index": pi,
            "source_rect": page,
            "cropped_size": {"width": cw, "height": ch},
            "detected_format": page_crop_info.get("detected_format"),
        })

        logger.info(
            "Page sub-session %s: page %d, region x=%d w=%d -> cropped %dx%d",
            sub_id, pi + 1, px, pw, cw, ch,
        )

    return sub_sessions


async def create_page_sub_sessions_full(
    parent_session_id: str,
    parent_cached: dict,
    full_img_bgr: np.ndarray,
    page_splits: List[Dict[str, Any]],
    start_step: int = 2,
) -> List[Dict[str, Any]]:
    """Create sub-sessions holding the RAW page regions for full processing.

    Unlike ``create_page_sub_sessions``, the page region is stored uncropped
    and the sub-session starts at ``start_step`` so that each page can run
    through its own pipeline (useful for book spreads where each page has a
    different tilt). The BGR region is also placed in the cache returned by
    ``get_cache_ref()`` under the new session id.

    Args:
        parent_session_id: Session the spread belongs to; used to look up
            already-created sub-sessions and to link the new ones.
        parent_cached: Parent session data; ``name`` and ``filename`` are read
            (defaults ``"Scan"`` / ``"scan.png"``).
        full_img_bgr: The full spread image.
        page_splits: One dict per page with ``page_index``, ``x``, ``y``,
            ``width`` and ``height``.
        start_step: Value written to ``current_step``. Default 2 = ready for
            deskew (orientation already done on the spread); 1 = the page
            still needs its own orientation step.

    Returns:
        If sub-sessions already exist for the parent: their
        ``id`` / ``name`` / ``page_index`` summaries. Otherwise one dict per
        created sub-session with ``id``, ``name``, ``page_index``,
        ``source_rect`` and ``image_size``. Pages whose rectangle does not
        intersect the image are skipped.
    """
    # Idempotent: reuse existing sub-sessions.
    existing = await get_sub_sessions(parent_session_id)
    if existing:
        return _summarize_existing(existing)

    _cache = get_cache_ref()
    parent_name = parent_cached.get("name", "Scan")
    parent_filename = parent_cached.get("filename", "scan.png")

    sub_sessions: List[Dict[str, Any]] = []
    for page in page_splits:
        pi = page["page_index"]
        px, pw = page["x"], page["width"]

        # RAW page region — NO individual cropping here; each sub-session
        # runs its own crop step after deskew + dewarp.
        page_bgr = _extract_page_region(full_img_bgr, page)
        if page_bgr.size == 0:
            logger.warning(
                "Skipping page %s of session %s: split rectangle %s is outside the image",
                pi, parent_session_id, page,
            )
            continue

        sub_id = str(uuid_mod.uuid4())
        sub_name = f"{parent_name} — Seite {pi + 1}"
        page_png = _encode_png(page_bgr, sub_name)

        # Link the sub-session to its parent so the get_sub_sessions() lookup
        # above finds it on a repeat call.
        # NOTE(review): assumes create_session_db accepts parent_session_id /
        # box_index keyword arguments — confirm against the session store.
        await create_session_db(
            session_id=sub_id,
            name=sub_name,
            filename=parent_filename,
            original_png=page_png,
            parent_session_id=parent_session_id,
            box_index=pi,
        )

        # start_step=2 -> ready for deskew (orientation already done on spread)
        # start_step=1 -> needs its own orientation (split from original image)
        await update_session_db(sub_id, current_step=start_step)

        # Cache the BGR so the pipeline can start immediately.
        _cache[sub_id] = {
            "id": sub_id,
            "filename": parent_filename,
            "name": sub_name,
            "original_bgr": page_bgr,
            "oriented_bgr": None,
            "cropped_bgr": None,
            "deskewed_bgr": None,
            "dewarped_bgr": None,
            "orientation_result": None,
            "crop_result": None,
            "deskew_result": None,
            "dewarp_result": None,
            "ground_truth": {},
            "current_step": start_step,
        }

        rh, rw = page_bgr.shape[:2]
        sub_sessions.append({
            "id": sub_id,
            "name": sub_name,
            "page_index": pi,
            "source_rect": page,
            "image_size": {"width": rw, "height": rh},
        })

        logger.info(
            "Page sub-session %s (full pipeline): page %d, region x=%d w=%d -> %dx%d",
            sub_id, pi + 1, px, pw, rw, rh,
        )

    return sub_sessions