# breakpilot-lehrer/klausur-service/backend/page_sub_sessions.py

"""
Sub-session creation for multi-page spreads.

Used by both the page-split and crop steps when a double-page scan is detected.
"""

import logging
import uuid as uuid_mod
from typing import Any, Dict, List

import cv2
import numpy as np

from page_crop import detect_and_crop_page
from ocr_pipeline_session_store import (
    create_session_db,
    get_sub_sessions,
    update_session_db,
)
from orientation_crop_helpers import get_cache_ref

logger = logging.getLogger(__name__)


async def create_page_sub_sessions(
    parent_session_id: str,
    parent_cached: dict,
    full_img_bgr: np.ndarray,
    page_splits: List[Dict[str, Any]],
) -> List[Dict[str, Any]]:
    """Create one pre-cropped sub-session per detected page of a spread.

    For every entry in ``page_splits`` the page rectangle is cut out of
    ``full_img_bgr``, passed through ``detect_and_crop_page``, PNG-encoded
    and stored as a new session.  The same PNG is then written as the
    session's ``cropped_png`` together with the crop info, and the session
    is moved to ``current_step=5``.

    Args:
        parent_session_id: Session whose existing sub-sessions are looked up
            first; if any exist they are returned and nothing is created.
        parent_cached: Mapping read for ``"name"`` (default ``"Scan"``) and
            ``"filename"`` (default ``"scan.png"``).
        full_img_bgr: Image array indexed as ``[row, column]`` (presumably
            BGR channel order, as the name suggests).
        page_splits: One dict per page with the keys ``page_index``, ``x``,
            ``y``, ``width`` and ``height`` (pixel rectangle in
            ``full_img_bgr``).

    Returns:
        One dict per sub-session.  Reused sub-sessions carry only ``id``,
        ``name`` and ``page_index``; newly created ones additionally carry
        ``source_rect``, ``cropped_size`` and ``detected_format``.  Pages
        whose rectangle does not overlap the image are skipped.
    """
    # Check for existing sub-sessions (idempotent)
    # NOTE(review): create_session_db below is called without any reference
    # to parent_session_id -- confirm that the store links the new sessions
    # to the parent some other way, otherwise this lookup never matches.
    existing = await get_sub_sessions(parent_session_id)
    if existing:
        return [
            {"id": s["id"], "name": s["name"], "page_index": s.get("box_index", i)}
            for i, s in enumerate(existing)
        ]

    parent_name = parent_cached.get("name", "Scan")
    parent_filename = parent_cached.get("filename", "scan.png")
    img_h, img_w = full_img_bgr.shape[:2]

    sub_sessions: List[Dict[str, Any]] = []

    for page in page_splits:
        pi = page["page_index"]
        px, py = int(page["x"]), int(page["y"])
        pw, ph = int(page["width"]), int(page["height"])

        # Clamp the rectangle to the image.  Without this a negative start
        # would be interpreted by numpy as "count from the end" and select
        # the wrong pixels.
        x0, y0 = max(px, 0), max(py, 0)
        x1, y1 = min(px + pw, img_w), min(py + ph, img_h)
        if x1 <= x0 or y1 <= y0:
            logger.warning(
                "Skipping page %d of session %s: region x=%d y=%d w=%d h=%d "
                "does not overlap the %dx%d image",
                pi + 1, parent_session_id, px, py, pw, ph, img_w, img_h,
            )
            continue

        # Extract page region (copy so the sub-image owns its pixels)
        page_bgr = full_img_bgr[y0:y1, x0:x1].copy()

        # Crop each page individually (remove its own borders)
        cropped_page, page_crop_info = detect_and_crop_page(page_bgr)

        # Encode as PNG
        ok, png_buf = cv2.imencode(".png", cropped_page)
        if ok:
            page_png = png_buf.tobytes()
        else:
            # Keep the best-effort behaviour (empty payload) but make the
            # failure visible instead of swallowing it silently.
            logger.error(
                "PNG encoding failed for page %d of session %s; storing empty image",
                pi + 1, parent_session_id,
            )
            page_png = b""

        sub_id = str(uuid_mod.uuid4())
        sub_name = f"{parent_name} — Seite {pi + 1}"

        await create_session_db(
            session_id=sub_id,
            name=sub_name,
            filename=parent_filename,
            original_png=page_png,
        )

        # Pre-populate: set cropped = original (already cropped)
        await update_session_db(
            sub_id,
            cropped_png=page_png,
            crop_result=page_crop_info,
            current_step=5,
        )

        ch, cw = cropped_page.shape[:2]
        sub_sessions.append({
            "id": sub_id,
            "name": sub_name,
            "page_index": pi,
            "source_rect": page,
            "cropped_size": {"width": cw, "height": ch},
            "detected_format": page_crop_info.get("detected_format"),
        })

        logger.info(
            "Page sub-session %s: page %d, region x=%d w=%d -> cropped %dx%d",
            sub_id, pi + 1, px, pw, cw, ch,
        )

    return sub_sessions


async def create_page_sub_sessions_full(
    parent_session_id: str,
    parent_cached: dict,
    full_img_bgr: np.ndarray,
    page_splits: List[Dict[str, Any]],
    start_step: int = 2,
) -> List[Dict[str, Any]]:
    """Create sub-sessions for each page with RAW regions for full pipeline processing.

    Unlike ``create_page_sub_sessions`` (used by the crop step), these
    sub-sessions store the *uncropped* page region and start at
    ``start_step`` (default 2 = ready for deskew; 1 if orientation still
    needed).  Each page goes through its own pipeline independently,
    which is essential for book spreads where each page has a different tilt.

    Args:
        parent_session_id: Session whose existing sub-sessions are looked up
            first; if any exist they are returned and nothing is created.
        parent_cached: Mapping read for ``"name"`` (default ``"Scan"``) and
            ``"filename"`` (default ``"scan.png"``).
        full_img_bgr: Image array indexed as ``[row, column]`` (presumably
            BGR channel order, as the name suggests).
        page_splits: One dict per page with the keys ``page_index``, ``x``,
            ``y``, ``width`` and ``height`` (pixel rectangle in
            ``full_img_bgr``).
        start_step: Value written as ``current_step`` to both the stored
            session and its cache entry.

    Returns:
        One dict per sub-session.  Reused sub-sessions carry only ``id``,
        ``name`` and ``page_index``; newly created ones additionally carry
        ``source_rect`` and ``image_size``.  Pages whose rectangle does not
        overlap the image are skipped.
    """
    _cache = get_cache_ref()

    # Idempotent: reuse existing sub-sessions
    # NOTE(review): create_session_db below is called without any reference
    # to parent_session_id -- confirm that the store links the new sessions
    # to the parent some other way, otherwise this lookup never matches.
    existing = await get_sub_sessions(parent_session_id)
    if existing:
        return [
            {"id": s["id"], "name": s["name"], "page_index": s.get("box_index", i)}
            for i, s in enumerate(existing)
        ]

    parent_name = parent_cached.get("name", "Scan")
    parent_filename = parent_cached.get("filename", "scan.png")
    img_h, img_w = full_img_bgr.shape[:2]

    sub_sessions: List[Dict[str, Any]] = []

    for page in page_splits:
        pi = page["page_index"]
        px, py = int(page["x"]), int(page["y"])
        pw, ph = int(page["width"]), int(page["height"])

        # Clamp the rectangle to the image.  Without this a negative start
        # would be interpreted by numpy as "count from the end" and select
        # the wrong pixels.
        x0, y0 = max(px, 0), max(py, 0)
        x1, y1 = min(px + pw, img_w), min(py + ph, img_h)
        if x1 <= x0 or y1 <= y0:
            logger.warning(
                "Skipping page %d of session %s: region x=%d y=%d w=%d h=%d "
                "does not overlap the %dx%d image",
                pi + 1, parent_session_id, px, py, pw, ph, img_w, img_h,
            )
            continue

        # Extract RAW page region — NO individual cropping here; each
        # sub-session will run its own crop step after deskew + dewarp.
        page_bgr = full_img_bgr[y0:y1, x0:x1].copy()

        # Encode as PNG
        ok, png_buf = cv2.imencode(".png", page_bgr)
        if ok:
            page_png = png_buf.tobytes()
        else:
            # Keep the best-effort behaviour (empty payload) but make the
            # failure visible instead of swallowing it silently.
            logger.error(
                "PNG encoding failed for page %d of session %s; storing empty image",
                pi + 1, parent_session_id,
            )
            page_png = b""

        sub_id = str(uuid_mod.uuid4())
        sub_name = f"{parent_name} — Seite {pi + 1}"

        await create_session_db(
            session_id=sub_id,
            name=sub_name,
            filename=parent_filename,
            original_png=page_png,
        )

        # start_step=2 -> ready for deskew (orientation already done on spread)
        # start_step=1 -> needs its own orientation (split from original image)
        await update_session_db(sub_id, current_step=start_step)

        # Cache the BGR so the pipeline can start immediately
        _cache[sub_id] = {
            "id": sub_id,
            "filename": parent_filename,
            "name": sub_name,
            "original_bgr": page_bgr,
            "oriented_bgr": None,
            "cropped_bgr": None,
            "deskewed_bgr": None,
            "dewarped_bgr": None,
            "orientation_result": None,
            "crop_result": None,
            "deskew_result": None,
            "dewarp_result": None,
            "ground_truth": {},
            "current_step": start_step,
        }

        rh, rw = page_bgr.shape[:2]
        sub_sessions.append({
            "id": sub_id,
            "name": sub_name,
            "page_index": pi,
            "source_rect": page,
            "image_size": {"width": rw, "height": rh},
        })

        logger.info(
            "Page sub-session %s (full pipeline): page %d, region x=%d w=%d -> %dx%d",
            sub_id, pi + 1, px, pw, rw, rh,
        )

    return sub_sessions