diff --git a/klausur-service/backend/grid_editor_api.py b/klausur-service/backend/grid_editor_api.py
index 2c54aec..122fe50 100644
--- a/klausur-service/backend/grid_editor_api.py
+++ b/klausur-service/backend/grid_editor_api.py
@@ -83,11 +83,25 @@ def _filter_border_strip_words(words: List[Dict]) -> Tuple[List[Dict], int]:
             right_count = total - gi
             break
 
+    # Validate candidate strip: real border decorations are mostly short
+    # single-character words (alphabet letters, stray marks). Multi-word
+    # content like "der Ranzen" or "die Schals" (continuation of German
+    # translations) must NOT be removed.
+    def _is_decorative_strip(candidates: List[Dict]) -> bool:
+        if not candidates:
+            return False
+        short = sum(1 for w in candidates if len((w.get("text") or "").strip()) <= 2)
+        return short / len(candidates) >= 0.5
+
     strip_ids: set = set()
     if left_count > 0 and left_count / total < 0.20:
-        strip_ids = {id(w) for w in sorted_words[:left_count]}
+        candidates = sorted_words[:left_count]
+        if _is_decorative_strip(candidates):
+            strip_ids = {id(w) for w in candidates}
     elif right_count > 0 and right_count / total < 0.20:
-        strip_ids = {id(w) for w in sorted_words[total - right_count :]}
+        candidates = sorted_words[total - right_count:]
+        if _is_decorative_strip(candidates):
+            strip_ids = {id(w) for w in candidates}
 
     if not strip_ids:
         return words, 0
diff --git a/klausur-service/backend/orientation_crop_api.py b/klausur-service/backend/orientation_crop_api.py
index cee1c64..0d758b9 100644
--- a/klausur-service/backend/orientation_crop_api.py
+++ b/klausur-service/backend/orientation_crop_api.py
@@ -163,6 +163,84 @@ async def detect_orientation(session_id: str):
     }
 
 
+# ---------------------------------------------------------------------------
+# Step 1b: Page-split detection — runs AFTER orientation, BEFORE deskew
+# ---------------------------------------------------------------------------
+
+@router.post("/sessions/{session_id}/page-split")
+async def detect_page_split(session_id: str):
+    """Detect if the image is a double-page book spread and split into sub-sessions.
+
+    Must be called **after orientation** (step 1) and **before deskew** (step 2).
+    Each sub-session receives the raw page region and goes through the full
+    pipeline (deskew → dewarp → crop → columns → rows → words → grid)
+    independently, so each page gets its own deskew correction.
+
+    Returns ``{"multi_page": false}`` if only one page is detected.
+    """
+    cached = await _ensure_cached(session_id)
+
+    # Use oriented (preferred), fall back to original
+    img_bgr = next(
+        (v for k in ("oriented_bgr", "original_bgr")
+         if (v := cached.get(k)) is not None),
+        None,
+    )
+    if img_bgr is None:
+        raise HTTPException(status_code=400, detail="No image available for page-split detection")
+
+    t0 = time.time()
+    page_splits = detect_page_splits(img_bgr)
+
+    if not page_splits or len(page_splits) < 2:
+        duration = time.time() - t0
+        logger.info(
+            "OCR Pipeline: page-split session %s: single page (%.2fs)",
+            session_id, duration,
+        )
+        return {
+            "session_id": session_id,
+            "multi_page": False,
+            "duration_seconds": round(duration, 2),
+        }
+
+    # Multi-page spread detected — create sub-sessions for full pipeline
+    sub_sessions = await _create_page_sub_sessions_full(
+        session_id, cached, img_bgr, page_splits,
+    )
+    duration = time.time() - t0
+
+    split_info: Dict[str, Any] = {
+        "multi_page": True,
+        "page_count": len(page_splits),
+        "page_splits": page_splits,
+        "duration_seconds": round(duration, 2),
+    }
+
+    # Mark parent session as split (store info in crop_result for backward compat)
+    await update_session_db(session_id, crop_result=split_info)
+    cached["crop_result"] = split_info
+
+    await _append_pipeline_log(session_id, "page_split", {
+        "multi_page": True,
+        "page_count": len(page_splits),
+    }, duration_ms=int(duration * 1000))
+
+    logger.info(
+        "OCR Pipeline: page-split session %s: %d pages detected in %.2fs",
+        session_id, len(page_splits), duration,
+    )
+
+    h, w = img_bgr.shape[:2]
+    return {
+        "session_id": session_id,
+        **split_info,
+        "image_width": w,
+        "image_height": h,
+        "sub_sessions": sub_sessions,
+    }
+
+
 # ---------------------------------------------------------------------------
 # Step 4 (UI index 3): Crop — runs after deskew + dewarp
 # ---------------------------------------------------------------------------
@@ -191,7 +269,28 @@ async def auto_crop(session_id: str):
 
     t0 = time.time()
 
-    # --- Multi-page detection ---
+    # --- Check for existing sub-sessions (from page-split step) ---
+    # If page-split already created sub-sessions, skip multi-page detection
+    # in the crop step. Each sub-session runs its own crop independently.
+    existing_subs = await get_sub_sessions(session_id)
+    if existing_subs:
+        crop_result = cached.get("crop_result") or {}
+        if crop_result.get("multi_page"):
+            # Already split — just return the existing info
+            h, w = img_bgr.shape[:2]
+            return {
+                "session_id": session_id,
+                **crop_result,
+                "image_width": w,
+                "image_height": h,
+                "sub_sessions": [
+                    {"id": s["id"], "name": s.get("name"), "page_index": s.get("box_index", i)}
+                    for i, s in enumerate(existing_subs)
+                ],
+                "note": "Page split was already performed; each sub-session runs its own crop.",
+            }
+
+    # --- Multi-page detection (fallback for sessions that skipped page-split) ---
     page_splits = detect_page_splits(img_bgr)
 
     if page_splits and len(page_splits) >= 2:
@@ -370,6 +469,105 @@ async def _create_page_sub_sessions(
     return sub_sessions
 
 
+async def _create_page_sub_sessions_full(
+    parent_session_id: str,
+    parent_cached: dict,
+    full_img_bgr: np.ndarray,
+    page_splits: List[Dict[str, Any]],
+) -> List[Dict[str, Any]]:
+    """Create sub-sessions for each page with RAW regions for full pipeline processing.
+
+    Unlike ``_create_page_sub_sessions`` (used by the crop step), these
+    sub-sessions store the *uncropped* page region and start at
+    ``current_step=2`` (ready for deskew). Each page therefore goes through
+    its own deskew → dewarp → crop → columns → rows → words → grid pipeline,
+    which is essential for book spreads where each page has a different tilt.
+    """
+    # Idempotent: reuse existing sub-sessions
+    existing = await get_sub_sessions(parent_session_id)
+    if existing:
+        return [
+            {"id": s["id"], "name": s.get("name"), "page_index": s.get("box_index", i)}
+            for i, s in enumerate(existing)
+        ]
+
+    parent_name = parent_cached.get("name", "Scan")
+    parent_filename = parent_cached.get("filename", "scan.png")
+
+    sub_sessions: List[Dict[str, Any]] = []
+
+    for page in page_splits:
+        pi = page["page_index"]
+        px, py = page["x"], page["y"]
+        pw, ph = page["width"], page["height"]
+
+        # Extract RAW page region — NO individual cropping here; each
+        # sub-session will run its own crop step after deskew + dewarp.
+        page_bgr = full_img_bgr[py:py + ph, px:px + pw].copy()
+
+        # Encode as PNG
+        ok, png_buf = cv2.imencode(".png", page_bgr)
+        if not ok:
+            # Best-effort: the cached BGR below is still usable, but an empty
+            # PNG is about to be persisted, so make the failure visible.
+            logger.warning(
+                "Page sub-session for %s: PNG encoding failed for page %d",
+                parent_session_id, pi + 1,
+            )
+        page_png = png_buf.tobytes() if ok else b""
+
+        sub_id = str(uuid_mod.uuid4())
+        sub_name = f"{parent_name} — Seite {pi + 1}"
+
+        await create_session_db(
+            session_id=sub_id,
+            name=sub_name,
+            filename=parent_filename,
+            original_png=page_png,
+            parent_session_id=parent_session_id,
+            box_index=pi,
+        )
+
+        # Start at step 2 (deskew) — orientation was already applied to the
+        # whole spread before splitting.
+        await update_session_db(sub_id, current_step=2)
+
+        # Cache the BGR so the pipeline can start immediately
+        _cache[sub_id] = {
+            "id": sub_id,
+            "filename": parent_filename,
+            "name": sub_name,
+            "parent_session_id": parent_session_id,
+            "original_bgr": page_bgr,
+            "oriented_bgr": None,
+            "cropped_bgr": None,
+            "deskewed_bgr": None,
+            "dewarped_bgr": None,
+            "orientation_result": None,
+            "crop_result": None,
+            "deskew_result": None,
+            "dewarp_result": None,
+            "ground_truth": {},
+            "current_step": 2,
+        }
+
+        rh, rw = page_bgr.shape[:2]
+        sub_sessions.append({
+            "id": sub_id,
+            "name": sub_name,
+            "page_index": pi,
+            "source_rect": page,
+            "image_size": {"width": rw, "height": rh},
+        })
+
+        logger.info(
+            "Page sub-session %s (full pipeline): page %d, region x=%d w=%d → %dx%d",
+            sub_id, pi + 1, px, pw, rw, rh,
+        )
+
+    return sub_sessions
+
+
 class ManualCropRequest(BaseModel):
     x: float  # percentage 0-100
     y: float  # percentage 0-100