"""
Orientation & Crop API - Steps 1 and 4 of the OCR Pipeline.

Step 1: Orientation detection (fix 90/180/270 degree rotations)
Step 4 (UI index 3): Page cropping (after deskew + dewarp, so the image is straight)

These endpoints were extracted from the main pipeline to keep files manageable.
"""

import logging
import math
import time
from typing import Any, Dict, Optional

import cv2
import numpy as np
from fastapi import APIRouter, HTTPException
from pydantic import BaseModel

from cv_vocab_pipeline import detect_and_fix_orientation
from page_crop import detect_and_crop_page
from ocr_pipeline_session_store import (
    get_session_db,
    get_session_image,
    update_session_db,
)

logger = logging.getLogger(__name__)
router = APIRouter(prefix="/api/v1/ocr-pipeline", tags=["ocr-pipeline"])

# Reference to the shared cache from ocr_pipeline_api (set in main.py)
_cache: Dict[str, Dict[str, Any]] = {}

# (stored image type, cache key) pairs hydrated when a session is loaded from the DB.
_IMAGE_SLOTS = (
    ("original", "original_bgr"),
    ("oriented", "oriented_bgr"),
    ("cropped", "cropped_bgr"),
    ("deskewed", "deskewed_bgr"),
    ("dewarped", "dewarped_bgr"),
)

# Cache keys tried, in order of preference, when picking the image to crop.
_CROP_SOURCE_KEYS = ("dewarped_bgr", "oriented_bgr", "original_bgr")


def set_cache_ref(cache: Dict[str, Dict[str, Any]]):
    """Set reference to the shared cache from ocr_pipeline_api."""
    global _cache
    _cache = cache


def _encode_png(image_bgr: np.ndarray, label: str) -> bytes:
    """Encode an image as PNG bytes.

    Args:
        image_bgr: Image array handed to ``cv2.imencode``.
        label: Short name of the image, used only in the error message.

    Raises:
        HTTPException: 500 when OpenCV reports that encoding failed, so an
            empty blob is never persisted as if it were a valid image.
    """
    ok, buffer = cv2.imencode(".png", image_bgr)
    if not ok:
        raise HTTPException(
            status_code=500,
            detail=f"Failed to encode {label} image as PNG",
        )
    return buffer.tobytes()


def _select_crop_source(cached: Dict[str, Any]) -> Optional[np.ndarray]:
    """Return the first available image among dewarped, oriented, original."""
    for key in _CROP_SOURCE_KEYS:
        image = cached.get(key)
        if image is not None:
            return image
    return None


def _pct_to_px(pct: float, extent: int) -> int:
    """Convert a percentage of ``extent`` to whole pixels.

    The percentage is limited to 0-100 before scaling so that extreme finite
    values cannot overflow the float multiplication.
    """
    bounded = min(max(pct, 0.0), 100.0)
    return int(extent * bounded / 100.0)


async def _ensure_cached(session_id: str) -> Dict[str, Any]:
    """Ensure session is in cache, loading from DB if needed.

    Args:
        session_id: Identifier of the pipeline session.

    Returns:
        The cache entry for the session. On a cache miss the entry is built
        from the DB record plus any stored images decoded to arrays.

    Raises:
        HTTPException: 404 when the session does not exist in the DB.
    """
    if session_id in _cache:
        return _cache[session_id]

    session = await get_session_db(session_id)
    if not session:
        raise HTTPException(status_code=404, detail=f"Session {session_id} not found")

    cache_entry: Dict[str, Any] = {
        "id": session_id,
        **session,
        "original_bgr": None,
        "oriented_bgr": None,
        "cropped_bgr": None,
        "deskewed_bgr": None,
        "dewarped_bgr": None,
    }

    for img_type, bgr_key in _IMAGE_SLOTS:
        png_data = await get_session_image(session_id, img_type)
        if png_data:
            raw = np.frombuffer(png_data, dtype=np.uint8)
            # imdecode yields None for undecodable data; the slot then stays None.
            cache_entry[bgr_key] = cv2.imdecode(raw, cv2.IMREAD_COLOR)

    _cache[session_id] = cache_entry
    return cache_entry


async def _append_pipeline_log(session_id: str, step: str, metrics: dict, duration_ms: int):
    """Append a step entry to the pipeline log.

    Does nothing when the session cannot be loaded from the DB.

    Args:
        session_id: Identifier of the pipeline session.
        step: Name of the completed step.
        metrics: Step-specific metrics stored with the entry.
        duration_ms: Duration of the step in milliseconds.
    """
    from datetime import datetime, timezone

    session = await get_session_db(session_id)
    if not session:
        return

    pipeline_log = session.get("pipeline_log") or {"steps": []}
    # Naive UTC timestamp: same ISO format as the deprecated datetime.utcnow().
    completed_at = datetime.now(timezone.utc).replace(tzinfo=None).isoformat()
    # setdefault guards against a stored log that lacks the "steps" list.
    pipeline_log.setdefault("steps", []).append({
        "step": step,
        "completed_at": completed_at,
        "success": True,
        "duration_ms": duration_ms,
        "metrics": metrics,
    })
    await update_session_db(session_id, pipeline_log=pipeline_log)


# ---------------------------------------------------------------------------
# Step 1: Orientation
# ---------------------------------------------------------------------------

@router.post("/sessions/{session_id}/orientation")
async def detect_orientation(session_id: str):
    """Detect and fix 90/180/270 degree rotations from scanners.

    Reads the original image, applies orientation correction,
    stores the result as oriented_png.

    Raises:
        HTTPException: 404 if the session is unknown, 400 if the original
            image is missing, 500 if the result cannot be PNG-encoded.
    """
    cached = await _ensure_cached(session_id)

    img_bgr = cached.get("original_bgr")
    if img_bgr is None:
        raise HTTPException(status_code=400, detail="Original image not available")

    # perf_counter is monotonic, so the duration cannot go negative on clock changes.
    t0 = time.perf_counter()

    # Detect and fix orientation (on a copy, leaving the cached original untouched)
    oriented_bgr, orientation_deg = detect_and_fix_orientation(img_bgr.copy())

    duration = time.perf_counter() - t0

    orientation_result = {
        "orientation_degrees": orientation_deg,
        "corrected": orientation_deg != 0,
        "duration_seconds": round(duration, 2),
    }

    # Encode first: a failure aborts before cache or DB are modified.
    oriented_png = _encode_png(oriented_bgr, "oriented")

    # Update cache
    cached["oriented_bgr"] = oriented_bgr
    cached["orientation_result"] = orientation_result

    # Persist to DB
    await update_session_db(
        session_id,
        oriented_png=oriented_png,
        orientation_result=orientation_result,
        current_step=2,
    )

    logger.info(
        "OCR Pipeline: orientation session %s: %d° (%s) in %.2fs",
        session_id, orientation_deg,
        "corrected" if orientation_deg else "no change",
        duration,
    )

    await _append_pipeline_log(session_id, "orientation", {
        "orientation_degrees": orientation_deg,
        "corrected": orientation_deg != 0,
    }, duration_ms=int(duration * 1000))

    h, w = oriented_bgr.shape[:2]
    return {
        "session_id": session_id,
        **orientation_result,
        "image_width": w,
        "image_height": h,
        "oriented_image_url": f"/api/v1/ocr-pipeline/sessions/{session_id}/image/oriented",
    }


# ---------------------------------------------------------------------------
# Step 4 (UI index 3): Crop — runs after deskew + dewarp
# ---------------------------------------------------------------------------

@router.post("/sessions/{session_id}/crop")
async def auto_crop(session_id: str):
    """Auto-detect and crop scanner/book borders.

    Reads the dewarped image (post-deskew + dewarp, so the page is straight).
    Falls back to oriented → original if earlier steps were skipped.

    Raises:
        HTTPException: 404 if the session is unknown, 400 if no source image
            is available, 500 if the result cannot be PNG-encoded.
    """
    cached = await _ensure_cached(session_id)

    # Use dewarped (preferred), fall back to oriented, then original
    img_bgr = _select_crop_source(cached)
    if img_bgr is None:
        raise HTTPException(status_code=400, detail="No image available for cropping")

    t0 = time.perf_counter()

    cropped_bgr, crop_info = detect_and_crop_page(img_bgr)

    duration = time.perf_counter() - t0
    crop_info["duration_seconds"] = round(duration, 2)

    # Encode first: a failure aborts before cache or DB are modified.
    cropped_png = _encode_png(cropped_bgr, "cropped")

    # Update cache
    cached["cropped_bgr"] = cropped_bgr
    cached["crop_result"] = crop_info

    # Persist to DB
    await update_session_db(
        session_id,
        cropped_png=cropped_png,
        crop_result=crop_info,
        current_step=5,
    )

    logger.info(
        "OCR Pipeline: crop session %s: applied=%s format=%s in %.2fs",
        session_id, crop_info["crop_applied"],
        crop_info.get("detected_format", "?"),
        duration,
    )

    await _append_pipeline_log(session_id, "crop", {
        "crop_applied": crop_info["crop_applied"],
        "detected_format": crop_info.get("detected_format"),
        "format_confidence": crop_info.get("format_confidence"),
    }, duration_ms=int(duration * 1000))

    h, w = cropped_bgr.shape[:2]
    return {
        "session_id": session_id,
        **crop_info,
        "image_width": w,
        "image_height": h,
        "cropped_image_url": f"/api/v1/ocr-pipeline/sessions/{session_id}/image/cropped",
    }


class ManualCropRequest(BaseModel):
    """Crop rectangle expressed as percentages of the source image size."""

    x: float  # percentage 0-100
    y: float  # percentage 0-100
    width: float  # percentage 0-100
    height: float  # percentage 0-100


@router.post("/sessions/{session_id}/crop/manual")
async def manual_crop(session_id: str, req: ManualCropRequest):
    """Manually crop using percentage coordinates.

    The rectangle is converted to pixels and clamped to the image, so it
    always covers at least one pixel and never extends past the borders.

    Raises:
        HTTPException: 404 if the session is unknown, 400 if no source image
            is available or a coordinate is NaN/infinite, 500 if the result
            cannot be PNG-encoded.
    """
    cached = await _ensure_cached(session_id)

    img_bgr = _select_crop_source(cached)
    if img_bgr is None:
        raise HTTPException(status_code=400, detail="No image available for cropping")

    # NaN/Infinity would otherwise blow up in int() and surface as a 500.
    if not all(math.isfinite(v) for v in (req.x, req.y, req.width, req.height)):
        raise HTTPException(status_code=400, detail="Crop coordinates must be finite numbers")

    h, w = img_bgr.shape[:2]

    # Convert percentages to pixels
    px_x = _pct_to_px(req.x, w)
    px_y = _pct_to_px(req.y, h)
    px_w = _pct_to_px(req.width, w)
    px_h = _pct_to_px(req.height, h)

    # Clamp
    px_x = max(0, min(px_x, w - 1))
    px_y = max(0, min(px_y, h - 1))
    px_w = max(1, min(px_w, w - px_x))
    px_h = max(1, min(px_h, h - px_y))

    # Copy so the cached crop does not share memory with the source image.
    cropped_bgr = img_bgr[px_y:px_y + px_h, px_x:px_x + px_w].copy()

    # Encode first: a failure aborts before cache or DB are modified.
    cropped_png = _encode_png(cropped_bgr, "cropped")

    crop_result = {
        "crop_applied": True,
        "crop_rect": {"x": px_x, "y": px_y, "width": px_w, "height": px_h},
        "crop_rect_pct": {"x": round(req.x, 2), "y": round(req.y, 2),
                          "width": round(req.width, 2), "height": round(req.height, 2)},
        "original_size": {"width": w, "height": h},
        "cropped_size": {"width": px_w, "height": px_h},
        "method": "manual",
    }

    cached["cropped_bgr"] = cropped_bgr
    cached["crop_result"] = crop_result

    await update_session_db(
        session_id,
        cropped_png=cropped_png,
        crop_result=crop_result,
        current_step=5,
    )

    ch, cw = cropped_bgr.shape[:2]
    return {
        "session_id": session_id,
        **crop_result,
        "image_width": cw,
        "image_height": ch,
        "cropped_image_url": f"/api/v1/ocr-pipeline/sessions/{session_id}/image/cropped",
    }
@router.post("/sessions/{session_id}/crop/skip")
async def skip_crop(session_id: str):
    """Skip cropping — use dewarped (or oriented/original) image as-is.

    The chosen image is stored unchanged as the cropped image and the
    session is advanced to step 5.

    Raises:
        HTTPException: 400 if no source image is available, 500 if the image
            cannot be PNG-encoded. Errors raised by ``_ensure_cached``
            propagate unchanged.
    """
    cached = await _ensure_cached(session_id)

    # Use dewarped (preferred), fall back to oriented, then original
    img_bgr = None
    for key in ("dewarped_bgr", "oriented_bgr", "original_bgr"):
        candidate = cached.get(key)
        if candidate is not None:
            img_bgr = candidate
            break
    if img_bgr is None:
        raise HTTPException(status_code=400, detail="No image available")

    h, w = img_bgr.shape[:2]

    # Store the dewarped image as cropped (identity crop)
    success, png_buf = cv2.imencode(".png", img_bgr)
    if not success:
        # Fail before touching cache/DB instead of persisting an empty blob
        # as the cropped image and advancing the step.
        raise HTTPException(status_code=500, detail="Failed to encode cropped image as PNG")
    cropped_png = png_buf.tobytes()

    crop_result = {
        "crop_applied": False,
        "skipped": True,
        "original_size": {"width": w, "height": h},
        "cropped_size": {"width": w, "height": h},
    }

    # NOTE: the cache entry references the same array object as the source (no copy).
    cached["cropped_bgr"] = img_bgr
    cached["crop_result"] = crop_result

    await update_session_db(
        session_id,
        cropped_png=cropped_png,
        crop_result=crop_result,
        current_step=5,
    )

    return {
        "session_id": session_id,
        **crop_result,
        "image_width": w,
        "image_height": h,
        "cropped_image_url": f"/api/v1/ocr-pipeline/sessions/{session_id}/image/cropped",
    }