"""
OCR Pipeline Deskew Endpoints (Step 2)

Auto deskew, manual deskew, and ground truth for the deskew step.
Extracted from ocr_pipeline_geometry.py for file-size compliance.
"""

import logging
import time
from datetime import datetime

import cv2
from fastapi import APIRouter, HTTPException

from cv_vocab_pipeline import (
    create_ocr_image,
    deskew_image,
    deskew_image_by_word_alignment,
    deskew_two_pass,
)
from ocr_pipeline_session_store import (
    get_session_db,
    update_session_db,
)
from ocr_pipeline_common import (
    _cache,
    _load_session_to_cache,
    _get_cached,
    _append_pipeline_log,
    ManualDeskewRequest,
    DeskewGroundTruthRequest,
)

logger = logging.getLogger(__name__)

router = APIRouter(prefix="/api/v1/ocr-pipeline", tags=["ocr-pipeline"])


def _select_source_image(cached):
    """Return the image to deskew from a session cache entry.

    Deskew runs right after orientation, so ``oriented_bgr`` is preferred and
    ``original_bgr`` is the fallback.

    Raises:
        HTTPException: 400 when neither image is present in the cache entry.
    """
    for key in ("oriented_bgr", "original_bgr"):
        image = cached.get(key)
        if image is not None:
            return image
    raise HTTPException(status_code=400, detail="No image available for deskewing")


def _encode_deskewed_png(image) -> bytes:
    """Encode the deskewed image as PNG bytes.

    Raises:
        HTTPException: 500 when OpenCV reports an encoding failure. Persisting
            empty bytes instead would overwrite the stored image with garbage.
    """
    ok, buf = cv2.imencode(".png", image)
    if not ok:
        raise HTTPException(status_code=500, detail="Failed to encode deskewed image")
    return buf.tobytes()


def _binarize_to_png(image):
    """Best-effort binarization: return PNG bytes, or None on any failure.

    Failures are logged and never abort the deskew request.
    """
    try:
        binarized = create_ocr_image(image)
        ok, buf = cv2.imencode(".png", binarized)
        return buf.tobytes() if ok else None
    except Exception as e:
        logger.warning(f"Binarization failed: {e}")
        return None


@router.post("/sessions/{session_id}/deskew")
async def auto_deskew(session_id: str):
    """Two-pass deskew: iterative projection (wide range) + word-alignment residual.

    Loads the session into the cache if needed, runs ``deskew_two_pass`` on the
    oriented (or original) image, stores the result in the cache and the DB,
    sets ``current_step`` to 3 and appends a pipeline log entry.

    Returns:
        A dict with the session id, every field of the deskew result (angles,
        method, confidence, duration, debug info) and the URLs of the
        deskewed and binarized images.

    Raises:
        HTTPException: 400 if the session has no image to deskew; 500 if the
            deskewed image cannot be encoded as PNG.
    """
    # Ensure session is in cache
    if session_id not in _cache:
        await _load_session_to_cache(session_id)
    cached = _get_cached(session_id)

    img_bgr = _select_source_image(cached)

    # perf_counter is monotonic, so the duration is immune to clock adjustments.
    t0 = time.perf_counter()

    # Two-pass deskew: iterative (+-5 deg) + word-alignment residual check
    deskewed_bgr, angle_applied, two_pass_debug = deskew_two_pass(img_bgr.copy())

    # Also run individual methods for reporting (non-authoritative).
    # A failure here only zeroes the reported angle; it never fails the request.
    try:
        _, angle_hough = deskew_image(img_bgr.copy())
    except Exception as e:
        logger.debug(f"Hough deskew (diagnostic) failed: {e}")
        angle_hough = 0.0

    success_enc, png_orig = cv2.imencode(".png", img_bgr)
    orig_bytes = png_orig.tobytes() if success_enc else b""
    try:
        _, angle_wa = deskew_image_by_word_alignment(orig_bytes)
    except Exception as e:
        logger.debug(f"Word-alignment deskew (diagnostic) failed: {e}")
        angle_wa = 0.0

    angle_iterative = two_pass_debug.get("pass1_angle", 0.0)
    angle_residual = two_pass_debug.get("pass2_angle", 0.0)
    angle_textline = two_pass_debug.get("pass3_angle", 0.0)

    duration = time.perf_counter() - t0

    # Name the deepest pass that contributed a non-negligible correction.
    if abs(angle_textline) >= 0.01:
        method_used = "three_pass"
    elif abs(angle_residual) >= 0.01:
        method_used = "two_pass"
    else:
        method_used = "iterative"

    # Encode as PNG (raises 500 before any cache/DB state is touched)
    deskewed_png = _encode_deskewed_png(deskewed_bgr)

    # Create binarized version (best effort)
    binarized_png = _binarize_to_png(deskewed_bgr)

    # Confidence decreases linearly with the applied angle, floored at 0.5.
    confidence = max(0.5, 1.0 - abs(angle_applied) / 5.0)

    deskew_result = {
        "angle_hough": round(angle_hough, 3),
        "angle_word_alignment": round(angle_wa, 3),
        "angle_iterative": round(angle_iterative, 3),
        "angle_residual": round(angle_residual, 3),
        "angle_textline": round(angle_textline, 3),
        "angle_applied": round(angle_applied, 3),
        "method_used": method_used,
        "confidence": round(confidence, 2),
        "duration_seconds": round(duration, 2),
        "two_pass_debug": two_pass_debug,
    }

    # Update cache
    cached["deskewed_bgr"] = deskewed_bgr
    cached["binarized_png"] = binarized_png
    cached["deskew_result"] = deskew_result

    # Persist to DB
    db_update = {
        "deskewed_png": deskewed_png,
        "deskew_result": deskew_result,
        "current_step": 3,
    }
    # NOTE(review): when binarization fails the DB keeps any previously stored
    # binarized_png while the cache holds None -- confirm this is intended.
    if binarized_png:
        db_update["binarized_png"] = binarized_png

    await update_session_db(session_id, **db_update)

    logger.info(f"OCR Pipeline: deskew session {session_id}: "
                f"hough={angle_hough:.2f} wa={angle_wa:.2f} "
                f"iter={angle_iterative:.2f} residual={angle_residual:.2f} "
                f"textline={angle_textline:.2f} "
                f"-> {method_used} total={angle_applied:.2f}")

    await _append_pipeline_log(session_id, "deskew", {
        "angle_applied": round(angle_applied, 3),
        "angle_iterative": round(angle_iterative, 3),
        "angle_residual": round(angle_residual, 3),
        "angle_textline": round(angle_textline, 3),
        "confidence": round(confidence, 2),
        "method": method_used,
    }, duration_ms=int(duration * 1000))

    return {
        "session_id": session_id,
        **deskew_result,
        "deskewed_image_url": f"/api/v1/ocr-pipeline/sessions/{session_id}/image/deskewed",
        "binarized_image_url": f"/api/v1/ocr-pipeline/sessions/{session_id}/image/binarized",
    }


@router.post("/sessions/{session_id}/deskew/manual")
async def manual_deskew(session_id: str, req: ManualDeskewRequest):
    """Apply a manual rotation angle to the oriented (or original) image.

    ``req.angle`` is clamped to [-5.0, 5.0] before the image is rotated about
    its centre. The rotated image replaces the cached and persisted deskew
    output; fields of any earlier deskew result are kept, except that
    ``angle_applied`` and ``method_used`` are overwritten.

    Returns:
        A dict with the session id, the applied (clamped) angle, the method
        name ``"manual"`` and the URL of the deskewed image.

    Raises:
        HTTPException: 400 if the session has no image to deskew; 500 if the
            rotated image cannot be encoded as PNG.
    """
    if session_id not in _cache:
        await _load_session_to_cache(session_id)
    cached = _get_cached(session_id)

    img_bgr = _select_source_image(cached)

    angle = max(-5.0, min(5.0, req.angle))

    h, w = img_bgr.shape[:2]
    center = (w // 2, h // 2)
    M = cv2.getRotationMatrix2D(center, angle, 1.0)
    # BORDER_REPLICATE fills the exposed corners with the nearest edge pixels.
    rotated = cv2.warpAffine(img_bgr, M, (w, h),
                             flags=cv2.INTER_LINEAR,
                             borderMode=cv2.BORDER_REPLICATE)

    # Encode as PNG (raises 500 before any cache/DB state is touched)
    deskewed_png = _encode_deskewed_png(rotated)

    # Binarize (best effort; failures are logged, matching auto_deskew)
    binarized_png = _binarize_to_png(rotated)

    deskew_result = {
        **(cached.get("deskew_result") or {}),
        "angle_applied": round(angle, 3),
        "method_used": "manual",
    }

    # Update cache
    cached["deskewed_bgr"] = rotated
    cached["binarized_png"] = binarized_png
    cached["deskew_result"] = deskew_result

    # Persist to DB
    db_update = {
        "deskewed_png": deskewed_png,
        "deskew_result": deskew_result,
    }
    if binarized_png:
        db_update["binarized_png"] = binarized_png

    await update_session_db(session_id, **db_update)

    logger.info(f"OCR Pipeline: manual deskew session {session_id}: {angle:.2f}")

    return {
        "session_id": session_id,
        "angle_applied": round(angle, 3),
        "method_used": "manual",
        "deskewed_image_url": f"/api/v1/ocr-pipeline/sessions/{session_id}/image/deskewed",
    }

@router.post("/sessions/{session_id}/ground-truth/deskew")
async def save_deskew_ground_truth(session_id: str, req: DeskewGroundTruthRequest):
    """Save ground truth feedback for the deskew step.

    Stores the reviewer's verdict (``is_correct``, ``corrected_angle``,
    ``notes``) together with a UTC timestamp and the session's current
    ``deskew_result`` under the ``"deskew"`` key of the session's
    ``ground_truth`` mapping, then mirrors the mapping into the cache if the
    session is cached.

    Returns:
        A dict with the session id and the ground-truth entry that was saved.

    Raises:
        HTTPException: 404 if the session does not exist.
    """
    # Function-scope import keeps this block self-contained.
    from datetime import timezone

    session = await get_session_db(session_id)
    if not session:
        raise HTTPException(status_code=404, detail=f"Session {session_id} not found")

    # datetime.utcnow() is deprecated since Python 3.12. Take an aware UTC
    # timestamp and strip tzinfo so the stored ISO string keeps the same
    # naive format as before (no "+00:00" suffix).
    saved_at = datetime.now(timezone.utc).replace(tzinfo=None).isoformat()

    ground_truth = session.get("ground_truth") or {}
    gt = {
        "is_correct": req.is_correct,
        "corrected_angle": req.corrected_angle,
        "notes": req.notes,
        "saved_at": saved_at,
        "deskew_result": session.get("deskew_result"),
    }
    ground_truth["deskew"] = gt

    await update_session_db(session_id, ground_truth=ground_truth)

    # Update cache
    if session_id in _cache:
        _cache[session_id]["ground_truth"] = ground_truth

    logger.info(f"OCR Pipeline: ground truth deskew session {session_id}: "
                f"correct={req.is_correct}, corrected_angle={req.corrected_angle}")

    return {"session_id": session_id, "ground_truth": gt}