"""Orientation & Crop shared helpers.

Cache management and pipeline logging.
"""

import logging
from datetime import datetime, timezone
from typing import Any, Dict

import cv2
import numpy as np
from fastapi import HTTPException

from ocr_pipeline_session_store import (
    get_session_db,
    get_session_image,
    update_session_db,
)

logger = logging.getLogger(__name__)

# Reference to the shared cache from ocr_pipeline_api (set in main.py)
_cache: Dict[str, Dict[str, Any]] = {}

# (stored image type, cache key that holds the decoded image) pairs, in the
# order the images are loaded from the session store.
_IMAGE_CACHE_KEYS = (
    ("original", "original_bgr"),
    ("oriented", "oriented_bgr"),
    ("cropped", "cropped_bgr"),
    ("deskewed", "deskewed_bgr"),
    ("dewarped", "dewarped_bgr"),
)


def set_cache_ref(cache: Dict[str, Dict[str, Any]]) -> None:
    """Set reference to the shared cache from ocr_pipeline_api.

    The module-level name is rebound, so callers should always go through
    ``get_cache_ref()`` / ``ensure_cached()`` rather than holding on to a
    previously obtained dict.
    """
    global _cache
    _cache = cache


def get_cache_ref() -> Dict[str, Dict[str, Any]]:
    """Return the cache dict currently in use by this module."""
    return _cache


async def ensure_cached(session_id: str) -> Dict[str, Any]:
    """Ensure the session is in the cache, loading it from the DB if needed.

    On a cache miss the session row is fetched, every stored image listed in
    ``_IMAGE_CACHE_KEYS`` is decoded with OpenCV, and the resulting entry is
    stored in the cache under ``session_id``.

    Args:
        session_id: Identifier of the session to look up.

    Returns:
        The cache entry: the session fields plus ``"id"`` and one ``*_bgr``
        key per image type. A ``*_bgr`` value is ``None`` when the image is
        not stored or could not be decoded.

    Raises:
        HTTPException: 404 when the session does not exist in the DB.
    """
    if session_id in _cache:
        return _cache[session_id]

    session = await get_session_db(session_id)
    if not session:
        raise HTTPException(status_code=404, detail=f"Session {session_id} not found")

    # The image slots come last so they always start out as None, even if the
    # session row happens to carry keys with the same names.
    cache_entry: Dict[str, Any] = {
        "id": session_id,
        **session,
        **{bgr_key: None for _, bgr_key in _IMAGE_CACHE_KEYS},
    }

    for img_type, bgr_key in _IMAGE_CACHE_KEYS:
        png_data = await get_session_image(session_id, img_type)
        if not png_data:
            continue
        encoded = np.frombuffer(png_data, dtype=np.uint8)
        decoded = cv2.imdecode(encoded, cv2.IMREAD_COLOR)
        if decoded is None:
            # imdecode signals failure by returning None instead of raising;
            # surface it so a corrupt stored image is not mistaken for a
            # missing one.
            logger.warning(
                "Session %s: stored '%s' image could not be decoded",
                session_id,
                img_type,
            )
        cache_entry[bgr_key] = decoded

    _cache[session_id] = cache_entry
    return cache_entry


async def append_pipeline_log(
    session_id: str, step: str, metrics: dict, duration_ms: int
) -> None:
    """Append a step entry to the session's pipeline log.

    The log is read from the session row, extended with one entry and written
    back via ``update_session_db``. Does nothing when the session is not
    found.

    Args:
        session_id: Identifier of the session whose log is extended.
        step: Name of the pipeline step that finished.
        metrics: Step metrics, stored as-is under ``"metrics"``.
        duration_ms: Step duration, stored as-is under ``"duration_ms"``.
    """
    session = await get_session_db(session_id)
    if not session:
        return

    pipeline_log = session.get("pipeline_log") or {}
    # A stored log without a "steps" list must not break logging.
    steps = pipeline_log.setdefault("steps", [])

    # datetime.utcnow() is deprecated; stripping tzinfo from an aware UTC
    # timestamp keeps the stored string in the same naive ISO format.
    completed_at = datetime.now(timezone.utc).replace(tzinfo=None).isoformat()

    steps.append({
        "step": step,
        "completed_at": completed_at,
        "success": True,
        "duration_ms": duration_ms,
        "metrics": metrics,
    })
    await update_session_db(session_id, pipeline_log=pipeline_log)