diff --git a/paddleocr-service/main.py b/paddleocr-service/main.py
index b658777..39d28ed 100644
--- a/paddleocr-service/main.py
+++ b/paddleocr-service/main.py
@@ -3,6 +3,7 @@
 import io
 import logging
 import os
+import threading
 
 import numpy as np
 from fastapi import FastAPI, File, Header, HTTPException, UploadFile
@@ -15,17 +16,18 @@ app = FastAPI(title="PaddleOCR Service")
 _engine = None
 _ready = False
+_loading = False
 
 API_KEY = os.environ.get("PADDLEOCR_API_KEY", "")
 
 
-def get_engine():
-    global _engine
-    if _engine is None:
+def _load_model():
+    """Load the PaddleOCR model; meant to run in the startup background thread."""
+    global _engine, _ready, _loading
+    try:
         logger.info("Importing paddleocr...")
         from paddleocr import PaddleOCR
 
         logger.info("Import done. Loading PaddleOCR model...")
-        # PaddleOCR >= 3.x: lang="en" + PP-OCRv5; older: lang="latin"
         try:
             _engine = PaddleOCR(
                 lang="en",
@@ -42,27 +44,33 @@ def get_engine():
                 show_log=False,
             )
             logger.info("Using PP-OCRv4 fallback (latin)")
-        logger.info("PaddleOCR model loaded successfully")
-    return _engine
+        _ready = True
+        logger.info("PaddleOCR model loaded successfully — ready to serve")
+    except Exception as e:
+        logger.error(f"Failed to load PaddleOCR model: {e}")
+    finally:
+        # Loading is over either way; after a failure /health must say "error", not "loading".
+        _loading = False
 
 
 @app.on_event("startup")
 def startup_load_model():
-    """Pre-load model at startup so health check passes."""
-    global _ready
-    try:
-        get_engine()
-        _ready = True
-        logger.info("PaddleOCR ready to serve requests")
-    except Exception as e:
-        logger.error(f"Failed to load PaddleOCR model: {e}")
+    """Start model loading in background so health check passes immediately."""
+    global _loading
+    _loading = True
+    thread = threading.Thread(target=_load_model, daemon=True)
+    thread.start()
+    logger.info("Model loading started in background thread")
 
 
 @app.get("/health")
 def health():
+    """Report model state: "ok" once loaded, "loading" while in progress, else "error"."""
     if _ready:
         return {"status": "ok", "model": "PP-OCRv5-latin"}
-    return {"status": "loading"}
+    if _loading:
+        return {"status": "loading"}
+    return {"status": "error"}
 
 
 @app.post("/ocr")
@@ -80,8 +88,7 @@ async def ocr(
     img = Image.open(io.BytesIO(img_bytes)).convert("RGB")
     img_np = np.array(img)
 
-    engine = get_engine()
-    result = engine.ocr(img_np)
+    result = _engine.ocr(img_np)
 
     words = []
     for line in result[0] or []: