diff --git a/paddleocr-service/main.py b/paddleocr-service/main.py
index b658777..39d28ed 100644
--- a/paddleocr-service/main.py
+++ b/paddleocr-service/main.py
@@ -3,6 +3,7 @@
 import io
 import logging
 import os
+import threading
 
 import numpy as np
 from fastapi import FastAPI, File, Header, HTTPException, UploadFile
@@ -15,17 +16,18 @@ app = FastAPI(title="PaddleOCR Service")
 
 _engine = None
 _ready = False
+_loading = False
 API_KEY = os.environ.get("PADDLEOCR_API_KEY", "")
 
 
-def get_engine():
-    global _engine
-    if _engine is None:
+def _load_model():
+    """Load PaddleOCR model in background thread."""
+    global _engine, _ready
+    try:
         logger.info("Importing paddleocr...")
         from paddleocr import PaddleOCR
 
         logger.info("Import done. Loading PaddleOCR model...")
-        # PaddleOCR >= 3.x: lang="en" + PP-OCRv5; older: lang="latin"
         try:
             _engine = PaddleOCR(
                 lang="en",
@@ -42,27 +44,29 @@ def get_engine():
                 show_log=False,
             )
             logger.info("Using PP-OCRv4 fallback (latin)")
-        logger.info("PaddleOCR model loaded successfully")
-    return _engine
+        _ready = True
+        logger.info("PaddleOCR model loaded successfully — ready to serve")
+    except Exception as e:
+        logger.error(f"Failed to load PaddleOCR model: {e}")
 
 
 @app.on_event("startup")
 def startup_load_model():
-    """Pre-load model at startup so health check passes."""
-    global _ready
-    try:
-        get_engine()
-        _ready = True
-        logger.info("PaddleOCR ready to serve requests")
-    except Exception as e:
-        logger.error(f"Failed to load PaddleOCR model: {e}")
+    """Start model loading in background so health check passes immediately."""
+    global _loading
+    _loading = True
+    thread = threading.Thread(target=_load_model, daemon=True)
+    thread.start()
+    logger.info("Model loading started in background thread")
 
 
 @app.get("/health")
 def health():
     if _ready:
         return {"status": "ok", "model": "PP-OCRv5-latin"}
-    return {"status": "loading"}
+    if _loading:
+        return {"status": "loading"}
+    return {"status": "error"}
 
 
 @app.post("/ocr")
@@ -80,8 +84,7 @@ async def ocr(
     img = Image.open(io.BytesIO(img_bytes)).convert("RGB")
     img_np = np.array(img)
 
-    engine = get_engine()
-    result = engine.ocr(img_np)
+    result = _engine.ocr(img_np)
 
     words = []
     for line in result[0] or []: