fix: downgrade to PaddleOCR 2.x — 3.x uses too much RAM on CPU

PaddlePaddle 3.x + PP-OCRv5 requires >6GB RAM and has oneDNN compatibility issues on CPU. PaddleOCR 2.x with PP-OCRv4 works reliably with ~2-3GB RAM and has no MKLDNN issues.

- Pin paddlepaddle<3.0.0 and paddleocr<3.0.0
- Simplify main.py — single init strategy, direct 2.x result format
- Re-enable warmup (fits in memory with 2.x)

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-03-13 19:13:33 +01:00
parent 3fd3336f6c
commit 96f94475f6
2 changed files with 35 additions and 89 deletions
@@ -1,23 +1,10 @@
-"""PaddleOCR Remote Service — PP-OCRv5 Latin auf x86_64."""
+"""PaddleOCR Remote Service — PP-OCRv4 on x86_64 (CPU)."""
 import io
 import logging
 import os
 import threading
 # Disable oneDNN/MKLDNN before importing paddle — avoids
 # ConvertPirAttribute2RuntimeAttribute errors on PaddlePaddle 3.x
 os.environ["FLAGS_use_mkldnn"] = "0"
 os.environ["FLAGS_use_onednn"] = "0"
 os.environ["PADDLE_PDX_DISABLE_MODEL_SOURCE_CHECK"] = "1"
 # Must set paddle flags before import
 try:
    import paddle
    paddle.set_flags({"FLAGS_use_mkldnn": False})
 except Exception:
    pass
 import numpy as np
 from fastapi import FastAPI, File, Header, HTTPException, UploadFile
 from PIL import Image
@@ -40,31 +27,22 @@ def _load_model():
        logger.info("Importing paddleocr...")
        from paddleocr import PaddleOCR
-        logger.info("Import done. Loading PaddleOCR model...")
+        logger.info("Loading PaddleOCR model (PP-OCRv4, lang=en)...")
-        # Try multiple init strategies for different PaddleOCR versions
+        _engine = PaddleOCR(
-        inits = [
+            lang="en",
-            # PaddleOCR 3.x — disable MKLDNN via enable_mkldnn=False
+            use_angle_cls=True,
-            dict(lang="en", ocr_version="PP-OCRv5", use_textline_orientation=True, enable_mkldnn=False),
+            show_log=False,
-            # PaddleOCR 3.x without enable_mkldnn param
+            enable_mkldnn=False,
-            dict(lang="en", ocr_version="PP-OCRv5", use_textline_orientation=True),
+            use_gpu=False,
-            # PaddleOCR 3.x with deprecated param
+        )
-            dict(lang="en", ocr_version="PP-OCRv5", use_angle_cls=True),
+        logger.info("PaddleOCR model loaded — running warmup...")
-            # PaddleOCR 2.8+ (en, no version)
+        # Warmup with tiny image to trigger any lazy init
-            dict(lang="en", use_angle_cls=True, show_log=False),
+        dummy = np.ones((30, 100, 3), dtype=np.uint8) * 255
-        ]
+        _engine.ocr(dummy)
        for i, kwargs in enumerate(inits):
            try:
                _engine = PaddleOCR(**kwargs)
                logger.info(f"PaddleOCR init succeeded with strategy {i}: {kwargs}")
                break
            except Exception as e:
                logger.info(f"PaddleOCR init strategy {i} failed: {e}")
        else:
            raise RuntimeError("All PaddleOCR init strategies failed")
        _ready = True
-        logger.info("PaddleOCR model loaded successfully — ready to serve")
+        logger.info("PaddleOCR ready to serve")
    except Exception as e:
-        logger.error(f"Failed to load PaddleOCR model: {e}")
+        logger.error(f"Failed to load PaddleOCR: {e}", exc_info=True)
@app.on_event("startup")
@@ -72,15 +50,14 @@ def startup_load_model():
    """Start model loading in background so health check passes immediately."""
    global _loading
    _loading = True
-    thread = threading.Thread(target=_load_model, daemon=True)
+    threading.Thread(target=_load_model, daemon=True).start()
    thread.start()
    logger.info("Model loading started in background thread")
@app.get("/health")
 def health():
    if _ready:
-        return {"status": "ok", "model": "PP-OCRv5-latin"}
+        return {"status": "ok", "model": "PP-OCRv4"}
    if _loading:
        return {"status": "loading"}
    return {"status": "error"}
@@ -105,57 +82,26 @@ async def ocr(
        result = _engine.ocr(img_np)
    except Exception as e:
        logger.error(f"OCR failed: {e}", exc_info=True)
-        raise HTTPException(status_code=500, detail=f"OCR processing failed: {e}")
+        raise HTTPException(status_code=500, detail=f"OCR failed: {e}")
-    if not result:
+    if not result or not result[0]:
        return {"words": [], "image_width": img_np.shape[1], "image_height": img_np.shape[0]}
    # PaddleOCR 2.x returns: [[line, ...]] where line = [box, (text, conf)]
    # PaddleOCR 3.x returns: [{'text': ..., 'boxes': [...], 'rec_scores': ...}] or similar
    words = []
-    try:
+    for line in result[0]:
-        lines = result[0] if isinstance(result, list) and result else result
+        box, (text, conf) = line[0], line[1]
-        if not lines:
+        x_min = min(p[0] for p in box)
-            return {"words": [], "image_width": img_np.shape[1], "image_height": img_np.shape[0]}
+        y_min = min(p[1] for p in box)
-
+        x_max = max(p[0] for p in box)
-        for line in lines:
+        y_max = max(p[1] for p in box)
-            if isinstance(line, dict):
+        words.append({
-                # PaddleOCR 3.x dict format
+            "text": str(text).strip(),
-                text = str(line.get("text", line.get("rec_text", ""))).strip()
+            "left": int(x_min),
-                conf = float(line.get("score", line.get("rec_score", 0)))
+            "top": int(y_min),
-                box = line.get("boxes", line.get("dt_polys", []))
+            "width": int(x_max - x_min),
-                if not text or not box:
+            "height": int(y_max - y_min),
-                    continue
+            "conf": round(float(conf) * 100, 1),
-                # box might be [[x1,y1],[x2,y2],[x3,y3],[x4,y4]] or flat
+        })
                if isinstance(box[0], (list, tuple)):
                    x_min = min(p[0] for p in box)
                    y_min = min(p[1] for p in box)
                    x_max = max(p[0] for p in box)
                    y_max = max(p[1] for p in box)
                else:
                    x_min, y_min, x_max, y_max = box[0], box[1], box[2], box[3]
                words.append({
                    "text": text,
                    "left": int(x_min), "top": int(y_min),
                    "width": int(x_max - x_min), "height": int(y_max - y_min),
                    "conf": round(conf * 100 if conf <= 1 else conf, 1),
                })
            elif isinstance(line, (list, tuple)) and len(line) == 2:
                # PaddleOCR 2.x format: [box, (text, conf)]
                box, (text, conf) = line[0], line[1]
                x_min = min(p[0] for p in box)
                y_min = min(p[1] for p in box)
                x_max = max(p[0] for p in box)
                y_max = max(p[1] for p in box)
                words.append({
                    "text": str(text).strip(),
                    "left": int(x_min), "top": int(y_min),
                    "width": int(x_max - x_min), "height": int(y_max - y_min),
                    "conf": round(float(conf) * 100 if conf <= 1 else float(conf), 1),
                })
    except Exception as e:
        logger.error(f"Failed to parse OCR result: {e}. Raw: {str(result)[:500]}", exc_info=True)
        raise HTTPException(status_code=500, detail=f"OCR result parsing failed: {e}")
    return {
        "words": words,
@@ -1,5 +1,5 @@
-paddlepaddle>=3.0.0
+paddlepaddle>=2.6.0,<3.0.0
-paddleocr>=2.9.0
+paddleocr>=2.7.0,<3.0.0
 fastapi>=0.110.0
 uvicorn>=0.25.0
 python-multipart>=0.0.6