From 96f94475f6115377c8bc78aafd120a4ba9b7a941 Mon Sep 17 00:00:00 2001
From: Benjamin Admin <benjaminadmin@MacBookPro.fritz.box>
Date: Fri, 13 Mar 2026 19:13:33 +0100
Subject: [PATCH] =?UTF-8?q?fix:=20downgrade=20to=20PaddleOCR=202.x=20?=
 =?UTF-8?q?=E2=80=94=203.x=20uses=20too=20much=20RAM=20on=20CPU?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

PaddlePaddle 3.x with PP-OCRv5 requires more than 6 GB of RAM and has
oneDNN (formerly MKL-DNN) compatibility issues on CPU. PaddleOCR 2.x with
PP-OCRv4 runs reliably in about 2-3 GB of RAM and has no oneDNN issues.

- Pin paddlepaddle>=2.6.0,<3.0.0 and paddleocr>=2.7.0,<3.0.0
- Simplify main.py — single init strategy, direct 2.x result format
- Re-enable warmup (fits in memory with 2.x)

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
---
 paddleocr-service/main.py          | 120 ++++++++---------------------
 paddleocr-service/requirements.txt |   4 +-
 2 files changed, 35 insertions(+), 89 deletions(-)

diff --git a/paddleocr-service/main.py b/paddleocr-service/main.py
index 6e2778e..ead4dc0 100644
--- a/paddleocr-service/main.py
+++ b/paddleocr-service/main.py
@@ -1,23 +1,10 @@
-"""PaddleOCR Remote Service — PP-OCRv5 Latin auf x86_64."""
+"""PaddleOCR Remote Service — PP-OCRv4 on x86_64 (CPU)."""
 
 import io
 import logging
 import os
 import threading
 
-# Disable oneDNN/MKLDNN before importing paddle — avoids
-# ConvertPirAttribute2RuntimeAttribute errors on PaddlePaddle 3.x
-os.environ["FLAGS_use_mkldnn"] = "0"
-os.environ["FLAGS_use_onednn"] = "0"
-os.environ["PADDLE_PDX_DISABLE_MODEL_SOURCE_CHECK"] = "1"
-
-# Must set paddle flags before import
-try:
-    import paddle
-    paddle.set_flags({"FLAGS_use_mkldnn": False})
-except Exception:
-    pass
-
 import numpy as np
 from fastapi import FastAPI, File, Header, HTTPException, UploadFile
 from PIL import Image
@@ -40,31 +27,22 @@ def _load_model():
         logger.info("Importing paddleocr...")
         from paddleocr import PaddleOCR
 
-        logger.info("Import done. Loading PaddleOCR model...")
-        # Try multiple init strategies for different PaddleOCR versions
-        inits = [
-            # PaddleOCR 3.x — disable MKLDNN via enable_mkldnn=False
-            dict(lang="en", ocr_version="PP-OCRv5", use_textline_orientation=True, enable_mkldnn=False),
-            # PaddleOCR 3.x without enable_mkldnn param
-            dict(lang="en", ocr_version="PP-OCRv5", use_textline_orientation=True),
-            # PaddleOCR 3.x with deprecated param
-            dict(lang="en", ocr_version="PP-OCRv5", use_angle_cls=True),
-            # PaddleOCR 2.8+ (en, no version)
-            dict(lang="en", use_angle_cls=True, show_log=False),
-        ]
-        for i, kwargs in enumerate(inits):
-            try:
-                _engine = PaddleOCR(**kwargs)
-                logger.info(f"PaddleOCR init succeeded with strategy {i}: {kwargs}")
-                break
-            except Exception as e:
-                logger.info(f"PaddleOCR init strategy {i} failed: {e}")
-        else:
-            raise RuntimeError("All PaddleOCR init strategies failed")
+        logger.info("Loading PaddleOCR model (PP-OCRv4, lang=en)...")
+        _engine = PaddleOCR(
+            lang="en",
+            use_angle_cls=True,
+            show_log=False,
+            enable_mkldnn=False,
+            use_gpu=False,
+        )
+        logger.info("PaddleOCR model loaded — running warmup...")
+        # Warmup with tiny image to trigger any lazy init
+        dummy = np.ones((30, 100, 3), dtype=np.uint8) * 255
+        _engine.ocr(dummy)
         _ready = True
-        logger.info("PaddleOCR model loaded successfully — ready to serve")
+        logger.info("PaddleOCR ready to serve")
     except Exception as e:
-        logger.error(f"Failed to load PaddleOCR model: {e}")
+        logger.error(f"Failed to load PaddleOCR: {e}", exc_info=True)
 
 
 @app.on_event("startup")
@@ -72,15 +50,14 @@ def startup_load_model():
     """Start model loading in background so health check passes immediately."""
     global _loading
     _loading = True
-    thread = threading.Thread(target=_load_model, daemon=True)
-    thread.start()
+    threading.Thread(target=_load_model, daemon=True).start()
     logger.info("Model loading started in background thread")
 
 
 @app.get("/health")
 def health():
     if _ready:
-        return {"status": "ok", "model": "PP-OCRv5-latin"}
+        return {"status": "ok", "model": "PP-OCRv4"}
     if _loading:
         return {"status": "loading"}
     return {"status": "error"}
@@ -105,57 +82,26 @@ async def ocr(
         result = _engine.ocr(img_np)
     except Exception as e:
         logger.error(f"OCR failed: {e}", exc_info=True)
-        raise HTTPException(status_code=500, detail=f"OCR processing failed: {e}")
+        raise HTTPException(status_code=500, detail=f"OCR failed: {e}")
 
-    if not result:
+    if not result or not result[0]:
         return {"words": [], "image_width": img_np.shape[1], "image_height": img_np.shape[0]}
 
-    # PaddleOCR 2.x returns: [[line, ...]] where line = [box, (text, conf)]
-    # PaddleOCR 3.x returns: [{'text': ..., 'boxes': [...], 'rec_scores': ...}] or similar
     words = []
-    try:
-        lines = result[0] if isinstance(result, list) and result else result
-        if not lines:
-            return {"words": [], "image_width": img_np.shape[1], "image_height": img_np.shape[0]}
-
-        for line in lines:
-            if isinstance(line, dict):
-                # PaddleOCR 3.x dict format
-                text = str(line.get("text", line.get("rec_text", ""))).strip()
-                conf = float(line.get("score", line.get("rec_score", 0)))
-                box = line.get("boxes", line.get("dt_polys", []))
-                if not text or not box:
-                    continue
-                # box might be [[x1,y1],[x2,y2],[x3,y3],[x4,y4]] or flat
-                if isinstance(box[0], (list, tuple)):
-                    x_min = min(p[0] for p in box)
-                    y_min = min(p[1] for p in box)
-                    x_max = max(p[0] for p in box)
-                    y_max = max(p[1] for p in box)
-                else:
-                    x_min, y_min, x_max, y_max = box[0], box[1], box[2], box[3]
-                words.append({
-                    "text": text,
-                    "left": int(x_min), "top": int(y_min),
-                    "width": int(x_max - x_min), "height": int(y_max - y_min),
-                    "conf": round(conf * 100 if conf <= 1 else conf, 1),
-                })
-            elif isinstance(line, (list, tuple)) and len(line) == 2:
-                # PaddleOCR 2.x format: [box, (text, conf)]
-                box, (text, conf) = line[0], line[1]
-                x_min = min(p[0] for p in box)
-                y_min = min(p[1] for p in box)
-                x_max = max(p[0] for p in box)
-                y_max = max(p[1] for p in box)
-                words.append({
-                    "text": str(text).strip(),
-                    "left": int(x_min), "top": int(y_min),
-                    "width": int(x_max - x_min), "height": int(y_max - y_min),
-                    "conf": round(float(conf) * 100 if conf <= 1 else float(conf), 1),
-                })
-    except Exception as e:
-        logger.error(f"Failed to parse OCR result: {e}. Raw: {str(result)[:500]}", exc_info=True)
-        raise HTTPException(status_code=500, detail=f"OCR result parsing failed: {e}")
+    for line in result[0]:
+        box, (text, conf) = line[0], line[1]
+        x_min = min(p[0] for p in box)
+        y_min = min(p[1] for p in box)
+        x_max = max(p[0] for p in box)
+        y_max = max(p[1] for p in box)
+        words.append({
+            "text": str(text).strip(),
+            "left": int(x_min),
+            "top": int(y_min),
+            "width": int(x_max - x_min),
+            "height": int(y_max - y_min),
+            "conf": round(float(conf) * 100, 1),
+        })
 
     return {
         "words": words,
diff --git a/paddleocr-service/requirements.txt b/paddleocr-service/requirements.txt
index 47f8951..c25eb33 100644
--- a/paddleocr-service/requirements.txt
+++ b/paddleocr-service/requirements.txt
@@ -1,5 +1,5 @@
-paddlepaddle>=3.0.0
-paddleocr>=2.9.0
+paddlepaddle>=2.6.0,<3.0.0
+paddleocr>=2.7.0,<3.0.0
 fastapi>=0.110.0
 uvicorn>=0.25.0
 python-multipart>=0.0.6