From 96f94475f6115377c8bc78aafd120a4ba9b7a941 Mon Sep 17 00:00:00 2001 From: Benjamin Admin Date: Fri, 13 Mar 2026 19:13:33 +0100 Subject: [PATCH] =?UTF-8?q?fix:=20downgrade=20to=20PaddleOCR=202.x=20?= =?UTF-8?q?=E2=80=94=203.x=20uses=20too=20much=20RAM=20on=20CPU?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit PaddlePaddle 3.x + PP-OCRv5 requires >6GB RAM and has oneDNN compatibility issues on CPU. PaddleOCR 2.x with PP-OCRv4 works reliably with ~2-3GB RAM and has no MKLDNN issues. - Pin paddlepaddle<3.0.0 and paddleocr<3.0.0 - Simplify main.py — single init strategy, direct 2.x result format - Re-enable warmup (fits in memory with 2.x) Co-Authored-By: Claude Opus 4.6 --- paddleocr-service/main.py | 120 ++++++++--------------------- paddleocr-service/requirements.txt | 4 +- 2 files changed, 35 insertions(+), 89 deletions(-) diff --git a/paddleocr-service/main.py b/paddleocr-service/main.py index 6e2778e..ead4dc0 100644 --- a/paddleocr-service/main.py +++ b/paddleocr-service/main.py @@ -1,23 +1,10 @@ -"""PaddleOCR Remote Service — PP-OCRv5 Latin auf x86_64.""" +"""PaddleOCR Remote Service — PP-OCRv4 on x86_64 (CPU).""" import io import logging import os import threading -# Disable oneDNN/MKLDNN before importing paddle — avoids -# ConvertPirAttribute2RuntimeAttribute errors on PaddlePaddle 3.x -os.environ["FLAGS_use_mkldnn"] = "0" -os.environ["FLAGS_use_onednn"] = "0" -os.environ["PADDLE_PDX_DISABLE_MODEL_SOURCE_CHECK"] = "1" - -# Must set paddle flags before import -try: - import paddle - paddle.set_flags({"FLAGS_use_mkldnn": False}) -except Exception: - pass - import numpy as np from fastapi import FastAPI, File, Header, HTTPException, UploadFile from PIL import Image @@ -40,31 +27,22 @@ def _load_model(): logger.info("Importing paddleocr...") from paddleocr import PaddleOCR - logger.info("Import done. Loading PaddleOCR model...") - # Try multiple init strategies for different PaddleOCR versions - inits = [ - # PaddleOCR 3.x — disable MKLDNN via enable_mkldnn=False - dict(lang="en", ocr_version="PP-OCRv5", use_textline_orientation=True, enable_mkldnn=False), - # PaddleOCR 3.x without enable_mkldnn param - dict(lang="en", ocr_version="PP-OCRv5", use_textline_orientation=True), - # PaddleOCR 3.x with deprecated param - dict(lang="en", ocr_version="PP-OCRv5", use_angle_cls=True), - # PaddleOCR 2.8+ (en, no version) - dict(lang="en", use_angle_cls=True, show_log=False), - ] - for i, kwargs in enumerate(inits): - try: - _engine = PaddleOCR(**kwargs) - logger.info(f"PaddleOCR init succeeded with strategy {i}: {kwargs}") - break - except Exception as e: - logger.info(f"PaddleOCR init strategy {i} failed: {e}") - else: - raise RuntimeError("All PaddleOCR init strategies failed") + logger.info("Loading PaddleOCR model (PP-OCRv4, lang=en)...") + _engine = PaddleOCR( + lang="en", + use_angle_cls=True, + show_log=False, + enable_mkldnn=False, + use_gpu=False, + ) + logger.info("PaddleOCR model loaded — running warmup...") + # Warmup with tiny image to trigger any lazy init + dummy = np.ones((30, 100, 3), dtype=np.uint8) * 255 + _engine.ocr(dummy) _ready = True - logger.info("PaddleOCR model loaded successfully — ready to serve") + logger.info("PaddleOCR ready to serve") except Exception as e: - logger.error(f"Failed to load PaddleOCR model: {e}") + logger.error(f"Failed to load PaddleOCR: {e}", exc_info=True) @app.on_event("startup") @@ -72,15 +50,14 @@ def startup_load_model(): """Start model loading in background so health check passes immediately.""" global _loading _loading = True - thread = threading.Thread(target=_load_model, daemon=True) - thread.start() + threading.Thread(target=_load_model, daemon=True).start() logger.info("Model loading started in background thread") @app.get("/health") def health(): if _ready: - return {"status": "ok", "model": "PP-OCRv5-latin"} + return {"status": "ok", "model": "PP-OCRv4"} if _loading: return {"status": "loading"} return {"status": "error"} @@ -105,57 +82,26 @@ async def ocr( result = _engine.ocr(img_np) except Exception as e: logger.error(f"OCR failed: {e}", exc_info=True) - raise HTTPException(status_code=500, detail=f"OCR processing failed: {e}") + raise HTTPException(status_code=500, detail=f"OCR failed: {e}") - if not result: + if not result or not result[0]: return {"words": [], "image_width": img_np.shape[1], "image_height": img_np.shape[0]} - # PaddleOCR 2.x returns: [[line, ...]] where line = [box, (text, conf)] - # PaddleOCR 3.x returns: [{'text': ..., 'boxes': [...], 'rec_scores': ...}] or similar words = [] - try: - lines = result[0] if isinstance(result, list) and result else result - if not lines: - return {"words": [], "image_width": img_np.shape[1], "image_height": img_np.shape[0]} - - for line in lines: - if isinstance(line, dict): - # PaddleOCR 3.x dict format - text = str(line.get("text", line.get("rec_text", ""))).strip() - conf = float(line.get("score", line.get("rec_score", 0))) - box = line.get("boxes", line.get("dt_polys", [])) - if not text or not box: - continue - # box might be [[x1,y1],[x2,y2],[x3,y3],[x4,y4]] or flat - if isinstance(box[0], (list, tuple)): - x_min = min(p[0] for p in box) - y_min = min(p[1] for p in box) - x_max = max(p[0] for p in box) - y_max = max(p[1] for p in box) - else: - x_min, y_min, x_max, y_max = box[0], box[1], box[2], box[3] - words.append({ - "text": text, - "left": int(x_min), "top": int(y_min), - "width": int(x_max - x_min), "height": int(y_max - y_min), - "conf": round(conf * 100 if conf <= 1 else conf, 1), - }) - elif isinstance(line, (list, tuple)) and len(line) == 2: - # PaddleOCR 2.x format: [box, (text, conf)] - box, (text, conf) = line[0], line[1] - x_min = min(p[0] for p in box) - y_min = min(p[1] for p in box) - x_max = max(p[0] for p in box) - y_max = max(p[1] for p in box) - words.append({ - "text": str(text).strip(), - "left": int(x_min), "top": int(y_min), - "width": int(x_max - x_min), "height": int(y_max - y_min), - "conf": round(float(conf) * 100 if conf <= 1 else float(conf), 1), - }) - except Exception as e: - logger.error(f"Failed to parse OCR result: {e}. Raw: {str(result)[:500]}", exc_info=True) - raise HTTPException(status_code=500, detail=f"OCR result parsing failed: {e}") + for line in result[0]: + box, (text, conf) = line[0], line[1] + x_min = min(p[0] for p in box) + y_min = min(p[1] for p in box) + x_max = max(p[0] for p in box) + y_max = max(p[1] for p in box) + words.append({ + "text": str(text).strip(), + "left": int(x_min), + "top": int(y_min), + "width": int(x_max - x_min), + "height": int(y_max - y_min), + "conf": round(float(conf) * 100, 1), + }) return { "words": words, diff --git a/paddleocr-service/requirements.txt b/paddleocr-service/requirements.txt index 47f8951..c25eb33 100644 --- a/paddleocr-service/requirements.txt +++ b/paddleocr-service/requirements.txt @@ -1,5 +1,5 @@ -paddlepaddle>=3.0.0 -paddleocr>=2.9.0 +paddlepaddle>=2.6.0,<3.0.0 +paddleocr>=2.7.0,<3.0.0 fastapi>=0.110.0 uvicorn>=0.25.0 python-multipart>=0.0.6