fix: downgrade to PaddleOCR 2.x — 3.x uses too much RAM on CPU
All checks were successful
CI / go-lint (push) Has been skipped
CI / test-go-consent (push) Successful in 33s
CI / test-python-voice (push) Successful in 31s
CI / test-bqas (push) Successful in 34s
CI / python-lint (push) Has been skipped
CI / nodejs-lint (push) Has been skipped
CI / Deploy (push) Successful in 2s

PaddlePaddle 3.x + PP-OCRv5 requires >6GB RAM and has oneDNN (formerly
MKL-DNN) compatibility issues on CPU. PaddleOCR 2.x with PP-OCRv4 works
reliably with ~2-3GB RAM and does not hit these oneDNN issues.

- Pin paddlepaddle>=2.6.0,<3.0.0 and paddleocr>=2.7.0,<3.0.0
- Simplify main.py — single init strategy, parse the 2.x result format directly
- Re-enable warmup (fits in memory with 2.x)

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
Benjamin Admin
2026-03-13 19:13:33 +01:00
parent 3fd3336f6c
commit 96f94475f6
2 changed files with 35 additions and 89 deletions

View File

@@ -1,23 +1,10 @@
"""PaddleOCR Remote Service — PP-OCRv5 Latin auf x86_64.""" """PaddleOCR Remote Service — PP-OCRv4 on x86_64 (CPU)."""
import io import io
import logging import logging
import os import os
import threading import threading
# Disable oneDNN/MKLDNN before importing paddle — avoids
# ConvertPirAttribute2RuntimeAttribute errors on PaddlePaddle 3.x
os.environ["FLAGS_use_mkldnn"] = "0"
os.environ["FLAGS_use_onednn"] = "0"
os.environ["PADDLE_PDX_DISABLE_MODEL_SOURCE_CHECK"] = "1"
# Must set paddle flags before import
try:
import paddle
paddle.set_flags({"FLAGS_use_mkldnn": False})
except Exception:
pass
import numpy as np import numpy as np
from fastapi import FastAPI, File, Header, HTTPException, UploadFile from fastapi import FastAPI, File, Header, HTTPException, UploadFile
from PIL import Image from PIL import Image
@@ -40,31 +27,22 @@ def _load_model():
logger.info("Importing paddleocr...") logger.info("Importing paddleocr...")
from paddleocr import PaddleOCR from paddleocr import PaddleOCR
logger.info("Import done. Loading PaddleOCR model...") logger.info("Loading PaddleOCR model (PP-OCRv4, lang=en)...")
# Try multiple init strategies for different PaddleOCR versions _engine = PaddleOCR(
inits = [ lang="en",
# PaddleOCR 3.x — disable MKLDNN via enable_mkldnn=False use_angle_cls=True,
dict(lang="en", ocr_version="PP-OCRv5", use_textline_orientation=True, enable_mkldnn=False), show_log=False,
# PaddleOCR 3.x without enable_mkldnn param enable_mkldnn=False,
dict(lang="en", ocr_version="PP-OCRv5", use_textline_orientation=True), use_gpu=False,
# PaddleOCR 3.x with deprecated param )
dict(lang="en", ocr_version="PP-OCRv5", use_angle_cls=True), logger.info("PaddleOCR model loaded — running warmup...")
# PaddleOCR 2.8+ (en, no version) # Warmup with tiny image to trigger any lazy init
dict(lang="en", use_angle_cls=True, show_log=False), dummy = np.ones((30, 100, 3), dtype=np.uint8) * 255
] _engine.ocr(dummy)
for i, kwargs in enumerate(inits):
try:
_engine = PaddleOCR(**kwargs)
logger.info(f"PaddleOCR init succeeded with strategy {i}: {kwargs}")
break
except Exception as e:
logger.info(f"PaddleOCR init strategy {i} failed: {e}")
else:
raise RuntimeError("All PaddleOCR init strategies failed")
_ready = True _ready = True
logger.info("PaddleOCR model loaded successfully — ready to serve") logger.info("PaddleOCR ready to serve")
except Exception as e: except Exception as e:
logger.error(f"Failed to load PaddleOCR model: {e}") logger.error(f"Failed to load PaddleOCR: {e}", exc_info=True)
@app.on_event("startup") @app.on_event("startup")
@@ -72,15 +50,14 @@ def startup_load_model():
"""Start model loading in background so health check passes immediately.""" """Start model loading in background so health check passes immediately."""
global _loading global _loading
_loading = True _loading = True
thread = threading.Thread(target=_load_model, daemon=True) threading.Thread(target=_load_model, daemon=True).start()
thread.start()
logger.info("Model loading started in background thread") logger.info("Model loading started in background thread")
@app.get("/health") @app.get("/health")
def health(): def health():
if _ready: if _ready:
return {"status": "ok", "model": "PP-OCRv5-latin"} return {"status": "ok", "model": "PP-OCRv4"}
if _loading: if _loading:
return {"status": "loading"} return {"status": "loading"}
return {"status": "error"} return {"status": "error"}
@@ -105,57 +82,26 @@ async def ocr(
result = _engine.ocr(img_np) result = _engine.ocr(img_np)
except Exception as e: except Exception as e:
logger.error(f"OCR failed: {e}", exc_info=True) logger.error(f"OCR failed: {e}", exc_info=True)
raise HTTPException(status_code=500, detail=f"OCR processing failed: {e}") raise HTTPException(status_code=500, detail=f"OCR failed: {e}")
if not result: if not result or not result[0]:
return {"words": [], "image_width": img_np.shape[1], "image_height": img_np.shape[0]} return {"words": [], "image_width": img_np.shape[1], "image_height": img_np.shape[0]}
# PaddleOCR 2.x returns: [[line, ...]] where line = [box, (text, conf)]
# PaddleOCR 3.x returns: [{'text': ..., 'boxes': [...], 'rec_scores': ...}] or similar
words = [] words = []
try: for line in result[0]:
lines = result[0] if isinstance(result, list) and result else result box, (text, conf) = line[0], line[1]
if not lines: x_min = min(p[0] for p in box)
return {"words": [], "image_width": img_np.shape[1], "image_height": img_np.shape[0]} y_min = min(p[1] for p in box)
x_max = max(p[0] for p in box)
for line in lines: y_max = max(p[1] for p in box)
if isinstance(line, dict): words.append({
# PaddleOCR 3.x dict format "text": str(text).strip(),
text = str(line.get("text", line.get("rec_text", ""))).strip() "left": int(x_min),
conf = float(line.get("score", line.get("rec_score", 0))) "top": int(y_min),
box = line.get("boxes", line.get("dt_polys", [])) "width": int(x_max - x_min),
if not text or not box: "height": int(y_max - y_min),
continue "conf": round(float(conf) * 100, 1),
# box might be [[x1,y1],[x2,y2],[x3,y3],[x4,y4]] or flat })
if isinstance(box[0], (list, tuple)):
x_min = min(p[0] for p in box)
y_min = min(p[1] for p in box)
x_max = max(p[0] for p in box)
y_max = max(p[1] for p in box)
else:
x_min, y_min, x_max, y_max = box[0], box[1], box[2], box[3]
words.append({
"text": text,
"left": int(x_min), "top": int(y_min),
"width": int(x_max - x_min), "height": int(y_max - y_min),
"conf": round(conf * 100 if conf <= 1 else conf, 1),
})
elif isinstance(line, (list, tuple)) and len(line) == 2:
# PaddleOCR 2.x format: [box, (text, conf)]
box, (text, conf) = line[0], line[1]
x_min = min(p[0] for p in box)
y_min = min(p[1] for p in box)
x_max = max(p[0] for p in box)
y_max = max(p[1] for p in box)
words.append({
"text": str(text).strip(),
"left": int(x_min), "top": int(y_min),
"width": int(x_max - x_min), "height": int(y_max - y_min),
"conf": round(float(conf) * 100 if conf <= 1 else float(conf), 1),
})
except Exception as e:
logger.error(f"Failed to parse OCR result: {e}. Raw: {str(result)[:500]}", exc_info=True)
raise HTTPException(status_code=500, detail=f"OCR result parsing failed: {e}")
return { return {
"words": words, "words": words,

View File

@@ -1,5 +1,5 @@
paddlepaddle>=3.0.0 paddlepaddle>=2.6.0,<3.0.0
paddleocr>=2.9.0 paddleocr>=2.7.0,<3.0.0
fastapi>=0.110.0 fastapi>=0.110.0
uvicorn>=0.25.0 uvicorn>=0.25.0
python-multipart>=0.0.6 python-multipart>=0.0.6