fix: disable oneDNN/MKLDNN and support PaddleOCR 3.x result format
All checks were successful
CI / test-go-consent (push) Successful in 31s
CI / go-lint (push) Has been skipped
CI / python-lint (push) Has been skipped
CI / nodejs-lint (push) Has been skipped
CI / test-python-voice (push) Successful in 1m19s
CI / test-bqas (push) Successful in 32s
CI / Deploy (push) Successful in 2s

- Set FLAGS_use_mkldnn=0 before paddle import to avoid
  ConvertPirAttribute2RuntimeAttribute error
- Support both PaddleOCR 2.x (list) and 3.x (dict) result formats
- Prefer use_textline_orientation (3.x); keep use_angle_cls (deprecated in 3.x) as a fallback
- Remove latin lang fallback (not supported in 3.x)

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
Benjamin Admin
2026-03-13 18:52:31 +01:00
parent ed2cc234b8
commit eaba087d11

View File

@@ -5,6 +5,11 @@ import logging
import os import os
import threading import threading
# Disable oneDNN/MKLDNN before importing paddle — avoids
# ConvertPirAttribute2RuntimeAttribute errors on PaddlePaddle 3.x.
# These variables are set at module import time, ahead of the imports below.
os.environ["FLAGS_use_mkldnn"] = "0"
# NOTE(review): presumably skips the PaddleX model-source connectivity check
# at startup — confirm against the PaddleX documentation.
os.environ["PADDLE_PDX_DISABLE_MODEL_SOURCE_CHECK"] = "1"
import numpy as np import numpy as np
from fastapi import FastAPI, File, Header, HTTPException, UploadFile from fastapi import FastAPI, File, Header, HTTPException, UploadFile
from PIL import Image from PIL import Image
@@ -30,12 +35,10 @@ def _load_model():
logger.info("Import done. Loading PaddleOCR model...") logger.info("Import done. Loading PaddleOCR model...")
# Try multiple init strategies for different PaddleOCR versions # Try multiple init strategies for different PaddleOCR versions
inits = [ inits = [
# PaddleOCR 3.x (no show_log) # PaddleOCR 3.x — use_textline_orientation replaces use_angle_cls
dict(lang="en", ocr_version="PP-OCRv5", use_textline_orientation=True),
# PaddleOCR 3.x with deprecated param
dict(lang="en", ocr_version="PP-OCRv5", use_angle_cls=True), dict(lang="en", ocr_version="PP-OCRv5", use_angle_cls=True),
# PaddleOCR 3.x with show_log
dict(lang="en", ocr_version="PP-OCRv5", use_angle_cls=True, show_log=False),
# PaddleOCR 2.8+ (latin)
dict(lang="latin", use_angle_cls=True, show_log=False),
# PaddleOCR 2.8+ (en, no version) # PaddleOCR 2.8+ (en, no version)
dict(lang="en", use_angle_cls=True, show_log=False), dict(lang="en", use_angle_cls=True, show_log=False),
] ]
@@ -94,26 +97,55 @@ async def ocr(
logger.error(f"OCR failed: {e}", exc_info=True) logger.error(f"OCR failed: {e}", exc_info=True)
raise HTTPException(status_code=500, detail=f"OCR processing failed: {e}") raise HTTPException(status_code=500, detail=f"OCR processing failed: {e}")
if not result or not result[0]: if not result:
return {"words": [], "image_width": img_np.shape[1], "image_height": img_np.shape[0]} return {"words": [], "image_width": img_np.shape[1], "image_height": img_np.shape[0]}
# PaddleOCR 2.x returns: [[line, ...]] where line = [box, (text, conf)]
# PaddleOCR 3.x returns dict entries (exact keys vary by version); the keys read
# below are 'text'/'rec_text', 'score'/'rec_score' and 'boxes'/'dt_polys'
words = [] words = []
for line in result[0]: try:
box, (text, conf) = line[0], line[1] lines = result[0] if isinstance(result, list) and result else result
x_min = min(p[0] for p in box) if not lines:
y_min = min(p[1] for p in box) return {"words": [], "image_width": img_np.shape[1], "image_height": img_np.shape[0]}
x_max = max(p[0] for p in box)
y_max = max(p[1] for p in box) for line in lines:
words.append( if isinstance(line, dict):
{ # PaddleOCR 3.x dict format
"text": text.strip(), text = str(line.get("text", line.get("rec_text", ""))).strip()
"left": int(x_min), conf = float(line.get("score", line.get("rec_score", 0)))
"top": int(y_min), box = line.get("boxes", line.get("dt_polys", []))
"width": int(x_max - x_min), if not text or not box:
"height": int(y_max - y_min), continue
"conf": round(conf * 100, 1), # box might be [[x1,y1],[x2,y2],[x3,y3],[x4,y4]] or flat
} if isinstance(box[0], (list, tuple)):
) x_min = min(p[0] for p in box)
y_min = min(p[1] for p in box)
x_max = max(p[0] for p in box)
y_max = max(p[1] for p in box)
else:
x_min, y_min, x_max, y_max = box[0], box[1], box[2], box[3]
words.append({
"text": text,
"left": int(x_min), "top": int(y_min),
"width": int(x_max - x_min), "height": int(y_max - y_min),
"conf": round(conf * 100 if conf <= 1 else conf, 1),
})
elif isinstance(line, (list, tuple)) and len(line) == 2:
# PaddleOCR 2.x format: [box, (text, conf)]
box, (text, conf) = line[0], line[1]
x_min = min(p[0] for p in box)
y_min = min(p[1] for p in box)
x_max = max(p[0] for p in box)
y_max = max(p[1] for p in box)
words.append({
"text": str(text).strip(),
"left": int(x_min), "top": int(y_min),
"width": int(x_max - x_min), "height": int(y_max - y_min),
"conf": round(float(conf) * 100 if conf <= 1 else float(conf), 1),
})
except Exception as e:
logger.error(f"Failed to parse OCR result: {e}. Raw: {str(result)[:500]}", exc_info=True)
raise HTTPException(status_code=500, detail=f"OCR result parsing failed: {e}")
return { return {
"words": words, "words": words,