fix: disable oneDNN and support PaddleOCR 3.x format
Some checks failed
Deploy to Coolify / deploy (push) Failing after 2s

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
Benjamin Admin
2026-03-13 18:54:28 +01:00
parent 52618a0630
commit 2ac6559291

View File

@@ -5,6 +5,11 @@ import logging
import os import os
import threading import threading
# Disable oneDNN/MKLDNN before importing paddle — avoids
# ConvertPirAttribute2RuntimeAttribute errors on PaddlePaddle 3.x
os.environ["FLAGS_use_mkldnn"] = "0"
os.environ["PADDLE_PDX_DISABLE_MODEL_SOURCE_CHECK"] = "1"
import numpy as np import numpy as np
from fastapi import FastAPI, File, Header, HTTPException, UploadFile from fastapi import FastAPI, File, Header, HTTPException, UploadFile
from PIL import Image from PIL import Image
@@ -30,12 +35,10 @@ def _load_model():
logger.info("Import done. Loading PaddleOCR model...") logger.info("Import done. Loading PaddleOCR model...")
# Try multiple init strategies for different PaddleOCR versions # Try multiple init strategies for different PaddleOCR versions
inits = [ inits = [
# PaddleOCR 3.x (no show_log) # PaddleOCR 3.x — use_textline_orientation replaces use_angle_cls
dict(lang="en", ocr_version="PP-OCRv5", use_textline_orientation=True),
# PaddleOCR 3.x with deprecated param
dict(lang="en", ocr_version="PP-OCRv5", use_angle_cls=True), dict(lang="en", ocr_version="PP-OCRv5", use_angle_cls=True),
# PaddleOCR 3.x with show_log
dict(lang="en", ocr_version="PP-OCRv5", use_angle_cls=True, show_log=False),
# PaddleOCR 2.8+ (latin)
dict(lang="latin", use_angle_cls=True, show_log=False),
# PaddleOCR 2.8+ (en, no version) # PaddleOCR 2.8+ (en, no version)
dict(lang="en", use_angle_cls=True, show_log=False), dict(lang="en", use_angle_cls=True, show_log=False),
] ]
@@ -94,26 +97,55 @@ async def ocr(
logger.error(f"OCR failed: {e}", exc_info=True) logger.error(f"OCR failed: {e}", exc_info=True)
raise HTTPException(status_code=500, detail=f"OCR processing failed: {e}") raise HTTPException(status_code=500, detail=f"OCR processing failed: {e}")
if not result or not result[0]: if not result:
return {"words": [], "image_width": img_np.shape[1], "image_height": img_np.shape[0]} return {"words": [], "image_width": img_np.shape[1], "image_height": img_np.shape[0]}
# PaddleOCR 2.x returns: [[line, ...]] where line = [box, (text, conf)]
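# PaddleOCR 3.x returns one dict-like result per image with parallel lists, e.g. [{'rec_texts': [...], 'rec_scores': [...], 'rec_polys': [...], 'dt_polys': [...]}] (NOTE: confirm against the installed version)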
words = [] words = []
for line in result[0]: try:
box, (text, conf) = line[0], line[1] lines = result[0] if isinstance(result, list) and result else result
x_min = min(p[0] for p in box) if not lines:
y_min = min(p[1] for p in box) return {"words": [], "image_width": img_np.shape[1], "image_height": img_np.shape[0]}
x_max = max(p[0] for p in box)
y_max = max(p[1] for p in box) for line in lines:
words.append( if isinstance(line, dict):
{ # PaddleOCR 3.x dict format
"text": text.strip(), text = str(line.get("text", line.get("rec_text", ""))).strip()
"left": int(x_min), conf = float(line.get("score", line.get("rec_score", 0)))
"top": int(y_min), box = line.get("boxes", line.get("dt_polys", []))
"width": int(x_max - x_min), if not text or not box:
"height": int(y_max - y_min), continue
"conf": round(conf * 100, 1), # box might be [[x1,y1],[x2,y2],[x3,y3],[x4,y4]] or flat
} if isinstance(box[0], (list, tuple)):
) x_min = min(p[0] for p in box)
y_min = min(p[1] for p in box)
x_max = max(p[0] for p in box)
y_max = max(p[1] for p in box)
else:
x_min, y_min, x_max, y_max = box[0], box[1], box[2], box[3]
words.append({
"text": text,
"left": int(x_min), "top": int(y_min),
"width": int(x_max - x_min), "height": int(y_max - y_min),
"conf": round(conf * 100 if conf <= 1 else conf, 1),
})
elif isinstance(line, (list, tuple)) and len(line) == 2:
# PaddleOCR 2.x format: [box, (text, conf)]
box, (text, conf) = line[0], line[1]
x_min = min(p[0] for p in box)
y_min = min(p[1] for p in box)
x_max = max(p[0] for p in box)
y_max = max(p[1] for p in box)
words.append({
"text": str(text).strip(),
"left": int(x_min), "top": int(y_min),
"width": int(x_max - x_min), "height": int(y_max - y_min),
"conf": round(float(conf) * 100 if conf <= 1 else float(conf), 1),
})
except Exception as e:
logger.error(f"Failed to parse OCR result: {e}. Raw: {str(result)[:500]}", exc_info=True)
raise HTTPException(status_code=500, detail=f"OCR result parsing failed: {e}")
return { return {
"words": words, "words": words,