From 8003dcac39f129d25f653c3929e14eebd6626117 Mon Sep 17 00:00:00 2001 From: Benjamin Admin Date: Fri, 13 Mar 2026 09:54:52 +0100 Subject: [PATCH] =?UTF-8?q?fix:=20PaddleOCR=203.4.0=20compatibility=20?= =?UTF-8?q?=E2=80=94=20use=20lang=3Den=20with=20PP-OCRv5?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit PaddleOCR 3.4.0 removed 'latin' language support, causing ValueError at startup. Now uses lang='en' with ocr_version='PP-OCRv5' and falls back to lang='latin' for older PaddleOCR versions. Co-Authored-By: Claude Opus 4.6 --- paddleocr-service/main.py | 23 ++++++++++++++++------- 1 file changed, 16 insertions(+), 7 deletions(-) diff --git a/paddleocr-service/main.py b/paddleocr-service/main.py index f66aef7..94c0923 100644 --- a/paddleocr-service/main.py +++ b/paddleocr-service/main.py @@ -1,4 +1,4 @@ -"""PaddleOCR Remote Service — PP-OCRv4 Latin auf x86_64.""" +"""PaddleOCR Remote Service — PP-OCRv5 Latin auf x86_64.""" import io import logging @@ -24,11 +24,20 @@ def get_engine(): from paddleocr import PaddleOCR logger.info("Loading PaddleOCR model...") - _engine = PaddleOCR( - lang="latin", - use_angle_cls=True, - show_log=False, - ) + # PaddleOCR >= 3.x: use ocr_version param; fallback for older API + try: + _engine = PaddleOCR( + lang="en", + ocr_version="PP-OCRv5", + use_angle_cls=True, + show_log=False, + ) + except (ValueError, TypeError): + _engine = PaddleOCR( + lang="latin", + use_angle_cls=True, + show_log=False, + ) logger.info("PaddleOCR model loaded successfully") return _engine @@ -48,7 +57,7 @@ def startup_load_model(): @app.get("/health") def health(): if _ready: - return {"status": "ok", "model": "PP-OCRv4-latin"} + return {"status": "ok", "model": "PP-OCRv5-latin"} return {"status": "loading"}