fix: PaddleOCR v3 API — explicit model name + predict() statt ocr()

lang="latin" braucht text_recognition_model_name in PP-OCRv5. Neue API nutzt predict() statt ocr(), Ergebnis-Format angepasst.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-03-12 12:47:07 +01:00
parent 8f5f9641c7
commit 992d4f2a6b
1 changed file with 28 additions and 19 deletions
@@ -19,8 +19,11 @@ def get_engine():
        from paddleocr import PaddleOCR

        _engine = PaddleOCR(
-            lang="latin",
-            use_angle_cls=True,
+            lang="en",
+            text_recognition_model_name="latin_PP-OCRv5_mobile_rec",
+            use_doc_orientation_classify=False,
+            use_doc_unwarping=False,
+            use_textline_orientation=False,
            show_log=False,
        )
    return _engine
@@ -44,23 +47,29 @@ async def ocr(
    img_np = np.array(img)

    engine = get_engine()
-    result = engine.ocr(img_np)
+    result = engine.predict(img_np)

    words = []
-    for line in result[0] or []:
-        box, (text, conf) = line[0], line[1]
-        x_min = min(p[0] for p in box)
-        y_min = min(p[1] for p in box)
-        x_max = max(p[0] for p in box)
-        y_max = max(p[1] for p in box)
+    for item in result:
+        rec_texts = item.get("rec_texts", [])
+        rec_scores = item.get("rec_scores", [])
+        dt_polys = item.get("dt_polys", [])
+
+        for text, score, poly in zip(rec_texts, rec_scores, dt_polys):
+            if not text or not text.strip():
+                continue
+            xs = [p[0] for p in poly]
+            ys = [p[1] for p in poly]
+            x_min, x_max = min(xs), max(xs)
+            y_min, y_max = min(ys), max(ys)
            words.append(
                {
-                "text": text,
+                    "text": text.strip(),
                    "left": int(x_min),
                    "top": int(y_min),
                    "width": int(x_max - x_min),
                    "height": int(y_max - y_min),
-                "conf": round(conf * 100, 1),
+                    "conf": round(float(score) * 100, 1),
                }
            )