fix: downscale large images before PaddleOCR (Traefik 60s limit)

Bilder, deren längste Seite größer als 1500px ist, werden vor dem Upload verkleinert. Die erkannten Koordinaten werden anschließend auf die Originalgröße zurückskaliert. JPEG statt PNG für schnelleren Upload.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-03-12 14:28:58 +01:00
parent e2c2acdf86
commit 685d135be5
1 changed files with 23 additions and 8 deletions
@@ -412,20 +412,35 @@ async def ocr_region_paddle(
    if crop.size == 0:
        return []

-    # Encode as PNG
-    success, png_buf = cv2.imencode(".png", crop)
+    # Downscale large images to fit within Traefik's 60s timeout.
+    # PaddleOCR works well at ~1500px max dimension.
+    h, w = crop.shape[:2]
+    scale = 1.0
+    _MAX_DIM = 1500
+    if max(h, w) > _MAX_DIM:
+        scale = _MAX_DIM / max(h, w)
+        new_w, new_h = int(w * scale), int(h * scale)
+        crop = cv2.resize(crop, (new_w, new_h), interpolation=cv2.INTER_AREA)
+        logger.info("ocr_region_paddle: downscaled %dx%d → %dx%d (scale=%.2f)",
+                     w, h, new_w, new_h, scale)
+
+    # Encode as JPEG (smaller than PNG, faster upload)
+    success, jpg_buf = cv2.imencode(".jpg", crop, [cv2.IMWRITE_JPEG_QUALITY, 90])
    if not success:
        logger.error("ocr_region_paddle: cv2.imencode failed")
        return []

-    words, _w, _h = await ocr_remote_paddle(png_buf.tobytes())
+    words, _w, _h = await ocr_remote_paddle(jpg_buf.tobytes(), filename="scan.jpg")

-    # Shift coordinates to absolute image space
-    for w in words:
-        w["left"] += offset_x
-        w["top"] += offset_y
+    # Scale coordinates back to original size and shift to absolute image space
+    inv_scale = 1.0 / scale if scale != 1.0 else 1.0
+    for wd in words:
+        wd["left"] = int(wd["left"] * inv_scale) + offset_x
+        wd["top"] = int(wd["top"] * inv_scale) + offset_y
+        wd["width"] = int(wd["width"] * inv_scale)
+        wd["height"] = int(wd["height"] * inv_scale)
        if region is not None:
-            w["region_type"] = region.type
+            wd["region_type"] = region.type

    return words