fix: upscale RapidOCR crops to min 150px (was 64px), matching Tesseract

Cell crops of 35-54px height were too small for RapidOCR to detect text reliably. The RapidOCR path now calls _ensure_minimum_crop_size(min_dim=150, max_scale=3) so that upscaling is consistent across all OCR engines.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-03-04 17:38:06 +01:00
parent 113a1c10e5
commit 604da26b24
1 changed files with 14 additions and 14 deletions
@@ -4760,27 +4760,27 @@ def _ocr_cell_crop(
        cell_region = PageRegion(type=col.type, x=cx, y=cy, width=cw, height=ch)
        words = ocr_region_lighton(img_bgr, cell_region)
    elif engine_name == "rapid" and img_bgr is not None:
-        # Upscale small BGR crops for RapidOCR (same as Tesseract path)
+        # Upscale small BGR crops for RapidOCR — use same min_dim as Tesseract (150px)
        bgr_crop = img_bgr[cy:cy + ch, cx:cx + cw]
        if bgr_crop.size == 0:
            words = []
        else:
-            min_dim = 64
-            scale = 1.0
-            if ch < min_dim or cw < min_dim:
-                scale = max(min_dim / max(ch, 1), min_dim / max(cw, 1), 2.0)
-                bgr_crop = cv2.resize(bgr_crop, None, fx=scale, fy=scale,
-                                      interpolation=cv2.INTER_CUBIC)
-            up_h, up_w = bgr_crop.shape[:2]
+            upscaled_bin = _ensure_minimum_crop_size(
+                bgr_crop, min_dim=150, max_scale=3,
+            )
+            up_h, up_w = upscaled_bin.shape[:2]
+            scale_x = up_w / max(cw, 1)
+            scale_y = up_h / max(ch, 1)
+            was_scaled = (up_w != cw or up_h != ch)
            tmp_region = PageRegion(type=col.type, x=0, y=0, width=up_w, height=up_h)
-            words = ocr_region_rapid(bgr_crop, tmp_region)
+            words = ocr_region_rapid(upscaled_bin, tmp_region)
            # Remap positions back to original image coords
-            if words and scale != 1.0:
+            if words and was_scaled:
                for w in words:
-                    w['left'] = int(w['left'] / scale) + cx
-                    w['top'] = int(w['top'] / scale) + cy
-                    w['width'] = int(w['width'] / scale)
-                    w['height'] = int(w['height'] / scale)
+                    w['left'] = int(w['left'] / scale_x) + cx
+                    w['top'] = int(w['top'] / scale_y) + cy
+                    w['width'] = int(w['width'] / scale_x)
+                    w['height'] = int(w['height'] / scale_y)
            elif words:
                for w in words:
                    w['left'] += cx