fix: upscale RapidOCR crops to min 150px (was 64px), matching Tesseract

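Cell crops with a height of 35-54px were too small for RapidOCR to detect
text reliably. The RapidOCR path now calls
_ensure_minimum_crop_size(min_dim=150, max_scale=3), using the same
min_dim as the Tesseract path, for consistent upscaling across all OCR
engines. Word positions are mapped back to original image coordinates
using separate x and y scale factors.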

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
Benjamin Admin
2026-03-04 17:38:06 +01:00
parent 113a1c10e5
commit 604da26b24

View File

@@ -4760,27 +4760,27 @@ def _ocr_cell_crop(
cell_region = PageRegion(type=col.type, x=cx, y=cy, width=cw, height=ch)
words = ocr_region_lighton(img_bgr, cell_region)
elif engine_name == "rapid" and img_bgr is not None:
# Upscale small BGR crops for RapidOCR (same as Tesseract path)
# Upscale small BGR crops for RapidOCR — use same min_dim as Tesseract (150px)
bgr_crop = img_bgr[cy:cy + ch, cx:cx + cw]
if bgr_crop.size == 0:
words = []
else:
min_dim = 64
scale = 1.0
if ch < min_dim or cw < min_dim:
scale = max(min_dim / max(ch, 1), min_dim / max(cw, 1), 2.0)
bgr_crop = cv2.resize(bgr_crop, None, fx=scale, fy=scale,
interpolation=cv2.INTER_CUBIC)
up_h, up_w = bgr_crop.shape[:2]
upscaled_bin = _ensure_minimum_crop_size(
bgr_crop, min_dim=150, max_scale=3,
)
up_h, up_w = upscaled_bin.shape[:2]
scale_x = up_w / max(cw, 1)
scale_y = up_h / max(ch, 1)
was_scaled = (up_w != cw or up_h != ch)
tmp_region = PageRegion(type=col.type, x=0, y=0, width=up_w, height=up_h)
words = ocr_region_rapid(bgr_crop, tmp_region)
words = ocr_region_rapid(upscaled_bin, tmp_region)
# Remap positions back to original image coords
if words and scale != 1.0:
if words and was_scaled:
for w in words:
w['left'] = int(w['left'] / scale) + cx
w['top'] = int(w['top'] / scale) + cy
w['width'] = int(w['width'] / scale)
w['height'] = int(w['height'] / scale)
w['left'] = int(w['left'] / scale_x) + cx
w['top'] = int(w['top'] / scale_y) + cy
w['width'] = int(w['width'] / scale_x)
w['height'] = int(w['height'] / scale_y)
elif words:
for w in words:
w['left'] += cx