fix: force 3x upscale for short RapidOCR crops + lower box_thresh

- Short cell crops (height < 80px) are now always upscaled 3x for RapidOCR
  to improve recognition of periods, ellipses, and phonetic symbols
- Lowered Det.box_thresh from 0.6 to 0.4 to detect small characters
  that were being filtered out (dots, brackets, IPA symbols)

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
Benjamin Admin
2026-03-04 19:47:36 +01:00
parent bb0e23303c
commit 90ecb46bed

View File

@@ -3692,7 +3692,8 @@ def _get_rapid_engine():
"Rec.ocr_version": _OCRVersion.PPOCRV5,
# Tighter detection boxes to reduce word merging
"Det.unclip_ratio": 1.3,
"Det.box_thresh": 0.6,
# Lower threshold to detect small chars (periods, ellipsis, phonetics)
"Det.box_thresh": 0.4,
# Silence verbose logging
"Global.log_level": "critical",
})
@@ -4760,22 +4761,32 @@ def _ocr_cell_crop(
cell_region = PageRegion(type=col.type, x=cx, y=cy, width=cw, height=ch)
words = ocr_region_lighton(img_bgr, cell_region)
elif engine_name == "rapid" and img_bgr is not None:
# Upscale small BGR crops for RapidOCR — use same min_dim as Tesseract (150px)
# Upscale small BGR crops for RapidOCR.
# Cell crops typically have height 35-55px but width >300px.
# _ensure_minimum_crop_size only scales when EITHER dim < min_dim,
# using uniform scale → a 365×54 crop becomes ~1014×150 (scale ~2.78).
# For very short heights (< 80px), force 3× upscale for better OCR
# of small characters like periods, ellipsis, and phonetic symbols.
bgr_crop = img_bgr[cy:cy + ch, cx:cx + cw]
if bgr_crop.size == 0:
words = []
else:
upscaled_bin = _ensure_minimum_crop_size(
bgr_crop, min_dim=150, max_scale=3,
)
up_h, up_w = upscaled_bin.shape[:2]
logger.info("_ocr_cell_crop R%02d_C%d: rapid upscale %dx%d -> %dx%d",
row_idx, col_idx, cw, ch, up_w, up_h)
scale_x = up_w / max(cw, 1)
scale_y = up_h / max(ch, 1)
was_scaled = (up_w != cw or up_h != ch)
crop_h, crop_w = bgr_crop.shape[:2]
if crop_h < 80:
# Force 3× upscale for short rows — small chars need more pixels
scale = 3.0
bgr_up = cv2.resize(bgr_crop, None, fx=scale, fy=scale,
interpolation=cv2.INTER_CUBIC)
else:
bgr_up = _ensure_minimum_crop_size(bgr_crop, min_dim=150, max_scale=3)
up_h, up_w = bgr_up.shape[:2]
scale_x = up_w / max(crop_w, 1)
scale_y = up_h / max(crop_h, 1)
was_scaled = (up_w != crop_w or up_h != crop_h)
logger.info("_ocr_cell_crop R%02d_C%d: rapid %dx%d -> %dx%d (scale=%.1fx)",
row_idx, col_idx, crop_w, crop_h, up_w, up_h, scale_y)
tmp_region = PageRegion(type=col.type, x=0, y=0, width=up_w, height=up_h)
words = ocr_region_rapid(upscaled_bin, tmp_region)
words = ocr_region_rapid(bgr_up, tmp_region)
# Remap positions back to original image coords
if words and was_scaled:
for w in words: