fix: force 3x upscale for short RapidOCR crops + lower box_thresh
- Short cell crops (<80px height) are always upscaled 3x for RapidOCR to improve recognition of periods, ellipses, and phonetic symbols.
- Lowered Det.box_thresh from 0.6 to 0.4 to detect small characters (dots, brackets, IPA symbols) that were previously being filtered out.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -3692,7 +3692,8 @@ def _get_rapid_engine():
|
||||
"Rec.ocr_version": _OCRVersion.PPOCRV5,
|
||||
# Tighter detection boxes to reduce word merging
|
||||
"Det.unclip_ratio": 1.3,
|
||||
"Det.box_thresh": 0.6,
|
||||
# Lower threshold to detect small chars (periods, ellipsis, phonetics)
|
||||
"Det.box_thresh": 0.4,
|
||||
# Silence verbose logging
|
||||
"Global.log_level": "critical",
|
||||
})
|
||||
@@ -4760,22 +4761,32 @@ def _ocr_cell_crop(
|
||||
cell_region = PageRegion(type=col.type, x=cx, y=cy, width=cw, height=ch)
|
||||
words = ocr_region_lighton(img_bgr, cell_region)
|
||||
elif engine_name == "rapid" and img_bgr is not None:
|
||||
# Upscale small BGR crops for RapidOCR — use same min_dim as Tesseract (150px)
|
||||
# Upscale small BGR crops for RapidOCR.
|
||||
# Cell crops typically have height 35-55px but width >300px.
|
||||
# _ensure_minimum_crop_size only scales when EITHER dim < min_dim,
|
||||
# using uniform scale → a 365×54 crop becomes ~1014×150 (scale ~2.78).
|
||||
# For very short heights (< 80px), force 3× upscale for better OCR
|
||||
# of small characters like periods, ellipsis, and phonetic symbols.
|
||||
bgr_crop = img_bgr[cy:cy + ch, cx:cx + cw]
|
||||
if bgr_crop.size == 0:
|
||||
words = []
|
||||
else:
|
||||
upscaled_bin = _ensure_minimum_crop_size(
|
||||
bgr_crop, min_dim=150, max_scale=3,
|
||||
)
|
||||
up_h, up_w = upscaled_bin.shape[:2]
|
||||
logger.info("_ocr_cell_crop R%02d_C%d: rapid upscale %dx%d -> %dx%d",
|
||||
row_idx, col_idx, cw, ch, up_w, up_h)
|
||||
scale_x = up_w / max(cw, 1)
|
||||
scale_y = up_h / max(ch, 1)
|
||||
was_scaled = (up_w != cw or up_h != ch)
|
||||
crop_h, crop_w = bgr_crop.shape[:2]
|
||||
if crop_h < 80:
|
||||
# Force 3× upscale for short rows — small chars need more pixels
|
||||
scale = 3.0
|
||||
bgr_up = cv2.resize(bgr_crop, None, fx=scale, fy=scale,
|
||||
interpolation=cv2.INTER_CUBIC)
|
||||
else:
|
||||
bgr_up = _ensure_minimum_crop_size(bgr_crop, min_dim=150, max_scale=3)
|
||||
up_h, up_w = bgr_up.shape[:2]
|
||||
scale_x = up_w / max(crop_w, 1)
|
||||
scale_y = up_h / max(crop_h, 1)
|
||||
was_scaled = (up_w != crop_w or up_h != crop_h)
|
||||
logger.info("_ocr_cell_crop R%02d_C%d: rapid %dx%d -> %dx%d (scale=%.1fx)",
|
||||
row_idx, col_idx, crop_w, crop_h, up_w, up_h, scale_y)
|
||||
tmp_region = PageRegion(type=col.type, x=0, y=0, width=up_w, height=up_h)
|
||||
words = ocr_region_rapid(upscaled_bin, tmp_region)
|
||||
words = ocr_region_rapid(bgr_up, tmp_region)
|
||||
# Remap positions back to original image coords
|
||||
if words and was_scaled:
|
||||
for w in words:
|
||||
|
||||
Reference in New Issue
Block a user