From 604da26b248c2de50ccfe1f5619b596c0fbde933 Mon Sep 17 00:00:00 2001 From: Benjamin Admin Date: Wed, 4 Mar 2026 17:38:06 +0100 Subject: [PATCH] fix: upscale RapidOCR crops to min 150px (was 64px), matching Tesseract Cell crops of 35-54px height were too small for RapidOCR to detect text reliably. Uses _ensure_minimum_crop_size(min_dim=150) for consistent upscaling across all OCR engines. Co-Authored-By: Claude Opus 4.6 --- klausur-service/backend/cv_vocab_pipeline.py | 28 ++++++++++---------- 1 file changed, 14 insertions(+), 14 deletions(-) diff --git a/klausur-service/backend/cv_vocab_pipeline.py b/klausur-service/backend/cv_vocab_pipeline.py index 26b9e71..c612fb5 100644 --- a/klausur-service/backend/cv_vocab_pipeline.py +++ b/klausur-service/backend/cv_vocab_pipeline.py @@ -4760,27 +4760,27 @@ def _ocr_cell_crop( cell_region = PageRegion(type=col.type, x=cx, y=cy, width=cw, height=ch) words = ocr_region_lighton(img_bgr, cell_region) elif engine_name == "rapid" and img_bgr is not None: - # Upscale small BGR crops for RapidOCR (same as Tesseract path) + # Upscale small BGR crops for RapidOCR — use same min_dim as Tesseract (150px) bgr_crop = img_bgr[cy:cy + ch, cx:cx + cw] if bgr_crop.size == 0: words = [] else: - min_dim = 64 - scale = 1.0 - if ch < min_dim or cw < min_dim: - scale = max(min_dim / max(ch, 1), min_dim / max(cw, 1), 2.0) - bgr_crop = cv2.resize(bgr_crop, None, fx=scale, fy=scale, - interpolation=cv2.INTER_CUBIC) - up_h, up_w = bgr_crop.shape[:2] + upscaled_bin = _ensure_minimum_crop_size( + bgr_crop, min_dim=150, max_scale=3, + ) + up_h, up_w = upscaled_bin.shape[:2] + scale_x = up_w / max(cw, 1) + scale_y = up_h / max(ch, 1) + was_scaled = (up_w != cw or up_h != ch) tmp_region = PageRegion(type=col.type, x=0, y=0, width=up_w, height=up_h) - words = ocr_region_rapid(bgr_crop, tmp_region) + words = ocr_region_rapid(upscaled_bin, tmp_region) # Remap positions back to original image coords - if words and scale != 1.0: + if words and was_scaled: for w in words: - w['left'] = int(w['left'] / scale) + cx - w['top'] = int(w['top'] / scale) + cy - w['width'] = int(w['width'] / scale) - w['height'] = int(w['height'] / scale) + w['left'] = int(w['left'] / scale_x) + cx + w['top'] = int(w['top'] / scale_y) + cy + w['width'] = int(w['width'] / scale_x) + w['height'] = int(w['height'] / scale_y) elif words: for w in words: w['left'] += cx