diff --git a/klausur-service/backend/cv_vocab_pipeline.py b/klausur-service/backend/cv_vocab_pipeline.py index bead1af..e1e8644 100644 --- a/klausur-service/backend/cv_vocab_pipeline.py +++ b/klausur-service/backend/cv_vocab_pipeline.py @@ -3692,7 +3692,8 @@ def _get_rapid_engine(): "Rec.ocr_version": _OCRVersion.PPOCRV5, # Tighter detection boxes to reduce word merging "Det.unclip_ratio": 1.3, - "Det.box_thresh": 0.6, + # Lower threshold to detect small chars (periods, ellipsis, phonetics) + "Det.box_thresh": 0.4, # Silence verbose logging "Global.log_level": "critical", }) @@ -4760,22 +4761,32 @@ def _ocr_cell_crop( cell_region = PageRegion(type=col.type, x=cx, y=cy, width=cw, height=ch) words = ocr_region_lighton(img_bgr, cell_region) elif engine_name == "rapid" and img_bgr is not None: - # Upscale small BGR crops for RapidOCR — use same min_dim as Tesseract (150px) + # Upscale small BGR crops for RapidOCR. + # Cell crops typically have height 35-55px but width >300px. + # _ensure_minimum_crop_size only scales when EITHER dim < min_dim, + # using uniform scale → a 365×54 crop becomes ~1014×150 (scale ~2.78). + # For very short heights (< 80px), force 3× upscale for better OCR + # of small characters like periods, ellipsis, and phonetic symbols. bgr_crop = img_bgr[cy:cy + ch, cx:cx + cw] if bgr_crop.size == 0: words = [] else: - upscaled_bin = _ensure_minimum_crop_size( - bgr_crop, min_dim=150, max_scale=3, - ) - up_h, up_w = upscaled_bin.shape[:2] - logger.info("_ocr_cell_crop R%02d_C%d: rapid upscale %dx%d -> %dx%d", - row_idx, col_idx, cw, ch, up_w, up_h) - scale_x = up_w / max(cw, 1) - scale_y = up_h / max(ch, 1) - was_scaled = (up_w != cw or up_h != ch) + crop_h, crop_w = bgr_crop.shape[:2] + if crop_h < 80: + # Force 3× upscale for short rows — small chars need more pixels + scale = 3.0 + bgr_up = cv2.resize(bgr_crop, None, fx=scale, fy=scale, + interpolation=cv2.INTER_CUBIC) + else: + bgr_up = _ensure_minimum_crop_size(bgr_crop, min_dim=150, max_scale=3) + up_h, up_w = bgr_up.shape[:2] + scale_x = up_w / max(crop_w, 1) + scale_y = up_h / max(crop_h, 1) + was_scaled = (up_w != crop_w or up_h != crop_h) + logger.info("_ocr_cell_crop R%02d_C%d: rapid %dx%d -> %dx%d (scale=%.1fx)", + row_idx, col_idx, crop_w, crop_h, up_w, up_h, scale_y) tmp_region = PageRegion(type=col.type, x=0, y=0, width=up_w, height=up_h) - words = ocr_region_rapid(upscaled_bin, tmp_region) + words = ocr_region_rapid(bgr_up, tmp_region) # Remap positions back to original image coords if words and was_scaled: for w in words: