diff --git a/klausur-service/backend/cv_vocab_pipeline.py b/klausur-service/backend/cv_vocab_pipeline.py index 42e7314..26b9e71 100644 --- a/klausur-service/backend/cv_vocab_pipeline.py +++ b/klausur-service/backend/cv_vocab_pipeline.py @@ -4703,11 +4703,16 @@ def _ocr_cell_crop( disp_w = col.width disp_h = row.height - # Crop boundaries (clamped to image) - cx = max(0, disp_x) - cy = max(0, disp_y) - cw = min(disp_w, img_w - cx) - ch = min(disp_h, img_h - cy) + # Crop boundaries: add small internal padding (3px each side) to avoid + # clipping characters near column/row edges (e.g. parentheses, descenders). + # Stays within image bounds but may extend slightly beyond strict cell. + _PAD = 3 + cx = max(0, disp_x - _PAD) + cy = max(0, disp_y - _PAD) + cx2 = min(img_w, disp_x + disp_w + _PAD) + cy2 = min(img_h, disp_y + disp_h + _PAD) + cw = cx2 - cx + ch = cy2 - cy empty_cell = { 'cell_id': f"R{row_idx:02d}_C{col_idx}", @@ -4727,7 +4732,7 @@ def _ocr_cell_crop( } if cw <= 0 or ch <= 0: - logger.debug("_ocr_cell_crop R%02d_C%d: zero-size crop (%dx%d)", row_idx, col_idx, cw, ch) + logger.info("_ocr_cell_crop R%02d_C%d: zero-size crop (%dx%d)", row_idx, col_idx, cw, ch) return empty_cell # --- Pixel-density check: skip truly empty cells --- @@ -4736,8 +4741,8 @@ def _ocr_cell_crop( if crop.size > 0: dark_ratio = float(np.count_nonzero(crop < 180)) / crop.size if dark_ratio < 0.005: - logger.debug("_ocr_cell_crop R%02d_C%d: skip empty (dark_ratio=%.4f, crop=%dx%d)", - row_idx, col_idx, dark_ratio, cw, ch) + logger.info("_ocr_cell_crop R%02d_C%d: skip empty (dark_ratio=%.4f, crop=%dx%d)", + row_idx, col_idx, dark_ratio, cw, ch) return empty_cell # --- Prepare crop for OCR --- @@ -4755,8 +4760,31 @@ def _ocr_cell_crop( cell_region = PageRegion(type=col.type, x=cx, y=cy, width=cw, height=ch) words = ocr_region_lighton(img_bgr, cell_region) elif engine_name == "rapid" and img_bgr is not None: - cell_region = PageRegion(type=col.type, x=cx, y=cy, width=cw, height=ch) - words = ocr_region_rapid(img_bgr, cell_region) + # Upscale small BGR crops for RapidOCR (same as Tesseract path) + bgr_crop = img_bgr[cy:cy + ch, cx:cx + cw] + if bgr_crop.size == 0: + words = [] + else: + min_dim = 64 + scale = 1.0 + if ch < min_dim or cw < min_dim: + scale = max(min_dim / max(ch, 1), min_dim / max(cw, 1), 2.0) + bgr_crop = cv2.resize(bgr_crop, None, fx=scale, fy=scale, + interpolation=cv2.INTER_CUBIC) + up_h, up_w = bgr_crop.shape[:2] + tmp_region = PageRegion(type=col.type, x=0, y=0, width=up_w, height=up_h) + words = ocr_region_rapid(bgr_crop, tmp_region) + # Remap positions back to original image coords + if words and scale != 1.0: + for w in words: + w['left'] = int(w['left'] / scale) + cx + w['top'] = int(w['top'] / scale) + cy + w['width'] = int(w['width'] / scale) + w['height'] = int(w['height'] / scale) + elif words: + for w in words: + w['left'] += cx + w['top'] += cy else: # Tesseract: upscale tiny crops for better recognition if ocr_img is not None: @@ -4790,11 +4818,11 @@ def _ocr_cell_crop( y_tol = max(15, ch) text = _words_to_reading_order_text(words, y_tolerance_px=y_tol) avg_conf = round(sum(w['conf'] for w in words) / len(words), 1) - logger.debug("_ocr_cell_crop R%02d_C%d: OCR raw text=%r conf=%.1f nwords=%d crop=%dx%d psm=%s engine=%s", - row_idx, col_idx, text, avg_conf, len(words), cw, ch, psm, engine_name) + logger.info("_ocr_cell_crop R%02d_C%d: OCR raw text=%r conf=%.1f nwords=%d crop=%dx%d psm=%s engine=%s", + row_idx, col_idx, text, avg_conf, len(words), cw, ch, psm, engine_name) else: - logger.debug("_ocr_cell_crop R%02d_C%d: OCR returned NO words (crop=%dx%d psm=%s engine=%s)", - row_idx, col_idx, cw, ch, psm, engine_name) + logger.info("_ocr_cell_crop R%02d_C%d: OCR returned NO words (crop=%dx%d psm=%s engine=%s)", + row_idx, col_idx, cw, ch, psm, engine_name) # --- PSM 7 fallback for still-empty Tesseract cells --- if not text.strip() and engine_name == "tesseract" and ocr_img is not None: @@ -4819,8 +4847,8 @@ def _ocr_cell_crop( pre_filter = text text = _clean_cell_text_lite(text) if not text: - logger.debug("_ocr_cell_crop R%02d_C%d: _clean_cell_text_lite REMOVED %r", - row_idx, col_idx, pre_filter) + logger.info("_ocr_cell_crop R%02d_C%d: _clean_cell_text_lite REMOVED %r", + row_idx, col_idx, pre_filter) avg_conf = 0.0 result = dict(empty_cell)