From 2b73d9beecefa1e72e573f437829274629624f14 Mon Sep 17 00:00:00 2001 From: Benjamin Admin Date: Tue, 17 Mar 2026 10:28:56 +0100 Subject: [PATCH] fix: increase color recovery occupancy padding to prevent gap artifacts Colored-pixel fragments in narrow inter-word gaps were being recovered as false characters (e.g., "!" between "lend" and "sb."), disrupting word order. Use adaptive padding based on median word height instead of fixed 4px. Co-Authored-By: Claude Opus 4.6 --- klausur-service/backend/cv_color_detect.py | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/klausur-service/backend/cv_color_detect.py b/klausur-service/backend/cv_color_detect.py index dcc785b..beaa55a 100644 --- a/klausur-service/backend/cv_color_detect.py +++ b/klausur-service/backend/cv_color_detect.py @@ -210,9 +210,14 @@ def recover_colored_text( ih, iw = img_bgr.shape[:2] max_area = int(ih * iw * 0.005) - # --- Build occupancy mask from existing words (with 4px padding) --- + # --- Build occupancy mask from existing words (adaptive padding) --- + # Pad word boxes generously to prevent colored-pixel artifacts in + # narrow inter-word gaps from being recovered as false characters. + heights = [wb["height"] for wb in existing_words if wb.get("height", 0) > 0] + median_h = int(np.median(heights)) if heights else 20 + pad = max(8, int(median_h * 0.35)) + occupied = np.zeros((ih, iw), dtype=np.uint8) - pad = 4 for wb in existing_words: x1 = max(0, int(wb["left"]) - pad) y1 = max(0, int(wb["top"]) - pad)