fix: use median hue, Otsu threshold, and background subtraction for colors

- Median hue instead of mean (robust to background contamination)
- Otsu threshold instead of fixed 180 (adapts to colored backgrounds)
- Background sampling from border pixels with hue-distance filter
- Higher sat_threshold (70) + min_sat_ratio (25%) to reduce false positives
- Classify using saturated pixels only for cleaner hue signal

Fixes: borrow/lend misdetected as orange (actually red, median_H=5)

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-03-16 07:44:03 +01:00
parent 4a8d43fd71
commit a6951940b9
1 changed files with 53 additions and 11 deletions
@@ -81,7 +81,8 @@ def _hue_to_color_name(hue: float) -> str:
 def detect_word_colors(
    img_bgr: np.ndarray,
    word_boxes: List[Dict],
-    sat_threshold: int = 50,
+    sat_threshold: int = 70,
    min_sat_ratio: float = 0.25,
 ) -> None:
    """Annotate each word_box in-place with its detected text color.
@@ -90,9 +91,12 @@ def detect_word_colors(
    Algorithm per word:
      1. Crop the word region from the image.
-      2. Build a text-pixel mask (dark pixels OR high-saturation pixels).
+      2. Otsu-threshold for text/background separation.
-      3. Sample HSV values at mask positions.
+      3. Sample background color from border pixels of the crop.
-      4. If mean saturation ≥ threshold → classify hue; else → black.
+      4. Remove text pixels that match the background (avoids colored
         backgrounds like blue boxes leaking into the result).
      5. Use **median** hue (robust to outliers) and require a minimum
         ratio of saturated pixels before classifying as colored.
    """
    if img_bgr is None or not word_boxes:
        return
@@ -114,10 +118,14 @@ def detect_word_colors(
            continue
        crop_hsv = img_hsv[y1:y2, x1:x2]
-        crop_gray = cv2.cvtColor(img_bgr[y1:y2, x1:x2], cv2.COLOR_BGR2GRAY)
+        crop_bgr = img_bgr[y1:y2, x1:x2]
        crop_gray = cv2.cvtColor(crop_bgr, cv2.COLOR_BGR2GRAY)
        ch, cw = crop_hsv.shape[:2]
-        # Text pixels: dark in grayscale OR saturated (colored ink)
+        # --- Text mask: Otsu (adaptive) + high-saturation pixels ---
-        _, dark_mask = cv2.threshold(crop_gray, 180, 255, cv2.THRESH_BINARY_INV)
+        _, dark_mask = cv2.threshold(
            crop_gray, 0, 255, cv2.THRESH_BINARY_INV + cv2.THRESH_OTSU,
        )
        sat_mask = (crop_hsv[:, :, 1] > sat_threshold).astype(np.uint8) * 255
        text_mask = cv2.bitwise_or(dark_mask, sat_mask)
@@ -128,14 +136,48 @@ def detect_word_colors(
            wb["color_name"] = "black"
            continue
-        mean_sat = float(np.mean(text_pixels[:, 1]))
+        # --- Background subtraction via border pixels ---
        # Sample background from the 2px border ring of the crop
        if ch > 6 and cw > 6:
            border = 2
            bg_top = crop_hsv[:border, :].reshape(-1, 3)
            bg_bot = crop_hsv[-border:, :].reshape(-1, 3)
            bg_lft = crop_hsv[border:-border, :border].reshape(-1, 3)
            bg_rgt = crop_hsv[border:-border, -border:].reshape(-1, 3)
            bg_pixels = np.vstack([bg_top, bg_bot, bg_lft, bg_rgt])
-        if mean_sat < sat_threshold:
+            bg_med_h = float(np.median(bg_pixels[:, 0]))
            bg_med_s = float(np.median(bg_pixels[:, 1]))
            # If background is tinted (S > 15), remove text pixels
            # with similar hue to avoid false colored detections
            if bg_med_s > 15:
                hue_diff = np.minimum(
                    np.abs(text_pixels[:, 0].astype(float) - bg_med_h),
                    180.0 - np.abs(text_pixels[:, 0].astype(float) - bg_med_h),
                )
                keep = hue_diff > 20
                if np.any(keep):
                    text_pixels = text_pixels[keep]
        if len(text_pixels) < 3:
            wb["color"] = _COLOR_HEX["black"]
            wb["color_name"] = "black"
            continue
        # --- Classification using MEDIAN (robust to outliers) ---
        median_sat = float(np.median(text_pixels[:, 1]))
        sat_count = int(np.sum(text_pixels[:, 1] > sat_threshold))
        sat_ratio = sat_count / len(text_pixels)
        if median_sat < sat_threshold or sat_ratio < min_sat_ratio:
            wb["color"] = _COLOR_HEX["black"]
            wb["color_name"] = "black"
        else:
-            mean_hue = float(np.mean(text_pixels[:, 0]))
+            # Use median hue of saturated pixels only for cleaner signal
-            name = _hue_to_color_name(mean_hue)
+            sat_pixels = text_pixels[text_pixels[:, 1] > sat_threshold]
            median_hue = float(np.median(sat_pixels[:, 0]))
            name = _hue_to_color_name(median_hue)
            wb["color"] = _COLOR_HEX.get(name, _COLOR_HEX["black"])
            wb["color_name"] = name
            colored_count += 1