fix: use median hue, Otsu threshold, and background subtraction for colors

- Median hue instead of mean (robust to background contamination)
- Otsu threshold instead of fixed 180 (adapts to colored backgrounds)
- Background sampling from border pixels with hue-distance filter
- Higher sat_threshold (70) + min_sat_ratio (25%) to reduce false positives
- Classify using saturated pixels only for cleaner hue signal

Fixes: borrow/lend misdetected as orange (actually red, median_H=5)

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-03-16 07:44:03 +01:00
parent 4a8d43fd71
commit a6951940b9
1 changed files with 53 additions and 11 deletions
@@ -81,7 +81,8 @@ def _hue_to_color_name(hue: float) -> str:
 def detect_word_colors(
    img_bgr: np.ndarray,
    word_boxes: List[Dict],
-    sat_threshold: int = 50,
+    sat_threshold: int = 70,
+    min_sat_ratio: float = 0.25,
 ) -> None:
    """Annotate each word_box in-place with its detected text color.

@@ -90,9 +91,12 @@ def detect_word_colors(

    Algorithm per word:
      1. Crop the word region from the image.
-      2. Build a text-pixel mask (dark pixels OR high-saturation pixels).
-      3. Sample HSV values at mask positions.
-      4. If mean saturation ≥ threshold → classify hue; else → black.
+      2. Otsu-threshold for text/background separation.
+      3. Sample background color from border pixels of the crop.
+      4. Remove text pixels that match the background (avoids colored
+         backgrounds like blue boxes leaking into the result).
+      5. Use **median** hue (robust to outliers) and require a minimum
+         ratio of saturated pixels before classifying as colored.
    """
    if img_bgr is None or not word_boxes:
        return
@@ -114,10 +118,14 @@ def detect_word_colors(
            continue

        crop_hsv = img_hsv[y1:y2, x1:x2]
-        crop_gray = cv2.cvtColor(img_bgr[y1:y2, x1:x2], cv2.COLOR_BGR2GRAY)
+        crop_bgr = img_bgr[y1:y2, x1:x2]
+        crop_gray = cv2.cvtColor(crop_bgr, cv2.COLOR_BGR2GRAY)
+        ch, cw = crop_hsv.shape[:2]

-        # Text pixels: dark in grayscale OR saturated (colored ink)
-        _, dark_mask = cv2.threshold(crop_gray, 180, 255, cv2.THRESH_BINARY_INV)
+        # --- Text mask: Otsu (adaptive) + high-saturation pixels ---
+        _, dark_mask = cv2.threshold(
+            crop_gray, 0, 255, cv2.THRESH_BINARY_INV + cv2.THRESH_OTSU,
+        )
        sat_mask = (crop_hsv[:, :, 1] > sat_threshold).astype(np.uint8) * 255
        text_mask = cv2.bitwise_or(dark_mask, sat_mask)

@@ -128,14 +136,48 @@ def detect_word_colors(
            wb["color_name"] = "black"
            continue

-        mean_sat = float(np.mean(text_pixels[:, 1]))
+        # --- Background subtraction via border pixels ---
+        # Sample background from the 2px border ring of the crop
+        if ch > 6 and cw > 6:
+            border = 2
+            bg_top = crop_hsv[:border, :].reshape(-1, 3)
+            bg_bot = crop_hsv[-border:, :].reshape(-1, 3)
+            bg_lft = crop_hsv[border:-border, :border].reshape(-1, 3)
+            bg_rgt = crop_hsv[border:-border, -border:].reshape(-1, 3)
+            bg_pixels = np.vstack([bg_top, bg_bot, bg_lft, bg_rgt])

-        if mean_sat < sat_threshold:
+            bg_med_h = float(np.median(bg_pixels[:, 0]))
+            bg_med_s = float(np.median(bg_pixels[:, 1]))
+
+            # If background is tinted (S > 15), remove text pixels
+            # with similar hue to avoid false colored detections
+            if bg_med_s > 15:
+                hue_diff = np.minimum(
+                    np.abs(text_pixels[:, 0].astype(float) - bg_med_h),
+                    180.0 - np.abs(text_pixels[:, 0].astype(float) - bg_med_h),
+                )
+                keep = hue_diff > 20
+                if np.any(keep):
+                    text_pixels = text_pixels[keep]
+
+        if len(text_pixels) < 3:
+            wb["color"] = _COLOR_HEX["black"]
+            wb["color_name"] = "black"
+            continue
+
+        # --- Classification using MEDIAN (robust to outliers) ---
+        median_sat = float(np.median(text_pixels[:, 1]))
+        sat_count = int(np.sum(text_pixels[:, 1] > sat_threshold))
+        sat_ratio = sat_count / len(text_pixels)
+
+        if median_sat < sat_threshold or sat_ratio < min_sat_ratio:
            wb["color"] = _COLOR_HEX["black"]
            wb["color_name"] = "black"
        else:
-            mean_hue = float(np.mean(text_pixels[:, 0]))
-            name = _hue_to_color_name(mean_hue)
+            # Use median hue of saturated pixels only for cleaner signal
+            sat_pixels = text_pixels[text_pixels[:, 1] > sat_threshold]
+            median_hue = float(np.median(sat_pixels[:, 0]))
+            name = _hue_to_color_name(median_hue)
            wb["color"] = _COLOR_HEX.get(name, _COLOR_HEX["black"])
            wb["color_name"] = name
            colored_count += 1