fix: use median hue, Otsu threshold, and background subtraction for colors
Some checks failed
CI / go-lint (push) Has been skipped
CI / python-lint (push) Has been skipped
CI / nodejs-lint (push) Has been skipped
CI / test-go-school (push) Successful in 36s
CI / test-go-edu-search (push) Successful in 28s
CI / test-python-klausur (push) Failing after 1m59s
CI / test-python-agent-core (push) Successful in 16s
CI / test-nodejs-website (push) Successful in 18s

- Median hue instead of mean (robust to background contamination)
- Otsu threshold instead of fixed 180 (adapts to colored backgrounds)
- Sample the background color from the crop's border pixels and drop text pixels whose hue is close to it (hue-distance filter)
- Raise sat_threshold from 50 to 70 and add a new min_sat_ratio parameter (default 25%) to reduce false positives
- Classify using saturated pixels only for cleaner hue signal

Fixes: the words "borrow"/"lend" were misdetected as orange (the text is actually red, median_H=5)

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
Benjamin Admin
2026-03-16 07:44:03 +01:00
parent 4a8d43fd71
commit a6951940b9

View File

@@ -81,7 +81,8 @@ def _hue_to_color_name(hue: float) -> str:
def detect_word_colors( def detect_word_colors(
img_bgr: np.ndarray, img_bgr: np.ndarray,
word_boxes: List[Dict], word_boxes: List[Dict],
sat_threshold: int = 50, sat_threshold: int = 70,
min_sat_ratio: float = 0.25,
) -> None: ) -> None:
"""Annotate each word_box in-place with its detected text color. """Annotate each word_box in-place with its detected text color.
@@ -90,9 +91,12 @@ def detect_word_colors(
Algorithm per word: Algorithm per word:
1. Crop the word region from the image. 1. Crop the word region from the image.
2. Build a text-pixel mask (dark pixels OR high-saturation pixels). 2. Otsu-threshold for text/background separation.
3. Sample HSV values at mask positions. 3. Sample background color from border pixels of the crop.
4. If mean saturation ≥ threshold → classify hue; else → black. 4. Remove text pixels that match the background (avoids colored
backgrounds like blue boxes leaking into the result).
5. Use **median** hue (robust to outliers) and require a minimum
ratio of saturated pixels before classifying as colored.
""" """
if img_bgr is None or not word_boxes: if img_bgr is None or not word_boxes:
return return
@@ -114,10 +118,14 @@ def detect_word_colors(
continue continue
crop_hsv = img_hsv[y1:y2, x1:x2] crop_hsv = img_hsv[y1:y2, x1:x2]
crop_gray = cv2.cvtColor(img_bgr[y1:y2, x1:x2], cv2.COLOR_BGR2GRAY) crop_bgr = img_bgr[y1:y2, x1:x2]
crop_gray = cv2.cvtColor(crop_bgr, cv2.COLOR_BGR2GRAY)
ch, cw = crop_hsv.shape[:2]
# Text pixels: dark in grayscale OR saturated (colored ink) # --- Text mask: Otsu (adaptive) + high-saturation pixels ---
_, dark_mask = cv2.threshold(crop_gray, 180, 255, cv2.THRESH_BINARY_INV) _, dark_mask = cv2.threshold(
crop_gray, 0, 255, cv2.THRESH_BINARY_INV + cv2.THRESH_OTSU,
)
sat_mask = (crop_hsv[:, :, 1] > sat_threshold).astype(np.uint8) * 255 sat_mask = (crop_hsv[:, :, 1] > sat_threshold).astype(np.uint8) * 255
text_mask = cv2.bitwise_or(dark_mask, sat_mask) text_mask = cv2.bitwise_or(dark_mask, sat_mask)
@@ -128,14 +136,48 @@ def detect_word_colors(
wb["color_name"] = "black" wb["color_name"] = "black"
continue continue
mean_sat = float(np.mean(text_pixels[:, 1])) # --- Background subtraction via border pixels ---
# Sample background from the 2px border ring of the crop
if ch > 6 and cw > 6:
border = 2
bg_top = crop_hsv[:border, :].reshape(-1, 3)
bg_bot = crop_hsv[-border:, :].reshape(-1, 3)
bg_lft = crop_hsv[border:-border, :border].reshape(-1, 3)
bg_rgt = crop_hsv[border:-border, -border:].reshape(-1, 3)
bg_pixels = np.vstack([bg_top, bg_bot, bg_lft, bg_rgt])
if mean_sat < sat_threshold: bg_med_h = float(np.median(bg_pixels[:, 0]))
bg_med_s = float(np.median(bg_pixels[:, 1]))
# If background is tinted (S > 15), remove text pixels
# with similar hue to avoid false colored detections
if bg_med_s > 15:
hue_diff = np.minimum(
np.abs(text_pixels[:, 0].astype(float) - bg_med_h),
180.0 - np.abs(text_pixels[:, 0].astype(float) - bg_med_h),
)
keep = hue_diff > 20
if np.any(keep):
text_pixels = text_pixels[keep]
if len(text_pixels) < 3:
wb["color"] = _COLOR_HEX["black"]
wb["color_name"] = "black"
continue
# --- Classification using MEDIAN (robust to outliers) ---
median_sat = float(np.median(text_pixels[:, 1]))
sat_count = int(np.sum(text_pixels[:, 1] > sat_threshold))
sat_ratio = sat_count / len(text_pixels)
if median_sat < sat_threshold or sat_ratio < min_sat_ratio:
wb["color"] = _COLOR_HEX["black"] wb["color"] = _COLOR_HEX["black"]
wb["color_name"] = "black" wb["color_name"] = "black"
else: else:
mean_hue = float(np.mean(text_pixels[:, 0])) # Use median hue of saturated pixels only for cleaner signal
name = _hue_to_color_name(mean_hue) sat_pixels = text_pixels[text_pixels[:, 1] > sat_threshold]
median_hue = float(np.median(sat_pixels[:, 0]))
name = _hue_to_color_name(median_hue)
wb["color"] = _COLOR_HEX.get(name, _COLOR_HEX["black"]) wb["color"] = _COLOR_HEX.get(name, _COLOR_HEX["black"])
wb["color_name"] = name wb["color_name"] = name
colored_count += 1 colored_count += 1