fix: use median hue, Otsu threshold, and background subtraction for colors
Some checks failed
CI / go-lint (push) Has been skipped
CI / python-lint (push) Has been skipped
CI / nodejs-lint (push) Has been skipped
CI / test-go-school (push) Successful in 36s
CI / test-go-edu-search (push) Successful in 28s
CI / test-python-klausur (push) Failing after 1m59s
CI / test-python-agent-core (push) Successful in 16s
CI / test-nodejs-website (push) Successful in 18s

- Median hue instead of mean (robust to background contamination)
- Otsu threshold instead of fixed 180 (adapts to colored backgrounds)
- Background sampling from border pixels with hue-distance filter
- Raise sat_threshold (50 → 70) and add min_sat_ratio (default 25%) to reduce false positives
- Classify using saturated pixels only for cleaner hue signal

Fixes: borrow/lend misdetected as orange (actually red, median_H=5)

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
Benjamin Admin
2026-03-16 07:44:03 +01:00
parent 4a8d43fd71
commit a6951940b9

View File

@@ -81,7 +81,8 @@ def _hue_to_color_name(hue: float) -> str:
def detect_word_colors(
img_bgr: np.ndarray,
word_boxes: List[Dict],
sat_threshold: int = 50,
sat_threshold: int = 70,
min_sat_ratio: float = 0.25,
) -> None:
"""Annotate each word_box in-place with its detected text color.
@@ -90,9 +91,12 @@ def detect_word_colors(
Algorithm per word:
1. Crop the word region from the image.
2. Build a text-pixel mask (dark pixels OR high-saturation pixels).
3. Sample HSV values at mask positions.
4. If mean saturation ≥ threshold → classify hue; else → black.
2. Otsu-threshold for text/background separation.
3. Sample background color from border pixels of the crop.
4. Remove text pixels that match the background (avoids colored
backgrounds like blue boxes leaking into the result).
5. Use **median** hue (robust to outliers) and require a minimum
ratio of saturated pixels before classifying as colored.
"""
if img_bgr is None or not word_boxes:
return
@@ -114,10 +118,14 @@ def detect_word_colors(
continue
crop_hsv = img_hsv[y1:y2, x1:x2]
crop_gray = cv2.cvtColor(img_bgr[y1:y2, x1:x2], cv2.COLOR_BGR2GRAY)
crop_bgr = img_bgr[y1:y2, x1:x2]
crop_gray = cv2.cvtColor(crop_bgr, cv2.COLOR_BGR2GRAY)
ch, cw = crop_hsv.shape[:2]
# Text pixels: dark in grayscale OR saturated (colored ink)
_, dark_mask = cv2.threshold(crop_gray, 180, 255, cv2.THRESH_BINARY_INV)
# --- Text mask: Otsu (adaptive) + high-saturation pixels ---
_, dark_mask = cv2.threshold(
crop_gray, 0, 255, cv2.THRESH_BINARY_INV + cv2.THRESH_OTSU,
)
sat_mask = (crop_hsv[:, :, 1] > sat_threshold).astype(np.uint8) * 255
text_mask = cv2.bitwise_or(dark_mask, sat_mask)
@@ -128,14 +136,48 @@ def detect_word_colors(
wb["color_name"] = "black"
continue
mean_sat = float(np.mean(text_pixels[:, 1]))
# --- Background subtraction via border pixels ---
# Sample background from the 2px border ring of the crop
if ch > 6 and cw > 6:
border = 2
bg_top = crop_hsv[:border, :].reshape(-1, 3)
bg_bot = crop_hsv[-border:, :].reshape(-1, 3)
bg_lft = crop_hsv[border:-border, :border].reshape(-1, 3)
bg_rgt = crop_hsv[border:-border, -border:].reshape(-1, 3)
bg_pixels = np.vstack([bg_top, bg_bot, bg_lft, bg_rgt])
if mean_sat < sat_threshold:
bg_med_h = float(np.median(bg_pixels[:, 0]))
bg_med_s = float(np.median(bg_pixels[:, 1]))
# If background is tinted (S > 15), remove text pixels
# with similar hue to avoid false colored detections
if bg_med_s > 15:
hue_diff = np.minimum(
np.abs(text_pixels[:, 0].astype(float) - bg_med_h),
180.0 - np.abs(text_pixels[:, 0].astype(float) - bg_med_h),
)
keep = hue_diff > 20
if np.any(keep):
text_pixels = text_pixels[keep]
if len(text_pixels) < 3:
wb["color"] = _COLOR_HEX["black"]
wb["color_name"] = "black"
continue
# --- Classification using MEDIAN (robust to outliers) ---
median_sat = float(np.median(text_pixels[:, 1]))
sat_count = int(np.sum(text_pixels[:, 1] > sat_threshold))
sat_ratio = sat_count / len(text_pixels)
if median_sat < sat_threshold or sat_ratio < min_sat_ratio:
wb["color"] = _COLOR_HEX["black"]
wb["color_name"] = "black"
else:
mean_hue = float(np.mean(text_pixels[:, 0]))
name = _hue_to_color_name(mean_hue)
# Use median hue of saturated pixels only for cleaner signal
sat_pixels = text_pixels[text_pixels[:, 1] > sat_threshold]
median_hue = float(np.median(sat_pixels[:, 0]))
name = _hue_to_color_name(median_hue)
wb["color"] = _COLOR_HEX.get(name, _COLOR_HEX["black"])
wb["color_name"] = name
colored_count += 1