fix: use median hue, Otsu threshold, and background subtraction for colors
Some checks failed
CI / go-lint (push) Has been skipped
CI / python-lint (push) Has been skipped
CI / nodejs-lint (push) Has been skipped
CI / test-go-school (push) Successful in 36s
CI / test-go-edu-search (push) Successful in 28s
CI / test-python-klausur (push) Failing after 1m59s
CI / test-python-agent-core (push) Successful in 16s
CI / test-nodejs-website (push) Successful in 18s
Some checks failed
CI / go-lint (push) Has been skipped
CI / python-lint (push) Has been skipped
CI / nodejs-lint (push) Has been skipped
CI / test-go-school (push) Successful in 36s
CI / test-go-edu-search (push) Successful in 28s
CI / test-python-klausur (push) Failing after 1m59s
CI / test-python-agent-core (push) Successful in 16s
CI / test-nodejs-website (push) Successful in 18s
- Median hue instead of mean (robust to background contamination)
- Otsu threshold instead of fixed 180 (adapts to colored backgrounds)
- Background sampling from border pixels with hue-distance filter
- Higher sat_threshold (70) + min_sat_ratio (25%) to reduce false positives
- Classify using saturated pixels only for cleaner hue signal

Fixes: borrow/lend misdetected as orange (actually red, median_H=5)

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -81,7 +81,8 @@ def _hue_to_color_name(hue: float) -> str:
|
||||
def detect_word_colors(
|
||||
img_bgr: np.ndarray,
|
||||
word_boxes: List[Dict],
|
||||
sat_threshold: int = 50,
|
||||
sat_threshold: int = 70,
|
||||
min_sat_ratio: float = 0.25,
|
||||
) -> None:
|
||||
"""Annotate each word_box in-place with its detected text color.
|
||||
|
||||
@@ -90,9 +91,12 @@ def detect_word_colors(
|
||||
|
||||
Algorithm per word:
|
||||
1. Crop the word region from the image.
|
||||
2. Build a text-pixel mask (dark pixels OR high-saturation pixels).
|
||||
3. Sample HSV values at mask positions.
|
||||
4. If mean saturation ≥ threshold → classify hue; else → black.
|
||||
2. Otsu-threshold for text/background separation.
|
||||
3. Sample background color from border pixels of the crop.
|
||||
4. Remove text pixels that match the background (avoids colored
|
||||
backgrounds like blue boxes leaking into the result).
|
||||
5. Use **median** hue (robust to outliers) and require a minimum
|
||||
ratio of saturated pixels before classifying as colored.
|
||||
"""
|
||||
if img_bgr is None or not word_boxes:
|
||||
return
|
||||
@@ -114,10 +118,14 @@ def detect_word_colors(
|
||||
continue
|
||||
|
||||
crop_hsv = img_hsv[y1:y2, x1:x2]
|
||||
crop_gray = cv2.cvtColor(img_bgr[y1:y2, x1:x2], cv2.COLOR_BGR2GRAY)
|
||||
crop_bgr = img_bgr[y1:y2, x1:x2]
|
||||
crop_gray = cv2.cvtColor(crop_bgr, cv2.COLOR_BGR2GRAY)
|
||||
ch, cw = crop_hsv.shape[:2]
|
||||
|
||||
# Text pixels: dark in grayscale OR saturated (colored ink)
|
||||
_, dark_mask = cv2.threshold(crop_gray, 180, 255, cv2.THRESH_BINARY_INV)
|
||||
# --- Text mask: Otsu (adaptive) + high-saturation pixels ---
|
||||
_, dark_mask = cv2.threshold(
|
||||
crop_gray, 0, 255, cv2.THRESH_BINARY_INV + cv2.THRESH_OTSU,
|
||||
)
|
||||
sat_mask = (crop_hsv[:, :, 1] > sat_threshold).astype(np.uint8) * 255
|
||||
text_mask = cv2.bitwise_or(dark_mask, sat_mask)
|
||||
|
||||
@@ -128,14 +136,48 @@ def detect_word_colors(
|
||||
wb["color_name"] = "black"
|
||||
continue
|
||||
|
||||
mean_sat = float(np.mean(text_pixels[:, 1]))
|
||||
# --- Background subtraction via border pixels ---
|
||||
# Sample background from the 2px border ring of the crop
|
||||
if ch > 6 and cw > 6:
|
||||
border = 2
|
||||
bg_top = crop_hsv[:border, :].reshape(-1, 3)
|
||||
bg_bot = crop_hsv[-border:, :].reshape(-1, 3)
|
||||
bg_lft = crop_hsv[border:-border, :border].reshape(-1, 3)
|
||||
bg_rgt = crop_hsv[border:-border, -border:].reshape(-1, 3)
|
||||
bg_pixels = np.vstack([bg_top, bg_bot, bg_lft, bg_rgt])
|
||||
|
||||
if mean_sat < sat_threshold:
|
||||
bg_med_h = float(np.median(bg_pixels[:, 0]))
|
||||
bg_med_s = float(np.median(bg_pixels[:, 1]))
|
||||
|
||||
# If background is tinted (S > 15), remove text pixels
|
||||
# with similar hue to avoid false colored detections
|
||||
if bg_med_s > 15:
|
||||
hue_diff = np.minimum(
|
||||
np.abs(text_pixels[:, 0].astype(float) - bg_med_h),
|
||||
180.0 - np.abs(text_pixels[:, 0].astype(float) - bg_med_h),
|
||||
)
|
||||
keep = hue_diff > 20
|
||||
if np.any(keep):
|
||||
text_pixels = text_pixels[keep]
|
||||
|
||||
if len(text_pixels) < 3:
|
||||
wb["color"] = _COLOR_HEX["black"]
|
||||
wb["color_name"] = "black"
|
||||
continue
|
||||
|
||||
# --- Classification using MEDIAN (robust to outliers) ---
|
||||
median_sat = float(np.median(text_pixels[:, 1]))
|
||||
sat_count = int(np.sum(text_pixels[:, 1] > sat_threshold))
|
||||
sat_ratio = sat_count / len(text_pixels)
|
||||
|
||||
if median_sat < sat_threshold or sat_ratio < min_sat_ratio:
|
||||
wb["color"] = _COLOR_HEX["black"]
|
||||
wb["color_name"] = "black"
|
||||
else:
|
||||
mean_hue = float(np.mean(text_pixels[:, 0]))
|
||||
name = _hue_to_color_name(mean_hue)
|
||||
# Use median hue of saturated pixels only for cleaner signal
|
||||
sat_pixels = text_pixels[text_pixels[:, 1] > sat_threshold]
|
||||
median_hue = float(np.median(sat_pixels[:, 0]))
|
||||
name = _hue_to_color_name(median_hue)
|
||||
wb["color"] = _COLOR_HEX.get(name, _COLOR_HEX["black"])
|
||||
wb["color_name"] = name
|
||||
colored_count += 1
|
||||
|
||||
Reference in New Issue
Block a user