Fix red false-positive in color detection for scanned black text
Scanner artifacts on black text produce a slight warm tint (hue ~0, sat ~60) that was misclassified as red. Red classification now requires median_sat >= 80, since genuine red text always has high saturation. Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -178,6 +178,15 @@ def detect_word_colors(
|
|||||||
sat_pixels = text_pixels[text_pixels[:, 1] > sat_threshold]
|
sat_pixels = text_pixels[text_pixels[:, 1] > sat_threshold]
|
||||||
median_hue = float(np.median(sat_pixels[:, 0]))
|
median_hue = float(np.median(sat_pixels[:, 0]))
|
||||||
name = _hue_to_color_name(median_hue)
|
name = _hue_to_color_name(median_hue)
|
||||||
|
|
||||||
|
# Red requires higher saturation — scanner artifacts on black
|
||||||
|
# text often produce a slight warm tint (hue ~0) with low
|
||||||
|
# saturation that would otherwise be misclassified as red.
|
||||||
|
if name == "red" and median_sat < 80:
|
||||||
|
wb["color"] = _COLOR_HEX["black"]
|
||||||
|
wb["color_name"] = "black"
|
||||||
|
continue
|
||||||
|
|
||||||
wb["color"] = _COLOR_HEX.get(name, _COLOR_HEX["black"])
|
wb["color"] = _COLOR_HEX.get(name, _COLOR_HEX["black"])
|
||||||
wb["color_name"] = name
|
wb["color_name"] = name
|
||||||
colored_count += 1
|
colored_count += 1
|
||||||
|
|||||||
@@ -11,6 +11,8 @@ Covers:
|
|||||||
import sys
|
import sys
|
||||||
sys.path.insert(0, '/app')
|
sys.path.insert(0, '/app')
|
||||||
|
|
||||||
|
import cv2
|
||||||
|
import numpy as np
|
||||||
import pytest
|
import pytest
|
||||||
from cv_vocab_types import PageZone, DetectedBox
|
from cv_vocab_types import PageZone, DetectedBox
|
||||||
from grid_editor_api import (
|
from grid_editor_api import (
|
||||||
@@ -912,3 +914,43 @@ class TestSlashIpaConversion:
|
|||||||
"""tile /tail/ → tile [tˈaɪl]."""
|
"""tile /tail/ → tile [tˈaɪl]."""
|
||||||
result = self._run_step_5h("tile /tail/ Nomen Dachziegel")
|
result = self._run_step_5h("tile /tail/ Nomen Dachziegel")
|
||||||
assert "[tˈaɪl]" in result
|
assert "[tˈaɪl]" in result
|
||||||
|
|
||||||
|
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
# Color detection: red false-positive suppression
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
class TestRedFalsePositiveSuppression:
|
||||||
|
"""Red requires median_sat >= 80 to avoid scanner artifact false positives."""
|
||||||
|
|
||||||
|
def test_low_saturation_red_classified_as_black(self):
|
||||||
|
"""Black text with slight warm scanner tint (sat ~60) → black, not red."""
|
||||||
|
import numpy as np
|
||||||
|
from cv_color_detect import detect_word_colors
|
||||||
|
|
||||||
|
# Create a 200x40 (width x height) image with dark gray pixels (slight warm tint)
|
||||||
|
# HSV: hue=5 (red range), sat=60 (above 55 threshold but below 80), val=40
|
||||||
|
img_hsv = np.full((40, 200, 3), [5, 60, 40], dtype=np.uint8)
|
||||||
|
img_bgr = cv2.cvtColor(img_hsv, cv2.COLOR_HSV2BGR)
|
||||||
|
|
||||||
|
wb = [{"left": 10, "top": 5, "width": 50, "height": 20, "text": "test"}]
|
||||||
|
detect_word_colors(img_bgr, wb)
|
||||||
|
assert wb[0]["color_name"] == "black", \
|
||||||
|
f"Expected black, got {wb[0]['color_name']} (scanner artifact false positive)"
|
||||||
|
|
||||||
|
def test_high_saturation_red_classified_as_red(self):
|
||||||
|
"""Genuinely red text (sat=180) → red."""
|
||||||
|
import numpy as np
|
||||||
|
from cv_color_detect import detect_word_colors
|
||||||
|
|
||||||
|
# White background with red text region
|
||||||
|
# Background: white (H=0, S=0, V=255)
|
||||||
|
img_hsv = np.full((40, 200, 3), [0, 0, 255], dtype=np.uint8)
|
||||||
|
# Text area: red (H=5, S=180, V=200)
|
||||||
|
img_hsv[8:18, 15:55] = [5, 180, 200]
|
||||||
|
img_bgr = cv2.cvtColor(img_hsv, cv2.COLOR_HSV2BGR)
|
||||||
|
|
||||||
|
wb = [{"left": 10, "top": 5, "width": 50, "height": 20, "text": "red"}]
|
||||||
|
detect_word_colors(img_bgr, wb)
|
||||||
|
assert wb[0]["color_name"] == "red", \
|
||||||
|
f"Expected red, got {wb[0]['color_name']}"
|
||||||
|
|||||||
Reference in New Issue
Block a user