diff --git a/klausur-service/backend/cv_color_detect.py b/klausur-service/backend/cv_color_detect.py index c87a771..423d61d 100644 --- a/klausur-service/backend/cv_color_detect.py +++ b/klausur-service/backend/cv_color_detect.py @@ -178,6 +178,15 @@ def detect_word_colors( sat_pixels = text_pixels[text_pixels[:, 1] > sat_threshold] median_hue = float(np.median(sat_pixels[:, 0])) name = _hue_to_color_name(median_hue) + + # Red requires higher saturation — scanner artifacts on black + # text often produce a slight warm tint (hue ~0) with low + # saturation that would otherwise be misclassified as red. + if name == "red" and median_sat < 80: + wb["color"] = _COLOR_HEX["black"] + wb["color_name"] = "black" + continue + wb["color"] = _COLOR_HEX.get(name, _COLOR_HEX["black"]) wb["color_name"] = name colored_count += 1 diff --git a/klausur-service/backend/tests/test_grid_editor_api.py b/klausur-service/backend/tests/test_grid_editor_api.py index 69c32eb..a62d62d 100644 --- a/klausur-service/backend/tests/test_grid_editor_api.py +++ b/klausur-service/backend/tests/test_grid_editor_api.py @@ -11,6 +11,8 @@ Covers: import sys sys.path.insert(0, '/app') +import cv2 +import numpy as np import pytest from cv_vocab_types import PageZone, DetectedBox from grid_editor_api import ( @@ -912,3 +914,43 @@ class TestSlashIpaConversion: """tile /tail/ → tile [tˈaɪl].""" result = self._run_step_5h("tile /tail/ Nomen Dachziegel") assert "[tˈaɪl]" in result + + +# --------------------------------------------------------------------------- +# Color detection: red false-positive suppression +# --------------------------------------------------------------------------- + +class TestRedFalsePositiveSuppression: + """Red requires median_sat >= 80 to avoid scanner artifact false positives.""" + + def test_low_saturation_red_classified_as_black(self): + """Black text with slight warm scanner tint (sat ~60) → black, not red.""" + import numpy as np + from cv_color_detect import detect_word_colors 
+ + # Create a 200x40 image (40 rows x 200 columns) with dark gray pixels (slight warm tint) + # HSV: hue=5 (red range), sat=60 (above 55 threshold but below 80), val=40 + img_hsv = np.full((40, 200, 3), [5, 60, 40], dtype=np.uint8) + img_bgr = cv2.cvtColor(img_hsv, cv2.COLOR_HSV2BGR) + + wb = [{"left": 10, "top": 5, "width": 50, "height": 20, "text": "test"}] + detect_word_colors(img_bgr, wb) + assert wb[0]["color_name"] == "black", \ + f"Expected black, got {wb[0]['color_name']} (scanner artifact false positive)" + + def test_high_saturation_red_classified_as_red(self): + """Genuinely red text (sat=180) → red.""" + import numpy as np + from cv_color_detect import detect_word_colors + + # White background with red text region + # Background: white (H=0, S=0, V=255) + img_hsv = np.full((40, 200, 3), [0, 0, 255], dtype=np.uint8) + # Text area: red (H=5, S=180, V=200) + img_hsv[8:18, 15:55] = [5, 180, 200] + img_bgr = cv2.cvtColor(img_hsv, cv2.COLOR_HSV2BGR) + + wb = [{"left": 10, "top": 5, "width": 50, "height": 20, "text": "red"}] + detect_word_colors(img_bgr, wb) + assert wb[0]["color_name"] == "red", \ + f"Expected red, got {wb[0]['color_name']}"