diff --git a/klausur-service/backend/cv_color_detect.py b/klausur-service/backend/cv_color_detect.py index 6b0143f..dcc785b 100644 --- a/klausur-service/backend/cv_color_detect.py +++ b/klausur-service/backend/cv_color_detect.py @@ -81,7 +81,8 @@ def _hue_to_color_name(hue: float) -> str: def detect_word_colors( img_bgr: np.ndarray, word_boxes: List[Dict], - sat_threshold: int = 50, + sat_threshold: int = 70, + min_sat_ratio: float = 0.25, ) -> None: """Annotate each word_box in-place with its detected text color. @@ -90,9 +91,12 @@ def detect_word_colors( Algorithm per word: 1. Crop the word region from the image. - 2. Build a text-pixel mask (dark pixels OR high-saturation pixels). - 3. Sample HSV values at mask positions. - 4. If mean saturation ≥ threshold → classify hue; else → black. + 2. Otsu-threshold for text/background separation. + 3. Sample background color from border pixels of the crop. + 4. Remove text pixels that match the background (avoids colored + backgrounds like blue boxes leaking into the result). + 5. Use **median** hue (robust to outliers) and require a minimum + ratio of saturated pixels before classifying as colored. """ if img_bgr is None or not word_boxes: return @@ -114,10 +118,14 @@ def detect_word_colors( continue crop_hsv = img_hsv[y1:y2, x1:x2] - crop_gray = cv2.cvtColor(img_bgr[y1:y2, x1:x2], cv2.COLOR_BGR2GRAY) + crop_bgr = img_bgr[y1:y2, x1:x2] + crop_gray = cv2.cvtColor(crop_bgr, cv2.COLOR_BGR2GRAY) + ch, cw = crop_hsv.shape[:2] - # Text pixels: dark in grayscale OR saturated (colored ink) - _, dark_mask = cv2.threshold(crop_gray, 180, 255, cv2.THRESH_BINARY_INV) + # --- Text mask: Otsu (adaptive) + high-saturation pixels --- + _, dark_mask = cv2.threshold( + crop_gray, 0, 255, cv2.THRESH_BINARY_INV + cv2.THRESH_OTSU, + ) sat_mask = (crop_hsv[:, :, 1] > sat_threshold).astype(np.uint8) * 255 text_mask = cv2.bitwise_or(dark_mask, sat_mask) @@ -128,14 +136,48 @@ def detect_word_colors( wb["color_name"] = "black" continue - mean_sat = float(np.mean(text_pixels[:, 1])) + # --- Background subtraction via border pixels --- + # Sample background from the 2px border ring of the crop + if ch > 6 and cw > 6: + border = 2 + bg_top = crop_hsv[:border, :].reshape(-1, 3) + bg_bot = crop_hsv[-border:, :].reshape(-1, 3) + bg_lft = crop_hsv[border:-border, :border].reshape(-1, 3) + bg_rgt = crop_hsv[border:-border, -border:].reshape(-1, 3) + bg_pixels = np.vstack([bg_top, bg_bot, bg_lft, bg_rgt]) - if mean_sat < sat_threshold: + bg_med_h = float(np.median(bg_pixels[:, 0])) + bg_med_s = float(np.median(bg_pixels[:, 1])) + + # If background is tinted (S > 15), remove text pixels + # with similar hue to avoid false colored detections + if bg_med_s > 15: + hue_diff = np.minimum( + np.abs(text_pixels[:, 0].astype(float) - bg_med_h), + 180.0 - np.abs(text_pixels[:, 0].astype(float) - bg_med_h), + ) + keep = hue_diff > 20 + if np.any(keep): + text_pixels = text_pixels[keep] + + if len(text_pixels) < 3: + wb["color"] = _COLOR_HEX["black"] + wb["color_name"] = "black" + continue + + # --- Classification using MEDIAN (robust to outliers) --- + median_sat = float(np.median(text_pixels[:, 1])) + sat_count = int(np.sum(text_pixels[:, 1] > sat_threshold)) + sat_ratio = sat_count / len(text_pixels) + + if median_sat < sat_threshold or sat_ratio < min_sat_ratio: wb["color"] = _COLOR_HEX["black"] wb["color_name"] = "black" else: - mean_hue = float(np.mean(text_pixels[:, 0])) - name = _hue_to_color_name(mean_hue) + # Use median hue of saturated pixels only for cleaner signal + sat_pixels = text_pixels[text_pixels[:, 1] > sat_threshold] + median_hue = float(np.median(sat_pixels[:, 0])) + name = _hue_to_color_name(median_hue) wb["color"] = _COLOR_HEX.get(name, _COLOR_HEX["black"]) wb["color_name"] = name colored_count += 1