diff --git a/klausur-service/backend/cv_color_detect.py b/klausur-service/backend/cv_color_detect.py index 423d61d..998d604 100644 --- a/klausur-service/backend/cv_color_detect.py +++ b/klausur-service/backend/cv_color_detect.py @@ -182,7 +182,7 @@ def detect_word_colors( # Red requires higher saturation — scanner artifacts on black # text often produce a slight warm tint (hue ~0) with low # saturation that would otherwise be misclassified as red. - if name == "red" and median_sat < 80: + if name == "red" and median_sat < 90: wb["color"] = _COLOR_HEX["black"] wb["color_name"] = "black" continue diff --git a/klausur-service/backend/grid_editor_api.py b/klausur-service/backend/grid_editor_api.py index 7db58d9..20c048a 100644 --- a/klausur-service/backend/grid_editor_api.py +++ b/klausur-service/backend/grid_editor_api.py @@ -2275,6 +2275,22 @@ async def _build_grid_core(session_id: str, session: dict) -> dict: if overlap_pct > 0.40: c1 = w1.get("conf", 50) c2 = w2.get("conf", 50) + t1 = (w1.get("text") or "").strip().lower() + t2 = (w2.get("text") or "").strip().lower() + + # For very high overlap (>90%) with different text, + # prefer the word that exists in the IPA dictionary + # over confidence (OCR can give artifacts high conf). + if overlap_pct > 0.90 and t1 != t2: + in_dict_1 = bool(_lookup_ipa(re.sub(r'[²³¹\d/]', '', t1), "british")) if t1.isalpha() else False + in_dict_2 = bool(_lookup_ipa(re.sub(r'[²³¹\d/]', '', t2), "british")) if t2.isalpha() else False + if in_dict_1 and not in_dict_2: + to_remove.add(i2) + continue + elif in_dict_2 and not in_dict_1: + to_remove.add(i1) + continue + if c1 < c2: to_remove.add(i1) elif c2 < c1: diff --git a/klausur-service/backend/tests/test_grid_editor_api.py b/klausur-service/backend/tests/test_grid_editor_api.py index ccb077f..ff03ab5 100644 --- a/klausur-service/backend/tests/test_grid_editor_api.py +++ b/klausur-service/backend/tests/test_grid_editor_api.py @@ -924,13 +924,13 @@ class TestRedFalsePositiveSuppression: """Red requires median_sat >= 80 to avoid scanner artifact false positives.""" def test_low_saturation_red_classified_as_black(self): - """Black text with slight warm scanner tint (sat ~60) → black, not red.""" + """Black text with slight warm scanner tint (sat ~85) → black, not red.""" import numpy as np from cv_color_detect import detect_word_colors # Create a 40x20 image with dark gray pixels (slight warm tint) - # HSV: hue=5 (red range), sat=60 (above 55 threshold but below 80), val=40 - img_hsv = np.full((40, 200, 3), [5, 60, 40], dtype=np.uint8) + # HSV: hue=5 (red range), sat=85 (above 55 threshold but below 90), val=40 + img_hsv = np.full((40, 200, 3), [5, 85, 40], dtype=np.uint8) img_bgr = cv2.cvtColor(img_hsv, cv2.COLOR_HSV2BGR) wb = [{"left": 10, "top": 5, "width": 50, "height": 20, "text": "test"}]