diff --git a/klausur-service/backend/grid_editor_api.py b/klausur-service/backend/grid_editor_api.py index 3523964..9787053 100644 --- a/klausur-service/backend/grid_editor_api.py +++ b/klausur-service/backend/grid_editor_api.py @@ -2292,7 +2292,7 @@ async def _build_grid_core(session_id: str, session: dict) -> dict: # OCR reads these as text artifacts (©, e, *, or even plausible words # like "fighily" overlapping the real word "tightly"). # Detection rules: - # a) Tiny blue symbols: area < 150 AND conf < 85 + # a) Tiny coloured symbols: area < 200 AND conf < 85 (any non-black) # b) Overlapping word_boxes: >40% x-overlap → remove lower confidence # c) Duplicate text: consecutive blue wbs with identical text, gap < 6px bullet_removed = 0 @@ -2303,10 +2303,11 @@ async def _build_grid_core(session_id: str, session: dict) -> dict: continue to_remove: set = set() - # Rule (a): tiny blue symbols + # Rule (a): tiny coloured symbols (bullets, graphic fragments) for i, wb in enumerate(wbs): - if (wb.get("color_name") == "blue" - and wb.get("width", 0) * wb.get("height", 0) < 150 + cn = wb.get("color_name", "black") + if (cn != "black" + and wb.get("width", 0) * wb.get("height", 0) < 200 and wb.get("conf", 100) < 85): to_remove.add(i)