From ab30e8b17aa6bc091f07c1d308058b4a31d63aa8 Mon Sep 17 00:00:00 2001 From: Benjamin Admin Date: Tue, 17 Mar 2026 12:53:58 +0100 Subject: [PATCH] feat: apply IPA phonetic correction in build-grid combo mode MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit fix_cell_phonetics was only called in the OCR pipeline endpoints (/words, /cells) but not in the combo mode (build-grid / ocr-overlay). Garbled IPA like [teist] is now corrected to [teɪst] using the IPA dictionary, same as in the pipeline. Co-Authored-By: Claude Opus 4.6 --- klausur-service/backend/grid_editor_api.py | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/klausur-service/backend/grid_editor_api.py b/klausur-service/backend/grid_editor_api.py index e34843e..ff8722a 100644 --- a/klausur-service/backend/grid_editor_api.py +++ b/klausur-service/backend/grid_editor_api.py @@ -21,6 +21,7 @@ from fastapi import APIRouter, HTTPException, Request from cv_box_detect import detect_boxes, split_page_into_zones from cv_color_detect import detect_word_colors, recover_colored_text +from cv_ocr_engines import fix_cell_phonetics from cv_words_first import _cluster_rows, _build_cells from ocr_pipeline_session_store import ( get_session_db, @@ -970,6 +971,11 @@ async def build_grid(session_id: str): if ")" in text and "(" not in text: cell["text"] = "(" + text + # 5c. IPA phonetic correction — replace garbled OCR phonetics with + # correct IPA from the dictionary (same as in the OCR pipeline). + all_cells = [cell for z in zones_data for cell in z.get("cells", [])] + fix_cell_phonetics(all_cells, pronunciation="british") + duration = time.time() - t0 # 6. Build result