diff --git a/klausur-service/backend/grid_editor_api.py b/klausur-service/backend/grid_editor_api.py index 20c048a..384d56c 100644 --- a/klausur-service/backend/grid_editor_api.py +++ b/klausur-service/backend/grid_editor_api.py @@ -23,7 +23,7 @@ from fastapi import APIRouter, HTTPException, Request from cv_box_detect import detect_boxes, split_page_into_zones from cv_vocab_types import PageZone from cv_color_detect import detect_word_colors, recover_colored_text -from cv_ocr_engines import fix_cell_phonetics, fix_ipa_continuation_cell, _text_has_garbled_ipa, _lookup_ipa +from cv_ocr_engines import fix_cell_phonetics, fix_ipa_continuation_cell, _text_has_garbled_ipa, _lookup_ipa, _words_to_reading_order_text from cv_words_first import _cluster_rows, _build_cells from ocr_pipeline_session_store import ( get_session_db, @@ -1850,11 +1850,7 @@ async def _build_grid_core(session_id: str, session: dict) -> dict: if len(filtered) < len(wbs): removed_oversized += len(wbs) - len(filtered) cell["word_boxes"] = filtered - cell["text"] = " ".join( - wb.get("text", "").strip() - for wb in sorted(filtered, key=lambda w: (w.get("top", 0), w.get("left", 0))) - if wb.get("text", "").strip() - ) + cell["text"] = _words_to_reading_order_text(filtered) if removed_oversized: # Remove cells that became empty after oversized removal z["cells"] = [c for c in cells if c.get("word_boxes")] @@ -1879,11 +1875,7 @@ async def _build_grid_core(session_id: str, session: dict) -> dict: if len(filtered) < len(wbs): removed_pipes += len(wbs) - len(filtered) cell["word_boxes"] = filtered - cell["text"] = " ".join( - wb.get("text", "").strip() - for wb in sorted(filtered, key=lambda w: (w.get("top", 0), w.get("left", 0))) - if wb.get("text", "").strip() - ) + cell["text"] = _words_to_reading_order_text(filtered) # Remove cells that became empty after pipe removal if removed_pipes: z["cells"] = [c for c in z.get("cells", []) if (c.get("word_boxes") or c.get("text", "").strip())] @@ -2316,11 +2308,7 @@ async def _build_grid_core(session_id: str, session: dict) -> dict: bullet_removed += len(to_remove) filtered = [wb for i, wb in enumerate(wbs) if i not in to_remove] cell["word_boxes"] = filtered - cell["text"] = " ".join( - wb.get("text", "").strip() - for wb in sorted(filtered, key=lambda w: (w.get("top", 0), w.get("left", 0))) - if wb.get("text", "").strip() - ) + cell["text"] = _words_to_reading_order_text(filtered) # Remove cells that became empty after bullet removal if bullet_removed: