diff --git a/klausur-service/backend/grid_editor_api.py b/klausur-service/backend/grid_editor_api.py index e510aaf..4aaed6f 100644 --- a/klausur-service/backend/grid_editor_api.py +++ b/klausur-service/backend/grid_editor_api.py @@ -1324,64 +1324,9 @@ async def _build_grid_core(session_id: str, session: dict) -> dict: if orig: cell["col_type"] = orig - # 5d. Remove IPA continuation rows — rows where the printed - # phonetic transcription wraps to a line below the headword. - # These rows have text only in the English column (+ margin - # noise) and fix_cell_phonetics did NOT insert IPA brackets - # (because there's no real English word to look up). - ipa_cont_rows: set = set() - for z in zones_data: - for row in z.get("rows", []): - ri = row["index"] - row_cells = [ - c for c in z.get("cells", []) - if c.get("row_index") == ri - ] - en_cells = [ - c for c in row_cells - if c.get("col_type") == en_col_type - ] - # Other cells with ≥3 chars (ignore margin noise) - other_cells = [ - c for c in row_cells - if c.get("col_type") != en_col_type - and len((c.get("text") or "").strip()) >= 3 - ] - if en_cells and not other_cells: - en_text = en_cells[0].get("text", "") - # Strip any IPA brackets that fix_cell_phonetics - # may have added for short dictionary matches - # (e.g. "si" → "[si]") to check underlying text. - text_bare = re.sub(r'\[[^\]]*\]', '', en_text).strip() - # Garbled IPA typically contains ':' (length mark) - # or starts with ' (stress mark), and has no word - # with ≥3 letters that could be a real headword. - has_headword = any( - len(re.sub(r'[^a-zA-Z]', '', w)) >= 3 - for w in text_bare.split() - ) if text_bare else False - looks_phonetic = ( - ':' in text_bare - or text_bare.startswith("'") - or text_bare.startswith("\u2019") - or not has_headword - ) - if looks_phonetic: - ipa_cont_rows.add(ri) - if ipa_cont_rows: - for z in zones_data: - z["rows"] = [ - r for r in z.get("rows", []) - if r["index"] not in ipa_cont_rows - ] - z["cells"] = [ - c for c in z.get("cells", []) - if c.get("row_index") not in ipa_cont_rows - ] - logger.info( - "removed %d IPA continuation rows: %s", - len(ipa_cont_rows), sorted(ipa_cont_rows), - ) + # 5d. IPA continuation rows are preserved — they contain the + # printed phonetic transcription that wraps to a line below the + # headword. The user can manually delete them if not needed. duration = time.time() - t0