From 5f89913a9a45127e9dd91c168f6641b00de0252e Mon Sep 17 00:00:00 2001
From: Benjamin Admin <benjaminadmin@MacBook-Pro.fritz.box>
Date: Thu, 19 Mar 2026 23:34:41 +0100
Subject: [PATCH] Fix IPA continuation to check all columns, not just
 en_col_type

The en_col_type heuristic (longest average text) can pick the example
column instead of the headword column, so garbled IPA continuation cells
in the actual headword column were never fixed. Step 5d now checks every
column_* cell for garbled IPA patterns, independently of en_col_type.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
---
 klausur-service/backend/grid_editor_api.py | 91 +++++++++-------------
 1 file changed, 38 insertions(+), 53 deletions(-)

diff --git a/klausur-service/backend/grid_editor_api.py b/klausur-service/backend/grid_editor_api.py
index 15e43ec..67fc02a 100644
--- a/klausur-service/backend/grid_editor_api.py
+++ b/klausur-service/backend/grid_editor_api.py
@@ -1614,69 +1614,54 @@ async def _build_grid_core(session_id: str, session: dict) -> dict:
             if orig:
                 cell["col_type"] = orig
 
-        # 5d. Fix IPA continuation rows — rows where the printed
+        # 5d. Fix IPA continuation cells — cells where the printed
         # phonetic transcription wraps to a line below the headword.
-        # These contain garbled IPA in the EN column.  Replace garbled
-        # text with proper IPA looked up from the headword in the
-        # previous row.
+        # These contain garbled IPA (e.g. "[n, nn]", "[1uedtX,1]").
+        # Replace garbled text with proper IPA looked up from the
+        # headword in the previous row's same column.
+        # Note: We check ALL columns, not just en_col_type, because
+        # the EN headword column may not be the longest-average column.
         ipa_cont_fixed = 0
         for z in zones_data:
             rows_sorted = sorted(z.get("rows", []), key=lambda r: r["index"])
             z_cells = z.get("cells", [])
             for idx, row in enumerate(rows_sorted):
-                ri = row["index"]
-                row_cells = [c for c in z_cells if c.get("row_index") == ri]
-                en_cells = [
-                    c for c in row_cells
-                    if c.get("col_type") == en_col_type
-                ]
-                if not en_cells:
-                    continue
-                en_text = en_cells[0].get("text", "")
-                if not _text_has_garbled_ipa(en_text):
-                    continue
-                # Already has proper IPA brackets → already fixed
-                if re.search(r'\[[^\]]*[ˈˌəɪɛɒʊʌæɑɔʃʒθðŋ][^\]]*\]', en_text):
-                    continue
-                # When the EN cell does NOT look obviously garbled
-                # (e.g. bracketed non-IPA), require that other columns
-                # are empty — otherwise it's a normal content row.
-                en_stripped = en_text.strip()
-                is_bracket_garbled = (
-                    en_stripped.startswith('[') and en_stripped.endswith(']')
-                )
-                if not is_bracket_garbled:
-                    other_cells = [
-                        c for c in row_cells
-                        if c.get("col_type") != en_col_type
-                        and len((c.get("text") or "").strip()) >= 3
-                    ]
-                    if other_cells:
-                        continue
-                # Find headword in previous row
                 if idx == 0:
                     continue
-                prev_ri = rows_sorted[idx - 1]["index"]
-                prev_en = [
-                    c for c in z_cells
-                    if c.get("row_index") == prev_ri
-                    and c.get("col_type") == en_col_type
-                ]
-                if not prev_en:
-                    continue
-                prev_text = prev_en[0].get("text", "")
-                fixed = fix_ipa_continuation_cell(
-                    en_text, prev_text, pronunciation="british",
-                )
-                if fixed != en_text:
-                    en_cells[0]["text"] = fixed
-                    ipa_cont_fixed += 1
-                    logger.info(
-                        "IPA continuation R%d: '%s' → '%s'",
-                        ri, en_text, fixed,
+                ri = row["index"]
+                row_cells = [c for c in z_cells if c.get("row_index") == ri]
+                for cell in row_cells:
+                    ct = cell.get("col_type", "")
+                    if not ct.startswith("column_"):
+                        continue
+                    cell_text = cell.get("text", "")
+                    if not _text_has_garbled_ipa(cell_text):
+                        continue
+                    # Already has proper IPA brackets → already fixed
+                    if re.search(r'\[[^\]]*[ˈˌəɪɛɒʊʌæɑɔʃʒθðŋ][^\]]*\]', cell_text):
+                        continue
+                    # Find headword in previous row, same column
+                    prev_ri = rows_sorted[idx - 1]["index"]
+                    prev_same_col = [
+                        c for c in z_cells
+                        if c.get("row_index") == prev_ri
+                        and c.get("col_type") == ct
+                    ]
+                    if not prev_same_col:
+                        continue
+                    prev_text = prev_same_col[0].get("text", "")
+                    fixed = fix_ipa_continuation_cell(
+                        cell_text, prev_text, pronunciation="british",
                     )
+                    if fixed != cell_text:
+                        cell["text"] = fixed
+                        ipa_cont_fixed += 1
+                        logger.info(
+                            "IPA continuation R%d %s: '%s' → '%s'",
+                            ri, ct, cell_text, fixed,
+                        )
         if ipa_cont_fixed:
-            logger.info("Fixed %d IPA continuation rows", ipa_cont_fixed)
+            logger.info("Fixed %d IPA continuation cells", ipa_cont_fixed)
 
     duration = time.time() - t0