From 76cd1ac0208d2a1815238e604978efd024728a77 Mon Sep 17 00:00:00 2001 From: Benjamin Admin Date: Wed, 25 Mar 2026 21:49:05 +0100 Subject: [PATCH] Fix false headers on sparse layouts and IPA corruption on German text MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit 1. Header detection: Add 25% cap to single-cell heading heuristic. On German synonym dicts where most rows naturally have only 1 content cell, the old logic marked 60%+ of rows as headers. 2. IPA de/all mode: Use "column_text" (light processing) for non-English columns instead of "column_en" (full processing). The full path runs _insert_missing_ipa() which splits on whitespace, matches English prefixes ("bildschön" → "bild"), and truncates the rest — destroying German comma-separated synonym lists. Co-Authored-By: Claude Opus 4.6 --- klausur-service/backend/grid_editor_api.py | 15 ++++++++++++--- klausur-service/backend/grid_editor_helpers.py | 12 ++++++++++++ 2 files changed, 24 insertions(+), 3 deletions(-) diff --git a/klausur-service/backend/grid_editor_api.py b/klausur-service/backend/grid_editor_api.py index 0ba002f..0e58d74 100644 --- a/klausur-service/backend/grid_editor_api.py +++ b/klausur-service/backend/grid_editor_api.py @@ -914,9 +914,18 @@ async def _build_grid_core( if ipa_target_cols: for cell in all_cells: - if cell.get("col_type") in ipa_target_cols: - cell["_orig_col_type"] = cell["col_type"] - cell["col_type"] = "column_en" + ct = cell.get("col_type") + if ct in ipa_target_cols: + cell["_orig_col_type"] = ct + # Full IPA processing (incl. insertion) only for the + # detected English column; other columns get light + # processing (bracket replacement only) — our IPA + # dictionary is English-only, so inserting IPA into + # German text would corrupt it. 
+ if ct == en_col_type: + cell["col_type"] = "column_en" + else: + cell["col_type"] = "column_text" # Snapshot text before IPA fix to detect which cells were modified _pre_ipa = {id(c): c.get("text", "") for c in all_cells} fix_cell_phonetics(all_cells, pronunciation="british") diff --git a/klausur-service/backend/grid_editor_helpers.py b/klausur-service/backend/grid_editor_helpers.py index 6eade87..32dbe14 100644 --- a/klausur-service/backend/grid_editor_helpers.py +++ b/klausur-service/backend/grid_editor_helpers.py @@ -913,6 +913,18 @@ def _detect_heading_rows_by_single_cell( continue heading_row_indices.append(ri) + # Guard: if >25% of eligible rows would become headings, the + # heuristic is misfiring (e.g. sparse single-column layout where + # most rows naturally have only 1 content cell). + eligible_rows = len(non_header_rows) - 2 # minus first/last excluded + if eligible_rows > 0 and len(heading_row_indices) > eligible_rows * 0.25: + logger.debug( + "Skipping single-cell heading detection for zone %s: " + "%d/%d rows would be headings (>25%%)", + z.get("zone_index"), len(heading_row_indices), eligible_rows, + ) + continue + for hri in heading_row_indices: header_cells = [c for c in cells if c.get("row_index") == hri] if not header_cells: