diff --git a/klausur-service/backend/grid_editor_api.py b/klausur-service/backend/grid_editor_api.py index 82b6fab..ddc962b 100644 --- a/klausur-service/backend/grid_editor_api.py +++ b/klausur-service/backend/grid_editor_api.py @@ -347,6 +347,13 @@ async def _build_grid_core( zone_words = _words_in_zone( all_words, pz.y, pz.height, pz.x, pz.width ) + if pz.zone_type == "content": + logger.info( + "build-grid zone %d (%s): bounds x=%d..%d y=%d..%d → %d/%d words", + pz.index, pz.zone_type, + pz.x, pz.x + pz.width, pz.y, pz.y + pz.height, + len(zone_words), len(all_words), + ) # Filter recovered single-char artifacts in ALL zones # (decorative colored pixel blobs like !, ?, • from # recover_colored_text that don't represent real text) @@ -1710,6 +1717,16 @@ async def _build_grid_core( except ImportError: pass + # --- Ensure space before IPA brackets: "word[ipa]" → "word [ipa]" --- + _IPA_NOSPACE_RE = re.compile(r'([a-zA-ZäöüÄÖÜß])(\[[^\]]*[ˈˌːɑɒæɛəɜɪɔʊʌðŋθʃʒɹɡɾ][^\]]*\])') + for z in zones_data: + for cell in z.get("cells", []): + text = cell.get("text", "") + if text and "[" in text: + fixed = _IPA_NOSPACE_RE.sub(r'\1 \2', text) + if fixed != text: + cell["text"] = fixed + # Clean up internal flags before returning for z in zones_data: for cell in z.get("cells", []):