Fix IPA spacing + add zone debug logging for marker column issue
Some checks failed
CI / go-lint (push) Has been skipped
CI / python-lint (push) Has been skipped
CI / nodejs-lint (push) Has been skipped
CI / test-go-school (push) Successful in 55s
CI / test-go-edu-search (push) Successful in 49s
CI / test-python-klausur (push) Failing after 2m48s
CI / test-python-agent-core (push) Successful in 32s
CI / test-nodejs-website (push) Successful in 37s
Some checks failed
CI / go-lint (push) Has been skipped
CI / python-lint (push) Has been skipped
CI / nodejs-lint (push) Has been skipped
CI / test-go-school (push) Successful in 55s
CI / test-go-edu-search (push) Successful in 49s
CI / test-python-klausur (push) Failing after 2m48s
CI / test-python-agent-core (push) Successful in 32s
CI / test-nodejs-website (push) Successful in 37s
1. Ensure space before IPA brackets in cell text: "word[ipa]" → "word [ipa]". Applied as final cleanup in grid-build finalization. 2. Add debug logging for zone-word assignment to diagnose why marker column cells are empty despite correct column detection. Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -347,6 +347,13 @@ async def _build_grid_core(
|
||||
zone_words = _words_in_zone(
|
||||
all_words, pz.y, pz.height, pz.x, pz.width
|
||||
)
|
||||
if pz.zone_type == "content":
|
||||
logger.info(
|
||||
"build-grid zone %d (%s): bounds x=%d..%d y=%d..%d → %d/%d words",
|
||||
pz.index, pz.zone_type,
|
||||
pz.x, pz.x + pz.width, pz.y, pz.y + pz.height,
|
||||
len(zone_words), len(all_words),
|
||||
)
|
||||
# Filter recovered single-char artifacts in ALL zones
|
||||
# (decorative colored pixel blobs like !, ?, • from
|
||||
# recover_colored_text that don't represent real text)
|
||||
@@ -1710,6 +1717,16 @@ async def _build_grid_core(
|
||||
except ImportError:
|
||||
pass
|
||||
|
||||
# --- Ensure space before IPA brackets: "word[ipa]" → "word [ipa]" ---
|
||||
_IPA_NOSPACE_RE = re.compile(r'([a-zA-ZäöüÄÖÜß])(\[[^\]]*[ˈˌːɑɒæɛəɜɪɔʊʌðŋθʃʒɹɡɾ][^\]]*\])')
|
||||
for z in zones_data:
|
||||
for cell in z.get("cells", []):
|
||||
text = cell.get("text", "")
|
||||
if text and "[" in text:
|
||||
fixed = _IPA_NOSPACE_RE.sub(r'\1 \2', text)
|
||||
if fixed != text:
|
||||
cell["text"] = fixed
|
||||
|
||||
# Clean up internal flags before returning
|
||||
for z in zones_data:
|
||||
for cell in z.get("cells", []):
|
||||
|
||||
Reference in New Issue
Block a user