Fix IPA spacing + add zone debug logging for marker column issue
Some checks failed
CI / go-lint (push) Has been skipped
CI / python-lint (push) Has been skipped
CI / nodejs-lint (push) Has been skipped
CI / test-go-school (push) Successful in 55s
CI / test-go-edu-search (push) Successful in 49s
CI / test-python-klausur (push) Failing after 2m48s
CI / test-python-agent-core (push) Successful in 32s
CI / test-nodejs-website (push) Successful in 37s
Some checks failed
CI / go-lint (push) Has been skipped
CI / python-lint (push) Has been skipped
CI / nodejs-lint (push) Has been skipped
CI / test-go-school (push) Successful in 55s
CI / test-go-edu-search (push) Successful in 49s
CI / test-python-klausur (push) Failing after 2m48s
CI / test-python-agent-core (push) Successful in 32s
CI / test-nodejs-website (push) Successful in 37s
1. Ensure a space before IPA brackets in cell text: "word[ipa]" → "word [ipa]". Applied as a final cleanup step in grid-build finalization.
2. Add debug logging for zone-word assignment to diagnose why marker column cells are empty despite correct column detection.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -347,6 +347,13 @@ async def _build_grid_core(
|
|||||||
zone_words = _words_in_zone(
|
zone_words = _words_in_zone(
|
||||||
all_words, pz.y, pz.height, pz.x, pz.width
|
all_words, pz.y, pz.height, pz.x, pz.width
|
||||||
)
|
)
|
||||||
|
if pz.zone_type == "content":
|
||||||
|
logger.info(
|
||||||
|
"build-grid zone %d (%s): bounds x=%d..%d y=%d..%d → %d/%d words",
|
||||||
|
pz.index, pz.zone_type,
|
||||||
|
pz.x, pz.x + pz.width, pz.y, pz.y + pz.height,
|
||||||
|
len(zone_words), len(all_words),
|
||||||
|
)
|
||||||
# Filter recovered single-char artifacts in ALL zones
|
# Filter recovered single-char artifacts in ALL zones
|
||||||
# (decorative colored pixel blobs like !, ?, • from
|
# (decorative colored pixel blobs like !, ?, • from
|
||||||
# recover_colored_text that don't represent real text)
|
# recover_colored_text that don't represent real text)
|
||||||
@@ -1710,6 +1717,16 @@ async def _build_grid_core(
|
|||||||
except ImportError:
|
except ImportError:
|
||||||
pass
|
pass
|
||||||
|
|
||||||
|
# --- Ensure space before IPA brackets: "word[ipa]" → "word [ipa]" ---
|
||||||
|
_IPA_NOSPACE_RE = re.compile(r'([a-zA-ZäöüÄÖÜß])(\[[^\]]*[ˈˌːɑɒæɛəɜɪɔʊʌðŋθʃʒɹɡɾ][^\]]*\])')
|
||||||
|
for z in zones_data:
|
||||||
|
for cell in z.get("cells", []):
|
||||||
|
text = cell.get("text", "")
|
||||||
|
if text and "[" in text:
|
||||||
|
fixed = _IPA_NOSPACE_RE.sub(r'\1 \2', text)
|
||||||
|
if fixed != text:
|
||||||
|
cell["text"] = fixed
|
||||||
|
|
||||||
# Clean up internal flags before returning
|
# Clean up internal flags before returning
|
||||||
for z in zones_data:
|
for z in zones_data:
|
||||||
for cell in z.get("cells", []):
|
for cell in z.get("cells", []):
|
||||||
|
|||||||
Reference in New Issue
Block a user