feat: IPA-Lautschrift in Cell-Texte einfuegen (fuer Overlay-Modus)
Some checks failed
CI / go-lint (push) Has been skipped
CI / python-lint (push) Has been skipped
CI / nodejs-lint (push) Has been skipped
CI / test-go-school (push) Successful in 34s
CI / test-go-edu-search (push) Successful in 31s
CI / test-python-klausur (push) Failing after 2m5s
CI / test-python-agent-core (push) Successful in 23s
CI / test-nodejs-website (push) Successful in 22s
Some checks failed
CI / go-lint (push) Has been skipped
CI / python-lint (push) Has been skipped
CI / nodejs-lint (push) Has been skipped
CI / test-go-school (push) Successful in 34s
CI / test-go-edu-search (push) Successful in 31s
CI / test-python-klausur (push) Failing after 2m5s
CI / test-python-agent-core (push) Successful in 23s
CI / test-nodejs-website (push) Successful in 22s
fix_cell_phonetics() ersetzt fehlerhafte IPA-Klammern UND fuegt fehlende Lautschrift fuer englische Woerter ein (z.B. badge, film, challenge, profit). Wird auf alle Zellen mit col_type column_en/column_text angewandt.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -43,6 +43,7 @@ from cv_vocab_pipeline import (
|
||||
_detect_sub_columns,
|
||||
_fix_character_confusion,
|
||||
_fix_phonetic_brackets,
|
||||
fix_cell_phonetics,
|
||||
analyze_layout,
|
||||
analyze_layout_by_words,
|
||||
build_cell_grid,
|
||||
@@ -2030,6 +2031,9 @@ async def detect_words(
|
||||
# Determine which engine was actually used
|
||||
used_engine = cells[0].get("ocr_engine", "tesseract") if cells else engine
|
||||
|
||||
# Apply IPA phonetic fixes directly to cell texts (for overlay mode)
|
||||
fix_cell_phonetics(cells, pronunciation=pronunciation)
|
||||
|
||||
# Grid result (always generic)
|
||||
word_result = {
|
||||
"cells": cells,
|
||||
@@ -2169,11 +2173,14 @@ async def _word_batch_stream_generator(
|
||||
logger.info(f"SSE batch: client disconnected after OCR for {session_id}")
|
||||
return
|
||||
|
||||
# 4. Send columns meta
|
||||
# 4. Apply IPA phonetic fixes directly to cell texts (for overlay mode)
|
||||
fix_cell_phonetics(cells, pronunciation=pronunciation)
|
||||
|
||||
# 5. Send columns meta
|
||||
if columns_meta:
|
||||
yield f"data: {json.dumps({'type': 'columns', 'columns_used': columns_meta})}\n\n"
|
||||
|
||||
# 5. Stream all cells
|
||||
# 6. Stream all cells
|
||||
for idx, cell in enumerate(cells):
|
||||
cell_event = {
|
||||
"type": "cell",
|
||||
@@ -2323,6 +2330,9 @@ async def _word_stream_generator(
|
||||
|
||||
used_engine = all_cells[0].get("ocr_engine", "tesseract") if all_cells else engine
|
||||
|
||||
# Apply IPA phonetic fixes directly to cell texts (for overlay mode)
|
||||
fix_cell_phonetics(all_cells, pronunciation=pronunciation)
|
||||
|
||||
word_result = {
|
||||
"cells": all_cells,
|
||||
"grid_shape": {
|
||||
@@ -3996,6 +4006,9 @@ async def run_auto(session_id: str, req: RunAutoRequest, request: Request):
|
||||
n_content_rows = len([r for r in row_geoms if r.row_type == 'content'])
|
||||
used_engine = cells[0].get("ocr_engine", "tesseract") if cells else req.ocr_engine
|
||||
|
||||
# Apply IPA phonetic fixes directly to cell texts
|
||||
fix_cell_phonetics(cells, pronunciation=req.pronunciation)
|
||||
|
||||
word_result_data = {
|
||||
"cells": cells,
|
||||
"grid_shape": {
|
||||
|
||||
Reference in New Issue
Block a user