feat(ocr-pipeline): British/American IPA pronunciation choice
- Integrate Britfone dictionary (MIT, 15k British English IPA entries)
- Add pronunciation parameter: 'british' (default) or 'american'
- British uses Britfone (Received Pronunciation), falls back to CMU
- American uses eng_to_ipa/CMU, falls back to Britfone
- Frontend: dropdown to switch pronunciation, default = British
- API: ?pronunciation=british|american query parameter

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -1007,11 +1007,12 @@ async def get_row_ground_truth(session_id: str):
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
@router.post("/sessions/{session_id}/words")
|
||||
-async def detect_words(session_id: str, engine: str = "auto"):
|
||||
+async def detect_words(session_id: str, engine: str = "auto", pronunciation: str = "british"):
|
||||
"""Build word grid from columns × rows, OCR each cell.
|
||||
|
||||
Query params:
|
||||
engine: 'auto' (default), 'tesseract', or 'rapid'
|
||||
+pronunciation: 'british' (default) or 'american' — for IPA dictionary lookup
|
||||
"""
|
||||
if session_id not in _cache:
|
||||
await _load_session_to_cache(session_id)
|
||||
@@ -1068,6 +1069,7 @@ async def detect_words(session_id: str, engine: str = "auto"):
|
||||
entries = build_word_grid(
|
||||
ocr_img, col_regions, row_geoms, img_w, img_h,
|
||||
ocr_engine=engine, img_bgr=dewarped_bgr,
|
||||
+pronunciation=pronunciation,
|
||||
)
|
||||
duration = time.time() - t0
|
||||
|
||||
|
||||
Reference in New Issue
Block a user