diff --git a/klausur-service/backend/cv_syllable_detect.py b/klausur-service/backend/cv_syllable_detect.py index 42fb162..9057afc 100644 --- a/klausur-service/backend/cv_syllable_detect.py +++ b/klausur-service/backend/cv_syllable_detect.py @@ -200,10 +200,9 @@ def insert_syllable_dividers( For dictionary pages: process all content column cells, strip existing pipes, merge pipe-gap spaces, and re-syllabify using pyphen. - Pre-check: at least 5% of content cells must already contain ``|`` from - OCR. This guards against false-positive dictionary detection on pages - like synonym dictionaries or alphabetical word lists that have no actual - syllable divider lines. + Pre-check: at least 1% of content cells must already contain ``|`` from + OCR. This guards against pages with zero pipe characters (the primary + guard — article_col_index — is checked at the call site). Returns the number of cells modified. """ @@ -227,10 +226,10 @@ def insert_syllable_dividers( if total_col_cells > 0: pipe_ratio = cells_with_pipes / total_col_cells - if pipe_ratio < 0.05: + if pipe_ratio < 0.01: logger.info( "build-grid session %s: skipping syllable insertion — " - "only %.1f%% of cells have existing pipes (need >=5%%)", + "only %.1f%% of cells have existing pipes (need >=1%%)", session_id, pipe_ratio * 100, ) return 0