Add language-specific IPA and syllable modes (de/en)

Extend the ipa_mode and syllable_mode toggles with language options:

- auto: smart detection (default)
- en: only the English headword column
- de: only the German definition columns
- all: all content columns
- none: skip entirely

Also improve English column auto-detection: use garbled IPA patterns (apostrophes, colons) in addition to bracket patterns. This correctly identifies English dictionary pages where OCR produces garbled ASCII instead of bracket IPA.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-03-25 08:16:29 +01:00
parent 34680732f8
commit 83c058e400
4 changed files with 68 additions and 31 deletions
--- a/klausur-service/backend/cv_syllable_detect.py
+++ b/klausur-service/backend/cv_syllable_detect.py
@@ -196,6 +196,7 @@ def insert_syllable_dividers(
    session_id: str,
    *,
    force: bool = False,
+    col_filter: Optional[set] = None,
 ) -> int:
    """Insert pipe syllable dividers into dictionary cells.

@@ -209,6 +210,8 @@ def insert_syllable_dividers(
    Args:
        force: If True, skip the pipe-ratio pre-check and syllabify all
            content words regardless of whether the original has pipe dividers.
+        col_filter: If set, only process cells whose col_type is in this set.
+            None means process all content columns.

    Returns the number of cells modified.
    """
@@ -247,6 +250,8 @@ def insert_syllable_dividers(
            ct = cell.get("col_type", "")
            if not ct.startswith("column_"):
                continue
+            if col_filter is not None and ct not in col_filter:
+                continue
            text = cell.get("text", "")
            if not text:
                continue