Add IPA and syllable mode toggles, fix false IPA on German documents

Backend: Remove the en_col_type fallback heuristic (longest average text) that incorrectly identified German columns as English. IPA is now applied only when OCR bracket patterns are actually found. Add ipa_mode (auto/all/none) and syllable_mode (auto/all/none) query params to the build-grid API. Frontend: Add IPA and Silben dropdown selects to GridToolbar. Modes are passed as query params on rebuild. Auto = current smart detection, All = force for all words, None ("Aus") = skip entirely. Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-03-25 08:04:44 +01:00
parent c42924a94a
commit 34680732f8
6 changed files with 165 additions and 55 deletions
--- a/klausur-service/backend/cv_syllable_detect.py
+++ b/klausur-service/backend/cv_syllable_detect.py
@@ -194,6 +194,8 @@ def insert_syllable_dividers(
    zones_data: List[Dict],
    img_bgr: np.ndarray,
    session_id: str,
+    *,
+    force: bool = False,
 ) -> int:
    """Insert pipe syllable dividers into dictionary cells.

@@ -204,6 +206,10 @@ def insert_syllable_dividers(
    OCR.  This guards against pages with zero pipe characters (the primary
    guard — article_col_index — is checked at the call site).

+    Args:
+        force: If True, skip the pipe-ratio pre-check and syllabify all
+            content words regardless of whether the original has pipe dividers.
+
    Returns the number of cells modified.
    """
    hyph_de, hyph_en = _get_hyphenators()
@@ -215,24 +221,25 @@ def insert_syllable_dividers(
    # Real dictionary pages with printed syllable dividers will have OCR-
    # detected pipes in many cells.  Pages without syllable dividers will
    # have zero — skip those to avoid false syllabification.
-    total_col_cells = 0
-    cells_with_pipes = 0
-    for z in zones_data:
-        for cell in z.get("cells", []):
-            if cell.get("col_type", "").startswith("column_"):
-                total_col_cells += 1
-                if "|" in cell.get("text", ""):
-                    cells_with_pipes += 1
+    if not force:
+        total_col_cells = 0
+        cells_with_pipes = 0
+        for z in zones_data:
+            for cell in z.get("cells", []):
+                if cell.get("col_type", "").startswith("column_"):
+                    total_col_cells += 1
+                    if "|" in cell.get("text", ""):
+                        cells_with_pipes += 1

-    if total_col_cells > 0:
-        pipe_ratio = cells_with_pipes / total_col_cells
-        if pipe_ratio < 0.01:
-            logger.info(
-                "build-grid session %s: skipping syllable insertion — "
-                "only %.1f%% of cells have existing pipes (need >=1%%)",
-                session_id, pipe_ratio * 100,
-            )
-            return 0
+        if total_col_cells > 0:
+            pipe_ratio = cells_with_pipes / total_col_cells
+            if pipe_ratio < 0.01:
+                logger.info(
+                    "build-grid session %s: skipping syllable insertion — "
+                    "only %.1f%% of cells have existing pipes (need >=1%%)",
+                    session_id, pipe_ratio * 100,
+                )
+                return 0

    insertions = 0
    for z in zones_data: