diff --git a/admin-lehrer/components/grid-editor/GridToolbar.tsx b/admin-lehrer/components/grid-editor/GridToolbar.tsx index 0113f15..cddbc53 100644 --- a/admin-lehrer/components/grid-editor/GridToolbar.tsx +++ b/admin-lehrer/components/grid-editor/GridToolbar.tsx @@ -21,12 +21,16 @@ interface GridToolbarProps { const IPA_LABELS: Record = { auto: 'IPA: Auto', + en: 'IPA: nur EN', + de: 'IPA: nur DE', all: 'IPA: Alle', none: 'IPA: Aus', } const SYLLABLE_LABELS: Record = { auto: 'Silben: Original', + en: 'Silben: nur EN', + de: 'Silben: nur DE', all: 'Silben: Alle', none: 'Silben: Aus', } diff --git a/admin-lehrer/components/grid-editor/useGridEditor.ts b/admin-lehrer/components/grid-editor/useGridEditor.ts index 84a4b69..83e5c10 100644 --- a/admin-lehrer/components/grid-editor/useGridEditor.ts +++ b/admin-lehrer/components/grid-editor/useGridEditor.ts @@ -14,8 +14,8 @@ export interface GridEditorState { selectedZone: number | null } -export type IpaMode = 'auto' | 'all' | 'none' -export type SyllableMode = 'auto' | 'all' | 'none' +export type IpaMode = 'auto' | 'all' | 'de' | 'en' | 'none' +export type SyllableMode = 'auto' | 'all' | 'de' | 'en' | 'none' export function useGridEditor(sessionId: string | null) { const [grid, setGrid] = useState(null) diff --git a/klausur-service/backend/cv_syllable_detect.py b/klausur-service/backend/cv_syllable_detect.py index e86ef09..cc43cf9 100644 --- a/klausur-service/backend/cv_syllable_detect.py +++ b/klausur-service/backend/cv_syllable_detect.py @@ -196,6 +196,7 @@ def insert_syllable_dividers( session_id: str, *, force: bool = False, + col_filter: Optional[set] = None, ) -> int: """Insert pipe syllable dividers into dictionary cells. @@ -209,6 +210,8 @@ def insert_syllable_dividers( Args: force: If True, skip the pipe-ratio pre-check and syllabify all content words regardless of whether the original has pipe dividers. + col_filter: If set, only process cells whose col_type is in this set. + None means process all content columns. Returns the number of cells modified. """ @@ -247,6 +250,8 @@ def insert_syllable_dividers( ct = cell.get("col_type", "") if not ct.startswith("column_"): continue + if col_filter is not None and ct not in col_filter: + continue text = cell.get("text", "") if not text: continue diff --git a/klausur-service/backend/grid_editor_api.py b/klausur-service/backend/grid_editor_api.py index cc190c0..82c04ad 100644 --- a/klausur-service/backend/grid_editor_api.py +++ b/klausur-service/backend/grid_editor_api.py @@ -80,9 +80,11 @@ async def _build_grid_core( session_id: Session identifier (for logging and image loading). session: Full session dict from get_session_db(). ipa_mode: "auto" (only when English headwords detected), "all" - (force IPA on all content columns), or "none" (skip IPA entirely). + (force IPA on all content columns), "en" (English column only), + "de" (German/definition columns only), or "none" (skip entirely). syllable_mode: "auto" (only when original has pipe dividers), - "all" (force syllabification on all words), or "none" (skip). + "all" (force syllabification on all words), "en" (English only), + "de" (German only), or "none" (skip). Returns: StructuredGrid result dict. @@ -869,32 +871,51 @@ async def _build_grid_core( all_cells = [cell for z in zones_data for cell in z.get("cells", [])] total_cols = sum(len(z.get("columns", [])) for z in zones_data) en_col_type = None + ipa_target_cols: set = set() + all_content_cols: set = set() skip_ipa = (ipa_mode == "none") if not skip_ipa and total_cols >= 3: - # Find the column that contains IPA brackets → English headwords. - # Count cells with bracket patterns per col_type. The column with - # the most brackets is the headword column (IPA sits after or below - # headwords). - col_bracket_count: Dict[str, int] = {} + # Detect English headword column via IPA signals (brackets or garbled). + col_ipa_count: Dict[str, int] = {} + all_content_cols: set = set() for cell in all_cells: ct = cell.get("col_type", "") + if not ct.startswith("column_"): + continue txt = cell.get("text", "") or "" - if ct.startswith("column_") and '[' in txt: - col_bracket_count[ct] = col_bracket_count.get(ct, 0) + 1 - # Pick column with most bracket IPA patterns. - # ipa_mode="auto": only when OCR already found bracket IPA (no fallback). - # ipa_mode="all": fallback to headword_col_index from dictionary detection. - if col_bracket_count: - en_col_type = max(col_bracket_count, key=col_bracket_count.get) - elif ipa_mode == "all": - # Force IPA: use headword column from dictionary detection - hw_idx = dict_detection.get("headword_col_index") - if hw_idx is not None: - en_col_type = f"column_{hw_idx + 1}" - if en_col_type: + if txt.strip(): + all_content_cols.add(ct) + if '[' in txt or _text_has_garbled_ipa(txt): + col_ipa_count[ct] = col_ipa_count.get(ct, 0) + 1 + if col_ipa_count: + en_col_type = max(col_ipa_count, key=col_ipa_count.get) + elif ipa_mode in ("all", "de", "en"): + # Force mode without auto-detection: pick column with most cells + col_cell_count: Dict[str, int] = {} for cell in all_cells: - if cell.get("col_type") == en_col_type: - cell["_orig_col_type"] = en_col_type + ct = cell.get("col_type", "") + if ct.startswith("column_") and (cell.get("text") or "").strip(): + col_cell_count[ct] = col_cell_count.get(ct, 0) + 1 + if col_cell_count: + en_col_type = max(col_cell_count, key=col_cell_count.get) + + # Decide which columns to process based on ipa_mode: + # auto/en: only the detected EN headword column + # de: all content columns EXCEPT the EN column + # all: all content columns + ipa_target_cols: set = set() + if ipa_mode in ("auto", "en"): + if en_col_type: + ipa_target_cols.add(en_col_type) + elif ipa_mode == "de": + ipa_target_cols = all_content_cols - {en_col_type} if en_col_type else all_content_cols + elif ipa_mode == "all": + ipa_target_cols = all_content_cols + + if ipa_target_cols: + for cell in all_cells: + if cell.get("col_type") in ipa_target_cols: + cell["_orig_col_type"] = cell["col_type"] cell["col_type"] = "column_en" # Snapshot text before IPA fix to detect which cells were modified _pre_ipa = {id(c): c.get("text", "") for c in all_cells} @@ -1476,24 +1497,31 @@ async def _build_grid_core( # --- Syllable divider insertion for dictionary pages --- # syllable_mode: "auto" = only when original has pipe dividers (1% threshold), - # "all" = force syllabification on all content words, - # "none" = skip entirely. + # "all" = force on all content words, "en" = English column only, + # "de" = German columns only, "none" = skip entirely. syllable_insertions = 0 if syllable_mode != "none" and img_bgr is not None: _syllable_eligible = False - if syllable_mode == "all": + if syllable_mode in ("all", "de", "en"): _syllable_eligible = True elif (dict_detection.get("is_dictionary") and dict_detection.get("article_col_index") is not None): # auto: only on dictionary pages with article columns _syllable_eligible = True + # For language-specific modes, determine allowed columns + _syllable_col_filter: Optional[set] = None # None = all columns + if syllable_mode == "en" and en_col_type: + _syllable_col_filter = {en_col_type} + elif syllable_mode == "de" and en_col_type: + _syllable_col_filter = all_content_cols - {en_col_type} if total_cols >= 3 else None if _syllable_eligible: try: from cv_syllable_detect import insert_syllable_dividers - force_syllables = (syllable_mode == "all") + force_syllables = (syllable_mode in ("all", "de", "en")) syllable_insertions = insert_syllable_dividers( zones_data, img_bgr, session_id, force=force_syllables, + col_filter=_syllable_col_filter, ) except Exception as e: logger.warning("Syllable insertion failed: %s", e) @@ -1538,7 +1566,7 @@ async def _build_grid_core( "processing_modes": { "ipa_mode": ipa_mode, "syllable_mode": syllable_mode, - "ipa_applied": en_col_type is not None and not skip_ipa, + "ipa_applied": bool(ipa_target_cols) if not skip_ipa else False, "syllables_applied": syllable_insertions > 0, }, "duration_seconds": round(duration, 2), @@ -1554,8 +1582,8 @@ async def _build_grid_core( @router.post("/sessions/{session_id}/build-grid") async def build_grid( session_id: str, - ipa_mode: str = Query("auto", pattern="^(auto|all|none)$"), - syllable_mode: str = Query("auto", pattern="^(auto|all|none)$"), + ipa_mode: str = Query("auto", pattern="^(auto|all|de|en|none)$"), + syllable_mode: str = Query("auto", pattern="^(auto|all|de|en|none)$"), ): """Build a structured, zone-aware grid from existing Kombi word results.