Add IPA and syllable mode toggles, fix false IPA on German documents
Some checks failed
CI / go-lint (push) Has been skipped
CI / python-lint (push) Has been skipped
CI / nodejs-lint (push) Has been skipped
CI / test-go-school (push) Successful in 24s
CI / test-go-edu-search (push) Successful in 26s
CI / test-python-klausur (push) Failing after 2m1s
CI / test-python-agent-core (push) Successful in 15s
CI / test-nodejs-website (push) Successful in 15s

Backend: Remove the en_col_type fallback heuristic (longest average text
length) that incorrectly identified German columns as English. In auto mode,
IPA is now applied only when OCR bracket patterns are actually found. Add
ipa_mode (auto/all/none) and syllable_mode (auto/all/none) query params to
the build-grid API.

Frontend: Add IPA and Silben (syllable) dropdown selects to GridToolbar. The
selected modes are passed as query params on rebuild. Auto (labelled
"Original" for syllables) = current smart detection, Alle (all) = force for
all words, Aus (none) = skip entirely.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
Benjamin Admin
2026-03-25 08:04:44 +01:00
parent c42924a94a
commit 34680732f8
6 changed files with 165 additions and 55 deletions

View File

@@ -36,6 +36,10 @@ export function GridEditor({ sessionId, onNext }: GridEditorProps) {
addColumn, addColumn,
deleteRow, deleteRow,
addRow, addRow,
ipaMode,
setIpaMode,
syllableMode,
setSyllableMode,
} = useGridEditor(sessionId) } = useGridEditor(sessionId)
const [showOverlay, setShowOverlay] = useState(false) const [showOverlay, setShowOverlay] = useState(false)
@@ -183,11 +187,15 @@ export function GridEditor({ sessionId, onNext }: GridEditorProps) {
canUndo={canUndo} canUndo={canUndo}
canRedo={canRedo} canRedo={canRedo}
showOverlay={showOverlay} showOverlay={showOverlay}
ipaMode={ipaMode}
syllableMode={syllableMode}
onSave={saveGrid} onSave={saveGrid}
onUndo={undo} onUndo={undo}
onRedo={redo} onRedo={redo}
onRebuild={buildGrid} onRebuild={buildGrid}
onToggleOverlay={() => setShowOverlay(!showOverlay)} onToggleOverlay={() => setShowOverlay(!showOverlay)}
onIpaModeChange={setIpaMode}
onSyllableModeChange={setSyllableMode}
/> />
</div> </div>

View File

@@ -1,16 +1,34 @@
'use client' 'use client'
import type { IpaMode, SyllableMode } from './useGridEditor'
interface GridToolbarProps { interface GridToolbarProps {
dirty: boolean dirty: boolean
saving: boolean saving: boolean
canUndo: boolean canUndo: boolean
canRedo: boolean canRedo: boolean
showOverlay: boolean showOverlay: boolean
ipaMode: IpaMode
syllableMode: SyllableMode
onSave: () => void onSave: () => void
onUndo: () => void onUndo: () => void
onRedo: () => void onRedo: () => void
onRebuild: () => void onRebuild: () => void
onToggleOverlay: () => void onToggleOverlay: () => void
onIpaModeChange: (mode: IpaMode) => void
onSyllableModeChange: (mode: SyllableMode) => void
}
const IPA_LABELS: Record<IpaMode, string> = {
auto: 'IPA: Auto',
all: 'IPA: Alle',
none: 'IPA: Aus',
}
const SYLLABLE_LABELS: Record<SyllableMode, string> = {
auto: 'Silben: Original',
all: 'Silben: Alle',
none: 'Silben: Aus',
} }
export function GridToolbar({ export function GridToolbar({
@@ -19,11 +37,15 @@ export function GridToolbar({
canUndo, canUndo,
canRedo, canRedo,
showOverlay, showOverlay,
ipaMode,
syllableMode,
onSave, onSave,
onUndo, onUndo,
onRedo, onRedo,
onRebuild, onRebuild,
onToggleOverlay, onToggleOverlay,
onIpaModeChange,
onSyllableModeChange,
}: GridToolbarProps) { }: GridToolbarProps) {
return ( return (
<div className="flex items-center gap-2 flex-wrap"> <div className="flex items-center gap-2 flex-wrap">
@@ -67,6 +89,30 @@ export function GridToolbar({
Bild-Overlay Bild-Overlay
</button> </button>
{/* IPA mode */}
<select
value={ipaMode}
onChange={(e) => onIpaModeChange(e.target.value as IpaMode)}
className="px-2 py-1.5 text-xs rounded-md border border-gray-200 dark:border-gray-700 bg-white dark:bg-gray-800 text-gray-600 dark:text-gray-400"
title="Lautschrift (IPA): Auto = nur bei erkannten englischen Woertern, Alle = fuer alle Vokabeln, Aus = keine"
>
{(Object.keys(IPA_LABELS) as IpaMode[]).map((m) => (
<option key={m} value={m}>{IPA_LABELS[m]}</option>
))}
</select>
{/* Syllable mode */}
<select
value={syllableMode}
onChange={(e) => onSyllableModeChange(e.target.value as SyllableMode)}
className="px-2 py-1.5 text-xs rounded-md border border-gray-200 dark:border-gray-700 bg-white dark:bg-gray-800 text-gray-600 dark:text-gray-400"
title="Silbentrennung: Original = nur wo im Scan vorhanden, Alle = fuer alle Woerter, Aus = keine"
>
{(Object.keys(SYLLABLE_LABELS) as SyllableMode[]).map((m) => (
<option key={m} value={m}>{SYLLABLE_LABELS[m]}</option>
))}
</select>
{/* Rebuild */} {/* Rebuild */}
<button <button
onClick={onRebuild} onClick={onRebuild}

View File

@@ -14,6 +14,9 @@ export interface GridEditorState {
selectedZone: number | null selectedZone: number | null
} }
export type IpaMode = 'auto' | 'all' | 'none'
export type SyllableMode = 'auto' | 'all' | 'none'
export function useGridEditor(sessionId: string | null) { export function useGridEditor(sessionId: string | null) {
const [grid, setGrid] = useState<StructuredGrid | null>(null) const [grid, setGrid] = useState<StructuredGrid | null>(null)
const [loading, setLoading] = useState(false) const [loading, setLoading] = useState(false)
@@ -22,6 +25,8 @@ export function useGridEditor(sessionId: string | null) {
const [dirty, setDirty] = useState(false) const [dirty, setDirty] = useState(false)
const [selectedCell, setSelectedCell] = useState<string | null>(null) const [selectedCell, setSelectedCell] = useState<string | null>(null)
const [selectedZone, setSelectedZone] = useState<number | null>(null) const [selectedZone, setSelectedZone] = useState<number | null>(null)
const [ipaMode, setIpaMode] = useState<IpaMode>('auto')
const [syllableMode, setSyllableMode] = useState<SyllableMode>('auto')
// Undo/redo stacks store serialized zone arrays // Undo/redo stacks store serialized zone arrays
const undoStack = useRef<string[]>([]) const undoStack = useRef<string[]>([])
@@ -44,8 +49,11 @@ export function useGridEditor(sessionId: string | null) {
setLoading(true) setLoading(true)
setError(null) setError(null)
try { try {
const params = new URLSearchParams()
params.set('ipa_mode', ipaMode)
params.set('syllable_mode', syllableMode)
const res = await fetch( const res = await fetch(
`${KLAUSUR_API}/api/v1/ocr-pipeline/sessions/${sessionId}/build-grid`, `${KLAUSUR_API}/api/v1/ocr-pipeline/sessions/${sessionId}/build-grid?${params}`,
{ method: 'POST' }, { method: 'POST' },
) )
if (!res.ok) { if (!res.ok) {
@@ -62,7 +70,7 @@ export function useGridEditor(sessionId: string | null) {
} finally { } finally {
setLoading(false) setLoading(false)
} }
}, [sessionId]) }, [sessionId, ipaMode, syllableMode])
const loadGrid = useCallback(async () => { const loadGrid = useCallback(async () => {
if (!sessionId) return if (!sessionId) return
@@ -915,5 +923,9 @@ export function useGridEditor(sessionId: string | null) {
toggleSelectedBold, toggleSelectedBold,
autoCorrectColumnPatterns, autoCorrectColumnPatterns,
setCellColor, setCellColor,
ipaMode,
setIpaMode,
syllableMode,
setSyllableMode,
} }
} }

View File

@@ -57,6 +57,10 @@ export function StepGridReview({ sessionId, onNext, saveRef }: StepGridReviewPro
toggleSelectedBold, toggleSelectedBold,
autoCorrectColumnPatterns, autoCorrectColumnPatterns,
setCellColor, setCellColor,
ipaMode,
setIpaMode,
syllableMode,
setSyllableMode,
} = useGridEditor(sessionId) } = useGridEditor(sessionId)
const [showImage, setShowImage] = useState(true) const [showImage, setShowImage] = useState(true)
@@ -283,11 +287,15 @@ export function StepGridReview({ sessionId, onNext, saveRef }: StepGridReviewPro
canUndo={canUndo} canUndo={canUndo}
canRedo={canRedo} canRedo={canRedo}
showOverlay={false} showOverlay={false}
ipaMode={ipaMode}
syllableMode={syllableMode}
onSave={saveGrid} onSave={saveGrid}
onUndo={undo} onUndo={undo}
onRedo={redo} onRedo={redo}
onRebuild={buildGrid} onRebuild={buildGrid}
onToggleOverlay={() => setShowImage(!showImage)} onToggleOverlay={() => setShowImage(!showImage)}
onIpaModeChange={setIpaMode}
onSyllableModeChange={setSyllableMode}
/> />
</div> </div>

View File

@@ -194,6 +194,8 @@ def insert_syllable_dividers(
zones_data: List[Dict], zones_data: List[Dict],
img_bgr: np.ndarray, img_bgr: np.ndarray,
session_id: str, session_id: str,
*,
force: bool = False,
) -> int: ) -> int:
"""Insert pipe syllable dividers into dictionary cells. """Insert pipe syllable dividers into dictionary cells.
@@ -204,6 +206,10 @@ def insert_syllable_dividers(
OCR. This guards against pages with zero pipe characters (the primary OCR. This guards against pages with zero pipe characters (the primary
guard — article_col_index — is checked at the call site). guard — article_col_index — is checked at the call site).
Args:
force: If True, skip the pipe-ratio pre-check and syllabify all
content words regardless of whether the original has pipe dividers.
Returns the number of cells modified. Returns the number of cells modified.
""" """
hyph_de, hyph_en = _get_hyphenators() hyph_de, hyph_en = _get_hyphenators()
@@ -215,24 +221,25 @@ def insert_syllable_dividers(
# Real dictionary pages with printed syllable dividers will have OCR- # Real dictionary pages with printed syllable dividers will have OCR-
# detected pipes in many cells. Pages without syllable dividers will # detected pipes in many cells. Pages without syllable dividers will
# have zero — skip those to avoid false syllabification. # have zero — skip those to avoid false syllabification.
total_col_cells = 0 if not force:
cells_with_pipes = 0 total_col_cells = 0
for z in zones_data: cells_with_pipes = 0
for cell in z.get("cells", []): for z in zones_data:
if cell.get("col_type", "").startswith("column_"): for cell in z.get("cells", []):
total_col_cells += 1 if cell.get("col_type", "").startswith("column_"):
if "|" in cell.get("text", ""): total_col_cells += 1
cells_with_pipes += 1 if "|" in cell.get("text", ""):
cells_with_pipes += 1
if total_col_cells > 0: if total_col_cells > 0:
pipe_ratio = cells_with_pipes / total_col_cells pipe_ratio = cells_with_pipes / total_col_cells
if pipe_ratio < 0.01: if pipe_ratio < 0.01:
logger.info( logger.info(
"build-grid session %s: skipping syllable insertion — " "build-grid session %s: skipping syllable insertion — "
"only %.1f%% of cells have existing pipes (need >=1%%)", "only %.1f%% of cells have existing pipes (need >=1%%)",
session_id, pipe_ratio * 100, session_id, pipe_ratio * 100,
) )
return 0 return 0
insertions = 0 insertions = 0
for z in zones_data: for z in zones_data:

View File

@@ -18,7 +18,7 @@ from typing import Any, Dict, List, Optional, Tuple
import cv2 import cv2
import numpy as np import numpy as np
from fastapi import APIRouter, HTTPException, Request from fastapi import APIRouter, HTTPException, Query, Request
from cv_box_detect import detect_boxes, split_page_into_zones from cv_box_detect import detect_boxes, split_page_into_zones
from cv_graphic_detect import detect_graphic_elements from cv_graphic_detect import detect_graphic_elements
@@ -67,12 +67,22 @@ router = APIRouter(prefix="/api/v1/ocr-pipeline", tags=["grid-editor"])
# Core computation (used by build-grid endpoint and regression tests) # Core computation (used by build-grid endpoint and regression tests)
# --------------------------------------------------------------------------- # ---------------------------------------------------------------------------
async def _build_grid_core(session_id: str, session: dict) -> dict: async def _build_grid_core(
session_id: str,
session: dict,
*,
ipa_mode: str = "auto",
syllable_mode: str = "auto",
) -> dict:
"""Core grid building logic — pure computation, no HTTP or DB side effects. """Core grid building logic — pure computation, no HTTP or DB side effects.
Args: Args:
session_id: Session identifier (for logging and image loading). session_id: Session identifier (for logging and image loading).
session: Full session dict from get_session_db(). session: Full session dict from get_session_db().
ipa_mode: "auto" (only when English headwords detected), "all"
(force IPA on all content columns), or "none" (skip IPA entirely).
syllable_mode: "auto" (only when original has pipe dividers),
"all" (force syllabification on all words), or "none" (skip).
Returns: Returns:
StructuredGrid result dict. StructuredGrid result dict.
@@ -859,32 +869,28 @@ async def _build_grid_core(session_id: str, session: dict) -> dict:
all_cells = [cell for z in zones_data for cell in z.get("cells", [])] all_cells = [cell for z in zones_data for cell in z.get("cells", [])]
total_cols = sum(len(z.get("columns", [])) for z in zones_data) total_cols = sum(len(z.get("columns", [])) for z in zones_data)
en_col_type = None en_col_type = None
if total_cols >= 3: skip_ipa = (ipa_mode == "none")
if not skip_ipa and total_cols >= 3:
# Find the column that contains IPA brackets → English headwords. # Find the column that contains IPA brackets → English headwords.
# Count cells with bracket patterns per col_type. The column with # Count cells with bracket patterns per col_type. The column with
# the most brackets is the headword column (IPA sits after or below # the most brackets is the headword column (IPA sits after or below
# headwords). Falls back to longest-average if no brackets found. # headwords).
col_bracket_count: Dict[str, int] = {} col_bracket_count: Dict[str, int] = {}
col_avg_len: Dict[str, List[int]] = {}
for cell in all_cells: for cell in all_cells:
ct = cell.get("col_type", "") ct = cell.get("col_type", "")
txt = cell.get("text", "") or "" txt = cell.get("text", "") or ""
col_avg_len.setdefault(ct, []).append(len(txt))
if ct.startswith("column_") and '[' in txt: if ct.startswith("column_") and '[' in txt:
col_bracket_count[ct] = col_bracket_count.get(ct, 0) + 1 col_bracket_count[ct] = col_bracket_count.get(ct, 0) + 1
# Pick column with most bracket IPA patterns # Pick column with most bracket IPA patterns.
# ipa_mode="auto": only when OCR already found bracket IPA (no fallback).
# ipa_mode="all": fallback to headword_col_index from dictionary detection.
if col_bracket_count: if col_bracket_count:
en_col_type = max(col_bracket_count, key=col_bracket_count.get) en_col_type = max(col_bracket_count, key=col_bracket_count.get)
else: elif ipa_mode == "all":
# Fallback: longest average text # Force IPA: use headword column from dictionary detection
best_avg = 0 hw_idx = dict_detection.get("headword_col_index")
for ct, lengths in col_avg_len.items(): if hw_idx is not None:
if not ct.startswith("column_"): en_col_type = f"column_{hw_idx + 1}"
continue
avg = sum(lengths) / len(lengths) if lengths else 0
if avg > best_avg:
best_avg = avg
en_col_type = ct
if en_col_type: if en_col_type:
for cell in all_cells: for cell in all_cells:
if cell.get("col_type") == en_col_type: if cell.get("col_type") == en_col_type:
@@ -912,7 +918,7 @@ async def _build_grid_core(session_id: str, session: dict) -> dict:
# the EN headword column may not be the longest-average column. # the EN headword column may not be the longest-average column.
_REAL_IPA_CHARS = set("ˈˌəɪɛɒʊʌæɑɔʃʒθðŋ") _REAL_IPA_CHARS = set("ˈˌəɪɛɒʊʌæɑɔʃʒθðŋ")
ipa_cont_fixed = 0 ipa_cont_fixed = 0
for z in zones_data: for z in ([] if skip_ipa else zones_data):
rows_sorted = sorted(z.get("rows", []), key=lambda r: r["index"]) rows_sorted = sorted(z.get("rows", []), key=lambda r: r["index"])
z_cells = z.get("cells", []) z_cells = z.get("cells", [])
for idx, row in enumerate(rows_sorted): for idx, row in enumerate(rows_sorted):
@@ -1110,7 +1116,7 @@ async def _build_grid_core(session_id: str, session: dict) -> dict:
# Reject matches that look like grammar: "sb/sth up a) jdn/" # Reject matches that look like grammar: "sb/sth up a) jdn/"
_SLASH_IPA_REJECT_RE = re.compile(r'[\s(),]') _SLASH_IPA_REJECT_RE = re.compile(r'[\s(),]')
slash_ipa_fixed = 0 slash_ipa_fixed = 0
for z in zones_data: for z in ([] if skip_ipa else zones_data):
for cell in z.get("cells", []): for cell in z.get("cells", []):
# Only process English headword column — avoid converting # Only process English headword column — avoid converting
# German text like "der/die/das" to IPA. # German text like "der/die/das" to IPA.
@@ -1469,22 +1475,28 @@ async def _build_grid_core(session_id: str, session: dict) -> dict:
logger.warning("Dictionary detection failed: %s", e) logger.warning("Dictionary detection failed: %s", e)
# --- Syllable divider insertion for dictionary pages --- # --- Syllable divider insertion for dictionary pages ---
# Only on confirmed dictionary pages with article columns (der/die/das). # syllable_mode: "auto" = only when original has pipe dividers (1% threshold),
# The article_col_index check avoids false positives on synonym lists, # "all" = force syllabification on all content words,
# word frequency tables, and other alphabetically sorted non-dictionary pages. # "none" = skip entirely.
# Additionally, insert_syllable_dividers has its own pre-check for existing
# pipe characters in cells (OCR must have already found some).
syllable_insertions = 0 syllable_insertions = 0
if (dict_detection.get("is_dictionary") if syllable_mode != "none" and img_bgr is not None:
and dict_detection.get("article_col_index") is not None _syllable_eligible = False
and img_bgr is not None): if syllable_mode == "all":
try: _syllable_eligible = True
from cv_syllable_detect import insert_syllable_dividers elif (dict_detection.get("is_dictionary")
syllable_insertions = insert_syllable_dividers( and dict_detection.get("article_col_index") is not None):
zones_data, img_bgr, session_id, # auto: only on dictionary pages with article columns
) _syllable_eligible = True
except Exception as e: if _syllable_eligible:
logger.warning("Syllable insertion failed: %s", e) try:
from cv_syllable_detect import insert_syllable_dividers
force_syllables = (syllable_mode == "all")
syllable_insertions = insert_syllable_dividers(
zones_data, img_bgr, session_id,
force=force_syllables,
)
except Exception as e:
logger.warning("Syllable insertion failed: %s", e)
# Clean up internal flags before returning # Clean up internal flags before returning
for z in zones_data: for z in zones_data:
@@ -1523,6 +1535,12 @@ async def _build_grid_core(session_id: str, session: dict) -> dict:
"article_col_index": dict_detection.get("article_col_index"), "article_col_index": dict_detection.get("article_col_index"),
"headword_col_index": dict_detection.get("headword_col_index"), "headword_col_index": dict_detection.get("headword_col_index"),
}, },
"processing_modes": {
"ipa_mode": ipa_mode,
"syllable_mode": syllable_mode,
"ipa_applied": en_col_type is not None and not skip_ipa,
"syllables_applied": syllable_insertions > 0,
},
"duration_seconds": round(duration, 2), "duration_seconds": round(duration, 2),
} }
@@ -1534,12 +1552,20 @@ async def _build_grid_core(session_id: str, session: dict) -> dict:
# --------------------------------------------------------------------------- # ---------------------------------------------------------------------------
@router.post("/sessions/{session_id}/build-grid") @router.post("/sessions/{session_id}/build-grid")
async def build_grid(session_id: str): async def build_grid(
session_id: str,
ipa_mode: str = Query("auto", pattern="^(auto|all|none)$"),
syllable_mode: str = Query("auto", pattern="^(auto|all|none)$"),
):
"""Build a structured, zone-aware grid from existing Kombi word results. """Build a structured, zone-aware grid from existing Kombi word results.
Requires that paddle-kombi or rapid-kombi has already been run on the session. Requires that paddle-kombi or rapid-kombi has already been run on the session.
Uses the image for box detection and the word positions for grid structuring. Uses the image for box detection and the word positions for grid structuring.
Query params:
ipa_mode: "auto" (only when English IPA detected), "all" (force), "none" (skip)
syllable_mode: "auto" (only when original has dividers), "all" (force), "none" (skip)
Returns a StructuredGrid with zones, each containing their own Returns a StructuredGrid with zones, each containing their own
columns, rows, and cells — ready for the frontend Excel-like editor. columns, rows, and cells — ready for the frontend Excel-like editor.
""" """
@@ -1548,7 +1574,10 @@ async def build_grid(session_id: str):
raise HTTPException(status_code=404, detail=f"Session {session_id} not found") raise HTTPException(status_code=404, detail=f"Session {session_id} not found")
try: try:
result = await _build_grid_core(session_id, session) result = await _build_grid_core(
session_id, session,
ipa_mode=ipa_mode, syllable_mode=syllable_mode,
)
except ValueError as e: except ValueError as e:
raise HTTPException(status_code=400, detail=str(e)) raise HTTPException(status_code=400, detail=str(e))