Add IPA and syllable mode toggles, fix false IPA on German documents
Some checks failed
CI / go-lint (push) Has been skipped
CI / python-lint (push) Has been skipped
CI / nodejs-lint (push) Has been skipped
CI / test-go-school (push) Successful in 24s
CI / test-go-edu-search (push) Successful in 26s
CI / test-python-klausur (push) Failing after 2m1s
CI / test-python-agent-core (push) Successful in 15s
CI / test-nodejs-website (push) Successful in 15s
Some checks failed
CI / go-lint (push) Has been skipped
CI / python-lint (push) Has been skipped
CI / nodejs-lint (push) Has been skipped
CI / test-go-school (push) Successful in 24s
CI / test-go-edu-search (push) Successful in 26s
CI / test-python-klausur (push) Failing after 2m1s
CI / test-python-agent-core (push) Successful in 15s
CI / test-nodejs-website (push) Successful in 15s
Backend: Remove the en_col_type fallback heuristic (longest average text length) that incorrectly identified German columns as English. IPA is now applied only when OCR bracket patterns are actually found. Add ipa_mode (auto/all/none) and syllable_mode (auto/all/none) query params to the build-grid API.
Frontend: Add IPA and Silben (syllable) dropdown selects to GridToolbar. The selected modes are passed as query params on rebuild. auto = current smart detection, all = force for all words, none (labelled "Aus" in the UI) = skip entirely.
Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -36,6 +36,10 @@ export function GridEditor({ sessionId, onNext }: GridEditorProps) {
|
|||||||
addColumn,
|
addColumn,
|
||||||
deleteRow,
|
deleteRow,
|
||||||
addRow,
|
addRow,
|
||||||
|
ipaMode,
|
||||||
|
setIpaMode,
|
||||||
|
syllableMode,
|
||||||
|
setSyllableMode,
|
||||||
} = useGridEditor(sessionId)
|
} = useGridEditor(sessionId)
|
||||||
|
|
||||||
const [showOverlay, setShowOverlay] = useState(false)
|
const [showOverlay, setShowOverlay] = useState(false)
|
||||||
@@ -183,11 +187,15 @@ export function GridEditor({ sessionId, onNext }: GridEditorProps) {
|
|||||||
canUndo={canUndo}
|
canUndo={canUndo}
|
||||||
canRedo={canRedo}
|
canRedo={canRedo}
|
||||||
showOverlay={showOverlay}
|
showOverlay={showOverlay}
|
||||||
|
ipaMode={ipaMode}
|
||||||
|
syllableMode={syllableMode}
|
||||||
onSave={saveGrid}
|
onSave={saveGrid}
|
||||||
onUndo={undo}
|
onUndo={undo}
|
||||||
onRedo={redo}
|
onRedo={redo}
|
||||||
onRebuild={buildGrid}
|
onRebuild={buildGrid}
|
||||||
onToggleOverlay={() => setShowOverlay(!showOverlay)}
|
onToggleOverlay={() => setShowOverlay(!showOverlay)}
|
||||||
|
onIpaModeChange={setIpaMode}
|
||||||
|
onSyllableModeChange={setSyllableMode}
|
||||||
/>
|
/>
|
||||||
</div>
|
</div>
|
||||||
|
|
||||||
|
|||||||
@@ -1,16 +1,34 @@
|
|||||||
'use client'
|
'use client'
|
||||||
|
|
||||||
|
import type { IpaMode, SyllableMode } from './useGridEditor'
|
||||||
|
|
||||||
interface GridToolbarProps {
|
interface GridToolbarProps {
|
||||||
dirty: boolean
|
dirty: boolean
|
||||||
saving: boolean
|
saving: boolean
|
||||||
canUndo: boolean
|
canUndo: boolean
|
||||||
canRedo: boolean
|
canRedo: boolean
|
||||||
showOverlay: boolean
|
showOverlay: boolean
|
||||||
|
ipaMode: IpaMode
|
||||||
|
syllableMode: SyllableMode
|
||||||
onSave: () => void
|
onSave: () => void
|
||||||
onUndo: () => void
|
onUndo: () => void
|
||||||
onRedo: () => void
|
onRedo: () => void
|
||||||
onRebuild: () => void
|
onRebuild: () => void
|
||||||
onToggleOverlay: () => void
|
onToggleOverlay: () => void
|
||||||
|
onIpaModeChange: (mode: IpaMode) => void
|
||||||
|
onSyllableModeChange: (mode: SyllableMode) => void
|
||||||
|
}
|
||||||
|
|
||||||
|
const IPA_LABELS: Record<IpaMode, string> = {
|
||||||
|
auto: 'IPA: Auto',
|
||||||
|
all: 'IPA: Alle',
|
||||||
|
none: 'IPA: Aus',
|
||||||
|
}
|
||||||
|
|
||||||
|
const SYLLABLE_LABELS: Record<SyllableMode, string> = {
|
||||||
|
auto: 'Silben: Original',
|
||||||
|
all: 'Silben: Alle',
|
||||||
|
none: 'Silben: Aus',
|
||||||
}
|
}
|
||||||
|
|
||||||
export function GridToolbar({
|
export function GridToolbar({
|
||||||
@@ -19,11 +37,15 @@ export function GridToolbar({
|
|||||||
canUndo,
|
canUndo,
|
||||||
canRedo,
|
canRedo,
|
||||||
showOverlay,
|
showOverlay,
|
||||||
|
ipaMode,
|
||||||
|
syllableMode,
|
||||||
onSave,
|
onSave,
|
||||||
onUndo,
|
onUndo,
|
||||||
onRedo,
|
onRedo,
|
||||||
onRebuild,
|
onRebuild,
|
||||||
onToggleOverlay,
|
onToggleOverlay,
|
||||||
|
onIpaModeChange,
|
||||||
|
onSyllableModeChange,
|
||||||
}: GridToolbarProps) {
|
}: GridToolbarProps) {
|
||||||
return (
|
return (
|
||||||
<div className="flex items-center gap-2 flex-wrap">
|
<div className="flex items-center gap-2 flex-wrap">
|
||||||
@@ -67,6 +89,30 @@ export function GridToolbar({
|
|||||||
Bild-Overlay
|
Bild-Overlay
|
||||||
</button>
|
</button>
|
||||||
|
|
||||||
|
{/* IPA mode */}
|
||||||
|
<select
|
||||||
|
value={ipaMode}
|
||||||
|
onChange={(e) => onIpaModeChange(e.target.value as IpaMode)}
|
||||||
|
className="px-2 py-1.5 text-xs rounded-md border border-gray-200 dark:border-gray-700 bg-white dark:bg-gray-800 text-gray-600 dark:text-gray-400"
|
||||||
|
title="Lautschrift (IPA): Auto = nur bei erkannten englischen Woertern, Alle = fuer alle Vokabeln, Aus = keine"
|
||||||
|
>
|
||||||
|
{(Object.keys(IPA_LABELS) as IpaMode[]).map((m) => (
|
||||||
|
<option key={m} value={m}>{IPA_LABELS[m]}</option>
|
||||||
|
))}
|
||||||
|
</select>
|
||||||
|
|
||||||
|
{/* Syllable mode */}
|
||||||
|
<select
|
||||||
|
value={syllableMode}
|
||||||
|
onChange={(e) => onSyllableModeChange(e.target.value as SyllableMode)}
|
||||||
|
className="px-2 py-1.5 text-xs rounded-md border border-gray-200 dark:border-gray-700 bg-white dark:bg-gray-800 text-gray-600 dark:text-gray-400"
|
||||||
|
title="Silbentrennung: Original = nur wo im Scan vorhanden, Alle = fuer alle Woerter, Aus = keine"
|
||||||
|
>
|
||||||
|
{(Object.keys(SYLLABLE_LABELS) as SyllableMode[]).map((m) => (
|
||||||
|
<option key={m} value={m}>{SYLLABLE_LABELS[m]}</option>
|
||||||
|
))}
|
||||||
|
</select>
|
||||||
|
|
||||||
{/* Rebuild */}
|
{/* Rebuild */}
|
||||||
<button
|
<button
|
||||||
onClick={onRebuild}
|
onClick={onRebuild}
|
||||||
|
|||||||
@@ -14,6 +14,9 @@ export interface GridEditorState {
|
|||||||
selectedZone: number | null
|
selectedZone: number | null
|
||||||
}
|
}
|
||||||
|
|
||||||
|
export type IpaMode = 'auto' | 'all' | 'none'
|
||||||
|
export type SyllableMode = 'auto' | 'all' | 'none'
|
||||||
|
|
||||||
export function useGridEditor(sessionId: string | null) {
|
export function useGridEditor(sessionId: string | null) {
|
||||||
const [grid, setGrid] = useState<StructuredGrid | null>(null)
|
const [grid, setGrid] = useState<StructuredGrid | null>(null)
|
||||||
const [loading, setLoading] = useState(false)
|
const [loading, setLoading] = useState(false)
|
||||||
@@ -22,6 +25,8 @@ export function useGridEditor(sessionId: string | null) {
|
|||||||
const [dirty, setDirty] = useState(false)
|
const [dirty, setDirty] = useState(false)
|
||||||
const [selectedCell, setSelectedCell] = useState<string | null>(null)
|
const [selectedCell, setSelectedCell] = useState<string | null>(null)
|
||||||
const [selectedZone, setSelectedZone] = useState<number | null>(null)
|
const [selectedZone, setSelectedZone] = useState<number | null>(null)
|
||||||
|
const [ipaMode, setIpaMode] = useState<IpaMode>('auto')
|
||||||
|
const [syllableMode, setSyllableMode] = useState<SyllableMode>('auto')
|
||||||
|
|
||||||
// Undo/redo stacks store serialized zone arrays
|
// Undo/redo stacks store serialized zone arrays
|
||||||
const undoStack = useRef<string[]>([])
|
const undoStack = useRef<string[]>([])
|
||||||
@@ -44,8 +49,11 @@ export function useGridEditor(sessionId: string | null) {
|
|||||||
setLoading(true)
|
setLoading(true)
|
||||||
setError(null)
|
setError(null)
|
||||||
try {
|
try {
|
||||||
|
const params = new URLSearchParams()
|
||||||
|
params.set('ipa_mode', ipaMode)
|
||||||
|
params.set('syllable_mode', syllableMode)
|
||||||
const res = await fetch(
|
const res = await fetch(
|
||||||
`${KLAUSUR_API}/api/v1/ocr-pipeline/sessions/${sessionId}/build-grid`,
|
`${KLAUSUR_API}/api/v1/ocr-pipeline/sessions/${sessionId}/build-grid?${params}`,
|
||||||
{ method: 'POST' },
|
{ method: 'POST' },
|
||||||
)
|
)
|
||||||
if (!res.ok) {
|
if (!res.ok) {
|
||||||
@@ -62,7 +70,7 @@ export function useGridEditor(sessionId: string | null) {
|
|||||||
} finally {
|
} finally {
|
||||||
setLoading(false)
|
setLoading(false)
|
||||||
}
|
}
|
||||||
}, [sessionId])
|
}, [sessionId, ipaMode, syllableMode])
|
||||||
|
|
||||||
const loadGrid = useCallback(async () => {
|
const loadGrid = useCallback(async () => {
|
||||||
if (!sessionId) return
|
if (!sessionId) return
|
||||||
@@ -915,5 +923,9 @@ export function useGridEditor(sessionId: string | null) {
|
|||||||
toggleSelectedBold,
|
toggleSelectedBold,
|
||||||
autoCorrectColumnPatterns,
|
autoCorrectColumnPatterns,
|
||||||
setCellColor,
|
setCellColor,
|
||||||
|
ipaMode,
|
||||||
|
setIpaMode,
|
||||||
|
syllableMode,
|
||||||
|
setSyllableMode,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -57,6 +57,10 @@ export function StepGridReview({ sessionId, onNext, saveRef }: StepGridReviewPro
|
|||||||
toggleSelectedBold,
|
toggleSelectedBold,
|
||||||
autoCorrectColumnPatterns,
|
autoCorrectColumnPatterns,
|
||||||
setCellColor,
|
setCellColor,
|
||||||
|
ipaMode,
|
||||||
|
setIpaMode,
|
||||||
|
syllableMode,
|
||||||
|
setSyllableMode,
|
||||||
} = useGridEditor(sessionId)
|
} = useGridEditor(sessionId)
|
||||||
|
|
||||||
const [showImage, setShowImage] = useState(true)
|
const [showImage, setShowImage] = useState(true)
|
||||||
@@ -283,11 +287,15 @@ export function StepGridReview({ sessionId, onNext, saveRef }: StepGridReviewPro
|
|||||||
canUndo={canUndo}
|
canUndo={canUndo}
|
||||||
canRedo={canRedo}
|
canRedo={canRedo}
|
||||||
showOverlay={false}
|
showOverlay={false}
|
||||||
|
ipaMode={ipaMode}
|
||||||
|
syllableMode={syllableMode}
|
||||||
onSave={saveGrid}
|
onSave={saveGrid}
|
||||||
onUndo={undo}
|
onUndo={undo}
|
||||||
onRedo={redo}
|
onRedo={redo}
|
||||||
onRebuild={buildGrid}
|
onRebuild={buildGrid}
|
||||||
onToggleOverlay={() => setShowImage(!showImage)}
|
onToggleOverlay={() => setShowImage(!showImage)}
|
||||||
|
onIpaModeChange={setIpaMode}
|
||||||
|
onSyllableModeChange={setSyllableMode}
|
||||||
/>
|
/>
|
||||||
</div>
|
</div>
|
||||||
|
|
||||||
|
|||||||
@@ -194,6 +194,8 @@ def insert_syllable_dividers(
|
|||||||
zones_data: List[Dict],
|
zones_data: List[Dict],
|
||||||
img_bgr: np.ndarray,
|
img_bgr: np.ndarray,
|
||||||
session_id: str,
|
session_id: str,
|
||||||
|
*,
|
||||||
|
force: bool = False,
|
||||||
) -> int:
|
) -> int:
|
||||||
"""Insert pipe syllable dividers into dictionary cells.
|
"""Insert pipe syllable dividers into dictionary cells.
|
||||||
|
|
||||||
@@ -204,6 +206,10 @@ def insert_syllable_dividers(
|
|||||||
OCR. This guards against pages with zero pipe characters (the primary
|
OCR. This guards against pages with zero pipe characters (the primary
|
||||||
guard — article_col_index — is checked at the call site).
|
guard — article_col_index — is checked at the call site).
|
||||||
|
|
||||||
|
Args:
|
||||||
|
force: If True, skip the pipe-ratio pre-check and syllabify all
|
||||||
|
content words regardless of whether the original has pipe dividers.
|
||||||
|
|
||||||
Returns the number of cells modified.
|
Returns the number of cells modified.
|
||||||
"""
|
"""
|
||||||
hyph_de, hyph_en = _get_hyphenators()
|
hyph_de, hyph_en = _get_hyphenators()
|
||||||
@@ -215,6 +221,7 @@ def insert_syllable_dividers(
|
|||||||
# Real dictionary pages with printed syllable dividers will have OCR-
|
# Real dictionary pages with printed syllable dividers will have OCR-
|
||||||
# detected pipes in many cells. Pages without syllable dividers will
|
# detected pipes in many cells. Pages without syllable dividers will
|
||||||
# have zero — skip those to avoid false syllabification.
|
# have zero — skip those to avoid false syllabification.
|
||||||
|
if not force:
|
||||||
total_col_cells = 0
|
total_col_cells = 0
|
||||||
cells_with_pipes = 0
|
cells_with_pipes = 0
|
||||||
for z in zones_data:
|
for z in zones_data:
|
||||||
|
|||||||
@@ -18,7 +18,7 @@ from typing import Any, Dict, List, Optional, Tuple
|
|||||||
|
|
||||||
import cv2
|
import cv2
|
||||||
import numpy as np
|
import numpy as np
|
||||||
from fastapi import APIRouter, HTTPException, Request
|
from fastapi import APIRouter, HTTPException, Query, Request
|
||||||
|
|
||||||
from cv_box_detect import detect_boxes, split_page_into_zones
|
from cv_box_detect import detect_boxes, split_page_into_zones
|
||||||
from cv_graphic_detect import detect_graphic_elements
|
from cv_graphic_detect import detect_graphic_elements
|
||||||
@@ -67,12 +67,22 @@ router = APIRouter(prefix="/api/v1/ocr-pipeline", tags=["grid-editor"])
|
|||||||
# Core computation (used by build-grid endpoint and regression tests)
|
# Core computation (used by build-grid endpoint and regression tests)
|
||||||
# ---------------------------------------------------------------------------
|
# ---------------------------------------------------------------------------
|
||||||
|
|
||||||
async def _build_grid_core(session_id: str, session: dict) -> dict:
|
async def _build_grid_core(
|
||||||
|
session_id: str,
|
||||||
|
session: dict,
|
||||||
|
*,
|
||||||
|
ipa_mode: str = "auto",
|
||||||
|
syllable_mode: str = "auto",
|
||||||
|
) -> dict:
|
||||||
"""Core grid building logic — pure computation, no HTTP or DB side effects.
|
"""Core grid building logic — pure computation, no HTTP or DB side effects.
|
||||||
|
|
||||||
Args:
|
Args:
|
||||||
session_id: Session identifier (for logging and image loading).
|
session_id: Session identifier (for logging and image loading).
|
||||||
session: Full session dict from get_session_db().
|
session: Full session dict from get_session_db().
|
||||||
|
ipa_mode: "auto" (only when English headwords detected), "all"
|
||||||
|
(force IPA on all content columns), or "none" (skip IPA entirely).
|
||||||
|
syllable_mode: "auto" (only when original has pipe dividers),
|
||||||
|
"all" (force syllabification on all words), or "none" (skip).
|
||||||
|
|
||||||
Returns:
|
Returns:
|
||||||
StructuredGrid result dict.
|
StructuredGrid result dict.
|
||||||
@@ -859,32 +869,28 @@ async def _build_grid_core(session_id: str, session: dict) -> dict:
|
|||||||
all_cells = [cell for z in zones_data for cell in z.get("cells", [])]
|
all_cells = [cell for z in zones_data for cell in z.get("cells", [])]
|
||||||
total_cols = sum(len(z.get("columns", [])) for z in zones_data)
|
total_cols = sum(len(z.get("columns", [])) for z in zones_data)
|
||||||
en_col_type = None
|
en_col_type = None
|
||||||
if total_cols >= 3:
|
skip_ipa = (ipa_mode == "none")
|
||||||
|
if not skip_ipa and total_cols >= 3:
|
||||||
# Find the column that contains IPA brackets → English headwords.
|
# Find the column that contains IPA brackets → English headwords.
|
||||||
# Count cells with bracket patterns per col_type. The column with
|
# Count cells with bracket patterns per col_type. The column with
|
||||||
# the most brackets is the headword column (IPA sits after or below
|
# the most brackets is the headword column (IPA sits after or below
|
||||||
# headwords). Falls back to longest-average if no brackets found.
|
# headwords).
|
||||||
col_bracket_count: Dict[str, int] = {}
|
col_bracket_count: Dict[str, int] = {}
|
||||||
col_avg_len: Dict[str, List[int]] = {}
|
|
||||||
for cell in all_cells:
|
for cell in all_cells:
|
||||||
ct = cell.get("col_type", "")
|
ct = cell.get("col_type", "")
|
||||||
txt = cell.get("text", "") or ""
|
txt = cell.get("text", "") or ""
|
||||||
col_avg_len.setdefault(ct, []).append(len(txt))
|
|
||||||
if ct.startswith("column_") and '[' in txt:
|
if ct.startswith("column_") and '[' in txt:
|
||||||
col_bracket_count[ct] = col_bracket_count.get(ct, 0) + 1
|
col_bracket_count[ct] = col_bracket_count.get(ct, 0) + 1
|
||||||
# Pick column with most bracket IPA patterns
|
# Pick column with most bracket IPA patterns.
|
||||||
|
# ipa_mode="auto": only when OCR already found bracket IPA (no fallback).
|
||||||
|
# ipa_mode="all": fallback to headword_col_index from dictionary detection.
|
||||||
if col_bracket_count:
|
if col_bracket_count:
|
||||||
en_col_type = max(col_bracket_count, key=col_bracket_count.get)
|
en_col_type = max(col_bracket_count, key=col_bracket_count.get)
|
||||||
else:
|
elif ipa_mode == "all":
|
||||||
# Fallback: longest average text
|
# Force IPA: use headword column from dictionary detection
|
||||||
best_avg = 0
|
hw_idx = dict_detection.get("headword_col_index")
|
||||||
for ct, lengths in col_avg_len.items():
|
if hw_idx is not None:
|
||||||
if not ct.startswith("column_"):
|
en_col_type = f"column_{hw_idx + 1}"
|
||||||
continue
|
|
||||||
avg = sum(lengths) / len(lengths) if lengths else 0
|
|
||||||
if avg > best_avg:
|
|
||||||
best_avg = avg
|
|
||||||
en_col_type = ct
|
|
||||||
if en_col_type:
|
if en_col_type:
|
||||||
for cell in all_cells:
|
for cell in all_cells:
|
||||||
if cell.get("col_type") == en_col_type:
|
if cell.get("col_type") == en_col_type:
|
||||||
@@ -912,7 +918,7 @@ async def _build_grid_core(session_id: str, session: dict) -> dict:
|
|||||||
# the EN headword column may not be the longest-average column.
|
# the EN headword column may not be the longest-average column.
|
||||||
_REAL_IPA_CHARS = set("ˈˌəɪɛɒʊʌæɑɔʃʒθðŋ")
|
_REAL_IPA_CHARS = set("ˈˌəɪɛɒʊʌæɑɔʃʒθðŋ")
|
||||||
ipa_cont_fixed = 0
|
ipa_cont_fixed = 0
|
||||||
for z in zones_data:
|
for z in ([] if skip_ipa else zones_data):
|
||||||
rows_sorted = sorted(z.get("rows", []), key=lambda r: r["index"])
|
rows_sorted = sorted(z.get("rows", []), key=lambda r: r["index"])
|
||||||
z_cells = z.get("cells", [])
|
z_cells = z.get("cells", [])
|
||||||
for idx, row in enumerate(rows_sorted):
|
for idx, row in enumerate(rows_sorted):
|
||||||
@@ -1110,7 +1116,7 @@ async def _build_grid_core(session_id: str, session: dict) -> dict:
|
|||||||
# Reject matches that look like grammar: "sb/sth up a) jdn/"
|
# Reject matches that look like grammar: "sb/sth up a) jdn/"
|
||||||
_SLASH_IPA_REJECT_RE = re.compile(r'[\s(),]')
|
_SLASH_IPA_REJECT_RE = re.compile(r'[\s(),]')
|
||||||
slash_ipa_fixed = 0
|
slash_ipa_fixed = 0
|
||||||
for z in zones_data:
|
for z in ([] if skip_ipa else zones_data):
|
||||||
for cell in z.get("cells", []):
|
for cell in z.get("cells", []):
|
||||||
# Only process English headword column — avoid converting
|
# Only process English headword column — avoid converting
|
||||||
# German text like "der/die/das" to IPA.
|
# German text like "der/die/das" to IPA.
|
||||||
@@ -1469,19 +1475,25 @@ async def _build_grid_core(session_id: str, session: dict) -> dict:
|
|||||||
logger.warning("Dictionary detection failed: %s", e)
|
logger.warning("Dictionary detection failed: %s", e)
|
||||||
|
|
||||||
# --- Syllable divider insertion for dictionary pages ---
|
# --- Syllable divider insertion for dictionary pages ---
|
||||||
# Only on confirmed dictionary pages with article columns (der/die/das).
|
# syllable_mode: "auto" = only when original has pipe dividers (1% threshold),
|
||||||
# The article_col_index check avoids false positives on synonym lists,
|
# "all" = force syllabification on all content words,
|
||||||
# word frequency tables, and other alphabetically sorted non-dictionary pages.
|
# "none" = skip entirely.
|
||||||
# Additionally, insert_syllable_dividers has its own pre-check for existing
|
|
||||||
# pipe characters in cells (OCR must have already found some).
|
|
||||||
syllable_insertions = 0
|
syllable_insertions = 0
|
||||||
if (dict_detection.get("is_dictionary")
|
if syllable_mode != "none" and img_bgr is not None:
|
||||||
and dict_detection.get("article_col_index") is not None
|
_syllable_eligible = False
|
||||||
and img_bgr is not None):
|
if syllable_mode == "all":
|
||||||
|
_syllable_eligible = True
|
||||||
|
elif (dict_detection.get("is_dictionary")
|
||||||
|
and dict_detection.get("article_col_index") is not None):
|
||||||
|
# auto: only on dictionary pages with article columns
|
||||||
|
_syllable_eligible = True
|
||||||
|
if _syllable_eligible:
|
||||||
try:
|
try:
|
||||||
from cv_syllable_detect import insert_syllable_dividers
|
from cv_syllable_detect import insert_syllable_dividers
|
||||||
|
force_syllables = (syllable_mode == "all")
|
||||||
syllable_insertions = insert_syllable_dividers(
|
syllable_insertions = insert_syllable_dividers(
|
||||||
zones_data, img_bgr, session_id,
|
zones_data, img_bgr, session_id,
|
||||||
|
force=force_syllables,
|
||||||
)
|
)
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
logger.warning("Syllable insertion failed: %s", e)
|
logger.warning("Syllable insertion failed: %s", e)
|
||||||
@@ -1523,6 +1535,12 @@ async def _build_grid_core(session_id: str, session: dict) -> dict:
|
|||||||
"article_col_index": dict_detection.get("article_col_index"),
|
"article_col_index": dict_detection.get("article_col_index"),
|
||||||
"headword_col_index": dict_detection.get("headword_col_index"),
|
"headword_col_index": dict_detection.get("headword_col_index"),
|
||||||
},
|
},
|
||||||
|
"processing_modes": {
|
||||||
|
"ipa_mode": ipa_mode,
|
||||||
|
"syllable_mode": syllable_mode,
|
||||||
|
"ipa_applied": en_col_type is not None and not skip_ipa,
|
||||||
|
"syllables_applied": syllable_insertions > 0,
|
||||||
|
},
|
||||||
"duration_seconds": round(duration, 2),
|
"duration_seconds": round(duration, 2),
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -1534,12 +1552,20 @@ async def _build_grid_core(session_id: str, session: dict) -> dict:
|
|||||||
# ---------------------------------------------------------------------------
|
# ---------------------------------------------------------------------------
|
||||||
|
|
||||||
@router.post("/sessions/{session_id}/build-grid")
|
@router.post("/sessions/{session_id}/build-grid")
|
||||||
async def build_grid(session_id: str):
|
async def build_grid(
|
||||||
|
session_id: str,
|
||||||
|
ipa_mode: str = Query("auto", pattern="^(auto|all|none)$"),
|
||||||
|
syllable_mode: str = Query("auto", pattern="^(auto|all|none)$"),
|
||||||
|
):
|
||||||
"""Build a structured, zone-aware grid from existing Kombi word results.
|
"""Build a structured, zone-aware grid from existing Kombi word results.
|
||||||
|
|
||||||
Requires that paddle-kombi or rapid-kombi has already been run on the session.
|
Requires that paddle-kombi or rapid-kombi has already been run on the session.
|
||||||
Uses the image for box detection and the word positions for grid structuring.
|
Uses the image for box detection and the word positions for grid structuring.
|
||||||
|
|
||||||
|
Query params:
|
||||||
|
ipa_mode: "auto" (only when English IPA detected), "all" (force), "none" (skip)
|
||||||
|
syllable_mode: "auto" (only when original has dividers), "all" (force), "none" (skip)
|
||||||
|
|
||||||
Returns a StructuredGrid with zones, each containing their own
|
Returns a StructuredGrid with zones, each containing their own
|
||||||
columns, rows, and cells — ready for the frontend Excel-like editor.
|
columns, rows, and cells — ready for the frontend Excel-like editor.
|
||||||
"""
|
"""
|
||||||
@@ -1548,7 +1574,10 @@ async def build_grid(session_id: str):
|
|||||||
raise HTTPException(status_code=404, detail=f"Session {session_id} not found")
|
raise HTTPException(status_code=404, detail=f"Session {session_id} not found")
|
||||||
|
|
||||||
try:
|
try:
|
||||||
result = await _build_grid_core(session_id, session)
|
result = await _build_grid_core(
|
||||||
|
session_id, session,
|
||||||
|
ipa_mode=ipa_mode, syllable_mode=syllable_mode,
|
||||||
|
)
|
||||||
except ValueError as e:
|
except ValueError as e:
|
||||||
raise HTTPException(status_code=400, detail=str(e))
|
raise HTTPException(status_code=400, detail=str(e))
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user