fix(ocr-pipeline): preserve sub-column data in vocab table display
Some checks failed
CI / go-lint (push) Has been skipped
CI / python-lint (push) Has been skipped
CI / nodejs-lint (push) Has been skipped
CI / test-go-school (push) Successful in 26s
CI / test-go-edu-search (push) Successful in 26s
CI / test-python-klausur (push) Failing after 1m51s
CI / test-python-agent-core (push) Successful in 16s
CI / test-nodejs-website (push) Successful in 16s
Some checks failed
CI / go-lint (push) Has been skipped
CI / python-lint (push) Has been skipped
CI / nodejs-lint (push) Has been skipped
CI / test-go-school (push) Successful in 26s
CI / test-go-edu-search (push) Successful in 26s
CI / test-python-klausur (push) Failing after 1m51s
CI / test-python-agent-core (push) Successful in 16s
CI / test-nodejs-website (push) Successful in 16s
Three fixes for sub-columns disappearing at the end of streaming:

1. Backend: add a column_marker mapping in _cells_to_vocab_entries() so marker text is included in vocab entries instead of being silently dropped.
2. Frontend types: add source_page and bbox_ref to the WordEntry interface.
3. Frontend table: show the page_ref column ("Seite") in the vocab table when entries have source_page data, instead of only EN/DE/Example.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -180,11 +180,13 @@ export interface WordEntry {
|
|||||||
english: string
|
english: string
|
||||||
german: string
|
german: string
|
||||||
example: string
|
example: string
|
||||||
|
source_page?: string
|
||||||
confidence: number
|
confidence: number
|
||||||
bbox: WordBbox
|
bbox: WordBbox
|
||||||
bbox_en: WordBbox | null
|
bbox_en: WordBbox | null
|
||||||
bbox_de: WordBbox | null
|
bbox_de: WordBbox | null
|
||||||
bbox_ex: WordBbox | null
|
bbox_ex: WordBbox | null
|
||||||
|
bbox_ref?: WordBbox | null
|
||||||
status?: 'pending' | 'confirmed' | 'edited' | 'skipped'
|
status?: 'pending' | 'confirmed' | 'edited' | 'skipped'
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@@ -518,43 +518,54 @@ export function StepWordRecognition({ sessionId, onNext, goToStep }: StepWordRec
|
|||||||
{/* Entry/Cell table */}
|
{/* Entry/Cell table */}
|
||||||
<div className="max-h-80 overflow-y-auto">
|
<div className="max-h-80 overflow-y-auto">
|
||||||
{isVocab ? (
|
{isVocab ? (
|
||||||
/* Vocab table: EN/DE/Example columns */
|
/* Vocab table: EN/DE/Example columns + optional page_ref */
|
||||||
<table className="w-full text-xs">
|
(() => {
|
||||||
<thead className="sticky top-0 bg-white dark:bg-gray-800">
|
const hasPageRef = editedEntries.some(e => e.source_page)
|
||||||
<tr className="text-left text-gray-500 dark:text-gray-400 border-b dark:border-gray-700">
|
return (
|
||||||
<th className="py-1 pr-2 w-8">#</th>
|
<table className="w-full text-xs">
|
||||||
<th className="py-1 pr-2">English</th>
|
<thead className="sticky top-0 bg-white dark:bg-gray-800">
|
||||||
<th className="py-1 pr-2">Deutsch</th>
|
<tr className="text-left text-gray-500 dark:text-gray-400 border-b dark:border-gray-700">
|
||||||
<th className="py-1 pr-2">Example</th>
|
<th className="py-1 pr-2 w-8">#</th>
|
||||||
<th className="py-1 w-12 text-right">Conf</th>
|
{hasPageRef && <th className="py-1 pr-2 w-12 text-gray-400">Seite</th>}
|
||||||
</tr>
|
<th className="py-1 pr-2">English</th>
|
||||||
</thead>
|
<th className="py-1 pr-2">Deutsch</th>
|
||||||
<tbody>
|
<th className="py-1 pr-2">Example</th>
|
||||||
{editedEntries.map((entry, idx) => (
|
<th className="py-1 w-12 text-right">Conf</th>
|
||||||
<tr
|
</tr>
|
||||||
key={idx}
|
</thead>
|
||||||
className={`border-b dark:border-gray-700/50 ${
|
<tbody>
|
||||||
idx === activeIndex ? 'bg-teal-50 dark:bg-teal-900/20' : ''
|
{editedEntries.map((entry, idx) => (
|
||||||
}`}
|
<tr
|
||||||
onClick={() => { setActiveIndex(idx); setMode('labeling') }}
|
key={idx}
|
||||||
>
|
className={`border-b dark:border-gray-700/50 ${
|
||||||
<td className="py-1 pr-2 text-gray-400">{idx + 1}</td>
|
idx === activeIndex ? 'bg-teal-50 dark:bg-teal-900/20' : ''
|
||||||
<td className="py-1 pr-2 font-mono text-gray-700 dark:text-gray-300 cursor-pointer">
|
}`}
|
||||||
<MultilineText text={entry.english} />
|
onClick={() => { setActiveIndex(idx); setMode('labeling') }}
|
||||||
</td>
|
>
|
||||||
<td className="py-1 pr-2 font-mono text-gray-700 dark:text-gray-300 cursor-pointer">
|
<td className="py-1 pr-2 text-gray-400">{idx + 1}</td>
|
||||||
<MultilineText text={entry.german} />
|
{hasPageRef && (
|
||||||
</td>
|
<td className="py-1 pr-2 font-mono text-gray-400 dark:text-gray-500">
|
||||||
<td className="py-1 pr-2 font-mono text-gray-500 dark:text-gray-400 cursor-pointer max-w-[200px]">
|
{entry.source_page || ''}
|
||||||
<MultilineText text={entry.example} />
|
</td>
|
||||||
</td>
|
)}
|
||||||
<td className={`py-1 text-right font-mono ${confColor(entry.confidence)}`}>
|
<td className="py-1 pr-2 font-mono text-gray-700 dark:text-gray-300 cursor-pointer">
|
||||||
{entry.confidence}%
|
<MultilineText text={entry.english} />
|
||||||
</td>
|
</td>
|
||||||
</tr>
|
<td className="py-1 pr-2 font-mono text-gray-700 dark:text-gray-300 cursor-pointer">
|
||||||
))}
|
<MultilineText text={entry.german} />
|
||||||
</tbody>
|
</td>
|
||||||
</table>
|
<td className="py-1 pr-2 font-mono text-gray-500 dark:text-gray-400 cursor-pointer max-w-[200px]">
|
||||||
|
<MultilineText text={entry.example} />
|
||||||
|
</td>
|
||||||
|
<td className={`py-1 text-right font-mono ${confColor(entry.confidence)}`}>
|
||||||
|
{entry.confidence}%
|
||||||
|
</td>
|
||||||
|
</tr>
|
||||||
|
))}
|
||||||
|
</tbody>
|
||||||
|
</table>
|
||||||
|
)
|
||||||
|
})()
|
||||||
) : (
|
) : (
|
||||||
/* Generic table: dynamic columns from columns_used */
|
/* Generic table: dynamic columns from columns_used */
|
||||||
<table className="w-full text-xs">
|
<table className="w-full text-xs">
|
||||||
|
|||||||
@@ -4215,12 +4215,14 @@ def _cells_to_vocab_entries(
|
|||||||
'column_de': 'german',
|
'column_de': 'german',
|
||||||
'column_example': 'example',
|
'column_example': 'example',
|
||||||
'page_ref': 'source_page',
|
'page_ref': 'source_page',
|
||||||
|
'column_marker': 'marker',
|
||||||
}
|
}
|
||||||
bbox_key_map = {
|
bbox_key_map = {
|
||||||
'column_en': 'bbox_en',
|
'column_en': 'bbox_en',
|
||||||
'column_de': 'bbox_de',
|
'column_de': 'bbox_de',
|
||||||
'column_example': 'bbox_ex',
|
'column_example': 'bbox_ex',
|
||||||
'page_ref': 'bbox_ref',
|
'page_ref': 'bbox_ref',
|
||||||
|
'column_marker': 'bbox_marker',
|
||||||
}
|
}
|
||||||
|
|
||||||
# Group cells by row_index
|
# Group cells by row_index
|
||||||
@@ -4238,12 +4240,14 @@ def _cells_to_vocab_entries(
|
|||||||
'german': '',
|
'german': '',
|
||||||
'example': '',
|
'example': '',
|
||||||
'source_page': '',
|
'source_page': '',
|
||||||
|
'marker': '',
|
||||||
'confidence': 0.0,
|
'confidence': 0.0,
|
||||||
'bbox': None,
|
'bbox': None,
|
||||||
'bbox_en': None,
|
'bbox_en': None,
|
||||||
'bbox_de': None,
|
'bbox_de': None,
|
||||||
'bbox_ex': None,
|
'bbox_ex': None,
|
||||||
'bbox_ref': None,
|
'bbox_ref': None,
|
||||||
|
'bbox_marker': None,
|
||||||
'ocr_engine': row_cells[0].get('ocr_engine', '') if row_cells else '',
|
'ocr_engine': row_cells[0].get('ocr_engine', '') if row_cells else '',
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user