fix(ocr-pipeline): preserve sub-column data in vocab table display
Some checks failed
CI / go-lint (push) Has been skipped
CI / python-lint (push) Has been skipped
CI / nodejs-lint (push) Has been skipped
CI / test-go-school (push) Successful in 26s
CI / test-go-edu-search (push) Successful in 26s
CI / test-python-klausur (push) Failing after 1m51s
CI / test-python-agent-core (push) Successful in 16s
CI / test-nodejs-website (push) Successful in 16s

Three fixes for sub-columns disappearing at the end of streaming:

1. Backend: add column_marker mapping in _cells_to_vocab_entries()
   so marker text and its bbox (bbox_marker) are included in vocab
   entries instead of being silently dropped

2. Frontend types: add optional source_page and bbox_ref fields to the
   WordEntry interface

3. Frontend table: show a page_ref column (Seite) in the vocab table
   when any entry has source_page data, instead of only EN/DE/Example

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
Benjamin Admin
2026-03-03 08:06:15 +01:00
parent 0d72f2c836
commit dea3349b23
3 changed files with 54 additions and 37 deletions

View File

@@ -180,11 +180,13 @@ export interface WordEntry {
english: string english: string
german: string german: string
example: string example: string
source_page?: string
confidence: number confidence: number
bbox: WordBbox bbox: WordBbox
bbox_en: WordBbox | null bbox_en: WordBbox | null
bbox_de: WordBbox | null bbox_de: WordBbox | null
bbox_ex: WordBbox | null bbox_ex: WordBbox | null
bbox_ref?: WordBbox | null
status?: 'pending' | 'confirmed' | 'edited' | 'skipped' status?: 'pending' | 'confirmed' | 'edited' | 'skipped'
} }

View File

@@ -518,43 +518,54 @@ export function StepWordRecognition({ sessionId, onNext, goToStep }: StepWordRec
{/* Entry/Cell table */} {/* Entry/Cell table */}
<div className="max-h-80 overflow-y-auto"> <div className="max-h-80 overflow-y-auto">
{isVocab ? ( {isVocab ? (
/* Vocab table: EN/DE/Example columns */ /* Vocab table: EN/DE/Example columns + optional page_ref */
<table className="w-full text-xs"> (() => {
<thead className="sticky top-0 bg-white dark:bg-gray-800"> const hasPageRef = editedEntries.some(e => e.source_page)
<tr className="text-left text-gray-500 dark:text-gray-400 border-b dark:border-gray-700"> return (
<th className="py-1 pr-2 w-8">#</th> <table className="w-full text-xs">
<th className="py-1 pr-2">English</th> <thead className="sticky top-0 bg-white dark:bg-gray-800">
<th className="py-1 pr-2">Deutsch</th> <tr className="text-left text-gray-500 dark:text-gray-400 border-b dark:border-gray-700">
<th className="py-1 pr-2">Example</th> <th className="py-1 pr-2 w-8">#</th>
<th className="py-1 w-12 text-right">Conf</th> {hasPageRef && <th className="py-1 pr-2 w-12 text-gray-400">Seite</th>}
</tr> <th className="py-1 pr-2">English</th>
</thead> <th className="py-1 pr-2">Deutsch</th>
<tbody> <th className="py-1 pr-2">Example</th>
{editedEntries.map((entry, idx) => ( <th className="py-1 w-12 text-right">Conf</th>
<tr </tr>
key={idx} </thead>
className={`border-b dark:border-gray-700/50 ${ <tbody>
idx === activeIndex ? 'bg-teal-50 dark:bg-teal-900/20' : '' {editedEntries.map((entry, idx) => (
}`} <tr
onClick={() => { setActiveIndex(idx); setMode('labeling') }} key={idx}
> className={`border-b dark:border-gray-700/50 ${
<td className="py-1 pr-2 text-gray-400">{idx + 1}</td> idx === activeIndex ? 'bg-teal-50 dark:bg-teal-900/20' : ''
<td className="py-1 pr-2 font-mono text-gray-700 dark:text-gray-300 cursor-pointer"> }`}
<MultilineText text={entry.english} /> onClick={() => { setActiveIndex(idx); setMode('labeling') }}
</td> >
<td className="py-1 pr-2 font-mono text-gray-700 dark:text-gray-300 cursor-pointer"> <td className="py-1 pr-2 text-gray-400">{idx + 1}</td>
<MultilineText text={entry.german} /> {hasPageRef && (
</td> <td className="py-1 pr-2 font-mono text-gray-400 dark:text-gray-500">
<td className="py-1 pr-2 font-mono text-gray-500 dark:text-gray-400 cursor-pointer max-w-[200px]"> {entry.source_page || ''}
<MultilineText text={entry.example} /> </td>
</td> )}
<td className={`py-1 text-right font-mono ${confColor(entry.confidence)}`}> <td className="py-1 pr-2 font-mono text-gray-700 dark:text-gray-300 cursor-pointer">
{entry.confidence}% <MultilineText text={entry.english} />
</td> </td>
</tr> <td className="py-1 pr-2 font-mono text-gray-700 dark:text-gray-300 cursor-pointer">
))} <MultilineText text={entry.german} />
</tbody> </td>
</table> <td className="py-1 pr-2 font-mono text-gray-500 dark:text-gray-400 cursor-pointer max-w-[200px]">
<MultilineText text={entry.example} />
</td>
<td className={`py-1 text-right font-mono ${confColor(entry.confidence)}`}>
{entry.confidence}%
</td>
</tr>
))}
</tbody>
</table>
)
})()
) : ( ) : (
/* Generic table: dynamic columns from columns_used */ /* Generic table: dynamic columns from columns_used */
<table className="w-full text-xs"> <table className="w-full text-xs">

View File

@@ -4215,12 +4215,14 @@ def _cells_to_vocab_entries(
'column_de': 'german', 'column_de': 'german',
'column_example': 'example', 'column_example': 'example',
'page_ref': 'source_page', 'page_ref': 'source_page',
'column_marker': 'marker',
} }
bbox_key_map = { bbox_key_map = {
'column_en': 'bbox_en', 'column_en': 'bbox_en',
'column_de': 'bbox_de', 'column_de': 'bbox_de',
'column_example': 'bbox_ex', 'column_example': 'bbox_ex',
'page_ref': 'bbox_ref', 'page_ref': 'bbox_ref',
'column_marker': 'bbox_marker',
} }
# Group cells by row_index # Group cells by row_index
@@ -4238,12 +4240,14 @@ def _cells_to_vocab_entries(
'german': '', 'german': '',
'example': '', 'example': '',
'source_page': '', 'source_page': '',
'marker': '',
'confidence': 0.0, 'confidence': 0.0,
'bbox': None, 'bbox': None,
'bbox_en': None, 'bbox_en': None,
'bbox_de': None, 'bbox_de': None,
'bbox_ex': None, 'bbox_ex': None,
'bbox_ref': None, 'bbox_ref': None,
'bbox_marker': None,
'ocr_engine': row_cells[0].get('ocr_engine', '') if row_cells else '', 'ocr_engine': row_cells[0].get('ocr_engine', '') if row_cells else '',
} }