refactor(word-step): make table fully generic and fix marker-only row filter
Some checks failed
CI / go-lint (push) Has been skipped
CI / python-lint (push) Has been skipped
CI / nodejs-lint (push) Has been skipped
CI / test-go-school (push) Successful in 24s
CI / test-go-edu-search (push) Successful in 24s
CI / test-python-klausur (push) Failing after 1m43s
CI / test-python-agent-core (push) Successful in 17s
CI / test-nodejs-website (push) Successful in 17s
Some checks failed
CI / go-lint (push) Has been skipped
CI / python-lint (push) Has been skipped
CI / nodejs-lint (push) Has been skipped
CI / test-go-school (push) Successful in 24s
CI / test-go-edu-search (push) Successful in 24s
CI / test-python-klausur (push) Failing after 1m43s
CI / test-python-agent-core (push) Successful in 17s
CI / test-nodejs-website (push) Successful in 17s
Frontend: Replace the hardcoded EN/DE/Example vocab table with a unified dynamic table driven by the columns_used value returned by the backend. Labeling, confirmation, counts, and summary badges are now all cell-based instead of branching on isVocab.

Backend: Change the entry filter in _cells_to_vocab_entries() from checking only english/german/example to checking ANY mapped field. This preserves rows that contain only marker or source_page content, fixing the issue where marker sub-columns disappeared at the end of OCR processing.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -241,34 +241,27 @@ export function StepWordRecognition({ sessionId, onNext, goToStep }: StepWordRec
|
|||||||
))
|
))
|
||||||
}
|
}
|
||||||
|
|
||||||
// Step-through: confirm current entry
|
// Step-through: confirm current row (always cell-based)
|
||||||
const confirmEntry = () => {
|
const confirmEntry = () => {
|
||||||
if (isVocab) {
|
const rowCells = getRowCells(activeIndex)
|
||||||
setEditedEntries(prev => prev.map((e, i) =>
|
const cellIds = new Set(rowCells.map(c => c.cell_id))
|
||||||
i === activeIndex ? { ...e, status: e.status === 'edited' ? 'edited' : 'confirmed' } : e
|
setEditedCells(prev => prev.map(c =>
|
||||||
))
|
cellIds.has(c.cell_id) ? { ...c, status: c.status === 'edited' ? 'edited' : 'confirmed' } : c
|
||||||
} else {
|
))
|
||||||
// Generic: confirm all cells in this row
|
const maxIdx = getUniqueRowCount() - 1
|
||||||
const rowCells = getRowCells(activeIndex)
|
|
||||||
const cellIds = new Set(rowCells.map(c => c.cell_id))
|
|
||||||
setEditedCells(prev => prev.map(c =>
|
|
||||||
cellIds.has(c.cell_id) ? { ...c, status: c.status === 'edited' ? 'edited' : 'confirmed' } : c
|
|
||||||
))
|
|
||||||
}
|
|
||||||
const maxIdx = isVocab ? editedEntries.length - 1 : getUniqueRowCount() - 1
|
|
||||||
if (activeIndex < maxIdx) {
|
if (activeIndex < maxIdx) {
|
||||||
setActiveIndex(activeIndex + 1)
|
setActiveIndex(activeIndex + 1)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// Step-through: skip current entry
|
// Step-through: skip current row
|
||||||
const skipEntry = () => {
|
const skipEntry = () => {
|
||||||
if (isVocab) {
|
const rowCells = getRowCells(activeIndex)
|
||||||
setEditedEntries(prev => prev.map((e, i) =>
|
const cellIds = new Set(rowCells.map(c => c.cell_id))
|
||||||
i === activeIndex ? { ...e, status: 'skipped' as const } : e
|
setEditedCells(prev => prev.map(c =>
|
||||||
))
|
cellIds.has(c.cell_id) ? { ...c, status: 'skipped' as const } : c
|
||||||
}
|
))
|
||||||
const maxIdx = isVocab ? editedEntries.length - 1 : getUniqueRowCount() - 1
|
const maxIdx = getUniqueRowCount() - 1
|
||||||
if (activeIndex < maxIdx) {
|
if (activeIndex < maxIdx) {
|
||||||
setActiveIndex(activeIndex + 1)
|
setActiveIndex(activeIndex + 1)
|
||||||
}
|
}
|
||||||
@@ -351,11 +344,12 @@ export function StepWordRecognition({ sessionId, onNext, goToStep }: StepWordRec
|
|||||||
const columnsUsed = gridResult?.columns_used || []
|
const columnsUsed = gridResult?.columns_used || []
|
||||||
const gridShape = gridResult?.grid_shape
|
const gridShape = gridResult?.grid_shape
|
||||||
|
|
||||||
// Counts for labeling progress
|
// Counts for labeling progress (always cell-based)
|
||||||
const confirmedCount = isVocab
|
const confirmedRowIds = new Set(
|
||||||
? editedEntries.filter(e => e.status === 'confirmed' || e.status === 'edited').length
|
editedCells.filter(c => c.status === 'confirmed' || c.status === 'edited').map(c => c.row_index)
|
||||||
: editedCells.filter(c => c.status === 'confirmed' || c.status === 'edited').length
|
)
|
||||||
const totalCount = isVocab ? editedEntries.length : getUniqueRowCount()
|
const confirmedCount = confirmedRowIds.size
|
||||||
|
const totalCount = getUniqueRowCount()
|
||||||
|
|
||||||
// Group cells by row for generic table display
|
// Group cells by row for generic table display
|
||||||
const cellsByRow: Map<number, GridCell[]> = new Map()
|
const cellsByRow: Map<number, GridCell[]> = new Map()
|
||||||
@@ -475,10 +469,8 @@ export function StepWordRecognition({ sessionId, onNext, goToStep }: StepWordRec
|
|||||||
<div className="bg-white dark:bg-gray-800 rounded-xl border border-gray-200 dark:border-gray-700 p-4 space-y-3">
|
<div className="bg-white dark:bg-gray-800 rounded-xl border border-gray-200 dark:border-gray-700 p-4 space-y-3">
|
||||||
<div className="flex items-center justify-between">
|
<div className="flex items-center justify-between">
|
||||||
<h4 className="text-sm font-medium text-gray-700 dark:text-gray-300">
|
<h4 className="text-sm font-medium text-gray-700 dark:text-gray-300">
|
||||||
{isVocab
|
Ergebnis: {summary.non_empty_cells}/{summary.total_cells} Zellen mit Text
|
||||||
? `Ergebnis: ${summary.total_entries ?? 0} Vokabel-Eintraege erkannt`
|
({sortedRowIndices.length} Zeilen, {columnsUsed.length} Spalten)
|
||||||
: `Ergebnis: ${summary.non_empty_cells}/${summary.total_cells} Zellen mit Text`
|
|
||||||
}
|
|
||||||
</h4>
|
</h4>
|
||||||
<span className="text-xs text-gray-400">
|
<span className="text-xs text-gray-400">
|
||||||
{gridResult.duration_seconds}s
|
{gridResult.duration_seconds}s
|
||||||
@@ -487,27 +479,14 @@ export function StepWordRecognition({ sessionId, onNext, goToStep }: StepWordRec
|
|||||||
|
|
||||||
{/* Summary badges */}
|
{/* Summary badges */}
|
||||||
<div className="flex gap-2 flex-wrap">
|
<div className="flex gap-2 flex-wrap">
|
||||||
{isVocab ? (
|
<span className="px-2 py-0.5 rounded text-xs font-medium bg-blue-100 dark:bg-blue-900/30 text-blue-700 dark:text-blue-300">
|
||||||
<>
|
Zellen: {summary.non_empty_cells}/{summary.total_cells}
|
||||||
<span className="px-2 py-0.5 rounded text-xs font-medium bg-blue-100 dark:bg-blue-900/30 text-blue-700 dark:text-blue-300">
|
</span>
|
||||||
EN: {summary.with_english ?? 0}
|
{columnsUsed.map((col, i) => (
|
||||||
</span>
|
<span key={i} className={`px-2 py-0.5 rounded text-xs font-medium bg-gray-100 dark:bg-gray-700 ${colTypeColor(col.type)}`}>
|
||||||
<span className="px-2 py-0.5 rounded text-xs font-medium bg-green-100 dark:bg-green-900/30 text-green-700 dark:text-green-300">
|
C{col.index}: {colTypeLabel(col.type)}
|
||||||
DE: {summary.with_german ?? 0}
|
</span>
|
||||||
</span>
|
))}
|
||||||
</>
|
|
||||||
) : (
|
|
||||||
<>
|
|
||||||
<span className="px-2 py-0.5 rounded text-xs font-medium bg-blue-100 dark:bg-blue-900/30 text-blue-700 dark:text-blue-300">
|
|
||||||
Zellen: {summary.non_empty_cells}/{summary.total_cells}
|
|
||||||
</span>
|
|
||||||
{columnsUsed.map((col, i) => (
|
|
||||||
<span key={i} className={`px-2 py-0.5 rounded text-xs font-medium bg-gray-100 dark:bg-gray-700 ${colTypeColor(col.type)}`}>
|
|
||||||
C{col.index}: {colTypeLabel(col.type)}
|
|
||||||
</span>
|
|
||||||
))}
|
|
||||||
</>
|
|
||||||
)}
|
|
||||||
{summary.low_confidence > 0 && (
|
{summary.low_confidence > 0 && (
|
||||||
<span className="px-2 py-0.5 rounded text-xs font-medium bg-red-100 dark:bg-red-900/30 text-red-700 dark:text-red-300">
|
<span className="px-2 py-0.5 rounded text-xs font-medium bg-red-100 dark:bg-red-900/30 text-red-700 dark:text-red-300">
|
||||||
Unsicher: {summary.low_confidence}
|
Unsicher: {summary.low_confidence}
|
||||||
@@ -517,103 +496,52 @@ export function StepWordRecognition({ sessionId, onNext, goToStep }: StepWordRec
|
|||||||
|
|
||||||
{/* Entry/Cell table */}
|
{/* Entry/Cell table */}
|
||||||
<div className="max-h-80 overflow-y-auto">
|
<div className="max-h-80 overflow-y-auto">
|
||||||
{isVocab ? (
|
{/* Unified dynamic table — columns driven by columns_used */}
|
||||||
/* Vocab table: EN/DE/Example columns + optional page_ref */
|
<table className="w-full text-xs">
|
||||||
(() => {
|
<thead className="sticky top-0 bg-white dark:bg-gray-800">
|
||||||
const hasPageRef = editedEntries.some(e => e.source_page)
|
<tr className="text-left text-gray-500 dark:text-gray-400 border-b dark:border-gray-700">
|
||||||
return (
|
<th className="py-1 pr-2 w-12">Zeile</th>
|
||||||
<table className="w-full text-xs">
|
{columnsUsed.map((col, i) => (
|
||||||
<thead className="sticky top-0 bg-white dark:bg-gray-800">
|
<th key={i} className={`py-1 pr-2 ${colTypeColor(col.type)}`}>
|
||||||
<tr className="text-left text-gray-500 dark:text-gray-400 border-b dark:border-gray-700">
|
{colTypeLabel(col.type)}
|
||||||
<th className="py-1 pr-2 w-8">#</th>
|
</th>
|
||||||
{hasPageRef && <th className="py-1 pr-2 w-12 text-gray-400">Seite</th>}
|
))}
|
||||||
<th className="py-1 pr-2">English</th>
|
<th className="py-1 w-12 text-right">Conf</th>
|
||||||
<th className="py-1 pr-2">Deutsch</th>
|
</tr>
|
||||||
<th className="py-1 pr-2">Example</th>
|
</thead>
|
||||||
<th className="py-1 w-12 text-right">Conf</th>
|
<tbody>
|
||||||
</tr>
|
{sortedRowIndices.map((rowIdx, posIdx) => {
|
||||||
</thead>
|
const rowCells = cellsByRow.get(rowIdx) || []
|
||||||
<tbody>
|
const avgConf = rowCells.length
|
||||||
{editedEntries.map((entry, idx) => (
|
? Math.round(rowCells.reduce((s, c) => s + c.confidence, 0) / rowCells.length)
|
||||||
<tr
|
: 0
|
||||||
key={idx}
|
return (
|
||||||
className={`border-b dark:border-gray-700/50 ${
|
<tr
|
||||||
idx === activeIndex ? 'bg-teal-50 dark:bg-teal-900/20' : ''
|
key={rowIdx}
|
||||||
}`}
|
className={`border-b dark:border-gray-700/50 ${
|
||||||
onClick={() => { setActiveIndex(idx); setMode('labeling') }}
|
posIdx === activeIndex ? 'bg-teal-50 dark:bg-teal-900/20' : ''
|
||||||
>
|
}`}
|
||||||
<td className="py-1 pr-2 text-gray-400">{idx + 1}</td>
|
onClick={() => { setActiveIndex(posIdx); setMode('labeling') }}
|
||||||
{hasPageRef && (
|
>
|
||||||
<td className="py-1 pr-2 font-mono text-gray-400 dark:text-gray-500">
|
<td className="py-1 pr-2 text-gray-400 font-mono text-[10px]">
|
||||||
{entry.source_page || ''}
|
R{String(rowIdx).padStart(2, '0')}
|
||||||
</td>
|
</td>
|
||||||
)}
|
{columnsUsed.map((col) => {
|
||||||
<td className="py-1 pr-2 font-mono text-gray-700 dark:text-gray-300 cursor-pointer">
|
const cell = rowCells.find(c => c.col_index === col.index)
|
||||||
<MultilineText text={entry.english} />
|
return (
|
||||||
|
<td key={col.index} className="py-1 pr-2 font-mono text-gray-700 dark:text-gray-300 cursor-pointer">
|
||||||
|
<MultilineText text={cell?.text || ''} />
|
||||||
</td>
|
</td>
|
||||||
<td className="py-1 pr-2 font-mono text-gray-700 dark:text-gray-300 cursor-pointer">
|
)
|
||||||
<MultilineText text={entry.german} />
|
})}
|
||||||
</td>
|
<td className={`py-1 text-right font-mono ${confColor(avgConf)}`}>
|
||||||
<td className="py-1 pr-2 font-mono text-gray-500 dark:text-gray-400 cursor-pointer max-w-[200px]">
|
{avgConf}%
|
||||||
<MultilineText text={entry.example} />
|
</td>
|
||||||
</td>
|
</tr>
|
||||||
<td className={`py-1 text-right font-mono ${confColor(entry.confidence)}`}>
|
)
|
||||||
{entry.confidence}%
|
})}
|
||||||
</td>
|
</tbody>
|
||||||
</tr>
|
</table>
|
||||||
))}
|
|
||||||
</tbody>
|
|
||||||
</table>
|
|
||||||
)
|
|
||||||
})()
|
|
||||||
) : (
|
|
||||||
/* Generic table: dynamic columns from columns_used */
|
|
||||||
<table className="w-full text-xs">
|
|
||||||
<thead className="sticky top-0 bg-white dark:bg-gray-800">
|
|
||||||
<tr className="text-left text-gray-500 dark:text-gray-400 border-b dark:border-gray-700">
|
|
||||||
<th className="py-1 pr-2 w-12">Zeile</th>
|
|
||||||
{columnsUsed.map((col, i) => (
|
|
||||||
<th key={i} className={`py-1 pr-2 ${colTypeColor(col.type)}`}>
|
|
||||||
{colTypeLabel(col.type)}
|
|
||||||
</th>
|
|
||||||
))}
|
|
||||||
<th className="py-1 w-12 text-right">Conf</th>
|
|
||||||
</tr>
|
|
||||||
</thead>
|
|
||||||
<tbody>
|
|
||||||
{sortedRowIndices.map((rowIdx, posIdx) => {
|
|
||||||
const rowCells = cellsByRow.get(rowIdx) || []
|
|
||||||
const avgConf = rowCells.length
|
|
||||||
? Math.round(rowCells.reduce((s, c) => s + c.confidence, 0) / rowCells.length)
|
|
||||||
: 0
|
|
||||||
return (
|
|
||||||
<tr
|
|
||||||
key={rowIdx}
|
|
||||||
className={`border-b dark:border-gray-700/50 ${
|
|
||||||
posIdx === activeIndex ? 'bg-teal-50 dark:bg-teal-900/20' : ''
|
|
||||||
}`}
|
|
||||||
onClick={() => { setActiveIndex(posIdx); setMode('labeling') }}
|
|
||||||
>
|
|
||||||
<td className="py-1 pr-2 text-gray-400 font-mono text-[10px]">
|
|
||||||
R{String(rowIdx).padStart(2, '0')}
|
|
||||||
</td>
|
|
||||||
{columnsUsed.map((col) => {
|
|
||||||
const cell = rowCells.find(c => c.col_index === col.index)
|
|
||||||
return (
|
|
||||||
<td key={col.index} className="py-1 pr-2 font-mono text-gray-700 dark:text-gray-300 cursor-pointer">
|
|
||||||
<MultilineText text={cell?.text || ''} />
|
|
||||||
</td>
|
|
||||||
)
|
|
||||||
})}
|
|
||||||
<td className={`py-1 text-right font-mono ${confColor(avgConf)}`}>
|
|
||||||
{avgConf}%
|
|
||||||
</td>
|
|
||||||
</tr>
|
|
||||||
)
|
|
||||||
})}
|
|
||||||
</tbody>
|
|
||||||
</table>
|
|
||||||
)}
|
|
||||||
<div ref={tableEndRef} />
|
<div ref={tableEndRef} />
|
||||||
</div>
|
</div>
|
||||||
</div>
|
</div>
|
||||||
@@ -682,15 +610,12 @@ export function StepWordRecognition({ sessionId, onNext, goToStep }: StepWordRec
|
|||||||
)}
|
)}
|
||||||
|
|
||||||
{/* Labeling mode */}
|
{/* Labeling mode */}
|
||||||
{mode === 'labeling' && (isVocab ? editedEntries.length > 0 : editedCells.length > 0) && (
|
{mode === 'labeling' && editedCells.length > 0 && (
|
||||||
<div className="grid grid-cols-3 gap-4">
|
<div className="grid grid-cols-3 gap-4">
|
||||||
{/* Left 2/3: Image with highlighted active row */}
|
{/* Left 2/3: Image with highlighted active row */}
|
||||||
<div className="col-span-2">
|
<div className="col-span-2">
|
||||||
<div className="text-xs font-medium text-gray-500 dark:text-gray-400 mb-1">
|
<div className="text-xs font-medium text-gray-500 dark:text-gray-400 mb-1">
|
||||||
{isVocab
|
Zeile {activeIndex + 1} von {getUniqueRowCount()}
|
||||||
? `Eintrag ${activeIndex + 1} von ${editedEntries.length}`
|
|
||||||
: `Zeile ${activeIndex + 1} von ${getUniqueRowCount()}`
|
|
||||||
}
|
|
||||||
</div>
|
</div>
|
||||||
<div className="border rounded-lg overflow-hidden dark:border-gray-700 bg-gray-50 dark:bg-gray-900 relative">
|
<div className="border rounded-lg overflow-hidden dark:border-gray-700 bg-gray-50 dark:bg-gray-900 relative">
|
||||||
{/* eslint-disable-next-line @next/next/no-img-element */}
|
{/* eslint-disable-next-line @next/next/no-img-element */}
|
||||||
@@ -699,19 +624,8 @@ export function StepWordRecognition({ sessionId, onNext, goToStep }: StepWordRec
|
|||||||
alt="Wort-Overlay"
|
alt="Wort-Overlay"
|
||||||
className="w-full h-auto"
|
className="w-full h-auto"
|
||||||
/>
|
/>
|
||||||
{/* Highlight overlay for active row/entry */}
|
{/* Highlight overlay for active row */}
|
||||||
{isVocab && editedEntries[activeIndex]?.bbox && (
|
{(() => {
|
||||||
<div
|
|
||||||
className="absolute border-2 border-yellow-400 bg-yellow-400/10 pointer-events-none"
|
|
||||||
style={{
|
|
||||||
left: `${editedEntries[activeIndex].bbox.x}%`,
|
|
||||||
top: `${editedEntries[activeIndex].bbox.y}%`,
|
|
||||||
width: `${editedEntries[activeIndex].bbox.w}%`,
|
|
||||||
height: `${editedEntries[activeIndex].bbox.h}%`,
|
|
||||||
}}
|
|
||||||
/>
|
|
||||||
)}
|
|
||||||
{!isVocab && (() => {
|
|
||||||
const rowCells = getRowCells(activeIndex)
|
const rowCells = getRowCells(activeIndex)
|
||||||
return rowCells.map(cell => (
|
return rowCells.map(cell => (
|
||||||
<div
|
<div
|
||||||
@@ -741,14 +655,14 @@ export function StepWordRecognition({ sessionId, onNext, goToStep }: StepWordRec
|
|||||||
Zurueck
|
Zurueck
|
||||||
</button>
|
</button>
|
||||||
<span className="text-xs text-gray-500">
|
<span className="text-xs text-gray-500">
|
||||||
{activeIndex + 1} / {isVocab ? editedEntries.length : getUniqueRowCount()}
|
{activeIndex + 1} / {getUniqueRowCount()}
|
||||||
</span>
|
</span>
|
||||||
<button
|
<button
|
||||||
onClick={() => setActiveIndex(Math.min(
|
onClick={() => setActiveIndex(Math.min(
|
||||||
(isVocab ? editedEntries.length : getUniqueRowCount()) - 1,
|
getUniqueRowCount() - 1,
|
||||||
activeIndex + 1
|
activeIndex + 1
|
||||||
))}
|
))}
|
||||||
disabled={activeIndex >= (isVocab ? editedEntries.length : getUniqueRowCount()) - 1}
|
disabled={activeIndex >= getUniqueRowCount() - 1}
|
||||||
className="px-2 py-1 text-xs border rounded hover:bg-gray-50 dark:hover:bg-gray-700 dark:border-gray-600 disabled:opacity-30"
|
className="px-2 py-1 text-xs border rounded hover:bg-gray-50 dark:hover:bg-gray-700 dark:border-gray-600 disabled:opacity-30"
|
||||||
>
|
>
|
||||||
Weiter
|
Weiter
|
||||||
@@ -757,17 +671,7 @@ export function StepWordRecognition({ sessionId, onNext, goToStep }: StepWordRec
|
|||||||
|
|
||||||
{/* Status badge */}
|
{/* Status badge */}
|
||||||
<div className="flex items-center gap-2">
|
<div className="flex items-center gap-2">
|
||||||
{isVocab && (
|
{(() => {
|
||||||
<>
|
|
||||||
<span className={`px-2 py-0.5 rounded text-[10px] uppercase font-semibold ${statusBadge(editedEntries[activeIndex]?.status)}`}>
|
|
||||||
{editedEntries[activeIndex]?.status || 'pending'}
|
|
||||||
</span>
|
|
||||||
<span className={`text-xs font-mono ${confColor(editedEntries[activeIndex]?.confidence || 0)}`}>
|
|
||||||
{editedEntries[activeIndex]?.confidence}% Konfidenz
|
|
||||||
</span>
|
|
||||||
</>
|
|
||||||
)}
|
|
||||||
{!isVocab && (() => {
|
|
||||||
const rowCells = getRowCells(activeIndex)
|
const rowCells = getRowCells(activeIndex)
|
||||||
const avgConf = rowCells.length
|
const avgConf = rowCells.length
|
||||||
? Math.round(rowCells.reduce((s, c) => s + c.confidence, 0) / rowCells.length)
|
? Math.round(rowCells.reduce((s, c) => s + c.confidence, 0) / rowCells.length)
|
||||||
@@ -780,96 +684,36 @@ export function StepWordRecognition({ sessionId, onNext, goToStep }: StepWordRec
|
|||||||
})()}
|
})()}
|
||||||
</div>
|
</div>
|
||||||
|
|
||||||
{/* Cell crops (vocab mode) */}
|
{/* Editable fields — one per column, driven by columns_used */}
|
||||||
{isVocab && editedEntries[activeIndex]?.bbox_en && (
|
|
||||||
<div>
|
|
||||||
<div className="text-[10px] font-medium text-blue-500 mb-0.5">EN-Zelle</div>
|
|
||||||
<div className="border rounded dark:border-gray-700 overflow-hidden bg-white dark:bg-gray-900 h-10 relative">
|
|
||||||
<CellCrop
|
|
||||||
imageUrl={dewarpedUrl}
|
|
||||||
bbox={editedEntries[activeIndex].bbox_en!}
|
|
||||||
/>
|
|
||||||
</div>
|
|
||||||
</div>
|
|
||||||
)}
|
|
||||||
{isVocab && editedEntries[activeIndex]?.bbox_de && (
|
|
||||||
<div>
|
|
||||||
<div className="text-[10px] font-medium text-green-500 mb-0.5">DE-Zelle</div>
|
|
||||||
<div className="border rounded dark:border-gray-700 overflow-hidden bg-white dark:bg-gray-900 h-10 relative">
|
|
||||||
<CellCrop
|
|
||||||
imageUrl={dewarpedUrl}
|
|
||||||
bbox={editedEntries[activeIndex].bbox_de!}
|
|
||||||
/>
|
|
||||||
</div>
|
|
||||||
</div>
|
|
||||||
)}
|
|
||||||
|
|
||||||
{/* Editable fields */}
|
|
||||||
<div className="space-y-2">
|
<div className="space-y-2">
|
||||||
{isVocab ? (
|
{(() => {
|
||||||
/* Vocab mode: EN/DE/Example fields */
|
const rowCells = getRowCells(activeIndex)
|
||||||
<>
|
return columnsUsed.map((col, colIdx) => {
|
||||||
<div>
|
const cell = rowCells.find(c => c.col_index === col.index)
|
||||||
<label className="text-[10px] font-medium text-gray-500 dark:text-gray-400">English</label>
|
if (!cell) return null
|
||||||
<textarea
|
return (
|
||||||
ref={enRef as any}
|
<div key={col.index}>
|
||||||
rows={Math.max(1, (editedEntries[activeIndex]?.english || '').split('\n').length)}
|
<div className="flex items-center gap-1 mb-0.5">
|
||||||
value={editedEntries[activeIndex]?.english || ''}
|
<label className={`text-[10px] font-medium ${colTypeColor(col.type)}`}>
|
||||||
onChange={(e) => updateEntry(activeIndex, 'english', e.target.value)}
|
{colTypeLabel(col.type)}
|
||||||
className="w-full px-2 py-1.5 text-sm border rounded dark:bg-gray-700 dark:border-gray-600 font-mono resize-none"
|
</label>
|
||||||
/>
|
<span className="text-[9px] text-gray-400">{cell.cell_id}</span>
|
||||||
</div>
|
</div>
|
||||||
<div>
|
{/* Cell crop */}
|
||||||
<label className="text-[10px] font-medium text-gray-500 dark:text-gray-400">Deutsch</label>
|
<div className="border rounded dark:border-gray-700 overflow-hidden bg-white dark:bg-gray-900 h-10 relative mb-1">
|
||||||
<textarea
|
<CellCrop imageUrl={dewarpedUrl} bbox={cell.bbox_pct} />
|
||||||
rows={Math.max(1, (editedEntries[activeIndex]?.german || '').split('\n').length)}
|
</div>
|
||||||
value={editedEntries[activeIndex]?.german || ''}
|
<textarea
|
||||||
onChange={(e) => updateEntry(activeIndex, 'german', e.target.value)}
|
ref={colIdx === 0 ? enRef as any : undefined}
|
||||||
className="w-full px-2 py-1.5 text-sm border rounded dark:bg-gray-700 dark:border-gray-600 font-mono resize-none"
|
rows={Math.max(1, (cell.text || '').split('\n').length)}
|
||||||
/>
|
value={cell.text || ''}
|
||||||
</div>
|
onChange={(e) => updateCell(cell.cell_id, e.target.value)}
|
||||||
<div>
|
className="w-full px-2 py-1.5 text-sm border rounded dark:bg-gray-700 dark:border-gray-600 font-mono resize-none"
|
||||||
<label className="text-[10px] font-medium text-gray-500 dark:text-gray-400">Example</label>
|
/>
|
||||||
<textarea
|
</div>
|
||||||
rows={Math.max(1, (editedEntries[activeIndex]?.example || '').split('\n').length)}
|
)
|
||||||
value={editedEntries[activeIndex]?.example || ''}
|
})
|
||||||
onChange={(e) => updateEntry(activeIndex, 'example', e.target.value)}
|
})()}
|
||||||
className="w-full px-2 py-1.5 text-sm border rounded dark:bg-gray-700 dark:border-gray-600 font-mono resize-none"
|
|
||||||
/>
|
|
||||||
</div>
|
|
||||||
</>
|
|
||||||
) : (
|
|
||||||
/* Generic mode: one field per column */
|
|
||||||
<>
|
|
||||||
{(() => {
|
|
||||||
const rowCells = getRowCells(activeIndex)
|
|
||||||
return columnsUsed.map((col) => {
|
|
||||||
const cell = rowCells.find(c => c.col_index === col.index)
|
|
||||||
if (!cell) return null
|
|
||||||
return (
|
|
||||||
<div key={col.index}>
|
|
||||||
<div className="flex items-center gap-1 mb-0.5">
|
|
||||||
<label className={`text-[10px] font-medium ${colTypeColor(col.type)}`}>
|
|
||||||
{colTypeLabel(col.type)}
|
|
||||||
</label>
|
|
||||||
<span className="text-[9px] text-gray-400">{cell.cell_id}</span>
|
|
||||||
</div>
|
|
||||||
{/* Cell crop */}
|
|
||||||
<div className="border rounded dark:border-gray-700 overflow-hidden bg-white dark:bg-gray-900 h-10 relative mb-1">
|
|
||||||
<CellCrop imageUrl={dewarpedUrl} bbox={cell.bbox_pct} />
|
|
||||||
</div>
|
|
||||||
<textarea
|
|
||||||
rows={Math.max(1, (cell.text || '').split('\n').length)}
|
|
||||||
value={cell.text || ''}
|
|
||||||
onChange={(e) => updateCell(cell.cell_id, e.target.value)}
|
|
||||||
className="w-full px-2 py-1.5 text-sm border rounded dark:bg-gray-700 dark:border-gray-600 font-mono resize-none"
|
|
||||||
/>
|
|
||||||
</div>
|
|
||||||
)
|
|
||||||
})
|
|
||||||
})()}
|
|
||||||
</>
|
|
||||||
)}
|
|
||||||
</div>
|
</div>
|
||||||
|
|
||||||
{/* Action buttons */}
|
{/* Action buttons */}
|
||||||
@@ -895,57 +739,32 @@ export function StepWordRecognition({ sessionId, onNext, goToStep }: StepWordRec
|
|||||||
<div>Ctrl+Up = Zurueck</div>
|
<div>Ctrl+Up = Zurueck</div>
|
||||||
</div>
|
</div>
|
||||||
|
|
||||||
{/* Entry/Row list (compact) */}
|
{/* Row list (compact) */}
|
||||||
<div className="border-t dark:border-gray-700 pt-2 mt-2">
|
<div className="border-t dark:border-gray-700 pt-2 mt-2">
|
||||||
<div className="text-[10px] font-medium text-gray-500 dark:text-gray-400 mb-1">
|
<div className="text-[10px] font-medium text-gray-500 dark:text-gray-400 mb-1">
|
||||||
{isVocab ? 'Alle Eintraege' : 'Alle Zeilen'}
|
Alle Zeilen
|
||||||
</div>
|
</div>
|
||||||
<div className="max-h-48 overflow-y-auto space-y-0.5">
|
<div className="max-h-48 overflow-y-auto space-y-0.5">
|
||||||
{isVocab ? (
|
{sortedRowIndices.map((rowIdx, posIdx) => {
|
||||||
editedEntries.map((entry, idx) => (
|
const rowCells = cellsByRow.get(rowIdx) || []
|
||||||
|
const textParts = rowCells.filter(c => c.text).map(c => c.text.replace(/\n/g, ' '))
|
||||||
|
return (
|
||||||
<div
|
<div
|
||||||
key={idx}
|
key={rowIdx}
|
||||||
onClick={() => setActiveIndex(idx)}
|
onClick={() => setActiveIndex(posIdx)}
|
||||||
className={`flex items-center gap-1 px-2 py-1 rounded text-[10px] cursor-pointer transition-colors ${
|
className={`flex items-center gap-1 px-2 py-1 rounded text-[10px] cursor-pointer transition-colors ${
|
||||||
idx === activeIndex
|
posIdx === activeIndex
|
||||||
? 'bg-teal-50 dark:bg-teal-900/30 border border-teal-200 dark:border-teal-700'
|
? 'bg-teal-50 dark:bg-teal-900/30 border border-teal-200 dark:border-teal-700'
|
||||||
: 'hover:bg-gray-50 dark:hover:bg-gray-700/50'
|
: 'hover:bg-gray-50 dark:hover:bg-gray-700/50'
|
||||||
}`}
|
}`}
|
||||||
>
|
>
|
||||||
<span className="w-4 text-right text-gray-400">{idx + 1}</span>
|
<span className="w-6 text-right text-gray-400 font-mono">R{String(rowIdx).padStart(2, '0')}</span>
|
||||||
<span className={`w-2 h-2 rounded-full ${
|
|
||||||
entry.status === 'confirmed' ? 'bg-green-500' :
|
|
||||||
entry.status === 'edited' ? 'bg-blue-500' :
|
|
||||||
entry.status === 'skipped' ? 'bg-orange-400' :
|
|
||||||
'bg-gray-300 dark:bg-gray-600'
|
|
||||||
}`} />
|
|
||||||
<span className="truncate text-gray-600 dark:text-gray-400 font-mono">
|
<span className="truncate text-gray-600 dark:text-gray-400 font-mono">
|
||||||
{(entry.english || '\u2014').replace(/\n/g, ' ')} → {(entry.german || '\u2014').replace(/\n/g, ' ')}
|
{textParts.join(' \u2192 ') || '\u2014'}
|
||||||
</span>
|
</span>
|
||||||
</div>
|
</div>
|
||||||
))
|
)
|
||||||
) : (
|
})}
|
||||||
sortedRowIndices.map((rowIdx, posIdx) => {
|
|
||||||
const rowCells = cellsByRow.get(rowIdx) || []
|
|
||||||
const firstText = rowCells.find(c => c.text)?.text || ''
|
|
||||||
return (
|
|
||||||
<div
|
|
||||||
key={rowIdx}
|
|
||||||
onClick={() => setActiveIndex(posIdx)}
|
|
||||||
className={`flex items-center gap-1 px-2 py-1 rounded text-[10px] cursor-pointer transition-colors ${
|
|
||||||
posIdx === activeIndex
|
|
||||||
? 'bg-teal-50 dark:bg-teal-900/30 border border-teal-200 dark:border-teal-700'
|
|
||||||
: 'hover:bg-gray-50 dark:hover:bg-gray-700/50'
|
|
||||||
}`}
|
|
||||||
>
|
|
||||||
<span className="w-6 text-right text-gray-400 font-mono">R{String(rowIdx).padStart(2, '0')}</span>
|
|
||||||
<span className="truncate text-gray-600 dark:text-gray-400 font-mono">
|
|
||||||
{firstText.replace(/\n/g, ' ').substring(0, 60) || '\u2014'}
|
|
||||||
</span>
|
|
||||||
</div>
|
|
||||||
)
|
|
||||||
})
|
|
||||||
)}
|
|
||||||
</div>
|
</div>
|
||||||
</div>
|
</div>
|
||||||
</div>
|
</div>
|
||||||
|
|||||||
@@ -4281,8 +4281,12 @@ def _cells_to_vocab_entries(
|
|||||||
sum(confidences) / len(confidences), 1
|
sum(confidences) / len(confidences), 1
|
||||||
) if confidences else 0.0
|
) if confidences else 0.0
|
||||||
|
|
||||||
# Only include if at least one vocab field has text
|
# Only include if at least one mapped field has text
|
||||||
if entry['english'] or entry['german'] or entry['example']:
|
has_content = any(
|
||||||
|
entry.get(f)
|
||||||
|
for f in col_type_to_field.values()
|
||||||
|
)
|
||||||
|
if has_content:
|
||||||
entries.append(entry)
|
entries.append(entry)
|
||||||
|
|
||||||
return entries
|
return entries
|
||||||
|
|||||||
@@ -1465,6 +1465,106 @@ class TestCellsToVocabEntriesPageRef:
|
|||||||
assert entries[0]['source_page'] == ''
|
assert entries[0]['source_page'] == ''
|
||||||
assert entries[0]['bbox_ref'] is None
|
assert entries[0]['bbox_ref'] is None
|
||||||
|
|
||||||
|
def test_marker_only_row_included(self):
    """A row whose only text is in the marker column must still produce an entry.

    Three rows are fed in: one with english + marker text, one with marker
    text only, and one with no text at all. Only the last one may be dropped.
    """
    from cv_vocab_pipeline import _cells_to_vocab_entries

    def make_cell(row, col_type, text, x, y, w, conf):
        # Build one OCR cell dict; height and engine are the same for all fixtures.
        return {
            'row_index': row,
            'col_type': col_type,
            'text': text,
            'bbox_pct': {'x': x, 'y': y, 'w': w, 'h': 5},
            'confidence': conf,
            'ocr_engine': 'tesseract',
        }

    cells = [
        # Row 0: english and marker both carry text
        make_cell(0, 'column_en', 'hello', 10, 10, 30, 95.0),
        make_cell(0, 'column_marker', '!', 5, 10, 3, 80.0),
        # Row 1: marker text only, english is empty
        make_cell(1, 'column_en', '', 10, 20, 30, 0.0),
        make_cell(1, 'column_marker', '!', 5, 20, 3, 70.0),
        # Row 2: nothing recognised in either column
        make_cell(2, 'column_en', '', 10, 30, 30, 0.0),
        make_cell(2, 'column_marker', '', 5, 30, 3, 0.0),
    ]
    columns_meta = [{'type': 'column_en'}, {'type': 'column_marker'}]

    entries = _cells_to_vocab_entries(cells, columns_meta)

    # Rows 0 and 1 survive the filter; the fully empty row 2 does not.
    assert len(entries) == 2

    with_english = entries[0]
    assert with_english['english'] == 'hello'
    assert with_english['marker'] == '!'

    marker_only = entries[1]
    assert marker_only['english'] == ''
    assert marker_only['marker'] == '!'
|
||||||
|
|
||||||
|
def test_page_ref_only_row_included(self):
    """A row whose only text is a page reference must still produce an entry."""
    from cv_vocab_pipeline import _cells_to_vocab_entries

    # Single row: the english cell is empty, the page_ref cell holds 'p.59'.
    empty_english = dict(
        row_index=0,
        col_type='column_en',
        text='',
        bbox_pct=dict(x=10, y=10, w=30, h=5),
        confidence=0.0,
        ocr_engine='tesseract',
    )
    page_reference = dict(
        row_index=0,
        col_type='page_ref',
        text='p.59',
        bbox_pct=dict(x=5, y=10, w=5, h=5),
        confidence=80.0,
        ocr_engine='tesseract',
    )
    cells = [empty_english, page_reference]
    columns_meta = [{'type': 'column_en'}, {'type': 'page_ref'}]

    entries = _cells_to_vocab_entries(cells, columns_meta)

    # The row is kept and its page reference is exposed as source_page.
    assert len(entries) == 1
    assert entries[0]['source_page'] == 'p.59'
|
||||||
|
|
||||||
|
|
||||||
# =============================================
|
# =============================================
|
||||||
# RUN TESTS
|
# RUN TESTS
|
||||||
|
|||||||
Reference in New Issue
Block a user