feat(ocr-pipeline): generic cell-grid with optional vocab mapping
Extract build_cell_grid() from build_word_grid() as a layout-agnostic foundation. Step 5 now produces a generic cell grid (columns x rows) and auto-detects whether a vocab layout is present. The frontend dynamically switches between the vocab table (EN/DE/Example) and the generic cell table based on the layout type.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -29,7 +29,7 @@ export interface SessionInfo {
|
||||
dewarp_result?: DewarpResult
|
||||
column_result?: ColumnResult
|
||||
row_result?: RowResult
|
||||
word_result?: WordResult
|
||||
word_result?: GridResult
|
||||
}
|
||||
|
||||
export interface DeskewResult {
|
||||
@@ -124,6 +124,49 @@ export interface WordBbox {
|
||||
h: number
|
||||
}
|
||||
|
||||
/**
 * One OCR-recognised cell of the generic (columns x rows) grid.
 */
export interface GridCell {
  /** Cell identifier, e.g. "R03_C1". */
  cell_id: string
  /** Row this cell belongs to; cells sharing a row_index form one table row. */
  row_index: number
  /** Column this cell belongs to; matched against ColumnMeta.index. */
  col_index: number
  /** Column type tag (presumably the same vocabulary as ColumnMeta.type, verify against backend). */
  col_type: string
  /** Recognised text; may contain "\n" for multi-line cells. */
  text: string
  /** OCR confidence, displayed as a percentage. */
  confidence: number
  /** Bounding box (presumably in image pixels, confirm against backend). */
  bbox_px: WordBbox
  /** Bounding box in percent of the image, used directly for CSS overlay positioning. */
  bbox_pct: WordBbox
  ocr_engine?: string
  /** Labeling state; treated as 'pending' when absent. */
  status?: 'pending' | 'confirmed' | 'edited' | 'skipped'
}
|
||||
|
||||
/**
 * Description of one column that took part in building the cell grid.
 */
export interface ColumnMeta {
  /** Column index; GridCell.col_index refers to this value. */
  index: number
  /** Column type tag such as 'column_en' or 'page_ref'; drives header label and colour. */
  type: string
  /** Horizontal start of the column (unit not visible here, presumably pixels; confirm). */
  x: number
  /** Column width (same unit as x). */
  width: number
}
|
||||
|
||||
/**
 * Result of the word-recognition step: a generic cell grid plus, when a
 * vocabulary layout was detected, the derived vocab entries.
 */
export interface GridResult {
  /** All recognised cells of the grid. */
  cells: GridCell[]
  grid_shape: { rows: number; cols: number; total_cells: number }
  /** Columns that make up the grid, used to build dynamic table headers. */
  columns_used: ColumnMeta[]
  /** Detected layout; the frontend switches between vocab and generic tables on it. */
  layout: 'vocab' | 'generic'
  image_width: number
  image_height: number
  duration_seconds: number
  ocr_engine?: string
  /** Only when layout='vocab'. */
  vocab_entries?: WordEntry[]
  /** Backwards compat alias for vocab_entries. */
  entries?: WordEntry[]
  entry_count?: number
  summary: {
    total_cells: number
    non_empty_cells: number
    low_confidence: number
    // Only when layout='vocab':
    total_entries?: number
    with_english?: number
    with_german?: number
  }
}
|
||||
|
||||
export interface WordEntry {
|
||||
row_index: number
|
||||
english: string
|
||||
@@ -137,6 +180,7 @@ export interface WordEntry {
|
||||
status?: 'pending' | 'confirmed' | 'edited' | 'skipped'
|
||||
}
|
||||
|
||||
/** @deprecated Use GridResult instead */
|
||||
export interface WordResult {
|
||||
entries: WordEntry[]
|
||||
entry_count: number
|
||||
|
||||
@@ -1,13 +1,13 @@
|
||||
'use client'
|
||||
|
||||
import { useCallback, useEffect, useRef, useState } from 'react'
|
||||
import type { WordResult, WordEntry, WordGroundTruth } from '@/app/(admin)/ai/ocr-pipeline/types'
|
||||
import type { GridResult, GridCell, WordEntry, WordGroundTruth } from '@/app/(admin)/ai/ocr-pipeline/types'
|
||||
|
||||
const KLAUSUR_API = '/klausur-api'
|
||||
|
||||
/** Render text with \n as line breaks */
|
||||
function MultilineText({ text }: { text: string }) {
|
||||
if (!text) return <span className="text-gray-300 dark:text-gray-600">—</span>
|
||||
if (!text) return <span className="text-gray-300 dark:text-gray-600">—</span>
|
||||
const lines = text.split('\n')
|
||||
if (lines.length === 1) return <>{text}</>
|
||||
return <>{lines.map((line, i) => (
|
||||
@@ -15,6 +15,31 @@ function MultilineText({ text }: { text: string }) {
|
||||
))}</>
|
||||
}
|
||||
|
||||
/** Column type → human-readable header */
|
||||
/** Known column types and the header text shown for them. */
const COL_TYPE_LABELS: ReadonlyMap<string, string> = new Map([
  ['column_en', 'English'],
  ['column_de', 'Deutsch'],
  ['column_example', 'Example'],
  ['column_text', 'Text'],
  ['column_marker', 'Marker'],
  ['page_ref', 'Seite'],
])

/**
 * Map a column type to a human-readable header.
 *
 * Unknown types fall back to the type itself with the first "column_"
 * occurrence removed. A Map is used instead of a plain object so that type
 * names which happen to match Object.prototype members ("constructor",
 * "toString", ...) are treated as unknown rather than resolving to an
 * inherited function.
 */
function colTypeLabel(colType: string): string {
  return COL_TYPE_LABELS.get(colType) ?? colType.replace('column_', '')
}
|
||||
|
||||
/** Column type → color class */
|
||||
/** Tailwind text-colour classes for the known column types. */
const COL_TYPE_COLORS: ReadonlyMap<string, string> = new Map([
  ['column_en', 'text-blue-600 dark:text-blue-400'],
  ['column_de', 'text-green-600 dark:text-green-400'],
  ['column_example', 'text-orange-600 dark:text-orange-400'],
  ['column_text', 'text-purple-600 dark:text-purple-400'],
  ['column_marker', 'text-gray-500 dark:text-gray-400'],
])

/**
 * Map a column type to its colour class.
 *
 * Unknown types get a neutral gray. A Map lookup avoids picking up inherited
 * Object.prototype members (e.g. "constructor"), which a plain-object lookup
 * would return as a truthy function and interpolate into className.
 */
function colTypeColor(colType: string): string {
  return COL_TYPE_COLORS.get(colType) ?? 'text-gray-600 dark:text-gray-400'
}
|
||||
|
||||
interface StepWordRecognitionProps {
|
||||
sessionId: string | null
|
||||
onNext: () => void
|
||||
@@ -22,7 +47,7 @@ interface StepWordRecognitionProps {
|
||||
}
|
||||
|
||||
export function StepWordRecognition({ sessionId, onNext, goToStep }: StepWordRecognitionProps) {
|
||||
const [wordResult, setWordResult] = useState<WordResult | null>(null)
|
||||
const [gridResult, setGridResult] = useState<GridResult | null>(null)
|
||||
const [detecting, setDetecting] = useState(false)
|
||||
const [error, setError] = useState<string | null>(null)
|
||||
const [gtNotes, setGtNotes] = useState('')
|
||||
@@ -31,6 +56,7 @@ export function StepWordRecognition({ sessionId, onNext, goToStep }: StepWordRec
|
||||
// Step-through labeling state
|
||||
const [activeIndex, setActiveIndex] = useState(0)
|
||||
const [editedEntries, setEditedEntries] = useState<WordEntry[]>([])
|
||||
const [editedCells, setEditedCells] = useState<GridCell[]>([])
|
||||
const [mode, setMode] = useState<'overview' | 'labeling'>('overview')
|
||||
const [ocrEngine, setOcrEngine] = useState<'auto' | 'tesseract' | 'rapid'>('auto')
|
||||
const [usedEngine, setUsedEngine] = useState<string>('')
|
||||
@@ -38,6 +64,8 @@ export function StepWordRecognition({ sessionId, onNext, goToStep }: StepWordRec
|
||||
|
||||
const enRef = useRef<HTMLInputElement>(null)
|
||||
|
||||
const isVocab = gridResult?.layout === 'vocab'
|
||||
|
||||
useEffect(() => {
|
||||
if (!sessionId) return
|
||||
|
||||
@@ -47,9 +75,7 @@ export function StepWordRecognition({ sessionId, onNext, goToStep }: StepWordRec
|
||||
if (res.ok) {
|
||||
const info = await res.json()
|
||||
if (info.word_result) {
|
||||
setWordResult(info.word_result)
|
||||
setUsedEngine(info.word_result.ocr_engine || '')
|
||||
initEntries(info.word_result.entries)
|
||||
applyGridResult(info.word_result)
|
||||
return
|
||||
}
|
||||
}
|
||||
@@ -63,6 +89,17 @@ export function StepWordRecognition({ sessionId, onNext, goToStep }: StepWordRec
|
||||
// eslint-disable-next-line react-hooks/exhaustive-deps
|
||||
}, [sessionId])
|
||||
|
||||
// Store a grid result (loaded from the session or freshly detected) and seed
// the editable labeling state from it.
const applyGridResult = (data: GridResult) => {
  setGridResult(data)
  setUsedEngine(data.ocr_engine || '')

  // `entries` is only a backwards-compat alias; prefer the canonical field.
  const vocabEntries = data.vocab_entries ?? data.entries
  if (data.layout === 'vocab' && vocabEntries) {
    // initEntries also resets the active index.
    initEntries(vocabEntries)
  } else {
    // No vocab entries to step through: start row labeling at the first row
    // so a previous result cannot leave the index out of range.
    setActiveIndex(0)
  }

  if (data.cells) {
    setEditedCells(data.cells.map(c => ({ ...c, status: c.status || 'pending' })))
  }
}
|
||||
|
||||
const initEntries = (entries: WordEntry[]) => {
|
||||
setEditedEntries(entries.map(e => ({ ...e, status: e.status || 'pending' })))
|
||||
setActiveIndex(0)
|
||||
@@ -82,21 +119,20 @@ export function StepWordRecognition({ sessionId, onNext, goToStep }: StepWordRec
|
||||
throw new Error(err.detail || 'Worterkennung fehlgeschlagen')
|
||||
}
|
||||
const data = await res.json()
|
||||
setWordResult(data)
|
||||
setUsedEngine(data.ocr_engine || eng)
|
||||
initEntries(data.entries)
|
||||
applyGridResult(data)
|
||||
} catch (e) {
|
||||
setError(e instanceof Error ? e.message : 'Unbekannter Fehler')
|
||||
} finally {
|
||||
setDetecting(false)
|
||||
}
|
||||
}, [sessionId, ocrEngine])
|
||||
// eslint-disable-next-line react-hooks/exhaustive-deps
|
||||
}, [sessionId, ocrEngine, pronunciation])
|
||||
|
||||
const handleGroundTruth = useCallback(async (isCorrect: boolean) => {
|
||||
if (!sessionId) return
|
||||
const gt: WordGroundTruth = {
|
||||
is_correct: isCorrect,
|
||||
corrected_entries: isCorrect ? undefined : editedEntries,
|
||||
corrected_entries: isCorrect ? undefined : (isVocab ? editedEntries : undefined),
|
||||
notes: gtNotes || undefined,
|
||||
}
|
||||
try {
|
||||
@@ -109,35 +145,68 @@ export function StepWordRecognition({ sessionId, onNext, goToStep }: StepWordRec
|
||||
} catch (e) {
|
||||
console.error('Ground truth save failed:', e)
|
||||
}
|
||||
}, [sessionId, gtNotes, editedEntries])
|
||||
}, [sessionId, gtNotes, editedEntries, isVocab])
|
||||
|
||||
// Step-through: update entry field
|
||||
// Vocab mode: update entry field
|
||||
// Vocab mode: write a new value into one field of the entry at `index` and
// flag that entry as edited. All other entries are left untouched.
const updateEntry = (index: number, field: 'english' | 'german' | 'example', value: string) => {
  setEditedEntries(prev => prev.map((entry, pos) => {
    if (pos !== index) return entry
    return { ...entry, [field]: value, status: 'edited' as const }
  }))
}
|
||||
|
||||
// Generic mode: update cell text
|
||||
// Generic mode: replace the text of the cell with the given id and flag it as
// edited. Cells with other ids are returned unchanged.
const updateCell = (cellId: string, value: string) => {
  setEditedCells(prev => prev.map(cell => {
    if (cell.cell_id !== cellId) return cell
    return { ...cell, text: value, status: 'edited' as const }
  }))
}
|
||||
|
||||
// Step-through: confirm current entry
|
||||
// Confirm the active item and advance to the next one (if any).
// Vocab mode confirms the active entry; generic mode confirms every cell of
// the active row. Items already marked 'edited' keep that status.
const confirmEntry = () => {
  if (isVocab) {
    setEditedEntries(prev => prev.map((e, i) =>
      i === activeIndex ? { ...e, status: e.status === 'edited' ? 'edited' : 'confirmed' } : e
    ))
  } else {
    // Generic: confirm all cells in this row
    const cellIds = new Set(getRowCells(activeIndex).map(c => c.cell_id))
    setEditedCells(prev => prev.map(c =>
      cellIds.has(c.cell_id) ? { ...c, status: c.status === 'edited' ? 'edited' : 'confirmed' } : c
    ))
  }

  // Stay on the last item instead of running past the end.
  const maxIdx = isVocab ? editedEntries.length - 1 : getUniqueRowCount() - 1
  if (activeIndex < maxIdx) {
    setActiveIndex(activeIndex + 1)
  }
}
|
||||
|
||||
// Step-through: skip current entry
|
||||
// Mark the active item as skipped and advance to the next one (if any).
// Vocab mode skips the active entry; generic mode skips every cell of the
// active row, mirroring what confirmEntry does for confirmation.
const skipEntry = () => {
  if (isVocab) {
    setEditedEntries(prev => prev.map((e, i) =>
      i === activeIndex ? { ...e, status: 'skipped' as const } : e
    ))
  } else {
    // Generic: without this the row would be advanced past but stay 'pending'.
    const cellIds = new Set(getRowCells(activeIndex).map(c => c.cell_id))
    setEditedCells(prev => prev.map(c =>
      cellIds.has(c.cell_id) ? { ...c, status: 'skipped' as const } : c
    ))
  }

  // Stay on the last item instead of running past the end.
  const maxIdx = isVocab ? editedEntries.length - 1 : getUniqueRowCount() - 1
  if (activeIndex < maxIdx) {
    setActiveIndex(activeIndex + 1)
  }
}
|
||||
|
||||
// Helper: get unique row indices from cells
|
||||
// Number of distinct row_index values among the edited cells (0 when there
// are no cells). This is the row total used for generic-mode navigation.
const getUniqueRowCount = () => {
  if (editedCells.length === 0) return 0
  const rowIndices = new Set<number>()
  for (const cell of editedCells) {
    rowIndices.add(cell.row_index)
  }
  return rowIndices.size
}
|
||||
|
||||
// Helper: get cells for a given row index (by position in sorted unique rows)
|
||||
// Cells of the row at `rowPosition`, where the position counts through the
// distinct row_index values in ascending order (not the raw row_index).
// An out-of-range position yields an empty array.
const getRowCells = (rowPosition: number) => {
  const distinctRows = Array.from(new Set(editedCells.map(c => c.row_index)))
  distinctRows.sort((a, b) => a - b)
  const targetRow = distinctRows[rowPosition]
  return editedCells.filter(c => c.row_index === targetRow)
}
|
||||
|
||||
// Focus english input when active entry changes in labeling mode
|
||||
useEffect(() => {
|
||||
if (mode === 'labeling' && enRef.current) {
|
||||
@@ -152,8 +221,6 @@ export function StepWordRecognition({ sessionId, onNext, goToStep }: StepWordRec
|
||||
if (e.key === 'Enter' && !e.shiftKey) {
|
||||
e.preventDefault()
|
||||
confirmEntry()
|
||||
} else if (e.key === 'Tab' && !e.shiftKey) {
|
||||
// Let Tab move between fields naturally unless on last field
|
||||
} else if (e.key === 'ArrowDown' && e.ctrlKey) {
|
||||
e.preventDefault()
|
||||
skipEntry()
|
||||
@@ -165,7 +232,7 @@ export function StepWordRecognition({ sessionId, onNext, goToStep }: StepWordRec
|
||||
window.addEventListener('keydown', handler)
|
||||
return () => window.removeEventListener('keydown', handler)
|
||||
// eslint-disable-next-line react-hooks/exhaustive-deps
|
||||
}, [mode, activeIndex, editedEntries])
|
||||
}, [mode, activeIndex, editedEntries, editedCells])
|
||||
|
||||
if (!sessionId) {
|
||||
return (
|
||||
@@ -200,9 +267,24 @@ export function StepWordRecognition({ sessionId, onNext, goToStep }: StepWordRec
|
||||
return map[status || 'pending'] || map.pending
|
||||
}
|
||||
|
||||
const summary = gridResult?.summary
const columnsUsed = gridResult?.columns_used || []
const gridShape = gridResult?.grid_shape

// Group cells by row for generic table display
const cellsByRow: Map<number, GridCell[]> = new Map()
for (const cell of editedCells) {
  const existing = cellsByRow.get(cell.row_index) || []
  existing.push(cell)
  cellsByRow.set(cell.row_index, existing)
}
const sortedRowIndices = [...cellsByRow.keys()].sort((a, b) => a - b)

// Counts for labeling progress. Both numbers use the same unit: entries in
// vocab mode, rows in generic mode. A generic row counts as done only when
// every one of its cells is confirmed or edited, so the done-count can never
// exceed the row total.
const confirmedCount = isVocab
  ? editedEntries.filter(e => e.status === 'confirmed' || e.status === 'edited').length
  : [...cellsByRow.values()].filter(row =>
      row.every(c => c.status === 'confirmed' || c.status === 'edited')
    ).length
const totalCount = isVocab ? editedEntries.length : getUniqueRowCount()
|
||||
|
||||
return (
|
||||
<div className="space-y-4">
|
||||
@@ -214,9 +296,26 @@ export function StepWordRecognition({ sessionId, onNext, goToStep }: StepWordRec
|
||||
</div>
|
||||
)}
|
||||
|
||||
{/* Mode toggle */}
|
||||
{wordResult && (
|
||||
{/* Layout badge + Mode toggle */}
|
||||
{gridResult && (
|
||||
<div className="flex items-center gap-2">
|
||||
{/* Layout badge */}
|
||||
<span className={`px-2 py-0.5 rounded text-[10px] uppercase font-semibold ${
|
||||
isVocab
|
||||
? 'bg-indigo-100 dark:bg-indigo-900/30 text-indigo-700 dark:text-indigo-300'
|
||||
: 'bg-gray-100 dark:bg-gray-700 text-gray-600 dark:text-gray-400'
|
||||
}`}>
|
||||
{isVocab ? 'Vokabel-Layout' : 'Generisch'}
|
||||
</span>
|
||||
|
||||
{gridShape && (
|
||||
<span className="text-[10px] text-gray-400">
|
||||
{gridShape.rows}×{gridShape.cols} = {gridShape.total_cells} Zellen
|
||||
</span>
|
||||
)}
|
||||
|
||||
<div className="flex-1" />
|
||||
|
||||
<button
|
||||
onClick={() => setMode('overview')}
|
||||
className={`px-3 py-1.5 text-xs rounded-lg font-medium transition-colors ${
|
||||
@@ -240,7 +339,7 @@ export function StepWordRecognition({ sessionId, onNext, goToStep }: StepWordRec
|
||||
</div>
|
||||
)}
|
||||
|
||||
{/* Overview mode: side-by-side images + entry list */}
|
||||
{/* Overview mode */}
|
||||
{mode === 'overview' && (
|
||||
<>
|
||||
{/* Images: overlay vs clean */}
|
||||
@@ -250,7 +349,7 @@ export function StepWordRecognition({ sessionId, onNext, goToStep }: StepWordRec
|
||||
Mit Grid-Overlay
|
||||
</div>
|
||||
<div className="border rounded-lg overflow-hidden dark:border-gray-700 bg-gray-50 dark:bg-gray-900">
|
||||
{wordResult ? (
|
||||
{gridResult ? (
|
||||
// eslint-disable-next-line @next/next/no-img-element
|
||||
<img
|
||||
src={`${overlayUrl}?t=${Date.now()}`}
|
||||
@@ -280,25 +379,43 @@ export function StepWordRecognition({ sessionId, onNext, goToStep }: StepWordRec
|
||||
</div>
|
||||
|
||||
{/* Result summary */}
|
||||
{wordResult && summary && (
|
||||
{gridResult && summary && (
|
||||
<div className="bg-white dark:bg-gray-800 rounded-xl border border-gray-200 dark:border-gray-700 p-4 space-y-3">
|
||||
<div className="flex items-center justify-between">
|
||||
<h4 className="text-sm font-medium text-gray-700 dark:text-gray-300">
|
||||
Ergebnis: {summary.total_entries} Eintraege erkannt
|
||||
{isVocab
|
||||
? `Ergebnis: ${summary.total_entries ?? 0} Vokabel-Eintraege erkannt`
|
||||
: `Ergebnis: ${summary.non_empty_cells}/${summary.total_cells} Zellen mit Text`
|
||||
}
|
||||
</h4>
|
||||
<span className="text-xs text-gray-400">
|
||||
{wordResult.duration_seconds}s
|
||||
{gridResult.duration_seconds}s
|
||||
</span>
|
||||
</div>
|
||||
|
||||
{/* Summary badges */}
|
||||
<div className="flex gap-2 flex-wrap">
|
||||
<span className="px-2 py-0.5 rounded text-xs font-medium bg-blue-100 dark:bg-blue-900/30 text-blue-700 dark:text-blue-300">
|
||||
EN: {summary.with_english}
|
||||
</span>
|
||||
<span className="px-2 py-0.5 rounded text-xs font-medium bg-green-100 dark:bg-green-900/30 text-green-700 dark:text-green-300">
|
||||
DE: {summary.with_german}
|
||||
</span>
|
||||
{isVocab ? (
|
||||
<>
|
||||
<span className="px-2 py-0.5 rounded text-xs font-medium bg-blue-100 dark:bg-blue-900/30 text-blue-700 dark:text-blue-300">
|
||||
EN: {summary.with_english ?? 0}
|
||||
</span>
|
||||
<span className="px-2 py-0.5 rounded text-xs font-medium bg-green-100 dark:bg-green-900/30 text-green-700 dark:text-green-300">
|
||||
DE: {summary.with_german ?? 0}
|
||||
</span>
|
||||
</>
|
||||
) : (
|
||||
<>
|
||||
<span className="px-2 py-0.5 rounded text-xs font-medium bg-blue-100 dark:bg-blue-900/30 text-blue-700 dark:text-blue-300">
|
||||
Zellen: {summary.non_empty_cells}/{summary.total_cells}
|
||||
</span>
|
||||
{columnsUsed.map((col, i) => (
|
||||
<span key={i} className={`px-2 py-0.5 rounded text-xs font-medium bg-gray-100 dark:bg-gray-700 ${colTypeColor(col.type)}`}>
|
||||
C{col.index}: {colTypeLabel(col.type)}
|
||||
</span>
|
||||
))}
|
||||
</>
|
||||
)}
|
||||
{summary.low_confidence > 0 && (
|
||||
<span className="px-2 py-0.5 rounded text-xs font-medium bg-red-100 dark:bg-red-900/30 text-red-700 dark:text-red-300">
|
||||
Unsicher: {summary.low_confidence}
|
||||
@@ -306,57 +423,110 @@ export function StepWordRecognition({ sessionId, onNext, goToStep }: StepWordRec
|
||||
)}
|
||||
</div>
|
||||
|
||||
{/* Entry table */}
|
||||
{/* Entry/Cell table */}
|
||||
<div className="max-h-80 overflow-y-auto">
|
||||
<table className="w-full text-xs">
|
||||
<thead className="sticky top-0 bg-white dark:bg-gray-800">
|
||||
<tr className="text-left text-gray-500 dark:text-gray-400 border-b dark:border-gray-700">
|
||||
<th className="py-1 pr-2 w-8">#</th>
|
||||
<th className="py-1 pr-2">English</th>
|
||||
<th className="py-1 pr-2">Deutsch</th>
|
||||
<th className="py-1 pr-2">Example</th>
|
||||
<th className="py-1 w-12 text-right">Conf</th>
|
||||
</tr>
|
||||
</thead>
|
||||
<tbody>
|
||||
{editedEntries.map((entry, idx) => (
|
||||
<tr
|
||||
key={idx}
|
||||
className={`border-b dark:border-gray-700/50 ${
|
||||
idx === activeIndex ? 'bg-teal-50 dark:bg-teal-900/20' : ''
|
||||
}`}
|
||||
onClick={() => { setActiveIndex(idx); setMode('labeling') }}
|
||||
>
|
||||
<td className="py-1 pr-2 text-gray-400">{idx + 1}</td>
|
||||
<td className="py-1 pr-2 font-mono text-gray-700 dark:text-gray-300 cursor-pointer">
|
||||
<MultilineText text={entry.english} />
|
||||
</td>
|
||||
<td className="py-1 pr-2 font-mono text-gray-700 dark:text-gray-300 cursor-pointer">
|
||||
<MultilineText text={entry.german} />
|
||||
</td>
|
||||
<td className="py-1 pr-2 font-mono text-gray-500 dark:text-gray-400 cursor-pointer max-w-[200px]">
|
||||
<MultilineText text={entry.example} />
|
||||
</td>
|
||||
<td className={`py-1 text-right font-mono ${confColor(entry.confidence)}`}>
|
||||
{entry.confidence}%
|
||||
</td>
|
||||
{isVocab ? (
|
||||
/* Vocab table: EN/DE/Example columns */
|
||||
<table className="w-full text-xs">
|
||||
<thead className="sticky top-0 bg-white dark:bg-gray-800">
|
||||
<tr className="text-left text-gray-500 dark:text-gray-400 border-b dark:border-gray-700">
|
||||
<th className="py-1 pr-2 w-8">#</th>
|
||||
<th className="py-1 pr-2">English</th>
|
||||
<th className="py-1 pr-2">Deutsch</th>
|
||||
<th className="py-1 pr-2">Example</th>
|
||||
<th className="py-1 w-12 text-right">Conf</th>
|
||||
</tr>
|
||||
))}
|
||||
</tbody>
|
||||
</table>
|
||||
</thead>
|
||||
<tbody>
|
||||
{editedEntries.map((entry, idx) => (
|
||||
<tr
|
||||
key={idx}
|
||||
className={`border-b dark:border-gray-700/50 ${
|
||||
idx === activeIndex ? 'bg-teal-50 dark:bg-teal-900/20' : ''
|
||||
}`}
|
||||
onClick={() => { setActiveIndex(idx); setMode('labeling') }}
|
||||
>
|
||||
<td className="py-1 pr-2 text-gray-400">{idx + 1}</td>
|
||||
<td className="py-1 pr-2 font-mono text-gray-700 dark:text-gray-300 cursor-pointer">
|
||||
<MultilineText text={entry.english} />
|
||||
</td>
|
||||
<td className="py-1 pr-2 font-mono text-gray-700 dark:text-gray-300 cursor-pointer">
|
||||
<MultilineText text={entry.german} />
|
||||
</td>
|
||||
<td className="py-1 pr-2 font-mono text-gray-500 dark:text-gray-400 cursor-pointer max-w-[200px]">
|
||||
<MultilineText text={entry.example} />
|
||||
</td>
|
||||
<td className={`py-1 text-right font-mono ${confColor(entry.confidence)}`}>
|
||||
{entry.confidence}%
|
||||
</td>
|
||||
</tr>
|
||||
))}
|
||||
</tbody>
|
||||
</table>
|
||||
) : (
|
||||
/* Generic table: dynamic columns from columns_used */
|
||||
<table className="w-full text-xs">
|
||||
<thead className="sticky top-0 bg-white dark:bg-gray-800">
|
||||
<tr className="text-left text-gray-500 dark:text-gray-400 border-b dark:border-gray-700">
|
||||
<th className="py-1 pr-2 w-12">Zeile</th>
|
||||
{columnsUsed.map((col, i) => (
|
||||
<th key={i} className={`py-1 pr-2 ${colTypeColor(col.type)}`}>
|
||||
{colTypeLabel(col.type)}
|
||||
</th>
|
||||
))}
|
||||
<th className="py-1 w-12 text-right">Conf</th>
|
||||
</tr>
|
||||
</thead>
|
||||
<tbody>
|
||||
{sortedRowIndices.map((rowIdx, posIdx) => {
|
||||
const rowCells = cellsByRow.get(rowIdx) || []
|
||||
const avgConf = rowCells.length
|
||||
? Math.round(rowCells.reduce((s, c) => s + c.confidence, 0) / rowCells.length)
|
||||
: 0
|
||||
return (
|
||||
<tr
|
||||
key={rowIdx}
|
||||
className={`border-b dark:border-gray-700/50 ${
|
||||
posIdx === activeIndex ? 'bg-teal-50 dark:bg-teal-900/20' : ''
|
||||
}`}
|
||||
onClick={() => { setActiveIndex(posIdx); setMode('labeling') }}
|
||||
>
|
||||
<td className="py-1 pr-2 text-gray-400 font-mono text-[10px]">
|
||||
R{String(rowIdx).padStart(2, '0')}
|
||||
</td>
|
||||
{columnsUsed.map((col) => {
|
||||
const cell = rowCells.find(c => c.col_index === col.index)
|
||||
return (
|
||||
<td key={col.index} className="py-1 pr-2 font-mono text-gray-700 dark:text-gray-300 cursor-pointer">
|
||||
<MultilineText text={cell?.text || ''} />
|
||||
</td>
|
||||
)
|
||||
})}
|
||||
<td className={`py-1 text-right font-mono ${confColor(avgConf)}`}>
|
||||
{avgConf}%
|
||||
</td>
|
||||
</tr>
|
||||
)
|
||||
})}
|
||||
</tbody>
|
||||
</table>
|
||||
)}
|
||||
</div>
|
||||
</div>
|
||||
)}
|
||||
</>
|
||||
)}
|
||||
|
||||
{/* Labeling mode: image crop + editable fields */}
|
||||
{mode === 'labeling' && editedEntries.length > 0 && (
|
||||
{/* Labeling mode */}
|
||||
{mode === 'labeling' && (isVocab ? editedEntries.length > 0 : editedCells.length > 0) && (
|
||||
<div className="grid grid-cols-3 gap-4">
|
||||
{/* Left 2/3: Image with highlighted active row */}
|
||||
<div className="col-span-2">
|
||||
<div className="text-xs font-medium text-gray-500 dark:text-gray-400 mb-1">
|
||||
Eintrag {activeIndex + 1} von {editedEntries.length}
|
||||
{isVocab
|
||||
? `Eintrag ${activeIndex + 1} von ${editedEntries.length}`
|
||||
: `Zeile ${activeIndex + 1} von ${getUniqueRowCount()}`
|
||||
}
|
||||
</div>
|
||||
<div className="border rounded-lg overflow-hidden dark:border-gray-700 bg-gray-50 dark:bg-gray-900 relative">
|
||||
{/* eslint-disable-next-line @next/next/no-img-element */}
|
||||
@@ -365,8 +535,8 @@ export function StepWordRecognition({ sessionId, onNext, goToStep }: StepWordRec
|
||||
alt="Wort-Overlay"
|
||||
className="w-full h-auto"
|
||||
/>
|
||||
{/* Highlight overlay for active entry bbox */}
|
||||
{editedEntries[activeIndex]?.bbox && (
|
||||
{/* Highlight overlay for active row/entry */}
|
||||
{isVocab && editedEntries[activeIndex]?.bbox && (
|
||||
<div
|
||||
className="absolute border-2 border-yellow-400 bg-yellow-400/10 pointer-events-none"
|
||||
style={{
|
||||
@@ -377,10 +547,25 @@ export function StepWordRecognition({ sessionId, onNext, goToStep }: StepWordRec
|
||||
}}
|
||||
/>
|
||||
)}
|
||||
{!isVocab && (() => {
|
||||
const rowCells = getRowCells(activeIndex)
|
||||
return rowCells.map(cell => (
|
||||
<div
|
||||
key={cell.cell_id}
|
||||
className="absolute border-2 border-yellow-400 bg-yellow-400/10 pointer-events-none"
|
||||
style={{
|
||||
left: `${cell.bbox_pct.x}%`,
|
||||
top: `${cell.bbox_pct.y}%`,
|
||||
width: `${cell.bbox_pct.w}%`,
|
||||
height: `${cell.bbox_pct.h}%`,
|
||||
}}
|
||||
/>
|
||||
))
|
||||
})()}
|
||||
</div>
|
||||
</div>
|
||||
|
||||
{/* Right 1/3: Editable entry fields */}
|
||||
{/* Right 1/3: Editable fields */}
|
||||
<div className="space-y-3">
|
||||
{/* Navigation */}
|
||||
<div className="flex items-center justify-between">
|
||||
@@ -391,10 +576,15 @@ export function StepWordRecognition({ sessionId, onNext, goToStep }: StepWordRec
|
||||
>
|
||||
Zurueck
|
||||
</button>
|
||||
<span className="text-xs text-gray-500">{activeIndex + 1} / {editedEntries.length}</span>
|
||||
<span className="text-xs text-gray-500">
|
||||
{activeIndex + 1} / {isVocab ? editedEntries.length : getUniqueRowCount()}
|
||||
</span>
|
||||
<button
|
||||
onClick={() => setActiveIndex(Math.min(editedEntries.length - 1, activeIndex + 1))}
|
||||
disabled={activeIndex >= editedEntries.length - 1}
|
||||
onClick={() => setActiveIndex(Math.min(
|
||||
(isVocab ? editedEntries.length : getUniqueRowCount()) - 1,
|
||||
activeIndex + 1
|
||||
))}
|
||||
disabled={activeIndex >= (isVocab ? editedEntries.length : getUniqueRowCount()) - 1}
|
||||
className="px-2 py-1 text-xs border rounded hover:bg-gray-50 dark:hover:bg-gray-700 dark:border-gray-600 disabled:opacity-30"
|
||||
>
|
||||
Weiter
|
||||
@@ -403,16 +593,31 @@ export function StepWordRecognition({ sessionId, onNext, goToStep }: StepWordRec
|
||||
|
||||
{/* Status badge */}
|
||||
<div className="flex items-center gap-2">
|
||||
<span className={`px-2 py-0.5 rounded text-[10px] uppercase font-semibold ${statusBadge(editedEntries[activeIndex]?.status)}`}>
|
||||
{editedEntries[activeIndex]?.status || 'pending'}
|
||||
</span>
|
||||
<span className={`text-xs font-mono ${confColor(editedEntries[activeIndex]?.confidence || 0)}`}>
|
||||
{editedEntries[activeIndex]?.confidence}% Konfidenz
|
||||
</span>
|
||||
{isVocab && (
|
||||
<>
|
||||
<span className={`px-2 py-0.5 rounded text-[10px] uppercase font-semibold ${statusBadge(editedEntries[activeIndex]?.status)}`}>
|
||||
{editedEntries[activeIndex]?.status || 'pending'}
|
||||
</span>
|
||||
<span className={`text-xs font-mono ${confColor(editedEntries[activeIndex]?.confidence || 0)}`}>
|
||||
{editedEntries[activeIndex]?.confidence}% Konfidenz
|
||||
</span>
|
||||
</>
|
||||
)}
|
||||
{!isVocab && (() => {
|
||||
const rowCells = getRowCells(activeIndex)
|
||||
const avgConf = rowCells.length
|
||||
? Math.round(rowCells.reduce((s, c) => s + c.confidence, 0) / rowCells.length)
|
||||
: 0
|
||||
return (
|
||||
<span className={`text-xs font-mono ${confColor(avgConf)}`}>
|
||||
{avgConf}% Konfidenz
|
||||
</span>
|
||||
)
|
||||
})()}
|
||||
</div>
|
||||
|
||||
{/* Cell crops */}
|
||||
{editedEntries[activeIndex]?.bbox_en && (
|
||||
{/* Cell crops (vocab mode) */}
|
||||
{isVocab && editedEntries[activeIndex]?.bbox_en && (
|
||||
<div>
|
||||
<div className="text-[10px] font-medium text-blue-500 mb-0.5">EN-Zelle</div>
|
||||
<div className="border rounded dark:border-gray-700 overflow-hidden bg-white dark:bg-gray-900 h-10 relative">
|
||||
@@ -423,7 +628,7 @@ export function StepWordRecognition({ sessionId, onNext, goToStep }: StepWordRec
|
||||
</div>
|
||||
</div>
|
||||
)}
|
||||
{editedEntries[activeIndex]?.bbox_de && (
|
||||
{isVocab && editedEntries[activeIndex]?.bbox_de && (
|
||||
<div>
|
||||
<div className="text-[10px] font-medium text-green-500 mb-0.5">DE-Zelle</div>
|
||||
<div className="border rounded dark:border-gray-700 overflow-hidden bg-white dark:bg-gray-900 h-10 relative">
|
||||
@@ -437,34 +642,70 @@ export function StepWordRecognition({ sessionId, onNext, goToStep }: StepWordRec
|
||||
|
||||
{/* Editable fields */}
|
||||
<div className="space-y-2">
|
||||
<div>
|
||||
<label className="text-[10px] font-medium text-gray-500 dark:text-gray-400">English</label>
|
||||
<textarea
|
||||
ref={enRef as any}
|
||||
rows={Math.max(1, (editedEntries[activeIndex]?.english || '').split('\n').length)}
|
||||
value={editedEntries[activeIndex]?.english || ''}
|
||||
onChange={(e) => updateEntry(activeIndex, 'english', e.target.value)}
|
||||
className="w-full px-2 py-1.5 text-sm border rounded dark:bg-gray-700 dark:border-gray-600 font-mono resize-none"
|
||||
/>
|
||||
</div>
|
||||
<div>
|
||||
<label className="text-[10px] font-medium text-gray-500 dark:text-gray-400">Deutsch</label>
|
||||
<textarea
|
||||
rows={Math.max(1, (editedEntries[activeIndex]?.german || '').split('\n').length)}
|
||||
value={editedEntries[activeIndex]?.german || ''}
|
||||
onChange={(e) => updateEntry(activeIndex, 'german', e.target.value)}
|
||||
className="w-full px-2 py-1.5 text-sm border rounded dark:bg-gray-700 dark:border-gray-600 font-mono resize-none"
|
||||
/>
|
||||
</div>
|
||||
<div>
|
||||
<label className="text-[10px] font-medium text-gray-500 dark:text-gray-400">Example</label>
|
||||
<textarea
|
||||
rows={Math.max(1, (editedEntries[activeIndex]?.example || '').split('\n').length)}
|
||||
value={editedEntries[activeIndex]?.example || ''}
|
||||
onChange={(e) => updateEntry(activeIndex, 'example', e.target.value)}
|
||||
className="w-full px-2 py-1.5 text-sm border rounded dark:bg-gray-700 dark:border-gray-600 font-mono resize-none"
|
||||
/>
|
||||
</div>
|
||||
{isVocab ? (
|
||||
/* Vocab mode: EN/DE/Example fields */
|
||||
<>
|
||||
<div>
|
||||
<label className="text-[10px] font-medium text-gray-500 dark:text-gray-400">English</label>
|
||||
<textarea
|
||||
ref={enRef as any}
|
||||
rows={Math.max(1, (editedEntries[activeIndex]?.english || '').split('\n').length)}
|
||||
value={editedEntries[activeIndex]?.english || ''}
|
||||
onChange={(e) => updateEntry(activeIndex, 'english', e.target.value)}
|
||||
className="w-full px-2 py-1.5 text-sm border rounded dark:bg-gray-700 dark:border-gray-600 font-mono resize-none"
|
||||
/>
|
||||
</div>
|
||||
<div>
|
||||
<label className="text-[10px] font-medium text-gray-500 dark:text-gray-400">Deutsch</label>
|
||||
<textarea
|
||||
rows={Math.max(1, (editedEntries[activeIndex]?.german || '').split('\n').length)}
|
||||
value={editedEntries[activeIndex]?.german || ''}
|
||||
onChange={(e) => updateEntry(activeIndex, 'german', e.target.value)}
|
||||
className="w-full px-2 py-1.5 text-sm border rounded dark:bg-gray-700 dark:border-gray-600 font-mono resize-none"
|
||||
/>
|
||||
</div>
|
||||
<div>
|
||||
<label className="text-[10px] font-medium text-gray-500 dark:text-gray-400">Example</label>
|
||||
<textarea
|
||||
rows={Math.max(1, (editedEntries[activeIndex]?.example || '').split('\n').length)}
|
||||
value={editedEntries[activeIndex]?.example || ''}
|
||||
onChange={(e) => updateEntry(activeIndex, 'example', e.target.value)}
|
||||
className="w-full px-2 py-1.5 text-sm border rounded dark:bg-gray-700 dark:border-gray-600 font-mono resize-none"
|
||||
/>
|
||||
</div>
|
||||
</>
|
||||
) : (
|
||||
/* Generic mode: one field per column */
|
||||
<>
|
||||
{(() => {
|
||||
const rowCells = getRowCells(activeIndex)
|
||||
return columnsUsed.map((col) => {
|
||||
const cell = rowCells.find(c => c.col_index === col.index)
|
||||
if (!cell) return null
|
||||
return (
|
||||
<div key={col.index}>
|
||||
<div className="flex items-center gap-1 mb-0.5">
|
||||
<label className={`text-[10px] font-medium ${colTypeColor(col.type)}`}>
|
||||
{colTypeLabel(col.type)}
|
||||
</label>
|
||||
<span className="text-[9px] text-gray-400">{cell.cell_id}</span>
|
||||
</div>
|
||||
{/* Cell crop */}
|
||||
<div className="border rounded dark:border-gray-700 overflow-hidden bg-white dark:bg-gray-900 h-10 relative mb-1">
|
||||
<CellCrop imageUrl={dewarpedUrl} bbox={cell.bbox_pct} />
|
||||
</div>
|
||||
<textarea
|
||||
rows={Math.max(1, (cell.text || '').split('\n').length)}
|
||||
value={cell.text || ''}
|
||||
onChange={(e) => updateCell(cell.cell_id, e.target.value)}
|
||||
className="w-full px-2 py-1.5 text-sm border rounded dark:bg-gray-700 dark:border-gray-600 font-mono resize-none"
|
||||
/>
|
||||
</div>
|
||||
)
|
||||
})
|
||||
})()}
|
||||
</>
|
||||
)}
|
||||
</div>
|
||||
|
||||
{/* Action buttons */}
|
||||
@@ -486,38 +727,61 @@ export function StepWordRecognition({ sessionId, onNext, goToStep }: StepWordRec
|
||||
{/* Shortcuts hint */}
|
||||
<div className="text-[10px] text-gray-400 space-y-0.5">
|
||||
<div>Enter = Bestaetigen & weiter</div>
|
||||
<div>Ctrl+↓ = Ueberspringen</div>
|
||||
<div>Ctrl+↑ = Zurueck</div>
|
||||
<div>Ctrl+Down = Ueberspringen</div>
|
||||
<div>Ctrl+Up = Zurueck</div>
|
||||
</div>
|
||||
|
||||
{/* Entry list (compact) */}
|
||||
{/* Entry/Row list (compact) */}
|
||||
<div className="border-t dark:border-gray-700 pt-2 mt-2">
|
||||
<div className="text-[10px] font-medium text-gray-500 dark:text-gray-400 mb-1">
|
||||
Alle Eintraege
|
||||
{isVocab ? 'Alle Eintraege' : 'Alle Zeilen'}
|
||||
</div>
|
||||
<div className="max-h-48 overflow-y-auto space-y-0.5">
|
||||
{editedEntries.map((entry, idx) => (
|
||||
<div
|
||||
key={idx}
|
||||
onClick={() => setActiveIndex(idx)}
|
||||
className={`flex items-center gap-1 px-2 py-1 rounded text-[10px] cursor-pointer transition-colors ${
|
||||
idx === activeIndex
|
||||
? 'bg-teal-50 dark:bg-teal-900/30 border border-teal-200 dark:border-teal-700'
|
||||
: 'hover:bg-gray-50 dark:hover:bg-gray-700/50'
|
||||
}`}
|
||||
>
|
||||
<span className="w-4 text-right text-gray-400">{idx + 1}</span>
|
||||
<span className={`w-2 h-2 rounded-full ${
|
||||
entry.status === 'confirmed' ? 'bg-green-500' :
|
||||
entry.status === 'edited' ? 'bg-blue-500' :
|
||||
entry.status === 'skipped' ? 'bg-orange-400' :
|
||||
'bg-gray-300 dark:bg-gray-600'
|
||||
}`} />
|
||||
<span className="truncate text-gray-600 dark:text-gray-400 font-mono">
|
||||
{(entry.english || '—').replace(/\n/g, ' ')} → {(entry.german || '—').replace(/\n/g, ' ')}
|
||||
</span>
|
||||
</div>
|
||||
))}
|
||||
{isVocab ? (
|
||||
editedEntries.map((entry, idx) => (
|
||||
<div
|
||||
key={idx}
|
||||
onClick={() => setActiveIndex(idx)}
|
||||
className={`flex items-center gap-1 px-2 py-1 rounded text-[10px] cursor-pointer transition-colors ${
|
||||
idx === activeIndex
|
||||
? 'bg-teal-50 dark:bg-teal-900/30 border border-teal-200 dark:border-teal-700'
|
||||
: 'hover:bg-gray-50 dark:hover:bg-gray-700/50'
|
||||
}`}
|
||||
>
|
||||
<span className="w-4 text-right text-gray-400">{idx + 1}</span>
|
||||
<span className={`w-2 h-2 rounded-full ${
|
||||
entry.status === 'confirmed' ? 'bg-green-500' :
|
||||
entry.status === 'edited' ? 'bg-blue-500' :
|
||||
entry.status === 'skipped' ? 'bg-orange-400' :
|
||||
'bg-gray-300 dark:bg-gray-600'
|
||||
}`} />
|
||||
<span className="truncate text-gray-600 dark:text-gray-400 font-mono">
|
||||
{(entry.english || '\u2014').replace(/\n/g, ' ')} → {(entry.german || '\u2014').replace(/\n/g, ' ')}
|
||||
</span>
|
||||
</div>
|
||||
))
|
||||
) : (
|
||||
sortedRowIndices.map((rowIdx, posIdx) => {
|
||||
const rowCells = cellsByRow.get(rowIdx) || []
|
||||
const firstText = rowCells.find(c => c.text)?.text || ''
|
||||
return (
|
||||
<div
|
||||
key={rowIdx}
|
||||
onClick={() => setActiveIndex(posIdx)}
|
||||
className={`flex items-center gap-1 px-2 py-1 rounded text-[10px] cursor-pointer transition-colors ${
|
||||
posIdx === activeIndex
|
||||
? 'bg-teal-50 dark:bg-teal-900/30 border border-teal-200 dark:border-teal-700'
|
||||
: 'hover:bg-gray-50 dark:hover:bg-gray-700/50'
|
||||
}`}
|
||||
>
|
||||
<span className="w-6 text-right text-gray-400 font-mono">R{String(rowIdx).padStart(2, '0')}</span>
|
||||
<span className="truncate text-gray-600 dark:text-gray-400 font-mono">
|
||||
{firstText.replace(/\n/g, ' ').substring(0, 60) || '\u2014'}
|
||||
</span>
|
||||
</div>
|
||||
)
|
||||
})
|
||||
)}
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
@@ -525,7 +789,7 @@ export function StepWordRecognition({ sessionId, onNext, goToStep }: StepWordRec
|
||||
)}
|
||||
|
||||
{/* Controls */}
|
||||
{wordResult && (
|
||||
{gridResult && (
|
||||
<div className="bg-white dark:bg-gray-800 rounded-xl border border-gray-200 dark:border-gray-700 p-4 space-y-3">
|
||||
<div className="flex items-center gap-3 flex-wrap">
|
||||
{/* OCR Engine selector */}
|
||||
@@ -539,15 +803,17 @@ export function StepWordRecognition({ sessionId, onNext, goToStep }: StepWordRec
|
||||
<option value="tesseract">Tesseract</option>
|
||||
</select>
|
||||
|
||||
{/* Pronunciation selector */}
|
||||
<select
|
||||
value={pronunciation}
|
||||
onChange={(e) => setPronunciation(e.target.value as 'british' | 'american')}
|
||||
className="px-2 py-1.5 text-xs border rounded-lg dark:bg-gray-700 dark:border-gray-600"
|
||||
>
|
||||
<option value="british">Britisch (RP)</option>
|
||||
<option value="american">Amerikanisch</option>
|
||||
</select>
|
||||
{/* Pronunciation selector (only for vocab) */}
|
||||
{isVocab && (
|
||||
<select
|
||||
value={pronunciation}
|
||||
onChange={(e) => setPronunciation(e.target.value as 'british' | 'american')}
|
||||
className="px-2 py-1.5 text-xs border rounded-lg dark:bg-gray-700 dark:border-gray-600"
|
||||
>
|
||||
<option value="british">Britisch (RP)</option>
|
||||
<option value="american">Amerikanisch</option>
|
||||
</select>
|
||||
)}
|
||||
|
||||
<button
|
||||
onClick={() => runAutoDetection()}
|
||||
|
||||
@@ -3009,7 +3009,7 @@ def _replace_phonetics_in_text(text: str, pronunciation: str = 'british') -> str
|
||||
return _PHONETIC_BRACKET_RE.sub(replacer, text)
|
||||
|
||||
|
||||
def build_word_grid(
|
||||
def build_cell_grid(
|
||||
ocr_img: np.ndarray,
|
||||
column_regions: List[PageRegion],
|
||||
row_geometries: List[RowGeometry],
|
||||
@@ -3018,9 +3018,11 @@ def build_word_grid(
|
||||
lang: str = "eng+deu",
|
||||
ocr_engine: str = "auto",
|
||||
img_bgr: Optional[np.ndarray] = None,
|
||||
pronunciation: str = "british",
|
||||
) -> List[Dict[str, Any]]:
|
||||
"""Build a word grid by intersecting columns and rows, then OCR each cell.
|
||||
) -> Tuple[List[Dict[str, Any]], List[Dict[str, Any]]]:
|
||||
"""Generic Cell-Grid: Columns × Rows → cells with OCR text.
|
||||
|
||||
This is the layout-agnostic foundation. Every column (except column_ignore)
|
||||
is intersected with every content row to produce numbered cells.
|
||||
|
||||
Args:
|
||||
ocr_img: Binarized full-page image (for Tesseract).
|
||||
@@ -3029,11 +3031,12 @@ def build_word_grid(
|
||||
img_w: Image width in pixels.
|
||||
img_h: Image height in pixels.
|
||||
lang: Default Tesseract language.
|
||||
ocr_engine: 'tesseract', 'rapid', or 'auto' (rapid if available, else tesseract).
|
||||
ocr_engine: 'tesseract', 'rapid', or 'auto'.
|
||||
img_bgr: BGR color image (required for RapidOCR).
|
||||
|
||||
Returns:
|
||||
List of entry dicts with english/german/example text and bbox info (percent).
|
||||
(cells, columns_meta) where cells is a list of cell dicts and
|
||||
columns_meta describes the columns used.
|
||||
"""
|
||||
# Resolve engine choice
|
||||
use_rapid = False
|
||||
@@ -3046,24 +3049,34 @@ def build_word_grid(
|
||||
use_rapid = True
|
||||
|
||||
engine_name = "rapid" if use_rapid else "tesseract"
|
||||
logger.info(f"build_word_grid: using OCR engine '{engine_name}'")
|
||||
logger.info(f"build_cell_grid: using OCR engine '{engine_name}'")
|
||||
|
||||
# Filter to content rows only (skip header/footer)
|
||||
content_rows = [r for r in row_geometries if r.row_type == 'content']
|
||||
if not content_rows:
|
||||
logger.warning("build_word_grid: no content rows found")
|
||||
return []
|
||||
logger.warning("build_cell_grid: no content rows found")
|
||||
return [], []
|
||||
|
||||
# Map column types to roles
|
||||
VOCAB_COLUMN_TYPES = {'column_en', 'column_de', 'column_example'}
|
||||
relevant_cols = [c for c in column_regions if c.type in VOCAB_COLUMN_TYPES]
|
||||
# Use all columns except column_ignore
|
||||
relevant_cols = [c for c in column_regions if c.type != 'column_ignore']
|
||||
if not relevant_cols:
|
||||
logger.warning("build_word_grid: no relevant vocabulary columns found")
|
||||
return []
|
||||
logger.warning("build_cell_grid: no usable columns found")
|
||||
return [], []
|
||||
|
||||
# Sort columns left-to-right
|
||||
relevant_cols.sort(key=lambda c: c.x)
|
||||
|
||||
# Build columns_meta
|
||||
columns_meta = [
|
||||
{
|
||||
'index': col_idx,
|
||||
'type': col.type,
|
||||
'x': col.x,
|
||||
'width': col.width,
|
||||
}
|
||||
for col_idx, col in enumerate(relevant_cols)
|
||||
]
|
||||
|
||||
# Choose OCR language per column type (Tesseract only)
|
||||
lang_map = {
|
||||
'column_en': 'eng',
|
||||
@@ -3071,47 +3084,40 @@ def build_word_grid(
|
||||
'column_example': 'eng+deu',
|
||||
}
|
||||
|
||||
entries: List[Dict[str, Any]] = []
|
||||
cells: List[Dict[str, Any]] = []
|
||||
|
||||
for row_idx, row in enumerate(content_rows):
|
||||
entry: Dict[str, Any] = {
|
||||
'row_index': row_idx,
|
||||
'english': '',
|
||||
'german': '',
|
||||
'example': '',
|
||||
'confidence': 0.0,
|
||||
'bbox': {
|
||||
'x': round(row.x / img_w * 100, 2),
|
||||
'y': round(row.y / img_h * 100, 2),
|
||||
'w': round(row.width / img_w * 100, 2),
|
||||
'h': round(row.height / img_h * 100, 2),
|
||||
},
|
||||
'bbox_en': None,
|
||||
'bbox_de': None,
|
||||
'bbox_ex': None,
|
||||
'ocr_engine': engine_name,
|
||||
}
|
||||
|
||||
confidences: List[float] = []
|
||||
|
||||
for col in relevant_cols:
|
||||
for col_idx, col in enumerate(relevant_cols):
|
||||
# Compute cell region: column x/width, row y/height
|
||||
# Add padding to avoid clipping edge words
|
||||
pad = 8 # pixels
|
||||
cell_x = col.x - pad
|
||||
cell_y = row.y - pad
|
||||
cell_x = max(0, col.x - pad)
|
||||
cell_y = max(0, row.y - pad)
|
||||
cell_w = col.width + 2 * pad
|
||||
cell_h = row.height + 2 * pad
|
||||
|
||||
# Clamp to image bounds
|
||||
cell_x = max(0, cell_x)
|
||||
cell_y = max(0, cell_y)
|
||||
if cell_x + cell_w > img_w:
|
||||
cell_w = img_w - cell_x
|
||||
if cell_y + cell_h > img_h:
|
||||
cell_h = img_h - cell_y
|
||||
|
||||
if cell_w <= 0 or cell_h <= 0:
|
||||
cells.append({
|
||||
'cell_id': f"R{row_idx:02d}_C{col_idx}",
|
||||
'row_index': row_idx,
|
||||
'col_index': col_idx,
|
||||
'col_type': col.type,
|
||||
'text': '',
|
||||
'confidence': 0.0,
|
||||
'bbox_px': {'x': col.x, 'y': row.y, 'w': col.width, 'h': row.height},
|
||||
'bbox_pct': {
|
||||
'x': round(col.x / img_w * 100, 2),
|
||||
'y': round(row.y / img_h * 100, 2),
|
||||
'w': round(col.width / img_w * 100, 2),
|
||||
'h': round(row.height / img_h * 100, 2),
|
||||
},
|
||||
'ocr_engine': engine_name,
|
||||
})
|
||||
continue
|
||||
|
||||
cell_region = PageRegion(
|
||||
@@ -3119,6 +3125,7 @@ def build_word_grid(
|
||||
x=cell_x, y=cell_y,
|
||||
width=cell_w, height=cell_h,
|
||||
)
|
||||
|
||||
# OCR the cell
|
||||
if use_rapid:
|
||||
words = ocr_region_rapid(img_bgr, cell_region)
|
||||
@@ -3126,8 +3133,7 @@ def build_word_grid(
|
||||
cell_lang = lang_map.get(col.type, lang)
|
||||
words = ocr_region(ocr_img, cell_region, lang=cell_lang, psm=6)
|
||||
|
||||
# Group into lines, then join in reading order (Fix A)
|
||||
# Use half of average word height as Y-tolerance
|
||||
# Group into lines, then join in reading order
|
||||
if words:
|
||||
avg_h = sum(w['height'] for w in words) / len(words)
|
||||
y_tol = max(10, int(avg_h * 0.5))
|
||||
@@ -3135,36 +3141,162 @@ def build_word_grid(
|
||||
y_tol = 15
|
||||
text = _words_to_reading_order_text(words, y_tolerance_px=y_tol)
|
||||
|
||||
avg_conf = 0.0
|
||||
if words:
|
||||
avg_conf = sum(w['conf'] for w in words) / len(words)
|
||||
confidences.append(avg_conf)
|
||||
avg_conf = round(sum(w['conf'] for w in words) / len(words), 1)
|
||||
|
||||
# Bbox in percent
|
||||
cell_bbox = {
|
||||
'x': round(cell_x / img_w * 100, 2),
|
||||
'y': round(cell_y / img_h * 100, 2),
|
||||
'w': round(cell_w / img_w * 100, 2),
|
||||
'h': round(cell_h / img_h * 100, 2),
|
||||
cells.append({
|
||||
'cell_id': f"R{row_idx:02d}_C{col_idx}",
|
||||
'row_index': row_idx,
|
||||
'col_index': col_idx,
|
||||
'col_type': col.type,
|
||||
'text': text,
|
||||
'confidence': avg_conf,
|
||||
'bbox_px': {'x': cell_x, 'y': cell_y, 'w': cell_w, 'h': cell_h},
|
||||
'bbox_pct': {
|
||||
'x': round(cell_x / img_w * 100, 2),
|
||||
'y': round(cell_y / img_h * 100, 2),
|
||||
'w': round(cell_w / img_w * 100, 2),
|
||||
'h': round(cell_h / img_h * 100, 2),
|
||||
},
|
||||
'ocr_engine': engine_name,
|
||||
})
|
||||
|
||||
logger.info(f"build_cell_grid: {len(cells)} cells from "
|
||||
f"{len(content_rows)} rows × {len(relevant_cols)} columns, "
|
||||
f"engine={engine_name}")
|
||||
|
||||
return cells, columns_meta
|
||||
|
||||
|
||||
def _cells_to_vocab_entries(
|
||||
cells: List[Dict[str, Any]],
|
||||
columns_meta: List[Dict[str, Any]],
|
||||
) -> List[Dict[str, Any]]:
|
||||
"""Map generic cells to vocab entries with english/german/example fields.
|
||||
|
||||
Groups cells by row_index, maps col_type → field name, and produces
|
||||
one entry per row (only rows with at least one non-empty field).
|
||||
"""
|
||||
# Determine image dimensions from first cell (for row-level bbox)
|
||||
col_type_to_field = {
|
||||
'column_en': 'english',
|
||||
'column_de': 'german',
|
||||
'column_example': 'example',
|
||||
}
|
||||
bbox_key_map = {
|
||||
'column_en': 'bbox_en',
|
||||
'column_de': 'bbox_de',
|
||||
'column_example': 'bbox_ex',
|
||||
}
|
||||
|
||||
# Group cells by row_index
|
||||
rows: Dict[int, List[Dict]] = {}
|
||||
for cell in cells:
|
||||
ri = cell['row_index']
|
||||
rows.setdefault(ri, []).append(cell)
|
||||
|
||||
entries: List[Dict[str, Any]] = []
|
||||
for row_idx in sorted(rows.keys()):
|
||||
row_cells = rows[row_idx]
|
||||
entry: Dict[str, Any] = {
|
||||
'row_index': row_idx,
|
||||
'english': '',
|
||||
'german': '',
|
||||
'example': '',
|
||||
'confidence': 0.0,
|
||||
'bbox': None,
|
||||
'bbox_en': None,
|
||||
'bbox_de': None,
|
||||
'bbox_ex': None,
|
||||
'ocr_engine': row_cells[0].get('ocr_engine', '') if row_cells else '',
|
||||
}
|
||||
|
||||
confidences = []
|
||||
for cell in row_cells:
|
||||
col_type = cell['col_type']
|
||||
field = col_type_to_field.get(col_type)
|
||||
if field:
|
||||
entry[field] = cell['text']
|
||||
bbox_field = bbox_key_map.get(col_type)
|
||||
if bbox_field:
|
||||
entry[bbox_field] = cell['bbox_pct']
|
||||
if cell['confidence'] > 0:
|
||||
confidences.append(cell['confidence'])
|
||||
|
||||
# Compute row-level bbox as union of all cell bboxes
|
||||
all_bboxes = [c['bbox_pct'] for c in row_cells if c.get('bbox_pct')]
|
||||
if all_bboxes:
|
||||
min_x = min(b['x'] for b in all_bboxes)
|
||||
min_y = min(b['y'] for b in all_bboxes)
|
||||
max_x2 = max(b['x'] + b['w'] for b in all_bboxes)
|
||||
max_y2 = max(b['y'] + b['h'] for b in all_bboxes)
|
||||
entry['bbox'] = {
|
||||
'x': round(min_x, 2),
|
||||
'y': round(min_y, 2),
|
||||
'w': round(max_x2 - min_x, 2),
|
||||
'h': round(max_y2 - min_y, 2),
|
||||
}
|
||||
|
||||
if col.type == 'column_en':
|
||||
entry['english'] = text
|
||||
entry['bbox_en'] = cell_bbox
|
||||
elif col.type == 'column_de':
|
||||
entry['german'] = text
|
||||
entry['bbox_de'] = cell_bbox
|
||||
elif col.type == 'column_example':
|
||||
entry['example'] = text
|
||||
entry['bbox_ex'] = cell_bbox
|
||||
|
||||
entry['confidence'] = round(
|
||||
sum(confidences) / len(confidences), 1
|
||||
) if confidences else 0.0
|
||||
|
||||
# Only include if at least one field has text
|
||||
# Only include if at least one vocab field has text
|
||||
if entry['english'] or entry['german'] or entry['example']:
|
||||
entries.append(entry)
|
||||
|
||||
return entries
|
||||
|
||||
|
||||
def build_word_grid(
|
||||
ocr_img: np.ndarray,
|
||||
column_regions: List[PageRegion],
|
||||
row_geometries: List[RowGeometry],
|
||||
img_w: int,
|
||||
img_h: int,
|
||||
lang: str = "eng+deu",
|
||||
ocr_engine: str = "auto",
|
||||
img_bgr: Optional[np.ndarray] = None,
|
||||
pronunciation: str = "british",
|
||||
) -> List[Dict[str, Any]]:
|
||||
"""Vocab-specific: Cell-Grid + Vocab-Mapping + Post-Processing.
|
||||
|
||||
Wrapper around build_cell_grid() that adds vocabulary-specific logic:
|
||||
- Maps cells to english/german/example entries
|
||||
- Applies character confusion fixes, IPA lookup, comma splitting, etc.
|
||||
- Falls back to returning raw cells if no vocab columns detected.
|
||||
|
||||
Args:
|
||||
ocr_img: Binarized full-page image (for Tesseract).
|
||||
column_regions: Classified columns from Step 3.
|
||||
row_geometries: Rows from Step 4.
|
||||
img_w, img_h: Image dimensions.
|
||||
lang: Default Tesseract language.
|
||||
ocr_engine: 'tesseract', 'rapid', or 'auto'.
|
||||
img_bgr: BGR color image (required for RapidOCR).
|
||||
pronunciation: 'british' or 'american' for IPA lookup.
|
||||
|
||||
Returns:
|
||||
List of entry dicts with english/german/example text and bbox info (percent).
|
||||
"""
|
||||
cells, columns_meta = build_cell_grid(
|
||||
ocr_img, column_regions, row_geometries, img_w, img_h,
|
||||
lang=lang, ocr_engine=ocr_engine, img_bgr=img_bgr,
|
||||
)
|
||||
|
||||
if not cells:
|
||||
return []
|
||||
|
||||
# Check if vocab layout is present
|
||||
col_types = {c['type'] for c in columns_meta}
|
||||
if not (col_types & {'column_en', 'column_de'}):
|
||||
logger.info("build_word_grid: no vocab columns — returning raw cells")
|
||||
return cells
|
||||
|
||||
# Vocab mapping: cells → entries
|
||||
entries = _cells_to_vocab_entries(cells, columns_meta)
|
||||
|
||||
# --- Post-processing pipeline (deterministic, no LLM) ---
|
||||
n_raw = len(entries)
|
||||
|
||||
@@ -3177,13 +3309,13 @@ def build_word_grid(
|
||||
# 3. Split comma-separated word forms (break, broke, broken → 3 entries)
|
||||
entries = _split_comma_entries(entries)
|
||||
|
||||
# 5. Attach example sentences (rows without DE → examples for preceding entry)
|
||||
# 4. Attach example sentences (rows without DE → examples for preceding entry)
|
||||
entries = _attach_example_sentences(entries)
|
||||
|
||||
engine_name = cells[0].get('ocr_engine', 'unknown') if cells else 'unknown'
|
||||
logger.info(f"build_word_grid: {len(entries)} entries from "
|
||||
f"{n_raw} raw → {len(entries)} after post-processing "
|
||||
f"({len(content_rows)} content rows × {len(relevant_cols)} columns, "
|
||||
f"engine={engine_name})")
|
||||
f"(engine={engine_name})")
|
||||
|
||||
return entries
|
||||
|
||||
|
||||
@@ -31,8 +31,14 @@ from pydantic import BaseModel
|
||||
from cv_vocab_pipeline import (
|
||||
PageRegion,
|
||||
RowGeometry,
|
||||
_cells_to_vocab_entries,
|
||||
_fix_character_confusion,
|
||||
_fix_phonetic_brackets,
|
||||
_split_comma_entries,
|
||||
_attach_example_sentences,
|
||||
analyze_layout,
|
||||
analyze_layout_by_words,
|
||||
build_cell_grid,
|
||||
build_word_grid,
|
||||
classify_column_types,
|
||||
create_layout_image,
|
||||
@@ -1075,35 +1081,60 @@ async def detect_words(session_id: str, engine: str = "auto", pronunciation: str
|
||||
for r in row_result["rows"]
|
||||
]
|
||||
|
||||
# Build word grid — pass both binarized (for Tesseract) and BGR (for RapidOCR)
|
||||
entries = build_word_grid(
|
||||
# Build generic cell grid
|
||||
cells, columns_meta = build_cell_grid(
|
||||
ocr_img, col_regions, row_geoms, img_w, img_h,
|
||||
ocr_engine=engine, img_bgr=dewarped_bgr,
|
||||
pronunciation=pronunciation,
|
||||
)
|
||||
duration = time.time() - t0
|
||||
|
||||
# Build summary
|
||||
summary = {
|
||||
"total_entries": len(entries),
|
||||
"with_english": sum(1 for e in entries if e.get("english")),
|
||||
"with_german": sum(1 for e in entries if e.get("german")),
|
||||
"low_confidence": sum(1 for e in entries if e.get("confidence", 0) < 50),
|
||||
}
|
||||
# Layout detection
|
||||
col_types = {c['type'] for c in columns_meta}
|
||||
is_vocab = bool(col_types & {'column_en', 'column_de'})
|
||||
|
||||
# Count content rows and columns for grid_shape
|
||||
n_content_rows = len([r for r in row_geoms if r.row_type == 'content'])
|
||||
n_cols = len(columns_meta)
|
||||
|
||||
# Determine which engine was actually used
|
||||
used_engine = entries[0].get("ocr_engine", "tesseract") if entries else engine
|
||||
used_engine = cells[0].get("ocr_engine", "tesseract") if cells else engine
|
||||
|
||||
# Grid result (always generic)
|
||||
word_result = {
|
||||
"entries": entries,
|
||||
"entry_count": len(entries),
|
||||
"cells": cells,
|
||||
"grid_shape": {
|
||||
"rows": n_content_rows,
|
||||
"cols": n_cols,
|
||||
"total_cells": len(cells),
|
||||
},
|
||||
"columns_used": columns_meta,
|
||||
"layout": "vocab" if is_vocab else "generic",
|
||||
"image_width": img_w,
|
||||
"image_height": img_h,
|
||||
"duration_seconds": round(duration, 2),
|
||||
"summary": summary,
|
||||
"ocr_engine": used_engine,
|
||||
"summary": {
|
||||
"total_cells": len(cells),
|
||||
"non_empty_cells": sum(1 for c in cells if c.get("text")),
|
||||
"low_confidence": sum(1 for c in cells if 0 < c.get("confidence", 0) < 50),
|
||||
},
|
||||
}
|
||||
|
||||
# For vocab layout: add post-processed vocab_entries (backwards compat)
|
||||
if is_vocab:
|
||||
entries = _cells_to_vocab_entries(cells, columns_meta)
|
||||
entries = _fix_character_confusion(entries)
|
||||
entries = _fix_phonetic_brackets(entries, pronunciation=pronunciation)
|
||||
entries = _split_comma_entries(entries)
|
||||
entries = _attach_example_sentences(entries)
|
||||
word_result["vocab_entries"] = entries
|
||||
# Also keep "entries" key for backwards compatibility
|
||||
word_result["entries"] = entries
|
||||
word_result["entry_count"] = len(entries)
|
||||
word_result["summary"]["total_entries"] = len(entries)
|
||||
word_result["summary"]["with_english"] = sum(1 for e in entries if e.get("english"))
|
||||
word_result["summary"]["with_german"] = sum(1 for e in entries if e.get("german"))
|
||||
|
||||
# Persist to DB
|
||||
await update_session_db(
|
||||
session_id,
|
||||
@@ -1114,7 +1145,8 @@ async def detect_words(session_id: str, engine: str = "auto", pronunciation: str
|
||||
cached["word_result"] = word_result
|
||||
|
||||
logger.info(f"OCR Pipeline: words session {session_id}: "
|
||||
f"{len(entries)} entries ({duration:.2f}s), summary: {summary}")
|
||||
f"layout={word_result['layout']}, "
|
||||
f"{len(cells)} cells ({duration:.2f}s), summary: {word_result['summary']}")
|
||||
|
||||
return {
|
||||
"session_id": session_id,
|
||||
@@ -1232,17 +1264,19 @@ async def _get_rows_overlay(session_id: str) -> Response:
|
||||
|
||||
|
||||
async def _get_words_overlay(session_id: str) -> Response:
|
||||
"""Generate dewarped image with word grid cells drawn on it."""
|
||||
"""Generate dewarped image with cell grid drawn on it."""
|
||||
session = await get_session_db(session_id)
|
||||
if not session:
|
||||
raise HTTPException(status_code=404, detail=f"Session {session_id} not found")
|
||||
|
||||
word_result = session.get("word_result")
|
||||
if not word_result or not word_result.get("entries"):
|
||||
if not word_result:
|
||||
raise HTTPException(status_code=404, detail="No word data available")
|
||||
|
||||
column_result = session.get("column_result")
|
||||
row_result = session.get("row_result")
|
||||
# Support both new cell-based and legacy entry-based formats
|
||||
cells = word_result.get("cells")
|
||||
if not cells and not word_result.get("entries"):
|
||||
raise HTTPException(status_code=404, detail="No word data available")
|
||||
|
||||
# Load dewarped image
|
||||
dewarped_png = await get_session_image(session_id, "dewarped")
|
||||
@@ -1256,80 +1290,105 @@ async def _get_words_overlay(session_id: str) -> Response:
|
||||
|
||||
img_h, img_w = img.shape[:2]
|
||||
|
||||
# Color map for column types (BGR)
|
||||
col_colors = {
|
||||
"column_en": (255, 180, 0), # Blue
|
||||
"column_de": (0, 200, 0), # Green
|
||||
"column_example": (0, 140, 255), # Orange
|
||||
}
|
||||
|
||||
overlay = img.copy()
|
||||
|
||||
# Build grid from column_result × row_result (the actual cells)
|
||||
columns = []
|
||||
if column_result and column_result.get("columns"):
|
||||
columns = [c for c in column_result["columns"]
|
||||
if c.get("type", "").startswith("column_")]
|
||||
if cells:
|
||||
# New cell-based overlay: color by column index
|
||||
col_palette = [
|
||||
(255, 180, 0), # Blue (BGR)
|
||||
(0, 200, 0), # Green
|
||||
(0, 140, 255), # Orange
|
||||
(200, 100, 200), # Purple
|
||||
(200, 200, 0), # Cyan
|
||||
(100, 200, 200), # Yellow-ish
|
||||
]
|
||||
|
||||
content_rows_data = []
|
||||
if row_result and row_result.get("rows"):
|
||||
content_rows_data = [r for r in row_result["rows"]
|
||||
if r.get("row_type") == "content"]
|
||||
for cell in cells:
|
||||
bbox = cell.get("bbox_px", {})
|
||||
cx = bbox.get("x", 0)
|
||||
cy = bbox.get("y", 0)
|
||||
cw = bbox.get("w", 0)
|
||||
ch = bbox.get("h", 0)
|
||||
if cw <= 0 or ch <= 0:
|
||||
continue
|
||||
|
||||
# Draw grid: column × row cells
|
||||
for col in columns:
|
||||
col_type = col.get("type", "")
|
||||
color = col_colors.get(col_type, (200, 200, 200))
|
||||
cx, cw = col["x"], col["width"]
|
||||
col_idx = cell.get("col_index", 0)
|
||||
color = col_palette[col_idx % len(col_palette)]
|
||||
|
||||
for row in content_rows_data:
|
||||
ry, rh = row["y"], row["height"]
|
||||
# Cell rectangle (exact grid intersection, no padding)
|
||||
cv2.rectangle(img, (cx, ry), (cx + cw, ry + rh), color, 1)
|
||||
# Cell rectangle border
|
||||
cv2.rectangle(img, (cx, cy), (cx + cw, cy + ch), color, 1)
|
||||
# Semi-transparent fill
|
||||
cv2.rectangle(overlay, (cx, ry), (cx + cw, ry + rh), color, -1)
|
||||
cv2.rectangle(overlay, (cx, cy), (cx + cw, cy + ch), color, -1)
|
||||
|
||||
# Place OCR text labels inside grid cells
|
||||
# Build lookup: row_index → entry for fast access
|
||||
entries = word_result["entries"]
|
||||
entry_by_row: Dict[int, Dict] = {}
|
||||
for entry in entries:
|
||||
entry_by_row[entry.get("row_index", -1)] = entry
|
||||
# Cell-ID label (top-left corner)
|
||||
cell_id = cell.get("cell_id", "")
|
||||
cv2.putText(img, cell_id, (cx + 2, cy + 10),
|
||||
cv2.FONT_HERSHEY_SIMPLEX, 0.28, color, 1)
|
||||
|
||||
for row_idx, row in enumerate(content_rows_data):
|
||||
entry = entry_by_row.get(row_idx)
|
||||
if not entry:
|
||||
continue
|
||||
# Text label (bottom of cell)
|
||||
text = cell.get("text", "")
|
||||
if text:
|
||||
conf = cell.get("confidence", 0)
|
||||
if conf >= 70:
|
||||
text_color = (0, 180, 0)
|
||||
elif conf >= 50:
|
||||
text_color = (0, 180, 220)
|
||||
else:
|
||||
text_color = (0, 0, 220)
|
||||
|
||||
conf = entry.get("confidence", 0)
|
||||
if conf >= 70:
|
||||
text_color = (0, 180, 0)
|
||||
elif conf >= 50:
|
||||
text_color = (0, 180, 220)
|
||||
else:
|
||||
text_color = (0, 0, 220)
|
||||
label = text.replace('\n', ' ')[:30]
|
||||
cv2.putText(img, label, (cx + 3, cy + ch - 4),
|
||||
cv2.FONT_HERSHEY_SIMPLEX, 0.35, text_color, 1)
|
||||
else:
|
||||
# Legacy fallback: entry-based overlay (for old sessions)
|
||||
column_result = session.get("column_result")
|
||||
row_result = session.get("row_result")
|
||||
col_colors = {
|
||||
"column_en": (255, 180, 0),
|
||||
"column_de": (0, 200, 0),
|
||||
"column_example": (0, 140, 255),
|
||||
}
|
||||
|
||||
ry, rh = row["y"], row["height"]
|
||||
columns = []
|
||||
if column_result and column_result.get("columns"):
|
||||
columns = [c for c in column_result["columns"]
|
||||
if c.get("type", "").startswith("column_")]
|
||||
|
||||
content_rows_data = []
|
||||
if row_result and row_result.get("rows"):
|
||||
content_rows_data = [r for r in row_result["rows"]
|
||||
if r.get("row_type") == "content"]
|
||||
|
||||
for col in columns:
|
||||
col_type = col.get("type", "")
|
||||
color = col_colors.get(col_type, (200, 200, 200))
|
||||
cx, cw = col["x"], col["width"]
|
||||
for row in content_rows_data:
|
||||
ry, rh = row["y"], row["height"]
|
||||
cv2.rectangle(img, (cx, ry), (cx + cw, ry + rh), color, 1)
|
||||
cv2.rectangle(overlay, (cx, ry), (cx + cw, ry + rh), color, -1)
|
||||
|
||||
# Pick the right text field for this column
|
||||
if col_type == "column_en":
|
||||
text = entry.get("english", "")
|
||||
elif col_type == "column_de":
|
||||
text = entry.get("german", "")
|
||||
elif col_type == "column_example":
|
||||
text = entry.get("example", "")
|
||||
else:
|
||||
text = ""
|
||||
entries = word_result["entries"]
|
||||
entry_by_row: Dict[int, Dict] = {}
|
||||
for entry in entries:
|
||||
entry_by_row[entry.get("row_index", -1)] = entry
|
||||
|
||||
if text:
|
||||
label = text.replace('\n', ' ')[:30]
|
||||
font_scale = 0.35
|
||||
cv2.putText(img, label, (cx + 3, ry + rh - 4),
|
||||
cv2.FONT_HERSHEY_SIMPLEX, font_scale, text_color, 1)
|
||||
for row_idx, row in enumerate(content_rows_data):
|
||||
entry = entry_by_row.get(row_idx)
|
||||
if not entry:
|
||||
continue
|
||||
conf = entry.get("confidence", 0)
|
||||
text_color = (0, 180, 0) if conf >= 70 else (0, 180, 220) if conf >= 50 else (0, 0, 220)
|
||||
ry, rh = row["y"], row["height"]
|
||||
for col in columns:
|
||||
col_type = col.get("type", "")
|
||||
cx, cw = col["x"], col["width"]
|
||||
field = {"column_en": "english", "column_de": "german", "column_example": "example"}.get(col_type, "")
|
||||
text = entry.get(field, "") if field else ""
|
||||
if text:
|
||||
label = text.replace('\n', ' ')[:30]
|
||||
cv2.putText(img, label, (cx + 3, ry + rh - 4),
|
||||
cv2.FONT_HERSHEY_SIMPLEX, 0.35, text_color, 1)
|
||||
|
||||
# Blend overlay at 10% opacity
|
||||
cv2.addWeighted(overlay, 0.1, img, 0.9, 0, img)
|
||||
|
||||
Reference in New Issue
Block a user