feat(ocr-pipeline): line breaks, hyphen rejoin & oversized row splitting
- Preserve \n between visual lines within cells (instead of joining with space)
- Rejoin hyphenated words split across line breaks (e.g. Fuß-\nboden → Fußboden)
- Split oversized rows (>1.5× median height) into sub-entries when EN/DE line counts match — deterministic fix for missed Step 4 row boundaries
- Frontend: render \n as <br/>, use textarea for multiline editing

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -5,6 +5,16 @@ import type { WordResult, WordEntry, WordGroundTruth } from '@/app/(admin)/ai/oc
|
||||
|
||||
const KLAUSUR_API = '/klausur-api'
|
||||
|
||||
/**
 * Render a text value, turning embedded `\n` characters into `<br />` line breaks.
 *
 * - Falsy `text` (e.g. an empty string) renders a muted em-dash placeholder.
 * - Text without any `\n` is returned as-is inside a fragment.
 * - Otherwise every line is wrapped in a `<span>`, with a `<br />` after each
 *   line except the last.
 *
 * @param text - The string to display; may contain `\n` line separators.
 * @returns The JSX for the placeholder, the plain text, or the line-broken text.
 */
|
||||
function MultilineText({ text }: { text: string }) {
|
||||
// Falsy text: show the muted placeholder dash instead of an empty cell.
if (!text) return <span className="text-gray-300 dark:text-gray-600">—</span>
|
||||
const lines = text.split('\n')
|
||||
// Single-line text needs no wrapper elements.
if (lines.length === 1) return <>{text}</>
|
||||
// Each line is keyed by its position in `lines`; no <br /> follows the final line.
return <>{lines.map((line, i) => (
|
||||
<span key={i}>{line}{i < lines.length - 1 && <br />}</span>
|
||||
))}</>
|
||||
}
|
||||
|
||||
interface StepWordRecognitionProps {
|
||||
sessionId: string | null
|
||||
onNext: () => void
|
||||
@@ -318,13 +328,13 @@ export function StepWordRecognition({ sessionId, onNext, goToStep }: StepWordRec
|
||||
>
|
||||
<td className="py-1 pr-2 text-gray-400">{idx + 1}</td>
|
||||
<td className="py-1 pr-2 font-mono text-gray-700 dark:text-gray-300 cursor-pointer">
|
||||
{entry.english || <span className="text-gray-300 dark:text-gray-600">—</span>}
|
||||
<MultilineText text={entry.english} />
|
||||
</td>
|
||||
<td className="py-1 pr-2 font-mono text-gray-700 dark:text-gray-300 cursor-pointer">
|
||||
{entry.german || <span className="text-gray-300 dark:text-gray-600">—</span>}
|
||||
<MultilineText text={entry.german} />
|
||||
</td>
|
||||
<td className="py-1 pr-2 font-mono text-gray-500 dark:text-gray-400 cursor-pointer max-w-[200px] truncate">
|
||||
{entry.example || <span className="text-gray-300 dark:text-gray-600">—</span>}
|
||||
<td className="py-1 pr-2 font-mono text-gray-500 dark:text-gray-400 cursor-pointer max-w-[200px]">
|
||||
<MultilineText text={entry.example} />
|
||||
</td>
|
||||
<td className={`py-1 text-right font-mono ${confColor(entry.confidence)}`}>
|
||||
{entry.confidence}%
|
||||
@@ -428,30 +438,30 @@ export function StepWordRecognition({ sessionId, onNext, goToStep }: StepWordRec
|
||||
<div className="space-y-2">
|
||||
<div>
|
||||
<label className="text-[10px] font-medium text-gray-500 dark:text-gray-400">English</label>
|
||||
<input
|
||||
ref={enRef}
|
||||
type="text"
|
||||
<textarea
|
||||
ref={enRef as any}
|
||||
rows={Math.max(1, (editedEntries[activeIndex]?.english || '').split('\n').length)}
|
||||
value={editedEntries[activeIndex]?.english || ''}
|
||||
onChange={(e) => updateEntry(activeIndex, 'english', e.target.value)}
|
||||
className="w-full px-2 py-1.5 text-sm border rounded dark:bg-gray-700 dark:border-gray-600 font-mono"
|
||||
className="w-full px-2 py-1.5 text-sm border rounded dark:bg-gray-700 dark:border-gray-600 font-mono resize-none"
|
||||
/>
|
||||
</div>
|
||||
<div>
|
||||
<label className="text-[10px] font-medium text-gray-500 dark:text-gray-400">Deutsch</label>
|
||||
<input
|
||||
type="text"
|
||||
<textarea
|
||||
rows={Math.max(1, (editedEntries[activeIndex]?.german || '').split('\n').length)}
|
||||
value={editedEntries[activeIndex]?.german || ''}
|
||||
onChange={(e) => updateEntry(activeIndex, 'german', e.target.value)}
|
||||
className="w-full px-2 py-1.5 text-sm border rounded dark:bg-gray-700 dark:border-gray-600 font-mono"
|
||||
className="w-full px-2 py-1.5 text-sm border rounded dark:bg-gray-700 dark:border-gray-600 font-mono resize-none"
|
||||
/>
|
||||
</div>
|
||||
<div>
|
||||
<label className="text-[10px] font-medium text-gray-500 dark:text-gray-400">Example</label>
|
||||
<input
|
||||
type="text"
|
||||
<textarea
|
||||
rows={Math.max(1, (editedEntries[activeIndex]?.example || '').split('\n').length)}
|
||||
value={editedEntries[activeIndex]?.example || ''}
|
||||
onChange={(e) => updateEntry(activeIndex, 'example', e.target.value)}
|
||||
className="w-full px-2 py-1.5 text-sm border rounded dark:bg-gray-700 dark:border-gray-600 font-mono"
|
||||
className="w-full px-2 py-1.5 text-sm border rounded dark:bg-gray-700 dark:border-gray-600 font-mono resize-none"
|
||||
/>
|
||||
</div>
|
||||
</div>
|
||||
@@ -503,7 +513,7 @@ export function StepWordRecognition({ sessionId, onNext, goToStep }: StepWordRec
|
||||
'bg-gray-300 dark:bg-gray-600'
|
||||
}`} />
|
||||
<span className="truncate text-gray-600 dark:text-gray-400 font-mono">
|
||||
{entry.english || '—'} → {entry.german || '—'}
|
||||
{(entry.english || '—').replace(/\n/g, ' ')} → {(entry.german || '—').replace(/\n/g, ' ')}
|
||||
</span>
|
||||
</div>
|
||||
))}
|
||||
|
||||
Reference in New Issue
Block a user