feat(ocr-pipeline): add Step 5 word recognition (grid from columns × rows)

Backend: build_word_grid() intersects column regions with content rows,
OCRs each cell with language-specific Tesseract, and returns vocabulary
entries with percent-based bounding boxes. New endpoints: POST /words,
GET /image/words-overlay, ground-truth save/retrieve for words.
Frontend: StepWordRecognition with overview + step-through labeling modes,
goToStep callback for row correction feedback loop.
MkDocs: OCR Pipeline documentation added.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
Benjamin Admin
2026-02-28 02:18:29 +01:00
parent 47dc2e6f7a
commit 954103cdf2
9 changed files with 1429 additions and 21 deletions

View File

@@ -112,6 +112,16 @@ export default function OcrPipelinePage() {
}
}
/**
 * Jump the wizard directly to the step at index `step`.
 *
 * Besides updating the current step, every step's status is rewritten
 * relative to the target: earlier steps become 'completed', the target
 * becomes 'active', and all later steps become 'pending'.
 */
const goToStep = (step: number) => {
  setCurrentStep(step)
  setSteps((previous) =>
    previous.map((entry, index) => {
      // Everything before the target counts as already done.
      if (index < step) return { ...entry, status: 'completed' }
      // The target itself is the step being worked on.
      if (index === step) return { ...entry, status: 'active' }
      // Anything after the target is reset to not-yet-started.
      return { ...entry, status: 'pending' }
    }),
  )
}
const handleNext = () => {
if (currentStep < steps.length - 1) {
setSteps((prev) =>
@@ -161,7 +171,7 @@ export default function OcrPipelinePage() {
case 3:
return <StepRowDetection sessionId={sessionId} onNext={handleNext} />
case 4:
return <StepWordRecognition />
return <StepWordRecognition sessionId={sessionId} onNext={handleNext} goToStep={goToStep} />
case 5:
return <StepCoordinates />
case 6:

View File

@@ -29,6 +29,7 @@ export interface SessionInfo {
dewarp_result?: DewarpResult
column_result?: ColumnResult
row_result?: RowResult
word_result?: WordResult
}
export interface DeskewResult {
@@ -116,6 +117,46 @@ export interface RowGroundTruth {
notes?: string
}
/**
 * Bounding box used by the word-recognition types in this file.
 *
 * NOTE(review): the accompanying commit message describes word bounding
 * boxes as percent-based, so x/y/w/h are presumably percentages of the page
 * image rather than pixels — confirm against the backend response.
 */
export interface WordBbox {
x: number
y: number
w: number
h: number
}
/**
 * A single entry of a word-recognition result (see `WordResult.entries`).
 *
 * NOTE(review): field meanings below are inferred from names only; verify
 * against the backend that produces these entries.
 */
export interface WordEntry {
// presumably the index of the detected row this entry came from — TODO confirm
row_index: number
// Recognized text values; presumably one per source column (English, German, example).
english: string
german: string
example: string
// Recognition confidence; scale (0-1 vs. 0-100) is not visible here — TODO confirm
confidence: number
// Box for the entry as a whole.
bbox: WordBbox
// Per-field boxes; null is allowed, presumably when that field has no cell — TODO confirm
bbox_en: WordBbox | null
bbox_de: WordBbox | null
bbox_ex: WordBbox | null
// Optional review state of the entry; absent when no state has been assigned.
status?: 'pending' | 'confirmed' | 'edited' | 'skipped'
}
/**
 * Result payload of the word-recognition step, stored on a session as
 * `word_result`.
 *
 * NOTE(review): units and exact counting rules are not visible in this file;
 * the notes below are assumptions to confirm against the backend.
 */
export interface WordResult {
// The recognized entries.
entries: WordEntry[]
// presumably equals entries.length — TODO confirm
entry_count: number
// Dimensions of the processed image; presumably in pixels — TODO confirm
image_width: number
image_height: number
// Processing time of the step, in seconds (per the field name).
duration_seconds: number
// Aggregate counters over the entries.
summary: {
total_entries: number
// presumably the number of entries with non-empty english / german text
with_english: number
with_german: number
// presumably entries below some confidence threshold; threshold not visible here
low_confidence: number
}
}
/**
 * Ground-truth feedback for a word-recognition result.
 *
 * Only `is_correct` is required; corrected entries and free-text notes are
 * optional.
 */
export interface WordGroundTruth {
// Whether the recognition result was judged correct.
is_correct: boolean
// Optional replacement entries; presumably supplied when is_correct is false — TODO confirm
corrected_entries?: WordEntry[]
// Optional free-text remarks.
notes?: string
}
export const PIPELINE_STEPS: PipelineStep[] = [
{ id: 'deskew', name: 'Begradigung', icon: '📐', status: 'pending' },
{ id: 'dewarp', name: 'Entzerrung', icon: '🔧', status: 'pending' },