feat(ocr-pipeline): add Step 5 word recognition (grid from columns × rows)
Backend: build_word_grid() intersects column regions with content rows, OCRs each cell with language-specific Tesseract, and returns vocabulary entries with percent-based bounding boxes. New endpoints: POST /words, GET /image/words-overlay, ground-truth save/retrieve for words. Frontend: StepWordRecognition with overview + step-through labeling modes, goToStep callback for row correction feedback loop. MkDocs: OCR Pipeline documentation added. Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -112,6 +112,16 @@ export default function OcrPipelinePage() {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
 * Jump directly to the pipeline step at index `step`.
 *
 * Makes `step` the current step and re-derives every step's status from its
 * position: steps before the target become 'completed', the target becomes
 * 'active', and all later steps become 'pending'.
 */
const goToStep = (step: number) => {
  setCurrentStep(step)
  setSteps((prev) =>
    prev.map((entry, position) => {
      if (position < step) return { ...entry, status: 'completed' as const }
      if (position === step) return { ...entry, status: 'active' as const }
      return { ...entry, status: 'pending' as const }
    }),
  )
}
|
||||||
|
|
||||||
const handleNext = () => {
|
const handleNext = () => {
|
||||||
if (currentStep < steps.length - 1) {
|
if (currentStep < steps.length - 1) {
|
||||||
setSteps((prev) =>
|
setSteps((prev) =>
|
||||||
@@ -161,7 +171,7 @@ export default function OcrPipelinePage() {
|
|||||||
case 3:
|
case 3:
|
||||||
return <StepRowDetection sessionId={sessionId} onNext={handleNext} />
|
return <StepRowDetection sessionId={sessionId} onNext={handleNext} />
|
||||||
case 4:
|
case 4:
|
||||||
return <StepWordRecognition />
|
return <StepWordRecognition sessionId={sessionId} onNext={handleNext} goToStep={goToStep} />
|
||||||
case 5:
|
case 5:
|
||||||
return <StepCoordinates />
|
return <StepCoordinates />
|
||||||
case 6:
|
case 6:
|
||||||
|
|||||||
@@ -29,6 +29,7 @@ export interface SessionInfo {
|
|||||||
dewarp_result?: DewarpResult
|
dewarp_result?: DewarpResult
|
||||||
column_result?: ColumnResult
|
column_result?: ColumnResult
|
||||||
row_result?: RowResult
|
row_result?: RowResult
|
||||||
|
word_result?: WordResult
|
||||||
}
|
}
|
||||||
|
|
||||||
export interface DeskewResult {
|
export interface DeskewResult {
|
||||||
@@ -116,6 +117,46 @@ export interface RowGroundTruth {
|
|||||||
notes?: string
|
notes?: string
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
 * Bounding box of a region on the page image.
 *
 * The labeling view feeds these values straight into CSS percentages
 * (`left`/`top`/`width`/`height`), so they are percent-based, not pixels.
 */
export interface WordBbox {
|
||||||
|
/** Left edge, in percent (used as CSS `left`). */
x: number
|
||||||
|
/** Top edge, in percent (used as CSS `top`). */
y: number
|
||||||
|
/** Width, in percent (used as CSS `width`). */
w: number
|
||||||
|
/** Height, in percent (used as CSS `height`). */
h: number
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
 * One recognized vocabulary entry (English / German / example text) together
 * with its bounding boxes and the reviewer's labeling status.
 */
export interface WordEntry {
|
||||||
|
/** NOTE(review): presumably the index of the content row this entry came from — confirm against the backend. */
row_index: number
|
||||||
|
/** Recognized English text; editable in labeling mode. */
english: string
|
||||||
|
/** Recognized German text; editable in labeling mode. */
german: string
|
||||||
|
/** Recognized example text; editable in labeling mode. */
example: string
|
||||||
|
/** Confidence shown with a `%` suffix in the UI; colored green from 70, yellow from 50, red below. */
confidence: number
|
||||||
|
/** Box highlighted on the overlay image for the active entry. */
bbox: WordBbox
|
||||||
|
/** Box used for the "EN-Zelle" crop; null when there is none. */
bbox_en: WordBbox | null
|
||||||
|
/** Box used for the "DE-Zelle" crop; null when there is none. */
bbox_de: WordBbox | null
|
||||||
|
/** NOTE(review): presumably the example-column counterpart of bbox_en/bbox_de — not read by the UI shown here. */
bbox_ex: WordBbox | null
|
||||||
|
/** Labeling status; the UI treats a missing value as 'pending'. */
status?: 'pending' | 'confirmed' | 'edited' | 'skipped'
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
 * Result of the word recognition step as stored on the session
 * (`SessionInfo.word_result`) and returned by POST `.../words`.
 */
export interface WordResult {
|
||||||
|
/** Recognized entries; copied into the editable list of the labeling UI. */
entries: WordEntry[]
|
||||||
|
/** NOTE(review): presumably equals entries.length — confirm against the backend. */
entry_count: number
|
||||||
|
/** NOTE(review): presumably the pixel width of the processed image — confirm. */
image_width: number
|
||||||
|
/** NOTE(review): presumably the pixel height of the processed image — confirm. */
image_height: number
|
||||||
|
/** Duration of the recognition run; displayed with an "s" suffix. */
duration_seconds: number
|
||||||
|
/** Aggregate counters rendered as badges in the overview. */
summary: {
|
||||||
|
/** Shown as "Ergebnis: N Eintraege erkannt". */
total_entries: number
|
||||||
|
/** Shown in the "EN:" badge. */
with_english: number
|
||||||
|
/** Shown in the "DE:" badge. */
with_german: number
|
||||||
|
/** Shown in the "Unsicher:" badge, which is only rendered when greater than 0. */
low_confidence: number
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
 * Ground-truth verdict for the word recognition step, posted as JSON to
 * `.../ground-truth/words`.
 */
export interface WordGroundTruth {
|
||||||
|
/** Reviewer verdict: true when the recognition result is accepted as correct. */
is_correct: boolean
|
||||||
|
/** The reviewer's edited entries; the UI only sends them when is_correct is false. */
corrected_entries?: WordEntry[]
|
||||||
|
/** Optional free-text notes; omitted when empty. */
notes?: string
|
||||||
|
}
|
||||||
|
|
||||||
export const PIPELINE_STEPS: PipelineStep[] = [
|
export const PIPELINE_STEPS: PipelineStep[] = [
|
||||||
{ id: 'deskew', name: 'Begradigung', icon: '📐', status: 'pending' },
|
{ id: 'deskew', name: 'Begradigung', icon: '📐', status: 'pending' },
|
||||||
{ id: 'dewarp', name: 'Entzerrung', icon: '🔧', status: 'pending' },
|
{ id: 'dewarp', name: 'Entzerrung', icon: '🔧', status: 'pending' },
|
||||||
|
|||||||
@@ -1,19 +1,602 @@
|
|||||||
'use client'
|
'use client'
|
||||||
|
|
||||||
export function StepWordRecognition() {
|
import { useCallback, useEffect, useRef, useState } from 'react'
|
||||||
return (
|
import type { WordResult, WordEntry, WordGroundTruth } from '@/app/(admin)/ai/ocr-pipeline/types'
|
||||||
<div className="flex flex-col items-center justify-center py-16 text-center">
|
|
||||||
<div className="text-5xl mb-4">🔤</div>
|
const KLAUSUR_API = '/klausur-api'
|
||||||
<h3 className="text-lg font-medium text-gray-700 dark:text-gray-300 mb-2">
|
|
||||||
Schritt 4: Worterkennung
|
interface StepWordRecognitionProps {
|
||||||
</h3>
|
sessionId: string | null
|
||||||
<p className="text-gray-500 dark:text-gray-400 max-w-md">
|
onNext: () => void
|
||||||
OCR mit Bounding Boxes fuer jedes erkannte Wort.
|
goToStep: (step: number) => void
|
||||||
Dieser Schritt wird in einer zukuenftigen Version implementiert.
|
}
|
||||||
</p>
|
|
||||||
<div className="mt-6 px-4 py-2 bg-amber-100 dark:bg-amber-900/30 text-amber-700 dark:text-amber-400 rounded-full text-sm font-medium">
|
export function StepWordRecognition({ sessionId, onNext, goToStep }: StepWordRecognitionProps) {
|
||||||
Kommt bald
|
const [wordResult, setWordResult] = useState<WordResult | null>(null)
|
||||||
|
const [detecting, setDetecting] = useState(false)
|
||||||
|
const [error, setError] = useState<string | null>(null)
|
||||||
|
const [gtNotes, setGtNotes] = useState('')
|
||||||
|
const [gtSaved, setGtSaved] = useState(false)
|
||||||
|
|
||||||
|
// Step-through labeling state
|
||||||
|
const [activeIndex, setActiveIndex] = useState(0)
|
||||||
|
const [editedEntries, setEditedEntries] = useState<WordEntry[]>([])
|
||||||
|
const [mode, setMode] = useState<'overview' | 'labeling'>('overview')
|
||||||
|
|
||||||
|
const enRef = useRef<HTMLInputElement>(null)
|
||||||
|
|
||||||
|
// Load an existing word result for the session; if there is none (or the
// lookup fails), start the automatic word recognition instead.
useEffect(() => {
  if (!sessionId) return

  // Set by the cleanup below. Stops a stale run (unmount, sessionId change,
  // StrictMode double-mount) from touching state or firing a second,
  // expensive POST /words.
  let cancelled = false

  const fetchSession = async () => {
    try {
      const res = await fetch(`${KLAUSUR_API}/api/v1/ocr-pipeline/sessions/${sessionId}`)
      if (cancelled) return
      if (res.ok) {
        const info = await res.json()
        if (cancelled) return
        if (info.word_result) {
          // A stored result exists: show it and skip detection.
          setWordResult(info.word_result)
          initEntries(info.word_result.entries)
          return
        }
      }
    } catch (e) {
      // Best effort: a failed lookup still falls through to detection.
      console.error('Failed to fetch session info:', e)
    }
    if (!cancelled) runAutoDetection()
  }

  fetchSession()
  return () => {
    cancelled = true
  }
  // eslint-disable-next-line react-hooks/exhaustive-deps
}, [sessionId])
|
||||||
|
|
||||||
|
/**
 * Replace the editable entry list with `entries` and select the first one.
 * Entries that arrive without a status start out as 'pending'.
 */
const initEntries = (entries: WordEntry[]) => {
  setEditedEntries(
    entries.map(entry => {
      return { ...entry, status: entry.status || 'pending' }
    }),
  )
  setActiveIndex(0)
}
|
||||||
|
|
||||||
|
/**
 * Run word recognition for the current session (POST `.../words`) and load
 * the returned entries into the labeling state.
 *
 * While the request is in flight `detecting` is true. A failure ends up in
 * `error`; the previous result, if any, stays on screen.
 */
const runAutoDetection = useCallback(async () => {
  if (!sessionId) return
  setDetecting(true)
  setError(null)
  try {
    const res = await fetch(`${KLAUSUR_API}/api/v1/ocr-pipeline/sessions/${sessionId}/words`, {
      method: 'POST',
    })
    if (!res.ok) {
      const err = await res.json().catch(() => ({ detail: res.statusText }))
      // `detail` is not guaranteed to be a string (e.g. a list of validation
      // errors); only use it verbatim when it is, otherwise the user would
      // see "[object Object]".
      const detail = typeof err?.detail === 'string' && err.detail ? err.detail : 'Worterkennung fehlgeschlagen'
      throw new Error(detail)
    }
    const data: WordResult = await res.json()
    setWordResult(data)
    initEntries(data.entries)
    // A fresh result has no ground truth yet: show the save controls again.
    setGtSaved(false)
  } catch (e) {
    setError(e instanceof Error ? e.message : 'Unbekannter Fehler')
  } finally {
    setDetecting(false)
  }
  // initEntries only calls state setters, so it is deliberately left out of the deps.
  // eslint-disable-next-line react-hooks/exhaustive-deps
}, [sessionId])
|
||||||
|
|
||||||
|
/**
 * Save the reviewer's ground-truth verdict for this step.
 *
 * @param isCorrect - true when the recognition result is accepted as is. The
 *   edited entries are only sent along when this is false.
 *
 * `gtSaved` is set only after the server answered with a success status;
 * any failure (network error or non-2xx response) is logged and shown to the
 * user through `error`.
 */
const handleGroundTruth = useCallback(async (isCorrect: boolean) => {
  if (!sessionId) return
  const gt: WordGroundTruth = {
    is_correct: isCorrect,
    corrected_entries: isCorrect ? undefined : editedEntries,
    notes: gtNotes || undefined,
  }
  try {
    const res = await fetch(`${KLAUSUR_API}/api/v1/ocr-pipeline/sessions/${sessionId}/ground-truth/words`, {
      method: 'POST',
      headers: { 'Content-Type': 'application/json' },
      body: JSON.stringify(gt),
    })
    // fetch() resolves for 4xx/5xx responses too; without this check a
    // rejected save would still be reported as saved.
    if (!res.ok) {
      throw new Error(`HTTP ${res.status} ${res.statusText}`)
    }
    setError(null)
    setGtSaved(true)
  } catch (e) {
    console.error('Ground truth save failed:', e)
    setError('Ground Truth konnte nicht gespeichert werden')
  }
}, [sessionId, gtNotes, editedEntries])
|
||||||
|
|
||||||
|
/**
 * Step-through: overwrite one text field of the entry at `index`.
 * The touched entry is marked 'edited'; every other entry is kept unchanged.
 */
const updateEntry = (index: number, field: 'english' | 'german' | 'example', value: string) => {
  setEditedEntries(current =>
    current.map((entry, position) => {
      if (position !== index) return entry
      return { ...entry, [field]: value, status: 'edited' as const }
    }),
  )
}
|
||||||
|
|
||||||
|
/**
 * Step-through: confirm the active entry and advance to the next one.
 * An entry that was already edited keeps its 'edited' status; on the last
 * entry the selection stays where it is.
 */
const confirmEntry = () => {
  setEditedEntries(current =>
    current.map((entry, position) => {
      if (position !== activeIndex) return entry
      return {
        ...entry,
        status: entry.status === 'edited' ? ('edited' as const) : ('confirmed' as const),
      }
    }),
  )
  const lastIndex = editedEntries.length - 1
  if (activeIndex < lastIndex) {
    setActiveIndex(activeIndex + 1)
  }
}
|
||||||
|
|
||||||
|
/**
 * Step-through: mark the active entry as 'skipped' and advance to the next
 * one; on the last entry the selection stays where it is.
 */
const skipEntry = () => {
  setEditedEntries(current =>
    current.map((entry, position) => {
      if (position !== activeIndex) return entry
      return { ...entry, status: 'skipped' as const }
    }),
  )
  const lastIndex = editedEntries.length - 1
  if (activeIndex < lastIndex) {
    setActiveIndex(activeIndex + 1)
  }
}
|
||||||
|
|
||||||
|
// In labeling mode, move keyboard focus to the English input whenever the
// active entry (or the mode) changes.
useEffect(() => {
  if (mode !== 'labeling') return
  enRef.current?.focus()
}, [activeIndex, mode])
|
||||||
|
|
||||||
|
// Keyboard shortcuts, active only in labeling mode:
//   Enter (without Shift) -> confirm the active entry
//   Ctrl+ArrowDown        -> skip the active entry
//   Ctrl+ArrowUp          -> go back one entry
// The listener is re-registered whenever mode, activeIndex or editedEntries
// change, so the handlers it calls always see current state.
useEffect(() => {
  if (mode !== 'labeling') return

  const onKeyDown = (event: KeyboardEvent) => {
    switch (event.key) {
      case 'Enter':
        if (event.shiftKey) return
        event.preventDefault()
        confirmEntry()
        return
      case 'ArrowDown':
        if (!event.ctrlKey) return
        event.preventDefault()
        skipEntry()
        return
      case 'ArrowUp':
        if (!event.ctrlKey) return
        event.preventDefault()
        if (activeIndex > 0) setActiveIndex(activeIndex - 1)
        return
      default:
        // Tab and every other key keep their native behavior.
        return
    }
  }

  window.addEventListener('keydown', onKeyDown)
  return () => window.removeEventListener('keydown', onKeyDown)
  // eslint-disable-next-line react-hooks/exhaustive-deps
}, [mode, activeIndex, editedEntries])
|
||||||
|
|
||||||
|
if (!sessionId) {
|
||||||
|
return (
|
||||||
|
<div className="flex flex-col items-center justify-center py-16 text-center">
|
||||||
|
<div className="text-5xl mb-4">🔤</div>
|
||||||
|
<h3 className="text-lg font-medium text-gray-700 dark:text-gray-300 mb-2">
|
||||||
|
Schritt 5: Worterkennung
|
||||||
|
</h3>
|
||||||
|
<p className="text-gray-500 dark:text-gray-400 max-w-md">
|
||||||
|
Bitte zuerst Schritte 1-4 abschliessen.
|
||||||
|
</p>
|
||||||
</div>
|
</div>
|
||||||
|
)
|
||||||
|
}
|
||||||
|
|
||||||
|
const overlayUrl = `${KLAUSUR_API}/api/v1/ocr-pipeline/sessions/${sessionId}/image/words-overlay`
|
||||||
|
const dewarpedUrl = `${KLAUSUR_API}/api/v1/ocr-pipeline/sessions/${sessionId}/image/dewarped`
|
||||||
|
|
||||||
|
/**
 * Text color classes for a confidence value:
 * green from 70, yellow from 50, red for everything else.
 */
const confColor = (conf: number) => {
  // Class names are spelled out in full rather than assembled from parts.
  let classes = 'text-red-600 dark:text-red-400'
  if (conf >= 50) classes = 'text-yellow-600 dark:text-yellow-400'
  if (conf >= 70) classes = 'text-green-600 dark:text-green-400'
  return classes
}
|
||||||
|
|
||||||
|
/**
 * Badge color classes for an entry status.
 * A missing, empty or unknown status is shown with the 'pending' colors.
 */
const statusBadge = (status?: string) => {
  const classesByStatus: Record<string, string> = {
    pending: 'bg-gray-100 dark:bg-gray-700 text-gray-500',
    confirmed: 'bg-green-100 dark:bg-green-900/30 text-green-700 dark:text-green-400',
    edited: 'bg-blue-100 dark:bg-blue-900/30 text-blue-700 dark:text-blue-400',
    skipped: 'bg-orange-100 dark:bg-orange-900/30 text-orange-700 dark:text-orange-400',
  }
  const key = status ? status : 'pending'
  const classes = classesByStatus[key]
  return classes ? classes : classesByStatus.pending
}
|
||||||
|
|
||||||
|
const summary = wordResult?.summary
|
||||||
|
const confirmedCount = editedEntries.filter(e => e.status === 'confirmed' || e.status === 'edited').length
|
||||||
|
const totalCount = editedEntries.length
|
||||||
|
|
||||||
|
return (
|
||||||
|
<div className="space-y-4">
|
||||||
|
{/* Loading */}
|
||||||
|
{detecting && (
|
||||||
|
<div className="flex items-center gap-2 text-teal-600 dark:text-teal-400 text-sm">
|
||||||
|
<div className="animate-spin w-4 h-4 border-2 border-teal-500 border-t-transparent rounded-full" />
|
||||||
|
Worterkennung laeuft...
|
||||||
|
</div>
|
||||||
|
)}
|
||||||
|
|
||||||
|
{/* Mode toggle */}
|
||||||
|
{wordResult && (
|
||||||
|
<div className="flex items-center gap-2">
|
||||||
|
<button
|
||||||
|
onClick={() => setMode('overview')}
|
||||||
|
className={`px-3 py-1.5 text-xs rounded-lg font-medium transition-colors ${
|
||||||
|
mode === 'overview'
|
||||||
|
? 'bg-teal-600 text-white'
|
||||||
|
: 'bg-gray-100 dark:bg-gray-700 text-gray-600 dark:text-gray-300 hover:bg-gray-200 dark:hover:bg-gray-600'
|
||||||
|
}`}
|
||||||
|
>
|
||||||
|
Uebersicht
|
||||||
|
</button>
|
||||||
|
<button
|
||||||
|
onClick={() => setMode('labeling')}
|
||||||
|
className={`px-3 py-1.5 text-xs rounded-lg font-medium transition-colors ${
|
||||||
|
mode === 'labeling'
|
||||||
|
? 'bg-teal-600 text-white'
|
||||||
|
: 'bg-gray-100 dark:bg-gray-700 text-gray-600 dark:text-gray-300 hover:bg-gray-200 dark:hover:bg-gray-600'
|
||||||
|
}`}
|
||||||
|
>
|
||||||
|
Labeling ({confirmedCount}/{totalCount})
|
||||||
|
</button>
|
||||||
|
</div>
|
||||||
|
)}
|
||||||
|
|
||||||
|
{/* Overview mode: side-by-side images + entry list */}
|
||||||
|
{mode === 'overview' && (
|
||||||
|
<>
|
||||||
|
{/* Images: overlay vs clean */}
|
||||||
|
<div className="grid grid-cols-2 gap-4">
|
||||||
|
<div>
|
||||||
|
<div className="text-xs font-medium text-gray-500 dark:text-gray-400 mb-1">
|
||||||
|
Mit Grid-Overlay
|
||||||
|
</div>
|
||||||
|
<div className="border rounded-lg overflow-hidden dark:border-gray-700 bg-gray-50 dark:bg-gray-900">
|
||||||
|
{wordResult ? (
|
||||||
|
// eslint-disable-next-line @next/next/no-img-element
|
||||||
|
<img
|
||||||
|
src={`${overlayUrl}?t=${Date.now()}`}
|
||||||
|
alt="Wort-Overlay"
|
||||||
|
className="w-full h-auto"
|
||||||
|
/>
|
||||||
|
) : (
|
||||||
|
<div className="aspect-[3/4] flex items-center justify-center text-gray-400 text-sm">
|
||||||
|
{detecting ? 'Erkenne Woerter...' : 'Keine Daten'}
|
||||||
|
</div>
|
||||||
|
)}
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
<div>
|
||||||
|
<div className="text-xs font-medium text-gray-500 dark:text-gray-400 mb-1">
|
||||||
|
Entzerrtes Bild
|
||||||
|
</div>
|
||||||
|
<div className="border rounded-lg overflow-hidden dark:border-gray-700 bg-gray-50 dark:bg-gray-900">
|
||||||
|
{/* eslint-disable-next-line @next/next/no-img-element */}
|
||||||
|
<img
|
||||||
|
src={dewarpedUrl}
|
||||||
|
alt="Entzerrt"
|
||||||
|
className="w-full h-auto"
|
||||||
|
/>
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
|
||||||
|
{/* Result summary */}
|
||||||
|
{wordResult && summary && (
|
||||||
|
<div className="bg-white dark:bg-gray-800 rounded-xl border border-gray-200 dark:border-gray-700 p-4 space-y-3">
|
||||||
|
<div className="flex items-center justify-between">
|
||||||
|
<h4 className="text-sm font-medium text-gray-700 dark:text-gray-300">
|
||||||
|
Ergebnis: {summary.total_entries} Eintraege erkannt
|
||||||
|
</h4>
|
||||||
|
<span className="text-xs text-gray-400">
|
||||||
|
{wordResult.duration_seconds}s
|
||||||
|
</span>
|
||||||
|
</div>
|
||||||
|
|
||||||
|
{/* Summary badges */}
|
||||||
|
<div className="flex gap-2 flex-wrap">
|
||||||
|
<span className="px-2 py-0.5 rounded text-xs font-medium bg-blue-100 dark:bg-blue-900/30 text-blue-700 dark:text-blue-300">
|
||||||
|
EN: {summary.with_english}
|
||||||
|
</span>
|
||||||
|
<span className="px-2 py-0.5 rounded text-xs font-medium bg-green-100 dark:bg-green-900/30 text-green-700 dark:text-green-300">
|
||||||
|
DE: {summary.with_german}
|
||||||
|
</span>
|
||||||
|
{summary.low_confidence > 0 && (
|
||||||
|
<span className="px-2 py-0.5 rounded text-xs font-medium bg-red-100 dark:bg-red-900/30 text-red-700 dark:text-red-300">
|
||||||
|
Unsicher: {summary.low_confidence}
|
||||||
|
</span>
|
||||||
|
)}
|
||||||
|
</div>
|
||||||
|
|
||||||
|
{/* Entry table */}
|
||||||
|
<div className="max-h-80 overflow-y-auto">
|
||||||
|
<table className="w-full text-xs">
|
||||||
|
<thead className="sticky top-0 bg-white dark:bg-gray-800">
|
||||||
|
<tr className="text-left text-gray-500 dark:text-gray-400 border-b dark:border-gray-700">
|
||||||
|
<th className="py-1 pr-2 w-8">#</th>
|
||||||
|
<th className="py-1 pr-2">English</th>
|
||||||
|
<th className="py-1 pr-2">Deutsch</th>
|
||||||
|
<th className="py-1 pr-2">Example</th>
|
||||||
|
<th className="py-1 w-12 text-right">Conf</th>
|
||||||
|
</tr>
|
||||||
|
</thead>
|
||||||
|
<tbody>
|
||||||
|
{editedEntries.map((entry, idx) => (
|
||||||
|
<tr
|
||||||
|
key={idx}
|
||||||
|
className={`border-b dark:border-gray-700/50 ${
|
||||||
|
idx === activeIndex ? 'bg-teal-50 dark:bg-teal-900/20' : ''
|
||||||
|
}`}
|
||||||
|
onClick={() => { setActiveIndex(idx); setMode('labeling') }}
|
||||||
|
>
|
||||||
|
<td className="py-1 pr-2 text-gray-400">{idx + 1}</td>
|
||||||
|
<td className="py-1 pr-2 font-mono text-gray-700 dark:text-gray-300 cursor-pointer">
|
||||||
|
{entry.english || <span className="text-gray-300 dark:text-gray-600">—</span>}
|
||||||
|
</td>
|
||||||
|
<td className="py-1 pr-2 font-mono text-gray-700 dark:text-gray-300 cursor-pointer">
|
||||||
|
{entry.german || <span className="text-gray-300 dark:text-gray-600">—</span>}
|
||||||
|
</td>
|
||||||
|
<td className="py-1 pr-2 font-mono text-gray-500 dark:text-gray-400 cursor-pointer max-w-[200px] truncate">
|
||||||
|
{entry.example || <span className="text-gray-300 dark:text-gray-600">—</span>}
|
||||||
|
</td>
|
||||||
|
<td className={`py-1 text-right font-mono ${confColor(entry.confidence)}`}>
|
||||||
|
{entry.confidence}%
|
||||||
|
</td>
|
||||||
|
</tr>
|
||||||
|
))}
|
||||||
|
</tbody>
|
||||||
|
</table>
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
)}
|
||||||
|
</>
|
||||||
|
)}
|
||||||
|
|
||||||
|
{/* Labeling mode: image crop + editable fields */}
|
||||||
|
{mode === 'labeling' && editedEntries.length > 0 && (
|
||||||
|
<div className="grid grid-cols-3 gap-4">
|
||||||
|
{/* Left 2/3: Image with highlighted active row */}
|
||||||
|
<div className="col-span-2">
|
||||||
|
<div className="text-xs font-medium text-gray-500 dark:text-gray-400 mb-1">
|
||||||
|
Eintrag {activeIndex + 1} von {editedEntries.length}
|
||||||
|
</div>
|
||||||
|
<div className="border rounded-lg overflow-hidden dark:border-gray-700 bg-gray-50 dark:bg-gray-900 relative">
|
||||||
|
{/* eslint-disable-next-line @next/next/no-img-element */}
|
||||||
|
<img
|
||||||
|
src={`${overlayUrl}?t=${Date.now()}`}
|
||||||
|
alt="Wort-Overlay"
|
||||||
|
className="w-full h-auto"
|
||||||
|
/>
|
||||||
|
{/* Highlight overlay for active entry bbox */}
|
||||||
|
{editedEntries[activeIndex]?.bbox && (
|
||||||
|
<div
|
||||||
|
className="absolute border-2 border-yellow-400 bg-yellow-400/10 pointer-events-none"
|
||||||
|
style={{
|
||||||
|
left: `${editedEntries[activeIndex].bbox.x}%`,
|
||||||
|
top: `${editedEntries[activeIndex].bbox.y}%`,
|
||||||
|
width: `${editedEntries[activeIndex].bbox.w}%`,
|
||||||
|
height: `${editedEntries[activeIndex].bbox.h}%`,
|
||||||
|
}}
|
||||||
|
/>
|
||||||
|
)}
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
|
||||||
|
{/* Right 1/3: Editable entry fields */}
|
||||||
|
<div className="space-y-3">
|
||||||
|
{/* Navigation */}
|
||||||
|
<div className="flex items-center justify-between">
|
||||||
|
<button
|
||||||
|
onClick={() => setActiveIndex(Math.max(0, activeIndex - 1))}
|
||||||
|
disabled={activeIndex === 0}
|
||||||
|
className="px-2 py-1 text-xs border rounded hover:bg-gray-50 dark:hover:bg-gray-700 dark:border-gray-600 disabled:opacity-30"
|
||||||
|
>
|
||||||
|
Zurueck
|
||||||
|
</button>
|
||||||
|
<span className="text-xs text-gray-500">{activeIndex + 1} / {editedEntries.length}</span>
|
||||||
|
<button
|
||||||
|
onClick={() => setActiveIndex(Math.min(editedEntries.length - 1, activeIndex + 1))}
|
||||||
|
disabled={activeIndex >= editedEntries.length - 1}
|
||||||
|
className="px-2 py-1 text-xs border rounded hover:bg-gray-50 dark:hover:bg-gray-700 dark:border-gray-600 disabled:opacity-30"
|
||||||
|
>
|
||||||
|
Weiter
|
||||||
|
</button>
|
||||||
|
</div>
|
||||||
|
|
||||||
|
{/* Status badge */}
|
||||||
|
<div className="flex items-center gap-2">
|
||||||
|
<span className={`px-2 py-0.5 rounded text-[10px] uppercase font-semibold ${statusBadge(editedEntries[activeIndex]?.status)}`}>
|
||||||
|
{editedEntries[activeIndex]?.status || 'pending'}
|
||||||
|
</span>
|
||||||
|
<span className={`text-xs font-mono ${confColor(editedEntries[activeIndex]?.confidence || 0)}`}>
|
||||||
|
{editedEntries[activeIndex]?.confidence}% Konfidenz
|
||||||
|
</span>
|
||||||
|
</div>
|
||||||
|
|
||||||
|
{/* Cell crops */}
|
||||||
|
{editedEntries[activeIndex]?.bbox_en && (
|
||||||
|
<div>
|
||||||
|
<div className="text-[10px] font-medium text-blue-500 mb-0.5">EN-Zelle</div>
|
||||||
|
<div className="border rounded dark:border-gray-700 overflow-hidden bg-white dark:bg-gray-900 h-10 relative">
|
||||||
|
<CellCrop
|
||||||
|
imageUrl={dewarpedUrl}
|
||||||
|
bbox={editedEntries[activeIndex].bbox_en!}
|
||||||
|
/>
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
)}
|
||||||
|
{editedEntries[activeIndex]?.bbox_de && (
|
||||||
|
<div>
|
||||||
|
<div className="text-[10px] font-medium text-green-500 mb-0.5">DE-Zelle</div>
|
||||||
|
<div className="border rounded dark:border-gray-700 overflow-hidden bg-white dark:bg-gray-900 h-10 relative">
|
||||||
|
<CellCrop
|
||||||
|
imageUrl={dewarpedUrl}
|
||||||
|
bbox={editedEntries[activeIndex].bbox_de!}
|
||||||
|
/>
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
)}
|
||||||
|
|
||||||
|
{/* Editable fields */}
|
||||||
|
<div className="space-y-2">
|
||||||
|
<div>
|
||||||
|
<label className="text-[10px] font-medium text-gray-500 dark:text-gray-400">English</label>
|
||||||
|
<input
|
||||||
|
ref={enRef}
|
||||||
|
type="text"
|
||||||
|
value={editedEntries[activeIndex]?.english || ''}
|
||||||
|
onChange={(e) => updateEntry(activeIndex, 'english', e.target.value)}
|
||||||
|
className="w-full px-2 py-1.5 text-sm border rounded dark:bg-gray-700 dark:border-gray-600 font-mono"
|
||||||
|
/>
|
||||||
|
</div>
|
||||||
|
<div>
|
||||||
|
<label className="text-[10px] font-medium text-gray-500 dark:text-gray-400">Deutsch</label>
|
||||||
|
<input
|
||||||
|
type="text"
|
||||||
|
value={editedEntries[activeIndex]?.german || ''}
|
||||||
|
onChange={(e) => updateEntry(activeIndex, 'german', e.target.value)}
|
||||||
|
className="w-full px-2 py-1.5 text-sm border rounded dark:bg-gray-700 dark:border-gray-600 font-mono"
|
||||||
|
/>
|
||||||
|
</div>
|
||||||
|
<div>
|
||||||
|
<label className="text-[10px] font-medium text-gray-500 dark:text-gray-400">Example</label>
|
||||||
|
<input
|
||||||
|
type="text"
|
||||||
|
value={editedEntries[activeIndex]?.example || ''}
|
||||||
|
onChange={(e) => updateEntry(activeIndex, 'example', e.target.value)}
|
||||||
|
className="w-full px-2 py-1.5 text-sm border rounded dark:bg-gray-700 dark:border-gray-600 font-mono"
|
||||||
|
/>
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
|
||||||
|
{/* Action buttons */}
|
||||||
|
<div className="flex gap-2">
|
||||||
|
<button
|
||||||
|
onClick={confirmEntry}
|
||||||
|
className="flex-1 px-3 py-1.5 text-xs bg-green-600 text-white rounded-lg hover:bg-green-700 font-medium"
|
||||||
|
>
|
||||||
|
Bestaetigen (Enter)
|
||||||
|
</button>
|
||||||
|
<button
|
||||||
|
onClick={skipEntry}
|
||||||
|
className="px-3 py-1.5 text-xs border rounded-lg hover:bg-gray-50 dark:hover:bg-gray-700 dark:border-gray-600"
|
||||||
|
>
|
||||||
|
Skip
|
||||||
|
</button>
|
||||||
|
</div>
|
||||||
|
|
||||||
|
{/* Shortcuts hint */}
|
||||||
|
<div className="text-[10px] text-gray-400 space-y-0.5">
|
||||||
|
<div>Enter = Bestaetigen & weiter</div>
|
||||||
|
<div>Ctrl+↓ = Ueberspringen</div>
|
||||||
|
<div>Ctrl+↑ = Zurueck</div>
|
||||||
|
</div>
|
||||||
|
|
||||||
|
{/* Entry list (compact) */}
|
||||||
|
<div className="border-t dark:border-gray-700 pt-2 mt-2">
|
||||||
|
<div className="text-[10px] font-medium text-gray-500 dark:text-gray-400 mb-1">
|
||||||
|
Alle Eintraege
|
||||||
|
</div>
|
||||||
|
<div className="max-h-48 overflow-y-auto space-y-0.5">
|
||||||
|
{editedEntries.map((entry, idx) => (
|
||||||
|
<div
|
||||||
|
key={idx}
|
||||||
|
onClick={() => setActiveIndex(idx)}
|
||||||
|
className={`flex items-center gap-1 px-2 py-1 rounded text-[10px] cursor-pointer transition-colors ${
|
||||||
|
idx === activeIndex
|
||||||
|
? 'bg-teal-50 dark:bg-teal-900/30 border border-teal-200 dark:border-teal-700'
|
||||||
|
: 'hover:bg-gray-50 dark:hover:bg-gray-700/50'
|
||||||
|
}`}
|
||||||
|
>
|
||||||
|
<span className="w-4 text-right text-gray-400">{idx + 1}</span>
|
||||||
|
<span className={`w-2 h-2 rounded-full ${
|
||||||
|
entry.status === 'confirmed' ? 'bg-green-500' :
|
||||||
|
entry.status === 'edited' ? 'bg-blue-500' :
|
||||||
|
entry.status === 'skipped' ? 'bg-orange-400' :
|
||||||
|
'bg-gray-300 dark:bg-gray-600'
|
||||||
|
}`} />
|
||||||
|
<span className="truncate text-gray-600 dark:text-gray-400 font-mono">
|
||||||
|
{entry.english || '—'} → {entry.german || '—'}
|
||||||
|
</span>
|
||||||
|
</div>
|
||||||
|
))}
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
)}
|
||||||
|
|
||||||
|
{/* Controls */}
|
||||||
|
{wordResult && (
|
||||||
|
<div className="bg-white dark:bg-gray-800 rounded-xl border border-gray-200 dark:border-gray-700 p-4 space-y-3">
|
||||||
|
<div className="flex items-center gap-3 flex-wrap">
|
||||||
|
<button
|
||||||
|
onClick={() => runAutoDetection()}
|
||||||
|
disabled={detecting}
|
||||||
|
className="px-3 py-1.5 text-xs border rounded-lg hover:bg-gray-50 dark:hover:bg-gray-700 dark:border-gray-600 disabled:opacity-50"
|
||||||
|
>
|
||||||
|
Erneut erkennen
|
||||||
|
</button>
|
||||||
|
|
||||||
|
<button
|
||||||
|
onClick={() => goToStep(3)}
|
||||||
|
className="px-3 py-1.5 text-xs border rounded-lg hover:bg-gray-50 dark:hover:bg-gray-700 dark:border-gray-600 text-orange-600 dark:text-orange-400 border-orange-300 dark:border-orange-700"
|
||||||
|
>
|
||||||
|
Zeilen korrigieren (Step 4)
|
||||||
|
</button>
|
||||||
|
|
||||||
|
<div className="flex-1" />
|
||||||
|
|
||||||
|
{/* Ground truth */}
|
||||||
|
{!gtSaved ? (
|
||||||
|
<>
|
||||||
|
<input
|
||||||
|
type="text"
|
||||||
|
placeholder="Notizen (optional)"
|
||||||
|
value={gtNotes}
|
||||||
|
onChange={(e) => setGtNotes(e.target.value)}
|
||||||
|
className="px-2 py-1 text-xs border rounded dark:bg-gray-700 dark:border-gray-600 w-48"
|
||||||
|
/>
|
||||||
|
<button
|
||||||
|
onClick={() => handleGroundTruth(true)}
|
||||||
|
className="px-3 py-1.5 text-xs bg-green-600 text-white rounded-lg hover:bg-green-700"
|
||||||
|
>
|
||||||
|
Korrekt
|
||||||
|
</button>
|
||||||
|
<button
|
||||||
|
onClick={() => handleGroundTruth(false)}
|
||||||
|
className="px-3 py-1.5 text-xs bg-red-600 text-white rounded-lg hover:bg-red-700"
|
||||||
|
>
|
||||||
|
Fehlerhaft
|
||||||
|
</button>
|
||||||
|
</>
|
||||||
|
) : (
|
||||||
|
<span className="text-xs text-green-600 dark:text-green-400">
|
||||||
|
Ground Truth gespeichert
|
||||||
|
</span>
|
||||||
|
)}
|
||||||
|
|
||||||
|
<button
|
||||||
|
onClick={onNext}
|
||||||
|
className="px-4 py-1.5 text-xs bg-teal-600 text-white rounded-lg hover:bg-teal-700 font-medium"
|
||||||
|
>
|
||||||
|
Weiter
|
||||||
|
</button>
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
)}
|
||||||
|
|
||||||
|
{error && (
|
||||||
|
<div className="p-3 bg-red-50 dark:bg-red-900/20 text-red-600 dark:text-red-400 rounded-lg text-sm">
|
||||||
|
{error}
|
||||||
|
</div>
|
||||||
|
)}
|
||||||
</div>
|
</div>
|
||||||
)
|
)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
/**
 * CellCrop: Shows a cropped portion of the page image based on a percent bbox.
 *
 * The image is rendered `scale` times as wide as the container and shifted
 * with a CSS translate. Translate percentages refer to the element's own
 * rendered size, so moving by (-bbox.x%, -bbox.y%) places the bbox's top-left
 * corner at the container's top-left corner, whatever the image's aspect
 * ratio is. (CSS `background-position` percentages are resolved against
 * "container size minus image size" and therefore cannot express this
 * offset directly.)
 *
 * @param imageUrl - URL of the full image to crop from.
 * @param bbox - Region to show; x/y/w/h are percentages of the full image.
 */
function CellCrop({ imageUrl, bbox }: { imageUrl: string; bbox: { x: number; y: number; w: number; h: number } }) {
  // Cap zoom at 8x
  const MAX_ZOOM = 8
  // Scale factor: how much to zoom into the cell. A zero, negative or NaN
  // extent would give an unusable factor, so it falls back to the cap.
  const zoomFor = (extent: number) => (extent > 0 ? 100 / extent : MAX_ZOOM)
  const scale = Math.min(zoomFor(bbox.w), zoomFor(bbox.h), MAX_ZOOM)

  return (
    <div className="w-full h-full overflow-hidden">
      {/* eslint-disable-next-line @next/next/no-img-element */}
      <img
        src={imageUrl}
        alt=""
        aria-hidden="true"
        draggable={false}
        // max-w-none: the image has to be allowed to grow beyond the container width.
        className="block max-w-none h-auto"
        style={{
          width: `${scale * 100}%`,
          transform: `translate(${-bbox.x}%, ${-bbox.y}%)`,
        }}
      />
    </div>
  )
}
|
||||||
|
|||||||
373
docs-src/services/klausur-service/OCR-Pipeline.md
Normal file
373
docs-src/services/klausur-service/OCR-Pipeline.md
Normal file
@@ -0,0 +1,373 @@
|
|||||||
|
# OCR Pipeline - Schrittweise Seitenrekonstruktion
|
||||||
|
|
||||||
|
**Version:** 1.0.0
|
||||||
|
**Status:** In Entwicklung
|
||||||
|
**URL:** https://macmini:3002/ai/ocr-pipeline
|
||||||
|
|
||||||
|
## Uebersicht
|
||||||
|
|
||||||
|
Die OCR Pipeline zerlegt den OCR-Prozess in **8 einzelne Schritte**, um eingescannte Vokabelseiten Wort fuer Wort zu rekonstruieren. Jeder Schritt kann individuell geprueft, korrigiert und mit Ground-Truth-Daten versehen werden.
|
||||||
|
|
||||||
|
**Ziel:** 10 Vokabelseiten fehlerfrei rekonstruieren.
|
||||||
|
|
||||||
|
### Pipeline-Schritte
|
||||||
|
|
||||||
|
| Schritt | Name | Beschreibung | Status |
|
||||||
|
|---------|------|--------------|--------|
|
||||||
|
| 1 | Begradigung (Deskew) | Scan begradigen (Hough Lines + Word Alignment) | Implementiert |
|
||||||
|
| 2 | Entzerrung (Dewarp) | Buchwoelbung entzerren (Vertikalkanten-Analyse) | Implementiert |
|
||||||
|
| 3 | Spaltenerkennung | Unsichtbare Spalten finden (Projektionsprofile) | Implementiert |
|
||||||
|
| 4 | Zeilenerkennung | Horizontale Zeilen + Kopf-/Fusszeilen-Klassifikation | Implementiert |
|
||||||
|
| 5 | Worterkennung | Grid aus Spalten x Zeilen, OCR pro Zelle | Implementiert |
|
||||||
|
| 6 | Koordinatenzuweisung | Exakte Positionen innerhalb Zellen | Geplant |
|
||||||
|
| 7 | Seitenrekonstruktion | Seite nachbauen aus Koordinaten | Geplant |
|
||||||
|
| 8 | Ground Truth Validierung | Gesamtpruefung aller Schritte | Geplant |
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## Architektur
|
||||||
|
|
||||||
|
```
|
||||||
|
Admin-Lehrer (Next.js) klausur-service (FastAPI :8086)
|
||||||
|
┌────────────────────┐ ┌─────────────────────────────┐
|
||||||
|
│ /ai/ocr-pipeline │ │ /api/v1/ocr-pipeline/ │
|
||||||
|
│ │ REST │ │
|
||||||
|
│ PipelineStepper │◄────────►│ Sessions CRUD │
|
||||||
|
│ StepDeskew │ │ Image Serving │
|
||||||
|
│ StepDewarp │ │ Deskew/Dewarp/Columns/Rows │
|
||||||
|
│ StepColumnDetection│ │ Word Recognition │
|
||||||
|
│ StepRowDetection │ │ Ground Truth │
|
||||||
|
│ StepWordRecognition│ │ Overlay Images │
|
||||||
|
└────────────────────┘ └─────────────────────────────┘
|
||||||
|
│
|
||||||
|
▼
|
||||||
|
┌─────────────────────┐
|
||||||
|
│ PostgreSQL │
|
||||||
|
│ ocr_pipeline_sessions│
|
||||||
|
│ (Images + JSONB) │
|
||||||
|
└─────────────────────┘
|
||||||
|
```
|
||||||
|
|
||||||
|
### Dateistruktur
|
||||||
|
|
||||||
|
```
|
||||||
|
klausur-service/backend/
|
||||||
|
├── ocr_pipeline_api.py # FastAPI Router (alle Endpoints)
|
||||||
|
├── ocr_pipeline_session_store.py # PostgreSQL Persistence
|
||||||
|
├── cv_vocab_pipeline.py # Computer Vision Algorithmen
|
||||||
|
└── migrations/
|
||||||
|
├── 002_ocr_pipeline_sessions.sql # Basis-Schema
|
||||||
|
├── 003_add_row_result.sql # Row-Result Spalte
|
||||||
|
└── 004_add_word_result.sql # Word-Result Spalte
|
||||||
|
|
||||||
|
admin-lehrer/
|
||||||
|
├── app/(admin)/ai/ocr-pipeline/
|
||||||
|
│ ├── page.tsx # Haupt-Page mit Session-Management
|
||||||
|
│ └── types.ts # TypeScript Interfaces
|
||||||
|
└── components/ocr-pipeline/
|
||||||
|
├── PipelineStepper.tsx # Fortschritts-Stepper
|
||||||
|
├── StepDeskew.tsx # Schritt 1
|
||||||
|
├── StepDewarp.tsx # Schritt 2
|
||||||
|
├── StepColumnDetection.tsx # Schritt 3
|
||||||
|
├── StepRowDetection.tsx # Schritt 4
|
||||||
|
├── StepWordRecognition.tsx # Schritt 5
|
||||||
|
├── StepCoordinates.tsx # Schritt 6 (Platzhalter)
|
||||||
|
├── StepReconstruction.tsx # Schritt 7 (Platzhalter)
|
||||||
|
└── StepGroundTruth.tsx # Schritt 8 (Platzhalter)
|
||||||
|
```
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## API-Referenz
|
||||||
|
|
||||||
|
Alle Endpoints unter `/api/v1/ocr-pipeline/`.
|
||||||
|
|
||||||
|
### Sessions
|
||||||
|
|
||||||
|
| Methode | Pfad | Beschreibung |
|
||||||
|
|---------|------|--------------|
|
||||||
|
| `POST` | `/sessions` | Neue Session erstellen (Bild hochladen) |
|
||||||
|
| `GET` | `/sessions` | Alle Sessions auflisten |
|
||||||
|
| `GET` | `/sessions/{id}` | Session-Info mit allen Step-Results |
|
||||||
|
| `PUT` | `/sessions/{id}` | Session umbenennen |
|
||||||
|
| `DELETE` | `/sessions/{id}` | Session loeschen |
|
||||||
|
|
||||||
|
### Bilder
|
||||||
|
|
||||||
|
| Methode | Pfad | Beschreibung |
|
||||||
|
|---------|------|--------------|
|
||||||
|
| `GET` | `/sessions/{id}/image/original` | Originalbild |
|
||||||
|
| `GET` | `/sessions/{id}/image/deskewed` | Begradigtes Bild |
|
||||||
|
| `GET` | `/sessions/{id}/image/dewarped` | Entzerrtes Bild |
|
||||||
|
| `GET` | `/sessions/{id}/image/binarized` | Binarisiertes Bild |
|
||||||
|
| `GET` | `/sessions/{id}/image/columns-overlay` | Spalten-Overlay |
|
||||||
|
| `GET` | `/sessions/{id}/image/rows-overlay` | Zeilen-Overlay |
|
||||||
|
| `GET` | `/sessions/{id}/image/words-overlay` | Wort-Grid-Overlay |
|
||||||
|
|
||||||
|
### Schritt 1: Begradigung
|
||||||
|
|
||||||
|
| Methode | Pfad | Beschreibung |
|
||||||
|
|---------|------|--------------|
|
||||||
|
| `POST` | `/sessions/{id}/deskew` | Automatische Begradigung |
|
||||||
|
| `POST` | `/sessions/{id}/deskew/manual` | Manuelle Winkelkorrektur |
|
||||||
|
| `POST` | `/sessions/{id}/ground-truth/deskew` | Ground Truth speichern |
|
||||||
|
|
||||||
|
### Schritt 2: Entzerrung
|
||||||
|
|
||||||
|
| Methode | Pfad | Beschreibung |
|
||||||
|
|---------|------|--------------|
|
||||||
|
| `POST` | `/sessions/{id}/dewarp` | Automatische Entzerrung |
|
||||||
|
| `POST` | `/sessions/{id}/dewarp/manual` | Manueller Scherungswinkel |
|
||||||
|
| `POST` | `/sessions/{id}/ground-truth/dewarp` | Ground Truth speichern |
|
||||||
|
|
||||||
|
### Schritt 3: Spalten
|
||||||
|
|
||||||
|
| Methode | Pfad | Beschreibung |
|
||||||
|
|---------|------|--------------|
|
||||||
|
| `POST` | `/sessions/{id}/columns` | Automatische Spaltenerkennung |
|
||||||
|
| `POST` | `/sessions/{id}/columns/manual` | Manuelle Spalten-Definition |
|
||||||
|
| `POST` | `/sessions/{id}/ground-truth/columns` | Ground Truth speichern |
|
||||||
|
|
||||||
|
### Schritt 4: Zeilen
|
||||||
|
|
||||||
|
| Methode | Pfad | Beschreibung |
|
||||||
|
|---------|------|--------------|
|
||||||
|
| `POST` | `/sessions/{id}/rows` | Automatische Zeilenerkennung |
|
||||||
|
| `POST` | `/sessions/{id}/rows/manual` | Manuelle Zeilen-Definition |
|
||||||
|
| `POST` | `/sessions/{id}/ground-truth/rows` | Ground Truth speichern |
|
||||||
|
| `GET` | `/sessions/{id}/ground-truth/rows` | Ground Truth abrufen |
|
||||||
|
|
||||||
|
### Schritt 5: Worterkennung
|
||||||
|
|
||||||
|
| Methode | Pfad | Beschreibung |
|
||||||
|
|---------|------|--------------|
|
||||||
|
| `POST` | `/sessions/{id}/words` | Wort-Grid aus Spalten x Zeilen erstellen |
|
||||||
|
| `POST` | `/sessions/{id}/ground-truth/words` | Ground Truth speichern |
|
||||||
|
| `GET` | `/sessions/{id}/ground-truth/words` | Ground Truth abrufen |
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## Schritt 5: Worterkennung (Detail)
|
||||||
|
|
||||||
|
### Algorithmus: `build_word_grid()`
|
||||||
|
|
||||||
|
Schritt 5 nutzt die Ergebnisse von Schritt 3 (Spalten) und Schritt 4 (Zeilen), um ein Grid zu erstellen und jede Zelle per OCR auszulesen.
|
||||||
|
|
||||||
|
```
|
||||||
|
Spalten (Step 3): column_en | column_de | column_example
|
||||||
|
───────────┼─────────────┼────────────────
|
||||||
|
Zeilen (Step 4): R0 │ hello │ hallo │ Hello, World!
|
||||||
|
R1 │ world │ Welt │ The whole world
|
||||||
|
R2 │ book │ Buch │ Read a book
|
||||||
|
───────────┼─────────────┼────────────────
|
||||||
|
```
|
||||||
|
|
||||||
|
**Ablauf:**
|
||||||
|
|
||||||
|
1. **Filterung**: Nur `content`-Zeilen (kein Header/Footer) und relevante Spalten (`column_en`, `column_de`, `column_example`)
|
||||||
|
2. **Zell-Bildung**: Pro content-Zeile x pro relevante Spalte eine `PageRegion` berechnen
|
||||||
|
3. **OCR**: `ocr_region()` mit PSM 7 (Single Line) pro Zelle aufrufen
|
||||||
|
4. **Sprache**: `eng` fuer EN-Spalte, `deu` fuer DE-Spalte, `eng+deu` fuer Beispiele
|
||||||
|
5. **Gruppierung**: Zellen zu Vokabel-Eintraegen zusammenfuehren
|
||||||
|
|
||||||
|
### Response-Format
|
||||||
|
|
||||||
|
```json
|
||||||
|
{
|
||||||
|
"entries": [
|
||||||
|
{
|
||||||
|
"row_index": 0,
|
||||||
|
"english": "hello",
|
||||||
|
"german": "hallo",
|
||||||
|
"example": "Hello, how are you?",
|
||||||
|
"confidence": 85.3,
|
||||||
|
"bbox": {"x": 5.2, "y": 12.1, "w": 90.0, "h": 2.8},
|
||||||
|
"bbox_en": {"x": 5.2, "y": 12.1, "w": 30.0, "h": 2.8},
|
||||||
|
"bbox_de": {"x": 35.5, "y": 12.1, "w": 25.0, "h": 2.8},
|
||||||
|
"bbox_ex": {"x": 61.0, "y": 12.1, "w": 34.2, "h": 2.8}
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"entry_count": 25,
|
||||||
|
"image_width": 2480,
|
||||||
|
"image_height": 3508,
|
||||||
|
"duration_seconds": 3.2,
|
||||||
|
"summary": {
|
||||||
|
"total_entries": 25,
|
||||||
|
"with_english": 24,
|
||||||
|
"with_german": 22,
|
||||||
|
"low_confidence": 3
|
||||||
|
}
|
||||||
|
}
|
||||||
|
```
|
||||||
|
|
||||||
|
!!! info "Bounding Boxes in Prozent"
|
||||||
|
Alle `bbox`-Werte sind Prozent (0-100) relativ zur Bildgroesse.
|
||||||
|
Das erleichtert die Darstellung im Frontend unabhaengig von der Bildaufloesung.
|
||||||
|
|
||||||
|
### Frontend: StepWordRecognition
|
||||||
|
|
||||||
|
Die Komponente bietet zwei Modi:
|
||||||
|
|
||||||
|
**Uebersicht-Modus:**
|
||||||
|
|
||||||
|
- Zwei Bilder nebeneinander: Grid-Overlay vs. sauberes Bild
|
||||||
|
- Tabelle aller erkannten Eintraege mit Konfidenz-Werten
|
||||||
|
- Klick auf Eintrag wechselt zum Labeling-Modus
|
||||||
|
|
||||||
|
**Labeling-Modus (Step-Through):**
|
||||||
|
|
||||||
|
- Links (2/3): Bild mit hervorgehobenem aktiven Eintrag (gelber Rahmen)
|
||||||
|
- Rechts (1/3): Zell-Ausschnitte + editierbare Felder (English, Deutsch, Example)
|
||||||
|
- Tastaturkuerzel:
|
||||||
|
- `Enter` = Bestaetigen und weiter
|
||||||
|
- `Ctrl+Pfeil runter` = Ueberspringen
|
||||||
|
- `Ctrl+Pfeil hoch` = Zurueck
|
||||||
|
|
||||||
|
**Feedback-Loop:**
|
||||||
|
|
||||||
|
- "Zeilen korrigieren" springt zurueck zu Schritt 4
|
||||||
|
- Nach Korrektur der Zeilen kann Schritt 5 erneut ausgefuehrt werden
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## Datenbank-Schema
|
||||||
|
|
||||||
|
```sql
|
||||||
|
CREATE TABLE ocr_pipeline_sessions (
|
||||||
|
id UUID PRIMARY KEY DEFAULT gen_random_uuid(),
|
||||||
|
name VARCHAR(255),
|
||||||
|
filename VARCHAR(255),
|
||||||
|
status VARCHAR(50) DEFAULT 'active',
|
||||||
|
current_step INT DEFAULT 1,
|
||||||
|
|
||||||
|
-- Bilder (BYTEA)
|
||||||
|
original_png BYTEA,
|
||||||
|
deskewed_png BYTEA,
|
||||||
|
binarized_png BYTEA,
|
||||||
|
dewarped_png BYTEA,
|
||||||
|
|
||||||
|
-- Step-Results (JSONB)
|
||||||
|
deskew_result JSONB,
|
||||||
|
dewarp_result JSONB,
|
||||||
|
column_result JSONB,
|
||||||
|
row_result JSONB,
|
||||||
|
word_result JSONB,
|
||||||
|
|
||||||
|
-- Ground Truth + Meta
|
||||||
|
ground_truth JSONB,
|
||||||
|
auto_shear_degrees REAL,
|
||||||
|
created_at TIMESTAMP DEFAULT NOW(),
|
||||||
|
updated_at TIMESTAMP DEFAULT NOW()
|
||||||
|
);
|
||||||
|
```
|
||||||
|
|
||||||
|
### Migrationen
|
||||||
|
|
||||||
|
| Datei | Beschreibung |
|
||||||
|
|-------|--------------|
|
||||||
|
| `002_ocr_pipeline_sessions.sql` | Basis-Schema (Steps 1-3) |
|
||||||
|
| `003_add_row_result.sql` | `row_result JSONB` fuer Step 4 |
|
||||||
|
| `004_add_word_result.sql` | `word_result JSONB` fuer Step 5 |
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## TypeScript Interfaces
|
||||||
|
|
||||||
|
Die wichtigsten Typen in `types.ts`:
|
||||||
|
|
||||||
|
```typescript
|
||||||
|
interface WordEntry {
|
||||||
|
row_index: number
|
||||||
|
english: string
|
||||||
|
german: string
|
||||||
|
example: string
|
||||||
|
confidence: number
|
||||||
|
bbox: WordBbox // Gesamte Zeile
|
||||||
|
bbox_en: WordBbox | null // EN-Zelle
|
||||||
|
bbox_de: WordBbox | null // DE-Zelle
|
||||||
|
bbox_ex: WordBbox | null // Example-Zelle
|
||||||
|
status?: 'pending' | 'confirmed' | 'edited' | 'skipped'
|
||||||
|
}
|
||||||
|
|
||||||
|
interface WordResult {
|
||||||
|
entries: WordEntry[]
|
||||||
|
entry_count: number
|
||||||
|
image_width: number
|
||||||
|
image_height: number
|
||||||
|
duration_seconds: number
|
||||||
|
summary: {
|
||||||
|
total_entries: number
|
||||||
|
with_english: number
|
||||||
|
with_german: number
|
||||||
|
low_confidence: number
|
||||||
|
}
|
||||||
|
}
|
||||||
|
```
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## Ground Truth System
|
||||||
|
|
||||||
|
Jeder Schritt kann mit Ground-Truth-Feedback versehen werden:
|
||||||
|
|
||||||
|
```json
|
||||||
|
{
|
||||||
|
"is_correct": false,
|
||||||
|
"corrected_entries": [...],
|
||||||
|
"notes": "Zeile 5 falsch erkannt",
|
||||||
|
"saved_at": "2026-02-28T10:30:00"
|
||||||
|
}
|
||||||
|
```
|
||||||
|
|
||||||
|
Ground-Truth-Daten werden in der `ground_truth` JSONB-Spalte gespeichert, gruppiert nach Schritt:
|
||||||
|
|
||||||
|
```json
|
||||||
|
{
|
||||||
|
"deskew": { "is_correct": true, ... },
|
||||||
|
"dewarp": { "is_correct": true, ... },
|
||||||
|
"columns": { "is_correct": false, ... },
|
||||||
|
"rows": { "is_correct": true, ... },
|
||||||
|
"words": { "is_correct": false, ... }
|
||||||
|
}
|
||||||
|
```
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## Deployment
|
||||||
|
|
||||||
|
```bash
|
||||||
|
# 1. Git push
|
||||||
|
git push origin main && git push gitea main
|
||||||
|
|
||||||
|
# 2. Mac Mini pull + build
|
||||||
|
ssh macmini "cd /Users/benjaminadmin/Projekte/breakpilot-lehrer && git pull --no-rebase origin main"
|
||||||
|
|
||||||
|
# klausur-service (Backend)
|
||||||
|
ssh macmini "cd /Users/benjaminadmin/Projekte/breakpilot-lehrer && \
|
||||||
|
/usr/local/bin/docker compose build --no-cache klausur-service && \
|
||||||
|
/usr/local/bin/docker compose up -d klausur-service"
|
||||||
|
|
||||||
|
# admin-lehrer (Frontend)
|
||||||
|
ssh macmini "cd /Users/benjaminadmin/Projekte/breakpilot-lehrer && \
|
||||||
|
/usr/local/bin/docker compose build --no-cache admin-lehrer && \
|
||||||
|
/usr/local/bin/docker compose up -d admin-lehrer"
|
||||||
|
|
||||||
|
# 3. Migration ausfuehren
|
||||||
|
ssh macmini "/usr/local/bin/docker exec bp-lehrer-klausur-service \
|
||||||
|
python -c \"import asyncio; from ocr_pipeline_session_store import *; asyncio.run(init_ocr_pipeline_tables())\""
|
||||||
|
|
||||||
|
# 4. Testen unter:
|
||||||
|
# https://macmini:3002/ai/ocr-pipeline
|
||||||
|
```
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## Aenderungshistorie
|
||||||
|
|
||||||
|
| Datum | Version | Aenderung |
|
||||||
|
|-------|---------|----------|
|
||||||
|
| 2026-02-28 | 1.0.0 | Schritt 5 (Worterkennung) implementiert |
|
||||||
|
| 2026-02-22 | 0.4.0 | Schritt 4 (Zeilenerkennung) implementiert |
|
||||||
|
| 2026-02-20 | 0.3.0 | Schritt 3 (Spaltenerkennung) mit Typ-Klassifikation |
|
||||||
|
| 2026-02-15 | 0.2.0 | Schritt 2 (Entzerrung/Dewarp) |
|
||||||
|
| 2026-02-12 | 0.1.0 | Schritt 1 (Begradigung/Deskew) + Session-Management |
|
||||||
@@ -2169,6 +2169,142 @@ def analyze_layout_by_words(ocr_img: np.ndarray, dewarped_bgr: np.ndarray) -> Li
|
|||||||
return regions
|
return regions
|
||||||
|
|
||||||
|
|
||||||
|
# =============================================================================
|
||||||
|
# Pipeline Step 5: Word Grid from Columns × Rows
|
||||||
|
# =============================================================================
|
||||||
|
|
||||||
|
def build_word_grid(
    ocr_img: np.ndarray,
    column_regions: List[PageRegion],
    row_geometries: List[RowGeometry],
    img_w: int,
    img_h: int,
    lang: str = "eng+deu",
) -> List[Dict[str, Any]]:
    """Build a word grid by intersecting columns and rows, then OCR each cell.

    Every row with ``row_type == 'content'`` is crossed with every vocabulary
    column (``column_en``, ``column_de``, ``column_example``). Each cell is
    clipped to the image, passed to ``ocr_region`` with PSM 7 and a
    per-column language, and the recognised words are joined left-to-right.

    Args:
        ocr_img: Binarized full-page image.
        column_regions: Classified columns from Step 3 (PageRegion list).
        row_geometries: Rows from Step 4 (RowGeometry list).
        img_w: Image width in pixels.
        img_h: Image height in pixels.
        lang: Fallback Tesseract language for a column type that has no
            dedicated entry in the language map.

    Returns:
        One dict per content row that produced text, with the keys
        ``row_index`` (position among the content rows), ``english``,
        ``german``, ``example``, ``confidence`` (mean of the per-cell mean
        word confidences, one decimal), ``bbox`` (whole row) and
        ``bbox_en`` / ``bbox_de`` / ``bbox_ex`` (cell boxes or ``None``).
        All boxes are in percent of the image size. An empty list is
        returned when there are no content rows, no vocabulary columns, or
        the image size is not positive.
    """
    if img_w <= 0 or img_h <= 0:
        # The percent conversion below divides by both dimensions.
        logger.warning(f"build_word_grid: invalid image size {img_w}x{img_h}")
        return []

    # Filter to content rows only (skip header/footer)
    content_rows = [r for r in row_geometries if r.row_type == 'content']
    if not content_rows:
        logger.warning("build_word_grid: no content rows found")
        return []

    # OCR language per vocabulary column type
    lang_map = {
        'column_en': 'eng',
        'column_de': 'deu',
        'column_example': 'eng+deu',
    }
    # Column type -> (text key, bbox key) inside an entry
    entry_keys = {
        'column_en': ('english', 'bbox_en'),
        'column_de': ('german', 'bbox_de'),
        'column_example': ('example', 'bbox_ex'),
    }

    # Vocabulary columns only, left-to-right
    relevant_cols = sorted(
        (c for c in column_regions if c.type in entry_keys),
        key=lambda c: c.x,
    )
    if not relevant_cols:
        logger.warning("build_word_grid: no relevant vocabulary columns found")
        return []

    def pct_bbox(x, y, w, h) -> Dict[str, float]:
        """Convert a pixel rectangle to percent of the image size."""
        return {
            'x': round(x / img_w * 100, 2),
            'y': round(y / img_h * 100, 2),
            'w': round(w / img_w * 100, 2),
            'h': round(h / img_h * 100, 2),
        }

    entries: List[Dict[str, Any]] = []

    for row_idx, row in enumerate(content_rows):
        entry: Dict[str, Any] = {
            'row_index': row_idx,
            'english': '',
            'german': '',
            'example': '',
            'confidence': 0.0,
            'bbox': pct_bbox(row.x, row.y, row.width, row.height),
            'bbox_en': None,
            'bbox_de': None,
            'bbox_ex': None,
        }

        confidences: List[float] = []

        for col in relevant_cols:
            # Cell = column x-extent × row y-extent, clipped to the image.
            # Both edges are clipped so that a negative origin shrinks the
            # cell instead of shifting it past the column/row's far edge.
            x0 = max(0, col.x)
            y0 = max(0, row.y)
            x1 = min(img_w, col.x + col.width)
            y1 = min(img_h, row.y + row.height)
            cell_w = x1 - x0
            cell_h = y1 - y0

            if cell_w <= 0 or cell_h <= 0:
                continue

            cell_region = PageRegion(
                type=col.type,
                x=x0, y=y0,
                width=cell_w, height=cell_h,
            )

            cell_lang = lang_map.get(col.type, lang)
            # sorted() leaves the list returned by ocr_region untouched
            words = sorted(
                ocr_region(ocr_img, cell_region, lang=cell_lang, psm=7),
                key=lambda w: w['left'],
            )

            # strip() so that whitespace-only OCR output counts as "no text"
            text = ' '.join(w['text'] for w in words).strip()
            if words:
                confidences.append(sum(w['conf'] for w in words) / len(words))

            # If several columns share a type, the right-most one wins.
            text_key, bbox_key = entry_keys[col.type]
            entry[text_key] = text
            entry[bbox_key] = pct_bbox(x0, y0, cell_w, cell_h)

        entry['confidence'] = round(
            sum(confidences) / len(confidences), 1
        ) if confidences else 0.0

        # Only include if at least one field has text
        if entry['english'] or entry['german'] or entry['example']:
            entries.append(entry)

    logger.info(f"build_word_grid: {len(entries)} entries from "
                f"{len(content_rows)} content rows × {len(relevant_cols)} columns")

    return entries
|
||||||
|
|
||||||
|
|
||||||
# =============================================================================
|
# =============================================================================
|
||||||
# Stage 6: Multi-Pass OCR
|
# Stage 6: Multi-Pass OCR
|
||||||
# =============================================================================
|
# =============================================================================
|
||||||
|
|||||||
@@ -0,0 +1,4 @@
|
|||||||
|
-- Migration 004: Add word_result column for OCR Pipeline Step 5
|
||||||
|
-- Stores the word recognition grid result (entries with english/german/example + bboxes)
|
||||||
|
|
||||||
|
ALTER TABLE ocr_pipeline_sessions ADD COLUMN IF NOT EXISTS word_result JSONB;
|
||||||
@@ -29,8 +29,11 @@ from fastapi.responses import Response
|
|||||||
from pydantic import BaseModel
|
from pydantic import BaseModel
|
||||||
|
|
||||||
from cv_vocab_pipeline import (
|
from cv_vocab_pipeline import (
|
||||||
|
PageRegion,
|
||||||
|
RowGeometry,
|
||||||
analyze_layout,
|
analyze_layout,
|
||||||
analyze_layout_by_words,
|
analyze_layout_by_words,
|
||||||
|
build_word_grid,
|
||||||
classify_column_types,
|
classify_column_types,
|
||||||
create_layout_image,
|
create_layout_image,
|
||||||
create_ocr_image,
|
create_ocr_image,
|
||||||
@@ -261,6 +264,10 @@ async def get_session_info(session_id: str):
|
|||||||
result["dewarp_result"] = session["dewarp_result"]
|
result["dewarp_result"] = session["dewarp_result"]
|
||||||
if session.get("column_result"):
|
if session.get("column_result"):
|
||||||
result["column_result"] = session["column_result"]
|
result["column_result"] = session["column_result"]
|
||||||
|
if session.get("row_result"):
|
||||||
|
result["row_result"] = session["row_result"]
|
||||||
|
if session.get("word_result"):
|
||||||
|
result["word_result"] = session["word_result"]
|
||||||
|
|
||||||
return result
|
return result
|
||||||
|
|
||||||
@@ -291,7 +298,7 @@ async def delete_session(session_id: str):
|
|||||||
@router.get("/sessions/{session_id}/image/{image_type}")
|
@router.get("/sessions/{session_id}/image/{image_type}")
|
||||||
async def get_image(session_id: str, image_type: str):
|
async def get_image(session_id: str, image_type: str):
|
||||||
"""Serve session images: original, deskewed, dewarped, binarized, columns-overlay, or rows-overlay."""
|
"""Serve session images: original, deskewed, dewarped, binarized, columns-overlay, or rows-overlay."""
|
||||||
valid_types = {"original", "deskewed", "dewarped", "binarized", "columns-overlay", "rows-overlay"}
|
valid_types = {"original", "deskewed", "dewarped", "binarized", "columns-overlay", "rows-overlay", "words-overlay"}
|
||||||
if image_type not in valid_types:
|
if image_type not in valid_types:
|
||||||
raise HTTPException(status_code=400, detail=f"Unknown image type: {image_type}")
|
raise HTTPException(status_code=400, detail=f"Unknown image type: {image_type}")
|
||||||
|
|
||||||
@@ -301,6 +308,9 @@ async def get_image(session_id: str, image_type: str):
|
|||||||
if image_type == "rows-overlay":
|
if image_type == "rows-overlay":
|
||||||
return await _get_rows_overlay(session_id)
|
return await _get_rows_overlay(session_id)
|
||||||
|
|
||||||
|
if image_type == "words-overlay":
|
||||||
|
return await _get_words_overlay(session_id)
|
||||||
|
|
||||||
# Try cache first for fast serving
|
# Try cache first for fast serving
|
||||||
cached = _cache.get(session_id)
|
cached = _cache.get(session_id)
|
||||||
if cached:
|
if cached:
|
||||||
@@ -992,6 +1002,153 @@ async def get_row_ground_truth(session_id: str):
|
|||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
# Word Recognition Endpoints (Step 5)
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
@router.post("/sessions/{session_id}/words")
async def detect_words(session_id: str):
    """Run Step 5 for a session: OCR every column × row cell and store the result.

    Needs the dewarped image in the session cache and the persisted column
    and row results; answers with HTTP 400 when one of them is missing and
    with HTTP 404 when the session is not found in the database. The word
    result is written to the database (together with ``current_step=5``)
    and to the cache entry, then returned along with the session id.
    """
    if session_id not in _cache:
        await _load_session_to_cache(session_id)
    cached = _get_cached(session_id)

    page_bgr = cached.get("dewarped_bgr")
    if page_bgr is None:
        raise HTTPException(status_code=400, detail="Dewarp must be completed before word detection")

    session = await get_session_db(session_id)
    if not session:
        raise HTTPException(status_code=404, detail=f"Session {session_id} not found")

    column_result = session.get("column_result")
    row_result = session.get("row_result")
    if not (column_result and column_result.get("columns")):
        raise HTTPException(status_code=400, detail="Column detection must be completed first")
    if not (row_result and row_result.get("rows")):
        raise HTTPException(status_code=400, detail="Row detection must be completed first")

    started = time.time()

    # Binarized image for OCR; dimensions come from the dewarped image
    ocr_img = create_ocr_image(page_bgr)
    img_h, img_w = page_bgr.shape[:2]

    # Rebuild PageRegion objects from the stored column dicts
    col_regions = []
    for col in column_result["columns"]:
        col_regions.append(
            PageRegion(
                type=col["type"],
                x=col["x"],
                y=col["y"],
                width=col["width"],
                height=col["height"],
                classification_confidence=col.get("classification_confidence", 1.0),
                classification_method=col.get("classification_method", ""),
            )
        )

    # Rebuild RowGeometry objects from the stored row dicts (words are not restored)
    row_geoms = []
    for row in row_result["rows"]:
        row_geoms.append(
            RowGeometry(
                index=row["index"],
                x=row["x"],
                y=row["y"],
                width=row["width"],
                height=row["height"],
                word_count=row.get("word_count", 0),
                words=[],
                row_type=row.get("row_type", "content"),
                gap_before=row.get("gap_before", 0),
            )
        )

    entries = build_word_grid(ocr_img, col_regions, row_geoms, img_w, img_h)
    duration = time.time() - started

    # Tally the summary counters in a single pass
    english_count = 0
    german_count = 0
    low_conf_count = 0
    for item in entries:
        if item.get("english"):
            english_count += 1
        if item.get("german"):
            german_count += 1
        if item.get("confidence", 0) < 50:
            low_conf_count += 1

    summary = {
        "total_entries": len(entries),
        "with_english": english_count,
        "with_german": german_count,
        "low_confidence": low_conf_count,
    }

    word_result = {
        "entries": entries,
        "entry_count": len(entries),
        "image_width": img_w,
        "image_height": img_h,
        "duration_seconds": round(duration, 2),
        "summary": summary,
    }

    # Persist first, then mirror into the cache entry
    await update_session_db(
        session_id,
        word_result=word_result,
        current_step=5,
    )
    cached["word_result"] = word_result

    logger.info(f"OCR Pipeline: words session {session_id}: "
                f"{len(entries)} entries ({duration:.2f}s), summary: {summary}")

    response = {"session_id": session_id}
    response.update(word_result)
    return response
|
||||||
|
|
||||||
|
|
||||||
|
class WordGroundTruthRequest(BaseModel):
    # Request body for POST /sessions/{session_id}/ground-truth/words.

    # Reviewer verdict on the automatic word recognition result.
    is_correct: bool
    # Optional corrected entries supplied by the reviewer.
    corrected_entries: Optional[List[Dict[str, Any]]] = None
    # Optional free-text remarks.
    notes: Optional[str] = None
|
||||||
|
|
||||||
|
|
||||||
|
@router.post("/sessions/{session_id}/ground-truth/words")
async def save_word_ground_truth(session_id: str, req: WordGroundTruthRequest):
    """Store reviewer feedback for the word recognition step.

    The feedback is saved under the ``"words"`` key of the session's
    ground-truth dict together with a UTC timestamp and a copy of the
    session's current ``word_result``. Answers with HTTP 404 when the
    session does not exist.
    """
    session = await get_session_db(session_id)
    if not session:
        raise HTTPException(status_code=404, detail=f"Session {session_id} not found")

    # Start from the existing ground truth so other steps' feedback is kept
    all_ground_truth = session.get("ground_truth") or {}

    words_feedback = {
        "is_correct": req.is_correct,
        "corrected_entries": req.corrected_entries,
        "notes": req.notes,
        "saved_at": datetime.utcnow().isoformat(),
        "word_result": session.get("word_result"),
    }
    all_ground_truth["words"] = words_feedback

    await update_session_db(session_id, ground_truth=all_ground_truth)

    # Keep an already-cached session in sync with the database
    if session_id in _cache:
        _cache[session_id]["ground_truth"] = all_ground_truth

    return {"session_id": session_id, "ground_truth": words_feedback}
|
||||||
|
|
||||||
|
|
||||||
|
@router.get("/sessions/{session_id}/ground-truth/words")
async def get_word_ground_truth(session_id: str):
    """Return the saved word ground truth next to the automatic word result.

    Answers with HTTP 404 when the session does not exist or when no word
    ground truth has been saved for it.
    """
    session = await get_session_db(session_id)
    if not session:
        raise HTTPException(status_code=404, detail=f"Session {session_id} not found")

    saved = (session.get("ground_truth") or {}).get("words")
    if not saved:
        raise HTTPException(status_code=404, detail="No word ground truth saved")

    payload = {
        "session_id": session_id,
        "words_gt": saved,
        "words_auto": session.get("word_result"),
    }
    return payload
|
||||||
|
|
||||||
|
|
||||||
async def _get_rows_overlay(session_id: str) -> Response:
|
async def _get_rows_overlay(session_id: str) -> Response:
|
||||||
"""Generate dewarped image with row bands drawn on it."""
|
"""Generate dewarped image with row bands drawn on it."""
|
||||||
session = await get_session_db(session_id)
|
session = await get_session_db(session_id)
|
||||||
@@ -1049,3 +1206,106 @@ async def _get_rows_overlay(session_id: str) -> Response:
|
|||||||
raise HTTPException(status_code=500, detail="Failed to encode overlay image")
|
raise HTTPException(status_code=500, detail="Failed to encode overlay image")
|
||||||
|
|
||||||
return Response(content=result_png.tobytes(), media_type="image/png")
|
return Response(content=result_png.tobytes(), media_type="image/png")
|
||||||
|
|
||||||
|
|
||||||
|
async def _get_words_overlay(session_id: str) -> Response:
    """Render the dewarped image with the recognized word grid drawn on it.

    Drawing happens in three passes on the decoded dewarped image:

    1. Vertical lines at the left and right edge of every column whose
       ``type`` is one of the known cell types.
    2. Horizontal lines at the top of every row with ``row_type == "content"``.
    3. For each entry field (english / german / example) that has both a
       bounding box and text: a filled rectangle in the column colour on a
       separate copy, plus a border and a text label coloured by confidence.

    The filled copy is finally blended into the image at 10% opacity.

    Args:
        session_id: Identifier of the OCR pipeline session.

    Returns:
        A ``Response`` containing the PNG-encoded overlay image.

    Raises:
        HTTPException: 404 if the session, its word entries, or its dewarped
            image are missing; 500 if the image cannot be decoded or encoded.
    """
    session = await get_session_db(session_id)
    if not session:
        raise HTTPException(status_code=404, detail=f"Session {session_id} not found")

    word_result = session.get("word_result")
    if not word_result or not word_result.get("entries"):
        raise HTTPException(status_code=404, detail="No word data available")

    column_result = session.get("column_result")
    row_result = session.get("row_result")

    # Load dewarped image
    dewarped_png = await get_session_image(session_id, "dewarped")
    if not dewarped_png:
        raise HTTPException(status_code=404, detail="Dewarped image not available")

    arr = np.frombuffer(dewarped_png, dtype=np.uint8)
    img = cv2.imdecode(arr, cv2.IMREAD_COLOR)
    if img is None:
        raise HTTPException(status_code=500, detail="Failed to decode image")

    img_h, img_w = img.shape[:2]

    # Color map for cell types (BGR)
    cell_colors = {
        "column_en": (255, 180, 0),       # Blue
        "column_de": (0, 200, 0),         # Green
        "column_example": (0, 140, 255),  # Orange
    }

    # Separate canvas for the filled cells; blended into ``img`` at the end.
    overlay = img.copy()

    # Draw column divider lines (vertical)
    if column_result and column_result.get("columns"):
        for col in column_result["columns"]:
            line_color = cell_colors.get(col.get("type", ""))
            if line_color is None:
                continue
            # Coerce to int: cv2 rejects non-integer point coordinates.
            x_start = int(col["x"])
            x_end = int(col["x"] + col["width"])
            cv2.line(img, (x_start, 0), (x_start, img_h), line_color, 1)
            cv2.line(img, (x_end, 0), (x_end, img_h), line_color, 1)

    # Draw row divider lines (horizontal) for content rows
    if row_result and row_result.get("rows"):
        for row in row_result["rows"]:
            if row.get("row_type") == "content":
                ry = int(row["y"])
                cv2.line(img, (0, ry), (img_w, ry), (180, 180, 180), 1)

    # Draw entry cells with text labels
    entry_fields = (
        ("bbox_en", "english", "column_en"),
        ("bbox_de", "german", "column_de"),
        ("bbox_ex", "example", "column_example"),
    )
    font_scale = 0.35

    for entry in word_result["entries"]:
        # A stored null confidence is treated like a missing one (0) so the
        # comparisons below cannot raise TypeError.
        conf = entry.get("confidence") or 0

        # Color by confidence: green >= 70, yellow 50-70, red < 50
        if conf >= 70:
            text_color = (0, 180, 0)
        elif conf >= 50:
            text_color = (0, 180, 220)
        else:
            text_color = (0, 0, 220)

        for bbox_key, field_key, col_type in entry_fields:
            bbox = entry.get(bbox_key)
            text = entry.get(field_key, "")
            if not bbox or not text:
                continue

            # Convert percent to pixels
            bx = int(bbox["x"] / 100 * img_w)
            by = int(bbox["y"] / 100 * img_h)
            bw = int(bbox["w"] / 100 * img_w)
            bh = int(bbox["h"] / 100 * img_h)

            color = cell_colors.get(col_type, (200, 200, 200))

            # Semi-transparent fill (drawn on the copy, blended below)
            cv2.rectangle(overlay, (bx, by), (bx + bw, by + bh), color, -1)

            # Border
            cv2.rectangle(img, (bx, by), (bx + bw, by + bh), text_color, 1)

            # Text label, truncated to 30 characters.
            # NOTE(review): OpenCV's Hershey fonts replace characters they
            # cannot render with '?', so umlauts in German text may not
            # display correctly -- confirm whether that matters here.
            label = text[:30]
            cv2.putText(img, label, (bx + 3, by + bh - 4),
                        cv2.FONT_HERSHEY_SIMPLEX, font_scale, text_color, 1)

    # Blend overlay at 10% opacity
    cv2.addWeighted(overlay, 0.1, img, 0.9, 0, img)

    success, result_png = cv2.imencode(".png", img)
    if not success:
        raise HTTPException(status_code=500, detail="Failed to encode overlay image")

    return Response(content=result_png.tobytes(), media_type="image/png")
|
||||||
|
|||||||
@@ -80,7 +80,7 @@ async def create_session_db(
|
|||||||
) VALUES ($1, $2, $3, $4, 'active', 1)
|
) VALUES ($1, $2, $3, $4, 'active', 1)
|
||||||
RETURNING id, name, filename, status, current_step,
|
RETURNING id, name, filename, status, current_step,
|
||||||
deskew_result, dewarp_result, column_result, row_result,
|
deskew_result, dewarp_result, column_result, row_result,
|
||||||
ground_truth, auto_shear_degrees,
|
word_result, ground_truth, auto_shear_degrees,
|
||||||
created_at, updated_at
|
created_at, updated_at
|
||||||
""", uuid.UUID(session_id), name, filename, original_png)
|
""", uuid.UUID(session_id), name, filename, original_png)
|
||||||
|
|
||||||
@@ -94,7 +94,7 @@ async def get_session_db(session_id: str) -> Optional[Dict[str, Any]]:
|
|||||||
row = await conn.fetchrow("""
|
row = await conn.fetchrow("""
|
||||||
SELECT id, name, filename, status, current_step,
|
SELECT id, name, filename, status, current_step,
|
||||||
deskew_result, dewarp_result, column_result, row_result,
|
deskew_result, dewarp_result, column_result, row_result,
|
||||||
ground_truth, auto_shear_degrees,
|
word_result, ground_truth, auto_shear_degrees,
|
||||||
created_at, updated_at
|
created_at, updated_at
|
||||||
FROM ocr_pipeline_sessions WHERE id = $1
|
FROM ocr_pipeline_sessions WHERE id = $1
|
||||||
""", uuid.UUID(session_id))
|
""", uuid.UUID(session_id))
|
||||||
@@ -136,10 +136,10 @@ async def update_session_db(session_id: str, **kwargs) -> Optional[Dict[str, Any
|
|||||||
'name', 'filename', 'status', 'current_step',
|
'name', 'filename', 'status', 'current_step',
|
||||||
'original_png', 'deskewed_png', 'binarized_png', 'dewarped_png',
|
'original_png', 'deskewed_png', 'binarized_png', 'dewarped_png',
|
||||||
'deskew_result', 'dewarp_result', 'column_result', 'row_result',
|
'deskew_result', 'dewarp_result', 'column_result', 'row_result',
|
||||||
'ground_truth', 'auto_shear_degrees',
|
'word_result', 'ground_truth', 'auto_shear_degrees',
|
||||||
}
|
}
|
||||||
|
|
||||||
jsonb_fields = {'deskew_result', 'dewarp_result', 'column_result', 'row_result', 'ground_truth'}
|
jsonb_fields = {'deskew_result', 'dewarp_result', 'column_result', 'row_result', 'word_result', 'ground_truth'}
|
||||||
|
|
||||||
for key, value in kwargs.items():
|
for key, value in kwargs.items():
|
||||||
if key in allowed_fields:
|
if key in allowed_fields:
|
||||||
@@ -164,7 +164,7 @@ async def update_session_db(session_id: str, **kwargs) -> Optional[Dict[str, Any
|
|||||||
WHERE id = ${param_idx}
|
WHERE id = ${param_idx}
|
||||||
RETURNING id, name, filename, status, current_step,
|
RETURNING id, name, filename, status, current_step,
|
||||||
deskew_result, dewarp_result, column_result, row_result,
|
deskew_result, dewarp_result, column_result, row_result,
|
||||||
ground_truth, auto_shear_degrees,
|
word_result, ground_truth, auto_shear_degrees,
|
||||||
created_at, updated_at
|
created_at, updated_at
|
||||||
""", *values)
|
""", *values)
|
||||||
|
|
||||||
@@ -220,7 +220,7 @@ def _row_to_dict(row: asyncpg.Record) -> Dict[str, Any]:
|
|||||||
result[key] = result[key].isoformat()
|
result[key] = result[key].isoformat()
|
||||||
|
|
||||||
# JSONB → parsed (asyncpg returns str for JSONB)
|
# JSONB → parsed (asyncpg returns str for JSONB)
|
||||||
for key in ['deskew_result', 'dewarp_result', 'column_result', 'row_result', 'ground_truth']:
|
for key in ['deskew_result', 'dewarp_result', 'column_result', 'row_result', 'word_result', 'ground_truth']:
|
||||||
if key in result and result[key] is not None:
|
if key in result and result[key] is not None:
|
||||||
if isinstance(result[key], str):
|
if isinstance(result[key], str):
|
||||||
result[key] = json.loads(result[key])
|
result[key] = json.loads(result[key])
|
||||||
|
|||||||
@@ -65,6 +65,7 @@ nav:
|
|||||||
- BYOEH Architektur: services/klausur-service/BYOEH-Architecture.md
|
- BYOEH Architektur: services/klausur-service/BYOEH-Architecture.md
|
||||||
- BYOEH Developer Guide: services/klausur-service/BYOEH-Developer-Guide.md
|
- BYOEH Developer Guide: services/klausur-service/BYOEH-Developer-Guide.md
|
||||||
- NiBiS Pipeline: services/klausur-service/NiBiS-Ingestion-Pipeline.md
|
- NiBiS Pipeline: services/klausur-service/NiBiS-Ingestion-Pipeline.md
|
||||||
|
- OCR Pipeline: services/klausur-service/OCR-Pipeline.md
|
||||||
- OCR Labeling: services/klausur-service/OCR-Labeling-Spec.md
|
- OCR Labeling: services/klausur-service/OCR-Labeling-Spec.md
|
||||||
- OCR Vergleich: services/klausur-service/OCR-Compare.md
|
- OCR Vergleich: services/klausur-service/OCR-Compare.md
|
||||||
- RAG Admin: services/klausur-service/RAG-Admin-Spec.md
|
- RAG Admin: services/klausur-service/RAG-Admin-Spec.md
|
||||||
|
|||||||
Reference in New Issue
Block a user