feat(ocr-pipeline): add Step 5 word recognition (grid from columns × rows)
Backend: build_word_grid() intersects column regions with content rows, OCRs each cell with language-specific Tesseract, and returns vocabulary entries with percent-based bounding boxes. New endpoints: POST /words, GET /image/words-overlay, ground-truth save/retrieve for words. Frontend: StepWordRecognition with overview + step-through labeling modes, goToStep callback for row correction feedback loop. MkDocs: OCR Pipeline documentation added. Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -112,6 +112,16 @@ export default function OcrPipelinePage() {
|
||||
}
|
||||
}
|
||||
|
||||
/**
 * Jump directly to the pipeline step at the given zero-based index.
 *
 * Makes `step` the current step and rewrites every step's status relative to
 * it: steps before it become 'completed', the target becomes 'active' and
 * all steps after it become 'pending'. The index is not range-checked here.
 *
 * @param step Zero-based index of the step to activate.
 */
const goToStep = (step: number) => {
  setCurrentStep(step)
  setSteps((prev) =>
    prev.map((s, i) => ({
      ...s,
      status: i < step ? 'completed' : i === step ? 'active' : 'pending',
    })),
  )
}
|
||||
|
||||
const handleNext = () => {
|
||||
if (currentStep < steps.length - 1) {
|
||||
setSteps((prev) =>
|
||||
@@ -161,7 +171,7 @@ export default function OcrPipelinePage() {
|
||||
case 3:
|
||||
return <StepRowDetection sessionId={sessionId} onNext={handleNext} />
|
||||
case 4:
|
||||
return <StepWordRecognition />
|
||||
return <StepWordRecognition sessionId={sessionId} onNext={handleNext} goToStep={goToStep} />
|
||||
case 5:
|
||||
return <StepCoordinates />
|
||||
case 6:
|
||||
|
||||
@@ -29,6 +29,7 @@ export interface SessionInfo {
|
||||
dewarp_result?: DewarpResult
|
||||
column_result?: ColumnResult
|
||||
row_result?: RowResult
|
||||
word_result?: WordResult
|
||||
}
|
||||
|
||||
export interface DeskewResult {
|
||||
@@ -116,6 +117,46 @@ export interface RowGroundTruth {
|
||||
notes?: string
|
||||
}
|
||||
|
||||
/**
 * Bounding box of a recognised region.
 *
 * All four values are percentages: the UI feeds them straight into CSS
 * `left` / `top` / `width` / `height` percentages on top of the page image.
 */
export interface WordBbox {
  /** Left edge, in percent. */
  x: number
  /** Top edge, in percent. */
  y: number
  /** Width, in percent. */
  w: number
  /** Height, in percent. */
  h: number
}
|
||||
|
||||
/** One vocabulary entry produced by the word-recognition step. */
export interface WordEntry {
  /** Row index reported by the backend for this entry. */
  row_index: number
  /** Recognised English text (may be empty). */
  english: string
  /** Recognised German text (may be empty). */
  german: string
  /** Recognised example text (may be empty). */
  example: string
  /** Confidence shown as a percentage; the UI treats >= 70 as good and < 50 as poor. */
  confidence: number
  /** Box highlighted on the overlay image when this entry is active. */
  bbox: WordBbox
  /** Box of the English cell, or null when there is none. */
  bbox_en: WordBbox | null
  /** Box of the German cell, or null when there is none. */
  bbox_de: WordBbox | null
  /** Box of the example cell, or null when there is none. */
  bbox_ex: WordBbox | null
  /** Labeling state; entries without a status are treated as 'pending' by the UI. */
  status?: 'pending' | 'confirmed' | 'edited' | 'skipped'
}
|
||||
|
||||
/** Response of the word-recognition endpoint, also stored on the session as `word_result`. */
export interface WordResult {
  /** Recognised vocabulary entries. */
  entries: WordEntry[]
  /** Number of entries (presumably equal to `entries.length` — confirm against backend). */
  entry_count: number
  /** Width of the processed image (presumably pixels — confirm against backend). */
  image_width: number
  /** Height of the processed image (presumably pixels — confirm against backend). */
  image_height: number
  /** Processing time in seconds, displayed next to the result heading. */
  duration_seconds: number
  /** Aggregate counts rendered as summary badges. */
  summary: {
    /** Total number of recognised entries. */
    total_entries: number
    /** Count shown in the "EN" badge. */
    with_english: number
    /** Count shown in the "DE" badge. */
    with_german: number
    /** Count shown in the "Unsicher" badge (only rendered when > 0). */
    low_confidence: number
  }
}
|
||||
|
||||
/** Ground-truth verdict for the word-recognition step, posted to the backend. */
export interface WordGroundTruth {
  /** True when the reviewer accepted the recognition result as-is. */
  is_correct: boolean
  /** Reviewer-edited entries; the UI only sends these when `is_correct` is false. */
  corrected_entries?: WordEntry[]
  /** Optional free-text notes from the reviewer. */
  notes?: string
}
|
||||
|
||||
export const PIPELINE_STEPS: PipelineStep[] = [
|
||||
{ id: 'deskew', name: 'Begradigung', icon: '📐', status: 'pending' },
|
||||
{ id: 'dewarp', name: 'Entzerrung', icon: '🔧', status: 'pending' },
|
||||
|
||||
@@ -1,19 +1,602 @@
|
||||
'use client'
|
||||
|
||||
export function StepWordRecognition() {
|
||||
return (
|
||||
<div className="flex flex-col items-center justify-center py-16 text-center">
|
||||
<div className="text-5xl mb-4">🔤</div>
|
||||
<h3 className="text-lg font-medium text-gray-700 dark:text-gray-300 mb-2">
|
||||
Schritt 4: Worterkennung
|
||||
</h3>
|
||||
<p className="text-gray-500 dark:text-gray-400 max-w-md">
|
||||
OCR mit Bounding Boxes fuer jedes erkannte Wort.
|
||||
Dieser Schritt wird in einer zukuenftigen Version implementiert.
|
||||
</p>
|
||||
<div className="mt-6 px-4 py-2 bg-amber-100 dark:bg-amber-900/30 text-amber-700 dark:text-amber-400 rounded-full text-sm font-medium">
|
||||
Kommt bald
|
||||
import { useCallback, useEffect, useRef, useState } from 'react'
|
||||
import type { WordResult, WordEntry, WordGroundTruth } from '@/app/(admin)/ai/ocr-pipeline/types'
|
||||
|
||||
const KLAUSUR_API = '/klausur-api'
|
||||
|
||||
/** Props of the word-recognition pipeline step. */
interface StepWordRecognitionProps {
  /** Active OCR session id; while null the step only renders a placeholder. */
  sessionId: string | null
  /** Invoked when the user presses "Weiter" to advance the pipeline. */
  onNext: () => void
  /** Jumps to another pipeline step by zero-based index (used to go back to row correction). */
  goToStep: (step: number) => void
}
|
||||
|
||||
export function StepWordRecognition({ sessionId, onNext, goToStep }: StepWordRecognitionProps) {
|
||||
const [wordResult, setWordResult] = useState<WordResult | null>(null)
|
||||
const [detecting, setDetecting] = useState(false)
|
||||
const [error, setError] = useState<string | null>(null)
|
||||
const [gtNotes, setGtNotes] = useState('')
|
||||
const [gtSaved, setGtSaved] = useState(false)
|
||||
|
||||
// Step-through labeling state
|
||||
const [activeIndex, setActiveIndex] = useState(0)
|
||||
const [editedEntries, setEditedEntries] = useState<WordEntry[]>([])
|
||||
const [mode, setMode] = useState<'overview' | 'labeling'>('overview')
|
||||
|
||||
const enRef = useRef<HTMLInputElement>(null)
|
||||
|
||||
// On session change: reuse word results already stored on the session,
// otherwise (no stored result, non-OK response or fetch error) start detection.
useEffect(() => {
  if (!sessionId) return

  // Flipped by the cleanup so a superseded or unmounted run neither applies
  // stale results nor fires a redundant detection request.
  let cancelled = false

  const fetchSession = async () => {
    try {
      const res = await fetch(`${KLAUSUR_API}/api/v1/ocr-pipeline/sessions/${sessionId}`)
      if (cancelled) return
      if (res.ok) {
        const info = await res.json()
        if (cancelled) return
        if (info.word_result) {
          setWordResult(info.word_result)
          initEntries(info.word_result.entries)
          return
        }
      }
    } catch (e) {
      console.error('Failed to fetch session info:', e)
      if (cancelled) return
    }
    runAutoDetection()
  }

  fetchSession()

  return () => {
    cancelled = true
  }
  // eslint-disable-next-line react-hooks/exhaustive-deps
}, [sessionId])
|
||||
|
||||
// Reset the labeling state from a fresh entry list: copy every entry,
// default a missing status to 'pending', and select the first entry.
const initEntries = (entries: WordEntry[]) => {
  setEditedEntries(entries.map((e) => ({ ...e, status: e.status || 'pending' })))
  setActiveIndex(0)
}
|
||||
|
||||
// Run word recognition on the backend (POST .../words) and load the result
// into the component. Toggles the `detecting` flag around the request and
// reports failures through `error`.
const runAutoDetection = useCallback(async () => {
  if (!sessionId) return
  setDetecting(true)
  setError(null)
  try {
    const res = await fetch(`${KLAUSUR_API}/api/v1/ocr-pipeline/sessions/${sessionId}/words`, {
      method: 'POST',
    })
    if (!res.ok) {
      // The error body may be missing, non-JSON, or carry a structured
      // (non-string) detail; only a non-empty string is shown verbatim.
      const err = await res.json().catch(() => ({ detail: res.statusText }))
      const detail = typeof err?.detail === 'string' ? err.detail : ''
      throw new Error(detail || 'Worterkennung fehlgeschlagen')
    }
    const data: WordResult = await res.json()
    setWordResult(data)
    initEntries(data.entries)
  } catch (e) {
    setError(e instanceof Error ? e.message : 'Unbekannter Fehler')
  } finally {
    setDetecting(false)
  }
}, [sessionId])
|
||||
|
||||
// Save the reviewer's verdict for this step (POST .../ground-truth/words).
// The edited entries are only sent when the result was marked as incorrect.
// `gtSaved` is set only after the server confirms the save; HTTP and network
// failures are logged and surfaced through `error`.
const handleGroundTruth = useCallback(async (isCorrect: boolean) => {
  if (!sessionId) return
  const gt: WordGroundTruth = {
    is_correct: isCorrect,
    corrected_entries: isCorrect ? undefined : editedEntries,
    notes: gtNotes || undefined,
  }
  setError(null)
  try {
    const res = await fetch(`${KLAUSUR_API}/api/v1/ocr-pipeline/sessions/${sessionId}/ground-truth/words`, {
      method: 'POST',
      headers: { 'Content-Type': 'application/json' },
      body: JSON.stringify(gt),
    })
    // fetch() resolves for 4xx/5xx responses too, so the status must be checked.
    if (!res.ok) {
      throw new Error(`HTTP ${res.status}`)
    }
    setGtSaved(true)
  } catch (e) {
    console.error('Ground truth save failed:', e)
    setError('Ground Truth konnte nicht gespeichert werden')
  }
}, [sessionId, gtNotes, editedEntries])
|
||||
|
||||
// Step-through: replace one text field of the entry at `index` and mark
// that entry as 'edited'. All other entries are left untouched.
const updateEntry = (index: number, field: 'english' | 'german' | 'example', value: string) => {
  setEditedEntries((prev) =>
    prev.map((e, i) => (i === index ? { ...e, [field]: value, status: 'edited' as const } : e)),
  )
}
|
||||
|
||||
// Step-through: confirm the active entry and advance to the next one.
// An entry that was already edited keeps its 'edited' status; the selection
// stays put on the last entry.
const confirmEntry = () => {
  setEditedEntries((prev) =>
    prev.map((e, i) =>
      i === activeIndex ? { ...e, status: e.status === 'edited' ? 'edited' : 'confirmed' } : e,
    ),
  )
  if (activeIndex < editedEntries.length - 1) {
    setActiveIndex(activeIndex + 1)
  }
}
|
||||
|
||||
// Step-through: mark the active entry as 'skipped' and advance to the next
// one; the selection stays put on the last entry.
const skipEntry = () => {
  setEditedEntries((prev) =>
    prev.map((e, i) => (i === activeIndex ? { ...e, status: 'skipped' as const } : e)),
  )
  if (activeIndex < editedEntries.length - 1) {
    setActiveIndex(activeIndex + 1)
  }
}
|
||||
|
||||
// Focus the English input whenever the active entry changes (or labeling
// mode is entered) so the reviewer can start typing immediately.
useEffect(() => {
  if (mode === 'labeling') {
    enRef.current?.focus()
  }
}, [activeIndex, mode])
|
||||
|
||||
// Keyboard shortcuts, active only in labeling mode:
//   Enter     = confirm the active entry and advance
//   Ctrl+Down = skip the active entry
//   Ctrl+Up   = go back one entry
// Tab is deliberately not handled so it keeps moving between fields.
useEffect(() => {
  if (mode !== 'labeling') return
  const handler = (e: KeyboardEvent) => {
    if (e.key === 'Enter' && !e.shiftKey) {
      // Leave Enter alone while an IME composition is in progress and when a
      // button or link has focus; otherwise this global shortcut would
      // suppress their native activation (e.g. Enter on "Skip" would confirm).
      const target = e.target
      if (e.isComposing || target instanceof HTMLButtonElement || target instanceof HTMLAnchorElement) {
        return
      }
      e.preventDefault()
      confirmEntry()
    } else if (e.key === 'ArrowDown' && e.ctrlKey) {
      e.preventDefault()
      skipEntry()
    } else if (e.key === 'ArrowUp' && e.ctrlKey) {
      e.preventDefault()
      if (activeIndex > 0) setActiveIndex(activeIndex - 1)
    }
  }
  window.addEventListener('keydown', handler)
  return () => window.removeEventListener('keydown', handler)
  // eslint-disable-next-line react-hooks/exhaustive-deps
}, [mode, activeIndex, editedEntries])
|
||||
|
||||
if (!sessionId) {
|
||||
return (
|
||||
<div className="flex flex-col items-center justify-center py-16 text-center">
|
||||
<div className="text-5xl mb-4">🔤</div>
|
||||
<h3 className="text-lg font-medium text-gray-700 dark:text-gray-300 mb-2">
|
||||
Schritt 5: Worterkennung
|
||||
</h3>
|
||||
<p className="text-gray-500 dark:text-gray-400 max-w-md">
|
||||
Bitte zuerst Schritte 1-4 abschliessen.
|
||||
</p>
|
||||
</div>
|
||||
)
|
||||
}
|
||||
|
||||
const overlayUrl = `${KLAUSUR_API}/api/v1/ocr-pipeline/sessions/${sessionId}/image/words-overlay`
|
||||
const dewarpedUrl = `${KLAUSUR_API}/api/v1/ocr-pipeline/sessions/${sessionId}/image/dewarped`
|
||||
|
||||
// Text colour classes for a confidence value: green from 70 upwards,
// yellow from 50 upwards, red for everything else (including NaN).
const confColor = (conf: number) => {
  if (conf >= 70) return 'text-green-600 dark:text-green-400'
  if (conf >= 50) return 'text-yellow-600 dark:text-yellow-400'
  return 'text-red-600 dark:text-red-400'
}
|
||||
|
||||
// Badge colour classes for an entry status. A missing, empty or unknown
// status falls back to the 'pending' style.
const statusBadge = (status?: string) => {
  const map: Record<string, string> = {
    pending: 'bg-gray-100 dark:bg-gray-700 text-gray-500',
    confirmed: 'bg-green-100 dark:bg-green-900/30 text-green-700 dark:text-green-400',
    edited: 'bg-blue-100 dark:bg-blue-900/30 text-blue-700 dark:text-blue-400',
    skipped: 'bg-orange-100 dark:bg-orange-900/30 text-orange-700 dark:text-orange-400',
  }
  const key = status || 'pending'
  // Own-property check: a plain lookup would also find inherited members such
  // as `constructor` and return a function instead of a class string.
  const known = Object.prototype.hasOwnProperty.call(map, key) ? map[key] : undefined
  return known || map.pending
}
|
||||
|
||||
const summary = wordResult?.summary
|
||||
const confirmedCount = editedEntries.filter(e => e.status === 'confirmed' || e.status === 'edited').length
|
||||
const totalCount = editedEntries.length
|
||||
|
||||
return (
|
||||
<div className="space-y-4">
|
||||
{/* Loading */}
|
||||
{detecting && (
|
||||
<div className="flex items-center gap-2 text-teal-600 dark:text-teal-400 text-sm">
|
||||
<div className="animate-spin w-4 h-4 border-2 border-teal-500 border-t-transparent rounded-full" />
|
||||
Worterkennung laeuft...
|
||||
</div>
|
||||
)}
|
||||
|
||||
{/* Mode toggle */}
|
||||
{wordResult && (
|
||||
<div className="flex items-center gap-2">
|
||||
<button
|
||||
onClick={() => setMode('overview')}
|
||||
className={`px-3 py-1.5 text-xs rounded-lg font-medium transition-colors ${
|
||||
mode === 'overview'
|
||||
? 'bg-teal-600 text-white'
|
||||
: 'bg-gray-100 dark:bg-gray-700 text-gray-600 dark:text-gray-300 hover:bg-gray-200 dark:hover:bg-gray-600'
|
||||
}`}
|
||||
>
|
||||
Uebersicht
|
||||
</button>
|
||||
<button
|
||||
onClick={() => setMode('labeling')}
|
||||
className={`px-3 py-1.5 text-xs rounded-lg font-medium transition-colors ${
|
||||
mode === 'labeling'
|
||||
? 'bg-teal-600 text-white'
|
||||
: 'bg-gray-100 dark:bg-gray-700 text-gray-600 dark:text-gray-300 hover:bg-gray-200 dark:hover:bg-gray-600'
|
||||
}`}
|
||||
>
|
||||
Labeling ({confirmedCount}/{totalCount})
|
||||
</button>
|
||||
</div>
|
||||
)}
|
||||
|
||||
{/* Overview mode: side-by-side images + entry list */}
|
||||
{mode === 'overview' && (
|
||||
<>
|
||||
{/* Images: overlay vs clean */}
|
||||
<div className="grid grid-cols-2 gap-4">
|
||||
<div>
|
||||
<div className="text-xs font-medium text-gray-500 dark:text-gray-400 mb-1">
|
||||
Mit Grid-Overlay
|
||||
</div>
|
||||
<div className="border rounded-lg overflow-hidden dark:border-gray-700 bg-gray-50 dark:bg-gray-900">
|
||||
{wordResult ? (
|
||||
// eslint-disable-next-line @next/next/no-img-element
|
||||
<img
|
||||
src={`${overlayUrl}?t=${Date.now()}`}
|
||||
alt="Wort-Overlay"
|
||||
className="w-full h-auto"
|
||||
/>
|
||||
) : (
|
||||
<div className="aspect-[3/4] flex items-center justify-center text-gray-400 text-sm">
|
||||
{detecting ? 'Erkenne Woerter...' : 'Keine Daten'}
|
||||
</div>
|
||||
)}
|
||||
</div>
|
||||
</div>
|
||||
<div>
|
||||
<div className="text-xs font-medium text-gray-500 dark:text-gray-400 mb-1">
|
||||
Entzerrtes Bild
|
||||
</div>
|
||||
<div className="border rounded-lg overflow-hidden dark:border-gray-700 bg-gray-50 dark:bg-gray-900">
|
||||
{/* eslint-disable-next-line @next/next/no-img-element */}
|
||||
<img
|
||||
src={dewarpedUrl}
|
||||
alt="Entzerrt"
|
||||
className="w-full h-auto"
|
||||
/>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
{/* Result summary */}
|
||||
{wordResult && summary && (
|
||||
<div className="bg-white dark:bg-gray-800 rounded-xl border border-gray-200 dark:border-gray-700 p-4 space-y-3">
|
||||
<div className="flex items-center justify-between">
|
||||
<h4 className="text-sm font-medium text-gray-700 dark:text-gray-300">
|
||||
Ergebnis: {summary.total_entries} Eintraege erkannt
|
||||
</h4>
|
||||
<span className="text-xs text-gray-400">
|
||||
{wordResult.duration_seconds}s
|
||||
</span>
|
||||
</div>
|
||||
|
||||
{/* Summary badges */}
|
||||
<div className="flex gap-2 flex-wrap">
|
||||
<span className="px-2 py-0.5 rounded text-xs font-medium bg-blue-100 dark:bg-blue-900/30 text-blue-700 dark:text-blue-300">
|
||||
EN: {summary.with_english}
|
||||
</span>
|
||||
<span className="px-2 py-0.5 rounded text-xs font-medium bg-green-100 dark:bg-green-900/30 text-green-700 dark:text-green-300">
|
||||
DE: {summary.with_german}
|
||||
</span>
|
||||
{summary.low_confidence > 0 && (
|
||||
<span className="px-2 py-0.5 rounded text-xs font-medium bg-red-100 dark:bg-red-900/30 text-red-700 dark:text-red-300">
|
||||
Unsicher: {summary.low_confidence}
|
||||
</span>
|
||||
)}
|
||||
</div>
|
||||
|
||||
{/* Entry table */}
|
||||
<div className="max-h-80 overflow-y-auto">
|
||||
<table className="w-full text-xs">
|
||||
<thead className="sticky top-0 bg-white dark:bg-gray-800">
|
||||
<tr className="text-left text-gray-500 dark:text-gray-400 border-b dark:border-gray-700">
|
||||
<th className="py-1 pr-2 w-8">#</th>
|
||||
<th className="py-1 pr-2">English</th>
|
||||
<th className="py-1 pr-2">Deutsch</th>
|
||||
<th className="py-1 pr-2">Example</th>
|
||||
<th className="py-1 w-12 text-right">Conf</th>
|
||||
</tr>
|
||||
</thead>
|
||||
<tbody>
|
||||
{editedEntries.map((entry, idx) => (
|
||||
<tr
|
||||
key={idx}
|
||||
className={`border-b dark:border-gray-700/50 ${
|
||||
idx === activeIndex ? 'bg-teal-50 dark:bg-teal-900/20' : ''
|
||||
}`}
|
||||
onClick={() => { setActiveIndex(idx); setMode('labeling') }}
|
||||
>
|
||||
<td className="py-1 pr-2 text-gray-400">{idx + 1}</td>
|
||||
<td className="py-1 pr-2 font-mono text-gray-700 dark:text-gray-300 cursor-pointer">
|
||||
{entry.english || <span className="text-gray-300 dark:text-gray-600">—</span>}
|
||||
</td>
|
||||
<td className="py-1 pr-2 font-mono text-gray-700 dark:text-gray-300 cursor-pointer">
|
||||
{entry.german || <span className="text-gray-300 dark:text-gray-600">—</span>}
|
||||
</td>
|
||||
<td className="py-1 pr-2 font-mono text-gray-500 dark:text-gray-400 cursor-pointer max-w-[200px] truncate">
|
||||
{entry.example || <span className="text-gray-300 dark:text-gray-600">—</span>}
|
||||
</td>
|
||||
<td className={`py-1 text-right font-mono ${confColor(entry.confidence)}`}>
|
||||
{entry.confidence}%
|
||||
</td>
|
||||
</tr>
|
||||
))}
|
||||
</tbody>
|
||||
</table>
|
||||
</div>
|
||||
</div>
|
||||
)}
|
||||
</>
|
||||
)}
|
||||
|
||||
{/* Labeling mode: image crop + editable fields */}
|
||||
{mode === 'labeling' && editedEntries.length > 0 && (
|
||||
<div className="grid grid-cols-3 gap-4">
|
||||
{/* Left 2/3: Image with highlighted active row */}
|
||||
<div className="col-span-2">
|
||||
<div className="text-xs font-medium text-gray-500 dark:text-gray-400 mb-1">
|
||||
Eintrag {activeIndex + 1} von {editedEntries.length}
|
||||
</div>
|
||||
<div className="border rounded-lg overflow-hidden dark:border-gray-700 bg-gray-50 dark:bg-gray-900 relative">
|
||||
{/* eslint-disable-next-line @next/next/no-img-element */}
|
||||
<img
|
||||
src={`${overlayUrl}?t=${Date.now()}`}
|
||||
alt="Wort-Overlay"
|
||||
className="w-full h-auto"
|
||||
/>
|
||||
{/* Highlight overlay for active entry bbox */}
|
||||
{editedEntries[activeIndex]?.bbox && (
|
||||
<div
|
||||
className="absolute border-2 border-yellow-400 bg-yellow-400/10 pointer-events-none"
|
||||
style={{
|
||||
left: `${editedEntries[activeIndex].bbox.x}%`,
|
||||
top: `${editedEntries[activeIndex].bbox.y}%`,
|
||||
width: `${editedEntries[activeIndex].bbox.w}%`,
|
||||
height: `${editedEntries[activeIndex].bbox.h}%`,
|
||||
}}
|
||||
/>
|
||||
)}
|
||||
</div>
|
||||
</div>
|
||||
|
||||
{/* Right 1/3: Editable entry fields */}
|
||||
<div className="space-y-3">
|
||||
{/* Navigation */}
|
||||
<div className="flex items-center justify-between">
|
||||
<button
|
||||
onClick={() => setActiveIndex(Math.max(0, activeIndex - 1))}
|
||||
disabled={activeIndex === 0}
|
||||
className="px-2 py-1 text-xs border rounded hover:bg-gray-50 dark:hover:bg-gray-700 dark:border-gray-600 disabled:opacity-30"
|
||||
>
|
||||
Zurueck
|
||||
</button>
|
||||
<span className="text-xs text-gray-500">{activeIndex + 1} / {editedEntries.length}</span>
|
||||
<button
|
||||
onClick={() => setActiveIndex(Math.min(editedEntries.length - 1, activeIndex + 1))}
|
||||
disabled={activeIndex >= editedEntries.length - 1}
|
||||
className="px-2 py-1 text-xs border rounded hover:bg-gray-50 dark:hover:bg-gray-700 dark:border-gray-600 disabled:opacity-30"
|
||||
>
|
||||
Weiter
|
||||
</button>
|
||||
</div>
|
||||
|
||||
{/* Status badge */}
|
||||
<div className="flex items-center gap-2">
|
||||
<span className={`px-2 py-0.5 rounded text-[10px] uppercase font-semibold ${statusBadge(editedEntries[activeIndex]?.status)}`}>
|
||||
{editedEntries[activeIndex]?.status || 'pending'}
|
||||
</span>
|
||||
<span className={`text-xs font-mono ${confColor(editedEntries[activeIndex]?.confidence || 0)}`}>
|
||||
{editedEntries[activeIndex]?.confidence}% Konfidenz
|
||||
</span>
|
||||
</div>
|
||||
|
||||
{/* Cell crops */}
|
||||
{editedEntries[activeIndex]?.bbox_en && (
|
||||
<div>
|
||||
<div className="text-[10px] font-medium text-blue-500 mb-0.5">EN-Zelle</div>
|
||||
<div className="border rounded dark:border-gray-700 overflow-hidden bg-white dark:bg-gray-900 h-10 relative">
|
||||
<CellCrop
|
||||
imageUrl={dewarpedUrl}
|
||||
bbox={editedEntries[activeIndex].bbox_en!}
|
||||
/>
|
||||
</div>
|
||||
</div>
|
||||
)}
|
||||
{editedEntries[activeIndex]?.bbox_de && (
|
||||
<div>
|
||||
<div className="text-[10px] font-medium text-green-500 mb-0.5">DE-Zelle</div>
|
||||
<div className="border rounded dark:border-gray-700 overflow-hidden bg-white dark:bg-gray-900 h-10 relative">
|
||||
<CellCrop
|
||||
imageUrl={dewarpedUrl}
|
||||
bbox={editedEntries[activeIndex].bbox_de!}
|
||||
/>
|
||||
</div>
|
||||
</div>
|
||||
)}
|
||||
|
||||
{/* Editable fields */}
|
||||
<div className="space-y-2">
|
||||
<div>
|
||||
<label className="text-[10px] font-medium text-gray-500 dark:text-gray-400">English</label>
|
||||
<input
|
||||
ref={enRef}
|
||||
type="text"
|
||||
value={editedEntries[activeIndex]?.english || ''}
|
||||
onChange={(e) => updateEntry(activeIndex, 'english', e.target.value)}
|
||||
className="w-full px-2 py-1.5 text-sm border rounded dark:bg-gray-700 dark:border-gray-600 font-mono"
|
||||
/>
|
||||
</div>
|
||||
<div>
|
||||
<label className="text-[10px] font-medium text-gray-500 dark:text-gray-400">Deutsch</label>
|
||||
<input
|
||||
type="text"
|
||||
value={editedEntries[activeIndex]?.german || ''}
|
||||
onChange={(e) => updateEntry(activeIndex, 'german', e.target.value)}
|
||||
className="w-full px-2 py-1.5 text-sm border rounded dark:bg-gray-700 dark:border-gray-600 font-mono"
|
||||
/>
|
||||
</div>
|
||||
<div>
|
||||
<label className="text-[10px] font-medium text-gray-500 dark:text-gray-400">Example</label>
|
||||
<input
|
||||
type="text"
|
||||
value={editedEntries[activeIndex]?.example || ''}
|
||||
onChange={(e) => updateEntry(activeIndex, 'example', e.target.value)}
|
||||
className="w-full px-2 py-1.5 text-sm border rounded dark:bg-gray-700 dark:border-gray-600 font-mono"
|
||||
/>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
{/* Action buttons */}
|
||||
<div className="flex gap-2">
|
||||
<button
|
||||
onClick={confirmEntry}
|
||||
className="flex-1 px-3 py-1.5 text-xs bg-green-600 text-white rounded-lg hover:bg-green-700 font-medium"
|
||||
>
|
||||
Bestaetigen (Enter)
|
||||
</button>
|
||||
<button
|
||||
onClick={skipEntry}
|
||||
className="px-3 py-1.5 text-xs border rounded-lg hover:bg-gray-50 dark:hover:bg-gray-700 dark:border-gray-600"
|
||||
>
|
||||
Skip
|
||||
</button>
|
||||
</div>
|
||||
|
||||
{/* Shortcuts hint */}
|
||||
<div className="text-[10px] text-gray-400 space-y-0.5">
|
||||
<div>Enter = Bestaetigen & weiter</div>
|
||||
<div>Ctrl+↓ = Ueberspringen</div>
|
||||
<div>Ctrl+↑ = Zurueck</div>
|
||||
</div>
|
||||
|
||||
{/* Entry list (compact) */}
|
||||
<div className="border-t dark:border-gray-700 pt-2 mt-2">
|
||||
<div className="text-[10px] font-medium text-gray-500 dark:text-gray-400 mb-1">
|
||||
Alle Eintraege
|
||||
</div>
|
||||
<div className="max-h-48 overflow-y-auto space-y-0.5">
|
||||
{editedEntries.map((entry, idx) => (
|
||||
<div
|
||||
key={idx}
|
||||
onClick={() => setActiveIndex(idx)}
|
||||
className={`flex items-center gap-1 px-2 py-1 rounded text-[10px] cursor-pointer transition-colors ${
|
||||
idx === activeIndex
|
||||
? 'bg-teal-50 dark:bg-teal-900/30 border border-teal-200 dark:border-teal-700'
|
||||
: 'hover:bg-gray-50 dark:hover:bg-gray-700/50'
|
||||
}`}
|
||||
>
|
||||
<span className="w-4 text-right text-gray-400">{idx + 1}</span>
|
||||
<span className={`w-2 h-2 rounded-full ${
|
||||
entry.status === 'confirmed' ? 'bg-green-500' :
|
||||
entry.status === 'edited' ? 'bg-blue-500' :
|
||||
entry.status === 'skipped' ? 'bg-orange-400' :
|
||||
'bg-gray-300 dark:bg-gray-600'
|
||||
}`} />
|
||||
<span className="truncate text-gray-600 dark:text-gray-400 font-mono">
|
||||
{entry.english || '—'} → {entry.german || '—'}
|
||||
</span>
|
||||
</div>
|
||||
))}
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
)}
|
||||
|
||||
{/* Controls */}
|
||||
{wordResult && (
|
||||
<div className="bg-white dark:bg-gray-800 rounded-xl border border-gray-200 dark:border-gray-700 p-4 space-y-3">
|
||||
<div className="flex items-center gap-3 flex-wrap">
|
||||
<button
|
||||
onClick={() => runAutoDetection()}
|
||||
disabled={detecting}
|
||||
className="px-3 py-1.5 text-xs border rounded-lg hover:bg-gray-50 dark:hover:bg-gray-700 dark:border-gray-600 disabled:opacity-50"
|
||||
>
|
||||
Erneut erkennen
|
||||
</button>
|
||||
|
||||
<button
|
||||
onClick={() => goToStep(3)}
|
||||
className="px-3 py-1.5 text-xs border rounded-lg hover:bg-gray-50 dark:hover:bg-gray-700 dark:border-gray-600 text-orange-600 dark:text-orange-400 border-orange-300 dark:border-orange-700"
|
||||
>
|
||||
Zeilen korrigieren (Step 4)
|
||||
</button>
|
||||
|
||||
<div className="flex-1" />
|
||||
|
||||
{/* Ground truth */}
|
||||
{!gtSaved ? (
|
||||
<>
|
||||
<input
|
||||
type="text"
|
||||
placeholder="Notizen (optional)"
|
||||
value={gtNotes}
|
||||
onChange={(e) => setGtNotes(e.target.value)}
|
||||
className="px-2 py-1 text-xs border rounded dark:bg-gray-700 dark:border-gray-600 w-48"
|
||||
/>
|
||||
<button
|
||||
onClick={() => handleGroundTruth(true)}
|
||||
className="px-3 py-1.5 text-xs bg-green-600 text-white rounded-lg hover:bg-green-700"
|
||||
>
|
||||
Korrekt
|
||||
</button>
|
||||
<button
|
||||
onClick={() => handleGroundTruth(false)}
|
||||
className="px-3 py-1.5 text-xs bg-red-600 text-white rounded-lg hover:bg-red-700"
|
||||
>
|
||||
Fehlerhaft
|
||||
</button>
|
||||
</>
|
||||
) : (
|
||||
<span className="text-xs text-green-600 dark:text-green-400">
|
||||
Ground Truth gespeichert
|
||||
</span>
|
||||
)}
|
||||
|
||||
<button
|
||||
onClick={onNext}
|
||||
className="px-4 py-1.5 text-xs bg-teal-600 text-white rounded-lg hover:bg-teal-700 font-medium"
|
||||
>
|
||||
Weiter
|
||||
</button>
|
||||
</div>
|
||||
</div>
|
||||
)}
|
||||
|
||||
{error && (
|
||||
<div className="p-3 bg-red-50 dark:bg-red-900/20 text-red-600 dark:text-red-400 rounded-lg text-sm">
|
||||
{error}
|
||||
</div>
|
||||
)}
|
||||
</div>
|
||||
)
|
||||
}
|
||||
|
||||
|
||||
/**
 * CellCrop: shows the part of an image covered by a percent-based bbox.
 *
 * The image is rendered `scale` times as wide as the container and shifted
 * with a CSS `translate()`. Translate percentages refer to the image's own
 * rendered size, so moving it by -x% / -y% puts the bbox's top-left corner at
 * the container's top-left corner on both axes, whatever the image's aspect
 * ratio is. (Negative `background-position` percentages do not behave like
 * offsets: they are resolved against container size minus image size.)
 *
 * @param imageUrl URL of the image to crop from.
 * @param bbox     Region to show; x/y/w/h are percentages of the image.
 */
function CellCrop({ imageUrl, bbox }: { imageUrl: string; bbox: { x: number; y: number; w: number; h: number } }) {
  // Cap zoom at 8x
  const MAX_ZOOM = 8
  // Zoom needed for one axis of the bbox to fill the container; degenerate
  // (zero, negative or NaN) extents fall back to the cap.
  const zoomFor = (extent: number) => (extent > 0 ? 100 / extent : MAX_ZOOM)
  const scale = Math.min(zoomFor(bbox.w), zoomFor(bbox.h), MAX_ZOOM)

  return (
    <div className="w-full h-full relative overflow-hidden">
      {/* eslint-disable-next-line @next/next/no-img-element */}
      <img
        src={imageUrl}
        alt=""
        className="absolute top-0 left-0 h-auto"
        style={{
          width: `${scale * 100}%`,
          // Base styles commonly clamp images to 100% width; the zoomed image must exceed it.
          maxWidth: 'none',
          transform: `translate(${-bbox.x}%, ${-bbox.y}%)`,
        }}
      />
    </div>
  )
}
|
||||
|
||||
373
docs-src/services/klausur-service/OCR-Pipeline.md
Normal file
373
docs-src/services/klausur-service/OCR-Pipeline.md
Normal file
@@ -0,0 +1,373 @@
|
||||
# OCR Pipeline - Schrittweise Seitenrekonstruktion
|
||||
|
||||
**Version:** 1.0.0
|
||||
**Status:** In Entwicklung
|
||||
**URL:** https://macmini:3002/ai/ocr-pipeline
|
||||
|
||||
## Uebersicht
|
||||
|
||||
Die OCR Pipeline zerlegt den OCR-Prozess in **8 einzelne Schritte**, um eingescannte Vokabelseiten Wort fuer Wort zu rekonstruieren. Jeder Schritt kann individuell geprueft, korrigiert und mit Ground-Truth-Daten versehen werden.
|
||||
|
||||
**Ziel:** 10 Vokabelseiten fehlerfrei rekonstruieren.
|
||||
|
||||
### Pipeline-Schritte
|
||||
|
||||
| Schritt | Name | Beschreibung | Status |
|
||||
|---------|------|--------------|--------|
|
||||
| 1 | Begradigung (Deskew) | Scan begradigen (Hough Lines + Word Alignment) | Implementiert |
|
||||
| 2 | Entzerrung (Dewarp) | Buchwoelbung entzerren (Vertikalkanten-Analyse) | Implementiert |
|
||||
| 3 | Spaltenerkennung | Unsichtbare Spalten finden (Projektionsprofile) | Implementiert |
|
||||
| 4 | Zeilenerkennung | Horizontale Zeilen + Kopf-/Fusszeilen-Klassifikation | Implementiert |
|
||||
| 5 | Worterkennung | Grid aus Spalten x Zeilen, OCR pro Zelle | Implementiert |
|
||||
| 6 | Koordinatenzuweisung | Exakte Positionen innerhalb Zellen | Geplant |
|
||||
| 7 | Seitenrekonstruktion | Seite nachbauen aus Koordinaten | Geplant |
|
||||
| 8 | Ground Truth Validierung | Gesamtpruefung aller Schritte | Geplant |
|
||||
|
||||
---
|
||||
|
||||
## Architektur
|
||||
|
||||
```
|
||||
Admin-Lehrer (Next.js) klausur-service (FastAPI :8086)
|
||||
┌────────────────────┐ ┌─────────────────────────────┐
|
||||
│ /ai/ocr-pipeline │ │ /api/v1/ocr-pipeline/ │
|
||||
│ │ REST │ │
|
||||
│ PipelineStepper │◄────────►│ Sessions CRUD │
|
||||
│ StepDeskew │ │ Image Serving │
|
||||
│ StepDewarp │ │ Deskew/Dewarp/Columns/Rows │
|
||||
│ StepColumnDetection│ │ Word Recognition │
|
||||
│ StepRowDetection │ │ Ground Truth │
|
||||
│ StepWordRecognition│ │ Overlay Images │
|
||||
└────────────────────┘ └─────────────────────────────┘
|
||||
│
|
||||
▼
|
||||
┌─────────────────────┐
|
||||
│ PostgreSQL │
|
||||
│ ocr_pipeline_sessions│
|
||||
│ (Images + JSONB) │
|
||||
└─────────────────────┘
|
||||
```
|
||||
|
||||
### Dateistruktur
|
||||
|
||||
```
|
||||
klausur-service/backend/
|
||||
├── ocr_pipeline_api.py # FastAPI Router (alle Endpoints)
|
||||
├── ocr_pipeline_session_store.py # PostgreSQL Persistence
|
||||
├── cv_vocab_pipeline.py # Computer Vision Algorithmen
|
||||
└── migrations/
|
||||
├── 002_ocr_pipeline_sessions.sql # Basis-Schema
|
||||
├── 003_add_row_result.sql # Row-Result Spalte
|
||||
└── 004_add_word_result.sql # Word-Result Spalte
|
||||
|
||||
admin-lehrer/
|
||||
├── app/(admin)/ai/ocr-pipeline/
|
||||
│ ├── page.tsx # Haupt-Page mit Session-Management
|
||||
│ └── types.ts # TypeScript Interfaces
|
||||
└── components/ocr-pipeline/
|
||||
├── PipelineStepper.tsx # Fortschritts-Stepper
|
||||
├── StepDeskew.tsx # Schritt 1
|
||||
├── StepDewarp.tsx # Schritt 2
|
||||
├── StepColumnDetection.tsx # Schritt 3
|
||||
├── StepRowDetection.tsx # Schritt 4
|
||||
├── StepWordRecognition.tsx # Schritt 5
|
||||
├── StepCoordinates.tsx # Schritt 6 (Platzhalter)
|
||||
├── StepReconstruction.tsx # Schritt 7 (Platzhalter)
|
||||
└── StepGroundTruth.tsx # Schritt 8 (Platzhalter)
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## API-Referenz
|
||||
|
||||
Alle Endpoints unter `/api/v1/ocr-pipeline/`.
|
||||
|
||||
### Sessions
|
||||
|
||||
| Methode | Pfad | Beschreibung |
|
||||
|---------|------|--------------|
|
||||
| `POST` | `/sessions` | Neue Session erstellen (Bild hochladen) |
|
||||
| `GET` | `/sessions` | Alle Sessions auflisten |
|
||||
| `GET` | `/sessions/{id}` | Session-Info mit allen Step-Results |
|
||||
| `PUT` | `/sessions/{id}` | Session umbenennen |
|
||||
| `DELETE` | `/sessions/{id}` | Session loeschen |
|
||||
|
||||
### Bilder
|
||||
|
||||
| Methode | Pfad | Beschreibung |
|
||||
|---------|------|--------------|
|
||||
| `GET` | `/sessions/{id}/image/original` | Originalbild |
|
||||
| `GET` | `/sessions/{id}/image/deskewed` | Begradigtes Bild |
|
||||
| `GET` | `/sessions/{id}/image/dewarped` | Entzerrtes Bild |
|
||||
| `GET` | `/sessions/{id}/image/binarized` | Binarisiertes Bild |
|
||||
| `GET` | `/sessions/{id}/image/columns-overlay` | Spalten-Overlay |
|
||||
| `GET` | `/sessions/{id}/image/rows-overlay` | Zeilen-Overlay |
|
||||
| `GET` | `/sessions/{id}/image/words-overlay` | Wort-Grid-Overlay |
|
||||
|
||||
### Schritt 1: Begradigung
|
||||
|
||||
| Methode | Pfad | Beschreibung |
|
||||
|---------|------|--------------|
|
||||
| `POST` | `/sessions/{id}/deskew` | Automatische Begradigung |
|
||||
| `POST` | `/sessions/{id}/deskew/manual` | Manuelle Winkelkorrektur |
|
||||
| `POST` | `/sessions/{id}/ground-truth/deskew` | Ground Truth speichern |
|
||||
|
||||
### Schritt 2: Entzerrung
|
||||
|
||||
| Methode | Pfad | Beschreibung |
|
||||
|---------|------|--------------|
|
||||
| `POST` | `/sessions/{id}/dewarp` | Automatische Entzerrung |
|
||||
| `POST` | `/sessions/{id}/dewarp/manual` | Manueller Scherungswinkel |
|
||||
| `POST` | `/sessions/{id}/ground-truth/dewarp` | Ground Truth speichern |
|
||||
|
||||
### Schritt 3: Spalten
|
||||
|
||||
| Methode | Pfad | Beschreibung |
|
||||
|---------|------|--------------|
|
||||
| `POST` | `/sessions/{id}/columns` | Automatische Spaltenerkennung |
|
||||
| `POST` | `/sessions/{id}/columns/manual` | Manuelle Spalten-Definition |
|
||||
| `POST` | `/sessions/{id}/ground-truth/columns` | Ground Truth speichern |
|
||||
|
||||
### Schritt 4: Zeilen
|
||||
|
||||
| Methode | Pfad | Beschreibung |
|
||||
|---------|------|--------------|
|
||||
| `POST` | `/sessions/{id}/rows` | Automatische Zeilenerkennung |
|
||||
| `POST` | `/sessions/{id}/rows/manual` | Manuelle Zeilen-Definition |
|
||||
| `POST` | `/sessions/{id}/ground-truth/rows` | Ground Truth speichern |
|
||||
| `GET` | `/sessions/{id}/ground-truth/rows` | Ground Truth abrufen |
|
||||
|
||||
### Schritt 5: Worterkennung
|
||||
|
||||
| Methode | Pfad | Beschreibung |
|
||||
|---------|------|--------------|
|
||||
| `POST` | `/sessions/{id}/words` | Wort-Grid aus Spalten x Zeilen erstellen |
|
||||
| `POST` | `/sessions/{id}/ground-truth/words` | Ground Truth speichern |
|
||||
| `GET` | `/sessions/{id}/ground-truth/words` | Ground Truth abrufen |
|
||||
|
||||
---
|
||||
|
||||
## Schritt 5: Worterkennung (Detail)
|
||||
|
||||
### Algorithmus: `build_word_grid()`
|
||||
|
||||
Schritt 5 nutzt die Ergebnisse von Schritt 3 (Spalten) und Schritt 4 (Zeilen), um ein Grid zu erstellen und jede Zelle per OCR auszulesen.
|
||||
|
||||
```
|
||||
Spalten (Step 3): column_en | column_de | column_example
|
||||
───────────┼─────────────┼────────────────
|
||||
Zeilen (Step 4): R0 │ hello │ hallo │ Hello, World!
|
||||
R1 │ world │ Welt │ The whole world
|
||||
R2 │ book │ Buch │ Read a book
|
||||
───────────┼─────────────┼────────────────
|
||||
```
|
||||
|
||||
**Ablauf:**
|
||||
|
||||
1. **Filterung**: Nur `content`-Zeilen (kein Header/Footer) und relevante Spalten (`column_en`, `column_de`, `column_example`)
|
||||
2. **Zell-Bildung**: Pro content-Zeile x pro relevante Spalte eine `PageRegion` berechnen
|
||||
3. **OCR**: `ocr_region()` mit PSM 7 (Single Line) pro Zelle aufrufen
|
||||
4. **Sprache**: `eng` fuer EN-Spalte, `deu` fuer DE-Spalte, `eng+deu` fuer Beispiele
|
||||
5. **Gruppierung**: Zellen zu Vokabel-Eintraegen zusammenfuehren
|
||||
|
||||
### Response-Format
|
||||
|
||||
```json
|
||||
{
|
||||
"entries": [
|
||||
{
|
||||
"row_index": 0,
|
||||
"english": "hello",
|
||||
"german": "hallo",
|
||||
"example": "Hello, how are you?",
|
||||
"confidence": 85.3,
|
||||
"bbox": {"x": 5.2, "y": 12.1, "w": 90.0, "h": 2.8},
|
||||
"bbox_en": {"x": 5.2, "y": 12.1, "w": 30.0, "h": 2.8},
|
||||
"bbox_de": {"x": 35.5, "y": 12.1, "w": 25.0, "h": 2.8},
|
||||
"bbox_ex": {"x": 61.0, "y": 12.1, "w": 34.2, "h": 2.8}
|
||||
}
|
||||
],
|
||||
"entry_count": 25,
|
||||
"image_width": 2480,
|
||||
"image_height": 3508,
|
||||
"duration_seconds": 3.2,
|
||||
"summary": {
|
||||
"total_entries": 25,
|
||||
"with_english": 24,
|
||||
"with_german": 22,
|
||||
"low_confidence": 3
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
!!! info "Bounding Boxes in Prozent"
|
||||
Alle `bbox`-Werte sind Prozent (0-100) relativ zur Bildgroesse.
|
||||
Das erleichtert die Darstellung im Frontend unabhaengig von der Bildaufloesung.
|
||||
|
||||
### Frontend: StepWordRecognition
|
||||
|
||||
Die Komponente bietet zwei Modi:
|
||||
|
||||
**Uebersicht-Modus:**
|
||||
|
||||
- Zwei Bilder nebeneinander: Grid-Overlay vs. sauberes Bild
|
||||
- Tabelle aller erkannten Eintraege mit Konfidenz-Werten
|
||||
- Klick auf Eintrag wechselt zum Labeling-Modus
|
||||
|
||||
**Labeling-Modus (Step-Through):**
|
||||
|
||||
- Links (2/3): Bild mit hervorgehobenem aktivem Eintrag (gelber Rahmen)
|
||||
- Rechts (1/3): Zell-Ausschnitte + editierbare Felder (English, Deutsch, Example)
|
||||
- Tastaturkuerzel:
|
||||
- `Enter` = Bestaetigen und weiter
|
||||
- `Ctrl+Pfeil runter` = Ueberspringen
|
||||
- `Ctrl+Pfeil hoch` = Zurueck
|
||||
|
||||
**Feedback-Loop:**
|
||||
|
||||
- "Zeilen korrigieren" springt zurueck zu Schritt 4
|
||||
- Nach Korrektur der Zeilen kann Schritt 5 erneut ausgefuehrt werden
|
||||
|
||||
---
|
||||
|
||||
## Datenbank-Schema
|
||||
|
||||
```sql
|
||||
CREATE TABLE ocr_pipeline_sessions (
|
||||
id UUID PRIMARY KEY DEFAULT gen_random_uuid(),
|
||||
name VARCHAR(255),
|
||||
filename VARCHAR(255),
|
||||
status VARCHAR(50) DEFAULT 'active',
|
||||
current_step INT DEFAULT 1,
|
||||
|
||||
-- Bilder (BYTEA)
|
||||
original_png BYTEA,
|
||||
deskewed_png BYTEA,
|
||||
binarized_png BYTEA,
|
||||
dewarped_png BYTEA,
|
||||
|
||||
-- Step-Results (JSONB)
|
||||
deskew_result JSONB,
|
||||
dewarp_result JSONB,
|
||||
column_result JSONB,
|
||||
row_result JSONB,
|
||||
word_result JSONB,
|
||||
|
||||
-- Ground Truth + Meta
|
||||
ground_truth JSONB,
|
||||
auto_shear_degrees REAL,
|
||||
created_at TIMESTAMP DEFAULT NOW(),
|
||||
updated_at TIMESTAMP DEFAULT NOW()
|
||||
);
|
||||
```
|
||||
|
||||
### Migrationen
|
||||
|
||||
| Datei | Beschreibung |
|
||||
|-------|--------------|
|
||||
| `002_ocr_pipeline_sessions.sql` | Basis-Schema (Steps 1-3) |
|
||||
| `003_add_row_result.sql` | `row_result JSONB` fuer Step 4 |
|
||||
| `004_add_word_result.sql` | `word_result JSONB` fuer Step 5 |
|
||||
|
||||
---
|
||||
|
||||
## TypeScript Interfaces
|
||||
|
||||
Die wichtigsten Typen in `types.ts`:
|
||||
|
||||
```typescript
|
||||
interface WordEntry {
|
||||
row_index: number
|
||||
english: string
|
||||
german: string
|
||||
example: string
|
||||
confidence: number
|
||||
bbox: WordBbox // Gesamte Zeile
|
||||
bbox_en: WordBbox | null // EN-Zelle
|
||||
bbox_de: WordBbox | null // DE-Zelle
|
||||
bbox_ex: WordBbox | null // Example-Zelle
|
||||
status?: 'pending' | 'confirmed' | 'edited' | 'skipped'
|
||||
}
|
||||
|
||||
interface WordResult {
|
||||
entries: WordEntry[]
|
||||
entry_count: number
|
||||
image_width: number
|
||||
image_height: number
|
||||
duration_seconds: number
|
||||
summary: {
|
||||
total_entries: number
|
||||
with_english: number
|
||||
with_german: number
|
||||
low_confidence: number
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Ground Truth System
|
||||
|
||||
Jeder Schritt kann mit Ground-Truth-Feedback versehen werden:
|
||||
|
||||
```json
|
||||
{
|
||||
"is_correct": false,
|
||||
"corrected_entries": [...],
|
||||
"notes": "Zeile 5 falsch erkannt",
|
||||
"saved_at": "2026-02-28T10:30:00"
|
||||
}
|
||||
```
|
||||
|
||||
Ground-Truth-Daten werden in der `ground_truth` JSONB-Spalte gespeichert, gruppiert nach Schritt:
|
||||
|
||||
```json
|
||||
{
|
||||
"deskew": { "is_correct": true, ... },
|
||||
"dewarp": { "is_correct": true, ... },
|
||||
"columns": { "is_correct": false, ... },
|
||||
"rows": { "is_correct": true, ... },
|
||||
"words": { "is_correct": false, ... }
|
||||
}
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Deployment
|
||||
|
||||
```bash
|
||||
# 1. Git push
|
||||
git push origin main && git push gitea main
|
||||
|
||||
# 2. Mac Mini pull + build
|
||||
ssh macmini "cd /Users/benjaminadmin/Projekte/breakpilot-lehrer && git pull --no-rebase origin main"
|
||||
|
||||
# klausur-service (Backend)
|
||||
ssh macmini "cd /Users/benjaminadmin/Projekte/breakpilot-lehrer && \
|
||||
/usr/local/bin/docker compose build --no-cache klausur-service && \
|
||||
/usr/local/bin/docker compose up -d klausur-service"
|
||||
|
||||
# admin-lehrer (Frontend)
|
||||
ssh macmini "cd /Users/benjaminadmin/Projekte/breakpilot-lehrer && \
|
||||
/usr/local/bin/docker compose build --no-cache admin-lehrer && \
|
||||
/usr/local/bin/docker compose up -d admin-lehrer"
|
||||
|
||||
# 3. Migration ausfuehren
|
||||
ssh macmini "/usr/local/bin/docker exec bp-lehrer-klausur-service \
|
||||
python -c \"import asyncio; from ocr_pipeline_session_store import *; asyncio.run(init_ocr_pipeline_tables())\""
|
||||
|
||||
# 4. Testen unter:
|
||||
# https://macmini:3002/ai/ocr-pipeline
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Aenderungshistorie
|
||||
|
||||
| Datum | Version | Aenderung |
|
||||
|-------|---------|----------|
|
||||
| 2026-02-28 | 1.0.0 | Schritt 5 (Worterkennung) implementiert |
|
||||
| 2026-02-22 | 0.4.0 | Schritt 4 (Zeilenerkennung) implementiert |
|
||||
| 2026-02-20 | 0.3.0 | Schritt 3 (Spaltenerkennung) mit Typ-Klassifikation |
|
||||
| 2026-02-15 | 0.2.0 | Schritt 2 (Entzerrung/Dewarp) |
|
||||
| 2026-02-12 | 0.1.0 | Schritt 1 (Begradigung/Deskew) + Session-Management |
|
||||
@@ -2169,6 +2169,142 @@ def analyze_layout_by_words(ocr_img: np.ndarray, dewarped_bgr: np.ndarray) -> Li
|
||||
return regions
|
||||
|
||||
|
||||
# =============================================================================
|
||||
# Pipeline Step 5: Word Grid from Columns × Rows
|
||||
# =============================================================================
|
||||
|
||||
def build_word_grid(
    ocr_img: np.ndarray,
    column_regions: List[PageRegion],
    row_geometries: List[RowGeometry],
    img_w: int,
    img_h: int,
    lang: str = "eng+deu",
) -> List[Dict[str, Any]]:
    """Intersect vocabulary columns with content rows and OCR every cell.

    Only rows whose ``row_type`` is ``'content'`` and columns typed
    ``column_en`` / ``column_de`` / ``column_example`` take part. Each
    (row, column) cell is clamped to the image, passed to ``ocr_region`` with
    PSM 7 and a column-specific language, and the recognised words are joined
    left-to-right into the matching text field of the row's entry.

    Args:
        ocr_img: Binarized full-page image.
        column_regions: Classified columns from Step 3.
        row_geometries: Rows from Step 4.
        img_w: Image width in pixels.
        img_h: Image height in pixels.
        lang: Default Tesseract language. Every vocabulary column type handled
            here has its own fixed language, so this default is not consulted.

    Returns:
        One dict per content row that produced any text, with keys
        ``row_index`` (position among the content rows), ``english``,
        ``german``, ``example``, ``confidence`` (mean of the per-cell mean word
        confidences, rounded to one decimal) and the percent-based boxes
        ``bbox``, ``bbox_en``, ``bbox_de``, ``bbox_ex``. Returns an empty list
        when there are no content rows or no vocabulary columns.
    """
    rows = [geom for geom in row_geometries if geom.row_type == 'content']
    if not rows:
        logger.warning("build_word_grid: no content rows found")
        return []

    # column type -> (OCR language, entry text key, entry bbox key)
    column_roles = {
        'column_en': ('eng', 'english', 'bbox_en'),
        'column_de': ('deu', 'german', 'bbox_de'),
        'column_example': ('eng+deu', 'example', 'bbox_ex'),
    }

    # Vocabulary columns only, ordered left-to-right.
    vocab_cols = sorted(
        (region for region in column_regions if region.type in column_roles),
        key=lambda region: region.x,
    )
    if not vocab_cols:
        logger.warning("build_word_grid: no relevant vocabulary columns found")
        return []

    def to_percent_box(left, top, width, height) -> Dict[str, float]:
        """Convert a pixel rectangle to percent of the page, 2 decimals."""
        return {
            'x': round(left / img_w * 100, 2),
            'y': round(top / img_h * 100, 2),
            'w': round(width / img_w * 100, 2),
            'h': round(height / img_h * 100, 2),
        }

    entries: List[Dict[str, Any]] = []

    for row_idx, row in enumerate(rows):
        entry: Dict[str, Any] = {
            'row_index': row_idx,
            'english': '',
            'german': '',
            'example': '',
            'confidence': 0.0,
            'bbox': to_percent_box(row.x, row.y, row.width, row.height),
            'bbox_en': None,
            'bbox_de': None,
            'bbox_ex': None,
        }
        cell_confidences: List[float] = []

        for col in vocab_cols:
            # Cell = column's horizontal extent x row's vertical extent,
            # clamped so it does not leave the image.
            left = max(0, col.x)
            top = max(0, row.y)
            width = min(col.width, img_w - left)
            height = min(row.height, img_h - top)
            if width <= 0 or height <= 0:
                continue

            ocr_lang, text_key, bbox_key = column_roles[col.type]
            cell = PageRegion(
                type=col.type,
                x=left, y=top,
                width=width, height=height,
            )
            words = ocr_region(ocr_img, cell, lang=ocr_lang, psm=7)

            # Reading order within the cell is left-to-right.
            words.sort(key=lambda w: w['left'])
            if words:
                cell_confidences.append(sum(w['conf'] for w in words) / len(words))

            entry[text_key] = ' '.join(w['text'] for w in words)
            entry[bbox_key] = to_percent_box(left, top, width, height)

        if cell_confidences:
            entry['confidence'] = round(sum(cell_confidences) / len(cell_confidences), 1)
        else:
            entry['confidence'] = 0.0

        # Rows where no cell yielded text are dropped.
        if entry['english'] or entry['german'] or entry['example']:
            entries.append(entry)

    logger.info(f"build_word_grid: {len(entries)} entries from "
                f"{len(rows)} content rows × {len(vocab_cols)} columns")

    return entries
|
||||
|
||||
|
||||
# =============================================================================
|
||||
# Stage 6: Multi-Pass OCR
|
||||
# =============================================================================
|
||||
|
||||
@@ -0,0 +1,4 @@
|
||||
-- Migration 004: Add word_result column for OCR Pipeline Step 5
|
||||
-- Stores the word recognition grid result (entries with english/german/example + bboxes)
|
||||
|
||||
ALTER TABLE ocr_pipeline_sessions ADD COLUMN IF NOT EXISTS word_result JSONB;
|
||||
@@ -29,8 +29,11 @@ from fastapi.responses import Response
|
||||
from pydantic import BaseModel
|
||||
|
||||
from cv_vocab_pipeline import (
|
||||
PageRegion,
|
||||
RowGeometry,
|
||||
analyze_layout,
|
||||
analyze_layout_by_words,
|
||||
build_word_grid,
|
||||
classify_column_types,
|
||||
create_layout_image,
|
||||
create_ocr_image,
|
||||
@@ -261,6 +264,10 @@ async def get_session_info(session_id: str):
|
||||
result["dewarp_result"] = session["dewarp_result"]
|
||||
if session.get("column_result"):
|
||||
result["column_result"] = session["column_result"]
|
||||
if session.get("row_result"):
|
||||
result["row_result"] = session["row_result"]
|
||||
if session.get("word_result"):
|
||||
result["word_result"] = session["word_result"]
|
||||
|
||||
return result
|
||||
|
||||
@@ -291,7 +298,7 @@ async def delete_session(session_id: str):
|
||||
@router.get("/sessions/{session_id}/image/{image_type}")
|
||||
async def get_image(session_id: str, image_type: str):
|
||||
"""Serve session images: original, deskewed, dewarped, binarized, columns-overlay, or rows-overlay."""
|
||||
valid_types = {"original", "deskewed", "dewarped", "binarized", "columns-overlay", "rows-overlay"}
|
||||
valid_types = {"original", "deskewed", "dewarped", "binarized", "columns-overlay", "rows-overlay", "words-overlay"}
|
||||
if image_type not in valid_types:
|
||||
raise HTTPException(status_code=400, detail=f"Unknown image type: {image_type}")
|
||||
|
||||
@@ -301,6 +308,9 @@ async def get_image(session_id: str, image_type: str):
|
||||
if image_type == "rows-overlay":
|
||||
return await _get_rows_overlay(session_id)
|
||||
|
||||
if image_type == "words-overlay":
|
||||
return await _get_words_overlay(session_id)
|
||||
|
||||
# Try cache first for fast serving
|
||||
cached = _cache.get(session_id)
|
||||
if cached:
|
||||
@@ -992,6 +1002,153 @@ async def get_row_ground_truth(session_id: str):
|
||||
}
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Word Recognition Endpoints (Step 5)
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
@router.post("/sessions/{session_id}/words")
async def detect_words(session_id: str):
    """Step 5 endpoint: build the word grid (columns × rows) and OCR each cell.

    Uses the cached dewarped image plus the stored column and row results,
    persists the outcome as ``word_result`` (setting ``current_step`` to 5),
    mirrors it into the in-memory cache and returns it together with the
    session id.

    Raises:
        HTTPException 400: dewarped image, column result or row result missing.
        HTTPException 404: session not found in the database.
    """
    if session_id not in _cache:
        await _load_session_to_cache(session_id)
    cache_entry = _get_cached(session_id)

    page_bgr = cache_entry.get("dewarped_bgr")
    if page_bgr is None:
        raise HTTPException(status_code=400, detail="Dewarp must be completed before word detection")

    session = await get_session_db(session_id)
    if not session:
        raise HTTPException(status_code=404, detail=f"Session {session_id} not found")

    stored_columns = (session.get("column_result") or {}).get("columns")
    stored_rows = (session.get("row_result") or {}).get("rows")
    if not stored_columns:
        raise HTTPException(status_code=400, detail="Column detection must be completed first")
    if not stored_rows:
        raise HTTPException(status_code=400, detail="Row detection must be completed first")

    started = time.time()

    # Binarized image for OCR; dimensions come from the dewarped source.
    ocr_img = create_ocr_image(page_bgr)
    img_h, img_w = page_bgr.shape[:2]

    # Rebuild the dataclass objects from their stored dict form.
    col_regions = []
    for col in stored_columns:
        col_regions.append(PageRegion(
            type=col["type"],
            x=col["x"], y=col["y"],
            width=col["width"], height=col["height"],
            classification_confidence=col.get("classification_confidence", 1.0),
            classification_method=col.get("classification_method", ""),
        ))

    row_geoms = []
    for stored in stored_rows:
        row_geoms.append(RowGeometry(
            index=stored["index"],
            x=stored["x"], y=stored["y"],
            width=stored["width"], height=stored["height"],
            word_count=stored.get("word_count", 0),
            words=[],
            row_type=stored.get("row_type", "content"),
            gap_before=stored.get("gap_before", 0),
        ))

    entries = build_word_grid(ocr_img, col_regions, row_geoms, img_w, img_h)
    duration = time.time() - started

    summary = {
        "total_entries": len(entries),
        "with_english": len([e for e in entries if e.get("english")]),
        "with_german": len([e for e in entries if e.get("german")]),
        # Entries below 50 confidence are counted as low confidence.
        "low_confidence": len([e for e in entries if e.get("confidence", 0) < 50]),
    }

    word_result = {
        "entries": entries,
        "entry_count": len(entries),
        "image_width": img_w,
        "image_height": img_h,
        "duration_seconds": round(duration, 2),
        "summary": summary,
    }

    # Store in the DB first, then keep the cache in sync.
    await update_session_db(
        session_id,
        word_result=word_result,
        current_step=5,
    )
    cache_entry["word_result"] = word_result

    logger.info(f"OCR Pipeline: words session {session_id}: "
                f"{len(entries)} entries ({duration:.2f}s), summary: {summary}")

    response = {"session_id": session_id}
    response.update(word_result)
    return response
|
||||
|
||||
|
||||
class WordGroundTruthRequest(BaseModel):
    """Request body for saving ground-truth feedback on the word recognition step."""

    # Reviewer's verdict on the automatic word recognition result.
    is_correct: bool
    # Optional corrected entries supplied by the reviewer.
    corrected_entries: Optional[List[Dict[str, Any]]] = None
    # Optional free-text remark.
    notes: Optional[str] = None
|
||||
|
||||
|
||||
@router.post("/sessions/{session_id}/ground-truth/words")
async def save_word_ground_truth(session_id: str, req: WordGroundTruthRequest):
    """Store ground-truth feedback for word recognition under ``ground_truth["words"]``.

    The saved record contains the request fields, a UTC timestamp and a
    snapshot of the session's current ``word_result``. Any previously saved
    word ground truth is replaced; other steps' ground truth is kept.

    Raises:
        HTTPException 404: session not found.
    """
    session = await get_session_db(session_id)
    if not session:
        raise HTTPException(status_code=404, detail=f"Session {session_id} not found")

    words_gt = {
        "is_correct": req.is_correct,
        "corrected_entries": req.corrected_entries,
        "notes": req.notes,
        "saved_at": datetime.utcnow().isoformat(),
        "word_result": session.get("word_result"),
    }

    all_gt = session.get("ground_truth") or {}
    all_gt["words"] = words_gt

    await update_session_db(session_id, ground_truth=all_gt)

    # Keep an already-cached session consistent with the DB.
    if session_id in _cache:
        _cache[session_id]["ground_truth"] = all_gt

    return {"session_id": session_id, "ground_truth": words_gt}
|
||||
|
||||
|
||||
@router.get("/sessions/{session_id}/ground-truth/words")
async def get_word_ground_truth(session_id: str):
    """Return the saved word ground truth alongside the automatic word result.

    Raises:
        HTTPException 404: session not found, or no word ground truth saved yet.
    """
    session = await get_session_db(session_id)
    if not session:
        raise HTTPException(status_code=404, detail=f"Session {session_id} not found")

    saved = (session.get("ground_truth") or {}).get("words")
    if not saved:
        raise HTTPException(status_code=404, detail="No word ground truth saved")

    response = {"session_id": session_id}
    response["words_gt"] = saved
    response["words_auto"] = session.get("word_result")
    return response
|
||||
|
||||
|
||||
async def _get_rows_overlay(session_id: str) -> Response:
|
||||
"""Generate dewarped image with row bands drawn on it."""
|
||||
session = await get_session_db(session_id)
|
||||
@@ -1049,3 +1206,106 @@ async def _get_rows_overlay(session_id: str) -> Response:
|
||||
raise HTTPException(status_code=500, detail="Failed to encode overlay image")
|
||||
|
||||
return Response(content=result_png.tobytes(), media_type="image/png")
|
||||
|
||||
|
||||
async def _get_words_overlay(session_id: str) -> Response:
    """Render the dewarped page as PNG with the recognised word grid drawn on top.

    Drawn elements: vertical lines at both edges of every vocabulary column,
    a horizontal line at the top of every content row, and for each non-empty
    cell a lightly tinted fill (by column type), a border and the recognised
    text (first 30 characters), the latter two coloured by entry confidence.

    Raises:
        HTTPException 404: session, word data or dewarped image missing.
        HTTPException 500: image could not be decoded or encoded.
    """
    session = await get_session_db(session_id)
    if not session:
        raise HTTPException(status_code=404, detail=f"Session {session_id} not found")

    word_result = session.get("word_result")
    if not (word_result and word_result.get("entries")):
        raise HTTPException(status_code=404, detail="No word data available")

    columns = (session.get("column_result") or {}).get("columns") or []
    rows = (session.get("row_result") or {}).get("rows") or []

    png_bytes = await get_session_image(session_id, "dewarped")
    if not png_bytes:
        raise HTTPException(status_code=404, detail="Dewarped image not available")

    img = cv2.imdecode(np.frombuffer(png_bytes, dtype=np.uint8), cv2.IMREAD_COLOR)
    if img is None:
        raise HTTPException(status_code=500, detail="Failed to decode image")

    img_h, img_w = img.shape[:2]

    # Per-column-type colours, in OpenCV's BGR channel order.
    palette = {
        "column_en": (255, 180, 0),       # Blue
        "column_de": (0, 200, 0),         # Green
        "column_example": (0, 140, 255),  # Orange
    }
    row_line_color = (180, 180, 180)

    # Cell fills go on a separate copy so they can be blended in faintly at the end.
    fill_layer = img.copy()

    # Vertical lines at the left and right edge of each vocabulary column.
    for col in columns:
        line_color = palette.get(col.get("type", ""))
        if line_color is None:
            continue
        for x_pos in (col["x"], col["x"] + col["width"]):
            cv2.line(img, (x_pos, 0), (x_pos, img_h), line_color, 1)

    # Horizontal line at the top edge of each content row.
    for row in rows:
        if row.get("row_type") != "content":
            continue
        cv2.line(img, (0, row["y"]), (img_w, row["y"]), row_line_color, 1)

    # (bbox key, text key, column type) for the three cells of an entry.
    cell_fields = (
        ("bbox_en", "english", "column_en"),
        ("bbox_de", "german", "column_de"),
        ("bbox_ex", "example", "column_example"),
    )

    for entry in word_result["entries"]:
        # Confidence colour: green from 70, yellow from 50, red below.
        confidence = entry.get("confidence", 0)
        if confidence < 50:
            text_color = (0, 0, 220)
        elif confidence < 70:
            text_color = (0, 180, 220)
        else:
            text_color = (0, 180, 0)

        for bbox_key, field_key, col_type in cell_fields:
            bbox = entry.get(bbox_key)
            text = entry.get(field_key, "")
            if not (bbox and text):
                continue

            # Boxes are stored in percent of the page; convert to pixels.
            left = int(bbox["x"] / 100 * img_w)
            top = int(bbox["y"] / 100 * img_h)
            right = left + int(bbox["w"] / 100 * img_w)
            bottom = top + int(bbox["h"] / 100 * img_h)

            # Filled rectangle on the blend layer, outline directly on the image.
            cv2.rectangle(fill_layer, (left, top), (right, bottom),
                          palette.get(col_type, (200, 200, 200)), -1)
            cv2.rectangle(img, (left, top), (right, bottom), text_color, 1)

            # Label: recognised text, limited to 30 characters.
            cv2.putText(img, text[:30], (left + 3, bottom - 4),
                        cv2.FONT_HERSHEY_SIMPLEX, 0.35, text_color, 1)

    # Blend the fill layer in at 10% opacity (result written back into img).
    cv2.addWeighted(fill_layer, 0.1, img, 0.9, 0, img)

    encoded_ok, png = cv2.imencode(".png", img)
    if not encoded_ok:
        raise HTTPException(status_code=500, detail="Failed to encode overlay image")

    return Response(content=png.tobytes(), media_type="image/png")
|
||||
|
||||
@@ -80,7 +80,7 @@ async def create_session_db(
|
||||
) VALUES ($1, $2, $3, $4, 'active', 1)
|
||||
RETURNING id, name, filename, status, current_step,
|
||||
deskew_result, dewarp_result, column_result, row_result,
|
||||
ground_truth, auto_shear_degrees,
|
||||
word_result, ground_truth, auto_shear_degrees,
|
||||
created_at, updated_at
|
||||
""", uuid.UUID(session_id), name, filename, original_png)
|
||||
|
||||
@@ -94,7 +94,7 @@ async def get_session_db(session_id: str) -> Optional[Dict[str, Any]]:
|
||||
row = await conn.fetchrow("""
|
||||
SELECT id, name, filename, status, current_step,
|
||||
deskew_result, dewarp_result, column_result, row_result,
|
||||
ground_truth, auto_shear_degrees,
|
||||
word_result, ground_truth, auto_shear_degrees,
|
||||
created_at, updated_at
|
||||
FROM ocr_pipeline_sessions WHERE id = $1
|
||||
""", uuid.UUID(session_id))
|
||||
@@ -136,10 +136,10 @@ async def update_session_db(session_id: str, **kwargs) -> Optional[Dict[str, Any
|
||||
'name', 'filename', 'status', 'current_step',
|
||||
'original_png', 'deskewed_png', 'binarized_png', 'dewarped_png',
|
||||
'deskew_result', 'dewarp_result', 'column_result', 'row_result',
|
||||
'ground_truth', 'auto_shear_degrees',
|
||||
'word_result', 'ground_truth', 'auto_shear_degrees',
|
||||
}
|
||||
|
||||
jsonb_fields = {'deskew_result', 'dewarp_result', 'column_result', 'row_result', 'ground_truth'}
|
||||
jsonb_fields = {'deskew_result', 'dewarp_result', 'column_result', 'row_result', 'word_result', 'ground_truth'}
|
||||
|
||||
for key, value in kwargs.items():
|
||||
if key in allowed_fields:
|
||||
@@ -164,7 +164,7 @@ async def update_session_db(session_id: str, **kwargs) -> Optional[Dict[str, Any
|
||||
WHERE id = ${param_idx}
|
||||
RETURNING id, name, filename, status, current_step,
|
||||
deskew_result, dewarp_result, column_result, row_result,
|
||||
ground_truth, auto_shear_degrees,
|
||||
word_result, ground_truth, auto_shear_degrees,
|
||||
created_at, updated_at
|
||||
""", *values)
|
||||
|
||||
@@ -220,7 +220,7 @@ def _row_to_dict(row: asyncpg.Record) -> Dict[str, Any]:
|
||||
result[key] = result[key].isoformat()
|
||||
|
||||
# JSONB → parsed (asyncpg returns str for JSONB)
|
||||
for key in ['deskew_result', 'dewarp_result', 'column_result', 'row_result', 'ground_truth']:
|
||||
for key in ['deskew_result', 'dewarp_result', 'column_result', 'row_result', 'word_result', 'ground_truth']:
|
||||
if key in result and result[key] is not None:
|
||||
if isinstance(result[key], str):
|
||||
result[key] = json.loads(result[key])
|
||||
|
||||
@@ -65,6 +65,7 @@ nav:
|
||||
- BYOEH Architektur: services/klausur-service/BYOEH-Architecture.md
|
||||
- BYOEH Developer Guide: services/klausur-service/BYOEH-Developer-Guide.md
|
||||
- NiBiS Pipeline: services/klausur-service/NiBiS-Ingestion-Pipeline.md
|
||||
- OCR Pipeline: services/klausur-service/OCR-Pipeline.md
|
||||
- OCR Labeling: services/klausur-service/OCR-Labeling-Spec.md
|
||||
- OCR Vergleich: services/klausur-service/OCR-Compare.md
|
||||
- RAG Admin: services/klausur-service/RAG-Admin-Spec.md
|
||||
|
||||
Reference in New Issue
Block a user