feat: add Kombi-Modus (PaddleOCR + Tesseract) for OCR Overlay
Some checks failed
CI / go-lint (push) Has been skipped
CI / python-lint (push) Has been skipped
CI / nodejs-lint (push) Has been skipped
CI / test-go-school (push) Successful in 35s
CI / test-go-edu-search (push) Successful in 33s
CI / test-python-klausur (push) Failing after 2m20s
CI / test-python-agent-core (push) Successful in 22s
CI / test-nodejs-website (push) Successful in 41s

Runs both OCR engines on the preprocessed image and merges their results:
word boxes are matched by IoU (intersection over union), and the coordinates
of matched boxes are averaged, weighted by each engine's confidence.
Tesseract words without a PaddleOCR match (bullets, symbols) are added for
better coverage.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
Benjamin Admin
2026-03-12 20:05:50 +01:00
parent d335a7bbf3
commit e9ccd1e35c
4 changed files with 279 additions and 26 deletions

View File

@@ -11,12 +11,12 @@ import { StepRowDetection } from '@/components/ocr-pipeline/StepRowDetection'
import { StepWordRecognition } from '@/components/ocr-pipeline/StepWordRecognition'
import { OverlayReconstruction } from '@/components/ocr-overlay/OverlayReconstruction'
import { PaddleDirectStep } from '@/components/ocr-overlay/PaddleDirectStep'
import { OVERLAY_PIPELINE_STEPS, PADDLE_DIRECT_STEPS, DOCUMENT_CATEGORIES, dbStepToOverlayUi, type PipelineStep, type SessionListItem, type DocumentCategory } from './types'
import { OVERLAY_PIPELINE_STEPS, PADDLE_DIRECT_STEPS, KOMBI_STEPS, DOCUMENT_CATEGORIES, dbStepToOverlayUi, type PipelineStep, type SessionListItem, type DocumentCategory } from './types'
const KLAUSUR_API = '/klausur-api'
export default function OcrOverlayPage() {
const [mode, setMode] = useState<'pipeline' | 'paddle-direct'>('pipeline')
const [mode, setMode] = useState<'pipeline' | 'paddle-direct' | 'kombi'>('pipeline')
const [currentStep, setCurrentStep] = useState(0)
const [sessionId, setSessionId] = useState<string | null>(null)
const [sessionName, setSessionName] = useState<string>('')
@@ -63,13 +63,17 @@ export default function OcrOverlayPage() {
setSessionName(data.name || data.filename || '')
setActiveCategory(data.document_category || undefined)
// Check if this session was processed with paddle_direct
const isPaddleDirect = data.word_result?.ocr_engine === 'paddle_direct'
// Check if this session was processed with paddle_direct or kombi
const ocrEngine = data.word_result?.ocr_engine
const isPaddleDirect = ocrEngine === 'paddle_direct'
const isKombi = ocrEngine === 'kombi'
if (isPaddleDirect) {
setMode('paddle-direct')
if (isPaddleDirect || isKombi) {
const m = isKombi ? 'kombi' : 'paddle-direct'
const baseSteps = isKombi ? KOMBI_STEPS : PADDLE_DIRECT_STEPS
setMode(m)
setSteps(
PADDLE_DIRECT_STEPS.map((s, i) => ({
baseSteps.map((s, i) => ({
...s,
status: i < 4 ? 'completed' : i === 4 ? 'active' : 'pending',
})),
@@ -101,7 +105,7 @@ export default function OcrOverlayPage() {
if (sessionId === sid) {
setSessionId(null)
setCurrentStep(0)
const baseSteps = mode === 'paddle-direct' ? PADDLE_DIRECT_STEPS : OVERLAY_PIPELINE_STEPS
const baseSteps = mode === 'kombi' ? KOMBI_STEPS : mode === 'paddle-direct' ? PADDLE_DIRECT_STEPS : OVERLAY_PIPELINE_STEPS
setSteps(baseSteps.map((s, i) => ({ ...s, status: i === 0 ? 'active' : 'pending' })))
}
} catch (e) {
@@ -158,7 +162,7 @@ export default function OcrOverlayPage() {
const handleNext = () => {
if (currentStep >= steps.length - 1) {
// Last step completed — return to session list
const baseSteps = mode === 'paddle-direct' ? PADDLE_DIRECT_STEPS : OVERLAY_PIPELINE_STEPS
const baseSteps = mode === 'kombi' ? KOMBI_STEPS : mode === 'paddle-direct' ? PADDLE_DIRECT_STEPS : OVERLAY_PIPELINE_STEPS
setSteps(baseSteps.map((s, i) => ({ ...s, status: i === 0 ? 'active' : 'pending' })))
setCurrentStep(0)
setSessionId(null)
@@ -187,7 +191,7 @@ export default function OcrOverlayPage() {
setSessionId(null)
setSessionName('')
setCurrentStep(0)
const baseSteps = mode === 'paddle-direct' ? PADDLE_DIRECT_STEPS : OVERLAY_PIPELINE_STEPS
const baseSteps = mode === 'kombi' ? KOMBI_STEPS : mode === 'paddle-direct' ? PADDLE_DIRECT_STEPS : OVERLAY_PIPELINE_STEPS
setSteps(baseSteps.map((s, i) => ({ ...s, status: i === 0 ? 'active' : 'pending' })))
}
@@ -226,7 +230,7 @@ export default function OcrOverlayPage() {
}, [sessionId, goToStep])
const renderStep = () => {
if (mode === 'paddle-direct') {
if (mode === 'paddle-direct' || mode === 'kombi') {
switch (currentStep) {
case 0:
return <StepOrientation sessionId={sessionId} onNext={handleOrientationComplete} />
@@ -237,7 +241,21 @@ export default function OcrOverlayPage() {
case 3:
return <StepCrop sessionId={sessionId} onNext={handleNext} />
case 4:
return <PaddleDirectStep sessionId={sessionId} onNext={handleNext} />
return mode === 'kombi' ? (
<PaddleDirectStep
sessionId={sessionId}
onNext={handleNext}
endpoint="paddle-kombi"
title="Kombi-Modus"
description="PaddleOCR und Tesseract laufen parallel. Koordinaten werden gewichtet gemittelt fuer optimale Positionierung."
icon="🔀"
buttonLabel="Paddle + Tesseract starten"
runningLabel="Paddle + Tesseract laufen..."
engineKey="kombi"
/>
) : (
<PaddleDirectStep sessionId={sessionId} onNext={handleNext} />
)
default:
return null
}
@@ -480,13 +498,29 @@ export default function OcrOverlayPage() {
>
Paddle Direct (5 Schritte)
</button>
<button
onClick={() => {
if (mode === 'kombi') return
setMode('kombi')
setCurrentStep(0)
setSessionId(null)
setSteps(KOMBI_STEPS.map((s, i) => ({ ...s, status: i === 0 ? 'active' : 'pending' })))
}}
className={`px-3 py-1.5 text-xs font-medium rounded-md transition-colors ${
mode === 'kombi'
? 'bg-white dark:bg-gray-700 text-gray-700 dark:text-gray-200 shadow-sm'
: 'text-gray-500 dark:text-gray-400 hover:text-gray-700 dark:hover:text-gray-300'
}`}
>
Kombi (5 Schritte)
</button>
</div>
<PipelineStepper
steps={steps}
currentStep={currentStep}
onStepClick={handleStepClick}
onReprocess={mode === 'pipeline' && sessionId ? reprocessFromStep : undefined}
onReprocess={mode === 'pipeline' && sessionId != null ? reprocessFromStep : undefined}
/>
<div className="min-h-[400px]">{renderStep()}</div>

View File

@@ -60,6 +60,18 @@ export const PADDLE_DIRECT_STEPS: PipelineStep[] = [
{ id: 'paddle-direct', name: 'PaddleOCR + Overlay', icon: '⚡', status: 'pending' },
]
/**
 * Stepper entries for Kombi mode (PaddleOCR + Tesseract).
 *
 * Four preprocessing steps (orientation, deskew, dewarp, crop) are followed
 * by a single combined OCR step. Every entry starts out as `'pending'`.
 */
export const KOMBI_STEPS: PipelineStep[] = (
  [
    // [id, display name, icon]
    ['orientation', 'Orientierung', '🔄'],
    ['deskew', 'Begradigung', '📐'],
    ['dewarp', 'Entzerrung', '🔧'],
    ['crop', 'Zuschneiden', '✂️'],
    ['kombi', 'Paddle + Tesseract', '🔀'],
  ] as const
).map(([id, name, icon]) => ({ id, name, icon, status: 'pending' }))
/** Map from DB step to overlay UI step index */
export function dbStepToOverlayUi(dbStep: number): number {
// DB: 1=start, 2=orient, 3=deskew, 4=dewarp, 5=crop, 6=columns, 7=rows, 8=words, 9=recon, 10=gt

View File

@@ -10,14 +10,38 @@ type Phase = 'idle' | 'running' | 'overlay'
/**
 * Props for `PaddleDirectStep`.
 *
 * Only `sessionId` and `onNext` are required. The optional props let a caller
 * reuse the same step UI for a different OCR backend route by swapping the
 * endpoint, the engine key and the user-facing labels.
 */
interface PaddleDirectStepProps {
/**
 * Session to operate on. While this is `null` the component neither looks up
 * the session nor sends an OCR request.
 */
sessionId: string | null
/**
 * Callback supplied by the parent page.
 * NOTE(review): the call site is not visible in this excerpt — presumably
 * invoked to advance once the overlay step is done; confirm in the component.
 */
onNext: () => void
/**
 * Last path segment of the POST request that starts OCR:
 * `<api>/api/v1/ocr-pipeline/sessions/<sessionId>/<endpoint>`.
 * Defaults to `'paddle-direct'`.
 */
endpoint?: string
/** Heading rendered in the idle state. Defaults to `'Paddle Direct'`. */
title?: string
/**
 * Explanatory paragraph rendered below the heading in the idle state.
 * Defaults to the Paddle Direct description text.
 */
description?: string
/** Icon rendered above the heading in the idle state. Defaults to `'⚡'`. */
icon?: string
/** Text of the button that starts OCR. Defaults to `'PaddleOCR starten'`. */
buttonLabel?: string
/** Text rendered next to the spinner while OCR is running. Defaults to `'PaddleOCR laeuft...'`. */
runningLabel?: string
/**
 * Value compared against `word_result.ocr_engine` of the fetched session.
 * On a match the component switches straight to the overlay phase.
 * Defaults to `'paddle_direct'`.
 */
engineKey?: string
}
export function PaddleDirectStep({ sessionId, onNext }: PaddleDirectStepProps) {
export function PaddleDirectStep({
sessionId,
onNext,
endpoint = 'paddle-direct',
title = 'Paddle Direct',
description = 'PaddleOCR erkennt alle Woerter direkt auf dem Originalbild — ohne Begradigung, Entzerrung oder Zuschnitt.',
icon = '⚡',
buttonLabel = 'PaddleOCR starten',
runningLabel = 'PaddleOCR laeuft...',
engineKey = 'paddle_direct',
}: PaddleDirectStepProps) {
const [phase, setPhase] = useState<Phase>('idle')
const [error, setError] = useState<string | null>(null)
const [stats, setStats] = useState<{ cells: number; rows: number; duration: number } | null>(null)
// Auto-detect: if session already has paddle_direct word_result → show overlay
// Auto-detect: if session already has matching word_result → show overlay
useEffect(() => {
if (!sessionId) return
let cancelled = false
@@ -26,7 +50,7 @@ export function PaddleDirectStep({ sessionId, onNext }: PaddleDirectStepProps) {
const res = await fetch(`${KLAUSUR_API}/api/v1/ocr-pipeline/sessions/${sessionId}`)
if (!res.ok || cancelled) return
const data = await res.json()
if (data.word_result?.ocr_engine === 'paddle_direct') {
if (data.word_result?.ocr_engine === engineKey) {
setPhase('overlay')
}
} catch {
@@ -34,14 +58,14 @@ export function PaddleDirectStep({ sessionId, onNext }: PaddleDirectStepProps) {
}
})()
return () => { cancelled = true }
}, [sessionId])
}, [sessionId, engineKey])
const runPaddleDirect = useCallback(async () => {
const runOcr = useCallback(async () => {
if (!sessionId) return
setPhase('running')
setError(null)
try {
const res = await fetch(`${KLAUSUR_API}/api/v1/ocr-pipeline/sessions/${sessionId}/paddle-direct`, {
const res = await fetch(`${KLAUSUR_API}/api/v1/ocr-pipeline/sessions/${sessionId}/${endpoint}`, {
method: 'POST',
})
if (!res.ok) {
@@ -59,7 +83,7 @@ export function PaddleDirectStep({ sessionId, onNext }: PaddleDirectStepProps) {
setError(e instanceof Error ? e.message : 'Unbekannter Fehler')
setPhase('idle')
}
}, [sessionId])
}, [sessionId, endpoint])
if (!sessionId) {
return (
@@ -91,7 +115,7 @@ export function PaddleDirectStep({ sessionId, onNext }: PaddleDirectStepProps) {
<div className="w-10 h-10 border-4 border-teal-200 dark:border-teal-800 border-t-teal-600 dark:border-t-teal-400 rounded-full animate-spin" />
<div className="text-center space-y-1">
<p className="text-sm font-medium text-gray-700 dark:text-gray-300">
PaddleOCR laeuft...
{runningLabel}
</p>
<p className="text-xs text-gray-400">
Bild wird analysiert (ca. 5-30s)
@@ -101,12 +125,12 @@ export function PaddleDirectStep({ sessionId, onNext }: PaddleDirectStepProps) {
) : (
<>
<div className="text-center space-y-2">
<div className="text-4xl"></div>
<div className="text-4xl">{icon}</div>
<h3 className="text-lg font-medium text-gray-700 dark:text-gray-300">
Paddle Direct
{title}
</h3>
<p className="text-sm text-gray-500 dark:text-gray-400 max-w-md">
PaddleOCR erkennt alle Woerter direkt auf dem Originalbild ohne Begradigung, Entzerrung oder Zuschnitt.
{description}
</p>
</div>
@@ -117,10 +141,10 @@ export function PaddleDirectStep({ sessionId, onNext }: PaddleDirectStepProps) {
)}
<button
onClick={runPaddleDirect}
onClick={runOcr}
className="px-6 py-2.5 bg-teal-600 text-white text-sm font-medium rounded-lg hover:bg-teal-700 transition-colors"
>
PaddleOCR starten
{buttonLabel}
</button>
</>
)}