feat: Paddle Direct — 1-click OCR without deskew/dewarp/crop
Some checks failed
CI / go-lint (push) Has been cancelled
CI / python-lint (push) Has been cancelled
CI / nodejs-lint (push) Has been cancelled
CI / test-go-school (push) Has been cancelled
CI / test-go-edu-search (push) Has been cancelled
CI / test-python-klausur (push) Has been cancelled
CI / test-python-agent-core (push) Has been cancelled
CI / test-nodejs-website (push) Has been cancelled
Some checks failed
CI / go-lint (push) Has been cancelled
CI / python-lint (push) Has been cancelled
CI / nodejs-lint (push) Has been cancelled
CI / test-go-school (push) Has been cancelled
CI / test-go-edu-search (push) Has been cancelled
CI / test-python-klausur (push) Has been cancelled
CI / test-python-agent-core (push) Has been cancelled
CI / test-nodejs-website (push) Has been cancelled
New 2-step mode (Upload → PaddleOCR+Overlay) alongside the existing 7-step pipeline. Backend endpoint runs PaddleOCR on the original image and clusters words into rows/cells directly. Frontend adds a mode toggle and PaddleDirectStep component. Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -10,11 +10,13 @@ import { StepCrop } from '@/components/ocr-pipeline/StepCrop'
|
||||
import { StepRowDetection } from '@/components/ocr-pipeline/StepRowDetection'
|
||||
import { StepWordRecognition } from '@/components/ocr-pipeline/StepWordRecognition'
|
||||
import { OverlayReconstruction } from '@/components/ocr-overlay/OverlayReconstruction'
|
||||
import { OVERLAY_PIPELINE_STEPS, DOCUMENT_CATEGORIES, dbStepToOverlayUi, type PipelineStep, type SessionListItem, type DocumentCategory } from './types'
|
||||
import { PaddleDirectStep } from '@/components/ocr-overlay/PaddleDirectStep'
|
||||
import { OVERLAY_PIPELINE_STEPS, PADDLE_DIRECT_STEPS, DOCUMENT_CATEGORIES, dbStepToOverlayUi, type PipelineStep, type SessionListItem, type DocumentCategory } from './types'
|
||||
|
||||
const KLAUSUR_API = '/klausur-api'
|
||||
|
||||
export default function OcrOverlayPage() {
|
||||
const [mode, setMode] = useState<'pipeline' | 'paddle-direct'>('pipeline')
|
||||
const [currentStep, setCurrentStep] = useState(0)
|
||||
const [sessionId, setSessionId] = useState<string | null>(null)
|
||||
const [sessionName, setSessionName] = useState<string>('')
|
||||
@@ -61,17 +63,32 @@ export default function OcrOverlayPage() {
|
||||
setSessionName(data.name || data.filename || '')
|
||||
setActiveCategory(data.document_category || undefined)
|
||||
|
||||
// Map DB step to overlay UI step
|
||||
const dbStep = data.current_step || 1
|
||||
const uiStep = dbStepToOverlayUi(dbStep)
|
||||
// Check if this session was processed with paddle_direct
|
||||
const isPaddleDirect = data.word_result?.ocr_engine === 'paddle_direct'
|
||||
|
||||
setSteps(
|
||||
OVERLAY_PIPELINE_STEPS.map((s, i) => ({
|
||||
...s,
|
||||
status: i < uiStep ? 'completed' : i === uiStep ? 'active' : 'pending',
|
||||
})),
|
||||
)
|
||||
setCurrentStep(uiStep)
|
||||
if (isPaddleDirect) {
|
||||
setMode('paddle-direct')
|
||||
setSteps(
|
||||
PADDLE_DIRECT_STEPS.map((s, i) => ({
|
||||
...s,
|
||||
status: i < 1 ? 'completed' : i === 1 ? 'active' : 'pending',
|
||||
})),
|
||||
)
|
||||
setCurrentStep(1)
|
||||
} else {
|
||||
setMode('pipeline')
|
||||
// Map DB step to overlay UI step
|
||||
const dbStep = data.current_step || 1
|
||||
const uiStep = dbStepToOverlayUi(dbStep)
|
||||
|
||||
setSteps(
|
||||
OVERLAY_PIPELINE_STEPS.map((s, i) => ({
|
||||
...s,
|
||||
status: i < uiStep ? 'completed' : i === uiStep ? 'active' : 'pending',
|
||||
})),
|
||||
)
|
||||
setCurrentStep(uiStep)
|
||||
}
|
||||
} catch (e) {
|
||||
console.error('Failed to open session:', e)
|
||||
}
|
||||
@@ -84,12 +101,13 @@ export default function OcrOverlayPage() {
|
||||
if (sessionId === sid) {
|
||||
setSessionId(null)
|
||||
setCurrentStep(0)
|
||||
setSteps(OVERLAY_PIPELINE_STEPS.map((s, i) => ({ ...s, status: i === 0 ? 'active' : 'pending' })))
|
||||
const baseSteps = mode === 'paddle-direct' ? PADDLE_DIRECT_STEPS : OVERLAY_PIPELINE_STEPS
|
||||
setSteps(baseSteps.map((s, i) => ({ ...s, status: i === 0 ? 'active' : 'pending' })))
|
||||
}
|
||||
} catch (e) {
|
||||
console.error('Failed to delete session:', e)
|
||||
}
|
||||
}, [sessionId])
|
||||
}, [sessionId, mode])
|
||||
|
||||
const renameSession = useCallback(async (sid: string, newName: string) => {
|
||||
try {
|
||||
@@ -140,7 +158,8 @@ export default function OcrOverlayPage() {
|
||||
const handleNext = () => {
|
||||
if (currentStep >= steps.length - 1) {
|
||||
// Last step completed — return to session list
|
||||
setSteps(OVERLAY_PIPELINE_STEPS.map((s, i) => ({ ...s, status: i === 0 ? 'active' : 'pending' })))
|
||||
const baseSteps = mode === 'paddle-direct' ? PADDLE_DIRECT_STEPS : OVERLAY_PIPELINE_STEPS
|
||||
setSteps(baseSteps.map((s, i) => ({ ...s, status: i === 0 ? 'active' : 'pending' })))
|
||||
setCurrentStep(0)
|
||||
setSessionId(null)
|
||||
loadSessions()
|
||||
@@ -168,7 +187,8 @@ export default function OcrOverlayPage() {
|
||||
setSessionId(null)
|
||||
setSessionName('')
|
||||
setCurrentStep(0)
|
||||
setSteps(OVERLAY_PIPELINE_STEPS.map((s, i) => ({ ...s, status: i === 0 ? 'active' : 'pending' })))
|
||||
const baseSteps = mode === 'paddle-direct' ? PADDLE_DIRECT_STEPS : OVERLAY_PIPELINE_STEPS
|
||||
setSteps(baseSteps.map((s, i) => ({ ...s, status: i === 0 ? 'active' : 'pending' })))
|
||||
}
|
||||
|
||||
const stepNames: Record<number, string> = {
|
||||
@@ -206,6 +226,16 @@ export default function OcrOverlayPage() {
|
||||
}, [sessionId, goToStep])
|
||||
|
||||
const renderStep = () => {
|
||||
if (mode === 'paddle-direct') {
|
||||
switch (currentStep) {
|
||||
case 0:
|
||||
return <StepOrientation sessionId={sessionId} onNext={handleOrientationComplete} />
|
||||
case 1:
|
||||
return <PaddleDirectStep sessionId={sessionId} onNext={handleNext} />
|
||||
default:
|
||||
return null
|
||||
}
|
||||
}
|
||||
switch (currentStep) {
|
||||
case 0:
|
||||
return <StepOrientation sessionId={sessionId} onNext={handleOrientationComplete} />
|
||||
@@ -410,11 +440,47 @@ export default function OcrOverlayPage() {
|
||||
</div>
|
||||
)}
|
||||
|
||||
{/* Mode Toggle */}
|
||||
<div className="flex items-center gap-1 bg-gray-100 dark:bg-gray-800 rounded-lg p-1 w-fit">
|
||||
<button
|
||||
onClick={() => {
|
||||
if (mode === 'pipeline') return
|
||||
setMode('pipeline')
|
||||
setCurrentStep(0)
|
||||
setSessionId(null)
|
||||
setSteps(OVERLAY_PIPELINE_STEPS.map((s, i) => ({ ...s, status: i === 0 ? 'active' : 'pending' })))
|
||||
}}
|
||||
className={`px-3 py-1.5 text-xs font-medium rounded-md transition-colors ${
|
||||
mode === 'pipeline'
|
||||
? 'bg-white dark:bg-gray-700 text-gray-700 dark:text-gray-200 shadow-sm'
|
||||
: 'text-gray-500 dark:text-gray-400 hover:text-gray-700 dark:hover:text-gray-300'
|
||||
}`}
|
||||
>
|
||||
Pipeline (7 Schritte)
|
||||
</button>
|
||||
<button
|
||||
onClick={() => {
|
||||
if (mode === 'paddle-direct') return
|
||||
setMode('paddle-direct')
|
||||
setCurrentStep(0)
|
||||
setSessionId(null)
|
||||
setSteps(PADDLE_DIRECT_STEPS.map((s, i) => ({ ...s, status: i === 0 ? 'active' : 'pending' })))
|
||||
}}
|
||||
className={`px-3 py-1.5 text-xs font-medium rounded-md transition-colors ${
|
||||
mode === 'paddle-direct'
|
||||
? 'bg-white dark:bg-gray-700 text-gray-700 dark:text-gray-200 shadow-sm'
|
||||
: 'text-gray-500 dark:text-gray-400 hover:text-gray-700 dark:hover:text-gray-300'
|
||||
}`}
|
||||
>
|
||||
Paddle Direct (2 Schritte)
|
||||
</button>
|
||||
</div>
|
||||
|
||||
<PipelineStepper
|
||||
steps={steps}
|
||||
currentStep={currentStep}
|
||||
onStepClick={handleStepClick}
|
||||
onReprocess={sessionId ? reprocessFromStep : undefined}
|
||||
onReprocess={mode === 'pipeline' && sessionId ? reprocessFromStep : undefined}
|
||||
/>
|
||||
|
||||
<div className="min-h-[400px]">{renderStep()}</div>
|
||||
|
||||
@@ -48,6 +48,15 @@ export const OVERLAY_UI_TO_DB: Record<number, number> = {
|
||||
6: 9, // reconstruction
|
||||
}
|
||||
|
||||
/**
|
||||
* 2-step pipeline for Paddle Direct mode.
|
||||
* Upload → PaddleOCR+Overlay (skips deskew/dewarp/crop/rows)
|
||||
*/
|
||||
export const PADDLE_DIRECT_STEPS: PipelineStep[] = [
|
||||
{ id: 'orientation', name: 'Upload', icon: '📤', status: 'pending' },
|
||||
{ id: 'paddle-direct', name: 'PaddleOCR + Overlay', icon: '⚡', status: 'pending' },
|
||||
]
|
||||
|
||||
/** Map from DB step to overlay UI step index */
|
||||
export function dbStepToOverlayUi(dbStep: number): number {
|
||||
// DB: 1=start, 2=orient, 3=deskew, 4=dewarp, 5=crop, 6=columns, 7=rows, 8=words, 9=recon, 10=gt
|
||||
|
||||
129
admin-lehrer/components/ocr-overlay/PaddleDirectStep.tsx
Normal file
129
admin-lehrer/components/ocr-overlay/PaddleDirectStep.tsx
Normal file
@@ -0,0 +1,129 @@
|
||||
'use client'
|
||||
|
||||
import { useCallback, useEffect, useState } from 'react'
|
||||
import { OverlayReconstruction } from './OverlayReconstruction'
|
||||
|
||||
const KLAUSUR_API = '/klausur-api'
|
||||
|
||||
type Phase = 'idle' | 'running' | 'overlay'
|
||||
|
||||
interface PaddleDirectStepProps {
|
||||
sessionId: string | null
|
||||
onNext: () => void
|
||||
}
|
||||
|
||||
export function PaddleDirectStep({ sessionId, onNext }: PaddleDirectStepProps) {
|
||||
const [phase, setPhase] = useState<Phase>('idle')
|
||||
const [error, setError] = useState<string | null>(null)
|
||||
const [stats, setStats] = useState<{ cells: number; rows: number; duration: number } | null>(null)
|
||||
|
||||
// Auto-detect: if session already has paddle_direct word_result → show overlay
|
||||
useEffect(() => {
|
||||
if (!sessionId) return
|
||||
let cancelled = false
|
||||
;(async () => {
|
||||
try {
|
||||
const res = await fetch(`${KLAUSUR_API}/api/v1/ocr-pipeline/sessions/${sessionId}`)
|
||||
if (!res.ok || cancelled) return
|
||||
const data = await res.json()
|
||||
if (data.word_result?.ocr_engine === 'paddle_direct') {
|
||||
setPhase('overlay')
|
||||
}
|
||||
} catch {
|
||||
// ignore
|
||||
}
|
||||
})()
|
||||
return () => { cancelled = true }
|
||||
}, [sessionId])
|
||||
|
||||
const runPaddleDirect = useCallback(async () => {
|
||||
if (!sessionId) return
|
||||
setPhase('running')
|
||||
setError(null)
|
||||
try {
|
||||
const res = await fetch(`${KLAUSUR_API}/api/v1/ocr-pipeline/sessions/${sessionId}/paddle-direct`, {
|
||||
method: 'POST',
|
||||
})
|
||||
if (!res.ok) {
|
||||
const data = await res.json().catch(() => ({}))
|
||||
throw new Error(data.detail || `HTTP ${res.status}`)
|
||||
}
|
||||
const data = await res.json()
|
||||
setStats({
|
||||
cells: data.summary?.total_cells || 0,
|
||||
rows: data.grid_shape?.rows || 0,
|
||||
duration: data.duration_seconds || 0,
|
||||
})
|
||||
setPhase('overlay')
|
||||
} catch (e: unknown) {
|
||||
setError(e instanceof Error ? e.message : 'Unbekannter Fehler')
|
||||
setPhase('idle')
|
||||
}
|
||||
}, [sessionId])
|
||||
|
||||
if (!sessionId) {
|
||||
return (
|
||||
<div className="text-sm text-gray-400 py-8 text-center">
|
||||
Bitte zuerst ein Bild hochladen.
|
||||
</div>
|
||||
)
|
||||
}
|
||||
|
||||
if (phase === 'overlay') {
|
||||
return (
|
||||
<div className="space-y-3">
|
||||
{stats && (
|
||||
<div className="flex items-center gap-4 text-xs text-gray-500 dark:text-gray-400">
|
||||
<span>{stats.cells} Woerter erkannt</span>
|
||||
<span>{stats.rows} Zeilen</span>
|
||||
<span>{stats.duration.toFixed(1)}s</span>
|
||||
</div>
|
||||
)}
|
||||
<OverlayReconstruction sessionId={sessionId} onNext={onNext} />
|
||||
</div>
|
||||
)
|
||||
}
|
||||
|
||||
return (
|
||||
<div className="flex flex-col items-center justify-center py-16 space-y-6">
|
||||
{phase === 'running' ? (
|
||||
<>
|
||||
<div className="w-10 h-10 border-4 border-teal-200 dark:border-teal-800 border-t-teal-600 dark:border-t-teal-400 rounded-full animate-spin" />
|
||||
<div className="text-center space-y-1">
|
||||
<p className="text-sm font-medium text-gray-700 dark:text-gray-300">
|
||||
PaddleOCR laeuft...
|
||||
</p>
|
||||
<p className="text-xs text-gray-400">
|
||||
Bild wird analysiert (ca. 5-30s)
|
||||
</p>
|
||||
</div>
|
||||
</>
|
||||
) : (
|
||||
<>
|
||||
<div className="text-center space-y-2">
|
||||
<div className="text-4xl">⚡</div>
|
||||
<h3 className="text-lg font-medium text-gray-700 dark:text-gray-300">
|
||||
Paddle Direct
|
||||
</h3>
|
||||
<p className="text-sm text-gray-500 dark:text-gray-400 max-w-md">
|
||||
PaddleOCR erkennt alle Woerter direkt auf dem Originalbild — ohne Begradigung, Entzerrung oder Zuschnitt.
|
||||
</p>
|
||||
</div>
|
||||
|
||||
{error && (
|
||||
<div className="text-sm text-red-500 bg-red-50 dark:bg-red-900/20 px-4 py-2 rounded-lg">
|
||||
{error}
|
||||
</div>
|
||||
)}
|
||||
|
||||
<button
|
||||
onClick={runPaddleDirect}
|
||||
className="px-6 py-2.5 bg-teal-600 text-white text-sm font-medium rounded-lg hover:bg-teal-700 transition-colors"
|
||||
>
|
||||
PaddleOCR starten
|
||||
</button>
|
||||
</>
|
||||
)}
|
||||
</div>
|
||||
)
|
||||
}
|
||||
Reference in New Issue
Block a user