refactor: rename PaddleOCR → PP-OCRv5 in frontend, remove Kombi-Vergleich tab
Some checks failed
CI / go-lint (push) Has been skipped
CI / python-lint (push) Has been skipped
CI / nodejs-lint (push) Has been skipped
CI / test-go-school (push) Successful in 27s
CI / test-go-edu-search (push) Successful in 26s
CI / test-python-klausur (push) Failing after 1m53s
CI / test-python-agent-core (push) Successful in 16s
CI / test-nodejs-website (push) Successful in 16s
Some checks failed
CI / go-lint (push) Has been skipped
CI / python-lint (push) Has been skipped
CI / nodejs-lint (push) Has been skipped
CI / test-go-school (push) Successful in 27s
CI / test-go-edu-search (push) Successful in 26s
CI / test-python-klausur (push) Failing after 1m53s
CI / test-python-agent-core (push) Successful in 16s
CI / test-nodejs-website (push) Successful in 16s
Since ocr_region_paddle() now runs RapidOCR locally (using the same PP-OCRv5 models), the "PaddleOCR (Hetzner)" labels were misleading and have been renamed to "PP-OCRv5 (lokal)". The Kombi-Vergleich tab was removed because both sides of the comparison would now produce identical results.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -11,13 +11,12 @@ import { StepRowDetection } from '@/components/ocr-pipeline/StepRowDetection'
|
|||||||
import { StepWordRecognition } from '@/components/ocr-pipeline/StepWordRecognition'
|
import { StepWordRecognition } from '@/components/ocr-pipeline/StepWordRecognition'
|
||||||
import { OverlayReconstruction } from '@/components/ocr-overlay/OverlayReconstruction'
|
import { OverlayReconstruction } from '@/components/ocr-overlay/OverlayReconstruction'
|
||||||
import { PaddleDirectStep } from '@/components/ocr-overlay/PaddleDirectStep'
|
import { PaddleDirectStep } from '@/components/ocr-overlay/PaddleDirectStep'
|
||||||
import { KombiCompareStep } from '@/components/ocr-overlay/KombiCompareStep'
|
import { OVERLAY_PIPELINE_STEPS, PADDLE_DIRECT_STEPS, KOMBI_STEPS, DOCUMENT_CATEGORIES, dbStepToOverlayUi, type PipelineStep, type SessionListItem, type DocumentCategory } from './types'
|
||||||
import { OVERLAY_PIPELINE_STEPS, PADDLE_DIRECT_STEPS, KOMBI_STEPS, KOMBI_COMPARE_STEPS, DOCUMENT_CATEGORIES, dbStepToOverlayUi, type PipelineStep, type SessionListItem, type DocumentCategory } from './types'
|
|
||||||
|
|
||||||
const KLAUSUR_API = '/klausur-api'
|
const KLAUSUR_API = '/klausur-api'
|
||||||
|
|
||||||
export default function OcrOverlayPage() {
|
export default function OcrOverlayPage() {
|
||||||
const [mode, setMode] = useState<'pipeline' | 'paddle-direct' | 'kombi' | 'kombi-compare'>('pipeline')
|
const [mode, setMode] = useState<'pipeline' | 'paddle-direct' | 'kombi'>('pipeline')
|
||||||
const [currentStep, setCurrentStep] = useState(0)
|
const [currentStep, setCurrentStep] = useState(0)
|
||||||
const [sessionId, setSessionId] = useState<string | null>(null)
|
const [sessionId, setSessionId] = useState<string | null>(null)
|
||||||
const [sessionName, setSessionName] = useState<string>('')
|
const [sessionName, setSessionName] = useState<string>('')
|
||||||
@@ -67,12 +66,11 @@ export default function OcrOverlayPage() {
|
|||||||
// Check if this session was processed with paddle_direct, kombi, or rapid_kombi
|
// Check if this session was processed with paddle_direct, kombi, or rapid_kombi
|
||||||
const ocrEngine = data.word_result?.ocr_engine
|
const ocrEngine = data.word_result?.ocr_engine
|
||||||
const isPaddleDirect = ocrEngine === 'paddle_direct'
|
const isPaddleDirect = ocrEngine === 'paddle_direct'
|
||||||
const isKombi = ocrEngine === 'kombi'
|
const isKombi = ocrEngine === 'kombi' || ocrEngine === 'rapid_kombi'
|
||||||
const isRapidKombi = ocrEngine === 'rapid_kombi'
|
|
||||||
|
|
||||||
if (isPaddleDirect || isKombi || isRapidKombi) {
|
if (isPaddleDirect || isKombi) {
|
||||||
const m = isKombi ? 'kombi' : isPaddleDirect ? 'paddle-direct' : 'kombi-compare'
|
const m = isKombi ? 'kombi' : 'paddle-direct'
|
||||||
const baseSteps = isKombi ? KOMBI_STEPS : isRapidKombi ? KOMBI_COMPARE_STEPS : PADDLE_DIRECT_STEPS
|
const baseSteps = isKombi ? KOMBI_STEPS : PADDLE_DIRECT_STEPS
|
||||||
setMode(m)
|
setMode(m)
|
||||||
setSteps(
|
setSteps(
|
||||||
baseSteps.map((s, i) => ({
|
baseSteps.map((s, i) => ({
|
||||||
@@ -107,7 +105,7 @@ export default function OcrOverlayPage() {
|
|||||||
if (sessionId === sid) {
|
if (sessionId === sid) {
|
||||||
setSessionId(null)
|
setSessionId(null)
|
||||||
setCurrentStep(0)
|
setCurrentStep(0)
|
||||||
const baseSteps = mode === 'kombi' ? KOMBI_STEPS : mode === 'kombi-compare' ? KOMBI_COMPARE_STEPS : mode === 'paddle-direct' ? PADDLE_DIRECT_STEPS : OVERLAY_PIPELINE_STEPS
|
const baseSteps = mode === 'kombi' ? KOMBI_STEPS : mode === 'paddle-direct' ? PADDLE_DIRECT_STEPS : OVERLAY_PIPELINE_STEPS
|
||||||
setSteps(baseSteps.map((s, i) => ({ ...s, status: i === 0 ? 'active' : 'pending' })))
|
setSteps(baseSteps.map((s, i) => ({ ...s, status: i === 0 ? 'active' : 'pending' })))
|
||||||
}
|
}
|
||||||
} catch (e) {
|
} catch (e) {
|
||||||
@@ -164,7 +162,7 @@ export default function OcrOverlayPage() {
|
|||||||
const handleNext = () => {
|
const handleNext = () => {
|
||||||
if (currentStep >= steps.length - 1) {
|
if (currentStep >= steps.length - 1) {
|
||||||
// Last step completed — return to session list
|
// Last step completed — return to session list
|
||||||
const baseSteps = mode === 'kombi' ? KOMBI_STEPS : mode === 'kombi-compare' ? KOMBI_COMPARE_STEPS : mode === 'paddle-direct' ? PADDLE_DIRECT_STEPS : OVERLAY_PIPELINE_STEPS
|
const baseSteps = mode === 'kombi' ? KOMBI_STEPS : mode === 'paddle-direct' ? PADDLE_DIRECT_STEPS : OVERLAY_PIPELINE_STEPS
|
||||||
setSteps(baseSteps.map((s, i) => ({ ...s, status: i === 0 ? 'active' : 'pending' })))
|
setSteps(baseSteps.map((s, i) => ({ ...s, status: i === 0 ? 'active' : 'pending' })))
|
||||||
setCurrentStep(0)
|
setCurrentStep(0)
|
||||||
setSessionId(null)
|
setSessionId(null)
|
||||||
@@ -193,7 +191,7 @@ export default function OcrOverlayPage() {
|
|||||||
setSessionId(null)
|
setSessionId(null)
|
||||||
setSessionName('')
|
setSessionName('')
|
||||||
setCurrentStep(0)
|
setCurrentStep(0)
|
||||||
const baseSteps = mode === 'kombi' ? KOMBI_STEPS : mode === 'kombi-compare' ? KOMBI_COMPARE_STEPS : mode === 'paddle-direct' ? PADDLE_DIRECT_STEPS : OVERLAY_PIPELINE_STEPS
|
const baseSteps = mode === 'kombi' ? KOMBI_STEPS : mode === 'paddle-direct' ? PADDLE_DIRECT_STEPS : OVERLAY_PIPELINE_STEPS
|
||||||
setSteps(baseSteps.map((s, i) => ({ ...s, status: i === 0 ? 'active' : 'pending' })))
|
setSteps(baseSteps.map((s, i) => ({ ...s, status: i === 0 ? 'active' : 'pending' })))
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -232,7 +230,7 @@ export default function OcrOverlayPage() {
|
|||||||
}, [sessionId, goToStep])
|
}, [sessionId, goToStep])
|
||||||
|
|
||||||
const renderStep = () => {
|
const renderStep = () => {
|
||||||
if (mode === 'paddle-direct' || mode === 'kombi' || mode === 'kombi-compare') {
|
if (mode === 'paddle-direct' || mode === 'kombi') {
|
||||||
switch (currentStep) {
|
switch (currentStep) {
|
||||||
case 0:
|
case 0:
|
||||||
return <StepOrientation sessionId={sessionId} onNext={handleOrientationComplete} />
|
return <StepOrientation sessionId={sessionId} onNext={handleOrientationComplete} />
|
||||||
@@ -243,19 +241,16 @@ export default function OcrOverlayPage() {
|
|||||||
case 3:
|
case 3:
|
||||||
return <StepCrop sessionId={sessionId} onNext={handleNext} />
|
return <StepCrop sessionId={sessionId} onNext={handleNext} />
|
||||||
case 4:
|
case 4:
|
||||||
if (mode === 'kombi-compare') {
|
|
||||||
return <KombiCompareStep sessionId={sessionId} onNext={handleNext} />
|
|
||||||
}
|
|
||||||
return mode === 'kombi' ? (
|
return mode === 'kombi' ? (
|
||||||
<PaddleDirectStep
|
<PaddleDirectStep
|
||||||
sessionId={sessionId}
|
sessionId={sessionId}
|
||||||
onNext={handleNext}
|
onNext={handleNext}
|
||||||
endpoint="paddle-kombi"
|
endpoint="paddle-kombi"
|
||||||
title="Kombi-Modus"
|
title="Kombi-Modus"
|
||||||
description="PaddleOCR und Tesseract laufen parallel. Koordinaten werden gewichtet gemittelt fuer optimale Positionierung."
|
description="PP-OCRv5 und Tesseract laufen parallel. Koordinaten werden gewichtet gemittelt fuer optimale Positionierung."
|
||||||
icon="🔀"
|
icon="🔀"
|
||||||
buttonLabel="Paddle + Tesseract starten"
|
buttonLabel="PP-OCRv5 + Tesseract starten"
|
||||||
runningLabel="Paddle + Tesseract laufen..."
|
runningLabel="PP-OCRv5 + Tesseract laufen..."
|
||||||
engineKey="kombi"
|
engineKey="kombi"
|
||||||
/>
|
/>
|
||||||
) : (
|
) : (
|
||||||
@@ -501,7 +496,7 @@ export default function OcrOverlayPage() {
|
|||||||
: 'text-gray-500 dark:text-gray-400 hover:text-gray-700 dark:hover:text-gray-300'
|
: 'text-gray-500 dark:text-gray-400 hover:text-gray-700 dark:hover:text-gray-300'
|
||||||
}`}
|
}`}
|
||||||
>
|
>
|
||||||
Paddle Direct (5 Schritte)
|
PP-OCRv5 Direct (5 Schritte)
|
||||||
</button>
|
</button>
|
||||||
<button
|
<button
|
||||||
onClick={() => {
|
onClick={() => {
|
||||||
@@ -519,22 +514,6 @@ export default function OcrOverlayPage() {
|
|||||||
>
|
>
|
||||||
Kombi (5 Schritte)
|
Kombi (5 Schritte)
|
||||||
</button>
|
</button>
|
||||||
<button
|
|
||||||
onClick={() => {
|
|
||||||
if (mode === 'kombi-compare') return
|
|
||||||
setMode('kombi-compare')
|
|
||||||
setCurrentStep(0)
|
|
||||||
setSessionId(null)
|
|
||||||
setSteps(KOMBI_COMPARE_STEPS.map((s, i) => ({ ...s, status: i === 0 ? 'active' : 'pending' })))
|
|
||||||
}}
|
|
||||||
className={`px-3 py-1.5 text-xs font-medium rounded-md transition-colors ${
|
|
||||||
mode === 'kombi-compare'
|
|
||||||
? 'bg-white dark:bg-gray-700 text-gray-700 dark:text-gray-200 shadow-sm'
|
|
||||||
: 'text-gray-500 dark:text-gray-400 hover:text-gray-700 dark:hover:text-gray-300'
|
|
||||||
}`}
|
|
||||||
>
|
|
||||||
Vergleich (5 Schritte)
|
|
||||||
</button>
|
|
||||||
</div>
|
</div>
|
||||||
|
|
||||||
<PipelineStepper
|
<PipelineStepper
|
||||||
|
|||||||
@@ -57,11 +57,11 @@ export const PADDLE_DIRECT_STEPS: PipelineStep[] = [
|
|||||||
{ id: 'deskew', name: 'Begradigung', icon: '📐', status: 'pending' },
|
{ id: 'deskew', name: 'Begradigung', icon: '📐', status: 'pending' },
|
||||||
{ id: 'dewarp', name: 'Entzerrung', icon: '🔧', status: 'pending' },
|
{ id: 'dewarp', name: 'Entzerrung', icon: '🔧', status: 'pending' },
|
||||||
{ id: 'crop', name: 'Zuschneiden', icon: '✂️', status: 'pending' },
|
{ id: 'crop', name: 'Zuschneiden', icon: '✂️', status: 'pending' },
|
||||||
{ id: 'paddle-direct', name: 'PaddleOCR + Overlay', icon: '⚡', status: 'pending' },
|
{ id: 'paddle-direct', name: 'PP-OCRv5 + Overlay', icon: '⚡', status: 'pending' },
|
||||||
]
|
]
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* 5-step pipeline for Kombi mode (PaddleOCR + Tesseract).
|
* 5-step pipeline for Kombi mode (PP-OCRv5 + Tesseract).
|
||||||
* Same preprocessing, then both engines run and results are merged.
|
* Same preprocessing, then both engines run and results are merged.
|
||||||
*/
|
*/
|
||||||
export const KOMBI_STEPS: PipelineStep[] = [
|
export const KOMBI_STEPS: PipelineStep[] = [
|
||||||
@@ -69,19 +69,7 @@ export const KOMBI_STEPS: PipelineStep[] = [
|
|||||||
{ id: 'deskew', name: 'Begradigung', icon: '📐', status: 'pending' },
|
{ id: 'deskew', name: 'Begradigung', icon: '📐', status: 'pending' },
|
||||||
{ id: 'dewarp', name: 'Entzerrung', icon: '🔧', status: 'pending' },
|
{ id: 'dewarp', name: 'Entzerrung', icon: '🔧', status: 'pending' },
|
||||||
{ id: 'crop', name: 'Zuschneiden', icon: '✂️', status: 'pending' },
|
{ id: 'crop', name: 'Zuschneiden', icon: '✂️', status: 'pending' },
|
||||||
{ id: 'kombi', name: 'Paddle + Tesseract', icon: '🔀', status: 'pending' },
|
{ id: 'kombi', name: 'PP-OCRv5 + Tesseract', icon: '🔀', status: 'pending' },
|
||||||
]
|
|
||||||
|
|
||||||
/**
|
|
||||||
* 5-step pipeline for Kombi-Vergleich mode (Paddle-Kombi vs Rapid-Kombi side-by-side).
|
|
||||||
* Same preprocessing, then both kombi engines run in parallel and are shown side-by-side.
|
|
||||||
*/
|
|
||||||
export const KOMBI_COMPARE_STEPS: PipelineStep[] = [
|
|
||||||
{ id: 'orientation', name: 'Orientierung', icon: '🔄', status: 'pending' },
|
|
||||||
{ id: 'deskew', name: 'Begradigung', icon: '📐', status: 'pending' },
|
|
||||||
{ id: 'dewarp', name: 'Entzerrung', icon: '🔧', status: 'pending' },
|
|
||||||
{ id: 'crop', name: 'Zuschneiden', icon: '✂️', status: 'pending' },
|
|
||||||
{ id: 'kombi-compare', name: 'Kombi-Vergleich', icon: '⚖️', status: 'pending' },
|
|
||||||
]
|
]
|
||||||
|
|
||||||
/** Map from DB step to overlay UI step index */
|
/** Map from DB step to overlay UI step index */
|
||||||
|
|||||||
@@ -30,11 +30,11 @@ export function PaddleDirectStep({
|
|||||||
sessionId,
|
sessionId,
|
||||||
onNext,
|
onNext,
|
||||||
endpoint = 'paddle-direct',
|
endpoint = 'paddle-direct',
|
||||||
title = 'Paddle Direct',
|
title = 'PP-OCRv5 Direct',
|
||||||
description = 'PaddleOCR erkennt alle Woerter direkt auf dem Originalbild — ohne Begradigung, Entzerrung oder Zuschnitt.',
|
description = 'PP-OCRv5 (lokal via RapidOCR) erkennt alle Woerter direkt auf dem Originalbild — ohne Begradigung, Entzerrung oder Zuschnitt.',
|
||||||
icon = '⚡',
|
icon = '⚡',
|
||||||
buttonLabel = 'PaddleOCR starten',
|
buttonLabel = 'PP-OCRv5 starten',
|
||||||
runningLabel = 'PaddleOCR laeuft...',
|
runningLabel = 'PP-OCRv5 laeuft...',
|
||||||
engineKey = 'paddle_direct',
|
engineKey = 'paddle_direct',
|
||||||
}: PaddleDirectStepProps) {
|
}: PaddleDirectStepProps) {
|
||||||
const [phase, setPhase] = useState<Phase>('idle')
|
const [phase, setPhase] = useState<Phase>('idle')
|
||||||
|
|||||||
@@ -108,7 +108,7 @@ export function StepWordRecognition({ sessionId, onNext, goToStep, skipHealGaps
|
|||||||
setGridResult(null)
|
setGridResult(null)
|
||||||
|
|
||||||
try {
|
try {
|
||||||
// PaddleOCR forces words_first on the backend, so align frontend accordingly
|
// PP-OCRv5 forces words_first on the backend, so align frontend accordingly
|
||||||
const effectiveGridMethod = eng === 'paddle' ? 'words_first' : gridMethod
|
const effectiveGridMethod = eng === 'paddle' ? 'words_first' : gridMethod
|
||||||
const useStream = effectiveGridMethod === 'v2'
|
const useStream = effectiveGridMethod === 'v2'
|
||||||
|
|
||||||
@@ -133,7 +133,7 @@ export function StepWordRecognition({ sessionId, onNext, goToStep, skipHealGaps
|
|||||||
throw new Error(err.detail || 'Worterkennung fehlgeschlagen')
|
throw new Error(err.detail || 'Worterkennung fehlgeschlagen')
|
||||||
}
|
}
|
||||||
|
|
||||||
// words_first / paddle returns plain JSON (no streaming)
|
// words_first / pp-ocrv5 returns plain JSON (no streaming)
|
||||||
if (!useStream) {
|
if (!useStream) {
|
||||||
const data = await res.json() as GridResult
|
const data = await res.json() as GridResult
|
||||||
applyGridResult(data)
|
applyGridResult(data)
|
||||||
@@ -820,7 +820,7 @@ export function StepWordRecognition({ sessionId, onNext, goToStep, skipHealGaps
|
|||||||
<option value="auto">Auto (RapidOCR wenn verfuegbar)</option>
|
<option value="auto">Auto (RapidOCR wenn verfuegbar)</option>
|
||||||
<option value="rapid">RapidOCR (ONNX)</option>
|
<option value="rapid">RapidOCR (ONNX)</option>
|
||||||
<option value="tesseract">Tesseract</option>
|
<option value="tesseract">Tesseract</option>
|
||||||
<option value="paddle">PaddleOCR (Hetzner)</option>
|
<option value="paddle">PP-OCRv5 (lokal)</option>
|
||||||
</select>
|
</select>
|
||||||
|
|
||||||
{/* Pronunciation selector (only for vocab) */}
|
{/* Pronunciation selector (only for vocab) */}
|
||||||
@@ -846,13 +846,11 @@ export function StepWordRecognition({ sessionId, onNext, goToStep, skipHealGaps
|
|||||||
{/* Show which engine was used */}
|
{/* Show which engine was used */}
|
||||||
{usedEngine && (
|
{usedEngine && (
|
||||||
<span className={`px-2 py-0.5 rounded text-[10px] uppercase font-semibold ${
|
<span className={`px-2 py-0.5 rounded text-[10px] uppercase font-semibold ${
|
||||||
usedEngine === 'rapid'
|
usedEngine === 'rapid' || usedEngine === 'paddle'
|
||||||
? 'bg-purple-100 dark:bg-purple-900/30 text-purple-700 dark:text-purple-300'
|
? 'bg-purple-100 dark:bg-purple-900/30 text-purple-700 dark:text-purple-300'
|
||||||
: usedEngine === 'paddle'
|
|
||||||
? 'bg-blue-100 dark:bg-blue-900/30 text-blue-700 dark:text-blue-300'
|
|
||||||
: 'bg-gray-100 dark:bg-gray-700 text-gray-600 dark:text-gray-400'
|
: 'bg-gray-100 dark:bg-gray-700 text-gray-600 dark:text-gray-400'
|
||||||
}`}>
|
}`}>
|
||||||
{usedEngine}
|
{usedEngine === 'paddle' ? 'pp-ocrv5' : usedEngine}
|
||||||
</span>
|
</span>
|
||||||
)}
|
)}
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user