Implementiert Buchwoelbungs-Entzerrung mit zwei Methoden:
- Methode A: Vertikale-Kanten-Analyse (Sobel + Polynom 2. Grades)
- Methode B: Textzeilen-Baseline (Tesseract + Baseline-Kruemmung)

Beste Methode wird automatisch gewaehlt, manueller Slider (-3 bis +3).

Backend: 3 neue Endpoints (auto/manual dewarp, ground truth)
Frontend: StepDewarp + DewarpControls, Pipeline von 6 auf 7 Schritte

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
93 lines
3.1 KiB
TypeScript
93 lines
3.1 KiB
TypeScript
'use client'
|
|
|
|
import { useState } from 'react'
|
|
import { PagePurpose } from '@/components/common/PagePurpose'
|
|
import { PipelineStepper } from '@/components/ocr-pipeline/PipelineStepper'
|
|
import { StepDeskew } from '@/components/ocr-pipeline/StepDeskew'
|
|
import { StepDewarp } from '@/components/ocr-pipeline/StepDewarp'
|
|
import { StepColumnDetection } from '@/components/ocr-pipeline/StepColumnDetection'
|
|
import { StepWordRecognition } from '@/components/ocr-pipeline/StepWordRecognition'
|
|
import { StepCoordinates } from '@/components/ocr-pipeline/StepCoordinates'
|
|
import { StepReconstruction } from '@/components/ocr-pipeline/StepReconstruction'
|
|
import { StepGroundTruth } from '@/components/ocr-pipeline/StepGroundTruth'
|
|
import { PIPELINE_STEPS, type PipelineStep } from './types'
|
|
|
|
/**
 * Page component for the step-by-step OCR pipeline.
 *
 * Keeps three pieces of state:
 * - `currentStep`: index of the step whose component is rendered,
 * - `sessionId`: the string handed to `onNext` by the deskew step, forwarded
 *   to the dewarp step,
 * - `steps`: the entries of `PIPELINE_STEPS`, each extended with a status
 *   ('active' for index 0, 'pending' for all others initially).
 */
export default function OcrPipelinePage() {
  const [currentStep, setCurrentStep] = useState(0)
  const [sessionId, setSessionId] = useState<string | null>(null)
  const [steps, setSteps] = useState<PipelineStep[]>(
    PIPELINE_STEPS.map((s, i) => ({
      ...s,
      status: i === 0 ? 'active' : 'pending',
    })),
  )

  /**
   * Jumps to the clicked step if it lies at or before the current step, or if
   * it has already been completed.
   *
   * @param index Position of the clicked step within `steps`.
   */
  const handleStepClick = (index: number) => {
    // Ignore indices that do not address a step. Without this guard a negative
    // index would be accepted (it is <= currentStep) and an index past the end
    // would throw when reading `.status` of undefined.
    const target = steps[index]
    if (!target) return

    // NOTE(review): a step ahead of `currentStep` whose status is 'active'
    // (the furthest step reached before navigating back) is not clickable
    // here — confirm whether that is intended.
    if (index <= currentStep || target.status === 'completed') {
      setCurrentStep(index)
    }
  }

  /**
   * Marks the current step as completed, the following one as active, and
   * moves on to it. Does nothing on the last step.
   */
  const handleNext = () => {
    const nextStep = currentStep + 1
    if (nextStep >= steps.length) return

    setSteps((prev) =>
      prev.map((s, i) => {
        if (i === currentStep) return { ...s, status: 'completed' }
        if (i === nextStep) return { ...s, status: 'active' }
        return s
      }),
    )
    // Both updates are derived from the same `currentStep` value so that the
    // displayed step and the status list cannot drift apart when this handler
    // runs more than once before the next render (e.g. a double click).
    setCurrentStep(nextStep)
  }

  /**
   * Completion callback for the deskew step: stores the session id it
   * reports and advances the pipeline.
   *
   * @param sid Session id passed by `StepDeskew` to its `onNext` callback.
   */
  const handleDeskewComplete = (sid: string) => {
    setSessionId(sid)
    handleNext()
  }

  /**
   * Returns the component for `currentStep`, or `null` for an index without
   * a matching step component.
   */
  const renderStep = () => {
    switch (currentStep) {
      case 0:
        return <StepDeskew onNext={handleDeskewComplete} />
      case 1:
        return <StepDewarp sessionId={sessionId} onNext={handleNext} />
      // The remaining steps are rendered without props.
      case 2:
        return <StepColumnDetection />
      case 3:
        return <StepWordRecognition />
      case 4:
        return <StepCoordinates />
      case 5:
        return <StepReconstruction />
      case 6:
        return <StepGroundTruth />
      default:
        return null
    }
  }

  return (
    <div className="space-y-6">
      <PagePurpose
        title="OCR Pipeline"
        purpose="Schrittweise Seitenrekonstruktion: Scan begradigen, Spalten erkennen, Woerter lokalisieren und die Seite Wort fuer Wort nachbauen. Ziel: 10 Vokabelseiten fehlerfrei rekonstruieren."
        audience={['Entwickler', 'Data Scientists']}
        architecture={{
          services: ['klausur-service (FastAPI)', 'OpenCV', 'Tesseract'],
          databases: ['In-Memory Sessions'],
        }}
        relatedPages={[
          { name: 'OCR Vergleich', href: '/ai/ocr-compare', description: 'Methoden-Vergleich' },
          { name: 'OCR-Labeling', href: '/ai/ocr-labeling', description: 'Trainingsdaten' },
        ]}
        defaultCollapsed
      />

      <PipelineStepper steps={steps} currentStep={currentStep} onStepClick={handleStepClick} />

      <div className="min-h-[400px]">{renderStep()}</div>
    </div>
  )
}
|