feat: Dewarp-Korrektur als Schritt 2 in OCR Pipeline (7 Schritte)
Implementiert Buchwoelbungs-Entzerrung mit zwei Methoden:
- Methode A: Vertikale-Kanten-Analyse (Sobel + Polynom 2. Grades)
- Methode B: Textzeilen-Baseline (Tesseract + Baseline-Kruemmung)

Beste Methode wird automatisch gewaehlt, manueller Slider (-3 bis +3).

Backend: 3 neue Endpoints (auto/manual dewarp, ground truth)
Frontend: StepDewarp + DewarpControls, Pipeline von 6 auf 7 Schritte

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -4,6 +4,7 @@ import { useState } from 'react'
|
|||||||
import { PagePurpose } from '@/components/common/PagePurpose'
|
import { PagePurpose } from '@/components/common/PagePurpose'
|
||||||
import { PipelineStepper } from '@/components/ocr-pipeline/PipelineStepper'
|
import { PipelineStepper } from '@/components/ocr-pipeline/PipelineStepper'
|
||||||
import { StepDeskew } from '@/components/ocr-pipeline/StepDeskew'
|
import { StepDeskew } from '@/components/ocr-pipeline/StepDeskew'
|
||||||
|
import { StepDewarp } from '@/components/ocr-pipeline/StepDewarp'
|
||||||
import { StepColumnDetection } from '@/components/ocr-pipeline/StepColumnDetection'
|
import { StepColumnDetection } from '@/components/ocr-pipeline/StepColumnDetection'
|
||||||
import { StepWordRecognition } from '@/components/ocr-pipeline/StepWordRecognition'
|
import { StepWordRecognition } from '@/components/ocr-pipeline/StepWordRecognition'
|
||||||
import { StepCoordinates } from '@/components/ocr-pipeline/StepCoordinates'
|
import { StepCoordinates } from '@/components/ocr-pipeline/StepCoordinates'
|
||||||
@@ -13,6 +14,7 @@ import { PIPELINE_STEPS, type PipelineStep } from './types'
|
|||||||
|
|
||||||
export default function OcrPipelinePage() {
|
export default function OcrPipelinePage() {
|
||||||
const [currentStep, setCurrentStep] = useState(0)
|
const [currentStep, setCurrentStep] = useState(0)
|
||||||
|
const [sessionId, setSessionId] = useState<string | null>(null)
|
||||||
const [steps, setSteps] = useState<PipelineStep[]>(
|
const [steps, setSteps] = useState<PipelineStep[]>(
|
||||||
PIPELINE_STEPS.map((s, i) => ({
|
PIPELINE_STEPS.map((s, i) => ({
|
||||||
...s,
|
...s,
|
||||||
@@ -39,19 +41,26 @@ export default function OcrPipelinePage() {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
 * Completion callback for the deskew step: remembers the session id that the
 * step reports and then advances the pipeline to the next step.
 */
function handleDeskewComplete(sid: string): void {
  setSessionId(sid)
  handleNext()
}
|
||||||
|
|
||||||
const renderStep = () => {
|
const renderStep = () => {
|
||||||
switch (currentStep) {
|
switch (currentStep) {
|
||||||
case 0:
|
case 0:
|
||||||
return <StepDeskew onNext={handleNext} />
|
return <StepDeskew onNext={handleDeskewComplete} />
|
||||||
case 1:
|
case 1:
|
||||||
return <StepColumnDetection />
|
return <StepDewarp sessionId={sessionId} onNext={handleNext} />
|
||||||
case 2:
|
case 2:
|
||||||
return <StepWordRecognition />
|
return <StepColumnDetection />
|
||||||
case 3:
|
case 3:
|
||||||
return <StepCoordinates />
|
return <StepWordRecognition />
|
||||||
case 4:
|
case 4:
|
||||||
return <StepReconstruction />
|
return <StepCoordinates />
|
||||||
case 5:
|
case 5:
|
||||||
|
return <StepReconstruction />
|
||||||
|
case 6:
|
||||||
return <StepGroundTruth />
|
return <StepGroundTruth />
|
||||||
default:
|
default:
|
||||||
return null
|
return null
|
||||||
|
|||||||
@@ -33,8 +33,25 @@ export interface DeskewGroundTruth {
|
|||||||
notes?: string
|
notes?: string
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/** Identifier of the dewarp strategy that produced a result. */
type DewarpMethodUsed = 'vertical_edge' | 'text_baseline' | 'manual' | 'none'

/**
 * Result payload of a dewarp run as consumed by the dewarp step UI.
 *
 * NOTE(review): field meanings below are inferred from how the UI renders
 * them; confirm against the backend contract.
 */
export interface DewarpResult {
  /** Pipeline session this result belongs to. */
  session_id: string
  /** Strategy that produced the result. */
  method_used: DewarpMethodUsed
  /** Measured curvature; the UI displays it with a "px" suffix. */
  curvature_px: number
  /** Confidence score; the UI multiplies it by 100 and shows a percentage. */
  confidence: number
  /** Processing time (presumably in seconds, per the field name). */
  duration_seconds: number
  /** Location of the dewarped image; the UI prefixes it with the API base path. */
  dewarped_image_url: string
  /** Scale reported back after a manual dewarp; absent otherwise. */
  scale_applied?: number
}
|
||||||
|
|
||||||
|
/**
 * Reviewer feedback on a dewarp result, sent to the ground-truth endpoint.
 */
export interface DewarpGroundTruth {
  /** Whether the reviewer judged the dewarp result to be correct. */
  is_correct: boolean
  /** Free-text remarks from the reviewer, if any. */
  notes?: string
  /** Manual scale the reviewer considers right; omitted when none was chosen. */
  corrected_scale?: number
}
|
||||||
|
|
||||||
export const PIPELINE_STEPS: PipelineStep[] = [
|
export const PIPELINE_STEPS: PipelineStep[] = [
|
||||||
{ id: 'deskew', name: 'Begradigung', icon: '📐', status: 'pending' },
|
{ id: 'deskew', name: 'Begradigung', icon: '📐', status: 'pending' },
|
||||||
|
{ id: 'dewarp', name: 'Entzerrung', icon: '🔧', status: 'pending' },
|
||||||
{ id: 'columns', name: 'Spalten', icon: '📊', status: 'pending' },
|
{ id: 'columns', name: 'Spalten', icon: '📊', status: 'pending' },
|
||||||
{ id: 'words', name: 'Woerter', icon: '🔤', status: 'pending' },
|
{ id: 'words', name: 'Woerter', icon: '🔤', status: 'pending' },
|
||||||
{ id: 'coordinates', name: 'Koordinaten', icon: '📍', status: 'pending' },
|
{ id: 'coordinates', name: 'Koordinaten', icon: '📍', status: 'pending' },
|
||||||
|
|||||||
194
admin-lehrer/components/ocr-pipeline/DewarpControls.tsx
Normal file
194
admin-lehrer/components/ocr-pipeline/DewarpControls.tsx
Normal file
@@ -0,0 +1,194 @@
|
|||||||
|
'use client'
|
||||||
|
|
||||||
|
import { useState } from 'react'
|
||||||
|
import type { DewarpResult, DewarpGroundTruth } from '@/app/(admin)/ai/ocr-pipeline/types'
|
||||||
|
|
||||||
|
/** Props accepted by the dewarp control panel. */
interface DewarpControlsProps {
  /** Current dewarp result; while `null` the panel renders no controls. */
  dewarpResult: DewarpResult | null
  /** True while a manual dewarp request is in flight (disables "Anwenden"). */
  isApplying: boolean
  /** Whether the grid overlay is currently switched on. */
  showGrid: boolean
  /** Flips the grid overlay on or off. */
  onToggleGrid: () => void
  /** Requests a manual dewarp with the scale chosen on the slider. */
  onManualDewarp: (scale: number) => void
  /** Submits reviewer feedback about the current result. */
  onGroundTruth: (gt: DewarpGroundTruth) => void
  /** Accepts the current result and moves on to the next pipeline step. */
  onNext: () => void
}
|
||||||
|
|
||||||
|
/**
 * Display labels (German UI text) for the dewarp method identifiers.
 * Lookups for identifiers that are not listed here yield `undefined`.
 */
const METHOD_LABELS: Record<string, string> = {
  'vertical_edge': 'Vertikale Kanten',
  'text_baseline': 'Textzeilen-Baseline',
  'manual': 'Manuell',
  'none': 'Keine Korrektur',
}
|
||||||
|
|
||||||
|
/**
 * Control panel for the dewarp step.
 *
 * Shows the metrics of the current dewarp result, a grid toggle, a slider to
 * request a manual dewarp (-3 to +3), a ground-truth feedback form and the
 * button that advances to the next step. While `dewarpResult` is `null` only
 * the empty wrapper element is rendered.
 *
 * All buttons carry an explicit `type="button"` so the panel never submits an
 * enclosing form, and the slider / textarea / toggle expose accessible names
 * and state.
 */
export function DewarpControls({
  dewarpResult,
  showGrid,
  onToggleGrid,
  onManualDewarp,
  onGroundTruth,
  onNext,
  isApplying,
}: DewarpControlsProps) {
  const [manualScale, setManualScale] = useState(0)
  const [gtFeedback, setGtFeedback] = useState<'correct' | 'incorrect' | null>(null)
  const [gtNotes, setGtNotes] = useState('')
  const [gtSaved, setGtSaved] = useState(false)

  // "Ja" submits immediately; "Nein" only opens the notes form, the actual
  // submission then happens through handleGroundTruthIncorrect.
  const handleGroundTruth = (isCorrect: boolean) => {
    setGtFeedback(isCorrect ? 'correct' : 'incorrect')
    if (isCorrect) {
      onGroundTruth({ is_correct: true })
      setGtSaved(true)
    }
  }

  // A slider position of 0 means "no manual correction chosen", so the scale
  // is omitted from the feedback in that case; empty notes are omitted too.
  const handleGroundTruthIncorrect = () => {
    onGroundTruth({
      is_correct: false,
      corrected_scale: manualScale !== 0 ? manualScale : undefined,
      notes: gtNotes || undefined,
    })
    setGtSaved(true)
  }

  // Nothing to control until a result exists; keep the wrapper for layout.
  if (!dewarpResult) {
    return <div className="space-y-4" />
  }

  const cardClass =
    'bg-white dark:bg-gray-800 rounded-lg border border-gray-200 dark:border-gray-700 p-4'
  const dividerClass = 'h-4 w-px bg-gray-300 dark:bg-gray-600'
  // Fall back to the raw identifier only when no label is defined for it.
  const methodLabel = METHOD_LABELS[dewarpResult.method_used] ?? dewarpResult.method_used

  return (
    <div className="space-y-4">
      {/* Results */}
      <div className={cardClass}>
        <div className="flex flex-wrap items-center gap-3 text-sm">
          <div>
            <span className="text-gray-500">Kruemmung:</span>{' '}
            <span className="font-mono font-medium">{dewarpResult.curvature_px} px</span>
          </div>
          <div className={dividerClass} />
          <div>
            <span className="text-gray-500">Methode:</span>{' '}
            <span className="inline-flex items-center px-2 py-0.5 rounded-full text-xs font-medium bg-teal-100 text-teal-700 dark:bg-teal-900/40 dark:text-teal-300">
              {methodLabel}
            </span>
          </div>
          <div className={dividerClass} />
          <div>
            <span className="text-gray-500">Konfidenz:</span>{' '}
            <span className="font-mono">{Math.round(dewarpResult.confidence * 100)}%</span>
          </div>
        </div>

        {/* Toggle */}
        <div className="flex gap-3 mt-3">
          <button
            type="button"
            aria-pressed={showGrid}
            onClick={onToggleGrid}
            className={`text-xs px-3 py-1 rounded-full border transition-colors ${
              showGrid
                ? 'bg-teal-100 border-teal-300 text-teal-700 dark:bg-teal-900/40 dark:border-teal-600 dark:text-teal-300'
                : 'border-gray-300 text-gray-500 dark:border-gray-600 dark:text-gray-400'
            }`}
          >
            Raster anzeigen
          </button>
        </div>
      </div>

      {/* Manual scale slider */}
      <div className={cardClass}>
        <div className="text-sm font-medium text-gray-700 dark:text-gray-300 mb-2">Manuelle Staerke</div>
        <div className="flex items-center gap-3">
          <span className="text-xs text-gray-400 w-8 text-right">-3.0</span>
          <input
            type="range"
            aria-label="Manuelle Staerke"
            min={-3}
            max={3}
            step={0.1}
            value={manualScale}
            onChange={(e) => setManualScale(parseFloat(e.target.value))}
            className="flex-1 h-2 bg-gray-200 rounded-lg appearance-none cursor-pointer dark:bg-gray-700 accent-teal-500"
          />
          <span className="text-xs text-gray-400 w-8">+3.0</span>
          <span className="font-mono text-sm w-14 text-right">{manualScale.toFixed(1)}</span>
          <button
            type="button"
            onClick={() => onManualDewarp(manualScale)}
            disabled={isApplying}
            className="px-3 py-1.5 text-sm bg-teal-600 text-white rounded-md hover:bg-teal-700 disabled:opacity-50 transition-colors"
          >
            {isApplying ? '...' : 'Anwenden'}
          </button>
        </div>
        <p className="text-xs text-gray-400 mt-1">
          0 = keine Korrektur, positiv = nach rechts entzerren, negativ = nach links
        </p>
      </div>

      {/* Ground Truth */}
      <div className={cardClass}>
        <div className="text-sm font-medium text-gray-700 dark:text-gray-300 mb-2">
          Korrekt entzerrt?
        </div>
        {!gtSaved ? (
          <div className="space-y-3">
            <div className="flex gap-2">
              <button
                type="button"
                onClick={() => handleGroundTruth(true)}
                className={`px-4 py-1.5 rounded-md text-sm font-medium transition-colors ${
                  gtFeedback === 'correct'
                    ? 'bg-green-100 text-green-700 ring-2 ring-green-400'
                    : 'bg-gray-100 text-gray-600 hover:bg-green-50 dark:bg-gray-700 dark:text-gray-300'
                }`}
              >
                Ja
              </button>
              <button
                type="button"
                onClick={() => handleGroundTruth(false)}
                className={`px-4 py-1.5 rounded-md text-sm font-medium transition-colors ${
                  gtFeedback === 'incorrect'
                    ? 'bg-red-100 text-red-700 ring-2 ring-red-400'
                    : 'bg-gray-100 text-gray-600 hover:bg-red-50 dark:bg-gray-700 dark:text-gray-300'
                }`}
              >
                Nein
              </button>
            </div>
            {gtFeedback === 'incorrect' && (
              <div className="space-y-2">
                <textarea
                  aria-label="Notizen zur Korrektur"
                  value={gtNotes}
                  onChange={(e) => setGtNotes(e.target.value)}
                  placeholder="Notizen zur Korrektur..."
                  className="w-full text-sm border border-gray-300 dark:border-gray-600 rounded-md p-2 bg-white dark:bg-gray-900 text-gray-800 dark:text-gray-200"
                  rows={2}
                />
                <button
                  type="button"
                  onClick={handleGroundTruthIncorrect}
                  className="text-sm px-3 py-1 bg-red-600 text-white rounded-md hover:bg-red-700 transition-colors"
                >
                  Feedback speichern
                </button>
              </div>
            )}
          </div>
        ) : (
          <div className="text-sm text-green-600 dark:text-green-400">
            Feedback gespeichert
          </div>
        )}
      </div>

      {/* Next button */}
      <div className="flex justify-end">
        <button
          type="button"
          onClick={onNext}
          className="px-6 py-2 bg-teal-600 text-white rounded-lg hover:bg-teal-700 font-medium transition-colors"
        >
          Uebernehmen & Weiter →
        </button>
      </div>
    </div>
  )
}
|
||||||
@@ -11,6 +11,8 @@ interface ImageCompareViewProps {
|
|||||||
showGrid: boolean
|
showGrid: boolean
|
||||||
showBinarized: boolean
|
showBinarized: boolean
|
||||||
binarizedUrl: string | null
|
binarizedUrl: string | null
|
||||||
|
leftLabel?: string
|
||||||
|
rightLabel?: string
|
||||||
}
|
}
|
||||||
|
|
||||||
function MmGridOverlay() {
|
function MmGridOverlay() {
|
||||||
@@ -77,6 +79,8 @@ export function ImageCompareView({
|
|||||||
showGrid,
|
showGrid,
|
||||||
showBinarized,
|
showBinarized,
|
||||||
binarizedUrl,
|
binarizedUrl,
|
||||||
|
leftLabel,
|
||||||
|
rightLabel,
|
||||||
}: ImageCompareViewProps) {
|
}: ImageCompareViewProps) {
|
||||||
const [leftError, setLeftError] = useState(false)
|
const [leftError, setLeftError] = useState(false)
|
||||||
const [rightError, setRightError] = useState(false)
|
const [rightError, setRightError] = useState(false)
|
||||||
@@ -87,7 +91,7 @@ export function ImageCompareView({
|
|||||||
<div className="grid grid-cols-1 lg:grid-cols-2 gap-4">
|
<div className="grid grid-cols-1 lg:grid-cols-2 gap-4">
|
||||||
{/* Left: Original */}
|
{/* Left: Original */}
|
||||||
<div className="space-y-2">
|
<div className="space-y-2">
|
||||||
<h3 className="text-sm font-medium text-gray-500 dark:text-gray-400">Original (unbearbeitet)</h3>
|
<h3 className="text-sm font-medium text-gray-500 dark:text-gray-400">{leftLabel || 'Original (unbearbeitet)'}</h3>
|
||||||
<div className="relative bg-gray-100 dark:bg-gray-900 rounded-lg overflow-hidden border border-gray-200 dark:border-gray-700"
|
<div className="relative bg-gray-100 dark:bg-gray-900 rounded-lg overflow-hidden border border-gray-200 dark:border-gray-700"
|
||||||
style={{ aspectRatio: '210/297' }}>
|
style={{ aspectRatio: '210/297' }}>
|
||||||
{originalUrl && !leftError ? (
|
{originalUrl && !leftError ? (
|
||||||
@@ -108,7 +112,7 @@ export function ImageCompareView({
|
|||||||
{/* Right: Deskewed with Grid */}
|
{/* Right: Deskewed with Grid */}
|
||||||
<div className="space-y-2">
|
<div className="space-y-2">
|
||||||
<h3 className="text-sm font-medium text-gray-500 dark:text-gray-400">
|
<h3 className="text-sm font-medium text-gray-500 dark:text-gray-400">
|
||||||
{showBinarized ? 'Binarisiert' : 'Begradigt'} {showGrid && '+ Raster (mm)'}
|
{rightLabel || `${showBinarized ? 'Binarisiert' : 'Begradigt'}${showGrid ? ' + Raster (mm)' : ''}`}
|
||||||
</h3>
|
</h3>
|
||||||
<div className="relative bg-gray-100 dark:bg-gray-900 rounded-lg overflow-hidden border border-gray-200 dark:border-gray-700"
|
<div className="relative bg-gray-100 dark:bg-gray-900 rounded-lg overflow-hidden border border-gray-200 dark:border-gray-700"
|
||||||
style={{ aspectRatio: '210/297' }}>
|
style={{ aspectRatio: '210/297' }}>
|
||||||
|
|||||||
@@ -5,7 +5,7 @@ export function StepColumnDetection() {
|
|||||||
<div className="flex flex-col items-center justify-center py-16 text-center">
|
<div className="flex flex-col items-center justify-center py-16 text-center">
|
||||||
<div className="text-5xl mb-4">📊</div>
|
<div className="text-5xl mb-4">📊</div>
|
||||||
<h3 className="text-lg font-medium text-gray-700 dark:text-gray-300 mb-2">
|
<h3 className="text-lg font-medium text-gray-700 dark:text-gray-300 mb-2">
|
||||||
Schritt 2: Spaltenerkennung
|
Schritt 3: Spaltenerkennung
|
||||||
</h3>
|
</h3>
|
||||||
<p className="text-gray-500 dark:text-gray-400 max-w-md">
|
<p className="text-gray-500 dark:text-gray-400 max-w-md">
|
||||||
Erkennung unsichtbarer Spaltentrennungen in der Vokabelseite.
|
Erkennung unsichtbarer Spaltentrennungen in der Vokabelseite.
|
||||||
|
|||||||
@@ -5,7 +5,7 @@ export function StepCoordinates() {
|
|||||||
<div className="flex flex-col items-center justify-center py-16 text-center">
|
<div className="flex flex-col items-center justify-center py-16 text-center">
|
||||||
<div className="text-5xl mb-4">📍</div>
|
<div className="text-5xl mb-4">📍</div>
|
||||||
<h3 className="text-lg font-medium text-gray-700 dark:text-gray-300 mb-2">
|
<h3 className="text-lg font-medium text-gray-700 dark:text-gray-300 mb-2">
|
||||||
Schritt 4: Koordinatenzuweisung
|
Schritt 5: Koordinatenzuweisung
|
||||||
</h3>
|
</h3>
|
||||||
<p className="text-gray-500 dark:text-gray-400 max-w-md">
|
<p className="text-gray-500 dark:text-gray-400 max-w-md">
|
||||||
Exakte Positionszuweisung fuer jedes Wort auf der Seite.
|
Exakte Positionszuweisung fuer jedes Wort auf der Seite.
|
||||||
|
|||||||
@@ -8,7 +8,7 @@ import { ImageCompareView } from './ImageCompareView'
|
|||||||
const KLAUSUR_API = '/klausur-api'
|
const KLAUSUR_API = '/klausur-api'
|
||||||
|
|
||||||
interface StepDeskewProps {
|
interface StepDeskewProps {
|
||||||
onNext: () => void
|
onNext: (sessionId: string) => void
|
||||||
}
|
}
|
||||||
|
|
||||||
export function StepDeskew({ onNext }: StepDeskewProps) {
|
export function StepDeskew({ onNext }: StepDeskewProps) {
|
||||||
@@ -208,7 +208,7 @@ export function StepDeskew({ onNext }: StepDeskewProps) {
|
|||||||
onToggleGrid={() => setShowGrid((v) => !v)}
|
onToggleGrid={() => setShowGrid((v) => !v)}
|
||||||
onManualDeskew={handleManualDeskew}
|
onManualDeskew={handleManualDeskew}
|
||||||
onGroundTruth={handleGroundTruth}
|
onGroundTruth={handleGroundTruth}
|
||||||
onNext={onNext}
|
onNext={() => session && onNext(session.session_id)}
|
||||||
isApplying={applying}
|
isApplying={applying}
|
||||||
/>
|
/>
|
||||||
|
|
||||||
|
|||||||
150
admin-lehrer/components/ocr-pipeline/StepDewarp.tsx
Normal file
150
admin-lehrer/components/ocr-pipeline/StepDewarp.tsx
Normal file
@@ -0,0 +1,150 @@
|
|||||||
|
'use client'
|
||||||
|
|
||||||
|
import { useCallback, useEffect, useState } from 'react'
|
||||||
|
import type { DewarpResult, DewarpGroundTruth } from '@/app/(admin)/ai/ocr-pipeline/types'
|
||||||
|
import { DewarpControls } from './DewarpControls'
|
||||||
|
import { ImageCompareView } from './ImageCompareView'
|
||||||
|
|
||||||
|
const KLAUSUR_API = '/klausur-api'
|
||||||
|
|
||||||
|
/** Props of the dewarp pipeline step. */
interface StepDewarpProps {
  /** Invoked when the user accepts the result and moves to the next step. */
  onNext: () => void
  /** Session to dewarp; while `null` the step only shows a placeholder. */
  sessionId: string | null
}
|
||||||
|
|
||||||
|
/** Builds an OCR-pipeline endpoint URL for a session (id is URL-encoded). */
function sessionUrl(sessionId: string, suffix: string): string {
  return `${KLAUSUR_API}/api/v1/ocr-pipeline/sessions/${encodeURIComponent(sessionId)}${suffix}`
}

/**
 * Extracts a human-readable message from a failed response.
 *
 * Uses the JSON body's `detail` field when it is a non-empty string, the
 * HTTP status text when the body is not JSON, and `fallback` otherwise
 * (including when `detail` is a structured value such as an error list).
 */
async function readErrorDetail(res: Response, fallback: string): Promise<string> {
  const payload: unknown = await res.json().catch(() => ({ detail: res.statusText }))
  if (typeof payload === 'object' && payload !== null && 'detail' in payload) {
    const detail = (payload as { detail?: unknown }).detail
    if (typeof detail === 'string' && detail.length > 0) return detail
  }
  return fallback
}

/**
 * Pipeline step 2: dewarp (book-curvature correction).
 *
 * Once a `sessionId` is available and no result exists yet, the automatic
 * dewarp endpoint is called. The deskewed image (left) is shown next to the
 * dewarped image (right), together with controls for a manual dewarp and for
 * ground-truth feedback. Without a `sessionId` a placeholder asks the user to
 * finish step 1 first.
 *
 * @param sessionId - Session created by the deskew step, or `null`.
 * @param onNext - Called when the user accepts the result.
 */
export function StepDewarp({ sessionId, onNext }: StepDewarpProps) {
  const [dewarpResult, setDewarpResult] = useState<DewarpResult | null>(null)
  const [dewarping, setDewarping] = useState(false)
  const [applying, setApplying] = useState(false)
  const [showGrid, setShowGrid] = useState(true)
  const [error, setError] = useState<string | null>(null)

  // Auto-trigger dewarp when a session is present and no result exists yet.
  useEffect(() => {
    if (!sessionId || dewarpResult) return undefined

    // Aborting on cleanup keeps a superseded or unmounted run from touching
    // state (e.g. clearing the spinner of a newer run).
    const controller = new AbortController()

    const runDewarp = async () => {
      setDewarping(true)
      setError(null)
      try {
        const res = await fetch(sessionUrl(sessionId, '/dewarp'), {
          method: 'POST',
          signal: controller.signal,
        })
        if (!res.ok) {
          throw new Error(await readErrorDetail(res, 'Entzerrung fehlgeschlagen'))
        }
        const data: DewarpResult = await res.json()
        if (controller.signal.aborted) return
        // The backend returns a path; prefix it with the API base for the browser.
        setDewarpResult({ ...data, dewarped_image_url: `${KLAUSUR_API}${data.dewarped_image_url}` })
      } catch (e) {
        if (controller.signal.aborted) return
        setError(e instanceof Error ? e.message : 'Unbekannter Fehler')
      } finally {
        if (!controller.signal.aborted) setDewarping(false)
      }
    }

    void runDewarp()
    return () => controller.abort()
  }, [sessionId, dewarpResult])

  const handleManualDewarp = useCallback(async (scale: number) => {
    if (!sessionId) return
    setApplying(true)
    setError(null)

    try {
      const res = await fetch(sessionUrl(sessionId, '/dewarp/manual'), {
        method: 'POST',
        headers: { 'Content-Type': 'application/json' },
        body: JSON.stringify({ scale }),
      })
      if (!res.ok) {
        throw new Error(await readErrorDetail(res, 'Manuelle Entzerrung fehlgeschlagen'))
      }

      const data: Pick<DewarpResult, 'method_used' | 'scale_applied' | 'dewarped_image_url'> =
        await res.json()
      setDewarpResult((prev) =>
        prev
          ? {
              ...prev,
              method_used: data.method_used,
              scale_applied: data.scale_applied,
              // Cache-busting query so the browser reloads the re-rendered image.
              dewarped_image_url: `${KLAUSUR_API}${data.dewarped_image_url}?t=${Date.now()}`,
            }
          : null,
      )
    } catch (e) {
      setError(e instanceof Error ? e.message : 'Fehler')
    } finally {
      setApplying(false)
    }
  }, [sessionId])

  const handleGroundTruth = useCallback(async (gt: DewarpGroundTruth) => {
    if (!sessionId) return
    try {
      const res = await fetch(sessionUrl(sessionId, '/ground-truth/dewarp'), {
        method: 'POST',
        headers: { 'Content-Type': 'application/json' },
        body: JSON.stringify(gt),
      })
      // fetch only rejects on network errors; HTTP error statuses must be
      // checked explicitly or a failed save would look like a success.
      if (!res.ok) {
        throw new Error(await readErrorDetail(res, 'Ground Truth konnte nicht gespeichert werden'))
      }
    } catch (e) {
      console.error('Ground truth save failed:', e)
      setError(e instanceof Error ? e.message : 'Fehler')
    }
  }, [sessionId])

  if (!sessionId) {
    return (
      <div className="flex flex-col items-center justify-center py-16 text-center">
        <div className="text-5xl mb-4">🔧</div>
        <h3 className="text-lg font-medium text-gray-700 dark:text-gray-300 mb-2">
          Schritt 2: Entzerrung (Dewarp)
        </h3>
        <p className="text-gray-500 dark:text-gray-400 max-w-md">
          Bitte zuerst Schritt 1 (Begradigung) abschliessen.
        </p>
      </div>
    )
  }

  const deskewedUrl = sessionUrl(sessionId, '/image/deskewed')
  const dewarpedUrl = dewarpResult?.dewarped_image_url ?? null

  return (
    <div className="space-y-4">
      {/* Loading indicator */}
      {dewarping && (
        <div className="flex items-center gap-2 text-teal-600 dark:text-teal-400 text-sm">
          <div className="animate-spin w-4 h-4 border-2 border-teal-500 border-t-transparent rounded-full" />
          Entzerrung laeuft (beide Methoden)...
        </div>
      )}

      {/* Image comparison: deskewed (left) vs dewarped (right) */}
      <ImageCompareView
        originalUrl={deskewedUrl}
        deskewedUrl={dewarpedUrl}
        showGrid={showGrid}
        showBinarized={false}
        binarizedUrl={null}
        leftLabel="Begradigt (nach Deskew)"
        rightLabel={`Entzerrt${showGrid ? ' + Raster (mm)' : ''}`}
      />

      {/* Controls */}
      <DewarpControls
        dewarpResult={dewarpResult}
        showGrid={showGrid}
        onToggleGrid={() => setShowGrid((v) => !v)}
        onManualDewarp={handleManualDewarp}
        onGroundTruth={handleGroundTruth}
        onNext={onNext}
        isApplying={applying}
      />

      {error && (
        <div className="p-3 bg-red-50 dark:bg-red-900/20 text-red-600 dark:text-red-400 rounded-lg text-sm">
          {error}
        </div>
      )}
    </div>
  )
}
|
||||||
@@ -5,7 +5,7 @@ export function StepGroundTruth() {
|
|||||||
<div className="flex flex-col items-center justify-center py-16 text-center">
|
<div className="flex flex-col items-center justify-center py-16 text-center">
|
||||||
<div className="text-5xl mb-4">✅</div>
|
<div className="text-5xl mb-4">✅</div>
|
||||||
<h3 className="text-lg font-medium text-gray-700 dark:text-gray-300 mb-2">
|
<h3 className="text-lg font-medium text-gray-700 dark:text-gray-300 mb-2">
|
||||||
Schritt 6: Ground Truth Validierung
|
Schritt 7: Ground Truth Validierung
|
||||||
</h3>
|
</h3>
|
||||||
<p className="text-gray-500 dark:text-gray-400 max-w-md">
|
<p className="text-gray-500 dark:text-gray-400 max-w-md">
|
||||||
Gesamtpruefung der rekonstruierten Seite gegen das Original.
|
Gesamtpruefung der rekonstruierten Seite gegen das Original.
|
||||||
|
|||||||
@@ -5,7 +5,7 @@ export function StepReconstruction() {
|
|||||||
<div className="flex flex-col items-center justify-center py-16 text-center">
|
<div className="flex flex-col items-center justify-center py-16 text-center">
|
||||||
<div className="text-5xl mb-4">🏗️</div>
|
<div className="text-5xl mb-4">🏗️</div>
|
||||||
<h3 className="text-lg font-medium text-gray-700 dark:text-gray-300 mb-2">
|
<h3 className="text-lg font-medium text-gray-700 dark:text-gray-300 mb-2">
|
||||||
Schritt 5: Seitenrekonstruktion
|
Schritt 6: Seitenrekonstruktion
|
||||||
</h3>
|
</h3>
|
||||||
<p className="text-gray-500 dark:text-gray-400 max-w-md">
|
<p className="text-gray-500 dark:text-gray-400 max-w-md">
|
||||||
Nachbau der Originalseite aus erkannten Woertern und Positionen.
|
Nachbau der Originalseite aus erkannten Woertern und Positionen.
|
||||||
|
|||||||
@@ -5,7 +5,7 @@ export function StepWordRecognition() {
|
|||||||
<div className="flex flex-col items-center justify-center py-16 text-center">
|
<div className="flex flex-col items-center justify-center py-16 text-center">
|
||||||
<div className="text-5xl mb-4">🔤</div>
|
<div className="text-5xl mb-4">🔤</div>
|
||||||
<h3 className="text-lg font-medium text-gray-700 dark:text-gray-300 mb-2">
|
<h3 className="text-lg font-medium text-gray-700 dark:text-gray-300 mb-2">
|
||||||
Schritt 3: Worterkennung
|
Schritt 4: Worterkennung
|
||||||
</h3>
|
</h3>
|
||||||
<p className="text-gray-500 dark:text-gray-400 max-w-md">
|
<p className="text-gray-500 dark:text-gray-400 max-w-md">
|
||||||
OCR mit Bounding Boxes fuer jedes erkannte Wort.
|
OCR mit Bounding Boxes fuer jedes erkannte Wort.
|
||||||
|
|||||||
@@ -315,22 +315,356 @@ def deskew_image_by_word_alignment(
|
|||||||
|
|
||||||
|
|
||||||
# =============================================================================
|
# =============================================================================
|
||||||
# Stage 3: Dewarp (Book Curvature) — Pass-Through for now
|
# Stage 3: Dewarp (Book Curvature Correction)
|
||||||
# =============================================================================
|
# =============================================================================
|
||||||
|
|
||||||
def dewarp_image(img: np.ndarray) -> np.ndarray:
|
def _dewarp_by_vertical_edges(img: np.ndarray) -> Dict[str, Any]:
    """Method A: Detect curvature from strongest vertical text edges.

    Splits the image into 20 horizontal strips, finds the dominant vertical
    edge X-position per strip within the left 40% of the image, fits a
    2nd-degree polynomial through those positions, and generates a horizontal
    displacement map if the curvature is at least 2 px.

    Args:
        img: Input image. A 3-channel image is converted with BGR2GRAY; a
            2-D array is used directly as grayscale.

    Returns:
        Dict with keys: method, curvature_px, confidence, displacement_map.
        displacement_map is a float32 array of shape (h, w) holding the
        per-row horizontal shift, or None when no correction was derived
        (too few usable strips, curvature below threshold, or an image too
        small to analyse).
    """
    h, w = img.shape[:2]
    result: Dict[str, Any] = {
        "method": "vertical_edge",
        "curvature_px": 0.0,
        "confidence": 0.0,
        "displacement_map": None,
    }

    num_strips = 20
    strip_h = h // num_strips
    # Only the left 40% of the image (left margin area) is searched.
    search_w = int(w * 0.4)
    if strip_h < 1 or search_w < 1:
        # Image too small to form strips / a search window.
        return result

    gray = img if img.ndim == 2 else cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)

    # Vertical Sobel to find vertical edges. convertScaleAbs takes |x| and
    # saturates at 255; a plain astype(uint8) would wrap strong gradients
    # (a 3x3 Sobel on 8-bit input can reach ~1020) to small values.
    sobel_x = cv2.Sobel(gray, cv2.CV_64F, 1, 0, ksize=3)
    abs_sobel = cv2.convertScaleAbs(sobel_x)

    # Binarize with Otsu
    _, binary = cv2.threshold(abs_sobel, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU)

    # Odd smoothing-kernel width, roughly 1% of the image width (min 3).
    kernel_size = max(3, w // 100)
    if kernel_size % 2 == 0:
        kernel_size += 1

    edge_positions = []  # (y_center, x_position)
    for i in range(num_strips):
        y_start = i * strip_h
        y_end = min(y_start + strip_h, h)

        # Column sums of edge pixels inside the search window of this strip.
        left_proj = np.sum(binary[y_start:y_end, :search_w], axis=0).astype(np.float64)
        if left_proj.max() == 0:
            continue

        # Smooth and take the peak as this strip's edge position.
        smoothed = cv2.GaussianBlur(left_proj.reshape(1, -1), (kernel_size, 1), 0).flatten()
        edge_positions.append(((y_start + y_end) / 2.0, float(np.argmax(smoothed))))

    if len(edge_positions) < 8:
        return result

    ys = np.array([p[0] for p in edge_positions])
    xs = np.array([p[1] for p in edge_positions])

    # Remove outliers (> 2 std from median); std floored at 1 px so a nearly
    # constant edge does not reject everything.
    median_x = np.median(xs)
    std_x = max(np.std(xs), 1.0)
    mask = np.abs(xs - median_x) < 2 * std_x
    ys = ys[mask]
    xs = xs[mask]

    if len(ys) < 6:
        return result

    # Fit 2nd degree polynomial: x = a*y^2 + b*y + c
    coeffs = np.polyfit(ys, xs, 2)
    fitted = np.polyval(coeffs, ys)
    rmse = float(np.sqrt(np.mean((xs - fitted) ** 2)))

    # Measure curvature: max deviation of the parabola from a straight-line fit
    straight_coeffs = np.polyfit(ys, xs, 1)
    straight_fitted = np.polyval(straight_coeffs, ys)
    curvature_px = float(np.max(np.abs(fitted - straight_fitted)))

    if curvature_px < 2.0:
        result["confidence"] = 0.3
        return result

    # Displacement per row: difference between curved and straight fit,
    # negated and replicated across every column of that row.
    y_coords = np.arange(h)
    dx_per_row = np.polyval(coeffs, y_coords) - np.polyval(straight_coeffs, y_coords)
    displacement_map = np.tile((-dx_per_row).astype(np.float32).reshape(-1, 1), (1, w))

    confidence = min(1.0, len(ys) / 15.0) * max(0.5, 1.0 - rmse / 5.0)

    result["curvature_px"] = round(curvature_px, 2)
    result["confidence"] = round(float(confidence), 2)
    result["displacement_map"] = displacement_map

    return result
|
||||||
|
|
||||||
|
|
||||||
|
def _dewarp_by_text_baseline(img: np.ndarray) -> Dict[str, Any]:
    """Method B: Detect curvature from Tesseract text baseline positions.

    Runs a quick Tesseract pass on a (possibly downscaled) copy of the image,
    groups the recognised words into text lines, fits a 2nd-degree polynomial
    to the word baselines of every line and aggregates the deviation of that
    curve from a straight line into a displacement map.

    Args:
        img: BGR image.

    Returns:
        Dict with keys: method, curvature_px, confidence, displacement_map.
        ``curvature_px`` is the mean over all fitted lines of the maximum
        deviation between the quadratic and the linear baseline fit.
        ``displacement_map`` is a float32 array of shape (h, w), or None when
        Tesseract is unavailable or failed, too little text was found, or the
        measured curvature is below 1.5 px.
    """
    from collections import defaultdict

    h, w = img.shape[:2]
    result = {"method": "text_baseline", "curvature_px": 0.0, "confidence": 0.0, "displacement_map": None}

    # An empty image would otherwise divide by zero in the downscale step.
    if not TESSERACT_AVAILABLE or h == 0 or w == 0:
        return result

    # Downscale for speed
    max_dim = 1500
    scale_factor = min(1.0, max_dim / max(h, w))
    if scale_factor < 1.0:
        small = cv2.resize(img, (int(w * scale_factor), int(h * scale_factor)), interpolation=cv2.INTER_AREA)
    else:
        small = img

    pil_img = Image.fromarray(cv2.cvtColor(small, cv2.COLOR_BGR2RGB))
    try:
        data = pytesseract.image_to_data(
            pil_img, lang="eng+deu", config="--psm 6 --oem 3",
            output_type=pytesseract.Output.DICT,
        )
    except Exception as e:
        logger.warning(f"dewarp text_baseline: Tesseract failed: {e}")
        return result

    # Group word indices by (block, paragraph, line)
    line_groups: Dict[tuple, list] = defaultdict(list)
    for i, raw_text in enumerate(data["text"]):
        text = (raw_text or "").strip()
        if not text:
            continue
        # The confidence may arrive as an int, a float or a float-formatted
        # string (e.g. "96.45"); a plain int() would raise on the latter.
        try:
            conf = int(float(data["conf"][i]))
        except (TypeError, ValueError):
            continue
        if conf < 20:
            continue
        key = (data["block_num"][i], data["par_num"][i], data["line_num"][i])
        line_groups[key].append(i)

    if len(line_groups) < 5:
        return result

    # Word boxes are in downscaled coordinates; map them back to full size.
    inv_scale = 1.0 / scale_factor

    # One entry per usable text line:
    # (y_center, curvature_px, quadratic_coeffs, linear_coeffs)
    line_fits = []

    for indices in line_groups.values():
        if len(indices) < 3:
            continue

        # Baseline points: (x_center, y_bottom) of each word, ordered by x
        points = sorted(
            (
                (
                    (data["left"][idx] + data["width"][idx] / 2.0) * inv_scale,
                    (data["top"][idx] + data["height"][idx]) * inv_scale,
                )
                for idx in indices
            ),
            key=lambda p: p[0],
        )
        xs_line = np.array([p[0] for p in points])
        ys_line = np.array([p[1] for p in points])

        # Fit y = a*x^2 + b*x + c and the straight-line reference y = m*x + n
        try:
            quad_coeffs = np.polyfit(xs_line, ys_line, 2)
            lin_coeffs = np.polyfit(xs_line, ys_line, 1)
        except (np.linalg.LinAlgError, ValueError):
            continue

        deviation = np.polyval(quad_coeffs, xs_line) - np.polyval(lin_coeffs, xs_line)
        curvature = float(np.max(np.abs(deviation)))
        y_center = float(np.mean(ys_line))
        line_fits.append((y_center, curvature, quad_coeffs, lin_coeffs))

    if len(line_fits) < 3:
        return result

    # Average curvature
    avg_curvature = float(np.mean([fit[1] for fit in line_fits]))

    if avg_curvature < 1.5:
        result["confidence"] = 0.3
        return result

    # Build the displacement map: every line contributes its baseline
    # deviation (curved minus straight fit, evaluated across the full width)
    # to the rows around it, with a weight that falls off linearly to zero at
    # +/- spread rows from the line centre.
    # NOTE(review): the deviation measured here is vertical, but the map is
    # consumed as a horizontal shift by _apply_displacement_map(); the original
    # comments describe this as a deliberate proxy - confirm it is intended.
    displacement_map = np.zeros((h, w), dtype=np.float32)
    x_range = np.arange(w, dtype=np.float64)
    spread = max(int(h / len(line_fits) / 2), 20)

    for y_center, _curvature, quad_coeffs, lin_coeffs in line_fits:
        dy = np.polyval(quad_coeffs, x_range) - np.polyval(lin_coeffs, x_range)

        y_int = int(y_center)
        y_start = max(0, y_int - spread)
        y_end = min(h, y_int + spread)
        if y_end <= y_start:
            continue

        rows = np.arange(y_start, y_end)
        weights = 1.0 - np.abs(rows - y_int) / spread
        displacement_map[y_start:y_end, :] += (weights[:, None] * dy[None, :]).astype(np.float32)

    # Rescale so that the largest shift equals the average measured curvature.
    # Skipped when the mean quadratic coefficient is exactly zero.
    avg_a = float(np.mean([fit[2][0] for fit in line_fits]))
    if abs(avg_a) > 0:
        max_disp = np.max(np.abs(displacement_map))
        if max_disp > 0:
            displacement_map = displacement_map * (avg_curvature / max_disp)

    confidence = min(1.0, len(line_fits) / 10.0) * 0.8
    result["curvature_px"] = round(avg_curvature, 2)
    result["confidence"] = round(float(confidence), 2)
    result["displacement_map"] = displacement_map

    return result
|
||||||
|
|
||||||
|
|
||||||
|
def _apply_displacement_map(img: np.ndarray, displacement_map: np.ndarray,
                            scale: float = 1.0) -> np.ndarray:
    """Apply a horizontal displacement map to an image using cv2.remap().

    For every output pixel (y, x) the source pixel is sampled at
    (y, x + displacement_map[y, x] * scale); rows are never shifted
    vertically.

    Args:
        img: BGR image.
        displacement_map: Float32 array (h, w) of horizontal pixel shifts.
        scale: Multiplier for the displacement (-3.0 to +3.0).

    Returns:
        Corrected image.
    """
    h, w = img.shape[:2]

    # Identity sampling grid: map_y[y, x] == y and map_x[y, x] == x.
    map_y, map_x = np.indices((h, w), dtype=np.float32)

    # cv2.remap() only accepts float32 (or fixed-point int16) maps. Adding a
    # float64 displacement map would silently promote the grid to float64, so
    # force the shifted grid back to float32.
    map_x = (map_x + displacement_map * scale).astype(np.float32, copy=False)

    # Bilinear sampling; pixels pulled from outside the image repeat the
    # nearest border pixel instead of turning black.
    return cv2.remap(img, map_x, map_y,
                     interpolation=cv2.INTER_LINEAR,
                     borderMode=cv2.BORDER_REPLICATE)
|
||||||
|
|
||||||
|
|
||||||
|
def dewarp_image(img: np.ndarray) -> Tuple[np.ndarray, Dict[str, Any]]:
    """Correct book curvature distortion using the best of two methods.

    Both detectors are always run:

    * Method A (vertical edges): curvature of the strongest vertical text
      edge (left column margin).
    * Method B (text baseline): baseline curvature measured from Tesseract
      word positions.

    The result with the higher confidence is used (ties go to method A). If
    that result carries no displacement map, or its curvature is below 2 px,
    the image is returned unchanged.

    Args:
        img: BGR image (already deskewed).

    Returns:
        Tuple of (corrected_image, dewarp_info).
        dewarp_info keys: method, curvature_px, confidence, displacement_map.
        When nothing was corrected, method is "none" and displacement_map
        is None.
    """
    unchanged_info = {
        "method": "none",
        "curvature_px": 0.0,
        "confidence": 0.0,
        "displacement_map": None,
    }

    if not CV2_AVAILABLE:
        return img, unchanged_info

    started = time.time()
    edge_result = _dewarp_by_vertical_edges(img)
    baseline_result = _dewarp_by_text_baseline(img)
    elapsed = time.time() - started

    logger.info(f"dewarp: vertical_edge conf={edge_result['confidence']:.2f} "
                f"curv={edge_result['curvature_px']:.1f}px | "
                f"text_baseline conf={baseline_result['confidence']:.2f} "
                f"curv={baseline_result['curvature_px']:.1f}px "
                f"({elapsed:.2f}s)")

    # Higher confidence wins; the edge method is preferred on a tie.
    edge_wins = edge_result["confidence"] >= baseline_result["confidence"]
    best = edge_result if edge_wins else baseline_result

    usable = best["displacement_map"] is not None and not best["curvature_px"] < 2.0
    if not usable:
        return img, unchanged_info

    corrected = _apply_displacement_map(img, best["displacement_map"], scale=1.0)
    info = {
        field: best[field]
        for field in ("method", "curvature_px", "confidence", "displacement_map")
    }
    return corrected, info
|
||||||
|
|
||||||
|
|
||||||
|
def dewarp_image_manual(img: np.ndarray, displacement_map: np.ndarray,
                        scale: float) -> np.ndarray:
    """Re-apply a previously computed displacement map with a manual scale.

    Args:
        img: BGR image (deskewed, before dewarp).
        displacement_map: The displacement map from auto-dewarp.
        scale: Manual scale factor; values outside -3.0 to +3.0 are clamped.

    Returns:
        Corrected image, or ``img`` itself when the clamped scale is
        effectively zero (magnitude below 0.01).
    """
    capped = min(3.0, scale)
    clamped = max(-3.0, capped)

    if abs(clamped) < 0.01:
        # Nothing to shift - skip the remap entirely.
        return img

    return _apply_displacement_map(img, displacement_map, scale=clamped)
|
||||||
|
|
||||||
|
|
||||||
# =============================================================================
|
# =============================================================================
|
||||||
|
|||||||
@@ -1,13 +1,14 @@
|
|||||||
"""
|
"""
|
||||||
OCR Pipeline API - Schrittweise Seitenrekonstruktion.
|
OCR Pipeline API - Schrittweise Seitenrekonstruktion.
|
||||||
|
|
||||||
Zerlegt den OCR-Prozess in 6 einzelne Schritte:
|
Zerlegt den OCR-Prozess in 7 einzelne Schritte:
|
||||||
1. Deskewing - Scan begradigen
|
1. Deskewing - Scan begradigen
|
||||||
2. Spaltenerkennung - Unsichtbare Spalten finden
|
2. Dewarping - Buchwoelbung entzerren
|
||||||
3. Worterkennung - OCR mit Bounding Boxes
|
3. Spaltenerkennung - Unsichtbare Spalten finden
|
||||||
4. Koordinatenzuweisung - Exakte Positionen
|
4. Worterkennung - OCR mit Bounding Boxes
|
||||||
5. Seitenrekonstruktion - Seite nachbauen
|
5. Koordinatenzuweisung - Exakte Positionen
|
||||||
6. Ground Truth Validierung - Gesamtpruefung
|
6. Seitenrekonstruktion - Seite nachbauen
|
||||||
|
7. Ground Truth Validierung - Gesamtpruefung
|
||||||
|
|
||||||
Lizenz: Apache 2.0
|
Lizenz: Apache 2.0
|
||||||
DATENSCHUTZ: Alle Verarbeitung erfolgt lokal.
|
DATENSCHUTZ: Alle Verarbeitung erfolgt lokal.
|
||||||
@@ -30,6 +31,8 @@ from cv_vocab_pipeline import (
|
|||||||
create_ocr_image,
|
create_ocr_image,
|
||||||
deskew_image,
|
deskew_image,
|
||||||
deskew_image_by_word_alignment,
|
deskew_image_by_word_alignment,
|
||||||
|
dewarp_image,
|
||||||
|
dewarp_image_manual,
|
||||||
render_image_high_res,
|
render_image_high_res,
|
||||||
render_pdf_high_res,
|
render_pdf_high_res,
|
||||||
)
|
)
|
||||||
@@ -77,6 +80,16 @@ class DeskewGroundTruthRequest(BaseModel):
|
|||||||
notes: Optional[str] = None
|
notes: Optional[str] = None
|
||||||
|
|
||||||
|
|
||||||
|
# Request body for the manual dewarp endpoint.
class ManualDewarpRequest(BaseModel):
    # Multiplier for the stored displacement map; the endpoint clamps it
    # to the range -3.0 .. +3.0 before use.
    scale: float
|
||||||
|
|
||||||
|
|
||||||
|
# Request body for saving ground-truth feedback on the dewarp step.
class DewarpGroundTruthRequest(BaseModel):
    # Whether the reviewer judged the dewarp result to be correct.
    is_correct: bool
    # Optional scale value supplied by the reviewer; stored as-is.
    corrected_scale: Optional[float] = None
    # Optional free-text remarks; stored as-is.
    notes: Optional[str] = None
|
||||||
|
|
||||||
|
|
||||||
# ---------------------------------------------------------------------------
|
# ---------------------------------------------------------------------------
|
||||||
# Endpoints
|
# Endpoints
|
||||||
# ---------------------------------------------------------------------------
|
# ---------------------------------------------------------------------------
|
||||||
@@ -116,6 +129,10 @@ async def create_session(file: UploadFile = File(...)):
|
|||||||
"deskewed_png": None,
|
"deskewed_png": None,
|
||||||
"binarized_png": None,
|
"binarized_png": None,
|
||||||
"deskew_result": None,
|
"deskew_result": None,
|
||||||
|
"dewarped_bgr": None,
|
||||||
|
"dewarped_png": None,
|
||||||
|
"dewarp_result": None,
|
||||||
|
"displacement_map": None,
|
||||||
"ground_truth": {},
|
"ground_truth": {},
|
||||||
"current_step": 1,
|
"current_step": 1,
|
||||||
}
|
}
|
||||||
@@ -263,13 +280,15 @@ async def manual_deskew(session_id: str, req: ManualDeskewRequest):
|
|||||||
|
|
||||||
@router.get("/sessions/{session_id}/image/{image_type}")
|
@router.get("/sessions/{session_id}/image/{image_type}")
|
||||||
async def get_image(session_id: str, image_type: str):
|
async def get_image(session_id: str, image_type: str):
|
||||||
"""Serve session images: original, deskewed, or binarized."""
|
"""Serve session images: original, deskewed, dewarped, or binarized."""
|
||||||
session = _get_session(session_id)
|
session = _get_session(session_id)
|
||||||
|
|
||||||
if image_type == "original":
|
if image_type == "original":
|
||||||
data = session.get("original_png")
|
data = session.get("original_png")
|
||||||
elif image_type == "deskewed":
|
elif image_type == "deskewed":
|
||||||
data = session.get("deskewed_png")
|
data = session.get("deskewed_png")
|
||||||
|
elif image_type == "dewarped":
|
||||||
|
data = session.get("dewarped_png")
|
||||||
elif image_type == "binarized":
|
elif image_type == "binarized":
|
||||||
data = session.get("binarized_png")
|
data = session.get("binarized_png")
|
||||||
else:
|
else:
|
||||||
@@ -299,3 +318,106 @@ async def save_deskew_ground_truth(session_id: str, req: DeskewGroundTruthReques
|
|||||||
f"correct={req.is_correct}, corrected_angle={req.corrected_angle}")
|
f"correct={req.is_correct}, corrected_angle={req.corrected_angle}")
|
||||||
|
|
||||||
return {"session_id": session_id, "ground_truth": gt}
|
return {"session_id": session_id, "ground_truth": gt}
|
||||||
|
|
||||||
|
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
# Dewarp Endpoints
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
@router.post("/sessions/{session_id}/dewarp")
async def auto_dewarp(session_id: str):
    """Run automatic dewarp on the session's deskewed image.

    Stores the dewarped image (BGR and PNG), a result summary and the
    displacement map in the session, then returns the summary together with
    the URL of the dewarped image. Responds with HTTP 400 when the session
    has no deskewed image yet.
    """
    session = _get_session(session_id)
    source_bgr = session.get("deskewed_bgr")
    if source_bgr is None:
        raise HTTPException(status_code=400, detail="Deskew must be completed before dewarp")

    started = time.time()
    dewarped_bgr, dewarp_info = dewarp_image(source_bgr)
    duration = time.time() - started

    # Encode as PNG; fall back to the deskewed PNG if encoding fails.
    encoded_ok, png_buf = cv2.imencode(".png", dewarped_bgr)
    if encoded_ok:
        dewarped_png = png_buf.tobytes()
    else:
        dewarped_png = session["deskewed_png"]

    summary = {
        "method_used": dewarp_info["method"],
        "curvature_px": dewarp_info["curvature_px"],
        "confidence": dewarp_info["confidence"],
        "duration_seconds": round(duration, 2),
    }

    session["dewarped_bgr"] = dewarped_bgr
    session["dewarped_png"] = dewarped_png
    session["dewarp_result"] = summary
    # Kept so the manual endpoint can re-apply the map with another scale.
    session["displacement_map"] = dewarp_info.get("displacement_map")

    logger.info(f"OCR Pipeline: dewarp session {session_id}: "
                f"method={dewarp_info['method']} curvature={dewarp_info['curvature_px']:.1f}px "
                f"conf={dewarp_info['confidence']:.2f} ({duration:.2f}s)")

    # Fresh dict, so the response never aliases the stored summary.
    return {
        "session_id": session_id,
        **summary,
        "dewarped_image_url": f"/api/v1/ocr-pipeline/sessions/{session_id}/image/dewarped",
    }
|
||||||
|
|
||||||
|
|
||||||
|
@router.post("/sessions/{session_id}/dewarp/manual")
async def manual_dewarp(session_id: str, req: ManualDewarpRequest):
    """Re-run dewarp on the deskewed image with a user-chosen scale.

    The requested scale is clamped to -3.0 .. +3.0 and applied to the
    displacement map stored in the session. Without a stored map, or with a
    scale of effectively zero, the deskewed image is used unchanged. Responds
    with HTTP 400 when the session has no deskewed image yet.
    """
    session = _get_session(session_id)

    deskewed_bgr = session.get("deskewed_bgr")
    if deskewed_bgr is None:
        raise HTTPException(status_code=400, detail="Deskew must be completed before dewarp")

    displacement_map = session.get("displacement_map")
    scale = max(-3.0, min(3.0, req.scale))
    scale_applied = round(scale, 2)

    if displacement_map is not None and abs(scale) >= 0.01:
        dewarped_bgr = dewarp_image_manual(deskewed_bgr, displacement_map, scale)
    else:
        # No displacement map or zero scale - use deskewed as-is
        dewarped_bgr = deskewed_bgr

    encoded_ok, png_buf = cv2.imencode(".png", dewarped_bgr)
    if encoded_ok:
        dewarped_png = png_buf.tobytes()
    else:
        dewarped_png = session.get("deskewed_png")

    # Keep the fields of any earlier result and override method and scale.
    merged_result = dict(session.get("dewarp_result") or {})
    merged_result["method_used"] = "manual"
    merged_result["scale_applied"] = scale_applied

    session["dewarped_bgr"] = dewarped_bgr
    session["dewarped_png"] = dewarped_png
    session["dewarp_result"] = merged_result

    logger.info(f"OCR Pipeline: manual dewarp session {session_id}: scale={scale:.2f}")

    return {
        "session_id": session_id,
        "scale_applied": scale_applied,
        "method_used": "manual",
        "dewarped_image_url": f"/api/v1/ocr-pipeline/sessions/{session_id}/image/dewarped",
    }
|
||||||
|
|
||||||
|
|
||||||
|
@router.post("/sessions/{session_id}/ground-truth/dewarp")
async def save_dewarp_ground_truth(session_id: str, req: DewarpGroundTruthRequest):
    """Store reviewer feedback for the dewarp step in the session.

    The feedback is saved under ``session["ground_truth"]["dewarp"]`` together
    with a UTC timestamp and the session's current dewarp result, and is
    echoed back in the response.
    """
    session = _get_session(session_id)

    feedback = dict(
        is_correct=req.is_correct,
        corrected_scale=req.corrected_scale,
        notes=req.notes,
        saved_at=datetime.utcnow().isoformat(),
        dewarp_result=session.get("dewarp_result"),
    )
    session["ground_truth"]["dewarp"] = feedback

    logger.info(f"OCR Pipeline: ground truth dewarp session {session_id}: "
                f"correct={req.is_correct}, corrected_scale={req.corrected_scale}")

    return {"session_id": session_id, "ground_truth": feedback}
|
||||||
|
|||||||
Reference in New Issue
Block a user