refactor(dewarp): replace displacement map with affine shear correction
The old displacement-map approach shifted entire rows by a parabolic profile, creating a circle/barrel distortion. The actual problem is a linear vertical shear: after deskew aligns horizontal lines, the vertical column edges are still tilted by ~0.5°.

New approach:
- Detect shear angle from strongest vertical edge slope (not curvature)
- Apply cv2.warpAffine shear to straighten vertical features
- Manual slider: -2.0° to +2.0° in 0.05° steps
- Slider initializes to auto-detected shear angle
- Ground truth question: "Spalten vertikal ausgerichtet?"

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -37,17 +37,16 @@ export interface DeskewGroundTruth {
|
|||||||
|
|
||||||
export interface DewarpResult {
|
export interface DewarpResult {
|
||||||
session_id: string
|
session_id: string
|
||||||
method_used: 'vertical_edge' | 'text_baseline' | 'manual' | 'none'
|
method_used: 'vertical_edge' | 'manual' | 'none'
|
||||||
curvature_px: number
|
shear_degrees: number
|
||||||
confidence: number
|
confidence: number
|
||||||
duration_seconds: number
|
duration_seconds: number
|
||||||
dewarped_image_url: string
|
dewarped_image_url: string
|
||||||
scale_applied?: number
|
|
||||||
}
|
}
|
||||||
|
|
||||||
export interface DewarpGroundTruth {
|
export interface DewarpGroundTruth {
|
||||||
is_correct: boolean
|
is_correct: boolean
|
||||||
corrected_scale?: number
|
corrected_shear?: number
|
||||||
notes?: string
|
notes?: string
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@@ -1,13 +1,13 @@
|
|||||||
'use client'
|
'use client'
|
||||||
|
|
||||||
import { useState } from 'react'
|
import { useEffect, useState } from 'react'
|
||||||
import type { DewarpResult, DewarpGroundTruth } from '@/app/(admin)/ai/ocr-pipeline/types'
|
import type { DewarpResult, DewarpGroundTruth } from '@/app/(admin)/ai/ocr-pipeline/types'
|
||||||
|
|
||||||
interface DewarpControlsProps {
|
interface DewarpControlsProps {
|
||||||
dewarpResult: DewarpResult | null
|
dewarpResult: DewarpResult | null
|
||||||
showGrid: boolean
|
showGrid: boolean
|
||||||
onToggleGrid: () => void
|
onToggleGrid: () => void
|
||||||
onManualDewarp: (scale: number) => void
|
onManualDewarp: (shearDegrees: number) => void
|
||||||
onGroundTruth: (gt: DewarpGroundTruth) => void
|
onGroundTruth: (gt: DewarpGroundTruth) => void
|
||||||
onNext: () => void
|
onNext: () => void
|
||||||
isApplying: boolean
|
isApplying: boolean
|
||||||
@@ -15,7 +15,6 @@ interface DewarpControlsProps {
|
|||||||
|
|
||||||
const METHOD_LABELS: Record<string, string> = {
|
const METHOD_LABELS: Record<string, string> = {
|
||||||
vertical_edge: 'Vertikale Kanten',
|
vertical_edge: 'Vertikale Kanten',
|
||||||
text_baseline: 'Textzeilen-Baseline',
|
|
||||||
manual: 'Manuell',
|
manual: 'Manuell',
|
||||||
none: 'Keine Korrektur',
|
none: 'Keine Korrektur',
|
||||||
}
|
}
|
||||||
@@ -29,11 +28,18 @@ export function DewarpControls({
|
|||||||
onNext,
|
onNext,
|
||||||
isApplying,
|
isApplying,
|
||||||
}: DewarpControlsProps) {
|
}: DewarpControlsProps) {
|
||||||
const [manualScale, setManualScale] = useState(100)
|
const [manualShear, setManualShear] = useState(0)
|
||||||
const [gtFeedback, setGtFeedback] = useState<'correct' | 'incorrect' | null>(null)
|
const [gtFeedback, setGtFeedback] = useState<'correct' | 'incorrect' | null>(null)
|
||||||
const [gtNotes, setGtNotes] = useState('')
|
const [gtNotes, setGtNotes] = useState('')
|
||||||
const [gtSaved, setGtSaved] = useState(false)
|
const [gtSaved, setGtSaved] = useState(false)
|
||||||
|
|
||||||
|
// Initialize slider to auto-detected value when result arrives
|
||||||
|
useEffect(() => {
|
||||||
|
if (dewarpResult && dewarpResult.shear_degrees !== undefined) {
|
||||||
|
setManualShear(dewarpResult.shear_degrees)
|
||||||
|
}
|
||||||
|
}, [dewarpResult?.shear_degrees])
|
||||||
|
|
||||||
const handleGroundTruth = (isCorrect: boolean) => {
|
const handleGroundTruth = (isCorrect: boolean) => {
|
||||||
setGtFeedback(isCorrect ? 'correct' : 'incorrect')
|
setGtFeedback(isCorrect ? 'correct' : 'incorrect')
|
||||||
if (isCorrect) {
|
if (isCorrect) {
|
||||||
@@ -45,7 +51,7 @@ export function DewarpControls({
|
|||||||
const handleGroundTruthIncorrect = () => {
|
const handleGroundTruthIncorrect = () => {
|
||||||
onGroundTruth({
|
onGroundTruth({
|
||||||
is_correct: false,
|
is_correct: false,
|
||||||
corrected_scale: manualScale !== 0 ? manualScale : undefined,
|
corrected_shear: manualShear !== 0 ? manualShear : undefined,
|
||||||
notes: gtNotes || undefined,
|
notes: gtNotes || undefined,
|
||||||
})
|
})
|
||||||
setGtSaved(true)
|
setGtSaved(true)
|
||||||
@@ -58,8 +64,8 @@ export function DewarpControls({
|
|||||||
<div className="bg-white dark:bg-gray-800 rounded-lg border border-gray-200 dark:border-gray-700 p-4">
|
<div className="bg-white dark:bg-gray-800 rounded-lg border border-gray-200 dark:border-gray-700 p-4">
|
||||||
<div className="flex flex-wrap items-center gap-3 text-sm">
|
<div className="flex flex-wrap items-center gap-3 text-sm">
|
||||||
<div>
|
<div>
|
||||||
<span className="text-gray-500">Kruemmung:</span>{' '}
|
<span className="text-gray-500">Scherung:</span>{' '}
|
||||||
<span className="font-mono font-medium">{dewarpResult.curvature_px} px</span>
|
<span className="font-mono font-medium">{dewarpResult.shear_degrees}°</span>
|
||||||
</div>
|
</div>
|
||||||
<div className="h-4 w-px bg-gray-300 dark:bg-gray-600" />
|
<div className="h-4 w-px bg-gray-300 dark:bg-gray-600" />
|
||||||
<div>
|
<div>
|
||||||
@@ -91,25 +97,25 @@ export function DewarpControls({
|
|||||||
</div>
|
</div>
|
||||||
)}
|
)}
|
||||||
|
|
||||||
{/* Manual scale slider */}
|
{/* Manual shear angle slider */}
|
||||||
{dewarpResult && (
|
{dewarpResult && (
|
||||||
<div className="bg-white dark:bg-gray-800 rounded-lg border border-gray-200 dark:border-gray-700 p-4">
|
<div className="bg-white dark:bg-gray-800 rounded-lg border border-gray-200 dark:border-gray-700 p-4">
|
||||||
<div className="text-sm font-medium text-gray-700 dark:text-gray-300 mb-2">Korrekturstaerke</div>
|
<div className="text-sm font-medium text-gray-700 dark:text-gray-300 mb-2">Scherwinkel (manuell)</div>
|
||||||
<div className="flex items-center gap-3">
|
<div className="flex items-center gap-3">
|
||||||
<span className="text-xs text-gray-400 w-8 text-right">0%</span>
|
<span className="text-xs text-gray-400 w-10 text-right">-2.0°</span>
|
||||||
<input
|
<input
|
||||||
type="range"
|
type="range"
|
||||||
min={0}
|
min={-200}
|
||||||
max={200}
|
max={200}
|
||||||
step={5}
|
step={5}
|
||||||
value={manualScale}
|
value={Math.round(manualShear * 100)}
|
||||||
onChange={(e) => setManualScale(parseInt(e.target.value))}
|
onChange={(e) => setManualShear(parseInt(e.target.value) / 100)}
|
||||||
className="flex-1 h-2 bg-gray-200 rounded-lg appearance-none cursor-pointer dark:bg-gray-700 accent-teal-500"
|
className="flex-1 h-2 bg-gray-200 rounded-lg appearance-none cursor-pointer dark:bg-gray-700 accent-teal-500"
|
||||||
/>
|
/>
|
||||||
<span className="text-xs text-gray-400 w-10">200%</span>
|
<span className="text-xs text-gray-400 w-10">+2.0°</span>
|
||||||
<span className="font-mono text-sm w-14 text-right">{manualScale}%</span>
|
<span className="font-mono text-sm w-16 text-right">{manualShear.toFixed(2)}°</span>
|
||||||
<button
|
<button
|
||||||
onClick={() => onManualDewarp(manualScale / 100)}
|
onClick={() => onManualDewarp(manualShear)}
|
||||||
disabled={isApplying}
|
disabled={isApplying}
|
||||||
className="px-3 py-1.5 text-sm bg-teal-600 text-white rounded-md hover:bg-teal-700 disabled:opacity-50 transition-colors"
|
className="px-3 py-1.5 text-sm bg-teal-600 text-white rounded-md hover:bg-teal-700 disabled:opacity-50 transition-colors"
|
||||||
>
|
>
|
||||||
@@ -117,7 +123,7 @@ export function DewarpControls({
|
|||||||
</button>
|
</button>
|
||||||
</div>
|
</div>
|
||||||
<p className="text-xs text-gray-400 mt-1">
|
<p className="text-xs text-gray-400 mt-1">
|
||||||
100% = automatisch erkannte Korrektur, 0% = keine, 200% = doppelt so stark
|
Scherung der vertikalen Achse in Grad. Positiv = Spalten nach rechts kippen, negativ = nach links.
|
||||||
</p>
|
</p>
|
||||||
</div>
|
</div>
|
||||||
)}
|
)}
|
||||||
@@ -126,8 +132,9 @@ export function DewarpControls({
|
|||||||
{dewarpResult && (
|
{dewarpResult && (
|
||||||
<div className="bg-white dark:bg-gray-800 rounded-lg border border-gray-200 dark:border-gray-700 p-4">
|
<div className="bg-white dark:bg-gray-800 rounded-lg border border-gray-200 dark:border-gray-700 p-4">
|
||||||
<div className="text-sm font-medium text-gray-700 dark:text-gray-300 mb-2">
|
<div className="text-sm font-medium text-gray-700 dark:text-gray-300 mb-2">
|
||||||
Korrekt entzerrt?
|
Spalten vertikal ausgerichtet?
|
||||||
</div>
|
</div>
|
||||||
|
<p className="text-xs text-gray-400 mb-2">Pruefen ob die Spaltenraender jetzt senkrecht zum Raster stehen.</p>
|
||||||
{!gtSaved ? (
|
{!gtSaved ? (
|
||||||
<div className="space-y-3">
|
<div className="space-y-3">
|
||||||
<div className="flex gap-2">
|
<div className="flex gap-2">
|
||||||
|
|||||||
@@ -47,7 +47,7 @@ export function StepDewarp({ sessionId, onNext }: StepDewarpProps) {
|
|||||||
runDewarp()
|
runDewarp()
|
||||||
}, [sessionId, dewarpResult])
|
}, [sessionId, dewarpResult])
|
||||||
|
|
||||||
const handleManualDewarp = useCallback(async (scale: number) => {
|
const handleManualDewarp = useCallback(async (shearDegrees: number) => {
|
||||||
if (!sessionId) return
|
if (!sessionId) return
|
||||||
setApplying(true)
|
setApplying(true)
|
||||||
setError(null)
|
setError(null)
|
||||||
@@ -56,7 +56,7 @@ export function StepDewarp({ sessionId, onNext }: StepDewarpProps) {
|
|||||||
const res = await fetch(`${KLAUSUR_API}/api/v1/ocr-pipeline/sessions/${sessionId}/dewarp/manual`, {
|
const res = await fetch(`${KLAUSUR_API}/api/v1/ocr-pipeline/sessions/${sessionId}/dewarp/manual`, {
|
||||||
method: 'POST',
|
method: 'POST',
|
||||||
headers: { 'Content-Type': 'application/json' },
|
headers: { 'Content-Type': 'application/json' },
|
||||||
body: JSON.stringify({ scale }),
|
body: JSON.stringify({ shear_degrees: shearDegrees }),
|
||||||
})
|
})
|
||||||
if (!res.ok) throw new Error('Manuelle Entzerrung fehlgeschlagen')
|
if (!res.ok) throw new Error('Manuelle Entzerrung fehlgeschlagen')
|
||||||
|
|
||||||
@@ -66,7 +66,7 @@ export function StepDewarp({ sessionId, onNext }: StepDewarpProps) {
|
|||||||
? {
|
? {
|
||||||
...prev,
|
...prev,
|
||||||
method_used: data.method_used,
|
method_used: data.method_used,
|
||||||
scale_applied: data.scale_applied,
|
shear_degrees: data.shear_degrees,
|
||||||
dewarped_image_url: `${KLAUSUR_API}${data.dewarped_image_url}?t=${Date.now()}`,
|
dewarped_image_url: `${KLAUSUR_API}${data.dewarped_image_url}?t=${Date.now()}`,
|
||||||
}
|
}
|
||||||
: null,
|
: null,
|
||||||
|
|||||||
@@ -318,18 +318,21 @@ def deskew_image_by_word_alignment(
|
|||||||
# Stage 3: Dewarp (Book Curvature Correction)
|
# Stage 3: Dewarp (Book Curvature Correction)
|
||||||
# =============================================================================
|
# =============================================================================
|
||||||
|
|
||||||
def _dewarp_by_vertical_edges(img: np.ndarray) -> Dict[str, Any]:
|
def _detect_shear_angle(img: np.ndarray) -> Dict[str, Any]:
|
||||||
"""Method A: Detect curvature from strongest vertical text edges.
|
"""Detect the vertical shear angle of the page.
|
||||||
|
|
||||||
Splits image into horizontal strips, finds the dominant vertical edge
|
After deskew (horizontal lines aligned), vertical features like column
|
||||||
X-position per strip, fits a 2nd-degree polynomial, and generates a
|
edges may still be tilted. This measures that tilt by tracking the
|
||||||
displacement map if curvature exceeds threshold.
|
strongest vertical edge across horizontal strips.
|
||||||
|
|
||||||
|
The result is a shear angle in degrees: the angular difference between
|
||||||
|
true vertical and the detected column edge.
|
||||||
|
|
||||||
Returns:
|
Returns:
|
||||||
Dict with keys: method, curvature_px, confidence, displacement_map (or None).
|
Dict with keys: method, shear_degrees, confidence.
|
||||||
"""
|
"""
|
||||||
h, w = img.shape[:2]
|
h, w = img.shape[:2]
|
||||||
result = {"method": "vertical_edge", "curvature_px": 0.0, "confidence": 0.0, "displacement_map": None}
|
result = {"method": "vertical_edge", "shear_degrees": 0.0, "confidence": 0.0}
|
||||||
|
|
||||||
gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
|
gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
|
||||||
|
|
||||||
@@ -354,7 +357,7 @@ def _dewarp_by_vertical_edges(img: np.ndarray) -> Dict[str, Any]:
|
|||||||
if projection.max() == 0:
|
if projection.max() == 0:
|
||||||
continue
|
continue
|
||||||
|
|
||||||
# Find the strongest vertical edge in left 40% of image (left margin area)
|
# Find the strongest vertical edge in left 40% of image
|
||||||
search_w = int(w * 0.4)
|
search_w = int(w * 0.4)
|
||||||
left_proj = projection[:search_w]
|
left_proj = projection[:search_w]
|
||||||
if left_proj.max() == 0:
|
if left_proj.max() == 0:
|
||||||
@@ -385,229 +388,76 @@ def _dewarp_by_vertical_edges(img: np.ndarray) -> Dict[str, Any]:
|
|||||||
if len(ys) < 6:
|
if len(ys) < 6:
|
||||||
return result
|
return result
|
||||||
|
|
||||||
# Fit 2nd degree polynomial: x = a*y^2 + b*y + c
|
# Fit straight line: x = slope * y + intercept
|
||||||
coeffs = np.polyfit(ys, xs, 2)
|
# The slope tells us the tilt of the vertical edge
|
||||||
fitted = np.polyval(coeffs, ys)
|
straight_coeffs = np.polyfit(ys, xs, 1)
|
||||||
|
slope = straight_coeffs[0] # dx/dy in pixels
|
||||||
|
fitted = np.polyval(straight_coeffs, ys)
|
||||||
residuals = xs - fitted
|
residuals = xs - fitted
|
||||||
rmse = float(np.sqrt(np.mean(residuals ** 2)))
|
rmse = float(np.sqrt(np.mean(residuals ** 2)))
|
||||||
|
|
||||||
# Measure curvature: max deviation from straight line
|
# Convert slope to angle: arctan(dx/dy) in degrees
|
||||||
straight_coeffs = np.polyfit(ys, xs, 1)
|
import math
|
||||||
straight_fitted = np.polyval(straight_coeffs, ys)
|
shear_degrees = math.degrees(math.atan(slope))
|
||||||
curvature_px = float(np.max(np.abs(fitted - straight_fitted)))
|
|
||||||
|
|
||||||
if curvature_px < 2.0:
|
|
||||||
result["confidence"] = 0.3
|
|
||||||
return result
|
|
||||||
|
|
||||||
# Generate displacement map
|
|
||||||
y_coords = np.arange(h)
|
|
||||||
all_fitted = np.polyval(coeffs, y_coords)
|
|
||||||
all_straight = np.polyval(straight_coeffs, y_coords)
|
|
||||||
dx_per_row = all_fitted - all_straight # displacement per row
|
|
||||||
|
|
||||||
# Create full displacement map: each pixel shifts horizontally by dx_per_row[y]
|
|
||||||
displacement_map = np.zeros((h, w), dtype=np.float32)
|
|
||||||
for y in range(h):
|
|
||||||
displacement_map[y, :] = -dx_per_row[y]
|
|
||||||
|
|
||||||
confidence = min(1.0, len(ys) / 15.0) * max(0.5, 1.0 - rmse / 5.0)
|
confidence = min(1.0, len(ys) / 15.0) * max(0.5, 1.0 - rmse / 5.0)
|
||||||
|
|
||||||
result["curvature_px"] = round(curvature_px, 2)
|
result["shear_degrees"] = round(shear_degrees, 3)
|
||||||
result["confidence"] = round(float(confidence), 2)
|
result["confidence"] = round(float(confidence), 2)
|
||||||
result["displacement_map"] = displacement_map
|
|
||||||
|
|
||||||
return result
|
return result
|
||||||
|
|
||||||
|
|
||||||
def _dewarp_by_text_baseline(img: np.ndarray) -> Dict[str, Any]:
|
def _apply_shear(img: np.ndarray, shear_degrees: float) -> np.ndarray:
|
||||||
"""Method B: Detect curvature from Tesseract text baseline positions.
|
"""Apply a vertical shear correction to an image.
|
||||||
|
|
||||||
Uses a quick Tesseract pass on a downscaled image, groups words into lines,
|
Shifts each row horizontally proportional to its distance from the
|
||||||
measures baseline curvature per line, and aggregates into a displacement map.
|
vertical center. This corrects the tilt of vertical features (columns)
|
||||||
|
without affecting horizontal alignment (text lines).
|
||||||
Returns:
|
|
||||||
Dict with keys: method, curvature_px, confidence, displacement_map (or None).
|
|
||||||
"""
|
|
||||||
h, w = img.shape[:2]
|
|
||||||
result = {"method": "text_baseline", "curvature_px": 0.0, "confidence": 0.0, "displacement_map": None}
|
|
||||||
|
|
||||||
if not TESSERACT_AVAILABLE:
|
|
||||||
return result
|
|
||||||
|
|
||||||
# Downscale for speed
|
|
||||||
max_dim = 1500
|
|
||||||
scale_factor = min(1.0, max_dim / max(h, w))
|
|
||||||
if scale_factor < 1.0:
|
|
||||||
small = cv2.resize(img, (int(w * scale_factor), int(h * scale_factor)), interpolation=cv2.INTER_AREA)
|
|
||||||
else:
|
|
||||||
small = img
|
|
||||||
scale_factor = 1.0
|
|
||||||
|
|
||||||
pil_img = Image.fromarray(cv2.cvtColor(small, cv2.COLOR_BGR2RGB))
|
|
||||||
try:
|
|
||||||
data = pytesseract.image_to_data(
|
|
||||||
pil_img, lang="eng+deu", config="--psm 6 --oem 3",
|
|
||||||
output_type=pytesseract.Output.DICT,
|
|
||||||
)
|
|
||||||
except Exception as e:
|
|
||||||
logger.warning(f"dewarp text_baseline: Tesseract failed: {e}")
|
|
||||||
return result
|
|
||||||
|
|
||||||
# Group words by line
|
|
||||||
from collections import defaultdict
|
|
||||||
line_groups: Dict[tuple, list] = defaultdict(list)
|
|
||||||
for i in range(len(data["text"])):
|
|
||||||
text = (data["text"][i] or "").strip()
|
|
||||||
conf = int(data["conf"][i])
|
|
||||||
if not text or conf < 20:
|
|
||||||
continue
|
|
||||||
key = (data["block_num"][i], data["par_num"][i], data["line_num"][i])
|
|
||||||
line_groups[key].append(i)
|
|
||||||
|
|
||||||
if len(line_groups) < 5:
|
|
||||||
return result
|
|
||||||
|
|
||||||
inv_scale = 1.0 / scale_factor
|
|
||||||
|
|
||||||
# For each line with enough words, measure baseline curvature
|
|
||||||
line_curvatures = [] # (y_center, curvature_px)
|
|
||||||
all_baselines = [] # (y_center, dx_offset) for displacement map
|
|
||||||
|
|
||||||
for key, indices in line_groups.items():
|
|
||||||
if len(indices) < 3:
|
|
||||||
continue
|
|
||||||
|
|
||||||
# Collect baseline points: (x_center, y_bottom) for each word
|
|
||||||
points = []
|
|
||||||
for idx in indices:
|
|
||||||
x_center = (data["left"][idx] + data["width"][idx] / 2.0) * inv_scale
|
|
||||||
y_bottom = (data["top"][idx] + data["height"][idx]) * inv_scale
|
|
||||||
points.append((x_center, y_bottom))
|
|
||||||
|
|
||||||
points.sort(key=lambda p: p[0])
|
|
||||||
xs_line = np.array([p[0] for p in points])
|
|
||||||
ys_line = np.array([p[1] for p in points])
|
|
||||||
|
|
||||||
if len(xs_line) < 3:
|
|
||||||
continue
|
|
||||||
|
|
||||||
# Fit 2nd degree: y = a*x^2 + b*x + c
|
|
||||||
try:
|
|
||||||
coeffs = np.polyfit(xs_line, ys_line, 2)
|
|
||||||
except (np.linalg.LinAlgError, ValueError):
|
|
||||||
continue
|
|
||||||
|
|
||||||
fitted = np.polyval(coeffs, xs_line)
|
|
||||||
straight = np.polyval(np.polyfit(xs_line, ys_line, 1), xs_line)
|
|
||||||
curvature = float(np.max(np.abs(fitted - straight)))
|
|
||||||
|
|
||||||
y_center = float(np.mean(ys_line))
|
|
||||||
line_curvatures.append((y_center, curvature, coeffs, xs_line, ys_line))
|
|
||||||
|
|
||||||
if len(line_curvatures) < 3:
|
|
||||||
return result
|
|
||||||
|
|
||||||
# Average curvature
|
|
||||||
avg_curvature = float(np.mean([c[1] for c in line_curvatures]))
|
|
||||||
|
|
||||||
if avg_curvature < 1.5:
|
|
||||||
result["confidence"] = 0.3
|
|
||||||
return result
|
|
||||||
|
|
||||||
# Build displacement map from line baselines
|
|
||||||
# For each line, compute the vertical offset needed to straighten
|
|
||||||
displacement_map = np.zeros((h, w), dtype=np.float32)
|
|
||||||
|
|
||||||
for y_center, curvature, coeffs, xs_line, ys_line in line_curvatures:
|
|
||||||
# The displacement is the difference between curved and straight baseline
|
|
||||||
x_range = np.arange(w, dtype=np.float64)
|
|
||||||
fitted_y = np.polyval(coeffs, x_range)
|
|
||||||
straight_y = np.polyval(np.polyfit(xs_line, ys_line, 1), x_range)
|
|
||||||
dy = fitted_y - straight_y
|
|
||||||
|
|
||||||
# Convert vertical curvature to horizontal displacement estimate
|
|
||||||
# (curvature bends text → horizontal shift proportional to curvature)
|
|
||||||
# Use the vertical curvature as proxy for horizontal distortion
|
|
||||||
y_int = int(y_center)
|
|
||||||
spread = max(int(h / len(line_curvatures) / 2), 20)
|
|
||||||
y_start = max(0, y_int - spread)
|
|
||||||
y_end = min(h, y_int + spread)
|
|
||||||
|
|
||||||
for y in range(y_start, y_end):
|
|
||||||
weight = 1.0 - abs(y - y_int) / spread
|
|
||||||
displacement_map[y, :] += (dy * weight).astype(np.float32)
|
|
||||||
|
|
||||||
# Normalize: the displacement map represents vertical shifts
|
|
||||||
# Convert to horizontal displacement (since curvature typically shifts columns)
|
|
||||||
# Use the sign of the 2nd-degree coefficient averaged across lines
|
|
||||||
avg_a = float(np.mean([c[2][0] for c in line_curvatures]))
|
|
||||||
if abs(avg_a) > 0:
|
|
||||||
# Scale displacement map to represent horizontal pixel shifts
|
|
||||||
max_disp = np.max(np.abs(displacement_map))
|
|
||||||
if max_disp > 0:
|
|
||||||
displacement_map = displacement_map * (avg_curvature / max_disp)
|
|
||||||
|
|
||||||
confidence = min(1.0, len(line_curvatures) / 10.0) * 0.8
|
|
||||||
result["curvature_px"] = round(avg_curvature, 2)
|
|
||||||
result["confidence"] = round(float(confidence), 2)
|
|
||||||
result["displacement_map"] = displacement_map
|
|
||||||
|
|
||||||
return result
|
|
||||||
|
|
||||||
|
|
||||||
def _apply_displacement_map(img: np.ndarray, displacement_map: np.ndarray,
|
|
||||||
scale: float = 1.0) -> np.ndarray:
|
|
||||||
"""Apply a horizontal displacement map to an image using cv2.remap().
|
|
||||||
|
|
||||||
Args:
|
Args:
|
||||||
img: BGR image.
|
img: BGR image.
|
||||||
displacement_map: Float32 array (h, w) of horizontal pixel shifts.
|
shear_degrees: Shear angle in degrees. Positive = shift top-right/bottom-left.
|
||||||
scale: Multiplier for the displacement (-3.0 to +3.0).
|
|
||||||
|
|
||||||
Returns:
|
Returns:
|
||||||
Corrected image.
|
Corrected image.
|
||||||
"""
|
"""
|
||||||
|
import math
|
||||||
h, w = img.shape[:2]
|
h, w = img.shape[:2]
|
||||||
|
shear_tan = math.tan(math.radians(shear_degrees))
|
||||||
|
|
||||||
# Base coordinate grids
|
# Affine matrix: shift x by shear_tan * (y - h/2)
|
||||||
map_x = np.tile(np.arange(w, dtype=np.float32), (h, 1))
|
# [1 shear_tan -h/2*shear_tan]
|
||||||
map_y = np.tile(np.arange(h, dtype=np.float32).reshape(-1, 1), (1, w))
|
# [0 1 0 ]
|
||||||
|
M = np.float32([
|
||||||
|
[1, shear_tan, -h / 2.0 * shear_tan],
|
||||||
|
[0, 1, 0],
|
||||||
|
])
|
||||||
|
|
||||||
# Apply scaled displacement
|
corrected = cv2.warpAffine(img, M, (w, h),
|
||||||
map_x = map_x + displacement_map * scale
|
flags=cv2.INTER_LINEAR,
|
||||||
|
borderMode=cv2.BORDER_REPLICATE)
|
||||||
# Remap
|
|
||||||
corrected = cv2.remap(img, map_x, map_y,
|
|
||||||
interpolation=cv2.INTER_LINEAR,
|
|
||||||
borderMode=cv2.BORDER_REPLICATE)
|
|
||||||
return corrected
|
return corrected
|
||||||
|
|
||||||
|
|
||||||
def dewarp_image(img: np.ndarray) -> Tuple[np.ndarray, Dict[str, Any]]:
|
def dewarp_image(img: np.ndarray) -> Tuple[np.ndarray, Dict[str, Any]]:
|
||||||
"""Correct book curvature distortion using the best of two methods.
|
"""Correct vertical shear after deskew.
|
||||||
|
|
||||||
Method A: Vertical edge analysis — detects curvature of the strongest
|
After deskew aligns horizontal text lines, vertical features (column
|
||||||
vertical text edge (left column margin).
|
edges) may still be tilted. This detects the tilt angle of the strongest
|
||||||
|
vertical edge and applies an affine shear correction.
|
||||||
Method B: Text baseline analysis — uses Tesseract word positions to
|
|
||||||
measure baseline curvature across text lines.
|
|
||||||
|
|
||||||
The method with higher confidence wins. Returns the corrected image
|
|
||||||
and a DewarpInfo dict for the API.
|
|
||||||
|
|
||||||
Args:
|
Args:
|
||||||
img: BGR image (already deskewed).
|
img: BGR image (already deskewed).
|
||||||
|
|
||||||
Returns:
|
Returns:
|
||||||
Tuple of (corrected_image, dewarp_info).
|
Tuple of (corrected_image, dewarp_info).
|
||||||
dewarp_info keys: method, curvature_px, confidence, displacement_map.
|
dewarp_info keys: method, shear_degrees, confidence.
|
||||||
"""
|
"""
|
||||||
no_correction = {
|
no_correction = {
|
||||||
"method": "none",
|
"method": "none",
|
||||||
"curvature_px": 0.0,
|
"shear_degrees": 0.0,
|
||||||
"confidence": 0.0,
|
"confidence": 0.0,
|
||||||
"displacement_map": None,
|
|
||||||
}
|
}
|
||||||
|
|
||||||
if not CV2_AVAILABLE:
|
if not CV2_AVAILABLE:
|
||||||
@@ -615,68 +465,44 @@ def dewarp_image(img: np.ndarray) -> Tuple[np.ndarray, Dict[str, Any]]:
|
|||||||
|
|
||||||
t0 = time.time()
|
t0 = time.time()
|
||||||
|
|
||||||
# Run both methods
|
detection = _detect_shear_angle(img)
|
||||||
result_a = _dewarp_by_vertical_edges(img)
|
|
||||||
result_b = _dewarp_by_text_baseline(img)
|
|
||||||
|
|
||||||
duration = time.time() - t0
|
duration = time.time() - t0
|
||||||
|
|
||||||
logger.info(f"dewarp: vertical_edge conf={result_a['confidence']:.2f} "
|
shear_deg = detection["shear_degrees"]
|
||||||
f"curv={result_a['curvature_px']:.1f}px | "
|
confidence = detection["confidence"]
|
||||||
f"text_baseline conf={result_b['confidence']:.2f} "
|
|
||||||
f"curv={result_b['curvature_px']:.1f}px "
|
|
||||||
f"({duration:.2f}s)")
|
|
||||||
|
|
||||||
# Pick best method: prefer significant curvature over high confidence
|
logger.info(f"dewarp: detected shear={shear_deg:.3f}° "
|
||||||
# If one method found real curvature (>5px) and the other didn't (<3px),
|
f"conf={confidence:.2f} ({duration:.2f}s)")
|
||||||
# prefer the one with real curvature regardless of confidence.
|
|
||||||
a_has_curvature = result_a["curvature_px"] >= 5.0 and result_a["displacement_map"] is not None
|
|
||||||
b_has_curvature = result_b["curvature_px"] >= 5.0 and result_b["displacement_map"] is not None
|
|
||||||
|
|
||||||
if a_has_curvature and not b_has_curvature:
|
# Only correct if shear is significant (> 0.05°)
|
||||||
best = result_a
|
if abs(shear_deg) < 0.05 or confidence < 0.3:
|
||||||
elif b_has_curvature and not a_has_curvature:
|
|
||||||
best = result_b
|
|
||||||
elif result_a["confidence"] >= result_b["confidence"]:
|
|
||||||
best = result_a
|
|
||||||
else:
|
|
||||||
best = result_b
|
|
||||||
|
|
||||||
logger.info(f"dewarp: selected {best['method']} "
|
|
||||||
f"(curv={best['curvature_px']:.1f}px, conf={best['confidence']:.2f})")
|
|
||||||
|
|
||||||
if best["displacement_map"] is None or best["curvature_px"] < 2.0:
|
|
||||||
return img, no_correction
|
return img, no_correction
|
||||||
|
|
||||||
# Apply correction
|
# Apply correction (negate the detected shear to straighten)
|
||||||
corrected = _apply_displacement_map(img, best["displacement_map"], scale=1.0)
|
corrected = _apply_shear(img, -shear_deg)
|
||||||
|
|
||||||
info = {
|
info = {
|
||||||
"method": best["method"],
|
"method": detection["method"],
|
||||||
"curvature_px": best["curvature_px"],
|
"shear_degrees": shear_deg,
|
||||||
"confidence": best["confidence"],
|
"confidence": confidence,
|
||||||
"displacement_map": best["displacement_map"],
|
|
||||||
}
|
}
|
||||||
|
|
||||||
return corrected, info
|
return corrected, info
|
||||||
|
|
||||||
|
|
||||||
def dewarp_image_manual(img: np.ndarray, displacement_map: np.ndarray,
|
def dewarp_image_manual(img: np.ndarray, shear_degrees: float) -> np.ndarray:
|
||||||
scale: float) -> np.ndarray:
|
"""Apply shear correction with a manual angle.
|
||||||
"""Apply dewarp with manual scale adjustment.
|
|
||||||
|
|
||||||
Args:
|
Args:
|
||||||
img: BGR image (deskewed, before dewarp).
|
img: BGR image (deskewed, before dewarp).
|
||||||
displacement_map: The displacement map from auto-dewarp.
|
shear_degrees: Shear angle in degrees to correct.
|
||||||
scale: Fraction of auto-detected correction (0.0 = none, 1.0 = auto, 2.0 = double).
|
|
||||||
|
|
||||||
Returns:
|
Returns:
|
||||||
Corrected image.
|
Corrected image.
|
||||||
"""
|
"""
|
||||||
scale = max(0.0, min(2.0, scale))
|
if abs(shear_degrees) < 0.001:
|
||||||
if scale < 0.01:
|
|
||||||
return img
|
return img
|
||||||
return _apply_displacement_map(img, displacement_map, scale=scale)
|
return _apply_shear(img, -shear_degrees)
|
||||||
|
|
||||||
|
|
||||||
# =============================================================================
|
# =============================================================================
|
||||||
|
|||||||
@@ -81,12 +81,12 @@ class DeskewGroundTruthRequest(BaseModel):
|
|||||||
|
|
||||||
|
|
||||||
class ManualDewarpRequest(BaseModel):
|
class ManualDewarpRequest(BaseModel):
|
||||||
scale: float
|
shear_degrees: float
|
||||||
|
|
||||||
|
|
||||||
class DewarpGroundTruthRequest(BaseModel):
|
class DewarpGroundTruthRequest(BaseModel):
|
||||||
is_correct: bool
|
is_correct: bool
|
||||||
corrected_scale: Optional[float] = None
|
corrected_shear: Optional[float] = None
|
||||||
notes: Optional[str] = None
|
notes: Optional[str] = None
|
||||||
|
|
||||||
|
|
||||||
@@ -132,7 +132,7 @@ async def create_session(file: UploadFile = File(...)):
|
|||||||
"dewarped_bgr": None,
|
"dewarped_bgr": None,
|
||||||
"dewarped_png": None,
|
"dewarped_png": None,
|
||||||
"dewarp_result": None,
|
"dewarp_result": None,
|
||||||
"displacement_map": None,
|
"auto_shear_degrees": None,
|
||||||
"ground_truth": {},
|
"ground_truth": {},
|
||||||
"current_step": 1,
|
"current_step": 1,
|
||||||
}
|
}
|
||||||
@@ -352,7 +352,7 @@ async def save_deskew_ground_truth(session_id: str, req: DeskewGroundTruthReques
|
|||||||
|
|
||||||
@router.post("/sessions/{session_id}/dewarp")
|
@router.post("/sessions/{session_id}/dewarp")
|
||||||
async def auto_dewarp(session_id: str):
|
async def auto_dewarp(session_id: str):
|
||||||
"""Run both dewarp methods on the deskewed image and pick the best."""
|
"""Detect and correct vertical shear on the deskewed image."""
|
||||||
session = _get_session(session_id)
|
session = _get_session(session_id)
|
||||||
deskewed_bgr = session.get("deskewed_bgr")
|
deskewed_bgr = session.get("deskewed_bgr")
|
||||||
if deskewed_bgr is None:
|
if deskewed_bgr is None:
|
||||||
@@ -368,22 +368,22 @@ async def auto_dewarp(session_id: str):
|
|||||||
|
|
||||||
session["dewarped_bgr"] = dewarped_bgr
|
session["dewarped_bgr"] = dewarped_bgr
|
||||||
session["dewarped_png"] = dewarped_png
|
session["dewarped_png"] = dewarped_png
|
||||||
|
session["auto_shear_degrees"] = dewarp_info.get("shear_degrees", 0.0)
|
||||||
session["dewarp_result"] = {
|
session["dewarp_result"] = {
|
||||||
"method_used": dewarp_info["method"],
|
"method_used": dewarp_info["method"],
|
||||||
"curvature_px": dewarp_info["curvature_px"],
|
"shear_degrees": dewarp_info["shear_degrees"],
|
||||||
"confidence": dewarp_info["confidence"],
|
"confidence": dewarp_info["confidence"],
|
||||||
"duration_seconds": round(duration, 2),
|
"duration_seconds": round(duration, 2),
|
||||||
}
|
}
|
||||||
session["displacement_map"] = dewarp_info.get("displacement_map")
|
|
||||||
|
|
||||||
logger.info(f"OCR Pipeline: dewarp session {session_id}: "
|
logger.info(f"OCR Pipeline: dewarp session {session_id}: "
|
||||||
f"method={dewarp_info['method']} curvature={dewarp_info['curvature_px']:.1f}px "
|
f"method={dewarp_info['method']} shear={dewarp_info['shear_degrees']:.3f}° "
|
||||||
f"conf={dewarp_info['confidence']:.2f} ({duration:.2f}s)")
|
f"conf={dewarp_info['confidence']:.2f} ({duration:.2f}s)")
|
||||||
|
|
||||||
return {
|
return {
|
||||||
"session_id": session_id,
|
"session_id": session_id,
|
||||||
"method_used": dewarp_info["method"],
|
"method_used": dewarp_info["method"],
|
||||||
"curvature_px": dewarp_info["curvature_px"],
|
"shear_degrees": dewarp_info["shear_degrees"],
|
||||||
"confidence": dewarp_info["confidence"],
|
"confidence": dewarp_info["confidence"],
|
||||||
"duration_seconds": round(duration, 2),
|
"duration_seconds": round(duration, 2),
|
||||||
"dewarped_image_url": f"/api/v1/ocr-pipeline/sessions/{session_id}/image/dewarped",
|
"dewarped_image_url": f"/api/v1/ocr-pipeline/sessions/{session_id}/image/dewarped",
|
||||||
@@ -392,21 +392,19 @@ async def auto_dewarp(session_id: str):
|
|||||||
|
|
||||||
@router.post("/sessions/{session_id}/dewarp/manual")
|
@router.post("/sessions/{session_id}/dewarp/manual")
|
||||||
async def manual_dewarp(session_id: str, req: ManualDewarpRequest):
|
async def manual_dewarp(session_id: str, req: ManualDewarpRequest):
|
||||||
"""Apply dewarp with a manually scaled displacement map."""
|
"""Apply shear correction with a manual angle."""
|
||||||
session = _get_session(session_id)
|
session = _get_session(session_id)
|
||||||
deskewed_bgr = session.get("deskewed_bgr")
|
deskewed_bgr = session.get("deskewed_bgr")
|
||||||
displacement_map = session.get("displacement_map")
|
|
||||||
|
|
||||||
if deskewed_bgr is None:
|
if deskewed_bgr is None:
|
||||||
raise HTTPException(status_code=400, detail="Deskew must be completed before dewarp")
|
raise HTTPException(status_code=400, detail="Deskew must be completed before dewarp")
|
||||||
|
|
||||||
scale = max(0.0, min(2.0, req.scale))
|
shear_deg = max(-2.0, min(2.0, req.shear_degrees))
|
||||||
|
|
||||||
if displacement_map is None or scale < 0.01:
|
if abs(shear_deg) < 0.001:
|
||||||
# No displacement map or zero scale — use deskewed as-is
|
|
||||||
dewarped_bgr = deskewed_bgr
|
dewarped_bgr = deskewed_bgr
|
||||||
else:
|
else:
|
||||||
dewarped_bgr = dewarp_image_manual(deskewed_bgr, displacement_map, scale)
|
dewarped_bgr = dewarp_image_manual(deskewed_bgr, shear_deg)
|
||||||
|
|
||||||
success, png_buf = cv2.imencode(".png", dewarped_bgr)
|
success, png_buf = cv2.imencode(".png", dewarped_bgr)
|
||||||
dewarped_png = png_buf.tobytes() if success else session.get("deskewed_png")
|
dewarped_png = png_buf.tobytes() if success else session.get("deskewed_png")
|
||||||
@@ -416,14 +414,14 @@ async def manual_dewarp(session_id: str, req: ManualDewarpRequest):
|
|||||||
session["dewarp_result"] = {
|
session["dewarp_result"] = {
|
||||||
**(session.get("dewarp_result") or {}),
|
**(session.get("dewarp_result") or {}),
|
||||||
"method_used": "manual",
|
"method_used": "manual",
|
||||||
"scale_applied": round(scale, 2),
|
"shear_degrees": round(shear_deg, 3),
|
||||||
}
|
}
|
||||||
|
|
||||||
logger.info(f"OCR Pipeline: manual dewarp session {session_id}: scale={scale:.2f}")
|
logger.info(f"OCR Pipeline: manual dewarp session {session_id}: shear={shear_deg:.3f}°")
|
||||||
|
|
||||||
return {
|
return {
|
||||||
"session_id": session_id,
|
"session_id": session_id,
|
||||||
"scale_applied": round(scale, 2),
|
"shear_degrees": round(shear_deg, 3),
|
||||||
"method_used": "manual",
|
"method_used": "manual",
|
||||||
"dewarped_image_url": f"/api/v1/ocr-pipeline/sessions/{session_id}/image/dewarped",
|
"dewarped_image_url": f"/api/v1/ocr-pipeline/sessions/{session_id}/image/dewarped",
|
||||||
}
|
}
|
||||||
@@ -436,7 +434,7 @@ async def save_dewarp_ground_truth(session_id: str, req: DewarpGroundTruthReques
|
|||||||
|
|
||||||
gt = {
|
gt = {
|
||||||
"is_correct": req.is_correct,
|
"is_correct": req.is_correct,
|
||||||
"corrected_scale": req.corrected_scale,
|
"corrected_shear": req.corrected_shear,
|
||||||
"notes": req.notes,
|
"notes": req.notes,
|
||||||
"saved_at": datetime.utcnow().isoformat(),
|
"saved_at": datetime.utcnow().isoformat(),
|
||||||
"dewarp_result": session.get("dewarp_result"),
|
"dewarp_result": session.get("dewarp_result"),
|
||||||
@@ -444,6 +442,6 @@ async def save_dewarp_ground_truth(session_id: str, req: DewarpGroundTruthReques
|
|||||||
session["ground_truth"]["dewarp"] = gt
|
session["ground_truth"]["dewarp"] = gt
|
||||||
|
|
||||||
logger.info(f"OCR Pipeline: ground truth dewarp session {session_id}: "
|
logger.info(f"OCR Pipeline: ground truth dewarp session {session_id}: "
|
||||||
f"correct={req.is_correct}, corrected_scale={req.corrected_scale}")
|
f"correct={req.is_correct}, corrected_shear={req.corrected_shear}")
|
||||||
|
|
||||||
return {"session_id": session_id, "ground_truth": gt}
|
return {"session_id": session_id, "ground_truth": gt}
|
||||||
|
|||||||
Reference in New Issue
Block a user