refactor(dewarp): replace displacement map with affine shear correction

The old displacement-map approach shifted entire rows by a parabolic
profile, which introduced a circular (barrel-like) distortion. The actual
problem is a linear vertical shear: after deskew aligns horizontal lines,
the vertical column edges are still tilted by ~0.5°.

New approach:
- Detect shear angle from strongest vertical edge slope (not curvature)
- Apply cv2.warpAffine shear to straighten vertical features
- Manual slider: -2.0° to +2.0° in 0.05° steps
- Slider initializes to auto-detected shear angle
- Ground truth question: "Spalten vertikal ausgerichtet?"

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
Benjamin Admin
2026-02-26 18:23:04 +01:00
parent ff2bb79a91
commit 09b820efbe
5 changed files with 109 additions and 279 deletions

View File

@@ -37,17 +37,16 @@ export interface DeskewGroundTruth {
export interface DewarpResult { export interface DewarpResult {
session_id: string session_id: string
method_used: 'vertical_edge' | 'text_baseline' | 'manual' | 'none' method_used: 'vertical_edge' | 'manual' | 'none'
curvature_px: number shear_degrees: number
confidence: number confidence: number
duration_seconds: number duration_seconds: number
dewarped_image_url: string dewarped_image_url: string
scale_applied?: number
} }
export interface DewarpGroundTruth { export interface DewarpGroundTruth {
is_correct: boolean is_correct: boolean
corrected_scale?: number corrected_shear?: number
notes?: string notes?: string
} }

View File

@@ -1,13 +1,13 @@
'use client' 'use client'
import { useState } from 'react' import { useEffect, useState } from 'react'
import type { DewarpResult, DewarpGroundTruth } from '@/app/(admin)/ai/ocr-pipeline/types' import type { DewarpResult, DewarpGroundTruth } from '@/app/(admin)/ai/ocr-pipeline/types'
interface DewarpControlsProps { interface DewarpControlsProps {
dewarpResult: DewarpResult | null dewarpResult: DewarpResult | null
showGrid: boolean showGrid: boolean
onToggleGrid: () => void onToggleGrid: () => void
onManualDewarp: (scale: number) => void onManualDewarp: (shearDegrees: number) => void
onGroundTruth: (gt: DewarpGroundTruth) => void onGroundTruth: (gt: DewarpGroundTruth) => void
onNext: () => void onNext: () => void
isApplying: boolean isApplying: boolean
@@ -15,7 +15,6 @@ interface DewarpControlsProps {
const METHOD_LABELS: Record<string, string> = { const METHOD_LABELS: Record<string, string> = {
vertical_edge: 'Vertikale Kanten', vertical_edge: 'Vertikale Kanten',
text_baseline: 'Textzeilen-Baseline',
manual: 'Manuell', manual: 'Manuell',
none: 'Keine Korrektur', none: 'Keine Korrektur',
} }
@@ -29,11 +28,18 @@ export function DewarpControls({
onNext, onNext,
isApplying, isApplying,
}: DewarpControlsProps) { }: DewarpControlsProps) {
const [manualScale, setManualScale] = useState(100) const [manualShear, setManualShear] = useState(0)
const [gtFeedback, setGtFeedback] = useState<'correct' | 'incorrect' | null>(null) const [gtFeedback, setGtFeedback] = useState<'correct' | 'incorrect' | null>(null)
const [gtNotes, setGtNotes] = useState('') const [gtNotes, setGtNotes] = useState('')
const [gtSaved, setGtSaved] = useState(false) const [gtSaved, setGtSaved] = useState(false)
// Initialize slider to auto-detected value when result arrives
useEffect(() => {
if (dewarpResult && dewarpResult.shear_degrees !== undefined) {
setManualShear(dewarpResult.shear_degrees)
}
}, [dewarpResult?.shear_degrees])
const handleGroundTruth = (isCorrect: boolean) => { const handleGroundTruth = (isCorrect: boolean) => {
setGtFeedback(isCorrect ? 'correct' : 'incorrect') setGtFeedback(isCorrect ? 'correct' : 'incorrect')
if (isCorrect) { if (isCorrect) {
@@ -45,7 +51,7 @@ export function DewarpControls({
const handleGroundTruthIncorrect = () => { const handleGroundTruthIncorrect = () => {
onGroundTruth({ onGroundTruth({
is_correct: false, is_correct: false,
corrected_scale: manualScale !== 0 ? manualScale : undefined, corrected_shear: manualShear !== 0 ? manualShear : undefined,
notes: gtNotes || undefined, notes: gtNotes || undefined,
}) })
setGtSaved(true) setGtSaved(true)
@@ -58,8 +64,8 @@ export function DewarpControls({
<div className="bg-white dark:bg-gray-800 rounded-lg border border-gray-200 dark:border-gray-700 p-4"> <div className="bg-white dark:bg-gray-800 rounded-lg border border-gray-200 dark:border-gray-700 p-4">
<div className="flex flex-wrap items-center gap-3 text-sm"> <div className="flex flex-wrap items-center gap-3 text-sm">
<div> <div>
<span className="text-gray-500">Kruemmung:</span>{' '} <span className="text-gray-500">Scherung:</span>{' '}
<span className="font-mono font-medium">{dewarpResult.curvature_px} px</span> <span className="font-mono font-medium">{dewarpResult.shear_degrees}°</span>
</div> </div>
<div className="h-4 w-px bg-gray-300 dark:bg-gray-600" /> <div className="h-4 w-px bg-gray-300 dark:bg-gray-600" />
<div> <div>
@@ -91,25 +97,25 @@ export function DewarpControls({
</div> </div>
)} )}
{/* Manual scale slider */} {/* Manual shear angle slider */}
{dewarpResult && ( {dewarpResult && (
<div className="bg-white dark:bg-gray-800 rounded-lg border border-gray-200 dark:border-gray-700 p-4"> <div className="bg-white dark:bg-gray-800 rounded-lg border border-gray-200 dark:border-gray-700 p-4">
<div className="text-sm font-medium text-gray-700 dark:text-gray-300 mb-2">Korrekturstaerke</div> <div className="text-sm font-medium text-gray-700 dark:text-gray-300 mb-2">Scherwinkel (manuell)</div>
<div className="flex items-center gap-3"> <div className="flex items-center gap-3">
<span className="text-xs text-gray-400 w-8 text-right">0%</span> <span className="text-xs text-gray-400 w-10 text-right">-2.0°</span>
<input <input
type="range" type="range"
min={0} min={-200}
max={200} max={200}
step={5} step={5}
value={manualScale} value={Math.round(manualShear * 100)}
onChange={(e) => setManualScale(parseInt(e.target.value))} onChange={(e) => setManualShear(parseInt(e.target.value) / 100)}
className="flex-1 h-2 bg-gray-200 rounded-lg appearance-none cursor-pointer dark:bg-gray-700 accent-teal-500" className="flex-1 h-2 bg-gray-200 rounded-lg appearance-none cursor-pointer dark:bg-gray-700 accent-teal-500"
/> />
<span className="text-xs text-gray-400 w-10">200%</span> <span className="text-xs text-gray-400 w-10">+2.0°</span>
<span className="font-mono text-sm w-14 text-right">{manualScale}%</span> <span className="font-mono text-sm w-16 text-right">{manualShear.toFixed(2)}°</span>
<button <button
onClick={() => onManualDewarp(manualScale / 100)} onClick={() => onManualDewarp(manualShear)}
disabled={isApplying} disabled={isApplying}
className="px-3 py-1.5 text-sm bg-teal-600 text-white rounded-md hover:bg-teal-700 disabled:opacity-50 transition-colors" className="px-3 py-1.5 text-sm bg-teal-600 text-white rounded-md hover:bg-teal-700 disabled:opacity-50 transition-colors"
> >
@@ -117,7 +123,7 @@ export function DewarpControls({
</button> </button>
</div> </div>
<p className="text-xs text-gray-400 mt-1"> <p className="text-xs text-gray-400 mt-1">
100% = automatisch erkannte Korrektur, 0% = keine, 200% = doppelt so stark Scherung der vertikalen Achse in Grad. Positiv = Spalten nach rechts kippen, negativ = nach links.
</p> </p>
</div> </div>
)} )}
@@ -126,8 +132,9 @@ export function DewarpControls({
{dewarpResult && ( {dewarpResult && (
<div className="bg-white dark:bg-gray-800 rounded-lg border border-gray-200 dark:border-gray-700 p-4"> <div className="bg-white dark:bg-gray-800 rounded-lg border border-gray-200 dark:border-gray-700 p-4">
<div className="text-sm font-medium text-gray-700 dark:text-gray-300 mb-2"> <div className="text-sm font-medium text-gray-700 dark:text-gray-300 mb-2">
Korrekt entzerrt? Spalten vertikal ausgerichtet?
</div> </div>
<p className="text-xs text-gray-400 mb-2">Pruefen ob die Spaltenraender jetzt senkrecht zum Raster stehen.</p>
{!gtSaved ? ( {!gtSaved ? (
<div className="space-y-3"> <div className="space-y-3">
<div className="flex gap-2"> <div className="flex gap-2">

View File

@@ -47,7 +47,7 @@ export function StepDewarp({ sessionId, onNext }: StepDewarpProps) {
runDewarp() runDewarp()
}, [sessionId, dewarpResult]) }, [sessionId, dewarpResult])
const handleManualDewarp = useCallback(async (scale: number) => { const handleManualDewarp = useCallback(async (shearDegrees: number) => {
if (!sessionId) return if (!sessionId) return
setApplying(true) setApplying(true)
setError(null) setError(null)
@@ -56,7 +56,7 @@ export function StepDewarp({ sessionId, onNext }: StepDewarpProps) {
const res = await fetch(`${KLAUSUR_API}/api/v1/ocr-pipeline/sessions/${sessionId}/dewarp/manual`, { const res = await fetch(`${KLAUSUR_API}/api/v1/ocr-pipeline/sessions/${sessionId}/dewarp/manual`, {
method: 'POST', method: 'POST',
headers: { 'Content-Type': 'application/json' }, headers: { 'Content-Type': 'application/json' },
body: JSON.stringify({ scale }), body: JSON.stringify({ shear_degrees: shearDegrees }),
}) })
if (!res.ok) throw new Error('Manuelle Entzerrung fehlgeschlagen') if (!res.ok) throw new Error('Manuelle Entzerrung fehlgeschlagen')
@@ -66,7 +66,7 @@ export function StepDewarp({ sessionId, onNext }: StepDewarpProps) {
? { ? {
...prev, ...prev,
method_used: data.method_used, method_used: data.method_used,
scale_applied: data.scale_applied, shear_degrees: data.shear_degrees,
dewarped_image_url: `${KLAUSUR_API}${data.dewarped_image_url}?t=${Date.now()}`, dewarped_image_url: `${KLAUSUR_API}${data.dewarped_image_url}?t=${Date.now()}`,
} }
: null, : null,

View File

@@ -318,18 +318,21 @@ def deskew_image_by_word_alignment(
# Stage 3: Dewarp (Book Curvature Correction) # Stage 3: Dewarp (Book Curvature Correction)
# ============================================================================= # =============================================================================
def _dewarp_by_vertical_edges(img: np.ndarray) -> Dict[str, Any]: def _detect_shear_angle(img: np.ndarray) -> Dict[str, Any]:
"""Method A: Detect curvature from strongest vertical text edges. """Detect the vertical shear angle of the page.
Splits image into horizontal strips, finds the dominant vertical edge After deskew (horizontal lines aligned), vertical features like column
X-position per strip, fits a 2nd-degree polynomial, and generates a edges may still be tilted. This measures that tilt by tracking the
displacement map if curvature exceeds threshold. strongest vertical edge across horizontal strips.
The result is a shear angle in degrees: the angular difference between
true vertical and the detected column edge.
Returns: Returns:
Dict with keys: method, curvature_px, confidence, displacement_map (or None). Dict with keys: method, shear_degrees, confidence.
""" """
h, w = img.shape[:2] h, w = img.shape[:2]
result = {"method": "vertical_edge", "curvature_px": 0.0, "confidence": 0.0, "displacement_map": None} result = {"method": "vertical_edge", "shear_degrees": 0.0, "confidence": 0.0}
gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY) gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
@@ -354,7 +357,7 @@ def _dewarp_by_vertical_edges(img: np.ndarray) -> Dict[str, Any]:
if projection.max() == 0: if projection.max() == 0:
continue continue
# Find the strongest vertical edge in left 40% of image (left margin area) # Find the strongest vertical edge in left 40% of image
search_w = int(w * 0.4) search_w = int(w * 0.4)
left_proj = projection[:search_w] left_proj = projection[:search_w]
if left_proj.max() == 0: if left_proj.max() == 0:
@@ -385,229 +388,76 @@ def _dewarp_by_vertical_edges(img: np.ndarray) -> Dict[str, Any]:
if len(ys) < 6: if len(ys) < 6:
return result return result
# Fit 2nd degree polynomial: x = a*y^2 + b*y + c # Fit straight line: x = slope * y + intercept
coeffs = np.polyfit(ys, xs, 2) # The slope tells us the tilt of the vertical edge
fitted = np.polyval(coeffs, ys) straight_coeffs = np.polyfit(ys, xs, 1)
slope = straight_coeffs[0] # dx/dy in pixels
fitted = np.polyval(straight_coeffs, ys)
residuals = xs - fitted residuals = xs - fitted
rmse = float(np.sqrt(np.mean(residuals ** 2))) rmse = float(np.sqrt(np.mean(residuals ** 2)))
# Measure curvature: max deviation from straight line # Convert slope to angle: arctan(dx/dy) in degrees
straight_coeffs = np.polyfit(ys, xs, 1) import math
straight_fitted = np.polyval(straight_coeffs, ys) shear_degrees = math.degrees(math.atan(slope))
curvature_px = float(np.max(np.abs(fitted - straight_fitted)))
if curvature_px < 2.0:
result["confidence"] = 0.3
return result
# Generate displacement map
y_coords = np.arange(h)
all_fitted = np.polyval(coeffs, y_coords)
all_straight = np.polyval(straight_coeffs, y_coords)
dx_per_row = all_fitted - all_straight # displacement per row
# Create full displacement map: each pixel shifts horizontally by dx_per_row[y]
displacement_map = np.zeros((h, w), dtype=np.float32)
for y in range(h):
displacement_map[y, :] = -dx_per_row[y]
confidence = min(1.0, len(ys) / 15.0) * max(0.5, 1.0 - rmse / 5.0) confidence = min(1.0, len(ys) / 15.0) * max(0.5, 1.0 - rmse / 5.0)
result["curvature_px"] = round(curvature_px, 2) result["shear_degrees"] = round(shear_degrees, 3)
result["confidence"] = round(float(confidence), 2) result["confidence"] = round(float(confidence), 2)
result["displacement_map"] = displacement_map
return result return result
def _dewarp_by_text_baseline(img: np.ndarray) -> Dict[str, Any]: def _apply_shear(img: np.ndarray, shear_degrees: float) -> np.ndarray:
"""Method B: Detect curvature from Tesseract text baseline positions. """Apply a vertical shear correction to an image.
Uses a quick Tesseract pass on a downscaled image, groups words into lines, Shifts each row horizontally proportional to its distance from the
measures baseline curvature per line, and aggregates into a displacement map. vertical center. This corrects the tilt of vertical features (columns)
without affecting horizontal alignment (text lines).
Returns:
Dict with keys: method, curvature_px, confidence, displacement_map (or None).
"""
h, w = img.shape[:2]
result = {"method": "text_baseline", "curvature_px": 0.0, "confidence": 0.0, "displacement_map": None}
if not TESSERACT_AVAILABLE:
return result
# Downscale for speed
max_dim = 1500
scale_factor = min(1.0, max_dim / max(h, w))
if scale_factor < 1.0:
small = cv2.resize(img, (int(w * scale_factor), int(h * scale_factor)), interpolation=cv2.INTER_AREA)
else:
small = img
scale_factor = 1.0
pil_img = Image.fromarray(cv2.cvtColor(small, cv2.COLOR_BGR2RGB))
try:
data = pytesseract.image_to_data(
pil_img, lang="eng+deu", config="--psm 6 --oem 3",
output_type=pytesseract.Output.DICT,
)
except Exception as e:
logger.warning(f"dewarp text_baseline: Tesseract failed: {e}")
return result
# Group words by line
from collections import defaultdict
line_groups: Dict[tuple, list] = defaultdict(list)
for i in range(len(data["text"])):
text = (data["text"][i] or "").strip()
conf = int(data["conf"][i])
if not text or conf < 20:
continue
key = (data["block_num"][i], data["par_num"][i], data["line_num"][i])
line_groups[key].append(i)
if len(line_groups) < 5:
return result
inv_scale = 1.0 / scale_factor
# For each line with enough words, measure baseline curvature
line_curvatures = [] # (y_center, curvature_px)
all_baselines = [] # (y_center, dx_offset) for displacement map
for key, indices in line_groups.items():
if len(indices) < 3:
continue
# Collect baseline points: (x_center, y_bottom) for each word
points = []
for idx in indices:
x_center = (data["left"][idx] + data["width"][idx] / 2.0) * inv_scale
y_bottom = (data["top"][idx] + data["height"][idx]) * inv_scale
points.append((x_center, y_bottom))
points.sort(key=lambda p: p[0])
xs_line = np.array([p[0] for p in points])
ys_line = np.array([p[1] for p in points])
if len(xs_line) < 3:
continue
# Fit 2nd degree: y = a*x^2 + b*x + c
try:
coeffs = np.polyfit(xs_line, ys_line, 2)
except (np.linalg.LinAlgError, ValueError):
continue
fitted = np.polyval(coeffs, xs_line)
straight = np.polyval(np.polyfit(xs_line, ys_line, 1), xs_line)
curvature = float(np.max(np.abs(fitted - straight)))
y_center = float(np.mean(ys_line))
line_curvatures.append((y_center, curvature, coeffs, xs_line, ys_line))
if len(line_curvatures) < 3:
return result
# Average curvature
avg_curvature = float(np.mean([c[1] for c in line_curvatures]))
if avg_curvature < 1.5:
result["confidence"] = 0.3
return result
# Build displacement map from line baselines
# For each line, compute the vertical offset needed to straighten
displacement_map = np.zeros((h, w), dtype=np.float32)
for y_center, curvature, coeffs, xs_line, ys_line in line_curvatures:
# The displacement is the difference between curved and straight baseline
x_range = np.arange(w, dtype=np.float64)
fitted_y = np.polyval(coeffs, x_range)
straight_y = np.polyval(np.polyfit(xs_line, ys_line, 1), x_range)
dy = fitted_y - straight_y
# Convert vertical curvature to horizontal displacement estimate
# (curvature bends text → horizontal shift proportional to curvature)
# Use the vertical curvature as proxy for horizontal distortion
y_int = int(y_center)
spread = max(int(h / len(line_curvatures) / 2), 20)
y_start = max(0, y_int - spread)
y_end = min(h, y_int + spread)
for y in range(y_start, y_end):
weight = 1.0 - abs(y - y_int) / spread
displacement_map[y, :] += (dy * weight).astype(np.float32)
# Normalize: the displacement map represents vertical shifts
# Convert to horizontal displacement (since curvature typically shifts columns)
# Use the sign of the 2nd-degree coefficient averaged across lines
avg_a = float(np.mean([c[2][0] for c in line_curvatures]))
if abs(avg_a) > 0:
# Scale displacement map to represent horizontal pixel shifts
max_disp = np.max(np.abs(displacement_map))
if max_disp > 0:
displacement_map = displacement_map * (avg_curvature / max_disp)
confidence = min(1.0, len(line_curvatures) / 10.0) * 0.8
result["curvature_px"] = round(avg_curvature, 2)
result["confidence"] = round(float(confidence), 2)
result["displacement_map"] = displacement_map
return result
def _apply_displacement_map(img: np.ndarray, displacement_map: np.ndarray,
scale: float = 1.0) -> np.ndarray:
"""Apply a horizontal displacement map to an image using cv2.remap().
Args: Args:
img: BGR image. img: BGR image.
displacement_map: Float32 array (h, w) of horizontal pixel shifts. shear_degrees: Shear angle in degrees. Positive = rows above the vertical center shift left, rows below shift right (image coordinates, y grows downward).
scale: Multiplier for the displacement (-3.0 to +3.0).
Returns: Returns:
Corrected image. Corrected image.
""" """
import math
h, w = img.shape[:2] h, w = img.shape[:2]
shear_tan = math.tan(math.radians(shear_degrees))
# Base coordinate grids # Affine matrix: shift x by shear_tan * (y - h/2)
map_x = np.tile(np.arange(w, dtype=np.float32), (h, 1)) # [1 shear_tan -h/2*shear_tan]
map_y = np.tile(np.arange(h, dtype=np.float32).reshape(-1, 1), (1, w)) # [0 1 0 ]
M = np.float32([
[1, shear_tan, -h / 2.0 * shear_tan],
[0, 1, 0],
])
# Apply scaled displacement corrected = cv2.warpAffine(img, M, (w, h),
map_x = map_x + displacement_map * scale flags=cv2.INTER_LINEAR,
borderMode=cv2.BORDER_REPLICATE)
# Remap
corrected = cv2.remap(img, map_x, map_y,
interpolation=cv2.INTER_LINEAR,
borderMode=cv2.BORDER_REPLICATE)
return corrected return corrected
def dewarp_image(img: np.ndarray) -> Tuple[np.ndarray, Dict[str, Any]]: def dewarp_image(img: np.ndarray) -> Tuple[np.ndarray, Dict[str, Any]]:
"""Correct book curvature distortion using the best of two methods. """Correct vertical shear after deskew.
Method A: Vertical edge analysis — detects curvature of the strongest After deskew aligns horizontal text lines, vertical features (column
vertical text edge (left column margin). edges) may still be tilted. This detects the tilt angle of the strongest
vertical edge and applies an affine shear correction.
Method B: Text baseline analysis — uses Tesseract word positions to
measure baseline curvature across text lines.
The method with higher confidence wins. Returns the corrected image
and a DewarpInfo dict for the API.
Args: Args:
img: BGR image (already deskewed). img: BGR image (already deskewed).
Returns: Returns:
Tuple of (corrected_image, dewarp_info). Tuple of (corrected_image, dewarp_info).
dewarp_info keys: method, curvature_px, confidence, displacement_map. dewarp_info keys: method, shear_degrees, confidence.
""" """
no_correction = { no_correction = {
"method": "none", "method": "none",
"curvature_px": 0.0, "shear_degrees": 0.0,
"confidence": 0.0, "confidence": 0.0,
"displacement_map": None,
} }
if not CV2_AVAILABLE: if not CV2_AVAILABLE:
@@ -615,68 +465,44 @@ def dewarp_image(img: np.ndarray) -> Tuple[np.ndarray, Dict[str, Any]]:
t0 = time.time() t0 = time.time()
# Run both methods detection = _detect_shear_angle(img)
result_a = _dewarp_by_vertical_edges(img)
result_b = _dewarp_by_text_baseline(img)
duration = time.time() - t0 duration = time.time() - t0
logger.info(f"dewarp: vertical_edge conf={result_a['confidence']:.2f} " shear_deg = detection["shear_degrees"]
f"curv={result_a['curvature_px']:.1f}px | " confidence = detection["confidence"]
f"text_baseline conf={result_b['confidence']:.2f} "
f"curv={result_b['curvature_px']:.1f}px "
f"({duration:.2f}s)")
# Pick best method: prefer significant curvature over high confidence logger.info(f"dewarp: detected shear={shear_deg:.3f}° "
# If one method found real curvature (>5px) and the other didn't (<3px), f"conf={confidence:.2f} ({duration:.2f}s)")
# prefer the one with real curvature regardless of confidence.
a_has_curvature = result_a["curvature_px"] >= 5.0 and result_a["displacement_map"] is not None
b_has_curvature = result_b["curvature_px"] >= 5.0 and result_b["displacement_map"] is not None
if a_has_curvature and not b_has_curvature: # Only correct if shear is significant (> 0.05°)
best = result_a if abs(shear_deg) < 0.05 or confidence < 0.3:
elif b_has_curvature and not a_has_curvature:
best = result_b
elif result_a["confidence"] >= result_b["confidence"]:
best = result_a
else:
best = result_b
logger.info(f"dewarp: selected {best['method']} "
f"(curv={best['curvature_px']:.1f}px, conf={best['confidence']:.2f})")
if best["displacement_map"] is None or best["curvature_px"] < 2.0:
return img, no_correction return img, no_correction
# Apply correction # Apply correction (negate the detected shear to straighten)
corrected = _apply_displacement_map(img, best["displacement_map"], scale=1.0) corrected = _apply_shear(img, -shear_deg)
info = { info = {
"method": best["method"], "method": detection["method"],
"curvature_px": best["curvature_px"], "shear_degrees": shear_deg,
"confidence": best["confidence"], "confidence": confidence,
"displacement_map": best["displacement_map"],
} }
return corrected, info return corrected, info
def dewarp_image_manual(img: np.ndarray, displacement_map: np.ndarray, def dewarp_image_manual(img: np.ndarray, shear_degrees: float) -> np.ndarray:
scale: float) -> np.ndarray: """Apply shear correction with a manual angle.
"""Apply dewarp with manual scale adjustment.
Args: Args:
img: BGR image (deskewed, before dewarp). img: BGR image (deskewed, before dewarp).
displacement_map: The displacement map from auto-dewarp. shear_degrees: Shear angle in degrees to correct.
scale: Fraction of auto-detected correction (0.0 = none, 1.0 = auto, 2.0 = double).
Returns: Returns:
Corrected image. Corrected image.
""" """
scale = max(0.0, min(2.0, scale)) if abs(shear_degrees) < 0.001:
if scale < 0.01:
return img return img
return _apply_displacement_map(img, displacement_map, scale=scale) return _apply_shear(img, -shear_degrees)
# ============================================================================= # =============================================================================

View File

@@ -81,12 +81,12 @@ class DeskewGroundTruthRequest(BaseModel):
class ManualDewarpRequest(BaseModel): class ManualDewarpRequest(BaseModel):
scale: float shear_degrees: float
class DewarpGroundTruthRequest(BaseModel): class DewarpGroundTruthRequest(BaseModel):
is_correct: bool is_correct: bool
corrected_scale: Optional[float] = None corrected_shear: Optional[float] = None
notes: Optional[str] = None notes: Optional[str] = None
@@ -132,7 +132,7 @@ async def create_session(file: UploadFile = File(...)):
"dewarped_bgr": None, "dewarped_bgr": None,
"dewarped_png": None, "dewarped_png": None,
"dewarp_result": None, "dewarp_result": None,
"displacement_map": None, "auto_shear_degrees": None,
"ground_truth": {}, "ground_truth": {},
"current_step": 1, "current_step": 1,
} }
@@ -352,7 +352,7 @@ async def save_deskew_ground_truth(session_id: str, req: DeskewGroundTruthReques
@router.post("/sessions/{session_id}/dewarp") @router.post("/sessions/{session_id}/dewarp")
async def auto_dewarp(session_id: str): async def auto_dewarp(session_id: str):
"""Run both dewarp methods on the deskewed image and pick the best.""" """Detect and correct vertical shear on the deskewed image."""
session = _get_session(session_id) session = _get_session(session_id)
deskewed_bgr = session.get("deskewed_bgr") deskewed_bgr = session.get("deskewed_bgr")
if deskewed_bgr is None: if deskewed_bgr is None:
@@ -368,22 +368,22 @@ async def auto_dewarp(session_id: str):
session["dewarped_bgr"] = dewarped_bgr session["dewarped_bgr"] = dewarped_bgr
session["dewarped_png"] = dewarped_png session["dewarped_png"] = dewarped_png
session["auto_shear_degrees"] = dewarp_info.get("shear_degrees", 0.0)
session["dewarp_result"] = { session["dewarp_result"] = {
"method_used": dewarp_info["method"], "method_used": dewarp_info["method"],
"curvature_px": dewarp_info["curvature_px"], "shear_degrees": dewarp_info["shear_degrees"],
"confidence": dewarp_info["confidence"], "confidence": dewarp_info["confidence"],
"duration_seconds": round(duration, 2), "duration_seconds": round(duration, 2),
} }
session["displacement_map"] = dewarp_info.get("displacement_map")
logger.info(f"OCR Pipeline: dewarp session {session_id}: " logger.info(f"OCR Pipeline: dewarp session {session_id}: "
f"method={dewarp_info['method']} curvature={dewarp_info['curvature_px']:.1f}px " f"method={dewarp_info['method']} shear={dewarp_info['shear_degrees']:.3f}° "
f"conf={dewarp_info['confidence']:.2f} ({duration:.2f}s)") f"conf={dewarp_info['confidence']:.2f} ({duration:.2f}s)")
return { return {
"session_id": session_id, "session_id": session_id,
"method_used": dewarp_info["method"], "method_used": dewarp_info["method"],
"curvature_px": dewarp_info["curvature_px"], "shear_degrees": dewarp_info["shear_degrees"],
"confidence": dewarp_info["confidence"], "confidence": dewarp_info["confidence"],
"duration_seconds": round(duration, 2), "duration_seconds": round(duration, 2),
"dewarped_image_url": f"/api/v1/ocr-pipeline/sessions/{session_id}/image/dewarped", "dewarped_image_url": f"/api/v1/ocr-pipeline/sessions/{session_id}/image/dewarped",
@@ -392,21 +392,19 @@ async def auto_dewarp(session_id: str):
@router.post("/sessions/{session_id}/dewarp/manual") @router.post("/sessions/{session_id}/dewarp/manual")
async def manual_dewarp(session_id: str, req: ManualDewarpRequest): async def manual_dewarp(session_id: str, req: ManualDewarpRequest):
"""Apply dewarp with a manually scaled displacement map.""" """Apply shear correction with a manual angle."""
session = _get_session(session_id) session = _get_session(session_id)
deskewed_bgr = session.get("deskewed_bgr") deskewed_bgr = session.get("deskewed_bgr")
displacement_map = session.get("displacement_map")
if deskewed_bgr is None: if deskewed_bgr is None:
raise HTTPException(status_code=400, detail="Deskew must be completed before dewarp") raise HTTPException(status_code=400, detail="Deskew must be completed before dewarp")
scale = max(0.0, min(2.0, req.scale)) shear_deg = max(-2.0, min(2.0, req.shear_degrees))
if displacement_map is None or scale < 0.01: if abs(shear_deg) < 0.001:
# No displacement map or zero scale — use deskewed as-is
dewarped_bgr = deskewed_bgr dewarped_bgr = deskewed_bgr
else: else:
dewarped_bgr = dewarp_image_manual(deskewed_bgr, displacement_map, scale) dewarped_bgr = dewarp_image_manual(deskewed_bgr, shear_deg)
success, png_buf = cv2.imencode(".png", dewarped_bgr) success, png_buf = cv2.imencode(".png", dewarped_bgr)
dewarped_png = png_buf.tobytes() if success else session.get("deskewed_png") dewarped_png = png_buf.tobytes() if success else session.get("deskewed_png")
@@ -416,14 +414,14 @@ async def manual_dewarp(session_id: str, req: ManualDewarpRequest):
session["dewarp_result"] = { session["dewarp_result"] = {
**(session.get("dewarp_result") or {}), **(session.get("dewarp_result") or {}),
"method_used": "manual", "method_used": "manual",
"scale_applied": round(scale, 2), "shear_degrees": round(shear_deg, 3),
} }
logger.info(f"OCR Pipeline: manual dewarp session {session_id}: scale={scale:.2f}") logger.info(f"OCR Pipeline: manual dewarp session {session_id}: shear={shear_deg:.3f}°")
return { return {
"session_id": session_id, "session_id": session_id,
"scale_applied": round(scale, 2), "shear_degrees": round(shear_deg, 3),
"method_used": "manual", "method_used": "manual",
"dewarped_image_url": f"/api/v1/ocr-pipeline/sessions/{session_id}/image/dewarped", "dewarped_image_url": f"/api/v1/ocr-pipeline/sessions/{session_id}/image/dewarped",
} }
@@ -436,7 +434,7 @@ async def save_dewarp_ground_truth(session_id: str, req: DewarpGroundTruthReques
gt = { gt = {
"is_correct": req.is_correct, "is_correct": req.is_correct,
"corrected_scale": req.corrected_scale, "corrected_shear": req.corrected_shear,
"notes": req.notes, "notes": req.notes,
"saved_at": datetime.utcnow().isoformat(), "saved_at": datetime.utcnow().isoformat(),
"dewarp_result": session.get("dewarp_result"), "dewarp_result": session.get("dewarp_result"),
@@ -444,6 +442,6 @@ async def save_dewarp_ground_truth(session_id: str, req: DewarpGroundTruthReques
session["ground_truth"]["dewarp"] = gt session["ground_truth"]["dewarp"] = gt
logger.info(f"OCR Pipeline: ground truth dewarp session {session_id}: " logger.info(f"OCR Pipeline: ground truth dewarp session {session_id}: "
f"correct={req.is_correct}, corrected_scale={req.corrected_scale}") f"correct={req.is_correct}, corrected_shear={req.corrected_shear}")
return {"session_id": session_id, "ground_truth": gt} return {"session_id": session_id, "ground_truth": gt}