refactor(dewarp): replace displacement map with affine shear correction

The old displacement-map approach shifted each row horizontally by a
parabolic profile, which itself introduced a circular (barrel-like)
distortion. The actual problem is a linear vertical shear: after deskew
aligns the horizontal lines, the vertical column edges are still tilted
by ~0.5°.

New approach:
- Detect shear angle from strongest vertical edge slope (not curvature)
- Apply cv2.warpAffine shear to straighten vertical features
- Manual slider: -2.0° to +2.0° in 0.05° steps
- Slider initializes to auto-detected shear angle
- Ground truth question: "Spalten vertikal ausgerichtet?"

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
Benjamin Admin
2026-02-26 18:23:04 +01:00
parent ff2bb79a91
commit 09b820efbe
5 changed files with 109 additions and 279 deletions

View File

@@ -37,17 +37,16 @@ export interface DeskewGroundTruth {
export interface DewarpResult {
session_id: string
method_used: 'vertical_edge' | 'text_baseline' | 'manual' | 'none'
curvature_px: number
method_used: 'vertical_edge' | 'manual' | 'none'
shear_degrees: number
confidence: number
duration_seconds: number
dewarped_image_url: string
scale_applied?: number
}
export interface DewarpGroundTruth {
is_correct: boolean
corrected_scale?: number
corrected_shear?: number
notes?: string
}

View File

@@ -1,13 +1,13 @@
'use client'
import { useState } from 'react'
import { useEffect, useState } from 'react'
import type { DewarpResult, DewarpGroundTruth } from '@/app/(admin)/ai/ocr-pipeline/types'
interface DewarpControlsProps {
dewarpResult: DewarpResult | null
showGrid: boolean
onToggleGrid: () => void
onManualDewarp: (scale: number) => void
onManualDewarp: (shearDegrees: number) => void
onGroundTruth: (gt: DewarpGroundTruth) => void
onNext: () => void
isApplying: boolean
@@ -15,7 +15,6 @@ interface DewarpControlsProps {
const METHOD_LABELS: Record<string, string> = {
vertical_edge: 'Vertikale Kanten',
text_baseline: 'Textzeilen-Baseline',
manual: 'Manuell',
none: 'Keine Korrektur',
}
@@ -29,11 +28,18 @@ export function DewarpControls({
onNext,
isApplying,
}: DewarpControlsProps) {
const [manualScale, setManualScale] = useState(100)
const [manualShear, setManualShear] = useState(0)
const [gtFeedback, setGtFeedback] = useState<'correct' | 'incorrect' | null>(null)
const [gtNotes, setGtNotes] = useState('')
const [gtSaved, setGtSaved] = useState(false)
// Initialize slider to auto-detected value when result arrives
useEffect(() => {
if (dewarpResult && dewarpResult.shear_degrees !== undefined) {
setManualShear(dewarpResult.shear_degrees)
}
}, [dewarpResult?.shear_degrees])
const handleGroundTruth = (isCorrect: boolean) => {
setGtFeedback(isCorrect ? 'correct' : 'incorrect')
if (isCorrect) {
@@ -45,7 +51,7 @@ export function DewarpControls({
const handleGroundTruthIncorrect = () => {
onGroundTruth({
is_correct: false,
corrected_scale: manualScale !== 0 ? manualScale : undefined,
corrected_shear: manualShear !== 0 ? manualShear : undefined,
notes: gtNotes || undefined,
})
setGtSaved(true)
@@ -58,8 +64,8 @@ export function DewarpControls({
<div className="bg-white dark:bg-gray-800 rounded-lg border border-gray-200 dark:border-gray-700 p-4">
<div className="flex flex-wrap items-center gap-3 text-sm">
<div>
<span className="text-gray-500">Kruemmung:</span>{' '}
<span className="font-mono font-medium">{dewarpResult.curvature_px} px</span>
<span className="text-gray-500">Scherung:</span>{' '}
<span className="font-mono font-medium">{dewarpResult.shear_degrees}°</span>
</div>
<div className="h-4 w-px bg-gray-300 dark:bg-gray-600" />
<div>
@@ -91,25 +97,25 @@ export function DewarpControls({
</div>
)}
{/* Manual scale slider */}
{/* Manual shear angle slider */}
{dewarpResult && (
<div className="bg-white dark:bg-gray-800 rounded-lg border border-gray-200 dark:border-gray-700 p-4">
<div className="text-sm font-medium text-gray-700 dark:text-gray-300 mb-2">Korrekturstaerke</div>
<div className="text-sm font-medium text-gray-700 dark:text-gray-300 mb-2">Scherwinkel (manuell)</div>
<div className="flex items-center gap-3">
<span className="text-xs text-gray-400 w-8 text-right">0%</span>
<span className="text-xs text-gray-400 w-10 text-right">-2.0°</span>
<input
type="range"
min={0}
min={-200}
max={200}
step={5}
value={manualScale}
onChange={(e) => setManualScale(parseInt(e.target.value))}
value={Math.round(manualShear * 100)}
onChange={(e) => setManualShear(parseInt(e.target.value) / 100)}
className="flex-1 h-2 bg-gray-200 rounded-lg appearance-none cursor-pointer dark:bg-gray-700 accent-teal-500"
/>
<span className="text-xs text-gray-400 w-10">200%</span>
<span className="font-mono text-sm w-14 text-right">{manualScale}%</span>
<span className="text-xs text-gray-400 w-10">+2.0°</span>
<span className="font-mono text-sm w-16 text-right">{manualShear.toFixed(2)}°</span>
<button
onClick={() => onManualDewarp(manualScale / 100)}
onClick={() => onManualDewarp(manualShear)}
disabled={isApplying}
className="px-3 py-1.5 text-sm bg-teal-600 text-white rounded-md hover:bg-teal-700 disabled:opacity-50 transition-colors"
>
@@ -117,7 +123,7 @@ export function DewarpControls({
</button>
</div>
<p className="text-xs text-gray-400 mt-1">
100% = automatisch erkannte Korrektur, 0% = keine, 200% = doppelt so stark
Scherung der vertikalen Achse in Grad. Positiv = Spalten nach rechts kippen, negativ = nach links.
</p>
</div>
)}
@@ -126,8 +132,9 @@ export function DewarpControls({
{dewarpResult && (
<div className="bg-white dark:bg-gray-800 rounded-lg border border-gray-200 dark:border-gray-700 p-4">
<div className="text-sm font-medium text-gray-700 dark:text-gray-300 mb-2">
Korrekt entzerrt?
Spalten vertikal ausgerichtet?
</div>
<p className="text-xs text-gray-400 mb-2">Pruefen ob die Spaltenraender jetzt senkrecht zum Raster stehen.</p>
{!gtSaved ? (
<div className="space-y-3">
<div className="flex gap-2">

View File

@@ -47,7 +47,7 @@ export function StepDewarp({ sessionId, onNext }: StepDewarpProps) {
runDewarp()
}, [sessionId, dewarpResult])
const handleManualDewarp = useCallback(async (scale: number) => {
const handleManualDewarp = useCallback(async (shearDegrees: number) => {
if (!sessionId) return
setApplying(true)
setError(null)
@@ -56,7 +56,7 @@ export function StepDewarp({ sessionId, onNext }: StepDewarpProps) {
const res = await fetch(`${KLAUSUR_API}/api/v1/ocr-pipeline/sessions/${sessionId}/dewarp/manual`, {
method: 'POST',
headers: { 'Content-Type': 'application/json' },
body: JSON.stringify({ scale }),
body: JSON.stringify({ shear_degrees: shearDegrees }),
})
if (!res.ok) throw new Error('Manuelle Entzerrung fehlgeschlagen')
@@ -66,7 +66,7 @@ export function StepDewarp({ sessionId, onNext }: StepDewarpProps) {
? {
...prev,
method_used: data.method_used,
scale_applied: data.scale_applied,
shear_degrees: data.shear_degrees,
dewarped_image_url: `${KLAUSUR_API}${data.dewarped_image_url}?t=${Date.now()}`,
}
: null,

View File

@@ -318,18 +318,21 @@ def deskew_image_by_word_alignment(
# Stage 3: Dewarp (Vertical Shear Correction)
# =============================================================================
def _dewarp_by_vertical_edges(img: np.ndarray) -> Dict[str, Any]:
"""Method A: Detect curvature from strongest vertical text edges.
def _detect_shear_angle(img: np.ndarray) -> Dict[str, Any]:
"""Detect the vertical shear angle of the page.
Splits image into horizontal strips, finds the dominant vertical edge
X-position per strip, fits a 2nd-degree polynomial, and generates a
displacement map if curvature exceeds threshold.
After deskew (horizontal lines aligned), vertical features like column
edges may still be tilted. This measures that tilt by tracking the
strongest vertical edge across horizontal strips.
The result is a shear angle in degrees: the angular difference between
true vertical and the detected column edge.
Returns:
Dict with keys: method, curvature_px, confidence, displacement_map (or None).
Dict with keys: method, shear_degrees, confidence.
"""
h, w = img.shape[:2]
result = {"method": "vertical_edge", "curvature_px": 0.0, "confidence": 0.0, "displacement_map": None}
result = {"method": "vertical_edge", "shear_degrees": 0.0, "confidence": 0.0}
gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
@@ -354,7 +357,7 @@ def _dewarp_by_vertical_edges(img: np.ndarray) -> Dict[str, Any]:
if projection.max() == 0:
continue
# Find the strongest vertical edge in left 40% of image (left margin area)
# Find the strongest vertical edge in left 40% of image
search_w = int(w * 0.4)
left_proj = projection[:search_w]
if left_proj.max() == 0:
@@ -385,229 +388,76 @@ def _dewarp_by_vertical_edges(img: np.ndarray) -> Dict[str, Any]:
if len(ys) < 6:
return result
# Fit 2nd degree polynomial: x = a*y^2 + b*y + c
coeffs = np.polyfit(ys, xs, 2)
fitted = np.polyval(coeffs, ys)
# Fit straight line: x = slope * y + intercept
# The slope tells us the tilt of the vertical edge
straight_coeffs = np.polyfit(ys, xs, 1)
slope = straight_coeffs[0] # dx/dy in pixels
fitted = np.polyval(straight_coeffs, ys)
residuals = xs - fitted
rmse = float(np.sqrt(np.mean(residuals ** 2)))
# Measure curvature: max deviation from straight line
straight_coeffs = np.polyfit(ys, xs, 1)
straight_fitted = np.polyval(straight_coeffs, ys)
curvature_px = float(np.max(np.abs(fitted - straight_fitted)))
if curvature_px < 2.0:
result["confidence"] = 0.3
return result
# Generate displacement map
y_coords = np.arange(h)
all_fitted = np.polyval(coeffs, y_coords)
all_straight = np.polyval(straight_coeffs, y_coords)
dx_per_row = all_fitted - all_straight # displacement per row
# Create full displacement map: each pixel shifts horizontally by dx_per_row[y]
displacement_map = np.zeros((h, w), dtype=np.float32)
for y in range(h):
displacement_map[y, :] = -dx_per_row[y]
# Convert slope to angle: arctan(dx/dy) in degrees
import math
shear_degrees = math.degrees(math.atan(slope))
confidence = min(1.0, len(ys) / 15.0) * max(0.5, 1.0 - rmse / 5.0)
result["curvature_px"] = round(curvature_px, 2)
result["shear_degrees"] = round(shear_degrees, 3)
result["confidence"] = round(float(confidence), 2)
result["displacement_map"] = displacement_map
return result
def _dewarp_by_text_baseline(img: np.ndarray) -> Dict[str, Any]:
"""Method B: Detect curvature from Tesseract text baseline positions.
def _apply_shear(img: np.ndarray, shear_degrees: float) -> np.ndarray:
"""Apply a vertical shear correction to an image.
Uses a quick Tesseract pass on a downscaled image, groups words into lines,
measures baseline curvature per line, and aggregates into a displacement map.
Returns:
Dict with keys: method, curvature_px, confidence, displacement_map (or None).
"""
h, w = img.shape[:2]
result = {"method": "text_baseline", "curvature_px": 0.0, "confidence": 0.0, "displacement_map": None}
if not TESSERACT_AVAILABLE:
return result
# Downscale for speed
max_dim = 1500
scale_factor = min(1.0, max_dim / max(h, w))
if scale_factor < 1.0:
small = cv2.resize(img, (int(w * scale_factor), int(h * scale_factor)), interpolation=cv2.INTER_AREA)
else:
small = img
scale_factor = 1.0
pil_img = Image.fromarray(cv2.cvtColor(small, cv2.COLOR_BGR2RGB))
try:
data = pytesseract.image_to_data(
pil_img, lang="eng+deu", config="--psm 6 --oem 3",
output_type=pytesseract.Output.DICT,
)
except Exception as e:
logger.warning(f"dewarp text_baseline: Tesseract failed: {e}")
return result
# Group words by line
from collections import defaultdict
line_groups: Dict[tuple, list] = defaultdict(list)
for i in range(len(data["text"])):
text = (data["text"][i] or "").strip()
conf = int(data["conf"][i])
if not text or conf < 20:
continue
key = (data["block_num"][i], data["par_num"][i], data["line_num"][i])
line_groups[key].append(i)
if len(line_groups) < 5:
return result
inv_scale = 1.0 / scale_factor
# For each line with enough words, measure baseline curvature
line_curvatures = [] # (y_center, curvature_px)
all_baselines = [] # (y_center, dx_offset) for displacement map
for key, indices in line_groups.items():
if len(indices) < 3:
continue
# Collect baseline points: (x_center, y_bottom) for each word
points = []
for idx in indices:
x_center = (data["left"][idx] + data["width"][idx] / 2.0) * inv_scale
y_bottom = (data["top"][idx] + data["height"][idx]) * inv_scale
points.append((x_center, y_bottom))
points.sort(key=lambda p: p[0])
xs_line = np.array([p[0] for p in points])
ys_line = np.array([p[1] for p in points])
if len(xs_line) < 3:
continue
# Fit 2nd degree: y = a*x^2 + b*x + c
try:
coeffs = np.polyfit(xs_line, ys_line, 2)
except (np.linalg.LinAlgError, ValueError):
continue
fitted = np.polyval(coeffs, xs_line)
straight = np.polyval(np.polyfit(xs_line, ys_line, 1), xs_line)
curvature = float(np.max(np.abs(fitted - straight)))
y_center = float(np.mean(ys_line))
line_curvatures.append((y_center, curvature, coeffs, xs_line, ys_line))
if len(line_curvatures) < 3:
return result
# Average curvature
avg_curvature = float(np.mean([c[1] for c in line_curvatures]))
if avg_curvature < 1.5:
result["confidence"] = 0.3
return result
# Build displacement map from line baselines
# For each line, compute the vertical offset needed to straighten
displacement_map = np.zeros((h, w), dtype=np.float32)
for y_center, curvature, coeffs, xs_line, ys_line in line_curvatures:
# The displacement is the difference between curved and straight baseline
x_range = np.arange(w, dtype=np.float64)
fitted_y = np.polyval(coeffs, x_range)
straight_y = np.polyval(np.polyfit(xs_line, ys_line, 1), x_range)
dy = fitted_y - straight_y
# Convert vertical curvature to horizontal displacement estimate
# (curvature bends text → horizontal shift proportional to curvature)
# Use the vertical curvature as proxy for horizontal distortion
y_int = int(y_center)
spread = max(int(h / len(line_curvatures) / 2), 20)
y_start = max(0, y_int - spread)
y_end = min(h, y_int + spread)
for y in range(y_start, y_end):
weight = 1.0 - abs(y - y_int) / spread
displacement_map[y, :] += (dy * weight).astype(np.float32)
# Normalize: the displacement map represents vertical shifts
# Convert to horizontal displacement (since curvature typically shifts columns)
# Use the sign of the 2nd-degree coefficient averaged across lines
avg_a = float(np.mean([c[2][0] for c in line_curvatures]))
if abs(avg_a) > 0:
# Scale displacement map to represent horizontal pixel shifts
max_disp = np.max(np.abs(displacement_map))
if max_disp > 0:
displacement_map = displacement_map * (avg_curvature / max_disp)
confidence = min(1.0, len(line_curvatures) / 10.0) * 0.8
result["curvature_px"] = round(avg_curvature, 2)
result["confidence"] = round(float(confidence), 2)
result["displacement_map"] = displacement_map
return result
def _apply_displacement_map(img: np.ndarray, displacement_map: np.ndarray,
scale: float = 1.0) -> np.ndarray:
"""Apply a horizontal displacement map to an image using cv2.remap().
Shifts each row horizontally proportional to its distance from the
vertical center. This corrects the tilt of vertical features (columns)
without affecting horizontal alignment (text lines).
Args:
img: BGR image.
displacement_map: Float32 array (h, w) of horizontal pixel shifts.
scale: Multiplier for the displacement (-3.0 to +3.0).
shear_degrees: Shear angle in degrees. Positive = shift top-right/bottom-left.
Returns:
Corrected image.
"""
import math
h, w = img.shape[:2]
shear_tan = math.tan(math.radians(shear_degrees))
# Base coordinate grids
map_x = np.tile(np.arange(w, dtype=np.float32), (h, 1))
map_y = np.tile(np.arange(h, dtype=np.float32).reshape(-1, 1), (1, w))
# Affine matrix: shift x by shear_tan * (y - h/2)
# [1 shear_tan -h/2*shear_tan]
# [0 1 0 ]
M = np.float32([
[1, shear_tan, -h / 2.0 * shear_tan],
[0, 1, 0],
])
# Apply scaled displacement
map_x = map_x + displacement_map * scale
# Remap
corrected = cv2.remap(img, map_x, map_y,
interpolation=cv2.INTER_LINEAR,
borderMode=cv2.BORDER_REPLICATE)
corrected = cv2.warpAffine(img, M, (w, h),
flags=cv2.INTER_LINEAR,
borderMode=cv2.BORDER_REPLICATE)
return corrected
def dewarp_image(img: np.ndarray) -> Tuple[np.ndarray, Dict[str, Any]]:
"""Correct book curvature distortion using the best of two methods.
"""Correct vertical shear after deskew.
Method A: Vertical edge analysis — detects curvature of the strongest
vertical text edge (left column margin).
Method B: Text baseline analysis — uses Tesseract word positions to
measure baseline curvature across text lines.
The method with higher confidence wins. Returns the corrected image
and a DewarpInfo dict for the API.
After deskew aligns horizontal text lines, vertical features (column
edges) may still be tilted. This detects the tilt angle of the strongest
vertical edge and applies an affine shear correction.
Args:
img: BGR image (already deskewed).
Returns:
Tuple of (corrected_image, dewarp_info).
dewarp_info keys: method, curvature_px, confidence, displacement_map.
dewarp_info keys: method, shear_degrees, confidence.
"""
no_correction = {
"method": "none",
"curvature_px": 0.0,
"shear_degrees": 0.0,
"confidence": 0.0,
"displacement_map": None,
}
if not CV2_AVAILABLE:
@@ -615,68 +465,44 @@ def dewarp_image(img: np.ndarray) -> Tuple[np.ndarray, Dict[str, Any]]:
t0 = time.time()
# Run both methods
result_a = _dewarp_by_vertical_edges(img)
result_b = _dewarp_by_text_baseline(img)
detection = _detect_shear_angle(img)
duration = time.time() - t0
logger.info(f"dewarp: vertical_edge conf={result_a['confidence']:.2f} "
f"curv={result_a['curvature_px']:.1f}px | "
f"text_baseline conf={result_b['confidence']:.2f} "
f"curv={result_b['curvature_px']:.1f}px "
f"({duration:.2f}s)")
shear_deg = detection["shear_degrees"]
confidence = detection["confidence"]
# Pick best method: prefer significant curvature over high confidence
# If one method found real curvature (>5px) and the other didn't (<3px),
# prefer the one with real curvature regardless of confidence.
a_has_curvature = result_a["curvature_px"] >= 5.0 and result_a["displacement_map"] is not None
b_has_curvature = result_b["curvature_px"] >= 5.0 and result_b["displacement_map"] is not None
logger.info(f"dewarp: detected shear={shear_deg:.3f}° "
f"conf={confidence:.2f} ({duration:.2f}s)")
if a_has_curvature and not b_has_curvature:
best = result_a
elif b_has_curvature and not a_has_curvature:
best = result_b
elif result_a["confidence"] >= result_b["confidence"]:
best = result_a
else:
best = result_b
logger.info(f"dewarp: selected {best['method']} "
f"(curv={best['curvature_px']:.1f}px, conf={best['confidence']:.2f})")
if best["displacement_map"] is None or best["curvature_px"] < 2.0:
# Only correct if shear is significant (> 0.05°)
if abs(shear_deg) < 0.05 or confidence < 0.3:
return img, no_correction
# Apply correction
corrected = _apply_displacement_map(img, best["displacement_map"], scale=1.0)
# Apply correction (negate the detected shear to straighten)
corrected = _apply_shear(img, -shear_deg)
info = {
"method": best["method"],
"curvature_px": best["curvature_px"],
"confidence": best["confidence"],
"displacement_map": best["displacement_map"],
"method": detection["method"],
"shear_degrees": shear_deg,
"confidence": confidence,
}
return corrected, info
def dewarp_image_manual(img: np.ndarray, displacement_map: np.ndarray,
scale: float) -> np.ndarray:
"""Apply dewarp with manual scale adjustment.
def dewarp_image_manual(img: np.ndarray, shear_degrees: float) -> np.ndarray:
"""Apply shear correction with a manual angle.
Args:
img: BGR image (deskewed, before dewarp).
displacement_map: The displacement map from auto-dewarp.
scale: Fraction of auto-detected correction (0.0 = none, 1.0 = auto, 2.0 = double).
shear_degrees: Shear angle in degrees to correct.
Returns:
Corrected image.
"""
scale = max(0.0, min(2.0, scale))
if scale < 0.01:
if abs(shear_degrees) < 0.001:
return img
return _apply_displacement_map(img, displacement_map, scale=scale)
return _apply_shear(img, -shear_degrees)
# =============================================================================

View File

@@ -81,12 +81,12 @@ class DeskewGroundTruthRequest(BaseModel):
class ManualDewarpRequest(BaseModel):
scale: float
shear_degrees: float
class DewarpGroundTruthRequest(BaseModel):
is_correct: bool
corrected_scale: Optional[float] = None
corrected_shear: Optional[float] = None
notes: Optional[str] = None
@@ -132,7 +132,7 @@ async def create_session(file: UploadFile = File(...)):
"dewarped_bgr": None,
"dewarped_png": None,
"dewarp_result": None,
"displacement_map": None,
"auto_shear_degrees": None,
"ground_truth": {},
"current_step": 1,
}
@@ -352,7 +352,7 @@ async def save_deskew_ground_truth(session_id: str, req: DeskewGroundTruthReques
@router.post("/sessions/{session_id}/dewarp")
async def auto_dewarp(session_id: str):
"""Run both dewarp methods on the deskewed image and pick the best."""
"""Detect and correct vertical shear on the deskewed image."""
session = _get_session(session_id)
deskewed_bgr = session.get("deskewed_bgr")
if deskewed_bgr is None:
@@ -368,22 +368,22 @@ async def auto_dewarp(session_id: str):
session["dewarped_bgr"] = dewarped_bgr
session["dewarped_png"] = dewarped_png
session["auto_shear_degrees"] = dewarp_info.get("shear_degrees", 0.0)
session["dewarp_result"] = {
"method_used": dewarp_info["method"],
"curvature_px": dewarp_info["curvature_px"],
"shear_degrees": dewarp_info["shear_degrees"],
"confidence": dewarp_info["confidence"],
"duration_seconds": round(duration, 2),
}
session["displacement_map"] = dewarp_info.get("displacement_map")
logger.info(f"OCR Pipeline: dewarp session {session_id}: "
f"method={dewarp_info['method']} curvature={dewarp_info['curvature_px']:.1f}px "
f"method={dewarp_info['method']} shear={dewarp_info['shear_degrees']:.3f}° "
f"conf={dewarp_info['confidence']:.2f} ({duration:.2f}s)")
return {
"session_id": session_id,
"method_used": dewarp_info["method"],
"curvature_px": dewarp_info["curvature_px"],
"shear_degrees": dewarp_info["shear_degrees"],
"confidence": dewarp_info["confidence"],
"duration_seconds": round(duration, 2),
"dewarped_image_url": f"/api/v1/ocr-pipeline/sessions/{session_id}/image/dewarped",
@@ -392,21 +392,19 @@ async def auto_dewarp(session_id: str):
@router.post("/sessions/{session_id}/dewarp/manual")
async def manual_dewarp(session_id: str, req: ManualDewarpRequest):
"""Apply dewarp with a manually scaled displacement map."""
"""Apply shear correction with a manual angle."""
session = _get_session(session_id)
deskewed_bgr = session.get("deskewed_bgr")
displacement_map = session.get("displacement_map")
if deskewed_bgr is None:
raise HTTPException(status_code=400, detail="Deskew must be completed before dewarp")
scale = max(0.0, min(2.0, req.scale))
shear_deg = max(-2.0, min(2.0, req.shear_degrees))
if displacement_map is None or scale < 0.01:
# No displacement map or zero scale — use deskewed as-is
if abs(shear_deg) < 0.001:
dewarped_bgr = deskewed_bgr
else:
dewarped_bgr = dewarp_image_manual(deskewed_bgr, displacement_map, scale)
dewarped_bgr = dewarp_image_manual(deskewed_bgr, shear_deg)
success, png_buf = cv2.imencode(".png", dewarped_bgr)
dewarped_png = png_buf.tobytes() if success else session.get("deskewed_png")
@@ -416,14 +414,14 @@ async def manual_dewarp(session_id: str, req: ManualDewarpRequest):
session["dewarp_result"] = {
**(session.get("dewarp_result") or {}),
"method_used": "manual",
"scale_applied": round(scale, 2),
"shear_degrees": round(shear_deg, 3),
}
logger.info(f"OCR Pipeline: manual dewarp session {session_id}: scale={scale:.2f}")
logger.info(f"OCR Pipeline: manual dewarp session {session_id}: shear={shear_deg:.3f}°")
return {
"session_id": session_id,
"scale_applied": round(scale, 2),
"shear_degrees": round(shear_deg, 3),
"method_used": "manual",
"dewarped_image_url": f"/api/v1/ocr-pipeline/sessions/{session_id}/image/dewarped",
}
@@ -436,7 +434,7 @@ async def save_dewarp_ground_truth(session_id: str, req: DewarpGroundTruthReques
gt = {
"is_correct": req.is_correct,
"corrected_scale": req.corrected_scale,
"corrected_shear": req.corrected_shear,
"notes": req.notes,
"saved_at": datetime.utcnow().isoformat(),
"dewarp_result": session.get("dewarp_result"),
@@ -444,6 +442,6 @@ async def save_dewarp_ground_truth(session_id: str, req: DewarpGroundTruthReques
session["ground_truth"]["dewarp"] = gt
logger.info(f"OCR Pipeline: ground truth dewarp session {session_id}: "
f"correct={req.is_correct}, corrected_scale={req.corrected_scale}")
f"correct={req.is_correct}, corrected_shear={req.corrected_shear}")
return {"session_id": session_id, "ground_truth": gt}