fix: deskew iterative — use vertical Sobel edges + vertical projection

The horizontal projection of the binary image is insensitive to small skews (around 0.5°) because the text rows look nearly identical at neighbouring angles. The real discriminator is vertical edge alignment: at the correct angle, word left-edges and column borders become truly vertical, producing sharp peaks in the vertical projection of the Sobel-X edges.

Also: rotate with BORDER_REPLICATE and trim the borders of the rotated crop to avoid border artifacts.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-03-05 14:23:43 +01:00
parent 68a6b97654
commit b8a9493310
1 changed files with 43 additions and 26 deletions
@@ -405,8 +405,7 @@ def _projection_gradient_score(profile: np.ndarray) -> float:
    """Score a projection profile by the L2-norm of its first derivative.

    Higher score = sharper transitions between text-lines and gaps,
-    i.e. better row/column alignment.  Much more sensitive to small
-    angular differences than plain variance.
+    i.e. better row/column alignment.
    """
    diff = np.diff(profile)
    return float(np.sum(diff * diff))
@@ -419,14 +418,17 @@ def deskew_image_iterative(
    fine_range: float = 0.15,
    fine_step: float = 0.02,
 ) -> Tuple[np.ndarray, float, Dict[str, Any]]:
-    """Iterative deskew using projection-profile gradient optimisation.
+    """Iterative deskew using vertical-edge projection optimisation.

-    Two-phase search using *horizontal* projection profiles (row sums)
-    in both phases.  The gradient score (sum of squared first-differences)
-    is far more sensitive to small rotations than plain variance.
+    The key insight: at the correct rotation angle, vertical features
+    (word left-edges, column borders) become truly vertical, producing
+    the sharpest peaks in the vertical projection of vertical edges.

-    Phase 1 (coarse): -2.0° … +2.0° in 0.1° steps  (41 angles)
-    Phase 2 (fine):   ±0.15° around coarse winner in 0.02° steps (≤16 angles)
+    Method:
+      1. Detect vertical edges via Sobel-X on the central crop.
+      2. Coarse sweep: rotate edge image, compute vertical projection
+         gradient score.  The angle where vertical edges align best wins.
+      3. Fine sweep: refine around the coarse winner.

    Args:
        img: BGR image (full resolution).
@@ -441,37 +443,52 @@ def deskew_image_iterative(
    h, w = img.shape[:2]
    debug: Dict[str, Any] = {}

-    # --- Binarise once (grayscale + Otsu) ---
+    # --- Grayscale + vertical edge detection ---
    gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
-    _, binary = cv2.threshold(gray, 0, 255, cv2.THRESH_BINARY_INV + cv2.THRESH_OTSU)

-    # --- Central crop (20%-80% height) for fast rotation ---
-    y_lo = int(h * 0.2)
-    y_hi = int(h * 0.8)
-    crop = binary[y_lo:y_hi, :]
-    crop_h, crop_w = crop.shape[:2]
+    # Central crop (15%-85% height, 10%-90% width) to avoid page margins
+    y_lo, y_hi = int(h * 0.15), int(h * 0.85)
+    x_lo, x_hi = int(w * 0.10), int(w * 0.90)
+    gray_crop = gray[y_lo:y_hi, x_lo:x_hi]
+
+    # Sobel-X → absolute vertical edges
+    sobel_x = cv2.Sobel(gray_crop, cv2.CV_64F, 1, 0, ksize=3)
+    edges = np.abs(sobel_x)
+    # Normalise to 0-255 for consistent scoring
+    edge_max = edges.max()
+    if edge_max > 0:
+        edges = (edges / edge_max * 255).astype(np.uint8)
+    else:
+        return img, 0.0, {"error": "no edges detected"}
+
+    crop_h, crop_w = edges.shape[:2]
    crop_center = (crop_w // 2, crop_h // 2)

-    def _sweep(angles: np.ndarray) -> list:
-        """Return [(angle, score), ...] for horizontal projection gradient."""
+    # Trim margin after rotation to avoid border artifacts
+    trim_y = max(4, int(crop_h * 0.03))
+    trim_x = max(4, int(crop_w * 0.03))
+
+    def _sweep_edges(angles: np.ndarray) -> list:
+        """Score each angle by vertical projection gradient of vertical edges."""
        results = []
        for angle in angles:
            if abs(angle) < 1e-6:
-                rotated_crop = crop
+                rotated = edges
            else:
                M = cv2.getRotationMatrix2D(crop_center, angle, 1.0)
-                rotated_crop = cv2.warpAffine(crop, M, (crop_w, crop_h),
-                                              flags=cv2.INTER_NEAREST,
-                                              borderMode=cv2.BORDER_CONSTANT,
-                                              borderValue=0)
-            h_profile = np.sum(rotated_crop, axis=1, dtype=np.float64)
-            score = _projection_gradient_score(h_profile)
+                rotated = cv2.warpAffine(edges, M, (crop_w, crop_h),
+                                         flags=cv2.INTER_NEAREST,
+                                         borderMode=cv2.BORDER_REPLICATE)
+            # Trim borders to avoid edge artifacts
+            trimmed = rotated[trim_y:-trim_y, trim_x:-trim_x]
+            v_profile = np.sum(trimmed, axis=0, dtype=np.float64)
+            score = _projection_gradient_score(v_profile)
            results.append((float(angle), score))
        return results

    # --- Phase 1: coarse sweep ---
    coarse_angles = np.arange(-coarse_range, coarse_range + coarse_step * 0.5, coarse_step)
-    coarse_results = _sweep(coarse_angles)
+    coarse_results = _sweep_edges(coarse_angles)
    best_coarse = max(coarse_results, key=lambda x: x[1])
    best_coarse_angle, best_coarse_score = best_coarse

@@ -483,7 +500,7 @@ def deskew_image_iterative(
    fine_lo = best_coarse_angle - fine_range
    fine_hi = best_coarse_angle + fine_range
    fine_angles = np.arange(fine_lo, fine_hi + fine_step * 0.5, fine_step)
-    fine_results = _sweep(fine_angles)
+    fine_results = _sweep_edges(fine_angles)
    best_fine = max(fine_results, key=lambda x: x[1])
    best_fine_angle, best_fine_score = best_fine