fix: use gradient score instead of variance for iterative deskew

Variance is insensitive to 0.5° differences. Gradient score (squared L2 norm of the profile's first difference, i.e. the sum of squared differences between neighbouring bins) detects sharp text-line transitions much better. Also: use the horizontal profile in both phases and a finer coarse step (0.1°). Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-03-05 14:11:19 +01:00
parent af1b12c97d
commit 68a6b97654
1 changed files with 48 additions and 49 deletions
@@ -401,18 +401,32 @@ def deskew_image_by_word_alignment(
    return png_buf.tobytes(), angle_deg
 def _projection_gradient_score(profile: np.ndarray) -> float:
    """Score a projection profile by the energy of its first difference.

    The score is the sum of squared differences between neighbouring
    profile bins, i.e. the *squared* L2-norm of ``np.diff(profile)``.
    A higher score means sharper transitions between adjacent bins.

    Args:
        profile: Projection profile (array-like of numbers).  It is
            converted to float64 before differencing.

    Returns:
        The sum of squared first differences as a Python float; 0.0 when
        the profile has fewer than two entries.
    """
    # Cast first: np.diff on unsigned integer input wraps around instead of
    # going negative, and squaring in a narrow dtype can overflow silently.
    values = np.asarray(profile, dtype=np.float64)
    diff = np.diff(values)
    return float(np.sum(diff * diff))
 def deskew_image_iterative(
    img: np.ndarray,
    coarse_range: float = 2.0,
-    coarse_step: float = 0.2,
+    coarse_step: float = 0.1,
-    fine_range: float = 0.5,
+    fine_range: float = 0.15,
-    fine_step: float = 0.1,
+    fine_step: float = 0.02,
 ) -> Tuple[np.ndarray, float, Dict[str, Any]]:
-    """Iterative deskew using projection-profile variance optimisation.
+    """Iterative deskew using projection-profile gradient optimisation.
-    Two-phase search:
+    Two-phase search using *horizontal* projection profiles (row sums)
-      Phase 1 (coarse): maximise horizontal projection variance (row alignment)
+    in both phases.  The gradient score (sum of squared first-differences)
-      Phase 2 (fine):   maximise vertical projection variance (column alignment)
+    is far more sensitive to small rotations than plain variance.
    Phase 1 (coarse): -2.0° … +2.0° in 0.1° steps  (41 angles)
    Phase 2 (fine):   ±0.15° around coarse winner in 0.02° steps (≤16 angles)
    Args:
        img: BGR image (full resolution).
@@ -438,59 +452,44 @@ def deskew_image_iterative(
    crop_h, crop_w = crop.shape[:2]
    crop_center = (crop_w // 2, crop_h // 2)
-    # --- Phase 1: coarse sweep (horizontal projection → row alignment) ---
+    def _sweep(angles: np.ndarray) -> list:
-    coarse_angles = np.arange(-coarse_range, coarse_range + coarse_step * 0.5, coarse_step)
+        """Return [(angle, score), ...] for horizontal projection gradient."""
-    best_coarse_angle = 0.0
+        results = []
-    best_coarse_score = -1.0
+        for angle in angles:
-    coarse_scores = []
+            if abs(angle) < 1e-6:
                rotated_crop = crop
            else:
                M = cv2.getRotationMatrix2D(crop_center, angle, 1.0)
                rotated_crop = cv2.warpAffine(crop, M, (crop_w, crop_h),
                                              flags=cv2.INTER_NEAREST,
                                              borderMode=cv2.BORDER_CONSTANT,
                                              borderValue=0)
            h_profile = np.sum(rotated_crop, axis=1, dtype=np.float64)
            score = _projection_gradient_score(h_profile)
            results.append((float(angle), score))
        return results
-    for angle in coarse_angles:
+    # --- Phase 1: coarse sweep ---
-        if abs(angle) < 1e-6:
+    coarse_angles = np.arange(-coarse_range, coarse_range + coarse_step * 0.5, coarse_step)
-            rotated_crop = crop
+    coarse_results = _sweep(coarse_angles)
-        else:
+    best_coarse = max(coarse_results, key=lambda x: x[1])
-            M = cv2.getRotationMatrix2D(crop_center, angle, 1.0)
+    best_coarse_angle, best_coarse_score = best_coarse
            rotated_crop = cv2.warpAffine(crop, M, (crop_w, crop_h),
                                          flags=cv2.INTER_NEAREST,
                                          borderMode=cv2.BORDER_CONSTANT,
                                          borderValue=0)
        h_profile = np.sum(rotated_crop, axis=1, dtype=np.float64)
        score = float(np.var(h_profile))
        coarse_scores.append((round(float(angle), 2), round(score, 1)))
        if score > best_coarse_score:
            best_coarse_score = score
            best_coarse_angle = float(angle)
    debug["coarse_best_angle"] = round(best_coarse_angle, 2)
    debug["coarse_best_score"] = round(best_coarse_score, 1)
-    debug["coarse_scores"] = coarse_scores
+    debug["coarse_scores"] = [(round(a, 2), round(s, 1)) for a, s in coarse_results]
-    # --- Phase 2: fine sweep (vertical projection → column alignment) ---
+    # --- Phase 2: fine sweep around coarse winner ---
    fine_lo = best_coarse_angle - fine_range
    fine_hi = best_coarse_angle + fine_range
    fine_angles = np.arange(fine_lo, fine_hi + fine_step * 0.5, fine_step)
-    best_fine_angle = best_coarse_angle
+    fine_results = _sweep(fine_angles)
-    best_fine_score = -1.0
+    best_fine = max(fine_results, key=lambda x: x[1])
-    fine_scores = []
+    best_fine_angle, best_fine_score = best_fine
    for angle in fine_angles:
        if abs(angle) < 1e-6:
            rotated_crop = crop
        else:
            M = cv2.getRotationMatrix2D(crop_center, angle, 1.0)
            rotated_crop = cv2.warpAffine(crop, M, (crop_w, crop_h),
                                          flags=cv2.INTER_NEAREST,
                                          borderMode=cv2.BORDER_CONSTANT,
                                          borderValue=0)
        v_profile = np.sum(rotated_crop, axis=0, dtype=np.float64)
        score = float(np.var(v_profile))
        fine_scores.append((round(float(angle), 2), round(score, 1)))
        if score > best_fine_score:
            best_fine_score = score
            best_fine_angle = float(angle)
    debug["fine_best_angle"] = round(best_fine_angle, 2)
    debug["fine_best_score"] = round(best_fine_score, 1)
-    debug["fine_scores"] = fine_scores
+    debug["fine_scores"] = [(round(a, 2), round(s, 1)) for a, s in fine_results]
    final_angle = best_fine_angle