fix: use gradient score instead of variance for iterative deskew

Variance is insensitive to 0.5° differences. The gradient score (squared L2 norm of the profile's first difference) detects sharp text-line transitions much better. Also: use the horizontal profile in both phases and a finer coarse step (0.1°).

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-03-05 14:11:19 +01:00
parent af1b12c97d
commit 68a6b97654
1 changed files with 48 additions and 49 deletions
--- a/klausur-service/backend/cv_vocab_pipeline.py
+++ b/klausur-service/backend/cv_vocab_pipeline.py
@@ -401,18 +401,32 @@ def deskew_image_by_word_alignment(
    return png_buf.tobytes(), angle_deg


+def _projection_gradient_score(profile: np.ndarray) -> float:
+    """Score a projection profile by the squared L2-norm of its first difference.
+
+    Higher score = sharper transitions between text-lines and gaps,
+    i.e. better row/column alignment.  Much more sensitive to small
+    angular differences than plain variance.
+    """
+    diff = np.diff(profile)
+    return float(np.sum(diff * diff))
+
+
 def deskew_image_iterative(
    img: np.ndarray,
    coarse_range: float = 2.0,
-    coarse_step: float = 0.2,
-    fine_range: float = 0.5,
-    fine_step: float = 0.1,
+    coarse_step: float = 0.1,
+    fine_range: float = 0.15,
+    fine_step: float = 0.02,
 ) -> Tuple[np.ndarray, float, Dict[str, Any]]:
-    """Iterative deskew using projection-profile variance optimisation.
+    """Iterative deskew using projection-profile gradient optimisation.

-    Two-phase search:
-      Phase 1 (coarse): maximise horizontal projection variance (row alignment)
-      Phase 2 (fine):   maximise vertical projection variance (column alignment)
+    Two-phase search using *horizontal* projection profiles (row sums)
+    in both phases.  The gradient score (sum of squared first-differences)
+    is far more sensitive to small rotations than plain variance.
+
+    Phase 1 (coarse): -2.0° … +2.0° in 0.1° steps  (41 angles)
+    Phase 2 (fine):   ±0.15° around coarse winner in 0.02° steps (≤16 angles)

    Args:
        img: BGR image (full resolution).
@@ -438,59 +452,44 @@ def deskew_image_iterative(
    crop_h, crop_w = crop.shape[:2]
    crop_center = (crop_w // 2, crop_h // 2)

-    # --- Phase 1: coarse sweep (horizontal projection → row alignment) ---
-    coarse_angles = np.arange(-coarse_range, coarse_range + coarse_step * 0.5, coarse_step)
-    best_coarse_angle = 0.0
-    best_coarse_score = -1.0
-    coarse_scores = []
+    def _sweep(angles: np.ndarray) -> list:
+        """Return [(angle, score), ...] for horizontal projection gradient."""
+        results = []
+        for angle in angles:
+            if abs(angle) < 1e-6:
+                rotated_crop = crop
+            else:
+                M = cv2.getRotationMatrix2D(crop_center, angle, 1.0)
+                rotated_crop = cv2.warpAffine(crop, M, (crop_w, crop_h),
+                                              flags=cv2.INTER_NEAREST,
+                                              borderMode=cv2.BORDER_CONSTANT,
+                                              borderValue=0)
+            h_profile = np.sum(rotated_crop, axis=1, dtype=np.float64)
+            score = _projection_gradient_score(h_profile)
+            results.append((float(angle), score))
+        return results

-    for angle in coarse_angles:
-        if abs(angle) < 1e-6:
-            rotated_crop = crop
-        else:
-            M = cv2.getRotationMatrix2D(crop_center, angle, 1.0)
-            rotated_crop = cv2.warpAffine(crop, M, (crop_w, crop_h),
-                                          flags=cv2.INTER_NEAREST,
-                                          borderMode=cv2.BORDER_CONSTANT,
-                                          borderValue=0)
-        h_profile = np.sum(rotated_crop, axis=1, dtype=np.float64)
-        score = float(np.var(h_profile))
-        coarse_scores.append((round(float(angle), 2), round(score, 1)))
-        if score > best_coarse_score:
-            best_coarse_score = score
-            best_coarse_angle = float(angle)
+    # --- Phase 1: coarse sweep ---
+    coarse_angles = np.arange(-coarse_range, coarse_range + coarse_step * 0.5, coarse_step)
+    coarse_results = _sweep(coarse_angles)
+    best_coarse = max(coarse_results, key=lambda x: x[1])
+    best_coarse_angle, best_coarse_score = best_coarse

    debug["coarse_best_angle"] = round(best_coarse_angle, 2)
    debug["coarse_best_score"] = round(best_coarse_score, 1)
-    debug["coarse_scores"] = coarse_scores
+    debug["coarse_scores"] = [(round(a, 2), round(s, 1)) for a, s in coarse_results]

-    # --- Phase 2: fine sweep (vertical projection → column alignment) ---
+    # --- Phase 2: fine sweep around coarse winner ---
    fine_lo = best_coarse_angle - fine_range
    fine_hi = best_coarse_angle + fine_range
    fine_angles = np.arange(fine_lo, fine_hi + fine_step * 0.5, fine_step)
-    best_fine_angle = best_coarse_angle
-    best_fine_score = -1.0
-    fine_scores = []
-
-    for angle in fine_angles:
-        if abs(angle) < 1e-6:
-            rotated_crop = crop
-        else:
-            M = cv2.getRotationMatrix2D(crop_center, angle, 1.0)
-            rotated_crop = cv2.warpAffine(crop, M, (crop_w, crop_h),
-                                          flags=cv2.INTER_NEAREST,
-                                          borderMode=cv2.BORDER_CONSTANT,
-                                          borderValue=0)
-        v_profile = np.sum(rotated_crop, axis=0, dtype=np.float64)
-        score = float(np.var(v_profile))
-        fine_scores.append((round(float(angle), 2), round(score, 1)))
-        if score > best_fine_score:
-            best_fine_score = score
-            best_fine_angle = float(angle)
+    fine_results = _sweep(fine_angles)
+    best_fine = max(fine_results, key=lambda x: x[1])
+    best_fine_angle, best_fine_score = best_fine

    debug["fine_best_angle"] = round(best_fine_angle, 2)
    debug["fine_best_score"] = round(best_fine_score, 1)
-    debug["fine_scores"] = fine_scores
+    debug["fine_scores"] = [(round(a, 2), round(s, 1)) for a, s in fine_results]

    final_angle = best_fine_angle