fix: use gradient score instead of variance for iterative deskew
Some checks failed
CI / go-lint (push) Has been skipped
CI / python-lint (push) Has been skipped
CI / nodejs-lint (push) Has been skipped
CI / test-go-school (push) Successful in 25s
CI / test-go-edu-search (push) Successful in 26s
CI / test-python-klausur (push) Failing after 1m46s
CI / test-python-agent-core (push) Successful in 16s
CI / test-nodejs-website (push) Successful in 17s
Some checks failed
CI / go-lint (push) Has been skipped
CI / python-lint (push) Has been skipped
CI / nodejs-lint (push) Has been skipped
CI / test-go-school (push) Successful in 25s
CI / test-go-edu-search (push) Successful in 26s
CI / test-python-klausur (push) Failing after 1m46s
CI / test-python-agent-core (push) Successful in 16s
CI / test-nodejs-website (push) Successful in 17s
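Variance is insensitive to 0.5° differences. The gradient score (sum of squared first differences of the projection profile, i.e. the squared L2 norm of its first derivative) detects sharp text-line transitions much better. Also: use the horizontal profile in both phases and a finer search grid (coarse step 0.1°, fine range ±0.15°, fine step 0.02°).

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>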
This commit is contained in:
@@ -401,18 +401,32 @@ def deskew_image_by_word_alignment(
|
|||||||
return png_buf.tobytes(), angle_deg
|
return png_buf.tobytes(), angle_deg
|
||||||
|
|
||||||
|
|
||||||
|
def _projection_gradient_score(profile: np.ndarray) -> float:
    """Score a projection profile by the energy of its first difference.

    The score is the sum of squared differences between neighbouring
    profile entries, i.e. the *squared* L2-norm of ``np.diff(profile)``.
    Higher score = sharper transitions between text-lines and gaps,
    i.e. better row/column alignment.  Much more sensitive to small
    angular differences than plain variance.

    Args:
        profile: Projection profile (e.g. the row sums of an image).
            Any numeric dtype is accepted; it is converted to float64.

    Returns:
        Sum of squared first differences as a Python float.  A 1-D
        profile with fewer than two entries scores ``0.0``.
    """
    # Compute in float64: with unsigned or narrow integer input both the
    # subtraction in np.diff and the squaring below would silently wrap.
    values = np.asarray(profile, dtype=np.float64)
    diff = np.diff(values)
    return float(np.sum(diff * diff))
|
||||||
|
|
||||||
|
|
||||||
def deskew_image_iterative(
|
def deskew_image_iterative(
|
||||||
img: np.ndarray,
|
img: np.ndarray,
|
||||||
coarse_range: float = 2.0,
|
coarse_range: float = 2.0,
|
||||||
coarse_step: float = 0.2,
|
coarse_step: float = 0.1,
|
||||||
fine_range: float = 0.5,
|
fine_range: float = 0.15,
|
||||||
fine_step: float = 0.1,
|
fine_step: float = 0.02,
|
||||||
) -> Tuple[np.ndarray, float, Dict[str, Any]]:
|
) -> Tuple[np.ndarray, float, Dict[str, Any]]:
|
||||||
"""Iterative deskew using projection-profile variance optimisation.
|
"""Iterative deskew using projection-profile gradient optimisation.
|
||||||
|
|
||||||
Two-phase search:
|
Two-phase search using *horizontal* projection profiles (row sums)
|
||||||
Phase 1 (coarse): maximise horizontal projection variance (row alignment)
|
in both phases. The gradient score (sum of squared first-differences)
|
||||||
Phase 2 (fine): maximise vertical projection variance (column alignment)
|
is far more sensitive to small rotations than plain variance.
|
||||||
|
|
||||||
|
Phase 1 (coarse): -2.0° … +2.0° in 0.1° steps (41 angles)
|
||||||
|
Phase 2 (fine): ±0.15° around coarse winner in 0.02° steps (≤16 angles)
|
||||||
|
|
||||||
Args:
|
Args:
|
||||||
img: BGR image (full resolution).
|
img: BGR image (full resolution).
|
||||||
@@ -438,59 +452,44 @@ def deskew_image_iterative(
|
|||||||
crop_h, crop_w = crop.shape[:2]
|
crop_h, crop_w = crop.shape[:2]
|
||||||
crop_center = (crop_w // 2, crop_h // 2)
|
crop_center = (crop_w // 2, crop_h // 2)
|
||||||
|
|
||||||
# --- Phase 1: coarse sweep (horizontal projection → row alignment) ---
|
def _sweep(angles: np.ndarray) -> list:
|
||||||
coarse_angles = np.arange(-coarse_range, coarse_range + coarse_step * 0.5, coarse_step)
|
"""Return [(angle, score), ...] for horizontal projection gradient."""
|
||||||
best_coarse_angle = 0.0
|
results = []
|
||||||
best_coarse_score = -1.0
|
for angle in angles:
|
||||||
coarse_scores = []
|
if abs(angle) < 1e-6:
|
||||||
|
rotated_crop = crop
|
||||||
|
else:
|
||||||
|
M = cv2.getRotationMatrix2D(crop_center, angle, 1.0)
|
||||||
|
rotated_crop = cv2.warpAffine(crop, M, (crop_w, crop_h),
|
||||||
|
flags=cv2.INTER_NEAREST,
|
||||||
|
borderMode=cv2.BORDER_CONSTANT,
|
||||||
|
borderValue=0)
|
||||||
|
h_profile = np.sum(rotated_crop, axis=1, dtype=np.float64)
|
||||||
|
score = _projection_gradient_score(h_profile)
|
||||||
|
results.append((float(angle), score))
|
||||||
|
return results
|
||||||
|
|
||||||
for angle in coarse_angles:
|
# --- Phase 1: coarse sweep ---
|
||||||
if abs(angle) < 1e-6:
|
coarse_angles = np.arange(-coarse_range, coarse_range + coarse_step * 0.5, coarse_step)
|
||||||
rotated_crop = crop
|
coarse_results = _sweep(coarse_angles)
|
||||||
else:
|
best_coarse = max(coarse_results, key=lambda x: x[1])
|
||||||
M = cv2.getRotationMatrix2D(crop_center, angle, 1.0)
|
best_coarse_angle, best_coarse_score = best_coarse
|
||||||
rotated_crop = cv2.warpAffine(crop, M, (crop_w, crop_h),
|
|
||||||
flags=cv2.INTER_NEAREST,
|
|
||||||
borderMode=cv2.BORDER_CONSTANT,
|
|
||||||
borderValue=0)
|
|
||||||
h_profile = np.sum(rotated_crop, axis=1, dtype=np.float64)
|
|
||||||
score = float(np.var(h_profile))
|
|
||||||
coarse_scores.append((round(float(angle), 2), round(score, 1)))
|
|
||||||
if score > best_coarse_score:
|
|
||||||
best_coarse_score = score
|
|
||||||
best_coarse_angle = float(angle)
|
|
||||||
|
|
||||||
debug["coarse_best_angle"] = round(best_coarse_angle, 2)
|
debug["coarse_best_angle"] = round(best_coarse_angle, 2)
|
||||||
debug["coarse_best_score"] = round(best_coarse_score, 1)
|
debug["coarse_best_score"] = round(best_coarse_score, 1)
|
||||||
debug["coarse_scores"] = coarse_scores
|
debug["coarse_scores"] = [(round(a, 2), round(s, 1)) for a, s in coarse_results]
|
||||||
|
|
||||||
# --- Phase 2: fine sweep (vertical projection → column alignment) ---
|
# --- Phase 2: fine sweep around coarse winner ---
|
||||||
fine_lo = best_coarse_angle - fine_range
|
fine_lo = best_coarse_angle - fine_range
|
||||||
fine_hi = best_coarse_angle + fine_range
|
fine_hi = best_coarse_angle + fine_range
|
||||||
fine_angles = np.arange(fine_lo, fine_hi + fine_step * 0.5, fine_step)
|
fine_angles = np.arange(fine_lo, fine_hi + fine_step * 0.5, fine_step)
|
||||||
best_fine_angle = best_coarse_angle
|
fine_results = _sweep(fine_angles)
|
||||||
best_fine_score = -1.0
|
best_fine = max(fine_results, key=lambda x: x[1])
|
||||||
fine_scores = []
|
best_fine_angle, best_fine_score = best_fine
|
||||||
|
|
||||||
for angle in fine_angles:
|
|
||||||
if abs(angle) < 1e-6:
|
|
||||||
rotated_crop = crop
|
|
||||||
else:
|
|
||||||
M = cv2.getRotationMatrix2D(crop_center, angle, 1.0)
|
|
||||||
rotated_crop = cv2.warpAffine(crop, M, (crop_w, crop_h),
|
|
||||||
flags=cv2.INTER_NEAREST,
|
|
||||||
borderMode=cv2.BORDER_CONSTANT,
|
|
||||||
borderValue=0)
|
|
||||||
v_profile = np.sum(rotated_crop, axis=0, dtype=np.float64)
|
|
||||||
score = float(np.var(v_profile))
|
|
||||||
fine_scores.append((round(float(angle), 2), round(score, 1)))
|
|
||||||
if score > best_fine_score:
|
|
||||||
best_fine_score = score
|
|
||||||
best_fine_angle = float(angle)
|
|
||||||
|
|
||||||
debug["fine_best_angle"] = round(best_fine_angle, 2)
|
debug["fine_best_angle"] = round(best_fine_angle, 2)
|
||||||
debug["fine_best_score"] = round(best_fine_score, 1)
|
debug["fine_best_score"] = round(best_fine_score, 1)
|
||||||
debug["fine_scores"] = fine_scores
|
debug["fine_scores"] = [(round(a, 2), round(s, 1)) for a, s in fine_results]
|
||||||
|
|
||||||
final_angle = best_fine_angle
|
final_angle = best_fine_angle
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user