feat: two-pass deskew with wider angle range and residual correction
Some checks failed
CI / go-lint (push) Has been skipped
CI / python-lint (push) Has been skipped
CI / nodejs-lint (push) Has been skipped
CI / test-go-school (push) Successful in 24s
CI / test-go-edu-search (push) Successful in 25s
CI / test-python-klausur (push) Failing after 1m52s
CI / test-python-agent-core (push) Successful in 15s
CI / test-nodejs-website (push) Successful in 16s

- Increase iterative deskew coarse_range from ±2° to ±5° to handle
  heavily skewed scans
- New deskew_two_pass(): runs the iterative projection deskew first, then
  runs word-alignment on the corrected image to detect residual skew; the
  residual correction is applied only when the residual is ≥ 0.3°
- OCR pipeline API auto_deskew now uses deskew_two_pass by default
- Vocab worksheet _run_ocr_pipeline_for_page uses deskew_two_pass
- Deskew result now includes angle_residual and two_pass_debug

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
Benjamin Admin
2026-03-05 17:34:57 +01:00
parent b9c3c47a37
commit 538d5c732e
3 changed files with 93 additions and 77 deletions

View File

@@ -65,6 +65,7 @@ try:
import numpy as np
from cv_vocab_pipeline import (
deskew_image, deskew_image_by_word_alignment, deskew_image_iterative,
deskew_two_pass,
dewarp_image, create_ocr_image,
detect_column_geometry, analyze_layout_by_words, analyze_layout, create_layout_image,
detect_row_geometry, build_cell_grid_v2,
@@ -1370,43 +1371,14 @@ async def _run_ocr_pipeline_for_page(
except Exception as e:
logger.warning(f"Could not create pipeline session in DB: {e}")
# 3. Deskew (3 methods, pick best)
# 3. Two-pass deskew: iterative (±5°) + word-alignment residual
t0 = _time.time()
try:
deskewed_hough, angle_hough = deskew_image(img_bgr.copy())
except Exception:
deskewed_hough, angle_hough = img_bgr, 0.0
deskewed_bgr, angle_applied, deskew_debug = deskew_two_pass(img_bgr.copy())
angle_pass1 = deskew_debug.get("pass1_angle", 0.0)
angle_pass2 = deskew_debug.get("pass2_angle", 0.0)
success_enc, png_orig = cv2.imencode(".png", img_bgr)
orig_bytes = png_orig.tobytes() if success_enc else b""
try:
deskewed_wa_bytes, angle_wa = deskew_image_by_word_alignment(orig_bytes)
except Exception:
deskewed_wa_bytes, angle_wa = orig_bytes, 0.0
try:
deskewed_iter, angle_iterative, _ = deskew_image_iterative(img_bgr.copy())
except Exception:
deskewed_iter, angle_iterative = img_bgr, 0.0
# Pick best
if abs(angle_iterative) >= 0.05:
deskewed_bgr = deskewed_iter
angle_applied = angle_iterative
elif abs(angle_wa) >= abs(angle_hough) or abs(angle_hough) < 0.1:
angle_applied = angle_wa
wa_array = np.frombuffer(deskewed_wa_bytes, dtype=np.uint8)
deskewed_bgr = cv2.imdecode(wa_array, cv2.IMREAD_COLOR)
if deskewed_bgr is None:
deskewed_bgr = deskewed_hough
angle_applied = angle_hough
else:
deskewed_bgr = deskewed_hough
angle_applied = angle_hough
logger.info(f" deskew: hough={angle_hough:.2f} wa={angle_wa:.2f} "
f"iter={angle_iterative:.2f} → applied={angle_applied:.2f} "
f"({_time.time() - t0:.1f}s)")
logger.info(f" deskew: pass1={angle_pass1:.2f} pass2={angle_pass2:.2f} "
f"total={angle_applied:.2f} ({_time.time() - t0:.1f}s)")
# 4. Dewarp
t0 = _time.time()