feat: two-pass deskew with wider angle range and residual correction
Some checks failed
CI / go-lint (push) Has been skipped
CI / python-lint (push) Has been skipped
CI / nodejs-lint (push) Has been skipped
CI / test-go-school (push) Successful in 24s
CI / test-go-edu-search (push) Successful in 25s
CI / test-python-klausur (push) Failing after 1m52s
CI / test-python-agent-core (push) Successful in 15s
CI / test-nodejs-website (push) Successful in 16s

- Increase iterative deskew coarse_range from ±2° to ±5° to handle
  heavily skewed scans
- New deskew_two_pass(): runs iterative projection first, then
  word-alignment on the corrected image to detect/fix residual skew
  (applied when residual ≥ 0.3°)
- OCR pipeline API auto_deskew now uses deskew_two_pass by default
- Vocab worksheet _run_ocr_pipeline_for_page uses deskew_two_pass
- Deskew result now includes angle_residual and two_pass_debug

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
Benjamin Admin
2026-03-05 17:34:57 +01:00
parent b9c3c47a37
commit 538d5c732e
3 changed files with 93 additions and 77 deletions

View File

@@ -53,6 +53,7 @@ from cv_vocab_pipeline import (
deskew_image,
deskew_image_by_word_alignment,
deskew_image_iterative,
deskew_two_pass,
detect_column_geometry,
detect_document_type,
detect_row_geometry,
@@ -457,7 +458,7 @@ async def get_image(session_id: str, image_type: str):
@router.post("/sessions/{session_id}/deskew")
async def auto_deskew(session_id: str):
"""Run both deskew methods and pick the best one."""
"""Two-pass deskew: iterative projection (wide range) + word-alignment residual."""
# Ensure session is in cache
if session_id not in _cache:
await _load_session_to_cache(session_id)
@@ -469,52 +470,30 @@ async def auto_deskew(session_id: str):
t0 = time.time()
# Method 1: Hough Lines
try:
deskewed_hough, angle_hough = deskew_image(img_bgr.copy())
except Exception as e:
logger.warning(f"Hough deskew failed: {e}")
deskewed_hough, angle_hough = img_bgr, 0.0
# Two-pass deskew: iterative (±5°) + word-alignment residual check
deskewed_bgr, angle_applied, two_pass_debug = deskew_two_pass(img_bgr.copy())
# Also run individual methods for reporting (non-authoritative)
try:
_, angle_hough = deskew_image(img_bgr.copy())
except Exception:
angle_hough = 0.0
# Method 2: Word Alignment (needs image bytes)
success_enc, png_orig = cv2.imencode(".png", img_bgr)
orig_bytes = png_orig.tobytes() if success_enc else b""
try:
deskewed_wa_bytes, angle_wa = deskew_image_by_word_alignment(orig_bytes)
except Exception as e:
logger.warning(f"Word alignment deskew failed: {e}")
deskewed_wa_bytes, angle_wa = orig_bytes, 0.0
_, angle_wa = deskew_image_by_word_alignment(orig_bytes)
except Exception:
angle_wa = 0.0
# Method 3: Iterative Projection-Profile
angle_iterative = 0.0
iterative_debug = {}
try:
deskewed_iter, angle_iterative, iterative_debug = deskew_image_iterative(img_bgr.copy())
except Exception as e:
logger.warning(f"Iterative deskew failed: {e}")
deskewed_iter = img_bgr
angle_iterative = two_pass_debug.get("pass1_angle", 0.0)
angle_residual = two_pass_debug.get("pass2_angle", 0.0)
duration = time.time() - t0
# Pick best method — prefer iterative when it found a non-zero angle
if abs(angle_iterative) >= 0.05:
method_used = "iterative"
angle_applied = angle_iterative
deskewed_bgr = deskewed_iter
elif abs(angle_wa) >= abs(angle_hough) or abs(angle_hough) < 0.1:
method_used = "word_alignment"
angle_applied = angle_wa
wa_array = np.frombuffer(deskewed_wa_bytes, dtype=np.uint8)
deskewed_bgr = cv2.imdecode(wa_array, cv2.IMREAD_COLOR)
if deskewed_bgr is None:
deskewed_bgr = deskewed_hough
method_used = "hough"
angle_applied = angle_hough
else:
method_used = "hough"
angle_applied = angle_hough
deskewed_bgr = deskewed_hough
method_used = "two_pass"
if abs(angle_residual) < 0.3:
method_used = "iterative" # pass2 didn't contribute
# Encode as PNG
success, deskewed_png_buf = cv2.imencode(".png", deskewed_bgr)
@@ -535,10 +514,12 @@ async def auto_deskew(session_id: str):
"angle_hough": round(angle_hough, 3),
"angle_word_alignment": round(angle_wa, 3),
"angle_iterative": round(angle_iterative, 3),
"angle_residual": round(angle_residual, 3),
"angle_applied": round(angle_applied, 3),
"method_used": method_used,
"confidence": round(confidence, 2),
"duration_seconds": round(duration, 2),
"two_pass_debug": two_pass_debug,
}
# Update cache
@@ -557,11 +538,14 @@ async def auto_deskew(session_id: str):
await update_session_db(session_id, **db_update)
logger.info(f"OCR Pipeline: deskew session {session_id}: "
f"hough={angle_hough:.2f} wa={angle_wa:.2f} iter={angle_iterative:.2f} "
f"-> {method_used} {angle_applied:.2f}")
f"hough={angle_hough:.2f} wa={angle_wa:.2f} "
f"iter={angle_iterative:.2f} residual={angle_residual:.2f} "
f"-> {method_used} total={angle_applied:.2f}")
await _append_pipeline_log(session_id, "deskew", {
"angle_applied": round(angle_applied, 3),
"angle_iterative": round(angle_iterative, 3),
"angle_residual": round(angle_residual, 3),
"confidence": round(confidence, 2),
"method": method_used,
}, duration_ms=int(duration * 1000))