feat: two-pass deskew with wider angle range and residual correction
Some checks failed
CI / go-lint (push) Has been skipped
CI / python-lint (push) Has been skipped
CI / nodejs-lint (push) Has been skipped
CI / test-go-school (push) Successful in 24s
CI / test-go-edu-search (push) Successful in 25s
CI / test-python-klausur (push) Failing after 1m52s
CI / test-python-agent-core (push) Successful in 15s
CI / test-nodejs-website (push) Successful in 16s
Some checks failed
CI / go-lint (push) Has been skipped
CI / python-lint (push) Has been skipped
CI / nodejs-lint (push) Has been skipped
CI / test-go-school (push) Successful in 24s
CI / test-go-edu-search (push) Successful in 25s
CI / test-python-klausur (push) Failing after 1m52s
CI / test-python-agent-core (push) Successful in 15s
CI / test-nodejs-website (push) Successful in 16s
- Increase iterative deskew coarse_range from ±2° to ±5° to handle heavily skewed scans
- New deskew_two_pass(): runs iterative projection first, then word-alignment on the corrected image to detect/fix residual skew (applied when residual ≥ 0.3°)
- OCR pipeline API auto_deskew now uses deskew_two_pass by default
- Vocab worksheet _run_ocr_pipeline_for_page uses deskew_two_pass
- Deskew result now includes angle_residual and two_pass_debug

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -413,7 +413,7 @@ def _projection_gradient_score(profile: np.ndarray) -> float:
|
|||||||
|
|
||||||
def deskew_image_iterative(
|
def deskew_image_iterative(
|
||||||
img: np.ndarray,
|
img: np.ndarray,
|
||||||
coarse_range: float = 2.0,
|
coarse_range: float = 5.0,
|
||||||
coarse_step: float = 0.1,
|
coarse_step: float = 0.1,
|
||||||
fine_range: float = 0.15,
|
fine_range: float = 0.15,
|
||||||
fine_step: float = 0.02,
|
fine_step: float = 0.02,
|
||||||
@@ -528,6 +528,66 @@ def deskew_image_iterative(
|
|||||||
return rotated, final_angle, debug
|
return rotated, final_angle, debug
|
||||||
|
|
||||||
|
|
||||||
|
def deskew_two_pass(
    img: np.ndarray,
    coarse_range: float = 5.0,
    residual_threshold: float = 0.3,
) -> Tuple[np.ndarray, float, Dict[str, Any]]:
    """Two-pass deskew: iterative projection + word-alignment residual check.

    Pass 1: ``deskew_image_iterative()`` (vertical-edge projection, wide range).
    Pass 2: ``deskew_image_by_word_alignment()`` on the already-corrected image
            to detect and fix residual skew that the projection method missed.

    The two corrections are summed. If the residual from Pass 2 is below
    ``residual_threshold`` it is ignored (already good enough). Pass 2 is
    best-effort: any failure in it is logged and the Pass 1 result is returned.

    Args:
        img: Input image; a copy is handed to Pass 1, so ``img`` itself is
            not passed on for modification.
        coarse_range: Forwarded to ``deskew_image_iterative()`` as its
            ``coarse_range`` (degrees).
        residual_threshold: Minimum absolute Pass 2 angle (degrees) for the
            second correction to be applied. Defaults to 0.3.

    Returns:
        (corrected_bgr, total_angle_degrees, debug_dict)

        ``debug_dict`` contains ``pass1_angle``, ``pass1_method``,
        ``pass1_debug``, ``pass2_angle`` (0.0 when Pass 2 was not applied),
        ``pass2_detected_angle`` (what Pass 2 measured, even if ignored),
        ``pass2_applied``, ``pass2_method`` and ``total_angle``.
    """
    debug: Dict[str, Any] = {}

    # --- Pass 1: iterative projection ---
    corrected, angle1, dbg1 = deskew_image_iterative(
        img.copy(), coarse_range=coarse_range,
    )
    debug["pass1_angle"] = round(angle1, 3)
    debug["pass1_method"] = "iterative"
    debug["pass1_debug"] = dbg1

    # --- Pass 2: word-alignment residual check on corrected image ---
    # angle2 is only set once the second correction has actually been
    # adopted, so the returned angle always matches the returned image.
    angle2 = 0.0
    detected = 0.0
    try:
        # Word-alignment works on encoded image bytes, so round-trip via PNG
        # (lossless, hence no extra degradation of the pass-1 result).
        ok, buf = cv2.imencode(".png", corrected)
        if not ok:
            logger.warning("deskew_two_pass: PNG encoding of pass1 result failed — pass2 skipped")
        else:
            corrected_bytes, detected = deskew_image_by_word_alignment(buf.tobytes())
            if abs(detected) >= residual_threshold:
                # Significant residual — decode and use the second correction
                arr2 = np.frombuffer(corrected_bytes, dtype=np.uint8)
                corrected2 = cv2.imdecode(arr2, cv2.IMREAD_COLOR)
                if corrected2 is not None:
                    corrected = corrected2
                    angle2 = detected
                    logger.info(f"deskew_two_pass: pass2 residual={angle2:.2f}° applied "
                                f"(total={angle1 + angle2:.2f}°)")
                else:
                    logger.warning(f"deskew_two_pass: pass2 residual={detected:.2f}° detected "
                                   f"but corrected image could not be decoded — skipped")
            else:
                logger.info(f"deskew_two_pass: pass2 residual={detected:.2f}° "
                            f"< {residual_threshold}° — skipped")
    except Exception as e:
        # Best-effort: a failing residual check must not discard pass 1.
        logger.warning(f"deskew_two_pass: pass2 word-alignment failed: {e}")

    total_angle = angle1 + angle2
    debug["pass2_angle"] = round(angle2, 3)
    debug["pass2_detected_angle"] = round(detected, 3)
    debug["pass2_applied"] = angle2 != 0.0
    debug["pass2_method"] = "word_alignment"
    debug["total_angle"] = round(total_angle, 3)

    logger.info(f"deskew_two_pass: pass1={angle1:.2f}° + pass2={angle2:.2f}° = {total_angle:.2f}°")

    return corrected, total_angle, debug
|
||||||
|
|
||||||
|
|
||||||
# =============================================================================
|
# =============================================================================
|
||||||
# Stage 3: Dewarp (Book Curvature Correction)
|
# Stage 3: Dewarp (Book Curvature Correction)
|
||||||
# =============================================================================
|
# =============================================================================
|
||||||
|
|||||||
@@ -53,6 +53,7 @@ from cv_vocab_pipeline import (
|
|||||||
deskew_image,
|
deskew_image,
|
||||||
deskew_image_by_word_alignment,
|
deskew_image_by_word_alignment,
|
||||||
deskew_image_iterative,
|
deskew_image_iterative,
|
||||||
|
deskew_two_pass,
|
||||||
detect_column_geometry,
|
detect_column_geometry,
|
||||||
detect_document_type,
|
detect_document_type,
|
||||||
detect_row_geometry,
|
detect_row_geometry,
|
||||||
@@ -457,7 +458,7 @@ async def get_image(session_id: str, image_type: str):
|
|||||||
|
|
||||||
@router.post("/sessions/{session_id}/deskew")
|
@router.post("/sessions/{session_id}/deskew")
|
||||||
async def auto_deskew(session_id: str):
|
async def auto_deskew(session_id: str):
|
||||||
"""Run both deskew methods and pick the best one."""
|
"""Two-pass deskew: iterative projection (wide range) + word-alignment residual."""
|
||||||
# Ensure session is in cache
|
# Ensure session is in cache
|
||||||
if session_id not in _cache:
|
if session_id not in _cache:
|
||||||
await _load_session_to_cache(session_id)
|
await _load_session_to_cache(session_id)
|
||||||
@@ -469,52 +470,30 @@ async def auto_deskew(session_id: str):
|
|||||||
|
|
||||||
t0 = time.time()
|
t0 = time.time()
|
||||||
|
|
||||||
# Method 1: Hough Lines
|
# Two-pass deskew: iterative (±5°) + word-alignment residual check
|
||||||
try:
|
deskewed_bgr, angle_applied, two_pass_debug = deskew_two_pass(img_bgr.copy())
|
||||||
deskewed_hough, angle_hough = deskew_image(img_bgr.copy())
|
|
||||||
except Exception as e:
|
# Also run individual methods for reporting (non-authoritative)
|
||||||
logger.warning(f"Hough deskew failed: {e}")
|
try:
|
||||||
deskewed_hough, angle_hough = img_bgr, 0.0
|
_, angle_hough = deskew_image(img_bgr.copy())
|
||||||
|
except Exception:
|
||||||
|
angle_hough = 0.0
|
||||||
|
|
||||||
# Method 2: Word Alignment (needs image bytes)
|
|
||||||
success_enc, png_orig = cv2.imencode(".png", img_bgr)
|
success_enc, png_orig = cv2.imencode(".png", img_bgr)
|
||||||
orig_bytes = png_orig.tobytes() if success_enc else b""
|
orig_bytes = png_orig.tobytes() if success_enc else b""
|
||||||
|
|
||||||
try:
|
try:
|
||||||
deskewed_wa_bytes, angle_wa = deskew_image_by_word_alignment(orig_bytes)
|
_, angle_wa = deskew_image_by_word_alignment(orig_bytes)
|
||||||
except Exception as e:
|
except Exception:
|
||||||
logger.warning(f"Word alignment deskew failed: {e}")
|
angle_wa = 0.0
|
||||||
deskewed_wa_bytes, angle_wa = orig_bytes, 0.0
|
|
||||||
|
|
||||||
# Method 3: Iterative Projection-Profile
|
angle_iterative = two_pass_debug.get("pass1_angle", 0.0)
|
||||||
angle_iterative = 0.0
|
angle_residual = two_pass_debug.get("pass2_angle", 0.0)
|
||||||
iterative_debug = {}
|
|
||||||
try:
|
|
||||||
deskewed_iter, angle_iterative, iterative_debug = deskew_image_iterative(img_bgr.copy())
|
|
||||||
except Exception as e:
|
|
||||||
logger.warning(f"Iterative deskew failed: {e}")
|
|
||||||
deskewed_iter = img_bgr
|
|
||||||
|
|
||||||
duration = time.time() - t0
|
duration = time.time() - t0
|
||||||
|
|
||||||
# Pick best method — prefer iterative when it found a non-zero angle
|
method_used = "two_pass"
|
||||||
if abs(angle_iterative) >= 0.05:
|
if abs(angle_residual) < 0.3:
|
||||||
method_used = "iterative"
|
method_used = "iterative" # pass2 didn't contribute
|
||||||
angle_applied = angle_iterative
|
|
||||||
deskewed_bgr = deskewed_iter
|
|
||||||
elif abs(angle_wa) >= abs(angle_hough) or abs(angle_hough) < 0.1:
|
|
||||||
method_used = "word_alignment"
|
|
||||||
angle_applied = angle_wa
|
|
||||||
wa_array = np.frombuffer(deskewed_wa_bytes, dtype=np.uint8)
|
|
||||||
deskewed_bgr = cv2.imdecode(wa_array, cv2.IMREAD_COLOR)
|
|
||||||
if deskewed_bgr is None:
|
|
||||||
deskewed_bgr = deskewed_hough
|
|
||||||
method_used = "hough"
|
|
||||||
angle_applied = angle_hough
|
|
||||||
else:
|
|
||||||
method_used = "hough"
|
|
||||||
angle_applied = angle_hough
|
|
||||||
deskewed_bgr = deskewed_hough
|
|
||||||
|
|
||||||
# Encode as PNG
|
# Encode as PNG
|
||||||
success, deskewed_png_buf = cv2.imencode(".png", deskewed_bgr)
|
success, deskewed_png_buf = cv2.imencode(".png", deskewed_bgr)
|
||||||
@@ -535,10 +514,12 @@ async def auto_deskew(session_id: str):
|
|||||||
"angle_hough": round(angle_hough, 3),
|
"angle_hough": round(angle_hough, 3),
|
||||||
"angle_word_alignment": round(angle_wa, 3),
|
"angle_word_alignment": round(angle_wa, 3),
|
||||||
"angle_iterative": round(angle_iterative, 3),
|
"angle_iterative": round(angle_iterative, 3),
|
||||||
|
"angle_residual": round(angle_residual, 3),
|
||||||
"angle_applied": round(angle_applied, 3),
|
"angle_applied": round(angle_applied, 3),
|
||||||
"method_used": method_used,
|
"method_used": method_used,
|
||||||
"confidence": round(confidence, 2),
|
"confidence": round(confidence, 2),
|
||||||
"duration_seconds": round(duration, 2),
|
"duration_seconds": round(duration, 2),
|
||||||
|
"two_pass_debug": two_pass_debug,
|
||||||
}
|
}
|
||||||
|
|
||||||
# Update cache
|
# Update cache
|
||||||
@@ -557,11 +538,14 @@ async def auto_deskew(session_id: str):
|
|||||||
await update_session_db(session_id, **db_update)
|
await update_session_db(session_id, **db_update)
|
||||||
|
|
||||||
logger.info(f"OCR Pipeline: deskew session {session_id}: "
|
logger.info(f"OCR Pipeline: deskew session {session_id}: "
|
||||||
f"hough={angle_hough:.2f} wa={angle_wa:.2f} iter={angle_iterative:.2f} "
|
f"hough={angle_hough:.2f} wa={angle_wa:.2f} "
|
||||||
f"-> {method_used} {angle_applied:.2f}")
|
f"iter={angle_iterative:.2f} residual={angle_residual:.2f} "
|
||||||
|
f"-> {method_used} total={angle_applied:.2f}")
|
||||||
|
|
||||||
await _append_pipeline_log(session_id, "deskew", {
|
await _append_pipeline_log(session_id, "deskew", {
|
||||||
"angle_applied": round(angle_applied, 3),
|
"angle_applied": round(angle_applied, 3),
|
||||||
|
"angle_iterative": round(angle_iterative, 3),
|
||||||
|
"angle_residual": round(angle_residual, 3),
|
||||||
"confidence": round(confidence, 2),
|
"confidence": round(confidence, 2),
|
||||||
"method": method_used,
|
"method": method_used,
|
||||||
}, duration_ms=int(duration * 1000))
|
}, duration_ms=int(duration * 1000))
|
||||||
|
|||||||
@@ -65,6 +65,7 @@ try:
|
|||||||
import numpy as np
|
import numpy as np
|
||||||
from cv_vocab_pipeline import (
|
from cv_vocab_pipeline import (
|
||||||
deskew_image, deskew_image_by_word_alignment, deskew_image_iterative,
|
deskew_image, deskew_image_by_word_alignment, deskew_image_iterative,
|
||||||
|
deskew_two_pass,
|
||||||
dewarp_image, create_ocr_image,
|
dewarp_image, create_ocr_image,
|
||||||
detect_column_geometry, analyze_layout_by_words, analyze_layout, create_layout_image,
|
detect_column_geometry, analyze_layout_by_words, analyze_layout, create_layout_image,
|
||||||
detect_row_geometry, build_cell_grid_v2,
|
detect_row_geometry, build_cell_grid_v2,
|
||||||
@@ -1370,43 +1371,14 @@ async def _run_ocr_pipeline_for_page(
|
|||||||
except Exception as e:
|
except Exception as e:
|
||||||
logger.warning(f"Could not create pipeline session in DB: {e}")
|
logger.warning(f"Could not create pipeline session in DB: {e}")
|
||||||
|
|
||||||
# 3. Deskew (3 methods, pick best)
|
# 3. Two-pass deskew: iterative (±5°) + word-alignment residual
|
||||||
t0 = _time.time()
|
t0 = _time.time()
|
||||||
try:
|
deskewed_bgr, angle_applied, deskew_debug = deskew_two_pass(img_bgr.copy())
|
||||||
deskewed_hough, angle_hough = deskew_image(img_bgr.copy())
|
angle_pass1 = deskew_debug.get("pass1_angle", 0.0)
|
||||||
except Exception:
|
angle_pass2 = deskew_debug.get("pass2_angle", 0.0)
|
||||||
deskewed_hough, angle_hough = img_bgr, 0.0
|
|
||||||
|
|
||||||
success_enc, png_orig = cv2.imencode(".png", img_bgr)
|
logger.info(f" deskew: pass1={angle_pass1:.2f} pass2={angle_pass2:.2f} "
|
||||||
orig_bytes = png_orig.tobytes() if success_enc else b""
|
f"total={angle_applied:.2f} ({_time.time() - t0:.1f}s)")
|
||||||
try:
|
|
||||||
deskewed_wa_bytes, angle_wa = deskew_image_by_word_alignment(orig_bytes)
|
|
||||||
except Exception:
|
|
||||||
deskewed_wa_bytes, angle_wa = orig_bytes, 0.0
|
|
||||||
|
|
||||||
try:
|
|
||||||
deskewed_iter, angle_iterative, _ = deskew_image_iterative(img_bgr.copy())
|
|
||||||
except Exception:
|
|
||||||
deskewed_iter, angle_iterative = img_bgr, 0.0
|
|
||||||
|
|
||||||
# Pick best
|
|
||||||
if abs(angle_iterative) >= 0.05:
|
|
||||||
deskewed_bgr = deskewed_iter
|
|
||||||
angle_applied = angle_iterative
|
|
||||||
elif abs(angle_wa) >= abs(angle_hough) or abs(angle_hough) < 0.1:
|
|
||||||
angle_applied = angle_wa
|
|
||||||
wa_array = np.frombuffer(deskewed_wa_bytes, dtype=np.uint8)
|
|
||||||
deskewed_bgr = cv2.imdecode(wa_array, cv2.IMREAD_COLOR)
|
|
||||||
if deskewed_bgr is None:
|
|
||||||
deskewed_bgr = deskewed_hough
|
|
||||||
angle_applied = angle_hough
|
|
||||||
else:
|
|
||||||
deskewed_bgr = deskewed_hough
|
|
||||||
angle_applied = angle_hough
|
|
||||||
|
|
||||||
logger.info(f" deskew: hough={angle_hough:.2f} wa={angle_wa:.2f} "
|
|
||||||
f"iter={angle_iterative:.2f} → applied={angle_applied:.2f} "
|
|
||||||
f"({_time.time() - t0:.1f}s)")
|
|
||||||
|
|
||||||
# 4. Dewarp
|
# 4. Dewarp
|
||||||
t0 = _time.time()
|
t0 = _time.time()
|
||||||
|
|||||||
Reference in New Issue
Block a user