From 538d5c732ede2c9574ff0aeffb418b68e08b3fde Mon Sep 17 00:00:00 2001 From: Benjamin Admin Date: Thu, 5 Mar 2026 17:34:57 +0100 Subject: [PATCH] feat: two-pass deskew with wider angle range and residual correction MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Increase iterative deskew coarse_range from ±2° to ±5° to handle heavily skewed scans - New deskew_two_pass(): runs iterative projection first, then word-alignment on the corrected image to detect/fix residual skew (applied when residual ≥ 0.3°) - OCR pipeline API auto_deskew now uses deskew_two_pass by default - Vocab worksheet _run_ocr_pipeline_for_page uses deskew_two_pass - Deskew result now includes angle_residual and two_pass_debug Co-Authored-By: Claude Opus 4.6 --- klausur-service/backend/cv_vocab_pipeline.py | 62 ++++++++++++++++- klausur-service/backend/ocr_pipeline_api.py | 66 +++++++------------ .../backend/vocab_worksheet_api.py | 42 ++---------- 3 files changed, 93 insertions(+), 77 deletions(-) diff --git a/klausur-service/backend/cv_vocab_pipeline.py b/klausur-service/backend/cv_vocab_pipeline.py index ff28000..e893c7f 100644 --- a/klausur-service/backend/cv_vocab_pipeline.py +++ b/klausur-service/backend/cv_vocab_pipeline.py @@ -413,7 +413,7 @@ def _projection_gradient_score(profile: np.ndarray) -> float: def deskew_image_iterative( img: np.ndarray, - coarse_range: float = 2.0, + coarse_range: float = 5.0, coarse_step: float = 0.1, fine_range: float = 0.15, fine_step: float = 0.02, @@ -528,6 +528,66 @@ def deskew_image_iterative( return rotated, final_angle, debug +def deskew_two_pass( + img: np.ndarray, + coarse_range: float = 5.0, +) -> Tuple[np.ndarray, float, Dict[str, Any]]: + """Two-pass deskew: iterative projection + word-alignment residual check. + + Pass 1: ``deskew_image_iterative()`` (vertical-edge projection, wide range). + Pass 2: ``deskew_image_by_word_alignment()`` on the already-corrected image + to detect and fix residual skew that the projection method missed. + + The two corrections are summed. If the residual from Pass 2 is below + 0.3° it is ignored (already good enough). + + Returns: + (corrected_bgr, total_angle_degrees, debug_dict) + """ + debug: Dict[str, Any] = {} + + # --- Pass 1: iterative projection --- + corrected, angle1, dbg1 = deskew_image_iterative( + img.copy(), coarse_range=coarse_range, + ) + debug["pass1_angle"] = round(angle1, 3) + debug["pass1_method"] = "iterative" + debug["pass1_debug"] = dbg1 + + # --- Pass 2: word-alignment residual check on corrected image --- + angle2 = 0.0 + try: + # Encode the corrected image to PNG bytes for word-alignment + ok, buf = cv2.imencode(".png", corrected) + if ok: + corrected_bytes, angle2 = deskew_image_by_word_alignment(buf.tobytes()) + if abs(angle2) >= 0.3: + # Significant residual — decode and use the second correction + arr2 = np.frombuffer(corrected_bytes, dtype=np.uint8) + corrected2 = cv2.imdecode(arr2, cv2.IMREAD_COLOR) + if corrected2 is not None: + corrected = corrected2 + logger.info(f"deskew_two_pass: pass2 residual={angle2:.2f}° applied " + f"(total={angle1 + angle2:.2f}°)") + else: + angle2 = 0.0 + else: + logger.info(f"deskew_two_pass: pass2 residual={angle2:.2f}° < 0.3° — skipped") + angle2 = 0.0 + except Exception as e: + logger.warning(f"deskew_two_pass: pass2 word-alignment failed: {e}") + angle2 = 0.0 + + total_angle = angle1 + angle2 + debug["pass2_angle"] = round(angle2, 3) + debug["pass2_method"] = "word_alignment" + debug["total_angle"] = round(total_angle, 3) + + logger.info(f"deskew_two_pass: pass1={angle1:.2f}° + pass2={angle2:.2f}° = {total_angle:.2f}°") + + return corrected, total_angle, debug + + # ============================================================================= # Stage 3: Dewarp (Book Curvature Correction) # ============================================================================= diff --git a/klausur-service/backend/ocr_pipeline_api.py b/klausur-service/backend/ocr_pipeline_api.py index 75ba7ff..d8084b6 100644 --- a/klausur-service/backend/ocr_pipeline_api.py +++ b/klausur-service/backend/ocr_pipeline_api.py @@ -53,6 +53,7 @@ from cv_vocab_pipeline import ( deskew_image, deskew_image_by_word_alignment, deskew_image_iterative, + deskew_two_pass, detect_column_geometry, detect_document_type, detect_row_geometry, @@ -457,7 +458,7 @@ async def get_image(session_id: str, image_type: str): @router.post("/sessions/{session_id}/deskew") async def auto_deskew(session_id: str): - """Run both deskew methods and pick the best one.""" + """Two-pass deskew: iterative projection (wide range) + word-alignment residual.""" # Ensure session is in cache if session_id not in _cache: await _load_session_to_cache(session_id) @@ -469,52 +470,30 @@ async def auto_deskew(session_id: str): t0 = time.time() - # Method 1: Hough Lines - try: - deskewed_hough, angle_hough = deskew_image(img_bgr.copy()) - except Exception as e: - logger.warning(f"Hough deskew failed: {e}") - deskewed_hough, angle_hough = img_bgr, 0.0 + # Two-pass deskew: iterative (±5°) + word-alignment residual check + deskewed_bgr, angle_applied, two_pass_debug = deskew_two_pass(img_bgr.copy()) + + # Also run individual methods for reporting (non-authoritative) + try: + _, angle_hough = deskew_image(img_bgr.copy()) + except Exception: + angle_hough = 0.0 - # Method 2: Word Alignment (needs image bytes) success_enc, png_orig = cv2.imencode(".png", img_bgr) orig_bytes = png_orig.tobytes() if success_enc else b"" - try: - deskewed_wa_bytes, angle_wa = deskew_image_by_word_alignment(orig_bytes) - except Exception as e: - logger.warning(f"Word alignment deskew failed: {e}") - deskewed_wa_bytes, angle_wa = orig_bytes, 0.0 + _, angle_wa = deskew_image_by_word_alignment(orig_bytes) + except Exception: + angle_wa = 0.0 - # Method 3: Iterative Projection-Profile - angle_iterative = 0.0 - iterative_debug = {} - try: - deskewed_iter, angle_iterative, iterative_debug = deskew_image_iterative(img_bgr.copy()) - except Exception as e: - logger.warning(f"Iterative deskew failed: {e}") - deskewed_iter = img_bgr + angle_iterative = two_pass_debug.get("pass1_angle", 0.0) + angle_residual = two_pass_debug.get("pass2_angle", 0.0) duration = time.time() - t0 - # Pick best method — prefer iterative when it found a non-zero angle - if abs(angle_iterative) >= 0.05: - method_used = "iterative" - angle_applied = angle_iterative - deskewed_bgr = deskewed_iter - elif abs(angle_wa) >= abs(angle_hough) or abs(angle_hough) < 0.1: - method_used = "word_alignment" - angle_applied = angle_wa - wa_array = np.frombuffer(deskewed_wa_bytes, dtype=np.uint8) - deskewed_bgr = cv2.imdecode(wa_array, cv2.IMREAD_COLOR) - if deskewed_bgr is None: - deskewed_bgr = deskewed_hough - method_used = "hough" - angle_applied = angle_hough - else: - method_used = "hough" - angle_applied = angle_hough - deskewed_bgr = deskewed_hough + method_used = "two_pass" + if abs(angle_residual) < 0.3: + method_used = "iterative" # pass2 didn't contribute # Encode as PNG success, deskewed_png_buf = cv2.imencode(".png", deskewed_bgr) @@ -535,10 +514,12 @@ async def auto_deskew(session_id: str): "angle_hough": round(angle_hough, 3), "angle_word_alignment": round(angle_wa, 3), "angle_iterative": round(angle_iterative, 3), + "angle_residual": round(angle_residual, 3), "angle_applied": round(angle_applied, 3), "method_used": method_used, "confidence": round(confidence, 2), "duration_seconds": round(duration, 2), + "two_pass_debug": two_pass_debug, } # Update cache @@ -557,11 +538,14 @@ async def auto_deskew(session_id: str): await update_session_db(session_id, **db_update) logger.info(f"OCR Pipeline: deskew session {session_id}: " - f"hough={angle_hough:.2f} wa={angle_wa:.2f} iter={angle_iterative:.2f} " - f"-> {method_used} {angle_applied:.2f}") + f"hough={angle_hough:.2f} wa={angle_wa:.2f} " + f"iter={angle_iterative:.2f} residual={angle_residual:.2f} " + f"-> {method_used} total={angle_applied:.2f}") await _append_pipeline_log(session_id, "deskew", { "angle_applied": round(angle_applied, 3), + "angle_iterative": round(angle_iterative, 3), + "angle_residual": round(angle_residual, 3), "confidence": round(confidence, 2), "method": method_used, }, duration_ms=int(duration * 1000)) diff --git a/klausur-service/backend/vocab_worksheet_api.py b/klausur-service/backend/vocab_worksheet_api.py index cc68d81..8ece7c2 100644 --- a/klausur-service/backend/vocab_worksheet_api.py +++ b/klausur-service/backend/vocab_worksheet_api.py @@ -65,6 +65,7 @@ try: import numpy as np from cv_vocab_pipeline import ( deskew_image, deskew_image_by_word_alignment, deskew_image_iterative, + deskew_two_pass, dewarp_image, create_ocr_image, detect_column_geometry, analyze_layout_by_words, analyze_layout, create_layout_image, detect_row_geometry, build_cell_grid_v2, @@ -1370,43 +1371,14 @@ async def _run_ocr_pipeline_for_page( except Exception as e: logger.warning(f"Could not create pipeline session in DB: {e}") - # 3. Deskew (3 methods, pick best) + # 3. Two-pass deskew: iterative (±5°) + word-alignment residual t0 = _time.time() - try: - deskewed_hough, angle_hough = deskew_image(img_bgr.copy()) - except Exception: - deskewed_hough, angle_hough = img_bgr, 0.0 + deskewed_bgr, angle_applied, deskew_debug = deskew_two_pass(img_bgr.copy()) + angle_pass1 = deskew_debug.get("pass1_angle", 0.0) + angle_pass2 = deskew_debug.get("pass2_angle", 0.0) - success_enc, png_orig = cv2.imencode(".png", img_bgr) - orig_bytes = png_orig.tobytes() if success_enc else b"" - try: - deskewed_wa_bytes, angle_wa = deskew_image_by_word_alignment(orig_bytes) - except Exception: - deskewed_wa_bytes, angle_wa = orig_bytes, 0.0 - - try: - deskewed_iter, angle_iterative, _ = deskew_image_iterative(img_bgr.copy()) - except Exception: - deskewed_iter, angle_iterative = img_bgr, 0.0 - - # Pick best - if abs(angle_iterative) >= 0.05: - deskewed_bgr = deskewed_iter - angle_applied = angle_iterative - elif abs(angle_wa) >= abs(angle_hough) or abs(angle_hough) < 0.1: - angle_applied = angle_wa - wa_array = np.frombuffer(deskewed_wa_bytes, dtype=np.uint8) - deskewed_bgr = cv2.imdecode(wa_array, cv2.IMREAD_COLOR) - if deskewed_bgr is None: - deskewed_bgr = deskewed_hough - angle_applied = angle_hough - else: - deskewed_bgr = deskewed_hough - angle_applied = angle_hough - - logger.info(f" deskew: hough={angle_hough:.2f} wa={angle_wa:.2f} " - f"iter={angle_iterative:.2f} → applied={angle_applied:.2f} " - f"({_time.time() - t0:.1f}s)") + logger.info(f" deskew: pass1={angle_pass1:.2f} pass2={angle_pass2:.2f} " + f"total={angle_applied:.2f} ({_time.time() - t0:.1f}s)") # 4. Dewarp t0 = _time.time()