def _projection_gradient_score(profile: np.ndarray) -> float:
    """Score a projection profile by the squared L2-norm of its first derivative.

    A higher score means sharper transitions between text lines and the
    gaps separating them, i.e. better row/column alignment. The measure
    reacts more strongly to small angular differences than plain variance.

    Args:
        profile: Projection profile (e.g. per-row pixel sums). Any numeric
            dtype is accepted.

    Returns:
        Sum of squared first differences of ``profile`` as a Python float;
        ``0.0`` when the profile has fewer than two samples.
    """
    # np.diff keeps the input dtype, so an unsigned-integer profile would
    # wrap around on negative steps (and the squares could overflow).
    # Casting to float64 first is a no-op for float64 input.
    samples = np.asarray(profile, dtype=np.float64)
    deltas = np.diff(samples)
    return float(np.sum(np.square(deltas)))
@@ -438,59 +452,44 @@ def deskew_image_iterative( crop_h, crop_w = crop.shape[:2] crop_center = (crop_w // 2, crop_h // 2) - # --- Phase 1: coarse sweep (horizontal projection → row alignment) --- - coarse_angles = np.arange(-coarse_range, coarse_range + coarse_step * 0.5, coarse_step) - best_coarse_angle = 0.0 - best_coarse_score = -1.0 - coarse_scores = [] + def _sweep(angles: np.ndarray) -> list: + """Return [(angle, score), ...] for horizontal projection gradient.""" + results = [] + for angle in angles: + if abs(angle) < 1e-6: + rotated_crop = crop + else: + M = cv2.getRotationMatrix2D(crop_center, angle, 1.0) + rotated_crop = cv2.warpAffine(crop, M, (crop_w, crop_h), + flags=cv2.INTER_NEAREST, + borderMode=cv2.BORDER_CONSTANT, + borderValue=0) + h_profile = np.sum(rotated_crop, axis=1, dtype=np.float64) + score = _projection_gradient_score(h_profile) + results.append((float(angle), score)) + return results - for angle in coarse_angles: - if abs(angle) < 1e-6: - rotated_crop = crop - else: - M = cv2.getRotationMatrix2D(crop_center, angle, 1.0) - rotated_crop = cv2.warpAffine(crop, M, (crop_w, crop_h), - flags=cv2.INTER_NEAREST, - borderMode=cv2.BORDER_CONSTANT, - borderValue=0) - h_profile = np.sum(rotated_crop, axis=1, dtype=np.float64) - score = float(np.var(h_profile)) - coarse_scores.append((round(float(angle), 2), round(score, 1))) - if score > best_coarse_score: - best_coarse_score = score - best_coarse_angle = float(angle) + # --- Phase 1: coarse sweep --- + coarse_angles = np.arange(-coarse_range, coarse_range + coarse_step * 0.5, coarse_step) + coarse_results = _sweep(coarse_angles) + best_coarse = max(coarse_results, key=lambda x: x[1]) + best_coarse_angle, best_coarse_score = best_coarse debug["coarse_best_angle"] = round(best_coarse_angle, 2) debug["coarse_best_score"] = round(best_coarse_score, 1) - debug["coarse_scores"] = coarse_scores + debug["coarse_scores"] = [(round(a, 2), round(s, 1)) for a, s in coarse_results] - # --- Phase 2: fine sweep 
(vertical projection → column alignment) --- + # --- Phase 2: fine sweep around coarse winner --- fine_lo = best_coarse_angle - fine_range fine_hi = best_coarse_angle + fine_range fine_angles = np.arange(fine_lo, fine_hi + fine_step * 0.5, fine_step) - best_fine_angle = best_coarse_angle - best_fine_score = -1.0 - fine_scores = [] - - for angle in fine_angles: - if abs(angle) < 1e-6: - rotated_crop = crop - else: - M = cv2.getRotationMatrix2D(crop_center, angle, 1.0) - rotated_crop = cv2.warpAffine(crop, M, (crop_w, crop_h), - flags=cv2.INTER_NEAREST, - borderMode=cv2.BORDER_CONSTANT, - borderValue=0) - v_profile = np.sum(rotated_crop, axis=0, dtype=np.float64) - score = float(np.var(v_profile)) - fine_scores.append((round(float(angle), 2), round(score, 1))) - if score > best_fine_score: - best_fine_score = score - best_fine_angle = float(angle) + fine_results = _sweep(fine_angles) + best_fine = max(fine_results, key=lambda x: x[1]) + best_fine_angle, best_fine_score = best_fine debug["fine_best_angle"] = round(best_fine_angle, 2) debug["fine_best_score"] = round(best_fine_score, 1) - debug["fine_scores"] = fine_scores + debug["fine_scores"] = [(round(a, 2), round(s, 1)) for a, s in fine_results] final_angle = best_fine_angle