diff --git a/klausur-service/backend/cv_vocab_pipeline.py b/klausur-service/backend/cv_vocab_pipeline.py index 74a5069..ff28000 100644 --- a/klausur-service/backend/cv_vocab_pipeline.py +++ b/klausur-service/backend/cv_vocab_pipeline.py @@ -405,8 +405,7 @@ def _projection_gradient_score(profile: np.ndarray) -> float: """Score a projection profile by the L2-norm of its first derivative. Higher score = sharper transitions between text-lines and gaps, - i.e. better row/column alignment. Much more sensitive to small - angular differences than plain variance. + i.e. better row/column alignment. """ diff = np.diff(profile) return float(np.sum(diff * diff)) @@ -419,14 +418,17 @@ def deskew_image_iterative( fine_range: float = 0.15, fine_step: float = 0.02, ) -> Tuple[np.ndarray, float, Dict[str, Any]]: - """Iterative deskew using projection-profile gradient optimisation. + """Iterative deskew using vertical-edge projection optimisation. - Two-phase search using *horizontal* projection profiles (row sums) - in both phases. The gradient score (sum of squared first-differences) - is far more sensitive to small rotations than plain variance. + The key insight: at the correct rotation angle, vertical features + (word left-edges, column borders) become truly vertical, producing + the sharpest peaks in the vertical projection of vertical edges. - Phase 1 (coarse): -2.0° … +2.0° in 0.1° steps (41 angles) - Phase 2 (fine): ±0.15° around coarse winner in 0.02° steps (≤16 angles) + Method: + 1. Detect vertical edges via Sobel-X on the central crop. + 2. Coarse sweep: rotate edge image, compute vertical projection + gradient score. The angle where vertical edges align best wins. + 3. Fine sweep: refine around the coarse winner. Args: img: BGR image (full resolution). @@ -441,37 +443,52 @@ def deskew_image_iterative( h, w = img.shape[:2] debug: Dict[str, Any] = {} - # --- Binarise once (grayscale + Otsu) --- + # --- Grayscale + vertical edge detection --- gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY) - _, binary = cv2.threshold(gray, 0, 255, cv2.THRESH_BINARY_INV + cv2.THRESH_OTSU) - # --- Central crop (20%-80% height) for fast rotation --- - y_lo = int(h * 0.2) - y_hi = int(h * 0.8) - crop = binary[y_lo:y_hi, :] - crop_h, crop_w = crop.shape[:2] + # Central crop (15%-85% height, 10%-90% width) to avoid page margins + y_lo, y_hi = int(h * 0.15), int(h * 0.85) + x_lo, x_hi = int(w * 0.10), int(w * 0.90) + gray_crop = gray[y_lo:y_hi, x_lo:x_hi] + + # Sobel-X → absolute vertical edges + sobel_x = cv2.Sobel(gray_crop, cv2.CV_64F, 1, 0, ksize=3) + edges = np.abs(sobel_x) + # Normalise to 0-255 for consistent scoring + edge_max = edges.max() + if edge_max > 0: + edges = (edges / edge_max * 255).astype(np.uint8) + else: + return img, 0.0, {"error": "no edges detected"} + + crop_h, crop_w = edges.shape[:2] crop_center = (crop_w // 2, crop_h // 2) - def _sweep(angles: np.ndarray) -> list: - """Return [(angle, score), ...] for horizontal projection gradient.""" + # Trim margin after rotation to avoid border artifacts + trim_y = max(4, int(crop_h * 0.03)) + trim_x = max(4, int(crop_w * 0.03)) + + def _sweep_edges(angles: np.ndarray) -> list: + """Score each angle by vertical projection gradient of vertical edges.""" results = [] for angle in angles: if abs(angle) < 1e-6: - rotated_crop = crop + rotated = edges else: M = cv2.getRotationMatrix2D(crop_center, angle, 1.0) - rotated_crop = cv2.warpAffine(crop, M, (crop_w, crop_h), - flags=cv2.INTER_NEAREST, - borderMode=cv2.BORDER_CONSTANT, - borderValue=0) - h_profile = np.sum(rotated_crop, axis=1, dtype=np.float64) - score = _projection_gradient_score(h_profile) + rotated = cv2.warpAffine(edges, M, (crop_w, crop_h), + flags=cv2.INTER_NEAREST, + borderMode=cv2.BORDER_REPLICATE) + # Trim borders to avoid edge artifacts + trimmed = rotated[trim_y:-trim_y, trim_x:-trim_x] + v_profile = np.sum(trimmed, axis=0, dtype=np.float64) + score = _projection_gradient_score(v_profile) results.append((float(angle), score)) return results # --- Phase 1: coarse sweep --- coarse_angles = np.arange(-coarse_range, coarse_range + coarse_step * 0.5, coarse_step) - coarse_results = _sweep(coarse_angles) + coarse_results = _sweep_edges(coarse_angles) best_coarse = max(coarse_results, key=lambda x: x[1]) best_coarse_angle, best_coarse_score = best_coarse @@ -483,7 +500,7 @@ def deskew_image_iterative( fine_lo = best_coarse_angle - fine_range fine_hi = best_coarse_angle + fine_range fine_angles = np.arange(fine_lo, fine_hi + fine_step * 0.5, fine_step) - fine_results = _sweep(fine_angles) + fine_results = _sweep_edges(fine_angles) best_fine = max(fine_results, key=lambda x: x[1]) best_fine_angle, best_fine_score = best_fine