"""
CV Preprocessing Dewarp — Vertical shear detection and correction.

Provides four shear detection methods (vertical edge, projection variance,
Hough lines, text-line drift), ensemble combination, quality gating,
and the main dewarp_image() function.

License: Apache 2.0 (commercial use permitted)
PRIVACY: All processing happens locally.
"""

import logging
import math
import time
from typing import Any, Dict, List, Tuple

import numpy as np

from cv_vocab_types import (
    CV2_AVAILABLE,
    TESSERACT_AVAILABLE,
)

logger = logging.getLogger(__name__)

try:
    import cv2
except ImportError:
    cv2 = None  # type: ignore[assignment]

try:
    import pytesseract
    from PIL import Image
except ImportError:
    pytesseract = None  # type: ignore[assignment]
    Image = None  # type: ignore[assignment,misc]


# =============================================================================
# Shear Detection Methods
# =============================================================================

def _detect_shear_angle(img: np.ndarray) -> Dict[str, Any]:
    """Detect vertical shear angle via strongest vertical edge tracking (Method A).

    The image is cut into 20 horizontal strips. In every strip the column with
    the strongest smoothed vertical-edge response inside the left 40% of the
    width is located; a straight line ``x = slope * y + b`` is then fitted
    through those positions and the slope is converted to degrees.

    Args:
        img: Image that ``cv2.cvtColor(..., cv2.COLOR_BGR2GRAY)`` accepts.

    Returns:
        Dict with keys ``method`` ("vertical_edge"), ``shear_degrees`` and
        ``confidence``. Angle and confidence stay 0.0 when the image is too
        narrow, when fewer than 8 strips yield an edge, or when fewer than
        6 positions survive outlier rejection.
    """
    h, w = img.shape[:2]
    result = {"method": "vertical_edge", "shear_degrees": 0.0, "confidence": 0.0}

    # Only the left 40% of each strip is searched. An empty search window
    # would make ``max()`` below raise, so bail out early instead.
    search_w = int(w * 0.4)
    if search_w < 1:
        return result

    gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
    sobel_x = cv2.Sobel(gray, cv2.CV_64F, 1, 0, ksize=3)
    # A 3x3 Sobel on 8-bit input can reach magnitudes far above 255. Saturate
    # before the uint8 cast; a plain astype() would overflow and can turn the
    # strongest edges into small values.
    abs_sobel = np.clip(np.abs(sobel_x), 0, 255).astype(np.uint8)
    _, binary = cv2.threshold(abs_sobel, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU)

    num_strips = 20
    strip_h = h // num_strips

    # GaussianBlur needs an odd kernel width; both values are loop-invariant.
    kernel_size = max(3, w // 100)
    if kernel_size % 2 == 0:
        kernel_size += 1

    edge_positions = []
    for i in range(num_strips):
        y_start = i * strip_h
        y_end = min((i + 1) * strip_h, h)
        strip = binary[y_start:y_end, :]
        left_proj = np.sum(strip, axis=0).astype(np.float64)[:search_w]
        # No edge pixels in the search window (also true for empty strips).
        if left_proj.max() == 0:
            continue
        smoothed = cv2.GaussianBlur(
            left_proj.reshape(1, -1), (kernel_size, 1), 0
        ).flatten()
        x_pos = float(np.argmax(smoothed))
        y_center = (y_start + y_end) / 2.0
        edge_positions.append((y_center, x_pos))

    if len(edge_positions) < 8:
        return result

    ys = np.array([p[0] for p in edge_positions])
    xs = np.array([p[1] for p in edge_positions])

    # Reject positions further than 2 sigma from the median x (sigma >= 1 px).
    median_x = np.median(xs)
    std_x = max(np.std(xs), 1.0)
    mask = np.abs(xs - median_x) < 2 * std_x
    ys = ys[mask]
    xs = xs[mask]
    if len(ys) < 6:
        return result

    straight_coeffs = np.polyfit(ys, xs, 1)
    slope = straight_coeffs[0]
    residuals = xs - np.polyval(straight_coeffs, ys)
    rmse = float(np.sqrt(np.mean(residuals ** 2)))

    shear_degrees = math.degrees(math.atan(slope))
    # More supporting strips and a tighter fit both raise the confidence;
    # the fit-quality factor never drops below 0.5.
    confidence = min(1.0, len(ys) / 15.0) * max(0.5, 1.0 - rmse / 5.0)

    result["shear_degrees"] = round(shear_degrees, 3)
    result["confidence"] = round(float(confidence), 2)
    return result


def _detect_shear_by_projection(img: np.ndarray) -> Dict[str, Any]:
    """Detect shear angle by maximising variance of horizontal text-line projections (Method B).

    A half-resolution, Otsu-binarised (inverted) copy of the image is sheared
    by a set of candidate angles. For each candidate the variance of the
    row-sum profile is measured. A coarse sweep (-3.0 to +3.0 degrees in 0.5
    steps) is followed by a fine sweep (+/-0.5 degrees in 0.05 steps) around
    the best coarse angle.

    Args:
        img: Image that ``cv2.cvtColor(..., cv2.COLOR_BGR2GRAY)`` accepts.

    Returns:
        Dict with keys ``method`` ("projection"), ``shear_degrees`` and
        ``confidence``. Angle and confidence stay 0.0 for images smaller than
        2 px in either dimension.
    """
    result = {"method": "projection", "shear_degrees": 0.0, "confidence": 0.0}
    h, w = img.shape[:2]
    # Half-size resampling needs at least 1 px per side after the division.
    if w < 2 or h < 2:
        return result

    gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
    _, binary = cv2.threshold(gray, 0, 255, cv2.THRESH_BINARY_INV + cv2.THRESH_OTSU)
    small = cv2.resize(binary, (w // 2, h // 2), interpolation=cv2.INTER_AREA)
    sh, sw = small.shape

    def _sweep_variance(angles_list):
        """Return ``(angle, row-profile variance)`` for every candidate angle."""
        results = []
        for angle_deg in angles_list:
            if abs(angle_deg) < 0.001:
                sheared = small
            else:
                shear_tan = math.tan(math.radians(angle_deg))
                # Horizontal shear with an offset of -sh/2 * tan so that the
                # row at y = sh/2 is left unshifted.
                M = np.float32([[1, shear_tan, -sh / 2.0 * shear_tan], [0, 1, 0]])
                sheared = cv2.warpAffine(
                    small, M, (sw, sh),
                    flags=cv2.INTER_NEAREST,
                    borderMode=cv2.BORDER_CONSTANT,
                )
            profile = np.sum(sheared, axis=1).astype(float)
            results.append((angle_deg, float(np.var(profile))))
        return results

    coarse_angles = [a * 0.5 for a in range(-6, 7)]
    coarse_results = _sweep_variance(coarse_angles)
    coarse_best = max(coarse_results, key=lambda x: x[1])

    fine_center = coarse_best[0]
    fine_angles = [fine_center + a * 0.05 for a in range(-10, 11)]
    fine_results = _sweep_variance(fine_angles)
    best_angle, best_variance = max(fine_results, key=lambda x: x[1])

    # Confidence grows with how far the winning variance exceeds the mean
    # variance over every angle tried.
    variances = coarse_results + fine_results
    all_mean = sum(v for _, v in variances) / len(variances)
    if all_mean > 0 and best_variance > all_mean:
        confidence = min(1.0, (best_variance - all_mean) / (all_mean + 1.0) * 0.6)
    else:
        confidence = 0.0

    result["shear_degrees"] = round(best_angle, 3)
    result["confidence"] = round(max(0.0, min(1.0, confidence)), 2)
    return result


def _detect_shear_by_hough(img: np.ndarray) -> Dict[str, Any]:
    """Detect shear using Hough transform on printed table / ruled lines (Method C).

    Runs a probabilistic Hough transform on Canny edges, keeps segments within
    +/-5 degrees of horizontal, and takes the length-weighted median of their
    angles. The reported shear is the negated median angle.

    Args:
        img: Image that ``cv2.cvtColor(..., cv2.COLOR_BGR2GRAY)`` accepts.

    Returns:
        Dict with keys ``method`` ("hough_lines"), ``shear_degrees`` and
        ``confidence``. Angle and confidence stay 0.0 when fewer than 3
        segments (overall, or near-horizontal) are found.
    """
    result = {"method": "hough_lines", "shear_degrees": 0.0, "confidence": 0.0}
    h, w = img.shape[:2]
    gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
    edges = cv2.Canny(gray, 50, 150, apertureSize=3)

    min_len = int(w * 0.15)
    lines = cv2.HoughLinesP(
        edges, rho=1, theta=np.pi / 360,
        threshold=int(w * 0.08),
        minLineLength=min_len,
        maxLineGap=20,
    )
    if lines is None or len(lines) < 3:
        return result

    horizontal_angles: List[Tuple[float, float]] = []
    for line in lines:
        x1, y1, x2, y2 = line[0]
        if x1 == x2:
            continue
        # Work in float so the squared differences cannot overflow the
        # integer dtype of the segment endpoints.
        dx = float(x2) - float(x1)
        dy = float(y2) - float(y1)
        angle = float(np.degrees(np.arctan2(dy, dx)))
        if abs(angle) <= 5.0:
            length = float(np.sqrt(dx ** 2 + dy ** 2))
            horizontal_angles.append((angle, length))

    if len(horizontal_angles) < 3:
        return result

    angles_arr = np.array([a for a, _ in horizontal_angles])
    weights_arr = np.array([l for _, l in horizontal_angles])

    # Length-weighted median: first sorted angle at which the cumulative
    # length reaches half of the total length.
    sorted_idx = np.argsort(angles_arr)
    s_angles = angles_arr[sorted_idx]
    s_weights = weights_arr[sorted_idx]
    cum = np.cumsum(s_weights)
    mid_idx = int(np.searchsorted(cum, cum[-1] / 2.0))
    median_angle = float(s_angles[min(mid_idx, len(s_angles) - 1)])

    # Confidence: share of segments within 1 degree of the median, capped at 0.85.
    agree = sum(1 for a, _ in horizontal_angles if abs(a - median_angle) < 1.0)
    confidence = min(1.0, agree / max(len(horizontal_angles), 1)) * 0.85

    shear_degrees = -median_angle
    result["shear_degrees"] = round(shear_degrees, 3)
    result["confidence"] = round(max(0.0, min(1.0, confidence)), 2)
    return result
def _detect_shear_by_text_lines(img: np.ndarray) -> Dict[str, Any]:
    """Detect shear by measuring text-line straightness (Method D).

    Words are located with Tesseract on a half-resolution grayscale copy,
    grouped into columns by their left edge, and for each sufficiently tall
    column a line ``x = drift * y + b`` is fitted through the word positions.
    The median drift over all columns is converted to degrees.

    Args:
        img: Image that ``cv2.cvtColor(..., cv2.COLOR_BGR2GRAY)`` accepts.

    Returns:
        Dict with keys ``method`` ("text_lines"), ``shear_degrees`` and
        ``confidence``. Angle and confidence stay 0.0 when pytesseract / PIL
        are not installed, when OCR fails, or when too few words, columns or
        drifts are available.
    """
    result = {"method": "text_lines", "shear_degrees": 0.0, "confidence": 0.0}

    # The module-level import sets both names to None when the optional OCR
    # dependencies are missing; without this guard the call below would raise
    # AttributeError and take the whole ensemble down with it.
    if pytesseract is None or Image is None:
        logger.debug("text_lines: pytesseract/PIL not available, skipping")
        return result

    h, w = img.shape[:2]
    scale = 0.5
    small = cv2.resize(img, (int(w * scale), int(h * scale)),
                       interpolation=cv2.INTER_AREA)
    gray = cv2.cvtColor(small, cv2.COLOR_BGR2GRAY)
    pil_img = Image.fromarray(gray)

    try:
        data = pytesseract.image_to_data(
            pil_img, lang='eng+deu', config='--psm 11 --oem 3',
            output_type=pytesseract.Output.DICT,
        )
    except Exception:
        # Best effort: this method simply abstains when OCR is unusable.
        logger.warning("text_lines: OCR failed, skipping method", exc_info=True)
        return result

    words = []
    for raw_text, raw_conf, left, top, height, width in zip(
        data['text'], data['conf'], data['left'],
        data['top'], data['height'], data['width'],
    ):
        text = raw_text.strip()
        # Confidence may arrive as int, float or a numeric string such as
        # '95.5' depending on the pytesseract version; parse it as float.
        try:
            conf = float(raw_conf)
        except (TypeError, ValueError):
            continue
        if not text or conf < 20 or len(text) < 2:
            continue
        left_x = float(left)
        cy = top + height / 2.0
        word_w = float(width)
        words.append((left_x, cy, word_w))

    if len(words) < 15:
        return result

    # Cluster words into columns by left edge. The tolerance scales with the
    # average word width, with a floor of 8 px.
    avg_w = sum(ww for _, _, ww in words) / len(words)
    x_tol = max(avg_w * 0.4, 8)
    words_by_x = sorted(words, key=lambda word: word[0])

    columns: List[List[Tuple[float, float]]] = []
    cur_col: List[Tuple[float, float]] = [(words_by_x[0][0], words_by_x[0][1])]
    cur_x = words_by_x[0][0]
    for lx, cy, _ in words_by_x[1:]:
        if abs(lx - cur_x) <= x_tol:
            cur_col.append((lx, cy))
            # Running column position follows new members slowly (20% weight).
            cur_x = cur_x * 0.8 + lx * 0.2
        else:
            if len(cur_col) >= 5:
                columns.append(cur_col)
            cur_col = [(lx, cy)]
            cur_x = lx
    if len(cur_col) >= 5:
        columns.append(cur_col)

    if len(columns) < 2:
        return result

    drifts = []
    for col in columns:
        ys = np.array([p[1] for p in col])
        xs = np.array([p[0] for p in col])
        y_range = ys.max() - ys.min()
        # Skip columns covering less than 30% of the (scaled) image height.
        if y_range < h * scale * 0.3:
            continue
        coeffs = np.polyfit(ys, xs, 1)
        drifts.append(coeffs[0])

    if len(drifts) < 2:
        return result

    median_drift = float(np.median(drifts))
    shear_degrees = math.degrees(math.atan(median_drift))

    # Confidence is an equal blend of "enough columns" (saturates at 4) and
    # "columns agree" (falls to 0 once the drift std-dev reaches 0.02).
    drift_std = float(np.std(drifts))
    consistency = max(0.0, 1.0 - drift_std * 50)
    count_factor = min(1.0, len(drifts) / 4.0)
    confidence = count_factor * 0.5 + consistency * 0.5

    result["shear_degrees"] = round(shear_degrees, 3)
    result["confidence"] = round(max(0.0, min(1.0, confidence)), 2)
    logger.info("text_lines(v2): %d columns, %d drifts, median=%.4f, "
                "shear=%.3f\u00b0, conf=%.2f",
                len(columns), len(drifts), median_drift,
                shear_degrees, confidence)
    return result


# =============================================================================
# Quality Check and Shear Application
# =============================================================================

def _dewarp_quality_check(original: np.ndarray, corrected: np.ndarray) -> bool:
    """Check whether the dewarp correction actually improved alignment.

    Compares the variance of the row-sum profile of a half-resolution,
    Otsu-binarised (inverted) copy of each image.

    Args:
        original: Image before correction.
        corrected: Image after correction.

    Returns:
        True when the corrected image has a strictly larger profile variance
        than the original.
    """
    def _h_proj_variance(img: np.ndarray) -> float:
        """Return the variance of the horizontal projection profile of *img*."""
        gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
        _, binary = cv2.threshold(gray, 0, 255,
                                  cv2.THRESH_BINARY_INV + cv2.THRESH_OTSU)
        small = cv2.resize(binary, (binary.shape[1] // 2, binary.shape[0] // 2),
                           interpolation=cv2.INTER_AREA)
        profile = np.sum(small, axis=1).astype(float)
        return float(np.var(profile))

    var_before = _h_proj_variance(original)
    var_after = _h_proj_variance(corrected)
    return var_after > var_before


def _apply_shear(img: np.ndarray, shear_degrees: float) -> np.ndarray:
    """Apply a vertical shear correction to an image.

    Args:
        img: Image to warp.
        shear_degrees: Shear angle in degrees; each row is shifted
            horizontally in proportion to its distance from ``h / 2``.

    Returns:
        Warped image of the same size, using linear interpolation and
        replicated borders.
    """
    h, w = img.shape[:2]
    shear_tan = math.tan(math.radians(shear_degrees))
    # The translation term leaves the row at y = h/2 unshifted.
    M = np.float32([
        [1, shear_tan, -h / 2.0 * shear_tan],
        [0, 1, 0],
    ])
    corrected = cv2.warpAffine(img, M, (w, h),
                               flags=cv2.INTER_LINEAR,
                               borderMode=cv2.BORDER_REPLICATE)
    return corrected


# =============================================================================
# Ensemble Shear Combination
# =============================================================================

def _ensemble_shear(detections: List[Dict[str, Any]]) -> Tuple[float, float, str]:
    """Combine multiple shear detections into a single weighted estimate (v2).

    Detections below a confidence of 0.35 are ignored. "text_lines"
    detections get their confidence multiplied by 1.5. With two or more
    accepted detections, those further than 1 degree from the weighted mean
    are dropped (unless that would drop all of them) and the weighted mean is
    recomputed. A spread below 0.5 degrees adds 0.15 to the confidence.

    Args:
        detections: Dicts with ``method``, ``shear_degrees`` and ``confidence``.

    Returns:
        Tuple ``(shear_degrees, confidence, methods)`` where *methods* is
        "none", a single method name, or the contributing names joined by "+".
    """
    _MIN_CONF = 0.35
    _METHOD_WEIGHT_BOOST = {"text_lines": 1.5}

    accepted = []
    for d in detections:
        if d["confidence"] < _MIN_CONF:
            continue
        boost = _METHOD_WEIGHT_BOOST.get(d["method"], 1.0)
        effective_conf = d["confidence"] * boost
        accepted.append((d["shear_degrees"], effective_conf, d["method"]))

    if not accepted:
        return 0.0, 0.0, "none"

    if len(accepted) == 1:
        deg, conf, method = accepted[0]
        return deg, min(conf, 1.0), method

    total_w = sum(c for _, c, _ in accepted)
    w_mean = sum(d * c for d, c, _ in accepted) / total_w

    # Outlier rejection against the first-pass weighted mean.
    filtered = [(d, c, m) for d, c, m in accepted if abs(d - w_mean) <= 1.0]
    if not filtered:
        filtered = accepted

    total_w2 = sum(c for _, c, _ in filtered)
    final_deg = sum(d * c for d, c, _ in filtered) / total_w2

    avg_conf = total_w2 / len(filtered)
    spread = max(d for d, _, _ in filtered) - min(d for d, _, _ in filtered)
    agreement_bonus = 0.15 if spread < 0.5 else 0.0
    ensemble_conf = min(1.0, avg_conf + agreement_bonus)

    methods_str = "+".join(m for _, _, m in filtered)
    return round(final_deg, 3), round(min(ensemble_conf, 1.0), 2), methods_str


# =============================================================================
# Main Dewarp Function
# =============================================================================

def dewarp_image(img: np.ndarray, use_ensemble: bool = True) -> Tuple[np.ndarray, Dict[str, Any]]:
    """Correct vertical shear after deskew (v2 with quality gate).

    Methods (timings are the original author's estimates, ~150ms total):
      A. _detect_shear_angle()         -- vertical edge profile (~50ms)
      B. _detect_shear_by_projection() -- horizontal text-line variance (~30ms)
      C. _detect_shear_by_hough()      -- Hough lines on table borders (~20ms)
      D. _detect_shear_by_text_lines() -- text-line straightness (~50ms)

    No correction is applied when OpenCV is unavailable, when the estimated
    shear is below 0.08 degrees, when the confidence is below 0.4, or when a
    correction of at least 0.5 degrees fails the projection-variance quality
    gate.

    Args:
        img: BGR image (already deskewed).
        use_ensemble: If False, fall back to single-method behaviour
            (method A only).

    Returns:
        Tuple of (corrected_image, dewarp_info). *dewarp_info* has the keys
        ``method``, ``shear_degrees``, ``confidence`` and ``detections``
        (the per-method results). When nothing is corrected the input image
        is returned unchanged with method "none".
    """
    no_correction = {
        "method": "none",
        "shear_degrees": 0.0,
        "confidence": 0.0,
        "detections": [],
    }

    if not CV2_AVAILABLE:
        return img, no_correction

    t0 = time.time()

    if use_ensemble:
        detections = [
            _detect_shear_angle(img),
            _detect_shear_by_projection(img),
            _detect_shear_by_hough(img),
            _detect_shear_by_text_lines(img),
        ]
        shear_deg, confidence, method = _ensemble_shear(detections)
    else:
        det_a = _detect_shear_angle(img)
        detections = [det_a]
        shear_deg = det_a["shear_degrees"]
        confidence = det_a["confidence"]
        method = det_a["method"]

    duration = time.time() - t0

    # The log line always has four A-D slots; pad with zeros for methods
    # that did not run.
    per_method = [(d["shear_degrees"], d["confidence"]) for d in detections]
    per_method += [(0.0, 0.0)] * (4 - len(per_method))
    logger.info(
        "dewarp: ensemble shear=%.3f\u00b0 conf=%.2f method=%s (%.2fs) | "
        "A=%.3f/%.2f B=%.3f/%.2f C=%.3f/%.2f D=%.3f/%.2f",
        shear_deg, confidence, method, duration,
        *(value for pair in per_method for value in pair),
    )

    _all_detections = [
        {"method": d["method"], "shear_degrees": d["shear_degrees"],
         "confidence": d["confidence"]}
        for d in detections
    ]

    if abs(shear_deg) < 0.08 or confidence < 0.4:
        no_correction["detections"] = _all_detections
        return img, no_correction

    corrected = _apply_shear(img, -shear_deg)

    # Small corrections are applied unchecked; larger ones must improve the
    # projection variance.
    if abs(shear_deg) >= 0.5 and not _dewarp_quality_check(img, corrected):
        logger.info("dewarp: quality gate REJECTED correction (%.3f\u00b0) -- "
                    "projection variance did not improve", shear_deg)
        no_correction["detections"] = _all_detections
        return img, no_correction

    info = {
        "method": method,
        "shear_degrees": shear_deg,
        "confidence": confidence,
        "detections": _all_detections,
    }
    return corrected, info


def dewarp_image_manual(img: np.ndarray, shear_degrees: float) -> np.ndarray:
    """Apply shear correction with a manual angle.

    Args:
        img: Image to correct.
        shear_degrees: Measured shear in degrees; the negated angle is applied.

    Returns:
        The input image itself when ``abs(shear_degrees) < 0.001``, otherwise
        the warped image.
    """
    if abs(shear_degrees) < 0.001:
        return img
    return _apply_shear(img, -shear_degrees)