""" Image I/O, orientation detection, deskew, and dewarp for the CV vocabulary pipeline. Re-export facade -- all logic lives in the sub-modules: cv_preprocessing_deskew Rotation correction (Hough, word-alignment, iterative, two-pass) cv_preprocessing_dewarp Vertical shear detection and correction (4 methods + ensemble) This file contains the image I/O and orientation detection functions. Lizenz: Apache 2.0 (kommerziell nutzbar) DATENSCHUTZ: Alle Verarbeitung erfolgt lokal. """ import logging from typing import Tuple import numpy as np from cv_vocab_types import ( CV2_AVAILABLE, TESSERACT_AVAILABLE, ) logger = logging.getLogger(__name__) # Guarded imports try: import cv2 except ImportError: cv2 = None # type: ignore[assignment] try: import pytesseract from PIL import Image except ImportError: pytesseract = None # type: ignore[assignment] Image = None # type: ignore[assignment,misc] # Re-export all deskew functions from cv_preprocessing_deskew import ( # noqa: F401 deskew_image, deskew_image_by_word_alignment, deskew_image_iterative, deskew_two_pass, _projection_gradient_score, _measure_textline_slope, ) # Re-export all dewarp functions from cv_preprocessing_dewarp import ( # noqa: F401 _apply_shear, _detect_shear_angle, _detect_shear_by_hough, _detect_shear_by_projection, _detect_shear_by_text_lines, _dewarp_quality_check, _ensemble_shear, dewarp_image, dewarp_image_manual, ) # ============================================================================= # Image I/O # ============================================================================= def render_pdf_high_res(pdf_data: bytes, page_number: int = 0, zoom: float = 3.0) -> np.ndarray: """Render a PDF page to a high-resolution numpy array (BGR). Args: pdf_data: Raw PDF bytes. page_number: 0-indexed page number. zoom: Zoom factor (3.0 = 432 DPI). Returns: numpy array in BGR format. """ import fitz # PyMuPDF pdf_doc = fitz.open(stream=pdf_data, filetype="pdf") if page_number >= pdf_doc.page_count: raise ValueError(f"Page {page_number} does not exist (PDF has {pdf_doc.page_count} pages)") page = pdf_doc[page_number] mat = fitz.Matrix(zoom, zoom) pix = page.get_pixmap(matrix=mat) img_data = np.frombuffer(pix.samples, dtype=np.uint8).reshape(pix.h, pix.w, pix.n) if pix.n == 4: # RGBA img_bgr = cv2.cvtColor(img_data, cv2.COLOR_RGBA2BGR) elif pix.n == 3: # RGB img_bgr = cv2.cvtColor(img_data, cv2.COLOR_RGB2BGR) else: # Grayscale img_bgr = cv2.cvtColor(img_data, cv2.COLOR_GRAY2BGR) pdf_doc.close() return img_bgr def render_image_high_res(image_data: bytes) -> np.ndarray: """Load an image (PNG/JPEG) into a numpy array (BGR). Args: image_data: Raw image bytes. Returns: numpy array in BGR format. """ img_array = np.frombuffer(image_data, dtype=np.uint8) img_bgr = cv2.imdecode(img_array, cv2.IMREAD_COLOR) if img_bgr is None: raise ValueError("Could not decode image data") return img_bgr # ============================================================================= # Orientation Detection (0/90/180/270) # ============================================================================= def detect_and_fix_orientation(img_bgr: np.ndarray) -> Tuple[np.ndarray, int]: """Detect page orientation via Tesseract OSD and rotate if needed. Returns: (corrected_image, rotation_degrees) -- rotation is 0, 90, 180, or 270. """ if pytesseract is None: return img_bgr, 0 try: gray = cv2.cvtColor(img_bgr, cv2.COLOR_BGR2GRAY) pil_img = Image.fromarray(gray) osd = pytesseract.image_to_osd(pil_img, output_type=pytesseract.Output.DICT) rotate = osd.get("rotate", 0) confidence = osd.get("orientation_conf", 0.0) logger.info(f"OSD: orientation={rotate}\u00b0 confidence={confidence:.1f}") if rotate == 0 or confidence < 1.0: return img_bgr, 0 if rotate == 180: corrected = cv2.rotate(img_bgr, cv2.ROTATE_180) elif rotate == 90: corrected = cv2.rotate(img_bgr, cv2.ROTATE_90_CLOCKWISE) elif rotate == 270: corrected = cv2.rotate(img_bgr, cv2.ROTATE_90_COUNTERCLOCKWISE) else: return img_bgr, 0 logger.info(f"OSD: rotated {rotate}\u00b0 to fix orientation") return corrected, rotate except Exception as e: logger.warning(f"OSD orientation detection failed: {e}") return img_bgr, 0