# breakpilot-lehrer/klausur-service/backend/cv_preprocessing.py

"""
Image I/O, orientation detection, deskew, and dewarp for the CV vocabulary pipeline.

Re-export facade -- the deskew and dewarp logic lives in the sub-modules:

  cv_preprocessing_deskew   Rotation correction (Hough, word-alignment, iterative, two-pass)
  cv_preprocessing_dewarp   Vertical shear detection and correction (4 methods + ensemble)

This file contains the image I/O and orientation detection functions.

License: Apache 2.0 (commercial use permitted)
PRIVACY: All processing happens locally.
"""

import logging
from typing import Tuple

import numpy as np

from cv_vocab_types import (
    CV2_AVAILABLE,
    TESSERACT_AVAILABLE,
)

logger = logging.getLogger(__name__)

# Guarded imports
try:
    import cv2
except ImportError:
    cv2 = None  # type: ignore[assignment]

try:
    import pytesseract
    from PIL import Image
except ImportError:
    pytesseract = None  # type: ignore[assignment]
    Image = None  # type: ignore[assignment,misc]

# Re-export all deskew functions
from cv_preprocessing_deskew import (  # noqa: F401
    deskew_image,
    deskew_image_by_word_alignment,
    deskew_image_iterative,
    deskew_two_pass,
    _projection_gradient_score,
    _measure_textline_slope,
)

# Re-export all dewarp functions
from cv_preprocessing_dewarp import (  # noqa: F401
    _apply_shear,
    _detect_shear_angle,
    _detect_shear_by_hough,
    _detect_shear_by_projection,
    _detect_shear_by_text_lines,
    _dewarp_quality_check,
    _ensemble_shear,
    dewarp_image,
    dewarp_image_manual,
)


# =============================================================================
# Image I/O
# =============================================================================

def render_pdf_high_res(pdf_data: bytes, page_number: int = 0, zoom: float = 3.0) -> np.ndarray:
    """Render a PDF page to a high-resolution numpy array (BGR).

    Args:
        pdf_data: Raw PDF bytes.
        page_number: 0-indexed page number.
        zoom: Zoom factor applied to both axes. PDF pages are rendered at
            72 DPI for zoom 1.0, so the default of 3.0 yields 216 DPI.

    Returns:
        numpy array in BGR format.

    Raises:
        ValueError: If ``page_number`` is not below the document's page count.
    """
    import fitz  # PyMuPDF

    pdf_doc = fitz.open(stream=pdf_data, filetype="pdf")
    try:
        if page_number >= pdf_doc.page_count:
            raise ValueError(f"Page {page_number} does not exist (PDF has {pdf_doc.page_count} pages)")

        page = pdf_doc[page_number]
        pix = page.get_pixmap(matrix=fitz.Matrix(zoom, zoom))

        # View the pixmap's raw sample bytes as an (height, width, channels) array.
        img_data = np.frombuffer(pix.samples, dtype=np.uint8).reshape(pix.h, pix.w, pix.n)
        if pix.n == 4:  # RGBA
            conversion = cv2.COLOR_RGBA2BGR
        elif pix.n == 3:  # RGB
            conversion = cv2.COLOR_RGB2BGR
        else:  # Grayscale
            conversion = cv2.COLOR_GRAY2BGR

        # cvtColor allocates a new array, so the result stays valid after
        # the document (and its pixmap buffer) is released below.
        return cv2.cvtColor(img_data, conversion)
    finally:
        # Always release the document, including when the page check or
        # the rendering/conversion steps raise.
        pdf_doc.close()


def render_image_high_res(image_data: bytes) -> np.ndarray:
    """Load an image (PNG/JPEG) into a numpy array (BGR).

    Args:
        image_data: Raw image bytes.

    Returns:
        numpy array in BGR format.

    Raises:
        ValueError: If ``image_data`` is empty or cannot be decoded.
    """
    img_array = np.frombuffer(image_data, dtype=np.uint8)

    # An empty buffer can never decode. Reject it here with the same
    # ValueError as any other undecodable payload, rather than handing it
    # to OpenCV (which is expected to fail with its own error type on an
    # empty buffer instead of returning None).
    if img_array.size == 0:
        raise ValueError("Could not decode image data")

    img_bgr = cv2.imdecode(img_array, cv2.IMREAD_COLOR)
    if img_bgr is None:
        raise ValueError("Could not decode image data")
    return img_bgr


# =============================================================================
# Orientation Detection (0/90/180/270)
# =============================================================================

def detect_and_fix_orientation(img_bgr: np.ndarray) -> Tuple[np.ndarray, int]:
    """Correct a page's 90-degree-step orientation using Tesseract OSD.

    The image is converted to grayscale and passed to
    ``pytesseract.image_to_osd``. The reported ``rotate`` value is applied
    only when it is 90, 180 or 270 and the reported ``orientation_conf`` is
    at least 1.0. This is best-effort: if pytesseract is unavailable or any
    step raises, the input image is returned untouched.

    Args:
        img_bgr: Input image; converted with ``cv2.COLOR_BGR2GRAY`` for OSD.

    Returns:
        (corrected_image, rotation_degrees) -- rotation is 0, 90, 180, or 270.
        A rotation of 0 means the input image is returned as-is.
    """
    unchanged = (img_bgr, 0)

    if pytesseract is None:
        return unchanged

    try:
        grayscale = cv2.cvtColor(img_bgr, cv2.COLOR_BGR2GRAY)
        osd = pytesseract.image_to_osd(
            Image.fromarray(grayscale),
            output_type=pytesseract.Output.DICT,
        )
        rotate = osd.get("rotate", 0)
        confidence = osd.get("orientation_conf", 0.0)

        logger.info(f"OSD: orientation={rotate}\u00b0 confidence={confidence:.1f}")

        # Nothing to do for an upright page or a low-confidence result.
        if rotate == 0 or confidence < 1.0:
            return unchanged

        # Map the OSD "rotate" value to the matching OpenCV rotation code.
        rotation_codes = {
            180: cv2.ROTATE_180,
            90: cv2.ROTATE_90_CLOCKWISE,
            270: cv2.ROTATE_90_COUNTERCLOCKWISE,
        }
        rotation_code = rotation_codes.get(rotate)
        # Compare against None explicitly: a rotation code may be 0.
        if rotation_code is None:
            return unchanged

        corrected = cv2.rotate(img_bgr, rotation_code)
        logger.info(f"OSD: rotated {rotate}\u00b0 to fix orientation")
        return corrected, rotate

    except Exception as e:
        # Best-effort: any OSD/OpenCV failure falls back to the input image.
        logger.warning(f"OSD orientation detection failed: {e}")
        return unchanged