Files
breakpilot-lehrer/klausur-service/backend/ocr/preprocessing/preprocessing.py
Benjamin Admin 098a2ff092 Fix: Resolve all lint errors from ocr/ restructure
- Added ocr_region import to cell_grid/build.py and legacy.py
- Fixed circular import in engines.py via lazy import
- Auto-fixed 22 unused imports via ruff --fix

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
2026-04-25 21:15:53 +02:00

154 lines
4.5 KiB
Python

"""
Image I/O, orientation detection, deskew, and dewarp for the CV vocabulary pipeline.
Re-export facade -- the deskew and dewarp logic lives in the sub-modules:
.deskew (cv_preprocessing_deskew) Rotation correction (Hough, word-alignment, iterative, two-pass)
.dewarp (cv_preprocessing_dewarp) Vertical shear detection and correction (4 methods + ensemble)
Only the image I/O and orientation detection functions are implemented in this file itself.
Lizenz: Apache 2.0 (kommerziell nutzbar)
DATENSCHUTZ: Alle Verarbeitung erfolgt lokal.
"""
import logging
from typing import Tuple
import numpy as np
logger = logging.getLogger(__name__)
# Guarded imports
try:
import cv2
except ImportError:
cv2 = None # type: ignore[assignment]
try:
import pytesseract
from PIL import Image
except ImportError:
pytesseract = None # type: ignore[assignment]
Image = None # type: ignore[assignment,misc]
# Re-export all deskew functions
from .deskew import ( # noqa: F401
deskew_image,
deskew_image_by_word_alignment,
deskew_image_iterative,
deskew_two_pass,
_projection_gradient_score,
_measure_textline_slope,
)
# Re-export all dewarp functions
from .dewarp import ( # noqa: F401
_apply_shear,
_detect_shear_angle,
_detect_shear_by_hough,
_detect_shear_by_projection,
_detect_shear_by_text_lines,
_dewarp_quality_check,
_ensemble_shear,
dewarp_image,
dewarp_image_manual,
)
# =============================================================================
# Image I/O
# =============================================================================
def render_pdf_high_res(pdf_data: bytes, page_number: int = 0, zoom: float = 3.0) -> np.ndarray:
    """Render a PDF page to a high-resolution numpy array (BGR).

    Args:
        pdf_data: Raw PDF bytes.
        page_number: 0-indexed page number.
        zoom: Zoom factor applied to both axes. PyMuPDF renders at 72 DPI
            for zoom 1.0, so the default of 3.0 yields 216 DPI.

    Returns:
        numpy array in BGR format.

    Raises:
        ValueError: If ``page_number`` is not below the document's page count.
    """
    import fitz  # PyMuPDF

    pdf_doc = fitz.open(stream=pdf_data, filetype="pdf")
    try:
        if page_number >= pdf_doc.page_count:
            raise ValueError(f"Page {page_number} does not exist (PDF has {pdf_doc.page_count} pages)")

        page = pdf_doc[page_number]
        mat = fitz.Matrix(zoom, zoom)
        pix = page.get_pixmap(matrix=mat)

        # View the pixmap's raw sample buffer as a (height, width, channels) array.
        img_data = np.frombuffer(pix.samples, dtype=np.uint8).reshape(pix.h, pix.w, pix.n)

        # The colour conversion runs while the document is still open.
        if pix.n == 4:  # RGBA
            img_bgr = cv2.cvtColor(img_data, cv2.COLOR_RGBA2BGR)
        elif pix.n == 3:  # RGB
            img_bgr = cv2.cvtColor(img_data, cv2.COLOR_RGB2BGR)
        else:  # Grayscale
            img_bgr = cv2.cvtColor(img_data, cv2.COLOR_GRAY2BGR)
        return img_bgr
    finally:
        # Always release the document, including when the page check or the
        # rendering above raises.
        pdf_doc.close()
def render_image_high_res(image_data: bytes) -> np.ndarray:
    """Load an image (PNG/JPEG) into a numpy array (BGR).

    Args:
        image_data: Raw image bytes.

    Returns:
        numpy array in BGR format.

    Raises:
        ValueError: If ``image_data`` is empty or cannot be decoded.
    """
    img_array = np.frombuffer(image_data, dtype=np.uint8)

    # An empty buffer makes cv2.imdecode fail with an OpenCV assertion error
    # instead of returning None, so reject it here with the same exception
    # type used for undecodable data.
    if img_array.size == 0:
        raise ValueError("Could not decode image data: input is empty")

    img_bgr = cv2.imdecode(img_array, cv2.IMREAD_COLOR)
    if img_bgr is None:
        raise ValueError("Could not decode image data")
    return img_bgr
# =============================================================================
# Orientation Detection (0/90/180/270)
# =============================================================================
def detect_and_fix_orientation(img_bgr: np.ndarray) -> Tuple[np.ndarray, int]:
    """Run Tesseract OSD on the page and undo a 90/180/270 degree rotation.

    This is best effort. The input image is returned unchanged with a
    rotation of 0 when pytesseract is unavailable, when OSD reports a
    rotation of 0 or an orientation confidence below 1.0, when the reported
    angle is not 90, 180 or 270, or when any step raises.

    Returns:
        (corrected_image, rotation_degrees) -- rotation is 0, 90, 180, or 270.
    """
    unchanged = (img_bgr, 0)

    if pytesseract is None:
        return unchanged

    try:
        # OSD is run on a grayscale PIL image built from the BGR input.
        grayscale = cv2.cvtColor(img_bgr, cv2.COLOR_BGR2GRAY)
        page_image = Image.fromarray(grayscale)
        osd = pytesseract.image_to_osd(page_image, output_type=pytesseract.Output.DICT)

        rotate = osd.get("rotate", 0)
        confidence = osd.get("orientation_conf", 0.0)
        logger.info(f"OSD: orientation={rotate}\u00b0 confidence={confidence:.1f}")

        if rotate == 0 or confidence < 1.0:
            return unchanged

        # Map the reported angle to the matching cv2.rotate flag.
        rotation_flags = (
            (180, cv2.ROTATE_180),
            (90, cv2.ROTATE_90_CLOCKWISE),
            (270, cv2.ROTATE_90_COUNTERCLOCKWISE),
        )
        # Compare against None explicitly: a valid cv2 flag may be 0.
        flag = next((code for angle, code in rotation_flags if rotate == angle), None)
        if flag is None:
            return unchanged

        corrected = cv2.rotate(img_bgr, flag)
        logger.info(f"OSD: rotated {rotate}\u00b0 to fix orientation")
        return corrected, rotate
    except Exception as e:
        # Deliberate catch-all: orientation detection must never abort the
        # pipeline, so log and fall back to the untouched image.
        logger.warning(f"OSD orientation detection failed: {e}")
        return unchanged