Files
breakpilot-lehrer/klausur-service/backend/cv_preprocessing.py
Benjamin Admin b2a0126f14 [split-required] Split remaining Python monoliths (Phase 1 continued)
klausur-service (7 monoliths):
- grid_editor_helpers.py (1,737 → 5 files: columns, filters, headers, zones)
- cv_cell_grid.py (1,675 → 7 files: build, legacy, streaming, merge, vocab)
- worksheet_editor_api.py (1,305 → 4 files: models, AI, reconstruct, routes)
- legal_corpus_ingestion.py (1,280 → 3 files: registry, chunking, ingestion)
- cv_review.py (1,248 → 4 files: pipeline, spell, LLM, barrel)
- cv_preprocessing.py (1,166 → 3 files: deskew, dewarp, barrel)
- rbac.py, admin_api.py, routes/eh.py remain (next batch)

backend-lehrer (1 monolith):
- classroom_engine/repository.py (1,705 → 7 files by domain)

All re-export barrels preserve backward compatibility.
Zero import errors verified.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
2026-04-24 22:47:59 +02:00

158 lines
4.6 KiB
Python

"""
Image I/O, orientation detection, deskew, and dewarp for the CV vocabulary pipeline.
Re-export facade -- the deskew and dewarp logic lives in the sub-modules:
cv_preprocessing_deskew Rotation correction (Hough, word-alignment, iterative, two-pass)
cv_preprocessing_dewarp Vertical shear detection and correction (4 methods + ensemble)
This file contains the image I/O and orientation detection functions.
License: Apache 2.0 (commercial use permitted)
PRIVACY: All processing is performed locally.
"""
import logging
from typing import Tuple
import numpy as np
from cv_vocab_types import (
CV2_AVAILABLE,
TESSERACT_AVAILABLE,
)
logger = logging.getLogger(__name__)
# Guarded imports
try:
import cv2
except ImportError:
cv2 = None # type: ignore[assignment]
try:
import pytesseract
from PIL import Image
except ImportError:
pytesseract = None # type: ignore[assignment]
Image = None # type: ignore[assignment,misc]
# Re-export all deskew functions
from cv_preprocessing_deskew import ( # noqa: F401
deskew_image,
deskew_image_by_word_alignment,
deskew_image_iterative,
deskew_two_pass,
_projection_gradient_score,
_measure_textline_slope,
)
# Re-export all dewarp functions
from cv_preprocessing_dewarp import ( # noqa: F401
_apply_shear,
_detect_shear_angle,
_detect_shear_by_hough,
_detect_shear_by_projection,
_detect_shear_by_text_lines,
_dewarp_quality_check,
_ensemble_shear,
dewarp_image,
dewarp_image_manual,
)
# =============================================================================
# Image I/O
# =============================================================================
def render_pdf_high_res(pdf_data: bytes, page_number: int = 0, zoom: float = 3.0) -> np.ndarray:
    """Render a PDF page to a high-resolution numpy array (BGR).

    Args:
        pdf_data: Raw PDF bytes.
        page_number: 0-indexed page number.
        zoom: Zoom factor applied to both axes. PyMuPDF renders at 72 DPI
            for a zoom of 1.0, so the default of 3.0 corresponds to 216 DPI.

    Returns:
        numpy array in BGR format.

    Raises:
        ValueError: If ``page_number`` is not below the document's page count.
    """
    import fitz  # PyMuPDF

    pdf_doc = fitz.open(stream=pdf_data, filetype="pdf")
    try:
        if page_number >= pdf_doc.page_count:
            raise ValueError(f"Page {page_number} does not exist (PDF has {pdf_doc.page_count} pages)")

        page = pdf_doc[page_number]
        mat = fitz.Matrix(zoom, zoom)
        pix = page.get_pixmap(matrix=mat)

        # View the raw sample buffer as (height, width, channels); pix.n is
        # the per-pixel component count that selects the conversion below.
        img_data = np.frombuffer(pix.samples, dtype=np.uint8).reshape(pix.h, pix.w, pix.n)

        if pix.n == 4:  # RGBA
            img_bgr = cv2.cvtColor(img_data, cv2.COLOR_RGBA2BGR)
        elif pix.n == 3:  # RGB
            img_bgr = cv2.cvtColor(img_data, cv2.COLOR_RGB2BGR)
        else:  # Grayscale
            img_bgr = cv2.cvtColor(img_data, cv2.COLOR_GRAY2BGR)

        return img_bgr
    finally:
        # Release the document on every path, including the page-range
        # ValueError above and any rendering or conversion failure.
        pdf_doc.close()
def render_image_high_res(image_data: bytes) -> np.ndarray:
    """Load an image (PNG/JPEG) into a numpy array (BGR).

    Args:
        image_data: Raw encoded image bytes.

    Returns:
        numpy array in BGR format.

    Raises:
        ValueError: If ``image_data`` is empty or cannot be decoded.
    """
    img_array = np.frombuffer(image_data, dtype=np.uint8)

    # Reject an empty buffer up front: OpenCV may raise its own cv2.error
    # for it instead of returning None, which would bypass the ValueError
    # contract used for every other undecodable input.
    if img_array.size == 0:
        raise ValueError("Could not decode image data: input is empty")

    img_bgr = cv2.imdecode(img_array, cv2.IMREAD_COLOR)
    if img_bgr is None:
        raise ValueError("Could not decode image data")
    return img_bgr
# =============================================================================
# Orientation Detection (0/90/180/270)
# =============================================================================
def detect_and_fix_orientation(img_bgr: np.ndarray) -> Tuple[np.ndarray, int]:
    """Correct a page's coarse orientation using Tesseract OSD.

    The input is returned untouched with a rotation of 0 when pytesseract
    is unavailable, when OSD reports no rotation or a confidence below 1.0,
    when the reported angle is not 90/180/270, or when any step raises
    (the failure is logged as a warning).

    Args:
        img_bgr: Image to inspect; it is converted from BGR to grayscale
            before being handed to Tesseract.

    Returns:
        (corrected_image, rotation_degrees) -- rotation is 0, 90, 180, or 270.
    """
    unchanged = (img_bgr, 0)

    if pytesseract is None:
        return unchanged

    try:
        grayscale = cv2.cvtColor(img_bgr, cv2.COLOR_BGR2GRAY)
        page_image = Image.fromarray(grayscale)
        osd_result = pytesseract.image_to_osd(page_image, output_type=pytesseract.Output.DICT)

        rotate = osd_result.get("rotate", 0)
        confidence = osd_result.get("orientation_conf", 0.0)
        logger.info(f"OSD: orientation={rotate}\u00b0 confidence={confidence:.1f}")

        # Nothing to do, or the detection is too uncertain to act on.
        if rotate == 0 or confidence < 1.0:
            return unchanged

        # Map the OSD angle to the matching OpenCV rotation code.
        rotation_codes = {
            90: cv2.ROTATE_90_CLOCKWISE,
            180: cv2.ROTATE_180,
            270: cv2.ROTATE_90_COUNTERCLOCKWISE,
        }
        if rotate not in rotation_codes:
            return unchanged

        fixed = cv2.rotate(img_bgr, rotation_codes[rotate])
        logger.info(f"OSD: rotated {rotate}\u00b0 to fix orientation")
        return fixed, rotate

    except Exception as exc:
        # Best effort: orientation detection must never break the pipeline.
        logger.warning(f"OSD orientation detection failed: {exc}")
        return unchanged