klausur-service (7 monoliths): - grid_editor_helpers.py (1,737 → 5 files: columns, filters, headers, zones) - cv_cell_grid.py (1,675 → 7 files: build, legacy, streaming, merge, vocab) - worksheet_editor_api.py (1,305 → 4 files: models, AI, reconstruct, routes) - legal_corpus_ingestion.py (1,280 → 3 files: registry, chunking, ingestion) - cv_review.py (1,248 → 4 files: pipeline, spell, LLM, barrel) - cv_preprocessing.py (1,166 → 3 files: deskew, dewarp, barrel) - rbac.py, admin_api.py, routes/eh.py remain (next batch) backend-lehrer (1 monolith): - classroom_engine/repository.py (1,705 → 7 files by domain) All re-export barrels preserve backward compatibility. Zero import errors verified. Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
158 lines
4.6 KiB
Python
158 lines
4.6 KiB
Python
"""
|
|
Image I/O, orientation detection, deskew, and dewarp for the CV vocabulary pipeline.
|
|
|
|
Re-export facade -- the deskew and dewarp logic lives in the sub-modules:
|
|
|
|
cv_preprocessing_deskew Rotation correction (Hough, word-alignment, iterative, two-pass)
|
|
cv_preprocessing_dewarp Vertical shear detection and correction (4 methods + ensemble)
|
|
|
|
This file contains the image I/O and orientation detection functions.
|
|
|
|
License: Apache 2.0 (commercial use permitted)
|
|
PRIVACY: All processing is performed locally.
|
|
"""
|
|
|
|
import logging
|
|
from typing import Tuple
|
|
|
|
import numpy as np
|
|
|
|
from cv_vocab_types import (
|
|
CV2_AVAILABLE,
|
|
TESSERACT_AVAILABLE,
|
|
)
|
|
|
|
logger = logging.getLogger(__name__)
|
|
|
|
# Guarded imports
|
|
try:
|
|
import cv2
|
|
except ImportError:
|
|
cv2 = None # type: ignore[assignment]
|
|
|
|
try:
|
|
import pytesseract
|
|
from PIL import Image
|
|
except ImportError:
|
|
pytesseract = None # type: ignore[assignment]
|
|
Image = None # type: ignore[assignment,misc]
|
|
|
|
# Re-export all deskew functions
|
|
from cv_preprocessing_deskew import ( # noqa: F401
|
|
deskew_image,
|
|
deskew_image_by_word_alignment,
|
|
deskew_image_iterative,
|
|
deskew_two_pass,
|
|
_projection_gradient_score,
|
|
_measure_textline_slope,
|
|
)
|
|
|
|
# Re-export all dewarp functions
|
|
from cv_preprocessing_dewarp import ( # noqa: F401
|
|
_apply_shear,
|
|
_detect_shear_angle,
|
|
_detect_shear_by_hough,
|
|
_detect_shear_by_projection,
|
|
_detect_shear_by_text_lines,
|
|
_dewarp_quality_check,
|
|
_ensemble_shear,
|
|
dewarp_image,
|
|
dewarp_image_manual,
|
|
)
|
|
|
|
|
|
# =============================================================================
|
|
# Image I/O
|
|
# =============================================================================
|
|
|
|
def render_pdf_high_res(pdf_data: bytes, page_number: int = 0, zoom: float = 3.0) -> np.ndarray:
    """Render a PDF page to a high-resolution numpy array (BGR).

    Args:
        pdf_data: Raw PDF bytes.
        page_number: 0-indexed page number. Only the upper bound is checked
            here; the value is otherwise handed to PyMuPDF's page indexing.
        zoom: Zoom factor applied to both axes. PyMuPDF renders at 72 DPI
            for zoom 1.0, so the default 3.0 yields 216 DPI.

    Returns:
        numpy array in BGR format.

    Raises:
        ImportError: If OpenCV (cv2) is not installed.
        ValueError: If ``page_number`` is beyond the last page of the PDF.
    """
    import fitz  # PyMuPDF

    # Fail early with a clear message instead of an AttributeError on None
    # after the (expensive) page rendering has already happened.
    if cv2 is None:
        raise ImportError("OpenCV (cv2) is required to render PDF pages")

    pdf_doc = fitz.open(stream=pdf_data, filetype="pdf")
    try:
        if page_number >= pdf_doc.page_count:
            raise ValueError(f"Page {page_number} does not exist (PDF has {pdf_doc.page_count} pages)")

        page = pdf_doc[page_number]
        mat = fitz.Matrix(zoom, zoom)
        pix = page.get_pixmap(matrix=mat)

        # View the raw sample bytes as a (height, width, channels) array.
        img_data = np.frombuffer(pix.samples, dtype=np.uint8).reshape(pix.h, pix.w, pix.n)

        # cvtColor allocates a new array, so the result stays valid after
        # the document is closed below.
        if pix.n == 4:  # RGBA
            img_bgr = cv2.cvtColor(img_data, cv2.COLOR_RGBA2BGR)
        elif pix.n == 3:  # RGB
            img_bgr = cv2.cvtColor(img_data, cv2.COLOR_RGB2BGR)
        else:  # Any other channel count is treated as grayscale
            img_bgr = cv2.cvtColor(img_data, cv2.COLOR_GRAY2BGR)
    finally:
        # Always release the document, including on the ValueError above
        # or on a rendering/conversion failure.
        pdf_doc.close()

    return img_bgr
|
|
|
|
|
|
def render_image_high_res(image_data: bytes) -> np.ndarray:
    """Load an image (PNG/JPEG) into a numpy array (BGR).

    Args:
        image_data: Raw image bytes.

    Returns:
        numpy array in BGR format.

    Raises:
        ValueError: If ``image_data`` is empty or cannot be decoded.
        ImportError: If OpenCV (cv2) is not installed.
    """
    # Reject empty input up front: depending on the OpenCV build, an empty
    # buffer may raise an OpenCV-specific error instead of returning None,
    # which would bypass the ValueError contract below.
    if len(image_data) == 0:
        raise ValueError("Could not decode image data: input is empty")

    # Fail with a clear message instead of an AttributeError on None.
    if cv2 is None:
        raise ImportError("OpenCV (cv2) is required to decode image data")

    img_array = np.frombuffer(image_data, dtype=np.uint8)
    img_bgr = cv2.imdecode(img_array, cv2.IMREAD_COLOR)
    # imdecode signals an undecodable buffer by returning None.
    if img_bgr is None:
        raise ValueError("Could not decode image data")
    return img_bgr
|
|
|
|
|
|
# =============================================================================
|
|
# Orientation Detection (0/90/180/270)
|
|
# =============================================================================
|
|
|
|
def detect_and_fix_orientation(img_bgr: np.ndarray) -> Tuple[np.ndarray, int]:
    """Correct a page's coarse orientation using Tesseract OSD.

    The image is converted to grayscale and passed to
    ``pytesseract.image_to_osd``. The reported ``rotate`` value is applied
    only when it is 90, 180 or 270 and ``orientation_conf`` is at least 1.0.

    The input image is returned unchanged with a rotation of 0 when
    pytesseract is unavailable, when OSD reports no rotation or a low
    confidence, when the rotation value is not one of the supported angles,
    or when any step raises (the failure is logged as a warning).

    Returns:
        (corrected_image, rotation_degrees) -- rotation is 0, 90, 180, or 270.
    """
    if pytesseract is None:
        return img_bgr, 0

    try:
        grayscale = cv2.cvtColor(img_bgr, cv2.COLOR_BGR2GRAY)
        pil_page = Image.fromarray(grayscale)

        osd_result = pytesseract.image_to_osd(pil_page, output_type=pytesseract.Output.DICT)
        angle = osd_result.get("rotate", 0)
        osd_conf = osd_result.get("orientation_conf", 0.0)

        logger.info(f"OSD: orientation={angle}\u00b0 confidence={osd_conf:.1f}")

        # Nothing to do, or OSD is not confident enough to act on.
        if angle == 0 or osd_conf < 1.0:
            return img_bgr, 0

        # Map the OSD "rotate" value to the matching OpenCV rotation code.
        rotation_codes = {
            180: cv2.ROTATE_180,
            90: cv2.ROTATE_90_CLOCKWISE,
            270: cv2.ROTATE_90_COUNTERCLOCKWISE,
        }
        if angle not in rotation_codes:
            return img_bgr, 0

        corrected = cv2.rotate(img_bgr, rotation_codes[angle])
        logger.info(f"OSD: rotated {angle}\u00b0 to fix orientation")
        return corrected, angle

    except Exception as e:
        # Best-effort step: never fail the pipeline because OSD failed.
        logger.warning(f"OSD orientation detection failed: {e}")
        return img_bgr, 0
|