klausur-service (7 monoliths): - grid_editor_helpers.py (1,737 → 5 files: columns, filters, headers, zones) - cv_cell_grid.py (1,675 → 7 files: build, legacy, streaming, merge, vocab) - worksheet_editor_api.py (1,305 → 4 files: models, AI, reconstruct, routes) - legal_corpus_ingestion.py (1,280 → 3 files: registry, chunking, ingestion) - cv_review.py (1,248 → 4 files: pipeline, spell, LLM, barrel) - cv_preprocessing.py (1,166 → 3 files: deskew, dewarp, barrel) - rbac.py, admin_api.py, routes/eh.py remain (next batch) backend-lehrer (1 monolith): - classroom_engine/repository.py (1,705 → 7 files by domain) All re-export barrels preserve backward compatibility. Zero import errors verified. Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
438 lines
15 KiB
Python
438 lines
15 KiB
Python
"""
|
|
CV Preprocessing Deskew — Rotation correction via Hough lines, word alignment, and iterative projection.
|
|
|
|
Lizenz: Apache 2.0 (kommerziell nutzbar)
|
|
DATENSCHUTZ: Alle Verarbeitung erfolgt lokal.
|
|
"""
|
|
|
|
# Standard library
import logging
from collections import defaultdict
from typing import Any, Dict, Tuple

# Third-party
import numpy as np

# Project-local availability flags for the optional OpenCV / Tesseract stacks;
# every public entry point in this module checks these before using cv2/pytesseract.
from cv_vocab_types import (
    CV2_AVAILABLE,
    TESSERACT_AVAILABLE,
)

# Module-level logger; handlers/levels are configured by the application.
logger = logging.getLogger(__name__)
|
|
|
|
# Optional dependency: OpenCV. The module must remain importable without it —
# cv2 is set to None and callers gate on CV2_AVAILABLE instead.
try:
    import cv2
except ImportError:
    cv2 = None  # type: ignore[assignment]

# Optional dependencies: Tesseract bindings + Pillow. Only needed by the
# word-alignment and text-line-regression passes; gated via TESSERACT_AVAILABLE.
try:
    import pytesseract
    from PIL import Image
except ImportError:
    pytesseract = None  # type: ignore[assignment]
    Image = None  # type: ignore[assignment,misc]
|
|
|
|
|
|
# =============================================================================
|
|
# Deskew via Hough Lines
|
|
# =============================================================================
|
|
|
|
def deskew_image(img: np.ndarray) -> Tuple[np.ndarray, float]:
    """Correct rotation using Hough Line detection.

    Detects near-horizontal line segments on an Otsu-binarised image, takes
    the median of their angles, clamps it to +/-5 degrees, and rotates the
    image by that amount.

    Args:
        img: BGR image.

    Returns:
        Tuple of (corrected image, detected angle in degrees).
    """
    # Fix: guard against a missing OpenCV install — consistent with
    # deskew_image_by_word_alignment(); previously this raised AttributeError
    # on `cv2.cvtColor` when cv2 is None.
    if not CV2_AVAILABLE:
        return img, 0.0

    gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
    # Inverse Otsu threshold: ink becomes white so Hough sees the strokes.
    _, binary = cv2.threshold(gray, 0, 255, cv2.THRESH_BINARY_INV + cv2.THRESH_OTSU)

    # Only fairly long segments (>= quarter page width) count as candidates.
    lines = cv2.HoughLinesP(binary, 1, np.pi / 180, threshold=100,
                            minLineLength=img.shape[1] // 4, maxLineGap=20)

    # Too few lines -> median angle would be unreliable; leave image as-is.
    if lines is None or len(lines) < 3:
        return img, 0.0

    angles = []
    for line in lines:
        x1, y1, x2, y2 = line[0]
        angle = np.degrees(np.arctan2(y2 - y1, x2 - x1))
        # Keep only near-horizontal segments; steeper ones are likely table
        # borders or noise rather than text baselines.
        if abs(angle) < 15:
            angles.append(angle)

    if not angles:
        return img, 0.0

    median_angle = float(np.median(angles))

    # Clamp to +/-5 degrees — larger detections are almost certainly wrong
    # for scanned worksheets.
    if abs(median_angle) > 5.0:
        median_angle = 5.0 * np.sign(median_angle)

    # Below 0.1 degrees the correction is not worth the interpolation blur.
    if abs(median_angle) < 0.1:
        return img, 0.0

    h, w = img.shape[:2]
    center = (w // 2, h // 2)
    M = cv2.getRotationMatrix2D(center, median_angle, 1.0)
    # BORDER_REPLICATE avoids black wedges at the rotated corners.
    corrected = cv2.warpAffine(img, M, (w, h),
                               flags=cv2.INTER_LINEAR,
                               borderMode=cv2.BORDER_REPLICATE)

    # Lazy %-formatting: message only built when INFO is enabled.
    logger.info("Deskew: corrected %.2f\u00b0 rotation", median_angle)
    return corrected, median_angle
|
|
|
|
|
|
# =============================================================================
|
|
# Deskew via Word Alignment
|
|
# =============================================================================
|
|
|
|
def deskew_image_by_word_alignment(
    image_data: bytes,
    lang: str = "eng+deu",
    downscale_factor: float = 0.5,
) -> Tuple[bytes, float]:
    """Correct rotation by fitting a line through left-most word starts per text line.

    More robust than Hough-based deskew for vocabulary worksheets where text lines
    have consistent left-alignment.

    Args:
        image_data: Raw image bytes (PNG/JPEG).
        lang: Tesseract language string for the quick pass.
        downscale_factor: Shrink factor for the quick Tesseract pass (0.5 = 50%).

    Returns:
        Tuple of (rotated image as PNG bytes, detected angle in degrees).
    """
    if not CV2_AVAILABLE or not TESSERACT_AVAILABLE:
        return image_data, 0.0

    decoded = cv2.imdecode(np.frombuffer(image_data, dtype=np.uint8), cv2.IMREAD_COLOR)
    if decoded is None:
        logger.warning("deskew_by_word_alignment: could not decode image")
        return image_data, 0.0

    full_h, full_w = decoded.shape[:2]

    # Run the quick OCR pass on a shrunken copy; word positions are scaled
    # back to full resolution afterwards.
    shrunk = cv2.resize(
        decoded,
        (int(full_w * downscale_factor), int(full_h * downscale_factor)),
        interpolation=cv2.INTER_AREA,
    )

    try:
        ocr = pytesseract.image_to_data(
            Image.fromarray(cv2.cvtColor(shrunk, cv2.COLOR_BGR2RGB)),
            lang=lang, config="--psm 6 --oem 3",
            output_type=pytesseract.Output.DICT,
        )
    except Exception as e:
        logger.warning(f"deskew_by_word_alignment: Tesseract failed: {e}")
        return image_data, 0.0

    # Bucket confident words by their (block, paragraph, line) identity.
    grouped: Dict[tuple, list] = defaultdict(list)
    for idx, raw_text in enumerate(ocr["text"]):
        word = (raw_text or "").strip()
        if not word or int(ocr["conf"][idx]) < 20:
            continue
        line_key = (ocr["block_num"][idx], ocr["par_num"][idx], ocr["line_num"][idx])
        grouped[line_key].append(idx)

    if len(grouped) < 5:
        logger.info(f"deskew_by_word_alignment: only {len(grouped)} lines, skipping")
        return image_data, 0.0

    # For each text line take the left-most word and record its left edge and
    # vertical centre in full-resolution coordinates.
    upscale = 1.0 / downscale_factor
    anchor_points = []
    for word_indices in grouped.values():
        leftmost = min(word_indices, key=lambda i: ocr["left"][i])
        left_x = ocr["left"][leftmost] * upscale
        box_top = ocr["top"][leftmost] * upscale
        box_h = ocr["height"][leftmost] * upscale
        anchor_points.append((left_x, box_top + box_h / 2.0))

    xs = np.array([p[0] for p in anchor_points])
    ys = np.array([p[1] for p in anchor_points])

    # Discard lines whose left edge sits far from the dominant margin
    # (indented entries, numbering, OCR noise).
    keep = np.abs(xs - float(np.median(xs))) <= full_w * 0.03
    kept_xs = xs[keep]
    kept_ys = ys[keep]

    if len(kept_xs) < 5:
        logger.info(f"deskew_by_word_alignment: only {len(kept_xs)} aligned points after filter, skipping")
        return image_data, 0.0

    # Fit x = slope * y + b: the slope of the left margin is the skew angle.
    slope = np.polyfit(kept_ys, kept_xs, 1)[0]
    angle_deg = float(np.degrees(np.arctan(slope)))
    angle_deg = max(-5.0, min(5.0, angle_deg))

    logger.info(f"deskew_by_word_alignment: detected {angle_deg:.2f}\u00b0 from {len(kept_xs)} points "
                f"(total lines: {len(grouped)})")

    if abs(angle_deg) < 0.05:
        return image_data, 0.0

    rotation = cv2.getRotationMatrix2D((full_w // 2, full_h // 2), angle_deg, 1.0)
    straightened = cv2.warpAffine(decoded, rotation, (full_w, full_h),
                                  flags=cv2.INTER_LINEAR,
                                  borderMode=cv2.BORDER_REPLICATE)

    ok, encoded = cv2.imencode(".png", straightened)
    if not ok:
        logger.warning("deskew_by_word_alignment: PNG encoding failed")
        return image_data, 0.0

    return encoded.tobytes(), angle_deg
|
|
|
|
|
|
# =============================================================================
|
|
# Projection Gradient Scoring
|
|
# =============================================================================
|
|
|
|
def _projection_gradient_score(profile: np.ndarray) -> float:
|
|
"""Score a projection profile by the L2-norm of its first derivative."""
|
|
diff = np.diff(profile)
|
|
return float(np.sum(diff * diff))
|
|
|
|
|
|
# =============================================================================
|
|
# Iterative Deskew (Vertical-Edge Projection)
|
|
# =============================================================================
|
|
|
|
def deskew_image_iterative(
    img: np.ndarray,
    coarse_range: float = 5.0,
    coarse_step: float = 0.1,
    fine_range: float = 0.15,
    fine_step: float = 0.02,
) -> Tuple[np.ndarray, float, Dict[str, Any]]:
    """Iterative deskew using vertical-edge projection optimisation.

    Sweeps candidate angles, rotates a vertical-edge map for each, and keeps
    the angle whose column projection has the sharpest gradient (well-aligned
    vertical strokes produce the spikiest projection).

    Args:
        img: BGR image (full resolution).
        coarse_range: half-range in degrees for the coarse sweep.
        coarse_step: step size in degrees for the coarse sweep.
        fine_range: half-range around the coarse winner for the fine sweep.
        fine_step: step size in degrees for the fine sweep.

    Returns:
        (rotated_bgr, angle_degrees, debug_dict)
    """
    # Fix: guard on OpenCV availability like the other deskew entry points;
    # previously this raised AttributeError on `cv2` when OpenCV is missing.
    if not CV2_AVAILABLE:
        return img, 0.0, {"error": "cv2 not available"}

    h, w = img.shape[:2]
    debug: Dict[str, Any] = {}

    gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)

    # Work on the central region only — page borders and scanner shadows
    # would otherwise dominate the edge projection.
    y_lo, y_hi = int(h * 0.15), int(h * 0.85)
    x_lo, x_hi = int(w * 0.10), int(w * 0.90)
    gray_crop = gray[y_lo:y_hi, x_lo:x_hi]

    # Fix: a degenerate (tiny) input can yield an empty crop, on which
    # `.max()` below would raise ValueError.
    if gray_crop.size == 0:
        return img, 0.0, {"error": "image too small"}

    # |d/dx| highlights vertical strokes (letter stems, table rules).
    sobel_x = cv2.Sobel(gray_crop, cv2.CV_64F, 1, 0, ksize=3)
    edges = np.abs(sobel_x)
    edge_max = edges.max()
    if edge_max > 0:
        edges = (edges / edge_max * 255).astype(np.uint8)
    else:
        return img, 0.0, {"error": "no edges detected"}

    crop_h, crop_w = edges.shape[:2]
    crop_center = (crop_w // 2, crop_h // 2)

    # Trim the rotated edge map's borders so replicate-padding artefacts
    # don't bias the score.
    trim_y = max(4, int(crop_h * 0.03))
    trim_x = max(4, int(crop_w * 0.03))

    def _sweep_edges(angles: np.ndarray) -> list:
        """Score every candidate angle; returns [(angle, score), ...]."""
        results = []
        for angle in angles:
            if abs(angle) < 1e-6:
                rotated = edges  # skip the no-op rotation
            else:
                M = cv2.getRotationMatrix2D(crop_center, angle, 1.0)
                # INTER_NEAREST keeps edge energy intact during the sweep.
                rotated = cv2.warpAffine(edges, M, (crop_w, crop_h),
                                         flags=cv2.INTER_NEAREST,
                                         borderMode=cv2.BORDER_REPLICATE)
            trimmed = rotated[trim_y:-trim_y, trim_x:-trim_x]
            # Column-wise sum: better alignment -> spikier profile -> higher score.
            v_profile = np.sum(trimmed, axis=0, dtype=np.float64)
            score = _projection_gradient_score(v_profile)
            results.append((float(angle), score))
        return results

    # Coarse sweep over the full range...
    coarse_angles = np.arange(-coarse_range, coarse_range + coarse_step * 0.5, coarse_step)
    coarse_results = _sweep_edges(coarse_angles)
    best_coarse_angle, best_coarse_score = max(coarse_results, key=lambda x: x[1])

    debug["coarse_best_angle"] = round(best_coarse_angle, 2)
    debug["coarse_best_score"] = round(best_coarse_score, 1)
    debug["coarse_scores"] = [(round(a, 2), round(s, 1)) for a, s in coarse_results]

    # ...then a fine sweep around the coarse winner.
    fine_angles = np.arange(best_coarse_angle - fine_range,
                            best_coarse_angle + fine_range + fine_step * 0.5,
                            fine_step)
    fine_results = _sweep_edges(fine_angles)
    best_fine_angle, best_fine_score = max(fine_results, key=lambda x: x[1])

    debug["fine_best_angle"] = round(best_fine_angle, 2)
    debug["fine_best_score"] = round(best_fine_score, 1)
    debug["fine_scores"] = [(round(a, 2), round(s, 1)) for a, s in fine_results]

    # Safety clamp, consistent with the other deskew paths.
    final_angle = max(-5.0, min(5.0, best_fine_angle))

    logger.info("deskew_iterative: coarse=%.2f\u00b0 fine=%.2f\u00b0 -> %.2f\u00b0",
                best_coarse_angle, best_fine_angle, final_angle)

    # Sub-0.05-degree corrections are below the interpolation noise floor.
    if abs(final_angle) < 0.05:
        return img, 0.0, debug

    center = (w // 2, h // 2)
    M = cv2.getRotationMatrix2D(center, final_angle, 1.0)
    rotated = cv2.warpAffine(img, M, (w, h),
                             flags=cv2.INTER_LINEAR,
                             borderMode=cv2.BORDER_REPLICATE)

    return rotated, final_angle, debug
|
|
|
|
|
|
# =============================================================================
|
|
# Text-Line Slope Measurement
|
|
# =============================================================================
|
|
|
|
def _measure_textline_slope(img: np.ndarray) -> float:
    """Measure residual text-line slope via Tesseract word-position regression.

    Returns 0.0 when OCR is unavailable or too few reliable lines are found.
    """
    import math as _math

    if not TESSERACT_AVAILABLE or not CV2_AVAILABLE:
        return 0.0

    h, w = img.shape[:2]
    gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
    data = pytesseract.image_to_data(
        Image.fromarray(gray),
        output_type=pytesseract.Output.DICT,
        config="--psm 6",
    )

    # Collect word centres, bucketed per (block, paragraph, line).
    word_centres: Dict[tuple, list] = {}
    for idx, raw in enumerate(data["text"]):
        token = (raw or "").strip()
        # Skip single characters and low-confidence detections.
        if len(token) < 2 or int(data["conf"][idx]) < 30:
            continue
        line_key = (data["block_num"][idx], data["par_num"][idx], data["line_num"][idx])
        centre = (
            data["left"][idx] + data["width"][idx] / 2.0,
            data["top"][idx] + data["height"][idx] / 2.0,
        )
        word_centres.setdefault(line_key, []).append(centre)

    # Least-squares fit y = a*x + b per line; a is the line's slope.
    slopes: list = []
    for centres in word_centres.values():
        if len(centres) < 3:
            continue
        centres.sort(key=lambda p: p[0])
        xs = np.array([p[0] for p in centres], dtype=np.float64)
        ys = np.array([p[1] for p in centres], dtype=np.float64)
        # Ignore lines narrower than 15% of the image width — too short
        # for a stable regression.
        if xs[-1] - xs[0] < w * 0.15:
            continue
        design = np.vstack([xs, np.ones_like(xs)]).T
        fit = np.linalg.lstsq(design, ys, rcond=None)
        slopes.append(_math.degrees(_math.atan(fit[0][0])))

    if len(slopes) < 3:
        return 0.0

    # Trimmed mean: drop ~10% at each end to suppress outlier lines.
    slopes.sort()
    trim = max(1, len(slopes) // 10)
    trimmed = slopes[trim:-trim] if len(slopes) > 2 * trim else slopes
    if not trimmed:
        return 0.0

    return sum(trimmed) / len(trimmed)
|
|
|
|
|
|
# =============================================================================
|
|
# Two-Pass Deskew
|
|
# =============================================================================
|
|
|
|
def deskew_two_pass(
    img: np.ndarray,
    coarse_range: float = 5.0,
) -> Tuple[np.ndarray, float, Dict[str, Any]]:
    """Two-pass deskew: iterative projection + word-alignment residual check.

    Pipeline (each later pass corrects the residual left by the earlier ones):
      1. deskew_image_iterative -- vertical-edge projection sweep.
      2. deskew_image_by_word_alignment -- applied only if its residual is
         >= 0.3 degrees; otherwise skipped.
      3. _measure_textline_slope -- Tesseract text-line regression, same
         0.3-degree gate.
    Passes 2 and 3 are best-effort: any exception is logged and that pass
    contributes 0.0 degrees.

    Args:
        img: BGR image (full resolution).
        coarse_range: half-range in degrees forwarded to the pass-1 sweep.

    Returns:
        (corrected_bgr, total_angle_degrees, debug_dict) where
        total_angle_degrees is the sum of all three pass angles.
    """
    debug: Dict[str, Any] = {}

    # --- Pass 1: iterative projection ---
    # .copy() so pass 1 never mutates the caller's array in place.
    corrected, angle1, dbg1 = deskew_image_iterative(
        img.copy(), coarse_range=coarse_range,
    )
    debug["pass1_angle"] = round(angle1, 3)
    debug["pass1_method"] = "iterative"
    debug["pass1_debug"] = dbg1

    # --- Pass 2: word-alignment residual check ---
    angle2 = 0.0
    try:
        # Word alignment operates on encoded bytes, so round-trip through PNG.
        ok, buf = cv2.imencode(".png", corrected)
        if ok:
            corrected_bytes, angle2 = deskew_image_by_word_alignment(buf.tobytes())
            if abs(angle2) >= 0.3:
                arr2 = np.frombuffer(corrected_bytes, dtype=np.uint8)
                corrected2 = cv2.imdecode(arr2, cv2.IMREAD_COLOR)
                if corrected2 is not None:
                    corrected = corrected2
                    logger.info(f"deskew_two_pass: pass2 residual={angle2:.2f}\u00b0 applied "
                                f"(total={angle1 + angle2:.2f}\u00b0)")
                else:
                    # Decode failed -> pass-2 result discarded, angle reset.
                    angle2 = 0.0
            else:
                logger.info(f"deskew_two_pass: pass2 residual={angle2:.2f}\u00b0 < 0.3\u00b0 -- skipped")
                angle2 = 0.0
    except Exception as e:
        logger.warning(f"deskew_two_pass: pass2 word-alignment failed: {e}")
        angle2 = 0.0

    # --- Pass 3: Tesseract text-line regression residual check ---
    angle3 = 0.0
    try:
        residual = _measure_textline_slope(corrected)
        debug["pass3_raw"] = round(residual, 3)
        if abs(residual) >= 0.3:
            h3, w3 = corrected.shape[:2]
            center3 = (w3 // 2, h3 // 2)
            M3 = cv2.getRotationMatrix2D(center3, residual, 1.0)
            corrected = cv2.warpAffine(
                corrected, M3, (w3, h3),
                flags=cv2.INTER_LINEAR,
                borderMode=cv2.BORDER_REPLICATE,
            )
            angle3 = residual
            logger.info("deskew_two_pass: pass3 text-line residual=%.2f\u00b0 applied", residual)
        else:
            logger.info("deskew_two_pass: pass3 text-line residual=%.2f\u00b0 < 0.3\u00b0 -- skipped", residual)
    except Exception as e:
        logger.warning("deskew_two_pass: pass3 text-line check failed: %s", e)

    total_angle = angle1 + angle2 + angle3
    debug["pass2_angle"] = round(angle2, 3)
    debug["pass2_method"] = "word_alignment"
    debug["pass3_angle"] = round(angle3, 3)
    debug["pass3_method"] = "textline_regression"
    debug["total_angle"] = round(total_angle, 3)

    logger.info(
        "deskew_two_pass: pass1=%.2f\u00b0 + pass2=%.2f\u00b0 + pass3=%.2f\u00b0 = %.2f\u00b0",
        angle1, angle2, angle3, total_angle,
    )

    return corrected, total_angle, debug
|