klausur-service (7 monoliths): - grid_editor_helpers.py (1,737 → 5 files: columns, filters, headers, zones) - cv_cell_grid.py (1,675 → 7 files: build, legacy, streaming, merge, vocab) - worksheet_editor_api.py (1,305 → 4 files: models, AI, reconstruct, routes) - legal_corpus_ingestion.py (1,280 → 3 files: registry, chunking, ingestion) - cv_review.py (1,248 → 4 files: pipeline, spell, LLM, barrel) - cv_preprocessing.py (1,166 → 3 files: deskew, dewarp, barrel) - rbac.py, admin_api.py, routes/eh.py remain (next batch) backend-lehrer (1 monolith): - classroom_engine/repository.py (1,705 → 7 files by domain) All re-export barrels preserve backward compatibility. Zero import errors verified. Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
475 lines
16 KiB
Python
475 lines
16 KiB
Python
"""
|
|
CV Preprocessing Dewarp — Vertical shear detection and correction.
|
|
|
|
Provides four shear detection methods (vertical edge, projection variance,
|
|
Hough lines, text-line drift), ensemble combination, quality gating,
|
|
and the main dewarp_image() function.
|
|
|
|
Lizenz: Apache 2.0 (kommerziell nutzbar)
|
|
DATENSCHUTZ: Alle Verarbeitung erfolgt lokal.
|
|
"""
|
|
|
|
import logging
|
|
import math
|
|
import time
|
|
from typing import Any, Dict, List, Tuple
|
|
|
|
import numpy as np
|
|
|
|
from cv_vocab_types import (
|
|
CV2_AVAILABLE,
|
|
TESSERACT_AVAILABLE,
|
|
)
|
|
|
|
logger = logging.getLogger(__name__)
|
|
|
|
try:
|
|
import cv2
|
|
except ImportError:
|
|
cv2 = None # type: ignore[assignment]
|
|
|
|
try:
|
|
import pytesseract
|
|
from PIL import Image
|
|
except ImportError:
|
|
pytesseract = None # type: ignore[assignment]
|
|
Image = None # type: ignore[assignment,misc]
|
|
|
|
|
|
# =============================================================================
|
|
# Shear Detection Methods
|
|
# =============================================================================
|
|
|
|
def _detect_shear_angle(img: np.ndarray) -> Dict[str, Any]:
    """Detect vertical shear angle via strongest vertical edge tracking (Method A).

    Splits the image into horizontal strips, locates the strongest vertical
    edge in the left 40% of each strip, and fits a straight line through the
    per-strip edge positions; the line's slope yields the shear angle.

    Args:
        img: BGR image (already deskewed).

    Returns:
        Dict with keys "method", "shear_degrees", "confidence".
    """
    h, w = img.shape[:2]
    result = {"method": "vertical_edge", "shear_degrees": 0.0, "confidence": 0.0}

    gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)

    sobel_x = cv2.Sobel(gray, cv2.CV_64F, 1, 0, ksize=3)
    # FIX: saturate to [0, 255] before the uint8 cast. The previous bare
    # .astype(np.uint8) wrapped modulo 256, so strong gradients
    # (|sobel| > 255 is common with a 3x3 Sobel on uint8 input) aliased to
    # weak values and corrupted the Otsu threshold below.
    abs_sobel = np.clip(np.abs(sobel_x), 0, 255).astype(np.uint8)

    _, binary = cv2.threshold(abs_sobel, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU)

    num_strips = 20
    strip_h = h // num_strips
    edge_positions = []

    for i in range(num_strips):
        y_start = i * strip_h
        y_end = min((i + 1) * strip_h, h)
        strip = binary[y_start:y_end, :]

        # Column sums: peaks mark vertical edges within this strip.
        projection = np.sum(strip, axis=0).astype(np.float64)
        if projection.max() == 0:
            continue

        # Only search the left 40% of the page for the tracked edge.
        search_w = int(w * 0.4)
        left_proj = projection[:search_w]
        if left_proj.max() == 0:
            continue

        # Smooth the 1-D profile so argmax picks the edge centre, not noise.
        kernel_size = max(3, w // 100)
        if kernel_size % 2 == 0:
            kernel_size += 1  # GaussianBlur requires an odd kernel size
        smoothed = cv2.GaussianBlur(left_proj.reshape(1, -1), (kernel_size, 1), 0).flatten()
        x_pos = float(np.argmax(smoothed))
        y_center = (y_start + y_end) / 2.0
        edge_positions.append((y_center, x_pos))

    if len(edge_positions) < 8:
        return result  # too few strips produced a usable edge

    ys = np.array([p[0] for p in edge_positions])
    xs = np.array([p[1] for p in edge_positions])

    # Reject outlier strips (> 2 sigma from the median edge position).
    median_x = np.median(xs)
    std_x = max(np.std(xs), 1.0)
    mask = np.abs(xs - median_x) < 2 * std_x
    ys = ys[mask]
    xs = xs[mask]

    if len(ys) < 6:
        return result

    # Fit x = slope * y + b; the slope converts directly to a shear angle.
    straight_coeffs = np.polyfit(ys, xs, 1)
    slope = straight_coeffs[0]
    fitted = np.polyval(straight_coeffs, ys)
    residuals = xs - fitted
    rmse = float(np.sqrt(np.mean(residuals ** 2)))

    shear_degrees = math.degrees(math.atan(slope))

    # Confidence grows with the number of inliers, shrinks with fit error.
    confidence = min(1.0, len(ys) / 15.0) * max(0.5, 1.0 - rmse / 5.0)

    result["shear_degrees"] = round(shear_degrees, 3)
    result["confidence"] = round(float(confidence), 2)

    return result
|
|
|
|
|
|
def _detect_shear_by_projection(img: np.ndarray) -> Dict[str, Any]:
    """Detect shear angle by maximising variance of horizontal text-line projections (Method B).

    A well-aligned page has sharp peaks in its row-sum profile; shear smears
    them out. Sweep candidate shear angles (coarse, then fine) and keep the
    one that maximises the variance of that profile.
    """
    result = {"method": "projection", "shear_degrees": 0.0, "confidence": 0.0}

    h, w = img.shape[:2]
    gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)

    _, binary = cv2.threshold(gray, 0, 255, cv2.THRESH_BINARY_INV + cv2.THRESH_OTSU)

    # Work at half resolution: the sweep cost is O(angles * pixels).
    small = cv2.resize(binary, (w // 2, h // 2), interpolation=cv2.INTER_AREA)
    sh, sw = small.shape

    def _profile_variance(angle_deg: float) -> float:
        """Variance of the row-sum profile after shearing by angle_deg."""
        if abs(angle_deg) < 0.001:
            sheared = small
        else:
            t = math.tan(math.radians(angle_deg))
            M = np.float32([[1, t, -sh / 2.0 * t], [0, 1, 0]])
            sheared = cv2.warpAffine(small, M, (sw, sh),
                                     flags=cv2.INTER_NEAREST,
                                     borderMode=cv2.BORDER_CONSTANT)
        profile = np.sum(sheared, axis=1).astype(float)
        return float(np.var(profile))

    # Coarse sweep: -3.0 .. +3.0 degrees in 0.5-degree steps.
    coarse = [(a * 0.5, _profile_variance(a * 0.5)) for a in range(-6, 7)]
    center = max(coarse, key=lambda av: av[1])[0]

    # Fine sweep: +/- 0.5 degrees around the coarse winner, 0.05-degree steps.
    fine = [(center + a * 0.05, _profile_variance(center + a * 0.05))
            for a in range(-10, 11)]
    best_angle, best_variance = max(fine, key=lambda av: av[1])

    # Confidence: how far the winner stands above the mean variance.
    everything = coarse + fine
    mean_var = sum(v for _, v in everything) / len(everything)
    if mean_var > 0 and best_variance > mean_var:
        confidence = min(1.0, (best_variance - mean_var) / (mean_var + 1.0) * 0.6)
    else:
        confidence = 0.0

    result["shear_degrees"] = round(best_angle, 3)
    result["confidence"] = round(max(0.0, min(1.0, confidence)), 2)
    return result
|
|
|
|
|
|
def _detect_shear_by_hough(img: np.ndarray) -> Dict[str, Any]:
    """Detect shear using Hough transform on printed table / ruled lines (Method C).

    Finds near-horizontal line segments (table borders, underlines, ruled
    paper), takes their length-weighted median angle, and returns the
    negated angle as the correction.

    Args:
        img: BGR image (already deskewed).

    Returns:
        Dict with keys "method", "shear_degrees", "confidence".
    """
    result = {"method": "hough_lines", "shear_degrees": 0.0, "confidence": 0.0}

    h, w = img.shape[:2]
    gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)

    edges = cv2.Canny(gray, 50, 150, apertureSize=3)

    # Probabilistic Hough: only segments spanning >= 15% of the page width.
    min_len = int(w * 0.15)
    lines = cv2.HoughLinesP(
        edges, rho=1, theta=np.pi / 360,
        threshold=int(w * 0.08),
        minLineLength=min_len,
        maxLineGap=20,
    )

    if lines is None or len(lines) < 3:
        return result  # not enough line structure to measure anything

    # Keep near-horizontal segments (within +/- 5 degrees); remember each
    # segment's length to use as its weight.
    horizontal_angles: List[Tuple[float, float]] = []
    for line in lines:
        x1, y1, x2, y2 = line[0]
        if x1 == x2:
            continue  # perfectly vertical segment: skip
        angle = float(np.degrees(np.arctan2(y2 - y1, x2 - x1)))
        if abs(angle) <= 5.0:
            length = float(np.sqrt((x2 - x1) ** 2 + (y2 - y1) ** 2))
            horizontal_angles.append((angle, length))

    if len(horizontal_angles) < 3:
        return result

    # Length-weighted median angle: sort by angle, then walk the cumulative
    # weights to the halfway point.
    angles_arr = np.array([a for a, _ in horizontal_angles])
    weights_arr = np.array([l for _, l in horizontal_angles])
    sorted_idx = np.argsort(angles_arr)
    s_angles = angles_arr[sorted_idx]
    s_weights = weights_arr[sorted_idx]
    cum = np.cumsum(s_weights)
    mid_idx = int(np.searchsorted(cum, cum[-1] / 2.0))
    # min() guards against searchsorted returning len(s_angles) at the end.
    median_angle = float(s_angles[min(mid_idx, len(s_angles) - 1)])

    # Confidence: fraction of segments within 1 degree of the median,
    # scaled by a fixed 0.85 factor so this method never reports 1.0.
    agree = sum(1 for a, _ in horizontal_angles if abs(a - median_angle) < 1.0)
    confidence = min(1.0, agree / max(len(horizontal_angles), 1)) * 0.85

    # Correction is the negation of the measured line angle.
    shear_degrees = -median_angle

    result["shear_degrees"] = round(shear_degrees, 3)
    result["confidence"] = round(max(0.0, min(1.0, confidence)), 2)
    return result
|
|
|
|
|
|
def _detect_shear_by_text_lines(img: np.ndarray) -> Dict[str, Any]:
    """Detect shear by measuring text-line straightness (Method D).

    Runs OCR word detection, groups words into vertical columns by left
    edge, fits a line through each column's (y, x) points, and converts the
    median horizontal drift per unit height into a shear angle.

    Args:
        img: BGR image (already deskewed).

    Returns:
        Dict with keys "method", "shear_degrees", "confidence".
    """
    result = {"method": "text_lines", "shear_degrees": 0.0, "confidence": 0.0}

    # FIX: bail out when OCR is unavailable. Previously Image.fromarray()
    # ran before the try block, so a missing Pillow / pytesseract install
    # raised AttributeError instead of returning a zero-confidence result.
    if not TESSERACT_AVAILABLE or pytesseract is None or Image is None:
        return result

    h, w = img.shape[:2]
    scale = 0.5  # OCR at half resolution is sufficient for line geometry
    small = cv2.resize(img, (int(w * scale), int(h * scale)),
                       interpolation=cv2.INTER_AREA)
    gray = cv2.cvtColor(small, cv2.COLOR_BGR2GRAY)
    pil_img = Image.fromarray(gray)

    try:
        data = pytesseract.image_to_data(
            pil_img, lang='eng+deu', config='--psm 11 --oem 3',
            output_type=pytesseract.Output.DICT,
        )
    except Exception:
        return result  # best-effort: OCR failure just disables this method

    # Collect confident words as (left_x, center_y, width) triples.
    words = []
    for i in range(len(data['text'])):
        text = data['text'][i].strip()
        # FIX: newer pytesseract builds report conf as a float or float
        # string (e.g. "96.33"); int() alone raised ValueError on those.
        conf = int(float(data['conf'][i]))
        if not text or conf < 20 or len(text) < 2:
            continue
        left_x = float(data['left'][i])
        cy = data['top'][i] + data['height'][i] / 2.0
        word_w = float(data['width'][i])
        words.append((left_x, cy, word_w))

    if len(words) < 15:
        return result

    # Cluster words into columns whose left edges agree within x_tol.
    avg_w = sum(ww for _, _, ww in words) / len(words)
    x_tol = max(avg_w * 0.4, 8)

    words_by_x = sorted(words, key=lambda w: w[0])
    columns: List[List[Tuple[float, float]]] = []
    cur_col: List[Tuple[float, float]] = [(words_by_x[0][0], words_by_x[0][1])]
    cur_x = words_by_x[0][0]

    for lx, cy, _ in words_by_x[1:]:
        if abs(lx - cur_x) <= x_tol:
            cur_col.append((lx, cy))
            cur_x = cur_x * 0.8 + lx * 0.2  # running average of column x
        else:
            if len(cur_col) >= 5:
                columns.append(cur_col)
            cur_col = [(lx, cy)]
            cur_x = lx
    if len(cur_col) >= 5:
        columns.append(cur_col)

    if len(columns) < 2:
        return result

    # Per-column drift: slope of x over y. Columns must span at least 30%
    # of the (scaled) page height to contribute.
    drifts = []
    for col in columns:
        ys = np.array([p[1] for p in col])
        xs = np.array([p[0] for p in col])
        y_range = ys.max() - ys.min()
        if y_range < h * scale * 0.3:
            continue
        coeffs = np.polyfit(ys, xs, 1)
        drifts.append(coeffs[0])

    if len(drifts) < 2:
        return result

    median_drift = float(np.median(drifts))
    shear_degrees = math.degrees(math.atan(median_drift))

    # Confidence: agreement between columns plus a count factor.
    drift_std = float(np.std(drifts))
    consistency = max(0.0, 1.0 - drift_std * 50)
    count_factor = min(1.0, len(drifts) / 4.0)
    confidence = count_factor * 0.5 + consistency * 0.5

    result["shear_degrees"] = round(shear_degrees, 3)
    result["confidence"] = round(max(0.0, min(1.0, confidence)), 2)
    logger.info("text_lines(v2): %d columns, %d drifts, median=%.4f, "
                "shear=%.3f\u00b0, conf=%.2f",
                len(columns), len(drifts), median_drift,
                shear_degrees, confidence)
    return result
|
|
|
|
|
|
# =============================================================================
|
|
# Quality Check and Shear Application
|
|
# =============================================================================
|
|
|
|
def _dewarp_quality_check(original: np.ndarray, corrected: np.ndarray) -> bool:
    """Return True when the correction improved horizontal alignment.

    Sharper text lines produce a spikier row-sum profile, i.e. higher
    variance; the corrected image must beat the original to pass.
    """
    def _profile_var(image: np.ndarray) -> float:
        # Binarise, downscale, then measure variance of the row sums.
        g = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
        _, bw = cv2.threshold(g, 0, 255,
                              cv2.THRESH_BINARY_INV + cv2.THRESH_OTSU)
        half = cv2.resize(bw, (bw.shape[1] // 2, bw.shape[0] // 2),
                          interpolation=cv2.INTER_AREA)
        return float(np.var(np.sum(half, axis=1).astype(float)))

    return _profile_var(corrected) > _profile_var(original)
|
|
|
|
|
|
def _apply_shear(img: np.ndarray, shear_degrees: float) -> np.ndarray:
    """Apply a vertical shear correction to an image.

    Each row is shifted horizontally in proportion to its distance from
    the vertical centre, so the centre row stays fixed.
    """
    height, width = img.shape[:2]
    t = math.tan(math.radians(shear_degrees))

    # x' = x + t*y - t*height/2 ; y' = y
    transform = np.float32([
        [1, t, -height / 2.0 * t],
        [0, 1, 0],
    ])

    return cv2.warpAffine(img, transform, (width, height),
                          flags=cv2.INTER_LINEAR,
                          borderMode=cv2.BORDER_REPLICATE)
|
|
|
|
|
|
# =============================================================================
|
|
# Ensemble Shear Combination
|
|
# =============================================================================
|
|
|
|
def _ensemble_shear(detections: List[Dict[str, Any]]) -> Tuple[float, float, str]:
|
|
"""Combine multiple shear detections into a single weighted estimate (v2)."""
|
|
_MIN_CONF = 0.35
|
|
_METHOD_WEIGHT_BOOST = {"text_lines": 1.5}
|
|
|
|
accepted = []
|
|
for d in detections:
|
|
if d["confidence"] < _MIN_CONF:
|
|
continue
|
|
boost = _METHOD_WEIGHT_BOOST.get(d["method"], 1.0)
|
|
effective_conf = d["confidence"] * boost
|
|
accepted.append((d["shear_degrees"], effective_conf, d["method"]))
|
|
|
|
if not accepted:
|
|
return 0.0, 0.0, "none"
|
|
|
|
if len(accepted) == 1:
|
|
deg, conf, method = accepted[0]
|
|
return deg, min(conf, 1.0), method
|
|
|
|
total_w = sum(c for _, c, _ in accepted)
|
|
w_mean = sum(d * c for d, c, _ in accepted) / total_w
|
|
|
|
filtered = [(d, c, m) for d, c, m in accepted if abs(d - w_mean) <= 1.0]
|
|
if not filtered:
|
|
filtered = accepted
|
|
|
|
total_w2 = sum(c for _, c, _ in filtered)
|
|
final_deg = sum(d * c for d, c, _ in filtered) / total_w2
|
|
|
|
avg_conf = total_w2 / len(filtered)
|
|
spread = max(d for d, _, _ in filtered) - min(d for d, _, _ in filtered)
|
|
agreement_bonus = 0.15 if spread < 0.5 else 0.0
|
|
ensemble_conf = min(1.0, avg_conf + agreement_bonus)
|
|
|
|
methods_str = "+".join(m for _, _, m in filtered)
|
|
return round(final_deg, 3), round(min(ensemble_conf, 1.0), 2), methods_str
|
|
|
|
|
|
# =============================================================================
|
|
# Main Dewarp Function
|
|
# =============================================================================
|
|
|
|
def dewarp_image(img: np.ndarray, use_ensemble: bool = True) -> Tuple[np.ndarray, Dict[str, Any]]:
    """Correct vertical shear after deskew (v2 with quality gate).

    Methods (all run in ~150ms total):
      A. _detect_shear_angle() -- vertical edge profile (~50ms)
      B. _detect_shear_by_projection() -- horizontal text-line variance (~30ms)
      C. _detect_shear_by_hough() -- Hough lines on table borders (~20ms)
      D. _detect_shear_by_text_lines() -- text-line straightness (~50ms)

    Args:
        img: BGR image (already deskewed).
        use_ensemble: If False, fall back to single-method behaviour (method A only).

    Returns:
        Tuple of (corrected_image, dewarp_info). The image is returned
        unchanged when OpenCV is missing, the shear is negligible, the
        confidence is low, or the quality gate rejects the correction.
    """
    # Sentinel info dict returned on every no-op path.
    no_correction = {
        "method": "none",
        "shear_degrees": 0.0,
        "confidence": 0.0,
        "detections": [],
    }

    # Without OpenCV nothing can be detected or warped.
    if not CV2_AVAILABLE:
        return img, no_correction

    t0 = time.time()

    if use_ensemble:
        det_a = _detect_shear_angle(img)
        det_b = _detect_shear_by_projection(img)
        det_c = _detect_shear_by_hough(img)
        det_d = _detect_shear_by_text_lines(img)
        detections = [det_a, det_b, det_c, det_d]
        shear_deg, confidence, method = _ensemble_shear(detections)
    else:
        # Legacy single-method path: method A only.
        det_a = _detect_shear_angle(img)
        detections = [det_a]
        shear_deg = det_a["shear_degrees"]
        confidence = det_a["confidence"]
        method = det_a["method"]

    duration = time.time() - t0

    # One log line with the ensemble result plus every method's raw output;
    # missing methods (single-method path) are logged as 0.0/0.0.
    logger.info(
        "dewarp: ensemble shear=%.3f\u00b0 conf=%.2f method=%s (%.2fs) | "
        "A=%.3f/%.2f B=%.3f/%.2f C=%.3f/%.2f D=%.3f/%.2f",
        shear_deg, confidence, method, duration,
        detections[0]["shear_degrees"], detections[0]["confidence"],
        detections[1]["shear_degrees"] if len(detections) > 1 else 0.0,
        detections[1]["confidence"] if len(detections) > 1 else 0.0,
        detections[2]["shear_degrees"] if len(detections) > 2 else 0.0,
        detections[2]["confidence"] if len(detections) > 2 else 0.0,
        detections[3]["shear_degrees"] if len(detections) > 3 else 0.0,
        detections[3]["confidence"] if len(detections) > 3 else 0.0,
    )

    # Copy of the per-method results for the caller's diagnostics.
    _all_detections = [
        {"method": d["method"], "shear_degrees": d["shear_degrees"],
         "confidence": d["confidence"]}
        for d in detections
    ]

    # Skip correction for negligible angles or low ensemble confidence.
    if abs(shear_deg) < 0.08 or confidence < 0.4:
        no_correction["detections"] = _all_detections
        return img, no_correction

    # Apply the negated shear to cancel the detected one.
    corrected = _apply_shear(img, -shear_deg)

    # Quality gate (only for larger corrections, >= 0.5 degrees): keep the
    # correction only if the horizontal projection variance improved.
    if abs(shear_deg) >= 0.5 and not _dewarp_quality_check(img, corrected):
        logger.info("dewarp: quality gate REJECTED correction (%.3f\u00b0) -- "
                    "projection variance did not improve", shear_deg)
        no_correction["detections"] = _all_detections
        return img, no_correction

    info = {
        "method": method,
        "shear_degrees": shear_deg,
        "confidence": confidence,
        "detections": _all_detections,
    }

    return corrected, info
|
|
|
|
|
|
def dewarp_image_manual(img: np.ndarray, shear_degrees: float) -> np.ndarray:
    """Apply shear correction with a manual angle."""
    # Negligible angles are a no-op; otherwise negate the given angle so
    # the stated shear is cancelled out.
    if abs(shear_degrees) >= 0.001:
        return _apply_shear(img, -shear_degrees)
    return img
|