feat: add pass 3 text-line regression to deskew pipeline
Some checks failed
CI / go-lint (push) Has been skipped
CI / python-lint (push) Has been skipped
CI / nodejs-lint (push) Has been skipped
CI / test-go-school (push) Successful in 24s
CI / test-go-edu-search (push) Successful in 26s
CI / test-python-klausur (push) Failing after 1m53s
CI / test-python-agent-core (push) Successful in 15s
CI / test-nodejs-website (push) Successful in 15s

After iterative projection (pass 1) and word-alignment (pass 2), a third
pass uses Tesseract word positions + linear regression per text line to
measure and correct residual rotation. This catches cases where passes 1-2
leave significant slope (e.g. 1.7° residual on heavily skewed scans).

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
Benjamin Admin
2026-03-05 17:53:11 +01:00
parent 538d5c732e
commit d39d249daa
3 changed files with 109 additions and 8 deletions

View File

@@ -528,6 +528,67 @@ def deskew_image_iterative(
return rotated, final_angle, debug
def _measure_textline_slope(img: np.ndarray) -> float:
    """Measure residual text-line slope via Tesseract word-position regression.

    Words reported by Tesseract are grouped by (block, par, line). For every
    sufficiently wide line a least-squares fit ``y = slope * x + b`` is made
    through the word centres, and the trimmed mean of the per-line slope
    angles is returned in degrees.

    Sign convention: the fit is done in image coordinates, where Tesseract's
    ``top`` grows downward. A positive value therefore means the word centres
    move *down* the page from left to right (text descends to the right);
    a negative value means they move up.

    Args:
        img: Page image as a NumPy array. A 2-D array is treated as already
            grayscale; anything else is converted with ``COLOR_BGR2GRAY``.

    Returns:
        The trimmed-mean slope in degrees, or 0.0 when Tesseract/OpenCV are
        unavailable or fewer than three usable text lines were found.
    """
    import math

    if not TESSERACT_AVAILABLE or not CV2_AVAILABLE:
        return 0.0

    width = img.shape[1]
    # cvtColor(BGR2GRAY) rejects single-channel input, so pass 2-D images through.
    gray = img if img.ndim == 2 else cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)

    data = pytesseract.image_to_data(
        Image.fromarray(gray),
        output_type=pytesseract.Output.DICT,
        config="--psm 6",
    )

    # Group word centres by text line.
    lines: dict = {}
    columns = zip(
        data["text"], data["conf"],
        data["block_num"], data["par_num"], data["line_num"],
        data["left"], data["top"], data["width"], data["height"],
    )
    for text, conf, block, par, line, left, top, box_w, box_h in columns:
        if len((text or "").strip()) < 2:
            continue
        # Confidence may arrive as an int, a float, or a numeric string
        # (e.g. "96.06"); a value that cannot be parsed only drops this word.
        try:
            confidence = float(conf)
        except (TypeError, ValueError):
            continue
        if confidence < 30:
            continue
        centre = (left + box_w / 2.0, top + box_h / 2.0)
        lines.setdefault((block, par, line), []).append(centre)

    # Per-line linear regression -> slope angle in degrees.
    min_span = width * 0.15  # lines narrower than this give unstable slopes
    slopes = []
    for pts in lines.values():
        if len(pts) < 3:
            continue
        pts.sort(key=lambda p: p[0])
        xs = np.array([p[0] for p in pts], dtype=np.float64)
        ys = np.array([p[1] for p in pts], dtype=np.float64)
        if xs[-1] - xs[0] < min_span:
            continue
        design = np.vstack([xs, np.ones_like(xs)]).T
        slope = np.linalg.lstsq(design, ys, rcond=None)[0][0]
        slopes.append(math.degrees(math.atan(slope)))

    if len(slopes) < 3:
        return 0.0

    # Trimmed mean: drop 10% (at least one value) from each end. With three
    # or more slopes the remaining slice is never empty.
    slopes.sort()
    trim = max(1, len(slopes) // 10)
    kept = slopes[trim:-trim]
    return sum(kept) / len(kept)
def deskew_two_pass(
img: np.ndarray,
coarse_range: float = 5.0,
@@ -578,12 +639,46 @@ def deskew_two_pass(
logger.warning(f"deskew_two_pass: pass2 word-alignment failed: {e}")
angle2 = 0.0
total_angle = angle1 + angle2
# --- Pass 3: Tesseract text-line regression residual check ---
# The most reliable final check: measure actual text-line slopes
# using Tesseract word positions and linear regression per line.
angle3 = 0.0
try:
residual = _measure_textline_slope(corrected)
debug["pass3_raw"] = round(residual, 3)
if abs(residual) >= 0.3:
h3, w3 = corrected.shape[:2]
center3 = (w3 // 2, h3 // 2)
M3 = cv2.getRotationMatrix2D(center3, residual, 1.0)
corrected = cv2.warpAffine(
corrected, M3, (w3, h3),
flags=cv2.INTER_LINEAR,
borderMode=cv2.BORDER_REPLICATE,
)
angle3 = residual
logger.info(
"deskew_two_pass: pass3 text-line residual=%.2f° applied",
residual,
)
else:
logger.info(
"deskew_two_pass: pass3 text-line residual=%.2f° < 0.3° — skipped",
residual,
)
except Exception as e:
logger.warning("deskew_two_pass: pass3 text-line check failed: %s", e)
total_angle = angle1 + angle2 + angle3
debug["pass2_angle"] = round(angle2, 3)
debug["pass2_method"] = "word_alignment"
debug["pass3_angle"] = round(angle3, 3)
debug["pass3_method"] = "textline_regression"
debug["total_angle"] = round(total_angle, 3)
logger.info(f"deskew_two_pass: pass1={angle1:.2f}° + pass2={angle2:.2f}° = {total_angle:.2f}°")
logger.info(
"deskew_two_pass: pass1=%.2f° + pass2=%.2f° + pass3=%.2f° = %.2f°",
angle1, angle2, angle3, total_angle,
)
return corrected, total_angle, debug