fix(ocr-pipeline): improve page crop spine detection and cell assignment
Some checks failed
CI / go-lint (push) Has been skipped
CI / python-lint (push) Has been skipped
CI / nodejs-lint (push) Has been skipped
CI / test-go-school (push) Successful in 25s
CI / test-go-edu-search (push) Successful in 25s
CI / test-python-klausur (push) Failing after 1m54s
CI / test-python-agent-core (push) Successful in 15s
CI / test-nodejs-website (push) Successful in 17s
Some checks failed
CI / go-lint (push) Has been skipped
CI / python-lint (push) Has been skipped
CI / nodejs-lint (push) Has been skipped
CI / test-go-school (push) Successful in 25s
CI / test-go-edu-search (push) Successful in 25s
CI / test-python-klausur (push) Failing after 1m54s
CI / test-python-agent-core (push) Successful in 15s
CI / test-nodejs-website (push) Successful in 17s
1. page_crop: Score all dark runs by center-proximity × darkness × narrowness instead of picking the widest. Fixes ad810209, where a wide dark area at 35% was chosen over the actual spine at 50%.
2. cv_words_first: Replace x-center-only word→column assignment with an overlap-based three-pass strategy (overlap → midpoint-range → nearest). Fixes truncated German translations like "Schal" instead of "Schal - die Schals" in session 079cd0d9.
Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -83,10 +83,9 @@ def detect_page_splits(
|
||||
darkest_val, spine_thresh)
|
||||
return []
|
||||
|
||||
# Find the contiguous dark region (spine area)
|
||||
# Find ALL contiguous dark runs in the center region
|
||||
is_dark = center_brightness < spine_thresh
|
||||
# Find the widest dark run
|
||||
best_start, best_end = 0, 0
|
||||
dark_runs: list = [] # list of (start, end) pairs
|
||||
run_start = -1
|
||||
for i in range(len(is_dark)):
|
||||
if is_dark[i]:
|
||||
@@ -94,20 +93,70 @@ def detect_page_splits(
|
||||
run_start = i
|
||||
else:
|
||||
if run_start >= 0:
|
||||
if i - run_start > best_end - best_start:
|
||||
best_start, best_end = run_start, i
|
||||
dark_runs.append((run_start, i))
|
||||
run_start = -1
|
||||
if run_start >= 0 and len(is_dark) - run_start > best_end - best_start:
|
||||
best_start, best_end = run_start, len(is_dark)
|
||||
if run_start >= 0:
|
||||
dark_runs.append((run_start, len(is_dark)))
|
||||
|
||||
spine_w = best_end - best_start
|
||||
if spine_w < w * 0.01:
|
||||
logger.debug("Spine too narrow: %dpx (< %dpx)", spine_w, int(w * 0.01))
|
||||
# Filter out runs that are too narrow (< 1% of image width)
|
||||
min_spine_px = int(w * 0.01)
|
||||
dark_runs = [(s, e) for s, e in dark_runs if e - s >= min_spine_px]
|
||||
|
||||
if not dark_runs:
|
||||
logger.debug("No dark runs wider than %dpx in center region", min_spine_px)
|
||||
return []
|
||||
|
||||
# Score each dark run: prefer centered, dark, narrow valleys
|
||||
center_region_len = center_hi - center_lo
|
||||
image_center_in_region = (w * 0.5 - center_lo) # x=50% mapped into region coords
|
||||
best_score = -1.0
|
||||
best_start, best_end = dark_runs[0]
|
||||
|
||||
for rs, re in dark_runs:
|
||||
run_width = re - rs
|
||||
run_center = (rs + re) / 2.0
|
||||
|
||||
# --- Factor 1: Proximity to image center (gaussian, sigma = 15% of region) ---
|
||||
sigma = center_region_len * 0.15
|
||||
dist = abs(run_center - image_center_in_region)
|
||||
center_factor = float(np.exp(-0.5 * (dist / sigma) ** 2))
|
||||
|
||||
# --- Factor 2: Darkness (how dark is the valley relative to threshold) ---
|
||||
run_brightness = float(np.mean(center_brightness[rs:re]))
|
||||
# Normalize: 1.0 when run_brightness == 0, 0.0 when run_brightness == spine_thresh
|
||||
darkness_factor = max(0.0, (spine_thresh - run_brightness) / spine_thresh)
|
||||
|
||||
# --- Factor 3: Narrowness bonus (spine shadows are narrow, not wide plateaus) ---
|
||||
# Typical spine: 1-5% of image width. Runs wider than 5% lose the bonus linearly, reaching 0.0 at 15% (score multiplier floor: 0.3).
|
||||
width_frac = run_width / w
|
||||
if width_frac <= 0.05:
|
||||
narrowness_bonus = 1.0
|
||||
elif width_frac <= 0.15:
|
||||
narrowness_bonus = 1.0 - (width_frac - 0.05) / 0.10 # linear decay 1.0 → 0.0
|
||||
else:
|
||||
narrowness_bonus = 0.0
|
||||
|
||||
score = center_factor * darkness_factor * (0.3 + 0.7 * narrowness_bonus)
|
||||
|
||||
logger.debug(
|
||||
"Dark run x=%d..%d (w=%d): center_f=%.3f dark_f=%.3f narrow_b=%.3f → score=%.4f",
|
||||
center_lo + rs, center_lo + re, run_width,
|
||||
center_factor, darkness_factor, narrowness_bonus, score,
|
||||
)
|
||||
|
||||
if score > best_score:
|
||||
best_score = score
|
||||
best_start, best_end = rs, re
|
||||
|
||||
spine_w = best_end - best_start
|
||||
spine_x = center_lo + best_start
|
||||
spine_center = spine_x + spine_w // 2
|
||||
|
||||
logger.debug(
|
||||
"Best spine candidate: x=%d..%d (w=%d), score=%.4f",
|
||||
spine_x, spine_x + spine_w, spine_w, best_score,
|
||||
)
|
||||
|
||||
# Verify: must have bright (paper) content on BOTH sides
|
||||
left_brightness = float(np.mean(brightness_smooth[max(0, spine_x - w // 10):spine_x]))
|
||||
right_end = center_lo + best_end
|
||||
|
||||
Reference in New Issue
Block a user