fix(columns): extend rightmost column to content right edge (right_x)
Previously, detect_column_geometry() ended the last column at the start of the detected right-margin gap (left_x + right_boundary) whenever such a gap was found, which could cut into actual text near the right edge of the Example column. Since only the page margin lies to the right of the last column, the rightmost column now always extends to right_x, regardless of whether a right-margin gap was detected. This prevents OCR crops from missing words at the right edge of wide columns such as column_example.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -1630,9 +1630,10 @@ def detect_column_geometry(ocr_img: np.ndarray, dewarped_bgr: np.ndarray) -> Opt
|
||||
for i, start_x in enumerate(col_starts):
|
||||
if i + 1 < len(col_starts):
|
||||
next_start = col_starts[i + 1]
|
||||
elif is_right_margin:
|
||||
next_start = left_x + right_boundary
|
||||
else:
|
||||
# Rightmost column always extends to content right edge.
|
||||
# There is nothing to the right of the last column except page margin,
|
||||
# so there is no reason to end earlier.
|
||||
next_start = right_x
|
||||
|
||||
col_left_rel = start_x - left_x
|
||||
@@ -1651,9 +1652,9 @@ def detect_column_geometry(ocr_img: np.ndarray, dewarped_bgr: np.ndarray) -> Opt
|
||||
for i, start_x in enumerate(col_starts):
|
||||
if i + 1 < len(col_starts):
|
||||
end_x = col_starts[i + 1]
|
||||
elif is_right_margin:
|
||||
end_x = left_x + right_boundary
|
||||
else:
|
||||
# Rightmost column always extends to content right edge (right_x).
|
||||
# Page margin detection may underestimate — extend fully so no text is cropped.
|
||||
end_x = right_x
|
||||
all_boundaries.append((start_x, end_x))
|
||||
|
||||
|
||||
Reference in New Issue
Block a user