diff --git a/klausur-service/backend/cv_vocab_pipeline.py b/klausur-service/backend/cv_vocab_pipeline.py index 89d3238..1303c22 100644 --- a/klausur-service/backend/cv_vocab_pipeline.py +++ b/klausur-service/backend/cv_vocab_pipeline.py @@ -1631,10 +1631,11 @@ def detect_column_geometry(ocr_img: np.ndarray, dewarped_bgr: np.ndarray) -> Opt if i + 1 < len(col_starts): next_start = col_starts[i + 1] else: - # Rightmost column always extends to content right edge. - # There is nothing to the right of the last column except page margin, - # so there is no reason to end earlier. - next_start = right_x + # Rightmost column always extends to full image width (w). + # The page margin contains only white space — extending the OCR + # crop to the image edge is safe and prevents text near the right + # border from being cut off. + next_start = w col_left_rel = start_x - left_x col_right_rel = next_start - left_x @@ -1653,9 +1654,8 @@ def detect_column_geometry(ocr_img: np.ndarray, dewarped_bgr: np.ndarray) -> Opt if i + 1 < len(col_starts): end_x = col_starts[i + 1] else: - # Rightmost column always extends to content right edge (right_x). - # Page margin detection may underestimate — extend fully so no text is cropped. - end_x = right_x + # Rightmost column always extends to full image width (w). + end_x = w all_boundaries.append((start_x, end_x)) geometries = [] @@ -1696,7 +1696,7 @@ def detect_column_geometry(ocr_img: np.ndarray, dewarped_bgr: np.ndarray) -> Opt if i + 1 < len(filtered_geoms): g.width = filtered_geoms[i + 1].x - g.x else: - g.width = right_x - g.x + g.width = w - g.x g.index = i col_left_rel = g.x - left_x col_right_rel = col_left_rel + g.width