From 6623a5d10e368bd8b3531cfca0d7c7b95978978a Mon Sep 17 00:00:00 2001 From: Benjamin Admin Date: Tue, 3 Mar 2026 15:26:38 +0100 Subject: [PATCH] fix(columns): extend rightmost column to content right edge (right_x) Previously detect_column_geometry() ended the last column at the start of the detected right-margin gap (left_x + right_boundary), which could cut into actual text near the right edge of the Example column. Since only the page margin lies to the right of the last column, the rightmost column now always extends to right_x regardless of whether a right-margin gap was detected. This prevents OCR crops from missing words at the right edge of wide columns like column_example. Co-Authored-By: Claude Sonnet 4.6 --- klausur-service/backend/cv_vocab_pipeline.py | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/klausur-service/backend/cv_vocab_pipeline.py b/klausur-service/backend/cv_vocab_pipeline.py index 36fe9ed..cad6ce5 100644 --- a/klausur-service/backend/cv_vocab_pipeline.py +++ b/klausur-service/backend/cv_vocab_pipeline.py @@ -1630,9 +1630,10 @@ def detect_column_geometry(ocr_img: np.ndarray, dewarped_bgr: np.ndarray) -> Opt for i, start_x in enumerate(col_starts): if i + 1 < len(col_starts): next_start = col_starts[i + 1] - elif is_right_margin: - next_start = left_x + right_boundary else: + # Rightmost column always extends to content right edge. + # There is nothing to the right of the last column except page margin, + # so there is no reason to end earlier. next_start = right_x col_left_rel = start_x - left_x @@ -1651,9 +1652,9 @@ def detect_column_geometry(ocr_img: np.ndarray, dewarped_bgr: np.ndarray) -> Opt for i, start_x in enumerate(col_starts): if i + 1 < len(col_starts): end_x = col_starts[i + 1] - elif is_right_margin: - end_x = left_x + right_boundary else: + # Rightmost column always extends to content right edge (right_x). + # Page margin detection may underestimate where the content ends — extend fully so no text is cropped.
end_x = right_x all_boundaries.append((start_x, end_x))