diff --git a/klausur-service/backend/cv_vocab_pipeline.py b/klausur-service/backend/cv_vocab_pipeline.py index 0613c95..e7806e9 100644 --- a/klausur-service/backend/cv_vocab_pipeline.py +++ b/klausur-service/backend/cv_vocab_pipeline.py @@ -1460,7 +1460,10 @@ def detect_column_geometry(ocr_img: np.ndarray, dewarped_bgr: np.ndarray) -> Opt f"y=[{top_y}..{bottom_y}] ({content_h}px)") # --- Step 2: Get word bounding boxes from Tesseract --- - content_roi = dewarped_bgr[top_y:bottom_y, left_x:right_x] + # Crop from left_x to full image width (not right_x) so words at the right + # edge of the last column are included even if they extend past the detected + # content boundary (right_x). + content_roi = dewarped_bgr[top_y:bottom_y, left_x:w] pil_img = Image.fromarray(cv2.cvtColor(content_roi, cv2.COLOR_BGR2RGB)) try: