From a052f73de322125a425c927bced8255092410b66 Mon Sep 17 00:00:00 2001
From: Benjamin Admin
Date: Mon, 2 Mar 2026 15:42:39 +0100
Subject: [PATCH] fix(ocr-pipeline): pass left_x/right_x to classify_column_types in API path

The ocr_pipeline_api.py code path called classify_column_types without
left_x/right_x, so margin regions were never created. Also add logging
to _build_margin_regions for debugging.

Co-Authored-By: Claude Sonnet 4.6
---
 klausur-service/backend/cv_vocab_pipeline.py | 4 ++++
 klausur-service/backend/ocr_pipeline_api.py | 3 ++-
 2 files changed, 6 insertions(+), 1 deletion(-)

diff --git a/klausur-service/backend/cv_vocab_pipeline.py b/klausur-service/backend/cv_vocab_pipeline.py
index 27b3915..a35e8d8 100644
--- a/klausur-service/backend/cv_vocab_pipeline.py
+++ b/klausur-service/backend/cv_vocab_pipeline.py
@@ -2062,6 +2062,10 @@ def _build_margin_regions(
             classification_method='content_bounds',
         ))
 
+    if margins:
+        logger.info(f"Margins: {[(m.type, m.x, m.width) for m in margins]} "
+                    f"(left_x={left_x}, right_x={right_x}, img_w={img_w})")
+
     return margins
 
 
diff --git a/klausur-service/backend/ocr_pipeline_api.py b/klausur-service/backend/ocr_pipeline_api.py
index cba9b80..9da70a9 100644
--- a/klausur-service/backend/ocr_pipeline_api.py
+++ b/klausur-service/backend/ocr_pipeline_api.py
@@ -699,7 +699,8 @@ async def detect_columns(session_id: str):
     cached["_content_bounds"] = (left_x, right_x, top_y, bottom_y)
 
     # Phase B: Content-based classification
-    regions = classify_column_types(geometries, content_w, top_y, w, h, bottom_y)
+    regions = classify_column_types(geometries, content_w, top_y, w, h, bottom_y,
+                                    left_x=left_x, right_x=right_x)
 
     duration = time.time() - t0
 