diff --git a/klausur-service/backend/cv_vocab_pipeline.py b/klausur-service/backend/cv_vocab_pipeline.py
index ae91187..155dc6e 100644
--- a/klausur-service/backend/cv_vocab_pipeline.py
+++ b/klausur-service/backend/cv_vocab_pipeline.py
@@ -1037,6 +1037,7 @@ def _detect_columns_by_clustering(
 def _detect_sub_columns(
     geometries: List[ColumnGeometry],
     content_w: int,
+    left_x: int = 0,
     _edge_tolerance: int = 8,
     _min_col_start_ratio: float = 0.10,
 ) -> List[ColumnGeometry]:
@@ -1048,6 +1049,10 @@ def _detect_sub_columns(
     start.  Any words to the left of that bin form a sub-column, provided they
     number >= 2 and < 35 % of total.
 
+    Word ``left`` values are relative to the content ROI, whose left edge is
+    at *left_x*; ``ColumnGeometry.x`` is in absolute image coordinates.  Adding
+    *left_x* to a relative x position converts it to absolute coordinates.
+
     Returns a new list of ColumnGeometry — potentially longer than the input.
     """
     if content_w <= 0:
@@ -1101,13 +1106,16 @@ def _detect_sub_columns(
             continue
 
         # --- Build two sub-column geometries ---
+        # Word 'left' values are relative to left_x; geo.x is absolute.
+        # Convert the split position from relative to absolute coordinates.
         max_sub_left = max(w['left'] for w in sub_words)
-        split_x = (max_sub_left + col_start_bin[2]) // 2
+        split_rel = (max_sub_left + col_start_bin[2]) // 2
+        split_abs = split_rel + left_x
 
         sub_x = geo.x
-        sub_width = split_x - geo.x
-        main_x = split_x
-        main_width = (geo.x + geo.width) - split_x
+        sub_width = split_abs - geo.x
+        main_x = split_abs
+        main_width = (geo.x + geo.width) - split_abs
 
         if sub_width <= 0 or main_width <= 0:
             result.append(geo)
@@ -1138,8 +1146,9 @@ def _detect_sub_columns(
         result.append(main_geo)
 
         logger.info(
-            f"SubColumnSplit: column idx={geo.index} split at x={split_x}, "
-            f"sub={len(sub_words)} words (left), main={len(main_words)} words, "
+            f"SubColumnSplit: column idx={geo.index} split at abs_x={split_abs} "
+            f"(rel={split_rel}), sub={len(sub_words)} words, "
+            f"main={len(main_words)} words, "
             f"col_start_bin=({col_start_bin[0]}, n={col_start_bin[1]})"
         )
 
@@ -2846,7 +2855,7 @@ def analyze_layout_by_words(ocr_img: np.ndarray, dewarped_bgr: np.ndarray) -> Li
     content_w = right_x - left_x
 
     # Split sub-columns (e.g. page references) before classification
-    geometries = _detect_sub_columns(geometries, content_w)
+    geometries = _detect_sub_columns(geometries, content_w, left_x=left_x)
 
     # Phase B: Content-based classification
     regions = classify_column_types(geometries, content_w, top_y, w, h, bottom_y,
diff --git a/klausur-service/backend/ocr_pipeline_api.py b/klausur-service/backend/ocr_pipeline_api.py
index e5f83d2..2dff162 100644
--- a/klausur-service/backend/ocr_pipeline_api.py
+++ b/klausur-service/backend/ocr_pipeline_api.py
@@ -700,7 +700,7 @@ async def detect_columns(session_id: str):
         cached["_content_bounds"] = (left_x, right_x, top_y, bottom_y)
 
         # Split sub-columns (e.g. page references) before classification
-        geometries = _detect_sub_columns(geometries, content_w)
+        geometries = _detect_sub_columns(geometries, content_w, left_x=left_x)
 
         # Phase B: Content-based classification
         regions = classify_column_types(geometries, content_w, top_y, w, h, bottom_y,
diff --git a/klausur-service/backend/tests/test_cv_vocab_pipeline.py b/klausur-service/backend/tests/test_cv_vocab_pipeline.py
index b95164b..1752334 100644
--- a/klausur-service/backend/tests/test_cv_vocab_pipeline.py
+++ b/klausur-service/backend/tests/test_cv_vocab_pipeline.py
@@ -1307,6 +1307,29 @@ class TestSubColumnDetection:
 
         assert len(result) == 1
 
+    def test_sub_column_split_with_left_x_offset(self):
+        """Word 'left' values are relative to left_x; geo.x is absolute.
+
+        Real-world scenario: left_x=195, EN column at geo.x=310.
+        Page refs at relative left=115-157, vocab words at relative left=216.
+        Without left_x the split stays relative (~202 < geo.x=310), so sub_width
+        is negative and no split happens.  With left_x=195, split_abs = 202 + 195
+        = 397, which lies between geo.x (310) and geo.x + geo.width (748) → valid.
+        """
+        content_w = 1469
+        left_x = 195
+        page_refs = [self._make_word(115, "p.59"), self._make_word(157, "p.60"),
+                     self._make_word(157, "p.61")]
+        vocab = [self._make_word(216, f"word{i}") for i in range(40)]
+        all_words = page_refs + vocab
+        geo = self._make_geo(x=310, width=438, words=all_words, content_w=content_w)
+
+        result = _detect_sub_columns([geo], content_w, left_x=left_x)
+
+        assert len(result) == 2, f"Expected 2 columns, got {len(result)}"
+        assert result[0].word_count == 3
+        assert result[1].word_count == 40
+
 
 class TestCellsToVocabEntriesPageRef:
     """Test that page_ref cells are mapped to source_page field."""