diff --git a/klausur-service/backend/cv_vocab_pipeline.py b/klausur-service/backend/cv_vocab_pipeline.py
index 2931906..e3b88a7 100644
--- a/klausur-service/backend/cv_vocab_pipeline.py
+++ b/klausur-service/backend/cv_vocab_pipeline.py
@@ -3145,6 +3145,17 @@ def build_cell_grid(
         logger.warning("build_cell_grid: no content rows found")
         return [], []
 
+    # Filter phantom rows: rows with no Tesseract words assigned are
+    # inter-line whitespace gaps that would produce garbage OCR.
+    before = len(content_rows)
+    content_rows = [r for r in content_rows if r.word_count > 0]
+    skipped = before - len(content_rows)
+    if skipped > 0:
+        logger.info(f"build_cell_grid: skipped {skipped} phantom rows (word_count=0)")
+    if not content_rows:
+        logger.warning("build_cell_grid: no content rows with words found")
+        return [], []
+
     # Use columns only — skip ignore, header, footer, page_ref
     _skip_types = {'column_ignore', 'header', 'footer', 'page_ref'}
     relevant_cols = [c for c in column_regions if c.type not in _skip_types]
@@ -3222,6 +3233,16 @@ def build_cell_grid_streaming(
     if not content_rows:
         return
 
+    # Filter phantom rows: rows with no Tesseract words assigned are
+    # inter-line whitespace gaps that would produce garbage OCR.
+    before = len(content_rows)
+    content_rows = [r for r in content_rows if r.word_count > 0]
+    skipped = before - len(content_rows)
+    if skipped > 0:
+        logger.info(f"build_cell_grid_streaming: skipped {skipped} phantom rows (word_count=0)")
+    if not content_rows:
+        return
+
     _skip_types = {'column_ignore', 'header', 'footer', 'page_ref'}
     relevant_cols = [c for c in column_regions if c.type not in _skip_types]
     if not relevant_cols: