diff --git a/klausur-service/backend/cv_vocab_pipeline.py b/klausur-service/backend/cv_vocab_pipeline.py
index 55ec82a..6e4a6be 100644
--- a/klausur-service/backend/cv_vocab_pipeline.py
+++ b/klausur-service/backend/cv_vocab_pipeline.py
@@ -1585,7 +1585,7 @@ def _regularize_row_grid(
         return rows
 
     # --- Step A: Group ALL words into line clusters ---
-    # Collect words that belong to content rows
+    # Collect words that belong to content rows (deduplicated)
     content_words: List[Dict] = []
     seen_keys: set = set()
     for r in content_rows:
@@ -1598,26 +1598,54 @@ def _regularize_row_grid(
     if len(content_words) < 5:
         return rows
 
-    # Use half the median word height as grouping tolerance
-    word_heights = [w['height'] for w in content_words]
-    median_wh = sorted(word_heights)[len(word_heights) // 2]
-    y_tol = max(8, int(median_wh * 0.5))
+    # Median word height over ALL content words (no filtering; the median is robust to tall brackets/IPA)
+    word_heights = sorted(w['height'] for w in content_words)
+    median_wh = word_heights[len(word_heights) // 2]
 
-    line_clusters = _group_words_into_lines(content_words, y_tolerance_px=y_tol)
+    # Group by VERTICAL CENTER, not by top.  Tall characters (brackets,
+    # phonetic symbols) have a much lower top but the same center_y as
+    # normal text on the same line.  Grouping by top would split them
+    # into separate clusters → halved pitch → halved row heights.
+    y_tol = max(10, int(median_wh * 0.6))
+
+    # Sort by center_y, then group by proximity
+    words_by_center = sorted(content_words,
+                             key=lambda w: (w['top'] + w['height'] / 2, w['left']))
+    line_clusters: List[List[Dict]] = []
+    current_line: List[Dict] = [words_by_center[0]]
+    current_center = words_by_center[0]['top'] + words_by_center[0]['height'] / 2
+
+    for w in words_by_center[1:]:
+        w_center = w['top'] + w['height'] / 2
+        if abs(w_center - current_center) <= y_tol:
+            current_line.append(w)
+        else:
+            current_line.sort(key=lambda w: w['left'])
+            line_clusters.append(current_line)
+            current_line = [w]
+            current_center = w_center
+
+    if current_line:
+        current_line.sort(key=lambda w: w['left'])
+        line_clusters.append(current_line)
 
     if len(line_clusters) < 3:
         return rows
 
     # --- Step B: Compute center_y per cluster ---
     # center_y = median of (word_top + word_height/2) across all words in cluster
-    # letter_h = median word height in cluster
-    # All coordinates are relative to content ROI (same as word_dicts)
+    # letter_h = median of word heights, but excluding outlier-height words
+    #            (>2× median) so that tall brackets/IPA don't skew the height
     cluster_info: List[Dict] = []
     for cl_words in line_clusters:
         centers = [w['top'] + w['height'] / 2 for w in cl_words]
-        heights = [w['height'] for w in cl_words]
+        # Filter outlier heights for letter_h computation
+        normal_heights = [w['height'] for w in cl_words
+                          if w['height'] <= median_wh * 2.0]
+        if not normal_heights:
+            normal_heights = [w['height'] for w in cl_words]
         center_y = float(np.median(centers))
-        letter_h = float(np.median(heights))
+        letter_h = float(np.median(normal_heights))
         cluster_info.append({
             'center_y_rel': center_y,  # relative to content ROI
             'center_y_abs': center_y + top_y,  # absolute
@@ -1627,6 +1655,34 @@ def _regularize_row_grid(
 
     cluster_info.sort(key=lambda c: c['center_y_rel'])
 
+    # --- Step B2: Merge clusters that are too close together ---
+    # Even with center-based grouping, some edge cases can produce
+    # spurious clusters.  Merge any adjacent pair whose centers are closer
+    # than max(5px, 0.4× median_wh) (they're definitely the same text line).
+    merge_threshold = max(5, median_wh * 0.4)
+    merged: List[Dict] = [cluster_info[0]]
+    for cl in cluster_info[1:]:
+        prev = merged[-1]
+        if cl['center_y_rel'] - prev['center_y_rel'] < merge_threshold:
+            # Merge: combine words, recompute center
+            combined_words = prev['words'] + cl['words']
+            centers = [w['top'] + w['height'] / 2 for w in combined_words]
+            normal_heights = [w['height'] for w in combined_words
+                              if w['height'] <= median_wh * 2.0]
+            if not normal_heights:
+                normal_heights = [w['height'] for w in combined_words]
+            prev['center_y_rel'] = float(np.median(centers))
+            prev['center_y_abs'] = prev['center_y_rel'] + top_y
+            prev['letter_h'] = float(np.median(normal_heights))
+            prev['words'] = combined_words
+        else:
+            merged.append(cl)
+
+    cluster_info = merged
+
+    if len(cluster_info) < 3:
+        return rows
+
     # --- Step C: Compute pitches and detect section breaks ---
     pitches: List[float] = []
     for i in range(1, len(cluster_info)):
@@ -1772,10 +1828,14 @@ def _regularize_row_grid(
     for i, r in enumerate(result):
         r.index = i
 
+    row_heights = [gr.height for gr in grid_rows]
+    min_h = min(row_heights) if row_heights else 0
+    max_h = max(row_heights) if row_heights else 0
     logger.info(f"RowGrid: word-center grid applied "
-                f"(median_pitch={median_pitch:.0f}px, "
+                f"(median_pitch={median_pitch:.0f}px, median_wh={median_wh}px, "
+                f"y_tol={y_tol}px, {len(line_clusters)} clusters→{len(cluster_info)} merged, "
                 f"{len(sections)} sections, "
-                f"{len(grid_rows)} grid rows, "
+                f"{len(grid_rows)} grid rows [h={min_h}-{max_h}px], "
                 f"was {len(content_rows)} gap-based rows)")
 
     return result