From 8f3a50b981abed4291687f9bab2822b538f87d49 Mon Sep 17 00:00:00 2001 From: Benjamin Admin Date: Thu, 5 Mar 2026 22:50:27 +0100 Subject: [PATCH] fix: full-width Zeilen vor Spaltenerkennung maskieren Farbige Sub-Header (z.B. "Unit 4: Bonnie Scotland") mit voller Breite fuellten die Spaltenluecken im vertikalen Projektionsprofil auf und fuehrten zu 11 statt 5 erkannten Spalten. Zeilen mit >40% Tintendichte werden jetzt vor der Projektion maskiert. Co-Authored-By: Claude Opus 4.6 --- klausur-service/backend/cv_vocab_pipeline.py | 42 ++++++++++++++++++-- 1 file changed, 39 insertions(+), 3 deletions(-) diff --git a/klausur-service/backend/cv_vocab_pipeline.py b/klausur-service/backend/cv_vocab_pipeline.py index e775c27..90ca2f9 100644 --- a/klausur-service/backend/cv_vocab_pipeline.py +++ b/klausur-service/backend/cv_vocab_pipeline.py @@ -2131,10 +2131,46 @@ def detect_column_geometry(ocr_img: np.ndarray, dewarped_bgr: np.ndarray) -> Opt logger.info(f"ColumnGeometry: {len(left_edges)} words detected in content area") - # --- Step 3: Vertical projection profile --- + # --- Step 2b: Mask out full-width rows (sub-headers, colored bands) --- + # Rows where ink spans nearly the full content width distort the vertical + # projection by filling in column gaps. Detect them via horizontal density + # and zero them out before computing v_proj. content_strip = inv[top_y:bottom_y, left_x:right_x] - v_proj = np.sum(content_strip, axis=0).astype(float) - v_proj_norm = v_proj / (content_h * 255) if content_h > 0 else v_proj + h_proj_row = np.sum(content_strip, axis=1).astype(float) + h_proj_row_norm = h_proj_row / (content_w * 255) if content_w > 0 else h_proj_row + + FULLWIDTH_THRESHOLD = 0.40 # normal text ~10-25%; full-width bands 40%+ + fullwidth_mask = h_proj_row_norm > FULLWIDTH_THRESHOLD + + # Only mask contiguous bands (>=3 rows), not isolated noisy rows + masked_strip = content_strip.copy() + n_masked = 0 + band_start = None + for y_idx in range(len(fullwidth_mask)): + if fullwidth_mask[y_idx]: + if band_start is None: + band_start = y_idx + else: + if band_start is not None: + band_height = y_idx - band_start + if band_height >= 3: + masked_strip[band_start:y_idx, :] = 0 + n_masked += band_height + band_start = None + if band_start is not None: + band_height = len(fullwidth_mask) - band_start + if band_height >= 3: + masked_strip[band_start:len(fullwidth_mask), :] = 0 + n_masked += band_height + + if n_masked > 0: + logger.info(f"ColumnGeometry: masked {n_masked} full-width rows " + f"({n_masked * 100 / content_h:.1f}% of content height)") + + # --- Step 3: Vertical projection profile --- + effective_h = content_h - n_masked + v_proj = np.sum(masked_strip, axis=0).astype(float) + v_proj_norm = v_proj / (effective_h * 255) if effective_h > 0 else v_proj # Smooth the projection to avoid noise-induced micro-gaps kernel_size = max(5, content_w // 80)