From a079ffe8e9d684b9120d1f68396b5e4eeec2f5a7 Mon Sep 17 00:00:00 2001 From: Benjamin Admin Date: Tue, 17 Mar 2026 18:09:16 +0100 Subject: [PATCH] fix: robust colored-text detection in graphic filter MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The 25x25 dilation kernel merges nearby green words into large regions, so pixel-overlap with OCR word boxes drops below 50%. Previous density checks alone weren't sufficient. New multi-layered approach: - Count OCR word CENTROIDS inside each colored region - ≥2 centroids → definitely text (images don't produce multiple words) - 1 centroid + 10%+ pixel overlap → likely text - Lower pixel overlap threshold from 50% to 40% - Raise density+height thresholds for text-line detection - Use INFO logging to diagnose remaining false positives Co-Authored-By: Claude Opus 4.6 --- klausur-service/backend/cv_graphic_detect.py | 65 +++++++++++++++----- 1 file changed, 48 insertions(+), 17 deletions(-) diff --git a/klausur-service/backend/cv_graphic_detect.py b/klausur-service/backend/cv_graphic_detect.py index 621fcc9..2f66efd 100644 --- a/klausur-service/backend/cv_graphic_detect.py +++ b/klausur-service/backend/cv_graphic_detect.py @@ -181,40 +181,71 @@ def detect_graphic_elements( word_pixel_count = int(np.sum(roi_words > 0)) word_overlap = word_pixel_count / bbox_area if bbox_area > 0 else 0 + # Check: how many OCR word centroids fall inside this region? + # Colored text that OCR detected will have multiple centroids inside. + # Actual images may have 0-1 spurious OCR artifacts. + word_centroid_count = sum( + 1 for wb in word_boxes + if (bx <= int(wb.get("left", 0) + wb.get("width", 0) / 2) <= bx + bw + and by <= int(wb.get("top", 0) + wb.get("height", 0) / 2) <= by + bh) + ) + # Check: how many actual colored pixels are in this region? roi_color = color_pixel_raw[by:by + bh, bx:bx + bw] color_pixel_count = int(np.sum(roi_color > 0)) - # If most of the region is covered by word boxes → colored text, skip - if word_overlap > 0.5: - logger.debug("GraphicDetect PASS1 skip text region (%d,%d) %dx%d overlap=%.0f%%", - bx, by, bw, bh, word_overlap * 100) + # Color pixel density (before any skip checks so we can log it) + density = color_pixel_count / bbox_area if bbox_area > 0 else 0 + + # --- Skip heuristics for colored TEXT (not images) --- + + # (a) High word-box pixel overlap → clearly text + if word_overlap > 0.40: + logger.info( + "GraphicDetect PASS1 skip text-overlap (%d,%d) %dx%d " + "overlap=%.0f%% centroids=%d", + bx, by, bw, bh, word_overlap * 100, word_centroid_count, + ) + continue + + # (b) Multiple OCR words detected inside → colored text + # (images rarely produce 2+ confident word detections) + if word_centroid_count >= 2: + logger.info( + "GraphicDetect PASS1 skip multi-word (%d,%d) %dx%d " + "centroids=%d overlap=%.0f%% density=%.0f%%", + bx, by, bw, bh, word_centroid_count, + word_overlap * 100, density * 100, + ) + continue + + # (c) Even 1 word + some pixel overlap → likely text + if word_centroid_count >= 1 and word_overlap > 0.10: + logger.info( + "GraphicDetect PASS1 skip word+overlap (%d,%d) %dx%d " + "centroids=%d overlap=%.0f%%", + bx, by, bw, bh, word_centroid_count, word_overlap * 100, + ) continue # Need a minimum number of colored pixels (not just dilated area) if color_pixel_count < 200: continue - # Color pixel density: fraction of bbox filled with colored pixels. - # Text strokes are thin → low density (typically 5-20%). - # Actual images/graphics are filled → high density (30%+). - density = color_pixel_count / bbox_area if bbox_area > 0 else 0 - - # Very low density → almost certainly colored text, not an image + # (d) Very low density → thin strokes, almost certainly text if density < 0.20: - logger.debug( + logger.info( "GraphicDetect PASS1 skip low-density (%d,%d) %dx%d " "density=%.0f%% (likely colored text)", bx, by, bw, bh, density * 100, ) continue - # Moderate density + small height → likely a colored text line - # (text-line height is typically < 3% of page height) - if density < 0.30 and bh < h * 0.04: - logger.debug( - "GraphicDetect PASS1 skip text-height region (%d,%d) %dx%d " - "density=%.0f%% height=%.1f%% (likely colored text line)", + # (e) Moderate density + small height → colored text line + if density < 0.35 and bh < h * 0.05: + logger.info( + "GraphicDetect PASS1 skip text-height (%d,%d) %dx%d " + "density=%.0f%% height=%.1f%%", bx, by, bw, bh, density * 100, 100.0 * bh / h, ) continue