diff --git a/klausur-service/backend/cv_graphic_detect.py b/klausur-service/backend/cv_graphic_detect.py index 621fcc9..2f66efd 100644 --- a/klausur-service/backend/cv_graphic_detect.py +++ b/klausur-service/backend/cv_graphic_detect.py @@ -181,40 +181,71 @@ def detect_graphic_elements( word_pixel_count = int(np.sum(roi_words > 0)) word_overlap = word_pixel_count / bbox_area if bbox_area > 0 else 0 + # Check: how many OCR word centroids fall inside this region? + # Colored text that OCR detected will have multiple centroids inside. + # Actual images may have 0-1 spurious OCR artifacts. + word_centroid_count = sum( + 1 for wb in word_boxes + if (bx <= int(wb.get("left", 0) + wb.get("width", 0) / 2) <= bx + bw + and by <= int(wb.get("top", 0) + wb.get("height", 0) / 2) <= by + bh) + ) + # Check: how many actual colored pixels are in this region? roi_color = color_pixel_raw[by:by + bh, bx:bx + bw] color_pixel_count = int(np.sum(roi_color > 0)) - # If most of the region is covered by word boxes → colored text, skip - if word_overlap > 0.5: - logger.debug("GraphicDetect PASS1 skip text region (%d,%d) %dx%d overlap=%.0f%%", - bx, by, bw, bh, word_overlap * 100) + # Color pixel density (before any skip checks so we can log it) + density = color_pixel_count / bbox_area if bbox_area > 0 else 0 + + # --- Skip heuristics for colored TEXT (not images) --- + + # (a) High word-box pixel overlap → clearly text + if word_overlap > 0.40: + logger.info( + "GraphicDetect PASS1 skip text-overlap (%d,%d) %dx%d " + "overlap=%.0f%% centroids=%d", + bx, by, bw, bh, word_overlap * 100, word_centroid_count, + ) + continue + + # (b) Multiple OCR words detected inside → colored text + # (images rarely produce 2+ confident word detections) + if word_centroid_count >= 2: + logger.info( + "GraphicDetect PASS1 skip multi-word (%d,%d) %dx%d " + "centroids=%d overlap=%.0f%% density=%.0f%%", + bx, by, bw, bh, word_centroid_count, + word_overlap * 100, density * 100, + ) + continue + + # (c) Even 1 word + some pixel overlap → likely text + if word_centroid_count >= 1 and word_overlap > 0.10: + logger.info( + "GraphicDetect PASS1 skip word+overlap (%d,%d) %dx%d " + "centroids=%d overlap=%.0f%%", + bx, by, bw, bh, word_centroid_count, word_overlap * 100, + ) continue # Need a minimum number of colored pixels (not just dilated area) if color_pixel_count < 200: continue - # Color pixel density: fraction of bbox filled with colored pixels. - # Text strokes are thin → low density (typically 5-20%). - # Actual images/graphics are filled → high density (30%+). - density = color_pixel_count / bbox_area if bbox_area > 0 else 0 - - # Very low density → almost certainly colored text, not an image + # (d) Very low density → thin strokes, almost certainly text if density < 0.20: - logger.debug( + logger.info( "GraphicDetect PASS1 skip low-density (%d,%d) %dx%d " "density=%.0f%% (likely colored text)", bx, by, bw, bh, density * 100, ) continue - # Moderate density + small height → likely a colored text line - # (text-line height is typically < 3% of page height) - if density < 0.30 and bh < h * 0.04: - logger.debug( - "GraphicDetect PASS1 skip text-height region (%d,%d) %dx%d " - "density=%.0f%% height=%.1f%% (likely colored text line)", + # (e) Moderate density + small height → colored text line + if density < 0.35 and bh < h * 0.05: + logger.info( + "GraphicDetect PASS1 skip text-height (%d,%d) %dx%d " + "density=%.0f%% height=%.1f%%", bx, by, bw, bh, density * 100, 100.0 * bh / h, ) continue