fix: remove morph close that merged balloons into giant blob
Some checks failed
CI / go-lint (push) Has been skipped
CI / python-lint (push) Has been skipped
CI / nodejs-lint (push) Has been skipped
CI / test-go-school (push) Successful in 29s
CI / test-go-edu-search (push) Successful in 28s
CI / test-python-klausur (push) Failing after 1m59s
CI / test-python-agent-core (push) Successful in 19s
CI / test-nodejs-website (push) Successful in 19s
Some checks failed
CI / go-lint (push) Has been skipped
CI / python-lint (push) Has been skipped
CI / nodejs-lint (push) Has been skipped
CI / test-go-school (push) Successful in 29s
CI / test-go-edu-search (push) Successful in 28s
CI / test-python-klausur (push) Failing after 1m59s
CI / test-python-agent-core (push) Successful in 19s
CI / test-nodejs-website (push) Successful in 19s
The 5x5 MORPH_CLOSE was connecting scattered color pixels into one page-spanning contour that swallowed individual balloons.

Fix:
- Remove MORPH_CLOSE, keep only MORPH_OPEN for speckle removal
- Lower sat threshold 50→40 to catch more colored elements
- Filter contours spanning >50% of width OR height (was AND)
- Filter contours >10% of image area

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -137,15 +137,14 @@ def detect_graphic_elements(
|
|||||||
# PASS 1 — COLOR CHANNEL (no word exclusion needed)
|
# PASS 1 — COLOR CHANNEL (no word exclusion needed)
|
||||||
# =====================================================================
|
# =====================================================================
|
||||||
# Saturated pixels = colored ink. Black text has sat ≈ 0 → invisible.
|
# Saturated pixels = colored ink. Black text has sat ≈ 0 → invisible.
|
||||||
sat_mask = (hsv[:, :, 1] > 50).astype(np.uint8) * 255
|
sat_mask = (hsv[:, :, 1] > 40).astype(np.uint8) * 255
|
||||||
# Exclude very bright backgrounds (white/near-white with color cast)
|
# Exclude very bright backgrounds (white/near-white with color cast)
|
||||||
val_mask = (hsv[:, :, 2] < 235).astype(np.uint8) * 255
|
val_mask = (hsv[:, :, 2] < 240).astype(np.uint8) * 255
|
||||||
color_mask = cv2.bitwise_and(sat_mask, val_mask)
|
color_mask = cv2.bitwise_and(sat_mask, val_mask)
|
||||||
|
|
||||||
# Morphological cleanup: close small gaps, remove speckle
|
# Only remove tiny speckle — NO closing, which would merge nearby
|
||||||
kernel = cv2.getStructuringElement(cv2.MORPH_ELLIPSE, (5, 5))
|
# colored elements into one giant blob spanning half the page.
|
||||||
color_mask = cv2.morphologyEx(color_mask, cv2.MORPH_CLOSE, kernel)
|
kernel_open = cv2.getStructuringElement(cv2.MORPH_RECT, (2, 2))
|
||||||
kernel_open = cv2.getStructuringElement(cv2.MORPH_RECT, (3, 3))
|
|
||||||
color_mask = cv2.morphologyEx(color_mask, cv2.MORPH_OPEN, kernel_open)
|
color_mask = cv2.morphologyEx(color_mask, cv2.MORPH_OPEN, kernel_open)
|
||||||
|
|
||||||
contours_color, _ = cv2.findContours(
|
contours_color, _ = cv2.findContours(
|
||||||
@@ -162,8 +161,8 @@ def detect_graphic_elements(
|
|||||||
if bw < 8 or bh < 8:
|
if bw < 8 or bh < 8:
|
||||||
continue
|
continue
|
||||||
|
|
||||||
# Skip page-spanning contours (background color cast)
|
# Skip page-spanning contours (background color cast / merged blobs)
|
||||||
if bw > w * 0.8 and bh > h * 0.8:
|
if bw > w * 0.5 or bh > h * 0.5 or area > img_area * 0.10:
|
||||||
continue
|
continue
|
||||||
|
|
||||||
perimeter = cv2.arcLength(cnt, True)
|
perimeter = cv2.arcLength(cnt, True)
|
||||||
|
|||||||
Reference in New Issue
Block a user