fix: relax graphic detection for small circles/balloons
Some checks failed
CI / go-lint (push) Has been skipped
CI / python-lint (push) Has been skipped
CI / nodejs-lint (push) Has been skipped
CI / test-go-school (push) Successful in 30s
CI / test-go-edu-search (push) Successful in 29s
CI / test-python-klausur (push) Failing after 1m56s
CI / test-python-agent-core (push) Successful in 18s
CI / test-nodejs-website (push) Successful in 18s

- Lower min_area from 200 to 80 (small balloons ~100-300px²)
- Lower word_pad from 10 to 5 (10px was eating nearby graphics)
- Relax circle detection: circularity>0.55, 0.5<aspect<2.0, min_dim>15 (was 0.70, 0.6<aspect<1.7, 25)
- Text fragments still filtered by _classify_shape noise threshold
- Add ACCEPT logging for debugging

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
Benjamin Admin
2026-03-16 14:00:09 +01:00
parent f717e1c0df
commit ba513968c5

View File

@@ -122,8 +122,8 @@ def _classify_shape(
max_dim = max(bw, bh)
# --- Circle / balloon --- (check first, most reliable)
# Must be reasonably large (not a dot/period)
if circularity > 0.70 and 0.6 < aspect < 1.7 and min_dim > 25:
# Must be reasonably large (not a dot/period) — min_dim > 15px
if circularity > 0.55 and 0.5 < aspect < 2.0 and min_dim > 15:
conf = min(0.95, circularity)
return "circle", conf
@@ -169,9 +169,9 @@ def detect_graphic_elements(
img_bgr: np.ndarray,
word_boxes: List[Dict],
detected_boxes: Optional[List[Dict]] = None,
min_area: int = 200,
min_area: int = 80,
max_area_ratio: float = 0.25,
word_pad: int = 10,
word_pad: int = 5,
max_elements: int = 50,
) -> List[GraphicElement]:
"""Find non-text graphical elements on the page.
@@ -184,9 +184,9 @@ def detect_graphic_elements(
img_bgr: BGR color image.
word_boxes: List of OCR word dicts with left/top/width/height.
detected_boxes: Optional list of detected box dicts (x/y/w/h).
min_area: Minimum contour area to keep (200 filters text fragments).
min_area: Minimum contour area to keep (80 filters tiny noise).
max_area_ratio: Maximum area as fraction of image area.
word_pad: Padding around word boxes for exclusion (10px covers font edges).
word_pad: Padding around word boxes for exclusion (5px).
max_elements: Maximum number of elements to return.
Returns:
@@ -306,6 +306,8 @@ def detect_graphic_elements(
masked_hsv = roi_hsv[cnt_mask > 0]
color_name, color_hex = _dominant_color(masked_hsv)
logger.info("GraphicDetect ACCEPT: %s at (%d,%d) %dx%d area=%d color=%s conf=%.2f",
shape, bx, by, bw, bh, int(area), color_name, conf)
candidates.append(GraphicElement(
x=bx, y=by, width=bw, height=bh,
area=int(area),