From fe7339c7a139ba4660b882836fecfc7636a513ce Mon Sep 17 00:00:00 2001
From: Benjamin Admin <benjaminadmin@MacBookPro.fritz.box>
Date: Mon, 16 Mar 2026 13:51:02 +0100
Subject: [PATCH] fix: suppress text fragments in graphic detection

- Raise min_area from 30 to 200 (text fragments are small)
- Raise word_pad from 3 to 10px (OCR bboxes are tight)
- Reduce morph close kernel from 5x5 to 3x3 (avoid reconnecting text)
- Tighten arrow detection: short side >20px, long side >30px, circularity<0.35, >=2 defects
- Add 'noise' category for too-small elements, filter them out
- Raise min dimension from 4 to 8px
- Add INFO-level diagnostic logging (word/box counts, exclusion coverage, raw contour count)
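- Raise max_area_ratio from 0.05 to 0.25 (allow larger illustrations)
- Lower max_elements from 80 to 50
- Enlarge morph open kernel from 2x2 to 3x3
- Lower exclusion-overlap skip threshold from 0.6 to 0.4
- Drop 'exclamation' and 'dot' shape categories; require minimum sizes
  for circle, line, illustration and icon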

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
---
 klausur-service/backend/cv_graphic_detect.py | 104 +++++++++++--------
 klausur-service/backend/ocr_pipeline_api.py  |   4 +
 2 files changed, 63 insertions(+), 45 deletions(-)

diff --git a/klausur-service/backend/cv_graphic_detect.py b/klausur-service/backend/cv_graphic_detect.py
index 5fadf9d..06da34f 100644
--- a/klausur-service/backend/cv_graphic_detect.py
+++ b/klausur-service/backend/cv_graphic_detect.py
@@ -102,6 +102,8 @@ def _classify_shape(
     """Classify contour shape → (shape_name, confidence).
 
     Uses circularity, aspect ratio, solidity, and vertex count.
+    Only classifies as arrow/circle/line if the element is large enough
+    to be a genuine graphic (not a text fragment).
     """
     aspect = bw / bh if bh > 0 else 1.0
     perimeter = cv2.arcLength(contour, True)
@@ -116,46 +118,47 @@ def _classify_shape(
     approx = cv2.approxPolyDP(contour, epsilon, True)
     vertices = len(approx)
 
-    # --- Arrow detection ---
-    # Arrows typically have: vertices 5-8, moderate solidity (0.4-0.8),
-    # moderate aspect ratio, low circularity
-    if 4 <= vertices <= 9 and 0.3 < solidity < 0.85 and circularity < 0.5:
-        # Check for a pointed tip via convexity defects
+    min_dim = min(bw, bh)
+    max_dim = max(bw, bh)
+
+    # --- Circle / balloon --- (check first, most reliable)
+    # Must be reasonably large (not a dot/period)
+    if circularity > 0.70 and 0.6 < aspect < 1.7 and min_dim > 25:
+        conf = min(0.95, circularity)
+        return "circle", conf
+
+    # --- Arrow detection --- (strict: must be sizable, distinct shape)
+    # Shorter side must exceed 20px and longer side 30px
+    if (min_dim > 20 and max_dim > 30
+            and 5 <= vertices <= 9
+            and 0.35 < solidity < 0.80
+            and circularity < 0.35):
         hull_idx = cv2.convexHull(contour, returnPoints=False)
         if len(hull_idx) >= 4:
             try:
                 defects = cv2.convexityDefects(contour, hull_idx)
-                if defects is not None and len(defects) >= 1:
-                    # Significant defect = pointed shape
+                if defects is not None and len(defects) >= 2:
                     max_depth = max(d[0][3] for d in defects) / 256.0
-                    if max_depth > min(bw, bh) * 0.15:
-                        return "arrow", min(0.75, 0.5 + max_depth / max(bw, bh))
+                    if max_depth > min_dim * 0.25:
+                        return "arrow", min(0.75, 0.5 + max_depth / max_dim)
             except cv2.error:
                 pass
 
-    # --- Circle / balloon ---
-    if circularity > 0.65 and 0.5 < aspect < 2.0:
-        conf = min(0.95, circularity)
-        return "circle", conf
-
-    # --- Line ---
-    if aspect > 4.0 or aspect < 0.25:
+    # --- Line (decorative rule, separator) ---
+    # Must be long enough to not be a dash/hyphen
+    if (aspect > 6.0 or aspect < 1 / 6.0) and max_dim > 40:
         return "line", 0.7
 
-    # --- Exclamation mark (tall narrow + high solidity) ---
-    if aspect < 0.45 and bh > 12 and solidity > 0.5:
-        return "exclamation", 0.7
+    # --- Larger illustration (drawing, image) ---
+    if area > 3000 and min_dim > 30:
+        return "illustration", 0.6
 
-    # --- Dot / bullet (small, roughly square, high solidity) ---
-    if max(bw, bh) < 20 and 0.5 < aspect < 2.0 and solidity > 0.6:
-        return "dot", 0.6
+    # --- Generic icon (moderate size, non-text shape) ---
+    if area > 500 and min_dim > 15:
+        return "icon", 0.4
 
-    # --- Larger illustration ---
-    if area > 2000:
-        return "illustration", 0.5
-
-    # --- Generic icon ---
-    return "icon", 0.4
+    # Everything else is too small or text-like — skip
+    return "noise", 0.0
 
 
 # ---------------------------------------------------------------------------
@@ -166,10 +169,10 @@ def detect_graphic_elements(
     img_bgr: np.ndarray,
     word_boxes: List[Dict],
     detected_boxes: Optional[List[Dict]] = None,
-    min_area: int = 30,
-    max_area_ratio: float = 0.05,
-    word_pad: int = 3,
-    max_elements: int = 80,
+    min_area: int = 200,
+    max_area_ratio: float = 0.25,
+    word_pad: int = 10,
+    max_elements: int = 50,
 ) -> List[GraphicElement]:
     """Find non-text graphical elements on the page.
 
@@ -181,9 +184,9 @@ def detect_graphic_elements(
         img_bgr: BGR color image.
         word_boxes: List of OCR word dicts with left/top/width/height.
         detected_boxes: Optional list of detected box dicts (x/y/w/h).
-        min_area: Minimum contour area to keep.
+        min_area: Minimum contour area to keep (200 filters text fragments).
         max_area_ratio: Maximum area as fraction of image area.
-        word_pad: Padding around word boxes for exclusion.
+        word_pad: Padding around word boxes for exclusion (10px covers font edges).
         max_elements: Maximum number of elements to return.
 
     Returns:
@@ -195,16 +198,17 @@ def detect_graphic_elements(
     h, w = img_bgr.shape[:2]
     max_area = int(h * w * max_area_ratio)
 
+    logger.info("GraphicDetect: image %dx%d, %d word_boxes, %d detected_boxes",
+                w, h, len(word_boxes), len(detected_boxes or []))
+
     gray = cv2.cvtColor(img_bgr, cv2.COLOR_BGR2GRAY)
     hsv = cv2.cvtColor(img_bgr, cv2.COLOR_BGR2HSV)
 
     # --- 1. Build ink mask: dark pixels + saturated colored pixels ---
-    # Adaptive threshold for dark ink
     _, dark_mask = cv2.threshold(gray, 0, 255, cv2.THRESH_BINARY_INV + cv2.THRESH_OTSU)
 
     # Saturated colored pixels (catches colored arrows, markers)
     sat_mask = (hsv[:, :, 1] > 40).astype(np.uint8) * 255
-    # Only include saturated pixels that are also reasonably dark (not background)
     val_mask = (hsv[:, :, 2] < 230).astype(np.uint8) * 255
     color_ink = cv2.bitwise_and(sat_mask, val_mask)
 
@@ -236,15 +240,19 @@ def detect_graphic_elements(
             if x2 > x1 and y2 > y1:
                 exclusion[y1:y2, x1:x2] = 255
 
+    excl_pct = int(np.sum(exclusion > 0) * 100 / (h * w)) if h * w else 0
+    logger.info("GraphicDetect: exclusion mask covers %d%% of image", excl_pct)
+
     # Subtract exclusion from ink
     graphic_mask = cv2.bitwise_and(ink_mask, cv2.bitwise_not(exclusion))
 
     # --- 3. Morphological cleanup ---
-    # Close small gaps (connects arrow stroke + head)
-    kernel_close = cv2.getStructuringElement(cv2.MORPH_RECT, (5, 5))
+    # Close small gaps (connects arrow stroke + head); the kernel is kept
+    # small so that it does not reconnect text fragments
+    kernel_close = cv2.getStructuringElement(cv2.MORPH_RECT, (3, 3))
     graphic_mask = cv2.morphologyEx(graphic_mask, cv2.MORPH_CLOSE, kernel_close)
-    # Remove tiny noise
-    kernel_open = cv2.getStructuringElement(cv2.MORPH_RECT, (2, 2))
+    # Remove small noise
+    kernel_open = cv2.getStructuringElement(cv2.MORPH_RECT, (3, 3))
     graphic_mask = cv2.morphologyEx(graphic_mask, cv2.MORPH_OPEN, kernel_open)
 
     # --- 4. Find contours ---
@@ -252,6 +260,8 @@ def detect_graphic_elements(
         graphic_mask, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE,
     )
 
+    logger.info("GraphicDetect: %d raw contours after exclusion", len(contours))
+
     # --- 5. Analyse and classify ---
     candidates: List[GraphicElement] = []
     for cnt in contours:
@@ -260,22 +270,24 @@ def detect_graphic_elements(
             continue
 
         bx, by, bw, bh = cv2.boundingRect(cnt)
-        if bw < 4 or bh < 4:
+        if bw < 8 or bh < 8:
             continue
 
-        # Skip elements that are mostly inside the exclusion zone
-        # (partial overlap with a word)
+        # Skip elements that overlap significantly with the exclusion zone
         roi_excl = exclusion[by:by + bh, bx:bx + bw]
         excl_ratio = np.sum(roi_excl > 0) / (bw * bh) if bw * bh > 0 else 0
-        if excl_ratio > 0.6:
+        if excl_ratio > 0.4:
             continue
 
         # Classify shape
         shape, conf = _classify_shape(cnt, bw, bh, area)
 
+        # Skip noise (too small or text-like)
+        if shape == "noise":
+            continue
+
         # Determine dominant color
         roi_hsv = hsv[by:by + bh, bx:bx + bw]
-        # Only sample pixels that are actually in the contour
         cnt_mask = np.zeros((bh, bw), dtype=np.uint8)
         shifted_cnt = cnt - np.array([bx, by])
         cv2.drawContours(cnt_mask, [shifted_cnt], -1, 255, -1)
@@ -305,5 +317,7 @@ def detect_graphic_elements(
             len(result),
             ", ".join(f"{s}: {c}" for s, c in sorted(shape_counts.items())),
         )
+    else:
+        logger.info("GraphicDetect: no graphic elements found")
 
     return result
diff --git a/klausur-service/backend/ocr_pipeline_api.py b/klausur-service/backend/ocr_pipeline_api.py
index f03f44d..6ba8b91 100644
--- a/klausur-service/backend/ocr_pipeline_api.py
+++ b/klausur-service/backend/ocr_pipeline_api.py
@@ -1236,6 +1236,10 @@ async def detect_structure(session_id: str):
         for cell in word_result["cells"]:
             for wb in (cell.get("word_boxes") or []):
                 words.append(wb)
+    logger.info("detect-structure: word_result present=%s, cells=%d, word_boxes extracted=%d",
+                word_result is not None,
+                len(word_result.get("cells", [])) if word_result else 0,
+                len(words))
     # If no words yet, use image dimensions with small margin
     if words:
         content_x = max(0, min(int(wb["left"]) for wb in words))