diff --git a/klausur-service/backend/cv_graphic_detect.py b/klausur-service/backend/cv_graphic_detect.py
index 2f66efd..fb9f5c3 100644
--- a/klausur-service/backend/cv_graphic_detect.py
+++ b/klausur-service/backend/cv_graphic_detect.py
@@ -170,7 +170,7 @@ def detect_graphic_elements(
             continue

         # Skip page-spanning regions
-        if bw > w * 0.5 or bh > h * 0.5:
+        if bw > w * 0.6 or bh > h * 0.6:
             logger.debug("GraphicDetect PASS1 skip page-spanning (%d,%d) %dx%d",
                          bx, by, bw, bh)
             continue
@@ -232,12 +232,16 @@ def detect_graphic_elements(
         if color_pixel_count < 200:
             continue

-        # (d) Very low density → thin strokes, almost certainly text
-        if density < 0.20:
+        # (d) Very low density → thin strokes, almost certainly text.
+        # Large regions (photos/illustrations) can have low color density
+        # because most pixels are grayscale ink. Use a lower threshold
+        # for regions bigger than 100×80 px.
+        _min_density = 0.05 if (bw > 100 and bh > 80) else 0.20
+        if density < _min_density:
             logger.info(
                 "GraphicDetect PASS1 skip low-density (%d,%d) %dx%d "
-                "density=%.0f%% (likely colored text)",
-                bx, by, bw, bh, density * 100,
+                "density=%.0f%% (min=%.0f%%, likely colored text)",
+                bx, by, bw, bh, density * 100, _min_density * 100,
             )
             continue

diff --git a/klausur-service/backend/grid_editor_api.py b/klausur-service/backend/grid_editor_api.py
index ae1d063..bc34694 100644
--- a/klausur-service/backend/grid_editor_api.py
+++ b/klausur-service/backend/grid_editor_api.py
@@ -21,6 +21,7 @@ import numpy as np
 from fastapi import APIRouter, HTTPException, Request

 from cv_box_detect import detect_boxes, split_page_into_zones
+from cv_graphic_detect import detect_graphic_elements
 from cv_vocab_types import PageZone
 from cv_color_detect import detect_word_colors, recover_colored_text
 from cv_ocr_engines import fix_cell_phonetics, fix_ipa_continuation_cell, _text_has_garbled_ipa, _lookup_ipa, _words_to_reading_order_text, _group_words_into_lines
@@ -1469,13 +1470,12 @@ async def _build_grid_core(session_id: str, session: dict) -> dict:
                 session_id, removed, len(exclude_rects),
             )

-    # 2e. Filter words inside detected graphic/image regions
-    # Only remove LOW-CONFIDENCE words (likely OCR artifacts from images).
-    # High-confidence words are real text even if they overlap a detected
-    # graphic region (e.g. colored text that graphic detection couldn't
-    # fully distinguish from an image).
-    _GRAPHIC_CONF_THRESHOLD = 50  # keep words with conf >= 50
-    graphic_rects = []
+    # 2e. Hard-filter words inside graphic/image regions from structure step.
+    # ALL words inside graphic regions are removed regardless of confidence —
+    # images cannot contain real text; any OCR words inside are artifacts.
+    # After image loading (Step 3a) we augment these with freshly detected
+    # graphic regions from cv_graphic_detect.
+    graphic_rects: List[Dict[str, int]] = []
     if structure_result:
         for g in structure_result.get("graphics", []):
             graphic_rects.append({
@@ -1484,23 +1484,18 @@ async def _build_grid_core(session_id: str, session: dict) -> dict:
             })
     if graphic_rects:
         before = len(all_words)
-        filtered = []
-        for w in all_words:
-            w_cx = w["left"] + w.get("width", 0) / 2
-            w_cy = w["top"] + w.get("height", 0) / 2
-            inside = any(
-                gr["x"] <= w_cx <= gr["x"] + gr["w"]
-                and gr["y"] <= w_cy <= gr["y"] + gr["h"]
+        all_words = [
+            w for w in all_words
+            if not any(
+                gr["x"] <= w["left"] + w.get("width", 0) / 2 <= gr["x"] + gr["w"]
+                and gr["y"] <= w["top"] + w.get("height", 0) / 2 <= gr["y"] + gr["h"]
                 for gr in graphic_rects
             )
-            if inside and w.get("conf", 0) < _GRAPHIC_CONF_THRESHOLD:
-                continue  # remove low-confidence artifact
-            filtered.append(w)
-        removed = before - len(filtered)
+        ]
+        removed = before - len(all_words)
         if removed:
-            all_words = filtered
             logger.info(
-                "build-grid session %s: removed %d low-conf words inside %d graphic region(s)",
+                "build-grid session %s: hard-removed %d words inside %d structure graphic region(s)",
                 session_id, removed, len(graphic_rects),
             )

@@ -1525,6 +1520,39 @@ async def _build_grid_core(session_id: str, session: dict) -> dict:

         img_bgr = cv2.imdecode(arr, cv2.IMREAD_COLOR)
         if img_bgr is not None:
+            # --- 3a. Detect graphic/image regions via CV and hard-filter ---
+            # Pass only significant words (len >= 3) to the detector so that
+            # short OCR artifacts inside images don't fool the text-vs-graphic
+            # heuristic (it counts word centroids to distinguish text from images).
+            sig_words = [w for w in all_words if len((w.get("text") or "").strip()) >= 3]
+            fresh_graphics = detect_graphic_elements(img_bgr, sig_words)
+            if fresh_graphics:
+                fresh_rects = [
+                    {"x": g.x, "y": g.y, "w": g.width, "h": g.height}
+                    for g in fresh_graphics
+                ]
+                graphic_rects.extend(fresh_rects)
+                logger.info(
+                    "build-grid session %s: detected %d graphic region(s) via CV",
+                    session_id, len(fresh_graphics),
+                )
+                # Hard-filter words inside newly detected graphic regions
+                before = len(all_words)
+                all_words = [
+                    w for w in all_words
+                    if not any(
+                        gr["x"] <= w["left"] + w.get("width", 0) / 2 <= gr["x"] + gr["w"]
+                        and gr["y"] <= w["top"] + w.get("height", 0) / 2 <= gr["y"] + gr["h"]
+                        for gr in fresh_rects
+                    )
+                ]
+                removed = before - len(all_words)
+                if removed:
+                    logger.info(
+                        "build-grid session %s: hard-removed %d words inside %d fresh graphic region(s)",
+                        session_id, removed, len(fresh_rects),
+                    )
+
             # --- Recover colored text that OCR missed (before grid building) ---
             recovered = recover_colored_text(img_bgr, all_words)
             if recovered and graphic_rects: