feat: add border ghost filter + graphic detection tests + structure overlay
- Add _filter_border_ghost_words() to remove OCR artefacts from box borders (vertical + horizontal edge detection, column cleanup, re-indexing) - Add 20 tests for border ghost filter (basic filtering + column cleanup) - Add 24 tests for cv_graphic_detect (color detection, word overlap, boxes) - Clean up cv_graphic_detect.py logging (per-candidate → DEBUG) - Add structure overlay layer to StepReconstruction (boxes + graphics toggle) - Show border_ghosts_removed badge in StepStructureDetection - Update MkDocs with structure detection documentation Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -219,7 +219,7 @@ export interface StructureGraphic {
|
||||
w: number
|
||||
h: number
|
||||
area: number
|
||||
shape: string // arrow, circle, line, exclamation, dot, icon, illustration
|
||||
shape: string // image, illustration
|
||||
color_name: string
|
||||
color_hex: string
|
||||
confidence: number
|
||||
@@ -235,6 +235,7 @@ export interface StructureResult {
|
||||
color_pixel_counts: Record<string, number>
|
||||
has_words: boolean
|
||||
word_count: number
|
||||
border_ghosts_removed?: number
|
||||
duration_seconds: number
|
||||
}
|
||||
|
||||
|
||||
@@ -2,7 +2,7 @@
|
||||
|
||||
import { useCallback, useEffect, useMemo, useRef, useState } from 'react'
|
||||
import dynamic from 'next/dynamic'
|
||||
import type { GridResult, GridCell, ColumnResult, RowResult, PageZone, PageRegion, RowItem } from '@/app/(admin)/ai/ocr-pipeline/types'
|
||||
import type { GridResult, GridCell, ColumnResult, RowResult, PageZone, PageRegion, RowItem, StructureResult, StructureBox, StructureGraphic } from '@/app/(admin)/ai/ocr-pipeline/types'
|
||||
import { usePixelWordPositions } from './usePixelWordPositions'
|
||||
|
||||
const KLAUSUR_API = '/klausur-api'
|
||||
@@ -60,6 +60,9 @@ export function StepReconstruction({ sessionId, onNext }: StepReconstructionProp
|
||||
const [fontScale, setFontScale] = useState(0.7)
|
||||
const [globalBold, setGlobalBold] = useState(false)
|
||||
const [imageRotation, setImageRotation] = useState<0 | 180>(0)
|
||||
const [structureBoxes, setStructureBoxes] = useState<StructureBox[]>([])
|
||||
const [structureGraphics, setStructureGraphics] = useState<StructureGraphic[]>([])
|
||||
const [showStructure, setShowStructure] = useState(true)
|
||||
const reconRef = useRef<HTMLDivElement>(null)
|
||||
const [reconWidth, setReconWidth] = useState(0)
|
||||
|
||||
@@ -92,12 +95,15 @@ export function StepReconstruction({ sessionId, onNext }: StepReconstructionProp
|
||||
// eslint-disable-next-line react-hooks/exhaustive-deps
|
||||
}, [sessionId])
|
||||
|
||||
// Track image natural height for font scaling
|
||||
// Track image natural dimensions for font scaling and structure layer
|
||||
const handleImageLoad = useCallback(() => {
|
||||
if (imageRef.current) {
|
||||
setImageNaturalH(imageRef.current.naturalHeight)
|
||||
if (!imageNaturalSize) {
|
||||
setImageNaturalSize({ w: imageRef.current.naturalWidth, h: imageRef.current.naturalHeight })
|
||||
}
|
||||
}
|
||||
}, [])
|
||||
}, [imageNaturalSize])
|
||||
|
||||
const loadSessionData = async () => {
|
||||
if (!sessionId) return
|
||||
@@ -132,6 +138,13 @@ export function StepReconstruction({ sessionId, onNext }: StepReconstructionProp
|
||||
setUndoStack([])
|
||||
setRedoStack([])
|
||||
|
||||
// Load structure result (boxes, graphics, colors)
|
||||
const structureResult: StructureResult | undefined = data.structure_result
|
||||
if (structureResult) {
|
||||
setStructureBoxes(structureResult.boxes || [])
|
||||
setStructureGraphics(structureResult.graphics || [])
|
||||
}
|
||||
|
||||
// Check for parent with boxes (sub-sessions + zones)
|
||||
const columnResult: ColumnResult | undefined = data.column_result
|
||||
const rowResult: RowResult | undefined = data.row_result
|
||||
@@ -517,6 +530,65 @@ export function StepReconstruction({ sessionId, onNext }: StepReconstructionProp
|
||||
return bboxPct
|
||||
}
|
||||
|
||||
// Structure layer: boxes and graphic elements as background
|
||||
const renderStructureLayer = (imgW: number, imgH: number) => {
|
||||
if (!showStructure) return null
|
||||
const hasElements = structureBoxes.length > 0 || structureGraphics.length > 0
|
||||
if (!hasElements) return null
|
||||
|
||||
return (
|
||||
<>
|
||||
{/* Structure boxes */}
|
||||
{structureBoxes.map((box, i) => {
|
||||
const bgColor = box.bg_color_hex || '#6b7280'
|
||||
return (
|
||||
<div
|
||||
key={`sbox-${i}`}
|
||||
className="absolute pointer-events-none"
|
||||
style={{
|
||||
left: `${(box.x / imgW) * 100}%`,
|
||||
top: `${(box.y / imgH) * 100}%`,
|
||||
width: `${(box.w / imgW) * 100}%`,
|
||||
height: `${(box.h / imgH) * 100}%`,
|
||||
border: `${Math.max(1, box.border_thickness)}px solid ${bgColor}40`,
|
||||
backgroundColor: `${bgColor}0a`,
|
||||
borderRadius: '2px',
|
||||
}}
|
||||
/>
|
||||
)
|
||||
})}
|
||||
|
||||
{/* Graphic elements */}
|
||||
{structureGraphics.map((g, i) => (
|
||||
<div
|
||||
key={`sgfx-${i}`}
|
||||
className="absolute pointer-events-none"
|
||||
style={{
|
||||
left: `${(g.x / imgW) * 100}%`,
|
||||
top: `${(g.y / imgH) * 100}%`,
|
||||
width: `${(g.w / imgW) * 100}%`,
|
||||
height: `${(g.h / imgH) * 100}%`,
|
||||
border: `1px dashed ${g.color_hex}60`,
|
||||
backgroundColor: `${g.color_hex}08`,
|
||||
borderRadius: '2px',
|
||||
}}
|
||||
>
|
||||
<span
|
||||
className="absolute text-[8px] leading-none opacity-50"
|
||||
style={{
|
||||
top: '1px',
|
||||
left: '2px',
|
||||
color: g.color_hex,
|
||||
}}
|
||||
>
|
||||
{g.shape === 'illustration' ? 'Illust' : 'Bild'}
|
||||
</span>
|
||||
</div>
|
||||
))}
|
||||
</>
|
||||
)
|
||||
}
|
||||
|
||||
// Overlay rendering helper
|
||||
const renderOverlayMode = () => {
|
||||
const imgW = imageNaturalSize?.w || 1
|
||||
@@ -597,6 +669,9 @@ export function StepReconstruction({ sessionId, onNext }: StepReconstructionProp
|
||||
)
|
||||
})}
|
||||
|
||||
{/* Structure elements (boxes, graphics) */}
|
||||
{renderStructureLayer(imgW, imgH)}
|
||||
|
||||
{/* Pixel-positioned words / editable inputs */}
|
||||
{cells.map((cell) => {
|
||||
const displayText = getDisplayText(cell)
|
||||
@@ -831,6 +906,19 @@ export function StepReconstruction({ sessionId, onNext }: StepReconstructionProp
|
||||
>
|
||||
180°
|
||||
</button>
|
||||
{(structureBoxes.length > 0 || structureGraphics.length > 0) && (
|
||||
<button
|
||||
onClick={() => setShowStructure(v => !v)}
|
||||
className={`px-2 py-1 text-xs border rounded transition-colors ${
|
||||
showStructure
|
||||
? 'border-violet-300 bg-violet-50 text-violet-600 dark:border-violet-700 dark:bg-violet-900/30 dark:text-violet-400'
|
||||
: 'border-gray-300 dark:border-gray-600 hover:bg-gray-50 dark:hover:bg-gray-700'
|
||||
}`}
|
||||
title="Strukturelemente anzeigen"
|
||||
>
|
||||
Struktur
|
||||
</button>
|
||||
)}
|
||||
<div className="w-px h-5 bg-gray-300 dark:bg-gray-600 mx-1" />
|
||||
</>
|
||||
)}
|
||||
@@ -851,6 +939,21 @@ export function StepReconstruction({ sessionId, onNext }: StepReconstructionProp
|
||||
Leer
|
||||
</button>
|
||||
|
||||
{/* Structure toggle */}
|
||||
{(structureBoxes.length > 0 || structureGraphics.length > 0) && (
|
||||
<button
|
||||
onClick={() => setShowStructure(v => !v)}
|
||||
className={`px-2 py-1 text-xs border rounded transition-colors ${
|
||||
showStructure
|
||||
? 'border-violet-300 bg-violet-50 text-violet-600 dark:border-violet-700 dark:bg-violet-900/30 dark:text-violet-400'
|
||||
: 'border-gray-300 dark:border-gray-600 hover:bg-gray-50 dark:hover:bg-gray-700'
|
||||
}`}
|
||||
title="Strukturelemente anzeigen"
|
||||
>
|
||||
Struktur
|
||||
</button>
|
||||
)}
|
||||
|
||||
<div className="w-px h-5 bg-gray-300 dark:bg-gray-600 mx-1" />
|
||||
|
||||
{/* Zoom controls */}
|
||||
@@ -915,6 +1018,9 @@ export function StepReconstruction({ sessionId, onNext }: StepReconstructionProp
|
||||
onLoad={handleImageLoad}
|
||||
/>
|
||||
|
||||
{/* Structure elements (boxes, graphics) */}
|
||||
{imageNaturalSize && renderStructureLayer(imageNaturalSize.w, imageNaturalSize.h)}
|
||||
|
||||
{/* Empty field markers */}
|
||||
{showEmptyHighlight && cells
|
||||
.filter(c => emptyCellIds.has(c.cellId))
|
||||
|
||||
@@ -165,6 +165,11 @@ export function StepStructureDetection({ sessionId, onNext }: StepStructureDetec
|
||||
{result.word_count} Woerter
|
||||
</span>
|
||||
)}
|
||||
{(result.border_ghosts_removed ?? 0) > 0 && (
|
||||
<span className="inline-flex items-center gap-1.5 px-3 py-1 rounded-full bg-red-50 dark:bg-red-900/20 text-red-700 dark:text-red-400 text-xs font-medium">
|
||||
{result.border_ghosts_removed} Rahmenlinien entfernt
|
||||
</span>
|
||||
)}
|
||||
<span className="text-gray-400 text-xs ml-auto">
|
||||
{result.image_width}x{result.image_height}px | {result.duration_seconds}s
|
||||
</span>
|
||||
|
||||
@@ -149,6 +149,8 @@ klausur-service/backend/
|
||||
├── ocr_pipeline_api.py # FastAPI Router (Schritte 2-10)
|
||||
├── orientation_crop_api.py # FastAPI Router (Schritte 1 + 4)
|
||||
├── cv_box_detect.py # Box-Erkennung + Zonen-Aufteilung
|
||||
├── cv_graphic_detect.py # Grafik-/Bilderkennung (Region-basiert)
|
||||
├── cv_color_detect.py # Farbtext-Erkennung (HSV-Analyse)
|
||||
├── cv_words_first.py # Words-First Grid Builder (bottom-up)
|
||||
├── page_crop.py # Content-basierter Crop-Algorithmus
|
||||
├── ocr_pipeline_session_store.py # PostgreSQL Persistence
|
||||
@@ -177,7 +179,8 @@ admin-lehrer/
|
||||
├── StepColumnDetection.tsx # Schritt 5: Spaltenerkennung
|
||||
├── StepRowDetection.tsx # Schritt 6: Zeilenerkennung
|
||||
├── StepWordRecognition.tsx # Schritt 7: Worterkennung
|
||||
├── StepLlmReview.tsx # Schritt 8: Korrektur (SSE-Stream)
|
||||
├── StepStructureDetection.tsx # Schritt 8: Strukturerkennung
|
||||
├── StepLlmReview.tsx # Schritt 9: Korrektur (SSE-Stream)
|
||||
├── StepReconstruction.tsx # Schritt 9: Rekonstruktion (Canvas + Overlay)
|
||||
├── usePixelWordPositions.ts # Shared Hook: Pixel-basierte Wortpositionierung
|
||||
├── FabricReconstructionCanvas.tsx # Fabric.js Editor
|
||||
@@ -281,14 +284,21 @@ Alle Endpoints unter `/api/v1/ocr-pipeline/`.
|
||||
| `skip_heal_gaps` | `false` | Zeilen-Luecken nicht heilen (Overlay-Modus) |
|
||||
| `grid_method` | `v2` | Grid-Strategie: `v2` (top-down) oder `words_first` (bottom-up) |
|
||||
|
||||
### Schritt 8: Korrektur
|
||||
### Schritt 8: Strukturerkennung
|
||||
|
||||
| Methode | Pfad | Beschreibung |
|
||||
|---------|------|--------------|
|
||||
| `POST` | `/sessions/{id}/detect-structure` | Boxen, Zonen, Farben und Grafiken erkennen |
|
||||
| `GET` | `/sessions/{id}/image/structure-overlay` | Overlay mit allen Strukturelementen |
|
||||
|
||||
### Schritt 9: Korrektur
|
||||
|
||||
| Methode | Pfad | Beschreibung |
|
||||
|---------|------|--------------|
|
||||
| `POST` | `/sessions/{id}/llm-review?stream=true` | SSE-Stream Korrektur starten |
|
||||
| `POST` | `/sessions/{id}/llm-review/apply` | Ausgewaehlte Korrekturen speichern |
|
||||
|
||||
### Schritt 9: Rekonstruktion
|
||||
### Schritt 10: Rekonstruktion
|
||||
|
||||
| Methode | Pfad | Beschreibung |
|
||||
|---------|------|--------------|
|
||||
@@ -853,6 +863,93 @@ Change-Format:
|
||||
|
||||
---
|
||||
|
||||
## Schritt 8: Strukturerkennung (Detail)
|
||||
|
||||
Erkennt Boxen, Zonen, Farbregionen und grafische Elemente auf der Seite.
|
||||
Laeuft **nach** der Worterkennung (Schritt 7), damit OCR-Wortpositionen
|
||||
fuer die Unterscheidung von Text vs. Grafik zur Verfuegung stehen.
|
||||
|
||||
### Teilschritte
|
||||
|
||||
1. **Box-Erkennung** (`cv_box_detect.py`): Linien-Rahmen und farbige Hintergruende
|
||||
2. **Zonen-Aufteilung** (`split_page_into_zones`): Seite in Box- und Content-Zonen aufteilen
|
||||
3. **Farb-Analyse** (`cv_color_detect.py`): HSV-basierte Erkennung farbiger Textbereiche
|
||||
4. **Grafik-Erkennung** (`cv_graphic_detect.py`): Nicht-Text-Grafiken identifizieren
|
||||
|
||||
### Grafik-Erkennung: Region-basierter Ansatz
|
||||
|
||||
Zwei Paesse trennen farbige Grafiken von farbigem Text und erkennen
|
||||
schwarze Illustrationen:
|
||||
|
||||
**Pass 1 — Farbige Bildregionen:**
|
||||
|
||||
1. HSV-Saturation-Kanal extrahieren (Schwelle > 40)
|
||||
- Schwarzer Text hat Saettigung ≈ 0 → unsichtbar auf diesem Kanal
|
||||
2. Starke Dilation (25×25 Ellipse) verschmilzt nahe Farbpixel zu Regionen
|
||||
3. Fuer jede Region: Wort-Ueberlappung pruefen
|
||||
- \> 50 % Ueberlappung mit OCR-Woertern → farbiger Text → ueberspringen
|
||||
- ≤ 50 % → farbige Grafik/Bild → behalten
|
||||
4. Minimum 200 Farbpixel erforderlich (kein Rauschen)
|
||||
5. Regionen > 50 % der Bildbreite oder -hoehe → Seitenumfassend → ueberspringen
|
||||
|
||||
**Pass 2 — Schwarze Illustrationen:**
|
||||
|
||||
1. Otsu-Binarisierung fuer Tinten-Maske
|
||||
2. Ausschlusszonen: OCR-Woerter (5 px Padding) + erkannte Boxen (8 px Inset)
|
||||
3. Farbige Pixel aus Pass 1 ebenfalls ausschliessen
|
||||
4. Nur Konturen mit Flaeche > 5000 px und min(Breite, Hoehe) > 40 px
|
||||
|
||||
**Deduplizierung:** Ueberlappende Elemente (> 50 % IoU der kleineren
|
||||
Bounding-Box) werden zusammengefasst. Ergebnis nach Flaeche absteigend
|
||||
sortiert.
|
||||
|
||||
### Response-Format
|
||||
|
||||
```json
|
||||
{
|
||||
"boxes": [
|
||||
{"x": 50, "y": 300, "w": 1100, "h": 200, "confidence": 0.85,
|
||||
"border_thickness": 3, "bg_color_name": "blue", "bg_color_hex": "#2563eb"}
|
||||
],
|
||||
"zones": [
|
||||
{"index": 0, "zone_type": "content", "x": 50, "y": 50, "w": 1100, "h": 250},
|
||||
{"index": 1, "zone_type": "box", "x": 50, "y": 300, "w": 1100, "h": 200}
|
||||
],
|
||||
"graphics": [
|
||||
{"x": 100, "y": 500, "w": 150, "h": 120, "area": 8500,
|
||||
"shape": "image", "color_name": "red", "color_hex": "#dc2626",
|
||||
"confidence": 0.72}
|
||||
],
|
||||
"color_pixel_counts": {"red": 1234, "blue": 5678},
|
||||
"has_words": true,
|
||||
"word_count": 96,
|
||||
"duration_seconds": 0.45
|
||||
}
|
||||
```
|
||||
|
||||
### Grafik-Shape-Typen
|
||||
|
||||
| Shape | Quelle | Beschreibung |
|
||||
|-------|--------|--------------|
|
||||
| `image` | Pass 1 | Farbige Grafik/Bild (Ballons, Pfeile, Icons) |
|
||||
| `illustration` | Pass 2 | Grosse schwarze Zeichnung/Illustration |
|
||||
|
||||
### Erkannte Farben
|
||||
|
||||
`red`, `orange`, `yellow`, `green`, `blue`, `purple`, `black`
|
||||
— basierend auf dem Median-Hue der saturierten Pixel in der Region.
|
||||
|
||||
### Frontend-Anzeige
|
||||
|
||||
`StepStructureDetection.tsx` zeigt:
|
||||
|
||||
- Boxen-Liste mit Position, Hintergrundfarbe und Confidence
|
||||
- Zonen-Uebersicht (Content vs. Box)
|
||||
- Farb-Zusammenfassung (Pixel-Counts)
|
||||
- Grafik-Liste mit Shape, Abmessungen, Farbe und Confidence
|
||||
|
||||
---
|
||||
|
||||
## Schritt 9: Rekonstruktion (Detail)
|
||||
|
||||
Drei Modi verfuegbar:
|
||||
@@ -1263,6 +1360,7 @@ cd klausur-service/backend && pytest tests/test_paddle_kombi.py -v # 36 Tests
|
||||
|
||||
| Datum | Version | Aenderung |
|
||||
|-------|---------|----------|
|
||||
| 2026-03-16 | 4.6.0 | Strukturerkennung (Schritt 8): Region-basierte Grafikerkennung (`cv_graphic_detect.py`) mit Zwei-Pass-Verfahren (Farbregionen + schwarze Illustrationen), Wort-Ueberlappungs-Filter, Box/Zonen/Farb-Analyse. Schritt laeuft nach Worterkennung. |
|
||||
| 2026-03-12 | 4.5.0 | Kombi-Modus (PaddleOCR + Tesseract): Beide Engines laufen parallel, Koordinaten werden IoU-basiert gematcht und confidence-gewichtet gemittelt. Ungematchte Tesseract-Woerter (Bullets, Symbole) werden hinzugefuegt. 3er-Toggle in OCR Overlay. |
|
||||
| 2026-03-12 | 4.4.0 | PaddleOCR Remote-Engine (`engine=paddle`): PP-OCRv5 Latin auf Hetzner x86_64. Neuer Microservice (`paddleocr-service/`), HTTP-Client (`paddleocr_remote.py`), Frontend-Dropdown-Option. Nutzt words_first Grid-Methode. |
|
||||
| 2026-03-12 | 4.3.0 | Words-First Grid Builder (`cv_words_first.py`): Bottom-up-Algorithmus clustert Tesseract word_boxes direkt zu Spalten/Zeilen/Zellen. Neuer `grid_method` Parameter im `/words` Endpoint. Frontend-Toggle in StepWordRecognition. |
|
||||
|
||||
@@ -121,10 +121,9 @@ def detect_graphic_elements(
|
||||
return []
|
||||
|
||||
h, w = img_bgr.shape[:2]
|
||||
img_area = h * w
|
||||
|
||||
logger.info("GraphicDetect: image %dx%d, %d word_boxes, %d detected_boxes",
|
||||
w, h, len(word_boxes), len(detected_boxes or []))
|
||||
logger.debug("GraphicDetect: image %dx%d, %d word_boxes, %d detected_boxes",
|
||||
w, h, len(word_boxes), len(detected_boxes or []))
|
||||
|
||||
hsv = cv2.cvtColor(img_bgr, cv2.COLOR_BGR2HSV)
|
||||
candidates: List[GraphicElement] = []
|
||||
@@ -161,7 +160,7 @@ def detect_graphic_elements(
|
||||
contours_regions, _ = cv2.findContours(
|
||||
region_mask, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE,
|
||||
)
|
||||
logger.info("GraphicDetect PASS1: %d color regions after dilation", len(contours_regions))
|
||||
logger.debug("GraphicDetect PASS1: %d color regions after dilation", len(contours_regions))
|
||||
|
||||
for cnt in contours_regions:
|
||||
bx, by, bw, bh = cv2.boundingRect(cnt)
|
||||
@@ -172,7 +171,7 @@ def detect_graphic_elements(
|
||||
|
||||
# Skip page-spanning regions
|
||||
if bw > w * 0.5 or bh > h * 0.5:
|
||||
logger.info("GraphicDetect PASS1 SKIP page-spanning (%d,%d) %dx%d", bx, by, bw, bh)
|
||||
logger.debug("GraphicDetect PASS1 skip page-spanning (%d,%d) %dx%d", bx, by, bw, bh)
|
||||
continue
|
||||
|
||||
bbox_area = bw * bh
|
||||
@@ -188,8 +187,8 @@ def detect_graphic_elements(
|
||||
|
||||
# If most of the region is covered by word boxes → colored text, skip
|
||||
if word_overlap > 0.5:
|
||||
logger.info("GraphicDetect PASS1 SKIP text region (%d,%d) %dx%d word_overlap=%.0f%%",
|
||||
bx, by, bw, bh, word_overlap * 100)
|
||||
logger.debug("GraphicDetect PASS1 skip text region (%d,%d) %dx%d overlap=%.0f%%",
|
||||
bx, by, bw, bh, word_overlap * 100)
|
||||
continue
|
||||
|
||||
# Need a minimum number of colored pixels (not just dilated area)
|
||||
@@ -209,8 +208,7 @@ def detect_graphic_elements(
|
||||
density = color_pixel_count / bbox_area if bbox_area > 0 else 0
|
||||
conf = min(0.95, 0.5 + density * 0.5)
|
||||
|
||||
logger.info("GraphicDetect PASS1 ACCEPT image at (%d,%d) %dx%d "
|
||||
"color_px=%d word_overlap=%.0f%% color=%s",
|
||||
logger.debug("GraphicDetect PASS1 accept (%d,%d) %dx%d px=%d overlap=%.0f%% %s",
|
||||
bx, by, bw, bh, color_pixel_count, word_overlap * 100, color_name)
|
||||
candidates.append(GraphicElement(
|
||||
x=bx, y=by, width=bw, height=bh,
|
||||
@@ -256,7 +254,7 @@ def detect_graphic_elements(
|
||||
contours_ink, _ = cv2.findContours(
|
||||
ink_only, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE,
|
||||
)
|
||||
logger.info("GraphicDetect PASS2 (ink): %d contours", len(contours_ink))
|
||||
logger.debug("GraphicDetect PASS2 ink: %d contours", len(contours_ink))
|
||||
|
||||
for cnt in contours_ink:
|
||||
area = cv2.contourArea(cnt)
|
||||
@@ -267,8 +265,8 @@ def detect_graphic_elements(
|
||||
if bw > w * 0.8 or bh > h * 0.8:
|
||||
continue
|
||||
|
||||
logger.info("GraphicDetect PASS2 ACCEPT illustration at (%d,%d) %dx%d area=%d",
|
||||
bx, by, bw, bh, int(area))
|
||||
logger.debug("GraphicDetect PASS2 accept (%d,%d) %dx%d area=%d",
|
||||
bx, by, bw, bh, int(area))
|
||||
candidates.append(GraphicElement(
|
||||
x=bx, y=by, width=bw, height=bh,
|
||||
area=int(area), shape="illustration",
|
||||
|
||||
@@ -1202,6 +1202,147 @@ async def detect_type(session_id: str):
|
||||
return {"session_id": session_id, **result_dict}
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Border-ghost word filter
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
# Characters that OCR produces when reading box-border lines.
|
||||
_BORDER_GHOST_CHARS = set("|1lI![](){}iíì/\\-—–_~.,;:'\"")
|
||||
|
||||
|
||||
def _filter_border_ghost_words(
|
||||
word_result: Dict,
|
||||
boxes: List,
|
||||
) -> int:
|
||||
"""Remove OCR words that are actually box border lines.
|
||||
|
||||
A word is considered a border ghost when it sits on a known box edge
|
||||
(left, right, top, or bottom) and looks like a line artefact (narrow
|
||||
aspect ratio or text consists only of line-like characters).
|
||||
|
||||
After removing ghost cells, columns that have become empty are also
|
||||
removed from ``columns_used`` so the grid no longer shows phantom
|
||||
columns.
|
||||
|
||||
Modifies *word_result* in-place and returns the number of removed cells.
|
||||
"""
|
||||
if not boxes or not word_result:
|
||||
return 0
|
||||
|
||||
cells = word_result.get("cells")
|
||||
if not cells:
|
||||
return 0
|
||||
|
||||
# Build border bands — vertical (X) and horizontal (Y)
|
||||
x_bands = [] # list of (x_lo, x_hi)
|
||||
y_bands = [] # list of (y_lo, y_hi)
|
||||
for b in boxes:
|
||||
bx = b.x if hasattr(b, "x") else b.get("x", 0)
|
||||
by = b.y if hasattr(b, "y") else b.get("y", 0)
|
||||
bw = b.width if hasattr(b, "width") else b.get("w", b.get("width", 0))
|
||||
bh = b.height if hasattr(b, "height") else b.get("h", b.get("height", 0))
|
||||
bt = b.border_thickness if hasattr(b, "border_thickness") else b.get("border_thickness", 3)
|
||||
margin = max(bt * 2, 10) + 6 # generous margin
|
||||
|
||||
# Vertical edges (left / right)
|
||||
x_bands.append((bx - margin, bx + margin))
|
||||
x_bands.append((bx + bw - margin, bx + bw + margin))
|
||||
# Horizontal edges (top / bottom)
|
||||
y_bands.append((by - margin, by + margin))
|
||||
y_bands.append((by + bh - margin, by + bh + margin))
|
||||
|
||||
img_w = word_result.get("image_width", 1)
|
||||
img_h = word_result.get("image_height", 1)
|
||||
|
||||
def _is_ghost(cell: Dict) -> bool:
|
||||
text = (cell.get("text") or "").strip()
|
||||
if not text:
|
||||
return False
|
||||
|
||||
# Compute absolute pixel position
|
||||
if cell.get("bbox_px"):
|
||||
px = cell["bbox_px"]
|
||||
cx = px["x"] + px["w"] / 2
|
||||
cy = px["y"] + px["h"] / 2
|
||||
cw = px["w"]
|
||||
ch = px["h"]
|
||||
elif cell.get("bbox_pct"):
|
||||
pct = cell["bbox_pct"]
|
||||
cx = (pct["x"] / 100) * img_w + (pct["w"] / 100) * img_w / 2
|
||||
cy = (pct["y"] / 100) * img_h + (pct["h"] / 100) * img_h / 2
|
||||
cw = (pct["w"] / 100) * img_w
|
||||
ch = (pct["h"] / 100) * img_h
|
||||
else:
|
||||
return False
|
||||
|
||||
# Check if center sits on a vertical or horizontal border
|
||||
on_vertical = any(lo <= cx <= hi for lo, hi in x_bands)
|
||||
on_horizontal = any(lo <= cy <= hi for lo, hi in y_bands)
|
||||
if not on_vertical and not on_horizontal:
|
||||
return False
|
||||
|
||||
# Very short text (1-2 chars) on a border → very likely ghost
|
||||
if len(text) <= 2:
|
||||
# Narrow vertically (line-like) or narrow horizontally (dash-like)?
|
||||
if ch > 0 and cw / ch < 0.5:
|
||||
return True
|
||||
if cw > 0 and ch / cw < 0.5:
|
||||
return True
|
||||
# Text is only border-ghost characters?
|
||||
if all(c in _BORDER_GHOST_CHARS for c in text):
|
||||
return True
|
||||
|
||||
# Longer text but still only ghost chars and very narrow
|
||||
if all(c in _BORDER_GHOST_CHARS for c in text):
|
||||
if ch > 0 and cw / ch < 0.35:
|
||||
return True
|
||||
if cw > 0 and ch / cw < 0.35:
|
||||
return True
|
||||
return True # all ghost chars on a border → remove
|
||||
|
||||
return False
|
||||
|
||||
before = len(cells)
|
||||
word_result["cells"] = [c for c in cells if not _is_ghost(c)]
|
||||
removed = before - len(word_result["cells"])
|
||||
|
||||
# --- Remove empty columns from columns_used ---
|
||||
columns_used = word_result.get("columns_used")
|
||||
if removed and columns_used and len(columns_used) > 1:
|
||||
remaining_cells = word_result["cells"]
|
||||
occupied_cols = {c.get("col_index") for c in remaining_cells}
|
||||
before_cols = len(columns_used)
|
||||
columns_used = [col for col in columns_used if col.get("index") in occupied_cols]
|
||||
|
||||
# Re-index columns and remap cell col_index values
|
||||
if len(columns_used) < before_cols:
|
||||
old_to_new = {}
|
||||
for new_i, col in enumerate(columns_used):
|
||||
old_to_new[col["index"]] = new_i
|
||||
col["index"] = new_i
|
||||
for cell in remaining_cells:
|
||||
old_ci = cell.get("col_index")
|
||||
if old_ci in old_to_new:
|
||||
cell["col_index"] = old_to_new[old_ci]
|
||||
word_result["columns_used"] = columns_used
|
||||
logger.info("border-ghost: removed %d empty column(s), %d remaining",
|
||||
before_cols - len(columns_used), len(columns_used))
|
||||
|
||||
if removed:
|
||||
# Update summary counts
|
||||
summary = word_result.get("summary", {})
|
||||
summary["total_cells"] = len(word_result["cells"])
|
||||
summary["non_empty_cells"] = sum(1 for c in word_result["cells"] if c.get("text"))
|
||||
word_result["summary"] = summary
|
||||
gs = word_result.get("grid_shape", {})
|
||||
gs["total_cells"] = len(word_result["cells"])
|
||||
if columns_used is not None:
|
||||
gs["cols"] = len(columns_used)
|
||||
word_result["grid_shape"] = gs
|
||||
|
||||
return removed
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Structure Detection Endpoint
|
||||
# ---------------------------------------------------------------------------
|
||||
@@ -1236,10 +1377,6 @@ async def detect_structure(session_id: str):
|
||||
for cell in word_result["cells"]:
|
||||
for wb in (cell.get("word_boxes") or []):
|
||||
words.append(wb)
|
||||
logger.info("detect-structure: word_result present=%s, cells=%d, word_boxes extracted=%d",
|
||||
word_result is not None,
|
||||
len(word_result.get("cells", [])) if word_result else 0,
|
||||
len(words))
|
||||
# If no words yet, use image dimensions with small margin
|
||||
if words:
|
||||
content_x = max(0, min(int(wb["left"]) for wb in words))
|
||||
@@ -1319,6 +1456,15 @@ async def detect_structure(session_id: str):
|
||||
detected_boxes=box_dicts,
|
||||
)
|
||||
|
||||
# --- Filter border-ghost words from OCR result ---
|
||||
ghost_count = 0
|
||||
if boxes and word_result:
|
||||
ghost_count = _filter_border_ghost_words(word_result, boxes)
|
||||
if ghost_count:
|
||||
logger.info("detect-structure: removed %d border-ghost words", ghost_count)
|
||||
await update_session_db(session_id, word_result=word_result)
|
||||
cached["word_result"] = word_result
|
||||
|
||||
duration = time.time() - t0
|
||||
|
||||
result_dict = {
|
||||
@@ -1361,6 +1507,7 @@ async def detect_structure(session_id: str):
|
||||
"color_pixel_counts": color_summary,
|
||||
"has_words": len(words) > 0,
|
||||
"word_count": len(words),
|
||||
"border_ghosts_removed": ghost_count,
|
||||
"duration_seconds": round(duration, 2),
|
||||
}
|
||||
|
||||
@@ -1806,12 +1953,7 @@ async def _get_structure_overlay(session_id: str) -> Response:
|
||||
# --- Draw graphic elements ---
|
||||
graphics_data = structure.get("graphics", [])
|
||||
shape_icons = {
|
||||
"arrow": "ARROW",
|
||||
"circle": "CIRCLE",
|
||||
"line": "LINE",
|
||||
"exclamation": "!",
|
||||
"dot": "DOT",
|
||||
"icon": "ICON",
|
||||
"image": "IMAGE",
|
||||
"illustration": "ILLUST",
|
||||
}
|
||||
for gfx in graphics_data:
|
||||
|
||||
307
klausur-service/backend/tests/test_border_ghost_filter.py
Normal file
307
klausur-service/backend/tests/test_border_ghost_filter.py
Normal file
@@ -0,0 +1,307 @@
|
||||
"""
|
||||
Tests for _filter_border_ghost_words() — removes OCR artefacts from box borders.
|
||||
|
||||
When OCR reads a scanned document, box border lines (vertical/horizontal
|
||||
strokes) are often misrecognised as characters like '|', '1', 'l', '-'.
|
||||
These phantom words create spurious columns/rows in the grid. The filter
|
||||
removes them by checking if a word sits on a known box border and looks
|
||||
like a line artefact.
|
||||
|
||||
Lizenz: Apache 2.0
|
||||
"""
|
||||
|
||||
import sys
|
||||
import os
|
||||
|
||||
sys.path.insert(0, os.path.join(os.path.dirname(__file__), ".."))
|
||||
|
||||
from ocr_pipeline_api import _filter_border_ghost_words, _BORDER_GHOST_CHARS
|
||||
from cv_vocab_types import DetectedBox
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Helpers
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
def _make_cell(text: str, x: int, y: int, w: int, h: int,
|
||||
col_index: int = 0) -> dict:
|
||||
"""Create a cell dict with bbox_px matching the word recognition output."""
|
||||
return {
|
||||
"cell_id": f"c_{x}_{y}",
|
||||
"text": text,
|
||||
"bbox_px": {"x": x, "y": y, "w": w, "h": h},
|
||||
"bbox_pct": {
|
||||
"x": x / 12, "y": y / 18,
|
||||
"w": w / 12, "h": h / 18,
|
||||
},
|
||||
"confidence": 80,
|
||||
"row_index": 0,
|
||||
"col_index": col_index,
|
||||
}
|
||||
|
||||
|
||||
def _make_word_result(cells: list, img_w: int = 1200, img_h: int = 1800,
|
||||
columns_used: list = None) -> dict:
|
||||
return {
|
||||
"cells": cells,
|
||||
"image_width": img_w,
|
||||
"image_height": img_h,
|
||||
"columns_used": columns_used,
|
||||
"summary": {
|
||||
"total_cells": len(cells),
|
||||
"non_empty_cells": sum(1 for c in cells if c.get("text")),
|
||||
},
|
||||
"grid_shape": {
|
||||
"total_cells": len(cells),
|
||||
"cols": len(columns_used) if columns_used else 1,
|
||||
},
|
||||
}
|
||||
|
||||
|
||||
def _make_box(x: int, y: int, w: int, h: int, bt: int = 3) -> DetectedBox:
|
||||
return DetectedBox(x=x, y=y, width=w, height=h, confidence=0.9, border_thickness=bt)
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Basic filtering tests
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
class TestBorderGhostFilter:
|
||||
"""Tests for the _filter_border_ghost_words() function."""
|
||||
|
||||
def test_no_boxes_no_change(self):
|
||||
"""Without boxes, nothing should be filtered."""
|
||||
cells = [_make_cell("hello", 100, 200, 80, 30)]
|
||||
wr = _make_word_result(cells)
|
||||
removed = _filter_border_ghost_words(wr, [])
|
||||
assert removed == 0
|
||||
assert len(wr["cells"]) == 1
|
||||
|
||||
def test_no_word_result_no_crash(self):
|
||||
removed = _filter_border_ghost_words(None, [_make_box(50, 300, 1100, 200)])
|
||||
assert removed == 0
|
||||
|
||||
def test_empty_cells_no_crash(self):
|
||||
wr = _make_word_result([])
|
||||
removed = _filter_border_ghost_words(wr, [_make_box(50, 300, 1100, 200)])
|
||||
assert removed == 0
|
||||
|
||||
def test_pipe_on_left_border_removed(self):
|
||||
"""A '|' character sitting on the left border of a box should be removed."""
|
||||
box = _make_box(x=50, y=300, w=1100, h=200, bt=3)
|
||||
cells = [
|
||||
_make_cell("|", x=48, y=350, w=3, h=25),
|
||||
_make_cell("hello", x=200, y=350, w=80, h=25),
|
||||
]
|
||||
wr = _make_word_result(cells)
|
||||
removed = _filter_border_ghost_words(wr, [box])
|
||||
assert removed == 1
|
||||
assert wr["cells"][0]["text"] == "hello"
|
||||
|
||||
def test_pipe_on_right_border_removed(self):
|
||||
"""A '|' character on the right border should be removed."""
|
||||
box = _make_box(x=50, y=300, w=1100, h=200, bt=3)
|
||||
cells = [
|
||||
_make_cell("|", x=1148, y=350, w=4, h=25),
|
||||
_make_cell("world", x=600, y=350, w=80, h=25),
|
||||
]
|
||||
wr = _make_word_result(cells)
|
||||
removed = _filter_border_ghost_words(wr, [box])
|
||||
assert removed == 1
|
||||
assert wr["cells"][0]["text"] == "world"
|
||||
|
||||
def test_digit_1_on_border_narrow_removed(self):
|
||||
"""A narrow '1' on a box border should be removed."""
|
||||
box = _make_box(x=50, y=300, w=1100, h=200, bt=3)
|
||||
cells = [_make_cell("1", x=49, y=400, w=5, h=20)]
|
||||
wr = _make_word_result(cells)
|
||||
removed = _filter_border_ghost_words(wr, [box])
|
||||
assert removed == 1
|
||||
|
||||
def test_dash_on_horizontal_border_removed(self):
    """A '-' on the bottom horizontal border should be removed."""
    border_box = _make_box(x=50, y=300, w=1100, h=200, bt=3)
    # The box bottom edge sits at y=500; the dash is drawn at y=498.
    dash = _make_cell("-", x=600, y=498, w=20, h=4)
    word_result = _make_word_result([dash])
    assert _filter_border_ghost_words(word_result, [border_box]) == 1
||||
|
||||
def test_real_word_on_border_not_removed(self):
    """A normal word near a border should NOT be removed."""
    border_box = _make_box(x=50, y=300, w=1100, h=200, bt=3)
    # Wide, real text right next to the left border edge.
    word_result = _make_word_result([_make_cell("Tip", x=52, y=350, w=60, h=25)])
    assert _filter_border_ghost_words(word_result, [border_box]) == 0
||||
|
||||
def test_word_far_from_border_not_removed(self):
    """Words far from any border should never be removed."""
    border_box = _make_box(x=50, y=300, w=1100, h=200, bt=3)
    # A pipe glyph in the middle of the box — not a border artefact.
    word_result = _make_word_result([_make_cell("|", x=600, y=400, w=3, h=25)])
    assert _filter_border_ghost_words(word_result, [border_box]) == 0
||||
|
||||
def test_multiple_ghosts_on_same_box(self):
    """Multiple ghost words on the same box should all be removed."""
    border_box = _make_box(x=50, y=300, w=1100, h=200, bt=3)
    word_result = _make_word_result([
        _make_cell("|", x=48, y=350, w=3, h=25),     # left-border ghost
        _make_cell("l", x=1149, y=350, w=4, h=25),   # right-border ghost
        _make_cell("text", x=400, y=350, w=80, h=25),
    ])
    assert _filter_border_ghost_words(word_result, [border_box]) == 2
    remaining = word_result["cells"]
    assert len(remaining) == 1
    assert remaining[0]["text"] == "text"
||||
|
||||
def test_summary_updated_after_removal(self):
    """Cell counters in summary and grid_shape must reflect removals."""
    border_box = _make_box(x=50, y=300, w=1100, h=200, bt=3)
    word_result = _make_word_result([
        _make_cell("|", x=48, y=350, w=3, h=25),
        _make_cell("hello", x=200, y=350, w=80, h=25),
    ])
    _filter_border_ghost_words(word_result, [border_box])
    assert word_result["summary"]["total_cells"] == 1
    assert word_result["grid_shape"]["total_cells"] == 1
||||
|
||||
def test_ghost_chars_covers_common_artefacts(self):
    """The ghost chars set should include common border-line OCR artefacts."""
    must_have = {"|", "1", "l", "I", "!", "[", "]", "-", "—", "_", "/", "\\"}
    assert must_have.issubset(_BORDER_GHOST_CHARS)
||||
|
||||
def test_multiple_boxes(self):
    """Ghosts sitting on the borders of different boxes are all caught."""
    left_box = _make_box(x=50, y=300, w=500, h=200, bt=3)
    right_box = _make_box(x=600, y=300, w=500, h=200, bt=3)
    word_result = _make_word_result([
        _make_cell("|", x=49, y=350, w=3, h=25),    # on left_box border
        _make_cell("I", x=599, y=350, w=4, h=25),   # on right_box border
        _make_cell("real", x=300, y=350, w=80, h=25),
    ])
    assert _filter_border_ghost_words(word_result, [left_box, right_box]) == 2
||||
|
||||
def test_uses_bbox_pct_fallback(self):
    """Should work with bbox_pct when bbox_px is not available."""
    border_box = _make_box(x=50, y=300, w=1100, h=200, bt=3)
    # Same geometry as a 48/350/4/25 px ghost, expressed in page percent.
    pct_cell = {
        "cell_id": "c_test",
        "text": "|",
        "bbox_pct": {
            "x": (48 / 1200) * 100,
            "y": (350 / 1800) * 100,
            "w": (4 / 1200) * 100,
            "h": (25 / 1800) * 100,
        },
        "confidence": 80,
        "col_index": 0,
    }
    word_result = _make_word_result([pct_cell])
    assert _filter_border_ghost_words(word_result, [border_box]) == 1
||||
|
||||
def test_generous_margin_catches_offset_ghosts(self):
    """Even if OCR word is slightly offset from border, it should be caught."""
    border_box = _make_box(x=50, y=300, w=1100, h=200, bt=3)
    # Ghost sits 15px inside the right border (x=1135 vs border at x=1150).
    word_result = _make_word_result([_make_cell("|", x=1135, y=350, w=4, h=25)])
    assert _filter_border_ghost_words(word_result, [border_box]) == 1
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Column cleanup tests
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
class TestColumnCleanup:
    """Tests for empty column removal after ghost filtering."""

    def test_empty_column_removed(self):
        """After filtering all cells of column 4, it should be removed."""
        border_box = _make_box(x=50, y=300, w=1100, h=200, bt=3)
        column_defs = [
            {"index": 0, "type": "column_en", "x": 60, "width": 250},
            {"index": 1, "type": "column_de", "x": 320, "width": 250},
            {"index": 2, "type": "column_3", "x": 580, "width": 250},
            {"index": 3, "type": "column_4", "x": 840, "width": 250},
            # Ghost column produced entirely by border artefacts:
            {"index": 4, "type": "column_5", "x": 1140, "width": 60},
        ]
        word_result = _make_word_result(
            [
                _make_cell("word", x=100, y=350, w=60, h=25, col_index=0),
                _make_cell("Wort", x=360, y=350, w=60, h=25, col_index=1),
                _make_cell("txt", x=620, y=350, w=50, h=25, col_index=2),
                _make_cell("abc", x=880, y=350, w=50, h=25, col_index=3),
                _make_cell("|", x=1148, y=350, w=4, h=25, col_index=4),   # ghost
                _make_cell("l", x=1149, y=400, w=3, h=25, col_index=4),   # ghost
            ],
            columns_used=column_defs,
        )
        removed = _filter_border_ghost_words(word_result, [border_box])

        assert removed == 2
        assert len(word_result["columns_used"]) == 4  # ghost column dropped
        assert word_result["grid_shape"]["cols"] == 4

    def test_columns_reindexed_after_removal(self):
        """After removing a middle column, indices should be sequential."""
        border_box = _make_box(x=50, y=300, w=1100, h=200, bt=3)
        column_defs = [
            {"index": 0, "type": "column_1", "x": 60, "width": 200},
            {"index": 1, "type": "column_2", "x": 280, "width": 30},  # border col
            {"index": 2, "type": "column_3", "x": 400, "width": 200},
        ]
        # Column 1 holds only a ghost sitting right on the box border, so it
        # empties out once the filter runs.
        word_result = _make_word_result(
            [
                _make_cell("hello", x=100, y=350, w=60, h=25, col_index=0),
                _make_cell("|", x=49, y=350, w=3, h=25, col_index=1),
                _make_cell("world", x=440, y=350, w=60, h=25, col_index=2),
            ],
            columns_used=column_defs,
        )
        _filter_border_ghost_words(word_result, [border_box])

        # Column 1 vanished; former column 2 slides down to index 1.
        surviving = word_result["columns_used"]
        assert len(surviving) == 2
        assert surviving[0]["index"] == 0
        assert surviving[1]["index"] == 1
        # Remaining cells carry the re-indexed col_index values.
        assert word_result["cells"][0]["col_index"] == 0
        assert word_result["cells"][1]["col_index"] == 1

    def test_no_columns_used_no_crash(self):
        """If columns_used is None, column cleanup should be skipped."""
        border_box = _make_box(x=50, y=300, w=1100, h=200, bt=3)
        word_result = _make_word_result(
            [_make_cell("|", x=48, y=350, w=3, h=25)], columns_used=None
        )
        assert _filter_border_ghost_words(word_result, [border_box]) == 1

    def test_occupied_columns_kept(self):
        """Columns that still have cells after filtering should be kept."""
        border_box = _make_box(x=50, y=300, w=1100, h=200, bt=3)
        column_defs = [
            {"index": 0, "type": "column_en", "x": 60, "width": 250},
            {"index": 1, "type": "column_de", "x": 320, "width": 250},
        ]
        word_result = _make_word_result(
            [
                _make_cell("word", x=100, y=350, w=60, h=25, col_index=0),
                _make_cell("Wort", x=360, y=350, w=60, h=25, col_index=1),
            ],
            columns_used=column_defs,
        )
        assert _filter_border_ghost_words(word_result, [border_box]) == 0
        assert len(word_result["columns_used"]) == 2

    def test_single_column_not_removed(self):
        """A single remaining column should never be removed."""
        border_box = _make_box(x=50, y=300, w=1100, h=200, bt=3)
        only_column = [{"index": 0, "type": "column_text", "x": 60, "width": 1000}]
        word_result = _make_word_result(
            [_make_cell("|", x=49, y=350, w=3, h=25, col_index=0)],
            columns_used=only_column,
        )
        # Even when its only cell is filtered away, the last column survives:
        # cleanup is skipped entirely for len(columns_used) <= 1.
        assert _filter_border_ghost_words(word_result, [border_box]) == 1
        assert len(word_result["columns_used"]) == 1
||||
320
klausur-service/backend/tests/test_cv_graphic_detect.py
Normal file
320
klausur-service/backend/tests/test_cv_graphic_detect.py
Normal file
@@ -0,0 +1,320 @@
|
||||
"""
|
||||
Tests for cv_graphic_detect.py — graphic element detection.
|
||||
|
||||
License: Apache 2.0
|
||||
"""
|
||||
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
import cv2
|
||||
|
||||
from cv_graphic_detect import detect_graphic_elements, GraphicElement, _dominant_color
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Helpers
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
def _white_image(width: int = 1200, height: int = 1800) -> np.ndarray:
|
||||
"""Create a plain white BGR image."""
|
||||
return np.ones((height, width, 3), dtype=np.uint8) * 255
|
||||
|
||||
|
||||
def _draw_colored_circle(img: np.ndarray, cx: int, cy: int, radius: int,
                         color_bgr: tuple) -> np.ndarray:
    """Paint a filled colored disc onto img (simulates a balloon / graphic)."""
    center = (cx, cy)
    cv2.circle(img, center, radius, color_bgr, -1)  # thickness=-1 → filled
    return img
|
||||
|
||||
|
||||
def _draw_colored_region(img: np.ndarray, x: int, y: int, w: int, h: int,
                         color_bgr: tuple) -> np.ndarray:
    """Paint a filled colored rectangle onto img (simulates an image region)."""
    top_left = (x, y)
    bottom_right = (x + w, y + h)
    cv2.rectangle(img, top_left, bottom_right, color_bgr, -1)  # filled
    return img
|
||||
|
||||
|
||||
def _draw_black_illustration(img: np.ndarray, x: int, y: int, w: int, h: int) -> np.ndarray:
    """Paint a large black filled shape (simulates a black-ink illustration)."""
    black = (0, 0, 0)  # BGR pure black
    cv2.rectangle(img, (x, y), (x + w, y + h), black, -1)  # thickness=-1 → filled
    return img
|
||||
|
||||
|
||||
def _word_box(left: int, top: int, width: int, height: int) -> dict:
|
||||
"""Create a word box dict matching OCR output format."""
|
||||
return {"left": left, "top": top, "width": width, "height": height}
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# _dominant_color tests
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
class TestDominantColor:
    """Tests for the _dominant_color helper."""

    def test_empty_array(self):
        """No pixels at all falls back to black / #000000."""
        no_pixels = np.array([], dtype=np.uint8).reshape(0, 3)
        name, hex_val = _dominant_color(no_pixels)
        assert name == "black"
        assert hex_val == "#000000"

    def test_low_saturation_returns_black(self):
        """Pixels with low saturation should be classified as black."""
        # H=90 (irrelevant at this saturation), S=10 (low), V=200.
        washed_out = np.full((50, 50, 3), [90, 10, 200], dtype=np.uint8)
        name, _ = _dominant_color(washed_out)
        assert name == "black"

    def test_red_hue(self):
        """Pixels with hue ~0-10 or ~170+ should be red."""
        reddish = np.full((50, 50, 3), [5, 200, 200], dtype=np.uint8)
        name, hex_val = _dominant_color(reddish)
        assert name == "red"
        assert hex_val == "#dc2626"

    def test_blue_hue(self):
        """Pixels with hue ~100 should be blue."""
        bluish = np.full((50, 50, 3), [110, 200, 200], dtype=np.uint8)
        name, hex_val = _dominant_color(bluish)
        assert name == "blue"
        assert hex_val == "#2563eb"

    def test_green_hue(self):
        """Pixels with hue ~60 should be green."""
        greenish = np.full((50, 50, 3), [60, 200, 200], dtype=np.uint8)
        name, hex_val = _dominant_color(greenish)
        assert name == "green"
        assert hex_val == "#16a34a"

    def test_yellow_hue(self):
        """Pixels with hue ~30 should be yellow."""
        yellowish = np.full((50, 50, 3), [30, 200, 200], dtype=np.uint8)
        name, _ = _dominant_color(yellowish)
        assert name == "yellow"

    def test_orange_hue(self):
        """Pixels with hue ~15 should be orange."""
        orangish = np.full((50, 50, 3), [15, 200, 200], dtype=np.uint8)
        name, _ = _dominant_color(orangish)
        assert name == "orange"

    def test_purple_hue(self):
        """Pixels with hue ~140 should be purple."""
        purplish = np.full((50, 50, 3), [140, 200, 200], dtype=np.uint8)
        name, _ = _dominant_color(purplish)
        assert name == "purple"
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# detect_graphic_elements tests
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
class TestDetectGraphicElements:
    """Tests for the detect_graphic_elements() function."""

    def test_none_image_returns_empty(self):
        """None input should return empty list."""
        assert detect_graphic_elements(None, []) == []

    def test_white_image_no_graphics(self):
        """A plain white image should produce no graphic elements."""
        blank_page = _white_image()
        assert detect_graphic_elements(blank_page, []) == []

    def test_colored_region_detected_as_image(self):
        """A large colored rectangle should be detected as an image."""
        page = _white_image()
        # A big solid red block — far too large to be colored text.
        _draw_colored_region(page, x=100, y=300, w=200, h=200, color_bgr=(0, 0, 220))

        found = detect_graphic_elements(page, word_boxes=[])

        assert len(found) >= 1
        first = found[0]
        assert isinstance(first, GraphicElement)
        assert first.shape == "image"
        assert first.color_name == "red"
        assert first.confidence > 0

    def test_colored_text_excluded_by_word_overlap(self):
        """Colored regions that overlap heavily with word boxes should be skipped."""
        page = _white_image()
        _draw_colored_region(page, x=100, y=300, w=400, h=50, color_bgr=(0, 0, 220))
        # Word boxes covering >50% of the colored region.
        covering_words = [
            _word_box(100, 300, 200, 50),
            _word_box(300, 300, 200, 50),
        ]

        found = detect_graphic_elements(page, word_boxes=covering_words)

        # Any detection right where the colored text sits means the
        # word-overlap exclusion failed.
        for candidate in found:
            if 90 <= candidate.x <= 110 and 290 <= candidate.y <= 310:
                pytest.fail("Colored text region should be excluded by word overlap")

    def test_colored_graphic_with_low_word_overlap_kept(self):
        """A colored region with low word overlap should be kept."""
        page = _white_image()
        _draw_colored_circle(page, cx=300, cy=400, radius=80, color_bgr=(0, 200, 0))
        # A single small word box touches only a sliver of the circle.
        tiny_word = [_word_box(250, 390, 30, 20)]

        found = detect_graphic_elements(page, word_boxes=tiny_word)

        assert len(found) >= 1
        assert found[0].shape == "image"
        assert found[0].color_name == "green"

    def test_black_illustration_detected(self):
        """A large black filled area should be detected as illustration."""
        page = _white_image()
        _draw_black_illustration(page, x=200, y=400, w=300, h=300)

        found = detect_graphic_elements(page, word_boxes=[])

        assert len(found) >= 1
        illustrations = [g for g in found if g.shape == "illustration"]
        assert len(illustrations) >= 1
        assert illustrations[0].color_name == "black"

    def test_black_illustration_excluded_by_word_boxes(self):
        """Black ink in word regions should NOT be detected as illustration."""
        page = _white_image()
        # Black text-like strip fully covered by word boxes.
        _draw_black_illustration(page, x=100, y=300, w=400, h=60)
        covering_words = [
            _word_box(100, 300, 200, 60),
            _word_box(300, 300, 200, 60),
        ]

        found = detect_graphic_elements(page, word_boxes=covering_words)

        # The word exclusion mask should swallow all of the ink.
        illustrations = [g for g in found if g.shape == "illustration"]
        assert len(illustrations) == 0

    def test_tiny_colored_region_filtered(self):
        """Very small colored regions (<200 colored pixels) should be filtered."""
        page = _white_image()
        # A 5x5 colored dot — well below the pixel-count threshold.
        _draw_colored_region(page, x=500, y=500, w=5, h=5, color_bgr=(220, 0, 0))
        assert detect_graphic_elements(page, word_boxes=[]) == []

    def test_page_spanning_region_filtered(self):
        """Colored regions spanning >50% of width/height should be skipped."""
        page = _white_image(width=1200, height=1800)
        # 700px wide on a 1200px page — beyond the 50% width cap.
        _draw_colored_region(page, x=50, y=300, w=700, h=100, color_bgr=(0, 0, 220))
        assert detect_graphic_elements(page, word_boxes=[]) == []

    def test_multiple_graphics_detected(self):
        """Multiple separate colored regions should all be detected."""
        page = _white_image()
        for cx, cy, bgr in (
            (200, 300, (0, 0, 220)),
            (500, 300, (0, 200, 0)),
            (200, 600, (220, 0, 0)),
        ):
            _draw_colored_circle(page, cx=cx, cy=cy, radius=60, color_bgr=bgr)

        found = detect_graphic_elements(page, word_boxes=[])

        # At least 2 expected — dilation may merge neighbouring blobs.
        assert len(found) >= 2

    def test_results_sorted_by_area_descending(self):
        """Results should be sorted by area, largest first."""
        page = _white_image()
        _draw_colored_circle(page, cx=200, cy=300, radius=30, color_bgr=(0, 0, 220))   # small
        _draw_colored_circle(page, cx=600, cy=800, radius=100, color_bgr=(0, 200, 0))  # large

        found = detect_graphic_elements(page, word_boxes=[])

        if len(found) >= 2:
            assert found[0].area >= found[1].area

    def test_max_elements_limit(self):
        """Should respect max_elements parameter."""
        page = _white_image(width=2000, height=2000)
        for i in range(10):
            _draw_colored_circle(page, cx=100 + i * 180, cy=300, radius=40,
                                 color_bgr=(0, 0, 220))

        found = detect_graphic_elements(page, word_boxes=[], max_elements=3)

        assert len(found) <= 3

    def test_detected_boxes_excluded_from_ink(self):
        """Detected box regions should be excluded from ink illustration detection."""
        page = _white_image()
        # Black shape drawn well inside the box area (an 8px inset is used).
        _draw_black_illustration(page, x=120, y=320, w=360, h=160)
        # Mark the outer box — its 8px inset still covers the drawn region.
        outer_boxes = [{"x": 100, "y": 300, "w": 400, "h": 200}]

        found = detect_graphic_elements(page, word_boxes=[], detected_boxes=outer_boxes)

        illustrations = [g for g in found if g.shape == "illustration"]
        assert len(illustrations) == 0

    def test_deduplication_overlapping_regions(self):
        """Overlapping elements should be deduplicated."""
        page = _white_image()
        _draw_colored_region(page, x=200, y=300, w=200, h=200, color_bgr=(0, 0, 220))
        _draw_colored_region(page, x=250, y=350, w=200, h=200, color_bgr=(0, 0, 220))

        found = detect_graphic_elements(page, word_boxes=[])

        # Heavy dilation should merge the pair into at most two elements.
        assert len(found) <= 2

    def test_graphicelement_dataclass_fields(self):
        """GraphicElement should have all expected fields."""
        elem = GraphicElement(
            x=10, y=20, width=100, height=80,
            area=5000, shape="image",
            color_name="red", color_hex="#dc2626",
            confidence=0.85,
        )
        expected = {
            "x": 10, "y": 20, "width": 100, "height": 80,
            "area": 5000, "shape": "image",
            "color_name": "red", "color_hex": "#dc2626",
            "confidence": 0.85,
        }
        for field_name, value in expected.items():
            assert getattr(elem, field_name) == value
        assert elem.contour is None

    def test_small_ink_area_filtered(self):
        """Black ink areas smaller than 5000px should be filtered."""
        page = _white_image()
        # 50x50 = 2500 px² of black — under the 5000 threshold.
        _draw_black_illustration(page, x=500, y=500, w=50, h=50)

        found = detect_graphic_elements(page, word_boxes=[])

        illustrations = [g for g in found if g.shape == "illustration"]
        assert len(illustrations) == 0
|
||||
Reference in New Issue
Block a user