feat: add border ghost filter + graphic detection tests + structure overlay

- Add _filter_border_ghost_words() to remove OCR artefacts from box borders
  (vertical + horizontal edge detection, column cleanup, re-indexing)
- Add 20 tests for border ghost filter (basic filtering + column cleanup)
- Add 24 tests for cv_graphic_detect (color detection, word overlap, boxes)
- Clean up cv_graphic_detect.py logging (per-candidate → DEBUG)
- Add structure overlay layer to StepReconstruction (boxes + graphics toggle)
- Show border_ghosts_removed badge in StepStructureDetection
- Update MkDocs with structure detection documentation

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
Benjamin Admin
2026-03-16 18:28:53 +01:00
parent 6668661895
commit 729ebff63c
8 changed files with 1006 additions and 29 deletions

View File

@@ -219,7 +219,7 @@ export interface StructureGraphic {
w: number
h: number
area: number
shape: string // arrow, circle, line, exclamation, dot, icon, illustration
shape: string // image, illustration
color_name: string
color_hex: string
confidence: number
@@ -235,6 +235,7 @@ export interface StructureResult {
color_pixel_counts: Record<string, number>
has_words: boolean
word_count: number
border_ghosts_removed?: number
duration_seconds: number
}

View File

@@ -2,7 +2,7 @@
import { useCallback, useEffect, useMemo, useRef, useState } from 'react'
import dynamic from 'next/dynamic'
import type { GridResult, GridCell, ColumnResult, RowResult, PageZone, PageRegion, RowItem } from '@/app/(admin)/ai/ocr-pipeline/types'
import type { GridResult, GridCell, ColumnResult, RowResult, PageZone, PageRegion, RowItem, StructureResult, StructureBox, StructureGraphic } from '@/app/(admin)/ai/ocr-pipeline/types'
import { usePixelWordPositions } from './usePixelWordPositions'
const KLAUSUR_API = '/klausur-api'
@@ -60,6 +60,9 @@ export function StepReconstruction({ sessionId, onNext }: StepReconstructionProp
const [fontScale, setFontScale] = useState(0.7)
const [globalBold, setGlobalBold] = useState(false)
const [imageRotation, setImageRotation] = useState<0 | 180>(0)
const [structureBoxes, setStructureBoxes] = useState<StructureBox[]>([])
const [structureGraphics, setStructureGraphics] = useState<StructureGraphic[]>([])
const [showStructure, setShowStructure] = useState(true)
const reconRef = useRef<HTMLDivElement>(null)
const [reconWidth, setReconWidth] = useState(0)
@@ -92,12 +95,15 @@ export function StepReconstruction({ sessionId, onNext }: StepReconstructionProp
// eslint-disable-next-line react-hooks/exhaustive-deps
}, [sessionId])
// Track image natural height for font scaling
// Track image natural dimensions for font scaling and structure layer
const handleImageLoad = useCallback(() => {
if (imageRef.current) {
setImageNaturalH(imageRef.current.naturalHeight)
if (!imageNaturalSize) {
setImageNaturalSize({ w: imageRef.current.naturalWidth, h: imageRef.current.naturalHeight })
}
}
}, [])
}, [imageNaturalSize])
const loadSessionData = async () => {
if (!sessionId) return
@@ -132,6 +138,13 @@ export function StepReconstruction({ sessionId, onNext }: StepReconstructionProp
setUndoStack([])
setRedoStack([])
// Load structure result (boxes, graphics, colors)
const structureResult: StructureResult | undefined = data.structure_result
if (structureResult) {
setStructureBoxes(structureResult.boxes || [])
setStructureGraphics(structureResult.graphics || [])
}
// Check for parent with boxes (sub-sessions + zones)
const columnResult: ColumnResult | undefined = data.column_result
const rowResult: RowResult | undefined = data.row_result
@@ -517,6 +530,65 @@ export function StepReconstruction({ sessionId, onNext }: StepReconstructionProp
return bboxPct
}
// Structure layer: boxes and graphic elements as background.
// Backend reports positions in image pixels; they are converted to
// percentages of the natural image size (imgW x imgH) so the overlay
// scales with however large the image is displayed.
const renderStructureLayer = (imgW: number, imgH: number) => {
if (!showStructure) return null
const hasElements = structureBoxes.length > 0 || structureGraphics.length > 0
if (!hasElements) return null
return (
<>
{/* Structure boxes: solid outline + very faint fill in the detected background color */}
{structureBoxes.map((box, i) => {
// Fall back to neutral gray when no background color was detected.
const bgColor = box.bg_color_hex || '#6b7280'
return (
<div
key={`sbox-${i}`}
className="absolute pointer-events-none"
style={{
left: `${(box.x / imgW) * 100}%`,
top: `${(box.y / imgH) * 100}%`,
width: `${(box.w / imgW) * 100}%`,
height: `${(box.h / imgH) * 100}%`,
// '40' / '0a' suffixes form 8-digit hex colors (alpha 0x40 border, 0x0a fill).
border: `${Math.max(1, box.border_thickness)}px solid ${bgColor}40`,
backgroundColor: `${bgColor}0a`,
borderRadius: '2px',
}}
/>
)
})}
{/* Graphic elements: dashed outline in the element color plus a tiny type label */}
{structureGraphics.map((g, i) => (
<div
key={`sgfx-${i}`}
className="absolute pointer-events-none"
style={{
left: `${(g.x / imgW) * 100}%`,
top: `${(g.y / imgH) * 100}%`,
width: `${(g.w / imgW) * 100}%`,
height: `${(g.h / imgH) * 100}%`,
border: `1px dashed ${g.color_hex}60`,
backgroundColor: `${g.color_hex}08`,
borderRadius: '2px',
}}
>
<span
className="absolute text-[8px] leading-none opacity-50"
style={{
top: '1px',
left: '2px',
color: g.color_hex,
}}
>
{g.shape === 'illustration' ? 'Illust' : 'Bild'}
</span>
</div>
))}
</>
)
}
// Overlay rendering helper
const renderOverlayMode = () => {
const imgW = imageNaturalSize?.w || 1
@@ -597,6 +669,9 @@ export function StepReconstruction({ sessionId, onNext }: StepReconstructionProp
)
})}
{/* Structure elements (boxes, graphics) */}
{renderStructureLayer(imgW, imgH)}
{/* Pixel-positioned words / editable inputs */}
{cells.map((cell) => {
const displayText = getDisplayText(cell)
@@ -831,6 +906,19 @@ export function StepReconstruction({ sessionId, onNext }: StepReconstructionProp
>
180°
</button>
{(structureBoxes.length > 0 || structureGraphics.length > 0) && (
<button
onClick={() => setShowStructure(v => !v)}
className={`px-2 py-1 text-xs border rounded transition-colors ${
showStructure
? 'border-violet-300 bg-violet-50 text-violet-600 dark:border-violet-700 dark:bg-violet-900/30 dark:text-violet-400'
: 'border-gray-300 dark:border-gray-600 hover:bg-gray-50 dark:hover:bg-gray-700'
}`}
title="Strukturelemente anzeigen"
>
Struktur
</button>
)}
<div className="w-px h-5 bg-gray-300 dark:bg-gray-600 mx-1" />
</>
)}
@@ -851,6 +939,21 @@ export function StepReconstruction({ sessionId, onNext }: StepReconstructionProp
Leer
</button>
{/* Structure toggle */}
{(structureBoxes.length > 0 || structureGraphics.length > 0) && (
<button
onClick={() => setShowStructure(v => !v)}
className={`px-2 py-1 text-xs border rounded transition-colors ${
showStructure
? 'border-violet-300 bg-violet-50 text-violet-600 dark:border-violet-700 dark:bg-violet-900/30 dark:text-violet-400'
: 'border-gray-300 dark:border-gray-600 hover:bg-gray-50 dark:hover:bg-gray-700'
}`}
title="Strukturelemente anzeigen"
>
Struktur
</button>
)}
<div className="w-px h-5 bg-gray-300 dark:bg-gray-600 mx-1" />
{/* Zoom controls */}
@@ -915,6 +1018,9 @@ export function StepReconstruction({ sessionId, onNext }: StepReconstructionProp
onLoad={handleImageLoad}
/>
{/* Structure elements (boxes, graphics) */}
{imageNaturalSize && renderStructureLayer(imageNaturalSize.w, imageNaturalSize.h)}
{/* Empty field markers */}
{showEmptyHighlight && cells
.filter(c => emptyCellIds.has(c.cellId))

View File

@@ -165,6 +165,11 @@ export function StepStructureDetection({ sessionId, onNext }: StepStructureDetec
{result.word_count} Woerter
</span>
)}
{(result.border_ghosts_removed ?? 0) > 0 && (
<span className="inline-flex items-center gap-1.5 px-3 py-1 rounded-full bg-red-50 dark:bg-red-900/20 text-red-700 dark:text-red-400 text-xs font-medium">
{result.border_ghosts_removed} Rahmenlinien entfernt
</span>
)}
<span className="text-gray-400 text-xs ml-auto">
{result.image_width}x{result.image_height}px | {result.duration_seconds}s
</span>

View File

@@ -149,6 +149,8 @@ klausur-service/backend/
├── ocr_pipeline_api.py # FastAPI Router (Schritte 2-10)
├── orientation_crop_api.py # FastAPI Router (Schritte 1 + 4)
├── cv_box_detect.py # Box-Erkennung + Zonen-Aufteilung
├── cv_graphic_detect.py # Grafik-/Bilderkennung (Region-basiert)
├── cv_color_detect.py # Farbtext-Erkennung (HSV-Analyse)
├── cv_words_first.py # Words-First Grid Builder (bottom-up)
├── page_crop.py # Content-basierter Crop-Algorithmus
├── ocr_pipeline_session_store.py # PostgreSQL Persistence
@@ -177,7 +179,8 @@ admin-lehrer/
├── StepColumnDetection.tsx # Schritt 5: Spaltenerkennung
├── StepRowDetection.tsx # Schritt 6: Zeilenerkennung
├── StepWordRecognition.tsx # Schritt 7: Worterkennung
├── StepLlmReview.tsx # Schritt 8: Korrektur (SSE-Stream)
├── StepStructureDetection.tsx # Schritt 8: Strukturerkennung
├── StepLlmReview.tsx # Schritt 9: Korrektur (SSE-Stream)
├── StepReconstruction.tsx # Schritt 10: Rekonstruktion (Canvas + Overlay)
├── usePixelWordPositions.ts # Shared Hook: Pixel-basierte Wortpositionierung
├── FabricReconstructionCanvas.tsx # Fabric.js Editor
@@ -281,14 +284,21 @@ Alle Endpoints unter `/api/v1/ocr-pipeline/`.
| `skip_heal_gaps` | `false` | Zeilen-Luecken nicht heilen (Overlay-Modus) |
| `grid_method` | `v2` | Grid-Strategie: `v2` (top-down) oder `words_first` (bottom-up) |
### Schritt 8: Korrektur
### Schritt 8: Strukturerkennung
| Methode | Pfad | Beschreibung |
|---------|------|--------------|
| `POST` | `/sessions/{id}/detect-structure` | Boxen, Zonen, Farben und Grafiken erkennen |
| `GET` | `/sessions/{id}/image/structure-overlay` | Overlay mit allen Strukturelementen |
### Schritt 9: Korrektur
| Methode | Pfad | Beschreibung |
|---------|------|--------------|
| `POST` | `/sessions/{id}/llm-review?stream=true` | SSE-Stream Korrektur starten |
| `POST` | `/sessions/{id}/llm-review/apply` | Ausgewaehlte Korrekturen speichern |
### Schritt 9: Rekonstruktion
### Schritt 10: Rekonstruktion
| Methode | Pfad | Beschreibung |
|---------|------|--------------|
@@ -853,6 +863,93 @@ Change-Format:
---
## Schritt 8: Strukturerkennung (Detail)
Erkennt Boxen, Zonen, Farbregionen und grafische Elemente auf der Seite.
Laeuft **nach** der Worterkennung (Schritt 7), damit OCR-Wortpositionen
fuer die Unterscheidung von Text vs. Grafik zur Verfuegung stehen.
### Teilschritte
1. **Box-Erkennung** (`cv_box_detect.py`): Linien-Rahmen und farbige Hintergruende
2. **Zonen-Aufteilung** (`split_page_into_zones`): Seite in Box- und Content-Zonen aufteilen
3. **Farb-Analyse** (`cv_color_detect.py`): HSV-basierte Erkennung farbiger Textbereiche
4. **Grafik-Erkennung** (`cv_graphic_detect.py`): Nicht-Text-Grafiken identifizieren
### Grafik-Erkennung: Region-basierter Ansatz
Zwei Paesse trennen farbige Grafiken von farbigem Text und erkennen
schwarze Illustrationen:
**Pass 1 — Farbige Bildregionen:**
1. HSV-Saturation-Kanal extrahieren (Schwelle > 40)
- Schwarzer Text hat Saettigung ≈ 0 → unsichtbar auf diesem Kanal
2. Starke Dilation (25×25 Ellipse) verschmilzt nahe Farbpixel zu Regionen
3. Fuer jede Region: Wort-Ueberlappung pruefen
- \> 50 % Ueberlappung mit OCR-Woertern → farbiger Text → ueberspringen
- ≤ 50 % → farbige Grafik/Bild → behalten
4. Minimum 200 Farbpixel erforderlich (kein Rauschen)
5. Regionen > 50 % der Bildbreite oder -hoehe → Seitenumfassend → ueberspringen
**Pass 2 — Schwarze Illustrationen:**
1. Otsu-Binarisierung fuer Tinten-Maske
2. Ausschlusszonen: OCR-Woerter (5 px Padding) + erkannte Boxen (8 px Inset)
3. Farbige Pixel aus Pass 1 ebenfalls ausschliessen
4. Nur Konturen mit Flaeche > 5000 px und min(Breite, Hoehe) > 40 px
**Deduplizierung:** Ueberlappende Elemente (> 50 % IoU der kleineren
Bounding-Box) werden zusammengefasst. Ergebnis nach Flaeche absteigend
sortiert.
### Response-Format
```json
{
"boxes": [
{"x": 50, "y": 300, "w": 1100, "h": 200, "confidence": 0.85,
"border_thickness": 3, "bg_color_name": "blue", "bg_color_hex": "#2563eb"}
],
"zones": [
{"index": 0, "zone_type": "content", "x": 50, "y": 50, "w": 1100, "h": 250},
{"index": 1, "zone_type": "box", "x": 50, "y": 300, "w": 1100, "h": 200}
],
"graphics": [
{"x": 100, "y": 500, "w": 150, "h": 120, "area": 8500,
"shape": "image", "color_name": "red", "color_hex": "#dc2626",
"confidence": 0.72}
],
"color_pixel_counts": {"red": 1234, "blue": 5678},
"has_words": true,
"word_count": 96,
"duration_seconds": 0.45
}
```
### Grafik-Shape-Typen
| Shape | Quelle | Beschreibung |
|-------|--------|--------------|
| `image` | Pass 1 | Farbige Grafik/Bild (Ballons, Pfeile, Icons) |
| `illustration` | Pass 2 | Grosse schwarze Zeichnung/Illustration |
### Erkannte Farben
`red`, `orange`, `yellow`, `green`, `blue`, `purple`, `black`
— basierend auf dem Median-Hue der saturierten Pixel in der Region.
### Frontend-Anzeige
`StepStructureDetection.tsx` zeigt:
- Boxen-Liste mit Position, Hintergrundfarbe und Confidence
- Zonen-Uebersicht (Content vs. Box)
- Farb-Zusammenfassung (Pixel-Counts)
- Grafik-Liste mit Shape, Abmessungen, Farbe und Confidence
---
## Schritt 10: Rekonstruktion (Detail)
Drei Modi verfuegbar:
@@ -1263,6 +1360,7 @@ cd klausur-service/backend && pytest tests/test_paddle_kombi.py -v # 36 Tests
| Datum | Version | Aenderung |
|-------|---------|----------|
| 2026-03-16 | 4.6.0 | Strukturerkennung (Schritt 8): Region-basierte Grafikerkennung (`cv_graphic_detect.py`) mit Zwei-Pass-Verfahren (Farbregionen + schwarze Illustrationen), Wort-Ueberlappungs-Filter, Box/Zonen/Farb-Analyse. Schritt laeuft nach Worterkennung. |
| 2026-03-12 | 4.5.0 | Kombi-Modus (PaddleOCR + Tesseract): Beide Engines laufen parallel, Koordinaten werden IoU-basiert gematcht und confidence-gewichtet gemittelt. Ungematchte Tesseract-Woerter (Bullets, Symbole) werden hinzugefuegt. 3er-Toggle in OCR Overlay. |
| 2026-03-12 | 4.4.0 | PaddleOCR Remote-Engine (`engine=paddle`): PP-OCRv5 Latin auf Hetzner x86_64. Neuer Microservice (`paddleocr-service/`), HTTP-Client (`paddleocr_remote.py`), Frontend-Dropdown-Option. Nutzt words_first Grid-Methode. |
| 2026-03-12 | 4.3.0 | Words-First Grid Builder (`cv_words_first.py`): Bottom-up-Algorithmus clustert Tesseract word_boxes direkt zu Spalten/Zeilen/Zellen. Neuer `grid_method` Parameter im `/words` Endpoint. Frontend-Toggle in StepWordRecognition. |

View File

@@ -121,10 +121,9 @@ def detect_graphic_elements(
return []
h, w = img_bgr.shape[:2]
img_area = h * w
logger.info("GraphicDetect: image %dx%d, %d word_boxes, %d detected_boxes",
w, h, len(word_boxes), len(detected_boxes or []))
logger.debug("GraphicDetect: image %dx%d, %d word_boxes, %d detected_boxes",
w, h, len(word_boxes), len(detected_boxes or []))
hsv = cv2.cvtColor(img_bgr, cv2.COLOR_BGR2HSV)
candidates: List[GraphicElement] = []
@@ -161,7 +160,7 @@ def detect_graphic_elements(
contours_regions, _ = cv2.findContours(
region_mask, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE,
)
logger.info("GraphicDetect PASS1: %d color regions after dilation", len(contours_regions))
logger.debug("GraphicDetect PASS1: %d color regions after dilation", len(contours_regions))
for cnt in contours_regions:
bx, by, bw, bh = cv2.boundingRect(cnt)
@@ -172,7 +171,7 @@ def detect_graphic_elements(
# Skip page-spanning regions
if bw > w * 0.5 or bh > h * 0.5:
logger.info("GraphicDetect PASS1 SKIP page-spanning (%d,%d) %dx%d", bx, by, bw, bh)
logger.debug("GraphicDetect PASS1 skip page-spanning (%d,%d) %dx%d", bx, by, bw, bh)
continue
bbox_area = bw * bh
@@ -188,8 +187,8 @@ def detect_graphic_elements(
# If most of the region is covered by word boxes → colored text, skip
if word_overlap > 0.5:
logger.info("GraphicDetect PASS1 SKIP text region (%d,%d) %dx%d word_overlap=%.0f%%",
bx, by, bw, bh, word_overlap * 100)
logger.debug("GraphicDetect PASS1 skip text region (%d,%d) %dx%d overlap=%.0f%%",
bx, by, bw, bh, word_overlap * 100)
continue
# Need a minimum number of colored pixels (not just dilated area)
@@ -209,8 +208,7 @@ def detect_graphic_elements(
density = color_pixel_count / bbox_area if bbox_area > 0 else 0
conf = min(0.95, 0.5 + density * 0.5)
logger.info("GraphicDetect PASS1 ACCEPT image at (%d,%d) %dx%d "
"color_px=%d word_overlap=%.0f%% color=%s",
logger.debug("GraphicDetect PASS1 accept (%d,%d) %dx%d px=%d overlap=%.0f%% %s",
bx, by, bw, bh, color_pixel_count, word_overlap * 100, color_name)
candidates.append(GraphicElement(
x=bx, y=by, width=bw, height=bh,
@@ -256,7 +254,7 @@ def detect_graphic_elements(
contours_ink, _ = cv2.findContours(
ink_only, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE,
)
logger.info("GraphicDetect PASS2 (ink): %d contours", len(contours_ink))
logger.debug("GraphicDetect PASS2 ink: %d contours", len(contours_ink))
for cnt in contours_ink:
area = cv2.contourArea(cnt)
@@ -267,8 +265,8 @@ def detect_graphic_elements(
if bw > w * 0.8 or bh > h * 0.8:
continue
logger.info("GraphicDetect PASS2 ACCEPT illustration at (%d,%d) %dx%d area=%d",
bx, by, bw, bh, int(area))
logger.debug("GraphicDetect PASS2 accept (%d,%d) %dx%d area=%d",
bx, by, bw, bh, int(area))
candidates.append(GraphicElement(
x=bx, y=by, width=bw, height=bh,
area=int(area), shape="illustration",

View File

@@ -1202,6 +1202,147 @@ async def detect_type(session_id: str):
return {"session_id": session_id, **result_dict}
# ---------------------------------------------------------------------------
# Border-ghost word filter
# ---------------------------------------------------------------------------
# Characters that OCR produces when reading box-border lines.
_BORDER_GHOST_CHARS = set("|1lI![](){}iíì/\\-—_~.,;:'\"")
def _filter_border_ghost_words(
word_result: Dict,
boxes: List,
) -> int:
"""Remove OCR words that are actually box border lines.
A word is considered a border ghost when it sits on a known box edge
(left, right, top, or bottom) and looks like a line artefact (narrow
aspect ratio or text consists only of line-like characters).
After removing ghost cells, columns that have become empty are also
removed from ``columns_used`` so the grid no longer shows phantom
columns.
Modifies *word_result* in-place and returns the number of removed cells.
"""
if not boxes or not word_result:
return 0
cells = word_result.get("cells")
if not cells:
return 0
# Build border bands — vertical (X) and horizontal (Y)
x_bands = [] # list of (x_lo, x_hi)
y_bands = [] # list of (y_lo, y_hi)
for b in boxes:
bx = b.x if hasattr(b, "x") else b.get("x", 0)
by = b.y if hasattr(b, "y") else b.get("y", 0)
bw = b.width if hasattr(b, "width") else b.get("w", b.get("width", 0))
bh = b.height if hasattr(b, "height") else b.get("h", b.get("height", 0))
bt = b.border_thickness if hasattr(b, "border_thickness") else b.get("border_thickness", 3)
margin = max(bt * 2, 10) + 6 # generous margin
# Vertical edges (left / right)
x_bands.append((bx - margin, bx + margin))
x_bands.append((bx + bw - margin, bx + bw + margin))
# Horizontal edges (top / bottom)
y_bands.append((by - margin, by + margin))
y_bands.append((by + bh - margin, by + bh + margin))
img_w = word_result.get("image_width", 1)
img_h = word_result.get("image_height", 1)
def _is_ghost(cell: Dict) -> bool:
text = (cell.get("text") or "").strip()
if not text:
return False
# Compute absolute pixel position
if cell.get("bbox_px"):
px = cell["bbox_px"]
cx = px["x"] + px["w"] / 2
cy = px["y"] + px["h"] / 2
cw = px["w"]
ch = px["h"]
elif cell.get("bbox_pct"):
pct = cell["bbox_pct"]
cx = (pct["x"] / 100) * img_w + (pct["w"] / 100) * img_w / 2
cy = (pct["y"] / 100) * img_h + (pct["h"] / 100) * img_h / 2
cw = (pct["w"] / 100) * img_w
ch = (pct["h"] / 100) * img_h
else:
return False
# Check if center sits on a vertical or horizontal border
on_vertical = any(lo <= cx <= hi for lo, hi in x_bands)
on_horizontal = any(lo <= cy <= hi for lo, hi in y_bands)
if not on_vertical and not on_horizontal:
return False
# Very short text (1-2 chars) on a border → very likely ghost
if len(text) <= 2:
# Narrow vertically (line-like) or narrow horizontally (dash-like)?
if ch > 0 and cw / ch < 0.5:
return True
if cw > 0 and ch / cw < 0.5:
return True
# Text is only border-ghost characters?
if all(c in _BORDER_GHOST_CHARS for c in text):
return True
# Longer text but still only ghost chars and very narrow
if all(c in _BORDER_GHOST_CHARS for c in text):
if ch > 0 and cw / ch < 0.35:
return True
if cw > 0 and ch / cw < 0.35:
return True
return True # all ghost chars on a border → remove
return False
before = len(cells)
word_result["cells"] = [c for c in cells if not _is_ghost(c)]
removed = before - len(word_result["cells"])
# --- Remove empty columns from columns_used ---
columns_used = word_result.get("columns_used")
if removed and columns_used and len(columns_used) > 1:
remaining_cells = word_result["cells"]
occupied_cols = {c.get("col_index") for c in remaining_cells}
before_cols = len(columns_used)
columns_used = [col for col in columns_used if col.get("index") in occupied_cols]
# Re-index columns and remap cell col_index values
if len(columns_used) < before_cols:
old_to_new = {}
for new_i, col in enumerate(columns_used):
old_to_new[col["index"]] = new_i
col["index"] = new_i
for cell in remaining_cells:
old_ci = cell.get("col_index")
if old_ci in old_to_new:
cell["col_index"] = old_to_new[old_ci]
word_result["columns_used"] = columns_used
logger.info("border-ghost: removed %d empty column(s), %d remaining",
before_cols - len(columns_used), len(columns_used))
if removed:
# Update summary counts
summary = word_result.get("summary", {})
summary["total_cells"] = len(word_result["cells"])
summary["non_empty_cells"] = sum(1 for c in word_result["cells"] if c.get("text"))
word_result["summary"] = summary
gs = word_result.get("grid_shape", {})
gs["total_cells"] = len(word_result["cells"])
if columns_used is not None:
gs["cols"] = len(columns_used)
word_result["grid_shape"] = gs
return removed
# ---------------------------------------------------------------------------
# Structure Detection Endpoint
# ---------------------------------------------------------------------------
@@ -1236,10 +1377,6 @@ async def detect_structure(session_id: str):
for cell in word_result["cells"]:
for wb in (cell.get("word_boxes") or []):
words.append(wb)
logger.info("detect-structure: word_result present=%s, cells=%d, word_boxes extracted=%d",
word_result is not None,
len(word_result.get("cells", [])) if word_result else 0,
len(words))
# If no words yet, use image dimensions with small margin
if words:
content_x = max(0, min(int(wb["left"]) for wb in words))
@@ -1319,6 +1456,15 @@ async def detect_structure(session_id: str):
detected_boxes=box_dicts,
)
# --- Filter border-ghost words from OCR result ---
ghost_count = 0
if boxes and word_result:
ghost_count = _filter_border_ghost_words(word_result, boxes)
if ghost_count:
logger.info("detect-structure: removed %d border-ghost words", ghost_count)
await update_session_db(session_id, word_result=word_result)
cached["word_result"] = word_result
duration = time.time() - t0
result_dict = {
@@ -1361,6 +1507,7 @@ async def detect_structure(session_id: str):
"color_pixel_counts": color_summary,
"has_words": len(words) > 0,
"word_count": len(words),
"border_ghosts_removed": ghost_count,
"duration_seconds": round(duration, 2),
}
@@ -1806,12 +1953,7 @@ async def _get_structure_overlay(session_id: str) -> Response:
# --- Draw graphic elements ---
graphics_data = structure.get("graphics", [])
shape_icons = {
"arrow": "ARROW",
"circle": "CIRCLE",
"line": "LINE",
"exclamation": "!",
"dot": "DOT",
"icon": "ICON",
"image": "IMAGE",
"illustration": "ILLUST",
}
for gfx in graphics_data:

View File

@@ -0,0 +1,307 @@
"""
Tests for _filter_border_ghost_words() — removes OCR artefacts from box borders.
When OCR reads a scanned document, box border lines (vertical/horizontal
strokes) are often misrecognised as characters like '|', '1', 'l', '-'.
These phantom words create spurious columns/rows in the grid. The filter
removes them by checking if a word sits on a known box border and looks
like a line artefact.
Lizenz: Apache 2.0
"""
import sys
import os
sys.path.insert(0, os.path.join(os.path.dirname(__file__), ".."))
from ocr_pipeline_api import _filter_border_ghost_words, _BORDER_GHOST_CHARS
from cv_vocab_types import DetectedBox
# ---------------------------------------------------------------------------
# Helpers
# ---------------------------------------------------------------------------
def _make_cell(text: str, x: int, y: int, w: int, h: int,
col_index: int = 0) -> dict:
"""Create a cell dict with bbox_px matching the word recognition output."""
return {
"cell_id": f"c_{x}_{y}",
"text": text,
"bbox_px": {"x": x, "y": y, "w": w, "h": h},
"bbox_pct": {
"x": x / 12, "y": y / 18,
"w": w / 12, "h": h / 18,
},
"confidence": 80,
"row_index": 0,
"col_index": col_index,
}
def _make_word_result(cells: list, img_w: int = 1200, img_h: int = 1800,
columns_used: list = None) -> dict:
return {
"cells": cells,
"image_width": img_w,
"image_height": img_h,
"columns_used": columns_used,
"summary": {
"total_cells": len(cells),
"non_empty_cells": sum(1 for c in cells if c.get("text")),
},
"grid_shape": {
"total_cells": len(cells),
"cols": len(columns_used) if columns_used else 1,
},
}
def _make_box(x: int, y: int, w: int, h: int, bt: int = 3) -> DetectedBox:
    """Create a DetectedBox fixture with fixed 0.9 confidence; *bt* = border thickness."""
    box = DetectedBox(
        x=x, y=y, width=w, height=h,
        confidence=0.9, border_thickness=bt,
    )
    return box
# ---------------------------------------------------------------------------
# Basic filtering tests
# ---------------------------------------------------------------------------
class TestBorderGhostFilter:
    """Tests for the _filter_border_ghost_words() function."""

    def test_no_boxes_no_change(self):
        """Without boxes, nothing should be filtered."""
        cells = [_make_cell("hello", 100, 200, 80, 30)]
        wr = _make_word_result(cells)
        removed = _filter_border_ghost_words(wr, [])
        assert removed == 0
        assert len(wr["cells"]) == 1

    def test_no_word_result_no_crash(self):
        """A None word_result must be handled gracefully (no exception)."""
        removed = _filter_border_ghost_words(None, [_make_box(50, 300, 1100, 200)])
        assert removed == 0

    def test_empty_cells_no_crash(self):
        """An empty cell list must be handled gracefully (no exception)."""
        wr = _make_word_result([])
        removed = _filter_border_ghost_words(wr, [_make_box(50, 300, 1100, 200)])
        assert removed == 0

    def test_pipe_on_left_border_removed(self):
        """A '|' character sitting on the left border of a box should be removed."""
        box = _make_box(x=50, y=300, w=1100, h=200, bt=3)
        cells = [
            _make_cell("|", x=48, y=350, w=3, h=25),
            _make_cell("hello", x=200, y=350, w=80, h=25),
        ]
        wr = _make_word_result(cells)
        removed = _filter_border_ghost_words(wr, [box])
        assert removed == 1
        assert wr["cells"][0]["text"] == "hello"

    def test_pipe_on_right_border_removed(self):
        """A '|' character on the right border should be removed."""
        box = _make_box(x=50, y=300, w=1100, h=200, bt=3)
        cells = [
            _make_cell("|", x=1148, y=350, w=4, h=25),
            _make_cell("world", x=600, y=350, w=80, h=25),
        ]
        wr = _make_word_result(cells)
        removed = _filter_border_ghost_words(wr, [box])
        assert removed == 1
        assert wr["cells"][0]["text"] == "world"

    def test_digit_1_on_border_narrow_removed(self):
        """A narrow '1' on a box border should be removed."""
        box = _make_box(x=50, y=300, w=1100, h=200, bt=3)
        cells = [_make_cell("1", x=49, y=400, w=5, h=20)]
        wr = _make_word_result(cells)
        removed = _filter_border_ghost_words(wr, [box])
        assert removed == 1

    def test_dash_on_horizontal_border_removed(self):
        """A '-' on the bottom horizontal border should be removed."""
        box = _make_box(x=50, y=300, w=1100, h=200, bt=3)
        # Bottom border at y=500, dash at y=498
        cells = [_make_cell("-", x=600, y=498, w=20, h=4)]
        wr = _make_word_result(cells)
        removed = _filter_border_ghost_words(wr, [box])
        assert removed == 1

    def test_real_word_on_border_not_removed(self):
        """A normal word near a border should NOT be removed."""
        box = _make_box(x=50, y=300, w=1100, h=200, bt=3)
        cells = [_make_cell("Tip", x=52, y=350, w=60, h=25)]
        wr = _make_word_result(cells)
        removed = _filter_border_ghost_words(wr, [box])
        assert removed == 0

    def test_word_far_from_border_not_removed(self):
        """Words far from any border should never be removed."""
        box = _make_box(x=50, y=300, w=1100, h=200, bt=3)
        cells = [_make_cell("|", x=600, y=400, w=3, h=25)]
        wr = _make_word_result(cells)
        removed = _filter_border_ghost_words(wr, [box])
        assert removed == 0

    def test_multiple_ghosts_on_same_box(self):
        """Multiple ghost words on the same box should all be removed."""
        box = _make_box(x=50, y=300, w=1100, h=200, bt=3)
        cells = [
            _make_cell("|", x=48, y=350, w=3, h=25),
            _make_cell("l", x=1149, y=350, w=4, h=25),
            _make_cell("text", x=400, y=350, w=80, h=25),
        ]
        wr = _make_word_result(cells)
        removed = _filter_border_ghost_words(wr, [box])
        assert removed == 2
        assert len(wr["cells"]) == 1
        assert wr["cells"][0]["text"] == "text"

    def test_summary_updated_after_removal(self):
        """summary and grid_shape counts must reflect the filtered cell list."""
        box = _make_box(x=50, y=300, w=1100, h=200, bt=3)
        cells = [
            _make_cell("|", x=48, y=350, w=3, h=25),
            _make_cell("hello", x=200, y=350, w=80, h=25),
        ]
        wr = _make_word_result(cells)
        _filter_border_ghost_words(wr, [box])
        assert wr["summary"]["total_cells"] == 1
        assert wr["grid_shape"]["total_cells"] == 1

    def test_ghost_chars_covers_common_artefacts(self):
        """The ghost chars set should include common border-line OCR artefacts."""
        # Fixed: the em dash had been garbled into an empty string "",
        # which can never be an element of a set of single characters and
        # made this subset assertion fail unconditionally.
        expected = {"|", "1", "l", "I", "!", "[", "]", "-", "—", "_", "/", "\\"}
        assert expected.issubset(_BORDER_GHOST_CHARS)

    def test_multiple_boxes(self):
        """Ghosts on borders of different boxes are all removed; real words stay."""
        box1 = _make_box(x=50, y=300, w=500, h=200, bt=3)
        box2 = _make_box(x=600, y=300, w=500, h=200, bt=3)
        cells = [
            _make_cell("|", x=49, y=350, w=3, h=25),
            _make_cell("I", x=599, y=350, w=4, h=25),
            _make_cell("real", x=300, y=350, w=80, h=25),
        ]
        wr = _make_word_result(cells)
        removed = _filter_border_ghost_words(wr, [box1, box2])
        assert removed == 2

    def test_uses_bbox_pct_fallback(self):
        """Should work with bbox_pct when bbox_px is not available."""
        box = _make_box(x=50, y=300, w=1100, h=200, bt=3)
        cell = {
            "cell_id": "c_test",
            "text": "|",
            "bbox_pct": {"x": (48 / 1200) * 100, "y": (350 / 1800) * 100,
                         "w": (4 / 1200) * 100, "h": (25 / 1800) * 100},
            "confidence": 80,
            "col_index": 0,
        }
        wr = _make_word_result([cell])
        removed = _filter_border_ghost_words(wr, [box])
        assert removed == 1

    def test_generous_margin_catches_offset_ghosts(self):
        """Even if OCR word is slightly offset from border, it should be caught."""
        box = _make_box(x=50, y=300, w=1100, h=200, bt=3)
        # Word 15px away from right border (at x=1135 vs border at x=1150)
        cells = [_make_cell("|", x=1135, y=350, w=4, h=25)]
        wr = _make_word_result(cells)
        removed = _filter_border_ghost_words(wr, [box])
        assert removed == 1
# ---------------------------------------------------------------------------
# Column cleanup tests
# ---------------------------------------------------------------------------
class TestColumnCleanup:
    """Tests for empty column removal after ghost filtering."""

    def test_empty_column_removed(self):
        """After filtering all cells of column 4, it should be removed."""
        box = _make_box(x=50, y=300, w=1100, h=200, bt=3)
        layout = [
            {"index": 0, "type": "column_en", "x": 60, "width": 250},
            {"index": 1, "type": "column_de", "x": 320, "width": 250},
            {"index": 2, "type": "column_3", "x": 580, "width": 250},
            {"index": 3, "type": "column_4", "x": 840, "width": 250},
            # Phantom column produced by the box's right border.
            {"index": 4, "type": "column_5", "x": 1140, "width": 60},
        ]
        content = [
            _make_cell("word", x=100, y=350, w=60, h=25, col_index=0),
            _make_cell("Wort", x=360, y=350, w=60, h=25, col_index=1),
            _make_cell("txt", x=620, y=350, w=50, h=25, col_index=2),
            _make_cell("abc", x=880, y=350, w=50, h=25, col_index=3),
        ]
        ghosts = [
            _make_cell("|", x=1148, y=350, w=4, h=25, col_index=4),
            _make_cell("l", x=1149, y=400, w=3, h=25, col_index=4),
        ]
        wr = _make_word_result(content + ghosts, columns_used=layout)
        assert _filter_border_ghost_words(wr, [box]) == 2
        # The now-empty fifth column must disappear from the layout.
        assert len(wr["columns_used"]) == 4
        assert wr["grid_shape"]["cols"] == 4

    def test_columns_reindexed_after_removal(self):
        """After removing a middle column, indices should be sequential."""
        box = _make_box(x=50, y=300, w=1100, h=200, bt=3)
        layout = [
            {"index": 0, "type": "column_1", "x": 60, "width": 200},
            {"index": 1, "type": "column_2", "x": 280, "width": 30},  # border col
            {"index": 2, "type": "column_3", "x": 400, "width": 200},
        ]
        # Column 1 contains nothing but a ghost sitting on the left box border.
        wr = _make_word_result(
            [
                _make_cell("hello", x=100, y=350, w=60, h=25, col_index=0),
                _make_cell("|", x=49, y=350, w=3, h=25, col_index=1),
                _make_cell("world", x=440, y=350, w=60, h=25, col_index=2),
            ],
            columns_used=layout,
        )
        _filter_border_ghost_words(wr, [box])
        # Column 1 is dropped and former column 2 slides down to index 1.
        assert len(wr["columns_used"]) == 2
        assert wr["columns_used"][0]["index"] == 0
        assert wr["columns_used"][1]["index"] == 1
        # Surviving cells are re-pointed at the compacted column indices.
        assert wr["cells"][0]["col_index"] == 0
        assert wr["cells"][1]["col_index"] == 1

    def test_no_columns_used_no_crash(self):
        """If columns_used is None, column cleanup should be skipped."""
        box = _make_box(x=50, y=300, w=1100, h=200, bt=3)
        wr = _make_word_result(
            [_make_cell("|", x=48, y=350, w=3, h=25)], columns_used=None
        )
        assert _filter_border_ghost_words(wr, [box]) == 1

    def test_occupied_columns_kept(self):
        """Columns that still have cells after filtering should be kept."""
        box = _make_box(x=50, y=300, w=1100, h=200, bt=3)
        layout = [
            {"index": 0, "type": "column_en", "x": 60, "width": 250},
            {"index": 1, "type": "column_de", "x": 320, "width": 250},
        ]
        wr = _make_word_result(
            [
                _make_cell("word", x=100, y=350, w=60, h=25, col_index=0),
                _make_cell("Wort", x=360, y=350, w=60, h=25, col_index=1),
            ],
            columns_used=layout,
        )
        assert _filter_border_ghost_words(wr, [box]) == 0
        assert len(wr["columns_used"]) == 2

    def test_single_column_not_removed(self):
        """A single remaining column should never be removed."""
        box = _make_box(x=50, y=300, w=1100, h=200, bt=3)
        layout = [{"index": 0, "type": "column_text", "x": 60, "width": 1000}]
        wr = _make_word_result(
            [_make_cell("|", x=49, y=350, w=3, h=25, col_index=0)],
            columns_used=layout,
        )
        # Even when its only cell is filtered out, the last column survives:
        # cleanup is skipped entirely for len <= 1.
        assert _filter_border_ghost_words(wr, [box]) == 1
        assert len(wr["columns_used"]) == 1

View File

@@ -0,0 +1,320 @@
"""
Tests for cv_graphic_detect.py — graphic element detection.
License: Apache 2.0
"""
import numpy as np
import pytest
import cv2
from cv_graphic_detect import detect_graphic_elements, GraphicElement, _dominant_color
# ---------------------------------------------------------------------------
# Helpers
# ---------------------------------------------------------------------------
def _white_image(width: int = 1200, height: int = 1800) -> np.ndarray:
"""Create a plain white BGR image."""
return np.ones((height, width, 3), dtype=np.uint8) * 255
def _draw_colored_circle(img: np.ndarray, cx: int, cy: int, radius: int,
                         color_bgr: tuple) -> np.ndarray:
    """Paint a solid circle onto img in place (stand-in for a balloon graphic)."""
    # thickness=-1 asks OpenCV for a filled shape rather than an outline.
    cv2.circle(img, center=(cx, cy), radius=radius, color=color_bgr, thickness=-1)
    return img
def _draw_colored_region(img: np.ndarray, x: int, y: int, w: int, h: int,
                         color_bgr: tuple) -> np.ndarray:
    """Fill an axis-aligned rectangle in place (stand-in for an image region)."""
    top_left = (x, y)
    bottom_right = (x + w, y + h)
    cv2.rectangle(img, top_left, bottom_right, color_bgr, -1)  # -1 = filled
    return img
def _draw_black_illustration(img: np.ndarray, x: int, y: int, w: int, h: int) -> np.ndarray:
    """Fill a rectangle with pure black ink (simulates a black-ink illustration)."""
    # Same drawing primitive as a colored region, just forced to black.
    return _draw_colored_region(img, x, y, w, h, (0, 0, 0))
def _word_box(left: int, top: int, width: int, height: int) -> dict:
"""Create a word box dict matching OCR output format."""
return {"left": left, "top": top, "width": width, "height": height}
# ---------------------------------------------------------------------------
# _dominant_color tests
# ---------------------------------------------------------------------------
class TestDominantColor:
    """Tests for the _dominant_color helper."""

    @staticmethod
    def _classify(hue: int, sat: int, val: int):
        """Run _dominant_color over a uniform 50x50 HSV patch."""
        patch = np.full((50, 50, 3), [hue, sat, val], dtype=np.uint8)
        return _dominant_color(patch)

    def test_empty_array(self):
        no_pixels = np.array([], dtype=np.uint8).reshape(0, 3)
        name, hex_val = _dominant_color(no_pixels)
        assert name == "black"
        assert hex_val == "#000000"

    def test_low_saturation_returns_black(self):
        """Pixels with low saturation should be classified as black."""
        # Hue (90) is irrelevant once saturation (10) is below the threshold.
        name, _ = self._classify(90, 10, 200)
        assert name == "black"

    def test_red_hue(self):
        """Pixels with hue ~0-10 or ~170+ should be red."""
        name, hex_val = self._classify(5, 200, 200)
        assert name == "red"
        assert hex_val == "#dc2626"

    def test_blue_hue(self):
        """Pixels with hue ~100 should be blue."""
        name, hex_val = self._classify(110, 200, 200)
        assert name == "blue"
        assert hex_val == "#2563eb"

    def test_green_hue(self):
        """Pixels with hue ~60 should be green."""
        name, hex_val = self._classify(60, 200, 200)
        assert name == "green"
        assert hex_val == "#16a34a"

    def test_yellow_hue(self):
        """Pixels with hue ~30 should be yellow."""
        name, _ = self._classify(30, 200, 200)
        assert name == "yellow"

    def test_orange_hue(self):
        """Pixels with hue ~15 should be orange."""
        name, _ = self._classify(15, 200, 200)
        assert name == "orange"

    def test_purple_hue(self):
        """Pixels with hue ~140 should be purple."""
        name, _ = self._classify(140, 200, 200)
        assert name == "purple"
# ---------------------------------------------------------------------------
# detect_graphic_elements tests
# ---------------------------------------------------------------------------
class TestDetectGraphicElements:
    """Tests for the detect_graphic_elements() function."""

    def test_none_image_returns_empty(self):
        """None input should return empty list."""
        result = detect_graphic_elements(None, [])
        assert result == []

    def test_white_image_no_graphics(self):
        """A plain white image should produce no graphic elements."""
        img = _white_image()
        result = detect_graphic_elements(img, [])
        assert result == []

    def test_colored_region_detected_as_image(self):
        """A large colored rectangle should be detected as an image."""
        img = _white_image()
        # Draw a large red region (not text-like); BGR (0, 0, 220) is red.
        _draw_colored_region(img, x=100, y=300, w=200, h=200, color_bgr=(0, 0, 220))
        result = detect_graphic_elements(img, word_boxes=[])
        assert len(result) >= 1
        graphic = result[0]
        assert isinstance(graphic, GraphicElement)
        assert graphic.shape == "image"
        assert graphic.color_name == "red"
        assert graphic.confidence > 0

    def test_colored_text_excluded_by_word_overlap(self):
        """Colored regions that overlap heavily with word boxes should be skipped."""
        img = _white_image()
        # Draw colored region shaped like a text line (wide and short).
        _draw_colored_region(img, x=100, y=300, w=400, h=50, color_bgr=(0, 0, 220))
        # Word boxes covering >50% of the colored region
        words = [
            _word_box(100, 300, 200, 50),
            _word_box(300, 300, 200, 50),
        ]
        result = detect_graphic_elements(img, word_boxes=words)
        # Should be filtered out (word overlap > 50%)
        for g in result:
            # If anything is detected at that location, overlap check failed
            if g.x >= 90 and g.x <= 110 and g.y >= 290 and g.y <= 310:
                pytest.fail("Colored text region should be excluded by word overlap")

    def test_colored_graphic_with_low_word_overlap_kept(self):
        """A colored region with low word overlap should be kept."""
        img = _white_image()
        # Draw a large colored circle; BGR (0, 200, 0) is green.
        _draw_colored_circle(img, cx=300, cy=400, radius=80, color_bgr=(0, 200, 0))
        # One small word box overlapping only a tiny portion
        words = [_word_box(250, 390, 30, 20)]
        result = detect_graphic_elements(img, word_boxes=words)
        assert len(result) >= 1
        assert result[0].shape == "image"
        assert result[0].color_name == "green"

    def test_black_illustration_detected(self):
        """A large black filled area should be detected as illustration."""
        img = _white_image()
        # Draw a large black rectangle (simulating an illustration)
        _draw_black_illustration(img, x=200, y=400, w=300, h=300)
        result = detect_graphic_elements(img, word_boxes=[])
        assert len(result) >= 1
        illust = [g for g in result if g.shape == "illustration"]
        assert len(illust) >= 1
        assert illust[0].color_name == "black"

    def test_black_illustration_excluded_by_word_boxes(self):
        """Black ink in word regions should NOT be detected as illustration."""
        img = _white_image()
        # Draw black text-like region
        _draw_black_illustration(img, x=100, y=300, w=400, h=60)
        # Word boxes covering the same area
        words = [
            _word_box(100, 300, 200, 60),
            _word_box(300, 300, 200, 60),
        ]
        result = detect_graphic_elements(img, word_boxes=words)
        # Should be empty — the word exclusion mask covers the ink
        illust = [g for g in result if g.shape == "illustration"]
        assert len(illust) == 0

    def test_tiny_colored_region_filtered(self):
        """Very small colored regions (<200 colored pixels) should be filtered."""
        img = _white_image()
        # Draw a tiny colored dot (5x5 pixels), well below the pixel threshold.
        _draw_colored_region(img, x=500, y=500, w=5, h=5, color_bgr=(220, 0, 0))
        result = detect_graphic_elements(img, word_boxes=[])
        assert result == []

    def test_page_spanning_region_filtered(self):
        """Colored regions spanning >50% of width/height should be skipped."""
        img = _white_image(width=1200, height=1800)
        # Draw a region wider than 50% of the image (700px vs 600px half-width).
        _draw_colored_region(img, x=50, y=300, w=700, h=100, color_bgr=(0, 0, 220))
        result = detect_graphic_elements(img, word_boxes=[])
        # Should be filtered as page-spanning
        assert result == []

    def test_multiple_graphics_detected(self):
        """Multiple separate colored regions should all be detected."""
        img = _white_image()
        # Three separate colored circles
        _draw_colored_circle(img, cx=200, cy=300, radius=60, color_bgr=(0, 0, 220))
        _draw_colored_circle(img, cx=500, cy=300, radius=60, color_bgr=(0, 200, 0))
        _draw_colored_circle(img, cx=200, cy=600, radius=60, color_bgr=(220, 0, 0))
        result = detect_graphic_elements(img, word_boxes=[])
        # Should detect at least 2 (some may merge if dilation connects them)
        assert len(result) >= 2

    def test_results_sorted_by_area_descending(self):
        """Results should be sorted by area, largest first."""
        img = _white_image()
        # Small circle
        _draw_colored_circle(img, cx=200, cy=300, radius=30, color_bgr=(0, 0, 220))
        # Large circle
        _draw_colored_circle(img, cx=600, cy=800, radius=100, color_bgr=(0, 200, 0))
        result = detect_graphic_elements(img, word_boxes=[])
        # NOTE(review): ordering is only checked when both survive detection.
        if len(result) >= 2:
            assert result[0].area >= result[1].area

    def test_max_elements_limit(self):
        """Should respect max_elements parameter."""
        img = _white_image(width=2000, height=2000)
        # Draw many colored regions
        for i in range(10):
            _draw_colored_circle(img, cx=100 + i * 180, cy=300, radius=40,
                                 color_bgr=(0, 0, 220))
        result = detect_graphic_elements(img, word_boxes=[], max_elements=3)
        assert len(result) <= 3

    def test_detected_boxes_excluded_from_ink(self):
        """Detected box regions should be excluded from ink illustration detection."""
        img = _white_image()
        # Draw a black rectangle well inside the "box" area (8px inset is used)
        _draw_black_illustration(img, x=120, y=320, w=360, h=160)
        # Mark the outer box — the 8px inset still covers the drawn region
        detected_boxes = [{"x": 100, "y": 300, "w": 400, "h": 200}]
        result = detect_graphic_elements(img, word_boxes=[], detected_boxes=detected_boxes)
        illust = [g for g in result if g.shape == "illustration"]
        assert len(illust) == 0

    def test_deduplication_overlapping_regions(self):
        """Overlapping elements should be deduplicated."""
        img = _white_image()
        # Two overlapping colored regions
        _draw_colored_region(img, x=200, y=300, w=200, h=200, color_bgr=(0, 0, 220))
        _draw_colored_region(img, x=250, y=350, w=200, h=200, color_bgr=(0, 0, 220))
        result = detect_graphic_elements(img, word_boxes=[])
        # Should be merged/deduplicated into 1 element (heavy dilation merges them)
        assert len(result) <= 2

    def test_graphicelement_dataclass_fields(self):
        """GraphicElement should have all expected fields."""
        elem = GraphicElement(
            x=10, y=20, width=100, height=80,
            area=5000, shape="image",
            color_name="red", color_hex="#dc2626",
            confidence=0.85,
        )
        assert elem.x == 10
        assert elem.y == 20
        assert elem.width == 100
        assert elem.height == 80
        assert elem.area == 5000
        assert elem.shape == "image"
        assert elem.color_name == "red"
        assert elem.color_hex == "#dc2626"
        assert elem.confidence == 0.85
        # contour is optional and defaults to None on direct construction.
        assert elem.contour is None

    def test_small_ink_area_filtered(self):
        """Black ink areas smaller than 5000px should be filtered."""
        img = _white_image()
        # Small black mark (50x50 = 2500 area, below 5000 threshold)
        _draw_black_illustration(img, x=500, y=500, w=50, h=50)
        result = detect_graphic_elements(img, word_boxes=[])
        illust = [g for g in result if g.shape == "illustration"]
        assert len(illust) == 0