diff --git a/admin-lehrer/app/(admin)/ai/ocr-pipeline/types.ts b/admin-lehrer/app/(admin)/ai/ocr-pipeline/types.ts index 2a205cd..ea0ba01 100644 --- a/admin-lehrer/app/(admin)/ai/ocr-pipeline/types.ts +++ b/admin-lehrer/app/(admin)/ai/ocr-pipeline/types.ts @@ -219,7 +219,7 @@ export interface StructureGraphic { w: number h: number area: number - shape: string // arrow, circle, line, exclamation, dot, icon, illustration + shape: string // image, illustration color_name: string color_hex: string confidence: number @@ -235,6 +235,7 @@ export interface StructureResult { color_pixel_counts: Record has_words: boolean word_count: number + border_ghosts_removed?: number duration_seconds: number } diff --git a/admin-lehrer/components/ocr-pipeline/StepReconstruction.tsx b/admin-lehrer/components/ocr-pipeline/StepReconstruction.tsx index a0a1cdc..d4f2ce5 100644 --- a/admin-lehrer/components/ocr-pipeline/StepReconstruction.tsx +++ b/admin-lehrer/components/ocr-pipeline/StepReconstruction.tsx @@ -2,7 +2,7 @@ import { useCallback, useEffect, useMemo, useRef, useState } from 'react' import dynamic from 'next/dynamic' -import type { GridResult, GridCell, ColumnResult, RowResult, PageZone, PageRegion, RowItem } from '@/app/(admin)/ai/ocr-pipeline/types' +import type { GridResult, GridCell, ColumnResult, RowResult, PageZone, PageRegion, RowItem, StructureResult, StructureBox, StructureGraphic } from '@/app/(admin)/ai/ocr-pipeline/types' import { usePixelWordPositions } from './usePixelWordPositions' const KLAUSUR_API = '/klausur-api' @@ -60,6 +60,9 @@ export function StepReconstruction({ sessionId, onNext }: StepReconstructionProp const [fontScale, setFontScale] = useState(0.7) const [globalBold, setGlobalBold] = useState(false) const [imageRotation, setImageRotation] = useState<0 | 180>(0) + const [structureBoxes, setStructureBoxes] = useState([]) + const [structureGraphics, setStructureGraphics] = useState([]) + const [showStructure, setShowStructure] = useState(true) const reconRef = useRef(null) const [reconWidth, setReconWidth] = useState(0) @@ -92,12 +95,15 @@ export function StepReconstruction({ sessionId, onNext }: StepReconstructionProp // eslint-disable-next-line react-hooks/exhaustive-deps }, [sessionId]) - // Track image natural height for font scaling + // Track image natural dimensions for font scaling and structure layer const handleImageLoad = useCallback(() => { if (imageRef.current) { setImageNaturalH(imageRef.current.naturalHeight) + if (!imageNaturalSize) { + setImageNaturalSize({ w: imageRef.current.naturalWidth, h: imageRef.current.naturalHeight }) + } } - }, []) + }, [imageNaturalSize]) const loadSessionData = async () => { if (!sessionId) return @@ -132,6 +138,13 @@ export function StepReconstruction({ sessionId, onNext }: StepReconstructionProp setUndoStack([]) setRedoStack([]) + // Load structure result (boxes, graphics, colors) + const structureResult: StructureResult | undefined = data.structure_result + if (structureResult) { + setStructureBoxes(structureResult.boxes || []) + setStructureGraphics(structureResult.graphics || []) + } + // Check for parent with boxes (sub-sessions + zones) const columnResult: ColumnResult | undefined = data.column_result const rowResult: RowResult | undefined = data.row_result @@ -517,6 +530,65 @@ export function StepReconstruction({ sessionId, onNext }: StepReconstructionProp return bboxPct } + // Structure layer: boxes and graphic elements as background + const renderStructureLayer = (imgW: number, imgH: number) => { + if (!showStructure) return null + const hasElements = structureBoxes.length > 0 || structureGraphics.length > 0 + if (!hasElements) return null + + return ( + <> + {/* Structure boxes */} + {structureBoxes.map((box, i) => { + const bgColor = box.bg_color_hex || '#6b7280' + return ( +
+ ) + })} + + {/* Graphic elements */} + {structureGraphics.map((g, i) => ( +
+ + {g.shape === 'illustration' ? 'Illust' : 'Bild'} + +
+ ))} + + ) + } + // Overlay rendering helper const renderOverlayMode = () => { const imgW = imageNaturalSize?.w || 1 @@ -597,6 +669,9 @@ export function StepReconstruction({ sessionId, onNext }: StepReconstructionProp ) })} + {/* Structure elements (boxes, graphics) */} + {renderStructureLayer(imgW, imgH)} + {/* Pixel-positioned words / editable inputs */} {cells.map((cell) => { const displayText = getDisplayText(cell) @@ -831,6 +906,19 @@ export function StepReconstruction({ sessionId, onNext }: StepReconstructionProp > 180° + {(structureBoxes.length > 0 || structureGraphics.length > 0) && ( + + )}
)} @@ -851,6 +939,21 @@ export function StepReconstruction({ sessionId, onNext }: StepReconstructionProp Leer + {/* Structure toggle */} + {(structureBoxes.length > 0 || structureGraphics.length > 0) && ( + + )} +
{/* Zoom controls */} @@ -915,6 +1018,9 @@ export function StepReconstruction({ sessionId, onNext }: StepReconstructionProp onLoad={handleImageLoad} /> + {/* Structure elements (boxes, graphics) */} + {imageNaturalSize && renderStructureLayer(imageNaturalSize.w, imageNaturalSize.h)} + {/* Empty field markers */} {showEmptyHighlight && cells .filter(c => emptyCellIds.has(c.cellId)) diff --git a/admin-lehrer/components/ocr-pipeline/StepStructureDetection.tsx b/admin-lehrer/components/ocr-pipeline/StepStructureDetection.tsx index e902d55..73f2e48 100644 --- a/admin-lehrer/components/ocr-pipeline/StepStructureDetection.tsx +++ b/admin-lehrer/components/ocr-pipeline/StepStructureDetection.tsx @@ -165,6 +165,11 @@ export function StepStructureDetection({ sessionId, onNext }: StepStructureDetec {result.word_count} Woerter )} + {(result.border_ghosts_removed ?? 0) > 0 && ( + + {result.border_ghosts_removed} Rahmenlinien entfernt + + )} {result.image_width}x{result.image_height}px | {result.duration_seconds}s diff --git a/docs-src/services/klausur-service/OCR-Pipeline.md b/docs-src/services/klausur-service/OCR-Pipeline.md index 0796a3d..0daf297 100644 --- a/docs-src/services/klausur-service/OCR-Pipeline.md +++ b/docs-src/services/klausur-service/OCR-Pipeline.md @@ -149,6 +149,8 @@ klausur-service/backend/ ├── ocr_pipeline_api.py # FastAPI Router (Schritte 2-10) ├── orientation_crop_api.py # FastAPI Router (Schritte 1 + 4) ├── cv_box_detect.py # Box-Erkennung + Zonen-Aufteilung +├── cv_graphic_detect.py # Grafik-/Bilderkennung (Region-basiert) +├── cv_color_detect.py # Farbtext-Erkennung (HSV-Analyse) ├── cv_words_first.py # Words-First Grid Builder (bottom-up) ├── page_crop.py # Content-basierter Crop-Algorithmus ├── ocr_pipeline_session_store.py # PostgreSQL Persistence @@ -177,7 +179,8 @@ admin-lehrer/ ├── StepColumnDetection.tsx # Schritt 5: Spaltenerkennung ├── StepRowDetection.tsx # Schritt 6: Zeilenerkennung ├── StepWordRecognition.tsx # Schritt 7: Worterkennung - ├── StepLlmReview.tsx # Schritt 8: Korrektur (SSE-Stream) + ├── StepStructureDetection.tsx # Schritt 8: Strukturerkennung + ├── StepLlmReview.tsx # Schritt 9: Korrektur (SSE-Stream) ├── StepReconstruction.tsx # Schritt 9: Rekonstruktion (Canvas + Overlay) ├── usePixelWordPositions.ts # Shared Hook: Pixel-basierte Wortpositionierung ├── FabricReconstructionCanvas.tsx # Fabric.js Editor @@ -281,14 +284,21 @@ Alle Endpoints unter `/api/v1/ocr-pipeline/`. | `skip_heal_gaps` | `false` | Zeilen-Luecken nicht heilen (Overlay-Modus) | | `grid_method` | `v2` | Grid-Strategie: `v2` (top-down) oder `words_first` (bottom-up) | -### Schritt 8: Korrektur +### Schritt 8: Strukturerkennung + +| Methode | Pfad | Beschreibung | +|---------|------|--------------| +| `POST` | `/sessions/{id}/detect-structure` | Boxen, Zonen, Farben und Grafiken erkennen | +| `GET` | `/sessions/{id}/image/structure-overlay` | Overlay mit allen Strukturelementen | + +### Schritt 9: Korrektur | Methode | Pfad | Beschreibung | |---------|------|--------------| | `POST` | `/sessions/{id}/llm-review?stream=true` | SSE-Stream Korrektur starten | | `POST` | `/sessions/{id}/llm-review/apply` | Ausgewaehlte Korrekturen speichern | -### Schritt 9: Rekonstruktion +### Schritt 10: Rekonstruktion | Methode | Pfad | Beschreibung | |---------|------|--------------| @@ -853,6 +863,93 @@ Change-Format: --- +## Schritt 8: Strukturerkennung (Detail) + +Erkennt Boxen, Zonen, Farbregionen und grafische Elemente auf der Seite. +Laeuft **nach** der Worterkennung (Schritt 7), damit OCR-Wortpositionen +fuer die Unterscheidung von Text vs. Grafik zur Verfuegung stehen. + +### Teilschritte + +1. **Box-Erkennung** (`cv_box_detect.py`): Linien-Rahmen und farbige Hintergruende +2. **Zonen-Aufteilung** (`split_page_into_zones`): Seite in Box- und Content-Zonen aufteilen +3. **Farb-Analyse** (`cv_color_detect.py`): HSV-basierte Erkennung farbiger Textbereiche +4. **Grafik-Erkennung** (`cv_graphic_detect.py`): Nicht-Text-Grafiken identifizieren + +### Grafik-Erkennung: Region-basierter Ansatz + +Zwei Paesse trennen farbige Grafiken von farbigem Text und erkennen +schwarze Illustrationen: + +**Pass 1 — Farbige Bildregionen:** + +1. HSV-Saturation-Kanal extrahieren (Schwelle > 40) + - Schwarzer Text hat Saettigung ≈ 0 → unsichtbar auf diesem Kanal +2. Starke Dilation (25×25 Ellipse) verschmilzt nahe Farbpixel zu Regionen +3. Fuer jede Region: Wort-Ueberlappung pruefen + - \> 50 % Ueberlappung mit OCR-Woertern → farbiger Text → ueberspringen + - ≤ 50 % → farbige Grafik/Bild → behalten +4. Minimum 200 Farbpixel erforderlich (kein Rauschen) +5. Regionen > 50 % der Bildbreite oder -hoehe → Seitenumfassend → ueberspringen + +**Pass 2 — Schwarze Illustrationen:** + +1. Otsu-Binarisierung fuer Tinten-Maske +2. Ausschlusszonen: OCR-Woerter (5 px Padding) + erkannte Boxen (8 px Inset) +3. Farbige Pixel aus Pass 1 ebenfalls ausschliessen +4. Nur Konturen mit Flaeche > 5000 px und min(Breite, Hoehe) > 40 px + +**Deduplizierung:** Ueberlappende Elemente (> 50 % IoU der kleineren +Bounding-Box) werden zusammengefasst. Ergebnis nach Flaeche absteigend +sortiert. + +### Response-Format + +```json +{ + "boxes": [ + {"x": 50, "y": 300, "w": 1100, "h": 200, "confidence": 0.85, + "border_thickness": 3, "bg_color_name": "blue", "bg_color_hex": "#2563eb"} + ], + "zones": [ + {"index": 0, "zone_type": "content", "x": 50, "y": 50, "w": 1100, "h": 250}, + {"index": 1, "zone_type": "box", "x": 50, "y": 300, "w": 1100, "h": 200} + ], + "graphics": [ + {"x": 100, "y": 500, "w": 150, "h": 120, "area": 8500, + "shape": "image", "color_name": "red", "color_hex": "#dc2626", + "confidence": 0.72} + ], + "color_pixel_counts": {"red": 1234, "blue": 5678}, + "has_words": true, + "word_count": 96, + "duration_seconds": 0.45 +} +``` + +### Grafik-Shape-Typen + +| Shape | Quelle | Beschreibung | +|-------|--------|--------------| +| `image` | Pass 1 | Farbige Grafik/Bild (Ballons, Pfeile, Icons) | +| `illustration` | Pass 2 | Grosse schwarze Zeichnung/Illustration | + +### Erkannte Farben + +`red`, `orange`, `yellow`, `green`, `blue`, `purple`, `black` +— basierend auf dem Median-Hue der saturierten Pixel in der Region. + +### Frontend-Anzeige + +`StepStructureDetection.tsx` zeigt: + +- Boxen-Liste mit Position, Hintergrundfarbe und Confidence +- Zonen-Uebersicht (Content vs. Box) +- Farb-Zusammenfassung (Pixel-Counts) +- Grafik-Liste mit Shape, Abmessungen, Farbe und Confidence + +--- + ## Schritt 9: Rekonstruktion (Detail) Drei Modi verfuegbar: @@ -1263,6 +1360,7 @@ cd klausur-service/backend && pytest tests/test_paddle_kombi.py -v # 36 Tests | Datum | Version | Aenderung | |-------|---------|----------| +| 2026-03-16 | 4.6.0 | Strukturerkennung (Schritt 8): Region-basierte Grafikerkennung (`cv_graphic_detect.py`) mit Zwei-Pass-Verfahren (Farbregionen + schwarze Illustrationen), Wort-Ueberlappungs-Filter, Box/Zonen/Farb-Analyse. Schritt laeuft nach Worterkennung. | | 2026-03-12 | 4.5.0 | Kombi-Modus (PaddleOCR + Tesseract): Beide Engines laufen parallel, Koordinaten werden IoU-basiert gematcht und confidence-gewichtet gemittelt. Ungematchte Tesseract-Woerter (Bullets, Symbole) werden hinzugefuegt. 3er-Toggle in OCR Overlay. | | 2026-03-12 | 4.4.0 | PaddleOCR Remote-Engine (`engine=paddle`): PP-OCRv5 Latin auf Hetzner x86_64. Neuer Microservice (`paddleocr-service/`), HTTP-Client (`paddleocr_remote.py`), Frontend-Dropdown-Option. Nutzt words_first Grid-Methode. | | 2026-03-12 | 4.3.0 | Words-First Grid Builder (`cv_words_first.py`): Bottom-up-Algorithmus clustert Tesseract word_boxes direkt zu Spalten/Zeilen/Zellen. Neuer `grid_method` Parameter im `/words` Endpoint. Frontend-Toggle in StepWordRecognition. | diff --git a/klausur-service/backend/cv_graphic_detect.py b/klausur-service/backend/cv_graphic_detect.py index 773966a..891d853 100644 --- a/klausur-service/backend/cv_graphic_detect.py +++ b/klausur-service/backend/cv_graphic_detect.py @@ -121,10 +121,9 @@ def detect_graphic_elements( return [] h, w = img_bgr.shape[:2] - img_area = h * w - logger.info("GraphicDetect: image %dx%d, %d word_boxes, %d detected_boxes", - w, h, len(word_boxes), len(detected_boxes or [])) + logger.debug("GraphicDetect: image %dx%d, %d word_boxes, %d detected_boxes", + w, h, len(word_boxes), len(detected_boxes or [])) hsv = cv2.cvtColor(img_bgr, cv2.COLOR_BGR2HSV) candidates: List[GraphicElement] = [] @@ -161,7 +160,7 @@ def detect_graphic_elements( contours_regions, _ = cv2.findContours( region_mask, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE, ) - logger.info("GraphicDetect PASS1: %d color regions after dilation", len(contours_regions)) + logger.debug("GraphicDetect PASS1: %d color regions after dilation", len(contours_regions)) for cnt in contours_regions: bx, by, bw, bh = cv2.boundingRect(cnt) @@ -172,7 +171,7 @@ def detect_graphic_elements( # Skip page-spanning regions if bw > w * 0.5 or bh > h * 0.5: - logger.info("GraphicDetect PASS1 SKIP page-spanning (%d,%d) %dx%d", bx, by, bw, bh) + logger.debug("GraphicDetect PASS1 skip page-spanning (%d,%d) %dx%d", bx, by, bw, bh) continue bbox_area = bw * bh @@ -188,8 +187,8 @@ def detect_graphic_elements( # If most of the region is covered by word boxes → colored text, skip if word_overlap > 0.5: - logger.info("GraphicDetect PASS1 SKIP text region (%d,%d) %dx%d word_overlap=%.0f%%", - bx, by, bw, bh, word_overlap * 100) + logger.debug("GraphicDetect PASS1 skip text region (%d,%d) %dx%d overlap=%.0f%%", + bx, by, bw, bh, word_overlap * 100) continue # Need a minimum number of colored pixels (not just dilated area) @@ -209,8 +208,7 @@ def detect_graphic_elements( density = color_pixel_count / bbox_area if bbox_area > 0 else 0 conf = min(0.95, 0.5 + density * 0.5) - logger.info("GraphicDetect PASS1 ACCEPT image at (%d,%d) %dx%d " - "color_px=%d word_overlap=%.0f%% color=%s", + logger.debug("GraphicDetect PASS1 accept (%d,%d) %dx%d px=%d overlap=%.0f%% %s", bx, by, bw, bh, color_pixel_count, word_overlap * 100, color_name) candidates.append(GraphicElement( x=bx, y=by, width=bw, height=bh, @@ -256,7 +254,7 @@ def detect_graphic_elements( contours_ink, _ = cv2.findContours( ink_only, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE, ) - logger.info("GraphicDetect PASS2 (ink): %d contours", len(contours_ink)) + logger.debug("GraphicDetect PASS2 ink: %d contours", len(contours_ink)) for cnt in contours_ink: area = cv2.contourArea(cnt) @@ -267,8 +265,8 @@ def detect_graphic_elements( if bw > w * 0.8 or bh > h * 0.8: continue - logger.info("GraphicDetect PASS2 ACCEPT illustration at (%d,%d) %dx%d area=%d", - bx, by, bw, bh, int(area)) + logger.debug("GraphicDetect PASS2 accept (%d,%d) %dx%d area=%d", + bx, by, bw, bh, int(area)) candidates.append(GraphicElement( x=bx, y=by, width=bw, height=bh, area=int(area), shape="illustration", diff --git a/klausur-service/backend/ocr_pipeline_api.py b/klausur-service/backend/ocr_pipeline_api.py index 6ba8b91..3fed1d3 100644 --- a/klausur-service/backend/ocr_pipeline_api.py +++ b/klausur-service/backend/ocr_pipeline_api.py @@ -1202,6 +1202,147 @@ async def detect_type(session_id: str): return {"session_id": session_id, **result_dict} +# --------------------------------------------------------------------------- +# Border-ghost word filter +# --------------------------------------------------------------------------- + +# Characters that OCR produces when reading box-border lines. +_BORDER_GHOST_CHARS = set("|1lI![](){}iíì/\\-—–_~.,;:'\"") + + +def _filter_border_ghost_words( + word_result: Dict, + boxes: List, +) -> int: + """Remove OCR words that are actually box border lines. + + A word is considered a border ghost when it sits on a known box edge + (left, right, top, or bottom) and looks like a line artefact (narrow + aspect ratio or text consists only of line-like characters). + + After removing ghost cells, columns that have become empty are also + removed from ``columns_used`` so the grid no longer shows phantom + columns. + + Modifies *word_result* in-place and returns the number of removed cells. + """ + if not boxes or not word_result: + return 0 + + cells = word_result.get("cells") + if not cells: + return 0 + + # Build border bands — vertical (X) and horizontal (Y) + x_bands = [] # list of (x_lo, x_hi) + y_bands = [] # list of (y_lo, y_hi) + for b in boxes: + bx = b.x if hasattr(b, "x") else b.get("x", 0) + by = b.y if hasattr(b, "y") else b.get("y", 0) + bw = b.width if hasattr(b, "width") else b.get("w", b.get("width", 0)) + bh = b.height if hasattr(b, "height") else b.get("h", b.get("height", 0)) + bt = b.border_thickness if hasattr(b, "border_thickness") else b.get("border_thickness", 3) + margin = max(bt * 2, 10) + 6 # generous margin + + # Vertical edges (left / right) + x_bands.append((bx - margin, bx + margin)) + x_bands.append((bx + bw - margin, bx + bw + margin)) + # Horizontal edges (top / bottom) + y_bands.append((by - margin, by + margin)) + y_bands.append((by + bh - margin, by + bh + margin)) + + img_w = word_result.get("image_width", 1) + img_h = word_result.get("image_height", 1) + + def _is_ghost(cell: Dict) -> bool: + text = (cell.get("text") or "").strip() + if not text: + return False + + # Compute absolute pixel position + if cell.get("bbox_px"): + px = cell["bbox_px"] + cx = px["x"] + px["w"] / 2 + cy = px["y"] + px["h"] / 2 + cw = px["w"] + ch = px["h"] + elif cell.get("bbox_pct"): + pct = cell["bbox_pct"] + cx = (pct["x"] / 100) * img_w + (pct["w"] / 100) * img_w / 2 + cy = (pct["y"] / 100) * img_h + (pct["h"] / 100) * img_h / 2 + cw = (pct["w"] / 100) * img_w + ch = (pct["h"] / 100) * img_h + else: + return False + + # Check if center sits on a vertical or horizontal border + on_vertical = any(lo <= cx <= hi for lo, hi in x_bands) + on_horizontal = any(lo <= cy <= hi for lo, hi in y_bands) + if not on_vertical and not on_horizontal: + return False + + # Very short text (1-2 chars) on a border → very likely ghost + if len(text) <= 2: + # Narrow vertically (line-like) or narrow horizontally (dash-like)? + if ch > 0 and cw / ch < 0.5: + return True + if cw > 0 and ch / cw < 0.5: + return True + # Text is only border-ghost characters? + if all(c in _BORDER_GHOST_CHARS for c in text): + return True + + # Longer text but still only ghost chars and very narrow + if all(c in _BORDER_GHOST_CHARS for c in text): + if ch > 0 and cw / ch < 0.35: + return True + if cw > 0 and ch / cw < 0.35: + return True + return True # all ghost chars on a border → remove + + return False + + before = len(cells) + word_result["cells"] = [c for c in cells if not _is_ghost(c)] + removed = before - len(word_result["cells"]) + + # --- Remove empty columns from columns_used --- + columns_used = word_result.get("columns_used") + if removed and columns_used and len(columns_used) > 1: + remaining_cells = word_result["cells"] + occupied_cols = {c.get("col_index") for c in remaining_cells} + before_cols = len(columns_used) + columns_used = [col for col in columns_used if col.get("index") in occupied_cols] + + # Re-index columns and remap cell col_index values + if len(columns_used) < before_cols: + old_to_new = {} + for new_i, col in enumerate(columns_used): + old_to_new[col["index"]] = new_i + col["index"] = new_i + for cell in remaining_cells: + old_ci = cell.get("col_index") + if old_ci in old_to_new: + cell["col_index"] = old_to_new[old_ci] + word_result["columns_used"] = columns_used + logger.info("border-ghost: removed %d empty column(s), %d remaining", + before_cols - len(columns_used), len(columns_used)) + + if removed: + # Update summary counts + summary = word_result.get("summary", {}) + summary["total_cells"] = len(word_result["cells"]) + summary["non_empty_cells"] = sum(1 for c in word_result["cells"] if c.get("text")) + word_result["summary"] = summary + gs = word_result.get("grid_shape", {}) + gs["total_cells"] = len(word_result["cells"]) + if columns_used is not None: + gs["cols"] = len(columns_used) + word_result["grid_shape"] = gs + + return removed + + # --------------------------------------------------------------------------- # Structure Detection Endpoint # --------------------------------------------------------------------------- @@ -1236,10 +1377,6 @@ async def detect_structure(session_id: str): for cell in word_result["cells"]: for wb in (cell.get("word_boxes") or []): words.append(wb) - logger.info("detect-structure: word_result present=%s, cells=%d, word_boxes extracted=%d", - word_result is not None, - len(word_result.get("cells", [])) if word_result else 0, - len(words)) # If no words yet, use image dimensions with small margin if words: content_x = max(0, min(int(wb["left"]) for wb in words)) @@ -1319,6 +1456,15 @@ async def detect_structure(session_id: str): detected_boxes=box_dicts, ) + # --- Filter border-ghost words from OCR result --- + ghost_count = 0 + if boxes and word_result: + ghost_count = _filter_border_ghost_words(word_result, boxes) + if ghost_count: + logger.info("detect-structure: removed %d border-ghost words", ghost_count) + await update_session_db(session_id, word_result=word_result) + cached["word_result"] = word_result + duration = time.time() - t0 result_dict = { @@ -1361,6 +1507,7 @@ async def detect_structure(session_id: str): "color_pixel_counts": color_summary, "has_words": len(words) > 0, "word_count": len(words), + "border_ghosts_removed": ghost_count, "duration_seconds": round(duration, 2), } @@ -1806,12 +1953,7 @@ async def _get_structure_overlay(session_id: str) -> Response: # --- Draw graphic elements --- graphics_data = structure.get("graphics", []) shape_icons = { - "arrow": "ARROW", - "circle": "CIRCLE", - "line": "LINE", - "exclamation": "!", - "dot": "DOT", - "icon": "ICON", + "image": "IMAGE", "illustration": "ILLUST", } for gfx in graphics_data: diff --git a/klausur-service/backend/tests/test_border_ghost_filter.py b/klausur-service/backend/tests/test_border_ghost_filter.py new file mode 100644 index 0000000..99f9c5e --- /dev/null +++ b/klausur-service/backend/tests/test_border_ghost_filter.py @@ -0,0 +1,307 @@ +""" +Tests for _filter_border_ghost_words() — removes OCR artefacts from box borders. + +When OCR reads a scanned document, box border lines (vertical/horizontal +strokes) are often misrecognised as characters like '|', '1', 'l', '-'. +These phantom words create spurious columns/rows in the grid. The filter +removes them by checking if a word sits on a known box border and looks +like a line artefact. + +Lizenz: Apache 2.0 +""" + +import sys +import os + +sys.path.insert(0, os.path.join(os.path.dirname(__file__), "..")) + +from ocr_pipeline_api import _filter_border_ghost_words, _BORDER_GHOST_CHARS +from cv_vocab_types import DetectedBox + + +# --------------------------------------------------------------------------- +# Helpers +# --------------------------------------------------------------------------- + +def _make_cell(text: str, x: int, y: int, w: int, h: int, + col_index: int = 0) -> dict: + """Create a cell dict with bbox_px matching the word recognition output.""" + return { + "cell_id": f"c_{x}_{y}", + "text": text, + "bbox_px": {"x": x, "y": y, "w": w, "h": h}, + "bbox_pct": { + "x": x / 12, "y": y / 18, + "w": w / 12, "h": h / 18, + }, + "confidence": 80, + "row_index": 0, + "col_index": col_index, + } + + +def _make_word_result(cells: list, img_w: int = 1200, img_h: int = 1800, + columns_used: list = None) -> dict: + return { + "cells": cells, + "image_width": img_w, + "image_height": img_h, + "columns_used": columns_used, + "summary": { + "total_cells": len(cells), + "non_empty_cells": sum(1 for c in cells if c.get("text")), + }, + "grid_shape": { + "total_cells": len(cells), + "cols": len(columns_used) if columns_used else 1, + }, + } + + +def _make_box(x: int, y: int, w: int, h: int, bt: int = 3) -> DetectedBox: + return DetectedBox(x=x, y=y, width=w, height=h, confidence=0.9, border_thickness=bt) + + +# --------------------------------------------------------------------------- +# Basic filtering tests +# --------------------------------------------------------------------------- + +class TestBorderGhostFilter: + """Tests for the _filter_border_ghost_words() function.""" + + def test_no_boxes_no_change(self): + """Without boxes, nothing should be filtered.""" + cells = [_make_cell("hello", 100, 200, 80, 30)] + wr = _make_word_result(cells) + removed = _filter_border_ghost_words(wr, []) + assert removed == 0 + assert len(wr["cells"]) == 1 + + def test_no_word_result_no_crash(self): + removed = _filter_border_ghost_words(None, [_make_box(50, 300, 1100, 200)]) + assert removed == 0 + + def test_empty_cells_no_crash(self): + wr = _make_word_result([]) + removed = _filter_border_ghost_words(wr, [_make_box(50, 300, 1100, 200)]) + assert removed == 0 + + def test_pipe_on_left_border_removed(self): + """A '|' character sitting on the left border of a box should be removed.""" + box = _make_box(x=50, y=300, w=1100, h=200, bt=3) + cells = [ + _make_cell("|", x=48, y=350, w=3, h=25), + _make_cell("hello", x=200, y=350, w=80, h=25), + ] + wr = _make_word_result(cells) + removed = _filter_border_ghost_words(wr, [box]) + assert removed == 1 + assert wr["cells"][0]["text"] == "hello" + + def test_pipe_on_right_border_removed(self): + """A '|' character on the right border should be removed.""" + box = _make_box(x=50, y=300, w=1100, h=200, bt=3) + cells = [ + _make_cell("|", x=1148, y=350, w=4, h=25), + _make_cell("world", x=600, y=350, w=80, h=25), + ] + wr = _make_word_result(cells) + removed = _filter_border_ghost_words(wr, [box]) + assert removed == 1 + assert wr["cells"][0]["text"] == "world" + + def test_digit_1_on_border_narrow_removed(self): + """A narrow '1' on a box border should be removed.""" + box = _make_box(x=50, y=300, w=1100, h=200, bt=3) + cells = [_make_cell("1", x=49, y=400, w=5, h=20)] + wr = _make_word_result(cells) + removed = _filter_border_ghost_words(wr, [box]) + assert removed == 1 + + def test_dash_on_horizontal_border_removed(self): + """A '-' on the bottom horizontal border should be removed.""" + box = _make_box(x=50, y=300, w=1100, h=200, bt=3) + # Bottom border at y=500, dash at y=498 + cells = [_make_cell("-", x=600, y=498, w=20, h=4)] + wr = _make_word_result(cells) + removed = _filter_border_ghost_words(wr, [box]) + assert removed == 1 + + def test_real_word_on_border_not_removed(self): + """A normal word near a border should NOT be removed.""" + box = _make_box(x=50, y=300, w=1100, h=200, bt=3) + cells = [_make_cell("Tip", x=52, y=350, w=60, h=25)] + wr = _make_word_result(cells) + removed = _filter_border_ghost_words(wr, [box]) + assert removed == 0 + + def test_word_far_from_border_not_removed(self): + """Words far from any border should never be removed.""" + box = _make_box(x=50, y=300, w=1100, h=200, bt=3) + cells = [_make_cell("|", x=600, y=400, w=3, h=25)] + wr = _make_word_result(cells) + removed = _filter_border_ghost_words(wr, [box]) + assert removed == 0 + + def test_multiple_ghosts_on_same_box(self): + """Multiple ghost words on the same box should all be removed.""" + box = _make_box(x=50, y=300, w=1100, h=200, bt=3) + cells = [ + _make_cell("|", x=48, y=350, w=3, h=25), + _make_cell("l", x=1149, y=350, w=4, h=25), + _make_cell("text", x=400, y=350, w=80, h=25), + ] + wr = _make_word_result(cells) + removed = _filter_border_ghost_words(wr, [box]) + assert removed == 2 + assert len(wr["cells"]) == 1 + assert wr["cells"][0]["text"] == "text" + + def test_summary_updated_after_removal(self): + box = _make_box(x=50, y=300, w=1100, h=200, bt=3) + cells = [ + _make_cell("|", x=48, y=350, w=3, h=25), + _make_cell("hello", x=200, y=350, w=80, h=25), + ] + wr = _make_word_result(cells) + _filter_border_ghost_words(wr, [box]) + assert wr["summary"]["total_cells"] == 1 + assert wr["grid_shape"]["total_cells"] == 1 + + def test_ghost_chars_covers_common_artefacts(self): + """The ghost chars set should include common border-line OCR artefacts.""" + expected = {"|", "1", "l", "I", "!", "[", "]", "-", "—", "_", "/", "\\"} + assert expected.issubset(_BORDER_GHOST_CHARS) + + def test_multiple_boxes(self): + box1 = _make_box(x=50, y=300, w=500, h=200, bt=3) + box2 = _make_box(x=600, y=300, w=500, h=200, bt=3) + cells = [ + _make_cell("|", x=49, y=350, w=3, h=25), + _make_cell("I", x=599, y=350, w=4, h=25), + _make_cell("real", x=300, y=350, w=80, h=25), + ] + wr = _make_word_result(cells) + removed = _filter_border_ghost_words(wr, [box1, box2]) + assert removed == 2 + + def test_uses_bbox_pct_fallback(self): + """Should work with bbox_pct when bbox_px is not available.""" + box = _make_box(x=50, y=300, w=1100, h=200, bt=3) + cell = { + "cell_id": "c_test", + "text": "|", + "bbox_pct": {"x": (48 / 1200) * 100, "y": (350 / 1800) * 100, + "w": (4 / 1200) * 100, "h": (25 / 1800) * 100}, + "confidence": 80, + "col_index": 0, + } + wr = _make_word_result([cell]) + removed = _filter_border_ghost_words(wr, [box]) + assert removed == 1 + + def test_generous_margin_catches_offset_ghosts(self): + """Even if OCR word is slightly offset from border, it should be caught.""" + box = _make_box(x=50, y=300, w=1100, h=200, bt=3) + # Word 15px away from right border (at x=1135 vs border at x=1150) + cells = [_make_cell("|", x=1135, y=350, w=4, h=25)] + wr = _make_word_result(cells) + removed = _filter_border_ghost_words(wr, [box]) + assert removed == 1 + + +# --------------------------------------------------------------------------- +# Column cleanup tests +# --------------------------------------------------------------------------- + +class TestColumnCleanup: + """Tests for empty column removal after ghost filtering.""" + + def test_empty_column_removed(self): + """After filtering all cells of column 4, it should be removed.""" + box = _make_box(x=50, y=300, w=1100, h=200, bt=3) + cols = [ + {"index": 0, "type": "column_en", "x": 60, "width": 250}, + {"index": 1, "type": "column_de", "x": 320, "width": 250}, + {"index": 2, "type": "column_3", "x": 580, "width": 250}, + {"index": 3, "type": "column_4", "x": 840, "width": 250}, + {"index": 4, "type": "column_5", "x": 1140, "width": 60}, # ghost column + ] + cells = [ + _make_cell("word", x=100, y=350, w=60, h=25, col_index=0), + _make_cell("Wort", x=360, y=350, w=60, h=25, col_index=1), + _make_cell("txt", x=620, y=350, w=50, h=25, col_index=2), + _make_cell("abc", x=880, y=350, w=50, h=25, col_index=3), + _make_cell("|", x=1148, y=350, w=4, h=25, col_index=4), # ghost + _make_cell("l", x=1149, y=400, w=3, h=25, col_index=4), # ghost + ] + wr = _make_word_result(cells, columns_used=cols) + removed = _filter_border_ghost_words(wr, [box]) + + assert removed == 2 + assert len(wr["columns_used"]) == 4 # column 5 removed + assert wr["grid_shape"]["cols"] == 4 + + def test_columns_reindexed_after_removal(self): + """After removing a middle column, indices should be sequential.""" + box = _make_box(x=50, y=300, w=1100, h=200, bt=3) + cols = [ + {"index": 0, "type": "column_1", "x": 60, "width": 200}, + {"index": 1, "type": "column_2", "x": 280, "width": 30}, # border col + {"index": 2, "type": "column_3", "x": 400, "width": 200}, + ] + # Column 1 only has ghost cells + cells = [ + _make_cell("hello", x=100, y=350, w=60, h=25, col_index=0), + # This cell is NOT on a border so it won't be filtered by the ghost filter + # For this test, put a ghost on the box border + _make_cell("|", x=49, y=350, w=3, h=25, col_index=1), + _make_cell("world", x=440, y=350, w=60, h=25, col_index=2), + ] + wr = _make_word_result(cells, columns_used=cols) + _filter_border_ghost_words(wr, [box]) + + # Column 1 should be removed, column 2 becomes column 1 + assert len(wr["columns_used"]) == 2 + assert wr["columns_used"][0]["index"] == 0 + assert wr["columns_used"][1]["index"] == 1 + # Remaining cells should have updated col_index + assert wr["cells"][0]["col_index"] == 0 + assert wr["cells"][1]["col_index"] == 1 + + def test_no_columns_used_no_crash(self): + """If columns_used is None, column cleanup should be skipped.""" + box = _make_box(x=50, y=300, w=1100, h=200, bt=3) + cells = [_make_cell("|", x=48, y=350, w=3, h=25)] + wr = _make_word_result(cells, columns_used=None) + removed = _filter_border_ghost_words(wr, [box]) + assert removed == 1 + + def test_occupied_columns_kept(self): + """Columns that still have cells after filtering should be kept.""" + box = _make_box(x=50, y=300, w=1100, h=200, bt=3) + cols = [ + {"index": 0, "type": "column_en", "x": 60, "width": 250}, + {"index": 1, "type": "column_de", "x": 320, "width": 250}, + ] + cells = [ + _make_cell("word", x=100, y=350, w=60, h=25, col_index=0), + _make_cell("Wort", x=360, y=350, w=60, h=25, col_index=1), + ] + wr = _make_word_result(cells, columns_used=cols) + removed = _filter_border_ghost_words(wr, [box]) + + assert removed == 0 + assert len(wr["columns_used"]) == 2 + + def test_single_column_not_removed(self): + """A single remaining column should never be removed.""" + box = _make_box(x=50, y=300, w=1100, h=200, bt=3) + cols = [{"index": 0, "type": "column_text", "x": 60, "width": 1000}] + cells = [_make_cell("|", x=49, y=350, w=3, h=25, col_index=0)] + wr = _make_word_result(cells, columns_used=cols) + # Even if the only cell is filtered, we don't remove the last column + removed = _filter_border_ghost_words(wr, [box]) + assert removed == 1 + # columns_used should still have 1 entry (we skip cleanup for len <= 1) + assert len(wr["columns_used"]) == 1 diff --git a/klausur-service/backend/tests/test_cv_graphic_detect.py b/klausur-service/backend/tests/test_cv_graphic_detect.py new file mode 100644 index 0000000..1a008f0 --- /dev/null +++ b/klausur-service/backend/tests/test_cv_graphic_detect.py @@ -0,0 +1,320 @@ +""" +Tests for cv_graphic_detect.py — graphic element detection. + +Lizenz: Apache 2.0 +""" + +import numpy as np +import pytest + +import cv2 + +from cv_graphic_detect import detect_graphic_elements, GraphicElement, _dominant_color + + +# --------------------------------------------------------------------------- +# Helpers +# --------------------------------------------------------------------------- + +def _white_image(width: int = 1200, height: int = 1800) -> np.ndarray: + """Create a plain white BGR image.""" + return np.ones((height, width, 3), dtype=np.uint8) * 255 + + +def _draw_colored_circle(img: np.ndarray, cx: int, cy: int, radius: int, + color_bgr: tuple) -> np.ndarray: + """Draw a filled colored circle (simulates a balloon / graphic).""" + cv2.circle(img, (cx, cy), radius, color_bgr, -1) + return img + + +def _draw_colored_region(img: np.ndarray, x: int, y: int, w: int, h: int, + color_bgr: tuple) -> np.ndarray: + """Draw a filled colored rectangle (simulates an image region).""" + cv2.rectangle(img, (x, y), (x + w, y + h), color_bgr, -1) + return img + + +def _draw_black_illustration(img: np.ndarray, x: int, y: int, w: int, h: int) -> np.ndarray: + """Draw a large black filled shape (simulates a black-ink illustration).""" + cv2.rectangle(img, (x, y), (x + w, y + h), (0, 0, 0), -1) + return img + + +def _word_box(left: int, top: int, width: int, height: int) -> dict: + """Create a word box dict matching OCR output format.""" + return {"left": left, "top": top, "width": width, "height": height} + + +# --------------------------------------------------------------------------- +# _dominant_color tests +# --------------------------------------------------------------------------- + +class TestDominantColor: + """Tests for the _dominant_color helper.""" + + def test_empty_array(self): + hsv = np.array([], dtype=np.uint8).reshape(0, 3) + name, hex_val = _dominant_color(hsv) + assert name == "black" + assert hex_val == "#000000" + + def test_low_saturation_returns_black(self): + """Pixels with low saturation should be classified as black.""" + # HSV: H=90 (irrelevant), S=10 (low), V=200 + hsv = np.full((50, 50, 3), [90, 10, 200], dtype=np.uint8) + name, _ = _dominant_color(hsv) + assert name == "black" + + def test_red_hue(self): + """Pixels with hue ~0-10 or ~170+ should be red.""" + hsv = np.full((50, 50, 3), [5, 200, 200], dtype=np.uint8) + name, hex_val = _dominant_color(hsv) + assert name == "red" + assert hex_val == "#dc2626" + + def test_blue_hue(self): + """Pixels with hue ~100 should be blue.""" + hsv = np.full((50, 50, 3), [110, 200, 200], dtype=np.uint8) + name, hex_val = _dominant_color(hsv) + assert name == "blue" + assert hex_val == "#2563eb" + + def test_green_hue(self): + """Pixels with hue ~60 should be green.""" + hsv = np.full((50, 50, 3), [60, 200, 200], dtype=np.uint8) + name, hex_val = _dominant_color(hsv) + assert name == "green" + assert hex_val == "#16a34a" + + def test_yellow_hue(self): + """Pixels with hue ~30 should be yellow.""" + hsv = np.full((50, 50, 3), [30, 200, 200], dtype=np.uint8) + name, hex_val = _dominant_color(hsv) + assert name == "yellow" + + def test_orange_hue(self): + """Pixels with hue ~15 should be orange.""" + hsv = np.full((50, 50, 3), [15, 200, 200], dtype=np.uint8) + name, hex_val = _dominant_color(hsv) + assert name == "orange" + + def test_purple_hue(self): + """Pixels with hue ~140 should be purple.""" + hsv = np.full((50, 50, 3), [140, 200, 200], dtype=np.uint8) + name, hex_val = _dominant_color(hsv) + assert name == "purple" + + +# --------------------------------------------------------------------------- +# detect_graphic_elements tests +# --------------------------------------------------------------------------- + +class TestDetectGraphicElements: + """Tests for the detect_graphic_elements() function.""" + + def test_none_image_returns_empty(self): + """None input should return empty list.""" + result = detect_graphic_elements(None, []) + assert result == [] + + def test_white_image_no_graphics(self): + """A plain white image should produce no graphic elements.""" + img = _white_image() + result = detect_graphic_elements(img, []) + assert result == [] + + def test_colored_region_detected_as_image(self): + """A large colored rectangle should be detected as an image.""" + img = _white_image() + # Draw a large red region (not text-like) + _draw_colored_region(img, x=100, y=300, w=200, h=200, color_bgr=(0, 0, 220)) + + result = detect_graphic_elements(img, word_boxes=[]) + + assert len(result) >= 1 + graphic = result[0] + assert isinstance(graphic, GraphicElement) + assert graphic.shape == "image" + assert graphic.color_name == "red" + assert graphic.confidence > 0 + + def test_colored_text_excluded_by_word_overlap(self): + """Colored regions that overlap heavily with word boxes should be skipped.""" + img = _white_image() + # Draw colored region + _draw_colored_region(img, x=100, y=300, w=400, h=50, color_bgr=(0, 0, 220)) + + # Word boxes covering >50% of the colored region + words = [ + _word_box(100, 300, 200, 50), + _word_box(300, 300, 200, 50), + ] + + result = detect_graphic_elements(img, word_boxes=words) + + # Should be filtered out (word overlap > 50%) + for g in result: + # If anything is detected at that location, overlap check failed + if g.x >= 90 and g.x <= 110 and g.y >= 290 and g.y <= 310: + pytest.fail("Colored text region should be excluded by word overlap") + + def test_colored_graphic_with_low_word_overlap_kept(self): + """A colored region with low word overlap should be kept.""" + img = _white_image() + # Draw a large colored circle + _draw_colored_circle(img, cx=300, cy=400, radius=80, color_bgr=(0, 200, 0)) + + # One small word box overlapping only a tiny portion + words = [_word_box(250, 390, 30, 20)] + + result = detect_graphic_elements(img, word_boxes=words) + + assert len(result) >= 1 + assert result[0].shape == "image" + assert result[0].color_name == "green" + + def test_black_illustration_detected(self): + """A large black filled area should be detected as illustration.""" + img = _white_image() + # Draw a large black rectangle (simulating an illustration) + _draw_black_illustration(img, x=200, y=400, w=300, h=300) + + result = detect_graphic_elements(img, word_boxes=[]) + + assert len(result) >= 1 + illust = [g for g in result if g.shape == "illustration"] + assert len(illust) >= 1 + assert illust[0].color_name == "black" + + def test_black_illustration_excluded_by_word_boxes(self): + """Black ink in word regions should NOT be detected as illustration.""" + img = _white_image() + # Draw black text-like region + _draw_black_illustration(img, x=100, y=300, w=400, h=60) + + # Word boxes covering the same area + words = [ + _word_box(100, 300, 200, 60), + _word_box(300, 300, 200, 60), + ] + + result = detect_graphic_elements(img, word_boxes=words) + + # Should be empty — the word exclusion mask covers the ink + illust = [g for g in result if g.shape == "illustration"] + assert len(illust) == 0 + + def test_tiny_colored_region_filtered(self): + """Very small colored regions (<200 colored pixels) should be filtered.""" + img = _white_image() + # Draw a tiny colored dot (5x5 pixels) + _draw_colored_region(img, x=500, y=500, w=5, h=5, color_bgr=(220, 0, 0)) + + result = detect_graphic_elements(img, word_boxes=[]) + assert result == [] + + def test_page_spanning_region_filtered(self): + """Colored regions spanning >50% of width/height should be skipped.""" + img = _white_image(width=1200, height=1800) + # Draw a region wider than 50% of the image + _draw_colored_region(img, x=50, y=300, w=700, h=100, color_bgr=(0, 0, 220)) + + result = detect_graphic_elements(img, word_boxes=[]) + + # Should be filtered as page-spanning + assert result == [] + + def test_multiple_graphics_detected(self): + """Multiple separate colored regions should all be detected.""" + img = _white_image() + # Three separate colored circles + _draw_colored_circle(img, cx=200, cy=300, radius=60, color_bgr=(0, 0, 220)) + _draw_colored_circle(img, cx=500, cy=300, radius=60, color_bgr=(0, 200, 0)) + _draw_colored_circle(img, cx=200, cy=600, radius=60, color_bgr=(220, 0, 0)) + + result = detect_graphic_elements(img, word_boxes=[]) + + # Should detect at least 2 (some may merge if dilation connects them) + assert len(result) >= 2 + + def test_results_sorted_by_area_descending(self): + """Results should be sorted by area, largest first.""" + img = _white_image() + # Small circle + _draw_colored_circle(img, cx=200, cy=300, radius=30, color_bgr=(0, 0, 220)) + # Large circle + _draw_colored_circle(img, cx=600, cy=800, radius=100, color_bgr=(0, 200, 0)) + + result = detect_graphic_elements(img, word_boxes=[]) + + if len(result) >= 2: + assert result[0].area >= result[1].area + + def test_max_elements_limit(self): + """Should respect max_elements parameter.""" + img = _white_image(width=2000, height=2000) + # Draw many colored regions + for i in range(10): + _draw_colored_circle(img, cx=100 + i * 180, cy=300, radius=40, + color_bgr=(0, 0, 220)) + + result = detect_graphic_elements(img, word_boxes=[], max_elements=3) + + assert len(result) <= 3 + + def test_detected_boxes_excluded_from_ink(self): + """Detected box regions should be excluded from ink illustration detection.""" + img = _white_image() + # Draw a black rectangle well inside the "box" area (8px inset is used) + _draw_black_illustration(img, x=120, y=320, w=360, h=160) + + # Mark the outer box — the 8px inset still covers the drawn region + detected_boxes = [{"x": 100, "y": 300, "w": 400, "h": 200}] + + result = detect_graphic_elements(img, word_boxes=[], detected_boxes=detected_boxes) + + illust = [g for g in result if g.shape == "illustration"] + assert len(illust) == 0 + + def test_deduplication_overlapping_regions(self): + """Overlapping elements should be deduplicated.""" + img = _white_image() + # Two overlapping colored regions + _draw_colored_region(img, x=200, y=300, w=200, h=200, color_bgr=(0, 0, 220)) + _draw_colored_region(img, x=250, y=350, w=200, h=200, color_bgr=(0, 0, 220)) + + result = detect_graphic_elements(img, word_boxes=[]) + + # Should be merged/deduplicated into 1 element (heavy dilation merges them) + assert len(result) <= 2 + + def test_graphicelement_dataclass_fields(self): + """GraphicElement should have all expected fields.""" + elem = GraphicElement( + x=10, y=20, width=100, height=80, + area=5000, shape="image", + color_name="red", color_hex="#dc2626", + confidence=0.85, + ) + assert elem.x == 10 + assert elem.y == 20 + assert elem.width == 100 + assert elem.height == 80 + assert elem.area == 5000 + assert elem.shape == "image" + assert elem.color_name == "red" + assert elem.color_hex == "#dc2626" + assert elem.confidence == 0.85 + assert elem.contour is None + + def test_small_ink_area_filtered(self): + """Black ink areas smaller than 5000px should be filtered.""" + img = _white_image() + # Small black mark (50x50 = 2500 area, below 5000 threshold) + _draw_black_illustration(img, x=500, y=500, w=50, h=50) + + result = detect_graphic_elements(img, word_boxes=[]) + + illust = [g for g in result if g.shape == "illustration"] + assert len(illust) == 0