feat: add border ghost filter + graphic detection tests + structure overlay
- Add _filter_border_ghost_words() to remove OCR artefacts from box borders (vertical + horizontal edge detection, column cleanup, re-indexing) - Add 20 tests for border ghost filter (basic filtering + column cleanup) - Add 24 tests for cv_graphic_detect (color detection, word overlap, boxes) - Clean up cv_graphic_detect.py logging (per-candidate → DEBUG) - Add structure overlay layer to StepReconstruction (boxes + graphics toggle) - Show border_ghosts_removed badge in StepStructureDetection - Update MkDocs with structure detection documentation Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -219,7 +219,7 @@ export interface StructureGraphic {
|
||||
w: number
|
||||
h: number
|
||||
area: number
|
||||
shape: string // arrow, circle, line, exclamation, dot, icon, illustration
|
||||
shape: string // image, illustration
|
||||
color_name: string
|
||||
color_hex: string
|
||||
confidence: number
|
||||
@@ -235,6 +235,7 @@ export interface StructureResult {
|
||||
color_pixel_counts: Record<string, number>
|
||||
has_words: boolean
|
||||
word_count: number
|
||||
border_ghosts_removed?: number
|
||||
duration_seconds: number
|
||||
}
|
||||
|
||||
|
||||
@@ -2,7 +2,7 @@
|
||||
|
||||
import { useCallback, useEffect, useMemo, useRef, useState } from 'react'
|
||||
import dynamic from 'next/dynamic'
|
||||
import type { GridResult, GridCell, ColumnResult, RowResult, PageZone, PageRegion, RowItem } from '@/app/(admin)/ai/ocr-pipeline/types'
|
||||
import type { GridResult, GridCell, ColumnResult, RowResult, PageZone, PageRegion, RowItem, StructureResult, StructureBox, StructureGraphic } from '@/app/(admin)/ai/ocr-pipeline/types'
|
||||
import { usePixelWordPositions } from './usePixelWordPositions'
|
||||
|
||||
const KLAUSUR_API = '/klausur-api'
|
||||
@@ -60,6 +60,9 @@ export function StepReconstruction({ sessionId, onNext }: StepReconstructionProp
|
||||
const [fontScale, setFontScale] = useState(0.7)
|
||||
const [globalBold, setGlobalBold] = useState(false)
|
||||
const [imageRotation, setImageRotation] = useState<0 | 180>(0)
|
||||
const [structureBoxes, setStructureBoxes] = useState<StructureBox[]>([])
|
||||
const [structureGraphics, setStructureGraphics] = useState<StructureGraphic[]>([])
|
||||
const [showStructure, setShowStructure] = useState(true)
|
||||
const reconRef = useRef<HTMLDivElement>(null)
|
||||
const [reconWidth, setReconWidth] = useState(0)
|
||||
|
||||
@@ -92,12 +95,15 @@ export function StepReconstruction({ sessionId, onNext }: StepReconstructionProp
|
||||
// eslint-disable-next-line react-hooks/exhaustive-deps
|
||||
}, [sessionId])
|
||||
|
||||
// Track image natural height for font scaling
|
||||
// Track image natural dimensions for font scaling and structure layer
|
||||
const handleImageLoad = useCallback(() => {
|
||||
if (imageRef.current) {
|
||||
setImageNaturalH(imageRef.current.naturalHeight)
|
||||
if (!imageNaturalSize) {
|
||||
setImageNaturalSize({ w: imageRef.current.naturalWidth, h: imageRef.current.naturalHeight })
|
||||
}
|
||||
}
|
||||
}, [])
|
||||
}, [imageNaturalSize])
|
||||
|
||||
const loadSessionData = async () => {
|
||||
if (!sessionId) return
|
||||
@@ -132,6 +138,13 @@ export function StepReconstruction({ sessionId, onNext }: StepReconstructionProp
|
||||
setUndoStack([])
|
||||
setRedoStack([])
|
||||
|
||||
// Load structure result (boxes, graphics, colors)
|
||||
const structureResult: StructureResult | undefined = data.structure_result
|
||||
if (structureResult) {
|
||||
setStructureBoxes(structureResult.boxes || [])
|
||||
setStructureGraphics(structureResult.graphics || [])
|
||||
}
|
||||
|
||||
// Check for parent with boxes (sub-sessions + zones)
|
||||
const columnResult: ColumnResult | undefined = data.column_result
|
||||
const rowResult: RowResult | undefined = data.row_result
|
||||
@@ -517,6 +530,65 @@ export function StepReconstruction({ sessionId, onNext }: StepReconstructionProp
|
||||
return bboxPct
|
||||
}
|
||||
|
||||
// Structure layer: boxes and graphic elements as background
|
||||
const renderStructureLayer = (imgW: number, imgH: number) => {
|
||||
if (!showStructure) return null
|
||||
const hasElements = structureBoxes.length > 0 || structureGraphics.length > 0
|
||||
if (!hasElements) return null
|
||||
|
||||
return (
|
||||
<>
|
||||
{/* Structure boxes */}
|
||||
{structureBoxes.map((box, i) => {
|
||||
const bgColor = box.bg_color_hex || '#6b7280'
|
||||
return (
|
||||
<div
|
||||
key={`sbox-${i}`}
|
||||
className="absolute pointer-events-none"
|
||||
style={{
|
||||
left: `${(box.x / imgW) * 100}%`,
|
||||
top: `${(box.y / imgH) * 100}%`,
|
||||
width: `${(box.w / imgW) * 100}%`,
|
||||
height: `${(box.h / imgH) * 100}%`,
|
||||
border: `${Math.max(1, box.border_thickness)}px solid ${bgColor}40`,
|
||||
backgroundColor: `${bgColor}0a`,
|
||||
borderRadius: '2px',
|
||||
}}
|
||||
/>
|
||||
)
|
||||
})}
|
||||
|
||||
{/* Graphic elements */}
|
||||
{structureGraphics.map((g, i) => (
|
||||
<div
|
||||
key={`sgfx-${i}`}
|
||||
className="absolute pointer-events-none"
|
||||
style={{
|
||||
left: `${(g.x / imgW) * 100}%`,
|
||||
top: `${(g.y / imgH) * 100}%`,
|
||||
width: `${(g.w / imgW) * 100}%`,
|
||||
height: `${(g.h / imgH) * 100}%`,
|
||||
border: `1px dashed ${g.color_hex}60`,
|
||||
backgroundColor: `${g.color_hex}08`,
|
||||
borderRadius: '2px',
|
||||
}}
|
||||
>
|
||||
<span
|
||||
className="absolute text-[8px] leading-none opacity-50"
|
||||
style={{
|
||||
top: '1px',
|
||||
left: '2px',
|
||||
color: g.color_hex,
|
||||
}}
|
||||
>
|
||||
{g.shape === 'illustration' ? 'Illust' : 'Bild'}
|
||||
</span>
|
||||
</div>
|
||||
))}
|
||||
</>
|
||||
)
|
||||
}
|
||||
|
||||
// Overlay rendering helper
|
||||
const renderOverlayMode = () => {
|
||||
const imgW = imageNaturalSize?.w || 1
|
||||
@@ -597,6 +669,9 @@ export function StepReconstruction({ sessionId, onNext }: StepReconstructionProp
|
||||
)
|
||||
})}
|
||||
|
||||
{/* Structure elements (boxes, graphics) */}
|
||||
{renderStructureLayer(imgW, imgH)}
|
||||
|
||||
{/* Pixel-positioned words / editable inputs */}
|
||||
{cells.map((cell) => {
|
||||
const displayText = getDisplayText(cell)
|
||||
@@ -831,6 +906,19 @@ export function StepReconstruction({ sessionId, onNext }: StepReconstructionProp
|
||||
>
|
||||
180°
|
||||
</button>
|
||||
{(structureBoxes.length > 0 || structureGraphics.length > 0) && (
|
||||
<button
|
||||
onClick={() => setShowStructure(v => !v)}
|
||||
className={`px-2 py-1 text-xs border rounded transition-colors ${
|
||||
showStructure
|
||||
? 'border-violet-300 bg-violet-50 text-violet-600 dark:border-violet-700 dark:bg-violet-900/30 dark:text-violet-400'
|
||||
: 'border-gray-300 dark:border-gray-600 hover:bg-gray-50 dark:hover:bg-gray-700'
|
||||
}`}
|
||||
title="Strukturelemente anzeigen"
|
||||
>
|
||||
Struktur
|
||||
</button>
|
||||
)}
|
||||
<div className="w-px h-5 bg-gray-300 dark:bg-gray-600 mx-1" />
|
||||
</>
|
||||
)}
|
||||
@@ -851,6 +939,21 @@ export function StepReconstruction({ sessionId, onNext }: StepReconstructionProp
|
||||
Leer
|
||||
</button>
|
||||
|
||||
{/* Structure toggle */}
|
||||
{(structureBoxes.length > 0 || structureGraphics.length > 0) && (
|
||||
<button
|
||||
onClick={() => setShowStructure(v => !v)}
|
||||
className={`px-2 py-1 text-xs border rounded transition-colors ${
|
||||
showStructure
|
||||
? 'border-violet-300 bg-violet-50 text-violet-600 dark:border-violet-700 dark:bg-violet-900/30 dark:text-violet-400'
|
||||
: 'border-gray-300 dark:border-gray-600 hover:bg-gray-50 dark:hover:bg-gray-700'
|
||||
}`}
|
||||
title="Strukturelemente anzeigen"
|
||||
>
|
||||
Struktur
|
||||
</button>
|
||||
)}
|
||||
|
||||
<div className="w-px h-5 bg-gray-300 dark:bg-gray-600 mx-1" />
|
||||
|
||||
{/* Zoom controls */}
|
||||
@@ -915,6 +1018,9 @@ export function StepReconstruction({ sessionId, onNext }: StepReconstructionProp
|
||||
onLoad={handleImageLoad}
|
||||
/>
|
||||
|
||||
{/* Structure elements (boxes, graphics) */}
|
||||
{imageNaturalSize && renderStructureLayer(imageNaturalSize.w, imageNaturalSize.h)}
|
||||
|
||||
{/* Empty field markers */}
|
||||
{showEmptyHighlight && cells
|
||||
.filter(c => emptyCellIds.has(c.cellId))
|
||||
|
||||
@@ -165,6 +165,11 @@ export function StepStructureDetection({ sessionId, onNext }: StepStructureDetec
|
||||
{result.word_count} Woerter
|
||||
</span>
|
||||
)}
|
||||
{(result.border_ghosts_removed ?? 0) > 0 && (
|
||||
<span className="inline-flex items-center gap-1.5 px-3 py-1 rounded-full bg-red-50 dark:bg-red-900/20 text-red-700 dark:text-red-400 text-xs font-medium">
|
||||
{result.border_ghosts_removed} Rahmenlinien entfernt
|
||||
</span>
|
||||
)}
|
||||
<span className="text-gray-400 text-xs ml-auto">
|
||||
{result.image_width}x{result.image_height}px | {result.duration_seconds}s
|
||||
</span>
|
||||
|
||||
@@ -149,6 +149,8 @@ klausur-service/backend/
|
||||
├── ocr_pipeline_api.py # FastAPI Router (Schritte 2-10)
|
||||
├── orientation_crop_api.py # FastAPI Router (Schritte 1 + 4)
|
||||
├── cv_box_detect.py # Box-Erkennung + Zonen-Aufteilung
|
||||
├── cv_graphic_detect.py # Grafik-/Bilderkennung (Region-basiert)
|
||||
├── cv_color_detect.py # Farbtext-Erkennung (HSV-Analyse)
|
||||
├── cv_words_first.py # Words-First Grid Builder (bottom-up)
|
||||
├── page_crop.py # Content-basierter Crop-Algorithmus
|
||||
├── ocr_pipeline_session_store.py # PostgreSQL Persistence
|
||||
@@ -177,7 +179,8 @@ admin-lehrer/
|
||||
├── StepColumnDetection.tsx # Schritt 5: Spaltenerkennung
|
||||
├── StepRowDetection.tsx # Schritt 6: Zeilenerkennung
|
||||
├── StepWordRecognition.tsx # Schritt 7: Worterkennung
|
||||
├── StepLlmReview.tsx # Schritt 8: Korrektur (SSE-Stream)
|
||||
├── StepStructureDetection.tsx # Schritt 8: Strukturerkennung
|
||||
├── StepLlmReview.tsx # Schritt 9: Korrektur (SSE-Stream)
|
||||
├── StepReconstruction.tsx # Schritt 9: Rekonstruktion (Canvas + Overlay)
|
||||
├── usePixelWordPositions.ts # Shared Hook: Pixel-basierte Wortpositionierung
|
||||
├── FabricReconstructionCanvas.tsx # Fabric.js Editor
|
||||
@@ -281,14 +284,21 @@ Alle Endpoints unter `/api/v1/ocr-pipeline/`.
|
||||
| `skip_heal_gaps` | `false` | Zeilen-Luecken nicht heilen (Overlay-Modus) |
|
||||
| `grid_method` | `v2` | Grid-Strategie: `v2` (top-down) oder `words_first` (bottom-up) |
|
||||
|
||||
### Schritt 8: Korrektur
|
||||
### Schritt 8: Strukturerkennung
|
||||
|
||||
| Methode | Pfad | Beschreibung |
|
||||
|---------|------|--------------|
|
||||
| `POST` | `/sessions/{id}/detect-structure` | Boxen, Zonen, Farben und Grafiken erkennen |
|
||||
| `GET` | `/sessions/{id}/image/structure-overlay` | Overlay mit allen Strukturelementen |
|
||||
|
||||
### Schritt 9: Korrektur
|
||||
|
||||
| Methode | Pfad | Beschreibung |
|
||||
|---------|------|--------------|
|
||||
| `POST` | `/sessions/{id}/llm-review?stream=true` | SSE-Stream Korrektur starten |
|
||||
| `POST` | `/sessions/{id}/llm-review/apply` | Ausgewaehlte Korrekturen speichern |
|
||||
|
||||
### Schritt 9: Rekonstruktion
|
||||
### Schritt 10: Rekonstruktion
|
||||
|
||||
| Methode | Pfad | Beschreibung |
|
||||
|---------|------|--------------|
|
||||
@@ -853,6 +863,93 @@ Change-Format:
|
||||
|
||||
---
|
||||
|
||||
## Schritt 8: Strukturerkennung (Detail)
|
||||
|
||||
Erkennt Boxen, Zonen, Farbregionen und grafische Elemente auf der Seite.
|
||||
Laeuft **nach** der Worterkennung (Schritt 7), damit OCR-Wortpositionen
|
||||
fuer die Unterscheidung von Text vs. Grafik zur Verfuegung stehen.
|
||||
|
||||
### Teilschritte
|
||||
|
||||
1. **Box-Erkennung** (`cv_box_detect.py`): Linien-Rahmen und farbige Hintergruende
|
||||
2. **Zonen-Aufteilung** (`split_page_into_zones`): Seite in Box- und Content-Zonen aufteilen
|
||||
3. **Farb-Analyse** (`cv_color_detect.py`): HSV-basierte Erkennung farbiger Textbereiche
|
||||
4. **Grafik-Erkennung** (`cv_graphic_detect.py`): Nicht-Text-Grafiken identifizieren
|
||||
|
||||
### Grafik-Erkennung: Region-basierter Ansatz
|
||||
|
||||
Zwei Paesse trennen farbige Grafiken von farbigem Text und erkennen
|
||||
schwarze Illustrationen:
|
||||
|
||||
**Pass 1 — Farbige Bildregionen:**
|
||||
|
||||
1. HSV-Saturation-Kanal extrahieren (Schwelle > 40)
|
||||
- Schwarzer Text hat Saettigung ≈ 0 → unsichtbar auf diesem Kanal
|
||||
2. Starke Dilation (25×25 Ellipse) verschmilzt nahe Farbpixel zu Regionen
|
||||
3. Fuer jede Region: Wort-Ueberlappung pruefen
|
||||
- \> 50 % Ueberlappung mit OCR-Woertern → farbiger Text → ueberspringen
|
||||
- ≤ 50 % → farbige Grafik/Bild → behalten
|
||||
4. Minimum 200 Farbpixel erforderlich (kein Rauschen)
|
||||
5. Regionen > 50 % der Bildbreite oder -hoehe → Seitenumfassend → ueberspringen
|
||||
|
||||
**Pass 2 — Schwarze Illustrationen:**
|
||||
|
||||
1. Otsu-Binarisierung fuer Tinten-Maske
|
||||
2. Ausschlusszonen: OCR-Woerter (5 px Padding) + erkannte Boxen (8 px Inset)
|
||||
3. Farbige Pixel aus Pass 1 ebenfalls ausschliessen
|
||||
4. Nur Konturen mit Flaeche > 5000 px und min(Breite, Hoehe) > 40 px
|
||||
|
||||
**Deduplizierung:** Ueberlappende Elemente (> 50 % IoU der kleineren
|
||||
Bounding-Box) werden zusammengefasst. Ergebnis nach Flaeche absteigend
|
||||
sortiert.
|
||||
|
||||
### Response-Format
|
||||
|
||||
```json
|
||||
{
|
||||
"boxes": [
|
||||
{"x": 50, "y": 300, "w": 1100, "h": 200, "confidence": 0.85,
|
||||
"border_thickness": 3, "bg_color_name": "blue", "bg_color_hex": "#2563eb"}
|
||||
],
|
||||
"zones": [
|
||||
{"index": 0, "zone_type": "content", "x": 50, "y": 50, "w": 1100, "h": 250},
|
||||
{"index": 1, "zone_type": "box", "x": 50, "y": 300, "w": 1100, "h": 200}
|
||||
],
|
||||
"graphics": [
|
||||
{"x": 100, "y": 500, "w": 150, "h": 120, "area": 8500,
|
||||
"shape": "image", "color_name": "red", "color_hex": "#dc2626",
|
||||
"confidence": 0.72}
|
||||
],
|
||||
"color_pixel_counts": {"red": 1234, "blue": 5678},
|
||||
"has_words": true,
|
||||
"word_count": 96,
|
||||
"duration_seconds": 0.45
|
||||
}
|
||||
```
|
||||
|
||||
### Grafik-Shape-Typen
|
||||
|
||||
| Shape | Quelle | Beschreibung |
|
||||
|-------|--------|--------------|
|
||||
| `image` | Pass 1 | Farbige Grafik/Bild (Ballons, Pfeile, Icons) |
|
||||
| `illustration` | Pass 2 | Grosse schwarze Zeichnung/Illustration |
|
||||
|
||||
### Erkannte Farben
|
||||
|
||||
`red`, `orange`, `yellow`, `green`, `blue`, `purple`, `black`
|
||||
— basierend auf dem Median-Hue der saturierten Pixel in der Region.
|
||||
|
||||
### Frontend-Anzeige
|
||||
|
||||
`StepStructureDetection.tsx` zeigt:
|
||||
|
||||
- Boxen-Liste mit Position, Hintergrundfarbe und Confidence
|
||||
- Zonen-Uebersicht (Content vs. Box)
|
||||
- Farb-Zusammenfassung (Pixel-Counts)
|
||||
- Grafik-Liste mit Shape, Abmessungen, Farbe und Confidence
|
||||
|
||||
---
|
||||
|
||||
## Schritt 9: Rekonstruktion (Detail)
|
||||
|
||||
Drei Modi verfuegbar:
|
||||
@@ -1263,6 +1360,7 @@ cd klausur-service/backend && pytest tests/test_paddle_kombi.py -v # 36 Tests
|
||||
|
||||
| Datum | Version | Aenderung |
|
||||
|-------|---------|----------|
|
||||
| 2026-03-16 | 4.6.0 | Strukturerkennung (Schritt 8): Region-basierte Grafikerkennung (`cv_graphic_detect.py`) mit Zwei-Pass-Verfahren (Farbregionen + schwarze Illustrationen), Wort-Ueberlappungs-Filter, Box/Zonen/Farb-Analyse. Schritt laeuft nach Worterkennung. |
|
||||
| 2026-03-12 | 4.5.0 | Kombi-Modus (PaddleOCR + Tesseract): Beide Engines laufen parallel, Koordinaten werden IoU-basiert gematcht und confidence-gewichtet gemittelt. Ungematchte Tesseract-Woerter (Bullets, Symbole) werden hinzugefuegt. 3er-Toggle in OCR Overlay. |
|
||||
| 2026-03-12 | 4.4.0 | PaddleOCR Remote-Engine (`engine=paddle`): PP-OCRv5 Latin auf Hetzner x86_64. Neuer Microservice (`paddleocr-service/`), HTTP-Client (`paddleocr_remote.py`), Frontend-Dropdown-Option. Nutzt words_first Grid-Methode. |
|
||||
| 2026-03-12 | 4.3.0 | Words-First Grid Builder (`cv_words_first.py`): Bottom-up-Algorithmus clustert Tesseract word_boxes direkt zu Spalten/Zeilen/Zellen. Neuer `grid_method` Parameter im `/words` Endpoint. Frontend-Toggle in StepWordRecognition. |
|
||||
|
||||
@@ -121,10 +121,9 @@ def detect_graphic_elements(
|
||||
return []
|
||||
|
||||
h, w = img_bgr.shape[:2]
|
||||
img_area = h * w
|
||||
|
||||
logger.info("GraphicDetect: image %dx%d, %d word_boxes, %d detected_boxes",
|
||||
w, h, len(word_boxes), len(detected_boxes or []))
|
||||
logger.debug("GraphicDetect: image %dx%d, %d word_boxes, %d detected_boxes",
|
||||
w, h, len(word_boxes), len(detected_boxes or []))
|
||||
|
||||
hsv = cv2.cvtColor(img_bgr, cv2.COLOR_BGR2HSV)
|
||||
candidates: List[GraphicElement] = []
|
||||
@@ -161,7 +160,7 @@ def detect_graphic_elements(
|
||||
contours_regions, _ = cv2.findContours(
|
||||
region_mask, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE,
|
||||
)
|
||||
logger.info("GraphicDetect PASS1: %d color regions after dilation", len(contours_regions))
|
||||
logger.debug("GraphicDetect PASS1: %d color regions after dilation", len(contours_regions))
|
||||
|
||||
for cnt in contours_regions:
|
||||
bx, by, bw, bh = cv2.boundingRect(cnt)
|
||||
@@ -172,7 +171,7 @@ def detect_graphic_elements(
|
||||
|
||||
# Skip page-spanning regions
|
||||
if bw > w * 0.5 or bh > h * 0.5:
|
||||
logger.info("GraphicDetect PASS1 SKIP page-spanning (%d,%d) %dx%d", bx, by, bw, bh)
|
||||
logger.debug("GraphicDetect PASS1 skip page-spanning (%d,%d) %dx%d", bx, by, bw, bh)
|
||||
continue
|
||||
|
||||
bbox_area = bw * bh
|
||||
@@ -188,8 +187,8 @@ def detect_graphic_elements(
|
||||
|
||||
# If most of the region is covered by word boxes → colored text, skip
|
||||
if word_overlap > 0.5:
|
||||
logger.info("GraphicDetect PASS1 SKIP text region (%d,%d) %dx%d word_overlap=%.0f%%",
|
||||
bx, by, bw, bh, word_overlap * 100)
|
||||
logger.debug("GraphicDetect PASS1 skip text region (%d,%d) %dx%d overlap=%.0f%%",
|
||||
bx, by, bw, bh, word_overlap * 100)
|
||||
continue
|
||||
|
||||
# Need a minimum number of colored pixels (not just dilated area)
|
||||
@@ -209,8 +208,7 @@ def detect_graphic_elements(
|
||||
density = color_pixel_count / bbox_area if bbox_area > 0 else 0
|
||||
conf = min(0.95, 0.5 + density * 0.5)
|
||||
|
||||
logger.info("GraphicDetect PASS1 ACCEPT image at (%d,%d) %dx%d "
|
||||
"color_px=%d word_overlap=%.0f%% color=%s",
|
||||
logger.debug("GraphicDetect PASS1 accept (%d,%d) %dx%d px=%d overlap=%.0f%% %s",
|
||||
bx, by, bw, bh, color_pixel_count, word_overlap * 100, color_name)
|
||||
candidates.append(GraphicElement(
|
||||
x=bx, y=by, width=bw, height=bh,
|
||||
@@ -256,7 +254,7 @@ def detect_graphic_elements(
|
||||
contours_ink, _ = cv2.findContours(
|
||||
ink_only, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE,
|
||||
)
|
||||
logger.info("GraphicDetect PASS2 (ink): %d contours", len(contours_ink))
|
||||
logger.debug("GraphicDetect PASS2 ink: %d contours", len(contours_ink))
|
||||
|
||||
for cnt in contours_ink:
|
||||
area = cv2.contourArea(cnt)
|
||||
@@ -267,8 +265,8 @@ def detect_graphic_elements(
|
||||
if bw > w * 0.8 or bh > h * 0.8:
|
||||
continue
|
||||
|
||||
logger.info("GraphicDetect PASS2 ACCEPT illustration at (%d,%d) %dx%d area=%d",
|
||||
bx, by, bw, bh, int(area))
|
||||
logger.debug("GraphicDetect PASS2 accept (%d,%d) %dx%d area=%d",
|
||||
bx, by, bw, bh, int(area))
|
||||
candidates.append(GraphicElement(
|
||||
x=bx, y=by, width=bw, height=bh,
|
||||
area=int(area), shape="illustration",
|
||||
|
||||
@@ -1202,6 +1202,147 @@ async def detect_type(session_id: str):
|
||||
return {"session_id": session_id, **result_dict}
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Border-ghost word filter
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
# Characters that OCR produces when reading box-border lines.
|
||||
_BORDER_GHOST_CHARS = set("|1lI![](){}iíì/\\-—–_~.,;:'\"")
|
||||
|
||||
|
||||
def _filter_border_ghost_words(
|
||||
word_result: Dict,
|
||||
boxes: List,
|
||||
) -> int:
|
||||
"""Remove OCR words that are actually box border lines.
|
||||
|
||||
A word is considered a border ghost when it sits on a known box edge
|
||||
(left, right, top, or bottom) and looks like a line artefact (narrow
|
||||
aspect ratio or text consists only of line-like characters).
|
||||
|
||||
After removing ghost cells, columns that have become empty are also
|
||||
removed from ``columns_used`` so the grid no longer shows phantom
|
||||
columns.
|
||||
|
||||
Modifies *word_result* in-place and returns the number of removed cells.
|
||||
"""
|
||||
if not boxes or not word_result:
|
||||
return 0
|
||||
|
||||
cells = word_result.get("cells")
|
||||
if not cells:
|
||||
return 0
|
||||
|
||||
# Build border bands — vertical (X) and horizontal (Y)
|
||||
x_bands = [] # list of (x_lo, x_hi)
|
||||
y_bands = [] # list of (y_lo, y_hi)
|
||||
for b in boxes:
|
||||
bx = b.x if hasattr(b, "x") else b.get("x", 0)
|
||||
by = b.y if hasattr(b, "y") else b.get("y", 0)
|
||||
bw = b.width if hasattr(b, "width") else b.get("w", b.get("width", 0))
|
||||
bh = b.height if hasattr(b, "height") else b.get("h", b.get("height", 0))
|
||||
bt = b.border_thickness if hasattr(b, "border_thickness") else b.get("border_thickness", 3)
|
||||
margin = max(bt * 2, 10) + 6 # generous margin
|
||||
|
||||
# Vertical edges (left / right)
|
||||
x_bands.append((bx - margin, bx + margin))
|
||||
x_bands.append((bx + bw - margin, bx + bw + margin))
|
||||
# Horizontal edges (top / bottom)
|
||||
y_bands.append((by - margin, by + margin))
|
||||
y_bands.append((by + bh - margin, by + bh + margin))
|
||||
|
||||
img_w = word_result.get("image_width", 1)
|
||||
img_h = word_result.get("image_height", 1)
|
||||
|
||||
def _is_ghost(cell: Dict) -> bool:
|
||||
text = (cell.get("text") or "").strip()
|
||||
if not text:
|
||||
return False
|
||||
|
||||
# Compute absolute pixel position
|
||||
if cell.get("bbox_px"):
|
||||
px = cell["bbox_px"]
|
||||
cx = px["x"] + px["w"] / 2
|
||||
cy = px["y"] + px["h"] / 2
|
||||
cw = px["w"]
|
||||
ch = px["h"]
|
||||
elif cell.get("bbox_pct"):
|
||||
pct = cell["bbox_pct"]
|
||||
cx = (pct["x"] / 100) * img_w + (pct["w"] / 100) * img_w / 2
|
||||
cy = (pct["y"] / 100) * img_h + (pct["h"] / 100) * img_h / 2
|
||||
cw = (pct["w"] / 100) * img_w
|
||||
ch = (pct["h"] / 100) * img_h
|
||||
else:
|
||||
return False
|
||||
|
||||
# Check if center sits on a vertical or horizontal border
|
||||
on_vertical = any(lo <= cx <= hi for lo, hi in x_bands)
|
||||
on_horizontal = any(lo <= cy <= hi for lo, hi in y_bands)
|
||||
if not on_vertical and not on_horizontal:
|
||||
return False
|
||||
|
||||
# Very short text (1-2 chars) on a border → very likely ghost
|
||||
if len(text) <= 2:
|
||||
# Narrow vertically (line-like) or narrow horizontally (dash-like)?
|
||||
if ch > 0 and cw / ch < 0.5:
|
||||
return True
|
||||
if cw > 0 and ch / cw < 0.5:
|
||||
return True
|
||||
# Text is only border-ghost characters?
|
||||
if all(c in _BORDER_GHOST_CHARS for c in text):
|
||||
return True
|
||||
|
||||
# Longer text but still only ghost chars and very narrow
|
||||
if all(c in _BORDER_GHOST_CHARS for c in text):
|
||||
if ch > 0 and cw / ch < 0.35:
|
||||
return True
|
||||
if cw > 0 and ch / cw < 0.35:
|
||||
return True
|
||||
return True # all ghost chars on a border → remove
|
||||
|
||||
return False
|
||||
|
||||
before = len(cells)
|
||||
word_result["cells"] = [c for c in cells if not _is_ghost(c)]
|
||||
removed = before - len(word_result["cells"])
|
||||
|
||||
# --- Remove empty columns from columns_used ---
|
||||
columns_used = word_result.get("columns_used")
|
||||
if removed and columns_used and len(columns_used) > 1:
|
||||
remaining_cells = word_result["cells"]
|
||||
occupied_cols = {c.get("col_index") for c in remaining_cells}
|
||||
before_cols = len(columns_used)
|
||||
columns_used = [col for col in columns_used if col.get("index") in occupied_cols]
|
||||
|
||||
# Re-index columns and remap cell col_index values
|
||||
if len(columns_used) < before_cols:
|
||||
old_to_new = {}
|
||||
for new_i, col in enumerate(columns_used):
|
||||
old_to_new[col["index"]] = new_i
|
||||
col["index"] = new_i
|
||||
for cell in remaining_cells:
|
||||
old_ci = cell.get("col_index")
|
||||
if old_ci in old_to_new:
|
||||
cell["col_index"] = old_to_new[old_ci]
|
||||
word_result["columns_used"] = columns_used
|
||||
logger.info("border-ghost: removed %d empty column(s), %d remaining",
|
||||
before_cols - len(columns_used), len(columns_used))
|
||||
|
||||
if removed:
|
||||
# Update summary counts
|
||||
summary = word_result.get("summary", {})
|
||||
summary["total_cells"] = len(word_result["cells"])
|
||||
summary["non_empty_cells"] = sum(1 for c in word_result["cells"] if c.get("text"))
|
||||
word_result["summary"] = summary
|
||||
gs = word_result.get("grid_shape", {})
|
||||
gs["total_cells"] = len(word_result["cells"])
|
||||
if columns_used is not None:
|
||||
gs["cols"] = len(columns_used)
|
||||
word_result["grid_shape"] = gs
|
||||
|
||||
return removed
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Structure Detection Endpoint
|
||||
# ---------------------------------------------------------------------------
|
||||
@@ -1236,10 +1377,6 @@ async def detect_structure(session_id: str):
|
||||
for cell in word_result["cells"]:
|
||||
for wb in (cell.get("word_boxes") or []):
|
||||
words.append(wb)
|
||||
logger.info("detect-structure: word_result present=%s, cells=%d, word_boxes extracted=%d",
|
||||
word_result is not None,
|
||||
len(word_result.get("cells", [])) if word_result else 0,
|
||||
len(words))
|
||||
# If no words yet, use image dimensions with small margin
|
||||
if words:
|
||||
content_x = max(0, min(int(wb["left"]) for wb in words))
|
||||
@@ -1319,6 +1456,15 @@ async def detect_structure(session_id: str):
|
||||
detected_boxes=box_dicts,
|
||||
)
|
||||
|
||||
# --- Filter border-ghost words from OCR result ---
|
||||
ghost_count = 0
|
||||
if boxes and word_result:
|
||||
ghost_count = _filter_border_ghost_words(word_result, boxes)
|
||||
if ghost_count:
|
||||
logger.info("detect-structure: removed %d border-ghost words", ghost_count)
|
||||
await update_session_db(session_id, word_result=word_result)
|
||||
cached["word_result"] = word_result
|
||||
|
||||
duration = time.time() - t0
|
||||
|
||||
result_dict = {
|
||||
@@ -1361,6 +1507,7 @@ async def detect_structure(session_id: str):
|
||||
"color_pixel_counts": color_summary,
|
||||
"has_words": len(words) > 0,
|
||||
"word_count": len(words),
|
||||
"border_ghosts_removed": ghost_count,
|
||||
"duration_seconds": round(duration, 2),
|
||||
}
|
||||
|
||||
@@ -1806,12 +1953,7 @@ async def _get_structure_overlay(session_id: str) -> Response:
|
||||
# --- Draw graphic elements ---
|
||||
graphics_data = structure.get("graphics", [])
|
||||
shape_icons = {
|
||||
"arrow": "ARROW",
|
||||
"circle": "CIRCLE",
|
||||
"line": "LINE",
|
||||
"exclamation": "!",
|
||||
"dot": "DOT",
|
||||
"icon": "ICON",
|
||||
"image": "IMAGE",
|
||||
"illustration": "ILLUST",
|
||||
}
|
||||
for gfx in graphics_data:
|
||||
|
||||
307
klausur-service/backend/tests/test_border_ghost_filter.py
Normal file
307
klausur-service/backend/tests/test_border_ghost_filter.py
Normal file
@@ -0,0 +1,307 @@
|
||||
"""
|
||||
Tests for _filter_border_ghost_words() — removes OCR artefacts from box borders.
|
||||
|
||||
When OCR reads a scanned document, box border lines (vertical/horizontal
|
||||
strokes) are often misrecognised as characters like '|', '1', 'l', '-'.
|
||||
These phantom words create spurious columns/rows in the grid. The filter
|
||||
removes them by checking if a word sits on a known box border and looks
|
||||
like a line artefact.
|
||||
|
||||
Lizenz: Apache 2.0
|
||||
"""
|
||||
|
||||
import sys
|
||||
import os
|
||||
|
||||
sys.path.insert(0, os.path.join(os.path.dirname(__file__), ".."))
|
||||
|
||||
from ocr_pipeline_api import _filter_border_ghost_words, _BORDER_GHOST_CHARS
|
||||
from cv_vocab_types import DetectedBox
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Helpers
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
def _make_cell(text: str, x: int, y: int, w: int, h: int,
|
||||
col_index: int = 0) -> dict:
|
||||
"""Create a cell dict with bbox_px matching the word recognition output."""
|
||||
return {
|
||||
"cell_id": f"c_{x}_{y}",
|
||||
"text": text,
|
||||
"bbox_px": {"x": x, "y": y, "w": w, "h": h},
|
||||
"bbox_pct": {
|
||||
"x": x / 12, "y": y / 18,
|
||||
"w": w / 12, "h": h / 18,
|
||||
},
|
||||
"confidence": 80,
|
||||
"row_index": 0,
|
||||
"col_index": col_index,
|
||||
}
|
||||
|
||||
|
||||
def _make_word_result(cells: list, img_w: int = 1200, img_h: int = 1800,
|
||||
columns_used: list = None) -> dict:
|
||||
return {
|
||||
"cells": cells,
|
||||
"image_width": img_w,
|
||||
"image_height": img_h,
|
||||
"columns_used": columns_used,
|
||||
"summary": {
|
||||
"total_cells": len(cells),
|
||||
"non_empty_cells": sum(1 for c in cells if c.get("text")),
|
||||
},
|
||||
"grid_shape": {
|
||||
"total_cells": len(cells),
|
||||
"cols": len(columns_used) if columns_used else 1,
|
||||
},
|
||||
}
|
||||
|
||||
|
||||
def _make_box(x: int, y: int, w: int, h: int, bt: int = 3) -> DetectedBox:
|
||||
return DetectedBox(x=x, y=y, width=w, height=h, confidence=0.9, border_thickness=bt)
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Basic filtering tests
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
class TestBorderGhostFilter:
|
||||
"""Tests for the _filter_border_ghost_words() function."""
|
||||
|
||||
def test_no_boxes_no_change(self):
|
||||
"""Without boxes, nothing should be filtered."""
|
||||
cells = [_make_cell("hello", 100, 200, 80, 30)]
|
||||
wr = _make_word_result(cells)
|
||||
removed = _filter_border_ghost_words(wr, [])
|
||||
assert removed == 0
|
||||
assert len(wr["cells"]) == 1
|
||||
|
||||
def test_no_word_result_no_crash(self):
|
||||
removed = _filter_border_ghost_words(None, [_make_box(50, 300, 1100, 200)])
|
||||
assert removed == 0
|
||||
|
||||
def test_empty_cells_no_crash(self):
|
||||
wr = _make_word_result([])
|
||||
removed = _filter_border_ghost_words(wr, [_make_box(50, 300, 1100, 200)])
|
||||
assert removed == 0
|
||||
|
||||
def test_pipe_on_left_border_removed(self):
|
||||
"""A '|' character sitting on the left border of a box should be removed."""
|
||||
box = _make_box(x=50, y=300, w=1100, h=200, bt=3)
|
||||
cells = [
|
||||
_make_cell("|", x=48, y=350, w=3, h=25),
|
||||
_make_cell("hello", x=200, y=350, w=80, h=25),
|
||||
]
|
||||
wr = _make_word_result(cells)
|
||||
removed = _filter_border_ghost_words(wr, [box])
|
||||
assert removed == 1
|
||||
assert wr["cells"][0]["text"] == "hello"
|
||||
|
||||
def test_pipe_on_right_border_removed(self):
|
||||
"""A '|' character on the right border should be removed."""
|
||||
box = _make_box(x=50, y=300, w=1100, h=200, bt=3)
|
||||
cells = [
|
||||
_make_cell("|", x=1148, y=350, w=4, h=25),
|
||||
_make_cell("world", x=600, y=350, w=80, h=25),
|
||||
]
|
||||
wr = _make_word_result(cells)
|
||||
removed = _filter_border_ghost_words(wr, [box])
|
||||
assert removed == 1
|
||||
assert wr["cells"][0]["text"] == "world"
|
||||
|
||||
def test_digit_1_on_border_narrow_removed(self):
|
||||
"""A narrow '1' on a box border should be removed."""
|
||||
box = _make_box(x=50, y=300, w=1100, h=200, bt=3)
|
||||
cells = [_make_cell("1", x=49, y=400, w=5, h=20)]
|
||||
wr = _make_word_result(cells)
|
||||
removed = _filter_border_ghost_words(wr, [box])
|
||||
assert removed == 1
|
||||
|
||||
def test_dash_on_horizontal_border_removed(self):
    """A '-' on the bottom horizontal border should be removed."""
    border_box = _make_box(x=50, y=300, w=1100, h=200, bt=3)
    # The box bottom edge sits at y=500; the dash is drawn at y=498.
    dash = _make_cell("-", x=600, y=498, w=20, h=4)
    word_result = _make_word_result([dash])
    assert _filter_border_ghost_words(word_result, [border_box]) == 1
||||
|
||||
def test_real_word_on_border_not_removed(self):
    """A normal word near a border should NOT be removed."""
    border_box = _make_box(x=50, y=300, w=1100, h=200, bt=3)
    # Wide, real text right next to the left border edge.
    word_result = _make_word_result([_make_cell("Tip", x=52, y=350, w=60, h=25)])
    assert _filter_border_ghost_words(word_result, [border_box]) == 0
||||
|
||||
def test_word_far_from_border_not_removed(self):
    """Words far from any border should never be removed."""
    border_box = _make_box(x=50, y=300, w=1100, h=200, bt=3)
    # A pipe glyph in the middle of the box — not a border artefact.
    word_result = _make_word_result([_make_cell("|", x=600, y=400, w=3, h=25)])
    assert _filter_border_ghost_words(word_result, [border_box]) == 0
||||
|
||||
def test_multiple_ghosts_on_same_box(self):
    """Multiple ghost words on the same box should all be removed."""
    border_box = _make_box(x=50, y=300, w=1100, h=200, bt=3)
    word_result = _make_word_result([
        _make_cell("|", x=48, y=350, w=3, h=25),     # left-border ghost
        _make_cell("l", x=1149, y=350, w=4, h=25),   # right-border ghost
        _make_cell("text", x=400, y=350, w=80, h=25),
    ])
    assert _filter_border_ghost_words(word_result, [border_box]) == 2
    remaining = word_result["cells"]
    assert len(remaining) == 1
    assert remaining[0]["text"] == "text"
||||
|
||||
def test_summary_updated_after_removal(self):
    """Cell counters in summary and grid_shape must reflect removals."""
    border_box = _make_box(x=50, y=300, w=1100, h=200, bt=3)
    word_result = _make_word_result([
        _make_cell("|", x=48, y=350, w=3, h=25),
        _make_cell("hello", x=200, y=350, w=80, h=25),
    ])
    _filter_border_ghost_words(word_result, [border_box])
    assert word_result["summary"]["total_cells"] == 1
    assert word_result["grid_shape"]["total_cells"] == 1
||||
|
||||
def test_ghost_chars_covers_common_artefacts(self):
    """The ghost chars set should include common border-line OCR artefacts."""
    must_have = {"|", "1", "l", "I", "!", "[", "]", "-", "—", "_", "/", "\\"}
    assert must_have.issubset(_BORDER_GHOST_CHARS)
||||
|
||||
def test_multiple_boxes(self):
    """Ghosts sitting on the borders of different boxes are all caught."""
    left_box = _make_box(x=50, y=300, w=500, h=200, bt=3)
    right_box = _make_box(x=600, y=300, w=500, h=200, bt=3)
    word_result = _make_word_result([
        _make_cell("|", x=49, y=350, w=3, h=25),    # on left_box border
        _make_cell("I", x=599, y=350, w=4, h=25),   # on right_box border
        _make_cell("real", x=300, y=350, w=80, h=25),
    ])
    assert _filter_border_ghost_words(word_result, [left_box, right_box]) == 2
||||
|
||||
def test_uses_bbox_pct_fallback(self):
    """Should work with bbox_pct when bbox_px is not available."""
    border_box = _make_box(x=50, y=300, w=1100, h=200, bt=3)
    # Same geometry as a 48/350/4/25 px ghost, expressed in page percent.
    pct_cell = {
        "cell_id": "c_test",
        "text": "|",
        "bbox_pct": {
            "x": (48 / 1200) * 100,
            "y": (350 / 1800) * 100,
            "w": (4 / 1200) * 100,
            "h": (25 / 1800) * 100,
        },
        "confidence": 80,
        "col_index": 0,
    }
    word_result = _make_word_result([pct_cell])
    assert _filter_border_ghost_words(word_result, [border_box]) == 1
||||
|
||||
def test_generous_margin_catches_offset_ghosts(self):
    """Even if OCR word is slightly offset from border, it should be caught."""
    border_box = _make_box(x=50, y=300, w=1100, h=200, bt=3)
    # Ghost sits 15px inside the right border (x=1135 vs border at x=1150).
    word_result = _make_word_result([_make_cell("|", x=1135, y=350, w=4, h=25)])
    assert _filter_border_ghost_words(word_result, [border_box]) == 1
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Column cleanup tests
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
class TestColumnCleanup:
    """Tests for empty column removal after ghost filtering."""

    def test_empty_column_removed(self):
        """After filtering all cells of column 4, it should be removed."""
        border_box = _make_box(x=50, y=300, w=1100, h=200, bt=3)
        column_defs = [
            {"index": 0, "type": "column_en", "x": 60, "width": 250},
            {"index": 1, "type": "column_de", "x": 320, "width": 250},
            {"index": 2, "type": "column_3", "x": 580, "width": 250},
            {"index": 3, "type": "column_4", "x": 840, "width": 250},
            # Ghost column produced entirely by border artefacts:
            {"index": 4, "type": "column_5", "x": 1140, "width": 60},
        ]
        word_result = _make_word_result(
            [
                _make_cell("word", x=100, y=350, w=60, h=25, col_index=0),
                _make_cell("Wort", x=360, y=350, w=60, h=25, col_index=1),
                _make_cell("txt", x=620, y=350, w=50, h=25, col_index=2),
                _make_cell("abc", x=880, y=350, w=50, h=25, col_index=3),
                _make_cell("|", x=1148, y=350, w=4, h=25, col_index=4),   # ghost
                _make_cell("l", x=1149, y=400, w=3, h=25, col_index=4),   # ghost
            ],
            columns_used=column_defs,
        )
        removed = _filter_border_ghost_words(word_result, [border_box])

        assert removed == 2
        assert len(word_result["columns_used"]) == 4  # ghost column dropped
        assert word_result["grid_shape"]["cols"] == 4

    def test_columns_reindexed_after_removal(self):
        """After removing a middle column, indices should be sequential."""
        border_box = _make_box(x=50, y=300, w=1100, h=200, bt=3)
        column_defs = [
            {"index": 0, "type": "column_1", "x": 60, "width": 200},
            {"index": 1, "type": "column_2", "x": 280, "width": 30},  # border col
            {"index": 2, "type": "column_3", "x": 400, "width": 200},
        ]
        # Column 1 holds only a ghost sitting right on the box border, so it
        # empties out once the filter runs.
        word_result = _make_word_result(
            [
                _make_cell("hello", x=100, y=350, w=60, h=25, col_index=0),
                _make_cell("|", x=49, y=350, w=3, h=25, col_index=1),
                _make_cell("world", x=440, y=350, w=60, h=25, col_index=2),
            ],
            columns_used=column_defs,
        )
        _filter_border_ghost_words(word_result, [border_box])

        # Column 1 vanished; former column 2 slides down to index 1.
        surviving = word_result["columns_used"]
        assert len(surviving) == 2
        assert surviving[0]["index"] == 0
        assert surviving[1]["index"] == 1
        # Remaining cells carry the re-indexed col_index values.
        assert word_result["cells"][0]["col_index"] == 0
        assert word_result["cells"][1]["col_index"] == 1

    def test_no_columns_used_no_crash(self):
        """If columns_used is None, column cleanup should be skipped."""
        border_box = _make_box(x=50, y=300, w=1100, h=200, bt=3)
        word_result = _make_word_result(
            [_make_cell("|", x=48, y=350, w=3, h=25)], columns_used=None
        )
        assert _filter_border_ghost_words(word_result, [border_box]) == 1

    def test_occupied_columns_kept(self):
        """Columns that still have cells after filtering should be kept."""
        border_box = _make_box(x=50, y=300, w=1100, h=200, bt=3)
        column_defs = [
            {"index": 0, "type": "column_en", "x": 60, "width": 250},
            {"index": 1, "type": "column_de", "x": 320, "width": 250},
        ]
        word_result = _make_word_result(
            [
                _make_cell("word", x=100, y=350, w=60, h=25, col_index=0),
                _make_cell("Wort", x=360, y=350, w=60, h=25, col_index=1),
            ],
            columns_used=column_defs,
        )
        assert _filter_border_ghost_words(word_result, [border_box]) == 0
        assert len(word_result["columns_used"]) == 2

    def test_single_column_not_removed(self):
        """A single remaining column should never be removed."""
        border_box = _make_box(x=50, y=300, w=1100, h=200, bt=3)
        only_column = [{"index": 0, "type": "column_text", "x": 60, "width": 1000}]
        word_result = _make_word_result(
            [_make_cell("|", x=49, y=350, w=3, h=25, col_index=0)],
            columns_used=only_column,
        )
        # Even when its only cell is filtered away, the last column survives:
        # cleanup is skipped entirely for len(columns_used) <= 1.
        assert _filter_border_ghost_words(word_result, [border_box]) == 1
        assert len(word_result["columns_used"]) == 1
||||
320
klausur-service/backend/tests/test_cv_graphic_detect.py
Normal file
320
klausur-service/backend/tests/test_cv_graphic_detect.py
Normal file
@@ -0,0 +1,320 @@
|
||||
"""
|
||||
Tests for cv_graphic_detect.py — graphic element detection.
|
||||
|
||||
License: Apache 2.0
|
||||
"""
|
||||
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
import cv2
|
||||
|
||||
from cv_graphic_detect import detect_graphic_elements, GraphicElement, _dominant_color
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Helpers
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
def _white_image(width: int = 1200, height: int = 1800) -> np.ndarray:
|
||||
"""Create a plain white BGR image."""
|
||||
return np.ones((height, width, 3), dtype=np.uint8) * 255
|
||||
|
||||
|
||||
def _draw_colored_circle(img: np.ndarray, cx: int, cy: int, radius: int,
                         color_bgr: tuple) -> np.ndarray:
    """Paint a filled colored disc onto img (simulates a balloon / graphic)."""
    center = (cx, cy)
    cv2.circle(img, center, radius, color_bgr, -1)  # thickness=-1 → filled
    return img
|
||||
|
||||
|
||||
def _draw_colored_region(img: np.ndarray, x: int, y: int, w: int, h: int,
                         color_bgr: tuple) -> np.ndarray:
    """Paint a filled colored rectangle onto img (simulates an image region)."""
    top_left = (x, y)
    bottom_right = (x + w, y + h)
    cv2.rectangle(img, top_left, bottom_right, color_bgr, -1)  # filled
    return img
|
||||
|
||||
|
||||
def _draw_black_illustration(img: np.ndarray, x: int, y: int, w: int, h: int) -> np.ndarray:
    """Paint a large black filled shape (simulates a black-ink illustration)."""
    black = (0, 0, 0)  # BGR pure black
    cv2.rectangle(img, (x, y), (x + w, y + h), black, -1)  # thickness=-1 → filled
    return img
|
||||
|
||||
|
||||
def _word_box(left: int, top: int, width: int, height: int) -> dict:
|
||||
"""Create a word box dict matching OCR output format."""
|
||||
return {"left": left, "top": top, "width": width, "height": height}
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# _dominant_color tests
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
class TestDominantColor:
    """Tests for the _dominant_color helper."""

    def test_empty_array(self):
        """No pixels at all falls back to black / #000000."""
        no_pixels = np.array([], dtype=np.uint8).reshape(0, 3)
        name, hex_val = _dominant_color(no_pixels)
        assert name == "black"
        assert hex_val == "#000000"

    def test_low_saturation_returns_black(self):
        """Pixels with low saturation should be classified as black."""
        # H=90 (irrelevant at this saturation), S=10 (low), V=200.
        washed_out = np.full((50, 50, 3), [90, 10, 200], dtype=np.uint8)
        name, _ = _dominant_color(washed_out)
        assert name == "black"

    def test_red_hue(self):
        """Pixels with hue ~0-10 or ~170+ should be red."""
        reddish = np.full((50, 50, 3), [5, 200, 200], dtype=np.uint8)
        name, hex_val = _dominant_color(reddish)
        assert name == "red"
        assert hex_val == "#dc2626"

    def test_blue_hue(self):
        """Pixels with hue ~100 should be blue."""
        bluish = np.full((50, 50, 3), [110, 200, 200], dtype=np.uint8)
        name, hex_val = _dominant_color(bluish)
        assert name == "blue"
        assert hex_val == "#2563eb"

    def test_green_hue(self):
        """Pixels with hue ~60 should be green."""
        greenish = np.full((50, 50, 3), [60, 200, 200], dtype=np.uint8)
        name, hex_val = _dominant_color(greenish)
        assert name == "green"
        assert hex_val == "#16a34a"

    def test_yellow_hue(self):
        """Pixels with hue ~30 should be yellow."""
        yellowish = np.full((50, 50, 3), [30, 200, 200], dtype=np.uint8)
        name, _ = _dominant_color(yellowish)
        assert name == "yellow"

    def test_orange_hue(self):
        """Pixels with hue ~15 should be orange."""
        orangish = np.full((50, 50, 3), [15, 200, 200], dtype=np.uint8)
        name, _ = _dominant_color(orangish)
        assert name == "orange"

    def test_purple_hue(self):
        """Pixels with hue ~140 should be purple."""
        purplish = np.full((50, 50, 3), [140, 200, 200], dtype=np.uint8)
        name, _ = _dominant_color(purplish)
        assert name == "purple"
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# detect_graphic_elements tests
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
class TestDetectGraphicElements:
    """Tests for the detect_graphic_elements() function."""

    def test_none_image_returns_empty(self):
        """None input should return empty list."""
        assert detect_graphic_elements(None, []) == []

    def test_white_image_no_graphics(self):
        """A plain white image should produce no graphic elements."""
        blank_page = _white_image()
        assert detect_graphic_elements(blank_page, []) == []

    def test_colored_region_detected_as_image(self):
        """A large colored rectangle should be detected as an image."""
        page = _white_image()
        # A big solid red block — far too large to be colored text.
        _draw_colored_region(page, x=100, y=300, w=200, h=200, color_bgr=(0, 0, 220))

        found = detect_graphic_elements(page, word_boxes=[])

        assert len(found) >= 1
        first = found[0]
        assert isinstance(first, GraphicElement)
        assert first.shape == "image"
        assert first.color_name == "red"
        assert first.confidence > 0

    def test_colored_text_excluded_by_word_overlap(self):
        """Colored regions that overlap heavily with word boxes should be skipped."""
        page = _white_image()
        _draw_colored_region(page, x=100, y=300, w=400, h=50, color_bgr=(0, 0, 220))
        # Word boxes covering >50% of the colored region.
        covering_words = [
            _word_box(100, 300, 200, 50),
            _word_box(300, 300, 200, 50),
        ]

        found = detect_graphic_elements(page, word_boxes=covering_words)

        # Any detection right where the colored text sits means the
        # word-overlap exclusion failed.
        for candidate in found:
            if 90 <= candidate.x <= 110 and 290 <= candidate.y <= 310:
                pytest.fail("Colored text region should be excluded by word overlap")

    def test_colored_graphic_with_low_word_overlap_kept(self):
        """A colored region with low word overlap should be kept."""
        page = _white_image()
        _draw_colored_circle(page, cx=300, cy=400, radius=80, color_bgr=(0, 200, 0))
        # A single small word box touches only a sliver of the circle.
        tiny_word = [_word_box(250, 390, 30, 20)]

        found = detect_graphic_elements(page, word_boxes=tiny_word)

        assert len(found) >= 1
        assert found[0].shape == "image"
        assert found[0].color_name == "green"

    def test_black_illustration_detected(self):
        """A large black filled area should be detected as illustration."""
        page = _white_image()
        _draw_black_illustration(page, x=200, y=400, w=300, h=300)

        found = detect_graphic_elements(page, word_boxes=[])

        assert len(found) >= 1
        illustrations = [g for g in found if g.shape == "illustration"]
        assert len(illustrations) >= 1
        assert illustrations[0].color_name == "black"

    def test_black_illustration_excluded_by_word_boxes(self):
        """Black ink in word regions should NOT be detected as illustration."""
        page = _white_image()
        # Black text-like strip fully covered by word boxes.
        _draw_black_illustration(page, x=100, y=300, w=400, h=60)
        covering_words = [
            _word_box(100, 300, 200, 60),
            _word_box(300, 300, 200, 60),
        ]

        found = detect_graphic_elements(page, word_boxes=covering_words)

        # The word exclusion mask should swallow all of the ink.
        illustrations = [g for g in found if g.shape == "illustration"]
        assert len(illustrations) == 0

    def test_tiny_colored_region_filtered(self):
        """Very small colored regions (<200 colored pixels) should be filtered."""
        page = _white_image()
        # A 5x5 colored dot — well below the pixel-count threshold.
        _draw_colored_region(page, x=500, y=500, w=5, h=5, color_bgr=(220, 0, 0))
        assert detect_graphic_elements(page, word_boxes=[]) == []

    def test_page_spanning_region_filtered(self):
        """Colored regions spanning >50% of width/height should be skipped."""
        page = _white_image(width=1200, height=1800)
        # 700px wide on a 1200px page — beyond the 50% width cap.
        _draw_colored_region(page, x=50, y=300, w=700, h=100, color_bgr=(0, 0, 220))
        assert detect_graphic_elements(page, word_boxes=[]) == []

    def test_multiple_graphics_detected(self):
        """Multiple separate colored regions should all be detected."""
        page = _white_image()
        for cx, cy, bgr in (
            (200, 300, (0, 0, 220)),
            (500, 300, (0, 200, 0)),
            (200, 600, (220, 0, 0)),
        ):
            _draw_colored_circle(page, cx=cx, cy=cy, radius=60, color_bgr=bgr)

        found = detect_graphic_elements(page, word_boxes=[])

        # At least 2 expected — dilation may merge neighbouring blobs.
        assert len(found) >= 2

    def test_results_sorted_by_area_descending(self):
        """Results should be sorted by area, largest first."""
        page = _white_image()
        _draw_colored_circle(page, cx=200, cy=300, radius=30, color_bgr=(0, 0, 220))   # small
        _draw_colored_circle(page, cx=600, cy=800, radius=100, color_bgr=(0, 200, 0))  # large

        found = detect_graphic_elements(page, word_boxes=[])

        if len(found) >= 2:
            assert found[0].area >= found[1].area

    def test_max_elements_limit(self):
        """Should respect max_elements parameter."""
        page = _white_image(width=2000, height=2000)
        for i in range(10):
            _draw_colored_circle(page, cx=100 + i * 180, cy=300, radius=40,
                                 color_bgr=(0, 0, 220))

        found = detect_graphic_elements(page, word_boxes=[], max_elements=3)

        assert len(found) <= 3

    def test_detected_boxes_excluded_from_ink(self):
        """Detected box regions should be excluded from ink illustration detection."""
        page = _white_image()
        # Black shape drawn well inside the box area (an 8px inset is used).
        _draw_black_illustration(page, x=120, y=320, w=360, h=160)
        # Mark the outer box — its 8px inset still covers the drawn region.
        outer_boxes = [{"x": 100, "y": 300, "w": 400, "h": 200}]

        found = detect_graphic_elements(page, word_boxes=[], detected_boxes=outer_boxes)

        illustrations = [g for g in found if g.shape == "illustration"]
        assert len(illustrations) == 0

    def test_deduplication_overlapping_regions(self):
        """Overlapping elements should be deduplicated."""
        page = _white_image()
        _draw_colored_region(page, x=200, y=300, w=200, h=200, color_bgr=(0, 0, 220))
        _draw_colored_region(page, x=250, y=350, w=200, h=200, color_bgr=(0, 0, 220))

        found = detect_graphic_elements(page, word_boxes=[])

        # Heavy dilation should merge the pair into at most two elements.
        assert len(found) <= 2

    def test_graphicelement_dataclass_fields(self):
        """GraphicElement should have all expected fields."""
        elem = GraphicElement(
            x=10, y=20, width=100, height=80,
            area=5000, shape="image",
            color_name="red", color_hex="#dc2626",
            confidence=0.85,
        )
        expected = {
            "x": 10, "y": 20, "width": 100, "height": 80,
            "area": 5000, "shape": "image",
            "color_name": "red", "color_hex": "#dc2626",
            "confidence": 0.85,
        }
        for field_name, value in expected.items():
            assert getattr(elem, field_name) == value
        assert elem.contour is None

    def test_small_ink_area_filtered(self):
        """Black ink areas smaller than 5000px should be filtered."""
        page = _white_image()
        # 50x50 = 2500 px² of black — under the 5000 threshold.
        _draw_black_illustration(page, x=500, y=500, w=50, h=50)

        found = detect_graphic_elements(page, word_boxes=[])

        illustrations = [g for g in found if g.shape == "illustration"]
        assert len(illustrations) == 0
|
||||
Reference in New Issue
Block a user