Add exclude region marking in Structure step
Some checks failed
CI / go-lint (push) Has been skipped
CI / python-lint (push) Has been skipped
CI / nodejs-lint (push) Has been skipped
CI / test-go-school (push) Successful in 26s
CI / test-go-edu-search (push) Successful in 25s
CI / test-python-klausur (push) Failing after 1m47s
CI / test-python-agent-core (push) Successful in 16s
CI / test-nodejs-website (push) Successful in 16s

Users can now draw rectangles on the document image in the Structure
Detection step to mark areas (e.g. header graphics, alphabet strips)
that should be excluded from OCR results during grid building.

- Backend: PUT/DELETE endpoints for exclude regions stored in structure_result
- Backend: _build_grid_core() filters all words inside user-defined exclude regions
- Frontend: Interactive rectangle drawing with visual overlay and delete buttons
- Preserve exclude regions when re-running structure detection

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
Benjamin Admin
2026-03-19 09:08:30 +01:00
parent c09838e91c
commit f9d71d50d1
4 changed files with 417 additions and 10 deletions

View File

@@ -225,6 +225,14 @@ export interface StructureGraphic {
confidence: number
}
export interface ExcludeRegion {
x: number
y: number
w: number
h: number
label?: string
}
export interface StructureResult {
image_width: number
image_height: number
@@ -232,6 +240,7 @@ export interface StructureResult {
boxes: StructureBox[]
zones: StructureZone[]
graphics: StructureGraphic[]
exclude_regions?: ExcludeRegion[]
color_pixel_counts: Record<string, number>
has_words: boolean
word_count: number

View File

@@ -1,7 +1,7 @@
'use client'
import { useEffect, useState } from 'react'
import type { StructureResult } from '@/app/(admin)/ai/ocr-pipeline/types'
import { useCallback, useEffect, useRef, useState } from 'react'
import type { ExcludeRegion, StructureResult } from '@/app/(admin)/ai/ocr-pipeline/types'
const KLAUSUR_API = '/klausur-api'
@@ -19,6 +19,77 @@ const COLOR_HEX: Record<string, string> = {
purple: '#9333ea',
}
/**
* Convert a mouse event on the image container to image-pixel coordinates.
* The image uses object-contain inside an A4-ratio container, so we need
* to account for letterboxing.
*/
function mouseToImageCoords(
e: React.MouseEvent,
containerEl: HTMLElement,
imgWidth: number,
imgHeight: number,
): { x: number; y: number } | null {
const rect = containerEl.getBoundingClientRect()
const containerW = rect.width
const containerH = rect.height
// object-contain: image is scaled to fit, centered
const scaleX = containerW / imgWidth
const scaleY = containerH / imgHeight
const scale = Math.min(scaleX, scaleY)
const renderedW = imgWidth * scale
const renderedH = imgHeight * scale
const offsetX = (containerW - renderedW) / 2
const offsetY = (containerH - renderedH) / 2
const relX = e.clientX - rect.left - offsetX
const relY = e.clientY - rect.top - offsetY
if (relX < 0 || relY < 0 || relX > renderedW || relY > renderedH) {
return null
}
return {
x: Math.round(relX / scale),
y: Math.round(relY / scale),
}
}
/**
* Convert image-pixel coordinates to container-relative percentages
* for overlay positioning.
*/
function imageToOverlayPct(
region: { x: number; y: number; w: number; h: number },
containerW: number,
containerH: number,
imgWidth: number,
imgHeight: number,
): { left: string; top: string; width: string; height: string } {
const scaleX = containerW / imgWidth
const scaleY = containerH / imgHeight
const scale = Math.min(scaleX, scaleY)
const renderedW = imgWidth * scale
const renderedH = imgHeight * scale
const offsetX = (containerW - renderedW) / 2
const offsetY = (containerH - renderedH) / 2
const left = offsetX + region.x * scale
const top = offsetY + region.y * scale
const width = region.w * scale
const height = region.h * scale
return {
left: `${(left / containerW) * 100}%`,
top: `${(top / containerH) * 100}%`,
width: `${(width / containerW) * 100}%`,
height: `${(height / containerH) * 100}%`,
}
}
export function StepStructureDetection({ sessionId, onNext }: StepStructureDetectionProps) {
const [result, setResult] = useState<StructureResult | null>(null)
const [detecting, setDetecting] = useState(false)
@@ -26,6 +97,30 @@ export function StepStructureDetection({ sessionId, onNext }: StepStructureDetec
const [hasRun, setHasRun] = useState(false)
const [overlayTs, setOverlayTs] = useState(0)
// Exclude region drawing state
const [excludeRegions, setExcludeRegions] = useState<ExcludeRegion[]>([])
const [drawing, setDrawing] = useState(false)
const [drawStart, setDrawStart] = useState<{ x: number; y: number } | null>(null)
const [drawCurrent, setDrawCurrent] = useState<{ x: number; y: number } | null>(null)
const [saving, setSaving] = useState(false)
const [drawMode, setDrawMode] = useState(false)
const containerRef = useRef<HTMLDivElement>(null)
const [containerSize, setContainerSize] = useState({ w: 0, h: 0 })
// Track container size for overlay positioning
useEffect(() => {
const el = containerRef.current
if (!el) return
const obs = new ResizeObserver((entries) => {
for (const entry of entries) {
setContainerSize({ w: entry.contentRect.width, h: entry.contentRect.height })
}
})
obs.observe(el)
return () => obs.disconnect()
}, [])
// Auto-trigger detection on mount
useEffect(() => {
if (!sessionId || hasRun) return
@@ -36,7 +131,6 @@ export function StepStructureDetection({ sessionId, onNext }: StepStructureDetec
setError(null)
try {
// Always re-run detection to pick up latest word_result from OCR step
const res = await fetch(`${KLAUSUR_API}/api/v1/ocr-pipeline/sessions/${sessionId}/detect-structure`, {
method: 'POST',
})
@@ -47,6 +141,7 @@ export function StepStructureDetection({ sessionId, onNext }: StepStructureDetec
const data = await res.json()
setResult(data)
setExcludeRegions(data.exclude_regions || [])
setOverlayTs(Date.now())
} catch (e) {
setError(e instanceof Error ? e.message : 'Unbekannter Fehler')
@@ -69,6 +164,7 @@ export function StepStructureDetection({ sessionId, onNext }: StepStructureDetec
if (!res.ok) throw new Error('Erneute Erkennung fehlgeschlagen')
const data = await res.json()
setResult(data)
setExcludeRegions(data.exclude_regions || [])
setOverlayTs(Date.now())
} catch (e) {
setError(e instanceof Error ? e.message : 'Unbekannter Fehler')
@@ -77,6 +173,84 @@ export function StepStructureDetection({ sessionId, onNext }: StepStructureDetec
}
}
// Save exclude regions to backend
const saveExcludeRegions = useCallback(async (regions: ExcludeRegion[]) => {
if (!sessionId) return
setSaving(true)
try {
const res = await fetch(`${KLAUSUR_API}/api/v1/ocr-pipeline/sessions/${sessionId}/exclude-regions`, {
method: 'PUT',
headers: { 'Content-Type': 'application/json' },
body: JSON.stringify({ regions }),
})
if (!res.ok) throw new Error('Speichern fehlgeschlagen')
} catch (e) {
setError(e instanceof Error ? e.message : 'Speichern fehlgeschlagen')
} finally {
setSaving(false)
}
}, [sessionId])
// Mouse handlers for drawing exclude rectangles
const handleMouseDown = useCallback((e: React.MouseEvent) => {
if (!drawMode || !containerRef.current || !result) return
const coords = mouseToImageCoords(e, containerRef.current, result.image_width, result.image_height)
if (coords) {
setDrawing(true)
setDrawStart(coords)
setDrawCurrent(coords)
}
}, [drawMode, result])
const handleMouseMove = useCallback((e: React.MouseEvent) => {
if (!drawing || !containerRef.current || !result) return
const coords = mouseToImageCoords(e, containerRef.current, result.image_width, result.image_height)
if (coords) {
setDrawCurrent(coords)
}
}, [drawing, result])
const handleMouseUp = useCallback(() => {
if (!drawing || !drawStart || !drawCurrent) {
setDrawing(false)
return
}
const x = Math.min(drawStart.x, drawCurrent.x)
const y = Math.min(drawStart.y, drawCurrent.y)
const w = Math.abs(drawCurrent.x - drawStart.x)
const h = Math.abs(drawCurrent.y - drawStart.y)
// Minimum size to avoid accidental clicks
if (w > 10 && h > 10) {
const newRegion: ExcludeRegion = { x, y, w, h, label: `Bereich ${excludeRegions.length + 1}` }
const updated = [...excludeRegions, newRegion]
setExcludeRegions(updated)
saveExcludeRegions(updated)
}
setDrawing(false)
setDrawStart(null)
setDrawCurrent(null)
}, [drawing, drawStart, drawCurrent, excludeRegions, saveExcludeRegions])
const handleDeleteRegion = useCallback(async (index: number) => {
if (!sessionId) return
setSaving(true)
try {
const res = await fetch(`${KLAUSUR_API}/api/v1/ocr-pipeline/sessions/${sessionId}/exclude-regions/${index}`, {
method: 'DELETE',
})
if (!res.ok) throw new Error('Loeschen fehlgeschlagen')
const updated = excludeRegions.filter((_, i) => i !== index)
setExcludeRegions(updated)
} catch (e) {
setError(e instanceof Error ? e.message : 'Loeschen fehlgeschlagen')
} finally {
setSaving(false)
}
}, [sessionId, excludeRegions])
if (!sessionId) {
return <div className="text-sm text-gray-400">Keine Session ausgewaehlt.</div>
}
@@ -84,6 +258,16 @@ export function StepStructureDetection({ sessionId, onNext }: StepStructureDetec
const croppedUrl = `${KLAUSUR_API}/api/v1/ocr-pipeline/sessions/${sessionId}/image/cropped`
const overlayUrl = `${KLAUSUR_API}/api/v1/ocr-pipeline/sessions/${sessionId}/image/structure-overlay${overlayTs ? `?t=${overlayTs}` : ''}`
// Current drag rectangle in image coords
const dragRect = drawing && drawStart && drawCurrent
? {
x: Math.min(drawStart.x, drawCurrent.x),
y: Math.min(drawStart.y, drawCurrent.y),
w: Math.abs(drawCurrent.x - drawStart.x),
h: Math.abs(drawCurrent.y - drawStart.y),
}
: null
return (
<div className="space-y-4">
{/* Loading indicator */}
@@ -94,23 +278,97 @@ export function StepStructureDetection({ sessionId, onNext }: StepStructureDetec
</div>
)}
{/* Draw mode toggle */}
{result && (
<div className="flex items-center gap-3">
<button
onClick={() => setDrawMode(!drawMode)}
className={`px-4 py-2 text-sm rounded-lg font-medium transition-colors ${
drawMode
? 'bg-red-600 text-white hover:bg-red-700'
: 'bg-gray-100 dark:bg-gray-700 text-gray-700 dark:text-gray-300 hover:bg-gray-200 dark:hover:bg-gray-600'
}`}
>
{drawMode ? 'Zeichnen beenden' : 'Ausschlussbereich zeichnen'}
</button>
{drawMode && (
<span className="text-xs text-red-600 dark:text-red-400">
Rechteck auf dem Bild zeichnen um Bereiche von der OCR-Erkennung auszuschliessen
</span>
)}
{saving && (
<span className="text-xs text-gray-400">Speichern...</span>
)}
</div>
)}
{/* Two-column image comparison */}
<div className="grid grid-cols-1 lg:grid-cols-2 gap-4">
{/* Left: Original document */}
{/* Left: Original document with exclude region drawing */}
<div className="space-y-2">
<div className="text-xs font-medium text-gray-500 dark:text-gray-400 uppercase tracking-wider">
Original
Original {excludeRegions.length > 0 && `(${excludeRegions.length} Ausschlussbereich${excludeRegions.length !== 1 ? 'e' : ''})`}
</div>
<div className="relative bg-gray-100 dark:bg-gray-800 rounded-lg overflow-hidden" style={{ aspectRatio: '210/297' }}>
<div
ref={containerRef}
className={`relative bg-gray-100 dark:bg-gray-800 rounded-lg overflow-hidden ${
drawMode ? 'cursor-crosshair' : ''
}`}
style={{ aspectRatio: '210/297' }}
onMouseDown={handleMouseDown}
onMouseMove={handleMouseMove}
onMouseUp={handleMouseUp}
onMouseLeave={() => {
if (drawing) {
handleMouseUp()
}
}}
>
{/* eslint-disable-next-line @next/next/no-img-element */}
<img
src={croppedUrl}
alt="Originaldokument"
className="w-full h-full object-contain"
className="w-full h-full object-contain pointer-events-none"
draggable={false}
onError={(e) => {
(e.target as HTMLImageElement).style.display = 'none'
}}
/>
{/* Saved exclude regions overlay */}
{result && containerSize.w > 0 && excludeRegions.map((region, i) => {
const pos = imageToOverlayPct(region, containerSize.w, containerSize.h, result.image_width, result.image_height)
return (
<div
key={i}
className="absolute border-2 border-red-500 bg-red-500/20 group"
style={pos}
>
<div className="absolute -top-5 left-0 flex items-center gap-1 opacity-0 group-hover:opacity-100 transition-opacity">
<span className="text-[10px] bg-red-600 text-white px-1 rounded whitespace-nowrap">
{region.label || `Bereich ${i + 1}`}
</span>
<button
onClick={(e) => { e.stopPropagation(); handleDeleteRegion(i) }}
className="w-4 h-4 bg-red-600 text-white rounded-full text-[10px] flex items-center justify-center hover:bg-red-700"
>
x
</button>
</div>
</div>
)
})}
{/* Current drag rectangle */}
{dragRect && result && containerSize.w > 0 && (() => {
const pos = imageToOverlayPct(dragRect, containerSize.w, containerSize.h, result.image_width, result.image_height)
return (
<div
className="absolute border-2 border-red-500 border-dashed bg-red-500/15"
style={pos}
/>
)
})()}
</div>
</div>
@@ -133,6 +391,34 @@ export function StepStructureDetection({ sessionId, onNext }: StepStructureDetec
</div>
</div>
{/* Exclude regions list */}
{excludeRegions.length > 0 && (
<div className="bg-red-50 dark:bg-red-900/10 rounded-lg border border-red-200 dark:border-red-800 p-3">
<h4 className="text-xs font-medium text-red-700 dark:text-red-400 mb-2">
Ausschlussbereiche ({excludeRegions.length}) Woerter in diesen Bereichen werden nicht erkannt
</h4>
<div className="space-y-1">
{excludeRegions.map((region, i) => (
<div key={i} className="flex items-center gap-3 text-xs">
<span className="w-3 h-3 rounded-sm flex-shrink-0 bg-red-500/30 border border-red-500" />
<span className="text-red-700 dark:text-red-400 font-medium">
{region.label || `Bereich ${i + 1}`}
</span>
<span className="font-mono text-red-600/70 dark:text-red-400/70">
{region.w}x{region.h}px @ ({region.x}, {region.y})
</span>
<button
onClick={() => handleDeleteRegion(i)}
className="ml-auto text-red-500 hover:text-red-700 dark:hover:text-red-300"
>
Entfernen
</button>
</div>
))}
</div>
</div>
)}
{/* Result info */}
{result && (
<div className="bg-white dark:bg-gray-800 rounded-lg border border-gray-200 dark:border-gray-700 p-4 space-y-3">
@@ -154,6 +440,11 @@ export function StepStructureDetection({ sessionId, onNext }: StepStructureDetec
{result.word_count} Woerter
</span>
)}
{excludeRegions.length > 0 && (
<span className="inline-flex items-center gap-1.5 px-3 py-1 rounded-full bg-red-50 dark:bg-red-900/20 text-red-700 dark:text-red-400 text-xs font-medium">
{excludeRegions.length} Ausschluss
</span>
)}
{(result.border_ghosts_removed ?? 0) > 0 && (
<span className="inline-flex items-center gap-1.5 px-3 py-1 rounded-full bg-red-50 dark:bg-red-900/20 text-red-700 dark:text-red-400 text-xs font-medium">
{result.border_ghosts_removed} Rahmenlinien entfernt
@@ -244,7 +535,7 @@ export function StepStructureDetection({ sessionId, onNext }: StepStructureDetec
className="inline-flex items-center gap-1 px-2 py-1 rounded text-[11px] bg-purple-50 dark:bg-purple-900/20 text-purple-700 dark:text-purple-300 border border-purple-200 dark:border-purple-800"
>
{shape === 'arrow' ? '→' : shape === 'circle' ? '●' : shape === 'line' ? '─' : shape === 'exclamation' ? '❗' : shape === 'dot' ? '•' : shape === 'illustration' ? '🖼' : '◆'}
{' '}{shape} <span className="font-semibold">×{count}</span>
{' '}{shape} <span className="font-semibold">x{count}</span>
</span>
))}
</div>