feat: add Structure Detection step to OCR pipeline
Some checks failed
CI / go-lint (push) Has been skipped
CI / python-lint (push) Has been skipped
CI / nodejs-lint (push) Has been skipped
CI / test-go-school (push) Successful in 27s
CI / test-go-edu-search (push) Successful in 28s
CI / test-python-klausur (push) Failing after 1m58s
CI / test-python-agent-core (push) Successful in 17s
CI / test-nodejs-website (push) Successful in 16s
Some checks failed
CI / go-lint (push) Has been skipped
CI / python-lint (push) Has been skipped
CI / nodejs-lint (push) Has been skipped
CI / test-go-school (push) Successful in 27s
CI / test-go-edu-search (push) Successful in 28s
CI / test-python-klausur (push) Failing after 1m58s
CI / test-python-agent-core (push) Successful in 17s
CI / test-nodejs-website (push) Successful in 16s
New pipeline step between Crop and Columns that visualizes the detected document structure: boxes (line-based + shading), page zones, and color regions. It shows the original image on the left and the annotated overlay on the right.
Backend: POST /detect-structure endpoint + /image/structure-overlay.
Frontend: StepStructureDetection component with zone/box/color details.
Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -7,6 +7,7 @@ import { StepOrientation } from '@/components/ocr-pipeline/StepOrientation'
|
||||
import { StepCrop } from '@/components/ocr-pipeline/StepCrop'
|
||||
import { StepDeskew } from '@/components/ocr-pipeline/StepDeskew'
|
||||
import { StepDewarp } from '@/components/ocr-pipeline/StepDewarp'
|
||||
import { StepStructureDetection } from '@/components/ocr-pipeline/StepStructureDetection'
|
||||
import { StepColumnDetection } from '@/components/ocr-pipeline/StepColumnDetection'
|
||||
import { StepRowDetection } from '@/components/ocr-pipeline/StepRowDetection'
|
||||
import { StepWordRecognition } from '@/components/ocr-pipeline/StepWordRecognition'
|
||||
@@ -91,15 +92,15 @@ export default function OcrPipelinePage() {
|
||||
let uiStep = Math.max(0, dbStep - 1)
|
||||
const skipSteps = [...(savedDocType?.skip_steps || [])]
|
||||
|
||||
// Sub-sessions: image is already cropped, skip pre-processing steps
|
||||
// Jump directly to columns (UI step 4) unless already further ahead
|
||||
// Sub-sessions: image is already cropped, skip pre-processing + structure steps
|
||||
// Jump directly to columns (UI step 5) unless already further ahead
|
||||
const isSubSession = !!data.parent_session_id
|
||||
const SUB_SESSION_SKIP = ['orientation', 'deskew', 'dewarp', 'crop']
|
||||
const SUB_SESSION_SKIP = ['orientation', 'deskew', 'dewarp', 'crop', 'structure']
|
||||
if (isSubSession) {
|
||||
for (const s of SUB_SESSION_SKIP) {
|
||||
if (!skipSteps.includes(s)) skipSteps.push(s)
|
||||
}
|
||||
if (uiStep < 4) uiStep = 4 // columns step
|
||||
if (uiStep < 5) uiStep = 5 // columns step (now index 5)
|
||||
}
|
||||
|
||||
setSteps(
|
||||
@@ -329,12 +330,13 @@ export default function OcrPipelinePage() {
|
||||
2: 'Begradigung',
|
||||
3: 'Entzerrung',
|
||||
4: 'Zuschneiden',
|
||||
5: 'Spalten',
|
||||
6: 'Zeilen',
|
||||
7: 'Woerter',
|
||||
8: 'Korrektur',
|
||||
9: 'Rekonstruktion',
|
||||
10: 'Validierung',
|
||||
5: 'Struktur',
|
||||
6: 'Spalten',
|
||||
7: 'Zeilen',
|
||||
8: 'Woerter',
|
||||
9: 'Korrektur',
|
||||
10: 'Rekonstruktion',
|
||||
11: 'Validierung',
|
||||
}
|
||||
|
||||
const reprocessFromStep = useCallback(async (uiStep: number) => {
|
||||
@@ -371,16 +373,18 @@ export default function OcrPipelinePage() {
|
||||
case 3:
|
||||
return <StepCrop sessionId={sessionId} onNext={handleCropNext} />
|
||||
case 4:
|
||||
return <StepColumnDetection sessionId={sessionId} onNext={handleNext} onBoxSessionsCreated={handleBoxSessionsCreated} />
|
||||
return <StepStructureDetection sessionId={sessionId} onNext={handleNext} />
|
||||
case 5:
|
||||
return <StepRowDetection sessionId={sessionId} onNext={handleNext} />
|
||||
return <StepColumnDetection sessionId={sessionId} onNext={handleNext} onBoxSessionsCreated={handleBoxSessionsCreated} />
|
||||
case 6:
|
||||
return <StepWordRecognition sessionId={sessionId} onNext={handleNext} goToStep={goToStep} />
|
||||
return <StepRowDetection sessionId={sessionId} onNext={handleNext} />
|
||||
case 7:
|
||||
return <StepLlmReview sessionId={sessionId} onNext={handleNext} />
|
||||
return <StepWordRecognition sessionId={sessionId} onNext={handleNext} goToStep={goToStep} />
|
||||
case 8:
|
||||
return <StepReconstruction sessionId={sessionId} onNext={handleNext} />
|
||||
return <StepLlmReview sessionId={sessionId} onNext={handleNext} />
|
||||
case 9:
|
||||
return <StepReconstruction sessionId={sessionId} onNext={handleNext} />
|
||||
case 10:
|
||||
return <StepGroundTruth sessionId={sessionId} onNext={handleNext} />
|
||||
default:
|
||||
return null
|
||||
|
||||
@@ -213,6 +213,38 @@ export interface RowGroundTruth {
|
||||
notes?: string
|
||||
}
|
||||
|
||||
/**
 * Payload describing the detected structure of a page.
 *
 * The StepStructureDetection component reads this either from the
 * `structure_result` field of a session or from the response of the
 * `detect-structure` endpoint.
 */
export interface StructureResult {
  /** Width of the analysed image (rendered with a `px` suffix in the UI). */
  image_width: number
  /** Height of the analysed image (rendered with a `px` suffix in the UI). */
  image_height: number
  /** Bounding rectangle of the page content. NOTE(review): presumably in image pixels — confirm against the backend. */
  content_bounds: { x: number; y: number; w: number; h: number }
  /** Detected boxes. */
  boxes: StructureBox[]
  /** Detected page zones. */
  zones: StructureZone[]
  /** Pixel count per detected color name. */
  color_pixel_counts: Record<string, number>
  /** Whether word information is available; the UI only shows `word_count` when this is true. */
  has_words: boolean
  /** Number of words. */
  word_count: number
  /** Duration of the detection run in seconds. */
  duration_seconds: number
}
|
||||
|
||||
/**
 * A single box detected on the page.
 *
 * The UI renders the geometry as `{w}x{h}px @ ({x}, {y})`.
 */
export interface StructureBox {
  /** Horizontal position of the box. */
  x: number
  /** Vertical position of the box. */
  y: number
  /** Box width. */
  w: number
  /** Box height. */
  h: number
  /** Detection confidence. NOTE(review): presumably in the range 0..1, since the UI multiplies it by 100 for display — confirm. */
  confidence: number
  /** Border thickness; the UI only displays it when greater than 0. */
  border_thickness: number
  /** Optional name of the background color (the UI hides the values 'unknown' and 'white'). */
  bg_color_name?: string
  /** Optional background color used as a CSS color for the swatch in the UI. */
  bg_color_hex?: string
}
|
||||
|
||||
/**
 * A page zone produced by structure detection.
 *
 * The UI labels zones of type 'box' as "Box" and all others as "Inhalt",
 * followed by the zone index and its `{w}x{h}` size.
 */
export interface StructureZone {
  /** Zone index; used as the React list key and shown in the zone label. */
  index: number
  /** Kind of zone: regular page content or a detected box. */
  zone_type: 'content' | 'box'
  /** Horizontal position of the zone. */
  x: number
  /** Vertical position of the zone. */
  y: number
  /** Zone width. */
  w: number
  /** Zone height. */
  h: number
}
|
||||
|
||||
export interface WordBbox {
|
||||
x: number
|
||||
y: number
|
||||
@@ -347,6 +379,7 @@ export const PIPELINE_STEPS: PipelineStep[] = [
|
||||
{ id: 'deskew', name: 'Begradigung', icon: '📐', status: 'pending' },
|
||||
{ id: 'dewarp', name: 'Entzerrung', icon: '🔧', status: 'pending' },
|
||||
{ id: 'crop', name: 'Zuschneiden', icon: '✂️', status: 'pending' },
|
||||
{ id: 'structure', name: 'Struktur', icon: '🔍', status: 'pending' },
|
||||
{ id: 'columns', name: 'Spalten', icon: '📊', status: 'pending' },
|
||||
{ id: 'rows', name: 'Zeilen', icon: '📏', status: 'pending' },
|
||||
{ id: 'words', name: 'Woerter', icon: '🔤', status: 'pending' },
|
||||
|
||||
275
admin-lehrer/components/ocr-pipeline/StepStructureDetection.tsx
Normal file
275
admin-lehrer/components/ocr-pipeline/StepStructureDetection.tsx
Normal file
@@ -0,0 +1,275 @@
|
||||
'use client'
|
||||
|
||||
import { useEffect, useState } from 'react'
|
||||
import type { StructureResult } from '@/app/(admin)/ai/ocr-pipeline/types'
|
||||
|
||||
const KLAUSUR_API = '/klausur-api'
|
||||
|
||||
/** Props accepted by the StepStructureDetection component. */
interface StepStructureDetectionProps {
  /** Id of the OCR pipeline session to analyse; when null the component renders a placeholder. */
  sessionId: string | null
  /** Invoked when the user clicks the "Weiter" button. */
  onNext: () => void
}
|
||||
|
||||
/**
 * Swatch colors for the color names listed in the "Erkannte Farben" section.
 *
 * Typed as an open string record because it is indexed with arbitrary color
 * names from the detection result; callers fall back to a neutral gray when a
 * name has no entry.
 */
const COLOR_HEX: Record<string, string> = {
  red: '#dc2626',
  orange: '#ea580c',
  yellow: '#ca8a04',
  green: '#16a34a',
  blue: '#2563eb',
  purple: '#9333ea',
}
|
||||
|
||||
/**
 * Asks the backend to (re)run structure detection for a session.
 *
 * @param sessionId - Session whose image should be analysed.
 * @param failureMessage - Message of the Error thrown on a non-OK response.
 * @param signal - Optional abort signal used to cancel the request.
 * @returns The parsed JSON body of the response.
 * @throws Error with `failureMessage` when the response status is not OK;
 *   network and abort errors from `fetch` propagate unchanged.
 */
async function requestStructureDetection(
  sessionId: string,
  failureMessage: string,
  signal?: AbortSignal,
): Promise<StructureResult> {
  const res = await fetch(
    `${KLAUSUR_API}/api/v1/ocr-pipeline/sessions/${sessionId}/detect-structure`,
    { method: 'POST', signal },
  )
  if (!res.ok) throw new Error(failureMessage)
  // The response body is not validated here; it is trusted to match StructureResult.
  const data: StructureResult = await res.json()
  return data
}

/**
 * Pipeline step that shows the cropped image next to the structure overlay
 * and lists the detected zones, boxes and colors.
 *
 * Whenever `sessionId` is set or changes, the component first loads the
 * session and reuses its `structure_result` if present; otherwise it triggers
 * a new detection run. "Erneut erkennen" always triggers a new run.
 *
 * @param sessionId - Session to analyse; a placeholder is rendered when null.
 * @param onNext - Called when the user clicks "Weiter".
 */
export function StepStructureDetection({ sessionId, onNext }: StepStructureDetectionProps) {
  const [result, setResult] = useState<StructureResult | null>(null)
  const [detecting, setDetecting] = useState(false)
  const [error, setError] = useState<string | null>(null)
  // Cache-busting timestamp for the overlay image; also used as its React key.
  const [overlayTs, setOverlayTs] = useState(0)

  // Load or detect the structure whenever the session changes.
  useEffect(() => {
    if (!sessionId) return

    const activeSessionId = sessionId
    // Aborting on cleanup prevents state updates after unmount, drops stale
    // responses when the session changes, and cancels the duplicate run that
    // React triggers in development strict mode.
    const controller = new AbortController()
    const { signal } = controller

    const runDetection = async () => {
      // Results of a previously shown session must not linger.
      setResult(null)
      setDetecting(true)
      setError(null)

      try {
        let detected: StructureResult | null = null

        // Reuse the structure result stored on the session, if any.
        const sessionRes = await fetch(
          `${KLAUSUR_API}/api/v1/ocr-pipeline/sessions/${activeSessionId}`,
          { signal },
        )
        if (sessionRes.ok) {
          const sessionData = await sessionRes.json()
          if (sessionData.structure_result) {
            detected = sessionData.structure_result
          }
        }

        if (!detected) {
          detected = await requestStructureDetection(
            activeSessionId,
            'Strukturerkennung fehlgeschlagen',
            signal,
          )
        }

        if (signal.aborted) return
        setResult(detected)
        setOverlayTs(Date.now())
      } catch (e) {
        // An aborted request is expected during cleanup, not an error to show.
        if (signal.aborted) return
        setError(e instanceof Error ? e.message : 'Unbekannter Fehler')
      } finally {
        if (!signal.aborted) setDetecting(false)
      }
    }

    void runDetection()

    return () => controller.abort()
  }, [sessionId])

  const handleRerun = async () => {
    if (!sessionId) return
    setDetecting(true)
    setError(null)
    try {
      const data = await requestStructureDetection(sessionId, 'Erneute Erkennung fehlgeschlagen')
      setResult(data)
      setOverlayTs(Date.now())
    } catch (e) {
      setError(e instanceof Error ? e.message : 'Unbekannter Fehler')
    } finally {
      setDetecting(false)
    }
  }

  if (!sessionId) {
    return <div className="text-sm text-gray-400">Keine Session ausgewaehlt.</div>
  }

  const croppedUrl = `${KLAUSUR_API}/api/v1/ocr-pipeline/sessions/${sessionId}/image/cropped`
  const overlayUrl = `${KLAUSUR_API}/api/v1/ocr-pipeline/sessions/${sessionId}/image/structure-overlay${overlayTs ? `?t=${overlayTs}` : ''}`

  return (
    <div className="space-y-4">
      {/* Loading indicator */}
      {detecting && (
        <div className="flex items-center gap-2 text-teal-600 dark:text-teal-400 text-sm">
          <div className="animate-spin w-4 h-4 border-2 border-teal-500 border-t-transparent rounded-full" />
          Dokumentstruktur wird analysiert...
        </div>
      )}

      {/* Two-column image comparison */}
      <div className="grid grid-cols-1 lg:grid-cols-2 gap-4">
        {/* Left: Original document */}
        <div className="space-y-2">
          <div className="text-xs font-medium text-gray-500 dark:text-gray-400 uppercase tracking-wider">
            Original
          </div>
          <div className="relative bg-gray-100 dark:bg-gray-800 rounded-lg overflow-hidden" style={{ aspectRatio: '210/297' }}>
            {/* eslint-disable-next-line @next/next/no-img-element */}
            <img
              src={croppedUrl}
              alt="Originaldokument"
              className="w-full h-full object-contain"
              onError={(e) => {
                (e.target as HTMLImageElement).style.display = 'none'
              }}
            />
          </div>
        </div>

        {/* Right: Structure overlay */}
        <div className="space-y-2">
          <div className="text-xs font-medium text-gray-500 dark:text-gray-400 uppercase tracking-wider">
            Erkannte Struktur
          </div>
          <div className="relative bg-gray-100 dark:bg-gray-800 rounded-lg overflow-hidden" style={{ aspectRatio: '210/297' }}>
            {/*
              Only request the overlay once a result exists, and remount the
              element for every new timestamp: onError hides the element
              imperatively, which would otherwise stay in effect for later,
              successful loads.
            */}
            {result && (
              // eslint-disable-next-line @next/next/no-img-element
              <img
                key={overlayTs}
                src={overlayUrl}
                alt="Strukturerkennung"
                className="w-full h-full object-contain"
                onError={(e) => {
                  (e.target as HTMLImageElement).style.display = 'none'
                }}
              />
            )}
          </div>
        </div>
      </div>

      {/* Result info */}
      {result && (
        <div className="bg-white dark:bg-gray-800 rounded-lg border border-gray-200 dark:border-gray-700 p-4 space-y-3">
          {/* Summary badges */}
          <div className="flex flex-wrap items-center gap-3 text-sm">
            <span className="inline-flex items-center gap-1.5 px-3 py-1 rounded-full bg-teal-50 dark:bg-teal-900/20 text-teal-700 dark:text-teal-400 text-xs font-medium">
              {result.zones.length} Zone(n)
            </span>
            <span className="inline-flex items-center gap-1.5 px-3 py-1 rounded-full bg-amber-50 dark:bg-amber-900/20 text-amber-700 dark:text-amber-400 text-xs font-medium">
              {result.boxes.length} Box(en)
            </span>
            {result.has_words && (
              <span className="inline-flex items-center gap-1.5 px-3 py-1 rounded-full bg-blue-50 dark:bg-blue-900/20 text-blue-700 dark:text-blue-400 text-xs font-medium">
                {result.word_count} Woerter
              </span>
            )}
            <span className="text-gray-400 text-xs ml-auto">
              {result.image_width}x{result.image_height}px | {result.duration_seconds}s
            </span>
          </div>

          {/* Boxes detail */}
          {result.boxes.length > 0 && (
            <div>
              <h4 className="text-xs font-medium text-gray-500 dark:text-gray-400 mb-2">Erkannte Boxen</h4>
              <div className="space-y-1.5">
                {result.boxes.map((box, i) => (
                  <div key={i} className="flex items-center gap-3 text-xs">
                    <span
                      className="w-3 h-3 rounded-sm flex-shrink-0 border border-gray-300 dark:border-gray-600"
                      style={{ backgroundColor: box.bg_color_hex || '#6b7280' }}
                    />
                    <span className="text-gray-600 dark:text-gray-400">
                      Box {i + 1}:
                    </span>
                    <span className="font-mono text-gray-500">
                      {box.w}x{box.h}px @ ({box.x}, {box.y})
                    </span>
                    {box.bg_color_name && box.bg_color_name !== 'unknown' && box.bg_color_name !== 'white' && (
                      <span className="px-1.5 py-0.5 rounded bg-gray-100 dark:bg-gray-700 text-gray-500">
                        {box.bg_color_name}
                      </span>
                    )}
                    {box.border_thickness > 0 && (
                      <span className="text-gray-400">
                        Rahmen: {box.border_thickness}px
                      </span>
                    )}
                    <span className="text-gray-400">
                      {Math.round(box.confidence * 100)}%
                    </span>
                  </div>
                ))}
              </div>
            </div>
          )}

          {/* Zones detail */}
          <div>
            <h4 className="text-xs font-medium text-gray-500 dark:text-gray-400 mb-2">Seitenzonen</h4>
            <div className="flex flex-wrap gap-2">
              {result.zones.map((zone) => (
                <span
                  key={zone.index}
                  className={`inline-flex items-center gap-1 px-2 py-1 rounded text-[11px] font-medium ${
                    zone.zone_type === 'box'
                      ? 'bg-amber-50 dark:bg-amber-900/20 text-amber-700 dark:text-amber-300 border border-amber-200 dark:border-amber-800'
                      : 'bg-gray-50 dark:bg-gray-800 text-gray-500 dark:text-gray-400 border border-gray-200 dark:border-gray-700'
                  }`}
                >
                  {zone.zone_type === 'box' ? 'Box' : 'Inhalt'} {zone.index}
                  <span className="text-[10px] font-normal opacity-70">
                    ({zone.w}x{zone.h})
                  </span>
                </span>
              ))}
            </div>
          </div>

          {/* Color regions, largest pixel count first */}
          {Object.keys(result.color_pixel_counts).length > 0 && (
            <div>
              <h4 className="text-xs font-medium text-gray-500 dark:text-gray-400 mb-2">Erkannte Farben</h4>
              <div className="flex flex-wrap gap-2">
                {Object.entries(result.color_pixel_counts)
                  .sort(([, a], [, b]) => b - a)
                  .map(([name, count]) => (
                    <span key={name} className="inline-flex items-center gap-1.5 px-2 py-1 rounded text-[11px] bg-gray-50 dark:bg-gray-800 border border-gray-200 dark:border-gray-700">
                      <span
                        className="w-2.5 h-2.5 rounded-full"
                        style={{ backgroundColor: COLOR_HEX[name] || '#6b7280' }}
                      />
                      <span className="text-gray-600 dark:text-gray-400">{name}</span>
                      <span className="text-gray-400 text-[10px]">{count.toLocaleString()}px</span>
                    </span>
                  ))}
              </div>
            </div>
          )}
        </div>
      )}

      {/* Action buttons */}
      {result && (
        <div className="flex justify-between">
          <button
            onClick={handleRerun}
            disabled={detecting}
            className="px-4 py-2 text-sm text-gray-500 hover:text-gray-700 dark:text-gray-400 dark:hover:text-gray-200 transition-colors disabled:opacity-50"
          >
            Erneut erkennen
          </button>
          <button
            onClick={onNext}
            className="px-6 py-2 bg-teal-600 text-white rounded-lg hover:bg-teal-700 font-medium transition-colors"
          >
            Weiter →
          </button>
        </div>
      )}

      {error && (
        <div className="p-3 bg-red-50 dark:bg-red-900/20 text-red-600 dark:text-red-400 rounded-lg text-sm">
          {error}
        </div>
      )}
    </div>
  )
}
|
||||
Reference in New Issue
Block a user