feat: Sprint 2 — TrOCR ONNX, PP-DocLayout, Model Management
D2: TrOCR ONNX export script (printed + handwritten, int8 quantization) D3: PP-DocLayout ONNX export script (download or Docker-based conversion) B3: Model Management admin page (PyTorch vs ONNX status, benchmarks, config) A4: TrOCR ONNX service with runtime routing (auto/pytorch/onnx via TROCR_BACKEND) A5: PP-DocLayout ONNX detection with OpenCV fallback (via GRAPHIC_DETECT_BACKEND) B4: Structure Detection UI toggle (OpenCV vs PP-DocLayout) with class color coding C3: TrOCR-ONNX.md documentation C4: OCR-Pipeline.md ONNX section added C5: mkdocs.yml nav updated, optimum added to requirements.txt Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -19,6 +19,26 @@ const COLOR_HEX: Record<string, string> = {
|
||||
purple: '#9333ea',
|
||||
}
|
||||
|
||||
type DetectionMethod = 'auto' | 'opencv' | 'ppdoclayout'
|
||||
|
||||
/** Color map for PP-DocLayout region classes */
|
||||
const DOCLAYOUT_CLASS_COLORS: Record<string, string> = {
|
||||
table: '#2563eb',
|
||||
figure: '#16a34a',
|
||||
title: '#ea580c',
|
||||
text: '#6b7280',
|
||||
list: '#9333ea',
|
||||
header: '#0ea5e9',
|
||||
footer: '#64748b',
|
||||
equation: '#dc2626',
|
||||
}
|
||||
|
||||
const DOCLAYOUT_DEFAULT_COLOR = '#a3a3a3'
|
||||
|
||||
function getDocLayoutColor(className: string): string {
|
||||
return DOCLAYOUT_CLASS_COLORS[className.toLowerCase()] || DOCLAYOUT_DEFAULT_COLOR
|
||||
}
|
||||
|
||||
/**
|
||||
* Convert a mouse event on the image container to image-pixel coordinates.
|
||||
* The image uses object-contain inside an A4-ratio container, so we need
|
||||
@@ -96,6 +116,7 @@ export function StepStructureDetection({ sessionId, onNext }: StepStructureDetec
|
||||
const [error, setError] = useState<string | null>(null)
|
||||
const [hasRun, setHasRun] = useState(false)
|
||||
const [overlayTs, setOverlayTs] = useState(0)
|
||||
const [detectionMethod, setDetectionMethod] = useState<DetectionMethod>('auto')
|
||||
|
||||
// Exclude region drawing state
|
||||
const [excludeRegions, setExcludeRegions] = useState<ExcludeRegion[]>([])
|
||||
@@ -106,7 +127,9 @@ export function StepStructureDetection({ sessionId, onNext }: StepStructureDetec
|
||||
const [drawMode, setDrawMode] = useState(false)
|
||||
|
||||
const containerRef = useRef<HTMLDivElement>(null)
|
||||
const overlayContainerRef = useRef<HTMLDivElement>(null)
|
||||
const [containerSize, setContainerSize] = useState({ w: 0, h: 0 })
|
||||
const [overlayContainerSize, setOverlayContainerSize] = useState({ w: 0, h: 0 })
|
||||
|
||||
// Track container size for overlay positioning
|
||||
useEffect(() => {
|
||||
@@ -121,6 +144,19 @@ export function StepStructureDetection({ sessionId, onNext }: StepStructureDetec
|
||||
return () => obs.disconnect()
|
||||
}, [])
|
||||
|
||||
// Track overlay container size for PP-DocLayout region overlays
|
||||
useEffect(() => {
|
||||
const el = overlayContainerRef.current
|
||||
if (!el) return
|
||||
const obs = new ResizeObserver((entries) => {
|
||||
for (const entry of entries) {
|
||||
setOverlayContainerSize({ w: entry.contentRect.width, h: entry.contentRect.height })
|
||||
}
|
||||
})
|
||||
obs.observe(el)
|
||||
return () => obs.disconnect()
|
||||
}, [])
|
||||
|
||||
// Auto-trigger detection on mount
|
||||
useEffect(() => {
|
||||
if (!sessionId || hasRun) return
|
||||
@@ -131,7 +167,8 @@ export function StepStructureDetection({ sessionId, onNext }: StepStructureDetec
|
||||
setError(null)
|
||||
|
||||
try {
|
||||
const res = await fetch(`${KLAUSUR_API}/api/v1/ocr-pipeline/sessions/${sessionId}/detect-structure`, {
|
||||
const params = detectionMethod !== 'auto' ? `?method=${detectionMethod}` : ''
|
||||
const res = await fetch(`${KLAUSUR_API}/api/v1/ocr-pipeline/sessions/${sessionId}/detect-structure${params}`, {
|
||||
method: 'POST',
|
||||
})
|
||||
|
||||
@@ -158,7 +195,8 @@ export function StepStructureDetection({ sessionId, onNext }: StepStructureDetec
|
||||
setDetecting(true)
|
||||
setError(null)
|
||||
try {
|
||||
const res = await fetch(`${KLAUSUR_API}/api/v1/ocr-pipeline/sessions/${sessionId}/detect-structure`, {
|
||||
const params = detectionMethod !== 'auto' ? `?method=${detectionMethod}` : ''
|
||||
const res = await fetch(`${KLAUSUR_API}/api/v1/ocr-pipeline/sessions/${sessionId}/detect-structure${params}`, {
|
||||
method: 'POST',
|
||||
})
|
||||
if (!res.ok) throw new Error('Erneute Erkennung fehlgeschlagen')
|
||||
@@ -278,6 +316,31 @@ export function StepStructureDetection({ sessionId, onNext }: StepStructureDetec
|
||||
</div>
|
||||
)}
|
||||
|
||||
{/* Detection method toggle */}
|
||||
<div className="flex items-center gap-2">
|
||||
<span className="text-xs font-medium text-gray-500 dark:text-gray-400">Methode:</span>
|
||||
{(['auto', 'opencv', 'ppdoclayout'] as DetectionMethod[]).map((method) => (
|
||||
<button
|
||||
key={method}
|
||||
onClick={() => setDetectionMethod(method)}
|
||||
className={`px-3 py-1.5 text-xs rounded-md font-medium transition-colors ${
|
||||
detectionMethod === method
|
||||
? 'bg-teal-600 text-white'
|
||||
: 'bg-gray-100 dark:bg-gray-700 text-gray-600 dark:text-gray-300 hover:bg-gray-200 dark:hover:bg-gray-600'
|
||||
}`}
|
||||
>
|
||||
{method === 'auto' ? 'Auto' : method === 'opencv' ? 'OpenCV' : 'PP-DocLayout'}
|
||||
</button>
|
||||
))}
|
||||
<span className="text-[10px] text-gray-400 dark:text-gray-500 ml-1">
|
||||
{detectionMethod === 'auto'
|
||||
? 'PP-DocLayout wenn verfuegbar, sonst OpenCV'
|
||||
: detectionMethod === 'ppdoclayout'
|
||||
? 'ONNX-basierte Layouterkennung mit Klassifikation'
|
||||
: 'Klassische OpenCV-Konturerkennung'}
|
||||
</span>
|
||||
</div>
|
||||
|
||||
{/* Draw mode toggle */}
|
||||
{result && (
|
||||
<div className="flex items-center gap-3">
|
||||
@@ -376,8 +439,17 @@ export function StepStructureDetection({ sessionId, onNext }: StepStructureDetec
|
||||
<div className="space-y-2">
|
||||
<div className="text-xs font-medium text-gray-500 dark:text-gray-400 uppercase tracking-wider">
|
||||
Erkannte Struktur
|
||||
{result?.detection_method && (
|
||||
<span className="ml-2 text-[10px] font-normal normal-case">
|
||||
({result.detection_method === 'ppdoclayout' ? 'PP-DocLayout' : 'OpenCV'})
|
||||
</span>
|
||||
)}
|
||||
</div>
|
||||
<div className="relative bg-gray-100 dark:bg-gray-800 rounded-lg overflow-hidden" style={{ aspectRatio: '210/297' }}>
|
||||
<div
|
||||
ref={overlayContainerRef}
|
||||
className="relative bg-gray-100 dark:bg-gray-800 rounded-lg overflow-hidden"
|
||||
style={{ aspectRatio: '210/297' }}
|
||||
>
|
||||
{/* eslint-disable-next-line @next/next/no-img-element */}
|
||||
<img
|
||||
src={overlayUrl}
|
||||
@@ -387,7 +459,52 @@ export function StepStructureDetection({ sessionId, onNext }: StepStructureDetec
|
||||
(e.target as HTMLImageElement).style.display = 'none'
|
||||
}}
|
||||
/>
|
||||
|
||||
{/* PP-DocLayout region overlays with class colors and labels */}
|
||||
{result?.layout_regions && overlayContainerSize.w > 0 && result.layout_regions.map((region, i) => {
|
||||
const pos = imageToOverlayPct(region, overlayContainerSize.w, overlayContainerSize.h, result.image_width, result.image_height)
|
||||
const color = getDocLayoutColor(region.class_name)
|
||||
return (
|
||||
<div
|
||||
key={`layout-${i}`}
|
||||
className="absolute border-2 pointer-events-none"
|
||||
style={{
|
||||
...pos,
|
||||
borderColor: color,
|
||||
backgroundColor: `${color}18`,
|
||||
}}
|
||||
>
|
||||
<span
|
||||
className="absolute -top-4 left-0 px-1 py-px text-[9px] font-medium text-white rounded-sm whitespace-nowrap leading-tight"
|
||||
style={{ backgroundColor: color }}
|
||||
>
|
||||
{region.class_name} {Math.round(region.confidence * 100)}%
|
||||
</span>
|
||||
</div>
|
||||
)
|
||||
})}
|
||||
</div>
|
||||
|
||||
{/* PP-DocLayout legend */}
|
||||
{result?.layout_regions && result.layout_regions.length > 0 && (() => {
|
||||
const usedClasses = [...new Set(result.layout_regions!.map((r) => r.class_name.toLowerCase()))]
|
||||
return (
|
||||
<div className="flex flex-wrap gap-x-3 gap-y-1 px-1">
|
||||
{usedClasses.sort().map((cls) => (
|
||||
<span key={cls} className="inline-flex items-center gap-1 text-[10px] text-gray-500 dark:text-gray-400">
|
||||
<span
|
||||
className="w-2.5 h-2.5 rounded-sm border"
|
||||
style={{
|
||||
backgroundColor: `${getDocLayoutColor(cls)}30`,
|
||||
borderColor: getDocLayoutColor(cls),
|
||||
}}
|
||||
/>
|
||||
{cls}
|
||||
</span>
|
||||
))}
|
||||
</div>
|
||||
)
|
||||
})()}
|
||||
</div>
|
||||
</div>
|
||||
|
||||
@@ -430,6 +547,11 @@ export function StepStructureDetection({ sessionId, onNext }: StepStructureDetec
|
||||
<span className="inline-flex items-center gap-1.5 px-3 py-1 rounded-full bg-amber-50 dark:bg-amber-900/20 text-amber-700 dark:text-amber-400 text-xs font-medium">
|
||||
{result.boxes.length} Box(en)
|
||||
</span>
|
||||
{result.layout_regions && result.layout_regions.length > 0 && (
|
||||
<span className="inline-flex items-center gap-1.5 px-3 py-1 rounded-full bg-indigo-50 dark:bg-indigo-900/20 text-indigo-700 dark:text-indigo-400 text-xs font-medium">
|
||||
{result.layout_regions.length} Layout-Region(en)
|
||||
</span>
|
||||
)}
|
||||
{result.graphics && result.graphics.length > 0 && (
|
||||
<span className="inline-flex items-center gap-1.5 px-3 py-1 rounded-full bg-purple-50 dark:bg-purple-900/20 text-purple-700 dark:text-purple-400 text-xs font-medium">
|
||||
{result.graphics.length} Grafik(en)
|
||||
@@ -451,6 +573,11 @@ export function StepStructureDetection({ sessionId, onNext }: StepStructureDetec
|
||||
</span>
|
||||
)}
|
||||
<span className="text-gray-400 text-xs ml-auto">
|
||||
{result.detection_method && (
|
||||
<span className="mr-1.5">
|
||||
{result.detection_method === 'ppdoclayout' ? 'PP-DocLayout' : 'OpenCV'} |
|
||||
</span>
|
||||
)}
|
||||
{result.image_width}x{result.image_height}px | {result.duration_seconds}s
|
||||
</span>
|
||||
</div>
|
||||
@@ -491,6 +618,37 @@ export function StepStructureDetection({ sessionId, onNext }: StepStructureDetec
|
||||
</div>
|
||||
)}
|
||||
|
||||
{/* PP-DocLayout regions detail */}
|
||||
{result.layout_regions && result.layout_regions.length > 0 && (
|
||||
<div>
|
||||
<h4 className="text-xs font-medium text-gray-500 dark:text-gray-400 mb-2">
|
||||
PP-DocLayout Regionen ({result.layout_regions.length})
|
||||
</h4>
|
||||
<div className="space-y-1.5">
|
||||
{result.layout_regions.map((region, i) => {
|
||||
const color = getDocLayoutColor(region.class_name)
|
||||
return (
|
||||
<div key={i} className="flex items-center gap-3 text-xs">
|
||||
<span
|
||||
className="w-3 h-3 rounded-sm flex-shrink-0 border"
|
||||
style={{ backgroundColor: `${color}40`, borderColor: color }}
|
||||
/>
|
||||
<span className="text-gray-600 dark:text-gray-400 font-medium min-w-[60px]">
|
||||
{region.class_name}
|
||||
</span>
|
||||
<span className="font-mono text-gray-500">
|
||||
{region.w}x{region.h}px @ ({region.x}, {region.y})
|
||||
</span>
|
||||
<span className="text-gray-400">
|
||||
{Math.round(region.confidence * 100)}%
|
||||
</span>
|
||||
</div>
|
||||
)
|
||||
})}
|
||||
</div>
|
||||
</div>
|
||||
)}
|
||||
|
||||
{/* Zones detail */}
|
||||
<div>
|
||||
<h4 className="text-xs font-medium text-gray-500 dark:text-gray-400 mb-2">Seitenzonen</h4>
|
||||
|
||||
Reference in New Issue
Block a user