feat: Sprint 2 — TrOCR ONNX, PP-DocLayout, Model Management

D2: TrOCR ONNX export script (printed + handwritten, int8 quantization)
D3: PP-DocLayout ONNX export script (download or Docker-based conversion)
B3: Model Management admin page (PyTorch vs ONNX status, benchmarks, config)
A4: TrOCR ONNX service with runtime routing (auto/pytorch/onnx via TROCR_BACKEND)
A5: PP-DocLayout ONNX detection with OpenCV fallback (via GRAPHIC_DETECT_BACKEND)
B4: Structure Detection UI toggle (OpenCV vs PP-DocLayout) with class color coding
C3: TrOCR-ONNX.md documentation
C4: OCR-Pipeline.md ONNX section added
C5: mkdocs.yml nav updated, optimum added to requirements.txt

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
Benjamin Admin
2026-03-23 09:53:02 +01:00
parent c695b659fb
commit be7f5f1872
16 changed files with 3616 additions and 60 deletions

View File

@@ -19,6 +19,26 @@ const COLOR_HEX: Record<string, string> = {
purple: '#9333ea',
}
/**
 * Structure-detection backends selectable in the UI.
 * 'auto' sends no explicit method parameter with the detection request.
 */
type DetectionMethod = 'auto' | 'opencv' | 'ppdoclayout'

/** Color map for PP-DocLayout region classes (keys are lower-case class names). */
const DOCLAYOUT_CLASS_COLORS: Record<string, string> = {
  table: '#2563eb',
  figure: '#16a34a',
  title: '#ea580c',
  text: '#6b7280',
  list: '#9333ea',
  header: '#0ea5e9',
  footer: '#64748b',
  equation: '#dc2626',
}

/** Fallback color for region classes that have no entry in the color map. */
const DOCLAYOUT_DEFAULT_COLOR = '#a3a3a3'

/**
 * Resolve the display color for a PP-DocLayout region class.
 *
 * The lookup is case-insensitive. Unknown classes, and values that are not
 * strings at runtime, yield {@link DOCLAYOUT_DEFAULT_COLOR}.
 *
 * @param className - Region class name as delivered with the detection result.
 * @returns A `#rrggbb` hex color string.
 */
function getDocLayoutColor(className: string): string {
  // The value originates from an API response, so the static type is not a
  // runtime guarantee; a missing class must not crash rendering.
  if (typeof className !== 'string') return DOCLAYOUT_DEFAULT_COLOR

  const key = className.toLowerCase()
  // Own-property check: a bare `map[key]` lookup would resolve inherited
  // members such as "constructor" or "__proto__" to truthy non-string values.
  const color = Object.prototype.hasOwnProperty.call(DOCLAYOUT_CLASS_COLORS, key)
    ? DOCLAYOUT_CLASS_COLORS[key]
    : undefined
  return color ?? DOCLAYOUT_DEFAULT_COLOR
}
/**
* Convert a mouse event on the image container to image-pixel coordinates.
* The image uses object-contain inside an A4-ratio container, so we need
@@ -96,6 +116,7 @@ export function StepStructureDetection({ sessionId, onNext }: StepStructureDetec
const [error, setError] = useState<string | null>(null)
const [hasRun, setHasRun] = useState(false)
const [overlayTs, setOverlayTs] = useState(0)
const [detectionMethod, setDetectionMethod] = useState<DetectionMethod>('auto')
// Exclude region drawing state
const [excludeRegions, setExcludeRegions] = useState<ExcludeRegion[]>([])
@@ -106,7 +127,9 @@ export function StepStructureDetection({ sessionId, onNext }: StepStructureDetec
const [drawMode, setDrawMode] = useState(false)
const containerRef = useRef<HTMLDivElement>(null)
const overlayContainerRef = useRef<HTMLDivElement>(null)
const [containerSize, setContainerSize] = useState({ w: 0, h: 0 })
const [overlayContainerSize, setOverlayContainerSize] = useState({ w: 0, h: 0 })
// Track container size for overlay positioning
useEffect(() => {
@@ -121,6 +144,19 @@ export function StepStructureDetection({ sessionId, onNext }: StepStructureDetec
return () => obs.disconnect()
}, [])
// Mirror the rendered size of the overlay container into state so the
// PP-DocLayout region boxes can be positioned against it.
// NOTE(review): the effect runs once (empty dependency list) and bails out when
// the ref is still null — confirm the overlay container is mounted on first render.
useEffect(() => {
  const node = overlayContainerRef.current
  if (!node) return

  const observer = new ResizeObserver((entries) => {
    entries.forEach(({ contentRect }) => {
      setOverlayContainerSize({ w: contentRect.width, h: contentRect.height })
    })
  })
  observer.observe(node)

  // Stop observing when the component unmounts.
  return () => observer.disconnect()
}, [])
// Auto-trigger detection on mount
useEffect(() => {
if (!sessionId || hasRun) return
@@ -131,7 +167,8 @@ export function StepStructureDetection({ sessionId, onNext }: StepStructureDetec
setError(null)
try {
const res = await fetch(`${KLAUSUR_API}/api/v1/ocr-pipeline/sessions/${sessionId}/detect-structure`, {
const params = detectionMethod !== 'auto' ? `?method=${detectionMethod}` : ''
const res = await fetch(`${KLAUSUR_API}/api/v1/ocr-pipeline/sessions/${sessionId}/detect-structure${params}`, {
method: 'POST',
})
@@ -158,7 +195,8 @@ export function StepStructureDetection({ sessionId, onNext }: StepStructureDetec
setDetecting(true)
setError(null)
try {
const res = await fetch(`${KLAUSUR_API}/api/v1/ocr-pipeline/sessions/${sessionId}/detect-structure`, {
const params = detectionMethod !== 'auto' ? `?method=${detectionMethod}` : ''
const res = await fetch(`${KLAUSUR_API}/api/v1/ocr-pipeline/sessions/${sessionId}/detect-structure${params}`, {
method: 'POST',
})
if (!res.ok) throw new Error('Erneute Erkennung fehlgeschlagen')
@@ -278,6 +316,31 @@ export function StepStructureDetection({ sessionId, onNext }: StepStructureDetec
</div>
)}
{/* Detection method toggle: clicking a button only updates the selected method in state */}
<div className="flex items-center gap-2">
  <span className="text-xs font-medium text-gray-500 dark:text-gray-400">Methode:</span>
  {(['auto', 'opencv', 'ppdoclayout'] as DetectionMethod[]).map((method) => (
    <button
      key={method}
      // Explicit type: a <button> defaults to "submit" and would submit any ancestor form.
      type="button"
      // Expose the selected state to assistive technology, not only via color.
      aria-pressed={detectionMethod === method}
      onClick={() => setDetectionMethod(method)}
      className={`px-3 py-1.5 text-xs rounded-md font-medium transition-colors ${
        detectionMethod === method
          ? 'bg-teal-600 text-white'
          : 'bg-gray-100 dark:bg-gray-700 text-gray-600 dark:text-gray-300 hover:bg-gray-200 dark:hover:bg-gray-600'
      }`}
    >
      {method === 'auto' ? 'Auto' : method === 'opencv' ? 'OpenCV' : 'PP-DocLayout'}
    </button>
  ))}
  {/* Short description of the currently selected method */}
  <span className="text-[10px] text-gray-400 dark:text-gray-500 ml-1">
    {detectionMethod === 'auto'
      ? 'PP-DocLayout wenn verfuegbar, sonst OpenCV'
      : detectionMethod === 'ppdoclayout'
        ? 'ONNX-basierte Layouterkennung mit Klassifikation'
        : 'Klassische OpenCV-Konturerkennung'}
  </span>
</div>
{/* Draw mode toggle */}
{result && (
<div className="flex items-center gap-3">
@@ -376,8 +439,17 @@ export function StepStructureDetection({ sessionId, onNext }: StepStructureDetec
<div className="space-y-2">
<div className="text-xs font-medium text-gray-500 dark:text-gray-400 uppercase tracking-wider">
Erkannte Struktur
{/* Append the method reported by the result; any value other than 'ppdoclayout' is shown as OpenCV */}
{result?.detection_method ? (
  <span className="ml-2 text-[10px] font-normal normal-case">
    ({result.detection_method !== 'ppdoclayout' ? 'OpenCV' : 'PP-DocLayout'})
  </span>
) : null}
</div>
<div className="relative bg-gray-100 dark:bg-gray-800 rounded-lg overflow-hidden" style={{ aspectRatio: '210/297' }}>
<div
ref={overlayContainerRef}
className="relative bg-gray-100 dark:bg-gray-800 rounded-lg overflow-hidden"
style={{ aspectRatio: '210/297' }}
>
{/* eslint-disable-next-line @next/next/no-img-element */}
<img
src={overlayUrl}
@@ -387,7 +459,52 @@ export function StepStructureDetection({ sessionId, onNext }: StepStructureDetec
(e.target as HTMLImageElement).style.display = 'none'
}}
/>
{/* One non-interactive box per PP-DocLayout region, tinted by class and labelled with class + confidence.
    Nothing is drawn until the overlay container has a measured width. */}
{result?.layout_regions && overlayContainerSize.w > 0
  ? result.layout_regions.map((layoutRegion, idx) => {
      const placement = imageToOverlayPct(
        layoutRegion,
        overlayContainerSize.w,
        overlayContainerSize.h,
        result.image_width,
        result.image_height,
      )
      const classColor = getDocLayoutColor(layoutRegion.class_name)
      const confidencePct = Math.round(layoutRegion.confidence * 100)
      // The "18" suffix is a two-digit hex alpha appended to the #rrggbb class color.
      const boxStyle = {
        ...placement,
        borderColor: classColor,
        backgroundColor: `${classColor}18`,
      }
      return (
        <div key={`layout-${idx}`} className="absolute border-2 pointer-events-none" style={boxStyle}>
          <span
            className="absolute -top-4 left-0 px-1 py-px text-[9px] font-medium text-white rounded-sm whitespace-nowrap leading-tight"
            style={{ backgroundColor: classColor }}
          >
            {layoutRegion.class_name} {confidencePct}%
          </span>
        </div>
      )
    })
  : null}
</div>
{/* Legend: one swatch per distinct lower-cased class present in the result, sorted by name */}
{result?.layout_regions && result.layout_regions.length > 0 ? (
  <div className="flex flex-wrap gap-x-3 gap-y-1 px-1">
    {Array.from(new Set(result.layout_regions.map((entry) => entry.class_name.toLowerCase())))
      .sort()
      .map((legendClass) => {
        const swatchColor = getDocLayoutColor(legendClass)
        return (
          <span
            key={legendClass}
            className="inline-flex items-center gap-1 text-[10px] text-gray-500 dark:text-gray-400"
          >
            <span
              className="w-2.5 h-2.5 rounded-sm border"
              style={{ backgroundColor: `${swatchColor}30`, borderColor: swatchColor }}
            />
            {legendClass}
          </span>
        )
      })}
  </div>
) : null}
</div>
</div>
@@ -430,6 +547,11 @@ export function StepStructureDetection({ sessionId, onNext }: StepStructureDetec
<span className="inline-flex items-center gap-1.5 px-3 py-1 rounded-full bg-amber-50 dark:bg-amber-900/20 text-amber-700 dark:text-amber-400 text-xs font-medium">
{result.boxes.length} Box(en)
</span>
{/* Badge with the number of PP-DocLayout regions; omitted when the list is missing or empty */}
{result.layout_regions && result.layout_regions.length > 0 ? (
  <span className="inline-flex items-center gap-1.5 px-3 py-1 rounded-full bg-indigo-50 dark:bg-indigo-900/20 text-indigo-700 dark:text-indigo-400 text-xs font-medium">
    {result.layout_regions.length} Layout-Region(en)
  </span>
) : null}
{result.graphics && result.graphics.length > 0 && (
<span className="inline-flex items-center gap-1.5 px-3 py-1 rounded-full bg-purple-50 dark:bg-purple-900/20 text-purple-700 dark:text-purple-400 text-xs font-medium">
{result.graphics.length} Grafik(en)
@@ -451,6 +573,11 @@ export function StepStructureDetection({ sessionId, onNext }: StepStructureDetec
</span>
)}
<span className="text-gray-400 text-xs ml-auto">
{/* Prefix the stats line with the reported method; any value other than 'ppdoclayout' is shown as OpenCV */}
{result.detection_method ? (
  <span className="mr-1.5">
    {result.detection_method !== 'ppdoclayout' ? 'OpenCV' : 'PP-DocLayout'} |
  </span>
) : null}
{result.image_width}x{result.image_height}px | {result.duration_seconds}s
</span>
</div>
@@ -491,6 +618,37 @@ export function StepStructureDetection({ sessionId, onNext }: StepStructureDetec
</div>
)}
{/* Detail list of PP-DocLayout regions: color swatch, class, size/position and confidence per row */}
{result.layout_regions && result.layout_regions.length > 0 ? (
  <div>
    <h4 className="text-xs font-medium text-gray-500 dark:text-gray-400 mb-2">
      PP-DocLayout Regionen ({result.layout_regions.length})
    </h4>
    <div className="space-y-1.5">
      {result.layout_regions.map(({ class_name, confidence, w, h, x, y }, rowIndex) => {
        const swatchColor = getDocLayoutColor(class_name)
        const confidencePct = Math.round(confidence * 100)
        return (
          <div key={rowIndex} className="flex items-center gap-3 text-xs">
            <span
              className="w-3 h-3 rounded-sm flex-shrink-0 border"
              style={{ backgroundColor: `${swatchColor}40`, borderColor: swatchColor }}
            />
            <span className="text-gray-600 dark:text-gray-400 font-medium min-w-[60px]">
              {class_name}
            </span>
            <span className="font-mono text-gray-500">
              {w}x{h}px @ ({x}, {y})
            </span>
            <span className="text-gray-400">
              {confidencePct}%
            </span>
          </div>
        )
      })}
    </div>
  </div>
) : null}
{/* Zones detail */}
<div>
<h4 className="text-xs font-medium text-gray-500 dark:text-gray-400 mb-2">Seitenzonen</h4>