/*
 * Cell-First OCR (v2): Each cell is cropped and OCR'd in isolation, eliminating
 * neighbour bleeding (e.g. "to", "ps" in marker columns). Uses
 * ThreadPoolExecutor for parallel Tesseract calls.
 *
 * Document type detection: Classifies pages as vocab_table, full_text, or
 * generic_table using projection profiles (<2s, no OCR needed). The frontend
 * dynamically skips the columns/rows steps for full-text pages.
 *
 * Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
 *
 * 116 lines · 4.9 KiB · TypeScript
 */
'use client'
|
|
|
|
import { PipelineStep, DocumentTypeResult } from '@/app/(admin)/ai/ocr-pipeline/types'
|
|
|
|
/**
 * Display labels (German UI text) for the document-type identifiers shown in
 * the read-only document type badge.
 *
 * Keyed by plain string so that an identifier without an entry simply yields
 * `undefined`, which the badge handles by falling back to the raw identifier.
 * Declared `Readonly` because this is a shared module-level lookup table that
 * must never be mutated at runtime.
 */
const DOC_TYPE_LABELS: Readonly<Record<string, string>> = {
  vocab_table: 'Vokabeltabelle',
  full_text: 'Volltext',
  generic_table: 'Tabelle',
}
|
|
|
|
/** Props accepted by the `PipelineStepper` component. */
interface PipelineStepperProps {
  /**
   * Steps to render, in display order. Accepted as a readonly array because
   * the stepper only iterates over it; mutable arrays remain assignable.
   */
  steps: readonly PipelineStep[]
  /** Zero-based index of the step that is currently active. */
  currentStep: number
  /** Called with the step's index when a clickable step is pressed. */
  onStepClick: (index: number) => void
  /**
   * Optional. When provided, completed steps show a small reprocess button
   * that calls this with the step's index.
   */
  onReprocess?: (index: number) => void
  /** Optional document type result; when truthy, the document type badge is shown. */
  docTypeResult?: DocumentTypeResult | null
  /**
   * Optional. When provided, the badge renders a dropdown and this is called
   * with the newly selected document type; otherwise the type is shown as text.
   */
  onDocTypeChange?: (docType: DocumentTypeResult['doc_type']) => void
}
|
|
|
|
/**
 * Tailwind classes for a step pill's visual state.
 *
 * Precedence: skipped > active > completed > failed > pending.
 */
function stepPillClasses(state: {
  isSkipped: boolean
  isActive: boolean
  isCompleted: boolean
  isFailed: boolean
}): string {
  if (state.isSkipped) {
    return 'bg-gray-100 text-gray-400 dark:bg-gray-800 dark:text-gray-600 line-through'
  }
  if (state.isActive) {
    return 'bg-teal-100 text-teal-700 dark:bg-teal-900/40 dark:text-teal-300 ring-2 ring-teal-400'
  }
  if (state.isCompleted) {
    return 'bg-green-100 text-green-700 dark:bg-green-900/40 dark:text-green-300'
  }
  if (state.isFailed) {
    return 'bg-red-100 text-red-700 dark:bg-red-900/40 dark:text-red-300'
  }
  return 'text-gray-400 dark:text-gray-500'
}

/** Tailwind classes for the connector line drawn in front of a step. */
function stepConnectorClasses(isSkipped: boolean, isReached: boolean): string {
  if (isSkipped) {
    return 'bg-gray-200 dark:bg-gray-700 border-t border-dashed border-gray-400'
  }
  return isReached ? 'bg-teal-400' : 'bg-gray-300 dark:bg-gray-600'
}

/**
 * Horizontal stepper for the OCR pipeline, with an optional document type badge.
 *
 * Each step is rendered as a pill button. A step is clickable when it lies at
 * or before `currentStep` or has status `completed`, and is not `skipped`.
 * Completed steps additionally expose a small reprocess button when
 * `onReprocess` is supplied. When `docTypeResult` is truthy, a badge below
 * the stepper shows the document type (editable via a dropdown when
 * `onDocTypeChange` is supplied) together with the rounded confidence.
 *
 * @param props - See `PipelineStepperProps`.
 * @returns The stepper element tree.
 */
export function PipelineStepper({
  steps,
  currentStep,
  onStepClick,
  onReprocess,
  docTypeResult,
  onDocTypeChange,
}: PipelineStepperProps) {
  return (
    <div className="space-y-2">
      <div className="flex items-center justify-between px-4 py-3 bg-white dark:bg-gray-800 rounded-lg border border-gray-200 dark:border-gray-700">
        {steps.map((step, index) => {
          const isActive = index === currentStep
          const isCompleted = step.status === 'completed'
          const isFailed = step.status === 'failed'
          const isSkipped = step.status === 'skipped'
          const isReached = index <= currentStep
          const isClickable = (isReached || isCompleted) && !isSkipped

          return (
            <div key={step.id} className="flex items-center">
              {index > 0 && (
                <div className={`h-0.5 w-8 mx-1 ${stepConnectorClasses(isSkipped, isReached)}`} />
              )}
              <div className="relative group">
                {/* type="button" keeps the pill from submitting an enclosing form */}
                <button
                  type="button"
                  onClick={() => {
                    if (isClickable) onStepClick(index)
                  }}
                  disabled={!isClickable}
                  aria-current={isActive ? 'step' : undefined}
                  className={`flex items-center gap-1.5 px-3 py-1.5 rounded-full text-sm font-medium transition-all ${stepPillClasses(
                    { isSkipped, isActive, isCompleted, isFailed },
                  )} ${isClickable ? 'cursor-pointer hover:opacity-80' : 'cursor-default'}`}
                >
                  <span className="text-base">
                    {isSkipped ? '-' : isCompleted ? '\u2713' : isFailed ? '\u2717' : step.icon}
                  </span>
                  {/* Full name on wider screens, 1-based step number on narrow ones */}
                  <span className="hidden sm:inline">{step.name}</span>
                  <span className="sm:hidden">{index + 1}</span>
                </button>
                {/* Reprocess button — shown on completed steps on hover or keyboard focus */}
                {isCompleted && onReprocess && (
                  <button
                    type="button"
                    onClick={(e) => {
                      // Keep the click from reaching ancestor click handlers.
                      e.stopPropagation()
                      onReprocess(index)
                    }}
                    className="absolute -top-1 -right-1 w-4 h-4 bg-orange-500 text-white rounded-full text-[9px] leading-none opacity-0 group-hover:opacity-100 focus:opacity-100 transition-opacity flex items-center justify-center"
                    title="Ab hier neu verarbeiten"
                    aria-label="Ab hier neu verarbeiten"
                  >
                    ↻
                  </button>
                )}
              </div>
            </div>
          )
        })}
      </div>

      {/* Document type badge */}
      {docTypeResult && (
        <div className="flex items-center gap-2 px-4 py-2 bg-blue-50 dark:bg-blue-900/20 rounded-lg border border-blue-200 dark:border-blue-800 text-sm">
          <span className="text-blue-600 dark:text-blue-400 font-medium">
            Dokumenttyp:
          </span>
          {onDocTypeChange ? (
            <select
              aria-label="Dokumenttyp"
              value={docTypeResult.doc_type}
              onChange={(e) => onDocTypeChange(e.target.value as DocumentTypeResult['doc_type'])}
              className="bg-white dark:bg-gray-800 border border-blue-300 dark:border-blue-700 rounded px-2 py-0.5 text-sm text-blue-700 dark:text-blue-300"
            >
              <option value="vocab_table">Vokabeltabelle</option>
              <option value="generic_table">Tabelle (generisch)</option>
              <option value="full_text">Volltext</option>
            </select>
          ) : (
            <span className="text-blue-700 dark:text-blue-300">
              {/* Fall back to the raw identifier when no label is registered */}
              {DOC_TYPE_LABELS[docTypeResult.doc_type] ?? docTypeResult.doc_type}
            </span>
          )}
          <span className="text-blue-400 dark:text-blue-500 text-xs">
            ({Math.round(docTypeResult.confidence * 100)}% Konfidenz)
          </span>
        </div>
      )}
    </div>
  )
}
|