feat: cell-first OCR + document type detection + dynamic pipeline steps
Cell-First OCR (v2): Each cell is cropped and OCR'd in isolation, eliminating bleed-through from neighbouring cells (e.g. stray "to" or "ps" fragments in marker columns). Uses ThreadPoolExecutor to run the Tesseract calls in parallel. Document type detection: Classifies each page as vocab_table, full_text, or generic_table using projection profiles (takes <2s, no OCR needed). The frontend dynamically skips the columns/rows steps for full-text pages. Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -1,66 +1,115 @@
|
||||
'use client'
|
||||
|
||||
import { PipelineStep } from '@/app/(admin)/ai/ocr-pipeline/types'
|
||||
import { PipelineStep, DocumentTypeResult } from '@/app/(admin)/ai/ocr-pipeline/types'
|
||||
|
||||
/**
 * German display labels for document types, keyed by the `doc_type` value.
 *
 * Used for the read-only document type badge; a key that is missing here
 * is rendered as the raw `doc_type` string by the consumer.
 */
const DOC_TYPE_LABELS: Readonly<Record<string, string>> = {
  vocab_table: 'Vokabeltabelle',
  full_text: 'Volltext',
  generic_table: 'Tabelle',
}
|
||||
|
||||
/** Props accepted by {@link PipelineStepper}. */
interface PipelineStepperProps {
  /** Pipeline steps, rendered left to right in array order. */
  steps: PipelineStep[]
  /** Index into `steps` of the step that is highlighted as active. */
  currentStep: number
  /** Called with the step index when a clickable step is pressed. */
  onStepClick: (index: number) => void
  /**
   * Called with the step index when the hover "reprocess" button of a
   * completed step is pressed. The button is not rendered when omitted.
   */
  onReprocess?: (index: number) => void
  /** Detection result; the document type badge is rendered only when set. */
  docTypeResult?: DocumentTypeResult | null
  /**
   * Called with the newly selected document type. When provided, the badge
   * shows a dropdown instead of a static label.
   */
  onDocTypeChange?: (docType: DocumentTypeResult['doc_type']) => void
}
|
||||
|
||||
/**
 * Horizontal stepper for the OCR pipeline with an optional document type badge.
 *
 * Each step is rendered as a pill button joined to its predecessor by a
 * connector line. Below the stepper, a badge shows the detected document
 * type and its confidence whenever `docTypeResult` is set; it becomes a
 * dropdown when `onDocTypeChange` is supplied.
 *
 * @param props - See {@link PipelineStepperProps}.
 * @returns The stepper row followed by the optional document type badge.
 */
export function PipelineStepper({
  steps,
  currentStep,
  onStepClick,
  onReprocess,
  docTypeResult,
  onDocTypeChange,
}: PipelineStepperProps) {
  return (
    <div className="space-y-2">
      <div className="flex items-center justify-between px-4 py-3 bg-white dark:bg-gray-800 rounded-lg border border-gray-200 dark:border-gray-700">
        {steps.map((step, index) => {
          const isActive = index === currentStep
          const isCompleted = step.status === 'completed'
          const isFailed = step.status === 'failed'
          const isSkipped = step.status === 'skipped'
          // Steps up to the current one and any completed step can be
          // revisited; skipped steps are never navigable.
          const isClickable = (index <= currentStep || isCompleted) && !isSkipped

          return (
            <div key={step.id} className="flex items-center">
              {/* Connector to the previous step (omitted before the first step) */}
              {index > 0 && (
                <div
                  className={`h-0.5 w-8 mx-1 ${
                    isSkipped
                      ? 'bg-gray-200 dark:bg-gray-700 border-t border-dashed border-gray-400'
                      : index <= currentStep ? 'bg-teal-400' : 'bg-gray-300 dark:bg-gray-600'
                  }`}
                />
              )}
              <div className="relative group">
                {/* Style precedence: skipped > active > completed > failed > pending */}
                <button
                  type="button"
                  onClick={() => isClickable && onStepClick(index)}
                  disabled={!isClickable}
                  className={`flex items-center gap-1.5 px-3 py-1.5 rounded-full text-sm font-medium transition-all ${
                    isSkipped
                      ? 'bg-gray-100 text-gray-400 dark:bg-gray-800 dark:text-gray-600 line-through'
                      : isActive
                      ? 'bg-teal-100 text-teal-700 dark:bg-teal-900/40 dark:text-teal-300 ring-2 ring-teal-400'
                      : isCompleted
                      ? 'bg-green-100 text-green-700 dark:bg-green-900/40 dark:text-green-300'
                      : isFailed
                      ? 'bg-red-100 text-red-700 dark:bg-red-900/40 dark:text-red-300'
                      : 'text-gray-400 dark:text-gray-500'
                  } ${isClickable ? 'cursor-pointer hover:opacity-80' : 'cursor-default'}`}
                >
                  <span className="text-base">
                    {isSkipped ? '-' : isCompleted ? '\u2713' : isFailed ? '\u2717' : step.icon}
                  </span>
                  {/* Full name from the `sm` breakpoint up, 1-based number below it */}
                  <span className="hidden sm:inline">{step.name}</span>
                  <span className="sm:hidden">{index + 1}</span>
                </button>
                {/* Reprocess button — shown on completed steps on hover */}
                {isCompleted && onReprocess && (
                  <button
                    type="button"
                    onClick={(e) => { e.stopPropagation(); onReprocess(index) }}
                    className="absolute -top-1 -right-1 w-4 h-4 bg-orange-500 text-white rounded-full text-[9px] leading-none opacity-0 group-hover:opacity-100 transition-opacity flex items-center justify-center"
                    title="Ab hier neu verarbeiten"
                    aria-label="Ab hier neu verarbeiten"
                  >
                    ↻
                  </button>
                )}
              </div>
            </div>
          )
        })}
      </div>

      {/* Document type badge */}
      {docTypeResult && (
        <div className="flex items-center gap-2 px-4 py-2 bg-blue-50 dark:bg-blue-900/20 rounded-lg border border-blue-200 dark:border-blue-800 text-sm">
          <span className="text-blue-600 dark:text-blue-400 font-medium">
            Dokumenttyp:
          </span>
          {onDocTypeChange ? (
            <select
              value={docTypeResult.doc_type}
              onChange={(e) => onDocTypeChange(e.target.value as DocumentTypeResult['doc_type'])}
              className="bg-white dark:bg-gray-800 border border-blue-300 dark:border-blue-700 rounded px-2 py-0.5 text-sm text-blue-700 dark:text-blue-300"
            >
              <option value="vocab_table">Vokabeltabelle</option>
              <option value="generic_table">Tabelle (generisch)</option>
              <option value="full_text">Volltext</option>
            </select>
          ) : (
            <span className="text-blue-700 dark:text-blue-300">
              {/* Fall back to the raw value for types without a label */}
              {DOC_TYPE_LABELS[docTypeResult.doc_type] ?? docTypeResult.doc_type}
            </span>
          )}
          <span className="text-blue-400 dark:text-blue-500 text-xs">
            ({Math.round(docTypeResult.confidence * 100)}% Konfidenz)
          </span>
        </div>
      )}
    </div>
  )
}
|
||||
|
||||
Reference in New Issue
Block a user