feat: cell-first OCR + document type detection + dynamic pipeline steps
Cell-First OCR (v2): Each cell is cropped and OCR'd in isolation, eliminating neighbour bleeding (e.g. "to", "ps" in marker columns). Uses ThreadPoolExecutor for parallel Tesseract calls. Document type detection: Classifies pages as vocab_table, full_text, or generic_table using projection profiles (<2s, no OCR needed). Frontend dynamically skips columns/rows steps for full-text pages. Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -11,7 +11,7 @@ import { StepWordRecognition } from '@/components/ocr-pipeline/StepWordRecogniti
|
||||
import { StepLlmReview } from '@/components/ocr-pipeline/StepLlmReview'
|
||||
import { StepReconstruction } from '@/components/ocr-pipeline/StepReconstruction'
|
||||
import { StepGroundTruth } from '@/components/ocr-pipeline/StepGroundTruth'
|
||||
import { PIPELINE_STEPS, type PipelineStep, type SessionListItem } from './types'
|
||||
import { PIPELINE_STEPS, type PipelineStep, type SessionListItem, type DocumentTypeResult } from './types'
|
||||
|
||||
const KLAUSUR_API = '/klausur-api'
|
||||
|
||||
@@ -23,6 +23,7 @@ export default function OcrPipelinePage() {
|
||||
const [loadingSessions, setLoadingSessions] = useState(true)
|
||||
const [editingName, setEditingName] = useState<string | null>(null)
|
||||
const [editNameValue, setEditNameValue] = useState('')
|
||||
const [docTypeResult, setDocTypeResult] = useState<DocumentTypeResult | null>(null)
|
||||
const [steps, setSteps] = useState<PipelineStep[]>(
|
||||
PIPELINE_STEPS.map((s, i) => ({
|
||||
...s,
|
||||
@@ -59,16 +60,23 @@ export default function OcrPipelinePage() {
|
||||
setSessionId(sid)
|
||||
setSessionName(data.name || data.filename || '')
|
||||
|
||||
// Restore doc type result if available
|
||||
const savedDocType: DocumentTypeResult | null = data.doc_type_result || null
|
||||
setDocTypeResult(savedDocType)
|
||||
|
||||
// Determine which step to jump to based on current_step
|
||||
const dbStep = data.current_step || 1
|
||||
// Steps: 1=deskew, 2=dewarp, 3=columns, ...
|
||||
// UI steps are 0-indexed: 0=deskew, 1=dewarp, 2=columns, ...
|
||||
const uiStep = Math.max(0, dbStep - 1)
|
||||
const skipSteps = savedDocType?.skip_steps || []
|
||||
|
||||
setSteps(
|
||||
PIPELINE_STEPS.map((s, i) => ({
|
||||
...s,
|
||||
status: i < uiStep ? 'completed' : i === uiStep ? 'active' : 'pending',
|
||||
status: skipSteps.includes(s.id)
|
||||
? 'skipped'
|
||||
: i < uiStep ? 'completed' : i === uiStep ? 'active' : 'pending',
|
||||
})),
|
||||
)
|
||||
setCurrentStep(uiStep)
|
||||
@@ -84,6 +92,7 @@ export default function OcrPipelinePage() {
|
||||
if (sessionId === sid) {
|
||||
setSessionId(null)
|
||||
setCurrentStep(0)
|
||||
setDocTypeResult(null)
|
||||
setSteps(PIPELINE_STEPS.map((s, i) => ({ ...s, status: i === 0 ? 'active' : 'pending' })))
|
||||
}
|
||||
} catch (e) {
|
||||
@@ -123,16 +132,28 @@ export default function OcrPipelinePage() {
|
||||
}
|
||||
|
||||
const handleNext = () => {
|
||||
if (currentStep < steps.length - 1) {
|
||||
setSteps((prev) =>
|
||||
prev.map((s, i) => {
|
||||
if (i === currentStep) return { ...s, status: 'completed' }
|
||||
if (i === currentStep + 1) return { ...s, status: 'active' }
|
||||
return s
|
||||
}),
|
||||
)
|
||||
setCurrentStep((prev) => prev + 1)
|
||||
if (currentStep >= steps.length - 1) return
|
||||
|
||||
// Find the next non-skipped step
|
||||
const skipSteps = docTypeResult?.skip_steps || []
|
||||
let nextStep = currentStep + 1
|
||||
while (nextStep < steps.length && skipSteps.includes(PIPELINE_STEPS[nextStep]?.id)) {
|
||||
nextStep++
|
||||
}
|
||||
if (nextStep >= steps.length) nextStep = steps.length - 1
|
||||
|
||||
setSteps((prev) =>
|
||||
prev.map((s, i) => {
|
||||
if (i === currentStep) return { ...s, status: 'completed' }
|
||||
if (i === nextStep) return { ...s, status: 'active' }
|
||||
// Mark skipped steps between current and next
|
||||
if (i > currentStep && i < nextStep && skipSteps.includes(PIPELINE_STEPS[i]?.id)) {
|
||||
return { ...s, status: 'skipped' }
|
||||
}
|
||||
return s
|
||||
}),
|
||||
)
|
||||
setCurrentStep(nextStep)
|
||||
}
|
||||
|
||||
const handleDeskewComplete = (sid: string) => {
|
||||
@@ -142,10 +163,69 @@ export default function OcrPipelinePage() {
|
||||
handleNext()
|
||||
}
|
||||
|
||||
const handleDewarpNext = async () => {
|
||||
// Auto-detect document type after dewarp, then advance
|
||||
if (sessionId) {
|
||||
try {
|
||||
const res = await fetch(
|
||||
`${KLAUSUR_API}/api/v1/ocr-pipeline/sessions/${sessionId}/detect-type`,
|
||||
{ method: 'POST' },
|
||||
)
|
||||
if (res.ok) {
|
||||
const data: DocumentTypeResult = await res.json()
|
||||
setDocTypeResult(data)
|
||||
|
||||
// Mark skipped steps immediately
|
||||
const skipSteps = data.skip_steps || []
|
||||
if (skipSteps.length > 0) {
|
||||
setSteps((prev) =>
|
||||
prev.map((s) =>
|
||||
skipSteps.includes(s.id) ? { ...s, status: 'skipped' } : s,
|
||||
),
|
||||
)
|
||||
}
|
||||
}
|
||||
} catch (e) {
|
||||
console.error('Doc type detection failed:', e)
|
||||
// Not critical — continue without it
|
||||
}
|
||||
}
|
||||
handleNext()
|
||||
}
|
||||
|
||||
const handleDocTypeChange = (newDocType: DocumentTypeResult['doc_type']) => {
|
||||
if (!docTypeResult) return
|
||||
|
||||
// Build new skip_steps based on doc type
|
||||
let skipSteps: string[] = []
|
||||
if (newDocType === 'full_text') {
|
||||
skipSteps = ['columns', 'rows']
|
||||
}
|
||||
// vocab_table and generic_table: no skips
|
||||
|
||||
const updated: DocumentTypeResult = {
|
||||
...docTypeResult,
|
||||
doc_type: newDocType,
|
||||
skip_steps: skipSteps,
|
||||
pipeline: newDocType === 'full_text' ? 'full_page' : 'cell_first',
|
||||
}
|
||||
setDocTypeResult(updated)
|
||||
|
||||
// Update step statuses
|
||||
setSteps((prev) =>
|
||||
prev.map((s) => {
|
||||
if (skipSteps.includes(s.id)) return { ...s, status: 'skipped' as const }
|
||||
if (s.status === 'skipped') return { ...s, status: 'pending' as const }
|
||||
return s
|
||||
}),
|
||||
)
|
||||
}
|
||||
|
||||
const handleNewSession = () => {
|
||||
setSessionId(null)
|
||||
setSessionName('')
|
||||
setCurrentStep(0)
|
||||
setDocTypeResult(null)
|
||||
setSteps(PIPELINE_STEPS.map((s, i) => ({ ...s, status: i === 0 ? 'active' : 'pending' })))
|
||||
}
|
||||
|
||||
@@ -188,7 +268,7 @@ export default function OcrPipelinePage() {
|
||||
case 0:
|
||||
return <StepDeskew sessionId={sessionId} onNext={handleDeskewComplete} />
|
||||
case 1:
|
||||
return <StepDewarp sessionId={sessionId} onNext={handleNext} />
|
||||
return <StepDewarp sessionId={sessionId} onNext={handleDewarpNext} />
|
||||
case 2:
|
||||
return <StepColumnDetection sessionId={sessionId} onNext={handleNext} />
|
||||
case 3:
|
||||
@@ -314,7 +394,14 @@ export default function OcrPipelinePage() {
|
||||
</div>
|
||||
)}
|
||||
|
||||
<PipelineStepper steps={steps} currentStep={currentStep} onStepClick={handleStepClick} onReprocess={sessionId ? reprocessFromStep : undefined} />
|
||||
<PipelineStepper
|
||||
steps={steps}
|
||||
currentStep={currentStep}
|
||||
onStepClick={handleStepClick}
|
||||
onReprocess={sessionId ? reprocessFromStep : undefined}
|
||||
docTypeResult={docTypeResult}
|
||||
onDocTypeChange={handleDocTypeChange}
|
||||
/>
|
||||
|
||||
<div className="min-h-[400px]">{renderStep()}</div>
|
||||
</div>
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
export type PipelineStepStatus = 'pending' | 'active' | 'completed' | 'failed'
|
||||
export type PipelineStepStatus = 'pending' | 'active' | 'completed' | 'failed' | 'skipped'
|
||||
|
||||
export interface PipelineStep {
|
||||
id: string
|
||||
@@ -17,6 +17,15 @@ export interface SessionListItem {
|
||||
updated_at?: string
|
||||
}
|
||||
|
||||
export interface DocumentTypeResult {
|
||||
doc_type: 'vocab_table' | 'full_text' | 'generic_table'
|
||||
confidence: number
|
||||
pipeline: 'cell_first' | 'full_page'
|
||||
skip_steps: string[]
|
||||
features?: Record<string, unknown>
|
||||
duration_seconds?: number
|
||||
}
|
||||
|
||||
export interface SessionInfo {
|
||||
session_id: string
|
||||
filename: string
|
||||
@@ -30,6 +39,7 @@ export interface SessionInfo {
|
||||
column_result?: ColumnResult
|
||||
row_result?: RowResult
|
||||
word_result?: GridResult
|
||||
doc_type_result?: DocumentTypeResult
|
||||
}
|
||||
|
||||
export interface DeskewResult {
|
||||
|
||||
@@ -1,66 +1,115 @@
|
||||
'use client'
|
||||
|
||||
import { PipelineStep } from '@/app/(admin)/ai/ocr-pipeline/types'
|
||||
import { PipelineStep, DocumentTypeResult } from '@/app/(admin)/ai/ocr-pipeline/types'
|
||||
|
||||
const DOC_TYPE_LABELS: Record<string, string> = {
|
||||
vocab_table: 'Vokabeltabelle',
|
||||
full_text: 'Volltext',
|
||||
generic_table: 'Tabelle',
|
||||
}
|
||||
|
||||
interface PipelineStepperProps {
|
||||
steps: PipelineStep[]
|
||||
currentStep: number
|
||||
onStepClick: (index: number) => void
|
||||
onReprocess?: (index: number) => void
|
||||
docTypeResult?: DocumentTypeResult | null
|
||||
onDocTypeChange?: (docType: DocumentTypeResult['doc_type']) => void
|
||||
}
|
||||
|
||||
export function PipelineStepper({ steps, currentStep, onStepClick, onReprocess }: PipelineStepperProps) {
|
||||
export function PipelineStepper({
|
||||
steps,
|
||||
currentStep,
|
||||
onStepClick,
|
||||
onReprocess,
|
||||
docTypeResult,
|
||||
onDocTypeChange,
|
||||
}: PipelineStepperProps) {
|
||||
return (
|
||||
<div className="flex items-center justify-between px-4 py-3 bg-white dark:bg-gray-800 rounded-lg border border-gray-200 dark:border-gray-700">
|
||||
{steps.map((step, index) => {
|
||||
const isActive = index === currentStep
|
||||
const isCompleted = step.status === 'completed'
|
||||
const isFailed = step.status === 'failed'
|
||||
const isClickable = index <= currentStep || isCompleted
|
||||
<div className="space-y-2">
|
||||
<div className="flex items-center justify-between px-4 py-3 bg-white dark:bg-gray-800 rounded-lg border border-gray-200 dark:border-gray-700">
|
||||
{steps.map((step, index) => {
|
||||
const isActive = index === currentStep
|
||||
const isCompleted = step.status === 'completed'
|
||||
const isFailed = step.status === 'failed'
|
||||
const isSkipped = step.status === 'skipped'
|
||||
const isClickable = (index <= currentStep || isCompleted) && !isSkipped
|
||||
|
||||
return (
|
||||
<div key={step.id} className="flex items-center">
|
||||
{index > 0 && (
|
||||
<div
|
||||
className={`h-0.5 w-8 mx-1 ${
|
||||
index <= currentStep ? 'bg-teal-400' : 'bg-gray-300 dark:bg-gray-600'
|
||||
}`}
|
||||
/>
|
||||
)}
|
||||
<div className="relative group">
|
||||
<button
|
||||
onClick={() => isClickable && onStepClick(index)}
|
||||
disabled={!isClickable}
|
||||
className={`flex items-center gap-1.5 px-3 py-1.5 rounded-full text-sm font-medium transition-all ${
|
||||
isActive
|
||||
? 'bg-teal-100 text-teal-700 dark:bg-teal-900/40 dark:text-teal-300 ring-2 ring-teal-400'
|
||||
: isCompleted
|
||||
? 'bg-green-100 text-green-700 dark:bg-green-900/40 dark:text-green-300'
|
||||
: isFailed
|
||||
? 'bg-red-100 text-red-700 dark:bg-red-900/40 dark:text-red-300'
|
||||
: 'text-gray-400 dark:text-gray-500'
|
||||
} ${isClickable ? 'cursor-pointer hover:opacity-80' : 'cursor-default'}`}
|
||||
>
|
||||
<span className="text-base">
|
||||
{isCompleted ? '\u2713' : isFailed ? '\u2717' : step.icon}
|
||||
</span>
|
||||
<span className="hidden sm:inline">{step.name}</span>
|
||||
<span className="sm:hidden">{index + 1}</span>
|
||||
</button>
|
||||
{/* Reprocess button — shown on completed steps on hover */}
|
||||
{isCompleted && onReprocess && (
|
||||
<button
|
||||
onClick={(e) => { e.stopPropagation(); onReprocess(index) }}
|
||||
className="absolute -top-1 -right-1 w-4 h-4 bg-orange-500 text-white rounded-full text-[9px] leading-none opacity-0 group-hover:opacity-100 transition-opacity flex items-center justify-center"
|
||||
title={`Ab hier neu verarbeiten`}
|
||||
>
|
||||
↻
|
||||
</button>
|
||||
return (
|
||||
<div key={step.id} className="flex items-center">
|
||||
{index > 0 && (
|
||||
<div
|
||||
className={`h-0.5 w-8 mx-1 ${
|
||||
isSkipped
|
||||
? 'bg-gray-200 dark:bg-gray-700 border-t border-dashed border-gray-400'
|
||||
: index <= currentStep ? 'bg-teal-400' : 'bg-gray-300 dark:bg-gray-600'
|
||||
}`}
|
||||
/>
|
||||
)}
|
||||
<div className="relative group">
|
||||
<button
|
||||
onClick={() => isClickable && onStepClick(index)}
|
||||
disabled={!isClickable}
|
||||
className={`flex items-center gap-1.5 px-3 py-1.5 rounded-full text-sm font-medium transition-all ${
|
||||
isSkipped
|
||||
? 'bg-gray-100 text-gray-400 dark:bg-gray-800 dark:text-gray-600 line-through'
|
||||
: isActive
|
||||
? 'bg-teal-100 text-teal-700 dark:bg-teal-900/40 dark:text-teal-300 ring-2 ring-teal-400'
|
||||
: isCompleted
|
||||
? 'bg-green-100 text-green-700 dark:bg-green-900/40 dark:text-green-300'
|
||||
: isFailed
|
||||
? 'bg-red-100 text-red-700 dark:bg-red-900/40 dark:text-red-300'
|
||||
: 'text-gray-400 dark:text-gray-500'
|
||||
} ${isClickable ? 'cursor-pointer hover:opacity-80' : 'cursor-default'}`}
|
||||
>
|
||||
<span className="text-base">
|
||||
{isSkipped ? '-' : isCompleted ? '\u2713' : isFailed ? '\u2717' : step.icon}
|
||||
</span>
|
||||
<span className="hidden sm:inline">{step.name}</span>
|
||||
<span className="sm:hidden">{index + 1}</span>
|
||||
</button>
|
||||
{/* Reprocess button — shown on completed steps on hover */}
|
||||
{isCompleted && onReprocess && (
|
||||
<button
|
||||
onClick={(e) => { e.stopPropagation(); onReprocess(index) }}
|
||||
className="absolute -top-1 -right-1 w-4 h-4 bg-orange-500 text-white rounded-full text-[9px] leading-none opacity-0 group-hover:opacity-100 transition-opacity flex items-center justify-center"
|
||||
title={`Ab hier neu verarbeiten`}
|
||||
>
|
||||
↻
|
||||
</button>
|
||||
)}
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
)
|
||||
})}
|
||||
)
|
||||
})}
|
||||
</div>
|
||||
|
||||
{/* Document type badge */}
|
||||
{docTypeResult && (
|
||||
<div className="flex items-center gap-2 px-4 py-2 bg-blue-50 dark:bg-blue-900/20 rounded-lg border border-blue-200 dark:border-blue-800 text-sm">
|
||||
<span className="text-blue-600 dark:text-blue-400 font-medium">
|
||||
Dokumenttyp:
|
||||
</span>
|
||||
{onDocTypeChange ? (
|
||||
<select
|
||||
value={docTypeResult.doc_type}
|
||||
onChange={(e) => onDocTypeChange(e.target.value as DocumentTypeResult['doc_type'])}
|
||||
className="bg-white dark:bg-gray-800 border border-blue-300 dark:border-blue-700 rounded px-2 py-0.5 text-sm text-blue-700 dark:text-blue-300"
|
||||
>
|
||||
<option value="vocab_table">Vokabeltabelle</option>
|
||||
<option value="generic_table">Tabelle (generisch)</option>
|
||||
<option value="full_text">Volltext</option>
|
||||
</select>
|
||||
) : (
|
||||
<span className="text-blue-700 dark:text-blue-300">
|
||||
{DOC_TYPE_LABELS[docTypeResult.doc_type] || docTypeResult.doc_type}
|
||||
</span>
|
||||
)}
|
||||
<span className="text-blue-400 dark:text-blue-500 text-xs">
|
||||
({Math.round(docTypeResult.confidence * 100)}% Konfidenz)
|
||||
</span>
|
||||
</div>
|
||||
)}
|
||||
</div>
|
||||
)
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user