fix: migrate ocr-pipeline types to ocr-kombi after page deletion

Types from deleted ocr-pipeline/types.ts inlined into ocr-kombi/types.ts.
All imports updated across components/ocr-kombi/ and components/ocr-pipeline/.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
Benjamin Admin
2026-04-23 14:22:09 +02:00
parent f39cbe9283
commit 5a154b744d
26 changed files with 249 additions and 107 deletions

View File

@@ -1,33 +1,210 @@
import type { PipelineStep, PipelineStepStatus, DocumentCategory } from '../ocr-pipeline/types'
// OCR Pipeline Types — migrated from deleted ocr-pipeline/types.ts
// Re-export shared types
export type { PipelineStep, PipelineStepStatus, DocumentCategory }
export { DOCUMENT_CATEGORIES } from '../ocr-pipeline/types'
export type PipelineStepStatus = 'pending' | 'active' | 'completed' | 'failed' | 'skipped'
// Re-export grid/structure types used by later steps
export type {
SessionListItem,
SessionInfo,
OrientationResult,
CropResult,
DeskewResult,
DewarpResult,
GridResult,
GridCell,
OcrWordBox,
WordBbox,
ColumnMeta,
StructureResult,
StructureBox,
StructureZone,
StructureGraphic,
ExcludeRegion,
} from '../ocr-pipeline/types'
export interface PipelineStep {
id: string
name: string
icon: string
status: PipelineStepStatus
}
export type DocumentCategory =
| 'vokabelseite' | 'woerterbuch' | 'buchseite' | 'arbeitsblatt' | 'klausurseite'
| 'mathearbeit' | 'statistik' | 'zeitung' | 'formular' | 'handschrift' | 'sonstiges'
export const DOCUMENT_CATEGORIES: { value: DocumentCategory; label: string; icon: string }[] = [
{ value: 'vokabelseite', label: 'Vokabelseite', icon: '📖' },
{ value: 'woerterbuch', label: 'Woerterbuch', icon: '📕' },
{ value: 'buchseite', label: 'Buchseite', icon: '📚' },
{ value: 'arbeitsblatt', label: 'Arbeitsblatt', icon: '📝' },
{ value: 'klausurseite', label: 'Klausurseite', icon: '📄' },
{ value: 'mathearbeit', label: 'Mathearbeit', icon: '🔢' },
{ value: 'statistik', label: 'Statistik', icon: '📊' },
{ value: 'zeitung', label: 'Zeitung', icon: '📰' },
{ value: 'formular', label: 'Formular', icon: '📋' },
{ value: 'handschrift', label: 'Handschrift', icon: '✍️' },
{ value: 'sonstiges', label: 'Sonstiges', icon: '📎' },
]
export interface SessionListItem {
id: string
name: string
filename: string
status: string
current_step: number
document_category?: DocumentCategory
doc_type?: string
parent_session_id?: string
document_group_id?: string
page_number?: number
is_ground_truth?: boolean
created_at: string
updated_at?: string
}
export interface SubSession {
id: string
name: string
box_index: number
current_step?: number
status?: string
}
export interface OrientationResult {
orientation_degrees: number
corrected: boolean
duration_seconds: number
}
export interface CropResult {
crop_applied: boolean
crop_rect?: { x: number; y: number; width: number; height: number }
crop_rect_pct?: { x: number; y: number; width: number; height: number }
original_size: { width: number; height: number }
cropped_size: { width: number; height: number }
detected_format?: string
format_confidence?: number
aspect_ratio?: number
border_fractions?: { top: number; bottom: number; left: number; right: number }
skipped?: boolean
duration_seconds?: number
}
export interface DeskewResult {
session_id: string
angle_hough: number
angle_word_alignment: number
angle_iterative?: number
angle_residual?: number
angle_textline?: number
angle_applied: number
method_used: 'hough' | 'word_alignment' | 'manual' | 'iterative' | 'two_pass' | 'three_pass' | 'manual_combined'
confidence: number
duration_seconds: number
deskewed_image_url: string
binarized_image_url: string
}
export interface DewarpDetection {
method: string
shear_degrees: number
confidence: number
}
export interface DewarpResult {
session_id: string
method_used: string
shear_degrees: number
confidence: number
duration_seconds: number
dewarped_image_url: string
detections?: DewarpDetection[]
}
export interface SessionInfo {
session_id: string
filename: string
name?: string
image_width: number
image_height: number
original_image_url: string
current_step?: number
document_category?: DocumentCategory
doc_type?: string
orientation_result?: OrientationResult
crop_result?: CropResult
deskew_result?: DeskewResult
dewarp_result?: DewarpResult
sub_sessions?: SubSession[]
parent_session_id?: string
box_index?: number
document_group_id?: string
page_number?: number
}
export interface StructureGraphic {
x: number; y: number; w: number; h: number
area: number; shape: string; color_name: string; color_hex: string; confidence: number
}
export interface ExcludeRegion {
x: number; y: number; w: number; h: number; label?: string
}
export interface StructureBox {
x: number; y: number; w: number; h: number
confidence: number; border_thickness: number
bg_color_name?: string; bg_color_hex?: string
}
export interface StructureZone {
index: number; zone_type: 'content' | 'box'
x: number; y: number; w: number; h: number
}
export interface DocLayoutRegion {
x: number; y: number; w: number; h: number
class_name: string; confidence: number
}
export interface StructureResult {
image_width: number; image_height: number
content_bounds: { x: number; y: number; w: number; h: number }
boxes: StructureBox[]; zones: StructureZone[]
graphics: StructureGraphic[]; exclude_regions?: ExcludeRegion[]
color_pixel_counts: Record<string, number>
has_words: boolean; word_count: number
border_ghosts_removed?: number; duration_seconds: number
layout_regions?: DocLayoutRegion[]
detection_method?: 'opencv' | 'ppdoclayout'
}
export interface WordBbox { x: number; y: number; w: number; h: number }
export interface OcrWordBox {
text: string; left: number; top: number; width: number; height: number; conf: number
color?: string; color_name?: string; recovered?: boolean
}
export interface ColumnMeta { index: number; type: string; x: number; width: number }
export interface GridCell {
cell_id: string; row_index: number; col_index: number; col_type: string
text: string; confidence: number; bbox_px: WordBbox; bbox_pct: WordBbox
ocr_engine?: string; is_bold?: boolean
status?: 'pending' | 'confirmed' | 'edited' | 'skipped'
word_boxes?: OcrWordBox[]
}
export interface WordEntry {
row_index: number; english: string; german: string; example: string
source_page?: string; marker?: string; confidence: number
bbox: WordBbox; bbox_en: WordBbox | null; bbox_de: WordBbox | null; bbox_ex: WordBbox | null
bbox_ref?: WordBbox | null; bbox_marker?: WordBbox | null
status?: 'pending' | 'confirmed' | 'edited' | 'skipped'
}
export interface GridResult {
cells: GridCell[]
grid_shape: { rows: number; cols: number; total_cells: number }
columns_used: ColumnMeta[]
layout: 'vocab' | 'generic'
image_width: number; image_height: number; duration_seconds: number
ocr_engine?: string; vocab_entries?: WordEntry[]; entries?: WordEntry[]; entry_count?: number
summary: {
total_cells: number; non_empty_cells: number; low_confidence: number
total_entries?: number; with_english?: number; with_german?: number
}
llm_review?: {
changes: { row_index: number; field: string; old: string; new: string }[]
model_used: string; duration_ms: number; entries_corrected: number
applied_count?: number; applied_at?: string
}
}
// --- Kombi V2 Pipeline ---
/**
* 11-step Kombi V2 pipeline.
* Each step has its own component file in components/ocr-kombi/.
*/
export const KOMBI_V2_STEPS: PipelineStep[] = [
{ id: 'upload', name: 'Upload', icon: '📤', status: 'pending' },
{ id: 'orientation', name: 'Orientierung', icon: '🔄', status: 'pending' },
@@ -45,67 +222,37 @@ export const KOMBI_V2_STEPS: PipelineStep[] = [
{ id: 'ground-truth', name: 'Ground Truth', icon: '✅', status: 'pending' },
]
/** Map from Kombi V2 UI step index to DB step number */
export const KOMBI_V2_UI_TO_DB: Record<number, number> = {
0: 1, // upload
1: 2, // orientation
2: 2, // page-split (same DB step as orientation)
3: 3, // deskew
4: 4, // dewarp
5: 5, // content-crop
6: 8, // ocr (word_result)
7: 9, // structure
8: 10, // grid-build
9: 11, // grid-review
10: 11, // gutter-repair (shares DB step with grid-review)
11: 11, // box-review (shares DB step with grid-review)
12: 11, // ansicht (shares DB step with grid-review)
13: 12, // ground-truth
0: 1, 1: 2, 2: 2, 3: 3, 4: 4, 5: 5, 6: 8, 7: 9, 8: 10, 9: 11, 10: 11, 11: 11, 12: 11, 13: 12,
}
/** Map from DB step to Kombi V2 UI step index */
export function dbStepToKombiV2Ui(dbStep: number): number {
if (dbStep <= 1) return 0 // upload
if (dbStep === 2) return 1 // orientation
if (dbStep === 3) return 3 // deskew
if (dbStep === 4) return 4 // dewarp
if (dbStep === 5) return 5 // content-crop
if (dbStep <= 8) return 6 // ocr
if (dbStep === 9) return 7 // structure
if (dbStep === 10) return 8 // grid-build
if (dbStep === 11) return 9 // grid-review
return 13 // ground-truth
if (dbStep <= 1) return 0
if (dbStep === 2) return 1
if (dbStep === 3) return 3
if (dbStep === 4) return 4
if (dbStep === 5) return 5
if (dbStep <= 8) return 6
if (dbStep === 9) return 7
if (dbStep === 10) return 8
if (dbStep === 11) return 9
return 13
}
/** Document group: groups multiple sessions from a multi-page upload */
export interface DocumentGroup {
group_id: string
title: string
page_count: number
sessions: DocumentGroupSession[]
group_id: string; title: string; page_count: number; sessions: DocumentGroupSession[]
}
export interface DocumentGroupSession {
id: string
name: string
page_number: number
current_step: number
status: string
document_category?: DocumentCategory
created_at: string
id: string; name: string; page_number: number; current_step: number
status: string; document_category?: DocumentCategory; created_at: string
}
/** Engine source for OCR transparency */
export type OcrEngineSource = 'both' | 'paddle_only' | 'tesseract_only' | 'conflict_paddle' | 'conflict_tesseract'
export interface OcrTransparentWord {
text: string
left: number
top: number
width: number
height: number
conf: number
engine_source: OcrEngineSource
text: string; left: number; top: number; width: number; height: number
conf: number; engine_source: OcrEngineSource
}
export interface OcrTransparentResult {
@@ -113,11 +260,7 @@ export interface OcrTransparentResult {
raw_paddle: { words: OcrTransparentWord[] }
merged: { words: OcrTransparentWord[] }
stats: {
total_words: number
both_agree: number
paddle_only: number
tesseract_only: number
conflict_paddle_wins: number
conflict_tesseract_wins: number
total_words: number; both_agree: number; paddle_only: number
tesseract_only: number; conflict_paddle_wins: number; conflict_tesseract_wins: number
}
}

View File

@@ -2,9 +2,8 @@
import { useCallback, useEffect, useState, useRef } from 'react'
import { useSearchParams } from 'next/navigation'
import type { PipelineStep, DocumentCategory } from './types'
import type { PipelineStep, DocumentCategory, SessionListItem } from './types'
import { KOMBI_V2_STEPS, dbStepToKombiV2Ui } from './types'
import type { SessionListItem } from '../ocr-pipeline/types'
export type { SessionListItem }

View File

@@ -1,4 +1,4 @@
import type { OcrWordBox } from '@/app/(admin)/ai/ocr-pipeline/types'
import type { OcrWordBox } from '@/app/(admin)/ai/ocr-kombi/types'
// Re-export for convenience
export type { OcrWordBox }

View File

@@ -1,6 +1,6 @@
'use client'
import type { PipelineStep } from '@/app/(admin)/ai/ocr-pipeline/types'
import type { PipelineStep } from '@/app/(admin)/ai/ocr-kombi/types'
interface KombiStepperProps {
steps: PipelineStep[]

View File

@@ -1,7 +1,7 @@
'use client'
import { useState } from 'react'
import { DOCUMENT_CATEGORIES, type DocumentCategory } from '@/app/(admin)/ai/ocr-pipeline/types'
import { DOCUMENT_CATEGORIES, type DocumentCategory } from '@/app/(admin)/ai/ocr-kombi/types'
interface SessionHeaderProps {
sessionName: string

View File

@@ -1,7 +1,7 @@
'use client'
import { useState } from 'react'
import { DOCUMENT_CATEGORIES, type DocumentCategory } from '@/app/(admin)/ai/ocr-pipeline/types'
import { DOCUMENT_CATEGORIES, type DocumentCategory } from '@/app/(admin)/ai/ocr-kombi/types'
import type { SessionListItem, DocumentGroupView } from '@/app/(admin)/ai/ocr-kombi/useKombiPipeline'
const KLAUSUR_API = '/klausur-api'

View File

@@ -1,7 +1,7 @@
'use client'
import { useState, useCallback, useEffect } from 'react'
import { DOCUMENT_CATEGORIES, type DocumentCategory } from '@/app/(admin)/ai/ocr-pipeline/types'
import { DOCUMENT_CATEGORIES, type DocumentCategory } from '@/app/(admin)/ai/ocr-kombi/types'
const KLAUSUR_API = '/klausur-api'

View File

@@ -1,6 +1,6 @@
'use client'
import type { SubSession } from '@/app/(admin)/ai/ocr-pipeline/types'
import type { SubSession } from '@/app/(admin)/ai/ocr-kombi/types'
interface BoxSessionTabsProps {
parentSessionId: string

View File

@@ -1,7 +1,7 @@
'use client'
import { useState, useMemo } from 'react'
import type { ColumnResult, ColumnGroundTruth, PageRegion } from '@/app/(admin)/ai/ocr-pipeline/types'
import type { ColumnResult, ColumnGroundTruth, PageRegion } from '@/app/(admin)/ai/ocr-kombi/types'
interface ColumnControlsProps {
columnResult: ColumnResult | null

View File

@@ -1,7 +1,7 @@
'use client'
import { useState } from 'react'
import type { DeskewResult, DeskewGroundTruth } from '@/app/(admin)/ai/ocr-pipeline/types'
import type { DeskewResult, DeskewGroundTruth } from '@/app/(admin)/ai/ocr-kombi/types'
interface DeskewControlsProps {
deskewResult: DeskewResult | null

View File

@@ -1,7 +1,7 @@
'use client'
import { useEffect, useState } from 'react'
import type { DeskewResult, DewarpResult, DewarpDetection, DewarpGroundTruth } from '@/app/(admin)/ai/ocr-pipeline/types'
import type { DeskewResult, DewarpResult, DewarpDetection, DewarpGroundTruth } from '@/app/(admin)/ai/ocr-kombi/types'
interface DewarpControlsProps {
dewarpResult: DewarpResult | null

View File

@@ -1,7 +1,7 @@
'use client'
import { useCallback, useEffect, useRef, useState } from 'react'
import type { GridCell } from '@/app/(admin)/ai/ocr-pipeline/types'
import type { GridCell } from '@/app/(admin)/ai/ocr-kombi/types'
const KLAUSUR_API = '/klausur-api'

View File

@@ -1,7 +1,7 @@
'use client'
import { useCallback, useEffect, useRef, useState } from 'react'
import type { ColumnTypeKey, PageRegion } from '@/app/(admin)/ai/ocr-pipeline/types'
import type { ColumnTypeKey, PageRegion } from '@/app/(admin)/ai/ocr-kombi/types'
const COLUMN_TYPES: { value: ColumnTypeKey; label: string }[] = [
{ value: 'column_en', label: 'EN' },

View File

@@ -1,6 +1,6 @@
'use client'
import { PipelineStep, DocumentTypeResult } from '@/app/(admin)/ai/ocr-pipeline/types'
import { PipelineStep, DocumentTypeResult } from '@/app/(admin)/ai/ocr-kombi/types'
const DOC_TYPE_LABELS: Record<string, string> = {
vocab_table: 'Vokabeltabelle',

View File

@@ -1,10 +1,10 @@
'use client'
import { useCallback, useEffect, useState } from 'react'
import type { ColumnResult, ColumnGroundTruth, PageRegion, SubSession } from '@/app/(admin)/ai/ocr-pipeline/types'
import type { ColumnResult, ColumnGroundTruth, PageRegion, SubSession } from '@/app/(admin)/ai/ocr-kombi/types'
import { ColumnControls } from './ColumnControls'
import { ManualColumnEditor } from './ManualColumnEditor'
import type { ColumnTypeKey } from '@/app/(admin)/ai/ocr-pipeline/types'
import type { ColumnTypeKey } from '@/app/(admin)/ai/ocr-kombi/types'
const KLAUSUR_API = '/klausur-api'

View File

@@ -1,7 +1,7 @@
'use client'
import { useEffect, useState } from 'react'
import type { CropResult } from '@/app/(admin)/ai/ocr-pipeline/types'
import type { CropResult } from '@/app/(admin)/ai/ocr-kombi/types'
import { ImageCompareView } from './ImageCompareView'
const KLAUSUR_API = '/klausur-api'

View File

@@ -1,7 +1,7 @@
'use client'
import { useCallback, useEffect, useState } from 'react'
import type { DeskewGroundTruth, DeskewResult, SessionInfo } from '@/app/(admin)/ai/ocr-pipeline/types'
import type { DeskewGroundTruth, DeskewResult, SessionInfo } from '@/app/(admin)/ai/ocr-kombi/types'
import { DeskewControls } from './DeskewControls'
import { ImageCompareView } from './ImageCompareView'

View File

@@ -1,7 +1,7 @@
'use client'
import { useCallback, useEffect, useState } from 'react'
import type { DeskewResult, DewarpResult, DewarpGroundTruth } from '@/app/(admin)/ai/ocr-pipeline/types'
import type { DeskewResult, DewarpResult, DewarpGroundTruth } from '@/app/(admin)/ai/ocr-kombi/types'
import { DewarpControls } from './DewarpControls'
import { ImageCompareView } from './ImageCompareView'

View File

@@ -3,8 +3,8 @@
import { useCallback, useEffect, useRef, useState } from 'react'
import type {
GridCell, ColumnMeta, ImageRegion, ImageStyle,
} from '@/app/(admin)/ai/ocr-pipeline/types'
import { IMAGE_STYLES as STYLES } from '@/app/(admin)/ai/ocr-pipeline/types'
} from '@/app/(admin)/ai/ocr-kombi/types'
import { IMAGE_STYLES as STYLES } from '@/app/(admin)/ai/ocr-kombi/types'
const KLAUSUR_API = '/klausur-api'

View File

@@ -1,7 +1,7 @@
'use client'
import { useCallback, useEffect, useMemo, useRef, useState } from 'react'
import type { GridCell, GridResult, WordEntry, ColumnMeta } from '@/app/(admin)/ai/ocr-pipeline/types'
import type { GridCell, GridResult, WordEntry, ColumnMeta } from '@/app/(admin)/ai/ocr-kombi/types'
import { usePixelWordPositions } from './usePixelWordPositions'
const KLAUSUR_API = '/klausur-api'

View File

@@ -1,7 +1,7 @@
'use client'
import { useCallback, useEffect, useState } from 'react'
import type { OrientationResult, SessionInfo } from '@/app/(admin)/ai/ocr-pipeline/types'
import type { OrientationResult, SessionInfo } from '@/app/(admin)/ai/ocr-kombi/types'
import { ImageCompareView } from './ImageCompareView'
const KLAUSUR_API = '/klausur-api'

View File

@@ -2,7 +2,7 @@
import { useCallback, useEffect, useMemo, useRef, useState } from 'react'
import dynamic from 'next/dynamic'
import type { GridResult, GridCell, ColumnResult, RowResult, PageZone, PageRegion, RowItem, StructureResult, StructureBox, StructureGraphic } from '@/app/(admin)/ai/ocr-pipeline/types'
import type { GridResult, GridCell, ColumnResult, RowResult, PageZone, PageRegion, RowItem, StructureResult, StructureBox, StructureGraphic } from '@/app/(admin)/ai/ocr-kombi/types'
import { usePixelWordPositions } from './usePixelWordPositions'
const KLAUSUR_API = '/klausur-api'

View File

@@ -1,7 +1,7 @@
'use client'
import { useCallback, useEffect, useState } from 'react'
import type { RowResult, RowGroundTruth } from '@/app/(admin)/ai/ocr-pipeline/types'
import type { RowResult, RowGroundTruth } from '@/app/(admin)/ai/ocr-kombi/types'
const KLAUSUR_API = '/klausur-api'

View File

@@ -1,7 +1,7 @@
'use client'
import { useCallback, useEffect, useRef, useState } from 'react'
import type { ExcludeRegion, StructureResult } from '@/app/(admin)/ai/ocr-pipeline/types'
import type { ExcludeRegion, StructureResult } from '@/app/(admin)/ai/ocr-kombi/types'
const KLAUSUR_API = '/klausur-api'

View File

@@ -1,7 +1,7 @@
'use client'
import { useCallback, useEffect, useRef, useState } from 'react'
import type { GridResult, GridCell, WordEntry, WordGroundTruth } from '@/app/(admin)/ai/ocr-pipeline/types'
import type { GridResult, GridCell, WordEntry, WordGroundTruth } from '@/app/(admin)/ai/ocr-kombi/types'
const KLAUSUR_API = '/klausur-api'

View File

@@ -1,5 +1,5 @@
import { useEffect, useState } from 'react'
import type { GridCell } from '@/app/(admin)/ai/ocr-pipeline/types'
import type { GridCell } from '@/app/(admin)/ai/ocr-kombi/types'
export interface WordPosition {
xPct: number