feat: OCR pipeline v2.1 – narrow column OCR, dewarp automation, Fabric.js editor
Some checks failed
CI / go-lint (push) Has been skipped
CI / python-lint (push) Has been skipped
CI / nodejs-lint (push) Has been skipped
CI / test-go-school (push) Successful in 24s
CI / test-go-edu-search (push) Successful in 27s
CI / test-python-klausur (push) Failing after 1m50s
CI / test-python-agent-core (push) Successful in 19s
CI / test-nodejs-website (push) Successful in 15s

Proposal B: Adaptive padding, crop upscaling, PSM selection, row-strip re-OCR
for narrow columns (<15% width) – expected accuracy boost 60-70% → 85-90%.

Proposal A: New text-line straightness detector (Method D), quality gate
(rejects counterproductive corrections), 2-pass projection refinement,
higher confidence thresholds – expected manual dewarp reduction to <10%.

Proposal C: Fabric.js canvas editor with drag/drop, inline editing, undo/redo,
opacity slider, zoom, PDF/DOCX export endpoints.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
Benjamin Admin
2026-03-03 22:44:14 +01:00
parent 970ec1f548
commit ab3ecc7c08
7 changed files with 1105 additions and 128 deletions

View File

@@ -0,0 +1,410 @@
'use client'
import { useCallback, useEffect, useRef, useState } from 'react'
import type { GridCell } from '@/app/(admin)/ai/ocr-pipeline/types'
const KLAUSUR_API = '/klausur-api'
// Column type → colour mapping.
// Keys are the `col_type` values carried by grid cells; values are hex colours
// used for the cell background tint, the selection border/corners and the legend.
// Column types missing from this map fall back to gray (#6b7280) where cells are drawn.
const COL_TYPE_COLORS: Record<string, string> = {
column_en: '#3b82f6', // blue-500
column_de: '#22c55e', // green-500
column_example: '#f97316', // orange-500
column_text: '#a855f7', // purple-500
page_ref: '#06b6d4', // cyan-500
column_marker: '#6b7280', // gray-500
}
/** Props accepted by the Fabric.js reconstruction editor component. */
interface FabricReconstructionCanvasProps {
// Session id; interpolated into the background-image and export URLs.
sessionId: string
// Grid cells rendered as editable text objects on the canvas.
cells: GridCell[]
// Called with the cell id and new text whenever a cell text is changed in the editor.
onCellsChanged: (updates: { cell_id: string; text: string }[]) => void
}
// Fabric.js types (subset used here)
/**
 * Hand-written structural type for the parts of a Fabric.js canvas this file
 * touches. The real canvas instance is cast to this interface, so nothing here
 * is checked against the installed Fabric version.
 *
 * NOTE(review): `setBackgroundImage` is believed to have been removed in
 * Fabric v6 (background is assigned via the `backgroundImage` property) —
 * confirm against the installed version before relying on it.
 */
interface FabricCanvas {
add: (...objects: FabricObject[]) => FabricCanvas
remove: (...objects: FabricObject[]) => FabricCanvas
setBackgroundImage: (img: FabricImage, callback: () => void) => void
renderAll: () => void
getObjects: () => FabricObject[]
dispose: () => void
on: (event: string, handler: (e: FabricEvent) => void) => void
setWidth: (w: number) => void
setHeight: (h: number) => void
getActiveObject: () => FabricObject | null
discardActiveObject: () => FabricCanvas
requestRenderAll: () => void
setZoom: (z: number) => void
getZoom: () => number
}
/**
 * Structural type for a Fabric.js object as used in this file (text cells and
 * the background image). All geometry fields are optional because the real
 * objects are cast to this shape rather than constructed from it.
 */
interface FabricObject {
type?: string
left?: number
top?: number
width?: number
height?: number
// Current text content; only meaningful for text objects.
text?: string
set: (props: Record<string, unknown>) => FabricObject
get: (prop: string) => unknown
// Custom payload attached when the cell objects are created (cellId, colType, …).
data?: Record<string, unknown>
selectable?: boolean
on?: (event: string, handler: () => void) => void
setCoords?: () => void
}
/**
 * Image object as returned by the Fabric image loader. Adds the scale factors
 * that are multiplied with width/height to obtain the displayed size.
 * (width/height are re-declared here with the same types as in FabricObject.)
 */
interface FabricImage extends FabricObject {
width?: number
height?: number
scaleX?: number
scaleY?: number
}
/** Event payload passed to canvas event handlers registered via `canvas.on`. */
interface FabricEvent {
// Object the event refers to, when there is one.
target?: FabricObject
// Underlying DOM mouse event, when there is one.
e?: MouseEvent
}
// The dynamically imported `fabric` module is treated as untyped in this file;
// constructors are accessed by name and results are cast to the local interfaces.
// eslint-disable-next-line @typescript-eslint/no-explicit-any
type FabricModule = any
/**
 * Returns true when a keyboard event originates from an element the user types
 * text into (text inputs, textareas, contentEditable nodes). Global shortcuts
 * must not fire for those, otherwise they hijack normal typing/undo. Fabric's
 * hidden editing textarea is covered by the textarea check.
 */
function isTextEntryTarget(target: EventTarget | null): boolean {
  if (!(target instanceof HTMLElement)) return false
  if (target.isContentEditable || target instanceof HTMLTextAreaElement) return true
  if (target instanceof HTMLInputElement) {
    const nonTextTypes = ['range', 'button', 'checkbox', 'radio', 'submit', 'reset', 'color', 'file', 'image']
    return !nonTextTypes.includes(target.type)
  }
  return false
}

/**
 * Fabric.js based reconstruction editor.
 *
 * Loads the dewarped page image of a session as canvas background and places
 * one editable text object per grid cell on top of it. Text edits are reported
 * through `onCellsChanged`; the toolbar offers background opacity, zoom and
 * PDF/DOCX export, and Cmd/Ctrl+Z / Cmd/Ctrl+Shift+Z undo/redo text edits.
 *
 * @param sessionId      Session whose dewarped image and exports are used.
 * @param cells          Grid cells to render. Only read when the canvas is
 *                       (re)initialised, i.e. when `sessionId` changes.
 * @param onCellsChanged Receives `{ cell_id, text }` for every text change
 *                       (direct edit, undo or redo).
 */
export function FabricReconstructionCanvas({
  sessionId,
  cells,
  onCellsChanged,
}: FabricReconstructionCanvasProps) {
  const canvasElRef = useRef<HTMLCanvasElement>(null)
  const fabricRef = useRef<FabricCanvas | null>(null)
  const fabricModuleRef = useRef<FabricModule>(null)
  const [ready, setReady] = useState(false)
  const [opacity, setOpacity] = useState(30)
  const [zoom, setZoom] = useState(100)
  const [selectedCell, setSelectedCell] = useState<string | null>(null)
  const [error, setError] = useState('')

  // Undo/Redo
  const undoStackRef = useRef<{ cellId: string; oldText: string; newText: string }[]>([])
  const redoStackRef = useRef<{ cellId: string; oldText: string; newText: string }[]>([])
  // Last text reported per cell; lets us tell a real text edit from a move/resize.
  const committedTextRef = useRef<Map<string, string>>(new Map())
  // Unzoomed canvas size, needed to resize the canvas element when zooming.
  const baseSizeRef = useRef<{ w: number; h: number } | null>(null)
  // Always points at the latest callback so long-lived listeners never go stale.
  const onCellsChangedRef = useRef(onCellsChanged)
  useEffect(() => {
    onCellsChangedRef.current = onCellsChanged
  }, [onCellsChanged])

  // ---- Initialise Fabric.js ----
  useEffect(() => {
    let disposed = false

    // A new session starts with a fresh canvas: drop per-session editor state.
    setReady(false)
    setZoom(100)
    setSelectedCell(null)
    undoStackRef.current = []
    redoStackRef.current = []
    committedTextRef.current = new Map()
    baseSizeRef.current = null

    async function init() {
      try {
        const fabricModule = await import('fabric')
        if (disposed) return
        fabricModuleRef.current = fabricModule

        const canvasEl = canvasElRef.current
        if (!canvasEl) return

        const canvas = new fabricModule.Canvas(canvasEl, {
          selection: true,
          preserveObjectStacking: true,
        }) as unknown as FabricCanvas
        fabricRef.current = canvas

        // Load background image (fromURL already returns a promise)
        const imgUrl = `${KLAUSUR_API}/api/v1/ocr-pipeline/sessions/${sessionId}/image/dewarped`
        const bgImg = (await fabricModule.FabricImage.fromURL(imgUrl, {
          crossOrigin: 'anonymous',
        })) as unknown as FabricImage
        if (disposed) return

        const imgW = (bgImg.width || 800) * (bgImg.scaleX || 1)
        const imgH = (bgImg.height || 600) * (bgImg.scaleY || 1)
        baseSizeRef.current = { w: imgW, h: imgH }
        canvas.setWidth(imgW)
        canvas.setHeight(imgH)

        bgImg.set({ opacity: opacity / 100, selectable: false, evented: false })
        // Fabric v6 removed setBackgroundImage(); the background is a plain
        // property on the canvas (the opacity slider reads the same property).
        const canvasWithBg = canvas as unknown as { backgroundImage?: FabricObject | null }
        canvasWithBg.backgroundImage = bgImg
        canvas.renderAll()

        // Add cell objects
        addCellObjects(canvas, fabricModule, cells, imgW, imgH)

        // Listen for text changes. 'object:modified' also fires for moves and
        // resizes, so only a changed text is recorded and reported.
        canvas.on('object:modified', (e: FabricEvent) => {
          const cellId = e.target?.data?.cellId
          if (typeof cellId !== 'string') return
          const newText = e.target?.text ?? ''
          const oldText = committedTextRef.current.get(cellId) ?? ''
          if (newText === oldText) return
          undoStackRef.current.push({ cellId, oldText, newText })
          // A fresh edit invalidates whatever could have been redone.
          redoStackRef.current = []
          committedTextRef.current.set(cellId, newText)
          onCellsChangedRef.current([{ cell_id: cellId, text: newText }])
        })

        // Selection tracking
        const trackSelection = (e: FabricEvent) => {
          if (e.target?.data?.cellId) setSelectedCell(e.target.data.cellId as string)
        }
        canvas.on('selection:created', trackSelection)
        canvas.on('selection:updated', trackSelection)
        canvas.on('selection:cleared', () => setSelectedCell(null))

        setReady(true)
      } catch (err) {
        if (!disposed) setError(err instanceof Error ? err.message : 'Fabric.js konnte nicht geladen werden')
      }
    }

    void init()

    return () => {
      disposed = true
      fabricRef.current?.dispose()
      fabricRef.current = null
    }
    // `cells` and `opacity` are intentionally read only at initialisation time.
    // eslint-disable-next-line react-hooks/exhaustive-deps
  }, [sessionId])

  /** Creates one editable text object per grid cell at its bbox position. */
  function addCellObjects(
    canvas: FabricCanvas,
    fabricModule: FabricModule,
    gridCells: GridCell[],
    imgW: number,
    imgH: number,
  ) {
    for (const cell of gridCells) {
      const color = COL_TYPE_COLORS[cell.col_type] || '#6b7280'
      // bbox_pct is expressed in percent of the image size
      const x = (cell.bbox_pct.x / 100) * imgW
      const y = (cell.bbox_pct.y / 100) * imgH
      const w = (cell.bbox_pct.w / 100) * imgW
      const h = (cell.bbox_pct.h / 100) * imgH
      const fontSize = Math.max(8, Math.min(18, h * 0.55))

      const textObj = new fabricModule.IText(cell.text || '', {
        left: x,
        top: y,
        width: w,
        fontSize,
        fontFamily: 'monospace',
        fill: '#000000',
        backgroundColor: `${color}22`,
        padding: 2,
        editable: true,
        selectable: true,
        lockScalingFlip: true,
        data: {
          cellId: cell.cell_id,
          colType: cell.col_type,
          rowIndex: cell.row_index,
          colIndex: cell.col_index,
          originalText: cell.text,
        },
      })

      // Border colour matches column type
      textObj.set({
        borderColor: color,
        cornerColor: color,
        cornerSize: 6,
        transparentCorners: false,
      } as Record<string, unknown>)

      // Baseline for undo and change detection
      committedTextRef.current.set(cell.cell_id, cell.text || '')
      canvas.add(textObj)
    }
    canvas.renderAll()
  }

  // ---- Opacity slider ----
  const handleOpacityChange = useCallback((val: number) => {
    setOpacity(val)
    const canvas = fabricRef.current
    if (!canvas) return
    // Fabric stores the background image as a property on the canvas
    const bgImg = (canvas as unknown as Record<string, unknown>).backgroundImage as FabricObject | null
    if (bgImg) {
      bgImg.set({ opacity: val / 100 })
      canvas.renderAll()
    }
  }, [])

  // ---- Zoom ----
  const handleZoomChange = useCallback((val: number) => {
    setZoom(val)
    const canvas = fabricRef.current
    if (!canvas) return
    const factor = val / 100
    canvas.setZoom(factor)
    // setZoom only scales the viewport; resize the element as well so the
    // scroll container shows the whole page instead of a cropped region.
    const base = baseSizeRef.current
    if (base) {
      canvas.setWidth(base.w * factor)
      canvas.setHeight(base.h * factor)
    }
    canvas.renderAll()
  }, [])

  // ---- Undo / Redo via keyboard ----
  useEffect(() => {
    const handler = (e: KeyboardEvent) => {
      // With Shift held, `key` is reported as 'Z' — compare case-insensitively.
      if (!(e.metaKey || e.ctrlKey) || e.key.toLowerCase() !== 'z') return
      // Leave native undo alone while the user is typing somewhere.
      if (isTextEntryTarget(e.target)) return
      const canvas = fabricRef.current
      if (!canvas) return
      e.preventDefault()

      const redo = e.shiftKey
      const source = redo ? redoStackRef.current : undoStackRef.current
      const sink = redo ? undoStackRef.current : redoStackRef.current
      const action = source.pop()
      if (!action) return
      sink.push(action)

      const text = redo ? action.newText : action.oldText
      const obj = canvas.getObjects().find(
        (o: FabricObject) => o.data?.cellId === action.cellId
      )
      if (!obj) return // the cell was removed from the canvas in the meantime
      obj.set({ text })
      committedTextRef.current.set(action.cellId, text)
      canvas.renderAll()
      onCellsChangedRef.current([{ cell_id: action.cellId, text }])
    }
    document.addEventListener('keydown', handler)
    return () => document.removeEventListener('keydown', handler)
  }, [])

  // ---- Delete selected cell (Delete / Backspace key) ----
  useEffect(() => {
    const handler = (e: KeyboardEvent) => {
      if (e.key !== 'Delete' && e.key !== 'Backspace') return
      // Never steal Backspace/Delete from a text field elsewhere on the page.
      if (isTextEntryTarget(e.target)) return
      const canvas = fabricRef.current
      if (!canvas) return
      const active = canvas.getActiveObject()
      if (!active) return
      // If the IText is in editing mode, let the keypress pass through
      if ((active as unknown as Record<string, boolean>).isEditing) return
      e.preventDefault()
      // Removal is visual only; the parent is not notified about removed cells.
      canvas.remove(active)
      canvas.discardActiveObject()
      canvas.renderAll()
    }
    document.addEventListener('keydown', handler)
    return () => document.removeEventListener('keydown', handler)
  }, [])

  // ---- Export helpers ----
  const handleExportPdf = useCallback(() => {
    window.open(
      `${KLAUSUR_API}/api/v1/ocr-pipeline/sessions/${sessionId}/reconstruction/export/pdf`,
      '_blank'
    )
  }, [sessionId])

  const handleExportDocx = useCallback(() => {
    window.open(
      `${KLAUSUR_API}/api/v1/ocr-pipeline/sessions/${sessionId}/reconstruction/export/docx`,
      '_blank'
    )
  }, [sessionId])

  if (error) {
    return (
      <div className="flex flex-col items-center justify-center py-8 text-red-500 text-sm">
        <p>Fabric.js Editor konnte nicht geladen werden:</p>
        <p className="text-xs mt-1 text-gray-400">{error}</p>
      </div>
    )
  }

  return (
    <div className="space-y-2">
      {/* Toolbar */}
      <div className="flex items-center gap-3 bg-white dark:bg-gray-800 rounded-lg border border-gray-200 dark:border-gray-700 px-3 py-2 text-xs">
        {/* Opacity slider */}
        <label className="flex items-center gap-1.5 text-gray-500">
          Hintergrund
          <input
            type="range"
            min={0} max={100}
            value={opacity}
            onChange={e => handleOpacityChange(Number(e.target.value))}
            className="w-20 h-1 accent-teal-500"
          />
          <span className="w-8 text-right">{opacity}%</span>
        </label>
        <div className="w-px h-5 bg-gray-300 dark:bg-gray-600" />
        {/* Zoom — a plain div: a <label> would forward clicks on its text to the first button */}
        <div className="flex items-center gap-1.5 text-gray-500">
          Zoom
          <button type="button" onClick={() => handleZoomChange(Math.max(25, zoom - 25))}
            className="px-1.5 py-0.5 border border-gray-300 dark:border-gray-600 rounded hover:bg-gray-50 dark:hover:bg-gray-700">
            &minus;
          </button>
          <span className="w-8 text-center">{zoom}%</span>
          <button type="button" onClick={() => handleZoomChange(Math.min(200, zoom + 25))}
            className="px-1.5 py-0.5 border border-gray-300 dark:border-gray-600 rounded hover:bg-gray-50 dark:hover:bg-gray-700">
            +
          </button>
          <button type="button" onClick={() => handleZoomChange(100)}
            className="px-1.5 py-0.5 border border-gray-300 dark:border-gray-600 rounded hover:bg-gray-50 dark:hover:bg-gray-700">
            Fit
          </button>
        </div>
        <div className="w-px h-5 bg-gray-300 dark:bg-gray-600" />
        {/* Selected cell info */}
        {selectedCell && (
          <span className="text-gray-400">
            Zelle: <span className="text-gray-600 dark:text-gray-300">{selectedCell}</span>
          </span>
        )}
        <div className="flex-1" />
        {/* Export buttons */}
        <button type="button" onClick={handleExportPdf}
          className="px-2.5 py-1 border border-gray-300 dark:border-gray-600 rounded hover:bg-gray-50 dark:hover:bg-gray-700">
          PDF
        </button>
        <button type="button" onClick={handleExportDocx}
          className="px-2.5 py-1 border border-gray-300 dark:border-gray-600 rounded hover:bg-gray-50 dark:hover:bg-gray-700">
          DOCX
        </button>
      </div>
      {/* Canvas */}
      <div className="border rounded-lg overflow-auto dark:border-gray-700 bg-gray-100 dark:bg-gray-900"
        style={{ maxHeight: '75vh' }}>
        {!ready && (
          <div className="flex items-center justify-center py-12">
            <div className="animate-spin rounded-full h-5 w-5 border-b-2 border-teal-500" />
            <span className="ml-2 text-sm text-gray-500">Canvas wird geladen...</span>
          </div>
        )}
        <canvas ref={canvasElRef} />
      </div>
      {/* Legend */}
      <div className="flex items-center gap-4 text-xs text-gray-500">
        {Object.entries(COL_TYPE_COLORS).map(([type, color]) => (
          <span key={type} className="flex items-center gap-1">
            <span className="w-3 h-3 rounded" style={{ backgroundColor: color + '44', border: `1px solid ${color}` }} />
            {type.replace('column_', '').replace('page_', '')}
          </span>
        ))}
        <span className="ml-auto text-gray-400">Doppelklick = Text bearbeiten | Delete = Zelle entfernen | Cmd+Z = Undo</span>
      </div>
    </div>
  )
}

View File

@@ -1,10 +1,19 @@
'use client'
import { useCallback, useEffect, useMemo, useRef, useState } from 'react'
import dynamic from 'next/dynamic'
import type { GridResult, GridCell, WordEntry } from '@/app/(admin)/ai/ocr-pipeline/types'
const KLAUSUR_API = '/klausur-api'
// Lazy-load Fabric.js canvas editor (SSR-incompatible)
const FabricReconstructionCanvas = dynamic(
() => import('./FabricReconstructionCanvas').then(m => ({ default: m.FabricReconstructionCanvas })),
{ ssr: false, loading: () => <div className="py-8 text-center text-sm text-gray-400">Editor wird geladen...</div> }
)
type EditorMode = 'simple' | 'editor'
interface StepReconstructionProps {
sessionId: string | null
onNext: () => void
@@ -26,6 +35,8 @@ export function StepReconstruction({ sessionId, onNext }: StepReconstructionProp
const [status, setStatus] = useState<'loading' | 'ready' | 'saving' | 'saved' | 'error'>('loading')
const [error, setError] = useState('')
const [cells, setCells] = useState<EditableCell[]>([])
const [gridCells, setGridCells] = useState<GridCell[]>([])
const [editorMode, setEditorMode] = useState<EditorMode>('simple')
const [editedTexts, setEditedTexts] = useState<Map<string, string>>(new Map())
const [zoom, setZoom] = useState(100)
const [imageNaturalH, setImageNaturalH] = useState(0)
@@ -70,8 +81,9 @@ export function StepReconstruction({ sessionId, onNext }: StepReconstructionProp
}
// Build editable cells from grid cells
const gridCells: GridCell[] = wordResult.cells || []
const allEditableCells: EditableCell[] = gridCells.map(c => ({
const rawGridCells: GridCell[] = wordResult.cells || []
setGridCells(rawGridCells)
const allEditableCells: EditableCell[] = rawGridCells.map(c => ({
cellId: c.cell_id,
text: c.text,
originalText: c.text,
@@ -252,6 +264,17 @@ export function StepReconstruction({ sessionId, onNext }: StepReconstructionProp
}
}, [sessionId, editedTexts, cells])
// Handler for Fabric.js editor cell changes
const handleFabricCellsChanged = useCallback((updates: { cell_id: string; text: string }[]) => {
  // Nothing to merge — avoid a state update (and re-render) for an empty batch.
  if (updates.length === 0) return
  // Apply the whole batch in ONE state update: the map is copied once instead
  // of once per changed cell, and later entries for the same cell still win.
  setEditedTexts(prev => {
    const next = new Map(prev)
    for (const { cell_id, text } of updates) {
      next.set(cell_id, text)
    }
    return next
  })
}, [])
const dewarpedUrl = sessionId
? `${KLAUSUR_API}/api/v1/ocr-pipeline/sessions/${sessionId}/image/dewarped`
: ''
@@ -332,6 +355,29 @@ export function StepReconstruction({ sessionId, onNext }: StepReconstructionProp
<h3 className="text-sm font-medium text-gray-700 dark:text-gray-300">
Schritt 7: Rekonstruktion
</h3>
{/* Mode toggle */}
<div className="flex items-center ml-2 border border-gray-300 dark:border-gray-600 rounded overflow-hidden text-xs">
<button
onClick={() => setEditorMode('simple')}
className={`px-2 py-0.5 transition-colors ${
editorMode === 'simple'
? 'bg-teal-600 text-white'
: 'hover:bg-gray-50 dark:hover:bg-gray-700 text-gray-600 dark:text-gray-400'
}`}
>
Einfach
</button>
<button
onClick={() => setEditorMode('editor')}
className={`px-2 py-0.5 transition-colors ${
editorMode === 'editor'
? 'bg-teal-600 text-white'
: 'hover:bg-gray-50 dark:hover:bg-gray-700 text-gray-600 dark:text-gray-400'
}`}
>
Editor
</button>
</div>
<span className="text-xs text-gray-400">
{cells.length} Zellen &middot; {changedCount} geaendert
{emptyCellIds.size > 0 && showEmptyHighlight && (
@@ -408,82 +454,90 @@ export function StepReconstruction({ sessionId, onNext }: StepReconstructionProp
</div>
</div>
{/* Reconstruction canvas */}
<div className="border rounded-lg overflow-auto dark:border-gray-700 bg-gray-100 dark:bg-gray-900" style={{ maxHeight: '75vh' }}>
<div
ref={containerRef}
className="relative inline-block"
style={{ transform: `scale(${zoom / 100})`, transformOrigin: 'top left' }}
>
{/* Background image at reduced opacity */}
{/* eslint-disable-next-line @next/next/no-img-element */}
<img
ref={imageRef}
src={dewarpedUrl}
alt="Dewarped"
className="block"
style={{ opacity: 0.3 }}
onLoad={handleImageLoad}
/>
{/* Reconstruction canvas — Simple or Editor mode */}
{editorMode === 'editor' && sessionId ? (
<FabricReconstructionCanvas
sessionId={sessionId}
cells={gridCells}
onCellsChanged={handleFabricCellsChanged}
/>
) : (
<div className="border rounded-lg overflow-auto dark:border-gray-700 bg-gray-100 dark:bg-gray-900" style={{ maxHeight: '75vh' }}>
<div
ref={containerRef}
className="relative inline-block"
style={{ transform: `scale(${zoom / 100})`, transformOrigin: 'top left' }}
>
{/* Background image at reduced opacity */}
{/* eslint-disable-next-line @next/next/no-img-element */}
<img
ref={imageRef}
src={dewarpedUrl}
alt="Dewarped"
className="block"
style={{ opacity: 0.3 }}
onLoad={handleImageLoad}
/>
{/* Empty field markers */}
{showEmptyHighlight && cells
.filter(c => emptyCellIds.has(c.cellId))
.map(cell => (
<div
key={`empty-${cell.cellId}`}
className="absolute border-2 border-dashed border-red-400/60 rounded pointer-events-none"
style={{
{/* Empty field markers */}
{showEmptyHighlight && cells
.filter(c => emptyCellIds.has(c.cellId))
.map(cell => (
<div
key={`empty-${cell.cellId}`}
className="absolute border-2 border-dashed border-red-400/60 rounded pointer-events-none"
style={{
left: `${cell.bboxPct.x}%`,
top: `${cell.bboxPct.y}%`,
width: `${cell.bboxPct.w}%`,
height: `${cell.bboxPct.h}%`,
}}
/>
))}
{/* Editable text fields at bbox positions */}
{cells.map((cell) => {
const displayText = getDisplayText(cell)
const edited = isEdited(cell)
return (
<div key={cell.cellId} className="absolute group" style={{
left: `${cell.bboxPct.x}%`,
top: `${cell.bboxPct.y}%`,
width: `${cell.bboxPct.w}%`,
height: `${cell.bboxPct.h}%`,
}}
/>
))}
{/* Editable text fields at bbox positions */}
{cells.map((cell) => {
const displayText = getDisplayText(cell)
const edited = isEdited(cell)
return (
<div key={cell.cellId} className="absolute group" style={{
left: `${cell.bboxPct.x}%`,
top: `${cell.bboxPct.y}%`,
width: `${cell.bboxPct.w}%`,
height: `${cell.bboxPct.h}%`,
}}>
<input
id={`cell-${cell.cellId}`}
type="text"
value={displayText}
onChange={(e) => handleTextChange(cell.cellId, e.target.value)}
onKeyDown={(e) => handleKeyDown(e, cell.cellId)}
className={`w-full h-full bg-transparent text-black dark:text-white border px-0.5 outline-none transition-colors ${
colTypeColor(cell.colType)
} ${edited ? 'border-green-500 bg-green-50/30 dark:bg-green-900/20' : ''}`}
style={{
fontSize: `${getFontSize(cell.bboxPct.h)}px`,
lineHeight: '1',
}}
title={`${cell.cellId} (${cell.colType})`}
/>
{/* Per-cell reset button (X) — only shown for edited cells on hover */}
{edited && (
<button
onClick={() => resetCell(cell.cellId)}
className="absolute -top-1 -right-1 w-4 h-4 bg-red-500 text-white rounded-full text-[9px] leading-none opacity-0 group-hover:opacity-100 transition-opacity flex items-center justify-center"
title="Zuruecksetzen"
>
&times;
</button>
)}
</div>
)
})}
}}>
<input
id={`cell-${cell.cellId}`}
type="text"
value={displayText}
onChange={(e) => handleTextChange(cell.cellId, e.target.value)}
onKeyDown={(e) => handleKeyDown(e, cell.cellId)}
className={`w-full h-full bg-transparent text-black dark:text-white border px-0.5 outline-none transition-colors ${
colTypeColor(cell.colType)
} ${edited ? 'border-green-500 bg-green-50/30 dark:bg-green-900/20' : ''}`}
style={{
fontSize: `${getFontSize(cell.bboxPct.h)}px`,
lineHeight: '1',
}}
title={`${cell.cellId} (${cell.colType})`}
/>
{/* Per-cell reset button (X) — only shown for edited cells on hover */}
{edited && (
<button
onClick={() => resetCell(cell.cellId)}
className="absolute -top-1 -right-1 w-4 h-4 bg-red-500 text-white rounded-full text-[9px] leading-none opacity-0 group-hover:opacity-100 transition-opacity flex items-center justify-center"
title="Zuruecksetzen"
>
&times;
</button>
)}
</div>
)
})}
</div>
</div>
</div>
)}
{/* Bottom action */}
<div className="flex justify-end">

View File

@@ -27,6 +27,7 @@
"react-dom": "^18.3.1",
"reactflow": "^11.11.4",
"recharts": "^2.15.0",
"fabric": "^6.0.0",
"uuid": "^13.0.0"
},
"devDependencies": {

View File

@@ -511,27 +511,39 @@ def _detect_shear_by_projection(img: np.ndarray) -> Dict[str, Any]:
small = cv2.resize(binary, (w // 2, h // 2), interpolation=cv2.INTER_AREA)
sh, sw = small.shape
# Angle sweep: ±3° in 0.25° steps
angles = [a * 0.25 for a in range(-12, 13)] # 25 values
best_angle = 0.0
best_variance = -1.0
variances: List[Tuple[float, float]] = []
# 2-pass angle sweep for 10x better precision:
# Pass 1: Coarse sweep ±3° in 0.5° steps (13 values)
# Pass 2: Fine sweep ±0.5° around coarse best in 0.05° steps (21 values)
for angle_deg in angles:
if abs(angle_deg) < 0.01:
rotated = small
else:
shear_tan = math.tan(math.radians(angle_deg))
M = np.float32([[1, shear_tan, -sh / 2.0 * shear_tan], [0, 1, 0]])
rotated = cv2.warpAffine(small, M, (sw, sh),
flags=cv2.INTER_NEAREST,
borderMode=cv2.BORDER_CONSTANT)
profile = np.sum(rotated, axis=1).astype(float)
var = float(np.var(profile))
variances.append((angle_deg, var))
if var > best_variance:
best_variance = var
best_angle = angle_deg
def _sweep_variance(angles_list):
results = []
for angle_deg in angles_list:
if abs(angle_deg) < 0.001:
rotated = small
else:
shear_tan = math.tan(math.radians(angle_deg))
M = np.float32([[1, shear_tan, -sh / 2.0 * shear_tan], [0, 1, 0]])
rotated = cv2.warpAffine(small, M, (sw, sh),
flags=cv2.INTER_NEAREST,
borderMode=cv2.BORDER_CONSTANT)
profile = np.sum(rotated, axis=1).astype(float)
results.append((angle_deg, float(np.var(profile))))
return results
# Pass 1: coarse
coarse_angles = [a * 0.5 for a in range(-6, 7)] # 13 values
coarse_results = _sweep_variance(coarse_angles)
coarse_best = max(coarse_results, key=lambda x: x[1])
# Pass 2: fine around coarse best
fine_center = coarse_best[0]
fine_angles = [fine_center + a * 0.05 for a in range(-10, 11)] # 21 values
fine_results = _sweep_variance(fine_angles)
fine_best = max(fine_results, key=lambda x: x[1])
best_angle = fine_best[0]
best_variance = fine_best[1]
variances = coarse_results + fine_results
# Confidence: how much sharper is the best angle vs. the mean?
all_mean = sum(v for _, v in variances) / len(variances)
@@ -611,6 +623,133 @@ def _detect_shear_by_hough(img: np.ndarray) -> Dict[str, Any]:
return result
def _detect_shear_by_text_lines(img: np.ndarray) -> Dict[str, Any]:
    """Detect shear by measuring text-line straightness (Method D).

    Runs a quick Tesseract scan (PSM 11, 50% downscale) to locate word
    bounding boxes, groups them into horizontal lines by Y-proximity,
    fits a linear regression to each line, and takes the median slope
    as the shear angle.

    Unlike the edge/projection/Hough detectors this one measures the
    recognised text itself.

    Args:
        img: Page image, BGR or already single-channel grayscale.

    Returns:
        Dict with keys: method, shear_degrees, confidence. When OCR fails or
        too few words/lines are found, shear_degrees and confidence are 0.0.
    """
    import math

    result: Dict[str, Any] = {"method": "text_lines", "shear_degrees": 0.0, "confidence": 0.0}

    h, w = img.shape[:2]
    # Downscale 50% for speed (never request a 0-pixel target size)
    scale = 0.5
    small = cv2.resize(img, (max(1, int(w * scale)), max(1, int(h * scale))),
                       interpolation=cv2.INTER_AREA)
    # Accept grayscale input as-is; colour conversion would raise on 2-D arrays
    gray = cv2.cvtColor(small, cv2.COLOR_BGR2GRAY) if small.ndim == 3 else small
    pil_img = Image.fromarray(gray)

    try:
        data = pytesseract.image_to_data(
            pil_img, lang='eng+deu', config='--psm 11 --oem 3',
            output_type=pytesseract.Output.DICT,
        )
    except Exception:
        # Deliberate best-effort: this detector is optional, so an OCR failure
        # yields a zero-confidence result instead of aborting the dewarp.
        return result

    # Collect word centres
    words = []
    for i in range(len(data['text'])):
        text = data['text'][i].strip()
        # Confidence may arrive as int, float or a float-formatted string
        # depending on the pytesseract/Tesseract version.
        try:
            conf = float(data['conf'][i])
        except (TypeError, ValueError):
            continue
        if not text or conf < 20 or len(text) < 2:
            continue
        cx = data['left'][i] + data['width'][i] / 2.0
        cy = data['top'][i] + data['height'][i] / 2.0
        words.append((cx, cy, data['height'][i]))

    if len(words) < 10:
        return result

    # Group words into lines by Y-proximity: a word joins the current line
    # while its centre stays within y_tol of the line's first (topmost) word.
    avg_h = sum(wh for _, _, wh in words) / len(words)
    y_tol = max(avg_h * 0.6, 8)
    words_sorted = sorted(words, key=lambda word: word[1])

    lines: List[List[Tuple[float, float]]] = []
    current_line: List[Tuple[float, float]] = [(words_sorted[0][0], words_sorted[0][1])]
    current_y = words_sorted[0][1]
    for cx, cy, _ in words_sorted[1:]:
        if abs(cy - current_y) <= y_tol:
            current_line.append((cx, cy))
        else:
            # Lines with fewer than 3 words are too short for a stable fit
            if len(current_line) >= 3:
                lines.append(current_line)
            current_line = [(cx, cy)]
            current_y = cy
    if len(current_line) >= 3:
        lines.append(current_line)

    if len(lines) < 3:
        return result

    # Linear regression per line → slope (dy/dx)
    slopes = []
    for line in lines:
        xs = np.array([p[0] for p in line])
        ys = np.array([p[1] for p in line])
        x_range = xs.max() - xs.min()
        if x_range < 20:
            # Words stacked almost vertically give no usable slope
            continue
        coeffs = np.polyfit(xs, ys, 1)
        slopes.append(coeffs[0])  # dy/dx

    if len(slopes) < 3:
        return result

    # Median slope → shear angle
    # dy/dx of horizontal text lines = tan(shear_angle); the slope is unaffected
    # by the uniform 50% downscale. Image y grows downwards, so a positive
    # slope means the text descends from left to right.
    median_slope = float(np.median(slopes))
    shear_degrees = math.degrees(math.atan(median_slope))

    # Confidence from line count + slope consistency
    slope_std = float(np.std(slopes))
    consistency = max(0.0, 1.0 - slope_std * 20)  # penalise high variance
    count_factor = min(1.0, len(slopes) / 8.0)
    confidence = count_factor * 0.6 + consistency * 0.4

    result["shear_degrees"] = round(shear_degrees, 3)
    result["confidence"] = round(max(0.0, min(1.0, confidence)), 2)
    return result
def _dewarp_quality_check(original: np.ndarray, corrected: np.ndarray) -> bool:
    """Decide whether a dewarp correction is worth keeping.

    Both images are binarised (inverted Otsu), halved in size and reduced to a
    per-row sum profile. A larger variance of that profile means sharper
    text-line peaks, which is taken as better horizontal alignment.

    Returns:
        True when the corrected image scores strictly higher than the
        original, False when the correction should be discarded.
    """
    def _row_profile_variance(bgr: np.ndarray) -> float:
        grey = cv2.cvtColor(bgr, cv2.COLOR_BGR2GRAY)
        binarised = cv2.threshold(
            grey, 0, 255, cv2.THRESH_BINARY_INV + cv2.THRESH_OTSU
        )[1]
        rows, cols = binarised.shape[0], binarised.shape[1]
        halved = cv2.resize(binarised, (cols // 2, rows // 2),
                            interpolation=cv2.INTER_AREA)
        row_sums = np.sum(halved, axis=1).astype(float)
        return float(np.var(row_sums))

    score_before = _row_profile_variance(original)
    score_after = _row_profile_variance(corrected)
    # Any improvement counts, however small; a tie is a rejection
    if score_after > score_before:
        return True
    return False
def _apply_shear(img: np.ndarray, shear_degrees: float) -> np.ndarray:
"""Apply a vertical shear correction to an image.
@@ -644,24 +783,36 @@ def _apply_shear(img: np.ndarray, shear_degrees: float) -> np.ndarray:
def _ensemble_shear(detections: List[Dict[str, Any]]) -> Tuple[float, float, str]:
"""Combine multiple shear detections into a single weighted estimate.
"""Combine multiple shear detections into a single weighted estimate (v2).
Only methods with confidence >= 0.3 are considered.
Results are outlier-filtered: if any accepted result differs by more than
1° from the weighted mean, it is discarded.
Ensemble v2 changes vs v1:
- Minimum confidence raised to 0.5 (was 0.3)
- text_lines method gets 1.5× weight boost (most reliable detector)
- Outlier filter at 1° from weighted mean
Returns:
(shear_degrees, ensemble_confidence, methods_used_str)
"""
accepted = [(d["shear_degrees"], d["confidence"], d["method"])
for d in detections if d["confidence"] >= 0.3]
# Higher confidence threshold — "im Zweifel nichts tun"
_MIN_CONF = 0.5
# text_lines gets a weight boost as the most content-aware method
_METHOD_WEIGHT_BOOST = {"text_lines": 1.5}
accepted = []
for d in detections:
if d["confidence"] < _MIN_CONF:
continue
boost = _METHOD_WEIGHT_BOOST.get(d["method"], 1.0)
effective_conf = d["confidence"] * boost
accepted.append((d["shear_degrees"], effective_conf, d["method"]))
if not accepted:
return 0.0, 0.0, "none"
if len(accepted) == 1:
deg, conf, method = accepted[0]
return deg, conf, method
return deg, min(conf, 1.0), method
# First pass: weighted mean
total_w = sum(c for _, c, _ in accepted)
@@ -684,23 +835,24 @@ def _ensemble_shear(detections: List[Dict[str, Any]]) -> Tuple[float, float, str
ensemble_conf = min(1.0, avg_conf + agreement_bonus)
methods_str = "+".join(m for _, _, m in filtered)
return round(final_deg, 3), round(ensemble_conf, 2), methods_str
return round(final_deg, 3), round(min(ensemble_conf, 1.0), 2), methods_str
def dewarp_image(img: np.ndarray, use_ensemble: bool = True) -> Tuple[np.ndarray, Dict[str, Any]]:
"""Correct vertical shear after deskew.
"""Correct vertical shear after deskew (v2 with quality gate).
After deskew aligns horizontal text lines, vertical features (column
edges) may still be tilted. This detects the tilt angle using an ensemble
of three complementary methods and applies an affine shear correction.
of four complementary methods and applies an affine shear correction.
Methods (all run in ~100ms total):
A. _detect_shear_angle() — vertical edge profile (~50ms)
B. _detect_shear_by_projection() — horizontal text-line variance (~30ms)
C. _detect_shear_by_hough() — Hough lines on table borders (~20ms)
Methods (all run in ~150ms total):
A. _detect_shear_angle() — vertical edge profile (~50ms)
B. _detect_shear_by_projection() — horizontal text-line variance (~30ms)
C. _detect_shear_by_hough() — Hough lines on table borders (~20ms)
D. _detect_shear_by_text_lines() — text-line straightness (~50ms)
Only methods with confidence >= 0.3 contribute to the ensemble.
Outlier filtering discards results deviating > 1° from the weighted mean.
Quality gate: after correction, horizontal projection variance is compared
before vs after. If correction worsened alignment, it is discarded.
Args:
img: BGR image (already deskewed).
@@ -726,7 +878,8 @@ def dewarp_image(img: np.ndarray, use_ensemble: bool = True) -> Tuple[np.ndarray
det_a = _detect_shear_angle(img)
det_b = _detect_shear_by_projection(img)
det_c = _detect_shear_by_hough(img)
detections = [det_a, det_b, det_c]
det_d = _detect_shear_by_text_lines(img)
detections = [det_a, det_b, det_c, det_d]
shear_deg, confidence, method = _ensemble_shear(detections)
else:
det_a = _detect_shear_angle(img)
@@ -739,22 +892,35 @@ def dewarp_image(img: np.ndarray, use_ensemble: bool = True) -> Tuple[np.ndarray
logger.info(
"dewarp: ensemble shear=%.3f° conf=%.2f method=%s (%.2fs) | "
"A=%.3f/%.2f B=%.3f/%.2f C=%.3f/%.2f",
"A=%.3f/%.2f B=%.3f/%.2f C=%.3f/%.2f D=%.3f/%.2f",
shear_deg, confidence, method, duration,
detections[0]["shear_degrees"], detections[0]["confidence"],
detections[1]["shear_degrees"] if len(detections) > 1 else 0.0,
detections[1]["confidence"] if len(detections) > 1 else 0.0,
detections[2]["shear_degrees"] if len(detections) > 2 else 0.0,
detections[2]["confidence"] if len(detections) > 2 else 0.0,
detections[3]["shear_degrees"] if len(detections) > 3 else 0.0,
detections[3]["confidence"] if len(detections) > 3 else 0.0,
)
# Only correct if shear is significant (> 0.05°)
if abs(shear_deg) < 0.05 or confidence < 0.3:
# Higher thresholds: subtle shear (<0.15°) is irrelevant for OCR
if abs(shear_deg) < 0.15 or confidence < 0.5:
return img, no_correction
# Apply correction (negate the detected shear to straighten)
corrected = _apply_shear(img, -shear_deg)
# Quality gate: verify the correction actually improved alignment
if not _dewarp_quality_check(img, corrected):
logger.info("dewarp: quality gate REJECTED correction (%.3f°) — "
"projection variance did not improve", shear_deg)
no_correction["detections"] = [
{"method": d["method"], "shear_degrees": d["shear_degrees"],
"confidence": d["confidence"]}
for d in detections
]
return img, no_correction
info = {
"method": method,
"shear_degrees": shear_deg,
@@ -4180,6 +4346,60 @@ def _clean_cell_text(text: str) -> str:
return ' '.join(tokens)
# ---------------------------------------------------------------------------
# Narrow-column OCR helpers (Proposal B)
# ---------------------------------------------------------------------------
def _compute_cell_padding(col_width: int, img_w: int) -> int:
"""Adaptive padding for OCR crops based on column width.
Narrow columns (page_ref, marker) need more surrounding context so
Tesseract can segment characters correctly. Wide columns keep the
minimal 4 px padding to avoid pulling in neighbours.
"""
col_pct = col_width / img_w * 100 if img_w > 0 else 100
if col_pct < 5:
return max(20, col_width // 2)
if col_pct < 10:
return max(12, col_width // 4)
if col_pct < 15:
return 8
return 4
def _ensure_minimum_crop_size(crop: np.ndarray, min_dim: int = 150,
max_scale: int = 3) -> np.ndarray:
"""Upscale tiny crops so Tesseract gets enough pixel data.
If either dimension is below *min_dim*, the crop is bicubic-upscaled
so the smallest dimension reaches *min_dim* (capped at *max_scale* ×).
"""
h, w = crop.shape[:2]
if h >= min_dim and w >= min_dim:
return crop
scale = min(max_scale, max(min_dim / max(h, 1), min_dim / max(w, 1)))
if scale <= 1.0:
return crop
new_w = int(w * scale)
new_h = int(h * scale)
return cv2.resize(crop, (new_w, new_h), interpolation=cv2.INTER_CUBIC)
def _select_psm_for_column(col_type: str, col_width: int,
row_height: int) -> int:
"""Choose the best Tesseract PSM for a given column geometry.
- page_ref columns are almost always single short tokens → PSM 8
- Very narrow or short cells → PSM 7 (single text line)
- Everything else → PSM 6 (uniform block)
"""
if col_type in ('page_ref', 'marker'):
return 8 # single word
if col_width < 100 or row_height < 30:
return 7 # single line
return 6 # uniform block
def _ocr_single_cell(
row_idx: int,
col_idx: int,
@@ -4202,12 +4422,13 @@ def _ocr_single_cell(
disp_w = col.width
disp_h = row.height
# OCR crop: slightly wider to catch edge characters (internal only)
pad = 4
# OCR crop: adaptive padding — narrow columns get more context
pad = _compute_cell_padding(col.width, img_w)
cell_x = max(0, col.x - pad)
cell_y = max(0, row.y - pad)
cell_w = min(col.width + 2 * pad, img_w - cell_x)
cell_h = min(row.height + 2 * pad, img_h - cell_y)
is_narrow = (col.width / img_w * 100) < 15 if img_w > 0 else False
if disp_w <= 0 or disp_h <= 0:
return {
@@ -4266,20 +4487,56 @@ def _ocr_single_cell(
dark_ratio = float(np.count_nonzero(crop < 180)) / crop.size
_run_fallback = dark_ratio > 0.005
if _run_fallback:
cell_region = PageRegion(
type=col.type,
x=cell_x, y=cell_y,
width=cell_w, height=cell_h,
)
if engine_name in ("trocr-printed", "trocr-handwritten") and img_bgr is not None:
fallback_words = ocr_region_trocr(img_bgr, cell_region, handwritten=(engine_name == "trocr-handwritten"))
elif engine_name == "lighton" and img_bgr is not None:
fallback_words = ocr_region_lighton(img_bgr, cell_region)
elif use_rapid and img_bgr is not None:
fallback_words = ocr_region_rapid(img_bgr, cell_region)
# For narrow columns, upscale the crop before OCR
if is_narrow and ocr_img is not None:
_crop_slice = ocr_img[cell_y:cell_y + cell_h, cell_x:cell_x + cell_w]
_upscaled = _ensure_minimum_crop_size(_crop_slice)
if _upscaled is not _crop_slice:
# Build a temporary full-size image with the upscaled crop
# placed at origin so ocr_region can crop it cleanly.
_up_h, _up_w = _upscaled.shape[:2]
_tmp_region = PageRegion(
type=col.type, x=0, y=0, width=_up_w, height=_up_h,
)
_cell_psm = _select_psm_for_column(col.type, col.width, row.height)
cell_lang = lang_map.get(col.type, lang)
fallback_words = ocr_region(_upscaled, _tmp_region,
lang=cell_lang, psm=_cell_psm)
# Remap word positions back to original image coordinates
_sx = cell_w / max(_up_w, 1)
_sy = cell_h / max(_up_h, 1)
for _fw in (fallback_words or []):
_fw['left'] = int(_fw['left'] * _sx) + cell_x
_fw['top'] = int(_fw['top'] * _sy) + cell_y
_fw['width'] = int(_fw['width'] * _sx)
_fw['height'] = int(_fw['height'] * _sy)
else:
# No upscaling needed, use adaptive PSM
cell_region = PageRegion(
type=col.type, x=cell_x, y=cell_y,
width=cell_w, height=cell_h,
)
_cell_psm = _select_psm_for_column(col.type, col.width, row.height)
cell_lang = lang_map.get(col.type, lang)
fallback_words = ocr_region(ocr_img, cell_region,
lang=cell_lang, psm=_cell_psm)
else:
cell_lang = lang_map.get(col.type, lang)
fallback_words = ocr_region(ocr_img, cell_region, lang=cell_lang, psm=6)
cell_region = PageRegion(
type=col.type,
x=cell_x, y=cell_y,
width=cell_w, height=cell_h,
)
if engine_name in ("trocr-printed", "trocr-handwritten") and img_bgr is not None:
fallback_words = ocr_region_trocr(img_bgr, cell_region, handwritten=(engine_name == "trocr-handwritten"))
elif engine_name == "lighton" and img_bgr is not None:
fallback_words = ocr_region_lighton(img_bgr, cell_region)
elif use_rapid and img_bgr is not None:
fallback_words = ocr_region_rapid(img_bgr, cell_region)
else:
_cell_psm = _select_psm_for_column(col.type, col.width, row.height)
cell_lang = lang_map.get(col.type, lang)
fallback_words = ocr_region(ocr_img, cell_region,
lang=cell_lang, psm=_cell_psm)
if fallback_words:
# Apply same confidence filter to fallback words
@@ -4297,8 +4554,12 @@ def _ocr_single_cell(
# --- SECONDARY FALLBACK: PSM=7 (single line) for still-empty cells ---
if not text.strip() and _run_fallback and not use_rapid:
_fb_region = PageRegion(
type=col.type, x=cell_x, y=cell_y,
width=cell_w, height=cell_h,
)
cell_lang = lang_map.get(col.type, lang)
psm7_words = ocr_region(ocr_img, cell_region, lang=cell_lang, psm=7)
psm7_words = ocr_region(ocr_img, _fb_region, lang=cell_lang, psm=7)
if psm7_words:
psm7_words = [w for w in psm7_words if w.get('conf', 0) >= _MIN_WORD_CONF]
if psm7_words:
@@ -4310,6 +4571,38 @@ def _ocr_single_cell(
)
used_engine = 'cell_ocr_psm7'
# --- TERTIARY FALLBACK: Row-strip re-OCR for narrow columns ---
# If a narrow cell is still empty, OCR the entire row strip with
# RapidOCR (which handles small text better) and assign words by
# X-position overlap with this column.
if not text.strip() and is_narrow and img_bgr is not None:
row_region = PageRegion(
type='_row_strip', x=0, y=row.y,
width=img_w, height=row.height,
)
strip_words = ocr_region_rapid(img_bgr, row_region)
if strip_words:
# Filter to words overlapping this column's X-range
col_left = col.x
col_right = col.x + col.width
col_words = []
for sw in strip_words:
sw_left = sw.get('left', 0)
sw_right = sw_left + sw.get('width', 0)
overlap = max(0, min(sw_right, col_right) - max(sw_left, col_left))
if overlap > sw.get('width', 1) * 0.3:
col_words.append(sw)
if col_words:
col_words = [w for w in col_words if w.get('conf', 0) >= _MIN_WORD_CONF]
if col_words:
rs_text = _words_to_reading_order_text(col_words, y_tolerance_px=row.height)
if rs_text.strip():
text = rs_text
avg_conf = round(
sum(w['conf'] for w in col_words) / len(col_words), 1
)
used_engine = 'row_strip_rapid'
# --- NOISE FILTER: clear cells that contain only OCR artifacts ---
if text.strip():
text = _clean_cell_text(text)

View File

@@ -1742,6 +1742,151 @@ async def save_reconstruction(session_id: str, request: Request):
}
@router.get("/sessions/{session_id}/reconstruction/fabric-json")
async def get_fabric_json(session_id: str):
    """Serve the session's cell grid as Fabric.js canvas JSON.

    Loads the session, reads its ``word_result`` and hands the cells plus the
    stored image dimensions (defaulting to 800 x 600 when absent) to
    ``cells_to_fabric_json``.

    Raises:
        HTTPException: 404 when the session lookup returns nothing, 400 when
            the session has no ``word_result``.
    """
    record = await get_session_db(session_id)
    if not record:
        raise HTTPException(status_code=404, detail=f"Session {session_id} not found")

    words = record.get("word_result")
    if not words:
        raise HTTPException(status_code=400, detail="No word result found")

    grid_cells = words.get("cells", [])
    width_px = words.get("image_width", 800)
    height_px = words.get("image_height", 600)

    # Imported lazily, only once the request is known to be valid.
    from services.layout_reconstruction_service import cells_to_fabric_json

    return cells_to_fabric_json(grid_cells, width_px, height_px)
@router.get("/sessions/{session_id}/reconstruction/export/pdf")
async def export_reconstruction_pdf(session_id: str):
    """Export the reconstructed cell grid as a PDF table.

    The table has one header row (column labels from ``columns_used``, or
    generated ``Col i`` names when that list is empty) followed by
    ``grid_shape.rows`` data rows. Cell texts are looked up by the id pattern
    ``R{row:02d}_C{col}``; missing cells and null texts become empty strings.
    Header and data rows are padded with empty strings to a common width so
    the table is always rectangular.

    Returns:
        A streaming ``application/pdf`` response served as an attachment.

    Raises:
        HTTPException: 404 when the session is not found, 400 when there is
            no word result or nothing to export, 501 when reportlab is not
            installed.
    """
    session = await get_session_db(session_id)
    if not session:
        raise HTTPException(status_code=404, detail=f"Session {session_id} not found")

    word_result = session.get("word_result")
    if not word_result:
        raise HTTPException(status_code=400, detail="No word result found")

    cells = word_result.get("cells", [])
    columns_used = word_result.get("columns_used", [])
    grid_shape = word_result.get("grid_shape", {})
    n_rows = grid_shape.get("rows", 0)
    n_cols = grid_shape.get("cols", 0)

    # Index cells once instead of scanning the full list for every grid
    # position. setdefault keeps the first cell per id, matching a linear
    # first-match search.
    cell_by_id: dict = {}
    for c in cells:
        cell_by_id.setdefault(c.get("cell_id"), c)

    header = [c.get("label", c.get("type", f"Col {i}")) for i, c in enumerate(columns_used)]
    if not header:
        header = [f"Col {i}" for i in range(n_cols)]

    # columns_used and grid_shape may disagree; pad both sides to one width.
    width = max(len(header), n_cols)
    header = header + [""] * (width - len(header))

    # Build table data: rows x columns
    table_data: list[list[str]] = [header]
    for r in range(n_rows):
        row_texts = []
        for ci in range(n_cols):
            cell = cell_by_id.get(f"R{r:02d}_C{ci}")
            # A stored null text is exported as an empty string.
            row_texts.append((cell.get("text") or "") if cell else "")
        row_texts.extend([""] * (width - len(row_texts)))
        table_data.append(row_texts)

    # Generate PDF with reportlab
    try:
        from reportlab.lib.pagesizes import A4
        from reportlab.lib import colors
        from reportlab.platypus import SimpleDocTemplate, Table, TableStyle
        import io as _io

        if not table_data[0]:
            raise HTTPException(status_code=400, detail="No data to export")

        buf = _io.BytesIO()
        doc = SimpleDocTemplate(buf, pagesize=A4)

        t = Table(table_data)
        t.setStyle(TableStyle([
            ('BACKGROUND', (0, 0), (-1, 0), colors.HexColor('#0d9488')),
            ('TEXTCOLOR', (0, 0), (-1, 0), colors.white),
            ('FONTSIZE', (0, 0), (-1, -1), 9),
            ('GRID', (0, 0), (-1, -1), 0.5, colors.grey),
            ('VALIGN', (0, 0), (-1, -1), 'TOP'),
            ('WORDWRAP', (0, 0), (-1, -1), True),
        ]))
        doc.build([t])
        buf.seek(0)

        from fastapi.responses import StreamingResponse
        return StreamingResponse(
            buf,
            media_type="application/pdf",
            headers={"Content-Disposition": f'attachment; filename="reconstruction_{session_id}.pdf"'},
        )
    except ImportError:
        raise HTTPException(status_code=501, detail="reportlab not installed")
@router.get("/sessions/{session_id}/reconstruction/export/docx")
async def export_reconstruction_docx(session_id: str):
    """Export the reconstructed cell grid as a DOCX table.

    The document gets a level-1 heading with the first eight characters of
    the session id, then a 'Table Grid' table: one header row (labels from
    ``columns_used``, or generated ``Col i`` names when that list is empty)
    followed by ``grid_shape.rows`` data rows. Cell texts are looked up by the
    id pattern ``R{row:02d}_C{col}``; missing cells and null texts become
    empty strings.

    Returns:
        A streaming DOCX response served as an attachment.

    Raises:
        HTTPException: 404 when the session is not found, 400 when there is
            no word result, 501 when python-docx is not installed.
    """
    session = await get_session_db(session_id)
    if not session:
        raise HTTPException(status_code=404, detail=f"Session {session_id} not found")

    word_result = session.get("word_result")
    if not word_result:
        raise HTTPException(status_code=400, detail="No word result found")

    cells = word_result.get("cells", [])
    columns_used = word_result.get("columns_used", [])
    grid_shape = word_result.get("grid_shape", {})
    n_rows = grid_shape.get("rows", 0)
    n_cols = grid_shape.get("cols", 0)

    # Index cells once instead of scanning the full list for every grid
    # position. setdefault keeps the first cell per id, matching a linear
    # first-match search.
    cell_by_id: dict = {}
    for c in cells:
        cell_by_id.setdefault(c.get("cell_id"), c)

    try:
        from docx import Document
        import io as _io

        doc = Document()
        doc.add_heading(f'Rekonstruktion Session {session_id[:8]}', level=1)

        # Build header
        header = [c.get("label", c.get("type", f"Col {i}")) for i, c in enumerate(columns_used)]
        if not header:
            header = [f"Col {i}" for i in range(n_cols)]

        # Size the table for whichever is wider, header or grid: the header
        # loop below would otherwise index past the last column when
        # columns_used has more entries than grid_shape reports.
        table = doc.add_table(rows=1 + n_rows, cols=max(n_cols, len(header), 1))
        table.style = 'Table Grid'

        # Header row
        header_cells = table.rows[0].cells
        for ci, h in enumerate(header):
            header_cells[ci].text = h

        # Data rows
        for r in range(n_rows):
            # Fetch the row's cell sequence once rather than once per column.
            row_cells = table.rows[r + 1].cells
            for ci in range(n_cols):
                cell = cell_by_id.get(f"R{r:02d}_C{ci}")
                # A stored null text is exported as an empty string.
                row_cells[ci].text = (cell.get("text") or "") if cell else ""

        buf = _io.BytesIO()
        doc.save(buf)
        buf.seek(0)

        from fastapi.responses import StreamingResponse
        return StreamingResponse(
            buf,
            media_type="application/vnd.openxmlformats-officedocument.wordprocessingml.document",
            headers={"Content-Disposition": f'attachment; filename="reconstruction_{session_id}.docx"'},
        )
    except ImportError:
        raise HTTPException(status_code=501, detail="python-docx not installed")
@router.post("/sessions/{session_id}/reprocess")
async def reprocess_session(session_id: str, request: Request):
"""Re-run pipeline from a specific step, clearing downstream data.

View File

@@ -45,6 +45,9 @@ asyncpg>=0.29.0
# Email validation for Pydantic
email-validator>=2.0.0
# DOCX export for reconstruction editor (MIT license)
python-docx>=1.1.0
# Testing
pytest>=8.0.0
pytest-asyncio>=0.23.0

View File

@@ -350,6 +350,77 @@ def layout_to_fabric_json(layout_result: LayoutResult) -> str:
return json.dumps(layout_result.fabric_json, ensure_ascii=False, indent=2)
def cells_to_fabric_json(
    cells: List[Dict[str, Any]],
    image_width: int,
    image_height: int,
) -> Dict[str, Any]:
    """Build a Fabric.js canvas description from pipeline grid cells.

    Every cell is turned into one ``textbox`` object. Its percentage bounding
    box (``bbox_pct`` with keys x/y/w/h) is scaled to pixels using the image
    dimensions, and the outline and translucent background colour are chosen
    from the cell's column type (grey for unknown types).

    Args:
        cells: Cell dicts; the keys read are ``bbox_pct``, ``col_type``,
            ``text``, ``cell_id``, ``row_index`` and ``col_index``. Missing
            keys fall back to defaults.
        image_width: Source image width in pixels.
        image_height: Source image height in pixels.

    Returns:
        Dict with a ``version`` string and an ``objects`` list holding one
        textbox dict per input cell, in input order.
    """
    palette = {
        'column_en': '#3b82f6',
        'column_de': '#22c55e',
        'column_example': '#f97316',
        'column_text': '#a855f7',
        'page_ref': '#06b6d4',
        'column_marker': '#6b7280',
    }

    def _textbox_for(entry: Dict[str, Any]) -> Dict[str, Any]:
        # Percent -> pixel conversion of the bounding box.
        box = entry.get('bbox_pct', {})
        left_px = box.get('x', 0) / 100 * image_width
        top_px = box.get('y', 0) / 100 * image_height
        width_px = box.get('w', 10) / 100 * image_width
        height_px = box.get('h', 3) / 100 * image_height

        kind = entry.get('col_type', '')
        tint = palette.get(kind, '#6b7280')

        # Font size follows the cell height, clamped to the 8..18 range.
        glyph_px = max(8, min(18, height_px * 0.55))

        meta = {
            "cellId": entry.get('cell_id', ''),
            "colType": kind,
            "rowIndex": entry.get('row_index', 0),
            "colIndex": entry.get('col_index', 0),
            "originalText": entry.get('text', ''),
        }
        return {
            "type": "textbox",
            "version": "6.0.0",
            "originX": "left",
            "originY": "top",
            "left": round(left_px, 1),
            "top": round(top_px, 1),
            # Boxes are never narrower than 30 px.
            "width": max(round(width_px, 1), 30),
            "height": round(height_px, 1),
            "fill": "#000000",
            "stroke": tint,
            "strokeWidth": 1,
            "text": entry.get('text', ''),
            "fontSize": round(glyph_px, 1),
            "fontFamily": "monospace",
            "editable": True,
            "selectable": True,
            # Two extra hex digits add an alpha channel to the tint.
            "backgroundColor": tint + "22",
            "data": meta,
        }

    return {
        "version": "6.0.0",
        "objects": [_textbox_for(entry) for entry in cells],
    }
def reconstruct_and_clean(
image_bytes: bytes,
remove_handwriting: bool = True