feat: OCR pipeline v2.1 – narrow column OCR, dewarp automation, Fabric.js editor
Some checks failed
CI / go-lint (push) Has been skipped
CI / python-lint (push) Has been skipped
CI / nodejs-lint (push) Has been skipped
CI / test-go-school (push) Successful in 24s
CI / test-go-edu-search (push) Successful in 27s
CI / test-python-klausur (push) Failing after 1m50s
CI / test-python-agent-core (push) Successful in 19s
CI / test-nodejs-website (push) Successful in 15s
Some checks failed
CI / go-lint (push) Has been skipped
CI / python-lint (push) Has been skipped
CI / nodejs-lint (push) Has been skipped
CI / test-go-school (push) Successful in 24s
CI / test-go-edu-search (push) Successful in 27s
CI / test-python-klausur (push) Failing after 1m50s
CI / test-python-agent-core (push) Successful in 19s
CI / test-nodejs-website (push) Successful in 15s
Proposal B: Adaptive padding, crop upscaling, PSM selection, row-strip re-OCR for narrow columns (<15% width) – expected accuracy boost 60-70% → 85-90%. Proposal A: New text-line straightness detector (Method D), quality gate (rejects counterproductive corrections), 2-pass projection refinement, higher confidence thresholds – expected manual dewarp reduction to <10%. Proposal C: Fabric.js canvas editor with drag/drop, inline editing, undo/redo, opacity slider, zoom, PDF/DOCX export endpoints. Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -0,0 +1,410 @@
|
|||||||
|
'use client'
|
||||||
|
|
||||||
|
import { useCallback, useEffect, useRef, useState } from 'react'
|
||||||
|
import type { GridCell } from '@/app/(admin)/ai/ocr-pipeline/types'
|
||||||
|
|
||||||
|
const KLAUSUR_API = '/klausur-api'
|
||||||
|
|
||||||
|
// Column type → colour mapping
|
||||||
|
/**
 * Hex colour used for each OCR column type.
 *
 * The same colour tints the cell background, the selection border/corners
 * and the legend swatch. Key order is the order the legend is rendered in.
 */
const COL_TYPE_COLORS: Record<string, string> = {
  'column_en': '#3b82f6', // Tailwind blue-500
  'column_de': '#22c55e', // Tailwind green-500
  'column_example': '#f97316', // Tailwind orange-500
  'column_text': '#a855f7', // Tailwind purple-500
  'page_ref': '#06b6d4', // Tailwind cyan-500
  'column_marker': '#6b7280', // Tailwind gray-500
}
|
||||||
|
|
||||||
|
/** Props accepted by the Fabric.js reconstruction editor component. */
interface FabricReconstructionCanvasProps {
  /** OCR pipeline session id; used to build the background-image and export URLs. */
  sessionId: string
  /** Grid cells that are placed on the canvas as editable text objects. */
  cells: GridCell[]
  /** Invoked with the affected cell id(s) and their new text after an edit, undo or redo. */
  onCellsChanged: (updates: Array<{ cell_id: string; text: string }>) => void
}
|
||||||
|
|
||||||
|
// Fabric.js types (subset used here)
|
||||||
|
/**
 * Minimal structural view of a Fabric.js canvas — only the members this file calls.
 *
 * NOTE(review): `setBackgroundImage` is a callback-style API; confirm it exists in the
 * Fabric version that is actually installed.
 */
interface FabricCanvas {
  // --- object management ---
  add(...objects: FabricObject[]): FabricCanvas
  remove(...objects: FabricObject[]): FabricCanvas
  getObjects(): FabricObject[]

  // --- selection ---
  getActiveObject(): FabricObject | null
  discardActiveObject(): FabricCanvas

  // --- background, size and viewport ---
  setBackgroundImage(img: FabricImage, callback: () => void): void
  setWidth(w: number): void
  setHeight(h: number): void
  setZoom(z: number): void
  getZoom(): number

  // --- rendering and lifecycle ---
  renderAll(): void
  requestRenderAll(): void
  on(event: string, handler: (e: FabricEvent) => void): void
  dispose(): void
}
|
||||||
|
|
||||||
|
/** Minimal structural view of a Fabric.js object — only the members this file touches. */
interface FabricObject {
  type?: string

  // Geometry
  left?: number
  top?: number
  width?: number
  height?: number

  /** Text content; present on text objects. */
  text?: string
  selectable?: boolean
  /** Free-form payload; this file stores the cell metadata (e.g. `cellId`) here. */
  data?: Record<string, unknown>

  set(props: Record<string, unknown>): FabricObject
  get(prop: string): unknown
  on?(event: string, handler: () => void): void
  setCoords?(): void
}
|
||||||
|
|
||||||
|
/** Image object; adds the scale factors that are multiplied with width/height to size the canvas. */
interface FabricImage extends FabricObject {
  scaleX?: number
  scaleY?: number
  width?: number
  height?: number
}
|
||||||
|
|
||||||
|
/** Payload handed to canvas event handlers (subset used in this file). */
interface FabricEvent {
  /** Underlying DOM mouse event, when there is one. */
  e?: MouseEvent
  /** Object the event refers to, when there is one. */
  target?: FabricObject
}
|
||||||
|
|
||||||
|
// Loose type for the module namespace returned by the dynamic `import('fabric')` in this file.
// eslint-disable-next-line @typescript-eslint/no-explicit-any
|
||||||
|
type FabricModule = any
|
||||||
|
|
||||||
|
/**
 * Returns true when a keyboard event originates from an element the user types text into
 * (text-like inputs, textareas, selects, contenteditable). Global shortcuts must leave
 * those events alone; this also covers the hidden textarea Fabric focuses while an IText
 * is being edited.
 */
function isEditableTarget(target: EventTarget | null): boolean {
  if (!(target instanceof HTMLElement)) return false
  if (target instanceof HTMLInputElement) {
    // Sliders, checkboxes, buttons etc. do not consume Backspace / Ctrl+Z as text edits.
    const nonTextTypes = ['button', 'checkbox', 'color', 'file', 'image', 'radio', 'range', 'reset', 'submit']
    return !nonTextTypes.includes(target.type)
  }
  return target.tagName === 'TEXTAREA' || target.tagName === 'SELECT' || target.isContentEditable
}

/**
 * Fabric.js canvas editor for the OCR reconstruction step.
 *
 * Loads the dewarped page image of `sessionId` as canvas background, places every grid
 * cell as an editable IText object at its percentage bounding box, and reports text
 * changes through `onCellsChanged`. Provides a background-opacity slider, zoom buttons,
 * keyboard undo/redo (Ctrl/Cmd+Z, Shift+Ctrl/Cmd+Z), Delete/Backspace to remove the
 * selected object, and PDF/DOCX export links.
 *
 * The canvas is (re)built only when `sessionId` changes; later changes to `cells` are
 * not pushed into an already initialised canvas.
 *
 * @param sessionId      OCR pipeline session whose image and exports are addressed.
 * @param cells          Grid cells rendered as text objects when the canvas is initialised.
 * @param onCellsChanged Called with `{ cell_id, text }` entries whenever a cell's text
 *                       changes through editing, undo or redo.
 */
export function FabricReconstructionCanvas({
  sessionId,
  cells,
  onCellsChanged,
}: FabricReconstructionCanvasProps) {
  const canvasElRef = useRef<HTMLCanvasElement>(null)
  const fabricRef = useRef<FabricCanvas | null>(null)
  const fabricModuleRef = useRef<FabricModule>(null)
  const [ready, setReady] = useState(false)
  const [opacity, setOpacity] = useState(30)
  const [zoom, setZoom] = useState(100)
  const [selectedCell, setSelectedCell] = useState<string | null>(null)
  const [error, setError] = useState('')

  // Undo/Redo
  const undoStackRef = useRef<{ cellId: string; oldText: string; newText: string }[]>([])
  const redoStackRef = useRef<{ cellId: string; oldText: string; newText: string }[]>([])
  // Last text reported per cell; needed to derive `oldText` for undo entries.
  const lastTextRef = useRef<Map<string, string>>(new Map())
  // Un-zoomed canvas size, so zooming can resize the canvas element accordingly.
  const baseSizeRef = useRef<{ w: number; h: number } | null>(null)
  // Latest values for use inside long-lived Fabric/DOM callbacks (avoids stale closures).
  const opacityRef = useRef(opacity)
  const onCellsChangedRef = useRef(onCellsChanged)

  useEffect(() => {
    onCellsChangedRef.current = onCellsChanged
  }, [onCellsChanged])

  // ---- Initialise Fabric.js ----
  useEffect(() => {
    let disposed = false

    // History and text snapshots belong to one session's canvas.
    undoStackRef.current = []
    redoStackRef.current = []
    lastTextRef.current = new Map()
    baseSizeRef.current = null

    async function init() {
      try {
        const fabricModule = await import('fabric')
        if (disposed) return
        fabricModuleRef.current = fabricModule

        const canvasEl = canvasElRef.current
        if (!canvasEl) return

        const canvas = new fabricModule.Canvas(canvasEl, {
          selection: true,
          preserveObjectStacking: true,
        }) as unknown as FabricCanvas

        fabricRef.current = canvas

        // Load background image (fromURL already returns a promise — no wrapper needed)
        const imgUrl = `${KLAUSUR_API}/api/v1/ocr-pipeline/sessions/${sessionId}/image/dewarped`
        const bgImg = (await fabricModule.FabricImage.fromURL(imgUrl, {
          crossOrigin: 'anonymous',
        })) as unknown as FabricImage

        if (disposed) return

        const imgW = (bgImg.width || 800) * (bgImg.scaleX || 1)
        const imgH = (bgImg.height || 600) * (bgImg.scaleY || 1)
        baseSizeRef.current = { w: imgW, h: imgH }

        canvas.setWidth(imgW)
        canvas.setHeight(imgH)

        bgImg.set({ opacity: opacityRef.current / 100, selectable: false, evented: false })
        // Fabric 6 removed canvas.setBackgroundImage(); the background is a plain property.
        const canvasWithBackground = canvas as unknown as { backgroundImage?: FabricObject | null }
        canvasWithBackground.backgroundImage = bgImg
        canvas.requestRenderAll()

        // Add cell objects
        addCellObjects(canvas, fabricModule, cells, imgW, imgH)

        // Listen for text changes. 'object:modified' also fires after move/scale, so only
        // real text changes are recorded in the undo history and reported to the parent.
        canvas.on('object:modified', (e: FabricEvent) => {
          const target = e.target
          const cellId = target?.data?.cellId
          if (!target || typeof cellId !== 'string' || !cellId) return

          const newText = target.text || ''
          const oldText = lastTextRef.current.get(cellId) ?? ''
          if (newText === oldText) return

          undoStackRef.current.push({ cellId, oldText, newText })
          redoStackRef.current = [] // a fresh edit invalidates the redo history
          lastTextRef.current.set(cellId, newText)
          onCellsChangedRef.current([{ cell_id: cellId, text: newText }])
        })

        // Selection tracking. Selection events do not reliably carry `target`
        // (Fabric 6 passes `selected`), so read the active object from the canvas.
        const syncSelection = () => {
          const cellId = canvas.getActiveObject()?.data?.cellId
          setSelectedCell(typeof cellId === 'string' && cellId ? cellId : null)
        }
        canvas.on('selection:created', syncSelection)
        canvas.on('selection:updated', syncSelection)
        canvas.on('selection:cleared', () => setSelectedCell(null))

        setReady(true)
      } catch (err) {
        if (!disposed) setError(err instanceof Error ? err.message : 'Fabric.js konnte nicht geladen werden')
      }
    }

    init()

    return () => {
      disposed = true
      fabricRef.current?.dispose()
      fabricRef.current = null
    }
    // `cells` is intentionally not a dependency: the canvas is rebuilt per session only.
    // eslint-disable-next-line react-hooks/exhaustive-deps
  }, [sessionId])

  /**
   * Creates one editable IText per grid cell, positioned from the cell's percentage
   * bounding box scaled to the image size, and adds it to the canvas.
   */
  function addCellObjects(
    canvas: FabricCanvas,
    fabricModule: FabricModule,
    gridCells: GridCell[],
    imgW: number,
    imgH: number,
  ) {
    for (const cell of gridCells) {
      const color = COL_TYPE_COLORS[cell.col_type] || '#6b7280'
      const x = (cell.bbox_pct.x / 100) * imgW
      const y = (cell.bbox_pct.y / 100) * imgH
      const w = (cell.bbox_pct.w / 100) * imgW
      const h = (cell.bbox_pct.h / 100) * imgH

      // Font size follows the cell height, clamped to 8–18 px.
      const fontSize = Math.max(8, Math.min(18, h * 0.55))

      const textObj = new fabricModule.IText(cell.text || '', {
        left: x,
        top: y,
        width: w,
        fontSize,
        fontFamily: 'monospace',
        fill: '#000000',
        backgroundColor: `${color}22`,
        padding: 2,
        editable: true,
        selectable: true,
        lockScalingFlip: true,
        data: {
          cellId: cell.cell_id,
          colType: cell.col_type,
          rowIndex: cell.row_index,
          colIndex: cell.col_index,
          originalText: cell.text,
        },
      })

      // Border colour matches column type
      textObj.set({
        borderColor: color,
        cornerColor: color,
        cornerSize: 6,
        transparentCorners: false,
      } as Record<string, unknown>)

      // Baseline for the first undo entry of this cell.
      lastTextRef.current.set(cell.cell_id, cell.text || '')

      canvas.add(textObj)
    }
    canvas.renderAll()
  }

  // ---- Opacity slider ----
  const handleOpacityChange = useCallback((val: number) => {
    setOpacity(val)
    // Remember the value so an image that finishes loading later picks it up.
    opacityRef.current = val
    const canvas = fabricRef.current
    if (!canvas) return
    // Fabric stores the background image as a property on the canvas
    const bgImg = (canvas as unknown as Record<string, unknown>).backgroundImage as FabricObject | null
    if (bgImg) {
      bgImg.set({ opacity: val / 100 })
      canvas.renderAll()
    }
  }, [])

  // ---- Zoom ----
  const handleZoomChange = useCallback((val: number) => {
    setZoom(val)
    const canvas = fabricRef.current
    if (!canvas) return
    const scale = val / 100
    canvas.setZoom(scale)
    // setZoom only scales the viewport; resize the element too so the zoomed page is
    // neither clipped nor padded and the surrounding container can scroll it.
    const base = baseSizeRef.current
    if (base) {
      canvas.setWidth(base.w * scale)
      canvas.setHeight(base.h * scale)
    }
    canvas.renderAll()
  }, [])

  // ---- Undo / Redo via keyboard ----
  useEffect(() => {
    const handler = (e: KeyboardEvent) => {
      // With Shift held, `key` is reported as 'Z', so compare case-insensitively.
      if (!(e.metaKey || e.ctrlKey) || e.key.toLowerCase() !== 'z') return
      // Leave native undo alone while the user is typing in a field / editing an IText.
      if (isEditableTarget(e.target)) return

      const canvas = fabricRef.current
      if (!canvas) return
      e.preventDefault()

      const isRedo = e.shiftKey
      const source = isRedo ? redoStackRef.current : undoStackRef.current
      const destination = isRedo ? undoStackRef.current : redoStackRef.current

      const action = source.pop()
      if (!action) return
      destination.push(action)

      const text = isRedo ? action.newText : action.oldText
      const obj = canvas.getObjects().find(
        (o: FabricObject) => o.data?.cellId === action.cellId
      )
      if (obj) {
        obj.set({ text } as Record<string, unknown>)
        canvas.renderAll()
        lastTextRef.current.set(action.cellId, text)
        onCellsChangedRef.current([{ cell_id: action.cellId, text }])
      }
    }
    document.addEventListener('keydown', handler)
    return () => document.removeEventListener('keydown', handler)
  }, [])

  // ---- Delete selected cell (via context-menu or Delete key) ----
  useEffect(() => {
    const handler = (e: KeyboardEvent) => {
      if (e.key !== 'Delete' && e.key !== 'Backspace') return
      // Never hijack Backspace/Delete typed into a text field elsewhere on the page.
      if (isEditableTarget(e.target)) return
      const canvas = fabricRef.current
      if (!canvas) return
      const active = canvas.getActiveObject()
      if (!active) return
      // If the IText is in editing mode, let the keypress pass through
      if ((active as unknown as Record<string, boolean>).isEditing) return
      e.preventDefault()
      canvas.remove(active)
      canvas.discardActiveObject()
      canvas.renderAll()
    }
    document.addEventListener('keydown', handler)
    return () => document.removeEventListener('keydown', handler)
  }, [])

  // ---- Export helpers ----
  const handleExportPdf = useCallback(() => {
    window.open(
      `${KLAUSUR_API}/api/v1/ocr-pipeline/sessions/${sessionId}/reconstruction/export/pdf`,
      '_blank'
    )
  }, [sessionId])

  const handleExportDocx = useCallback(() => {
    window.open(
      `${KLAUSUR_API}/api/v1/ocr-pipeline/sessions/${sessionId}/reconstruction/export/docx`,
      '_blank'
    )
  }, [sessionId])

  if (error) {
    return (
      <div className="flex flex-col items-center justify-center py-8 text-red-500 text-sm">
        <p>Fabric.js Editor konnte nicht geladen werden:</p>
        <p className="text-xs mt-1 text-gray-400">{error}</p>
      </div>
    )
  }

  return (
    <div className="space-y-2">
      {/* Toolbar */}
      <div className="flex items-center gap-3 bg-white dark:bg-gray-800 rounded-lg border border-gray-200 dark:border-gray-700 px-3 py-2 text-xs">
        {/* Opacity slider */}
        <label className="flex items-center gap-1.5 text-gray-500">
          Hintergrund
          <input
            type="range"
            min={0} max={100}
            value={opacity}
            onChange={e => handleOpacityChange(Number(e.target.value))}
            className="w-20 h-1 accent-teal-500"
          />
          <span className="w-8 text-right">{opacity}%</span>
        </label>

        <div className="w-px h-5 bg-gray-300 dark:bg-gray-600" />

        {/* Zoom */}
        <label className="flex items-center gap-1.5 text-gray-500">
          Zoom
          <button onClick={() => handleZoomChange(Math.max(25, zoom - 25))}
            className="px-1.5 py-0.5 border border-gray-300 dark:border-gray-600 rounded hover:bg-gray-50 dark:hover:bg-gray-700">
            −
          </button>
          <span className="w-8 text-center">{zoom}%</span>
          <button onClick={() => handleZoomChange(Math.min(200, zoom + 25))}
            className="px-1.5 py-0.5 border border-gray-300 dark:border-gray-600 rounded hover:bg-gray-50 dark:hover:bg-gray-700">
            +
          </button>
          <button onClick={() => handleZoomChange(100)}
            className="px-1.5 py-0.5 border border-gray-300 dark:border-gray-600 rounded hover:bg-gray-50 dark:hover:bg-gray-700">
            Fit
          </button>
        </label>

        <div className="w-px h-5 bg-gray-300 dark:bg-gray-600" />

        {/* Selected cell info */}
        {selectedCell && (
          <span className="text-gray-400">
            Zelle: <span className="text-gray-600 dark:text-gray-300">{selectedCell}</span>
          </span>
        )}

        <div className="flex-1" />

        {/* Export buttons */}
        <button onClick={handleExportPdf}
          className="px-2.5 py-1 border border-gray-300 dark:border-gray-600 rounded hover:bg-gray-50 dark:hover:bg-gray-700">
          PDF
        </button>
        <button onClick={handleExportDocx}
          className="px-2.5 py-1 border border-gray-300 dark:border-gray-600 rounded hover:bg-gray-50 dark:hover:bg-gray-700">
          DOCX
        </button>
      </div>

      {/* Canvas */}
      <div className="border rounded-lg overflow-auto dark:border-gray-700 bg-gray-100 dark:bg-gray-900"
        style={{ maxHeight: '75vh' }}>
        {!ready && (
          <div className="flex items-center justify-center py-12">
            <div className="animate-spin rounded-full h-5 w-5 border-b-2 border-teal-500" />
            <span className="ml-2 text-sm text-gray-500">Canvas wird geladen...</span>
          </div>
        )}
        <canvas ref={canvasElRef} />
      </div>

      {/* Legend */}
      <div className="flex items-center gap-4 text-xs text-gray-500">
        {Object.entries(COL_TYPE_COLORS).map(([type, color]) => (
          <span key={type} className="flex items-center gap-1">
            <span className="w-3 h-3 rounded" style={{ backgroundColor: color + '44', border: `1px solid ${color}` }} />
            {type.replace('column_', '').replace('page_', '')}
          </span>
        ))}
        <span className="ml-auto text-gray-400">Doppelklick = Text bearbeiten | Delete = Zelle entfernen | Cmd+Z = Undo</span>
      </div>
    </div>
  )
}
|
||||||
@@ -1,10 +1,19 @@
|
|||||||
'use client'
|
'use client'
|
||||||
|
|
||||||
import { useCallback, useEffect, useMemo, useRef, useState } from 'react'
|
import { useCallback, useEffect, useMemo, useRef, useState } from 'react'
|
||||||
|
import dynamic from 'next/dynamic'
|
||||||
import type { GridResult, GridCell, WordEntry } from '@/app/(admin)/ai/ocr-pipeline/types'
|
import type { GridResult, GridCell, WordEntry } from '@/app/(admin)/ai/ocr-pipeline/types'
|
||||||
|
|
||||||
const KLAUSUR_API = '/klausur-api'
|
const KLAUSUR_API = '/klausur-api'
|
||||||
|
|
||||||
|
// Lazy-load Fabric.js canvas editor (SSR-incompatible)
|
||||||
|
const FabricReconstructionCanvas = dynamic(
  async () => {
    // The editor module has a named export; next/dynamic expects a `default` one.
    const editorModule = await import('./FabricReconstructionCanvas')
    return { default: editorModule.FabricReconstructionCanvas }
  },
  {
    ssr: false,
    loading: () => (
      <div className="py-8 text-center text-sm text-gray-400">Editor wird geladen...</div>
    ),
  }
)
|
||||||
|
|
||||||
|
// Which reconstruction UI is rendered: 'editor' shows the Fabric.js canvas, 'simple' the input overlay.
type EditorMode = 'simple' | 'editor'
|
||||||
|
|
||||||
interface StepReconstructionProps {
|
interface StepReconstructionProps {
|
||||||
sessionId: string | null
|
sessionId: string | null
|
||||||
onNext: () => void
|
onNext: () => void
|
||||||
@@ -26,6 +35,8 @@ export function StepReconstruction({ sessionId, onNext }: StepReconstructionProp
|
|||||||
const [status, setStatus] = useState<'loading' | 'ready' | 'saving' | 'saved' | 'error'>('loading')
|
const [status, setStatus] = useState<'loading' | 'ready' | 'saving' | 'saved' | 'error'>('loading')
|
||||||
const [error, setError] = useState('')
|
const [error, setError] = useState('')
|
||||||
const [cells, setCells] = useState<EditableCell[]>([])
|
const [cells, setCells] = useState<EditableCell[]>([])
|
||||||
|
const [gridCells, setGridCells] = useState<GridCell[]>([])
|
||||||
|
const [editorMode, setEditorMode] = useState<EditorMode>('simple')
|
||||||
const [editedTexts, setEditedTexts] = useState<Map<string, string>>(new Map())
|
const [editedTexts, setEditedTexts] = useState<Map<string, string>>(new Map())
|
||||||
const [zoom, setZoom] = useState(100)
|
const [zoom, setZoom] = useState(100)
|
||||||
const [imageNaturalH, setImageNaturalH] = useState(0)
|
const [imageNaturalH, setImageNaturalH] = useState(0)
|
||||||
@@ -70,8 +81,9 @@ export function StepReconstruction({ sessionId, onNext }: StepReconstructionProp
|
|||||||
}
|
}
|
||||||
|
|
||||||
// Build editable cells from grid cells
|
// Build editable cells from grid cells
|
||||||
const gridCells: GridCell[] = wordResult.cells || []
|
const rawGridCells: GridCell[] = wordResult.cells || []
|
||||||
const allEditableCells: EditableCell[] = gridCells.map(c => ({
|
setGridCells(rawGridCells)
|
||||||
|
const allEditableCells: EditableCell[] = rawGridCells.map(c => ({
|
||||||
cellId: c.cell_id,
|
cellId: c.cell_id,
|
||||||
text: c.text,
|
text: c.text,
|
||||||
originalText: c.text,
|
originalText: c.text,
|
||||||
@@ -252,6 +264,17 @@ export function StepReconstruction({ sessionId, onNext }: StepReconstructionProp
|
|||||||
}
|
}
|
||||||
}, [sessionId, editedTexts, cells])
|
}, [sessionId, editedTexts, cells])
|
||||||
|
|
||||||
|
// Handler for Fabric.js editor cell changes
|
||||||
|
const handleFabricCellsChanged = useCallback((updates: { cell_id: string; text: string }[]) => {
  // Nothing to record — leave the edited-text map untouched.
  if (updates.length === 0) return
  // Apply the whole batch to one copy of the map; later entries win for duplicate ids.
  setEditedTexts(prev => {
    const merged = new Map(prev)
    updates.forEach(({ cell_id, text }) => merged.set(cell_id, text))
    return merged
  })
}, [])
|
||||||
|
|
||||||
const dewarpedUrl = sessionId
|
const dewarpedUrl = sessionId
|
||||||
? `${KLAUSUR_API}/api/v1/ocr-pipeline/sessions/${sessionId}/image/dewarped`
|
? `${KLAUSUR_API}/api/v1/ocr-pipeline/sessions/${sessionId}/image/dewarped`
|
||||||
: ''
|
: ''
|
||||||
@@ -332,6 +355,29 @@ export function StepReconstruction({ sessionId, onNext }: StepReconstructionProp
|
|||||||
<h3 className="text-sm font-medium text-gray-700 dark:text-gray-300">
|
<h3 className="text-sm font-medium text-gray-700 dark:text-gray-300">
|
||||||
Schritt 7: Rekonstruktion
|
Schritt 7: Rekonstruktion
|
||||||
</h3>
|
</h3>
|
||||||
|
{/* Mode toggle */}
|
||||||
|
<div className="flex items-center ml-2 border border-gray-300 dark:border-gray-600 rounded overflow-hidden text-xs">
|
||||||
|
<button
|
||||||
|
onClick={() => setEditorMode('simple')}
|
||||||
|
className={`px-2 py-0.5 transition-colors ${
|
||||||
|
editorMode === 'simple'
|
||||||
|
? 'bg-teal-600 text-white'
|
||||||
|
: 'hover:bg-gray-50 dark:hover:bg-gray-700 text-gray-600 dark:text-gray-400'
|
||||||
|
}`}
|
||||||
|
>
|
||||||
|
Einfach
|
||||||
|
</button>
|
||||||
|
<button
|
||||||
|
onClick={() => setEditorMode('editor')}
|
||||||
|
className={`px-2 py-0.5 transition-colors ${
|
||||||
|
editorMode === 'editor'
|
||||||
|
? 'bg-teal-600 text-white'
|
||||||
|
: 'hover:bg-gray-50 dark:hover:bg-gray-700 text-gray-600 dark:text-gray-400'
|
||||||
|
}`}
|
||||||
|
>
|
||||||
|
Editor
|
||||||
|
</button>
|
||||||
|
</div>
|
||||||
<span className="text-xs text-gray-400">
|
<span className="text-xs text-gray-400">
|
||||||
{cells.length} Zellen · {changedCount} geaendert
|
{cells.length} Zellen · {changedCount} geaendert
|
||||||
{emptyCellIds.size > 0 && showEmptyHighlight && (
|
{emptyCellIds.size > 0 && showEmptyHighlight && (
|
||||||
@@ -408,82 +454,90 @@ export function StepReconstruction({ sessionId, onNext }: StepReconstructionProp
|
|||||||
</div>
|
</div>
|
||||||
</div>
|
</div>
|
||||||
|
|
||||||
{/* Reconstruction canvas */}
|
{/* Reconstruction canvas — Simple or Editor mode */}
|
||||||
<div className="border rounded-lg overflow-auto dark:border-gray-700 bg-gray-100 dark:bg-gray-900" style={{ maxHeight: '75vh' }}>
|
{editorMode === 'editor' && sessionId ? (
|
||||||
<div
|
<FabricReconstructionCanvas
|
||||||
ref={containerRef}
|
sessionId={sessionId}
|
||||||
className="relative inline-block"
|
cells={gridCells}
|
||||||
style={{ transform: `scale(${zoom / 100})`, transformOrigin: 'top left' }}
|
onCellsChanged={handleFabricCellsChanged}
|
||||||
>
|
/>
|
||||||
{/* Background image at reduced opacity */}
|
) : (
|
||||||
{/* eslint-disable-next-line @next/next/no-img-element */}
|
<div className="border rounded-lg overflow-auto dark:border-gray-700 bg-gray-100 dark:bg-gray-900" style={{ maxHeight: '75vh' }}>
|
||||||
<img
|
<div
|
||||||
ref={imageRef}
|
ref={containerRef}
|
||||||
src={dewarpedUrl}
|
className="relative inline-block"
|
||||||
alt="Dewarped"
|
style={{ transform: `scale(${zoom / 100})`, transformOrigin: 'top left' }}
|
||||||
className="block"
|
>
|
||||||
style={{ opacity: 0.3 }}
|
{/* Background image at reduced opacity */}
|
||||||
onLoad={handleImageLoad}
|
{/* eslint-disable-next-line @next/next/no-img-element */}
|
||||||
/>
|
<img
|
||||||
|
ref={imageRef}
|
||||||
|
src={dewarpedUrl}
|
||||||
|
alt="Dewarped"
|
||||||
|
className="block"
|
||||||
|
style={{ opacity: 0.3 }}
|
||||||
|
onLoad={handleImageLoad}
|
||||||
|
/>
|
||||||
|
|
||||||
{/* Empty field markers */}
|
{/* Empty field markers */}
|
||||||
{showEmptyHighlight && cells
|
{showEmptyHighlight && cells
|
||||||
.filter(c => emptyCellIds.has(c.cellId))
|
.filter(c => emptyCellIds.has(c.cellId))
|
||||||
.map(cell => (
|
.map(cell => (
|
||||||
<div
|
<div
|
||||||
key={`empty-${cell.cellId}`}
|
key={`empty-${cell.cellId}`}
|
||||||
className="absolute border-2 border-dashed border-red-400/60 rounded pointer-events-none"
|
className="absolute border-2 border-dashed border-red-400/60 rounded pointer-events-none"
|
||||||
style={{
|
style={{
|
||||||
|
left: `${cell.bboxPct.x}%`,
|
||||||
|
top: `${cell.bboxPct.y}%`,
|
||||||
|
width: `${cell.bboxPct.w}%`,
|
||||||
|
height: `${cell.bboxPct.h}%`,
|
||||||
|
}}
|
||||||
|
/>
|
||||||
|
))}
|
||||||
|
|
||||||
|
{/* Editable text fields at bbox positions */}
|
||||||
|
{cells.map((cell) => {
|
||||||
|
const displayText = getDisplayText(cell)
|
||||||
|
const edited = isEdited(cell)
|
||||||
|
|
||||||
|
return (
|
||||||
|
<div key={cell.cellId} className="absolute group" style={{
|
||||||
left: `${cell.bboxPct.x}%`,
|
left: `${cell.bboxPct.x}%`,
|
||||||
top: `${cell.bboxPct.y}%`,
|
top: `${cell.bboxPct.y}%`,
|
||||||
width: `${cell.bboxPct.w}%`,
|
width: `${cell.bboxPct.w}%`,
|
||||||
height: `${cell.bboxPct.h}%`,
|
height: `${cell.bboxPct.h}%`,
|
||||||
}}
|
}}>
|
||||||
/>
|
<input
|
||||||
))}
|
id={`cell-${cell.cellId}`}
|
||||||
|
type="text"
|
||||||
{/* Editable text fields at bbox positions */}
|
value={displayText}
|
||||||
{cells.map((cell) => {
|
onChange={(e) => handleTextChange(cell.cellId, e.target.value)}
|
||||||
const displayText = getDisplayText(cell)
|
onKeyDown={(e) => handleKeyDown(e, cell.cellId)}
|
||||||
const edited = isEdited(cell)
|
className={`w-full h-full bg-transparent text-black dark:text-white border px-0.5 outline-none transition-colors ${
|
||||||
|
colTypeColor(cell.colType)
|
||||||
return (
|
} ${edited ? 'border-green-500 bg-green-50/30 dark:bg-green-900/20' : ''}`}
|
||||||
<div key={cell.cellId} className="absolute group" style={{
|
style={{
|
||||||
left: `${cell.bboxPct.x}%`,
|
fontSize: `${getFontSize(cell.bboxPct.h)}px`,
|
||||||
top: `${cell.bboxPct.y}%`,
|
lineHeight: '1',
|
||||||
width: `${cell.bboxPct.w}%`,
|
}}
|
||||||
height: `${cell.bboxPct.h}%`,
|
title={`${cell.cellId} (${cell.colType})`}
|
||||||
}}>
|
/>
|
||||||
<input
|
{/* Per-cell reset button (X) — only shown for edited cells on hover */}
|
||||||
id={`cell-${cell.cellId}`}
|
{edited && (
|
||||||
type="text"
|
<button
|
||||||
value={displayText}
|
onClick={() => resetCell(cell.cellId)}
|
||||||
onChange={(e) => handleTextChange(cell.cellId, e.target.value)}
|
className="absolute -top-1 -right-1 w-4 h-4 bg-red-500 text-white rounded-full text-[9px] leading-none opacity-0 group-hover:opacity-100 transition-opacity flex items-center justify-center"
|
||||||
onKeyDown={(e) => handleKeyDown(e, cell.cellId)}
|
title="Zuruecksetzen"
|
||||||
className={`w-full h-full bg-transparent text-black dark:text-white border px-0.5 outline-none transition-colors ${
|
>
|
||||||
colTypeColor(cell.colType)
|
×
|
||||||
} ${edited ? 'border-green-500 bg-green-50/30 dark:bg-green-900/20' : ''}`}
|
</button>
|
||||||
style={{
|
)}
|
||||||
fontSize: `${getFontSize(cell.bboxPct.h)}px`,
|
</div>
|
||||||
lineHeight: '1',
|
)
|
||||||
}}
|
})}
|
||||||
title={`${cell.cellId} (${cell.colType})`}
|
</div>
|
||||||
/>
|
|
||||||
{/* Per-cell reset button (X) — only shown for edited cells on hover */}
|
|
||||||
{edited && (
|
|
||||||
<button
|
|
||||||
onClick={() => resetCell(cell.cellId)}
|
|
||||||
className="absolute -top-1 -right-1 w-4 h-4 bg-red-500 text-white rounded-full text-[9px] leading-none opacity-0 group-hover:opacity-100 transition-opacity flex items-center justify-center"
|
|
||||||
title="Zuruecksetzen"
|
|
||||||
>
|
|
||||||
×
|
|
||||||
</button>
|
|
||||||
)}
|
|
||||||
</div>
|
|
||||||
)
|
|
||||||
})}
|
|
||||||
</div>
|
</div>
|
||||||
</div>
|
)}
|
||||||
|
|
||||||
{/* Bottom action */}
|
{/* Bottom action */}
|
||||||
<div className="flex justify-end">
|
<div className="flex justify-end">
|
||||||
|
|||||||
@@ -27,6 +27,7 @@
|
|||||||
"react-dom": "^18.3.1",
|
"react-dom": "^18.3.1",
|
||||||
"reactflow": "^11.11.4",
|
"reactflow": "^11.11.4",
|
||||||
"recharts": "^2.15.0",
|
"recharts": "^2.15.0",
|
||||||
|
"fabric": "^6.0.0",
|
||||||
"uuid": "^13.0.0"
|
"uuid": "^13.0.0"
|
||||||
},
|
},
|
||||||
"devDependencies": {
|
"devDependencies": {
|
||||||
|
|||||||
@@ -511,27 +511,39 @@ def _detect_shear_by_projection(img: np.ndarray) -> Dict[str, Any]:
|
|||||||
small = cv2.resize(binary, (w // 2, h // 2), interpolation=cv2.INTER_AREA)
|
small = cv2.resize(binary, (w // 2, h // 2), interpolation=cv2.INTER_AREA)
|
||||||
sh, sw = small.shape
|
sh, sw = small.shape
|
||||||
|
|
||||||
# Angle sweep: ±3° in 0.25° steps
|
# 2-pass angle sweep for 10x better precision:
|
||||||
angles = [a * 0.25 for a in range(-12, 13)] # 25 values
|
# Pass 1: Coarse sweep ±3° in 0.5° steps (13 values)
|
||||||
best_angle = 0.0
|
# Pass 2: Fine sweep ±0.5° around coarse best in 0.05° steps (21 values)
|
||||||
best_variance = -1.0
|
|
||||||
variances: List[Tuple[float, float]] = []
|
|
||||||
|
|
||||||
for angle_deg in angles:
|
def _sweep_variance(angles_list):
|
||||||
if abs(angle_deg) < 0.01:
|
results = []
|
||||||
rotated = small
|
for angle_deg in angles_list:
|
||||||
else:
|
if abs(angle_deg) < 0.001:
|
||||||
shear_tan = math.tan(math.radians(angle_deg))
|
rotated = small
|
||||||
M = np.float32([[1, shear_tan, -sh / 2.0 * shear_tan], [0, 1, 0]])
|
else:
|
||||||
rotated = cv2.warpAffine(small, M, (sw, sh),
|
shear_tan = math.tan(math.radians(angle_deg))
|
||||||
flags=cv2.INTER_NEAREST,
|
M = np.float32([[1, shear_tan, -sh / 2.0 * shear_tan], [0, 1, 0]])
|
||||||
borderMode=cv2.BORDER_CONSTANT)
|
rotated = cv2.warpAffine(small, M, (sw, sh),
|
||||||
profile = np.sum(rotated, axis=1).astype(float)
|
flags=cv2.INTER_NEAREST,
|
||||||
var = float(np.var(profile))
|
borderMode=cv2.BORDER_CONSTANT)
|
||||||
variances.append((angle_deg, var))
|
profile = np.sum(rotated, axis=1).astype(float)
|
||||||
if var > best_variance:
|
results.append((angle_deg, float(np.var(profile))))
|
||||||
best_variance = var
|
return results
|
||||||
best_angle = angle_deg
|
|
||||||
|
# Pass 1: coarse
|
||||||
|
coarse_angles = [a * 0.5 for a in range(-6, 7)] # 13 values
|
||||||
|
coarse_results = _sweep_variance(coarse_angles)
|
||||||
|
coarse_best = max(coarse_results, key=lambda x: x[1])
|
||||||
|
|
||||||
|
# Pass 2: fine around coarse best
|
||||||
|
fine_center = coarse_best[0]
|
||||||
|
fine_angles = [fine_center + a * 0.05 for a in range(-10, 11)] # 21 values
|
||||||
|
fine_results = _sweep_variance(fine_angles)
|
||||||
|
fine_best = max(fine_results, key=lambda x: x[1])
|
||||||
|
|
||||||
|
best_angle = fine_best[0]
|
||||||
|
best_variance = fine_best[1]
|
||||||
|
variances = coarse_results + fine_results
|
||||||
|
|
||||||
# Confidence: how much sharper is the best angle vs. the mean?
|
# Confidence: how much sharper is the best angle vs. the mean?
|
||||||
all_mean = sum(v for _, v in variances) / len(variances)
|
all_mean = sum(v for _, v in variances) / len(variances)
|
||||||
@@ -611,6 +623,133 @@ def _detect_shear_by_hough(img: np.ndarray) -> Dict[str, Any]:
|
|||||||
return result
|
return result
|
||||||
|
|
||||||
|
|
||||||
|
def _detect_shear_by_text_lines(img: np.ndarray) -> Dict[str, Any]:
    """Detect shear by measuring text-line straightness (Method D).

    Runs a quick Tesseract scan (PSM 11, 50% downscale) to locate word
    bounding boxes, groups the word centres into horizontal lines by
    Y-proximity, fits a first-degree polynomial to each line and takes the
    median slope as the shear angle.

    Unlike edge-, projection- or Hough-based detectors, this one measures
    the recognised text itself.

    Args:
        img: BGR image (it is converted with ``cv2.COLOR_BGR2GRAY``).

    Returns:
        Dict with keys ``method`` (always ``"text_lines"``),
        ``shear_degrees`` and ``confidence``.  Both numbers stay at 0.0 when
        the image is too small to downscale, the Tesseract call fails, or
        fewer than 10 words / 3 lines / 3 usable slopes are found.
    """
    import math

    result = {"method": "text_lines", "shear_degrees": 0.0, "confidence": 0.0}

    h, w = img.shape[:2]
    # Downscale 50% for speed
    scale = 0.5
    small_w, small_h = int(w * scale), int(h * scale)
    if small_w < 1 or small_h < 1:
        # cv2.resize rejects a zero-sized target; nothing to measure anyway.
        return result

    small = cv2.resize(img, (small_w, small_h), interpolation=cv2.INTER_AREA)
    gray = cv2.cvtColor(small, cv2.COLOR_BGR2GRAY)
    pil_img = Image.fromarray(gray)

    try:
        data = pytesseract.image_to_data(
            pil_img, lang='eng+deu', config='--psm 11 --oem 3',
            output_type=pytesseract.Output.DICT,
        )
    except Exception:
        # Best-effort detector: report "no detection" but leave a trace.
        logger.debug("dewarp: text-line detector OCR scan failed", exc_info=True)
        return result

    # Collect word centres (cx, cy) together with the word height
    words = []
    for i in range(len(data['text'])):
        text = data['text'][i].strip()
        try:
            # int(float(...)) also accepts float-formatted values such as
            # "96.5", which a bare int() rejects.
            conf = int(float(data['conf'][i]))
        except (TypeError, ValueError):
            continue
        if not text or conf < 20 or len(text) < 2:
            continue
        cx = data['left'][i] + data['width'][i] / 2.0
        cy = data['top'][i] + data['height'][i] / 2.0
        words.append((cx, cy, data['height'][i]))

    if len(words) < 10:
        return result

    # Group words into lines by Y-proximity.  The tolerance is 60% of the
    # average word height, but never less than 8 px.
    avg_h = sum(wh for _, _, wh in words) / len(words)
    y_tol = max(avg_h * 0.6, 8)
    words_sorted = sorted(words, key=lambda w: w[1])

    lines: List[List[Tuple[float, float]]] = []
    current_line: List[Tuple[float, float]] = [(words_sorted[0][0], words_sorted[0][1])]
    # Each line is anchored on the Y of its first word.
    current_y = words_sorted[0][1]

    for cx, cy, _ in words_sorted[1:]:
        if abs(cy - current_y) <= y_tol:
            current_line.append((cx, cy))
        else:
            # Lines with fewer than 3 words are dropped.
            if len(current_line) >= 3:
                lines.append(current_line)
            current_line = [(cx, cy)]
            current_y = cy
    if len(current_line) >= 3:
        lines.append(current_line)

    if len(lines) < 3:
        return result

    # Linear regression per line -> slope (dy/dx)
    slopes = []
    for line in lines:
        xs = np.array([p[0] for p in line])
        ys = np.array([p[1] for p in line])
        x_range = xs.max() - xs.min()
        if x_range < 20:
            # Too short horizontally for a meaningful slope.
            continue
        coeffs = np.polyfit(xs, ys, 1)
        slopes.append(coeffs[0])  # dy/dx

    if len(slopes) < 3:
        return result

    # Median slope -> shear angle; dy/dx of the text lines is treated as
    # tan(shear_angle).
    median_slope = float(np.median(slopes))
    shear_degrees = math.degrees(math.atan(median_slope))

    # Confidence from line count + slope consistency
    slope_std = float(np.std(slopes))
    consistency = max(0.0, 1.0 - slope_std * 20)  # penalise high variance
    count_factor = min(1.0, len(slopes) / 8.0)
    confidence = count_factor * 0.6 + consistency * 0.4

    result["shear_degrees"] = round(shear_degrees, 3)
    result["confidence"] = round(max(0.0, min(1.0, confidence)), 2)
    return result
|
||||||
|
|
||||||
|
|
||||||
|
def _dewarp_quality_check(original: np.ndarray, corrected: np.ndarray) -> bool:
    """Decide whether a dewarp correction is worth keeping.

    Both images are reduced to a horizontal projection profile (row sums of
    an Otsu-binarised, half-size copy) and the variance of that profile is
    compared.  A larger variance is taken as a sign of sharper text-line
    peaks, i.e. better horizontal alignment.

    Args:
        original: BGR image before the correction.
        corrected: BGR image after the correction.

    Returns:
        True when the corrected image has a strictly larger profile
        variance than the original, False otherwise.
    """
    def _row_profile_variance(bgr: np.ndarray) -> float:
        grey = cv2.cvtColor(bgr, cv2.COLOR_BGR2GRAY)
        # Inverted Otsu threshold: dark pixels become 255
        mask = cv2.threshold(grey, 0, 255,
                             cv2.THRESH_BINARY_INV + cv2.THRESH_OTSU)[1]
        half_size = (mask.shape[1] // 2, mask.shape[0] // 2)
        reduced = cv2.resize(mask, half_size, interpolation=cv2.INTER_AREA)
        row_sums = np.sum(reduced, axis=1).astype(float)
        return float(np.var(row_sums))

    score_before = _row_profile_variance(original)
    score_after = _row_profile_variance(corrected)

    # Any improvement counts, however small; a tie is rejected.
    return score_after > score_before
|
||||||
|
|
||||||
|
|
||||||
def _apply_shear(img: np.ndarray, shear_degrees: float) -> np.ndarray:
|
def _apply_shear(img: np.ndarray, shear_degrees: float) -> np.ndarray:
|
||||||
"""Apply a vertical shear correction to an image.
|
"""Apply a vertical shear correction to an image.
|
||||||
|
|
||||||
@@ -644,24 +783,36 @@ def _apply_shear(img: np.ndarray, shear_degrees: float) -> np.ndarray:
|
|||||||
|
|
||||||
|
|
||||||
def _ensemble_shear(detections: List[Dict[str, Any]]) -> Tuple[float, float, str]:
|
def _ensemble_shear(detections: List[Dict[str, Any]]) -> Tuple[float, float, str]:
|
||||||
"""Combine multiple shear detections into a single weighted estimate.
|
"""Combine multiple shear detections into a single weighted estimate (v2).
|
||||||
|
|
||||||
Only methods with confidence >= 0.3 are considered.
|
Ensemble v2 changes vs v1:
|
||||||
Results are outlier-filtered: if any accepted result differs by more than
|
- Minimum confidence raised to 0.5 (was 0.3)
|
||||||
1° from the weighted mean, it is discarded.
|
- text_lines method gets 1.5× weight boost (most reliable detector)
|
||||||
|
- Outlier filter at 1° from weighted mean
|
||||||
|
|
||||||
Returns:
|
Returns:
|
||||||
(shear_degrees, ensemble_confidence, methods_used_str)
|
(shear_degrees, ensemble_confidence, methods_used_str)
|
||||||
"""
|
"""
|
||||||
accepted = [(d["shear_degrees"], d["confidence"], d["method"])
|
# Higher confidence threshold — "im Zweifel nichts tun"
|
||||||
for d in detections if d["confidence"] >= 0.3]
|
_MIN_CONF = 0.5
|
||||||
|
|
||||||
|
# text_lines gets a weight boost as the most content-aware method
|
||||||
|
_METHOD_WEIGHT_BOOST = {"text_lines": 1.5}
|
||||||
|
|
||||||
|
accepted = []
|
||||||
|
for d in detections:
|
||||||
|
if d["confidence"] < _MIN_CONF:
|
||||||
|
continue
|
||||||
|
boost = _METHOD_WEIGHT_BOOST.get(d["method"], 1.0)
|
||||||
|
effective_conf = d["confidence"] * boost
|
||||||
|
accepted.append((d["shear_degrees"], effective_conf, d["method"]))
|
||||||
|
|
||||||
if not accepted:
|
if not accepted:
|
||||||
return 0.0, 0.0, "none"
|
return 0.0, 0.0, "none"
|
||||||
|
|
||||||
if len(accepted) == 1:
|
if len(accepted) == 1:
|
||||||
deg, conf, method = accepted[0]
|
deg, conf, method = accepted[0]
|
||||||
return deg, conf, method
|
return deg, min(conf, 1.0), method
|
||||||
|
|
||||||
# First pass: weighted mean
|
# First pass: weighted mean
|
||||||
total_w = sum(c for _, c, _ in accepted)
|
total_w = sum(c for _, c, _ in accepted)
|
||||||
@@ -684,23 +835,24 @@ def _ensemble_shear(detections: List[Dict[str, Any]]) -> Tuple[float, float, str
|
|||||||
ensemble_conf = min(1.0, avg_conf + agreement_bonus)
|
ensemble_conf = min(1.0, avg_conf + agreement_bonus)
|
||||||
|
|
||||||
methods_str = "+".join(m for _, _, m in filtered)
|
methods_str = "+".join(m for _, _, m in filtered)
|
||||||
return round(final_deg, 3), round(ensemble_conf, 2), methods_str
|
return round(final_deg, 3), round(min(ensemble_conf, 1.0), 2), methods_str
|
||||||
|
|
||||||
|
|
||||||
def dewarp_image(img: np.ndarray, use_ensemble: bool = True) -> Tuple[np.ndarray, Dict[str, Any]]:
|
def dewarp_image(img: np.ndarray, use_ensemble: bool = True) -> Tuple[np.ndarray, Dict[str, Any]]:
|
||||||
"""Correct vertical shear after deskew.
|
"""Correct vertical shear after deskew (v2 with quality gate).
|
||||||
|
|
||||||
After deskew aligns horizontal text lines, vertical features (column
|
After deskew aligns horizontal text lines, vertical features (column
|
||||||
edges) may still be tilted. This detects the tilt angle using an ensemble
|
edges) may still be tilted. This detects the tilt angle using an ensemble
|
||||||
of three complementary methods and applies an affine shear correction.
|
of four complementary methods and applies an affine shear correction.
|
||||||
|
|
||||||
Methods (all run in ~100ms total):
|
Methods (all run in ~150ms total):
|
||||||
A. _detect_shear_angle() — vertical edge profile (~50ms)
|
A. _detect_shear_angle() — vertical edge profile (~50ms)
|
||||||
B. _detect_shear_by_projection() — horizontal text-line variance (~30ms)
|
B. _detect_shear_by_projection() — horizontal text-line variance (~30ms)
|
||||||
C. _detect_shear_by_hough() — Hough lines on table borders (~20ms)
|
C. _detect_shear_by_hough() — Hough lines on table borders (~20ms)
|
||||||
|
D. _detect_shear_by_text_lines() — text-line straightness (~50ms)
|
||||||
|
|
||||||
Only methods with confidence >= 0.3 contribute to the ensemble.
|
Quality gate: after correction, horizontal projection variance is compared
|
||||||
Outlier filtering discards results deviating > 1° from the weighted mean.
|
before vs after. If correction worsened alignment, it is discarded.
|
||||||
|
|
||||||
Args:
|
Args:
|
||||||
img: BGR image (already deskewed).
|
img: BGR image (already deskewed).
|
||||||
@@ -726,7 +878,8 @@ def dewarp_image(img: np.ndarray, use_ensemble: bool = True) -> Tuple[np.ndarray
|
|||||||
det_a = _detect_shear_angle(img)
|
det_a = _detect_shear_angle(img)
|
||||||
det_b = _detect_shear_by_projection(img)
|
det_b = _detect_shear_by_projection(img)
|
||||||
det_c = _detect_shear_by_hough(img)
|
det_c = _detect_shear_by_hough(img)
|
||||||
detections = [det_a, det_b, det_c]
|
det_d = _detect_shear_by_text_lines(img)
|
||||||
|
detections = [det_a, det_b, det_c, det_d]
|
||||||
shear_deg, confidence, method = _ensemble_shear(detections)
|
shear_deg, confidence, method = _ensemble_shear(detections)
|
||||||
else:
|
else:
|
||||||
det_a = _detect_shear_angle(img)
|
det_a = _detect_shear_angle(img)
|
||||||
@@ -739,22 +892,35 @@ def dewarp_image(img: np.ndarray, use_ensemble: bool = True) -> Tuple[np.ndarray
|
|||||||
|
|
||||||
logger.info(
|
logger.info(
|
||||||
"dewarp: ensemble shear=%.3f° conf=%.2f method=%s (%.2fs) | "
|
"dewarp: ensemble shear=%.3f° conf=%.2f method=%s (%.2fs) | "
|
||||||
"A=%.3f/%.2f B=%.3f/%.2f C=%.3f/%.2f",
|
"A=%.3f/%.2f B=%.3f/%.2f C=%.3f/%.2f D=%.3f/%.2f",
|
||||||
shear_deg, confidence, method, duration,
|
shear_deg, confidence, method, duration,
|
||||||
detections[0]["shear_degrees"], detections[0]["confidence"],
|
detections[0]["shear_degrees"], detections[0]["confidence"],
|
||||||
detections[1]["shear_degrees"] if len(detections) > 1 else 0.0,
|
detections[1]["shear_degrees"] if len(detections) > 1 else 0.0,
|
||||||
detections[1]["confidence"] if len(detections) > 1 else 0.0,
|
detections[1]["confidence"] if len(detections) > 1 else 0.0,
|
||||||
detections[2]["shear_degrees"] if len(detections) > 2 else 0.0,
|
detections[2]["shear_degrees"] if len(detections) > 2 else 0.0,
|
||||||
detections[2]["confidence"] if len(detections) > 2 else 0.0,
|
detections[2]["confidence"] if len(detections) > 2 else 0.0,
|
||||||
|
detections[3]["shear_degrees"] if len(detections) > 3 else 0.0,
|
||||||
|
detections[3]["confidence"] if len(detections) > 3 else 0.0,
|
||||||
)
|
)
|
||||||
|
|
||||||
# Only correct if shear is significant (> 0.05°)
|
# Higher thresholds: subtle shear (<0.15°) is irrelevant for OCR
|
||||||
if abs(shear_deg) < 0.05 or confidence < 0.3:
|
if abs(shear_deg) < 0.15 or confidence < 0.5:
|
||||||
return img, no_correction
|
return img, no_correction
|
||||||
|
|
||||||
# Apply correction (negate the detected shear to straighten)
|
# Apply correction (negate the detected shear to straighten)
|
||||||
corrected = _apply_shear(img, -shear_deg)
|
corrected = _apply_shear(img, -shear_deg)
|
||||||
|
|
||||||
|
# Quality gate: verify the correction actually improved alignment
|
||||||
|
if not _dewarp_quality_check(img, corrected):
|
||||||
|
logger.info("dewarp: quality gate REJECTED correction (%.3f°) — "
|
||||||
|
"projection variance did not improve", shear_deg)
|
||||||
|
no_correction["detections"] = [
|
||||||
|
{"method": d["method"], "shear_degrees": d["shear_degrees"],
|
||||||
|
"confidence": d["confidence"]}
|
||||||
|
for d in detections
|
||||||
|
]
|
||||||
|
return img, no_correction
|
||||||
|
|
||||||
info = {
|
info = {
|
||||||
"method": method,
|
"method": method,
|
||||||
"shear_degrees": shear_deg,
|
"shear_degrees": shear_deg,
|
||||||
@@ -4180,6 +4346,60 @@ def _clean_cell_text(text: str) -> str:
|
|||||||
return ' '.join(tokens)
|
return ' '.join(tokens)
|
||||||
|
|
||||||
|
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
# Narrow-column OCR helpers (Proposal B)
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
def _compute_cell_padding(col_width: int, img_w: int) -> int:
|
||||||
|
"""Adaptive padding for OCR crops based on column width.
|
||||||
|
|
||||||
|
Narrow columns (page_ref, marker) need more surrounding context so
|
||||||
|
Tesseract can segment characters correctly. Wide columns keep the
|
||||||
|
minimal 4 px padding to avoid pulling in neighbours.
|
||||||
|
"""
|
||||||
|
col_pct = col_width / img_w * 100 if img_w > 0 else 100
|
||||||
|
if col_pct < 5:
|
||||||
|
return max(20, col_width // 2)
|
||||||
|
if col_pct < 10:
|
||||||
|
return max(12, col_width // 4)
|
||||||
|
if col_pct < 15:
|
||||||
|
return 8
|
||||||
|
return 4
|
||||||
|
|
||||||
|
|
||||||
|
def _ensure_minimum_crop_size(crop: np.ndarray, min_dim: int = 150,
|
||||||
|
max_scale: int = 3) -> np.ndarray:
|
||||||
|
"""Upscale tiny crops so Tesseract gets enough pixel data.
|
||||||
|
|
||||||
|
If either dimension is below *min_dim*, the crop is bicubic-upscaled
|
||||||
|
so the smallest dimension reaches *min_dim* (capped at *max_scale* ×).
|
||||||
|
"""
|
||||||
|
h, w = crop.shape[:2]
|
||||||
|
if h >= min_dim and w >= min_dim:
|
||||||
|
return crop
|
||||||
|
scale = min(max_scale, max(min_dim / max(h, 1), min_dim / max(w, 1)))
|
||||||
|
if scale <= 1.0:
|
||||||
|
return crop
|
||||||
|
new_w = int(w * scale)
|
||||||
|
new_h = int(h * scale)
|
||||||
|
return cv2.resize(crop, (new_w, new_h), interpolation=cv2.INTER_CUBIC)
|
||||||
|
|
||||||
|
|
||||||
|
def _select_psm_for_column(col_type: str, col_width: int,
|
||||||
|
row_height: int) -> int:
|
||||||
|
"""Choose the best Tesseract PSM for a given column geometry.
|
||||||
|
|
||||||
|
- page_ref columns are almost always single short tokens → PSM 8
|
||||||
|
- Very narrow or short cells → PSM 7 (single text line)
|
||||||
|
- Everything else → PSM 6 (uniform block)
|
||||||
|
"""
|
||||||
|
if col_type in ('page_ref', 'marker'):
|
||||||
|
return 8 # single word
|
||||||
|
if col_width < 100 or row_height < 30:
|
||||||
|
return 7 # single line
|
||||||
|
return 6 # uniform block
|
||||||
|
|
||||||
|
|
||||||
def _ocr_single_cell(
|
def _ocr_single_cell(
|
||||||
row_idx: int,
|
row_idx: int,
|
||||||
col_idx: int,
|
col_idx: int,
|
||||||
@@ -4202,12 +4422,13 @@ def _ocr_single_cell(
|
|||||||
disp_w = col.width
|
disp_w = col.width
|
||||||
disp_h = row.height
|
disp_h = row.height
|
||||||
|
|
||||||
# OCR crop: slightly wider to catch edge characters (internal only)
|
# OCR crop: adaptive padding — narrow columns get more context
|
||||||
pad = 4
|
pad = _compute_cell_padding(col.width, img_w)
|
||||||
cell_x = max(0, col.x - pad)
|
cell_x = max(0, col.x - pad)
|
||||||
cell_y = max(0, row.y - pad)
|
cell_y = max(0, row.y - pad)
|
||||||
cell_w = min(col.width + 2 * pad, img_w - cell_x)
|
cell_w = min(col.width + 2 * pad, img_w - cell_x)
|
||||||
cell_h = min(row.height + 2 * pad, img_h - cell_y)
|
cell_h = min(row.height + 2 * pad, img_h - cell_y)
|
||||||
|
is_narrow = (col.width / img_w * 100) < 15 if img_w > 0 else False
|
||||||
|
|
||||||
if disp_w <= 0 or disp_h <= 0:
|
if disp_w <= 0 or disp_h <= 0:
|
||||||
return {
|
return {
|
||||||
@@ -4266,20 +4487,56 @@ def _ocr_single_cell(
|
|||||||
dark_ratio = float(np.count_nonzero(crop < 180)) / crop.size
|
dark_ratio = float(np.count_nonzero(crop < 180)) / crop.size
|
||||||
_run_fallback = dark_ratio > 0.005
|
_run_fallback = dark_ratio > 0.005
|
||||||
if _run_fallback:
|
if _run_fallback:
|
||||||
cell_region = PageRegion(
|
# For narrow columns, upscale the crop before OCR
|
||||||
type=col.type,
|
if is_narrow and ocr_img is not None:
|
||||||
x=cell_x, y=cell_y,
|
_crop_slice = ocr_img[cell_y:cell_y + cell_h, cell_x:cell_x + cell_w]
|
||||||
width=cell_w, height=cell_h,
|
_upscaled = _ensure_minimum_crop_size(_crop_slice)
|
||||||
)
|
if _upscaled is not _crop_slice:
|
||||||
if engine_name in ("trocr-printed", "trocr-handwritten") and img_bgr is not None:
|
# Build a temporary full-size image with the upscaled crop
|
||||||
fallback_words = ocr_region_trocr(img_bgr, cell_region, handwritten=(engine_name == "trocr-handwritten"))
|
# placed at origin so ocr_region can crop it cleanly.
|
||||||
elif engine_name == "lighton" and img_bgr is not None:
|
_up_h, _up_w = _upscaled.shape[:2]
|
||||||
fallback_words = ocr_region_lighton(img_bgr, cell_region)
|
_tmp_region = PageRegion(
|
||||||
elif use_rapid and img_bgr is not None:
|
type=col.type, x=0, y=0, width=_up_w, height=_up_h,
|
||||||
fallback_words = ocr_region_rapid(img_bgr, cell_region)
|
)
|
||||||
|
_cell_psm = _select_psm_for_column(col.type, col.width, row.height)
|
||||||
|
cell_lang = lang_map.get(col.type, lang)
|
||||||
|
fallback_words = ocr_region(_upscaled, _tmp_region,
|
||||||
|
lang=cell_lang, psm=_cell_psm)
|
||||||
|
# Remap word positions back to original image coordinates
|
||||||
|
_sx = cell_w / max(_up_w, 1)
|
||||||
|
_sy = cell_h / max(_up_h, 1)
|
||||||
|
for _fw in (fallback_words or []):
|
||||||
|
_fw['left'] = int(_fw['left'] * _sx) + cell_x
|
||||||
|
_fw['top'] = int(_fw['top'] * _sy) + cell_y
|
||||||
|
_fw['width'] = int(_fw['width'] * _sx)
|
||||||
|
_fw['height'] = int(_fw['height'] * _sy)
|
||||||
|
else:
|
||||||
|
# No upscaling needed, use adaptive PSM
|
||||||
|
cell_region = PageRegion(
|
||||||
|
type=col.type, x=cell_x, y=cell_y,
|
||||||
|
width=cell_w, height=cell_h,
|
||||||
|
)
|
||||||
|
_cell_psm = _select_psm_for_column(col.type, col.width, row.height)
|
||||||
|
cell_lang = lang_map.get(col.type, lang)
|
||||||
|
fallback_words = ocr_region(ocr_img, cell_region,
|
||||||
|
lang=cell_lang, psm=_cell_psm)
|
||||||
else:
|
else:
|
||||||
cell_lang = lang_map.get(col.type, lang)
|
cell_region = PageRegion(
|
||||||
fallback_words = ocr_region(ocr_img, cell_region, lang=cell_lang, psm=6)
|
type=col.type,
|
||||||
|
x=cell_x, y=cell_y,
|
||||||
|
width=cell_w, height=cell_h,
|
||||||
|
)
|
||||||
|
if engine_name in ("trocr-printed", "trocr-handwritten") and img_bgr is not None:
|
||||||
|
fallback_words = ocr_region_trocr(img_bgr, cell_region, handwritten=(engine_name == "trocr-handwritten"))
|
||||||
|
elif engine_name == "lighton" and img_bgr is not None:
|
||||||
|
fallback_words = ocr_region_lighton(img_bgr, cell_region)
|
||||||
|
elif use_rapid and img_bgr is not None:
|
||||||
|
fallback_words = ocr_region_rapid(img_bgr, cell_region)
|
||||||
|
else:
|
||||||
|
_cell_psm = _select_psm_for_column(col.type, col.width, row.height)
|
||||||
|
cell_lang = lang_map.get(col.type, lang)
|
||||||
|
fallback_words = ocr_region(ocr_img, cell_region,
|
||||||
|
lang=cell_lang, psm=_cell_psm)
|
||||||
|
|
||||||
if fallback_words:
|
if fallback_words:
|
||||||
# Apply same confidence filter to fallback words
|
# Apply same confidence filter to fallback words
|
||||||
@@ -4297,8 +4554,12 @@ def _ocr_single_cell(
|
|||||||
|
|
||||||
# --- SECONDARY FALLBACK: PSM=7 (single line) for still-empty cells ---
|
# --- SECONDARY FALLBACK: PSM=7 (single line) for still-empty cells ---
|
||||||
if not text.strip() and _run_fallback and not use_rapid:
|
if not text.strip() and _run_fallback and not use_rapid:
|
||||||
|
_fb_region = PageRegion(
|
||||||
|
type=col.type, x=cell_x, y=cell_y,
|
||||||
|
width=cell_w, height=cell_h,
|
||||||
|
)
|
||||||
cell_lang = lang_map.get(col.type, lang)
|
cell_lang = lang_map.get(col.type, lang)
|
||||||
psm7_words = ocr_region(ocr_img, cell_region, lang=cell_lang, psm=7)
|
psm7_words = ocr_region(ocr_img, _fb_region, lang=cell_lang, psm=7)
|
||||||
if psm7_words:
|
if psm7_words:
|
||||||
psm7_words = [w for w in psm7_words if w.get('conf', 0) >= _MIN_WORD_CONF]
|
psm7_words = [w for w in psm7_words if w.get('conf', 0) >= _MIN_WORD_CONF]
|
||||||
if psm7_words:
|
if psm7_words:
|
||||||
@@ -4310,6 +4571,38 @@ def _ocr_single_cell(
|
|||||||
)
|
)
|
||||||
used_engine = 'cell_ocr_psm7'
|
used_engine = 'cell_ocr_psm7'
|
||||||
|
|
||||||
|
# --- TERTIARY FALLBACK: Row-strip re-OCR for narrow columns ---
|
||||||
|
# If a narrow cell is still empty, OCR the entire row strip with
|
||||||
|
# RapidOCR (which handles small text better) and assign words by
|
||||||
|
# X-position overlap with this column.
|
||||||
|
if not text.strip() and is_narrow and img_bgr is not None:
|
||||||
|
row_region = PageRegion(
|
||||||
|
type='_row_strip', x=0, y=row.y,
|
||||||
|
width=img_w, height=row.height,
|
||||||
|
)
|
||||||
|
strip_words = ocr_region_rapid(img_bgr, row_region)
|
||||||
|
if strip_words:
|
||||||
|
# Filter to words overlapping this column's X-range
|
||||||
|
col_left = col.x
|
||||||
|
col_right = col.x + col.width
|
||||||
|
col_words = []
|
||||||
|
for sw in strip_words:
|
||||||
|
sw_left = sw.get('left', 0)
|
||||||
|
sw_right = sw_left + sw.get('width', 0)
|
||||||
|
overlap = max(0, min(sw_right, col_right) - max(sw_left, col_left))
|
||||||
|
if overlap > sw.get('width', 1) * 0.3:
|
||||||
|
col_words.append(sw)
|
||||||
|
if col_words:
|
||||||
|
col_words = [w for w in col_words if w.get('conf', 0) >= _MIN_WORD_CONF]
|
||||||
|
if col_words:
|
||||||
|
rs_text = _words_to_reading_order_text(col_words, y_tolerance_px=row.height)
|
||||||
|
if rs_text.strip():
|
||||||
|
text = rs_text
|
||||||
|
avg_conf = round(
|
||||||
|
sum(w['conf'] for w in col_words) / len(col_words), 1
|
||||||
|
)
|
||||||
|
used_engine = 'row_strip_rapid'
|
||||||
|
|
||||||
# --- NOISE FILTER: clear cells that contain only OCR artifacts ---
|
# --- NOISE FILTER: clear cells that contain only OCR artifacts ---
|
||||||
if text.strip():
|
if text.strip():
|
||||||
text = _clean_cell_text(text)
|
text = _clean_cell_text(text)
|
||||||
|
|||||||
@@ -1742,6 +1742,151 @@ async def save_reconstruction(session_id: str, request: Request):
|
|||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
@router.get("/sessions/{session_id}/reconstruction/fabric-json")
async def get_fabric_json(session_id: str):
    """Serve the session's cell grid as Fabric.js JSON for the canvas editor.

    Raises:
        HTTPException: 404 when the session does not exist, 400 when it has
            no ``word_result``.
    """
    session = await get_session_db(session_id)
    if not session:
        raise HTTPException(status_code=404, detail=f"Session {session_id} not found")

    word_result = session.get("word_result")
    if not word_result:
        raise HTTPException(status_code=400, detail="No word result found")

    # Imported only once the request is known to be valid
    from services.layout_reconstruction_service import cells_to_fabric_json

    # Image size falls back to 800x600 when the word result lacks it
    return cells_to_fabric_json(
        word_result.get("cells", []),
        word_result.get("image_width", 800),
        word_result.get("image_height", 600),
    )
|
||||||
|
|
||||||
|
|
||||||
|
@router.get("/sessions/{session_id}/reconstruction/export/pdf")
async def export_reconstruction_pdf(session_id: str):
    """Export the reconstructed cell grid as a PDF table.

    The table has one header row (column ``label``, falling back to
    ``type``, then ``"Col <i>"``) followed by ``grid_shape["rows"]`` data
    rows.  Cell texts are looked up by the id pattern ``R{row:02d}_C{col}``;
    missing cells become empty strings.

    Returns:
        StreamingResponse with the PDF as an attachment.

    Raises:
        HTTPException: 404 for an unknown session, 400 when there is no
            word result or no column to export, 501 when reportlab is not
            installed.
    """
    session = await get_session_db(session_id)
    if not session:
        raise HTTPException(status_code=404, detail=f"Session {session_id} not found")

    word_result = session.get("word_result")
    if not word_result:
        raise HTTPException(status_code=400, detail="No word result found")

    cells = word_result.get("cells", [])
    columns_used = word_result.get("columns_used", [])
    grid_shape = word_result.get("grid_shape", {})
    n_rows = grid_shape.get("rows", 0)
    n_cols = grid_shape.get("cols", 0)

    # Index cells once instead of scanning the whole list for every grid
    # position.  setdefault keeps the first cell on duplicate ids, which is
    # what a linear first-match search would return.
    cell_by_id: dict = {}
    for c in cells:
        cell_by_id.setdefault(c.get("cell_id"), c)

    # Build table data: rows × columns
    header = [c.get("label", c.get("type", f"Col {i}")) for i, c in enumerate(columns_used)]
    if not header:
        header = [f"Col {i}" for i in range(n_cols)]

    # Header and data rows may disagree in width (columns_used vs.
    # grid_shape); pad everything to one width so the table is rectangular
    # regardless of how reportlab is configured to treat short rows.
    width = max(len(header), n_cols)
    header = header + [""] * (width - len(header))

    table_data: list[list[str]] = [header]
    for r in range(n_rows):
        row_texts = []
        for ci in range(n_cols):
            cell = cell_by_id.get(f"R{r:02d}_C{ci}")
            # A stored null text is exported as an empty string
            row_texts.append((cell.get("text") or "") if cell else "")
        row_texts.extend([""] * (width - n_cols))
        table_data.append(row_texts)

    # Generate PDF with reportlab
    try:
        from reportlab.lib.pagesizes import A4
        from reportlab.lib import colors
        from reportlab.platypus import SimpleDocTemplate, Table, TableStyle
        import io as _io

        buf = _io.BytesIO()
        doc = SimpleDocTemplate(buf, pagesize=A4)
        if not table_data or not table_data[0]:
            raise HTTPException(status_code=400, detail="No data to export")

        t = Table(table_data)
        t.setStyle(TableStyle([
            ('BACKGROUND', (0, 0), (-1, 0), colors.HexColor('#0d9488')),
            ('TEXTCOLOR', (0, 0), (-1, 0), colors.white),
            ('FONTSIZE', (0, 0), (-1, -1), 9),
            ('GRID', (0, 0), (-1, -1), 0.5, colors.grey),
            ('VALIGN', (0, 0), (-1, -1), 'TOP'),
            ('WORDWRAP', (0, 0), (-1, -1), True),
        ]))
        doc.build([t])
        buf.seek(0)

        from fastapi.responses import StreamingResponse
        return StreamingResponse(
            buf,
            media_type="application/pdf",
            headers={"Content-Disposition": f'attachment; filename="reconstruction_{session_id}.pdf"'},
        )
    except ImportError:
        raise HTTPException(status_code=501, detail="reportlab not installed")
|
||||||
|
|
||||||
|
|
||||||
|
@router.get("/sessions/{session_id}/reconstruction/export/docx")
async def export_reconstruction_docx(session_id: str):
    """Export the reconstructed cell grid as a DOCX table.

    The document holds a heading with the first 8 characters of the session
    id and a 'Table Grid' table: one header row (column ``label``, falling
    back to ``type``, then ``"Col <i>"``) plus ``grid_shape["rows"]`` data
    rows.  Cell texts are looked up by the id pattern ``R{row:02d}_C{col}``;
    missing cells become empty strings.

    Returns:
        StreamingResponse with the DOCX as an attachment.

    Raises:
        HTTPException: 404 for an unknown session, 400 when there is no
            word result, 501 when python-docx is not installed.
    """
    session = await get_session_db(session_id)
    if not session:
        raise HTTPException(status_code=404, detail=f"Session {session_id} not found")

    word_result = session.get("word_result")
    if not word_result:
        raise HTTPException(status_code=400, detail="No word result found")

    cells = word_result.get("cells", [])
    columns_used = word_result.get("columns_used", [])
    grid_shape = word_result.get("grid_shape", {})
    n_rows = grid_shape.get("rows", 0)
    n_cols = grid_shape.get("cols", 0)

    # Index cells once instead of scanning the whole list for every grid
    # position.  setdefault keeps the first cell on duplicate ids, which is
    # what a linear first-match search would return.
    cell_by_id: dict = {}
    for c in cells:
        cell_by_id.setdefault(c.get("cell_id"), c)

    try:
        from docx import Document
        import io as _io

        doc = Document()
        doc.add_heading(f'Rekonstruktion – Session {session_id[:8]}', level=1)

        # Build header
        header = [c.get("label", c.get("type", f"Col {i}")) for i, c in enumerate(columns_used)]
        if not header:
            header = [f"Col {i}" for i in range(n_cols)]

        # The table must be wide enough for the header too: columns_used
        # can list more columns than grid_shape["cols"], and indexing past
        # the last table cell would raise IndexError.
        table = doc.add_table(rows=1 + n_rows, cols=max(n_cols, len(header), 1))
        table.style = 'Table Grid'

        # Header row (cells fetched once per row rather than once per cell)
        header_cells = table.rows[0].cells
        for ci, h in enumerate(header):
            header_cells[ci].text = h

        # Data rows
        for r in range(n_rows):
            row_cells = table.rows[r + 1].cells
            for ci in range(n_cols):
                cell = cell_by_id.get(f"R{r:02d}_C{ci}")
                # A stored null text is exported as an empty string
                row_cells[ci].text = (cell.get("text") or "") if cell else ""

        buf = _io.BytesIO()
        doc.save(buf)
        buf.seek(0)

        from fastapi.responses import StreamingResponse
        return StreamingResponse(
            buf,
            media_type="application/vnd.openxmlformats-officedocument.wordprocessingml.document",
            headers={"Content-Disposition": f'attachment; filename="reconstruction_{session_id}.docx"'},
        )
    except ImportError:
        raise HTTPException(status_code=501, detail="python-docx not installed")
|
||||||
|
|
||||||
|
|
||||||
@router.post("/sessions/{session_id}/reprocess")
|
@router.post("/sessions/{session_id}/reprocess")
|
||||||
async def reprocess_session(session_id: str, request: Request):
|
async def reprocess_session(session_id: str, request: Request):
|
||||||
"""Re-run pipeline from a specific step, clearing downstream data.
|
"""Re-run pipeline from a specific step, clearing downstream data.
|
||||||
|
|||||||
@@ -45,6 +45,9 @@ asyncpg>=0.29.0
|
|||||||
# Email validation for Pydantic
|
# Email validation for Pydantic
|
||||||
email-validator>=2.0.0
|
email-validator>=2.0.0
|
||||||
|
|
||||||
|
# DOCX export for reconstruction editor (MIT license)
|
||||||
|
python-docx>=1.1.0
|
||||||
|
|
||||||
# Testing
|
# Testing
|
||||||
pytest>=8.0.0
|
pytest>=8.0.0
|
||||||
pytest-asyncio>=0.23.0
|
pytest-asyncio>=0.23.0
|
||||||
|
|||||||
@@ -350,6 +350,77 @@ def layout_to_fabric_json(layout_result: LayoutResult) -> str:
|
|||||||
return json.dumps(layout_result.fabric_json, ensure_ascii=False, indent=2)
|
return json.dumps(layout_result.fabric_json, ensure_ascii=False, indent=2)
|
||||||
|
|
||||||
|
|
||||||
|
def cells_to_fabric_json(
    cells: List[Dict[str, Any]],
    image_width: int,
    image_height: int,
) -> Dict[str, Any]:
    """Convert pipeline grid cells to Fabric.js-compatible JSON.

    Each cell becomes a ``textbox`` object positioned at its ``bbox_pct``
    coordinates (percentages of the image size, converted to pixels here).
    The stroke and a translucent background are colour-coded by column type.

    Missing *or* ``None`` values are tolerated: a cell without a usable
    ``bbox_pct`` falls back to x=0, y=0, w=10 %, h=3 %, and a cell without
    text yields an empty string instead of a JSON ``null``.

    Args:
        cells: List of cell dicts (reads ``bbox_pct``, ``col_type``, ``text``,
            ``cell_id``, ``row_index``, ``col_index``).
        image_width: Source image width in pixels.
        image_height: Source image height in pixels.

    Returns:
        Dict with Fabric.js canvas JSON (``version`` + ``objects`` array).
    """
    fallback_color = '#6b7280'
    col_type_colors = {
        'column_en': '#3b82f6',
        'column_de': '#22c55e',
        'column_example': '#f97316',
        'column_text': '#a855f7',
        'page_ref': '#06b6d4',
        'column_marker': fallback_color,
    }

    def _pct(bbox: Dict[str, Any], key: str, default: float) -> float:
        # Use the default only when the value is absent/None; an explicit 0
        # must be preserved (``or`` would wrongly replace it).
        value = bbox.get(key)
        return default if value is None else value

    fabric_objects = []
    for cell in cells:
        # ``dict.get(key, {})`` does not guard against an explicit None value.
        bbox = cell.get('bbox_pct') or {}
        x = _pct(bbox, 'x', 0) / 100 * image_width
        y = _pct(bbox, 'y', 0) / 100 * image_height
        w = _pct(bbox, 'w', 10) / 100 * image_width
        h = _pct(bbox, 'h', 3) / 100 * image_height

        col_type = cell.get('col_type', '')
        color = col_type_colors.get(col_type, fallback_color)

        # Font size follows the cell height but is clamped to 8..18.
        font_size = max(8, min(18, h * 0.55))

        text = cell.get('text')
        if text is None:
            text = ''

        fabric_objects.append({
            "type": "textbox",
            "version": "6.0.0",
            "originX": "left",
            "originY": "top",
            "left": round(x, 1),
            "top": round(y, 1),
            # Keep very narrow cells at least 30 px wide.
            "width": max(round(w, 1), 30),
            "height": round(h, 1),
            "fill": "#000000",
            "stroke": color,
            "strokeWidth": 1,
            "text": text,
            "fontSize": round(font_size, 1),
            "fontFamily": "monospace",
            "editable": True,
            "selectable": True,
            # "22" is appended as an 8-digit-hex alpha channel.
            "backgroundColor": color + "22",
            "data": {
                "cellId": cell.get('cell_id', ''),
                "colType": col_type,
                "rowIndex": cell.get('row_index', 0),
                "colIndex": cell.get('col_index', 0),
                "originalText": text,
            },
        })

    return {
        "version": "6.0.0",
        "objects": fabric_objects,
    }
|
||||||
|
|
||||||
|
|
||||||
def reconstruct_and_clean(
|
def reconstruct_and_clean(
|
||||||
image_bytes: bytes,
|
image_bytes: bytes,
|
||||||
remove_handwriting: bool = True
|
remove_handwriting: bool = True
|
||||||
|
|||||||
Reference in New Issue
Block a user