feat: Overlay-Modus fuer ganzseitige Tabellenrekonstruktion mit Pixel-Positionierung
Some checks failed
CI / go-lint (push) Has been skipped
CI / python-lint (push) Has been skipped
CI / nodejs-lint (push) Has been skipped
CI / test-go-school (push) Successful in 34s
CI / test-go-edu-search (push) Successful in 31s
CI / test-python-klausur (push) Failing after 2m9s
CI / test-python-agent-core (push) Successful in 20s
CI / test-nodejs-website (push) Successful in 24s
Some checks failed
CI / go-lint (push) Has been skipped
CI / python-lint (push) Has been skipped
CI / nodejs-lint (push) Has been skipped
CI / test-go-school (push) Successful in 34s
CI / test-go-edu-search (push) Successful in 31s
CI / test-python-klausur (push) Failing after 2m9s
CI / test-python-agent-core (push) Successful in 20s
CI / test-nodejs-website (push) Successful in 24s
- usePixelWordPositions Hook extrahiert (shared zwischen StepLlmReview und StepReconstruction)
- StepReconstruction: neuer Overlay-Modus mit 50/50 Layout (Original + Rekonstruktion)
- Sub-Session-Zellen werden in Parent-Koordinaten konvertiert und zusammengefuehrt
- Spalten-/Zeilenlinien und Box-Zone-Markierung aus column_result/row_result
- Schriftgroesse-Slider und Bold-Toggle fuer Overlay
- StepLlmReview: ~140 Zeilen Pixel-Analyse durch Hook ersetzt

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -2,6 +2,7 @@
|
|||||||
|
|
||||||
import { useCallback, useEffect, useMemo, useRef, useState } from 'react'
|
import { useCallback, useEffect, useMemo, useRef, useState } from 'react'
|
||||||
import type { GridCell, GridResult, WordEntry, ColumnMeta } from '@/app/(admin)/ai/ocr-pipeline/types'
|
import type { GridCell, GridResult, WordEntry, ColumnMeta } from '@/app/(admin)/ai/ocr-pipeline/types'
|
||||||
|
import { usePixelWordPositions } from './usePixelWordPositions'
|
||||||
|
|
||||||
const KLAUSUR_API = '/klausur-api'
|
const KLAUSUR_API = '/klausur-api'
|
||||||
|
|
||||||
@@ -92,8 +93,11 @@ export function StepLlmReview({ sessionId, onNext }: StepLlmReviewProps) {
|
|||||||
const reconRef = useRef<HTMLDivElement>(null)
|
const reconRef = useRef<HTMLDivElement>(null)
|
||||||
const [reconWidth, setReconWidth] = useState(0)
|
const [reconWidth, setReconWidth] = useState(0)
|
||||||
|
|
||||||
// Pixel-analysed word positions: cell_id → [{xPct, wPct, text, fontRatio}]
|
// Pixel-analysed word positions via shared hook
|
||||||
const [cellWordPositions, setCellWordPositions] = useState<Map<string, { xPct: number; wPct: number; text: string; fontRatio: number }[]>>(new Map())
|
const overlayImageUrl = sessionId
|
||||||
|
? `${KLAUSUR_API}/api/v1/ocr-pipeline/sessions/${sessionId}/image/cropped`
|
||||||
|
: ''
|
||||||
|
const cellWordPositions = usePixelWordPositions(overlayImageUrl, cells, viewMode === 'overlay')
|
||||||
|
|
||||||
const tableRef = useRef<HTMLDivElement>(null)
|
const tableRef = useRef<HTMLDivElement>(null)
|
||||||
const activeRowRef = useRef<HTMLTableRowElement>(null)
|
const activeRowRef = useRef<HTMLTableRowElement>(null)
|
||||||
@@ -109,146 +113,6 @@ export function StepLlmReview({ sessionId, onNext }: StepLlmReviewProps) {
|
|||||||
return () => obs.disconnect()
|
return () => obs.disconnect()
|
||||||
}, [viewMode])
|
}, [viewMode])
|
||||||
|
|
||||||
// Pixel-based word positioning: analyse dark-pixel clusters on the image
|
|
||||||
useEffect(() => {
|
|
||||||
if (viewMode !== 'overlay' || cells.length === 0 || !sessionId) return
|
|
||||||
|
|
||||||
const imgUrl = `${KLAUSUR_API}/api/v1/ocr-pipeline/sessions/${sessionId}/image/cropped`
|
|
||||||
const img = new Image()
|
|
||||||
img.crossOrigin = 'anonymous'
|
|
||||||
img.onload = () => {
|
|
||||||
const canvas = document.createElement('canvas')
|
|
||||||
canvas.width = img.naturalWidth
|
|
||||||
canvas.height = img.naturalHeight
|
|
||||||
const ctx = canvas.getContext('2d')
|
|
||||||
if (!ctx) return
|
|
||||||
ctx.drawImage(img, 0, 0)
|
|
||||||
|
|
||||||
const refFontSize = 40
|
|
||||||
const fontFam = "'Liberation Sans', Arial, sans-serif"
|
|
||||||
ctx.font = `${refFontSize}px ${fontFam}`
|
|
||||||
|
|
||||||
const positions = new Map<string, { xPct: number; wPct: number; text: string; fontRatio: number }[]>()
|
|
||||||
|
|
||||||
for (const cell of cells) {
|
|
||||||
if (!cell.bbox_pct || !cell.text) continue
|
|
||||||
|
|
||||||
// Split by 3+ whitespace into word-groups
|
|
||||||
const groups = cell.text.split(/\s{3,}/).map(s => s.trim()).filter(Boolean)
|
|
||||||
|
|
||||||
// Pixel region for this cell
|
|
||||||
const imgW = img.naturalWidth
|
|
||||||
const imgH = img.naturalHeight
|
|
||||||
const cx = Math.round(cell.bbox_pct.x / 100 * imgW)
|
|
||||||
const cy = Math.round(cell.bbox_pct.y / 100 * imgH)
|
|
||||||
const cw = Math.round(cell.bbox_pct.w / 100 * imgW)
|
|
||||||
const ch = Math.round(cell.bbox_pct.h / 100 * imgH)
|
|
||||||
if (cw <= 0 || ch <= 0) continue
|
|
||||||
|
|
||||||
const imageData = ctx.getImageData(cx, cy, cw, ch)
|
|
||||||
|
|
||||||
// Vertical projection: count dark pixels per column
|
|
||||||
const proj = new Float32Array(cw)
|
|
||||||
for (let y = 0; y < ch; y++) {
|
|
||||||
for (let x = 0; x < cw; x++) {
|
|
||||||
const idx = (y * cw + x) * 4
|
|
||||||
const lum = 0.299 * imageData.data[idx] + 0.587 * imageData.data[idx + 1] + 0.114 * imageData.data[idx + 2]
|
|
||||||
if (lum < 128) proj[x]++
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// Find dark-pixel clusters (word groups on the image)
|
|
||||||
const threshold = Math.max(1, ch * 0.03)
|
|
||||||
const minGap = Math.max(5, Math.round(cw * 0.02))
|
|
||||||
const clusters: { start: number; end: number }[] = []
|
|
||||||
let inCluster = false
|
|
||||||
let clStart = 0
|
|
||||||
let gap = 0
|
|
||||||
|
|
||||||
for (let x = 0; x < cw; x++) {
|
|
||||||
if (proj[x] >= threshold) {
|
|
||||||
if (!inCluster) { clStart = x; inCluster = true }
|
|
||||||
gap = 0
|
|
||||||
} else if (inCluster) {
|
|
||||||
gap++
|
|
||||||
if (gap > minGap) {
|
|
||||||
clusters.push({ start: clStart, end: x - gap })
|
|
||||||
inCluster = false
|
|
||||||
gap = 0
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
if (inCluster) clusters.push({ start: clStart, end: cw - 1 - gap })
|
|
||||||
|
|
||||||
if (clusters.length === 0) continue
|
|
||||||
|
|
||||||
const wordPos: { xPct: number; wPct: number; text: string; fontRatio: number }[] = []
|
|
||||||
|
|
||||||
if (groups.length <= 1) {
|
|
||||||
// Single group: position at first cluster, merge all clusters for width
|
|
||||||
const firstCl = clusters[0]
|
|
||||||
const lastCl = clusters[clusters.length - 1]
|
|
||||||
const clusterW = lastCl.end - firstCl.start + 1
|
|
||||||
// Auto font-size: fit text width to cluster width
|
|
||||||
const measured = ctx.measureText(cell.text.trim())
|
|
||||||
const autoFontPx = refFontSize * (clusterW / measured.width)
|
|
||||||
const fontRatio = Math.min(autoFontPx / ch, 1.0) // ratio of cell height
|
|
||||||
wordPos.push({
|
|
||||||
xPct: cell.bbox_pct.x + (firstCl.start / cw) * cell.bbox_pct.w,
|
|
||||||
wPct: ((lastCl.end - firstCl.start + 1) / cw) * cell.bbox_pct.w,
|
|
||||||
text: cell.text.trim(),
|
|
||||||
fontRatio,
|
|
||||||
})
|
|
||||||
} else if (clusters.length >= groups.length) {
|
|
||||||
// Multiple groups: match to clusters left-to-right
|
|
||||||
for (let i = 0; i < groups.length; i++) {
|
|
||||||
const cl = clusters[i]
|
|
||||||
const clusterW = cl.end - cl.start + 1
|
|
||||||
const measured = ctx.measureText(groups[i])
|
|
||||||
const autoFontPx = refFontSize * (clusterW / measured.width)
|
|
||||||
const fontRatio = Math.min(autoFontPx / ch, 1.0)
|
|
||||||
wordPos.push({
|
|
||||||
xPct: cell.bbox_pct.x + (cl.start / cw) * cell.bbox_pct.w,
|
|
||||||
wPct: ((cl.end - cl.start + 1) / cw) * cell.bbox_pct.w,
|
|
||||||
text: groups[i],
|
|
||||||
fontRatio,
|
|
||||||
})
|
|
||||||
}
|
|
||||||
} else {
|
|
||||||
continue // fewer clusters than groups — skip
|
|
||||||
}
|
|
||||||
|
|
||||||
positions.set(cell.cell_id, wordPos)
|
|
||||||
}
|
|
||||||
|
|
||||||
// Normalise: find the most common fontRatio (mode) and apply it to all
|
|
||||||
const allRatios: number[] = []
|
|
||||||
for (const wps of positions.values()) {
|
|
||||||
for (const wp of wps) allRatios.push(wp.fontRatio)
|
|
||||||
}
|
|
||||||
if (allRatios.length > 0) {
|
|
||||||
// Bucket ratios to 2 decimal places, find mode
|
|
||||||
const buckets = new Map<number, number>()
|
|
||||||
for (const r of allRatios) {
|
|
||||||
const key = Math.round(r * 50) / 50 // round to nearest 0.02
|
|
||||||
buckets.set(key, (buckets.get(key) || 0) + 1)
|
|
||||||
}
|
|
||||||
let modeRatio = allRatios[0]
|
|
||||||
let modeCount = 0
|
|
||||||
for (const [ratio, count] of buckets) {
|
|
||||||
if (count > modeCount) { modeRatio = ratio; modeCount = count }
|
|
||||||
}
|
|
||||||
// Apply mode to all word positions
|
|
||||||
for (const wps of positions.values()) {
|
|
||||||
for (const wp of wps) wp.fontRatio = modeRatio
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
setCellWordPositions(positions)
|
|
||||||
}
|
|
||||||
img.src = imgUrl
|
|
||||||
}, [viewMode, cells, sessionId])
|
|
||||||
|
|
||||||
// Load session data on mount
|
// Load session data on mount
|
||||||
useEffect(() => {
|
useEffect(() => {
|
||||||
if (!sessionId) return
|
if (!sessionId) return
|
||||||
|
|||||||
@@ -2,7 +2,8 @@
|
|||||||
|
|
||||||
import { useCallback, useEffect, useMemo, useRef, useState } from 'react'
|
import { useCallback, useEffect, useMemo, useRef, useState } from 'react'
|
||||||
import dynamic from 'next/dynamic'
|
import dynamic from 'next/dynamic'
|
||||||
import type { GridResult, GridCell, WordEntry } from '@/app/(admin)/ai/ocr-pipeline/types'
|
import type { GridResult, GridCell, ColumnResult, RowResult, PageZone, PageRegion, RowItem } from '@/app/(admin)/ai/ocr-pipeline/types'
|
||||||
|
import { usePixelWordPositions } from './usePixelWordPositions'
|
||||||
|
|
||||||
const KLAUSUR_API = '/klausur-api'
|
const KLAUSUR_API = '/klausur-api'
|
||||||
|
|
||||||
@@ -12,7 +13,7 @@ const FabricReconstructionCanvas = dynamic(
|
|||||||
{ ssr: false, loading: () => <div className="py-8 text-center text-sm text-gray-400">Editor wird geladen...</div> }
|
{ ssr: false, loading: () => <div className="py-8 text-center text-sm text-gray-400">Editor wird geladen...</div> }
|
||||||
)
|
)
|
||||||
|
|
||||||
type EditorMode = 'simple' | 'editor'
|
type EditorMode = 'simple' | 'editor' | 'overlay'
|
||||||
|
|
||||||
interface StepReconstructionProps {
|
interface StepReconstructionProps {
|
||||||
sessionId: string | null
|
sessionId: string | null
|
||||||
@@ -46,11 +47,42 @@ export function StepReconstruction({ sessionId, onNext }: StepReconstructionProp
|
|||||||
const [undoStack, setUndoStack] = useState<UndoAction[]>([])
|
const [undoStack, setUndoStack] = useState<UndoAction[]>([])
|
||||||
const [redoStack, setRedoStack] = useState<UndoAction[]>([])
|
const [redoStack, setRedoStack] = useState<UndoAction[]>([])
|
||||||
|
|
||||||
// (allCells removed — cells now contains all cells including empty ones)
|
|
||||||
|
|
||||||
const containerRef = useRef<HTMLDivElement>(null)
|
const containerRef = useRef<HTMLDivElement>(null)
|
||||||
const imageRef = useRef<HTMLImageElement>(null)
|
const imageRef = useRef<HTMLImageElement>(null)
|
||||||
|
|
||||||
|
// Overlay mode state
|
||||||
|
const [isParentWithBoxes, setIsParentWithBoxes] = useState(false)
|
||||||
|
const [mergedGridCells, setMergedGridCells] = useState<GridCell[]>([])
|
||||||
|
const [parentColumns, setParentColumns] = useState<PageRegion[]>([])
|
||||||
|
const [parentRows, setParentRows] = useState<RowItem[]>([])
|
||||||
|
const [parentZones, setParentZones] = useState<PageZone[]>([])
|
||||||
|
const [imageNaturalSize, setImageNaturalSize] = useState<{ w: number; h: number } | null>(null)
|
||||||
|
const [fontScale, setFontScale] = useState(0.7)
|
||||||
|
const [globalBold, setGlobalBold] = useState(false)
|
||||||
|
const reconRef = useRef<HTMLDivElement>(null)
|
||||||
|
const [reconWidth, setReconWidth] = useState(0)
|
||||||
|
|
||||||
|
// Pixel-based word positions for overlay mode
|
||||||
|
const overlayImageUrl = sessionId
|
||||||
|
? `${KLAUSUR_API}/api/v1/ocr-pipeline/sessions/${sessionId}/image/cropped`
|
||||||
|
: ''
|
||||||
|
const cellWordPositions = usePixelWordPositions(
|
||||||
|
overlayImageUrl,
|
||||||
|
mergedGridCells,
|
||||||
|
editorMode === 'overlay',
|
||||||
|
)
|
||||||
|
|
||||||
|
// Track reconstruction container width for font size calculation
|
||||||
|
useEffect(() => {
|
||||||
|
const el = reconRef.current
|
||||||
|
if (!el) return
|
||||||
|
const obs = new ResizeObserver(entries => {
|
||||||
|
for (const entry of entries) setReconWidth(entry.contentRect.width)
|
||||||
|
})
|
||||||
|
obs.observe(el)
|
||||||
|
return () => obs.disconnect()
|
||||||
|
}, [editorMode])
|
||||||
|
|
||||||
// Load session data on mount
|
// Load session data on mount
|
||||||
useEffect(() => {
|
useEffect(() => {
|
||||||
if (!sessionId) return
|
if (!sessionId) return
|
||||||
@@ -97,6 +129,100 @@ export function StepReconstruction({ sessionId, onNext }: StepReconstructionProp
|
|||||||
setEditedTexts(new Map())
|
setEditedTexts(new Map())
|
||||||
setUndoStack([])
|
setUndoStack([])
|
||||||
setRedoStack([])
|
setRedoStack([])
|
||||||
|
|
||||||
|
// Check for parent with boxes (sub-sessions + zones)
|
||||||
|
const columnResult: ColumnResult | undefined = data.column_result
|
||||||
|
const rowResult: RowResult | undefined = data.row_result
|
||||||
|
const subSessions: { id: string; box_index: number }[] = data.sub_sessions || []
|
||||||
|
const zones: PageZone[] = columnResult?.zones || []
|
||||||
|
const hasBoxes = subSessions.length > 0 && zones.some(z => z.zone_type === 'box')
|
||||||
|
|
||||||
|
setIsParentWithBoxes(hasBoxes)
|
||||||
|
|
||||||
|
if (columnResult?.columns) setParentColumns(columnResult.columns)
|
||||||
|
if (rowResult?.rows) setParentRows(rowResult.rows)
|
||||||
|
if (zones.length > 0) setParentZones(zones)
|
||||||
|
|
||||||
|
// Store image dimensions
|
||||||
|
if (wordResult.image_width && wordResult.image_height) {
|
||||||
|
setImageNaturalSize({ w: wordResult.image_width, h: wordResult.image_height })
|
||||||
|
}
|
||||||
|
|
||||||
|
if (hasBoxes) {
|
||||||
|
// Default to overlay mode for parent sessions with boxes
|
||||||
|
setEditorMode('overlay')
|
||||||
|
|
||||||
|
// Load sub-sessions and merge cells
|
||||||
|
const imgW = wordResult.image_width || 1
|
||||||
|
const imgH = wordResult.image_height || 1
|
||||||
|
const allMergedCells: GridCell[] = [...rawGridCells]
|
||||||
|
|
||||||
|
for (const sub of subSessions) {
|
||||||
|
try {
|
||||||
|
const subRes = await fetch(`${KLAUSUR_API}/api/v1/ocr-pipeline/sessions/${sub.id}`)
|
||||||
|
if (!subRes.ok) continue
|
||||||
|
const subData = await subRes.json()
|
||||||
|
const subWordResult: GridResult | undefined = subData.word_result
|
||||||
|
if (!subWordResult?.cells) continue
|
||||||
|
|
||||||
|
// Find the box zone for this sub-session
|
||||||
|
const boxZone = zones.find(z => z.zone_type === 'box')
|
||||||
|
if (!boxZone?.box) continue
|
||||||
|
|
||||||
|
const box = boxZone.box
|
||||||
|
// Box coordinates are in pixels, convert to pct
|
||||||
|
const boxXPct = (box.x / imgW) * 100
|
||||||
|
const boxYPct = (box.y / imgH) * 100
|
||||||
|
const boxWPct = (box.width / imgW) * 100
|
||||||
|
const boxHPct = (box.height / imgH) * 100
|
||||||
|
|
||||||
|
// Convert sub-session cell coordinates to parent coordinates
|
||||||
|
for (const subCell of subWordResult.cells) {
|
||||||
|
if (!subCell.bbox_pct) continue
|
||||||
|
const parentCellX = boxXPct + (subCell.bbox_pct.x / 100) * boxWPct
|
||||||
|
const parentCellY = boxYPct + (subCell.bbox_pct.y / 100) * boxHPct
|
||||||
|
const parentCellW = (subCell.bbox_pct.w / 100) * boxWPct
|
||||||
|
const parentCellH = (subCell.bbox_pct.h / 100) * boxHPct
|
||||||
|
|
||||||
|
allMergedCells.push({
|
||||||
|
...subCell,
|
||||||
|
cell_id: `sub_${sub.id}_${subCell.cell_id}`,
|
||||||
|
bbox_pct: {
|
||||||
|
x: parentCellX,
|
||||||
|
y: parentCellY,
|
||||||
|
w: parentCellW,
|
||||||
|
h: parentCellH,
|
||||||
|
},
|
||||||
|
bbox_px: {
|
||||||
|
x: Math.round(parentCellX / 100 * imgW),
|
||||||
|
y: Math.round(parentCellY / 100 * imgH),
|
||||||
|
w: Math.round(parentCellW / 100 * imgW),
|
||||||
|
h: Math.round(parentCellH / 100 * imgH),
|
||||||
|
},
|
||||||
|
})
|
||||||
|
}
|
||||||
|
} catch {
|
||||||
|
// Skip failing sub-sessions
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
setMergedGridCells(allMergedCells)
|
||||||
|
|
||||||
|
// Also add merged cells as editable cells
|
||||||
|
const mergedEditableCells: EditableCell[] = allMergedCells.map(c => ({
|
||||||
|
cellId: c.cell_id,
|
||||||
|
text: c.text,
|
||||||
|
originalText: c.text,
|
||||||
|
bboxPct: c.bbox_pct,
|
||||||
|
colType: c.col_type,
|
||||||
|
rowIndex: c.row_index,
|
||||||
|
colIndex: c.col_index,
|
||||||
|
}))
|
||||||
|
setCells(mergedEditableCells)
|
||||||
|
} else {
|
||||||
|
setMergedGridCells(rawGridCells)
|
||||||
|
}
|
||||||
|
|
||||||
setStatus('ready')
|
setStatus('ready')
|
||||||
} catch (e: unknown) {
|
} catch (e: unknown) {
|
||||||
setError(e instanceof Error ? e.message : String(e))
|
setError(e instanceof Error ? e.message : String(e))
|
||||||
@@ -347,6 +473,215 @@ export function StepReconstruction({ sessionId, onNext }: StepReconstructionProp
|
|||||||
)
|
)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Overlay rendering helper
|
||||||
|
const renderOverlayMode = () => {
|
||||||
|
const imgW = imageNaturalSize?.w || 1
|
||||||
|
const imgH = imageNaturalSize?.h || 1
|
||||||
|
const aspect = imgH / imgW
|
||||||
|
const containerH = reconWidth * aspect
|
||||||
|
|
||||||
|
return (
|
||||||
|
<div className="grid grid-cols-2 gap-4">
|
||||||
|
{/* Left: Original image */}
|
||||||
|
<div>
|
||||||
|
<div className="text-xs font-medium text-gray-500 dark:text-gray-400 mb-1">
|
||||||
|
Originalbild
|
||||||
|
</div>
|
||||||
|
<div className="border rounded-lg overflow-hidden dark:border-gray-700 bg-gray-50 dark:bg-gray-900 sticky top-4">
|
||||||
|
{/* eslint-disable-next-line @next/next/no-img-element */}
|
||||||
|
<img
|
||||||
|
src={dewarpedUrl}
|
||||||
|
alt="Original"
|
||||||
|
className="w-full h-auto"
|
||||||
|
onLoad={(e) => {
|
||||||
|
const img = e.target as HTMLImageElement
|
||||||
|
setImageNaturalSize({ w: img.naturalWidth, h: img.naturalHeight })
|
||||||
|
}}
|
||||||
|
/>
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
|
||||||
|
{/* Right: Reconstructed table overlay */}
|
||||||
|
<div>
|
||||||
|
<div className="text-xs font-medium text-gray-500 dark:text-gray-400 mb-1">
|
||||||
|
Rekonstruktion ({cells.length} Zellen)
|
||||||
|
</div>
|
||||||
|
<div className="border border-gray-200 dark:border-gray-700 rounded-lg overflow-hidden bg-white">
|
||||||
|
<div
|
||||||
|
ref={reconRef}
|
||||||
|
className="relative"
|
||||||
|
style={{
|
||||||
|
aspectRatio: `${imgW} / ${imgH}`,
|
||||||
|
}}
|
||||||
|
>
|
||||||
|
{/* Column lines */}
|
||||||
|
{parentColumns
|
||||||
|
.filter(c => !['header', 'footer'].includes(c.type))
|
||||||
|
.map((col, i) => (
|
||||||
|
<div
|
||||||
|
key={`col-${i}`}
|
||||||
|
className="absolute top-0 bottom-0 border-l border-gray-300/50"
|
||||||
|
style={{ left: `${(col.x / imgW) * 100}%` }}
|
||||||
|
/>
|
||||||
|
))}
|
||||||
|
|
||||||
|
{/* Row lines */}
|
||||||
|
{parentRows.map((row, i) => (
|
||||||
|
<div
|
||||||
|
key={`row-${i}`}
|
||||||
|
className="absolute left-0 right-0 border-t border-gray-300/50"
|
||||||
|
style={{ top: `${(row.y / imgH) * 100}%` }}
|
||||||
|
/>
|
||||||
|
))}
|
||||||
|
|
||||||
|
{/* Box zone highlight */}
|
||||||
|
{parentZones
|
||||||
|
.filter(z => z.zone_type === 'box' && z.box)
|
||||||
|
.map((z, i) => {
|
||||||
|
const box = z.box!
|
||||||
|
return (
|
||||||
|
<div
|
||||||
|
key={`box-${i}`}
|
||||||
|
className="absolute border-2 border-blue-400/30 bg-blue-50/10 pointer-events-none"
|
||||||
|
style={{
|
||||||
|
left: `${(box.x / imgW) * 100}%`,
|
||||||
|
top: `${(box.y / imgH) * 100}%`,
|
||||||
|
width: `${(box.width / imgW) * 100}%`,
|
||||||
|
height: `${(box.height / imgH) * 100}%`,
|
||||||
|
}}
|
||||||
|
/>
|
||||||
|
)
|
||||||
|
})}
|
||||||
|
|
||||||
|
{/* Pixel-positioned words / editable inputs */}
|
||||||
|
{cells.map((cell) => {
|
||||||
|
const displayText = getDisplayText(cell)
|
||||||
|
const edited = isEdited(cell)
|
||||||
|
const wordPos = cellWordPositions.get(cell.cellId)
|
||||||
|
const cellHeightPx = containerH * (cell.bboxPct.h / 100)
|
||||||
|
|
||||||
|
// Pixel-analysed: render word-groups at detected positions as inputs
|
||||||
|
if (wordPos && wordPos.length > 0) {
|
||||||
|
return wordPos.map((wp, i) => {
|
||||||
|
const autoFontPx = cellHeightPx * wp.fontRatio * fontScale
|
||||||
|
const fs = Math.max(6, autoFontPx)
|
||||||
|
|
||||||
|
// For multi-group cells, only the first group is the primary input
|
||||||
|
// Show as span (read-only positioned) — editing happens at cell level
|
||||||
|
if (wordPos.length > 1) {
|
||||||
|
return (
|
||||||
|
<span
|
||||||
|
key={`${cell.cellId}_wp_${i}`}
|
||||||
|
className="absolute leading-none pointer-events-none select-none"
|
||||||
|
style={{
|
||||||
|
left: `${wp.xPct}%`,
|
||||||
|
top: `${cell.bboxPct.y}%`,
|
||||||
|
width: `${wp.wPct}%`,
|
||||||
|
height: `${cell.bboxPct.h}%`,
|
||||||
|
fontSize: `${fs}px`,
|
||||||
|
fontWeight: globalBold ? 'bold' : (cell.colType === 'column_en' ? 'bold' : 'normal'),
|
||||||
|
fontFamily: "'Liberation Sans', Arial, sans-serif",
|
||||||
|
display: 'flex',
|
||||||
|
alignItems: 'center',
|
||||||
|
whiteSpace: 'nowrap',
|
||||||
|
overflow: 'visible',
|
||||||
|
color: '#1a1a1a',
|
||||||
|
}}
|
||||||
|
>
|
||||||
|
{wp.text}
|
||||||
|
</span>
|
||||||
|
)
|
||||||
|
}
|
||||||
|
|
||||||
|
// Single group: render as editable input at pixel position
|
||||||
|
return (
|
||||||
|
<div key={`${cell.cellId}_wp_${i}`} className="absolute group" style={{
|
||||||
|
left: `${wp.xPct}%`,
|
||||||
|
top: `${cell.bboxPct.y}%`,
|
||||||
|
width: `${wp.wPct}%`,
|
||||||
|
height: `${cell.bboxPct.h}%`,
|
||||||
|
}}>
|
||||||
|
<input
|
||||||
|
id={`cell-${cell.cellId}`}
|
||||||
|
type="text"
|
||||||
|
value={displayText}
|
||||||
|
onChange={(e) => handleTextChange(cell.cellId, e.target.value)}
|
||||||
|
onKeyDown={(e) => handleKeyDown(e, cell.cellId)}
|
||||||
|
className={`w-full h-full bg-transparent border-0 outline-none px-0 transition-colors ${
|
||||||
|
edited ? 'bg-green-50/30' : ''
|
||||||
|
}`}
|
||||||
|
style={{
|
||||||
|
fontSize: `${fs}px`,
|
||||||
|
fontWeight: globalBold ? 'bold' : (cell.colType === 'column_en' ? 'bold' : 'normal'),
|
||||||
|
fontFamily: "'Liberation Sans', Arial, sans-serif",
|
||||||
|
lineHeight: '1',
|
||||||
|
color: '#1a1a1a',
|
||||||
|
}}
|
||||||
|
title={`${cell.cellId} (${cell.colType})`}
|
||||||
|
/>
|
||||||
|
{edited && (
|
||||||
|
<button
|
||||||
|
onClick={() => resetCell(cell.cellId)}
|
||||||
|
className="absolute -top-1 -right-1 w-4 h-4 bg-red-500 text-white rounded-full text-[9px] leading-none opacity-0 group-hover:opacity-100 transition-opacity flex items-center justify-center"
|
||||||
|
title="Zuruecksetzen"
|
||||||
|
>
|
||||||
|
×
|
||||||
|
</button>
|
||||||
|
)}
|
||||||
|
</div>
|
||||||
|
)
|
||||||
|
})
|
||||||
|
}
|
||||||
|
|
||||||
|
// Multi-group cell with pixel positions: already handled above
|
||||||
|
// Fallback: no pixel data — single input at cell bbox
|
||||||
|
if (!cell.text) return null
|
||||||
|
|
||||||
|
const fontSize = Math.max(6, cellHeightPx * fontScale)
|
||||||
|
return (
|
||||||
|
<div key={cell.cellId} className="absolute group" style={{
|
||||||
|
left: `${cell.bboxPct.x}%`,
|
||||||
|
top: `${cell.bboxPct.y}%`,
|
||||||
|
width: `${cell.bboxPct.w}%`,
|
||||||
|
height: `${cell.bboxPct.h}%`,
|
||||||
|
}}>
|
||||||
|
<input
|
||||||
|
id={`cell-${cell.cellId}`}
|
||||||
|
type="text"
|
||||||
|
value={displayText}
|
||||||
|
onChange={(e) => handleTextChange(cell.cellId, e.target.value)}
|
||||||
|
onKeyDown={(e) => handleKeyDown(e, cell.cellId)}
|
||||||
|
className={`w-full h-full bg-transparent border-0 outline-none px-0 transition-colors ${
|
||||||
|
edited ? 'bg-green-50/30' : ''
|
||||||
|
}`}
|
||||||
|
style={{
|
||||||
|
fontSize: `${fontSize}px`,
|
||||||
|
fontWeight: globalBold ? 'bold' : 'normal',
|
||||||
|
fontFamily: "'Liberation Sans', Arial, sans-serif",
|
||||||
|
lineHeight: '1',
|
||||||
|
color: '#1a1a1a',
|
||||||
|
}}
|
||||||
|
title={`${cell.cellId} (${cell.colType})`}
|
||||||
|
/>
|
||||||
|
{edited && (
|
||||||
|
<button
|
||||||
|
onClick={() => resetCell(cell.cellId)}
|
||||||
|
className="absolute -top-1 -right-1 w-4 h-4 bg-red-500 text-white rounded-full text-[9px] leading-none opacity-0 group-hover:opacity-100 transition-opacity flex items-center justify-center"
|
||||||
|
title="Zuruecksetzen"
|
||||||
|
>
|
||||||
|
×
|
||||||
|
</button>
|
||||||
|
)}
|
||||||
|
</div>
|
||||||
|
)
|
||||||
|
})}
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
)
|
||||||
|
}
|
||||||
|
|
||||||
return (
|
return (
|
||||||
<div className="space-y-3">
|
<div className="space-y-3">
|
||||||
{/* Toolbar */}
|
{/* Toolbar */}
|
||||||
@@ -367,6 +702,18 @@ export function StepReconstruction({ sessionId, onNext }: StepReconstructionProp
|
|||||||
>
|
>
|
||||||
Einfach
|
Einfach
|
||||||
</button>
|
</button>
|
||||||
|
{isParentWithBoxes && (
|
||||||
|
<button
|
||||||
|
onClick={() => setEditorMode('overlay')}
|
||||||
|
className={`px-2 py-0.5 transition-colors ${
|
||||||
|
editorMode === 'overlay'
|
||||||
|
? 'bg-teal-600 text-white'
|
||||||
|
: 'hover:bg-gray-50 dark:hover:bg-gray-700 text-gray-600 dark:text-gray-400'
|
||||||
|
}`}
|
||||||
|
>
|
||||||
|
Overlay
|
||||||
|
</button>
|
||||||
|
)}
|
||||||
<button
|
<button
|
||||||
onClick={() => setEditorMode('editor')}
|
onClick={() => setEditorMode('editor')}
|
||||||
className={`px-2 py-0.5 transition-colors ${
|
className={`px-2 py-0.5 transition-colors ${
|
||||||
@@ -406,6 +753,35 @@ export function StepReconstruction({ sessionId, onNext }: StepReconstructionProp
|
|||||||
|
|
||||||
<div className="w-px h-5 bg-gray-300 dark:bg-gray-600 mx-1" />
|
<div className="w-px h-5 bg-gray-300 dark:bg-gray-600 mx-1" />
|
||||||
|
|
||||||
|
{/* Overlay-specific toolbar */}
|
||||||
|
{editorMode === 'overlay' && (
|
||||||
|
<>
|
||||||
|
<label className="flex items-center gap-1 text-xs text-gray-600 dark:text-gray-400">
|
||||||
|
Schrift
|
||||||
|
<input
|
||||||
|
type="range" min={30} max={120} value={Math.round(fontScale * 100)}
|
||||||
|
onChange={e => setFontScale(Number(e.target.value) / 100)}
|
||||||
|
className="w-20 h-1 accent-teal-600"
|
||||||
|
/>
|
||||||
|
<span className="w-8 text-right font-mono">{Math.round(fontScale * 100)}%</span>
|
||||||
|
</label>
|
||||||
|
<button
|
||||||
|
onClick={() => setGlobalBold(b => !b)}
|
||||||
|
className={`px-2 py-1 text-xs rounded border transition-colors font-bold ${
|
||||||
|
globalBold
|
||||||
|
? 'bg-teal-600 text-white border-teal-600'
|
||||||
|
: 'bg-white dark:bg-gray-700 text-gray-600 dark:text-gray-400 border-gray-300 dark:border-gray-600'
|
||||||
|
}`}
|
||||||
|
>
|
||||||
|
B
|
||||||
|
</button>
|
||||||
|
<div className="w-px h-5 bg-gray-300 dark:bg-gray-600 mx-1" />
|
||||||
|
</>
|
||||||
|
)}
|
||||||
|
|
||||||
|
{/* Non-overlay controls */}
|
||||||
|
{editorMode !== 'overlay' && (
|
||||||
|
<>
|
||||||
{/* Empty field toggle */}
|
{/* Empty field toggle */}
|
||||||
<button
|
<button
|
||||||
onClick={() => setShowEmptyHighlight(v => !v)}
|
onClick={() => setShowEmptyHighlight(v => !v)}
|
||||||
@@ -443,6 +819,8 @@ export function StepReconstruction({ sessionId, onNext }: StepReconstructionProp
|
|||||||
</button>
|
</button>
|
||||||
|
|
||||||
<div className="w-px h-5 bg-gray-300 dark:bg-gray-600 mx-1" />
|
<div className="w-px h-5 bg-gray-300 dark:bg-gray-600 mx-1" />
|
||||||
|
</>
|
||||||
|
)}
|
||||||
|
|
||||||
<button
|
<button
|
||||||
onClick={saveReconstruction}
|
onClick={saveReconstruction}
|
||||||
@@ -454,8 +832,10 @@ export function StepReconstruction({ sessionId, onNext }: StepReconstructionProp
|
|||||||
</div>
|
</div>
|
||||||
</div>
|
</div>
|
||||||
|
|
||||||
{/* Reconstruction canvas — Simple or Editor mode */}
|
{/* Reconstruction canvas */}
|
||||||
{editorMode === 'editor' && sessionId ? (
|
{editorMode === 'overlay' ? (
|
||||||
|
renderOverlayMode()
|
||||||
|
) : editorMode === 'editor' && sessionId ? (
|
||||||
<FabricReconstructionCanvas
|
<FabricReconstructionCanvas
|
||||||
sessionId={sessionId}
|
sessionId={sessionId}
|
||||||
cells={gridCells}
|
cells={gridCells}
|
||||||
|
|||||||
162
admin-lehrer/components/ocr-pipeline/usePixelWordPositions.ts
Normal file
162
admin-lehrer/components/ocr-pipeline/usePixelWordPositions.ts
Normal file
@@ -0,0 +1,162 @@
|
|||||||
|
import { useEffect, useState } from 'react'
|
||||||
|
import type { GridCell } from '@/app/(admin)/ai/ocr-pipeline/types'
|
||||||
|
|
||||||
|
/** Pixel-derived placement of one word group inside a grid cell. */
export interface WordPosition {
  /** Left edge of the word group, in percent of the image width. */
  xPct: number
  /** Width of the word group, in percent of the image width. */
  wPct: number
  /** Trimmed text of the word group. */
  text: string
  /**
   * Font size as a fraction of the cell's pixel height (capped at 1.0).
   * After analysis every entry carries the same value: the most common
   * ratio across all analysed cells.
   */
  fontRatio: number
}

/** Font size (px) at which text is measured on the canvas before scaling. */
const REF_FONT_PX = 40
/** Font family used for measuring; the overlay renderers use the same stack. */
const FONT_FAMILY = "'Liberation Sans', Arial, sans-serif"
/** Pixels with a luminance below this value count as "dark" (ink). */
const DARK_LUMINANCE = 128

/** Inclusive horizontal pixel range of one run of dark columns. */
interface PixelCluster {
  start: number
  end: number
}

/** Vertical projection: number of dark pixels in each column of an RGBA buffer. */
function projectDarkColumns(data: Uint8ClampedArray, width: number, height: number): Float32Array {
  const proj = new Float32Array(width)
  for (let y = 0; y < height; y++) {
    const rowOffset = y * width * 4
    for (let x = 0; x < width; x++) {
      const idx = rowOffset + x * 4
      const lum = 0.299 * data[idx] + 0.587 * data[idx + 1] + 0.114 * data[idx + 2]
      if (lum < DARK_LUMINANCE) proj[x]++
    }
  }
  return proj
}

/**
 * Groups columns whose dark-pixel count reaches `threshold` into clusters.
 * A cluster is closed once more than `minGap` consecutive light columns follow it,
 * so small gaps (between letters) do not split a word group.
 */
function findDarkClusters(proj: Float32Array, threshold: number, minGap: number): PixelCluster[] {
  const clusters: PixelCluster[] = []
  let start = -1 // -1 means "currently not inside a cluster"
  let gap = 0

  for (let x = 0; x < proj.length; x++) {
    if (proj[x] >= threshold) {
      if (start < 0) start = x
      gap = 0
    } else if (start >= 0) {
      gap++
      if (gap > minGap) {
        clusters.push({ start, end: x - gap })
        start = -1
        gap = 0
      }
    }
  }
  // A cluster still open at the right edge ends at the last dark column seen.
  if (start >= 0) clusters.push({ start, end: proj.length - 1 - gap })

  return clusters
}

/**
 * Font size (as a fraction of the cell height, capped at 1.0) at which `text`
 * would be exactly `clusterW` pixels wide. Returns null when the text has no
 * measurable width, so the caller can keep it out of the mode vote.
 */
function fitFontRatio(
  ctx: CanvasRenderingContext2D,
  text: string,
  clusterW: number,
  cellH: number,
): number | null {
  const textW = ctx.measureText(text).width
  if (!(textW > 0)) return null
  const autoFontPx = REF_FONT_PX * (clusterW / textW)
  return Math.min(autoFontPx / cellH, 1.0)
}

/**
 * Most common ratio after bucketing to the nearest 0.02.
 * Ties go to the bucket that was seen first. Returns null for an empty list.
 */
function mostCommonRatio(ratios: number[]): number | null {
  if (ratios.length === 0) return null

  const buckets = new Map<number, number>()
  for (const r of ratios) {
    const key = Math.round(r * 50) / 50
    buckets.set(key, (buckets.get(key) || 0) + 1)
  }

  let modeRatio = ratios[0]
  let modeCount = 0
  for (const [ratio, count] of buckets) {
    if (count > modeCount) {
      modeRatio = ratio
      modeCount = count
    }
  }
  return modeRatio
}

/**
 * Analyses one cell on the drawn canvas.
 *
 * Returns the positioned word groups plus the font ratios that may vote in the
 * global mode, or null when the cell cannot be analysed (no bbox/text, empty
 * region, no dark clusters, or fewer clusters than word groups).
 */
function analyseCell(
  ctx: CanvasRenderingContext2D,
  cell: GridCell,
  imgW: number,
  imgH: number,
): { words: WordPosition[]; votes: number[] } | null {
  const bbox = cell.bbox_pct
  if (!bbox || !cell.text) return null

  // Word groups are separated by runs of 3+ whitespace characters.
  const groups = cell.text.split(/\s{3,}/).map(s => s.trim()).filter(Boolean)

  // Cell rectangle in image pixels.
  const cx = Math.round(bbox.x / 100 * imgW)
  const cy = Math.round(bbox.y / 100 * imgH)
  const cw = Math.round(bbox.w / 100 * imgW)
  const ch = Math.round(bbox.h / 100 * imgH)
  if (cw <= 0 || ch <= 0) return null

  // Clamp to the canvas: getImageData returns transparent black for pixels
  // outside the canvas, which would otherwise be counted as dark ink.
  const x0 = Math.max(0, cx)
  const y0 = Math.max(0, cy)
  const regionW = Math.min(imgW, cx + cw) - x0
  const regionH = Math.min(imgH, cy + ch) - y0
  if (regionW <= 0 || regionH <= 0) return null
  const shiftX = x0 - cx // offset of the sampled region inside the cell

  const proj = projectDarkColumns(ctx.getImageData(x0, y0, regionW, regionH).data, regionW, regionH)
  const threshold = Math.max(1, regionH * 0.03)
  const minGap = Math.max(5, Math.round(cw * 0.02))
  const clusters = findDarkClusters(proj, threshold, minGap)
  if (clusters.length === 0) return null

  const words: WordPosition[] = []
  const votes: number[] = []
  const place = (text: string, start: number, end: number): void => {
    const widthPx = end - start + 1
    const ratio = fitFontRatio(ctx, text, widthPx, ch)
    if (ratio !== null) votes.push(ratio)
    words.push({
      xPct: bbox.x + ((shiftX + start) / cw) * bbox.w,
      wPct: (widthPx / cw) * bbox.w,
      text,
      fontRatio: ratio ?? 1.0,
    })
  }

  if (groups.length <= 1) {
    // Single group: starts at the first cluster and spans up to the last one.
    place(cell.text.trim(), clusters[0].start, clusters[clusters.length - 1].end)
  } else if (clusters.length >= groups.length) {
    // Multiple groups: matched to clusters left-to-right.
    groups.forEach((group, i) => place(group, clusters[i].start, clusters[i].end))
  } else {
    return null // fewer clusters than groups — no reliable mapping
  }

  return { words, votes }
}

/**
 * Shared hook: analyses dark-pixel clusters on an image to determine the
 * horizontal position and an automatic font size of the word groups in each cell.
 *
 * @param imageUrl URL of the image to analyse; an empty string disables the analysis.
 * @param cells    Grid cells whose `bbox_pct` / `text` are analysed.
 * @param active   The analysis only runs while this is true.
 * @returns Map from `cell_id` to the word positions found for that cell. Cells
 *          that could not be analysed have no entry. The map from the last
 *          completed analysis is kept while the hook is inactive.
 */
export function usePixelWordPositions(
  imageUrl: string,
  cells: GridCell[],
  active: boolean,
): Map<string, WordPosition[]> {
  const [cellWordPositions, setCellWordPositions] = useState<Map<string, WordPosition[]>>(new Map())

  useEffect(() => {
    if (!active || cells.length === 0 || !imageUrl) return undefined

    // Guards against a late image load overwriting the result of a newer run
    // (inputs changed) or updating state after unmount.
    let cancelled = false

    const img = new Image()
    img.crossOrigin = 'anonymous'
    img.onload = () => {
      if (cancelled) return

      const canvas = document.createElement('canvas')
      canvas.width = img.naturalWidth
      canvas.height = img.naturalHeight
      const ctx = canvas.getContext('2d')
      if (!ctx) return
      ctx.drawImage(img, 0, 0)
      ctx.font = `${REF_FONT_PX}px ${FONT_FAMILY}`

      const imgW = img.naturalWidth
      const imgH = img.naturalHeight
      const positions = new Map<string, WordPosition[]>()
      const allVotes: number[] = []

      for (const cell of cells) {
        const result = analyseCell(ctx, cell, imgW, imgH)
        if (!result) continue
        positions.set(cell.cell_id, result.words)
        allVotes.push(...result.votes)
      }

      // Normalise: every word gets the most common font ratio so the
      // reconstruction uses one uniform text size.
      const modeRatio = mostCommonRatio(allVotes)
      if (modeRatio !== null) {
        for (const wps of positions.values()) {
          for (const wp of wps) wp.fontRatio = modeRatio
        }
      }

      setCellWordPositions(positions)
    }
    img.src = imageUrl

    return () => {
      cancelled = true
      img.onload = null
    }
  }, [active, cells, imageUrl])

  return cellWordPositions
}
|
||||||
Reference in New Issue
Block a user