feat: Pixel-basierte Wortpositionierung im Overlay
Some checks failed
CI / go-lint (push) Has been skipped
CI / python-lint (push) Has been skipped
CI / nodejs-lint (push) Has been skipped
CI / test-go-school (push) Successful in 28s
CI / test-go-edu-search (push) Successful in 28s
CI / test-python-klausur (push) Failing after 2m6s
CI / test-python-agent-core (push) Successful in 18s
CI / test-nodejs-website (push) Successful in 20s
Some checks failed
CI / go-lint (push) Has been skipped
CI / python-lint (push) Has been skipped
CI / nodejs-lint (push) Has been skipped
CI / test-go-school (push) Successful in 28s
CI / test-go-edu-search (push) Successful in 28s
CI / test-python-klausur (push) Failing after 2m6s
CI / test-python-agent-core (push) Successful in 18s
CI / test-nodejs-website (push) Successful in 20s
Analysiert die Schwarzpixel-Verteilung auf dem Originalbild per Canvas. Findet Wort-Cluster pro Zelle und positioniert die erkannten Textgruppen an den exakten Pixel-Positionen. Overlay-Schrift von Monospace zurück auf Sans-Serif umgestellt. Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -92,6 +92,9 @@ export function StepLlmReview({ sessionId, onNext }: StepLlmReviewProps) {
|
|||||||
const reconRef = useRef<HTMLDivElement>(null)
|
const reconRef = useRef<HTMLDivElement>(null)
|
||||||
const [reconWidth, setReconWidth] = useState(0)
|
const [reconWidth, setReconWidth] = useState(0)
|
||||||
|
|
||||||
|
// Pixel-analysed word positions: cell_id → [{xPct, wPct, text}]
|
||||||
|
const [cellWordPositions, setCellWordPositions] = useState<Map<string, { xPct: number; wPct: number; text: string }[]>>(new Map())
|
||||||
|
|
||||||
const tableRef = useRef<HTMLDivElement>(null)
|
const tableRef = useRef<HTMLDivElement>(null)
|
||||||
const activeRowRef = useRef<HTMLTableRowElement>(null)
|
const activeRowRef = useRef<HTMLTableRowElement>(null)
|
||||||
|
|
||||||
@@ -106,6 +109,95 @@ export function StepLlmReview({ sessionId, onNext }: StepLlmReviewProps) {
|
|||||||
return () => obs.disconnect()
|
return () => obs.disconnect()
|
||||||
}, [viewMode])
|
}, [viewMode])
|
||||||
|
|
||||||
|
// Pixel-based word positioning.
//
// For every cell whose text contains several word-groups (separated by 3+
// whitespace characters), the cropped session image is drawn onto an
// off-screen canvas and the dark pixels of the cell region are projected onto
// the x-axis. Runs of "inked" columns form clusters; the clusters are matched
// left-to-right to the word-groups and stored as percentages of the image
// width in `cellWordPositions` (cell_id → [{xPct, wPct, text}]).
//
// Cells for which fewer clusters than word-groups are found are left out of
// the map, so the renderer falls back to a single span for them.
useEffect(() => {
  if (viewMode !== 'overlay' || cells.length === 0 || !sessionId) return

  type WordPos = { xPct: number; wPct: number; text: string }
  type Cluster = { start: number; end: number }

  // Flipped by the cleanup function so that an image which finishes loading
  // after the dependencies changed (or after unmount) cannot overwrite newer
  // state with positions computed for stale cells.
  let cancelled = false

  const imgUrl = `${KLAUSUR_API}/api/v1/ocr-pipeline/sessions/${sessionId}/image/cropped`
  const img = new Image()
  // Required so that the canvas is not tainted and getImageData() is allowed.
  img.crossOrigin = 'anonymous'

  img.onload = () => {
    if (cancelled) return

    const imgW = img.naturalWidth
    const imgH = img.naturalHeight
    if (!(imgW > 0 && imgH > 0)) return

    const canvas = document.createElement('canvas')
    canvas.width = imgW
    canvas.height = imgH
    const ctx = canvas.getContext('2d')
    if (!ctx) return

    // Paint a white background first: transparent pixels would otherwise be
    // read back as rgba(0,0,0,0), i.e. luminance 0, and be counted as ink.
    ctx.fillStyle = '#ffffff'
    ctx.fillRect(0, 0, imgW, imgH)
    ctx.drawImage(img, 0, 0)

    const positions = new Map<string, WordPos[]>()

    for (const cell of cells) {
      const bbox = cell.bbox_pct
      if (!bbox || !cell.text) continue

      // Split by 3+ whitespace — only analyse cells with multiple word-groups
      const groups = cell.text.split(/\s{3,}/).map(s => s.trim()).filter(Boolean)
      if (groups.length <= 1) continue

      // Pixel region for this cell, clamped to the canvas. getImageData()
      // returns transparent black for coordinates outside the canvas, which
      // would show up as a solid "dark" band in the projection.
      const rawX = Math.round(bbox.x / 100 * imgW)
      const rawY = Math.round(bbox.y / 100 * imgH)
      const rawW = Math.round(bbox.w / 100 * imgW)
      const rawH = Math.round(bbox.h / 100 * imgH)
      const x0 = Math.max(0, rawX)
      const y0 = Math.max(0, rawY)
      const cw = Math.min(imgW, rawX + rawW) - x0
      const ch = Math.min(imgH, rawY + rawH) - y0
      // Written as a positive check so that NaN coordinates are rejected too.
      if (!(cw > 0 && ch > 0)) continue

      const { data } = ctx.getImageData(x0, y0, cw, ch)

      // Vertical projection: count dark pixels per column
      const proj = new Uint32Array(cw)
      for (let y = 0; y < ch; y++) {
        const rowBase = y * cw * 4
        for (let x = 0; x < cw; x++) {
          const idx = rowBase + x * 4
          // Rec. 601 luma weights
          const lum = 0.299 * data[idx] + 0.587 * data[idx + 1] + 0.114 * data[idx + 2]
          if (lum < 128) proj[x]++
        }
      }

      // Find dark-pixel clusters (word groups on the image). A column counts
      // as inked when at least 3% of its pixels are dark; a cluster ends once
      // more than `minGap` consecutive columns are blank.
      const threshold = Math.max(1, ch * 0.03)
      const minGap = Math.max(5, Math.round(cw * 0.02))
      const clusters: Cluster[] = []
      let inCluster = false
      let clStart = 0
      let gap = 0

      for (let x = 0; x < cw; x++) {
        if (proj[x] >= threshold) {
          if (!inCluster) { clStart = x; inCluster = true }
          gap = 0
        } else if (inCluster) {
          gap++
          if (gap > minGap) {
            // `x - gap` is the last inked column before the blank run
            clusters.push({ start: clStart, end: x - gap })
            inCluster = false
            gap = 0
          }
        }
      }
      if (inCluster) clusters.push({ start: clStart, end: cw - 1 - gap })

      // Need enough clusters for all word groups
      if (clusters.length < groups.length) continue

      // More clusters than word-groups (e.g. a group consisting of several
      // words, or a speck of noise): merge the two neighbours with the
      // smallest gap until the counts match. Simply taking the first N
      // clusters would misplace every group after the first split.
      while (clusters.length > groups.length) {
        let mergeAt = 0
        let smallestGap = Infinity
        for (let i = 0; i + 1 < clusters.length; i++) {
          const between = clusters[i + 1].start - clusters[i].end
          if (between < smallestGap) {
            smallestGap = between
            mergeAt = i
          }
        }
        clusters.splice(mergeAt, 2, {
          start: clusters[mergeAt].start,
          end: clusters[mergeAt + 1].end,
        })
      }

      // Match word-groups to clusters left-to-right. Positions are derived
      // from absolute pixel columns so they stay correct for clamped regions.
      const wordPos: WordPos[] = groups.map((text, i) => {
        const cl = clusters[i]
        return {
          xPct: ((x0 + cl.start) / imgW) * 100,
          wPct: ((cl.end - cl.start + 1) / imgW) * 100,
          text,
        }
      })
      positions.set(cell.cell_id, wordPos)
    }

    setCellWordPositions(positions)
  }

  // If the image cannot be loaded, drop any previously computed positions so
  // the overlay falls back to one span per cell instead of showing stale data.
  img.onerror = () => {
    if (!cancelled) setCellWordPositions(new Map())
  }

  img.src = imgUrl

  return () => {
    cancelled = true
    img.onload = null
    img.onerror = null
  }
}, [viewMode, cells, sessionId])
|
||||||
|
|
||||||
// Load session data on mount
|
// Load session data on mount
|
||||||
useEffect(() => {
|
useEffect(() => {
|
||||||
if (!sessionId) return
|
if (!sessionId) return
|
||||||
@@ -701,6 +793,38 @@ export function StepLlmReview({ sessionId, onNext }: StepLlmReviewProps) {
|
|||||||
const containerH = reconWidth * aspect
|
const containerH = reconWidth * aspect
|
||||||
const cellHeightPx = containerH * (cell.bbox_pct.h / 100)
|
const cellHeightPx = containerH * (cell.bbox_pct.h / 100)
|
||||||
const fontSize = Math.max(6, cellHeightPx * fontScale)
|
const fontSize = Math.max(6, cellHeightPx * fontScale)
|
||||||
|
|
||||||
|
const wordPos = cellWordPositions.get(cell.cell_id)

// Pixel-analysed: render each word-group at its detected position.
// Every group gets its own absolutely positioned, editable span that shares
// the cell's vertical extent but uses the x/width found on the image.
if (wordPos && wordPos.length > 1) {
  return wordPos.map((wp, i) => (
    <span
      key={`${cell.cell_id}_${i}`}
      className="absolute leading-none overflow-hidden"
      contentEditable
      suppressContentEditableWarning
      style={{
        left: `${wp.xPct}%`,
        top: `${cell.bbox_pct.y}%`,
        width: `${wp.wPct}%`,
        height: `${cell.bbox_pct.h}%`,
        fontSize: `${fontSize}px`,
        fontWeight: globalBold ? 'bold' : (cell.is_bold ? 'bold' : 'normal'),
        fontFamily: "'Liberation Sans', Arial, sans-serif",
        display: 'flex',
        alignItems: 'center',
        whiteSpace: 'nowrap',
        color: '#1a1a1a',
      }}
      onBlur={(e) => {
        // This span shows only ONE word-group of the cell. Passing its text
        // on its own would replace the whole cell text with the fragment, so
        // splice the edited fragment back into the full text instead.
        // NOTE(review): assumes handleCellEdit expects the complete cell
        // text as its third argument — confirm against its definition.
        const edited = e.currentTarget.textContent ?? ''
        // The capture group keeps the separators: even indices are text,
        // odd indices are the 3+ whitespace runs between word-groups.
        const parts = (cell.text ?? '').split(/(\s{3,})/)
        let groupIdx = -1
        const merged = parts
          .map((part, k) => {
            if (k % 2 === 1 || part.trim() === '') return part
            groupIdx++
            // Leave the fragment untouched when nothing was changed so a
            // plain focus/blur yields exactly the original cell text.
            return groupIdx === i && part.trim() !== edited ? edited : part
          })
          .join('')
        handleCellEdit(cell.cell_id, cell.row_index, merged)
      }}
    >
      {wp.text}
    </span>
  ))
}
|
||||||
|
|
||||||
|
// Fallback: single span for entire cell
|
||||||
return (
|
return (
|
||||||
<span
|
<span
|
||||||
key={cell.cell_id}
|
key={cell.cell_id}
|
||||||
@@ -715,7 +839,7 @@ export function StepLlmReview({ sessionId, onNext }: StepLlmReviewProps) {
|
|||||||
fontSize: `${fontSize}px`,
|
fontSize: `${fontSize}px`,
|
||||||
fontWeight: globalBold ? 'bold' : (cell.is_bold ? 'bold' : 'normal'),
|
fontWeight: globalBold ? 'bold' : (cell.is_bold ? 'bold' : 'normal'),
|
||||||
paddingLeft: `${leftPaddingPct}%`,
|
paddingLeft: `${leftPaddingPct}%`,
|
||||||
fontFamily: "'Courier New', 'Liberation Mono', monospace",
|
fontFamily: "'Liberation Sans', Arial, sans-serif",
|
||||||
display: 'flex',
|
display: 'flex',
|
||||||
alignItems: 'center',
|
alignItems: 'center',
|
||||||
whiteSpace: 'pre',
|
whiteSpace: 'pre',
|
||||||
|
|||||||
Reference in New Issue
Block a user