diff --git a/admin-lehrer/app/(admin)/ai/ocr-overlay/page.tsx b/admin-lehrer/app/(admin)/ai/ocr-overlay/page.tsx new file mode 100644 index 0000000..b9dd5b0 --- /dev/null +++ b/admin-lehrer/app/(admin)/ai/ocr-overlay/page.tsx @@ -0,0 +1,427 @@ +'use client' + +import { useCallback, useEffect, useState } from 'react' +import { PagePurpose } from '@/components/common/PagePurpose' +import { PipelineStepper } from '@/components/ocr-pipeline/PipelineStepper' +import { StepOrientation } from '@/components/ocr-pipeline/StepOrientation' +import { StepDeskew } from '@/components/ocr-pipeline/StepDeskew' +import { StepDewarp } from '@/components/ocr-pipeline/StepDewarp' +import { StepCrop } from '@/components/ocr-pipeline/StepCrop' +import { StepRowDetection } from '@/components/ocr-pipeline/StepRowDetection' +import { StepWordRecognition } from '@/components/ocr-pipeline/StepWordRecognition' +import { OverlayReconstruction } from '@/components/ocr-overlay/OverlayReconstruction' +import { OVERLAY_PIPELINE_STEPS, DOCUMENT_CATEGORIES, dbStepToOverlayUi, type PipelineStep, type SessionListItem, type DocumentCategory } from './types' + +const KLAUSUR_API = '/klausur-api' + +/** Admin page for the 7-step OCR overlay pipeline: lists top-level sessions (open, rename, categorise, delete) and renders the current pipeline step. */ +export default function OcrOverlayPage() { + const [currentStep, setCurrentStep] = useState(0) + const [sessionId, setSessionId] = useState(null) + const [sessionName, setSessionName] = useState('') + const [sessions, setSessions] = useState([]) + const [loadingSessions, setLoadingSessions] = useState(true) + const [editingName, setEditingName] = useState(null) + const [editNameValue, setEditNameValue] = useState('') + const [editingCategory, setEditingCategory] = useState(null) + const [activeCategory, setActiveCategory] = useState(undefined) + const [steps, setSteps] = useState( + OVERLAY_PIPELINE_STEPS.map((s, i) => ({ + ...s, + status: i === 0 ? 
'active' : 'pending', + })), + ) + + useEffect(() => { + loadSessions() + }, []) + + const loadSessions = async () => { + setLoadingSessions(true) + try { + const res = await fetch(`${KLAUSUR_API}/api/v1/ocr-pipeline/sessions`) + if (res.ok) { + const data = await res.json() + // Filter to only show top-level sessions (no sub-sessions) + setSessions((data.sessions || []).filter((s: SessionListItem) => !s.parent_session_id)) + } + } catch (e) { + console.error('Failed to load sessions:', e) + } finally { + setLoadingSessions(false) + } + } + + const openSession = useCallback(async (sid: string) => { + try { + const res = await fetch(`${KLAUSUR_API}/api/v1/ocr-pipeline/sessions/${sid}`) + if (!res.ok) return + const data = await res.json() + + setSessionId(sid) + setSessionName(data.name || data.filename || '') + setActiveCategory(data.document_category || undefined) + + // Map DB step to overlay UI step + const dbStep = data.current_step || 1 + const uiStep = dbStepToOverlayUi(dbStep) + + setSteps( + OVERLAY_PIPELINE_STEPS.map((s, i) => ({ + ...s, + status: i < uiStep ? 'completed' : i === uiStep ? 'active' : 'pending', + })), + ) + setCurrentStep(uiStep) + } catch (e) { + console.error('Failed to open session:', e) + } + }, []) + + const deleteSession = useCallback(async (sid: string) => { + try { + const res = await fetch(`${KLAUSUR_API}/api/v1/ocr-pipeline/sessions/${sid}`, { method: 'DELETE' }) + if (!res.ok) throw new Error(`HTTP ${res.status}`) // do not drop the session locally when the server refused the delete + setSessions((prev) => prev.filter((s) => s.id !== sid)) + if (sessionId === sid) { + setSessionId(null) + setCurrentStep(0) + setSteps(OVERLAY_PIPELINE_STEPS.map((s, i) => ({ ...s, status: i === 0 ? 
'active' : 'pending' }))) + } + } catch (e) { + console.error('Failed to delete session:', e) + } + }, [sessionId]) + + const renameSession = useCallback(async (sid: string, newName: string) => { + try { + const res = await fetch(`${KLAUSUR_API}/api/v1/ocr-pipeline/sessions/${sid}`, { + method: 'PUT', + headers: { 'Content-Type': 'application/json' }, + body: JSON.stringify({ name: newName }), + }) + if (!res.ok) throw new Error(`HTTP ${res.status}`) // keep the old name in the list when the update failed + setSessions((prev) => prev.map((s) => (s.id === sid ? { ...s, name: newName } : s))) + if (sessionId === sid) setSessionName(newName) + } catch (e) { + console.error('Failed to rename session:', e) + } + setEditingName(null) + }, [sessionId]) + + const updateCategory = useCallback(async (sid: string, category: DocumentCategory) => { + try { + const res = await fetch(`${KLAUSUR_API}/api/v1/ocr-pipeline/sessions/${sid}`, { + method: 'PUT', + headers: { 'Content-Type': 'application/json' }, + body: JSON.stringify({ document_category: category }), + }) + if (!res.ok) throw new Error(`HTTP ${res.status}`) // keep the old category when the update failed + setSessions((prev) => prev.map((s) => (s.id === sid ? { ...s, document_category: category } : s))) + if (sessionId === sid) setActiveCategory(category) + } catch (e) { + console.error('Failed to update category:', e) + } + setEditingCategory(null) + }, [sessionId]) + + const handleStepClick = (index: number) => { + if (index <= currentStep || steps[index].status === 'completed') { + setCurrentStep(index) + } + } + + const goToStep = (step: number) => { + setCurrentStep(step) + setSteps((prev) => + prev.map((s, i) => ({ + ...s, + status: i < step ? 'completed' : i === step ? 'active' : 'pending', + })), + ) + } + + const handleNext = () => { + if (currentStep >= steps.length - 1) { + // Last step completed — return to session list + setSteps(OVERLAY_PIPELINE_STEPS.map((s, i) => ({ ...s, status: i === 0 ? 
'active' : 'pending' }))) + setCurrentStep(0) + setSessionId(null) + loadSessions() + return + } + + const nextStep = currentStep + 1 + setSteps((prev) => + prev.map((s, i) => { + if (i === currentStep) return { ...s, status: 'completed' } + if (i === nextStep) return { ...s, status: 'active' } + return s + }), + ) + setCurrentStep(nextStep) + } + + const handleOrientationComplete = (sid: string) => { + setSessionId(sid) + loadSessions() + handleNext() + } + + const handleNewSession = () => { + setSessionId(null) + setSessionName('') + setCurrentStep(0) + setSteps(OVERLAY_PIPELINE_STEPS.map((s, i) => ({ ...s, status: i === 0 ? 'active' : 'pending' }))) + } + + const stepNames: Record = { + 1: 'Orientierung', + 2: 'Begradigung', + 3: 'Entzerrung', + 4: 'Zuschneiden', + 5: 'Zeilen', + 6: 'Woerter', + 7: 'Overlay', + } + + const reprocessFromStep = useCallback(async (uiStep: number) => { + if (!sessionId) return + // Map overlay UI step to DB step + const dbStepMap: Record = { 0: 2, 1: 3, 2: 4, 3: 5, 4: 7, 5: 8, 6: 9 } + const dbStep = dbStepMap[uiStep] || uiStep + 1 + if (!confirm(`Ab Schritt ${uiStep + 1} (${stepNames[uiStep + 1] || '?'}) neu verarbeiten?`)) return + try { + const res = await fetch(`${KLAUSUR_API}/api/v1/ocr-pipeline/sessions/${sessionId}/reprocess`, { + method: 'POST', + headers: { 'Content-Type': 'application/json' }, + body: JSON.stringify({ from_step: dbStep }), + }) + if (!res.ok) { + const data = await res.json().catch(() => ({})) + console.error('Reprocess failed:', data.detail || res.status) + return + } + goToStep(uiStep) + } catch (e) { + console.error('Reprocess error:', e) + } + // eslint-disable-next-line react-hooks/exhaustive-deps + }, [sessionId, goToStep]) + + const renderStep = () => { + switch (currentStep) { + case 0: + return + case 1: + return + case 2: + return + case 3: + return + case 4: + return + case 5: + return + case 6: + return + default: + return null + } + } + + return ( +
+ + + {/* Session List */} +
+
+

+ Sessions ({sessions.length}) +

+ +
+ + {loadingSessions ? ( +
Lade Sessions...
+ ) : sessions.length === 0 ? ( +
Noch keine Sessions vorhanden.
+ ) : ( +
+ {sessions.map((s) => { + const catInfo = DOCUMENT_CATEGORIES.find(c => c.value === s.document_category) + return ( +
+ {/* Thumbnail */} +
openSession(s.id)} + > + {/* eslint-disable-next-line @next/next/no-img-element */} + { (e.target as HTMLImageElement).style.display = 'none' }} + /> +
+ + {/* Info */} +
openSession(s.id)}> + {editingName === s.id ? ( + setEditNameValue(e.target.value)} + onBlur={() => renameSession(s.id, editNameValue)} + onKeyDown={(e) => { + if (e.key === 'Enter') renameSession(s.id, editNameValue) + if (e.key === 'Escape') setEditingName(null) + }} + onClick={(e) => e.stopPropagation()} + className="w-full px-1 py-0.5 text-sm border rounded dark:bg-gray-700 dark:border-gray-600" + /> + ) : ( +
+ {s.name || s.filename} +
+ )} + +
+ {new Date(s.created_at).toLocaleDateString('de-DE', { day: '2-digit', month: '2-digit', year: '2-digit', hour: '2-digit', minute: '2-digit' })} +
+
+ + {/* Category Badge */} +
e.stopPropagation()}> + +
+ + {/* Actions */} +
+ + +
+ + {/* Category dropdown */} + {editingCategory === s.id && ( +
e.stopPropagation()} + > + {DOCUMENT_CATEGORIES.map((cat) => ( + + ))} +
+ )} +
+ ) + })} +
+ )} +
+ + {/* Active session info */} + {sessionId && sessionName && ( +
+ Aktive Session: {sessionName} + {activeCategory && (() => { + const cat = DOCUMENT_CATEGORIES.find(c => c.value === activeCategory) + return cat ? {cat.icon} {cat.label} : null + })()} +
+ )} + + + +
{renderStep()}
+
+ ) +} diff --git a/admin-lehrer/app/(admin)/ai/ocr-overlay/types.ts b/admin-lehrer/app/(admin)/ai/ocr-overlay/types.ts new file mode 100644 index 0000000..a26fc7c --- /dev/null +++ b/admin-lehrer/app/(admin)/ai/ocr-overlay/types.ts @@ -0,0 +1,60 @@ +import type { PipelineStep } from '../ocr-pipeline/types' + +// Re-export types used by overlay components +export type { + PipelineStep, + PipelineStepStatus, + SessionListItem, + SessionInfo, + DocumentCategory, + DocumentTypeResult, + OrientationResult, + CropResult, + DeskewResult, + DewarpResult, + RowResult, + RowItem, + GridResult, + GridCell, + WordBbox, + ColumnMeta, +} from '../ocr-pipeline/types' + +export { DOCUMENT_CATEGORIES } from '../ocr-pipeline/types' + +/** + * 7-step pipeline for full-page overlay reconstruction. + * Skips: Spalten (columns), LLM-Review (Korrektur), Ground-Truth (Validierung) + */ +export const OVERLAY_PIPELINE_STEPS: PipelineStep[] = [ + { id: 'orientation', name: 'Orientierung', icon: '🔄', status: 'pending' }, + { id: 'deskew', name: 'Begradigung', icon: '📐', status: 'pending' }, + { id: 'dewarp', name: 'Entzerrung', icon: '🔧', status: 'pending' }, + { id: 'crop', name: 'Zuschneiden', icon: '✂️', status: 'pending' }, + { id: 'rows', name: 'Zeilen', icon: '📏', status: 'pending' }, + { id: 'words', name: 'Woerter', icon: '🔤', status: 'pending' }, + { id: 'reconstruction', name: 'Overlay', icon: '🏗️', status: 'pending' }, +] + +/** Map from overlay UI step index to DB step number (1-indexed); inverse of dbStepToOverlayUi */ +export const OVERLAY_UI_TO_DB: Record = { + 0: 2, // orientation + 1: 3, // deskew + 2: 4, // dewarp + 3: 5, // crop + 4: 7, // rows (DB step 6 = columns is skipped by the overlay pipeline) + 5: 8, // words + 6: 9, // reconstruction +} + +/** Map from DB step to overlay UI step index */ +export function dbStepToOverlayUi(dbStep: number): number { + // DB: 1=start, 2=orient, 3=deskew, 4=dewarp, 5=crop, 6=columns, 7=rows, 8=words, 9=recon, 10=gt + if (dbStep <= 2) return 0 // orientation 
+ if (dbStep === 3) return 1 // deskew + if (dbStep === 4) return 2 // dewarp + if (dbStep === 5) return 3 // crop + if (dbStep <= 7) return 4 // rows (skip columns) + if (dbStep === 8) return 5 // words + return 6 // reconstruction +} diff --git a/admin-lehrer/components/ocr-overlay/OverlayReconstruction.tsx b/admin-lehrer/components/ocr-overlay/OverlayReconstruction.tsx new file mode 100644 index 0000000..dd0548a --- /dev/null +++ b/admin-lehrer/components/ocr-overlay/OverlayReconstruction.tsx @@ -0,0 +1,577 @@ +'use client' + +import { useCallback, useEffect, useMemo, useRef, useState } from 'react' +import type { GridResult, GridCell, RowResult, RowItem } from '@/app/(admin)/ai/ocr-overlay/types' +import { usePixelWordPositions } from './usePixelWordPositions' + +const KLAUSUR_API = '/klausur-api' + +interface OverlayReconstructionProps { + sessionId: string | null + onNext: () => void +} + +interface EditableCell { + cellId: string + text: string + originalText: string + bboxPct: { x: number; y: number; w: number; h: number } + colType: string + rowIndex: number + colIndex: number +} + +type UndoAction = { cellId: string; oldText: string; newText: string } + +/** Overlay step: loads the session's word_result cells, shows them as editable text, tracks edits with undo/redo and POSTs changed cells to the reconstruction endpoint. */ +export function OverlayReconstruction({ sessionId, onNext }: OverlayReconstructionProps) { + const [status, setStatus] = useState<'loading' | 'ready' | 'saving' | 'saved' | 'error'>('loading') + const [error, setError] = useState('') + const [cells, setCells] = useState([]) + const [gridCells, setGridCells] = useState([]) + const [editedTexts, setEditedTexts] = useState>(new Map()) + + // Undo/Redo + const [undoStack, setUndoStack] = useState([]) + const [redoStack, setRedoStack] = useState([]) + + // Overlay state + const [rows, setRows] = useState([]) + const [imageNaturalSize, setImageNaturalSize] = useState<{ w: number; h: number } | null>(null) + const [fontScale, setFontScale] = useState(0.7) + const [globalBold, setGlobalBold] = useState(false) + const [imageRotation, setImageRotation] = useState<0 
| 180>(0) + const reconRef = useRef(null) + const [reconWidth, setReconWidth] = useState(0) + + // Pixel-based word positions + const overlayImageUrl = sessionId + ? `${KLAUSUR_API}/api/v1/ocr-pipeline/sessions/${sessionId}/image/cropped` + : '' + const cellWordPositions = usePixelWordPositions( + overlayImageUrl, + gridCells, + status === 'ready', + imageRotation, + ) + + // Track container width + useEffect(() => { + const el = reconRef.current + if (!el) return + const obs = new ResizeObserver(entries => { + for (const entry of entries) setReconWidth(entry.contentRect.width) + }) + obs.observe(el) + return () => obs.disconnect() + }, [status]) + + // Load session data + useEffect(() => { + if (!sessionId) return + loadSessionData() + // eslint-disable-next-line react-hooks/exhaustive-deps + }, [sessionId]) + + const loadSessionData = async () => { + if (!sessionId) return + setStatus('loading') + try { + const res = await fetch(`${KLAUSUR_API}/api/v1/ocr-pipeline/sessions/${sessionId}`) + if (!res.ok) throw new Error(`HTTP ${res.status}`) + const data = await res.json() + + const wordResult: GridResult | undefined = data.word_result + if (!wordResult) { + setError('Keine Worterkennungsdaten gefunden. 
Bitte zuerst den Woerter-Schritt abschliessen.') + setStatus('error') + return + } + + const rawGridCells: GridCell[] = wordResult.cells || [] + setGridCells(rawGridCells) + + const editableCells: EditableCell[] = rawGridCells.map(c => ({ + cellId: c.cell_id, + text: c.text, + originalText: c.text, + bboxPct: c.bbox_pct, + colType: c.col_type, + rowIndex: c.row_index, + colIndex: c.col_index, + })) + setCells(editableCells) + setEditedTexts(new Map()) + setUndoStack([]) + setRedoStack([]) + + // Load rows + const rowResult: RowResult | undefined = data.row_result + setRows(rowResult?.rows ?? []) // reset to [] so row lines of a previously loaded session never linger + + // Store image dimensions + if (wordResult.image_width && wordResult.image_height) { + setImageNaturalSize({ w: wordResult.image_width, h: wordResult.image_height }) + } + + setStatus('ready') + } catch (e: unknown) { + setError(e instanceof Error ? e.message : String(e)) + setStatus('error') + } + } + + const handleTextChange = useCallback((cellId: string, newText: string) => { + // Resolve the previous text outside the state updaters: updaters must be pure, + // and React Strict Mode calls them twice (which duplicated undo entries). + const cell = cells.find(c => c.cellId === cellId) + const prevText = editedTexts.get(cellId) ?? cell?.text ?? 
'' + + setUndoStack(stack => [...stack, { cellId, oldText: prevText, newText }]) + setRedoStack([]) + setEditedTexts(prev => { + const next = new Map(prev) + next.set(cellId, newText) + return next + }) + }, [cells, editedTexts]) + + const undo = useCallback(() => { + if (undoStack.length === 0) return + // Read the stack from state instead of nesting setState calls inside an + // updater: updaters must be pure (Strict Mode invokes them twice). + const action = undoStack[undoStack.length - 1] + + setUndoStack(stack => stack.slice(0, -1)) + setRedoStack(rs => [...rs, action]) + setEditedTexts(prev => { + const next = new Map(prev) + next.set(action.cellId, action.oldText) + return next + }) + }, [undoStack]) + + const redo = useCallback(() => { + if (redoStack.length === 0) return + // Mirror of undo: move the latest action back onto the undo stack and + // re-apply its new text, without side effects inside an updater. + const action = redoStack[redoStack.length - 1] + + setRedoStack(stack => stack.slice(0, -1)) + setUndoStack(us => [...us, action]) + setEditedTexts(prev => { + const next = new Map(prev) + next.set(action.cellId, action.newText) + return next + }) + }, [redoStack]) + + const resetCell = useCallback((cellId: string) => { + setEditedTexts(prev => { + const next = new Map(prev) + next.delete(cellId) + return next + }) + }, []) + + // Keyboard shortcuts + useEffect(() => { + const handler = (e: KeyboardEvent) => { + if ((e.metaKey || e.ctrlKey) && e.key.toLowerCase() === 'z') { // e.key is 'Z' while Shift is held, so compare case-insensitively + e.preventDefault() + if (e.shiftKey) redo() + else undo() + } + } + document.addEventListener('keydown', handler) + return () => document.removeEventListener('keydown', handler) + }, [undo, redo]) + + const getDisplayText = useCallback((cell: EditableCell): string => { + return editedTexts.get(cell.cellId) ?? 
cell.text + }, [editedTexts]) + + const isEdited = useCallback((cell: EditableCell): boolean => { + const edited = editedTexts.get(cell.cellId) + return edited !== undefined && edited !== cell.originalText + }, [editedTexts]) + + const changedCount = useMemo(() => { + let count = 0 + for (const cell of cells) { + if (isEdited(cell)) count++ + } + return count + }, [cells, isEdited]) + + // Tab navigation + const sortedCellIds = useMemo(() => { + return [...cells] + .sort((a, b) => a.rowIndex !== b.rowIndex ? a.rowIndex - b.rowIndex : a.colIndex - b.colIndex) + .map(c => c.cellId) + }, [cells]) + + const handleKeyDown = useCallback((e: React.KeyboardEvent, cellId: string) => { + if (e.key === 'Tab') { + e.preventDefault() + const idx = sortedCellIds.indexOf(cellId) + const nextIdx = e.shiftKey ? idx - 1 : idx + 1 + if (nextIdx >= 0 && nextIdx < sortedCellIds.length) { + const nextId = sortedCellIds[nextIdx] + const el = document.getElementById(`cell-${nextId}`) + el?.focus() + } + } + }, [sortedCellIds]) + + const saveReconstruction = useCallback(async () => { + if (!sessionId) return + setStatus('saving') + try { + const cellUpdates = Array.from(editedTexts.entries()) + .filter(([cellId, text]) => { + const cell = cells.find(c => c.cellId === cellId) + return cell && text !== cell.originalText + }) + .map(([cellId, text]) => ({ cell_id: cellId, text })) + + if (cellUpdates.length === 0) { + setStatus('saved') + return + } + + const res = await fetch(`${KLAUSUR_API}/api/v1/ocr-pipeline/sessions/${sessionId}/reconstruction`, { + method: 'POST', + headers: { 'Content-Type': 'application/json' }, + body: JSON.stringify({ cells: cellUpdates }), + }) + + if (!res.ok) { + const data = await res.json().catch(() => ({})) + throw new Error(data.detail || `HTTP ${res.status}`) + } + + setStatus('saved') + } catch (e: unknown) { + setError(e instanceof Error ? 
e.message : String(e)) + setStatus('error') + } + }, [sessionId, editedTexts, cells]) + + const dewarpedUrl = sessionId + ? `${KLAUSUR_API}/api/v1/ocr-pipeline/sessions/${sessionId}/image/cropped` + : '' + + if (!sessionId) { + return
Bitte zuerst eine Session auswaehlen.
+ } + + if (status === 'loading') { + return ( +
+
+ Overlay-Daten werden geladen... +
+ ) + } + + if (status === 'error') { + return ( +
+
⚠️
+

Fehler

+

{error}

+
+ + +
+
+ ) + } + + if (status === 'saved') { + return ( +
+
+

Overlay gespeichert

+

+ {changedCount > 0 ? `${changedCount} Zellen wurden aktualisiert.` : 'Keine Aenderungen vorgenommen.'} +

+ +
+ ) + } + + const imgW = imageNaturalSize?.w || 1 + const imgH = imageNaturalSize?.h || 1 + const containerH = reconWidth * (imgH / imgW) + + return ( +
+ {/* Toolbar */} +
+
+

+ Overlay-Rekonstruktion +

+ + {cells.length} Zellen · {changedCount} geaendert + +
+
+ {/* Undo/Redo */} + + + +
+ + {/* Font scale */} + + + + +
+ + +
+
+ + {/* Side-by-side: Original + Overlay */} +
+ {/* Left: Original image */} +
+
+ Originalbild +
+
+ {/* eslint-disable-next-line @next/next/no-img-element */} + Original { + const img = e.target as HTMLImageElement + setImageNaturalSize({ w: img.naturalWidth, h: img.naturalHeight }) + }} + /> +
+
+ + {/* Right: Reconstructed overlay */} +
+
+ Rekonstruktion ({cells.length} Zellen) +
+
+
+ {/* Row lines */} + {rows.map((row, i) => ( +
+ ))} + + {/* Pixel-positioned words / editable inputs */} + {cells.map((cell) => { + const displayText = getDisplayText(cell) + const edited = isEdited(cell) + const wordPos = cellWordPositions.get(cell.cellId) + const bboxPct = cell.bboxPct + const cellHeightPx = containerH * (bboxPct.h / 100) + + // Pixel-analysed: render word-groups at detected positions + if (wordPos && wordPos.length > 0) { + return wordPos.map((wp, i) => { + const autoFontPx = cellHeightPx * wp.fontRatio * fontScale + const fs = Math.max(6, autoFontPx) + + if (wordPos.length > 1) { + return ( + + {wp.text} + + ) + } + + return ( +
+ handleTextChange(cell.cellId, e.target.value)} + onKeyDown={(e) => handleKeyDown(e, cell.cellId)} + className={`w-full h-full bg-transparent border-0 outline-none px-0 transition-colors ${ + edited ? 'bg-green-50/30' : '' + }`} + style={{ + fontSize: `${fs}px`, + fontWeight: globalBold ? 'bold' : 'normal', + fontFamily: "'Liberation Sans', Arial, sans-serif", + lineHeight: '1', + color: '#1a1a1a', + }} + title={`${cell.cellId} (${cell.colType})`} + /> + {edited && ( + + )} +
+ ) + }) + } + + // Fallback: no pixel data — single input at cell bbox + if (!cell.text) return null + + const fontSize = Math.max(6, cellHeightPx * fontScale) + return ( +
+ handleTextChange(cell.cellId, e.target.value)} + onKeyDown={(e) => handleKeyDown(e, cell.cellId)} + className={`w-full h-full bg-transparent border-0 outline-none px-0 transition-colors ${ + edited ? 'bg-green-50/30' : '' + }`} + style={{ + fontSize: `${fontSize}px`, + fontWeight: globalBold ? 'bold' : 'normal', + fontFamily: "'Liberation Sans', Arial, sans-serif", + lineHeight: '1', + color: '#1a1a1a', + }} + title={`${cell.cellId} (${cell.colType})`} + /> + {edited && ( + + )} +
+ ) + })} +
+
+
+
+ + {/* Bottom action */} +
+ +
+
+ ) +} diff --git a/admin-lehrer/components/ocr-overlay/usePixelWordPositions.ts b/admin-lehrer/components/ocr-overlay/usePixelWordPositions.ts new file mode 100644 index 0000000..8d30f4b --- /dev/null +++ b/admin-lehrer/components/ocr-overlay/usePixelWordPositions.ts @@ -0,0 +1,188 @@ +import { useEffect, useState } from 'react' +import type { GridCell } from '@/app/(admin)/ai/ocr-overlay/types' + +export interface WordPosition { + xPct: number + wPct: number + text: string + fontRatio: number +} + +/** + * Analyse dark-pixel clusters on an image to determine + * the exact horizontal position & auto-font-size of word groups in each cell. + * + * When rotation=180, the image is rotated 180° before pixel analysis. + * Cell coordinates are transformed to the rotated space for reading, + * and cluster positions are mirrored back to the original coordinate system. + * + * Returns a Map. + */ +export function usePixelWordPositions( + imageUrl: string, + cells: GridCell[], + active: boolean, + rotation: 0 | 180 = 0, +): Map { + const [cellWordPositions, setCellWordPositions] = useState>(new Map()) + + useEffect(() => { + if (!active || cells.length === 0 || !imageUrl) return + + const img = new Image() + let cancelled = false // flipped by the effect cleanup below + img.crossOrigin = 'anonymous' + img.onload = () => { + if (cancelled) return + const imgW = img.naturalWidth + const imgH = img.naturalHeight + + const canvas = document.createElement('canvas') + canvas.width = imgW + canvas.height = imgH + const ctx = canvas.getContext('2d') + if (!ctx) return + + if (rotation === 180) { + ctx.translate(imgW, imgH) + ctx.rotate(Math.PI) + ctx.drawImage(img, 0, 0) + ctx.setTransform(1, 0, 0, 1, 0, 0) + } else { + ctx.drawImage(img, 0, 0) + } + + const refFontSize = 40 + const fontFam = "'Liberation Sans', Arial, sans-serif" + ctx.font = `${refFontSize}px ${fontFam}` + + const positions = new Map() + + for (const cell of cells) { + if (!cell.bbox_pct || !cell.text) continue + + const groups = cell.text.split(/\s{3,}/).map(s => s.trim()).filter(Boolean) + + let 
cx: number, cy: number + const cw = Math.round(cell.bbox_pct.w / 100 * imgW) + const ch = Math.round(cell.bbox_pct.h / 100 * imgH) + + if (rotation === 180) { + cx = Math.round((100 - cell.bbox_pct.x - cell.bbox_pct.w) / 100 * imgW) + cy = Math.round((100 - cell.bbox_pct.y - cell.bbox_pct.h) / 100 * imgH) + } else { + cx = Math.round(cell.bbox_pct.x / 100 * imgW) + cy = Math.round(cell.bbox_pct.y / 100 * imgH) + } + if (cw <= 0 || ch <= 0) continue + if (cx < 0) cx = 0 + if (cy < 0) cy = 0 + if (cx + cw > imgW || cy + ch > imgH) continue + + const imageData = ctx.getImageData(cx, cy, cw, ch) + + const proj = new Float32Array(cw) + for (let y = 0; y < ch; y++) { + for (let x = 0; x < cw; x++) { + const idx = (y * cw + x) * 4 + const lum = 0.299 * imageData.data[idx] + 0.587 * imageData.data[idx + 1] + 0.114 * imageData.data[idx + 2] + if (lum < 128) proj[x]++ + } + } + + const threshold = Math.max(1, ch * 0.03) + const minGap = Math.max(5, Math.round(cw * 0.02)) + let clusters: { start: number; end: number }[] = [] + let inCluster = false + let clStart = 0 + let gap = 0 + + for (let x = 0; x < cw; x++) { + if (proj[x] >= threshold) { + if (!inCluster) { clStart = x; inCluster = true } + gap = 0 + } else if (inCluster) { + gap++ + if (gap > minGap) { + clusters.push({ start: clStart, end: x - gap }) + inCluster = false + gap = 0 + } + } + } + if (inCluster) clusters.push({ start: clStart, end: cw - 1 - gap }) + + if (clusters.length === 0) continue + + if (rotation === 180) { + clusters = clusters.map(c => ({ + start: cw - 1 - c.end, + end: cw - 1 - c.start, + })).reverse() + } + + const wordPos: WordPosition[] = [] + + if (groups.length <= 1) { + const firstCl = clusters[0] + const lastCl = clusters[clusters.length - 1] + const clusterW = lastCl.end - firstCl.start + 1 + const measured = ctx.measureText(cell.text.trim()) + const autoFontPx = refFontSize * (clusterW / measured.width) + const fontRatio = Math.min(autoFontPx / ch, 1.0) + wordPos.push({ + xPct: 
cell.bbox_pct.x + (firstCl.start / cw) * cell.bbox_pct.w, + wPct: ((lastCl.end - firstCl.start + 1) / cw) * cell.bbox_pct.w, + text: cell.text.trim(), + fontRatio, + }) + } else if (clusters.length >= groups.length) { + for (let i = 0; i < groups.length; i++) { + const cl = clusters[i] + const clusterW = cl.end - cl.start + 1 + const measured = ctx.measureText(groups[i]) + const autoFontPx = refFontSize * (clusterW / measured.width) + const fontRatio = Math.min(autoFontPx / ch, 1.0) + wordPos.push({ + xPct: cell.bbox_pct.x + (cl.start / cw) * cell.bbox_pct.w, + wPct: ((cl.end - cl.start + 1) / cw) * cell.bbox_pct.w, + text: groups[i], + fontRatio, + }) + } + } else { + continue + } + + positions.set(cell.cell_id, wordPos) + } + + // Normalise: find the most common fontRatio (mode) and apply it to all + const allRatios: number[] = [] + for (const wps of positions.values()) { + for (const wp of wps) allRatios.push(wp.fontRatio) + } + if (allRatios.length > 0) { + const buckets = new Map() + for (const r of allRatios) { + const key = Math.round(r * 50) / 50 + buckets.set(key, (buckets.get(key) || 0) + 1) + } + let modeRatio = allRatios[0] + let modeCount = 0 + for (const [ratio, count] of buckets) { + if (count > modeCount) { modeRatio = ratio; modeCount = count } + } + for (const wps of positions.values()) { + for (const wp of wps) wp.fontRatio = modeRatio + } + } + + setCellWordPositions(positions) + } + img.src = imageUrl + return () => { cancelled = true } // ignore a late onload once the inputs changed or the component unmounted + }, [active, cells, imageUrl, rotation]) + + return cellWordPositions +} diff --git a/admin-lehrer/lib/navigation.ts b/admin-lehrer/lib/navigation.ts index c27c2f1..618b6cd 100644 --- a/admin-lehrer/lib/navigation.ts +++ b/admin-lehrer/lib/navigation.ts @@ -150,6 +150,15 @@ export const navigation: NavCategory[] = [ audience: ['Entwickler', 'Data Scientists'], subgroup: 'KI-Werkzeuge', }, + { + id: 'ocr-overlay', + name: 'OCR Overlay', + href: '/ai/ocr-overlay', + description: 'Ganzseitige Overlay-Rekonstruktion', + purpose: 'Arbeitsblatt ohne 
Spaltenerkennung direkt als Overlay rekonstruieren. Vereinfachte 7-Schritt-Pipeline.', + audience: ['Entwickler'], + subgroup: 'KI-Werkzeuge', + }, { id: 'test-quality', name: 'Test Quality (BQAS)',