This repository has been archived on 2026-02-15. You can view files and clone it. You cannot open issues or pull requests or push a commit.
Files
breakpilot-pwa/studio-v2/lib/worksheet-editor/ocr-integration.ts
BreakPilot Dev 916ecef476 feat(worksheet-editor): Add OCR import panel for grid analysis data
Add OCRImportPanel component and ocr-integration utilities to import
OCR-analyzed data from the grid detection service into the worksheet editor.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-02-09 23:50:35 +01:00

289 lines
8.0 KiB
TypeScript

/**
* OCR Integration Utility
*
* Provides types, conversion functions, and import/export utilities
* for sharing OCR data between admin-v2 (OCR Compare) and studio-v2 (Worksheet Editor).
*
* Both frontends proxy to klausur-service via /klausur-api/, enabling
* shared API-based storage since localStorage is port-isolated.
*/
// =============================================================================
// Constants
// =============================================================================
/**
 * Pixels per millimeter at 96 DPI.
 * One inch is 25.4 mm, so 96 px/inch divided by 25.4 mm/inch is about 3.7795275591 px/mm.
 */
export const MM_TO_PX = 96 / 25.4

/** Width of an A4 page in millimeters. */
export const A4_WIDTH_MM = 210
/** Height of an A4 page in millimeters. */
export const A4_HEIGHT_MM = 297

/** Width of an A4 page at 96 DPI, rounded to whole pixels. */
export const A4_WIDTH_PX = Math.round(MM_TO_PX * A4_WIDTH_MM)
/** Height of an A4 page at 96 DPI, rounded to whole pixels. */
export const A4_HEIGHT_PX = Math.round(MM_TO_PX * A4_HEIGHT_MM)
// =============================================================================
// Types
// =============================================================================
/** Role assigned to a column (and to every word inside it). */
export type ColumnType =
  | 'english'
  | 'german'
  | 'example'
  | 'unknown'

/** One recognized text item together with its placement on the page. */
export interface OCRWord {
  /** Recognized text content. */
  text: string
  /** Horizontal position in millimeters. */
  x_mm: number
  /** Vertical position in millimeters. */
  y_mm: number
  /** Width of the text box in millimeters. */
  width_mm: number
  /** Height of the text box in millimeters. */
  height_mm: number
  /** Column role this word belongs to. */
  column_type: ColumnType
  /** Index of the logical row the word was assigned to. */
  logical_row: number
  /** Optional confidence value; presumably from the OCR engine (range not defined in this file). */
  confidence?: number
}

/** Serialized OCR result for a single page, as exchanged between the two frontends. */
export interface OCRExportData {
  /** Version tag of the export format. */
  version: string
  /** Identifier of the tool that produced the export. */
  source: string
  /** Timestamp string describing when the export was created. */
  exported_at: string
  /** Session the page belongs to. */
  session_id: string
  /** Page number within the session. */
  page_number: number
  /** Physical size of the page. */
  page_dimensions: {
    width_mm: number
    height_mm: number
    /** Page format label. */
    format: string
  }
  /** All exported words of the page. */
  words: OCRWord[]
  /** Columns found on the page; the horizontal extent is optional. */
  detected_columns: {
    column_type: ColumnType
    x_start_mm?: number
    x_end_mm?: number
  }[]
}
// =============================================================================
// Conversion Functions
// =============================================================================
/**
 * Scale a length given in millimeters to pixels at 96 DPI.
 *
 * @param mm - Length in millimeters.
 * @returns The same length in pixels (not rounded).
 */
export function mmToPixel(mm: number): number {
  const pixels = MM_TO_PX * mm
  return pixels
}
/**
 * Scale a length given in pixels (at 96 DPI) to millimeters.
 *
 * @param px - Length in pixels.
 * @returns The same length in millimeters (not rounded).
 */
export function pixelToMm(px: number): number {
  const millimeters = px / MM_TO_PX
  return millimeters
}
// =============================================================================
// Color Functions
// =============================================================================
/** Optional per-column color overrides; any omitted entry falls back to a built-in default. */
interface ColorOptions {
  /** Color for the 'english' column. */
  englishColor?: string
  /** Color for the 'german' column. */
  germanColor?: string
  /** Color for the 'example' column. */
  exampleColor?: string
  /** Color for the 'unknown' column and for any unrecognized column type. */
  unknownColor?: string
}
/**
 * Resolve the text color used for a column type.
 *
 * @param columnType - Column role to look up.
 * @param options - Optional overrides; an override set to null/undefined is ignored.
 * @returns The override for that column type if present, otherwise the built-in
 *   default. Values other than 'english', 'german' and 'example' (including
 *   'unknown') all resolve to the "unknown" color.
 */
export function getColumnColor(
  columnType: ColumnType,
  options?: ColorOptions
): string {
  if (columnType === 'english') {
    return options?.englishColor ?? '#1e40af'
  }
  if (columnType === 'german') {
    return options?.germanColor ?? '#166534'
  }
  if (columnType === 'example') {
    return options?.exampleColor ?? '#6b21a8'
  }
  // 'unknown' and anything unexpected share the same fallback.
  return options?.unknownColor ?? '#374151'
}
// =============================================================================
// Canvas Integration
// =============================================================================
/** Optional placement and font settings applied when turning an OCR word into text props. */
interface TextPropsOptions {
  /** Horizontal shift added to the word's x_mm before conversion to pixels (defaults to 0). */
  offsetX?: number
  /** Vertical shift added to the word's y_mm before conversion to pixels (defaults to 0). */
  offsetY?: number
  /** Font family of the created text (defaults to 'Arial'). */
  fontFamily?: string
  /** Font size of the created text (defaults to 14). */
  fontSize?: number
}
/**
 * Build the property bag for a Fabric.js IText object from an OCR word.
 *
 * The word's millimeter position (plus the optional offsets, which are added
 * in millimeters before conversion) becomes the pixel `left`/`top`. The fill
 * color comes from the default column palette, since no color overrides are
 * forwarded. The original OCR measurements are kept under `ocrMetadata`.
 *
 * @param word - OCR word to place on the canvas.
 * @param options - Optional offsets and font settings.
 * @returns Plain object with the text properties.
 */
export function createTextProps(
  word: OCRWord,
  options?: TextPropsOptions
): Record<string, any> {
  const shiftX = options?.offsetX ?? 0
  const shiftY = options?.offsetY ?? 0
  const {
    x_mm,
    y_mm,
    width_mm,
    height_mm,
    column_type,
    logical_row,
    confidence,
  } = word

  // Untouched copy of the OCR measurements, kept alongside the pixel values.
  const ocrMetadata = {
    x_mm,
    y_mm,
    width_mm,
    height_mm,
    column_type,
    logical_row,
    confidence,
  }

  return {
    type: 'i-text',
    text: word.text,
    left: mmToPixel(x_mm + shiftX),
    top: mmToPixel(y_mm + shiftY),
    fontSize: options?.fontSize ?? 14,
    fontFamily: options?.fontFamily ?? 'Arial',
    fill: getColumnColor(column_type),
    editable: true,
    ocrMetadata,
  }
}
// =============================================================================
// Export Functions
// =============================================================================
/**
 * Flatten grid analysis data into the OCR export format.
 *
 * Cells are visited row by row. A cell is skipped when its `text` is falsy or
 * its `status` is 'empty'. Missing geometry and `logical_row` default to 0, and
 * a null/undefined `column_type` becomes 'unknown' (other values are passed
 * through unvalidated). The result is stamped with version '1.0', source
 * 'ocr-compare' and the current time in ISO format.
 *
 * @param gridData - Grid cells, detected columns and page dimensions.
 * @param sessionId - Session the page belongs to.
 * @param pageNumber - Page number within the session.
 * @returns The export payload; `page_dimensions` is the same object as in `gridData`.
 */
export function exportOCRData(
  gridData: {
    cells: Array<Array<Record<string, any>>>
    detected_columns: Array<Record<string, any>>
    page_dimensions: { width_mm: number; height_mm: number; format: string }
  },
  sessionId: string,
  pageNumber: number
): OCRExportData {
  // A cell is exportable only if it carries text and is not flagged as empty.
  const hasContent = (cell: Record<string, any>): boolean =>
    Boolean(cell.text) && cell.status !== 'empty'

  const toWord = (cell: Record<string, any>): OCRWord => ({
    text: cell.text,
    x_mm: cell.x_mm ?? 0,
    y_mm: cell.y_mm ?? 0,
    width_mm: cell.width_mm ?? 0,
    height_mm: cell.height_mm ?? 0,
    column_type: (cell.column_type as ColumnType) ?? 'unknown',
    logical_row: cell.logical_row ?? 0,
    confidence: cell.confidence,
  })

  const words: OCRWord[] = gridData.cells.flatMap((row) =>
    row.filter((cell) => hasContent(cell)).map((cell) => toWord(cell))
  )

  return {
    version: '1.0',
    source: 'ocr-compare',
    exported_at: new Date().toISOString(),
    session_id: sessionId,
    page_number: pageNumber,
    page_dimensions: gridData.page_dimensions,
    words,
    detected_columns: gridData.detected_columns.map((column) => {
      const { x_start_mm, x_end_mm } = column
      return {
        column_type: (column.column_type as ColumnType) ?? 'unknown',
        x_start_mm,
        x_end_mm,
      }
    }),
  }
}
// =============================================================================
// localStorage Operations (fallback)
// =============================================================================
/** Namespace prepended to every localStorage key written by this module. */
const STORAGE_PREFIX = 'ocr_export_'

/**
 * Key of the pointer entry that names the most recently saved export.
 * It starts with the same prefix as the export entries themselves.
 */
const LATEST_KEY = 'ocr_export_latest'
/**
 * Persist an OCR export in localStorage and mark it as the latest one.
 *
 * The entry is stored as JSON under a key built from the prefix, session id and
 * page number. The "latest" pointer entry is then set to that key. There is no
 * error handling here: anything thrown by JSON.stringify or localStorage
 * propagates to the caller.
 *
 * @param data - Export payload to store.
 */
export function saveOCRExportToStorage(data: OCRExportData): void {
  const { session_id, page_number } = data
  const storageKey = `${STORAGE_PREFIX}${session_id}_${page_number}`
  const serialized = JSON.stringify(data)

  localStorage.setItem(storageKey, serialized)
  localStorage.setItem(LATEST_KEY, storageKey)
}
/**
 * Read the most recently saved OCR export from localStorage.
 *
 * Follows the "latest" pointer entry to the actual export entry and parses it.
 * The parsed JSON is returned as-is (its shape is not validated).
 *
 * @returns The stored export, or null when the pointer or the entry is missing
 *   or empty, or when reading/parsing throws.
 */
export function loadLatestOCRExport(): OCRExportData | null {
  try {
    const pointer = localStorage.getItem(LATEST_KEY)
    const serialized = pointer ? localStorage.getItem(pointer) : null
    return serialized ? (JSON.parse(serialized) as OCRExportData) : null
  } catch {
    // Storage access or JSON parsing failed: treat as "nothing stored".
    return null
  }
}
/**
 * Read the OCR export of one session page from localStorage.
 *
 * The parsed JSON is returned as-is (its shape is not validated).
 *
 * @param sessionId - Session the page belongs to.
 * @param pageNumber - Page number within the session.
 * @returns The stored export, or null when no non-empty entry exists or when
 *   reading/parsing throws.
 */
export function loadOCRExport(
  sessionId: string,
  pageNumber: number
): OCRExportData | null {
  try {
    const serialized = localStorage.getItem(
      `${STORAGE_PREFIX}${sessionId}_${pageNumber}`
    )
    if (serialized) {
      return JSON.parse(serialized) as OCRExportData
    }
    return null
  } catch {
    // Storage access or JSON parsing failed: treat as "nothing stored".
    return null
  }
}
/**
 * Remove every localStorage entry whose key starts with the OCR export prefix.
 *
 * The key list is captured once up front, so removals do not disturb the
 * iteration.
 */
export function clearOCRExports(): void {
  Object.keys(localStorage)
    .filter((storedKey) => storedKey.startsWith(STORAGE_PREFIX))
    .forEach((storedKey) => {
      localStorage.removeItem(storedKey)
    })
}
// =============================================================================
// API Operations (primary - shared across ports)
// =============================================================================
/** Base path of the vocab endpoints, reached through the /klausur-api/ proxy. */
const API_BASE = '/klausur-api/api/v1/vocab'

/**
 * Save OCR export data via the API, mirroring it to localStorage as a fallback.
 *
 * The localStorage write is best-effort: if it throws (for example because the
 * quota is exceeded or storage is unavailable), the failure is logged and the
 * API request is still sent. This function never rejects because of the
 * fallback.
 *
 * @param data - Export payload to store.
 * @returns true when the API answered with a successful (2xx) status; false
 *   when the request failed or the server reported an error.
 */
export async function saveOCRExportToAPI(data: OCRExportData): Promise<boolean> {
  // Always try to save to localStorage as fallback, but never let a storage
  // failure prevent the primary API save.
  try {
    saveOCRExportToStorage(data)
  } catch (e) {
    console.warn('localStorage fallback save failed:', e)
  }

  try {
    // Escape the session id so characters such as '/', '?' or '#' cannot alter the route.
    const sessionSegment = encodeURIComponent(data.session_id)
    const res = await fetch(
      `${API_BASE}/sessions/${sessionSegment}/ocr-export/${data.page_number}`,
      {
        method: 'POST',
        headers: { 'Content-Type': 'application/json' },
        body: JSON.stringify(data),
      }
    )
    if (!res.ok) {
      console.warn(`API save failed with HTTP status ${res.status}`)
    }
    return res.ok
  } catch (e) {
    console.warn('API save failed, localStorage fallback used:', e)
    return false
  }
}
/**
 * Fetch the latest OCR export from the API, falling back to localStorage.
 *
 * The localStorage copy is used when the request throws, when the response
 * body cannot be parsed as JSON, or when the server answers with a non-OK
 * status. Thrown errors are logged as warnings. The API payload is returned
 * as-is (its shape is not validated).
 *
 * @returns The latest export, or null when neither source has one.
 */
export async function loadLatestOCRExportFromAPI(): Promise<OCRExportData | null> {
  try {
    const response = await fetch(`${API_BASE}/ocr-export/latest`)
    if (response.ok) {
      const payload = (await response.json()) as OCRExportData
      return payload
    }
  } catch (err) {
    console.warn('API load failed, trying localStorage fallback:', err)
  }

  // Reached on a non-OK response or after a logged failure.
  return loadLatestOCRExport()
}