feat(ocr): Add Grid Detection v4 tests, docs, and SBOM update

- Add comprehensive tests for grid_detection_service.py (31 tests)
  - mm coordinate conversion tests
  - Deskew calculation tests
  - Column detection tests
  - Integration tests for vocabulary tables

- Add OCR-Compare documentation (OCR-Compare.md)
  - mm coordinate system documentation
  - Deskew correction documentation
  - Worksheet Editor integration guide
  - API endpoints documentation

- Add TypeScript tests for ocr-integration.ts
  - mm to pixel conversion tests
  - OCR export format tests
  - localStorage operations tests

- Update SBOM to v1.5.0
  - Add OCR Grid Detection System section
  - Document Fabric.js (MIT) for Worksheet Editor
  - Document NumPy and OpenCV usage

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
BreakPilot Dev
2026-02-08 21:31:35 -08:00
commit baee45b861
4 changed files with 2421 additions and 0 deletions

View File

@@ -0,0 +1,466 @@
/**
* Tests for OCR Integration Utility
*
* Tests cover:
* - mm to pixel conversion
* - OCR data export format
* - LocalStorage operations
* - Canvas text properties (createTextProps) and column colors
*/
import {
MM_TO_PX,
A4_WIDTH_MM,
A4_HEIGHT_MM,
A4_WIDTH_PX,
A4_HEIGHT_PX,
mmToPixel,
pixelToMm,
getColumnColor,
createTextProps,
exportOCRData,
saveOCRExportToStorage,
loadLatestOCRExport,
loadOCRExport,
clearOCRExports,
type OCRWord,
type OCRExportData,
type ColumnType,
} from './ocr-integration'
// Mock localStorage
/**
 * In-memory stand-in for `window.localStorage`.
 *
 * `getItem`, `setItem`, `removeItem` and `clear` are `jest.fn` wrappers so
 * tests can assert on calls or override behaviour; `keys()` is a plain helper
 * that lists the keys currently held by the mock.
 */
const localStorageMock = (() => {
  // A Map (instead of a plain object) keeps keys such as "toString" or
  // "constructor" from resolving to inherited Object.prototype members.
  const store = new Map<string, string>()
  return {
    // `??` rather than `||`: a stored empty string must come back as '',
    // matching Storage.getItem, and only a missing key yields null.
    getItem: jest.fn((key: string) => store.get(key) ?? null),
    setItem: jest.fn((key: string, value: string) => {
      store.set(key, value)
    }),
    removeItem: jest.fn((key: string) => {
      store.delete(key)
    }),
    clear: jest.fn(() => {
      store.clear()
    }),
    keys: () => Array.from(store.keys()),
  }
})()
// Install the mock so code under test that reads `localStorage` hits it.
Object.defineProperty(window, 'localStorage', { value: localStorageMock })
/** Checks the exported unit-conversion and A4 page-size constants. */
describe('Constants', () => {
  test('MM_TO_PX is correct for 96 DPI', () => {
    // 96 px per inch divided by 25.4 mm per inch = 3.7795275591 px per mm.
    const expectedPxPerMm = 3.7795275591
    expect(MM_TO_PX).toBeCloseTo(expectedPxPerMm, 8)
  })

  test('A4 dimensions in mm are correct', () => {
    const expectedWidthMm = 210
    const expectedHeightMm = 297
    expect(A4_WIDTH_MM).toBe(expectedWidthMm)
    expect(A4_HEIGHT_MM).toBe(expectedHeightMm)
  })

  test('A4 dimensions in pixels are calculated correctly', () => {
    // Rounded pixel sizes derived from the mm sizes: ~794 wide, ~1123 high.
    const expectedWidthPx = Math.round(210 * MM_TO_PX)
    const expectedHeightPx = Math.round(297 * MM_TO_PX)
    expect(A4_WIDTH_PX).toBe(expectedWidthPx)
    expect(A4_HEIGHT_PX).toBe(expectedHeightPx)
  })
})
/** Checks mmToPixel against hand-computed pixel values. */
describe('mmToPixel', () => {
  test('converts 0mm to 0px', () => {
    const px = mmToPixel(0)
    expect(px).toBe(0)
  })

  test('converts 1mm correctly', () => {
    const px = mmToPixel(1)
    expect(px).toBeCloseTo(3.7795275591, 8)
  })

  test('converts 100mm correctly', () => {
    const px = mmToPixel(100)
    expect(px).toBeCloseTo(377.95275591, 6)
  })

  test('converts A4 width correctly', () => {
    // 210 mm page width; compared to one decimal place only.
    const px = mmToPixel(210)
    expect(px).toBeCloseTo(793.7, 1)
  })
})
/** Checks pixelToMm, including that it inverts mmToPixel. */
describe('pixelToMm', () => {
  test('converts 0px to 0mm', () => {
    const mm = pixelToMm(0)
    expect(mm).toBe(0)
  })

  test('converts 100px correctly', () => {
    const mm = pixelToMm(100)
    expect(mm).toBeCloseTo(26.458, 2)
  })

  test('round-trip conversion is accurate', () => {
    // mm -> px -> mm should land back on the starting value.
    const startMm = 50
    const roundTripped = pixelToMm(mmToPixel(startMm))
    expect(roundTripped).toBeCloseTo(startMm, 8)
  })
})
/** Checks the default colour per column type and the option override. */
describe('getColumnColor', () => {
  test('returns blue for english column', () => {
    const color = getColumnColor('english')
    expect(color).toBe('#1e40af')
  })

  test('returns green for german column', () => {
    const color = getColumnColor('german')
    expect(color).toBe('#166534')
  })

  test('returns purple for example column', () => {
    const color = getColumnColor('example')
    expect(color).toBe('#6b21a8')
  })

  test('returns gray for unknown column', () => {
    const color = getColumnColor('unknown')
    expect(color).toBe('#374151')
  })

  test('uses custom colors from options', () => {
    // An explicit englishColor option is expected to win over the default.
    const customRed = '#ff0000'
    const color = getColumnColor('english', { englishColor: customRed })
    expect(color).toBe(customRed)
  })
})
/**
 * Checks the text-object properties createTextProps builds from an OCR word:
 * object type, mm-to-pixel placement, offsets, fill colour, attached OCR
 * metadata and font overrides.
 */
describe('createTextProps', () => {
  // Shared fixture: one recognised word in the english column.
  const mockWord: OCRWord = {
    text: 'house',
    x_mm: 21.0,
    y_mm: 44.55,
    width_mm: 52.5,
    height_mm: 8.91,
    column_type: 'english',
    logical_row: 0,
  }

  test('creates correct type', () => {
    const props = createTextProps(mockWord)
    expect(props.type).toBe('i-text')
  })

  test('converts mm to pixels for left position', () => {
    const props = createTextProps(mockWord)
    expect(props.left).toBeCloseTo(21.0 * MM_TO_PX, 2)
  })

  test('converts mm to pixels for top position', () => {
    const props = createTextProps(mockWord)
    expect(props.top).toBeCloseTo(44.55 * MM_TO_PX, 2)
  })

  test('applies offset correctly', () => {
    // The expectations add the offsets to the mm coordinates before scaling.
    const props = createTextProps(mockWord, { offsetX: 5, offsetY: 10 })
    expect(props.left).toBeCloseTo((21.0 + 5) * MM_TO_PX, 2)
    expect(props.top).toBeCloseTo((44.55 + 10) * MM_TO_PX, 2)
  })

  test('sets fill color based on column type', () => {
    const props = createTextProps(mockWord)
    expect(props.fill).toBe('#1e40af') // English blue
  })

  test('includes OCR metadata', () => {
    const props = createTextProps(mockWord)
    expect(props.ocrMetadata).toBeDefined()
    // toMatchObject checks the listed fields without casting the metadata to
    // `any`, so the rest of the assertion stays type-checked.
    expect(props.ocrMetadata).toMatchObject({
      x_mm: 21.0,
      column_type: 'english',
      logical_row: 0,
    })
  })

  test('uses custom font family', () => {
    const props = createTextProps(mockWord, { fontFamily: 'Times New Roman' })
    expect(props.fontFamily).toBe('Times New Roman')
  })

  test('uses custom font size', () => {
    const props = createTextProps(mockWord, { fontSize: 16 })
    expect(props.fontSize).toBe(16)
  })
})
/**
 * Checks the export payload exportOCRData builds from grid data: envelope
 * fields, page dimensions, flattened words, detected columns and timestamp.
 */
describe('exportOCRData', () => {
  // Two recognised cells that share one logical row.
  const englishCell = {
    text: 'house',
    x_mm: 21.0,
    y_mm: 44.55,
    width_mm: 52.5,
    height_mm: 8.91,
    column_type: 'english' as ColumnType,
    logical_row: 0,
    status: 'recognized',
  }
  const germanCell = {
    text: 'Haus',
    x_mm: 80.0,
    y_mm: 44.55,
    width_mm: 40.0,
    height_mm: 8.91,
    column_type: 'german' as ColumnType,
    logical_row: 0,
    status: 'recognized',
  }
  const mockGridData = {
    cells: [[englishCell, germanCell]],
    detected_columns: [
      { column_type: 'english', x_start_mm: 20.0, x_end_mm: 73.5 },
      { column_type: 'german', x_start_mm: 74.0, x_end_mm: 140.0 },
    ],
    page_dimensions: {
      width_mm: 210,
      height_mm: 297,
      format: 'A4',
    },
  }

  // Runs the export with the shared fixture, session id and page number.
  const exportFixture = () => exportOCRData(mockGridData, 'session-123', 1)

  test('creates correct version', () => {
    expect(exportFixture().version).toBe('1.0')
  })

  test('sets correct source', () => {
    expect(exportFixture().source).toBe('ocr-compare')
  })

  test('includes session ID and page number', () => {
    const exported = exportFixture()
    expect(exported.session_id).toBe('session-123')
    expect(exported.page_number).toBe(1)
  })

  test('includes page dimensions', () => {
    const { page_dimensions: dims } = exportFixture()
    expect(dims.width_mm).toBe(210)
    expect(dims.height_mm).toBe(297)
    expect(dims.format).toBe('A4')
  })

  test('converts cells to words', () => {
    const { words } = exportFixture()
    expect(words).toHaveLength(2)
    expect(words[0].text).toBe('house')
    expect(words[0].column_type).toBe('english')
  })

  test('filters empty cells', () => {
    // Append one empty cell to the fixture row; it must not become a word.
    const rowWithEmptyCell = [
      ...mockGridData.cells[0],
      { text: '', status: 'empty' }, // Empty cell
    ]
    const dataWithEmpty = {
      ...mockGridData,
      cells: [rowWithEmptyCell],
    }
    const exported = exportOCRData(dataWithEmpty, 'session-123', 1)
    expect(exported.words).toHaveLength(2) // Empty cell excluded
  })

  test('includes detected columns', () => {
    const { detected_columns: columns } = exportFixture()
    expect(columns).toHaveLength(2)
    expect(columns[0].column_type).toBe('english')
  })

  test('sets exported_at timestamp', () => {
    // Bracket the call with two ISO timestamps and compare them as strings.
    const lowerBound = new Date().toISOString()
    const exported = exportFixture()
    const upperBound = new Date().toISOString()
    expect(exported.exported_at >= lowerBound).toBe(true)
    expect(exported.exported_at <= upperBound).toBe(true)
  })
})
/**
 * Checks the localStorage-backed helpers: saving an export, loading the
 * latest or a specific export, and clearing all OCR export keys.
 *
 * Each test starts from an empty mock store, empty mock call history and the
 * default getItem behaviour, so tests do not depend on execution order.
 */
describe('localStorage operations', () => {
  // Remember the implementation getItem was created with, so overrides that
  // individual tests install via mockImplementation() can be undone.
  const defaultGetItem = localStorageMock.getItem.getMockImplementation()

  beforeEach(() => {
    localStorageMock.clear()
    // Drop recorded calls; otherwise toHaveBeenCalledWith could be satisfied
    // by a call made during an earlier test.
    jest.clearAllMocks()
    // clearAllMocks keeps implementations, so restore getItem explicitly.
    if (defaultGetItem) {
      localStorageMock.getItem.mockImplementation(defaultGetItem)
    }
  })

  const mockExportData: OCRExportData = {
    version: '1.0',
    source: 'ocr-compare',
    exported_at: '2026-02-08T12:00:00Z',
    session_id: 'session-123',
    page_number: 1,
    page_dimensions: {
      width_mm: 210,
      height_mm: 297,
      format: 'A4',
    },
    words: [
      {
        text: 'house',
        x_mm: 21.0,
        y_mm: 44.55,
        width_mm: 52.5,
        height_mm: 8.91,
        column_type: 'english',
        logical_row: 0,
      },
    ],
    detected_columns: [],
  }

  describe('saveOCRExportToStorage', () => {
    test('saves data to localStorage', () => {
      saveOCRExportToStorage(mockExportData)
      expect(localStorageMock.setItem).toHaveBeenCalledWith(
        'ocr_export_session-123_1',
        expect.any(String)
      )
    })

    test('sets latest export key', () => {
      saveOCRExportToStorage(mockExportData)
      expect(localStorageMock.setItem).toHaveBeenCalledWith(
        'ocr_export_latest',
        'ocr_export_session-123_1'
      )
    })
  })

  describe('loadLatestOCRExport', () => {
    test('returns null when no export exists', () => {
      const result = loadLatestOCRExport()
      expect(result).toBeNull()
    })

    test('loads latest export data', () => {
      // Manually set up the mock
      localStorageMock.setItem(
        'ocr_export_session-123_1',
        JSON.stringify(mockExportData)
      )
      localStorageMock.setItem('ocr_export_latest', 'ocr_export_session-123_1')
      // Pin getItem to the expected values for this test only; beforeEach
      // restores the default implementation afterwards.
      localStorageMock.getItem.mockImplementation((key: string) => {
        if (key === 'ocr_export_latest') return 'ocr_export_session-123_1'
        if (key === 'ocr_export_session-123_1')
          return JSON.stringify(mockExportData)
        return null
      })
      const result = loadLatestOCRExport()
      expect(result).not.toBeNull()
      expect(result?.session_id).toBe('session-123')
    })
  })

  describe('loadOCRExport', () => {
    test('returns null for non-existent session', () => {
      const result = loadOCRExport('nonexistent', 1)
      expect(result).toBeNull()
    })

    test('loads specific export by session and page', () => {
      localStorageMock.getItem.mockImplementation((key: string) => {
        if (key === 'ocr_export_session-123_1')
          return JSON.stringify(mockExportData)
        return null
      })
      const result = loadOCRExport('session-123', 1)
      expect(result).not.toBeNull()
      expect(result?.page_number).toBe(1)
    })

    test('handles JSON parse errors gracefully', () => {
      localStorageMock.getItem.mockImplementation((key: string) => {
        if (key === 'ocr_export_session-123_1') return 'invalid json'
        return null
      })
      const result = loadOCRExport('session-123', 1)
      expect(result).toBeNull()
    })
  })

  describe('clearOCRExports', () => {
    test('removes all OCR export keys', () => {
      // Three OCR export keys plus one unrelated key that must survive.
      const storedKeys = [
        'ocr_export_session-1_1',
        'ocr_export_session-2_1',
        'ocr_export_latest',
        'other_key',
      ]
      const originalMockKeys = localStorageMock.keys
      const originalKeys = Object.keys

      // Set up mock to return keys
      Object.defineProperty(localStorageMock, 'keys', {
        value: () => [...storedKeys],
      })
      // Mock Object.keys(localStorage)
      Object.keys = jest.fn((obj) => {
        if (obj === localStorage) {
          return [...storedKeys]
        }
        return originalKeys(obj)
      })

      try {
        clearOCRExports()
      } finally {
        // Always undo the global patches, even if clearOCRExports throws, so
        // a failure here cannot corrupt later tests.
        Object.keys = originalKeys
        Object.defineProperty(localStorageMock, 'keys', {
          value: originalMockKeys,
        })
      }

      expect(localStorageMock.removeItem).toHaveBeenCalledWith(
        'ocr_export_session-1_1'
      )
      expect(localStorageMock.removeItem).toHaveBeenCalledWith(
        'ocr_export_session-2_1'
      )
      expect(localStorageMock.removeItem).toHaveBeenCalledWith(
        'ocr_export_latest'
      )
      // Keys without the OCR export prefix must be left alone.
      expect(localStorageMock.removeItem).not.toHaveBeenCalledWith('other_key')
    })
  })
})
/** Boundary inputs for the conversion, text-props and export helpers. */
describe('Edge Cases', () => {
  test('handles negative mm values', () => {
    const px = mmToPixel(-10)
    expect(px).toBeCloseTo(-37.795, 2)
  })

  test('handles very large mm values', () => {
    const px = mmToPixel(10000)
    expect(px).toBeCloseTo(37795.275591, 2)
  })

  test('handles word with missing optional fields', () => {
    // Minimal word at the page origin in the 'unknown' column.
    const minimalWord: OCRWord = {
      text: 'test',
      x_mm: 0,
      y_mm: 0,
      width_mm: 10,
      height_mm: 5,
      column_type: 'unknown',
      logical_row: 0,
    }
    const textProps = createTextProps(minimalWord)
    expect(textProps).toBeDefined()
    expect(textProps.text).toBe('test')
  })

  test('handles empty words array in export', () => {
    // A grid with no cells should export an empty word list.
    const emptyGrid = {
      cells: [],
      detected_columns: [],
      page_dimensions: { width_mm: 210, height_mm: 297, format: 'A4' },
    }
    const exported = exportOCRData(emptyGrid, 'session', 1)
    expect(exported.words).toHaveLength(0)
  })
})