diff --git a/admin-lehrer/components/ocr-pipeline/StepLlmReview.tsx b/admin-lehrer/components/ocr-pipeline/StepLlmReview.tsx index 9f9161c..45ee16e 100644 --- a/admin-lehrer/components/ocr-pipeline/StepLlmReview.tsx +++ b/admin-lehrer/components/ocr-pipeline/StepLlmReview.tsx @@ -221,6 +221,29 @@ export function StepLlmReview({ sessionId, onNext }: StepLlmReviewProps) { positions.set(cell.cell_id, wordPos) } + // Normalise: collect every fontRatio, find the most common one (mode) and apply it to all word positions + const allRatios: number[] = [] + for (const wps of positions.values()) { + for (const wp of wps) allRatios.push(wp.fontRatio) + } + if (allRatios.length > 0) { + // Bucket ratios to the nearest 0.02 and count each bucket to find the mode. NOTE(review): Map is built without type arguments here - confirm key and count are typed as number + const buckets = new Map() + for (const r of allRatios) { + const key = Math.round(r * 50) / 50 // round to nearest 0.02 + buckets.set(key, (buckets.get(key) || 0) + 1) + } + let modeRatio = allRatios[0] + let modeCount = 0 + for (const [ratio, count] of buckets) { + if (count > modeCount) { modeRatio = ratio; modeCount = count } + } + // Overwrite every word position with the mode bucket value (the rounded key, not a raw ratio); on a tie the first bucket in Map insertion order wins because the comparison above is strict + for (const wps of positions.values()) { + for (const wp of wps) wp.fontRatio = modeRatio + } + } + setCellWordPositions(positions) } img.src = imgUrl