feat(ocr): Add CV Document Reconstruction Pipeline for vocabulary extraction

New OCR method using classical computer vision: high-res rendering (432 DPI),
deskew, dewarp, binarization, projection-profile layout analysis, multi-pass
Tesseract OCR with a region-specific page segmentation mode (PSM), and
Y-coordinate line alignment. Also includes a bug fix for the
convert_pdf_to_image call (line 869) and 39 unit tests.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
BreakPilot Dev
2026-02-09 23:52:35 +01:00
parent 981e5477a5
commit 2dd36099f1
4 changed files with 2096 additions and 50 deletions

View File

@@ -93,6 +93,15 @@ const OCR_METHODS = {
description: 'ARM64-nativ, Standard',
enabled: true,
},
cv_pipeline: {
id: 'cv_pipeline',
name: 'Loesung E: Document Reconstruction',
shortName: 'E: Doc Recon',
model: 'opencv + tesseract (multi-pass)',
color: 'green',
description: 'CV-Pipeline: Deskew, Dewarp, Binarisierung, Multi-Pass OCR',
enabled: true,
},
}
export default function OCRComparePage() {
@@ -115,7 +124,7 @@ export default function OCRComparePage() {
const [uploading, setUploading] = useState(false)
// Method Selection
const [selectedMethods, setSelectedMethods] = useState<string[]>(['vision_llm', 'tesseract'])
const [selectedMethods, setSelectedMethods] = useState<string[]>(['vision_llm', 'tesseract', 'cv_pipeline'])
// QR Upload State
const [showQRModal, setShowQRModal] = useState(false)
@@ -133,6 +142,9 @@ export default function OCRComparePage() {
const [showGridOverlay, setShowGridOverlay] = useState(true)
const [selectedCell, setSelectedCell] = useState<GridCell | null>(null)
const [showCellDialog, setShowCellDialog] = useState(false)
const [showMmGrid, setShowMmGrid] = useState(false)
const [showTextAtPosition, setShowTextAtPosition] = useState(false)
const [editableText, setEditableText] = useState(false)
// Block Review State
const [blockReviewMode, setBlockReviewMode] = useState(false)
@@ -651,6 +663,7 @@ export default function OCRComparePage() {
blue: { bg: 'bg-blue-50', border: 'border-blue-300', text: 'text-blue-700' },
red: { bg: 'bg-red-50', border: 'border-red-300', text: 'text-red-700' },
purple: { bg: 'bg-purple-50', border: 'border-purple-300', text: 'text-purple-700' },
green: { bg: 'bg-green-50', border: 'border-green-300', text: 'text-green-700' },
}
return colors[color]?.[type] || colors.slate[type]
}
@@ -839,7 +852,7 @@ export default function OCRComparePage() {
{/* Grid Analysis Button */}
<button
onClick={analyzeGrid}
disabled={analyzingGrid}
disabled={analyzingGrid || !sessionId || !result}
className="w-full px-4 py-2 bg-teal-600 text-white rounded-lg font-medium hover:bg-teal-700 disabled:opacity-50 disabled:cursor-not-allowed text-sm"
>
{analyzingGrid ? (
@@ -874,6 +887,41 @@ export default function OCRComparePage() {
<span className="text-slate-700">Grid-Overlay anzeigen</span>
</label>
<label className="flex items-center gap-2 text-sm cursor-pointer">
<input
type="checkbox"
checked={showMmGrid}
onChange={(e) => setShowMmGrid(e.target.checked)}
className="rounded"
/>
<span className="text-slate-700">1mm Raster anzeigen</span>
</label>
<label className="flex items-center gap-2 text-sm cursor-pointer">
<input
type="checkbox"
checked={showTextAtPosition}
onChange={(e) => {
setShowTextAtPosition(e.target.checked)
if (!e.target.checked) setEditableText(false)
}}
className="rounded"
/>
<span className="text-slate-700">Text an Originalposition</span>
</label>
{showTextAtPosition && (
<label className="flex items-center gap-2 text-sm cursor-pointer ml-5">
<input
type="checkbox"
checked={editableText}
onChange={(e) => setEditableText(e.target.checked)}
className="rounded"
/>
<span className="text-slate-700">Text bearbeitbar</span>
</label>
)}
{/* Block Review Button */}
{result && nonEmptyBlockCount > 0 && (
<button
@@ -1122,7 +1170,20 @@ export default function OCRComparePage() {
selectedCell={selectedCell}
showEmpty={false}
showNumbers={blockReviewMode}
showTextLabels={true}
showTextLabels={!showTextAtPosition}
showMmGrid={showMmGrid}
showTextAtPosition={showTextAtPosition}
editableText={editableText}
onCellTextChange={(cell, newText) => {
if (!gridData) return
const newCells = gridData.cells.map(row =>
row.map(c => c.row === cell.row && c.col === cell.col
? { ...c, text: newText, status: 'manual' as const }
: c
)
)
setGridData({ ...gridData, cells: newCells })
}}
highlightedBlockNumber={blockReviewMode ? currentBlockNumber : null}
className={`rounded-lg border border-slate-200 overflow-hidden ${isFullscreen ? 'max-h-[80vh] mx-auto' : 'w-full max-w-2xl mx-auto'}`}
/>
@@ -1229,7 +1290,20 @@ export default function OCRComparePage() {
selectedCell={selectedCell}
showEmpty={false}
showNumbers={blockReviewMode}
showTextLabels={!blockReviewMode}
showTextLabels={!blockReviewMode && !showTextAtPosition}
showMmGrid={showMmGrid}
showTextAtPosition={showTextAtPosition}
editableText={editableText}
onCellTextChange={(cell, newText) => {
if (!gridData) return
const newCells = gridData.cells.map(row =>
row.map(c => c.row === cell.row && c.col === cell.col
? { ...c, text: newText, status: 'manual' as const }
: c
)
)
setGridData({ ...gridData, cells: newCells })
}}
highlightedBlockNumber={blockReviewMode ? currentBlockNumber : null}
className="rounded-lg border border-slate-200 overflow-hidden"
/>