feat(ocr-pipeline): add LLM-based OCR correction step (Step 6)

Replace the placeholder "Koordinaten" step with an LLM review step that
sends vocab entries to qwen3:30b-a3b via Ollama for OCR error correction
(e.g. "8en" → "Ben"). Teachers can review, accept/reject individual
corrections in a diff table before applying them.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
Benjamin Admin
2026-03-02 11:13:17 +01:00
parent e9f368d3ec
commit 938d1d69cf
5 changed files with 586 additions and 5 deletions

View File

@@ -8,7 +8,7 @@ import { StepDewarp } from '@/components/ocr-pipeline/StepDewarp'
import { StepColumnDetection } from '@/components/ocr-pipeline/StepColumnDetection'
import { StepRowDetection } from '@/components/ocr-pipeline/StepRowDetection'
import { StepWordRecognition } from '@/components/ocr-pipeline/StepWordRecognition'
import { StepCoordinates } from '@/components/ocr-pipeline/StepCoordinates'
import { StepLlmReview } from '@/components/ocr-pipeline/StepLlmReview'
import { StepReconstruction } from '@/components/ocr-pipeline/StepReconstruction'
import { StepGroundTruth } from '@/components/ocr-pipeline/StepGroundTruth'
import { PIPELINE_STEPS, type PipelineStep, type SessionListItem } from './types'
@@ -155,7 +155,7 @@ export default function OcrPipelinePage() {
3: 'Spalten',
4: 'Zeilen',
5: 'Woerter',
6: 'Koordinaten',
6: 'LLM-Korrektur',
7: 'Rekonstruktion',
8: 'Validierung',
}
@@ -173,7 +173,7 @@ export default function OcrPipelinePage() {
case 4:
return <StepWordRecognition sessionId={sessionId} onNext={handleNext} goToStep={goToStep} />
case 5:
return <StepCoordinates />
return <StepLlmReview sessionId={sessionId} onNext={handleNext} />
case 6:
return <StepReconstruction />
case 7: