feat: PaddleOCR Remote-Engine (PP-OCRv5 Latin auf Hetzner x86_64)
Some checks failed
CI / go-lint (push) Has been skipped
CI / python-lint (push) Has been skipped
CI / nodejs-lint (push) Has been skipped
CI / test-go-school (push) Successful in 31s
CI / test-go-edu-search (push) Successful in 29s
CI / test-python-klausur (push) Failing after 2m7s
CI / test-python-agent-core (push) Successful in 21s
CI / test-nodejs-website (push) Successful in 21s

PaddleOCR als neue engine=paddle Option in der OCR-Pipeline.
Microservice auf Hetzner (paddleocr-service/), async HTTP-Client
(paddleocr_remote.py), Frontend-Dropdown, automatisch words_first.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
Benjamin Admin
2026-03-12 09:31:22 +01:00
parent ced5bb3dd3
commit a6069631cc
10 changed files with 354 additions and 27 deletions

View File

@@ -60,7 +60,7 @@ export function StepWordRecognition({ sessionId, onNext, goToStep, skipHealGaps
const [editedEntries, setEditedEntries] = useState<WordEntry[]>([])
const [editedCells, setEditedCells] = useState<GridCell[]>([])
const [mode, setMode] = useState<'overview' | 'labeling'>('overview')
const [ocrEngine, setOcrEngine] = useState<'auto' | 'tesseract' | 'rapid'>('auto')
const [ocrEngine, setOcrEngine] = useState<'auto' | 'tesseract' | 'rapid' | 'paddle'>('auto')
const [usedEngine, setUsedEngine] = useState<string>('')
const [pronunciation, setPronunciation] = useState<'british' | 'american'>('british')
const [gridMethod, setGridMethod] = useState<'v2' | 'words_first'>('v2')
@@ -810,12 +810,13 @@ export function StepWordRecognition({ sessionId, onNext, goToStep, skipHealGaps
{/* OCR Engine selector */}
<select
value={ocrEngine}
onChange={(e) => setOcrEngine(e.target.value as 'auto' | 'tesseract' | 'rapid')}
onChange={(e) => setOcrEngine(e.target.value as 'auto' | 'tesseract' | 'rapid' | 'paddle')}
className="px-2 py-1.5 text-xs border rounded-lg dark:bg-gray-700 dark:border-gray-600"
>
<option value="auto">Auto (RapidOCR wenn verfuegbar)</option>
<option value="rapid">RapidOCR (ONNX)</option>
<option value="tesseract">Tesseract</option>
<option value="paddle">PaddleOCR (Hetzner)</option>
</select>
{/* Pronunciation selector (only for vocab) */}
@@ -843,6 +844,8 @@ export function StepWordRecognition({ sessionId, onNext, goToStep, skipHealGaps
<span className={`px-2 py-0.5 rounded text-[10px] uppercase font-semibold ${
usedEngine === 'rapid'
? 'bg-purple-100 dark:bg-purple-900/30 text-purple-700 dark:text-purple-300'
: usedEngine === 'paddle'
? 'bg-blue-100 dark:bg-blue-900/30 text-blue-700 dark:text-blue-300'
: 'bg-gray-100 dark:bg-gray-700 text-gray-600 dark:text-gray-400'
}`}>
{usedEngine}