Replace old OCR pipeline with Kombi pipeline + add IPA/syllable toggles
Some checks failed
CI / go-lint (push) Has been skipped
CI / python-lint (push) Has been skipped
CI / nodejs-lint (push) Has been skipped
CI / test-go-school (push) Successful in 41s
CI / test-go-edu-search (push) Successful in 37s
CI / test-python-klausur (push) Failing after 2m22s
CI / test-python-agent-core (push) Successful in 32s
CI / test-nodejs-website (push) Successful in 33s

Backend:
- _run_ocr_pipeline_for_page() now runs the full Kombi pipeline:
  orientation → deskew → dewarp → content crop → dual-engine OCR
  (RapidOCR + Tesseract merge) → _build_grid_core() with pipe-autocorrect,
  word-gap merge, dictionary detection
- Accepts ipa_mode and syllable_mode query params on process-single-page
- Pipeline sessions are visible in admin OCR Kombi UI for debugging

Frontend (vocab-worksheet):
- New "Anzeigeoptionen" section with IPA and syllable toggles
- Settings are passed to process-single-page as query parameters

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
Benjamin Admin
2026-04-11 00:43:42 +02:00
parent 2828871e42
commit 3b78baf37f
2 changed files with 235 additions and 137 deletions

View File

@@ -156,6 +156,8 @@ export default function VocabWorksheetPage() {
const [includeSolutions, setIncludeSolutions] = useState(true)
const [lineHeight, setLineHeight] = useState('normal')
const [selectedFormat, setSelectedFormat] = useState<WorksheetFormat>('standard')
const [showIpa, setShowIpa] = useState(false)
const [showSyllables, setShowSyllables] = useState(false)
// Export state
const [worksheetId, setWorksheetId] = useState<string | null>(null)
@@ -431,7 +433,9 @@ export default function VocabWorksheetPage() {
const API_BASE = getApiBase()
try {
const res = await fetch(`${API_BASE}/api/v1/vocab/sessions/${session!.id}/process-single-page/${pageIndex}`, {
const ipaParam = showIpa ? 'auto' : 'none'
const syllableParam = showSyllables ? 'auto' : 'none'
const res = await fetch(`${API_BASE}/api/v1/vocab/sessions/${session!.id}/process-single-page/${pageIndex}?ipa_mode=${ipaParam}&syllable_mode=${syllableParam}`, {
method: 'POST',
headers: { 'Content-Type': 'application/json' },
body: JSON.stringify({ ocr_prompts: ocrPrompts }),
@@ -1907,6 +1911,27 @@ export default function VocabWorksheetPage() {
)}
</div>
{/* OCR display options */}
<div className={`p-4 rounded-xl border ${isDark ? 'bg-white/5 border-white/10' : 'bg-gray-50 border-gray-200'} space-y-3`}>
<h4 className={`text-sm font-medium ${isDark ? 'text-white/70' : 'text-slate-600'}`}>Anzeigeoptionen</h4>
<div className="flex flex-col gap-2">
<label className={`flex items-center gap-3 cursor-pointer ${isDark ? 'text-white' : 'text-slate-900'}`}>
<input type="checkbox" checked={showIpa} onChange={(e) => setShowIpa(e.target.checked)} className="w-5 h-5 rounded border-2 border-purple-500 text-purple-500 focus:ring-purple-500" />
<div>
<span>Lautschrift (IPA) anzeigen</span>
<p className={`text-xs ${isDark ? 'text-white/40' : 'text-slate-400'}`}>z.B. achieve [əˈtʃiːv]</p>
</div>
</label>
<label className={`flex items-center gap-3 cursor-pointer ${isDark ? 'text-white' : 'text-slate-900'}`}>
<input type="checkbox" checked={showSyllables} onChange={(e) => setShowSyllables(e.target.checked)} className="w-5 h-5 rounded border-2 border-purple-500 text-purple-500 focus:ring-purple-500" />
<div>
<span>Silbentrennung anzeigen</span>
<p className={`text-xs ${isDark ? 'text-white/40' : 'text-slate-400'}`}>z.B. Schmet|ter|ling</p>
</div>
</label>
</div>
</div>
<button
onClick={generateWorksheet}
disabled={(selectedFormat === 'standard' && selectedTypes.length === 0) || isGenerating}