From 9b0e31097893ea489fc9485634ea488b1bf6452a Mon Sep 17 00:00:00 2001 From: Benjamin Admin Date: Fri, 17 Apr 2026 00:46:15 +0200 Subject: [PATCH] Fix: reprocess button works after session resume + apply merge logic Two bugs fixed: 1. reprocessPages() failed silently after session resume because successfulPages was empty. Now derives pages from vocabulary source_page or selectedPages as fallback. 2. process-single-page endpoint built vocabulary entries WITHOUT applying merge logic (_merge_wrapped_rows, _merge_continuation_rows). Now applies full merge pipeline after vocabulary extraction. Co-Authored-By: Claude Opus 4.6 (1M context) --- .../backend/vocab_worksheet_api.py | 41 +++++++++++++++++++ .../app/vocab-worksheet/useVocabWorksheet.ts | 21 +++++++++- 2 files changed, 60 insertions(+), 2 deletions(-) diff --git a/klausur-service/backend/vocab_worksheet_api.py b/klausur-service/backend/vocab_worksheet_api.py index 43d2a45..91e7307 100644 --- a/klausur-service/backend/vocab_worksheet_api.py +++ b/klausur-service/backend/vocab_worksheet_api.py @@ -77,6 +77,11 @@ try: render_pdf_high_res, PageRegion, RowGeometry, ) + from cv_cell_grid import ( + _merge_wrapped_rows, + _merge_phonetic_continuation_rows, + _merge_continuation_rows, + ) from ocr_pipeline_session_store import ( create_session_db as create_pipeline_session_db, update_session_db as update_pipeline_session_db, @@ -1696,6 +1701,42 @@ async def _run_ocr_pipeline_for_page( }) extraction_source = f"generic ({len(all_ci)} cols)" + # --- Post-processing: merge cell-wrap continuation rows --- + if len(page_vocabulary) >= 2: + try: + # Convert to internal format (example_sentence → example) + internal = [] + for v in page_vocabulary: + internal.append({ + 'row_index': len(internal), + 'english': v.get('english', ''), + 'german': v.get('german', ''), + 'example': v.get('example_sentence', ''), + }) + + n_before = len(internal) + internal = _merge_wrapped_rows(internal) + internal = _merge_phonetic_continuation_rows(internal) + internal = _merge_continuation_rows(internal) + + if len(internal) < n_before: + # Rebuild page_vocabulary from merged entries + merged_vocab = [] + for entry in internal: + if not entry.get('english') and not entry.get('german'): + continue + merged_vocab.append({ + 'id': str(uuid.uuid4()), + 'english': entry.get('english', ''), + 'german': entry.get('german', ''), + 'example_sentence': entry.get('example', ''), + 'source_page': page_number + 1, + }) + logger.info(f" row merging: {n_before} → {len(merged_vocab)} entries") + page_vocabulary = merged_vocab + except Exception as e: + logger.warning(f" row merging failed (non-critical): {e}") + logger.info(f" vocab extraction: {len(page_vocabulary)} entries via {extraction_source}") total_duration = _time.time() - t_total diff --git a/studio-v2/app/vocab-worksheet/useVocabWorksheet.ts b/studio-v2/app/vocab-worksheet/useVocabWorksheet.ts index da95877..72a2ee3 100644 --- a/studio-v2/app/vocab-worksheet/useVocabWorksheet.ts +++ b/studio-v2/app/vocab-worksheet/useVocabWorksheet.ts @@ -760,11 +760,28 @@ export function useVocabWorksheet(): VocabWorksheetHook { // Reprocess all successful pages with new IPA/syllable modes const reprocessPages = (ipa: IpaMode, syllable: SyllableMode) => { - if (!session || successfulPages.length === 0) return + if (!session) return + + // Determine pages to reprocess: use successfulPages if available, + // otherwise derive from vocabulary source_page or selectedPages + let pagesToReprocess: number[] + if (successfulPages.length > 0) { + pagesToReprocess = successfulPages.map(p => p - 1) + } else if (vocabulary.length > 0) { + // Derive from vocabulary entries' source_page (1-indexed → 0-indexed) + const pageSet = new Set(vocabulary.map(v => (v.source_page || 1) - 1)) + pagesToReprocess = [...pageSet].sort((a, b) => a - b) + } else if (selectedPages.length > 0) { + pagesToReprocess = [...selectedPages] + } else { + // Fallback: try page 0 + pagesToReprocess = [0] + } + + if (pagesToReprocess.length === 0) return setIsExtracting(true) setExtractionStatus('Verarbeite mit neuen Einstellungen...') - const pagesToReprocess = successfulPages.map(p => p - 1) const API_BASE = getApiBase() ;(async () => {