diff --git a/klausur-service/backend/cv_vocab_pipeline.py b/klausur-service/backend/cv_vocab_pipeline.py index 5acaf1c..72b22a7 100644 --- a/klausur-service/backend/cv_vocab_pipeline.py +++ b/klausur-service/backend/cv_vocab_pipeline.py @@ -2264,10 +2264,10 @@ def build_word_grid( ) cell_lang = lang_map.get(col.type, lang) - words = ocr_region(ocr_img, cell_region, lang=cell_lang, psm=7) + words = ocr_region(ocr_img, cell_region, lang=cell_lang, psm=6) - # Sort words by x position, join to text - words.sort(key=lambda w: w['left']) + # Sort words by Y then X (reading order for multi-line cells) + words.sort(key=lambda w: (w['top'], w['left'])) text = ' '.join(w['text'] for w in words) if words: avg_conf = sum(w['conf'] for w in words) / len(words)