diff --git a/klausur-service/backend/cv_words_first.py b/klausur-service/backend/cv_words_first.py index b62b547..723f62c 100644 --- a/klausur-service/backend/cv_words_first.py +++ b/klausur-service/backend/cv_words_first.py @@ -191,8 +191,16 @@ def _build_cells( # but the overlay slide mechanism expects one box per word. Split multi-word # boxes into individual word positions proportional to character length. # Also split at "[" boundaries (IPA patterns like "badge[bxd3]"). + # + # Sort in reading order: group by Y (same visual line), then sort by X. + # Simple (top, left) sort fails when words on the same line have slightly + # different top values (1-6px), causing wrong word order. + y_tol_wb = max(10, int(bh * 0.4)) + reading_lines = _group_words_into_lines(cell_words, y_tolerance_px=y_tol_wb) + ordered_cell_words = [w for line in reading_lines for w in line] + word_boxes = [] - for w in sorted(cell_words, key=lambda ww: (ww['top'], ww['left'])): + for w in ordered_cell_words: raw_text = w.get('text', '').strip() # Split by whitespace, at "[" boundaries (IPA), and after leading "!" # e.g. "badge[bxd3]" → ["badge", "[bxd3]"]