diff --git a/klausur-service/backend/cv_vocab_pipeline.py b/klausur-service/backend/cv_vocab_pipeline.py index 966f47e..27f7e57 100644 --- a/klausur-service/backend/cv_vocab_pipeline.py +++ b/klausur-service/backend/cv_vocab_pipeline.py @@ -5250,13 +5250,14 @@ def build_cell_grid_v2( if empty_rows_removed > 0: logger.info(f"build_cell_grid_v2: removed {empty_rows_removed} all-empty rows") - # --- Page-level bold detection: compare stroke widths across all cells --- - _classify_bold_cells(cells, ocr_img, img_w, img_h) - bold_count = sum(1 for c in cells if c.get('is_bold')) + # Bold detection disabled: cell-level stroke-width analysis cannot + # distinguish bold from non-bold when cells contain mixed formatting + # (e.g. "cookie ['kuki]" — bold word + non-bold phonetics). + # TODO: word-level bold detection would require per-word bounding boxes. logger.info(f"build_cell_grid_v2: {len(cells)} cells from " f"{len(content_rows)} rows × {len(relevant_cols)} columns, " - f"engine={engine_name} (hybrid), {bold_count} bold") + f"engine={engine_name} (hybrid)") return cells, columns_meta @@ -7132,11 +7133,13 @@ def spell_review_entries_sync(entries: List[Dict]) -> Dict: if not _entry_needs_review(e): all_corrected.append(e) continue - for field_name in ("english", "german"): + for field_name in ("english", "german", "example"): old_val = (e.get(field_name) or "").strip() if not old_val: continue - new_val, was_changed = _spell_fix_field(old_val, field=field_name) + # example field is mixed-language — try German first (for umlauts) + lang = "german" if field_name in ("german", "example") else "english" + new_val, was_changed = _spell_fix_field(old_val, field=lang) if was_changed and new_val != old_val: changes.append({ "row_index": e.get("row_index", i),