fix: correct example field (fixes iberqueren), disable cell-level bold
Some checks failed
CI / go-lint (push) Has been skipped
CI / python-lint (push) Has been skipped
CI / nodejs-lint (push) Has been skipped
CI / test-go-school (push) Successful in 25s
CI / test-go-edu-search (push) Successful in 26s
CI / test-python-klausur (push) Failing after 1m50s
CI / test-python-agent-core (push) Successful in 16s
CI / test-nodejs-website (push) Successful in 17s
Some checks failed
CI / go-lint (push) Has been skipped
CI / python-lint (push) Has been skipped
CI / nodejs-lint (push) Has been skipped
CI / test-go-school (push) Successful in 25s
CI / test-go-edu-search (push) Successful in 26s
CI / test-python-klausur (push) Failing after 1m50s
CI / test-python-agent-core (push) Successful in 16s
CI / test-nodejs-website (push) Successful in 17s
- Add "example" to spell correction loop — was only correcting "english" and "german" fields, missing umlauts in example sentences
- Use "german" language for example field (mixed-language, umlauts needed)
- Disable cell-level bold detection — cannot distinguish bold from non-bold in mixed-format cells (e.g. "cookie ['kuki]")
- Keep _measure_stroke_width and _classify_bold_cells for future word-level bold detection

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -5250,13 +5250,14 @@ def build_cell_grid_v2(
|
||||
if empty_rows_removed > 0:
|
||||
logger.info(f"build_cell_grid_v2: removed {empty_rows_removed} all-empty rows")
|
||||
|
||||
# --- Page-level bold detection: compare stroke widths across all cells ---
|
||||
_classify_bold_cells(cells, ocr_img, img_w, img_h)
|
||||
bold_count = sum(1 for c in cells if c.get('is_bold'))
|
||||
# Bold detection disabled: cell-level stroke-width analysis cannot
|
||||
# distinguish bold from non-bold when cells contain mixed formatting
|
||||
# (e.g. "cookie ['kuki]" — bold word + non-bold phonetics).
|
||||
# TODO: word-level bold detection would require per-word bounding boxes.
|
||||
|
||||
logger.info(f"build_cell_grid_v2: {len(cells)} cells from "
|
||||
f"{len(content_rows)} rows × {len(relevant_cols)} columns, "
|
||||
f"engine={engine_name} (hybrid), {bold_count} bold")
|
||||
f"engine={engine_name} (hybrid)")
|
||||
|
||||
return cells, columns_meta
|
||||
|
||||
@@ -7132,11 +7133,13 @@ def spell_review_entries_sync(entries: List[Dict]) -> Dict:
|
||||
if not _entry_needs_review(e):
|
||||
all_corrected.append(e)
|
||||
continue
|
||||
for field_name in ("english", "german"):
|
||||
for field_name in ("english", "german", "example"):
|
||||
old_val = (e.get(field_name) or "").strip()
|
||||
if not old_val:
|
||||
continue
|
||||
new_val, was_changed = _spell_fix_field(old_val, field=field_name)
|
||||
# example field is mixed-language — try German first (for umlauts)
|
||||
lang = "german" if field_name in ("german", "example") else "english"
|
||||
new_val, was_changed = _spell_fix_field(old_val, field=lang)
|
||||
if was_changed and new_val != old_val:
|
||||
changes.append({
|
||||
"row_index": e.get("row_index", i),
|
||||
|
||||
Reference in New Issue
Block a user