Fix gutter repair: detect short fragments + show spell alternatives
Some checks failed
CI / go-lint (push) Has been skipped
CI / python-lint (push) Has been skipped
CI / nodejs-lint (push) Has been skipped
CI / test-go-school (push) Successful in 48s
CI / test-go-edu-search (push) Successful in 49s
CI / test-python-klausur (push) Failing after 2m37s
CI / test-python-agent-core (push) Successful in 35s
CI / test-nodejs-website (push) Successful in 35s
Some checks failed
CI / go-lint (push) Has been skipped
CI / python-lint (push) Has been skipped
CI / nodejs-lint (push) Has been skipped
CI / test-go-school (push) Successful in 48s
CI / test-go-edu-search (push) Successful in 49s
CI / test-python-klausur (push) Failing after 2m37s
CI / test-python-agent-core (push) Successful in 35s
CI / test-nodejs-website (push) Successful in 35s
- Lower min word length from 3→2 for hyphen-join candidates so fragments like "ve" (from "ver-künden") are no longer skipped
- Return all spellchecker candidates instead of just top-1, so the user can pick the correct form (e.g. "stammeln" vs "stammelt")
- Frontend shows clickable alternative buttons for spell_fix suggestions
- Backend accepts text_overrides in the apply endpoint for user-selected alternatives

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -13,6 +13,8 @@ from cv_gutter_repair import (
|
||||
_try_spell_fix,
|
||||
_edit_distance,
|
||||
_word_is_at_gutter_edge,
|
||||
_MIN_WORD_LEN_SPELL,
|
||||
_MIN_WORD_LEN_HYPHEN,
|
||||
analyse_grid_for_gutter_repair,
|
||||
apply_gutter_suggestions,
|
||||
)
|
||||
@@ -127,27 +129,33 @@ class TestTryHyphenJoin:
|
||||
|
||||
@needs_spellchecker
|
||||
class TestTrySpellFix:
|
||||
def test_fix_garbled_ending(self):
|
||||
# "stammeli" should suggest "stammeln"
|
||||
def test_fix_garbled_ending_returns_alternatives(self):
|
||||
# "stammeli" should return a correction with alternatives
|
||||
result = _try_spell_fix("stammeli", col_type="column_de")
|
||||
assert result is not None
|
||||
corrected, conf = result
|
||||
assert corrected == "stammeln"
|
||||
corrected, conf, alts = result
|
||||
# The best correction is one of the valid forms
|
||||
all_options = [corrected] + alts
|
||||
all_lower = [w.lower() for w in all_options]
|
||||
# "stammeln" must be among the candidates
|
||||
assert "stammeln" in all_lower, f"Expected 'stammeln' in {all_options}"
|
||||
|
||||
def test_known_word_not_fixed(self):
|
||||
# "Haus" is correct — no fix needed
|
||||
result = _try_spell_fix("Haus", col_type="column_de")
|
||||
# Should be None since the word is correct
|
||||
# (unless spellchecker suggests something else)
|
||||
# Either None or same word is acceptable
|
||||
if result is not None:
|
||||
corrected, _ = result
|
||||
corrected, _, _ = result
|
||||
assert corrected.lower() == "haus"
|
||||
|
||||
def test_short_word_skipped(self):
|
||||
result = _try_spell_fix("ab")
|
||||
assert result is None
|
||||
|
||||
def test_min_word_len_thresholds(self):
|
||||
assert _MIN_WORD_LEN_HYPHEN == 2
|
||||
assert _MIN_WORD_LEN_SPELL == 3
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Grid analysis tests
|
||||
|
||||
Reference in New Issue
Block a user