Add SmartSpellChecker + refactor vocab-worksheet page.tsx
Some checks failed
CI / go-lint (push) Has been skipped
CI / python-lint (push) Has been skipped
CI / nodejs-lint (push) Has been skipped
CI / test-go-school (push) Successful in 45s
CI / test-go-edu-search (push) Successful in 43s
CI / test-python-klausur (push) Failing after 2m51s
CI / test-python-agent-core (push) Successful in 36s
CI / test-nodejs-website (push) Successful in 37s
Some checks failed
CI / go-lint (push) Has been skipped
CI / python-lint (push) Has been skipped
CI / nodejs-lint (push) Has been skipped
CI / test-go-school (push) Successful in 45s
CI / test-go-edu-search (push) Successful in 43s
CI / test-python-klausur (push) Failing after 2m51s
CI / test-python-agent-core (push) Successful in 36s
CI / test-nodejs-website (push) Successful in 37s
SmartSpellChecker (klausur-service): - Language-aware OCR post-correction without LLMs - Dual-dictionary heuristic for EN/DE language detection - Context-based a/I disambiguation via bigram lookup - Multi-digit substitution (sch00l→school) - Cross-language guard (don't false-correct DE words in EN column) - Umlaut correction (Schuler→Schüler, uber→über) - Integrated into spell_review_entries_sync() pipeline - 31 tests, 9ms/100 corrections Vocab-worksheet refactoring (studio-v2): - Split 2337-line page.tsx into 14 files - Custom hook useVocabWorksheet.ts (all state + logic) - 9 components in components/ directory - types.ts, constants.ts for shared definitions Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -881,10 +881,25 @@ def spell_review_entries_sync(entries: List[Dict]) -> Dict:
|
||||
"""Rule-based OCR correction: spell-checker + structural heuristics.
|
||||
|
||||
Deterministic — never translates, never touches IPA, never hallucinates.
|
||||
Uses SmartSpellChecker for language-aware corrections with context-based
|
||||
disambiguation (a/I), multi-digit substitution, and cross-language guard.
|
||||
"""
|
||||
t0 = time.time()
|
||||
changes: List[Dict] = []
|
||||
all_corrected: List[Dict] = []
|
||||
|
||||
# Use SmartSpellChecker if available, fall back to legacy _spell_fix_field
|
||||
_smart = None
|
||||
try:
|
||||
from smart_spell import SmartSpellChecker
|
||||
_smart = SmartSpellChecker()
|
||||
logger.debug("spell_review: using SmartSpellChecker")
|
||||
except Exception:
|
||||
logger.debug("spell_review: SmartSpellChecker not available, using legacy")
|
||||
|
||||
# Map field names → language codes for SmartSpellChecker
|
||||
_LANG_MAP = {"english": "en", "german": "de", "example": "auto"}
|
||||
|
||||
for i, entry in enumerate(entries):
|
||||
e = dict(entry)
|
||||
# Page-ref normalization (always, regardless of review status)
|
||||
@@ -907,9 +922,18 @@ def spell_review_entries_sync(entries: List[Dict]) -> Dict:
|
||||
old_val = (e.get(field_name) or "").strip()
|
||||
if not old_val:
|
||||
continue
|
||||
# example field is mixed-language — try German first (for umlauts)
|
||||
lang = "german" if field_name in ("german", "example") else "english"
|
||||
new_val, was_changed = _spell_fix_field(old_val, field=lang)
|
||||
|
||||
if _smart:
|
||||
# SmartSpellChecker path — language-aware, context-based
|
||||
lang_code = _LANG_MAP.get(field_name, "en")
|
||||
result = _smart.correct_text(old_val, lang=lang_code)
|
||||
new_val = result.corrected
|
||||
was_changed = result.changed
|
||||
else:
|
||||
# Legacy path
|
||||
lang = "german" if field_name in ("german", "example") else "english"
|
||||
new_val, was_changed = _spell_fix_field(old_val, field=lang)
|
||||
|
||||
if was_changed and new_val != old_val:
|
||||
changes.append({
|
||||
"row_index": e.get("row_index", i),
|
||||
@@ -921,12 +945,13 @@ def spell_review_entries_sync(entries: List[Dict]) -> Dict:
|
||||
e["llm_corrected"] = True
|
||||
all_corrected.append(e)
|
||||
duration_ms = int((time.time() - t0) * 1000)
|
||||
model_name = "smart-spell-checker" if _smart else "spell-checker"
|
||||
return {
|
||||
"entries_original": entries,
|
||||
"entries_corrected": all_corrected,
|
||||
"changes": changes,
|
||||
"skipped_count": 0,
|
||||
"model_used": "spell-checker",
|
||||
"model_used": model_name,
|
||||
"duration_ms": duration_ms,
|
||||
}
|
||||
|
||||
|
||||
Reference in New Issue
Block a user