Fix: enrich non-EN Kaikki search results with translations from EN hub
Some checks failed
CI / go-lint (push) Has been skipped
CI / python-lint (push) Has been skipped
CI / nodejs-lint (push) Has been skipped
CI / test-go-school (push) Successful in 33s
CI / test-go-edu-search (push) Successful in 30s
CI / test-python-klausur (push) Failing after 2m33s
CI / test-python-agent-core (push) Successful in 20s
CI / test-nodejs-website (push) Successful in 24s
Some checks failed
CI / go-lint (push) Has been skipped
CI / python-lint (push) Has been skipped
CI / nodejs-lint (push) Has been skipped
CI / test-go-school (push) Successful in 33s
CI / test-go-edu-search (push) Successful in 30s
CI / test-python-klausur (push) Failing after 2m33s
CI / test-python-agent-core (push) Successful in 20s
CI / test-nodejs-website (push) Successful in 24s
When searching for DE/FR/ES/etc. words, the Kaikki entries have empty translations. The search now performs a reverse lookup to find the corresponding EN hub entry and copies its 24-language translations onto the result. This ensures wordInNative() works for all supported languages, not just the original 7.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -82,13 +82,38 @@ async def _search_kaikki(q: str, lang: str, limit: int, offset: int):
|
||||
if isinstance(tr, str):
|
||||
import json as _json
|
||||
tr = _json.loads(tr)
|
||||
|
||||
en_word = ""
|
||||
en_ipa = ""
|
||||
|
||||
if r["lang"] == "en":
|
||||
en_word = r["word"]
|
||||
en_ipa = r["ipa"] or ""
|
||||
else:
|
||||
# Non-EN entries have empty translations — enrich from EN via reverse lookup
|
||||
if not tr or len(tr) < 3:
|
||||
async with pool.acquire() as conn2:
|
||||
en_row = await conn2.fetchrow(
|
||||
"""SELECT word, ipa, translations FROM vocabulary_kaikki
|
||||
WHERE lang = 'en' AND translations->'%s'->>'text' ILIKE $1
|
||||
ORDER BY length(word) LIMIT 1""" % lang,
|
||||
r["word"],
|
||||
)
|
||||
if en_row:
|
||||
en_word = en_row["word"]
|
||||
en_ipa = en_row["ipa"] or ""
|
||||
en_tr = en_row["translations"]
|
||||
if isinstance(en_tr, str):
|
||||
en_tr = _json.loads(en_tr)
|
||||
tr = en_tr
|
||||
|
||||
words.append({
|
||||
"id": str(r["id"]),
|
||||
"english": r["word"] if r["lang"] == "en" else "",
|
||||
"german": tr.get("de", {}).get("text", "") if r["lang"] == "en" else r["word"] if r["lang"] == "de" else "",
|
||||
"english": en_word if r["lang"] != "en" else r["word"],
|
||||
"german": tr.get("de", {}).get("text", "") if r["lang"] != "de" else r["word"],
|
||||
"word": r["word"],
|
||||
"lang": r["lang"],
|
||||
"ipa_en": r["ipa"] if r["lang"] == "en" else "",
|
||||
"ipa_en": en_ipa if r["lang"] != "en" else (r["ipa"] or ""),
|
||||
"ipa_de": r["ipa"] if r["lang"] == "de" else "",
|
||||
"part_of_speech": r["pos"],
|
||||
"syllables_en": [],
|
||||
|
||||
Reference in New Issue
Block a user