Fix: enrich non-EN Kaikki search results with translations from EN hub
Some checks failed
CI / go-lint (push) Has been skipped
CI / python-lint (push) Has been skipped
CI / nodejs-lint (push) Has been skipped
CI / test-go-school (push) Successful in 33s
CI / test-go-edu-search (push) Successful in 30s
CI / test-python-klausur (push) Failing after 2m33s
CI / test-python-agent-core (push) Successful in 20s
CI / test-nodejs-website (push) Successful in 24s

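When searching for DE/FR/ES/etc. words, the matching Kaikki entries have
empty translations. The search now does a reverse lookup: it finds the EN
entry whose translation into the searched language matches the word
(preferring the shortest EN word) and copies that entry's 24-language
translations onto the result. This ensures wordInNative() works for
all languages, not just the original 7.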

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
Benjamin Admin
2026-04-29 18:40:29 +02:00
parent 8d53b1f6b9
commit a1488b2fec

View File

@@ -82,13 +82,38 @@ async def _search_kaikki(q: str, lang: str, limit: int, offset: int):
if isinstance(tr, str):
import json as _json
tr = _json.loads(tr)
en_word = ""
en_ipa = ""
if r["lang"] == "en":
en_word = r["word"]
en_ipa = r["ipa"] or ""
else:
# Non-EN entries have empty translations — enrich from EN via reverse lookup
if not tr or len(tr) < 3:
async with pool.acquire() as conn2:
en_row = await conn2.fetchrow(
"""SELECT word, ipa, translations FROM vocabulary_kaikki
WHERE lang = 'en' AND translations->'%s'->>'text' ILIKE $1
ORDER BY length(word) LIMIT 1""" % lang,
r["word"],
)
if en_row:
en_word = en_row["word"]
en_ipa = en_row["ipa"] or ""
en_tr = en_row["translations"]
if isinstance(en_tr, str):
en_tr = _json.loads(en_tr)
tr = en_tr
words.append({
"id": str(r["id"]),
"english": r["word"] if r["lang"] == "en" else "",
"german": tr.get("de", {}).get("text", "") if r["lang"] == "en" else r["word"] if r["lang"] == "de" else "",
"english": en_word if r["lang"] != "en" else r["word"],
"german": tr.get("de", {}).get("text", "") if r["lang"] != "de" else r["word"],
"word": r["word"],
"lang": r["lang"],
"ipa_en": r["ipa"] if r["lang"] == "en" else "",
"ipa_en": en_ipa if r["lang"] != "en" else (r["ipa"] or ""),
"ipa_de": r["ipa"] if r["lang"] == "de" else "",
"part_of_speech": r["pos"],
"syllables_en": [],