Connect frontend to Kaikki dictionary (6.27M words, 24 languages)
Search endpoint now defaults to source=kaikki, searching the vocabulary_kaikki table with 6.27M Wiktionary entries. /filters returns kaikki_total and kaikki_languages count. /vocabulary header shows "6,271,749 Woerter in 24 Sprachen". Manual vocabulary_words (27 entries) still accessible via source=manual. Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -41,14 +41,22 @@ router = APIRouter(prefix="/vocabulary", tags=["vocabulary"])
|
||||
@router.get("/search")
|
||||
async def api_search_words(
|
||||
q: str = Query("", description="Search query"),
|
||||
lang: str = Query("en", pattern="^(en|de)$"),
|
||||
lang: str = Query("en"),
|
||||
limit: int = Query(20, ge=1, le=100),
|
||||
offset: int = Query(0, ge=0),
|
||||
source: str = Query("kaikki", description="Source: kaikki (6M words) or manual (27 words)"),
|
||||
):
|
||||
"""Full-text search for vocabulary words."""
|
||||
"""Full-text search for vocabulary words.
|
||||
|
||||
source=kaikki searches the 6.27M Kaikki/Wiktionary dictionary.
|
||||
source=manual searches the manually curated vocabulary_words table.
|
||||
"""
|
||||
if not q.strip():
|
||||
return {"words": [], "query": q, "total": 0}
|
||||
|
||||
if source == "kaikki":
|
||||
return await _search_kaikki(q.strip(), lang, limit, offset)
|
||||
|
||||
words = await search_words(q.strip(), lang=lang, limit=limit, offset=offset)
|
||||
return {
|
||||
"words": [w.to_dict() for w in words],
|
||||
@@ -57,6 +65,52 @@ async def api_search_words(
|
||||
}
|
||||
|
||||
|
||||
def _escape_like(term: str) -> str:
    """Escape LIKE metacharacters so *term* is matched literally.

    PostgreSQL's default LIKE escape character is the backslash, so the
    backslash itself is escaped first, followed by ``%`` and ``_``.
    """
    return term.replace("\\", "\\\\").replace("%", "\\%").replace("_", "\\_")


def _kaikki_row_to_word(row) -> dict:
    """Convert one ``vocabulary_kaikki`` row into the word dict sent to the client.

    ``row`` is any mapping exposing the columns selected by
    ``_search_kaikki`` (id, word, lang, pos, ipa, translations, example).
    Language-specific fields are filled only for the row's own language;
    every other field gets an empty placeholder value.
    """
    import json

    translations = row["translations"]
    if isinstance(translations, str):
        # The column may arrive as raw JSON text instead of a decoded object.
        translations = json.loads(translations)

    lang = row["lang"]
    is_en = lang == "en"
    is_de = lang == "de"

    if is_en:
        # Tolerate NULL translations or a malformed "de" entry instead of
        # failing the whole search with an AttributeError.
        de_entry = translations.get("de") if isinstance(translations, dict) else None
        german = de_entry.get("text", "") if isinstance(de_entry, dict) else ""
    elif is_de:
        german = row["word"]
    else:
        german = ""

    return {
        "id": str(row["id"]),
        "english": row["word"] if is_en else "",
        "german": german,
        "word": row["word"],
        "lang": lang,
        "ipa_en": row["ipa"] if is_en else "",
        "ipa_de": row["ipa"] if is_de else "",
        "part_of_speech": row["pos"],
        "syllables_en": [],
        "syllables_de": [],
        "example_en": row["example"] if is_en else "",
        "example_de": row["example"] if is_de else "",
        "image_url": "",
        "audio_url_en": "",
        "audio_url_de": "",
        "difficulty": 0,
        "tags": [],
        "translations": translations,
    }


async def _search_kaikki(q: str, lang: str, limit: int, offset: int):
    """Search the vocabulary_kaikki table (6.27M Wiktionary entries).

    Performs a case-insensitive prefix match of ``q`` against ``word`` for
    the given ``lang``, ordered by word length and then alphabetically.

    Args:
        q: Search term; matched literally (LIKE wildcards are escaped).
        lang: Language code the rows must carry.
        limit: Maximum number of rows to return.
        offset: Number of matching rows to skip.

    Returns:
        A dict with ``words`` (list of word dicts), ``query``, ``total`` and
        ``source``. Note that ``total`` is the number of words in this page,
        not the overall number of matches.
    """
    from vocabulary.db import get_pool

    # Escape user input so "%" / "_" in the query cannot act as wildcards;
    # only the trailing "%" added here makes this a prefix search.
    pattern = f"{_escape_like(q.lower())}%"

    pool = await get_pool()
    async with pool.acquire() as conn:
        rows = await conn.fetch(
            """
            SELECT id, word, lang, pos, ipa, translations, example
            FROM vocabulary_kaikki
            WHERE lang = $1 AND lower(word) LIKE $2
            ORDER BY length(word), lower(word)
            LIMIT $3 OFFSET $4
            """,
            lang, pattern, limit, offset,
        )

    words = [_kaikki_row_to_word(r) for r in rows]
    return {"words": words, "query": q, "total": len(words), "source": "kaikki"}
|
||||
|
||||
|
||||
@router.get("/browse")
|
||||
async def api_browse_words(
|
||||
pos: str = Query("", description="Part of speech filter"),
|
||||
@@ -92,10 +146,24 @@ async def api_get_filters():
|
||||
tags = await get_all_tags()
|
||||
pos_list = await get_all_pos()
|
||||
total = await count_words()
|
||||
# Kaikki stats
|
||||
kaikki_total = 0
|
||||
kaikki_langs = 0
|
||||
try:
|
||||
from vocabulary.db import get_pool
|
||||
pool = await get_pool()
|
||||
async with pool.acquire() as conn:
|
||||
kaikki_total = await conn.fetchval("SELECT COUNT(*) FROM vocabulary_kaikki")
|
||||
kaikki_langs = await conn.fetchval("SELECT COUNT(DISTINCT lang) FROM vocabulary_kaikki")
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
return {
|
||||
"tags": tags,
|
||||
"parts_of_speech": pos_list,
|
||||
"total_words": total,
|
||||
"kaikki_total": kaikki_total,
|
||||
"kaikki_languages": kaikki_langs,
|
||||
}
|
||||
|
||||
|
||||
|
||||
Reference in New Issue
Block a user