From cb4ea8e49a30c9ab554148c188eb9bcbcb583ef0 Mon Sep 17 00:00:00 2001 From: Benjamin Admin Date: Tue, 28 Apr 2026 17:49:28 +0200 Subject: [PATCH] Connect frontend to Kaikki dictionary (6.27M words, 24 languages) Search endpoint now defaults to source=kaikki, searching the vocabulary_kaikki table with 6.27M Wiktionary entries. /filters returns kaikki_total and kaikki_languages count. /vocabulary header shows "6,271,749 Woerter in 24 Sprachen". Manual vocabulary_words (27 entries) still accessible via source=manual. Co-Authored-By: Claude Opus 4.6 (1M context) --- backend-lehrer/vocabulary/api.py | 72 ++++++++++++++++++++++++++++++- studio-v2/app/vocabulary/page.tsx | 4 +- 2 files changed, 72 insertions(+), 4 deletions(-) diff --git a/backend-lehrer/vocabulary/api.py b/backend-lehrer/vocabulary/api.py index 76ef783..d09763f 100644 --- a/backend-lehrer/vocabulary/api.py +++ b/backend-lehrer/vocabulary/api.py @@ -41,14 +41,22 @@ router = APIRouter(prefix="/vocabulary", tags=["vocabulary"]) @router.get("/search") async def api_search_words( q: str = Query("", description="Search query"), - lang: str = Query("en", pattern="^(en|de)$"), + lang: str = Query("en"), limit: int = Query(20, ge=1, le=100), offset: int = Query(0, ge=0), + source: str = Query("kaikki", description="Source: kaikki (6M words) or manual (27 words)"), ): - """Full-text search for vocabulary words.""" + """Full-text search for vocabulary words. + + source=kaikki searches the 6.27M Kaikki/Wiktionary dictionary. + source=manual searches the manually curated vocabulary_words table. 
+    """
     if not q.strip():
         return {"words": [], "query": q, "total": 0}
 
+    if source == "kaikki":
+        return await _search_kaikki(q.strip(), lang, limit, offset)
+
     words = await search_words(q.strip(), lang=lang, limit=limit, offset=offset)
     return {
         "words": [w.to_dict() for w in words],
@@ -57,6 +65,93 @@ async def api_search_words(
     }
 
 
+async def _search_kaikki(q: str, lang: str, limit: int, offset: int):
+    """Prefix-search the vocabulary_kaikki table (6.27M Wiktionary entries).
+
+    Args:
+        q: Search term, already stripped by the caller. It is lowercased and
+            matched as a literal prefix of ``lower(word)``.
+        lang: Value the ``lang`` column must equal.
+        limit: Maximum number of rows to fetch.
+        offset: Number of matching rows to skip.
+
+    Returns:
+        Dict with ``words`` (one en/de-keyed dict per row), ``query``,
+        ``total`` and ``source``. ``total`` is the number of words in this
+        page, not the number of matches in the table.
+    """
+    import json
+
+    from vocabulary.db import get_pool
+
+    # Escape LIKE metacharacters so a "%" or "_" typed by the user matches
+    # literally instead of acting as a wildcard. Backslash goes first so the
+    # escapes added afterwards are not doubled. NOTE(review): relies on
+    # backslash being the LIKE escape character (the PostgreSQL default).
+    prefix = (
+        q.lower()
+        .replace("\\", "\\\\")
+        .replace("%", "\\%")
+        .replace("_", "\\_")
+    )
+
+    pool = await get_pool()
+    async with pool.acquire() as conn:
+        rows = await conn.fetch(
+            """
+            SELECT id, word, lang, pos, ipa, translations, example
+            FROM vocabulary_kaikki
+            WHERE lang = $1 AND lower(word) LIKE $2
+            ORDER BY length(word), lower(word)
+            LIMIT $3 OFFSET $4
+            """,
+            lang, f"{prefix}%", limit, offset,
+        )
+
+    words = []
+    for row in rows:
+        translations = row["translations"]
+        if isinstance(translations, str):
+            # The column may come back as JSON text; decode it before use.
+            translations = json.loads(translations)
+
+        is_en = row["lang"] == "en"
+        is_de = row["lang"] == "de"
+
+        if is_de:
+            german = row["word"]
+        elif is_en and isinstance(translations, dict):
+            # A NULL translations value or a malformed "de" entry must not
+            # fail the whole request; fall back to an empty string.
+            de_entry = translations.get("de")
+            german = de_entry.get("text", "") if isinstance(de_entry, dict) else ""
+        else:
+            german = ""
+
+        words.append({
+            "id": str(row["id"]),
+            "english": row["word"] if is_en else "",
+            "german": german,
+            "word": row["word"],
+            "lang": row["lang"],
+            "ipa_en": row["ipa"] if is_en else "",
+            "ipa_de": row["ipa"] if is_de else "",
+            "part_of_speech": row["pos"],
+            "syllables_en": [],
+            "syllables_de": [],
+            "example_en": row["example"] if is_en else "",
+            "example_de": row["example"] if is_de else "",
+            "image_url": "",
+            "audio_url_en": "",
+            "audio_url_de": "",
+            "difficulty": 0,
+            "tags": [],
+            "translations": translations,
+        })
+
+    return {"words": words, "query": q, "total": len(words), "source": "kaikki"}
+
+
 @router.get("/browse")
 async def api_browse_words(
     pos: str = Query("", description="Part of speech filter"),
@@ -92,10 +187,24 @@ async def api_get_filters():
     tags = await get_all_tags()
     pos_list = await get_all_pos()
     total = await count_words()
+    # Kaikki stats
+    kaikki_total = 0
+ kaikki_langs = 0 + try: + from vocabulary.db import get_pool + pool = await get_pool() + async with pool.acquire() as conn: + kaikki_total = await conn.fetchval("SELECT COUNT(*) FROM vocabulary_kaikki") + kaikki_langs = await conn.fetchval("SELECT COUNT(DISTINCT lang) FROM vocabulary_kaikki") + except Exception: + pass + return { "tags": tags, "parts_of_speech": pos_list, "total_words": total, + "kaikki_total": kaikki_total, + "kaikki_languages": kaikki_langs, } diff --git a/studio-v2/app/vocabulary/page.tsx b/studio-v2/app/vocabulary/page.tsx index 36468d9..c12d684 100644 --- a/studio-v2/app/vocabulary/page.tsx +++ b/studio-v2/app/vocabulary/page.tsx @@ -71,7 +71,7 @@ export default function VocabularyPage() { try { let url: string if (query.trim()) { - url = `${getApiBase()}/api/vocabulary/search?q=${encodeURIComponent(query)}&limit=30` + url = `${getApiBase()}/api/vocabulary/search?q=${encodeURIComponent(query)}&limit=30&source=kaikki` } else { const params = new URLSearchParams({ limit: '30' }) if (posFilter) params.set('pos', posFilter) @@ -145,7 +145,7 @@ export default function VocabularyPage() {

Woerterbuch

- {filters.total_words > 0 ? `${filters.total_words.toLocaleString()} Woerter` : 'Woerter suchen und Lernunits erstellen'} + {(filters as any).kaikki_total > 0 ? `${((filters as any).kaikki_total as number).toLocaleString()} Woerter in ${(filters as any).kaikki_languages} Sprachen` : filters.total_words > 0 ? `${filters.total_words.toLocaleString()} Woerter` : 'Woerter suchen und Lernunits erstellen'}