Connect frontend to Kaikki dictionary (6.27M words, 24 languages)

Search endpoint now defaults to source=kaikki, searching the
vocabulary_kaikki table with 6.27M Wiktionary entries.

/filters now also returns kaikki_total (entry count) and kaikki_languages (number of distinct languages).
/vocabulary header shows "6,271,749 Woerter in 24 Sprachen".

The manually curated vocabulary_words table (27 entries) is still accessible via source=manual.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
Benjamin Admin
2026-04-28 17:49:28 +02:00
parent d14826b199
commit cb4ea8e49a
2 changed files with 72 additions and 4 deletions

View File

@@ -41,14 +41,22 @@ router = APIRouter(prefix="/vocabulary", tags=["vocabulary"])
@router.get("/search") @router.get("/search")
async def api_search_words( async def api_search_words(
q: str = Query("", description="Search query"), q: str = Query("", description="Search query"),
lang: str = Query("en", pattern="^(en|de)$"), lang: str = Query("en"),
limit: int = Query(20, ge=1, le=100), limit: int = Query(20, ge=1, le=100),
offset: int = Query(0, ge=0), offset: int = Query(0, ge=0),
source: str = Query("kaikki", description="Source: kaikki (6M words) or manual (27 words)"),
): ):
"""Full-text search for vocabulary words.""" """Full-text search for vocabulary words.
source=kaikki searches the 6.27M Kaikki/Wiktionary dictionary.
source=manual searches the manually curated vocabulary_words table.
"""
if not q.strip(): if not q.strip():
return {"words": [], "query": q, "total": 0} return {"words": [], "query": q, "total": 0}
if source == "kaikki":
return await _search_kaikki(q.strip(), lang, limit, offset)
words = await search_words(q.strip(), lang=lang, limit=limit, offset=offset) words = await search_words(q.strip(), lang=lang, limit=limit, offset=offset)
return { return {
"words": [w.to_dict() for w in words], "words": [w.to_dict() for w in words],
@@ -57,6 +65,52 @@ async def api_search_words(
} }
def _escape_like(term: str) -> str:
    """Escape LIKE metacharacters so *term* is matched literally.

    Backslash is PostgreSQL's default LIKE escape character, so it is
    escaped first; otherwise the backslashes inserted for ``%`` and ``_``
    would themselves be doubled.
    """
    return (
        term.replace("\\", "\\\\")
        .replace("%", "\\%")
        .replace("_", "\\_")
    )


def _kaikki_row_to_word(row) -> dict:
    """Map one vocabulary_kaikki row onto the word dict returned by /search.

    *row* must support ``row["column"]`` access for the columns id, word,
    lang, pos, ipa, translations and example. The ``*_en`` / ``*_de`` fields
    are filled only when the row's language matches; every other language
    gets empty strings there and is represented through ``word``/``lang``.
    """
    import json

    lang = row["lang"]
    word = row["word"]
    is_en = lang == "en"
    is_de = lang == "de"

    translations = row["translations"]
    if isinstance(translations, str):
        # The column may arrive as raw JSON text rather than a decoded object.
        translations = json.loads(translations)

    german = ""
    if is_en:
        # translations may be NULL, or the "de" entry may not be an object;
        # fall back to "" instead of raising on .get().
        de_entry = translations.get("de") if isinstance(translations, dict) else None
        if isinstance(de_entry, dict):
            german = de_entry.get("text", "")
    elif is_de:
        german = word

    return {
        "id": str(row["id"]),
        "english": word if is_en else "",
        "german": german,
        "word": word,
        "lang": lang,
        "ipa_en": row["ipa"] if is_en else "",
        "ipa_de": row["ipa"] if is_de else "",
        "part_of_speech": row["pos"],
        "syllables_en": [],
        "syllables_de": [],
        "example_en": row["example"] if is_en else "",
        "example_de": row["example"] if is_de else "",
        "image_url": "",
        "audio_url_en": "",
        "audio_url_de": "",
        "difficulty": 0,
        "tags": [],
        "translations": translations,
    }


async def _search_kaikki(q: str, lang: str, limit: int, offset: int):
    """Search the vocabulary_kaikki table (6.27M Wiktionary entries).

    Performs a case-insensitive prefix match of *q* against ``word`` for the
    given *lang*, ordered by word length and then alphabetically, and returns
    one page of results.

    Args:
        q: Search term; matched literally (LIKE wildcards are escaped).
        lang: Language code compared against the ``lang`` column.
        limit: Maximum number of rows to return.
        offset: Number of matching rows to skip.

    Returns:
        Dict with ``words`` (list of word dicts), ``query``, ``total`` and
        ``source``. NOTE: ``total`` is the number of rows in this page, not
        the overall number of matches.
    """
    from vocabulary.db import get_pool

    # Escape user input so "%" or "_" in the query cannot act as wildcards.
    pattern = f"{_escape_like(q.lower())}%"

    pool = await get_pool()
    async with pool.acquire() as conn:
        rows = await conn.fetch(
            """
            SELECT id, word, lang, pos, ipa, translations, example
            FROM vocabulary_kaikki
            WHERE lang = $1 AND lower(word) LIKE $2
            ORDER BY length(word), lower(word)
            LIMIT $3 OFFSET $4
            """,
            lang, pattern, limit, offset,
        )

    words = [_kaikki_row_to_word(r) for r in rows]
    return {"words": words, "query": q, "total": len(words), "source": "kaikki"}
@router.get("/browse") @router.get("/browse")
async def api_browse_words( async def api_browse_words(
pos: str = Query("", description="Part of speech filter"), pos: str = Query("", description="Part of speech filter"),
@@ -92,10 +146,24 @@ async def api_get_filters():
tags = await get_all_tags() tags = await get_all_tags()
pos_list = await get_all_pos() pos_list = await get_all_pos()
total = await count_words() total = await count_words()
# Kaikki stats
kaikki_total = 0
kaikki_langs = 0
try:
from vocabulary.db import get_pool
pool = await get_pool()
async with pool.acquire() as conn:
kaikki_total = await conn.fetchval("SELECT COUNT(*) FROM vocabulary_kaikki")
kaikki_langs = await conn.fetchval("SELECT COUNT(DISTINCT lang) FROM vocabulary_kaikki")
except Exception:
pass
return { return {
"tags": tags, "tags": tags,
"parts_of_speech": pos_list, "parts_of_speech": pos_list,
"total_words": total, "total_words": total,
"kaikki_total": kaikki_total,
"kaikki_languages": kaikki_langs,
} }

View File

@@ -71,7 +71,7 @@ export default function VocabularyPage() {
try { try {
let url: string let url: string
if (query.trim()) { if (query.trim()) {
url = `${getApiBase()}/api/vocabulary/search?q=${encodeURIComponent(query)}&limit=30` url = `${getApiBase()}/api/vocabulary/search?q=${encodeURIComponent(query)}&limit=30&source=kaikki`
} else { } else {
const params = new URLSearchParams({ limit: '30' }) const params = new URLSearchParams({ limit: '30' })
if (posFilter) params.set('pos', posFilter) if (posFilter) params.set('pos', posFilter)
@@ -145,7 +145,7 @@ export default function VocabularyPage() {
<div> <div>
<h1 className={`text-xl font-bold ${isDark ? 'text-white' : 'text-slate-900'}`}>Woerterbuch</h1> <h1 className={`text-xl font-bold ${isDark ? 'text-white' : 'text-slate-900'}`}>Woerterbuch</h1>
<p className={`text-sm ${isDark ? 'text-white/60' : 'text-slate-500'}`}> <p className={`text-sm ${isDark ? 'text-white/60' : 'text-slate-500'}`}>
{filters.total_words > 0 ? `${filters.total_words.toLocaleString()} Woerter` : 'Woerter suchen und Lernunits erstellen'} {(filters as any).kaikki_total > 0 ? `${((filters as any).kaikki_total as number).toLocaleString()} Woerter in ${(filters as any).kaikki_languages} Sprachen` : filters.total_words > 0 ? `${filters.total_words.toLocaleString()} Woerter` : 'Woerter suchen und Lernunits erstellen'}
</p> </p>
</div> </div>
</div> </div>