Connect frontend to Kaikki dictionary (6.27M words, 24 languages)
Search endpoint now defaults to source=kaikki, searching the vocabulary_kaikki table with 6.27M Wiktionary entries. /filters returns kaikki_total and kaikki_languages count. /vocabulary header shows "6,271,749 Woerter in 24 Sprachen". Manual vocabulary_words (27 entries) still accessible via source=manual. Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -41,14 +41,22 @@ router = APIRouter(prefix="/vocabulary", tags=["vocabulary"])
|
|||||||
@router.get("/search")
|
@router.get("/search")
|
||||||
async def api_search_words(
|
async def api_search_words(
|
||||||
q: str = Query("", description="Search query"),
|
q: str = Query("", description="Search query"),
|
||||||
lang: str = Query("en", pattern="^(en|de)$"),
|
lang: str = Query("en"),
|
||||||
limit: int = Query(20, ge=1, le=100),
|
limit: int = Query(20, ge=1, le=100),
|
||||||
offset: int = Query(0, ge=0),
|
offset: int = Query(0, ge=0),
|
||||||
|
source: str = Query("kaikki", description="Source: kaikki (6M words) or manual (27 words)"),
|
||||||
):
|
):
|
||||||
"""Full-text search for vocabulary words."""
|
"""Full-text search for vocabulary words.
|
||||||
|
|
||||||
|
source=kaikki searches the 6.27M Kaikki/Wiktionary dictionary.
|
||||||
|
source=manual searches the manually curated vocabulary_words table.
|
||||||
|
"""
|
||||||
if not q.strip():
|
if not q.strip():
|
||||||
return {"words": [], "query": q, "total": 0}
|
return {"words": [], "query": q, "total": 0}
|
||||||
|
|
||||||
|
if source == "kaikki":
|
||||||
|
return await _search_kaikki(q.strip(), lang, limit, offset)
|
||||||
|
|
||||||
words = await search_words(q.strip(), lang=lang, limit=limit, offset=offset)
|
words = await search_words(q.strip(), lang=lang, limit=limit, offset=offset)
|
||||||
return {
|
return {
|
||||||
"words": [w.to_dict() for w in words],
|
"words": [w.to_dict() for w in words],
|
||||||
@@ -57,6 +65,52 @@ async def api_search_words(
|
|||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
async def _search_kaikki(q: str, lang: str, limit: int, offset: int):
    """Prefix-search the vocabulary_kaikki table (6.27M Wiktionary entries).

    Args:
        q: Search term. It is lower-cased and matched as a literal prefix
            of ``lower(word)``; LIKE metacharacters in it are escaped.
        lang: Language code compared against the ``lang`` column.
        limit: Maximum number of rows to fetch.
        offset: Number of matching rows to skip.

    Returns:
        A dict with the keys ``words`` (one dict per row, shaped like the
        entries of the manual vocabulary search), ``query``, ``total`` and
        ``source``. NOTE: ``total`` is the number of entries in this page,
        not the overall number of matches in the table.
    """
    import json

    from vocabulary.db import get_pool

    # Escape LIKE metacharacters so user input is always a literal prefix.
    # Without this a query such as "%" or "_" acts as a wildcard and forces
    # a scan over the whole table. Backslash is PostgreSQL's default LIKE
    # escape character and must be escaped first.
    literal_prefix = (
        q.lower()
        .replace("\\", "\\\\")
        .replace("%", "\\%")
        .replace("_", "\\_")
    )

    pool = await get_pool()
    async with pool.acquire() as conn:
        rows = await conn.fetch(
            """
            SELECT id, word, lang, pos, ipa, translations, example
            FROM vocabulary_kaikki
            WHERE lang = $1 AND lower(word) LIKE $2
            ORDER BY length(word), lower(word)
            LIMIT $3 OFFSET $4
            """,
            lang, f"{literal_prefix}%", limit, offset,
        )

    words = []
    for r in rows:
        tr = r["translations"]
        # The driver may hand JSON columns back as text; decode when needed.
        if isinstance(tr, str):
            tr = json.loads(tr)

        is_en = r["lang"] == "en"
        is_de = r["lang"] == "de"

        # The German text comes from the translations of an English entry,
        # or is the word itself for a German entry. Guard against NULL or
        # non-object translations instead of raising AttributeError.
        german = ""
        if is_en:
            de_entry = tr.get("de") if isinstance(tr, dict) else None
            if isinstance(de_entry, dict):
                german = de_entry.get("text", "")
        elif is_de:
            german = r["word"]

        words.append({
            "id": str(r["id"]),
            "english": r["word"] if is_en else "",
            "german": german,
            "word": r["word"],
            "lang": r["lang"],
            "ipa_en": r["ipa"] if is_en else "",
            "ipa_de": r["ipa"] if is_de else "",
            "part_of_speech": r["pos"],
            # The fields below are not selected from vocabulary_kaikki;
            # they are filled with empty defaults.
            "syllables_en": [],
            "syllables_de": [],
            "example_en": r["example"] if is_en else "",
            "example_de": r["example"] if is_de else "",
            "image_url": "",
            "audio_url_en": "",
            "audio_url_de": "",
            "difficulty": 0,
            "tags": [],
            "translations": tr,
        })

    return {"words": words, "query": q, "total": len(words), "source": "kaikki"}
|
||||||
|
|
||||||
|
|
||||||
@router.get("/browse")
|
@router.get("/browse")
|
||||||
async def api_browse_words(
|
async def api_browse_words(
|
||||||
pos: str = Query("", description="Part of speech filter"),
|
pos: str = Query("", description="Part of speech filter"),
|
||||||
@@ -92,10 +146,24 @@ async def api_get_filters():
|
|||||||
tags = await get_all_tags()
|
tags = await get_all_tags()
|
||||||
pos_list = await get_all_pos()
|
pos_list = await get_all_pos()
|
||||||
total = await count_words()
|
total = await count_words()
|
||||||
|
# Kaikki stats
|
||||||
|
kaikki_total = 0
|
||||||
|
kaikki_langs = 0
|
||||||
|
try:
|
||||||
|
from vocabulary.db import get_pool
|
||||||
|
pool = await get_pool()
|
||||||
|
async with pool.acquire() as conn:
|
||||||
|
kaikki_total = await conn.fetchval("SELECT COUNT(*) FROM vocabulary_kaikki")
|
||||||
|
kaikki_langs = await conn.fetchval("SELECT COUNT(DISTINCT lang) FROM vocabulary_kaikki")
|
||||||
|
except Exception:
|
||||||
|
pass
|
||||||
|
|
||||||
return {
|
return {
|
||||||
"tags": tags,
|
"tags": tags,
|
||||||
"parts_of_speech": pos_list,
|
"parts_of_speech": pos_list,
|
||||||
"total_words": total,
|
"total_words": total,
|
||||||
|
"kaikki_total": kaikki_total,
|
||||||
|
"kaikki_languages": kaikki_langs,
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|||||||
@@ -71,7 +71,7 @@ export default function VocabularyPage() {
|
|||||||
try {
|
try {
|
||||||
let url: string
|
let url: string
|
||||||
if (query.trim()) {
|
if (query.trim()) {
|
||||||
url = `${getApiBase()}/api/vocabulary/search?q=${encodeURIComponent(query)}&limit=30`
|
url = `${getApiBase()}/api/vocabulary/search?q=${encodeURIComponent(query)}&limit=30&source=kaikki`
|
||||||
} else {
|
} else {
|
||||||
const params = new URLSearchParams({ limit: '30' })
|
const params = new URLSearchParams({ limit: '30' })
|
||||||
if (posFilter) params.set('pos', posFilter)
|
if (posFilter) params.set('pos', posFilter)
|
||||||
@@ -145,7 +145,7 @@ export default function VocabularyPage() {
|
|||||||
<div>
|
<div>
|
||||||
<h1 className={`text-xl font-bold ${isDark ? 'text-white' : 'text-slate-900'}`}>Woerterbuch</h1>
|
<h1 className={`text-xl font-bold ${isDark ? 'text-white' : 'text-slate-900'}`}>Woerterbuch</h1>
|
||||||
<p className={`text-sm ${isDark ? 'text-white/60' : 'text-slate-500'}`}>
|
<p className={`text-sm ${isDark ? 'text-white/60' : 'text-slate-500'}`}>
|
||||||
{filters.total_words > 0 ? `${filters.total_words.toLocaleString()} Woerter` : 'Woerter suchen und Lernunits erstellen'}
|
{(filters as any).kaikki_total > 0 ? `${((filters as any).kaikki_total as number).toLocaleString()} Woerter in ${(filters as any).kaikki_languages} Sprachen` : filters.total_words > 0 ? `${filters.total_words.toLocaleString()} Woerter` : 'Woerter suchen und Lernunits erstellen'}
|
||||||
</p>
|
</p>
|
||||||
</div>
|
</div>
|
||||||
</div>
|
</div>
|
||||||
|
|||||||
Reference in New Issue
Block a user