Connect frontend to Kaikki dictionary (6.27M words, 24 languages)

The search endpoint now defaults to source=kaikki, which searches the vocabulary_kaikki table (6.27M Wiktionary entries). /filters now also returns kaikki_total (the number of Kaikki entries) and kaikki_languages (the number of distinct languages). The /vocabulary header shows "6,271,749 Woerter in 24 Sprachen". The manually curated vocabulary_words table (27 entries) is still accessible via source=manual. Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
2026-04-28 17:49:28 +02:00
parent d14826b199
commit cb4ea8e49a
2 changed files with 72 additions and 4 deletions
--- a/backend-lehrer/vocabulary/api.py
+++ b/backend-lehrer/vocabulary/api.py
@@ -41,14 +41,22 @@ router = APIRouter(prefix="/vocabulary", tags=["vocabulary"])
@router.get("/search")
 async def api_search_words(
    q: str = Query("", description="Search query"),
-    lang: str = Query("en", pattern="^(en|de)$"),
+    lang: str = Query("en"),
    limit: int = Query(20, ge=1, le=100),
    offset: int = Query(0, ge=0),
+    source: str = Query("kaikki", description="Source: kaikki (6M words) or manual (27 words)"),
 ):
-    """Full-text search for vocabulary words."""
+    """Full-text search for vocabulary words.
+
+    source=kaikki searches the 6.27M Kaikki/Wiktionary dictionary.
+    source=manual searches the manually curated vocabulary_words table.
+    """
    if not q.strip():
        return {"words": [], "query": q, "total": 0}

+    if source == "kaikki":
+        return await _search_kaikki(q.strip(), lang, limit, offset)
+
    words = await search_words(q.strip(), lang=lang, limit=limit, offset=offset)
    return {
        "words": [w.to_dict() for w in words],
@@ -57,6 +65,52 @@ async def api_search_words(
    }


+async def _search_kaikki(q: str, lang: str, limit: int, offset: int):
+    """Search the vocabulary_kaikki table (6.27M Wiktionary entries)."""
+    from vocabulary.db import get_pool
+    pool = await get_pool()
+    async with pool.acquire() as conn:
+        rows = await conn.fetch(
+            """
+            SELECT id, word, lang, pos, ipa, translations, example
+            FROM vocabulary_kaikki
+            WHERE lang = $1 AND lower(word) LIKE $2
+            ORDER BY length(word), lower(word)
+            LIMIT $3 OFFSET $4
+            """,
+            lang, f"{q.lower()}%", limit, offset,
+        )
+
+    words = []
+    for r in rows:
+        tr = r["translations"]
+        if isinstance(tr, str):
+            import json as _json
+            tr = _json.loads(tr)
+        words.append({
+            "id": str(r["id"]),
+            "english": r["word"] if r["lang"] == "en" else "",
+            "german": tr.get("de", {}).get("text", "") if r["lang"] == "en" else r["word"] if r["lang"] == "de" else "",
+            "word": r["word"],
+            "lang": r["lang"],
+            "ipa_en": r["ipa"] if r["lang"] == "en" else "",
+            "ipa_de": r["ipa"] if r["lang"] == "de" else "",
+            "part_of_speech": r["pos"],
+            "syllables_en": [],
+            "syllables_de": [],
+            "example_en": r["example"] if r["lang"] == "en" else "",
+            "example_de": r["example"] if r["lang"] == "de" else "",
+            "image_url": "",
+            "audio_url_en": "",
+            "audio_url_de": "",
+            "difficulty": 0,
+            "tags": [],
+            "translations": tr,
+        })
+
+    return {"words": words, "query": q, "total": len(words), "source": "kaikki"}
+
+
@router.get("/browse")
 async def api_browse_words(
    pos: str = Query("", description="Part of speech filter"),
@@ -92,10 +146,24 @@ async def api_get_filters():
    tags = await get_all_tags()
    pos_list = await get_all_pos()
    total = await count_words()
+    # Kaikki stats
+    kaikki_total = 0
+    kaikki_langs = 0
+    try:
+        from vocabulary.db import get_pool
+        pool = await get_pool()
+        async with pool.acquire() as conn:
+            kaikki_total = await conn.fetchval("SELECT COUNT(*) FROM vocabulary_kaikki")
+            kaikki_langs = await conn.fetchval("SELECT COUNT(DISTINCT lang) FROM vocabulary_kaikki")
+    except Exception:
+        pass
+
    return {
        "tags": tags,
        "parts_of_speech": pos_list,
        "total_words": total,
+        "kaikki_total": kaikki_total,
+        "kaikki_languages": kaikki_langs,
    }