Add Vocabulary Learning Platform (Phase 1: DB + API + Editor)
Some checks failed
CI / go-lint (push) Has been skipped
CI / python-lint (push) Has been skipped
CI / nodejs-lint (push) Has been skipped
CI / test-go-school (push) Successful in 59s
CI / test-go-edu-search (push) Successful in 45s
CI / test-python-klausur (push) Failing after 3m7s
CI / test-python-agent-core (push) Successful in 24s
CI / test-nodejs-website (push) Successful in 31s
Some checks failed
CI / go-lint (push) Has been skipped
CI / python-lint (push) Has been skipped
CI / nodejs-lint (push) Has been skipped
CI / test-go-school (push) Successful in 59s
CI / test-go-edu-search (push) Successful in 45s
CI / test-python-klausur (push) Failing after 3m7s
CI / test-python-agent-core (push) Successful in 24s
CI / test-nodejs-website (push) Successful in 31s
Strategic pivot: Studio-v2 becomes a language learning platform. Compliance guardrail added to CLAUDE.md — no scan/OCR of third-party content in customer frontend. Upload of OWN materials remains allowed. Phase 1.1 — vocabulary_db.py: PostgreSQL model for 160k+ words with english, german, IPA, syllables, examples, images, audio, difficulty, tags, translations (multilingual). Trigram search index. Phase 1.2 — vocabulary_api.py: Search, browse, filters, bulk import, learning unit creation from word selection. Creates QA items with enhanced fields (IPA, syllables, image, audio) for flashcards. Phase 1.3 — /vocabulary page: Search bar with POS/difficulty filters, word cards with audio buttons, unit builder sidebar. Teacher selects words → creates learning unit → redirects to flashcards. Sidebar: Added "Woerterbuch" (/vocabulary) and "Lernmodule" (/learn). Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
264
backend-lehrer/vocabulary_api.py
Normal file
264
backend-lehrer/vocabulary_api.py
Normal file
@@ -0,0 +1,264 @@
|
||||
"""
|
||||
Vocabulary API — Search, browse, and build learning units from the word catalog.
|
||||
|
||||
Endpoints for teachers to find words and create learning units,
|
||||
and for students to access word details with audio/images/syllables.
|
||||
"""
|
||||
|
||||
import logging
|
||||
import json
|
||||
from typing import Any, Dict, List, Optional
|
||||
|
||||
from fastapi import APIRouter, HTTPException, Query
|
||||
from pydantic import BaseModel
|
||||
|
||||
from vocabulary_db import (
|
||||
search_words,
|
||||
get_word,
|
||||
browse_words,
|
||||
insert_word,
|
||||
count_words,
|
||||
get_all_tags,
|
||||
get_all_pos,
|
||||
VocabularyWord,
|
||||
)
|
||||
from learning_units import (
|
||||
LearningUnitCreate,
|
||||
create_learning_unit,
|
||||
get_learning_unit,
|
||||
)
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
router = APIRouter(prefix="/vocabulary", tags=["vocabulary"])
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Search & Browse
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
@router.get("/search")
|
||||
async def api_search_words(
|
||||
q: str = Query("", description="Search query"),
|
||||
lang: str = Query("en", pattern="^(en|de)$"),
|
||||
limit: int = Query(20, ge=1, le=100),
|
||||
offset: int = Query(0, ge=0),
|
||||
):
|
||||
"""Full-text search for vocabulary words."""
|
||||
if not q.strip():
|
||||
return {"words": [], "query": q, "total": 0}
|
||||
|
||||
words = await search_words(q.strip(), lang=lang, limit=limit, offset=offset)
|
||||
return {
|
||||
"words": [w.to_dict() for w in words],
|
||||
"query": q,
|
||||
"total": len(words),
|
||||
}
|
||||
|
||||
|
||||
@router.get("/browse")
|
||||
async def api_browse_words(
|
||||
pos: str = Query("", description="Part of speech filter"),
|
||||
difficulty: int = Query(0, ge=0, le=5, description="Difficulty 1-5, 0=all"),
|
||||
tag: str = Query("", description="Tag filter"),
|
||||
limit: int = Query(50, ge=1, le=200),
|
||||
offset: int = Query(0, ge=0),
|
||||
):
|
||||
"""Browse vocabulary words with filters."""
|
||||
words = await browse_words(
|
||||
pos=pos, difficulty=difficulty, tag=tag,
|
||||
limit=limit, offset=offset,
|
||||
)
|
||||
return {
|
||||
"words": [w.to_dict() for w in words],
|
||||
"filters": {"pos": pos, "difficulty": difficulty, "tag": tag},
|
||||
"total": len(words),
|
||||
}
|
||||
|
||||
|
||||
@router.get("/word/{word_id}")
|
||||
async def api_get_word(word_id: str):
|
||||
"""Get a single word with all details."""
|
||||
word = await get_word(word_id)
|
||||
if not word:
|
||||
raise HTTPException(status_code=404, detail="Wort nicht gefunden")
|
||||
return word.to_dict()
|
||||
|
||||
|
||||
@router.get("/filters")
|
||||
async def api_get_filters():
|
||||
"""Get available filter options (tags, parts of speech, word count)."""
|
||||
tags = await get_all_tags()
|
||||
pos_list = await get_all_pos()
|
||||
total = await count_words()
|
||||
return {
|
||||
"tags": tags,
|
||||
"parts_of_speech": pos_list,
|
||||
"total_words": total,
|
||||
}
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Learning Unit Creation from Word Selection
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
class CreateUnitFromWordsPayload(BaseModel):
    """Request body for POST /vocabulary/units: build a unit from word IDs."""

    # Display title of the new learning unit (also stored in the sidecar JSON).
    title: str
    # IDs of the selected vocabulary words; must be non-empty (400 otherwise).
    word_ids: List[str]
    # Target grade range; the endpoint falls back to "5-8" when omitted.
    grade: Optional[str] = None
    # Unit language code; the endpoint falls back to "de" when omitted.
    language: Optional[str] = "de"
|
||||
|
||||
|
||||
@router.post("/units")
|
||||
async def api_create_unit_from_words(payload: CreateUnitFromWordsPayload):
|
||||
"""Create a learning unit from selected vocabulary word IDs.
|
||||
|
||||
Fetches full word details, creates a LearningUnit in the
|
||||
learning_units system, and stores the vocabulary data.
|
||||
"""
|
||||
if not payload.word_ids:
|
||||
raise HTTPException(status_code=400, detail="Keine Woerter ausgewaehlt")
|
||||
|
||||
# Fetch all selected words
|
||||
words = []
|
||||
for wid in payload.word_ids:
|
||||
word = await get_word(wid)
|
||||
if word:
|
||||
words.append(word)
|
||||
|
||||
if not words:
|
||||
raise HTTPException(status_code=404, detail="Keine der Woerter gefunden")
|
||||
|
||||
# Create learning unit
|
||||
lu = create_learning_unit(LearningUnitCreate(
|
||||
title=payload.title,
|
||||
topic="Vocabulary",
|
||||
grade_level=payload.grade or "5-8",
|
||||
language=payload.language or "de",
|
||||
status="raw",
|
||||
))
|
||||
|
||||
# Save vocabulary data as analysis JSON for generators
|
||||
import os
|
||||
analysis_dir = os.path.expanduser("~/Arbeitsblaetter/Lerneinheiten")
|
||||
os.makedirs(analysis_dir, exist_ok=True)
|
||||
|
||||
vocab_data = [w.to_dict() for w in words]
|
||||
analysis_path = os.path.join(analysis_dir, f"{lu.id}_vocab.json")
|
||||
with open(analysis_path, "w", encoding="utf-8") as f:
|
||||
json.dump({"words": vocab_data, "title": payload.title}, f, ensure_ascii=False, indent=2)
|
||||
|
||||
# Also save as QA items for flashcards/type trainer
|
||||
qa_items = []
|
||||
for i, w in enumerate(words):
|
||||
qa_items.append({
|
||||
"id": f"qa_{i+1}",
|
||||
"question": w.english,
|
||||
"answer": w.german,
|
||||
"question_type": "knowledge",
|
||||
"key_terms": [w.english],
|
||||
"difficulty": w.difficulty,
|
||||
"source_hint": w.part_of_speech,
|
||||
"leitner_box": 0,
|
||||
"correct_count": 0,
|
||||
"incorrect_count": 0,
|
||||
"last_seen": None,
|
||||
"next_review": None,
|
||||
# Extra fields for enhanced flashcards
|
||||
"ipa_en": w.ipa_en,
|
||||
"ipa_de": w.ipa_de,
|
||||
"syllables_en": w.syllables_en,
|
||||
"syllables_de": w.syllables_de,
|
||||
"example_en": w.example_en,
|
||||
"example_de": w.example_de,
|
||||
"image_url": w.image_url,
|
||||
"audio_url_en": w.audio_url_en,
|
||||
"audio_url_de": w.audio_url_de,
|
||||
"part_of_speech": w.part_of_speech,
|
||||
"translations": w.translations,
|
||||
})
|
||||
|
||||
qa_path = os.path.join(analysis_dir, f"{lu.id}_qa.json")
|
||||
with open(qa_path, "w", encoding="utf-8") as f:
|
||||
json.dump({
|
||||
"qa_items": qa_items,
|
||||
"metadata": {
|
||||
"subject": "English Vocabulary",
|
||||
"grade_level": payload.grade or "5-8",
|
||||
"source_title": payload.title,
|
||||
"total_questions": len(qa_items),
|
||||
},
|
||||
}, f, ensure_ascii=False, indent=2)
|
||||
|
||||
logger.info(f"Created vocab unit {lu.id} with {len(words)} words")
|
||||
|
||||
return {
|
||||
"unit_id": lu.id,
|
||||
"title": payload.title,
|
||||
"word_count": len(words),
|
||||
"status": "created",
|
||||
}
|
||||
|
||||
|
||||
@router.get("/units/{unit_id}")
|
||||
async def api_get_unit_words(unit_id: str):
|
||||
"""Get all words for a learning unit."""
|
||||
import os
|
||||
vocab_path = os.path.join(
|
||||
os.path.expanduser("~/Arbeitsblaetter/Lerneinheiten"),
|
||||
f"{unit_id}_vocab.json",
|
||||
)
|
||||
if not os.path.exists(vocab_path):
|
||||
raise HTTPException(status_code=404, detail="Unit nicht gefunden")
|
||||
|
||||
with open(vocab_path, "r", encoding="utf-8") as f:
|
||||
data = json.load(f)
|
||||
|
||||
return {
|
||||
"unit_id": unit_id,
|
||||
"title": data.get("title", ""),
|
||||
"words": data.get("words", []),
|
||||
}
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Bulk Import (for seeding the dictionary)
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
class BulkImportPayload(BaseModel):
    """Request body for POST /vocabulary/import (dictionary seeding)."""

    # Raw word dicts; each needs at least "english" and "german" keys —
    # all other keys are optional and default to empty values on import.
    words: List[Dict[str, Any]]
|
||||
|
||||
|
||||
@router.post("/import")
|
||||
async def api_bulk_import(payload: BulkImportPayload):
|
||||
"""Bulk import vocabulary words (for seeding the dictionary).
|
||||
|
||||
Each word dict should have at minimum: english, german.
|
||||
Optional: ipa_en, ipa_de, part_of_speech, syllables_en, syllables_de,
|
||||
example_en, example_de, difficulty, tags, translations.
|
||||
"""
|
||||
from vocabulary_db import insert_words_bulk
|
||||
|
||||
words = []
|
||||
for w in payload.words:
|
||||
words.append(VocabularyWord(
|
||||
english=w.get("english", ""),
|
||||
german=w.get("german", ""),
|
||||
ipa_en=w.get("ipa_en", ""),
|
||||
ipa_de=w.get("ipa_de", ""),
|
||||
part_of_speech=w.get("part_of_speech", ""),
|
||||
syllables_en=w.get("syllables_en", []),
|
||||
syllables_de=w.get("syllables_de", []),
|
||||
example_en=w.get("example_en", ""),
|
||||
example_de=w.get("example_de", ""),
|
||||
difficulty=w.get("difficulty", 1),
|
||||
tags=w.get("tags", []),
|
||||
translations=w.get("translations", {}),
|
||||
))
|
||||
|
||||
count = await insert_words_bulk(words)
|
||||
logger.info(f"Bulk imported {count} vocabulary words")
|
||||
return {"imported": count}
|
||||
Reference in New Issue
Block a user