""" Vocabulary API — Search, browse, and build learning units from the word catalog. Endpoints for teachers to find words and create learning units, and for students to access word details with audio/images/syllables. """ import logging import json from typing import Any, Dict, List, Optional from fastapi import APIRouter, HTTPException, Query from pydantic import BaseModel from vocabulary_db import ( search_words, get_word, browse_words, insert_word, count_words, get_all_tags, get_all_pos, VocabularyWord, ) from learning_units import ( LearningUnitCreate, create_learning_unit, get_learning_unit, ) logger = logging.getLogger(__name__) router = APIRouter(prefix="/vocabulary", tags=["vocabulary"]) # --------------------------------------------------------------------------- # Search & Browse # --------------------------------------------------------------------------- @router.get("/search") async def api_search_words( q: str = Query("", description="Search query"), lang: str = Query("en", pattern="^(en|de)$"), limit: int = Query(20, ge=1, le=100), offset: int = Query(0, ge=0), ): """Full-text search for vocabulary words.""" if not q.strip(): return {"words": [], "query": q, "total": 0} words = await search_words(q.strip(), lang=lang, limit=limit, offset=offset) return { "words": [w.to_dict() for w in words], "query": q, "total": len(words), } @router.get("/browse") async def api_browse_words( pos: str = Query("", description="Part of speech filter"), difficulty: int = Query(0, ge=0, le=5, description="Difficulty 1-5, 0=all"), tag: str = Query("", description="Tag filter"), limit: int = Query(50, ge=1, le=200), offset: int = Query(0, ge=0), ): """Browse vocabulary words with filters.""" words = await browse_words( pos=pos, difficulty=difficulty, tag=tag, limit=limit, offset=offset, ) return { "words": [w.to_dict() for w in words], "filters": {"pos": pos, "difficulty": difficulty, "tag": tag}, "total": len(words), } @router.get("/word/{word_id}") async def api_get_word(word_id: str): """Get a single word with all details.""" word = await get_word(word_id) if not word: raise HTTPException(status_code=404, detail="Wort nicht gefunden") return word.to_dict() @router.get("/filters") async def api_get_filters(): """Get available filter options (tags, parts of speech, word count).""" tags = await get_all_tags() pos_list = await get_all_pos() total = await count_words() return { "tags": tags, "parts_of_speech": pos_list, "total_words": total, } # --------------------------------------------------------------------------- # Learning Unit Creation from Word Selection # --------------------------------------------------------------------------- class CreateUnitFromWordsPayload(BaseModel): title: str word_ids: List[str] grade: Optional[str] = None language: Optional[str] = "de" @router.post("/units") async def api_create_unit_from_words(payload: CreateUnitFromWordsPayload): """Create a learning unit from selected vocabulary word IDs. Fetches full word details, creates a LearningUnit in the learning_units system, and stores the vocabulary data. """ if not payload.word_ids: raise HTTPException(status_code=400, detail="Keine Woerter ausgewaehlt") # Fetch all selected words words = [] for wid in payload.word_ids: word = await get_word(wid) if word: words.append(word) if not words: raise HTTPException(status_code=404, detail="Keine der Woerter gefunden") # Create learning unit lu = create_learning_unit(LearningUnitCreate( title=payload.title, topic="Vocabulary", grade_level=payload.grade or "5-8", language=payload.language or "de", status="raw", )) # Save vocabulary data as analysis JSON for generators import os analysis_dir = os.path.expanduser("~/Arbeitsblaetter/Lerneinheiten") os.makedirs(analysis_dir, exist_ok=True) vocab_data = [w.to_dict() for w in words] analysis_path = os.path.join(analysis_dir, f"{lu.id}_vocab.json") with open(analysis_path, "w", encoding="utf-8") as f: json.dump({"words": vocab_data, "title": payload.title}, f, ensure_ascii=False, indent=2) # Also save as QA items for flashcards/type trainer qa_items = [] for i, w in enumerate(words): qa_items.append({ "id": f"qa_{i+1}", "question": w.english, "answer": w.german, "question_type": "knowledge", "key_terms": [w.english], "difficulty": w.difficulty, "source_hint": w.part_of_speech, "leitner_box": 0, "correct_count": 0, "incorrect_count": 0, "last_seen": None, "next_review": None, # Extra fields for enhanced flashcards "ipa_en": w.ipa_en, "ipa_de": w.ipa_de, "syllables_en": w.syllables_en, "syllables_de": w.syllables_de, "example_en": w.example_en, "example_de": w.example_de, "image_url": w.image_url, "audio_url_en": w.audio_url_en, "audio_url_de": w.audio_url_de, "part_of_speech": w.part_of_speech, "translations": w.translations, }) qa_path = os.path.join(analysis_dir, f"{lu.id}_qa.json") with open(qa_path, "w", encoding="utf-8") as f: json.dump({ "qa_items": qa_items, "metadata": { "subject": "English Vocabulary", "grade_level": payload.grade or "5-8", "source_title": payload.title, "total_questions": len(qa_items), }, }, f, ensure_ascii=False, indent=2) logger.info(f"Created vocab unit {lu.id} with {len(words)} words") return { "unit_id": lu.id, "title": payload.title, "word_count": len(words), "status": "created", } @router.get("/units/{unit_id}") async def api_get_unit_words(unit_id: str): """Get all words for a learning unit.""" import os vocab_path = os.path.join( os.path.expanduser("~/Arbeitsblaetter/Lerneinheiten"), f"{unit_id}_vocab.json", ) if not os.path.exists(vocab_path): raise HTTPException(status_code=404, detail="Unit nicht gefunden") with open(vocab_path, "r", encoding="utf-8") as f: data = json.load(f) return { "unit_id": unit_id, "title": data.get("title", ""), "words": data.get("words", []), } # --------------------------------------------------------------------------- # Bulk Import (for seeding the dictionary) # --------------------------------------------------------------------------- class BulkImportPayload(BaseModel): words: List[Dict[str, Any]] @router.post("/import") async def api_bulk_import(payload: BulkImportPayload): """Bulk import vocabulary words (for seeding the dictionary). Each word dict should have at minimum: english, german. Optional: ipa_en, ipa_de, part_of_speech, syllables_en, syllables_de, example_en, example_de, difficulty, tags, translations. """ from vocabulary_db import insert_words_bulk words = [] for w in payload.words: words.append(VocabularyWord( english=w.get("english", ""), german=w.get("german", ""), ipa_en=w.get("ipa_en", ""), ipa_de=w.get("ipa_de", ""), part_of_speech=w.get("part_of_speech", ""), syllables_en=w.get("syllables_en", []), syllables_de=w.get("syllables_de", []), example_en=w.get("example_en", ""), example_de=w.get("example_de", ""), difficulty=w.get("difficulty", 1), tags=w.get("tags", []), translations=w.get("translations", {}), )) count = await insert_words_bulk(words) logger.info(f"Bulk imported {count} vocabulary words") return {"imported": count}