# breakpilot-lehrer/backend-lehrer/vocabulary_api.py

"""
Vocabulary API — Search, browse, and build learning units from the word catalog.

Endpoints for teachers to find words and create learning units,
and for students to access word details with audio/images/syllables.
"""

import logging
import json
from typing import Any, Dict, List, Optional

from fastapi import APIRouter, HTTPException, Query
from pydantic import BaseModel

from vocabulary_db import (
    search_words,
    get_word,
    browse_words,
    insert_word,
    count_words,
    get_all_tags,
    get_all_pos,
    VocabularyWord,
)
from learning_units import (
    LearningUnitCreate,
    create_learning_unit,
    get_learning_unit,
)

logger = logging.getLogger(__name__)

router = APIRouter(prefix="/vocabulary", tags=["vocabulary"])


# ---------------------------------------------------------------------------
# Search & Browse
# ---------------------------------------------------------------------------


@router.get("/search")
async def api_search_words(
    q: str = Query("", description="Search query"),
    lang: str = Query("en", pattern="^(en|de)$"),
    limit: int = Query(20, ge=1, le=100),
    offset: int = Query(0, ge=0),
):
    """Search the vocabulary catalog for words matching a query.

    A blank or whitespace-only query short-circuits to an empty result
    without calling ``search_words``. The query is stripped before it is
    passed on, but echoed back unmodified in the response.

    Returns:
        A dict with ``words`` (serialized matches), ``query`` (the raw
        query string) and ``total``. Note that ``total`` is the number of
        words in this response page, not a catalog-wide match count.
    """
    term = q.strip()
    if term:
        matches = await search_words(term, lang=lang, limit=limit, offset=offset)
        serialized = [match.to_dict() for match in matches]
        hit_count = len(matches)
    else:
        # Nothing to search for: answer with an empty page.
        serialized, hit_count = [], 0

    return {"words": serialized, "query": q, "total": hit_count}


@router.get("/browse")
async def api_browse_words(
    pos: str = Query("", description="Part of speech filter"),
    difficulty: int = Query(0, ge=0, le=5, description="Difficulty 1-5, 0=all"),
    tag: str = Query("", description="Tag filter"),
    limit: int = Query(50, ge=1, le=200),
    offset: int = Query(0, ge=0),
):
    """List vocabulary words, optionally narrowed by filters.

    The three filter values are forwarded unchanged to ``browse_words``
    together with the paging parameters, and echoed back in the response.

    Returns:
        A dict with ``words`` (serialized words), ``filters`` (the filter
        values that were applied) and ``total``, which is the number of
        words in this response page.
    """
    applied_filters = {"pos": pos, "difficulty": difficulty, "tag": tag}
    page = await browse_words(limit=limit, offset=offset, **applied_filters)
    serialized = [entry.to_dict() for entry in page]
    return {
        "words": serialized,
        "filters": applied_filters,
        "total": len(page),
    }


@router.get("/word/{word_id}")
async def api_get_word(word_id: str):
    """Return the full detail record for one vocabulary word.

    Raises:
        HTTPException: 404 when ``get_word`` yields no (or a falsy) result
            for ``word_id``.
    """
    found = await get_word(word_id)
    if found:
        return found.to_dict()
    raise HTTPException(status_code=404, detail="Wort nicht gefunden")


@router.get("/filters")
async def api_get_filters():
    """Report the filter options available for browsing.

    Returns:
        A dict with ``tags`` (from ``get_all_tags``), ``parts_of_speech``
        (from ``get_all_pos``) and ``total_words`` (from ``count_words``).
    """
    # Dict displays evaluate left to right, so the three lookups still run
    # sequentially in tags -> parts of speech -> count order.
    return {
        "tags": await get_all_tags(),
        "parts_of_speech": await get_all_pos(),
        "total_words": await count_words(),
    }


# ---------------------------------------------------------------------------
# Learning Unit Creation from Word Selection
# ---------------------------------------------------------------------------


class CreateUnitFromWordsPayload(BaseModel):
    """Request body for creating a learning unit from selected vocabulary words."""

    # Title of the new learning unit; also written into the generated JSON files.
    title: str
    # IDs of the vocabulary words to include in the unit.
    word_ids: List[str]
    # Optional grade level; the create endpoint falls back to "5-8" when unset.
    grade: Optional[str] = None
    # Language handed to the learning unit; the create endpoint falls back
    # to "de" when this is None or empty.
    language: Optional[str] = "de"


def _qa_item_from_word(position: int, word: VocabularyWord) -> Dict[str, Any]:
    """Build one flashcard / type-trainer QA item from a vocabulary word.

    Args:
        position: 1-based position of the word within the unit; used to
            form the item ID (``qa_1``, ``qa_2``, ...).
        word: The vocabulary word to convert.

    Returns:
        A dict asking for the English word and expecting the German one,
        with all review counters reset and the extra pronunciation,
        example and media fields copied from the word.
    """
    return {
        "id": f"qa_{position}",
        "question": word.english,
        "answer": word.german,
        "question_type": "knowledge",
        "key_terms": [word.english],
        "difficulty": word.difficulty,
        "source_hint": word.part_of_speech,
        "leitner_box": 0,
        "correct_count": 0,
        "incorrect_count": 0,
        "last_seen": None,
        "next_review": None,
        # Extra fields for enhanced flashcards
        "ipa_en": word.ipa_en,
        "ipa_de": word.ipa_de,
        "syllables_en": word.syllables_en,
        "syllables_de": word.syllables_de,
        "example_en": word.example_en,
        "example_de": word.example_de,
        "image_url": word.image_url,
        "audio_url_en": word.audio_url_en,
        "audio_url_de": word.audio_url_de,
        "part_of_speech": word.part_of_speech,
        "translations": word.translations,
    }


def _write_json_file(path: str, content: Dict[str, Any]) -> None:
    """Write ``content`` to ``path`` as indented UTF-8 JSON (non-ASCII kept as-is)."""
    with open(path, "w", encoding="utf-8") as f:
        json.dump(content, f, ensure_ascii=False, indent=2)


@router.post("/units")
async def api_create_unit_from_words(payload: CreateUnitFromWordsPayload):
    """Create a learning unit from selected vocabulary word IDs.

    Fetches each selected word, creates a LearningUnit in the
    learning_units system, and writes two files into
    ``~/Arbeitsblaetter/Lerneinheiten``:

    * ``<unit id>_vocab.json`` - the full word records plus the unit title
    * ``<unit id>_qa.json`` - one QA item per word plus unit metadata

    Word IDs that cannot be resolved are skipped; a warning listing them
    is logged so partially created units can be diagnosed.

    Returns:
        A dict with ``unit_id``, ``title``, ``word_count`` (number of words
        actually included) and ``status`` set to ``"created"``.

    Raises:
        HTTPException: 400 when ``word_ids`` is empty, 404 when none of the
            given IDs resolves to a word.
    """
    import os

    if not payload.word_ids:
        raise HTTPException(status_code=400, detail="Keine Woerter ausgewaehlt")

    # Fetch all selected words, remembering which IDs could not be resolved.
    words = []
    missing_ids = []
    for wid in payload.word_ids:
        word = await get_word(wid)
        if word:
            words.append(word)
        else:
            missing_ids.append(wid)

    if not words:
        raise HTTPException(status_code=404, detail="Keine der Woerter gefunden")

    if missing_ids:
        logger.warning(
            "Ignoring %d unknown word id(s) while creating vocab unit: %r",
            len(missing_ids),
            missing_ids,
        )

    # Single source for the grade fallback used by both the unit and the
    # QA metadata.
    grade_level = payload.grade or "5-8"

    # Create learning unit
    lu = create_learning_unit(LearningUnitCreate(
        title=payload.title,
        topic="Vocabulary",
        grade_level=grade_level,
        language=payload.language or "de",
        status="raw",
    ))

    analysis_dir = os.path.expanduser("~/Arbeitsblaetter/Lerneinheiten")
    os.makedirs(analysis_dir, exist_ok=True)

    # Save vocabulary data as analysis JSON for generators
    _write_json_file(
        os.path.join(analysis_dir, f"{lu.id}_vocab.json"),
        {"words": [w.to_dict() for w in words], "title": payload.title},
    )

    # Also save as QA items for flashcards/type trainer
    qa_items = [
        _qa_item_from_word(position, w)
        for position, w in enumerate(words, start=1)
    ]
    _write_json_file(
        os.path.join(analysis_dir, f"{lu.id}_qa.json"),
        {
            "qa_items": qa_items,
            "metadata": {
                "subject": "English Vocabulary",
                "grade_level": grade_level,
                "source_title": payload.title,
                "total_questions": len(qa_items),
            },
        },
    )

    logger.info("Created vocab unit %s with %d words", lu.id, len(words))

    return {
        "unit_id": lu.id,
        "title": payload.title,
        "word_count": len(words),
        "status": "created",
    }


@router.get("/units/{unit_id}")
async def api_get_unit_words(unit_id: str):
    """Get all words for a learning unit.

    Reads ``<unit_id>_vocab.json`` from ``~/Arbeitsblaetter/Lerneinheiten``.

    Returns:
        A dict with ``unit_id``, ``title`` and ``words`` as stored in the
        file (missing keys default to ``""`` and ``[]``).

    Raises:
        HTTPException: 404 when no vocabulary file exists for ``unit_id``
            or when ``unit_id`` is not a plain file-name component.
    """
    import os

    filename = f"{unit_id}_vocab.json"
    # unit_id comes straight from the URL. Refuse anything that would not
    # stay a single file name inside the units directory (path separators,
    # drive prefixes, NUL bytes) instead of handing it to open().
    if "\x00" in unit_id or os.path.basename(filename) != filename:
        raise HTTPException(status_code=404, detail="Unit nicht gefunden")

    vocab_path = os.path.join(
        os.path.expanduser("~/Arbeitsblaetter/Lerneinheiten"),
        filename,
    )

    # Open directly rather than checking existence first, so a file removed
    # between check and open still yields a clean 404.
    try:
        with open(vocab_path, "r", encoding="utf-8") as f:
            data = json.load(f)
    except FileNotFoundError:
        raise HTTPException(status_code=404, detail="Unit nicht gefunden") from None

    return {
        "unit_id": unit_id,
        "title": data.get("title", ""),
        "words": data.get("words", []),
    }


# ---------------------------------------------------------------------------
# Bulk Import (for seeding the dictionary)
# ---------------------------------------------------------------------------


class BulkImportPayload(BaseModel):
    """Request body for the bulk vocabulary import endpoint."""

    # One free-form dict per word; the import endpoint reads known keys
    # with .get() and substitutes defaults for any that are absent.
    words: List[Dict[str, Any]]


@router.post("/import")
async def api_bulk_import(payload: BulkImportPayload):
    """Seed the dictionary with a batch of vocabulary words.

    Every entry in ``payload.words`` is turned into a ``VocabularyWord``.
    Entries are expected to carry at least ``english`` and ``german``;
    the remaining keys (ipa_en, ipa_de, part_of_speech, syllables_en,
    syllables_de, example_en, example_de, difficulty, tags, translations)
    are optional. Any absent key, including the two expected ones, is
    filled with an empty default (difficulty defaults to 1).

    Returns:
        A dict with ``imported`` set to the value returned by
        ``insert_words_bulk``.
    """
    from vocabulary_db import insert_words_bulk

    # Fresh default literals per entry, so no list/dict default is shared
    # between two words.
    parsed_words = [
        VocabularyWord(
            english=entry.get("english", ""),
            german=entry.get("german", ""),
            ipa_en=entry.get("ipa_en", ""),
            ipa_de=entry.get("ipa_de", ""),
            part_of_speech=entry.get("part_of_speech", ""),
            syllables_en=entry.get("syllables_en", []),
            syllables_de=entry.get("syllables_de", []),
            example_en=entry.get("example_en", ""),
            example_de=entry.get("example_de", ""),
            difficulty=entry.get("difficulty", 1),
            tags=entry.get("tags", []),
            translations=entry.get("translations", {}),
        )
        for entry in payload.words
    ]

    count = await insert_words_bulk(parsed_words)
    logger.info(f"Bulk imported {count} vocabulary words")
    return {"imported": count}