# breakpilot-lehrer/backend-lehrer/vocabulary_api.py

"""
Vocabulary API — Search, browse, and build learning units from the word catalog.

Endpoints for teachers to find words and create learning units,
and for students to access word details with audio/images/syllables.
"""

import logging
import json
from typing import Any, Dict, List, Optional

from fastapi import APIRouter, HTTPException, Query
from pydantic import BaseModel

from vocabulary_db import (
    search_words,
    get_word,
    browse_words,
    insert_word,
    count_words,
    get_all_tags,
    get_all_pos,
    VocabularyWord,
)
from learning_units import (
    LearningUnitCreate,
    create_learning_unit,
    get_learning_unit,
)

logger = logging.getLogger(__name__)

router = APIRouter(prefix="/vocabulary", tags=["vocabulary"])


# ---------------------------------------------------------------------------
# Search & Browse
# ---------------------------------------------------------------------------


@router.get("/search")
async def api_search_words(
    q: str = Query("", description="Search query"),
    lang: str = Query("en", pattern="^(en|de)$"),
    limit: int = Query(20, ge=1, le=100),
    offset: int = Query(0, ge=0),
):
    """Run a full-text vocabulary search and return one page of matches.

    A blank query (empty or whitespace-only) short-circuits to an empty
    result without calling ``search_words``. The original, unstripped
    query is echoed back under ``query``.

    Note that ``total`` is the number of words contained in this response
    (the size of the returned page), not a count over all matches.
    """
    term = q.strip()
    if not term:
        return {"words": [], "query": q, "total": 0}

    hits = await search_words(term, lang=lang, limit=limit, offset=offset)

    serialized = []
    for hit in hits:
        serialized.append(hit.to_dict())

    return {"words": serialized, "query": q, "total": len(hits)}


@router.get("/browse")
async def api_browse_words(
    pos: str = Query("", description="Part of speech filter"),
    difficulty: int = Query(0, ge=0, le=5, description="Difficulty 1-5, 0=all"),
    tag: str = Query("", description="Tag filter"),
    limit: int = Query(50, ge=1, le=200),
    offset: int = Query(0, ge=0),
):
    """List vocabulary words, optionally narrowed by filters.

    All filter values are forwarded unchanged to ``browse_words`` and are
    echoed back under ``filters`` so the client can see what was applied.
    ``total`` is the number of words in this response (the page size).
    """
    applied_filters = {"pos": pos, "difficulty": difficulty, "tag": tag}

    page = await browse_words(
        pos=pos,
        difficulty=difficulty,
        tag=tag,
        limit=limit,
        offset=offset,
    )

    serialized = []
    for entry in page:
        serialized.append(entry.to_dict())

    return {
        "words": serialized,
        "filters": applied_filters,
        "total": len(page),
    }


@router.get("/word/{word_id}")
async def api_get_word(word_id: str):
    """Return the full detail dict for one vocabulary word.

    Raises:
        HTTPException: 404 when ``get_word`` yields nothing for ``word_id``.
    """
    entry = await get_word(word_id)
    if entry:
        return entry.to_dict()
    raise HTTPException(status_code=404, detail="Wort nicht gefunden")


@router.get("/filters")
async def api_get_filters():
    """Return the values a client can offer as browse filters.

    The response carries the known tags, the known parts of speech and
    the overall number of words in the catalog.
    """
    # The three lookups are awaited one after another, in this order.
    available_tags = await get_all_tags()
    available_pos = await get_all_pos()
    word_total = await count_words()

    return dict(
        tags=available_tags,
        parts_of_speech=available_pos,
        total_words=word_total,
    )


# ---------------------------------------------------------------------------
# Audio TTS for Words
# ---------------------------------------------------------------------------


@router.get("/word/{word_id}/audio/{lang}")
async def api_get_word_audio(word_id: str, lang: str = "en"):
    """Return TTS audio (MP3) for a vocabulary word in the given language.

    The audio bytes come from ``audio_service.get_or_generate_audio``.
    Per the original author's note, that service generates audio on first
    request, caches it afterwards and uses Piper TTS (MIT license) with the
    Thorsten (DE) and Lessac (EN) voices; none of that is visible in this
    file.

    Args:
        word_id: ID of the word to speak.
        lang: ``"en"`` for the English text, ``"de"`` for the German text.

    Raises:
        HTTPException: 400 for an unsupported language or when the word
            has no text in that language, 404 when the word does not
            exist, 503 when no audio could be obtained.
    """
    from fastapi.responses import Response as FastAPIResponse

    # Only English and German text exist on a word. Without this check any
    # other code would silently select the German text and be passed on to
    # the TTS service as its language.
    if lang not in ("en", "de"):
        raise HTTPException(status_code=400, detail=f"Sprache '{lang}' nicht unterstuetzt")

    word = await get_word(word_id)
    if not word:
        raise HTTPException(status_code=404, detail="Wort nicht gefunden")

    text = word.english if lang == "en" else word.german
    if not text:
        raise HTTPException(status_code=400, detail=f"Kein Text fuer Sprache '{lang}'")

    from audio_service import get_or_generate_audio
    audio_bytes = await get_or_generate_audio(text, language=lang, word_id=word_id)

    if not audio_bytes:
        raise HTTPException(status_code=503, detail="TTS Service nicht verfuegbar")

    return FastAPIResponse(content=audio_bytes, media_type="audio/mpeg")


@router.get("/word/{word_id}/audio-syllables/{lang}")
async def api_get_syllable_audio(word_id: str, lang: str = "en"):
    """Return TTS audio (MP3) that speaks a word slowly, syllable by syllable.

    The syllables are joined with ``" ... "`` (e.g. "ap ... ple"); per the
    original author's note, Piper renders "..." as a pause. When the word
    has no syllable list for the language, the whole word is spoken instead.

    Args:
        word_id: ID of the word to speak.
        lang: ``"en"`` for English syllables, ``"de"`` for German ones.

    Raises:
        HTTPException: 400 for an unsupported language or when the word
            has neither syllables nor text in that language, 404 when the
            word does not exist, 503 when no audio could be obtained.
    """
    from fastapi.responses import Response as FastAPIResponse

    # ``lang`` ends up in the audio cache key below, so only the two
    # languages a word actually carries are accepted.
    if lang not in ("en", "de"):
        raise HTTPException(status_code=400, detail=f"Sprache '{lang}' nicht unterstuetzt")

    word = await get_word(word_id)
    if not word:
        raise HTTPException(status_code=404, detail="Wort nicht gefunden")

    syllables = word.syllables_en if lang == "en" else word.syllables_de
    if not syllables:
        # Fallback to the full word. Guard against a missing text so the
        # join below never receives None or an empty string.
        text = word.english if lang == "en" else word.german
        if not text:
            raise HTTPException(status_code=400, detail=f"Kein Text fuer Sprache '{lang}'")
        syllables = [text]

    # Join syllables with pauses (Piper handles "..." as pause)
    slow_text = " ... ".join(syllables)

    from audio_service import get_or_generate_audio
    # Separate cache key so the slow version never collides with the
    # normal-speed audio of the same word.
    cache_key = f"{word_id}_syl_{lang}"
    audio_bytes = await get_or_generate_audio(slow_text, language=lang, word_id=cache_key)

    if not audio_bytes:
        raise HTTPException(status_code=503, detail="TTS Service nicht verfuegbar")

    return FastAPIResponse(content=audio_bytes, media_type="audio/mpeg")


# ---------------------------------------------------------------------------
# Learning Unit Creation from Word Selection
# ---------------------------------------------------------------------------


class CreateUnitFromWordsPayload(BaseModel):
    """Request body for creating a learning unit from selected words."""

    # Title of the new learning unit; also written into the generated JSON files.
    title: str
    # IDs of the vocabulary words to include; each one is looked up via get_word.
    word_ids: List[str]
    # Grade level; the endpoint falls back to "5-8" when this is unset or empty.
    grade: Optional[str] = None
    # Unit language; the endpoint falls back to "de" when this is unset or empty.
    language: Optional[str] = "de"


def _qa_item_from_word(position: int, word: "VocabularyWord") -> Dict[str, Any]:
    """Build the flashcard/type-trainer QA entry for one vocabulary word.

    ``position`` is the 1-based index used to form the item id (``qa_1``,
    ``qa_2``, ...). The English word is the question, the German word the
    answer; all Leitner counters start at zero.
    """
    return {
        "id": f"qa_{position}",
        "question": word.english,
        "answer": word.german,
        "question_type": "knowledge",
        "key_terms": [word.english],
        "difficulty": word.difficulty,
        "source_hint": word.part_of_speech,
        "leitner_box": 0,
        "correct_count": 0,
        "incorrect_count": 0,
        "last_seen": None,
        "next_review": None,
        # Extra fields for enhanced flashcards
        "ipa_en": word.ipa_en,
        "ipa_de": word.ipa_de,
        "syllables_en": word.syllables_en,
        "syllables_de": word.syllables_de,
        "example_en": word.example_en,
        "example_de": word.example_de,
        "image_url": word.image_url,
        "audio_url_en": word.audio_url_en,
        "audio_url_de": word.audio_url_de,
        "part_of_speech": word.part_of_speech,
        "translations": word.translations,
    }


def _dump_json(path: str, data: Dict[str, Any]) -> None:
    """Write ``data`` to ``path`` as indented UTF-8 JSON (non-ASCII kept as is)."""
    with open(path, "w", encoding="utf-8") as f:
        json.dump(data, f, ensure_ascii=False, indent=2)


@router.post("/units")
async def api_create_unit_from_words(payload: CreateUnitFromWordsPayload):
    """Create a learning unit from selected vocabulary word IDs.

    Looks up every requested word, creates a LearningUnit with status
    ``"raw"`` and writes two files into ``~/Arbeitsblaetter/Lerneinheiten``:

    * ``<unit id>_vocab.json`` - the full word dicts plus the unit title
    * ``<unit id>_qa.json`` - one QA item per word for flashcards/type trainer

    IDs that cannot be resolved do not abort the request; they are logged
    and reported back under ``missing_word_ids``.

    Returns:
        Dict with ``unit_id``, ``title``, ``word_count``, ``status`` and
        ``missing_word_ids``.

    Raises:
        HTTPException: 400 when no word IDs were sent, 404 when none of
            the IDs could be resolved.
    """
    import os

    if not payload.word_ids:
        raise HTTPException(status_code=400, detail="Keine Woerter ausgewaehlt")

    # Fetch all selected words, remembering the IDs that do not resolve so
    # the caller learns about them instead of getting a silently smaller unit.
    words = []
    missing_ids = []
    for wid in payload.word_ids:
        word = await get_word(wid)
        if word:
            words.append(word)
        else:
            missing_ids.append(wid)

    if not words:
        raise HTTPException(status_code=404, detail="Keine der Woerter gefunden")

    if missing_ids:
        logger.warning(
            "Ignoring %d unknown word id(s) while creating unit: %s",
            len(missing_ids), missing_ids,
        )

    grade_level = payload.grade or "5-8"

    # Create learning unit
    lu = create_learning_unit(LearningUnitCreate(
        title=payload.title,
        topic="Vocabulary",
        grade_level=grade_level,
        language=payload.language or "de",
        status="raw",
    ))

    analysis_dir = os.path.expanduser("~/Arbeitsblaetter/Lerneinheiten")
    os.makedirs(analysis_dir, exist_ok=True)

    # Save vocabulary data as analysis JSON for generators
    _dump_json(
        os.path.join(analysis_dir, f"{lu.id}_vocab.json"),
        {"words": [w.to_dict() for w in words], "title": payload.title},
    )

    # Also save as QA items for flashcards/type trainer
    qa_items = [
        _qa_item_from_word(position, w)
        for position, w in enumerate(words, start=1)
    ]
    _dump_json(
        os.path.join(analysis_dir, f"{lu.id}_qa.json"),
        {
            "qa_items": qa_items,
            "metadata": {
                "subject": "English Vocabulary",
                "grade_level": grade_level,
                "source_title": payload.title,
                "total_questions": len(qa_items),
            },
        },
    )

    logger.info("Created vocab unit %s with %d words", lu.id, len(words))

    return {
        "unit_id": lu.id,
        "title": payload.title,
        "word_count": len(words),
        "status": "created",
        "missing_word_ids": missing_ids,
    }


@router.get("/units/{unit_id}")
async def api_get_unit_words(unit_id: str):
    """Return the stored vocabulary of a learning unit.

    Reads ``<unit_id>_vocab.json`` from ``~/Arbeitsblaetter/Lerneinheiten``
    and returns its title and word list.

    Raises:
        HTTPException: 404 when the ID is not a plain file-name component
            or when no vocabulary file exists for it.
    """
    import os

    # unit_id comes straight from the URL and is spliced into a file name.
    # Refuse path separators and NUL so it can never address a file outside
    # the units directory (or make open() fail with a ValueError).
    if any(ch in unit_id for ch in ("/", "\\", "\0")):
        raise HTTPException(status_code=404, detail="Unit nicht gefunden")

    vocab_path = os.path.join(
        os.path.expanduser("~/Arbeitsblaetter/Lerneinheiten"),
        f"{unit_id}_vocab.json",
    )

    # Open directly instead of checking os.path.exists first: the file
    # could disappear between the check and the read.
    try:
        with open(vocab_path, "r", encoding="utf-8") as f:
            data = json.load(f)
    except FileNotFoundError:
        raise HTTPException(status_code=404, detail="Unit nicht gefunden") from None

    return {
        "unit_id": unit_id,
        "title": data.get("title", ""),
        "words": data.get("words", []),
    }


# ---------------------------------------------------------------------------
# Bulk Import (for seeding the dictionary)
# ---------------------------------------------------------------------------


class BulkImportPayload(BaseModel):
    """Request body for the bulk import endpoint."""

    # One free-form dict per word. The import endpoint reads the keys
    # english, german, ipa_en, ipa_de, part_of_speech, syllables_en,
    # syllables_de, example_en, example_de, difficulty, tags, translations.
    words: List[Dict[str, Any]]


@router.post("/import")
async def api_bulk_import(payload: BulkImportPayload):
    """Bulk import vocabulary words (for seeding the dictionary).

    Each word dict must have non-blank string values for ``english`` and
    ``german``; entries that do not are skipped rather than stored as
    empty words. Optional keys: ipa_en, ipa_de, part_of_speech,
    syllables_en, syllables_de, example_en, example_de, difficulty, tags,
    translations. A missing or ``null`` optional key falls back to its
    default (empty string/list/dict, difficulty 1).

    Returns:
        Dict with ``imported`` (value returned by ``insert_words_bulk``,
        or 0 when nothing was valid) and ``skipped`` (number of entries
        rejected for lacking english/german).
    """
    from vocabulary_db import insert_words_bulk

    def _has_text(value: Any) -> bool:
        # True only for a string with at least one non-whitespace character.
        return isinstance(value, str) and bool(value.strip())

    words = []
    skipped = 0
    for w in payload.words:
        if not (_has_text(w.get("english")) and _has_text(w.get("german"))):
            skipped += 1
            continue

        # ``or`` (not a .get default) so an explicit JSON null also falls
        # back to the default instead of being passed on as None.
        difficulty = w.get("difficulty")
        words.append(VocabularyWord(
            english=w["english"],
            german=w["german"],
            ipa_en=w.get("ipa_en") or "",
            ipa_de=w.get("ipa_de") or "",
            part_of_speech=w.get("part_of_speech") or "",
            syllables_en=w.get("syllables_en") or [],
            syllables_de=w.get("syllables_de") or [],
            example_en=w.get("example_en") or "",
            example_de=w.get("example_de") or "",
            difficulty=1 if difficulty is None else difficulty,
            tags=w.get("tags") or [],
            translations=w.get("translations") or {},
        ))

    if skipped:
        logger.warning(
            "Bulk import skipped %d entr(y/ies) without english/german", skipped
        )

    # Nothing valid to store: do not bother the database layer.
    count = await insert_words_bulk(words) if words else 0
    logger.info("Bulk imported %s vocabulary words", count)
    return {"imported": count, "skipped": skipped}


# ---------------------------------------------------------------------------
# Translation Generation
# ---------------------------------------------------------------------------


class TranslateRequest(BaseModel):
    """Request body for the translation endpoint."""

    # IDs of the vocabulary words to translate.
    word_ids: List[str]
    # Target language code; the endpoint accepts tr, ar, uk, ru, pl, fr, es.
    target_language: str


@router.post("/translate")
async def api_translate_words(payload: TranslateRequest):
    """Translate the given vocabulary words into one target language.

    The work is delegated to ``translation_service.translate_and_store``.
    Per the original author's note, that service uses a local LLM (Ollama)
    and caches results in the ``vocabulary_words.translations`` JSONB
    field; this is not visible in this file.

    Raises:
        HTTPException: 400 when the target language is not one of
            tr, ar, uk, ru, pl, fr, es.
    """
    from translation_service import translate_and_store

    target = payload.target_language
    supported_codes = ("tr", "ar", "uk", "ru", "pl", "fr", "es")
    if target not in supported_codes:
        raise HTTPException(status_code=400, detail=f"Sprache '{target}' nicht unterstuetzt")

    translated = await translate_and_store(payload.word_ids, target)
    return {"translated": translated, "target_language": target}