Add Vocabulary Learning Platform (Phase 1: DB + API + Editor)
Some checks failed
CI / go-lint (push) Has been skipped
CI / python-lint (push) Has been skipped
CI / nodejs-lint (push) Has been skipped
CI / test-go-school (push) Successful in 59s
CI / test-go-edu-search (push) Successful in 45s
CI / test-python-klausur (push) Failing after 3m7s
CI / test-python-agent-core (push) Successful in 24s
CI / test-nodejs-website (push) Successful in 31s
Some checks failed
CI / go-lint (push) Has been skipped
CI / python-lint (push) Has been skipped
CI / nodejs-lint (push) Has been skipped
CI / test-go-school (push) Successful in 59s
CI / test-go-edu-search (push) Successful in 45s
CI / test-python-klausur (push) Failing after 3m7s
CI / test-python-agent-core (push) Successful in 24s
CI / test-nodejs-website (push) Successful in 31s
Strategic pivot: Studio-v2 becomes a language learning platform. Compliance guardrail added to CLAUDE.md — no scan/OCR of third-party content in customer frontend. Upload of OWN materials remains allowed. Phase 1.1 — vocabulary_db.py: PostgreSQL model for 160k+ words with english, german, IPA, syllables, examples, images, audio, difficulty, tags, translations (multilingual). Trigram search index. Phase 1.2 — vocabulary_api.py: Search, browse, filters, bulk import, learning unit creation from word selection. Creates QA items with enhanced fields (IPA, syllables, image, audio) for flashcards. Phase 1.3 — /vocabulary page: Search bar with POS/difficulty filters, word cards with audio buttons, unit builder sidebar. Teacher selects words → creates learning unit → redirects to flashcards. Sidebar: Added "Woerterbuch" (/vocabulary) and "Lernmodule" (/learn). Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
264
backend-lehrer/vocabulary_api.py
Normal file
264
backend-lehrer/vocabulary_api.py
Normal file
@@ -0,0 +1,264 @@
|
||||
"""
|
||||
Vocabulary API — Search, browse, and build learning units from the word catalog.
|
||||
|
||||
Endpoints for teachers to find words and create learning units,
|
||||
and for students to access word details with audio/images/syllables.
|
||||
"""
|
||||
|
||||
import logging
|
||||
import json
|
||||
from typing import Any, Dict, List, Optional
|
||||
|
||||
from fastapi import APIRouter, HTTPException, Query
|
||||
from pydantic import BaseModel
|
||||
|
||||
from vocabulary_db import (
|
||||
search_words,
|
||||
get_word,
|
||||
browse_words,
|
||||
insert_word,
|
||||
count_words,
|
||||
get_all_tags,
|
||||
get_all_pos,
|
||||
VocabularyWord,
|
||||
)
|
||||
from learning_units import (
|
||||
LearningUnitCreate,
|
||||
create_learning_unit,
|
||||
get_learning_unit,
|
||||
)
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
router = APIRouter(prefix="/vocabulary", tags=["vocabulary"])
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Search & Browse
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
@router.get("/search")
|
||||
async def api_search_words(
|
||||
q: str = Query("", description="Search query"),
|
||||
lang: str = Query("en", pattern="^(en|de)$"),
|
||||
limit: int = Query(20, ge=1, le=100),
|
||||
offset: int = Query(0, ge=0),
|
||||
):
|
||||
"""Full-text search for vocabulary words."""
|
||||
if not q.strip():
|
||||
return {"words": [], "query": q, "total": 0}
|
||||
|
||||
words = await search_words(q.strip(), lang=lang, limit=limit, offset=offset)
|
||||
return {
|
||||
"words": [w.to_dict() for w in words],
|
||||
"query": q,
|
||||
"total": len(words),
|
||||
}
|
||||
|
||||
|
||||
@router.get("/browse")
|
||||
async def api_browse_words(
|
||||
pos: str = Query("", description="Part of speech filter"),
|
||||
difficulty: int = Query(0, ge=0, le=5, description="Difficulty 1-5, 0=all"),
|
||||
tag: str = Query("", description="Tag filter"),
|
||||
limit: int = Query(50, ge=1, le=200),
|
||||
offset: int = Query(0, ge=0),
|
||||
):
|
||||
"""Browse vocabulary words with filters."""
|
||||
words = await browse_words(
|
||||
pos=pos, difficulty=difficulty, tag=tag,
|
||||
limit=limit, offset=offset,
|
||||
)
|
||||
return {
|
||||
"words": [w.to_dict() for w in words],
|
||||
"filters": {"pos": pos, "difficulty": difficulty, "tag": tag},
|
||||
"total": len(words),
|
||||
}
|
||||
|
||||
|
||||
@router.get("/word/{word_id}")
|
||||
async def api_get_word(word_id: str):
|
||||
"""Get a single word with all details."""
|
||||
word = await get_word(word_id)
|
||||
if not word:
|
||||
raise HTTPException(status_code=404, detail="Wort nicht gefunden")
|
||||
return word.to_dict()
|
||||
|
||||
|
||||
@router.get("/filters")
|
||||
async def api_get_filters():
|
||||
"""Get available filter options (tags, parts of speech, word count)."""
|
||||
tags = await get_all_tags()
|
||||
pos_list = await get_all_pos()
|
||||
total = await count_words()
|
||||
return {
|
||||
"tags": tags,
|
||||
"parts_of_speech": pos_list,
|
||||
"total_words": total,
|
||||
}
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Learning Unit Creation from Word Selection
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
class CreateUnitFromWordsPayload(BaseModel):
    """Request body for POST /vocabulary/units: build a unit from word IDs."""

    # Display title of the new learning unit (also stored in the sidecar JSON).
    title: str
    # IDs of the selected vocabulary words; must be non-empty (400 otherwise).
    word_ids: List[str]
    # Target grade range; the endpoint falls back to "5-8" when omitted.
    grade: Optional[str] = None
    # Unit language code; the endpoint falls back to "de" when omitted.
    language: Optional[str] = "de"
|
||||
|
||||
|
||||
@router.post("/units")
|
||||
async def api_create_unit_from_words(payload: CreateUnitFromWordsPayload):
|
||||
"""Create a learning unit from selected vocabulary word IDs.
|
||||
|
||||
Fetches full word details, creates a LearningUnit in the
|
||||
learning_units system, and stores the vocabulary data.
|
||||
"""
|
||||
if not payload.word_ids:
|
||||
raise HTTPException(status_code=400, detail="Keine Woerter ausgewaehlt")
|
||||
|
||||
# Fetch all selected words
|
||||
words = []
|
||||
for wid in payload.word_ids:
|
||||
word = await get_word(wid)
|
||||
if word:
|
||||
words.append(word)
|
||||
|
||||
if not words:
|
||||
raise HTTPException(status_code=404, detail="Keine der Woerter gefunden")
|
||||
|
||||
# Create learning unit
|
||||
lu = create_learning_unit(LearningUnitCreate(
|
||||
title=payload.title,
|
||||
topic="Vocabulary",
|
||||
grade_level=payload.grade or "5-8",
|
||||
language=payload.language or "de",
|
||||
status="raw",
|
||||
))
|
||||
|
||||
# Save vocabulary data as analysis JSON for generators
|
||||
import os
|
||||
analysis_dir = os.path.expanduser("~/Arbeitsblaetter/Lerneinheiten")
|
||||
os.makedirs(analysis_dir, exist_ok=True)
|
||||
|
||||
vocab_data = [w.to_dict() for w in words]
|
||||
analysis_path = os.path.join(analysis_dir, f"{lu.id}_vocab.json")
|
||||
with open(analysis_path, "w", encoding="utf-8") as f:
|
||||
json.dump({"words": vocab_data, "title": payload.title}, f, ensure_ascii=False, indent=2)
|
||||
|
||||
# Also save as QA items for flashcards/type trainer
|
||||
qa_items = []
|
||||
for i, w in enumerate(words):
|
||||
qa_items.append({
|
||||
"id": f"qa_{i+1}",
|
||||
"question": w.english,
|
||||
"answer": w.german,
|
||||
"question_type": "knowledge",
|
||||
"key_terms": [w.english],
|
||||
"difficulty": w.difficulty,
|
||||
"source_hint": w.part_of_speech,
|
||||
"leitner_box": 0,
|
||||
"correct_count": 0,
|
||||
"incorrect_count": 0,
|
||||
"last_seen": None,
|
||||
"next_review": None,
|
||||
# Extra fields for enhanced flashcards
|
||||
"ipa_en": w.ipa_en,
|
||||
"ipa_de": w.ipa_de,
|
||||
"syllables_en": w.syllables_en,
|
||||
"syllables_de": w.syllables_de,
|
||||
"example_en": w.example_en,
|
||||
"example_de": w.example_de,
|
||||
"image_url": w.image_url,
|
||||
"audio_url_en": w.audio_url_en,
|
||||
"audio_url_de": w.audio_url_de,
|
||||
"part_of_speech": w.part_of_speech,
|
||||
"translations": w.translations,
|
||||
})
|
||||
|
||||
qa_path = os.path.join(analysis_dir, f"{lu.id}_qa.json")
|
||||
with open(qa_path, "w", encoding="utf-8") as f:
|
||||
json.dump({
|
||||
"qa_items": qa_items,
|
||||
"metadata": {
|
||||
"subject": "English Vocabulary",
|
||||
"grade_level": payload.grade or "5-8",
|
||||
"source_title": payload.title,
|
||||
"total_questions": len(qa_items),
|
||||
},
|
||||
}, f, ensure_ascii=False, indent=2)
|
||||
|
||||
logger.info(f"Created vocab unit {lu.id} with {len(words)} words")
|
||||
|
||||
return {
|
||||
"unit_id": lu.id,
|
||||
"title": payload.title,
|
||||
"word_count": len(words),
|
||||
"status": "created",
|
||||
}
|
||||
|
||||
|
||||
@router.get("/units/{unit_id}")
|
||||
async def api_get_unit_words(unit_id: str):
|
||||
"""Get all words for a learning unit."""
|
||||
import os
|
||||
vocab_path = os.path.join(
|
||||
os.path.expanduser("~/Arbeitsblaetter/Lerneinheiten"),
|
||||
f"{unit_id}_vocab.json",
|
||||
)
|
||||
if not os.path.exists(vocab_path):
|
||||
raise HTTPException(status_code=404, detail="Unit nicht gefunden")
|
||||
|
||||
with open(vocab_path, "r", encoding="utf-8") as f:
|
||||
data = json.load(f)
|
||||
|
||||
return {
|
||||
"unit_id": unit_id,
|
||||
"title": data.get("title", ""),
|
||||
"words": data.get("words", []),
|
||||
}
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Bulk Import (for seeding the dictionary)
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
class BulkImportPayload(BaseModel):
    """Request body for POST /vocabulary/import (dictionary seeding)."""

    # Raw word dicts; each needs at least "english" and "german" keys —
    # all other keys are optional and default to empty values on import.
    words: List[Dict[str, Any]]
|
||||
|
||||
|
||||
@router.post("/import")
|
||||
async def api_bulk_import(payload: BulkImportPayload):
|
||||
"""Bulk import vocabulary words (for seeding the dictionary).
|
||||
|
||||
Each word dict should have at minimum: english, german.
|
||||
Optional: ipa_en, ipa_de, part_of_speech, syllables_en, syllables_de,
|
||||
example_en, example_de, difficulty, tags, translations.
|
||||
"""
|
||||
from vocabulary_db import insert_words_bulk
|
||||
|
||||
words = []
|
||||
for w in payload.words:
|
||||
words.append(VocabularyWord(
|
||||
english=w.get("english", ""),
|
||||
german=w.get("german", ""),
|
||||
ipa_en=w.get("ipa_en", ""),
|
||||
ipa_de=w.get("ipa_de", ""),
|
||||
part_of_speech=w.get("part_of_speech", ""),
|
||||
syllables_en=w.get("syllables_en", []),
|
||||
syllables_de=w.get("syllables_de", []),
|
||||
example_en=w.get("example_en", ""),
|
||||
example_de=w.get("example_de", ""),
|
||||
difficulty=w.get("difficulty", 1),
|
||||
tags=w.get("tags", []),
|
||||
translations=w.get("translations", {}),
|
||||
))
|
||||
|
||||
count = await insert_words_bulk(words)
|
||||
logger.info(f"Bulk imported {count} vocabulary words")
|
||||
return {"imported": count}
|
||||
Reference in New Issue
Block a user