diff --git a/backend-lehrer/services/image_service.py b/backend-lehrer/services/image_service.py
new file mode 100644
index 0000000..e82d9f3
--- /dev/null
+++ b/backend-lehrer/services/image_service.py
@@ -0,0 +1,181 @@
+"""
+Image Service — Fetches vocabulary images from Wikipedia + Emoji fallback.
+
+On-demand: Images are fetched when a learning unit is created,
+then cached in the vocabulary_words.image_url field.
+
+Sources (in priority order):
+1. Wikipedia REST API (free, no account needed, CC license)
+2. Emoji fallback for abstract words
+
+Later: Unsplash API (needs account), Stable Diffusion (local batch)
+"""
+
+import logging
+import os
+import re
+import uuid
+from typing import Optional
+from urllib.parse import quote
+
+import httpx
+
+logger = logging.getLogger(__name__)
+
+WIKIPEDIA_SUMMARY_URL = "https://en.wikipedia.org/api/rest_v1/page/summary/"
+REQUEST_HEADERS = {"User-Agent": "BreakPilot/1.0 (education platform)"}
+REQUEST_TIMEOUT = 10.0  # seconds per Wikipedia request
+
+# Stored when neither Wikipedia nor the emoji map has a match
+DEFAULT_EMOJI = "📝"
+
+# Emoji map for common abstract words that don't have good photos
+EMOJI_FALLBACK: dict[str, str] = {
+    "strong": "💪", "weak": "😩", "hard-working": "📚", "skinny": "🦴",
+    "female": "👩", "male": "👨", "definite": "✅", "definitely": "✅",
+    "even": "⚖️", "violent": "⚡", "opinion": "💭", "message": "💬",
+    "beginning": "🏁", "mention": "🗣️", "summarize": "📋", "mark": "✏️",
+    "throw": "🤾", "take": "🤲", "sum": "➕", "on the one hand": "👐",
+    "apple": "🍎", "gym": "🏋️", "medal": "🏅", "sportswoman": "🏃‍♀️",
+    "role model": "⭐", "tourist office": "🏨", "the olympics": "🏅",
+    "box": "🥊", "football": "⚽", "footballer": "⚽",
+}
+
+
+def _wikipedia_title(word: str) -> str:
+    """Reduce a vocabulary entry to a Wikipedia page title ("" if nothing is left)."""
+    title = word.split(",")[0].strip()  # "throw, threw, thrown" → "throw"
+    title = title.replace("sth.", "").replace("sb.", "").strip()
+    if title.lower().startswith("the "):
+        title = title[4:].strip()
+    return title.replace(" ", "_")
+
+
+async def fetch_wikipedia_image(
+    word: str, client: Optional[httpx.AsyncClient] = None
+) -> Optional[str]:
+    """Fetch thumbnail image URL from Wikipedia for a word.
+
+    Args:
+        word: Vocabulary entry, e.g. "throw, threw, thrown".
+        client: Optional shared HTTP client; a temporary one is created
+            when omitted.
+
+    Returns:
+        The thumbnail URL, or None if the lookup fails or the page has no
+        thumbnail. Request errors are logged, never raised.
+    """
+    title = _wikipedia_title(word)
+    if not title:
+        return None
+
+    # Percent-encode so "/", "?" or "#" in a word cannot change the request path
+    url = WIKIPEDIA_SUMMARY_URL + quote(title, safe="")
+    # The summary endpoint can answer with a redirect (e.g. for redirect
+    # pages), which httpx only follows when asked to
+    options = {
+        "headers": REQUEST_HEADERS,
+        "timeout": REQUEST_TIMEOUT,
+        "follow_redirects": True,
+    }
+
+    try:
+        if client is None:
+            async with httpx.AsyncClient() as temp_client:
+                resp = await temp_client.get(url, **options)
+        else:
+            resp = await client.get(url, **options)
+
+        if resp.status_code == 200:
+            thumb = resp.json().get("thumbnail") or {}
+            image_url = thumb.get("source")
+            if image_url:
+                logger.info("Wikipedia image for '%s': %s", word, image_url)
+                return image_url
+    except Exception as e:  # best-effort: any failure falls back to emoji
+        logger.debug("Wikipedia image lookup failed for '%s': %s", word, e)
+
+    return None
+
+
+def get_emoji_for_word(word: str) -> str:
+    """Get an emoji representation for a word.
+
+    An exact match wins; otherwise the first key that occurs as a whole word
+    or phrase is used, so "seven" does not pick up the emoji for "even".
+    """
+    lower = word.lower().strip()
+    exact = EMOJI_FALLBACK.get(lower)
+    if exact:
+        return exact
+    for key, emoji in EMOJI_FALLBACK.items():
+        if re.search(rf"(?<!\w){re.escape(key)}(?!\w)", lower):
+            return emoji
+    # Generic fallback by part of speech could be added here
+    return DEFAULT_EMOJI
+
+
+async def get_image_for_word(
+    word: str, client: Optional[httpx.AsyncClient] = None
+) -> str:
+    """Get the best available image for a vocabulary word.
+
+    Returns a URL (Wikipedia) or emoji string.
+    Result should be stored in vocabulary_words.image_url.
+    """
+    # Try Wikipedia first
+    url = await fetch_wikipedia_image(word, client=client)
+    if url:
+        return url
+
+    # Fallback to emoji
+    return get_emoji_for_word(word)
+
+
+async def enrich_words_with_images(word_ids: list[str]) -> int:
+    """Fetch and store images for vocabulary words that don't have one yet.
+
+    Args:
+        word_ids: UUID strings of vocabulary_words rows; invalid ones are
+            logged and skipped.
+
+    Returns:
+        Number of rows whose image_url was set.
+    """
+    from vocabulary.db import get_pool
+
+    ids = []
+    for wid in word_ids:
+        try:
+            ids.append(uuid.UUID(str(wid)))
+        except ValueError:
+            logger.warning("Skipping invalid word id: %r", wid)
+    if not ids:
+        return 0
+
+    pool = await get_pool()
+    async with pool.acquire() as conn:
+        rows = await conn.fetch(
+            "SELECT id, english FROM vocabulary_words "
+            "WHERE id = ANY($1::uuid[]) "
+            "AND (image_url IS NULL OR image_url = '')",
+            ids,
+        )
+
+    updated = 0
+    # One HTTP client for the whole batch; a DB connection is held only for
+    # each short UPDATE, not while waiting on Wikipedia
+    async with httpx.AsyncClient() as client:
+        for row in rows:
+            english = row["english"] or ""
+            image = await get_image_for_word(english, client=client)
+            async with pool.acquire() as conn:
+                await conn.execute(
+                    "UPDATE vocabulary_words SET image_url = $1 WHERE id = $2",
+                    image, row["id"],
+                )
+            updated += 1
+            logger.info("Image for '%s': %s...", english, image[:60])
+
+    logger.info("Enriched %d words with images", updated)
+    return updated
diff --git a/backend-lehrer/vocabulary/api.py b/backend-lehrer/vocabulary/api.py
index cbe2eaf..76ef783 100644
--- a/backend-lehrer/vocabulary/api.py
+++ b/backend-lehrer/vocabulary/api.py
@@ -270,6 +270,13 @@ async def api_create_unit_from_words(payload: CreateUnitFromWordsPayload):
             },
         }, f, ensure_ascii=False, indent=2)
 
+    # Auto-enrich words with images (Wikipedia + emoji fallback)
+    try:
+        from services.image_service import enrich_words_with_images
+        await enrich_words_with_images(payload.word_ids)
+    except Exception as e:
+        logger.warning(f"Image enrichment failed (non-critical): {e}")
+
     logger.info(f"Created vocab unit {lu.id} with {len(words)} words")
 
     return {
@@ -347,6 +354,28 @@ async def api_bulk_import(payload: BulkImportPayload):
 # ---------------------------------------------------------------------------
 
 
+@router.post("/enrich-images")
+async def api_enrich_images(word_ids: List[str] = None):
+    """Fetch and store images for vocabulary words (Wikipedia + emoji fallback).
+
+    Without word_ids, every word that has no image yet is enriched.
+    """
+    from services.image_service import enrich_words_with_images
+    from vocabulary.db import get_pool
+
+    if not word_ids:
+        pool = await get_pool()
+        async with pool.acquire() as conn:
+            rows = await conn.fetch("SELECT id FROM vocabulary_words WHERE image_url = '' OR image_url IS NULL")
+        word_ids = [str(r["id"]) for r in rows]
+
+    if not word_ids:
+        return {"enriched": 0, "message": "All words already have images"}
+
+    count = await enrich_words_with_images(word_ids)
+    return {"enriched": count, "total": len(word_ids)}
+
+
 class TranslateRequest(BaseModel):
     word_ids: List[str]
     target_language: str