""" Image Service — Fetches vocabulary images from Wikipedia + Emoji fallback. On-demand: Images are fetched when a learning unit is created, then cached in the vocabulary_words.image_url field. Sources (in priority order): 1. Wikipedia REST API (free, no account needed, CC license) 2. Emoji fallback for abstract words Later: Unsplash API (needs account), Stable Diffusion (local batch) """ import logging import os from typing import Optional import httpx logger = logging.getLogger(__name__) # Emoji map for common abstract words that don't have good photos EMOJI_FALLBACK: dict[str, str] = { "strong": "💊", "weak": "ðŸ˜Đ", "hard-working": "📚", "skinny": "ðŸĶī", "female": "ðŸ‘Đ", "male": "ðŸ‘Ļ", "definite": "✅", "definitely": "✅", "even": "⚖ïļ", "violent": "⚡", "opinion": "💭", "message": "💎", "beginning": "🏁", "mention": "ðŸ—Ģïļ", "summarize": "📋", "mark": "✏ïļ", "throw": "ðŸĪū", "take": "ðŸĪē", "sum": "➕", "on the one hand": "👐", "apple": "🍎", "gym": "🏋ïļ", "medal": "🏅", "sportswoman": "🏃‍♀ïļ", "role model": "⭐", "tourist office": "ðŸĻ", "the olympics": "🏅", "box": "ðŸĨŠ", "football": "âš―", "footballer": "âš―", } async def fetch_wikipedia_image(word: str) -> Optional[str]: """Fetch thumbnail image URL from Wikipedia for a word.""" # Clean word for Wikipedia lookup query = word.split(",")[0].strip() # "throw, threw, thrown" → "throw" query = query.replace("sth.", "").replace("sb.", "").strip() if query.startswith("the "): query = query[4:] try: async with httpx.AsyncClient(timeout=10.0) as client: resp = await client.get( f"https://en.wikipedia.org/api/rest_v1/page/summary/{query}", headers={"User-Agent": "BreakPilot/1.0 (https://breakpilot.com; education platform; contact@breakpilot.com)"}, follow_redirects=True, ) if resp.status_code == 200: data = resp.json() thumb = data.get("thumbnail", {}) url = thumb.get("source") if url: logger.info(f"Wikipedia image for '{word}': {url}") return url except Exception as e: logger.debug(f"Wikipedia image lookup failed for '{word}': {e}") return None def get_emoji_for_word(word: str) -> str: """Get an emoji representation for a word.""" lower = word.lower() for key, emoji in EMOJI_FALLBACK.items(): if key in lower: return emoji # Generic fallback by part of speech could be added here return "📝" async def get_image_for_word(word: str) -> str: """Get the best available image for a vocabulary word. Returns a URL (Wikipedia) or emoji string. Result should be stored in vocabulary_words.image_url. """ # Try Wikipedia first url = await fetch_wikipedia_image(word) if url: return url # Fallback to emoji return get_emoji_for_word(word) async def enrich_words_with_images(word_ids: list[str]) -> int: """Fetch and store images for vocabulary words that don't have one yet.""" from vocabulary.db import get_pool import uuid pool = await get_pool() updated = 0 async with pool.acquire() as conn: rows = await conn.fetch( "SELECT id, english, image_url FROM vocabulary_words WHERE id = ANY($1::uuid[])", [uuid.UUID(wid) for wid in word_ids], ) for row in rows: if row["image_url"]: continue # Already has an image image = await get_image_for_word(row["english"]) if image: await conn.execute( "UPDATE vocabulary_words SET image_url = $1 WHERE id = $2", image, row["id"], ) updated += 1 logger.info(f"Image for '{row['english']}': {image[:60]}...") logger.info(f"Enriched {updated} words with images") return updated