Add image service: Wikipedia photos + emoji fallback for vocabulary

image_service.py: Fetches thumbnail from Wikipedia REST API (free, no account). Falls back to emoji for abstract words (40+ mapped). Auto-enrichment: When a learning unit is created, images are automatically fetched for all words that don't have one yet. Manual endpoint: POST /api/vocabulary/enrich-images fills images for existing words without images. Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
2026-04-27 23:36:48 +02:00
parent 91d6918e2c
commit f6caa3091f
2 changed files with 142 additions and 0 deletions
--- a/backend-lehrer/services/image_service.py
+++ b/backend-lehrer/services/image_service.py
@@ -0,0 +1,115 @@
+"""
+Image Service — Fetches vocabulary images from Wikipedia + Emoji fallback.
+
+On-demand: Images are fetched when a learning unit is created,
+then cached in the vocabulary_words.image_url field.
+
+Sources (in priority order):
+1. Wikipedia REST API (free, no account needed, CC license)
+2. Emoji fallback for abstract words
+
+Later: Unsplash API (needs account), Stable Diffusion (local batch)
+"""
+
+import logging
+import os
+from typing import Optional
+
+import httpx
+
+logger = logging.getLogger(__name__)
+
+# Emoji map for common abstract words that don't have good photos
+EMOJI_FALLBACK: dict[str, str] = {
+    "strong": "💪", "weak": "😩", "hard-working": "📚", "skinny": "🦴",
+    "female": "👩", "male": "👨", "definite": "✅", "definitely": "✅",
+    "even": "⚖️", "violent": "⚡", "opinion": "💭", "message": "💬",
+    "beginning": "🏁", "mention": "🗣️", "summarize": "📋", "mark": "✏️",
+    "throw": "🤾", "take": "🤲", "sum": "➕", "on the one hand": "👐",
+    "apple": "🍎", "gym": "🏋️", "medal": "🏅", "sportswoman": "🏃‍♀️",
+    "role model": "⭐", "tourist office": "🏨", "the olympics": "🏅",
+    "box": "🥊", "football": "⚽", "footballer": "⚽",
+}
+
+
+async def fetch_wikipedia_image(word: str) -> Optional[str]:
+    """Fetch thumbnail image URL from Wikipedia for a word."""
+    # Clean word for Wikipedia lookup
+    query = word.split(",")[0].strip()  # "throw, threw, thrown" → "throw"
+    query = query.replace("sth.", "").replace("sb.", "").strip()
+    if query.startswith("the "):
+        query = query[4:]
+
+    try:
+        async with httpx.AsyncClient(timeout=10.0) as client:
+            resp = await client.get(
+                f"https://en.wikipedia.org/api/rest_v1/page/summary/{query}",
+                headers={"User-Agent": "BreakPilot/1.0 (education platform)"},
+            )
+            if resp.status_code == 200:
+                data = resp.json()
+                thumb = data.get("thumbnail", {})
+                url = thumb.get("source")
+                if url:
+                    logger.info(f"Wikipedia image for '{word}': {url}")
+                    return url
+    except Exception as e:
+        logger.debug(f"Wikipedia image lookup failed for '{word}': {e}")
+
+    return None
+
+
+def get_emoji_for_word(word: str) -> str:
+    """Get an emoji representation for a word."""
+    lower = word.lower()
+    for key, emoji in EMOJI_FALLBACK.items():
+        if key in lower:
+            return emoji
+    # Generic fallback by part of speech could be added here
+    return "📝"
+
+
+async def get_image_for_word(word: str) -> str:
+    """Get the best available image for a vocabulary word.
+
+    Returns a URL (Wikipedia) or emoji string.
+    Result should be stored in vocabulary_words.image_url.
+    """
+    # Try Wikipedia first
+    url = await fetch_wikipedia_image(word)
+    if url:
+        return url
+
+    # Fallback to emoji
+    return get_emoji_for_word(word)
+
+
+async def enrich_words_with_images(word_ids: list[str]) -> int:
+    """Fetch and store images for vocabulary words that don't have one yet."""
+    from vocabulary.db import get_pool
+    import uuid
+
+    pool = await get_pool()
+    updated = 0
+
+    async with pool.acquire() as conn:
+        rows = await conn.fetch(
+            "SELECT id, english, image_url FROM vocabulary_words WHERE id = ANY($1::uuid[])",
+            [uuid.UUID(wid) for wid in word_ids],
+        )
+
+        for row in rows:
+            if row["image_url"]:
+                continue  # Already has an image
+
+            image = await get_image_for_word(row["english"])
+            if image:
+                await conn.execute(
+                    "UPDATE vocabulary_words SET image_url = $1 WHERE id = $2",
+                    image, row["id"],
+                )
+                updated += 1
+                logger.info(f"Image for '{row['english']}': {image[:60]}...")
+
+    logger.info(f"Enriched {updated} words with images")
+    return updated