fdde5d43b3
Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
117 lines
4.0 KiB
Python
117 lines
4.0 KiB
Python
"""
|
||
Image Service — Fetches vocabulary images from Wikipedia + Emoji fallback.
|
||
|
||
On-demand: Images are fetched when a learning unit is created,
|
||
then cached in the vocabulary_words.image_url field.
|
||
|
||
Sources (in priority order):
|
||
1. Wikipedia REST API (free, no account needed, CC license)
|
||
2. Emoji fallback for abstract words
|
||
|
||
Later: Unsplash API (needs account), Stable Diffusion (local batch)
|
||
"""
|
||
|
||
import logging
|
||
import os
|
||
from typing import Optional
|
||
|
||
import httpx
|
||
|
||
logger = logging.getLogger(__name__)

# Emoji shown instead of a photo when no Wikipedia thumbnail is found.
#
# Keys are lowercase English words or phrases. Insertion order matters:
# get_emoji_for_word() returns the first key that matches, so a longer key
# must stay ahead of any shorter key it contains ("female" before "male",
# "summarize" before "sum").
EMOJI_FALLBACK: dict[str, str] = {
    "strong": "💪",
    "weak": "😩",
    "hard-working": "📚",
    "skinny": "🦴",
    "female": "👩",
    "male": "👨",
    "definite": "✅",
    "definitely": "✅",
    "even": "⚖️",
    "violent": "⚡",
    "opinion": "💭",
    "message": "💬",
    "beginning": "🏁",
    "mention": "🗣️",
    "summarize": "📋",
    "mark": "✏️",
    "throw": "🤾",
    "take": "🤲",
    "sum": "➕",
    "on the one hand": "👐",
    "apple": "🍎",
    "gym": "🏋️",
    "medal": "🏅",
    # "Woman running" is a ZWJ sequence (runner + U+200D + female sign +
    # U+FE0F). It is spelled with escapes because the joiner is invisible and
    # is easily dropped by editors; without it two separate glyphs are shown.
    "sportswoman": "\U0001F3C3\u200D\u2640\uFE0F",
    "role model": "⭐",
    "tourist office": "🏨",
    "the olympics": "🏅",
    "box": "🥊",
    "football": "⚽",
    "footballer": "⚽",
}
|
||
|
||
|
||
async def fetch_wikipedia_image(word: str) -> Optional[str]:
    """Fetch the thumbnail image URL of the English Wikipedia page for a word.

    The vocabulary entry is reduced to a page title first: only the part
    before the first comma is kept ("throw, threw, thrown" -> "throw"), the
    placeholders "sth." / "sb." are removed and a leading "the " is dropped.

    Args:
        word: Vocabulary entry as stored, possibly with inflected forms or
            placeholders.

    Returns:
        The thumbnail URL from the page summary, or None when the cleaned
        entry is empty, the page has no thumbnail, the response status is
        not 200, or the lookup fails for any reason. This function is
        best-effort and never raises for lookup errors.
    """
    from urllib.parse import quote

    # Clean word for Wikipedia lookup
    query = word.split(",")[0].strip()  # "throw, threw, thrown" → "throw"
    query = query.replace("sth.", "").replace("sb.", "").strip()
    if query.startswith("the "):
        query = query[4:].strip()

    if not query:
        # Nothing left to look up; avoid requesting ".../page/summary/".
        return None

    # The title is a single URL path segment: use underscores for spaces and
    # percent-encode everything else so "/", "?" or "#" in an entry cannot
    # change the request path or be read as a query string / fragment.
    title = quote(query.replace(" ", "_"), safe="")
    summary_url = f"https://en.wikipedia.org/api/rest_v1/page/summary/{title}"

    try:
        async with httpx.AsyncClient(timeout=10.0) as client:
            resp = await client.get(
                summary_url,
                headers={"User-Agent": "BreakPilot/1.0 (https://breakpilot.com; education platform; contact@breakpilot.com)"},
                follow_redirects=True,
            )
        if resp.status_code == 200:
            data = resp.json()
            # "thumbnail" may be absent or null; treat both as "no image".
            thumb = data.get("thumbnail") or {}
            url = thumb.get("source")
            if url:
                logger.info("Wikipedia image for '%s': %s", word, url)
                return url
    except Exception as e:
        # Deliberately broad: a failed lookup must never break the caller,
        # which falls back to an emoji.
        logger.debug("Wikipedia image lookup failed for '%s': %s", word, e)

    return None
|
||
|
||
|
||
def get_emoji_for_word(word: str) -> str:
    """Return an emoji representing a vocabulary word.

    Matching is case-insensitive. An EMOJI_FALLBACK key equal to the whole
    (stripped) input wins. Otherwise the first key, in dict order, that
    occurs in the input as a complete word or phrase is used, optionally
    followed by a plural "s"/"es". So "take sth.", "throw, threw, thrown"
    and "apples" match, while "mistake", "seven" or "mailbox" no longer
    match "take", "even" or "box" as plain substrings.

    Args:
        word: Vocabulary entry, possibly containing several forms.

    Returns:
        The matching emoji, or the generic "📝" when no key matches.
    """
    import re

    lower = word.lower().strip()

    exact = EMOJI_FALLBACK.get(lower)
    if exact is not None:
        return exact

    for key, emoji in EMOJI_FALLBACK.items():
        # Lookarounds instead of a raw substring test: the key must not be
        # glued to other letters or digits on either side.
        pattern = rf"(?<!\w){re.escape(key)}(?:e?s)?(?!\w)"
        if re.search(pattern, lower):
            return emoji

    # Generic fallback by part of speech could be added here
    return "📝"
|
||
|
||
|
||
async def get_image_for_word(word: str) -> str:
    """Resolve the image to display for a vocabulary word.

    A Wikipedia thumbnail is preferred; when none is found the emoji
    fallback is used instead.

    Returns either a URL (Wikipedia) or an emoji string. The caller is
    expected to store the result in vocabulary_words.image_url.
    """
    wikipedia_url = await fetch_wikipedia_image(word)
    # An empty/None lookup result falls through to the emoji.
    return wikipedia_url or get_emoji_for_word(word)
|
||
|
||
|
||
async def enrich_words_with_images(word_ids: list[str]) -> int:
    """Fetch and store images for vocabulary words that don't have one yet.

    Rows that already have a non-empty image_url are skipped. For each
    remaining row the image is resolved from its "english" column and
    written back to vocabulary_words.image_url.

    Args:
        word_ids: String UUIDs of the vocabulary_words rows to process.

    Returns:
        Number of rows whose image_url was updated. Empty input returns 0
        without touching the database.

    Raises:
        ValueError: If an entry of word_ids is not a valid UUID string.
    """
    if not word_ids:
        return 0

    import uuid

    from vocabulary.db import get_pool

    pool = await get_pool()

    # Hold a connection only for the SELECT: the image lookups below are slow
    # network calls and must not keep a pooled connection checked out.
    async with pool.acquire() as conn:
        rows = await conn.fetch(
            "SELECT id, english, image_url FROM vocabulary_words WHERE id = ANY($1::uuid[])",
            [uuid.UUID(wid) for wid in word_ids],
        )

    updated = 0
    for row in rows:
        if row["image_url"]:
            continue  # Already has an image

        image = await get_image_for_word(row["english"])
        if not image:
            continue

        # Write each result immediately so work done so far is kept if a
        # later lookup or update fails.
        async with pool.acquire() as conn:
            await conn.execute(
                "UPDATE vocabulary_words SET image_url = $1 WHERE id = $2",
                image, row["id"],
            )
        updated += 1
        logger.info("Image for '%s': %s...", row["english"], image[:60])

    logger.info("Enriched %d words with images", updated)
    return updated
|