Add image service: Wikipedia photos + emoji fallback for vocabulary
image_service.py: Fetches a thumbnail from the Wikipedia REST API (free, no account). Falls back to an emoji when Wikipedia has no thumbnail (30 words mapped, generic emoji otherwise). Auto-enrichment: When a learning unit is created, images are automatically fetched for all words that don't have one yet. Manual endpoint: POST /api/vocabulary/enrich-images fills images for existing words without images. Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
115
backend-lehrer/services/image_service.py
Normal file
115
backend-lehrer/services/image_service.py
Normal file
@@ -0,0 +1,115 @@
|
||||
"""
|
||||
Image Service — Fetches vocabulary images from Wikipedia + Emoji fallback.
|
||||
|
||||
On-demand: Images are fetched when a learning unit is created,
|
||||
then cached in the vocabulary_words.image_url field.
|
||||
|
||||
Sources (in priority order):
|
||||
1. Wikipedia REST API (free, no account needed, CC license)
|
||||
2. Emoji fallback for abstract words
|
||||
|
||||
Later: Unsplash API (needs account), Stable Diffusion (local batch)
|
||||
"""
|
||||
|
||||
import logging
|
||||
import os
|
||||
from typing import Optional
|
||||
|
||||
import httpx
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
# Emoji used when no Wikipedia thumbnail is available for a word.
# Keys are lowercase because get_emoji_for_word() lower-cases its input
# before looking a word up. Insertion order is kept as originally written.
EMOJI_FALLBACK: dict[str, str] = {
    "strong": "💪",
    "weak": "😩",
    "hard-working": "📚",
    "skinny": "🦴",
    "female": "👩",
    "male": "👨",
    "definite": "✅",
    "definitely": "✅",
    "even": "⚖️",
    "violent": "⚡",
    "opinion": "💭",
    "message": "💬",
    "beginning": "🏁",
    "mention": "🗣️",
    "summarize": "📋",
    "mark": "✏️",
    "throw": "🤾",
    "take": "🤲",
    "sum": "➕",
    "on the one hand": "👐",
    "apple": "🍎",
    "gym": "🏋️",
    "medal": "🏅",
    # Woman running is a ZWJ sequence: runner + ZERO WIDTH JOINER + female
    # sign + variation selector. Spelled with escapes so the invisible
    # joiner cannot get lost in editors or copy/paste (without it the value
    # renders as two separate glyphs).
    "sportswoman": "\U0001F3C3\u200D\u2640\uFE0F",
    "role model": "⭐",
    "tourist office": "🏨",
    "the olympics": "🏅",
    "box": "🥊",
    "football": "⚽",
    "footballer": "⚽",
}
|
||||
|
||||
|
||||
async def fetch_wikipedia_image(word: str) -> Optional[str]:
    """Fetch a thumbnail image URL from English Wikipedia for a word.

    The vocabulary entry is first reduced to a page title: only the part
    before the first comma is kept ("throw, threw, thrown" -> "throw"), the
    placeholders "sth." / "sb." are removed, whitespace is collapsed and a
    leading "the " (any case) is dropped. The title is percent-encoded
    before it is placed in the URL path, so spaces, "/", "?" or "#" inside
    an entry cannot change the request target.

    Args:
        word: Vocabulary entry, e.g. "throw, threw, thrown".

    Returns:
        The ``thumbnail.source`` URL of the page summary, or ``None`` when
        the entry yields no usable title, the summary has no thumbnail, the
        response is not HTTP 200, or the lookup fails. Failures are logged
        at debug level and never raised.
    """
    # Function-scope import so the block does not depend on file-level imports.
    from urllib.parse import quote

    if not word:
        return None

    # Clean word for Wikipedia lookup
    query = word.split(",")[0]  # "throw, threw, thrown" → "throw"
    query = query.replace("sth.", "").replace("sb.", "")
    query = " ".join(query.split())  # collapse gaps left by the removals
    if query.lower().startswith("the "):
        query = query[4:].strip()
    if not query:
        # Nothing left to look up; avoid a pointless request to ".../summary/".
        return None

    # Wikipedia titles use underscores for spaces; encode everything else.
    title = quote(query.replace(" ", "_"), safe="")

    try:
        # follow_redirects: the summary endpoint answers redirect pages with
        # a 3xx response, which httpx does not follow by default.
        async with httpx.AsyncClient(timeout=10.0, follow_redirects=True) as client:
            resp = await client.get(
                f"https://en.wikipedia.org/api/rest_v1/page/summary/{title}",
                headers={"User-Agent": "BreakPilot/1.0 (education platform)"},
            )
            if resp.status_code == 200:
                data = resp.json()
                # "thumbnail" may be absent or null; treat both as "no image".
                thumb = data.get("thumbnail") or {}
                url = thumb.get("source")
                if url:
                    logger.info("Wikipedia image for '%s': %s", word, url)
                    return url
    except Exception as e:
        # Deliberately broad: the image is optional, so any lookup problem
        # (network, invalid JSON, unexpected payload shape) means "no image".
        logger.debug("Wikipedia image lookup failed for '%s': %s", word, e)

    return None
|
||||
|
||||
|
||||
def get_emoji_for_word(word: str) -> str:
    """Get an emoji representation for a word.

    A key of EMOJI_FALLBACK matches only when it occurs in the entry as a
    whole word or whole phrase. Plain substring matching would map "seven"
    to the emoji for "even", "summer" to "sum" or "market" to "mark".
    Longer keys are tried first so the most specific key wins.

    Args:
        word: Vocabulary entry, e.g. "throw, threw, thrown" or "role model".

    Returns:
        The mapped emoji, or the generic "📝" when no key matches or the
        entry is empty.
    """

    def _padded_words(text: str) -> str:
        # Lower-case, turn every non-alphanumeric character into a space and
        # pad with spaces, so " key " in " entry " is a whole-word test.
        cleaned = "".join(ch if ch.isalnum() else " " for ch in text.lower())
        return f" {' '.join(cleaned.split())} "

    if not word:
        return "📝"

    entry = _padded_words(word)
    # sorted() is stable, so keys of equal length keep their map order.
    for key in sorted(EMOJI_FALLBACK, key=len, reverse=True):
        if _padded_words(key) in entry:
            return EMOJI_FALLBACK[key]

    # Generic fallback by part of speech could be added here
    return "📝"
|
||||
|
||||
|
||||
async def get_image_for_word(word: str) -> str:
    """Resolve the image to store for a vocabulary word.

    Asks Wikipedia for a thumbnail first; if that yields nothing, the emoji
    lookup supplies the value instead.

    Returns:
        A Wikipedia thumbnail URL or an emoji string. The caller is expected
        to store it in vocabulary_words.image_url.
    """
    wikipedia_url = await fetch_wikipedia_image(word)
    # No thumbnail (None or empty): use the emoji lookup instead.
    return wikipedia_url if wikipedia_url else get_emoji_for_word(word)
|
||||
|
||||
|
||||
async def enrich_words_with_images(word_ids: list[str]) -> int:
    """Fetch and store images for vocabulary words that don't have one yet.

    Database connections are only held for the individual queries, never
    while an image is being looked up over HTTP, so a large batch cannot
    keep a pooled connection busy for the whole run. Each image is written
    as soon as it is resolved, so progress survives a later failure.

    Args:
        word_ids: String UUIDs of rows in vocabulary_words.

    Returns:
        Number of rows whose image_url was written.

    Raises:
        ValueError: If an entry of ``word_ids`` is not a valid UUID string.
    """
    if not word_ids:
        # Nothing to do; skip the pool and the query entirely.
        return 0

    import uuid

    from vocabulary.db import get_pool

    ids = [uuid.UUID(wid) for wid in word_ids]
    pool = await get_pool()

    # Only rows without an image are of interest, so filter in SQL.
    async with pool.acquire() as conn:
        rows = await conn.fetch(
            "SELECT id, english FROM vocabulary_words "
            "WHERE id = ANY($1::uuid[]) AND (image_url IS NULL OR image_url = '')",
            ids,
        )

    updated = 0
    for row in rows:
        english = row["english"]
        if not english:
            continue  # No text to look up an image for

        # Slow network lookup happens with no DB connection checked out.
        image = await get_image_for_word(english)
        if not image:
            continue

        async with pool.acquire() as conn:
            await conn.execute(
                "UPDATE vocabulary_words SET image_url = $1 WHERE id = $2",
                image, row["id"],
            )
        updated += 1
        logger.info(f"Image for '{english}': {image[:60]}...")

    logger.info(f"Enriched {updated} words with images")
    return updated
|
||||
@@ -270,6 +270,13 @@ async def api_create_unit_from_words(payload: CreateUnitFromWordsPayload):
|
||||
},
|
||||
}, f, ensure_ascii=False, indent=2)
|
||||
|
||||
# Auto-enrich words with images (Wikipedia + emoji fallback)
|
||||
try:
|
||||
from services.image_service import enrich_words_with_images
|
||||
await enrich_words_with_images(payload.word_ids)
|
||||
except Exception as e:
|
||||
logger.warning(f"Image enrichment failed (non-critical): {e}")
|
||||
|
||||
logger.info(f"Created vocab unit {lu.id} with {len(words)} words")
|
||||
|
||||
return {
|
||||
@@ -347,6 +354,26 @@ async def api_bulk_import(payload: BulkImportPayload):
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
@router.post("/enrich-images")
async def api_enrich_images(word_ids: List[str] = None):
    """Fetch and store images for vocabulary words (Wikipedia + emoji fallback).

    When ``word_ids`` is omitted or empty, every row of vocabulary_words
    whose image_url is NULL or '' is selected instead.

    NOTE(review): with FastAPI a bare list parameter is presumably read from
    the JSON request body rather than the query string — confirm against
    the clients calling this endpoint.

    Returns:
        ``{"enriched": 0, "message": ...}`` when there is nothing to do,
        otherwise ``{"enriched": <rows written>, "total": <ids processed>}``.
    """
    from services.image_service import enrich_words_with_images
    from vocabulary.db import get_pool

    if not word_ids:
        pool = await get_pool()
        async with pool.acquire() as conn:
            rows = await conn.fetch("SELECT id FROM vocabulary_words WHERE image_url = '' OR image_url IS NULL")
        # Connection is released before the id list is built.
        word_ids = [str(r["id"]) for r in rows]

    if not word_ids:
        return {"enriched": 0, "message": "All words already have images"}

    count = await enrich_words_with_images(word_ids)
    return {"enriched": count, "total": len(word_ids)}
|
||||
|
||||
|
||||
class TranslateRequest(BaseModel):
|
||||
word_ids: List[str]
|
||||
target_language: str
|
||||
|
||||
Reference in New Issue
Block a user