Add image service: Wikipedia photos + emoji fallback for vocabulary
image_service.py: Fetches a thumbnail from the Wikipedia REST API (free, no account needed) and falls back to an emoji for abstract words (30 mapped). Auto-enrichment: when a learning unit is created, images are automatically fetched for all of its words that don't have one yet. Manual endpoint: POST /api/vocabulary/enrich-images fills in images for existing words that have none. Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
115
backend-lehrer/services/image_service.py
Normal file
115
backend-lehrer/services/image_service.py
Normal file
@@ -0,0 +1,115 @@
|
|||||||
|
"""
|
||||||
|
Image Service — Fetches vocabulary images from Wikipedia + Emoji fallback.
|
||||||
|
|
||||||
|
On-demand: Images are fetched when a learning unit is created,
|
||||||
|
then cached in the vocabulary_words.image_url field.
|
||||||
|
|
||||||
|
Sources (in priority order):
|
||||||
|
1. Wikipedia REST API (free, no account needed, CC license)
|
||||||
|
2. Emoji fallback for abstract words
|
||||||
|
|
||||||
|
Later: Unsplash API (needs account), Stable Diffusion (local batch)
|
||||||
|
"""
|
||||||
|
|
||||||
|
import logging
|
||||||
|
import os
|
||||||
|
from typing import Optional
|
||||||
|
|
||||||
|
import httpx
|
||||||
|
|
||||||
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
# Hand-picked emoji for vocabulary entries, used when no photo is available
# (mostly abstract words such as adjectives, verbs and phrases).
# Insertion order is significant: lookups iterate this dict and return the
# first matching key.
EMOJI_FALLBACK: dict[str, str] = dict(
    [
        ("strong", "💪"),
        ("weak", "😩"),
        ("hard-working", "📚"),
        ("skinny", "🦴"),
        ("female", "👩"),
        ("male", "👨"),
        ("definite", "✅"),
        ("definitely", "✅"),
        ("even", "⚖️"),
        ("violent", "⚡"),
        ("opinion", "💭"),
        ("message", "💬"),
        ("beginning", "🏁"),
        ("mention", "🗣️"),
        ("summarize", "📋"),
        ("mark", "✏️"),
        ("throw", "🤾"),
        ("take", "🤲"),
        ("sum", "➕"),
        ("on the one hand", "👐"),
        ("apple", "🍎"),
        ("gym", "🏋️"),
        ("medal", "🏅"),
        ("sportswoman", "🏃♀️"),
        ("role model", "⭐"),
        ("tourist office", "🏨"),
        ("the olympics", "🏅"),
        ("box", "🥊"),
        ("football", "⚽"),
        ("footballer", "⚽"),
    ]
)
|
||||||
|
|
||||||
|
|
||||||
|
async def fetch_wikipedia_image(word: str) -> Optional[str]:
    """Fetch a thumbnail image URL from Wikipedia for a vocabulary word.

    The vocabulary entry is reduced to a page title first: only the part
    before the first comma is kept, the placeholders "sth." / "sb." are
    removed, a leading "the " is dropped and whitespace is collapsed.
    The title is percent-encoded before it is placed in the URL path.

    Args:
        word: Vocabulary entry, e.g. "throw, threw, thrown".

    Returns:
        The thumbnail URL from the page summary, or None when the cleaned
        title is empty, the response is not 200, the summary has no
        thumbnail, or the lookup raises.
    """
    from urllib.parse import quote

    # Clean word for Wikipedia lookup
    query = word.split(",")[0].strip()  # "throw, threw, thrown" → "throw"
    query = query.replace("sth.", "").replace("sb.", "").strip()
    if query.startswith("the "):
        query = query[4:]
    # Removing placeholders mid-phrase leaves double spaces ("tell  about").
    query = " ".join(query.split())

    if not query:
        # Nothing left to look up (e.g. the entry was just "sth.").
        return None

    # Encode every reserved character: a raw "/", "?" or "#" in the entry
    # would otherwise change the path or start a query string/fragment.
    title = quote(query, safe="")

    try:
        async with httpx.AsyncClient(timeout=10.0) as client:
            resp = await client.get(
                f"https://en.wikipedia.org/api/rest_v1/page/summary/{title}",
                headers={"User-Agent": "BreakPilot/1.0 (education platform)"},
            )
            if resp.status_code == 200:
                data = resp.json()
                # "thumbnail" may be absent or null for pages without images.
                thumb = data.get("thumbnail") or {}
                url = thumb.get("source")
                if url:
                    logger.info(f"Wikipedia image for '{word}': {url}")
                    return url
    except Exception as e:
        # Deliberately best-effort: any failure just means "no image".
        logger.debug(f"Wikipedia image lookup failed for '{word}': {e}")

    return None
|
||||||
|
|
||||||
|
|
||||||
|
def get_emoji_for_word(word: str) -> str:
    """Return an emoji representation for a vocabulary word.

    Keys of EMOJI_FALLBACK are matched case-insensitively as whole words or
    phrases inside ``word``, in the dict's insertion order. Matching on word
    boundaries avoids false positives from plain substring search, such as
    "seven" → "even", "market" → "mark" or "summer" → "sum". As a
    consequence, inflected forms (e.g. "apples") only match if they are
    listed as keys themselves.

    Args:
        word: Vocabulary entry, e.g. "throw, threw, thrown".

    Returns:
        The emoji of the first matching key, or "📝" when nothing matches.
    """
    import re

    lower = word.lower()
    for key, emoji in EMOJI_FALLBACK.items():
        # Lookarounds instead of \b so keys with hyphens/spaces still work.
        if re.search(rf"(?<!\w){re.escape(key)}(?!\w)", lower):
            return emoji
    # Generic fallback by part of speech could be added here
    return "📝"
|
||||||
|
|
||||||
|
|
||||||
|
async def get_image_for_word(word: str) -> str:
    """Resolve the best available image for a vocabulary word.

    Wikipedia is asked first; if it yields no thumbnail, the emoji lookup
    supplies the result instead.

    Returns:
        A Wikipedia thumbnail URL or an emoji string. The caller is expected
        to store it in vocabulary_words.image_url.
    """
    wikipedia_url = await fetch_wikipedia_image(word)
    return wikipedia_url if wikipedia_url else get_emoji_for_word(word)
|
||||||
|
|
||||||
|
|
||||||
|
async def enrich_words_with_images(word_ids: list[str]) -> int:
    """Fetch and store images for vocabulary words that don't have one yet.

    Rows are loaded first and the connection is released before any image
    lookup starts, so a pooled connection is not held while waiting on
    external HTTP requests. Each update acquires a connection only for the
    duration of its own statement.

    Args:
        word_ids: IDs of vocabulary_words rows as UUID strings.

    Returns:
        Number of rows whose image_url was written.

    Raises:
        ValueError: If an entry of ``word_ids`` is not a valid UUID string.
    """
    if not word_ids:
        # Nothing to do; skip the database round trip entirely.
        return 0

    import uuid

    from vocabulary.db import get_pool

    # NOTE(review): the pool is used like an asyncpg pool (acquire/fetch/
    # execute with $n placeholders) — confirm against vocabulary.db.
    pool = await get_pool()

    async with pool.acquire() as conn:
        rows = await conn.fetch(
            "SELECT id, english, image_url FROM vocabulary_words WHERE id = ANY($1::uuid[])",
            [uuid.UUID(wid) for wid in word_ids],
        )

    updated = 0
    for row in rows:
        if row["image_url"]:
            continue  # Already has an image

        # Network lookup happens without holding a database connection.
        image = await get_image_for_word(row["english"])
        if not image:
            continue

        async with pool.acquire() as conn:
            await conn.execute(
                "UPDATE vocabulary_words SET image_url = $1 WHERE id = $2",
                image, row["id"],
            )
        updated += 1
        logger.info(f"Image for '{row['english']}': {image[:60]}...")

    logger.info(f"Enriched {updated} words with images")
    return updated
|
||||||
@@ -270,6 +270,13 @@ async def api_create_unit_from_words(payload: CreateUnitFromWordsPayload):
|
|||||||
},
|
},
|
||||||
}, f, ensure_ascii=False, indent=2)
|
}, f, ensure_ascii=False, indent=2)
|
||||||
|
|
||||||
|
# Auto-enrich words with images (Wikipedia + emoji fallback)
|
||||||
|
try:
|
||||||
|
from services.image_service import enrich_words_with_images
|
||||||
|
await enrich_words_with_images(payload.word_ids)
|
||||||
|
except Exception as e:
|
||||||
|
logger.warning(f"Image enrichment failed (non-critical): {e}")
|
||||||
|
|
||||||
logger.info(f"Created vocab unit {lu.id} with {len(words)} words")
|
logger.info(f"Created vocab unit {lu.id} with {len(words)} words")
|
||||||
|
|
||||||
return {
|
return {
|
||||||
@@ -347,6 +354,26 @@ async def api_bulk_import(payload: BulkImportPayload):
|
|||||||
# ---------------------------------------------------------------------------
|
# ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
|
||||||
|
@router.post("/enrich-images")
async def api_enrich_images(word_ids: List[str] = None):
    """Fetch and store images for vocabulary words (Wikipedia + emoji fallback).

    Args:
        word_ids: IDs of the words to enrich. When omitted or empty, every
            row of vocabulary_words whose image_url is NULL or '' is
            selected instead.

    Returns:
        A dict with "enriched" (rows updated) and "total" (rows considered).
        When there is nothing to enrich, a "message" key is included as well.
    """
    from services.image_service import enrich_words_with_images
    from vocabulary.db import get_pool

    if not word_ids:
        pool = await get_pool()
        async with pool.acquire() as conn:
            rows = await conn.fetch("SELECT id FROM vocabulary_words WHERE image_url = '' OR image_url IS NULL")
        # Build the ID list after the connection has been released.
        word_ids = [str(r["id"]) for r in rows]

    if not word_ids:
        # Same keys as the normal response so clients can rely on "total".
        return {"enriched": 0, "total": 0, "message": "All words already have images"}

    count = await enrich_words_with_images(word_ids)
    return {"enriched": count, "total": len(word_ids)}
|
||||||
|
|
||||||
|
|
||||||
class TranslateRequest(BaseModel):
|
class TranslateRequest(BaseModel):
|
||||||
word_ids: List[str]
|
word_ids: List[str]
|
||||||
target_language: str
|
target_language: str
|
||||||
|
|||||||
Reference in New Issue
Block a user