Restructure: Move final 16 root files into packages (backend-lehrer)
Some checks failed
CI / go-lint (push) Has been skipped
CI / python-lint (push) Has been skipped
CI / nodejs-lint (push) Has been skipped
CI / test-go-school (push) Successful in 37s
CI / test-go-edu-search (push) Successful in 35s
CI / test-python-klausur (push) Failing after 2m41s
CI / test-python-agent-core (push) Successful in 30s
CI / test-nodejs-website (push) Successful in 38s
Some checks failed
CI / go-lint (push) Has been skipped
CI / python-lint (push) Has been skipped
CI / nodejs-lint (push) Has been skipped
CI / test-go-school (push) Successful in 37s
CI / test-go-edu-search (push) Successful in 35s
CI / test-python-klausur (push) Failing after 2m41s
CI / test-python-agent-core (push) Successful in 30s
CI / test-nodejs-website (push) Successful in 38s
classroom/ (+2): state_engine_api, state_engine_models vocabulary/ (2): api, db worksheets/ (2): api, models services/ (+6): audio, email, translation, claude_vision, ai_processor, story_generator api/ (4): school, klausur_proxy, progress, user_language Only main.py + config.py remain at root. 16 shims added. Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -1,179 +1,4 @@
|
||||
"""
|
||||
Translation Service — Batch-translates vocabulary words into target languages.
|
||||
|
||||
Uses Ollama (local LLM) to translate EN/DE word pairs into TR, AR, UK, RU, PL, FR, ES.
|
||||
Translations are cached in vocabulary_words.translations JSONB field.
|
||||
|
||||
All processing happens locally — no external API calls, GDPR-compliant.
|
||||
"""
|
||||
|
||||
import json
|
||||
import logging
|
||||
import os
|
||||
from typing import Any, Dict, List
|
||||
|
||||
import httpx
|
||||
|
||||
logger = logging.getLogger(__name__)

# Base URL of the Ollama server. Trailing slashes are stripped so that
# f"{OLLAMA_BASE_URL}/api/generate" never contains a double slash when the
# environment variable is configured as e.g. "http://host:11434/".
OLLAMA_BASE_URL = os.getenv(
    "OLLAMA_BASE_URL", "http://host.docker.internal:11434"
).rstrip("/")

# Model name sent in the "model" field of the Ollama generate request.
TRANSLATION_MODEL = os.getenv("TRANSLATION_MODEL", "qwen3:30b-a3b")

# Maps a target-language code to the English language name that is inserted
# into the LLM prompt. Codes missing here are passed to the prompt unchanged.
LANGUAGE_NAMES = {
    "tr": "Turkish",
    "ar": "Arabic",
    "uk": "Ukrainian",
    "ru": "Russian",
    "pl": "Polish",
    "fr": "French",
    "es": "Spanish",
}
|
||||
|
||||
|
||||
async def translate_words_batch(
    words: List[Dict[str, str]],
    target_language: str,
    batch_size: int = 30,
) -> List[Dict[str, str]]:
    """
    Translate a batch of EN/DE word pairs into a target language.

    The words are sent to the Ollama ``/api/generate`` endpoint in chunks of
    ``batch_size``. A chunk whose HTTP request or JSON parsing fails is logged
    and skipped, so the result may contain fewer entries than ``words``.

    Args:
        words: List of dicts with an 'english' key and an optional 'german' key
        target_language: Language code; looked up in LANGUAGE_NAMES, unknown
            codes are inserted into the prompt unchanged
        batch_size: Words per LLM request (must be >= 1)

    Returns:
        List of the JSON objects returned by the model. The prompt asks for
        'english', 'translation' and 'example' keys; the keys themselves are
        not validated here, but non-object array items are dropped.

    Raises:
        ValueError: If batch_size is smaller than 1.
    """
    import re  # function-scope import, done once instead of once per batch

    if batch_size < 1:
        # A zero step makes range() raise an opaque error and a negative step
        # silently translates nothing; fail early with a clear message.
        raise ValueError("batch_size must be a positive integer")
    if not words:
        return []

    lang_name = LANGUAGE_NAMES.get(target_language, target_language)
    all_translations: List[Dict[str, str]] = []

    # One client for all batches so the connection to Ollama is reused.
    async with httpx.AsyncClient(timeout=120.0) as client:
        for i in range(0, len(words), batch_size):
            batch = words[i:i + batch_size]
            word_list = "\n".join(
                f"{j+1}. {w['english']} = {w.get('german', '')}"
                for j, w in enumerate(batch)
            )

            prompt = f"""Translate these English/German word pairs into {lang_name}.
For each word, provide the translation and a short example sentence in {lang_name}.

Words:
{word_list}

Reply ONLY with a JSON array, no explanation:
[
{{"english": "word", "translation": "...", "example": "..."}},
...
]"""

            try:
                resp = await client.post(
                    f"{OLLAMA_BASE_URL}/api/generate",
                    json={
                        "model": TRANSLATION_MODEL,
                        "prompt": prompt,
                        "stream": False,
                        "options": {"temperature": 0.2, "num_predict": 4096},
                    },
                )
                resp.raise_for_status()
                response_text = resp.json().get("response", "") or ""

                # The model may wrap the array in extra text; take everything
                # from the first '[' to the last ']'.
                match = re.search(r'\[[\s\S]*\]', response_text)
                if match:
                    parsed = json.loads(match.group())
                    # Keep only JSON objects: callers use dict access on each
                    # entry, and a stray string or number would crash them.
                    batch_translations = [
                        item for item in parsed if isinstance(item, dict)
                    ]
                    all_translations.extend(batch_translations)
                    logger.info(
                        "Translated batch %d: %d words → %s",
                        i // batch_size + 1,
                        len(batch_translations),
                        lang_name,
                    )
                else:
                    logger.warning(
                        "No JSON array in LLM response for %s", lang_name
                    )

            except Exception as e:
                # Deliberate best-effort: a failed batch (network error, bad
                # status, malformed JSON) must not abort the remaining ones.
                logger.error("Translation batch failed (%s): %s", lang_name, e)

    return all_translations
|
||||
|
||||
|
||||
async def translate_and_store(
    word_ids: List[str],
    target_language: str,
) -> int:
    """
    Translate vocabulary words and store the results in the database.

    Fetches the rows for ``word_ids`` from ``vocabulary_words``, translates
    those that have no entry for ``target_language`` yet via
    ``translate_words_batch``, and merges each result into the row's
    ``translations`` JSONB column as ``{target_language: {"text", "example"}}``.

    Results are matched back to rows by lower-cased English word, so rows that
    share the same English word resolve to a single row.

    Args:
        word_ids: UUID strings of the vocabulary words to translate
        target_language: Language code used as the key inside ``translations``

    Returns:
        Count of UPDATE statements issued for newly translated words.

    Raises:
        ValueError: If an entry of ``word_ids`` is not a valid UUID string.
    """
    if not word_ids:
        # Nothing to do; skip the database round trip entirely.
        return 0

    import uuid

    # Imported at call time, as in the original implementation.
    from vocabulary_db import get_pool

    pool = await get_pool()

    # Fetch the candidate rows. The connection is released before the
    # translation call below, which can take a long time.
    async with pool.acquire() as conn:
        rows = await conn.fetch(
            """
            SELECT id, english, german, translations
            FROM vocabulary_words
            WHERE id = ANY($1::uuid[])
            """,
            [uuid.UUID(wid) for wid in word_ids],
        )

    words_to_translate = []
    word_map = {}  # lower-cased English word -> row id (as str)
    for row in rows:
        translations = row["translations"] or {}
        if isinstance(translations, str):
            # The JSONB value may arrive as a raw JSON string.
            translations = json.loads(translations)
        if target_language in translations:
            continue
        words_to_translate.append({
            "english": row["english"],
            "german": row["german"],
        })
        word_map[row["english"].lower()] = str(row["id"])

    if not words_to_translate:
        logger.info(
            "All %d words already translated to %s", len(rows), target_language
        )
        return 0

    # Translate
    results = await translate_words_batch(words_to_translate, target_language)

    # Store results
    updated = 0
    async with pool.acquire() as conn:
        for result in results:
            # LLM output is untrusted: skip anything that is not an object
            # with a string 'english' value instead of raising.
            if not isinstance(result, dict):
                continue
            english = result.get("english", "")
            if not isinstance(english, str):
                continue
            word_id = word_map.get(english.lower())
            if not word_id:
                continue

            translation = result.get("translation", "")
            example = result.get("example", "")
            if not translation:
                continue

            # COALESCE is required: in PostgreSQL `NULL || jsonb` is NULL, so
            # a row whose translations column is NULL would otherwise stay
            # NULL and the new translation would be silently discarded.
            await conn.execute(
                """
                UPDATE vocabulary_words
                SET translations = COALESCE(translations, '{}'::jsonb) || $1::jsonb
                WHERE id = $2
                """,
                json.dumps({target_language: {
                    "text": translation,
                    "example": example,
                }}),
                uuid.UUID(word_id),
            )
            updated += 1

    logger.info("Stored %d translations for %s", updated, target_language)
    return updated
|
||||
# Backward-compat shim -- module moved to services/translation.py
|
||||
import importlib as _importlib
|
||||
import sys as _sys
|
||||
# Replace this module's own entry in sys.modules with the relocated module, so
# the old module name now refers to the services.translation module object.
_sys.modules[__name__] = _importlib.import_module("services.translation")
|
||||
|
||||
Reference in New Issue
Block a user