# breakpilot-lehrer/backend-lehrer/services/audio.py

"""
Audio Service — Generates TTS audio for vocabulary words.

Uses the Piper TTS service (compliance-tts-service, MIT license)
for high-quality German (Thorsten) and English (Lessac) voices.
Returns no audio (None) if the TTS service is unavailable or fails.

Audio files are cached — generated once, served forever.
"""

import hashlib
import logging
import os
from typing import Optional

import httpx

logger = logging.getLogger(__name__)

# Base URL of the Piper TTS service (runs in the compliance stack).
# Can be overridden through the TTS_SERVICE_URL environment variable.
TTS_SERVICE_URL = os.environ.get("TTS_SERVICE_URL", "http://bp-compliance-tts:8095")

# Directory below the current user's home where generated audio is cached.
AUDIO_CACHE_DIR = os.path.expanduser("~/Arbeitsblaetter/audio-cache")


def _ensure_cache_dir():
    """Create the audio cache directory, including missing parents, if absent."""
    # exist_ok=True makes repeated calls a no-op once the directory exists.
    os.makedirs(name=AUDIO_CACHE_DIR, exist_ok=True)


def _cache_key(text: str, language: str) -> str:
    """Generate a deterministic, filename-safe cache key for text + language.

    The key has the form ``<language>_<hash>``, where ``<hash>`` is the first
    16 hex digits of the SHA-256 of ``"<language>:<text>"``.

    The key is used as a file name, so every character of ``language`` other
    than letters, digits, ``-`` and ``_`` is replaced by ``_`` in the readable
    prefix. This keeps a value such as ``"../x"`` from introducing path
    separators. The hash is still computed from the unmodified language, so
    keys for ordinary codes such as ``"de"`` or ``"en"`` are unchanged.
    """
    digest = hashlib.sha256(f"{language}:{text}".encode()).hexdigest()[:16]
    safe_language = "".join(
        ch if ch.isalnum() or ch in "-_" else "_" for ch in language
    )
    return f"{safe_language}_{digest}"


def _cache_path(text: str, language: str) -> str:
    """Return the full path of the cached MP3 file for text + language.

    Creates the cache directory as a side effect; the MP3 file itself may
    not exist yet.
    """
    _ensure_cache_dir()
    filename = _cache_key(text, language) + ".mp3"
    return os.path.join(AUDIO_CACHE_DIR, filename)


async def synthesize_word(
    text: str,
    language: str = "de",
    word_id: str = "",
) -> Optional[str]:
    """
    Generate TTS audio for a word or short phrase.

    Looks the text up in the local file cache first. On a miss, requests the
    audio from the Piper TTS service (``/synthesize-direct``) and stores the
    response body in the cache.

    Args:
        text: Word or phrase to synthesize. Empty or whitespace-only text is
            rejected without contacting the service.
        language: Language code forwarded to the TTS service and used in the
            cache key.
        word_id: Accepted for caller compatibility; not used here.

    Returns:
        The file path to the cached MP3, or None if the text is empty, the
        service call fails or returns no usable audio, or the file cannot be
        written.
    """
    # Nothing to synthesize; avoid a pointless service round trip.
    if not text or not text.strip():
        return None

    # Check cache first
    cached = _cache_path(text, language)
    if os.path.exists(cached):
        return cached

    # Call Piper TTS service via /synthesize-direct (returns MP3, selects language correctly)
    try:
        async with httpx.AsyncClient(timeout=30.0) as client:
            resp = await client.post(
                f"{TTS_SERVICE_URL}/synthesize-direct",
                json={
                    "text": text,
                    "language": language,
                },
            )
            content_type = resp.headers.get("content-type", "")
            if resp.status_code != 200 or not content_type.startswith("audio"):
                logger.warning(
                    "TTS service returned no audio for %r (%s): status=%s, content-type=%r",
                    text, language, resp.status_code, content_type,
                )
                return None

            audio = resp.content
            if not audio:
                # An empty body would otherwise be cached and served forever.
                logger.warning("TTS service returned an empty body for %r (%s)", text, language)
                return None

            # Write to a temporary sibling file and rename it into place, so
            # an interrupted write never leaves a truncated file under the
            # final cache name. The PID keeps worker processes from sharing
            # a temp file; the ".tmp" suffix keeps it distinct from entries.
            tmp_path = f"{cached}.{os.getpid()}.tmp"
            try:
                with open(tmp_path, "wb") as tmp_file:
                    tmp_file.write(audio)
                os.replace(tmp_path, cached)
            finally:
                # After a successful replace the temp file is gone; this only
                # cleans up after a failed write or rename.
                if os.path.exists(tmp_path):
                    os.remove(tmp_path)

            logger.info("TTS cached (direct): '%s' (%s) → %s", text, language, cached)
            return cached
    except Exception as e:
        # Deliberate best-effort boundary: callers expect None on any failure
        # (network, service, or file system), but the failure must be visible.
        logger.warning("TTS synthesis failed for %r (%s): %s", text, language, e)

    return None


async def get_or_generate_audio(
    text: str, language: str = "de", word_id: str = "",
) -> Optional[bytes]:
    """
    Get audio bytes for a word, generating them via TTS if not cached.

    Args:
        text: Word or phrase to synthesize.
        language: Language code passed on to ``synthesize_word``.
        word_id: Passed on to ``synthesize_word`` unchanged.

    Returns:
        The MP3 bytes, or None if no audio file could be produced or the
        file no longer exists when it is read.
    """
    path = await synthesize_word(text, language, word_id)
    if not path:
        return None

    # Open directly instead of checking existence first: the file could be
    # removed between a check and the open, which would raise instead of
    # returning None.
    try:
        with open(path, "rb") as f:
            return f.read()
    except FileNotFoundError:
        return None