# breakpilot-lehrer/backend-lehrer/services/audio.py

"""
Audio Service — Generates TTS audio for vocabulary words.

Uses the Piper TTS service (compliance-tts-service, MIT license)
for high-quality German (Thorsten) and English (Lessac) voices.
If the TTS service is unavailable, the functions in this module return None;
any placeholder response has to be produced by the caller.

Audio files are cached — generated once, served forever.
"""

import hashlib
import logging
import os
import re
from typing import Optional

import httpx

# Module-scoped logger, named after this module.
logger = logging.getLogger(__name__)

# Base URL of the Piper TTS service (runs in the compliance stack).
# The TTS_SERVICE_URL environment variable overrides the default.
TTS_SERVICE_URL = os.environ.get("TTS_SERVICE_URL", "http://bp-compliance-tts:8095")

# Directory (below the current user's home) where generated audio is cached.
AUDIO_CACHE_DIR = os.path.expanduser("~/Arbeitsblaetter/audio-cache")

# Abbreviations expanded before TTS (so the speaker says the full word).
# Keys are matched case-insensitively by _expand_abbreviations().
_TTS_EXPANSIONS = {
    # EN
    "sth.": "something",
    "sth": "something",
    "sb.": "somebody",
    "sb": "somebody",
    "smth.": "something",
    "smb.": "somebody",
    "sbd.": "somebody",
    "etc.": "etcetera",
    "e.g.": "for example",
    "i.e.": "that is",
    "esp.": "especially",
    "approx.": "approximately",
    "vs.": "versus",
    "nr.": "number",
    "no.": "number",
    "p.": "page",
    "adj.": "adjective",
    "adv.": "adverb",
    "prep.": "preposition",
    "pron.": "pronoun",
    "pl.": "plural",
    "sg.": "singular",
    "syn.": "synonym",
    "ant.": "antonym",
    # DE
    "usw.": "und so weiter",
    "bzw.": "beziehungsweise",
    "z.B.": "zum Beispiel",
    "d.h.": "das heisst",
    "vgl.": "vergleiche",
    "ca.": "circa",
    "evtl.": "eventuell",
    "ggf.": "gegebenenfalls",
}

# (abbreviation, expansion) pairs, longest abbreviation first, so that in the
# alternation below "sth." is tried before its own prefix "sth".
_TTS_EXPANSION_ITEMS = sorted(
    _TTS_EXPANSIONS.items(), key=lambda item: len(item[0]), reverse=True
)

# One alternation with one capture group per abbreviation; the index of the
# group that matched identifies the expansion to use.
#   \b            - the abbreviation must start at a word boundary
#   (?![^\W\d_])  - and must not be directly followed by a letter, so "p." is
#                   not expanded inside "p.m." and "sth" not inside "sthenic"
_TTS_ABBREVIATION_RE = re.compile(
    r"\b(?:"
    + "|".join(f"({re.escape(abbr)})" for abbr, _ in _TTS_EXPANSION_ITEMS)
    + r")(?![^\W\d_])",
    re.IGNORECASE,
)


def _expand_abbreviations(text: str) -> str:
    """Expand known abbreviations so TTS speaks the full word.

    Every abbreviation listed in ``_TTS_EXPANSIONS`` is replaced by its
    expansion in a single pass. Matching is case-insensitive, must start at
    a word boundary, and must not be directly followed by a letter.

    Args:
        text: The word or phrase that is about to be synthesized.

    Returns:
        ``text`` with the abbreviations expanded. Empty input is returned
        unchanged.
    """
    if not text:
        return text

    def _replacement(match):
        # lastindex is the 1-based number of the capture group that matched,
        # which lines up with the order of _TTS_EXPANSION_ITEMS. Using a
        # callable also keeps the expansion from being parsed as a regex
        # replacement template.
        return _TTS_EXPANSION_ITEMS[match.lastindex - 1][1]

    return _TTS_ABBREVIATION_RE.sub(_replacement, text)


def _ensure_cache_dir():
    """Create the audio cache directory (including parents) if it is missing."""
    if not os.path.isdir(AUDIO_CACHE_DIR):
        os.makedirs(AUDIO_CACHE_DIR, exist_ok=True)


def _cache_key(text: str, language: str) -> str:
    """Generate a deterministic cache key for text + language.

    The key has the form ``<language>_<hash>``, where ``<hash>`` is the first
    16 hex digits of the SHA-256 of ``"<language>:<text>"``.

    The key is used as a file name, so every character of the language prefix
    that is not alphanumeric, ``-`` or ``_`` is replaced by ``_``. This keeps
    a value containing path separators or ``..`` from pointing outside the
    cache directory. The hash is still computed from the raw language, so
    distinct languages never share a key, and keys for plain codes such as
    ``"de"`` or ``"en"`` are unaffected.

    Args:
        text: The text that will be spoken.
        language: Language code of the voice, e.g. ``"de"``.

    Returns:
        A file-name-safe cache key (without extension).
    """
    digest = hashlib.sha256(f"{language}:{text}".encode()).hexdigest()[:16]
    safe_language = "".join(
        ch if ch.isalnum() or ch in "-_" else "_" for ch in language
    )
    return f"{safe_language}_{digest}"


def _cache_path(text: str, language: str) -> str:
    """Return the path of the cached MP3 for ``text`` spoken in ``language``.

    The cache directory is created first if it does not exist yet; the file
    itself may or may not exist.
    """
    _ensure_cache_dir()
    filename = _cache_key(text, language) + ".mp3"
    return os.path.join(AUDIO_CACHE_DIR, filename)


async def synthesize_word(
    text: str,
    language: str = "de",
    word_id: str = "",
) -> Optional[str]:
    """
    Generate TTS audio for a word or short phrase.

    Looks up the on-disk cache first. On a miss, abbreviations in ``text``
    are expanded, the Piper TTS service is called via ``/synthesize-direct``
    and the returned audio is stored in the cache.

    Args:
        text: Word or phrase to speak. The cache key is derived from this
            original text, not from the abbreviation-expanded form.
        language: Language code passed to the TTS service.
        word_id: Currently unused; kept for interface compatibility.

    Returns:
        The file path of the cached MP3, or None if ``text`` is blank, the
        TTS request fails, the response is not non-empty audio, or the cache
        file cannot be written.
    """
    # Nothing to speak: skip both the cache and the network round trip.
    if not text or not text.strip():
        return None

    cached = _cache_path(text, language)

    # Only a non-empty file counts as a cache hit; an empty file is treated
    # as a miss and regenerated.
    try:
        if os.path.getsize(cached) > 0:
            return cached
    except OSError:
        pass  # not cached yet: fall through and synthesize

    # Expand abbreviations before speaking
    speak_text = _expand_abbreviations(text)

    # Call Piper TTS service via /synthesize-direct (returns MP3, selects language correctly)
    try:
        async with httpx.AsyncClient(timeout=30.0) as client:
            resp = await client.post(
                f"{TTS_SERVICE_URL}/synthesize-direct",
                json={
                    "text": speak_text,
                    "language": language,
                },
            )
    except Exception as e:
        # Best-effort boundary: callers expect None, never an exception.
        logger.warning("TTS request failed for '%s' (%s): %s", text, language, e)
        return None

    content_type = resp.headers.get("content-type", "")
    if resp.status_code != 200 or not content_type.startswith("audio") or not resp.content:
        logger.warning(
            "TTS service returned no audio for '%s' (%s): status=%s, content-type=%r",
            text, language, resp.status_code, content_type,
        )
        return None

    # Write to a process-unique temp file and rename it into place, so a
    # reader never sees a partially written MP3 under the final name.
    tmp_path = f"{cached}.{os.getpid()}.tmp"
    try:
        with open(tmp_path, "wb") as f:
            f.write(resp.content)
        os.replace(tmp_path, cached)
    except OSError as e:
        logger.warning("Could not write TTS cache file %s: %s", cached, e)
        try:
            os.remove(tmp_path)
        except OSError:
            pass  # temp file was never created or is already gone
        return None

    logger.info("TTS cached (direct): '%s' (%s) → %s", text, language, cached)
    return cached


async def get_or_generate_audio(
    text: str, language: str = "de", word_id: str = "",
) -> Optional[bytes]:
    """
    Get audio bytes for a word. Returns MP3 bytes or None.
    Generates via TTS if not cached.

    Args:
        text: Word or phrase to speak.
        language: Language code passed on to ``synthesize_word``.
        word_id: Passed through to ``synthesize_word``.

    Returns:
        The content of the cached MP3 file, or None if no audio could be
        generated or the cached file could not be read.
    """
    path = await synthesize_word(text, language, word_id)
    if not path:
        return None

    # Open directly instead of checking for existence first: the file could
    # disappear between the check and the read, and callers expect None
    # rather than an exception.
    try:
        with open(path, "rb") as f:
            return f.read()
    except OSError as e:
        logger.warning("Could not read cached audio %s: %s", path, e)
        return None