feat(tts): add Edge TTS (Microsoft Neural Voices) as primary engine with Piper fallback
Some checks failed
CI/CD / go-lint (push) Has been skipped
CI/CD / python-lint (push) Has been skipped
CI/CD / nodejs-lint (push) Has been skipped
CI/CD / test-go-ai-compliance (push) Failing after 32s
CI/CD / test-python-backend-compliance (push) Successful in 31s
CI/CD / test-python-document-crawler (push) Successful in 21s
CI/CD / test-python-dsms-gateway (push) Successful in 20s
CI/CD / validate-canonical-controls (push) Successful in 11s
CI/CD / Deploy (push) Has been skipped

Edge TTS provides near-human quality voices (de-DE-ConradNeural, en-US-GuyNeural).
Falls back to Piper TTS when Edge TTS is unavailable (e.g. no internet).

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
Benjamin Admin
2026-03-20 16:13:10 +01:00
parent 4f6ac9b23a
commit df5b6d69ef
2 changed files with 38 additions and 6 deletions

View File

@@ -136,11 +136,31 @@ TTS_CACHE_DIR = "/tmp/tts-cache"
os.makedirs(TTS_CACHE_DIR, exist_ok=True) os.makedirs(TTS_CACHE_DIR, exist_ok=True)
# Edge TTS voice name for each supported request language code.
# _edge_tts_synthesize looks languages up here and uses the "de" entry
# for any language code that is not listed.
EDGE_TTS_VOICES = {
    "de": "de-DE-ConradNeural",
    "en": "en-US-GuyNeural",
}
async def _edge_tts_synthesize(text: str, language: str, output_path: str) -> bool:
    """Synthesize ``text`` to an audio file at ``output_path`` using Edge TTS.

    The audio is first written to a temporary sibling file and only moved to
    ``output_path`` once synthesis has finished. A failed or cancelled request
    therefore never leaves a truncated file at ``output_path``.

    Args:
        text: The text to speak.
        language: Key into ``EDGE_TTS_VOICES``; unknown keys use the "de" voice.
        output_path: Destination file path for the synthesized audio.

    Returns:
        True if the audio was written to ``output_path``, False if Edge TTS
        failed for any reason (the failure is logged as a warning so the
        caller can fall back to another engine).
    """
    # Local import keeps the module-level import block untouched.
    import uuid

    # Keep the temp file next to the destination so os.replace stays on the
    # same filesystem; the random suffix keeps concurrent requests apart.
    partial_path = f"{output_path}.{uuid.uuid4().hex}.part"
    try:
        # Imported lazily so a missing edge_tts package is handled like any
        # other Edge TTS failure (returns False instead of breaking startup).
        import edge_tts

        voice = EDGE_TTS_VOICES.get(language, EDGE_TTS_VOICES["de"])
        communicate = edge_tts.Communicate(text, voice)
        await communicate.save(partial_path)
        # Publish the finished file in one step.
        os.replace(partial_path, output_path)
        return True
    except Exception as e:
        logger.warning(f"Edge TTS failed, falling back to Piper: {e}")
        return False
    finally:
        # Runs on success, failure and task cancellation alike. After a
        # successful os.replace the temp file no longer exists, which is fine.
        try:
            os.remove(partial_path)
        except OSError:
            pass
@app.post("/synthesize-direct") @app.post("/synthesize-direct")
async def synthesize_direct(req: SynthesizeDirectRequest): async def synthesize_direct(req: SynthesizeDirectRequest):
"""Synthesize text and return MP3 audio directly (no MinIO upload). """Synthesize text and return MP3 audio directly (no MinIO upload).
Used by the pitch-deck presenter for real-time TTS playback. Uses Edge TTS (Microsoft Neural Voices) for high-quality speech.
Falls back to Piper TTS if Edge TTS is unavailable (e.g. no internet).
Includes disk caching so identical text is only synthesized once. Includes disk caching so identical text is only synthesized once.
""" """
if not req.text.strip(): if not req.text.strip():
@@ -158,7 +178,19 @@ async def synthesize_direct(req: SynthesizeDirectRequest):
headers={"X-TTS-Cache": "hit"}, headers={"X-TTS-Cache": "hit"},
) )
# Select TTS engine based on language # Try Edge TTS first (high quality neural voices)
success = await _edge_tts_synthesize(req.text, req.language, cache_path)
if success and os.path.exists(cache_path):
size = os.path.getsize(cache_path)
logger.info(f"Edge TTS ({req.language}): {len(req.text)} chars, {size} bytes, cached as {text_hash}")
return FileResponse(
cache_path,
media_type="audio/mpeg",
headers={"X-TTS-Cache": "miss", "X-TTS-Engine": "edge"},
)
# Fallback: Piper TTS
engine = tts engine = tts
if req.language == "en" and tts_en is not None: if req.language == "en" and tts_en is not None:
engine = tts_en engine = tts_en
@@ -166,18 +198,17 @@ async def synthesize_direct(req: SynthesizeDirectRequest):
with tempfile.TemporaryDirectory() as tmpdir: with tempfile.TemporaryDirectory() as tmpdir:
try: try:
mp3_path, duration = engine.synthesize_to_mp3(req.text, tmpdir) mp3_path, duration = engine.synthesize_to_mp3(req.text, tmpdir)
# Copy to cache
import shutil import shutil
shutil.copy2(mp3_path, cache_path) shutil.copy2(mp3_path, cache_path)
logger.info(f"TTS synthesized ({req.language}): {len(req.text)} chars, {duration:.1f}s, cached as {text_hash}") logger.info(f"Piper TTS ({req.language}): {len(req.text)} chars, {duration:.1f}s, cached as {text_hash}")
except Exception as e: except Exception as e:
logger.error(f"Direct synthesis failed: {e}") logger.error(f"TTS synthesis failed: {e}")
raise HTTPException(status_code=500, detail=str(e)) raise HTTPException(status_code=500, detail=str(e))
return FileResponse( return FileResponse(
cache_path, cache_path,
media_type="audio/mpeg", media_type="audio/mpeg",
headers={"X-TTS-Cache": "miss", "X-TTS-Duration": str(round(duration, 2))}, headers={"X-TTS-Cache": "miss", "X-TTS-Engine": "piper"},
) )

View File

@@ -3,3 +3,4 @@ uvicorn[standard]==0.27.1
boto3==1.34.25 boto3==1.34.25
python-multipart==0.0.6 python-multipart==0.0.6
pydantic==2.6.1 pydantic==2.6.1
edge-tts==6.1.12