Switch AudioButton to Piper TTS (Thorsten/Lessac voices)
AudioButton now tries Piper TTS via the /api/vocabulary/tts endpoint first and falls back to the browser Web Speech API if Piper is unavailable. Backend: new GET /api/vocabulary/tts?text=...&lang=de endpoint. audio_service.py: fixed the presigned-URL flow for downloading audio from MinIO. This gives the same high-quality voice as the Investor Agent in the pitch deck (Thorsten DE / Lessac EN, MIT license). Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -74,16 +74,24 @@ async def synthesize_word(
|
||||
return None
|
||||
|
||||
data = resp.json()
|
||||
audio_url = data.get("audio_url") or data.get("presigned_url")
|
||||
bucket = data.get("bucket")
|
||||
object_key = data.get("object_key")
|
||||
|
||||
if audio_url:
|
||||
# Download the audio file
|
||||
audio_resp = await client.get(audio_url)
|
||||
if audio_resp.status_code == 200:
|
||||
with open(cached, "wb") as f:
|
||||
f.write(audio_resp.content)
|
||||
logger.info(f"TTS cached: '{text}' ({language}) → {cached}")
|
||||
return cached
|
||||
if bucket and object_key:
|
||||
# Get presigned URL to download the audio
|
||||
url_resp = await client.post(
|
||||
f"{TTS_SERVICE_URL}/presigned-url",
|
||||
json={"bucket": bucket, "object_key": object_key, "expires": 300},
|
||||
)
|
||||
if url_resp.status_code == 200:
|
||||
audio_url = url_resp.json().get("url")
|
||||
if audio_url:
|
||||
audio_resp = await client.get(audio_url)
|
||||
if audio_resp.status_code == 200:
|
||||
with open(cached, "wb") as f:
|
||||
f.write(audio_resp.content)
|
||||
logger.info(f"TTS cached: '{text}' ({language}) → {cached}")
|
||||
return cached
|
||||
|
||||
except Exception as e:
|
||||
logger.warning(f"TTS service unavailable: {e}")
|
||||
|
||||
@@ -161,6 +161,22 @@ async def api_get_syllable_audio(word_id: str, lang: str = "en"):
|
||||
return FastAPIResponse(content=audio_bytes, media_type="audio/mpeg")
|
||||
|
||||
|
||||
@router.get("/tts")
async def api_tts(text: str = Query(..., min_length=1), lang: str = Query("de")):
    """Text-to-Speech endpoint. Returns MP3 audio for any text.

    Uses Piper TTS (Thorsten DE / Lessac EN). Cached by text+lang.

    Args:
        text: The text to synthesize. Required and non-empty; a request that
            omits it or sends an empty value is rejected by query validation
            before the audio service is called.
        lang: Language code forwarded unchanged to the audio service.
            Defaults to "de".

    Returns:
        A response whose body is the generated audio bytes, served with the
        media type "audio/mpeg".

    Raises:
        HTTPException: 503 when the audio service returns no audio.
    """
    # Imported locally, as in the original, to keep module import time light
    # and avoid pulling the audio service in unless this route is hit.
    from fastapi.responses import Response as FastAPIResponse
    from services.audio import get_or_generate_audio

    audio_bytes = await get_or_generate_audio(text, language=lang)
    if not audio_bytes:
        # A falsy result (None or empty bytes) is treated as "service down".
        raise HTTPException(status_code=503, detail="TTS Service nicht verfuegbar")

    return FastAPIResponse(content=audio_bytes, media_type="audio/mpeg")
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Learning Unit Creation from Word Selection
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
'use client'
|
||||
|
||||
import React, { useCallback, useState } from 'react'
|
||||
import React, { useCallback, useRef, useState } from 'react'
|
||||
|
||||
interface AudioButtonProps {
|
||||
text: string
|
||||
@@ -9,47 +9,65 @@ interface AudioButtonProps {
|
||||
size?: 'sm' | 'md' | 'lg'
|
||||
}
|
||||
|
||||
/**
|
||||
* AudioButton — plays TTS audio for a word or phrase.
|
||||
*
|
||||
* Priority: Piper TTS (Thorsten DE / Lessac EN) via backend API.
|
||||
* Fallback: Browser Web Speech API if Piper is unavailable.
|
||||
*/
|
||||
export function AudioButton({ text, lang, isDark, size = 'md' }: AudioButtonProps) {
|
||||
const [isSpeaking, setIsSpeaking] = useState(false)
|
||||
const audioRef = useRef<HTMLAudioElement | null>(null)
|
||||
|
||||
const speak = useCallback(() => {
|
||||
if (!('speechSynthesis' in window)) return
|
||||
const speak = useCallback(async () => {
|
||||
// Stop if already playing
|
||||
if (isSpeaking) {
|
||||
window.speechSynthesis.cancel()
|
||||
audioRef.current?.pause()
|
||||
window.speechSynthesis?.cancel()
|
||||
setIsSpeaking(false)
|
||||
return
|
||||
}
|
||||
|
||||
const utterance = new SpeechSynthesisUtterance(text)
|
||||
utterance.lang = lang === 'de' ? 'de-DE' : 'en-GB'
|
||||
utterance.rate = 0.9
|
||||
utterance.pitch = 1.0
|
||||
|
||||
// Try to find a good voice
|
||||
const voices = window.speechSynthesis.getVoices()
|
||||
const preferred = voices.find((v) =>
|
||||
v.lang.startsWith(lang === 'de' ? 'de' : 'en') && v.localService
|
||||
) || voices.find((v) => v.lang.startsWith(lang === 'de' ? 'de' : 'en'))
|
||||
if (preferred) utterance.voice = preferred
|
||||
|
||||
utterance.onend = () => setIsSpeaking(false)
|
||||
utterance.onerror = () => setIsSpeaking(false)
|
||||
|
||||
setIsSpeaking(true)
|
||||
window.speechSynthesis.speak(utterance)
|
||||
|
||||
// Try Piper TTS via backend API first
|
||||
try {
|
||||
const url = `/api/vocabulary/tts?text=${encodeURIComponent(text)}&lang=${lang}`
|
||||
const resp = await fetch(url)
|
||||
if (resp.ok && resp.headers.get('content-type')?.startsWith('audio')) {
|
||||
const blob = await resp.blob()
|
||||
const audioUrl = URL.createObjectURL(blob)
|
||||
const audio = new Audio(audioUrl)
|
||||
audioRef.current = audio
|
||||
audio.onended = () => { setIsSpeaking(false); URL.revokeObjectURL(audioUrl) }
|
||||
audio.onerror = () => { setIsSpeaking(false); URL.revokeObjectURL(audioUrl) }
|
||||
await audio.play()
|
||||
return
|
||||
}
|
||||
} catch {
|
||||
// Piper unavailable — fall through to Web Speech API
|
||||
}
|
||||
|
||||
// Fallback: Browser Web Speech API
|
||||
if ('speechSynthesis' in window) {
|
||||
const utterance = new SpeechSynthesisUtterance(text)
|
||||
utterance.lang = lang === 'de' ? 'de-DE' : 'en-GB'
|
||||
utterance.rate = 0.9
|
||||
const voices = window.speechSynthesis.getVoices()
|
||||
const preferred = voices.find((v) =>
|
||||
v.lang.startsWith(lang === 'de' ? 'de' : 'en') && v.localService
|
||||
) || voices.find((v) => v.lang.startsWith(lang === 'de' ? 'de' : 'en'))
|
||||
if (preferred) utterance.voice = preferred
|
||||
utterance.onend = () => setIsSpeaking(false)
|
||||
utterance.onerror = () => setIsSpeaking(false)
|
||||
window.speechSynthesis.speak(utterance)
|
||||
} else {
|
||||
setIsSpeaking(false)
|
||||
}
|
||||
}, [text, lang, isSpeaking])
|
||||
|
||||
const sizeClasses = {
|
||||
sm: 'w-7 h-7',
|
||||
md: 'w-9 h-9',
|
||||
lg: 'w-11 h-11',
|
||||
}
|
||||
|
||||
const iconSizes = {
|
||||
sm: 'w-3.5 h-3.5',
|
||||
md: 'w-4 h-4',
|
||||
lg: 'w-5 h-5',
|
||||
}
|
||||
const sizeClasses = { sm: 'w-7 h-7', md: 'w-9 h-9', lg: 'w-11 h-11' }
|
||||
const iconSizes = { sm: 'w-3.5 h-3.5', md: 'w-4 h-4', lg: 'w-5 h-5' }
|
||||
|
||||
return (
|
||||
<button
|
||||
|
||||
Reference in New Issue
Block a user