import { NextRequest, NextResponse } from 'next/server'

// Base URL of the TTS backend. The docker-compose.yml change that accompanies this
// route sets TTS_SERVICE_URL to http://bp-compliance-tts:8095; the literal below is
// only the fallback used when the variable is unset or empty.
const TTS_SERVICE_URL = process.env.TTS_SERVICE_URL || 'http://compliance-tts-service:8095'

/**
 * Proxies a text-to-speech request to the TTS service and returns the audio bytes.
 *
 * Expects a JSON body `{ text: string, language?: string }`; `language` defaults
 * to `'de'`.
 *
 * @param request - Incoming request carrying the JSON payload.
 * @returns
 *  - 200 with the audio body on success,
 *  - 400 when the body is not valid JSON, `text` is missing/empty/not a string,
 *    or `language` is present but not a string,
 *  - 502 when the TTS service answers with a non-OK status,
 *  - 503 when the upstream call throws (network failure or the 30 s timeout).
 */
export async function POST(request: NextRequest): Promise<NextResponse> {
  // Parse outside the upstream try/catch so a malformed payload is reported as a
  // client error (400) rather than as "TTS service not reachable" (503).
  let body: unknown
  try {
    body = await request.json()
  } catch {
    return NextResponse.json({ error: 'Invalid JSON body' }, { status: 400 })
  }

  // JSON `null` or a primitive cannot be destructured; treat it as missing text.
  if (typeof body !== 'object' || body === null) {
    return NextResponse.json({ error: 'Text is required' }, { status: 400 })
  }

  const { text, language = 'de' } = body as { text?: unknown; language?: unknown }

  if (!text || typeof text !== 'string') {
    return NextResponse.json({ error: 'Text is required' }, { status: 400 })
  }
  if (typeof language !== 'string') {
    return NextResponse.json({ error: 'Language must be a string' }, { status: 400 })
  }

  try {
    const res = await fetch(`${TTS_SERVICE_URL}/synthesize-direct`, {
      method: 'POST',
      headers: { 'Content-Type': 'application/json' },
      body: JSON.stringify({ text, language }),
      // Abort the upstream call after 30 s; the rejection lands in the catch below.
      signal: AbortSignal.timeout(30000),
    })

    if (!res.ok) {
      // Log the upstream body server-side only; the client gets just the status.
      const errorText = await res.text()
      console.error('TTS service error:', res.status, errorText)
      return NextResponse.json(
        { error: `TTS service error (${res.status})` },
        { status: 502 }
      )
    }

    const audioBuffer = await res.arrayBuffer()

    return new NextResponse(audioBuffer, {
      headers: {
        // NOTE(review): assumes the service returns MP3 — confirm against the TTS service.
        'Content-Type': 'audio/mpeg',
        // Cache 24h — texts are static.
        // NOTE(review): HTTP caches rarely store POST responses, so this header may
        // have little practical effect — confirm.
        'Cache-Control': 'public, max-age=86400',
        // Pass the upstream cache indicator through; 'unknown' when it is absent or empty.
        'X-TTS-Cache': res.headers.get('X-TTS-Cache') || 'unknown',
      },
    })
  } catch (error) {
    console.error('TTS proxy error:', error)
    return NextResponse.json(
      { error: 'TTS service not reachable' },
      { status: 503 }
    )
  }
}
// Client-side audio cache: text hash → blob URL
const audioCache = new Map<string, string>()

/**
 * Derives a short, deterministic cache key from a string.
 *
 * The input is UTF-8 encoded and hashed with SHA-256; only the first 8 bytes of
 * the digest are kept and rendered as 16 lowercase hex characters.
 *
 * Relies on `crypto.subtle`, which browsers expose only in secure contexts
 * (HTTPS or localhost); elsewhere the call rejects.
 *
 * @param text - String to hash.
 * @returns A 16-character hex string.
 */
async function hashText(text: string): Promise<string> {
  const encoder = new TextEncoder()
  const data = encoder.encode(text)
  const hash = await crypto.subtle.digest('SHA-256', data)
  // Truncate to 8 bytes: short keys are enough for an in-memory cache.
  return Array.from(new Uint8Array(hash))
    .slice(0, 8)
    .map(b => b.toString(16).padStart(2, '0'))
    .join('')
}
window.speechSynthesis.addEventListener('voiceschanged', loadVoices) - return () => { - window.speechSynthesis.removeEventListener('voiceschanged', loadVoices) - } - }, []) - - const getVoice = useCallback((lang: Language): SpeechSynthesisVoice | null => { - const voices = voicesRef.current - if (!voices.length) return null - const langCode = lang === 'de' ? 'de' : 'en' - // Prefer high-quality voices - const premium = voices.find(v => - v.lang.startsWith(langCode) && /premium|enhanced|neural|google|microsoft/i.test(v.name) - ) - if (premium) return premium - return voices.find(v => v.lang.startsWith(langCode)) || null + fetch('/api/presenter/tts', { + method: 'POST', + headers: { 'Content-Type': 'application/json' }, + body: JSON.stringify({ text: 'Test', language: 'de' }), + signal: AbortSignal.timeout(5000), + }) + .then(res => { + setTtsAvailable(res.ok) + if (res.ok) console.log('Piper TTS available') + else console.warn('Piper TTS not available:', res.status) + }) + .catch(() => { + setTtsAvailable(false) + console.warn('Piper TTS service not reachable') + }) }, []) const cancelSpeech = useCallback(() => { - if (typeof window !== 'undefined' && window.speechSynthesis) { - window.speechSynthesis.cancel() + if (audioRef.current) { + audioRef.current.pause() + audioRef.current.currentTime = 0 + audioRef.current = null + } + if (abortRef.current) { + abortRef.current.abort() + abortRef.current = null } - utteranceRef.current = null setIsSpeaking(false) }, []) @@ -123,48 +128,79 @@ export function usePresenterMode({ // Update speakAndAdvance ref whenever dependencies change useEffect(() => { speakAndAdvanceRef.current = (text: string, pauseAfter: number, onDone: () => void) => { - const canSpeak = ttsAvailable && ttsEnabled && typeof window !== 'undefined' - - if (canSpeak) { - // Chrome bug: speechSynthesis can get stuck - window.speechSynthesis.cancel() - - const utterance = new SpeechSynthesisUtterance(text) - const voice = getVoice(language) - if (voice) 
utterance.voice = voice - utterance.lang = language === 'de' ? 'de-DE' : 'en-US' - utterance.rate = speechRate - utterance.pitch = 1.0 - - const handleEnd = () => { - setIsSpeaking(false) - utteranceRef.current = null - if (pauseAfter > 0) { - timerRef.current = setTimeout(onDone, pauseAfter) - } else { - onDone() - } - } - - utterance.onstart = () => setIsSpeaking(true) - utterance.onend = handleEnd - utterance.onerror = (e) => { - if (e.error !== 'canceled') console.warn('TTS error:', e.error) - setIsSpeaking(false) - utteranceRef.current = null - handleEnd() - } - - utteranceRef.current = utterance - window.speechSynthesis.speak(utterance) - } else { + if (!ttsAvailable || !ttsEnabled) { // No TTS — use word-count-based timer const wordCount = text.split(/\s+/).length const readingTime = Math.max(wordCount * 150, 2000) timerRef.current = setTimeout(onDone, readingTime + pauseAfter) + return } + + // Piper TTS via API + setIsSpeaking(true) + const controller = new AbortController() + abortRef.current = controller + + const playAudio = async () => { + try { + const key = await hashText(text + language) + let blobUrl = audioCache.get(key) + + if (!blobUrl) { + const res = await fetch('/api/presenter/tts', { + method: 'POST', + headers: { 'Content-Type': 'application/json' }, + body: JSON.stringify({ text, language }), + signal: controller.signal, + }) + + if (!res.ok) throw new Error(`TTS error: ${res.status}`) + + const blob = await res.blob() + blobUrl = URL.createObjectURL(blob) + audioCache.set(key, blobUrl) + } + + if (controller.signal.aborted) return + + const audio = new Audio(blobUrl) + audioRef.current = audio + + audio.onended = () => { + setIsSpeaking(false) + audioRef.current = null + if (pauseAfter > 0) { + timerRef.current = setTimeout(onDone, pauseAfter) + } else { + onDone() + } + } + + audio.onerror = () => { + console.warn('Audio playback error') + setIsSpeaking(false) + audioRef.current = null + // Fallback to timer + const wordCount = 
text.split(/\s+/).length + const readingTime = Math.max(wordCount * 150, 2000) + timerRef.current = setTimeout(onDone, readingTime + pauseAfter) + } + + await audio.play() + } catch (err: any) { + if (err.name === 'AbortError') return + console.warn('TTS fetch error:', err) + setIsSpeaking(false) + // Fallback to timer + const wordCount = text.split(/\s+/).length + const readingTime = Math.max(wordCount * 150, 2000) + timerRef.current = setTimeout(onDone, readingTime + pauseAfter) + } + } + + playAudio() } - }, [ttsAvailable, ttsEnabled, language, speechRate, getVoice]) + }, [ttsAvailable, ttsEnabled, language]) // Update advancePresentation ref whenever dependencies change useEffect(() => { @@ -342,18 +378,6 @@ export function usePresenterMode({ } }, [clearTimer, cancelSpeech]) - // Chrome workaround: speechSynthesis pauses after ~15s without interaction - useEffect(() => { - if (state !== 'presenting' || !ttsEnabled || !ttsAvailable) return - const keepAlive = setInterval(() => { - if (typeof window !== 'undefined' && window.speechSynthesis?.speaking) { - window.speechSynthesis.pause() - window.speechSynthesis.resume() - } - }, 10000) - return () => clearInterval(keepAlive) - }, [state, ttsEnabled, ttsAvailable]) - return { state, currentParagraph,