feat(presenter): continuous speech — no gaps between paragraphs/slides

- Concatenate all paragraphs + transition hint into one TTS call per slide
  → natural prosody, zero gaps within a slide
- Pre-fetch next slide's audio during current playback → seamless transitions
- Advance slide during transition phrase ("Let us look at...")
- Pause/resume without destroying audio → instant continue
- Subtitle display synced to playback position via timeupdate

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
Benjamin Admin
2026-03-20 17:02:13 +01:00
parent fa4027d027
commit f126b40574

View File

@@ -1,8 +1,8 @@
'use client'
import { useState, useCallback, useRef, useEffect } from 'react'
import { Language, SlideId } from '../types'
import { PresenterState } from '../presenter/types'
import { Language } from '../types'
import { PresenterState, SlideScript } from '../presenter/types'
import { PRESENTER_SCRIPT } from '../presenter/presenter-script'
import { SLIDE_ORDER } from './useSlideNavigation'
@@ -11,7 +11,7 @@ interface UsePresenterModeConfig {
currentSlide: number
totalSlides: number
language: Language
ttsEnabled?: boolean // default true
ttsEnabled?: boolean
}
interface UsePresenterModeReturn {
@@ -31,11 +31,10 @@ interface UsePresenterModeReturn {
setTtsEnabled: (enabled: boolean) => void
}
// Client-side audio cache: text key → blob URL
// Client-side audio cache: text hash → blob URL
const audioCache = new Map<string, string>()
function cacheKey(text: string, lang: string): string {
// Simple string hash — no crypto.subtle needed (works on HTTP too)
let hash = 0
const str = text + '|' + lang
for (let i = 0; i < str.length; i++) {
@@ -44,6 +43,77 @@ function cacheKey(text: string, lang: string): string {
return 'tts_' + (hash >>> 0).toString(36)
}
// --- Slide audio plan: concatenates all paragraphs + transition into one text ---
// One subtitle unit within a slide's single concatenated TTS audio clip.
interface AudioSegment {
// Text displayed as the subtitle while this segment is playing.
text: string
startRatio: number // 0..1 — where in the audio this segment starts (estimated by cumulative word count)
isTransition: boolean // true for the transition hint — playback of it triggers the slide advance
}
// Everything needed to speak one slide: the full TTS payload plus the
// per-segment timing map used to sync subtitles to playback position.
interface SlideAudioPlan {
fullText: string // all paragraphs + optional transition hint, space-joined, sent as one TTS request
segments: AudioSegment[] // in playback order, startRatio ascending
}
// Build the concatenated TTS text and subtitle timing map for one slide.
// Returns null when the slide has no script or no paragraphs (caller skips it).
// Segment timing is estimated from word position: each startRatio is the
// fraction of total words spoken before that segment begins — this assumes a
// roughly constant speaking rate across the slide.
function buildSlideAudioPlan(slideIdx: number, lang: Language): SlideAudioPlan | null {
  const script = PRESENTER_SCRIPT.find(s => s.slideId === SLIDE_ORDER[slideIdx])
  if (!script || script.paragraphs.length === 0) return null

  const pieces: string[] = []
  const segments: AudioSegment[] = []
  let wordsSoFar = 0

  // Record a segment starting at the current cumulative word offset.
  const pushSegment = (text: string, isTransition: boolean) => {
    segments.push({ text, startRatio: wordsSoFar, isTransition })
    wordsSoFar += text.split(/\s+/).length
    pieces.push(text)
  }

  for (const para of script.paragraphs) {
    pushSegment(lang === 'de' ? para.text_de : para.text_en, false)
  }

  const hint = (lang === 'de' ? script.transition_hint_de : script.transition_hint_en) || ''
  if (hint) pushSegment(hint, true)

  // Convert cumulative word offsets into 0..1 ratios.
  if (wordsSoFar > 0) {
    segments.forEach(s => { s.startRatio /= wordsSoFar })
  }

  return { fullText: pieces.join(' '), segments }
}
// Fetch TTS audio for `text` in `lang`, returning a blob URL.
// Results are memoized in the module-level audioCache keyed by text+lang.
// Returns null on any failure (non-OK response, network error, abort) so
// callers can fall back to timer-based pacing instead of throwing.
async function fetchAudio(text: string, lang: string, signal?: AbortSignal): Promise<string | null> {
  const key = cacheKey(text, lang)
  const hit = audioCache.get(key)
  if (hit) return hit

  try {
    const response = await fetch('/api/presenter/tts', {
      method: 'POST',
      headers: { 'Content-Type': 'application/json' },
      body: JSON.stringify({ text, language: lang }),
      signal,
    })
    if (!response.ok) return null

    const url = URL.createObjectURL(await response.blob())
    audioCache.set(key, url)
    return url
  } catch {
    // AbortError and network failures alike — best-effort, caller handles null.
    return null
  }
}
export function usePresenterMode({
goToSlide,
currentSlide,
@@ -57,23 +127,22 @@ export function usePresenterMode({
const [isSpeaking, setIsSpeaking] = useState(false)
const [ttsEnabled, setTtsEnabled] = useState(initialTtsEnabled)
const [ttsAvailable, setTtsAvailable] = useState(false)
const timerRef = useRef<NodeJS.Timeout | null>(null)
const slideIndexRef = useRef(currentSlide)
const paragraphIndexRef = useRef(0)
const stateRef = useRef<PresenterState>('idle')
const slideIndexRef = useRef(currentSlide)
const audioRef = useRef<HTMLAudioElement | null>(null)
const abortRef = useRef<AbortController | null>(null)
const audioUnlockedRef = useRef(false)
const slideAdvancedRef = useRef(false)
const timerRefs = useRef<NodeJS.Timeout[]>([])
// Refs for recursive functions to avoid circular useCallback dependencies
const advanceRef = useRef<() => void>(() => {})
const speakAndAdvanceRef = useRef<(text: string, pauseAfter: number, onDone: () => void) => void>(() => {})
// Ref for playSlide — avoids stale closure in audio callbacks
const playSlideRef = useRef<(slideIdx: number) => void>(() => {})
// Unlock browser audio playback — must be called from a user gesture (click)
// Unlock browser audio (must be called from user gesture)
const unlockAudio = useCallback(() => {
if (audioUnlockedRef.current) return
try {
// Create and play a silent audio to unlock the Audio API
const ctx = new (window.AudioContext || (window as any).webkitAudioContext)()
const buffer = ctx.createBuffer(1, 1, 22050)
const source = ctx.createBufferSource()
@@ -81,13 +150,10 @@ export function usePresenterMode({
source.connect(ctx.destination)
source.start(0)
audioUnlockedRef.current = true
console.log('Audio playback unlocked')
} catch (e) {
console.warn('Audio unlock failed:', e)
}
} catch {}
}, [])
// Check TTS service availability on mount
// Check TTS availability on mount
useEffect(() => {
fetch('/api/presenter/tts', {
method: 'POST',
@@ -95,27 +161,25 @@ export function usePresenterMode({
body: JSON.stringify({ text: 'Test', language: 'de' }),
signal: AbortSignal.timeout(5000),
})
.then(res => {
setTtsAvailable(res.ok)
if (res.ok) console.log('Piper TTS available')
else console.warn('Piper TTS not available:', res.status)
})
.catch(() => {
setTtsAvailable(false)
console.warn('Piper TTS service not reachable')
})
.then(res => setTtsAvailable(res.ok))
.catch(() => setTtsAvailable(false))
}, [])
const cancelSpeech = useCallback(() => {
// Cancel all audio and timers
const cancelAll = useCallback(() => {
if (audioRef.current) {
audioRef.current.pause()
audioRef.current.currentTime = 0
audioRef.current.onended = null
audioRef.current.ontimeupdate = null
audioRef.current.onerror = null
audioRef.current = null
}
if (abortRef.current) {
abortRef.current.abort()
abortRef.current = null
}
for (const t of timerRefs.current) clearTimeout(t)
timerRefs.current = []
setIsSpeaking(false)
}, [])
@@ -123,259 +187,204 @@ export function usePresenterMode({
useEffect(() => { slideIndexRef.current = currentSlide }, [currentSlide])
useEffect(() => { stateRef.current = state }, [state])
const clearTimer = useCallback(() => {
if (timerRef.current) {
clearTimeout(timerRef.current)
timerRef.current = null
}
}, [])
const getScriptForIndex = useCallback((index: number) => {
const slideId = SLIDE_ORDER[index]
return PRESENTER_SCRIPT.find(s => s.slideId === slideId)
}, [])
const showParagraph = useCallback((slideIdx: number, paraIdx: number) => {
const script = getScriptForIndex(slideIdx)
if (!script || paraIdx >= script.paragraphs.length) return null
const para = script.paragraphs[paraIdx]
const text = language === 'de' ? para.text_de : para.text_en
setDisplayText(text)
setCurrentParagraph(paraIdx)
paragraphIndexRef.current = paraIdx
return para
}, [language, getScriptForIndex])
// Update speakAndAdvance ref whenever dependencies change
// --- Core: play one slide's audio (all paragraphs concatenated) ---
useEffect(() => {
speakAndAdvanceRef.current = (text: string, pauseAfter: number, onDone: () => void) => {
if (!ttsAvailable || !ttsEnabled) {
// No TTS — use word-count-based timer
const wordCount = text.split(/\s+/).length
const readingTime = Math.max(wordCount * 150, 2000)
timerRef.current = setTimeout(onDone, readingTime + pauseAfter)
return
}
// Piper TTS via API
setIsSpeaking(true)
const controller = new AbortController()
abortRef.current = controller
const playAudio = async () => {
try {
const key = cacheKey(text, language)
let blobUrl = audioCache.get(key)
if (!blobUrl) {
console.log('[TTS] Fetching audio for:', text.slice(0, 50) + '...')
const res = await fetch('/api/presenter/tts', {
method: 'POST',
headers: { 'Content-Type': 'application/json' },
body: JSON.stringify({ text, language }),
signal: controller.signal,
})
if (!res.ok) throw new Error(`TTS error: ${res.status}`)
const blob = await res.blob()
console.log('[TTS] Audio received:', blob.size, 'bytes')
blobUrl = URL.createObjectURL(blob)
audioCache.set(key, blobUrl)
} else {
console.log('[TTS] Cache hit for:', text.slice(0, 50) + '...')
}
if (controller.signal.aborted) return
const audio = new Audio(blobUrl)
audioRef.current = audio
audio.onended = () => {
console.log('[TTS] Audio playback ended')
setIsSpeaking(false)
audioRef.current = null
if (pauseAfter > 0) {
timerRef.current = setTimeout(onDone, pauseAfter)
} else {
onDone()
}
}
audio.onerror = (e) => {
console.warn('[TTS] Audio playback error:', e)
setIsSpeaking(false)
audioRef.current = null
const wordCount = text.split(/\s+/).length
const readingTime = Math.max(wordCount * 150, 2000)
timerRef.current = setTimeout(onDone, readingTime + pauseAfter)
}
await audio.play()
console.log('[TTS] Audio playing')
} catch (err: any) {
if (err.name === 'AbortError') return
console.warn('[TTS] Error:', err.name, err.message)
setIsSpeaking(false)
const wordCount = text.split(/\s+/).length
const readingTime = Math.max(wordCount * 150, 2000)
timerRef.current = setTimeout(onDone, readingTime + pauseAfter)
}
}
playAudio()
}
}, [ttsAvailable, ttsEnabled, language])
// Update advancePresentation ref whenever dependencies change
useEffect(() => {
advanceRef.current = () => {
playSlideRef.current = async (slideIdx: number) => {
if (stateRef.current !== 'presenting') return
const slideIdx = slideIndexRef.current
const script = getScriptForIndex(slideIdx)
if (!script) {
const plan = buildSlideAudioPlan(slideIdx, language)
if (!plan) {
// No script for this slide — skip to next
if (slideIdx < totalSlides - 1) {
goToSlide(slideIdx + 1)
paragraphIndexRef.current = 0
timerRef.current = setTimeout(() => advanceRef.current(), 2000)
slideIndexRef.current = slideIdx + 1
const t = setTimeout(() => playSlideRef.current(slideIdx + 1), 300)
timerRefs.current.push(t)
} else {
cancelSpeech()
setState('idle')
stateRef.current = 'idle'
setDisplayText('')
}
return
}
const nextPara = paragraphIndexRef.current + 1
// Show first segment immediately
setDisplayText(plan.segments[0]?.text || '')
setCurrentParagraph(0)
slideAdvancedRef.current = false
if (nextPara < script.paragraphs.length) {
const para = showParagraph(slideIdx, nextPara)
if (para) {
const text = language === 'de' ? para.text_de : para.text_en
speakAndAdvanceRef.current(text, para.pause_after, () => advanceRef.current())
}
} else {
// All paragraphs done — transition hint then next slide
const hint = language === 'de'
? (script.transition_hint_de || '')
: (script.transition_hint_en || '')
// Pre-fetch next slide's audio in background
if (slideIdx < totalSlides - 1) {
const nextPlan = buildSlideAudioPlan(slideIdx + 1, language)
if (nextPlan) fetchAudio(nextPlan.fullText, language).catch(() => {})
}
const goNext = () => {
if (slideIdx < totalSlides - 1) {
timerRef.current = setTimeout(() => {
if (stateRef.current !== 'presenting') return
// --- Non-TTS path: word-count based timing ---
if (!ttsAvailable || !ttsEnabled) {
const words = plan.fullText.split(/\s+/).length
const totalTime = Math.max(words * 130, 3000)
for (const seg of plan.segments) {
const t = setTimeout(() => {
if (stateRef.current !== 'presenting') return
setDisplayText(seg.text)
if (seg.isTransition && slideIdx < totalSlides - 1 && !slideAdvancedRef.current) {
slideAdvancedRef.current = true
goToSlide(slideIdx + 1)
paragraphIndexRef.current = -1
}
}, seg.startRatio * totalTime)
timerRefs.current.push(t)
}
timerRef.current = setTimeout(() => {
if (stateRef.current !== 'presenting') return
const nextScript = getScriptForIndex(slideIdx + 1)
if (nextScript && nextScript.paragraphs.length > 0) {
const para = showParagraph(slideIdx + 1, 0)
if (para) {
const text = language === 'de' ? para.text_de : para.text_en
speakAndAdvanceRef.current(text, para.pause_after, () => advanceRef.current())
}
} else {
advanceRef.current()
}
}, 1500)
}, 1000)
const t = setTimeout(() => {
if (stateRef.current !== 'presenting') return
const next = slideIdx + 1
if (next < totalSlides) {
if (!slideAdvancedRef.current) goToSlide(next)
slideIndexRef.current = next
playSlideRef.current(next)
} else {
timerRef.current = setTimeout(() => {
cancelSpeech()
setState('idle')
setDisplayText('')
}, 3000)
setState('idle')
stateRef.current = 'idle'
setDisplayText('')
}
}, totalTime)
timerRefs.current.push(t)
return
}
// --- TTS path: fetch + play full-slide audio ---
setIsSpeaking(true)
const controller = new AbortController()
abortRef.current = controller
const blobUrl = await fetchAudio(plan.fullText, language, controller.signal)
if (!blobUrl || controller.signal.aborted) {
setIsSpeaking(false)
return
}
if (stateRef.current !== 'presenting') {
setIsSpeaking(false)
return
}
const audio = new Audio(blobUrl)
audioRef.current = audio
// Sync subtitle text to playback position
audio.ontimeupdate = () => {
if (!audio.duration || stateRef.current !== 'presenting') return
const ratio = audio.currentTime / audio.duration
for (let i = plan.segments.length - 1; i >= 0; i--) {
if (ratio >= plan.segments[i].startRatio) {
setDisplayText(plan.segments[i].text)
setCurrentParagraph(Math.min(i, plan.segments.length - 1))
// Advance slide when transition phrase starts playing
if (plan.segments[i].isTransition && !slideAdvancedRef.current && slideIdx < totalSlides - 1) {
slideAdvancedRef.current = true
goToSlide(slideIdx + 1)
}
break
}
}
}
if (hint) {
setDisplayText(hint)
speakAndAdvanceRef.current(hint, 0, () => {
if (stateRef.current !== 'presenting') return
goNext()
})
// When audio finishes → immediately play next slide (pre-fetched)
audio.onended = () => {
setIsSpeaking(false)
audioRef.current = null
if (stateRef.current !== 'presenting') return
const next = slideIdx + 1
if (next < totalSlides) {
if (!slideAdvancedRef.current) goToSlide(next)
slideIndexRef.current = next
playSlideRef.current(next)
} else {
goNext()
setState('idle')
stateRef.current = 'idle'
setDisplayText('')
}
}
audio.onerror = () => {
setIsSpeaking(false)
audioRef.current = null
// Skip to next slide on error
if (stateRef.current !== 'presenting') return
const next = slideIdx + 1
if (next < totalSlides) {
goToSlide(next)
slideIndexRef.current = next
playSlideRef.current(next)
}
}
try {
await audio.play()
} catch {
setIsSpeaking(false)
}
}
}, [language, totalSlides, goToSlide, getScriptForIndex, showParagraph, cancelSpeech])
}, [language, totalSlides, goToSlide, ttsAvailable, ttsEnabled])
const start = useCallback(() => {
// Unlock audio playback immediately in user gesture context
unlockAudio()
cancelAll()
setState('presenting')
stateRef.current = 'presenting'
playSlideRef.current(slideIndexRef.current)
}, [unlockAudio, cancelAll])
clearTimer()
cancelSpeech()
const stop = useCallback(() => {
cancelAll()
setState('idle')
stateRef.current = 'idle'
setDisplayText('')
setCurrentParagraph(0)
}, [cancelAll])
// Pause: keep audio alive, just pause playback
const pause = useCallback(() => {
if (audioRef.current) {
audioRef.current.pause()
}
for (const t of timerRefs.current) clearTimeout(t)
timerRefs.current = []
setState('paused')
stateRef.current = 'paused'
setIsSpeaking(false)
}, [])
// Resume: continue paused audio, or restart current slide
const resume = useCallback(() => {
setState('presenting')
stateRef.current = 'presenting'
const slideIdx = slideIndexRef.current
const script = getScriptForIndex(slideIdx)
if (script && script.paragraphs.length > 0) {
const para = showParagraph(slideIdx, 0)
if (para) {
const text = language === 'de' ? para.text_de : para.text_en
speakAndAdvanceRef.current(text, para.pause_after, () => advanceRef.current())
}
if (audioRef.current && audioRef.current.paused && audioRef.current.currentTime > 0) {
setIsSpeaking(true)
audioRef.current.play().catch(() => {
setIsSpeaking(false)
playSlideRef.current(slideIndexRef.current)
})
} else {
timerRef.current = setTimeout(() => advanceRef.current(), 1000)
playSlideRef.current(slideIndexRef.current)
}
}, [unlockAudio, clearTimer, cancelSpeech, language, getScriptForIndex, showParagraph])
const stop = useCallback(() => {
clearTimer()
cancelSpeech()
setState('idle')
setDisplayText('')
setCurrentParagraph(0)
paragraphIndexRef.current = 0
}, [clearTimer, cancelSpeech])
const pause = useCallback(() => {
clearTimer()
cancelSpeech()
setState('paused')
}, [clearTimer, cancelSpeech])
const resume = useCallback(() => {
setState('resuming')
stateRef.current = 'resuming'
timerRef.current = setTimeout(() => {
setState('presenting')
stateRef.current = 'presenting' // Sync ref immediately before calling advance
advanceRef.current()
}, 2000)
}, [])
const skipSlide = useCallback(() => {
clearTimer()
cancelSpeech()
cancelAll()
const nextIdx = slideIndexRef.current + 1
if (nextIdx < totalSlides) {
goToSlide(nextIdx)
paragraphIndexRef.current = -1
slideIndexRef.current = nextIdx
if (stateRef.current === 'presenting') {
timerRef.current = setTimeout(() => {
const script = getScriptForIndex(nextIdx)
if (script && script.paragraphs.length > 0) {
const para = showParagraph(nextIdx, 0)
if (para) {
const text = language === 'de' ? para.text_de : para.text_en
speakAndAdvanceRef.current(text, para.pause_after, () => advanceRef.current())
}
}
}, 1500)
playSlideRef.current(nextIdx)
}
}
}, [clearTimer, cancelSpeech, totalSlides, goToSlide, language, getScriptForIndex, showParagraph])
}, [cancelAll, totalSlides, goToSlide])
const toggle = useCallback(() => {
unlockAudio()
@@ -402,11 +411,8 @@ export function usePresenterMode({
// Cleanup on unmount
useEffect(() => {
return () => {
clearTimer()
cancelSpeech()
}
}, [clearTimer, cancelSpeech])
return () => { cancelAll() }
}, [cancelAll])
return {
state,