feat(pitch-deck): route DE presenter TTS through OVH via LiteLLM passthrough

Adds an OVH-backed branch to /api/presenter/tts so the German presenter
narration is synthesized by OVH AI Endpoints' nvr-tts-de-de (NVIDIA Riva)
reached through the LiteLLM passthrough at /tts-ovh/audio/*, which
injects the OVH API token server-side.

- DE requests now hit ${LITELLM_URL}/tts-ovh/audio/v1/tts/text_to_audio
  with the documented body shape (encoding=1, language_code=de-DE,
  voice_name=German-DE-Male-1, sample_rate_hz=22050) and return the
  audio/wav bytes upstream serves (confirmed RIFF-framed in a smoke test).
- EN continues to hit compliance-tts-service until OVH_TTS_URL_EN is set,
  making the eventual EN switch a single env flip.
- The OVH URL, voice, and sample-rate parameters are env-overridable
  (OVH_TTS_URL_DE, OVH_TTS_VOICE_DE, OVH_TTS_SAMPLE_RATE,
  OVH_TTS_URL_EN, OVH_TTS_VOICE_EN) so retuning doesn't need a code change
  (the values are read from the environment when the module loads).
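- Defensive: OVH error responses surface as 502 (no silent fallback to the
  compliance service) so upstream issues are visible during this test
  rollout; network errors and timeouts surface as 503 via the route's
  catch-all handler.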
- wrapPcmAsWav() helper is kept as a safety net in case OVH ever returns
  bare PCM instead of a full WAV.

Adds X-TTS-Source response header (ovh | compliance) to make
provenance observable from DevTools.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
Sharang Parnerkar
2026-04-15 18:35:38 +02:00
parent 7c17e484c1
commit 01f05e4399

View File

@@ -1,6 +1,30 @@
import { NextRequest, NextResponse } from 'next/server' import { NextRequest, NextResponse } from 'next/server'
const TTS_SERVICE_URL = process.env.TTS_SERVICE_URL || 'http://compliance-tts-service:8095' const TTS_SERVICE_URL = process.env.TTS_SERVICE_URL || 'http://compliance-tts-service:8095'
// Base URL of the LiteLLM proxy; used to build the default OVH passthrough URL below.
const LITELLM_URL = process.env.LITELLM_URL || 'https://llm-dev.meghsakha.com'
// Sent as the Bearer token on OVH passthrough requests. Empty string when unset.
const LITELLM_API_KEY = process.env.LITELLM_API_KEY || ''

// OVH AI Endpoints TTS via the LiteLLM passthrough.
// Path on the LiteLLM side: /tts-ovh/audio/* → https://nvr-tts-<lang>.endpoints.kepler.ai.cloud.ovh.net/api/*
const OVH_TTS = {
  de: {
    url: process.env.OVH_TTS_URL_DE || `${LITELLM_URL}/tts-ovh/audio/v1/tts/text_to_audio`,
    // German only exposes a male voice; note the hyphen separator (EN uses dots).
    voice: process.env.OVH_TTS_VOICE_DE || 'German-DE-Male-1',
    languageCode: 'de-DE',
  },
  // Enable by setting OVH_TTS_URL_EN (e.g. pointing at a second LiteLLM
  // passthrough that targets nvr-tts-en-us). Keeps EN on the old path until set.
  en: process.env.OVH_TTS_URL_EN
    ? {
        url: process.env.OVH_TTS_URL_EN,
        voice: process.env.OVH_TTS_VOICE_EN || 'English-US.Female-1',
        languageCode: 'en-US',
      }
    : null,
} as const

// Sample rate used when OVH_TTS_SAMPLE_RATE is unset or unusable.
const DEFAULT_SAMPLE_RATE_HZ = 22050

/**
 * Parses the OVH_TTS_SAMPLE_RATE override.
 *
 * @param raw - Raw environment value (may be undefined or empty).
 * @returns The parsed rate when it is a positive integer, otherwise
 *   DEFAULT_SAMPLE_RATE_HZ. A malformed value would otherwise become NaN,
 *   which serializes as `null` in the request body and cannot be written
 *   into a WAV header meaningfully.
 */
function parseSampleRateHz(raw: string | undefined): number {
  if (!raw) {
    return DEFAULT_SAMPLE_RATE_HZ
  }
  const parsed = parseInt(raw, 10)
  if (Number.isInteger(parsed) && parsed > 0) {
    return parsed
  }
  console.warn(`Invalid OVH_TTS_SAMPLE_RATE "${raw}", falling back to ${DEFAULT_SAMPLE_RATE_HZ}`)
  return DEFAULT_SAMPLE_RATE_HZ
}

// Sample rate requested from OVH and, if needed, written into the WAV header.
const SAMPLE_RATE_HZ = parseSampleRateHz(process.env.OVH_TTS_SAMPLE_RATE)
export async function POST(request: NextRequest) { export async function POST(request: NextRequest) {
try { try {
@@ -11,6 +35,58 @@ export async function POST(request: NextRequest) {
return NextResponse.json({ error: 'Text is required' }, { status: 400 }) return NextResponse.json({ error: 'Text is required' }, { status: 400 })
} }
const ovh = language === 'de' ? OVH_TTS.de : OVH_TTS.en
if (ovh) {
return await synthesizeViaOvh(text, ovh)
}
return await synthesizeViaComplianceService(text, language)
} catch (error) {
console.error('TTS proxy error:', error)
return NextResponse.json({ error: 'TTS service not reachable' }, { status: 503 })
}
}
/**
 * Synthesizes `text` through the OVH TTS endpoint and returns it as audio/wav.
 *
 * @param text - Text to synthesize.
 * @param cfg - Target endpoint URL, voice name and language code.
 * @returns A WAV response tagged `X-TTS-Source: ovh`, or a 502 JSON error when
 *   the upstream answers with a non-2xx status or an empty body.
 *
 * Rejections from `fetch` (network failure, 30 s timeout) are not caught here
 * and propagate to the caller.
 */
async function synthesizeViaOvh(
  text: string,
  cfg: { url: string; voice: string; languageCode: string },
): Promise<NextResponse> {
  const res = await fetch(cfg.url, {
    method: 'POST',
    headers: {
      accept: 'application/octet-stream',
      'Content-Type': 'application/json',
      Authorization: `Bearer ${LITELLM_API_KEY}`,
    },
    body: JSON.stringify({
      encoding: 1, // LINEAR_PCM
      language_code: cfg.languageCode,
      sample_rate_hz: SAMPLE_RATE_HZ,
      text,
      voice_name: cfg.voice,
    }),
    signal: AbortSignal.timeout(30000),
  })

  if (!res.ok) {
    // Log only a bounded prefix of the upstream body to keep logs readable.
    const errorText = await res.text().catch(() => '')
    console.error('OVH TTS error:', res.status, errorText.slice(0, 500))
    return NextResponse.json({ error: `OVH TTS error (${res.status})` }, { status: 502 })
  }

  const audio = Buffer.from(await res.arrayBuffer())
  if (audio.length === 0) {
    // An empty 2xx body would otherwise be wrapped into a header-only WAV and
    // cached for 24h; surface it as an upstream failure instead.
    console.error('OVH TTS error: empty audio body', res.status)
    return NextResponse.json({ error: 'OVH TTS returned empty audio' }, { status: 502 })
  }

  // Pass through payloads that are already RIFF-framed; wrap anything else as raw PCM.
  const isRiff = audio.subarray(0, 4).toString('ascii') === 'RIFF'
  const wav = isRiff ? audio : wrapPcmAsWav(audio, SAMPLE_RATE_HZ)

  return new NextResponse(new Uint8Array(wav), {
    headers: {
      'Content-Type': 'audio/wav',
      'Cache-Control': 'public, max-age=86400',
      'X-TTS-Source': 'ovh',
    },
  })
}
async function synthesizeViaComplianceService(text: string, language: string): Promise<NextResponse> {
const res = await fetch(`${TTS_SERVICE_URL}/synthesize-direct`, { const res = await fetch(`${TTS_SERVICE_URL}/synthesize-direct`, {
method: 'POST', method: 'POST',
headers: { 'Content-Type': 'application/json' }, headers: { 'Content-Type': 'application/json' },
@@ -19,28 +95,45 @@ export async function POST(request: NextRequest) {
}) })
if (!res.ok) { if (!res.ok) {
const errorText = await res.text() const errorText = await res.text().catch(() => '')
console.error('TTS service error:', res.status, errorText) console.error('TTS service error:', res.status, errorText.slice(0, 500))
return NextResponse.json( return NextResponse.json({ error: `TTS service error (${res.status})` }, { status: 502 })
{ error: `TTS service error (${res.status})` },
{ status: 502 }
)
} }
const audioBuffer = await res.arrayBuffer() const audioBuffer = await res.arrayBuffer()
return new NextResponse(audioBuffer, { return new NextResponse(audioBuffer, {
headers: { headers: {
'Content-Type': 'audio/mpeg', 'Content-Type': 'audio/mpeg',
'Cache-Control': 'public, max-age=86400', // Cache 24h — texts are static 'Cache-Control': 'public, max-age=86400',
'X-TTS-Cache': res.headers.get('X-TTS-Cache') || 'unknown', 'X-TTS-Cache': res.headers.get('X-TTS-Cache') || 'unknown',
'X-TTS-Source': 'compliance',
}, },
}) })
} catch (error) { }
console.error('TTS proxy error:', error)
/**
 * Prepends a minimal 44-byte RIFF/WAVE header to raw PCM so browsers can play it.
 *
 * The header always declares 16-bit mono PCM; the input is assumed to match
 * that layout. Intended as a safety net for upstream responses that are not
 * already RIFF-framed.
 *
 * @param pcm - Raw sample bytes. A trailing byte that does not complete a
 *   16-bit frame is dropped so the data chunk holds whole frames only.
 * @param sampleRateHz - Sample rate to declare in the header; must be a
 *   positive integer.
 * @returns A new buffer containing the header followed by the sample data.
 * @throws RangeError when `sampleRateHz` is not a positive integer, since the
 *   resulting header would be unusable.
 */
function wrapPcmAsWav(pcm: Buffer, sampleRateHz: number): Buffer {
  if (!Number.isInteger(sampleRateHz) || sampleRateHz <= 0) {
    throw new RangeError(`Invalid WAV sample rate: ${sampleRateHz}`)
  }

  const numChannels = 1
  const bitsPerSample = 16
  const blockAlign = (numChannels * bitsPerSample) / 8
  const byteRate = sampleRateHz * blockAlign

  // Keep only whole frames; an odd byte count cannot be a complete 16-bit sample.
  const samples = pcm.subarray(0, pcm.length - (pcm.length % blockAlign))
  const dataSize = samples.length

  const header = Buffer.alloc(44)
  header.write('RIFF', 0)
  header.writeUInt32LE(36 + dataSize, 4) // RIFF chunk size = rest of header + data
  header.write('WAVE', 8)
  header.write('fmt ', 12)
  header.writeUInt32LE(16, 16) // PCM subchunk size
  header.writeUInt16LE(1, 20) // PCM format
  header.writeUInt16LE(numChannels, 22)
  header.writeUInt32LE(sampleRateHz, 24)
  header.writeUInt32LE(byteRate, 28)
  header.writeUInt16LE(blockAlign, 32)
  header.writeUInt16LE(bitsPerSample, 34)
  header.write('data', 36)
  header.writeUInt32LE(dataSize, 40)

  return Buffer.concat([header, samples])
}