feat(pitch-deck): route DE presenter TTS through OVH via LiteLLM passthrough
Adds an OVH-backed branch to /api/presenter/tts so the German presenter
narration is synthesized by OVH AI Endpoints' nvr-tts-de-de (NVIDIA Riva)
reached through the LiteLLM passthrough at /tts-ovh/audio/*, which
injects the OVH API token server-side.
- DE requests now hit ${LITELLM_URL}/tts-ovh/audio/v1/tts/text_to_audio
with the documented body shape (encoding=1, language_code=de-DE,
voice_name=German-DE-Male-1, sample_rate_hz=22050) and return the
audio/wav bytes upstream serves (confirmed RIFF-framed in a smoke test).
- EN continues to hit compliance-tts-service until OVH_TTS_URL_EN is set,
making the eventual EN switch a single env flip.
- The OVH URL, voice, and sample-rate parameters are env-overridable
(OVH_TTS_URL_DE, OVH_TTS_VOICE_DE, OVH_TTS_SAMPLE_RATE,
OVH_TTS_URL_EN, OVH_TTS_VOICE_EN), so retuning needs only an env change,
not a code change.
- Defensive: non-2xx OVH responses surface as 502, and network errors or
timeouts as 503 (no silent fallback to compliance-tts-service), so upstream
issues are visible during this test rollout.
- wrapPcmAsWav() helper is kept as a safety net in case OVH ever returns
bare PCM instead of a full WAV.
Adds X-TTS-Source response header (ovh | compliance) to make
provenance observable from DevTools.
Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -1,6 +1,30 @@
|
|||||||
import { NextRequest, NextResponse } from 'next/server'
|
import { NextRequest, NextResponse } from 'next/server'
|
||||||
|
|
||||||
// Base URL of the compliance TTS service, used for any language that has no OVH config.
const TTS_SERVICE_URL = process.env.TTS_SERVICE_URL || 'http://compliance-tts-service:8095'

// LiteLLM gateway base URL and the API key sent to it as a bearer token.
const LITELLM_URL = process.env.LITELLM_URL || 'https://llm-dev.meghsakha.com'
const LITELLM_API_KEY = process.env.LITELLM_API_KEY || ''

// OVH AI Endpoints TTS via the LiteLLM passthrough.
// Path on the LiteLLM side: /tts-ovh/audio/* → https://nvr-tts-<lang>.endpoints.kepler.ai.cloud.ovh.net/api/*
const OVH_TTS = {
  de: {
    url: process.env.OVH_TTS_URL_DE || `${LITELLM_URL}/tts-ovh/audio/v1/tts/text_to_audio`,
    // German only exposes a male voice; note the hyphen separator (EN uses dots).
    voice: process.env.OVH_TTS_VOICE_DE || 'German-DE-Male-1',
    languageCode: 'de-DE',
  },
  // Enable by setting OVH_TTS_URL_EN (e.g. pointing at a second LiteLLM
  // passthrough that targets nvr-tts-en-us). Stays null, keeping EN on the
  // old path, until that variable is set.
  en: process.env.OVH_TTS_URL_EN
    ? {
        url: process.env.OVH_TTS_URL_EN,
        voice: process.env.OVH_TTS_VOICE_EN || 'English-US.Female-1',
        languageCode: 'en-US',
      }
    : null,
} as const

/**
 * Parses a sample-rate override, falling back when it is missing or unusable.
 *
 * @param raw - Raw environment value, if any.
 * @param fallback - Rate to use when `raw` does not parse to a positive integer.
 * @returns The parsed rate in Hz, or `fallback`.
 */
function parseSampleRateHz(raw: string | undefined, fallback: number): number {
  const parsed = Number.parseInt(raw || '', 10)
  // A NaN here would be serialized as `null` in the upstream request body and
  // written as 0 into a generated WAV header, so reject it up front.
  return Number.isInteger(parsed) && parsed > 0 ? parsed : fallback
}

// Sample rate requested from OVH and declared in any WAV header we generate.
const SAMPLE_RATE_HZ = parseSampleRateHz(process.env.OVH_TTS_SAMPLE_RATE, 22050)
|
||||||
|
|
||||||
export async function POST(request: NextRequest) {
|
export async function POST(request: NextRequest) {
|
||||||
try {
|
try {
|
||||||
@@ -11,6 +35,58 @@ export async function POST(request: NextRequest) {
|
|||||||
return NextResponse.json({ error: 'Text is required' }, { status: 400 })
|
return NextResponse.json({ error: 'Text is required' }, { status: 400 })
|
||||||
}
|
}
|
||||||
|
|
||||||
|
const ovh = language === 'de' ? OVH_TTS.de : OVH_TTS.en
|
||||||
|
if (ovh) {
|
||||||
|
return await synthesizeViaOvh(text, ovh)
|
||||||
|
}
|
||||||
|
|
||||||
|
return await synthesizeViaComplianceService(text, language)
|
||||||
|
} catch (error) {
|
||||||
|
console.error('TTS proxy error:', error)
|
||||||
|
return NextResponse.json({ error: 'TTS service not reachable' }, { status: 503 })
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
 * Synthesizes `text` through an OVH TTS endpoint and returns the audio as WAV.
 *
 * @param text - Text to speak.
 * @param cfg - Endpoint URL, voice name and language code sent upstream.
 * @returns An `audio/wav` response tagged `X-TTS-Source: ovh`, or a 502 JSON
 *   error when upstream answers with a non-2xx status or an empty body.
 *
 * Network failures and the 30 s timeout reject the returned promise; handling
 * them is left to the caller.
 */
async function synthesizeViaOvh(
  text: string,
  cfg: { url: string; voice: string; languageCode: string },
): Promise<NextResponse> {
  const headers: Record<string, string> = {
    accept: 'application/octet-stream',
    'Content-Type': 'application/json',
  }
  // With no key configured, omit the header instead of sending an empty
  // (malformed) bearer token.
  if (LITELLM_API_KEY) {
    headers.Authorization = `Bearer ${LITELLM_API_KEY}`
  }

  const res = await fetch(cfg.url, {
    method: 'POST',
    headers,
    body: JSON.stringify({
      encoding: 1, // LINEAR_PCM
      language_code: cfg.languageCode,
      sample_rate_hz: SAMPLE_RATE_HZ,
      text,
      voice_name: cfg.voice,
    }),
    signal: AbortSignal.timeout(30000),
  })

  if (!res.ok) {
    const errorText = await res.text().catch(() => '')
    // Cap the logged body so a large upstream error page cannot flood the logs.
    console.error('OVH TTS error:', res.status, errorText.slice(0, 500))
    return NextResponse.json({ error: `OVH TTS error (${res.status})` }, { status: 502 })
  }

  const audio = Buffer.from(await res.arrayBuffer())

  // A 2xx with no bytes would otherwise become a header-only WAV that clients
  // cache for a day; report it as an upstream failure instead.
  if (audio.length === 0) {
    console.error('OVH TTS error: empty audio payload')
    return NextResponse.json({ error: 'OVH TTS error (empty audio)' }, { status: 502 })
  }

  // Pass an already RIFF-framed payload through untouched; wrap anything else as raw PCM.
  const isRiff = audio.length >= 4 && audio.toString('ascii', 0, 4) === 'RIFF'
  const wav = isRiff ? audio : wrapPcmAsWav(audio, SAMPLE_RATE_HZ)

  return new NextResponse(new Uint8Array(wav), {
    headers: {
      'Content-Type': 'audio/wav',
      'Cache-Control': 'public, max-age=86400',
      'X-TTS-Source': 'ovh',
    },
  })
}
|
||||||
|
|
||||||
|
async function synthesizeViaComplianceService(text: string, language: string): Promise<NextResponse> {
|
||||||
const res = await fetch(`${TTS_SERVICE_URL}/synthesize-direct`, {
|
const res = await fetch(`${TTS_SERVICE_URL}/synthesize-direct`, {
|
||||||
method: 'POST',
|
method: 'POST',
|
||||||
headers: { 'Content-Type': 'application/json' },
|
headers: { 'Content-Type': 'application/json' },
|
||||||
@@ -19,28 +95,45 @@ export async function POST(request: NextRequest) {
|
|||||||
})
|
})
|
||||||
|
|
||||||
if (!res.ok) {
|
if (!res.ok) {
|
||||||
const errorText = await res.text()
|
const errorText = await res.text().catch(() => '')
|
||||||
console.error('TTS service error:', res.status, errorText)
|
console.error('TTS service error:', res.status, errorText.slice(0, 500))
|
||||||
return NextResponse.json(
|
return NextResponse.json({ error: `TTS service error (${res.status})` }, { status: 502 })
|
||||||
{ error: `TTS service error (${res.status})` },
|
|
||||||
{ status: 502 }
|
|
||||||
)
|
|
||||||
}
|
}
|
||||||
|
|
||||||
const audioBuffer = await res.arrayBuffer()
|
const audioBuffer = await res.arrayBuffer()
|
||||||
|
|
||||||
return new NextResponse(audioBuffer, {
|
return new NextResponse(audioBuffer, {
|
||||||
headers: {
|
headers: {
|
||||||
'Content-Type': 'audio/mpeg',
|
'Content-Type': 'audio/mpeg',
|
||||||
'Cache-Control': 'public, max-age=86400', // Cache 24h — texts are static
|
'Cache-Control': 'public, max-age=86400',
|
||||||
'X-TTS-Cache': res.headers.get('X-TTS-Cache') || 'unknown',
|
'X-TTS-Cache': res.headers.get('X-TTS-Cache') || 'unknown',
|
||||||
|
'X-TTS-Source': 'compliance',
|
||||||
},
|
},
|
||||||
})
|
})
|
||||||
} catch (error) {
|
}
|
||||||
console.error('TTS proxy error:', error)
|
|
||||||
return NextResponse.json(
|
/**
 * Prepends a minimal 44-byte RIFF/WAVE header to raw 16-bit mono PCM.
 *
 * The OVH path calls this only when the upstream payload does not already
 * start with a `RIFF` tag; browsers need RIFF/WAV framing to play the audio.
 *
 * @param pcm - Raw sample bytes. A dangling odd trailing byte (half of a
 *   16-bit sample) is dropped so the data chunk holds whole samples only.
 * @param sampleRateHz - Sample rate to declare in the header.
 * @returns A new buffer containing the header followed by the sample data.
 * @throws RangeError if `sampleRateHz` is not a positive integer.
 */
function wrapPcmAsWav(pcm: Buffer, sampleRateHz: number): Buffer {
  // NaN or fractional rates would be silently coerced into a bogus header field.
  if (!Number.isInteger(sampleRateHz) || sampleRateHz <= 0) {
    throw new RangeError(`Invalid WAV sample rate: ${sampleRateHz}`)
  }

  const numChannels = 1
  const bitsPerSample = 16
  const blockAlign = (numChannels * bitsPerSample) / 8
  const byteRate = sampleRateHz * blockAlign

  // Keep only whole sample frames so the data size is a multiple of blockAlign.
  const dataSize = pcm.length - (pcm.length % blockAlign)
  const samples = dataSize === pcm.length ? pcm : pcm.subarray(0, dataSize)

  const header = Buffer.alloc(44)
  header.write('RIFF', 0)
  header.writeUInt32LE(36 + dataSize, 4) // RIFF chunk size: rest of header + data
  header.write('WAVE', 8)
  header.write('fmt ', 12)
  header.writeUInt32LE(16, 16) // PCM subchunk size
  header.writeUInt16LE(1, 20) // PCM format
  header.writeUInt16LE(numChannels, 22)
  header.writeUInt32LE(sampleRateHz, 24)
  header.writeUInt32LE(byteRate, 28)
  header.writeUInt16LE(blockAlign, 32)
  header.writeUInt16LE(bitsPerSample, 34)
  header.write('data', 36)
  header.writeUInt32LE(dataSize, 40)

  return Buffer.concat([header, samples])
}
|
||||||
|
|||||||
Reference in New Issue
Block a user