fix(pitch-deck): decouple OVH synthesis rate from WAV header rate
All checks were successful
Build pitch-deck / build-push-deploy (push) Successful in 1m26s
CI / go-lint (push) Has been skipped
CI / python-lint (push) Has been skipped
CI / nodejs-lint (push) Has been skipped
CI / test-go-consent (push) Successful in 49s
CI / test-python-voice (push) Successful in 40s
CI / test-bqas (push) Successful in 37s

OVH uses the sample_rate_hz field of the request only to tune internal
synthesis quality; the raw PCM it returns is always 16000 Hz. Sending
22050 for synthesis gives better pronunciation, while declaring 16000 in
the WAV header gives correct playback speed. Previously a single value
was used for both, forcing a tradeoff between pronunciation quality and
correct playback speed.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
Sharang Parnerkar
2026-04-16 21:26:54 +02:00
parent 4f2a963834
commit 5a476ac97d

View File

@@ -24,6 +24,9 @@ const OVH_TTS = {
: null, : null,
} as const } as const
// Rate sent to OVH in the synthesis request (affects internal model quality).
const OVH_SYNTHESIS_RATE_HZ = parseInt(process.env.OVH_TTS_SYNTHESIS_RATE || '22050', 10)
// Actual rate of the raw PCM bytes OVH returns (used for the WAV header).
const SAMPLE_RATE_HZ = parseInt(process.env.OVH_TTS_SAMPLE_RATE || '16000', 10) const SAMPLE_RATE_HZ = parseInt(process.env.OVH_TTS_SAMPLE_RATE || '16000', 10)
export async function POST(request: NextRequest) { export async function POST(request: NextRequest) {
@@ -61,7 +64,7 @@ async function synthesizeViaOvh(
body: JSON.stringify({ body: JSON.stringify({
encoding: 1, // LINEAR_PCM encoding: 1, // LINEAR_PCM
language_code: cfg.languageCode, language_code: cfg.languageCode,
sample_rate_hz: SAMPLE_RATE_HZ, sample_rate_hz: OVH_SYNTHESIS_RATE_HZ,
text, text,
voice_name: cfg.voice, voice_name: cfg.voice,
}), }),