From 01f05e4399f527c09e82cae1520011fd773df5c2 Mon Sep 17 00:00:00 2001 From: Sharang Parnerkar <30073382+mighty840@users.noreply.github.com> Date: Wed, 15 Apr 2026 18:35:38 +0200 Subject: [PATCH 1/2] feat(pitch-deck): route DE presenter TTS through OVH via LiteLLM passthrough Adds an OVH-backed branch to /api/presenter/tts so the German presenter narration is synthesized by OVH AI Endpoints' nvr-tts-de-de (NVIDIA Riva) reached through the LiteLLM passthrough at /tts-ovh/audio/*, which injects the OVH API token server-side. - DE requests now hit ${LITELLM_URL}/tts-ovh/audio/v1/tts/text_to_audio with the documented body shape (encoding=1, language_code=de-DE, voice_name=German-DE-Male-1, sample_rate_hz=22050) and return the audio/wav bytes upstream serves (confirmed RIFF-framed in a smoke test). - EN continues to hit compliance-tts-service until OVH_TTS_URL_EN is set, making the eventual EN switch a single env flip. - The OVH URL, voice, and sample-rate parameters are env-overridable (OVH_TTS_URL_DE, OVH_TTS_VOICE_DE, OVH_TTS_SAMPLE_RATE, OVH_TTS_URL_EN, OVH_TTS_VOICE_EN) so retuning doesn't need a redeploy. - Defensive: OVH failures surface as 502 (no silent fallback) so upstream issues are visible during this test rollout. - wrapPcmAsWav() helper is kept as a safety net in case OVH ever returns bare PCM instead of a full WAV. Adds X-TTS-Source response header (ovh | compliance) to make provenance observable from DevTools. 
Co-Authored-By: Claude Sonnet 4.6 --- pitch-deck/app/api/presenter/tts/route.ts | 147 ++++++++++++++++++---- 1 file changed, 120 insertions(+), 27 deletions(-) diff --git a/pitch-deck/app/api/presenter/tts/route.ts b/pitch-deck/app/api/presenter/tts/route.ts index bd8f67f..dd21f38 100644 --- a/pitch-deck/app/api/presenter/tts/route.ts +++ b/pitch-deck/app/api/presenter/tts/route.ts @@ -1,6 +1,30 @@ import { NextRequest, NextResponse } from 'next/server' const TTS_SERVICE_URL = process.env.TTS_SERVICE_URL || 'http://compliance-tts-service:8095' +const LITELLM_URL = process.env.LITELLM_URL || 'https://llm-dev.meghsakha.com' +const LITELLM_API_KEY = process.env.LITELLM_API_KEY || '' + +// OVH AI Endpoints TTS via the LiteLLM passthrough. +// Path on the LiteLLM side: /tts-ovh/audio/* → https://nvr-tts-.endpoints.kepler.ai.cloud.ovh.net/api/* +const OVH_TTS = { + de: { + url: process.env.OVH_TTS_URL_DE || `${LITELLM_URL}/tts-ovh/audio/v1/tts/text_to_audio`, + // German only exposes a male voice; note the hyphen separator (EN uses dots). + voice: process.env.OVH_TTS_VOICE_DE || 'German-DE-Male-1', + languageCode: 'de-DE', + }, + // Enable by setting OVH_TTS_URL_EN (e.g. pointing at a second LiteLLM + // passthrough that targets nvr-tts-en-us). Keeps EN on the old path until set. + en: process.env.OVH_TTS_URL_EN + ? 
{ + url: process.env.OVH_TTS_URL_EN, + voice: process.env.OVH_TTS_VOICE_EN || 'English-US.Female-1', + languageCode: 'en-US', + } + : null, +} as const + +const SAMPLE_RATE_HZ = parseInt(process.env.OVH_TTS_SAMPLE_RATE || '22050', 10) export async function POST(request: NextRequest) { try { @@ -11,36 +35,105 @@ export async function POST(request: NextRequest) { return NextResponse.json({ error: 'Text is required' }, { status: 400 }) } - const res = await fetch(`${TTS_SERVICE_URL}/synthesize-direct`, { - method: 'POST', - headers: { 'Content-Type': 'application/json' }, - body: JSON.stringify({ text, language }), - signal: AbortSignal.timeout(30000), - }) - - if (!res.ok) { - const errorText = await res.text() - console.error('TTS service error:', res.status, errorText) - return NextResponse.json( - { error: `TTS service error (${res.status})` }, - { status: 502 } - ) + const ovh = language === 'de' ? OVH_TTS.de : OVH_TTS.en + if (ovh) { + return await synthesizeViaOvh(text, ovh) } - const audioBuffer = await res.arrayBuffer() - - return new NextResponse(audioBuffer, { - headers: { - 'Content-Type': 'audio/mpeg', - 'Cache-Control': 'public, max-age=86400', // Cache 24h — texts are static - 'X-TTS-Cache': res.headers.get('X-TTS-Cache') || 'unknown', - }, - }) + return await synthesizeViaComplianceService(text, language) } catch (error) { console.error('TTS proxy error:', error) - return NextResponse.json( - { error: 'TTS service not reachable' }, - { status: 503 } - ) + return NextResponse.json({ error: 'TTS service not reachable' }, { status: 503 }) } } + +async function synthesizeViaOvh( + text: string, + cfg: { url: string; voice: string; languageCode: string }, +): Promise { + const res = await fetch(cfg.url, { + method: 'POST', + headers: { + accept: 'application/octet-stream', + 'Content-Type': 'application/json', + Authorization: `Bearer ${LITELLM_API_KEY}`, + }, + body: JSON.stringify({ + encoding: 1, // LINEAR_PCM + language_code: cfg.languageCode, + 
sample_rate_hz: SAMPLE_RATE_HZ, + text, + voice_name: cfg.voice, + }), + signal: AbortSignal.timeout(30000), + }) + + if (!res.ok) { + const errorText = await res.text().catch(() => '') + console.error('OVH TTS error:', res.status, errorText.slice(0, 500)) + return NextResponse.json({ error: `OVH TTS error (${res.status})` }, { status: 502 }) + } + + const pcm = Buffer.from(await res.arrayBuffer()) + const wav = pcm.subarray(0, 4).toString('ascii') === 'RIFF' ? pcm : wrapPcmAsWav(pcm, SAMPLE_RATE_HZ) + + return new NextResponse(new Uint8Array(wav), { + headers: { + 'Content-Type': 'audio/wav', + 'Cache-Control': 'public, max-age=86400', + 'X-TTS-Source': 'ovh', + }, + }) +} + +async function synthesizeViaComplianceService(text: string, language: string): Promise { + const res = await fetch(`${TTS_SERVICE_URL}/synthesize-direct`, { + method: 'POST', + headers: { 'Content-Type': 'application/json' }, + body: JSON.stringify({ text, language }), + signal: AbortSignal.timeout(30000), + }) + + if (!res.ok) { + const errorText = await res.text().catch(() => '') + console.error('TTS service error:', res.status, errorText.slice(0, 500)) + return NextResponse.json({ error: `TTS service error (${res.status})` }, { status: 502 }) + } + + const audioBuffer = await res.arrayBuffer() + return new NextResponse(audioBuffer, { + headers: { + 'Content-Type': 'audio/mpeg', + 'Cache-Control': 'public, max-age=86400', + 'X-TTS-Cache': res.headers.get('X-TTS-Cache') || 'unknown', + 'X-TTS-Source': 'compliance', + }, + }) +} + +// Prepend a minimal 44-byte WAV header to raw 16-bit mono PCM. +// OVH's Riva HTTP endpoint returns bare PCM samples; browsers need RIFF/WAV framing. 
+function wrapPcmAsWav(pcm: Buffer, sampleRateHz: number): Buffer { + const numChannels = 1 + const bitsPerSample = 16 + const byteRate = (sampleRateHz * numChannels * bitsPerSample) / 8 + const blockAlign = (numChannels * bitsPerSample) / 8 + const dataSize = pcm.length + + const header = Buffer.alloc(44) + header.write('RIFF', 0) + header.writeUInt32LE(36 + dataSize, 4) + header.write('WAVE', 8) + header.write('fmt ', 12) + header.writeUInt32LE(16, 16) // PCM subchunk size + header.writeUInt16LE(1, 20) // PCM format + header.writeUInt16LE(numChannels, 22) + header.writeUInt32LE(sampleRateHz, 24) + header.writeUInt32LE(byteRate, 28) + header.writeUInt16LE(blockAlign, 32) + header.writeUInt16LE(bitsPerSample, 34) + header.write('data', 36) + header.writeUInt32LE(dataSize, 40) + + return Buffer.concat([header, pcm]) +} From 3e9a988aaf97d7b30c3a441223d3dbc6c3368725 Mon Sep 17 00:00:00 2001 From: Sharang Parnerkar <30073382+mighty840@users.noreply.github.com> Date: Wed, 15 Apr 2026 18:35:52 +0200 Subject: [PATCH 2/2] =?UTF-8?q?perf(pitch-deck):=20smooth=20SDK=20demo=20c?= =?UTF-8?q?arousel=20=E2=80=94=20no=20blank=20frames,=20parallel=20preload?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The SDK Live Demo was janky: AnimatePresence mode="wait" unmounted the current Image before mounting the next, so every advance forced a cold fetch and left an empty black frame until the new image decoded. Only the first three screenshots had priority; the rest fetched lazily, so the first pass through the carousel repeatedly stalled. Replaces the single swap-in/swap-out Image with a stack of 23 images layered in an aspect-[1920/1080] container. Cross-fades are now pure CSS opacity on always-mounted nodes, so there is no unmount and no gap. Key details: - priority on the first 3 (triggers a rel=preload link tag); loading=eager on the remaining 20 so the browser starts all fetches at mount rather than deferring via IntersectionObserver. 
- sizes="(max-width: 1024px) 100vw, 1024px" lets next/image serve the actual displayed resolution instead of the 1920 hint — fewer bytes, faster first paint. - Load-gated reveal: a new `shown` state trails `current` until the target image fires onLoadingComplete. If the user clicks ahead of the network, the previous loaded screenshot stays visible — no more black flashes before images arrive. Second pass through the carousel is instant (images are in-cache). Co-Authored-By: Claude Sonnet 4.6 --- pitch-deck/components/slides/SDKDemoSlide.tsx | 66 +++++++++++++------ 1 file changed, 46 insertions(+), 20 deletions(-) diff --git a/pitch-deck/components/slides/SDKDemoSlide.tsx b/pitch-deck/components/slides/SDKDemoSlide.tsx index b12b9e2..4565085 100644 --- a/pitch-deck/components/slides/SDKDemoSlide.tsx +++ b/pitch-deck/components/slides/SDKDemoSlide.tsx @@ -1,6 +1,6 @@ 'use client' -import { useState, useEffect, useCallback } from 'react' +import { useState, useEffect, useCallback, useRef } from 'react' import { motion, AnimatePresence } from 'framer-motion' import Image from 'next/image' import { Language } from '@/lib/types' @@ -43,6 +43,26 @@ export default function SDKDemoSlide({ lang }: SDKDemoSlideProps) { const [fullscreen, setFullscreen] = useState(false) const [autoPlay, setAutoPlay] = useState(true) + // Track which images have actually loaded so we never cross-fade to a blank + // frame. While the target image is still fetching, `shown` stays on the + // previous loaded one — this eliminates the flash of empty canvas the user + // hit on the first pass through the carousel. + const loadedRef = useRef>(new Set()) + const [shown, setShown] = useState(0) + + const handleLoaded = useCallback((idx: number) => { + loadedRef.current.add(idx) + // If the user is currently waiting on this image, reveal it immediately. + // Otherwise the preceding loaded image keeps showing — no blank flash. 
+ if (idx === current) setShown(idx) + }, [current]) + + useEffect(() => { + if (loadedRef.current.has(current)) { + setShown(current) + } + }, [current]) + const next = useCallback(() => { setCurrent(i => (i + 1) % SCREENSHOTS.length) }, []) @@ -101,25 +121,31 @@ export default function SDKDemoSlide({ lang }: SDKDemoSlideProps) { - {/* Screenshot */} - - - {de - - + {/* Screenshot stack — all images mount at once so we can cross-fade + between them by toggling opacity. AnimatePresence mode="wait" + unmounts before the next mounts, which forces a cold fetch and + produces a blank frame; the stack avoids both. */} +
+ {SCREENSHOTS.map((s, idx) => ( +
+ {de handleLoaded(idx)} + /> +
+ ))} +
{/* Navigation arrows */}