/**
 * Voice API Client
 * WebSocket-based voice streaming to voice-service
 */

import { VoiceEncryption } from './voice-encryption'

/** Base HTTP URL of the voice service; falls back to a local default when the env var is unset or empty. */
const VOICE_SERVICE_URL =
  process.env.NEXT_PUBLIC_VOICE_SERVICE_URL || 'http://localhost:8091'

/**
 * WebSocket form of the service URL (http -> ws, https -> wss).
 * Not referenced by the code visible in this file; the socket URL actually
 * used by `connect()` comes from the session returned by the service.
 */
const WS_URL = VOICE_SERVICE_URL.replace('http', 'ws')

/** Session descriptor as returned by `POST /api/v1/sessions`. */
export interface VoiceSession {
  id: string
  namespace_id: string
  status: string
  created_at: string
  websocket_url: string
}

/** Task descriptor as returned by `GET /api/v1/sessions/:id/tasks`. */
export interface VoiceTask {
  id: string
  session_id: string
  type: string
  state: string
  created_at: string
  updated_at: string
  result_available: boolean
  error_message?: string
}

export interface TranscriptMessage {
  type: 'transcript'
  text: string
  final: boolean
  confidence: number
}

export interface IntentMessage {
  type: 'intent'
  intent: string
  confidence: number
  // NOTE(review): the value type was not visible in the original source;
  // `unknown` is assumed here - confirm against the service schema.
  parameters: Record<string, unknown>
}

export interface ResponseMessage {
  type: 'response'
  text: string
}

export interface StatusMessage {
  type: 'status'
  status: string
}

export interface TaskCreatedMessage {
  type: 'task_created'
  task_id: string
  task_type: string
  state: string
}

export interface ErrorMessage {
  type: 'error'
  message: string
  code: string
}

/** Every JSON message shape this client dispatches, discriminated by `type`. */
export type VoiceMessage =
  | TranscriptMessage
  | IntentMessage
  | ResponseMessage
  | StatusMessage
  | TaskCreatedMessage
  | ErrorMessage

export type VoiceEventHandler = (message: VoiceMessage) => void
export type AudioHandler = (audioData: ArrayBuffer) => void
export type ErrorHandler = (error: Error) => void

/** Normalizes an arbitrary thrown value into an `Error` instance. */
function toError(value: unknown): Error {
  return value instanceof Error ? value : new Error(String(value))
}

/** True when `value` is a non-null object carrying a string `type` tag. */
function hasMessageTag(value: unknown): value is { type: string } {
  return (
    typeof value === 'object' &&
    value !== null &&
    typeof (value as { type?: unknown }).type === 'string'
  )
}

/**
 * Converts Float32 samples to signed 16-bit PCM.
 * Samples are clamped to [-1, 1]; negatives scale by 0x8000 and
 * non-negatives by 0x7fff so both ends map onto the Int16 range.
 */
function floatToPcm16(samples: Float32Array): Int16Array {
  const pcm = new Int16Array(samples.length)
  for (let i = 0; i < samples.length; i++) {
    const clamped = Math.max(-1, Math.min(1, samples[i] ?? 0))
    pcm[i] = clamped < 0 ? clamped * 0x8000 : clamped * 0x7fff
  }
  return pcm
}

/**
 * Voice API Client
 * Handles session management and WebSocket streaming
 */
export class VoiceAPI {
  private encryption: VoiceEncryption
  private session: VoiceSession | null = null
  private ws: WebSocket | null = null
  private audioContext: AudioContext | null = null
  private mediaStream: MediaStream | null = null
  private processor: ScriptProcessorNode | null = null

  private onMessage: VoiceEventHandler | null = null
  private onAudio: AudioHandler | null = null
  private onError: ErrorHandler | null = null
  private onStatusChange: ((status: string) => void) | null = null

  constructor() {
    this.encryption = new VoiceEncryption()
  }

  /**
   * Initialize the voice API
   */
  async initialize(): Promise<void> {
    await this.encryption.initialize()
  }

  /**
   * Check if API is ready
   */
  isReady(): boolean {
    return this.encryption.isInitialized()
  }

  /**
   * Create a new voice session
   *
   * @returns The session parsed from the service response; it is also stored
   *   on the instance and used by later calls.
   * @throws Error if the encryption helper yields no namespace id / key hash,
   *   or if the service answers with a non-OK status.
   */
  async createSession(): Promise<VoiceSession> {
    const namespaceId = this.encryption.getNamespaceId()
    const keyHash = this.encryption.getKeyHash()

    if (!namespaceId || !keyHash) {
      throw new Error('Encryption not initialized')
    }

    const response = await fetch(`${VOICE_SERVICE_URL}/api/v1/sessions`, {
      method: 'POST',
      headers: { 'Content-Type': 'application/json' },
      body: JSON.stringify({
        namespace_id: namespaceId,
        key_hash: keyHash,
        device_type: 'pwa',
        client_version: '1.0.0',
      }),
    })

    if (!response.ok) {
      throw new Error(`Failed to create session: ${response.statusText}`)
    }

    // The response body is trusted to match VoiceSession; it is not validated here.
    const session = (await response.json()) as VoiceSession
    this.session = session
    return session
  }

  /**
   * Connect to WebSocket for voice streaming
   *
   * Creates a session first when none exists. The returned promise resolves
   * once the socket opens and rejects if the socket errors or closes before
   * it ever opened.
   */
  async connect(): Promise<void> {
    const session = this.session ?? (await this.createSession())

    return new Promise<void>((resolve, reject) => {
      const ws = new WebSocket(session.websocket_url)
      this.ws = ws

      // Guards resolve/reject so only the first open/error/close outcome
      // decides the promise.
      let settled = false

      ws.onopen = () => {
        settled = true
        console.log('WebSocket connected')
        this.onStatusChange?.('connected')
        resolve()
      }

      ws.onerror = (event) => {
        console.error('WebSocket error:', event)
        const error = new Error('WebSocket connection failed')
        this.onError?.(error)
        if (!settled) {
          settled = true
          reject(error)
        }
      }

      ws.onclose = () => {
        console.log('WebSocket closed')
        this.onStatusChange?.('disconnected')
        // A close without a preceding open/error would otherwise leave the
        // caller awaiting forever.
        if (!settled) {
          settled = true
          reject(new Error('WebSocket closed before connection was established'))
        }
      }

      ws.onmessage = (event) => {
        this.handleSocketMessage(event.data)
      }
    })
  }

  /**
   * Routes one incoming socket payload: Blob payloads go to the audio
   * handler, string payloads are parsed as JSON and go to the message
   * handler (and to the status handler for `status` messages).
   */
  private handleSocketMessage(data: unknown): void {
    if (data instanceof Blob) {
      // Binary audio data
      data
        .arrayBuffer()
        .then((buffer) => {
          this.onAudio?.(buffer)
        })
        .catch((error: unknown) => {
          console.error('Failed to handle audio data:', error)
          this.onError?.(toError(error))
        })
      return
    }

    // JSON message
    if (typeof data !== 'string') {
      console.warn('Failed to parse message:', data)
      return
    }

    let parsed: unknown
    try {
      parsed = JSON.parse(data)
    } catch {
      console.warn('Failed to parse message:', data)
      return
    }

    if (!hasMessageTag(parsed)) {
      console.warn('Ignoring message without a type:', data)
      return
    }

    // Only the `type` tag has been checked; the remaining fields are trusted
    // to match the declared message shapes.
    const message = parsed as VoiceMessage

    // Handler failures are logged separately so they are not mistaken for
    // parse failures; they are still not allowed to escape the socket callback.
    try {
      this.onMessage?.(message)
      if (message.type === 'status' && typeof message.status === 'string') {
        this.onStatusChange?.(message.status)
      }
    } catch (error) {
      console.error('Voice message handler failed:', error)
    }
  }

  /**
   * Start capturing audio from microphone
   *
   * Any capture already held by this instance is released first. On failure
   * the partially acquired microphone/audio resources are released, the
   * error handler is notified, and the original error is rethrown.
   */
  async startCapture(): Promise<void> {
    // Avoid orphaning a previous stream/context when called twice.
    this.releaseCapture()

    try {
      // Request microphone access
      const mediaStream = await navigator.mediaDevices.getUserMedia({
        audio: {
          sampleRate: 24000,
          channelCount: 1,
          echoCancellation: true,
          noiseSuppression: true,
        },
      })
      this.mediaStream = mediaStream

      // Create audio context
      const audioContext = new AudioContext({ sampleRate: 24000 })
      this.audioContext = audioContext
      const source = audioContext.createMediaStreamSource(mediaStream)

      // Create processor for capturing audio
      // Note: ScriptProcessorNode is deprecated but still widely supported
      // In production, use AudioWorklet
      const processor = audioContext.createScriptProcessor(2048, 1, 1)
      this.processor = processor

      processor.onaudioprocess = (event) => {
        const ws = this.ws
        if (!ws || ws.readyState !== WebSocket.OPEN) return

        // Convert Float32 to Int16 and send the audio chunk
        const pcm = floatToPcm16(event.inputBuffer.getChannelData(0))
        ws.send(pcm.buffer)
      }

      source.connect(processor)
      processor.connect(audioContext.destination)

      this.onStatusChange?.('listening')
    } catch (error) {
      // Do not leave the microphone or audio context open after a failed start.
      this.releaseCapture()
      console.error('Failed to start capture:', error)
      this.onError?.(toError(error))
      throw error
    }
  }

  /**
   * Stop capturing audio
   *
   * Releases capture resources, sends `end_turn` if the socket is open, and
   * reports the `processing` status.
   */
  stopCapture(): void {
    this.releaseCapture()

    // Signal end of turn
    if (this.ws && this.ws.readyState === WebSocket.OPEN) {
      this.ws.send(JSON.stringify({ type: 'end_turn' }))
    }

    this.onStatusChange?.('processing')
  }

  /**
   * Releases the processor node, microphone tracks and audio context without
   * sending anything on the socket or reporting a status.
   */
  private releaseCapture(): void {
    if (this.processor) {
      this.processor.onaudioprocess = null
      this.processor.disconnect()
      this.processor = null
    }

    if (this.mediaStream) {
      this.mediaStream.getTracks().forEach((track) => track.stop())
      this.mediaStream = null
    }

    if (this.audioContext) {
      const audioContext = this.audioContext
      this.audioContext = null
      if (audioContext.state !== 'closed') {
        // close() is asynchronous; log a failure instead of leaving a
        // floating rejected promise.
        void audioContext.close().catch((error: unknown) => {
          console.warn('Failed to close audio context:', error)
        })
      }
    }
  }

  /**
   * Send interrupt signal
   */
  interrupt(): void {
    if (this.ws && this.ws.readyState === WebSocket.OPEN) {
      this.ws.send(JSON.stringify({ type: 'interrupt' }))
    }
  }

  /**
   * Disconnect from voice service
   *
   * Stops capture, closes the socket and asks the service to delete the
   * session. A failed delete is logged, never thrown.
   */
  async disconnect(): Promise<void> {
    // NOTE(review): stopCapture() also sends `end_turn` and reports
    // `processing` even when no capture was active - confirm this is intended.
    this.stopCapture()

    if (this.ws) {
      this.ws.close()
      this.ws = null
    }

    const session = this.session
    if (session) {
      this.session = null
      try {
        const response = await fetch(
          `${VOICE_SERVICE_URL}/api/v1/sessions/${encodeURIComponent(session.id)}`,
          { method: 'DELETE' }
        )
        if (!response.ok) {
          console.warn('Failed to close session:', response.statusText)
        }
      } catch (e) {
        console.warn('Failed to close session:', e)
      }
    }
  }

  /**
   * Get pending tasks for current session
   *
   * @returns The parsed task list, or an empty array when there is no session.
   * @throws Error if the service answers with a non-OK status.
   */
  async getTasks(): Promise<VoiceTask[]> {
    if (!this.session) return []

    const response = await fetch(
      `${VOICE_SERVICE_URL}/api/v1/sessions/${encodeURIComponent(this.session.id)}/tasks`
    )

    if (!response.ok) {
      throw new Error('Failed to get tasks')
    }

    return (await response.json()) as VoiceTask[]
  }

  /**
   * Approve a task
   *
   * @throws Error if the service answers with a non-OK status.
   */
  async approveTask(taskId: string): Promise<void> {
    await this.transitionTask(
      taskId,
      'approved',
      'user_approved',
      'Failed to approve task'
    )
  }

  /**
   * Reject a task
   *
   * @throws Error if the service answers with a non-OK status.
   */
  async rejectTask(taskId: string): Promise<void> {
    await this.transitionTask(
      taskId,
      'rejected',
      'user_rejected',
      'Failed to reject task'
    )
  }

  /**
   * Sends a state transition for a task and throws `failureMessage` when the
   * service answers with a non-OK status.
   */
  private async transitionTask(
    taskId: string,
    newState: string,
    reason: string,
    failureMessage: string
  ): Promise<void> {
    const response = await fetch(
      `${VOICE_SERVICE_URL}/api/v1/tasks/${encodeURIComponent(taskId)}/transition`,
      {
        method: 'PUT',
        headers: { 'Content-Type': 'application/json' },
        body: JSON.stringify({
          new_state: newState,
          reason,
        }),
      }
    )

    if (!response.ok) {
      throw new Error(failureMessage)
    }
  }

  /**
   * Set event handlers
   */
  setOnMessage(handler: VoiceEventHandler): void {
    this.onMessage = handler
  }

  setOnAudio(handler: AudioHandler): void {
    this.onAudio = handler
  }

  setOnError(handler: ErrorHandler): void {
    this.onError = handler
  }

  setOnStatusChange(handler: (status: string) => void): void {
    this.onStatusChange = handler
  }

  /**
   * Get current session
   */
  getSession(): VoiceSession | null {
    return this.session
  }

  /**
   * Get connection status
   */
  isConnected(): boolean {
    return this.ws !== null && this.ws.readyState === WebSocket.OPEN
  }
}