package training

import (
	"bytes"
	"context"
	"encoding/json"
	"fmt"
	"io"
	"net/http"
	"time"

	"github.com/google/uuid"
)

// MediaType identifies the kind of generated training media.
type MediaType string

// Media types a TrainingMedia record can carry.
const (
	MediaTypeAudio            MediaType = "audio"
	MediaTypeVideo            MediaType = "video"
	MediaTypeInteractiveVideo MediaType = "interactive_video"
)

// MediaStatus represents the processing status of a media file.
type MediaStatus string

// Processing states a TrainingMedia record can be in.
const (
	MediaStatusProcessing MediaStatus = "processing"
	MediaStatusCompleted  MediaStatus = "completed"
	MediaStatusFailed     MediaStatus = "failed"
)

// TrainingMedia represents a generated media file and its storage location.
// ContentID and ErrorMessage are omitted from the JSON encoding when unset.
type TrainingMedia struct {
	ID              uuid.UUID       `json:"id"`
	ModuleID        uuid.UUID       `json:"module_id"`
	ContentID       *uuid.UUID      `json:"content_id,omitempty"`
	MediaType       MediaType       `json:"media_type"`
	Status          MediaStatus     `json:"status"`
	Bucket          string          `json:"bucket"`
	ObjectKey       string          `json:"object_key"`
	FileSizeBytes   int64           `json:"file_size_bytes"`
	DurationSeconds float64         `json:"duration_seconds"`
	MimeType        string          `json:"mime_type"`
	VoiceModel      string          `json:"voice_model"`
	Language        string          `json:"language"`
	Metadata        json.RawMessage `json:"metadata"`
	ErrorMessage    string          `json:"error_message,omitempty"`
	GeneratedBy     string          `json:"generated_by"`
	IsPublished     bool            `json:"is_published"`
	CreatedAt       time.Time       `json:"created_at"`
	UpdatedAt       time.Time       `json:"updated_at"`
}

// ============================================================================
// TTS Client
// ============================================================================

// TTSClient communicates with the compliance-tts-service over HTTP.
type TTSClient struct {
	baseURL    string
	httpClient *http.Client
}

// NewTTSClient creates a new TTS service client. Endpoint paths are appended
// directly to baseURL, and the underlying HTTP client uses a 5-minute timeout.
func NewTTSClient(baseURL string) *TTSClient {
	return &TTSClient{
		baseURL: baseURL,
		httpClient: &http.Client{
			Timeout: 5 * time.Minute,
		},
	}
}

// newJSONRequest builds a POST request to c.baseURL+path whose body is the
// JSON encoding of payload and whose Content-Type is application/json.
func (c *TTSClient) newJSONRequest(ctx context.Context, path string, payload interface{}) (*http.Request, error) {
	body, err := json.Marshal(payload)
	if err != nil {
		return nil, fmt.Errorf("marshal request: %w", err)
	}

	httpReq, err := http.NewRequestWithContext(ctx, "POST", c.baseURL+path, bytes.NewReader(body))
	if err != nil {
		return nil, fmt.Errorf("create request: %w", err)
	}
	httpReq.Header.Set("Content-Type", "application/json")
	return httpReq, nil
}

// decodeTTSResponse reads resp.Body and, when the status is 200 OK,
// unmarshals the body into out. statusMsg prefixes the error returned for any
// other status; parseMsg prefixes the error returned when unmarshalling fails.
// The caller remains responsible for closing resp.Body.
func decodeTTSResponse(resp *http.Response, out interface{}, statusMsg, parseMsg string) error {
	respBody, readErr := io.ReadAll(resp.Body)

	// On a non-200 status the body is only diagnostic text, so report the
	// status with whatever was read even if the read itself was cut short.
	if resp.StatusCode != http.StatusOK {
		return fmt.Errorf("%s (%d): %s", statusMsg, resp.StatusCode, string(respBody))
	}

	// A failed read on a 200 response would otherwise show up as a confusing
	// JSON error on a truncated body; surface the real cause instead.
	if readErr != nil {
		return fmt.Errorf("read TTS response body: %w", readErr)
	}

	if err := json.Unmarshal(respBody, out); err != nil {
		return fmt.Errorf("%s: %w", parseMsg, err)
	}
	return nil
}

// TTSSynthesizeRequest is the request to synthesize audio.
type TTSSynthesizeRequest struct {
	Text      string `json:"text"`
	Language  string `json:"language"`
	Voice     string `json:"voice"`
	ModuleID  string `json:"module_id"`
	ContentID string `json:"content_id,omitempty"`
}

// TTSSynthesizeResponse is the response from audio synthesis.
type TTSSynthesizeResponse struct {
	AudioID         string  `json:"audio_id"`
	Bucket          string  `json:"bucket"`
	ObjectKey       string  `json:"object_key"`
	DurationSeconds float64 `json:"duration_seconds"`
	SizeBytes       int64   `json:"size_bytes"`
}

// TTSGenerateVideoRequest is the request to generate a video.
type TTSGenerateVideoRequest struct {
	Script         map[string]interface{} `json:"script"`
	AudioObjectKey string                 `json:"audio_object_key"`
	ModuleID       string                 `json:"module_id"`
}

// TTSGenerateVideoResponse is the response from video generation.
type TTSGenerateVideoResponse struct {
	VideoID         string  `json:"video_id"`
	Bucket          string  `json:"bucket"`
	ObjectKey       string  `json:"object_key"`
	DurationSeconds float64 `json:"duration_seconds"`
	SizeBytes       int64   `json:"size_bytes"`
}

// Synthesize calls the TTS service to create audio by POSTing req as JSON to
// /synthesize. It returns the decoded response on 200 OK; any other status,
// a transport failure, or an undecodable body yields a nil result and an error.
func (c *TTSClient) Synthesize(ctx context.Context, req *TTSSynthesizeRequest) (*TTSSynthesizeResponse, error) {
	httpReq, err := c.newJSONRequest(ctx, "/synthesize", req)
	if err != nil {
		return nil, err
	}

	resp, err := c.httpClient.Do(httpReq)
	if err != nil {
		return nil, fmt.Errorf("TTS service request failed: %w", err)
	}
	defer resp.Body.Close()

	var result TTSSynthesizeResponse
	if err := decodeTTSResponse(resp, &result, "TTS service error", "parse TTS response"); err != nil {
		return nil, err
	}
	return &result, nil
}

// GenerateVideo calls the TTS service to create a presentation video by
// POSTing req as JSON to /generate-video. It returns the decoded response on
// 200 OK; any other status, a transport failure, or an undecodable body yields
// a nil result and an error.
func (c *TTSClient) GenerateVideo(ctx context.Context, req *TTSGenerateVideoRequest) (*TTSGenerateVideoResponse, error) {
	httpReq, err := c.newJSONRequest(ctx, "/generate-video", req)
	if err != nil {
		return nil, err
	}

	resp, err := c.httpClient.Do(httpReq)
	if err != nil {
		return nil, fmt.Errorf("TTS service request failed: %w", err)
	}
	defer resp.Body.Close()

	var result TTSGenerateVideoResponse
	if err := decodeTTSResponse(resp, &result, "TTS service error", "parse TTS response"); err != nil {
		return nil, err
	}
	return &result, nil
}

// PresignedURLRequest is the request to get a presigned URL.
type PresignedURLRequest struct {
	Bucket    string `json:"bucket"`
	ObjectKey string `json:"object_key"`
	Expires   int    `json:"expires"`
}

// PresignedURLResponse is the response containing a presigned URL.
type PresignedURLResponse struct {
	URL       string `json:"url"`
	ExpiresIn int    `json:"expires_in"`
}

// GetPresignedURL requests a presigned URL for bucket/objectKey from the TTS
// service's /presigned-url endpoint and returns the URL from the response.
// The request always asks for an expiry of 3600.
// NOTE(review): the unit is defined by the service, presumably seconds — confirm.
func (c *TTSClient) GetPresignedURL(ctx context.Context, bucket, objectKey string) (string, error) {
	reqBody := PresignedURLRequest{
		Bucket:    bucket,
		ObjectKey: objectKey,
		Expires:   3600,
	}

	httpReq, err := c.newJSONRequest(ctx, "/presigned-url", reqBody)
	if err != nil {
		return "", err
	}

	resp, err := c.httpClient.Do(httpReq)
	if err != nil {
		return "", fmt.Errorf("TTS presigned URL request failed: %w", err)
	}
	defer resp.Body.Close()

	var result PresignedURLResponse
	if err := decodeTTSResponse(resp, &result, "TTS presigned URL error", "parse presigned URL response"); err != nil {
		return "", err
	}
	return result.URL, nil
}

// IsHealthy checks if the TTS service is responsive. It reports true only when
// a GET to /health completes and returns 200 OK; every failure maps to false.
func (c *TTSClient) IsHealthy(ctx context.Context) bool {
	httpReq, err := http.NewRequestWithContext(ctx, "GET", c.baseURL+"/health", nil)
	if err != nil {
		return false
	}

	resp, err := c.httpClient.Do(httpReq)
	if err != nil {
		return false
	}
	defer resp.Body.Close()

	return resp.StatusCode == http.StatusOK
}

// ============================================================================
// Interactive Video TTS Client Methods
// ============================================================================

// SynthesizeSectionsRequest is the request for batch section audio synthesis.
type SynthesizeSectionsRequest struct {
	Sections []SectionAudio `json:"sections"`
	Voice    string         `json:"voice"`
	ModuleID string         `json:"module_id"`
}

// SectionAudio represents one section's text for audio synthesis.
type SectionAudio struct {
	Text    string `json:"text"`
	Heading string `json:"heading"`
}

// SynthesizeSectionsResponse is the response from batch section synthesis.
type SynthesizeSectionsResponse struct {
	Sections      []SectionResult `json:"sections"`
	TotalDuration float64         `json:"total_duration"`
}

// SectionResult is the result for one section's audio.
type SectionResult struct {
	Heading        string  `json:"heading"`
	AudioPath      string  `json:"audio_path"`
	AudioObjectKey string  `json:"audio_object_key"`
	Duration       float64 `json:"duration"`
	StartTimestamp float64 `json:"start_timestamp"`
}

// GenerateInteractiveVideoRequest is the request for interactive video generation.
type GenerateInteractiveVideoRequest struct {
	Script   *NarratorScript             `json:"script"`
	Audio    *SynthesizeSectionsResponse `json:"audio"`
	ModuleID string                      `json:"module_id"`
}

// GenerateInteractiveVideoResponse is the response from interactive video generation.
type GenerateInteractiveVideoResponse struct {
	VideoID         string  `json:"video_id"`
	Bucket          string  `json:"bucket"`
	ObjectKey       string  `json:"object_key"`
	DurationSeconds float64 `json:"duration_seconds"`
	SizeBytes       int64   `json:"size_bytes"`
}

// SynthesizeSections calls the TTS service to synthesize audio for multiple
// sections by POSTing req as JSON to /synthesize-sections. It returns the
// decoded response on 200 OK; any other status, a transport failure, or an
// undecodable body yields a nil result and an error.
func (c *TTSClient) SynthesizeSections(ctx context.Context, req *SynthesizeSectionsRequest) (*SynthesizeSectionsResponse, error) {
	httpReq, err := c.newJSONRequest(ctx, "/synthesize-sections", req)
	if err != nil {
		return nil, err
	}

	resp, err := c.httpClient.Do(httpReq)
	if err != nil {
		return nil, fmt.Errorf("TTS synthesize-sections request failed: %w", err)
	}
	defer resp.Body.Close()

	var result SynthesizeSectionsResponse
	if err := decodeTTSResponse(resp, &result, "TTS synthesize-sections error", "parse TTS synthesize-sections response"); err != nil {
		return nil, err
	}
	return &result, nil
}

// GenerateInteractiveVideo calls the TTS service to create an interactive
// video with checkpoint slides by POSTing req as JSON to
// /generate-interactive-video. It returns the decoded response on 200 OK; any
// other status, a transport failure, or an undecodable body yields a nil
// result and an error.
func (c *TTSClient) GenerateInteractiveVideo(ctx context.Context, req *GenerateInteractiveVideoRequest) (*GenerateInteractiveVideoResponse, error) {
	httpReq, err := c.newJSONRequest(ctx, "/generate-interactive-video", req)
	if err != nil {
		return nil, err
	}

	resp, err := c.httpClient.Do(httpReq)
	if err != nil {
		return nil, fmt.Errorf("TTS interactive video request failed: %w", err)
	}
	defer resp.Body.Close()

	var result GenerateInteractiveVideoResponse
	if err := decodeTTSResponse(resp, &result, "TTS interactive video error", "parse TTS interactive video response"); err != nil {
		return nil, err
	}
	return &result, nil
}