// breakpilot-compliance/ai-compliance-sdk/internal/training/media.go

package training

import (
	"bytes"
	"context"
	"encoding/json"
	"fmt"
	"io"
	"net/http"
	"time"

	"github.com/google/uuid"
)

// MediaType identifies the kind of generated training media. Its string value
// is what TrainingMedia carries in its "media_type" JSON field.
type MediaType string

// Known MediaType values: plain audio, plain video, and interactive video.
const (
	MediaTypeAudio            MediaType = "audio"
	MediaTypeVideo            MediaType = "video"
	MediaTypeInteractiveVideo MediaType = "interactive_video"
)

// MediaStatus is the processing state of a media item. Its string value is
// what TrainingMedia carries in its "status" JSON field.
type MediaStatus string

// Known MediaStatus values.
const (
	MediaStatusProcessing MediaStatus = "processing"
	MediaStatusCompleted  MediaStatus = "completed"
	MediaStatusFailed     MediaStatus = "failed"
)

// TrainingMedia represents a generated media file and its JSON shape.
//
// Notes on individual fields:
//   - ContentID is optional; it is omitted from JSON when nil.
//   - ErrorMessage is omitted from JSON when empty.
//   - Metadata is carried as raw JSON and is not decoded by this type.
//   - Bucket and ObjectKey presumably name the object-storage location of the
//     file (confirm against the storage layer).
type TrainingMedia struct {
	ID              uuid.UUID       `json:"id"`
	ModuleID        uuid.UUID       `json:"module_id"`
	ContentID       *uuid.UUID      `json:"content_id,omitempty"`
	MediaType       MediaType       `json:"media_type"`
	Status          MediaStatus     `json:"status"`
	Bucket          string          `json:"bucket"`
	ObjectKey       string          `json:"object_key"`
	FileSizeBytes   int64           `json:"file_size_bytes"`
	DurationSeconds float64         `json:"duration_seconds"`
	MimeType        string          `json:"mime_type"`
	VoiceModel      string          `json:"voice_model"`
	Language        string          `json:"language"`
	Metadata        json.RawMessage `json:"metadata"`
	ErrorMessage    string          `json:"error_message,omitempty"`
	GeneratedBy     string          `json:"generated_by"`
	IsPublished     bool            `json:"is_published"`
	CreatedAt       time.Time       `json:"created_at"`
	UpdatedAt       time.Time       `json:"updated_at"`
}

// ============================================================================
// TTS Client
// ============================================================================

// TTSClient communicates with the compliance-tts-service over HTTP.
//
// Each method builds its URL by appending an endpoint path (such as
// "/synthesize") to baseURL and sends the request through httpClient.
type TTSClient struct {
	baseURL    string
	httpClient *http.Client
}

// NewTTSClient returns a TTSClient that talks to the TTS service at baseURL.
//
// Trailing slashes on baseURL are removed, because every client method appends
// an endpoint path that already starts with "/" (for example "/synthesize");
// without this, "http://host/" would produce "http://host//synthesize".
// The underlying HTTP client is created with a 5-minute timeout.
func NewTTSClient(baseURL string) *TTSClient {
	// Trimmed by hand so that no additional package import is required.
	for len(baseURL) > 0 && baseURL[len(baseURL)-1] == '/' {
		baseURL = baseURL[:len(baseURL)-1]
	}

	return &TTSClient{
		baseURL: baseURL,
		httpClient: &http.Client{
			Timeout: 5 * time.Minute,
		},
	}
}

// TTSSynthesizeRequest is the JSON body that Synthesize posts to the
// "/synthesize" endpoint. ContentID is omitted from the JSON when empty.
type TTSSynthesizeRequest struct {
	Text      string `json:"text"`
	Language  string `json:"language"`
	Voice     string `json:"voice"`
	ModuleID  string `json:"module_id"`
	ContentID string `json:"content_id,omitempty"`
}

// TTSSynthesizeResponse is the JSON body that Synthesize decodes from a
// 200 OK reply of the "/synthesize" endpoint.
type TTSSynthesizeResponse struct {
	AudioID         string  `json:"audio_id"`
	Bucket          string  `json:"bucket"`
	ObjectKey       string  `json:"object_key"`
	DurationSeconds float64 `json:"duration_seconds"`
	SizeBytes       int64   `json:"size_bytes"`
}

// TTSGenerateVideoRequest is the JSON body that GenerateVideo posts to the
// "/generate-video" endpoint.
//
// Script is a free-form JSON object; its schema is not defined by this type.
// AudioObjectKey presumably refers to a previously synthesized audio object
// (confirm against the TTS service contract).
type TTSGenerateVideoRequest struct {
	Script         map[string]interface{} `json:"script"`
	AudioObjectKey string                 `json:"audio_object_key"`
	ModuleID       string                 `json:"module_id"`
}

// TTSGenerateVideoResponse is the JSON body that GenerateVideo decodes from a
// 200 OK reply of the "/generate-video" endpoint.
type TTSGenerateVideoResponse struct {
	VideoID         string  `json:"video_id"`
	Bucket          string  `json:"bucket"`
	ObjectKey       string  `json:"object_key"`
	DurationSeconds float64 `json:"duration_seconds"`
	SizeBytes       int64   `json:"size_bytes"`
}

// Synthesize posts req as JSON to the TTS service's "/synthesize" endpoint and
// decodes the JSON reply.
//
// An error is returned when the request cannot be marshalled or built, the
// HTTP call fails, the response body cannot be read completely, the service
// answers with a status other than 200 OK (the body is included in the error
// text), or the body is not valid JSON for TTSSynthesizeResponse. The returned
// response is nil whenever the error is non-nil.
func (c *TTSClient) Synthesize(ctx context.Context, req *TTSSynthesizeRequest) (*TTSSynthesizeResponse, error) {
	body, err := json.Marshal(req)
	if err != nil {
		return nil, fmt.Errorf("marshal request: %w", err)
	}

	httpReq, err := http.NewRequestWithContext(ctx, http.MethodPost, c.baseURL+"/synthesize", bytes.NewReader(body))
	if err != nil {
		return nil, fmt.Errorf("create request: %w", err)
	}
	httpReq.Header.Set("Content-Type", "application/json")

	resp, err := c.httpClient.Do(httpReq)
	if err != nil {
		return nil, fmt.Errorf("TTS service request failed: %w", err)
	}
	defer resp.Body.Close()

	// A failed or truncated read must not be mistaken for a complete payload,
	// so the read error is reported instead of being discarded.
	respBody, err := io.ReadAll(resp.Body)
	if err != nil {
		return nil, fmt.Errorf("read TTS response (%d): %w", resp.StatusCode, err)
	}
	if resp.StatusCode != http.StatusOK {
		return nil, fmt.Errorf("TTS service error (%d): %s", resp.StatusCode, string(respBody))
	}

	var result TTSSynthesizeResponse
	if err := json.Unmarshal(respBody, &result); err != nil {
		return nil, fmt.Errorf("parse TTS response: %w", err)
	}

	return &result, nil
}

// GenerateVideo posts req as JSON to the TTS service's "/generate-video"
// endpoint and decodes the JSON reply.
//
// An error is returned when the request cannot be marshalled or built, the
// HTTP call fails, the response body cannot be read completely, the service
// answers with a status other than 200 OK (the body is included in the error
// text), or the body is not valid JSON for TTSGenerateVideoResponse. The
// returned response is nil whenever the error is non-nil.
func (c *TTSClient) GenerateVideo(ctx context.Context, req *TTSGenerateVideoRequest) (*TTSGenerateVideoResponse, error) {
	body, err := json.Marshal(req)
	if err != nil {
		return nil, fmt.Errorf("marshal request: %w", err)
	}

	httpReq, err := http.NewRequestWithContext(ctx, http.MethodPost, c.baseURL+"/generate-video", bytes.NewReader(body))
	if err != nil {
		return nil, fmt.Errorf("create request: %w", err)
	}
	httpReq.Header.Set("Content-Type", "application/json")

	resp, err := c.httpClient.Do(httpReq)
	if err != nil {
		return nil, fmt.Errorf("TTS service request failed: %w", err)
	}
	defer resp.Body.Close()

	// A failed or truncated read must not be mistaken for a complete payload,
	// so the read error is reported instead of being discarded.
	respBody, err := io.ReadAll(resp.Body)
	if err != nil {
		return nil, fmt.Errorf("read TTS response (%d): %w", resp.StatusCode, err)
	}
	if resp.StatusCode != http.StatusOK {
		return nil, fmt.Errorf("TTS service error (%d): %s", resp.StatusCode, string(respBody))
	}

	var result TTSGenerateVideoResponse
	if err := json.Unmarshal(respBody, &result); err != nil {
		return nil, fmt.Errorf("parse TTS response: %w", err)
	}

	return &result, nil
}

// PresignedURLRequest is the JSON body that GetPresignedURL posts to the
// "/presigned-url" endpoint.
//
// GetPresignedURL always sets Expires to 3600; the unit is not stated in this
// file (presumably seconds — confirm against the TTS service contract).
type PresignedURLRequest struct {
	Bucket    string `json:"bucket"`
	ObjectKey string `json:"object_key"`
	Expires   int    `json:"expires"`
}

// PresignedURLResponse is the JSON body that GetPresignedURL decodes from a
// 200 OK reply. Only URL is returned to the caller; ExpiresIn is decoded but
// not used by GetPresignedURL.
type PresignedURLResponse struct {
	URL       string `json:"url"`
	ExpiresIn int    `json:"expires_in"`
}

// GetPresignedURL asks the TTS service's "/presigned-url" endpoint for a
// presigned URL for the object identified by bucket and objectKey, and returns
// the "url" field of the reply.
//
// The request always carries an "expires" value of 3600 (presumably seconds —
// confirm against the TTS service contract).
//
// An error is returned when the request cannot be marshalled or built, the
// HTTP call fails, the response body cannot be read completely, the service
// answers with a status other than 200 OK (the body is included in the error
// text), or the body is not valid JSON for PresignedURLResponse. The returned
// string is empty whenever the error is non-nil.
func (c *TTSClient) GetPresignedURL(ctx context.Context, bucket, objectKey string) (string, error) {
	reqBody := PresignedURLRequest{
		Bucket:    bucket,
		ObjectKey: objectKey,
		Expires:   3600,
	}

	body, err := json.Marshal(reqBody)
	if err != nil {
		return "", fmt.Errorf("marshal request: %w", err)
	}

	httpReq, err := http.NewRequestWithContext(ctx, http.MethodPost, c.baseURL+"/presigned-url", bytes.NewReader(body))
	if err != nil {
		return "", fmt.Errorf("create request: %w", err)
	}
	httpReq.Header.Set("Content-Type", "application/json")

	resp, err := c.httpClient.Do(httpReq)
	if err != nil {
		return "", fmt.Errorf("TTS presigned URL request failed: %w", err)
	}
	defer resp.Body.Close()

	// A failed or truncated read must not be mistaken for a complete payload,
	// so the read error is reported instead of being discarded.
	respBody, err := io.ReadAll(resp.Body)
	if err != nil {
		return "", fmt.Errorf("read presigned URL response (%d): %w", resp.StatusCode, err)
	}
	if resp.StatusCode != http.StatusOK {
		return "", fmt.Errorf("TTS presigned URL error (%d): %s", resp.StatusCode, string(respBody))
	}

	var result PresignedURLResponse
	if err := json.Unmarshal(respBody, &result); err != nil {
		return "", fmt.Errorf("parse presigned URL response: %w", err)
	}

	return result.URL, nil
}

// IsHealthy reports whether a GET to the TTS service's "/health" endpoint
// answers with 200 OK. Any failure to build or send the request counts as
// unhealthy.
func (c *TTSClient) IsHealthy(ctx context.Context) bool {
	healthURL := c.baseURL + "/health"

	probe, buildErr := http.NewRequestWithContext(ctx, http.MethodGet, healthURL, nil)
	if buildErr != nil {
		return false
	}

	res, sendErr := c.httpClient.Do(probe)
	if sendErr != nil {
		return false
	}
	defer res.Body.Close()

	switch res.StatusCode {
	case http.StatusOK:
		return true
	default:
		return false
	}
}

// ============================================================================
// Interactive Video TTS Client Methods
// ============================================================================

// SynthesizeSectionsRequest is the JSON body that SynthesizeSections posts to
// the "/synthesize-sections" endpoint: a list of sections plus the voice and
// module ID that apply to the whole batch.
type SynthesizeSectionsRequest struct {
	Sections []SectionAudio `json:"sections"`
	Voice    string         `json:"voice"`
	ModuleID string         `json:"module_id"`
}

// SectionAudio is one entry of SynthesizeSectionsRequest.Sections: the text
// of a section together with its heading.
type SectionAudio struct {
	Text    string `json:"text"`
	Heading string `json:"heading"`
}

// SynthesizeSectionsResponse is the JSON body that SynthesizeSections decodes
// from a 200 OK reply. It is also embedded as the "audio" field of
// GenerateInteractiveVideoRequest.
type SynthesizeSectionsResponse struct {
	Sections      []SectionResult `json:"sections"`
	TotalDuration float64         `json:"total_duration"`
}

// SectionResult is one entry of SynthesizeSectionsResponse.Sections,
// describing the audio produced for a single section.
//
// Duration and StartTimestamp are presumably expressed in seconds — the unit
// is not stated in this file; confirm against the TTS service contract.
type SectionResult struct {
	Heading        string  `json:"heading"`
	AudioPath      string  `json:"audio_path"`
	AudioObjectKey string  `json:"audio_object_key"`
	Duration       float64 `json:"duration"`
	StartTimestamp float64 `json:"start_timestamp"`
}

// GenerateInteractiveVideoRequest is the JSON body that
// GenerateInteractiveVideo posts to the "/generate-interactive-video"
// endpoint.
//
// Script points to a NarratorScript (declared outside this section of the
// file) and Audio to a SynthesizeSectionsResponse; a nil pointer is encoded
// as JSON null because neither field uses omitempty.
type GenerateInteractiveVideoRequest struct {
	Script   *NarratorScript             `json:"script"`
	Audio    *SynthesizeSectionsResponse `json:"audio"`
	ModuleID string                      `json:"module_id"`
}

// GenerateInteractiveVideoResponse is the JSON body that
// GenerateInteractiveVideo decodes from a 200 OK reply of the
// "/generate-interactive-video" endpoint.
type GenerateInteractiveVideoResponse struct {
	VideoID         string  `json:"video_id"`
	Bucket          string  `json:"bucket"`
	ObjectKey       string  `json:"object_key"`
	DurationSeconds float64 `json:"duration_seconds"`
	SizeBytes       int64   `json:"size_bytes"`
}

// SynthesizeSections posts req as JSON to the TTS service's
// "/synthesize-sections" endpoint and decodes the JSON reply.
//
// An error is returned when the request cannot be marshalled or built, the
// HTTP call fails, the response body cannot be read completely, the service
// answers with a status other than 200 OK (the body is included in the error
// text), or the body is not valid JSON for SynthesizeSectionsResponse. The
// returned response is nil whenever the error is non-nil.
func (c *TTSClient) SynthesizeSections(ctx context.Context, req *SynthesizeSectionsRequest) (*SynthesizeSectionsResponse, error) {
	body, err := json.Marshal(req)
	if err != nil {
		return nil, fmt.Errorf("marshal request: %w", err)
	}

	httpReq, err := http.NewRequestWithContext(ctx, http.MethodPost, c.baseURL+"/synthesize-sections", bytes.NewReader(body))
	if err != nil {
		return nil, fmt.Errorf("create request: %w", err)
	}
	httpReq.Header.Set("Content-Type", "application/json")

	resp, err := c.httpClient.Do(httpReq)
	if err != nil {
		return nil, fmt.Errorf("TTS synthesize-sections request failed: %w", err)
	}
	defer resp.Body.Close()

	// A failed or truncated read must not be mistaken for a complete payload,
	// so the read error is reported instead of being discarded.
	respBody, err := io.ReadAll(resp.Body)
	if err != nil {
		return nil, fmt.Errorf("read TTS synthesize-sections response (%d): %w", resp.StatusCode, err)
	}
	if resp.StatusCode != http.StatusOK {
		return nil, fmt.Errorf("TTS synthesize-sections error (%d): %s", resp.StatusCode, string(respBody))
	}

	var result SynthesizeSectionsResponse
	if err := json.Unmarshal(respBody, &result); err != nil {
		return nil, fmt.Errorf("parse TTS synthesize-sections response: %w", err)
	}

	return &result, nil
}

// GenerateInteractiveVideo posts req as JSON to the TTS service's
// "/generate-interactive-video" endpoint and decodes the JSON reply.
//
// An error is returned when the request cannot be marshalled or built, the
// HTTP call fails, the response body cannot be read completely, the service
// answers with a status other than 200 OK (the body is included in the error
// text), or the body is not valid JSON for GenerateInteractiveVideoResponse.
// The returned response is nil whenever the error is non-nil.
func (c *TTSClient) GenerateInteractiveVideo(ctx context.Context, req *GenerateInteractiveVideoRequest) (*GenerateInteractiveVideoResponse, error) {
	body, err := json.Marshal(req)
	if err != nil {
		return nil, fmt.Errorf("marshal request: %w", err)
	}

	httpReq, err := http.NewRequestWithContext(ctx, http.MethodPost, c.baseURL+"/generate-interactive-video", bytes.NewReader(body))
	if err != nil {
		return nil, fmt.Errorf("create request: %w", err)
	}
	httpReq.Header.Set("Content-Type", "application/json")

	resp, err := c.httpClient.Do(httpReq)
	if err != nil {
		return nil, fmt.Errorf("TTS interactive video request failed: %w", err)
	}
	defer resp.Body.Close()

	// A failed or truncated read must not be mistaken for a complete payload,
	// so the read error is reported instead of being discarded.
	respBody, err := io.ReadAll(resp.Body)
	if err != nil {
		return nil, fmt.Errorf("read TTS interactive video response (%d): %w", resp.StatusCode, err)
	}
	if resp.StatusCode != http.StatusOK {
		return nil, fmt.Errorf("TTS interactive video error (%d): %s", resp.StatusCode, string(respBody))
	}

	var result GenerateInteractiveVideoResponse
	if err := json.Unmarshal(respBody, &result); err != nil {
		return nil, fmt.Errorf("parse TTS interactive video response: %w", err)
	}

	return &result, nil
}