package search

import (
	"context"
	"encoding/json"
	"fmt"
	"strings"

	"github.com/opensearch-project/opensearch-go/v2"
	"github.com/opensearch-project/opensearch-go/v2/opensearchapi"
)

// SearchRequest represents an API search request.
type SearchRequest struct {
	Query   string        `json:"q"`
	Mode    string        `json:"mode"` // keyword, semantic, hybrid
	Limit   int           `json:"limit"`
	Offset  int           `json:"offset"`
	Filters SearchFilters `json:"filters"`
	Rerank  bool          `json:"rerank"`
	Include SearchInclude `json:"include"`
}

// SearchFilters holds the optional criteria used to narrow results.
type SearchFilters struct {
	Language       []string `json:"language"`
	CountryHint    []string `json:"country_hint"`
	SourceCategory []string `json:"source_category"`
	DocType        []string `json:"doc_type"`
	SchoolLevel    []string `json:"school_level"`
	Subjects       []string `json:"subjects"`
	State          []string `json:"state"`
	MinTrustScore  float64  `json:"min_trust_score"`
	DateFrom       string   `json:"date_from"`
}

// SearchInclude specifies which optional parts to include in the response.
type SearchInclude struct {
	Snippets    bool `json:"snippets"`
	Highlights  bool `json:"highlights"`
	ContentText bool `json:"content_text"`
}

// SearchResult represents a single search result.
type SearchResult struct {
	DocID       string   `json:"doc_id"`
	Title       string   `json:"title"`
	URL         string   `json:"url"`
	Domain      string   `json:"domain"`
	Language    string   `json:"language"`
	DocType     string   `json:"doc_type"`
	SchoolLevel string   `json:"school_level"`
	Subjects    []string `json:"subjects"`
	Scores      Scores   `json:"scores"`
	Snippet     string   `json:"snippet,omitempty"`
	Highlights  []string `json:"highlights,omitempty"`
}

// Scores contains all scoring components of a result.
type Scores struct {
	BM25     float64 `json:"bm25"`
	Semantic float64 `json:"semantic"`
	Rerank   float64 `json:"rerank"`
	Trust    float64 `json:"trust"`
	Quality  float64 `json:"quality"`
	Final    float64 `json:"final"`
}

// SearchResponse is the API response returned by Service.Search.
type SearchResponse struct {
	QueryID    string         `json:"query_id"`
	Results    []SearchResult `json:"results"`
	Pagination Pagination     `json:"pagination"`
}

// Pagination echoes the requested window and reports the backend's total
// hit count.
type Pagination struct {
	Limit         int `json:"limit"`
	Offset        int `json:"offset"`
	TotalEstimate int `json:"total_estimate"`
}

// EmbeddingProvider generates embeddings for query text.
type EmbeddingProvider interface {
	Embed(ctx context.Context, text string) ([]float32, error)
	IsEnabled() bool
	Dimension() int
}

// Service handles search operations against a single OpenSearch index.
type Service struct {
	client            *opensearch.Client
	indexName         string
	embeddingProvider EmbeddingProvider
	semanticEnabled   bool
}

// NewService creates a search service that talks to the OpenSearch node at
// url using basic-auth credentials and queries indexName. Semantic search is
// disabled until SetEmbeddingProvider is called with an enabled provider.
func NewService(url, username, password, indexName string) (*Service, error) {
	client, err := opensearch.NewClient(opensearch.Config{
		Addresses: []string{url},
		Username:  username,
		Password:  password,
	})
	if err != nil {
		return nil, fmt.Errorf("creating opensearch client: %w", err)
	}

	return &Service{
		client:          client,
		indexName:       indexName,
		semanticEnabled: false,
	}, nil
}

// SetEmbeddingProvider configures the embedding provider for semantic search.
// A nil provider, or one whose IsEnabled reports false, is ignored and leaves
// the current configuration untouched.
func (s *Service) SetEmbeddingProvider(provider EmbeddingProvider) {
	if provider == nil || !provider.IsEnabled() {
		return
	}
	s.embeddingProvider = provider
	s.semanticEnabled = true
}

// IsSemanticEnabled reports whether semantic search is available.
func (s *Service) IsSemanticEnabled() bool {
	return s.semanticEnabled && s.embeddingProvider != nil
}

// Search performs a search query.
//
// The mode defaults to "keyword" when req.Mode is empty. The "semantic" and
// "hybrid" modes need a query embedding; when semantic search is not enabled
// or the embedding call fails, the search falls back to keyword mode rather
// than failing.
//
// It returns an error when req is nil, when the query cannot be encoded, when
// the request to OpenSearch fails, when OpenSearch answers with an error
// status, or when the response body cannot be decoded.
func (s *Service) Search(ctx context.Context, req *SearchRequest) (*SearchResponse, error) {
	if req == nil {
		return nil, fmt.Errorf("nil search request")
	}

	// Determine search mode; keyword is the default.
	mode := req.Mode
	if mode == "" {
		mode = "keyword"
	}

	// For semantic/hybrid modes, generate the query embedding.
	var queryEmbedding []float32
	if mode == "semantic" || mode == "hybrid" {
		if !s.IsSemanticEnabled() {
			// Semantic requested but not enabled: fall back.
			mode = "keyword"
		} else if emb, err := s.embeddingProvider.Embed(ctx, req.Query); err != nil {
			// Deliberate best-effort: an embedding failure degrades the
			// search to keyword mode instead of failing the request.
			mode = "keyword"
		} else {
			queryEmbedding = emb
		}
	}

	// Build the OpenSearch query based on the effective mode.
	var query map[string]interface{}
	switch mode {
	case "semantic":
		query = s.buildSemanticQuery(req, queryEmbedding)
	case "hybrid":
		query = s.buildHybridQuery(req, queryEmbedding)
	default:
		query = s.buildQuery(req)
	}

	queryJSON, err := json.Marshal(query)
	if err != nil {
		return nil, fmt.Errorf("encoding search query: %w", err)
	}

	searchReq := opensearchapi.SearchRequest{
		Index: []string{s.indexName},
		Body:  strings.NewReader(string(queryJSON)),
	}
	res, err := searchReq.Do(ctx, s.client)
	if err != nil {
		return nil, fmt.Errorf("searching index %q: %w", s.indexName, err)
	}
	defer res.Body.Close()

	// An error status carries an error document, not hits. Decoding it into
	// the hits structure would silently produce an empty result set, so it
	// is surfaced as an error instead.
	if res.IsError() {
		return nil, fmt.Errorf("searching index %q: opensearch returned %s", s.indexName, res.Status())
	}

	// Parse only the parts of the response that are used below.
	var osResponse struct {
		Hits struct {
			Total struct {
				Value int `json:"value"`
			} `json:"total"`
			Hits []struct {
				ID        string                 `json:"_id"`
				Score     float64                `json:"_score"`
				Source    map[string]interface{} `json:"_source"`
				Highlight map[string][]string    `json:"highlight,omitempty"`
			} `json:"hits"`
		} `json:"hits"`
	}
	if err := json.NewDecoder(res.Body).Decode(&osResponse); err != nil {
		return nil, fmt.Errorf("decoding search response: %w", err)
	}

	// Convert hits to SearchResults.
	results := make([]SearchResult, 0, len(osResponse.Hits.Hits))
	for _, hit := range osResponse.Hits.Hits {
		results = append(results, s.hitToResult(hit.Source, hit.Score, hit.Highlight, req.Include))
	}

	// The request id is read under the string key "request_id", which is what
	// callers of this package set. Its dynamic type is not known here, so it
	// is formatted with %v: integers render exactly as %d did, and strings no
	// longer produce a "%!d(string=...)" artifact. A missing id gets a fixed
	// placeholder.
	queryID := "q-unknown"
	if id := ctx.Value("request_id"); id != nil {
		queryID = fmt.Sprintf("q-%v", id)
	}

	return &SearchResponse{
		QueryID: queryID,
		Results: results,
		Pagination: Pagination{
			Limit:         req.Limit,
			Offset:        req.Offset,
			TotalEstimate: osResponse.Hits.Total.Value,
		},
	}, nil
}

// Helper functions

// getString returns m[key] when it holds a string, and "" otherwise
// (missing key or a value of another type).
func getString(m map[string]interface{}, key string) string {
	v, ok := m[key].(string)
	if !ok {
		return ""
	}
	return v
}

// getFloat returns m[key] when it holds a float64, and 0 otherwise
// (missing key or a value of another type).
func getFloat(m map[string]interface{}, key string) float64 {
	v, ok := m[key].(float64)
	if !ok {
		return 0.0
	}
	return v
}

// getStringArray returns the string elements of m[key] when it holds a
// []interface{}; non-string elements are skipped. It returns nil when the key
// is missing or holds a value of another type.
func getStringArray(m map[string]interface{}, key string) []string {
	items, ok := m[key].([]interface{})
	if !ok {
		return nil
	}
	result := make([]string, 0, len(items))
	for _, item := range items {
		if str, ok := item.(string); ok {
			result = append(result, str)
		}
	}
	return result
}