package search // buildQuery constructs the OpenSearch query func (s *Service) buildQuery(req *SearchRequest) map[string]interface{} { // Main query must := []map[string]interface{}{} filter := []map[string]interface{}{} // Text search if req.Query != "" { must = append(must, map[string]interface{}{ "multi_match": map[string]interface{}{ "query": req.Query, "fields": []string{"title^3", "content_text"}, "type": "best_fields", }, }) } // Filters if len(req.Filters.Language) > 0 { filter = append(filter, map[string]interface{}{ "terms": map[string]interface{}{"language": req.Filters.Language}, }) } if len(req.Filters.CountryHint) > 0 { filter = append(filter, map[string]interface{}{ "terms": map[string]interface{}{"country_hint": req.Filters.CountryHint}, }) } if len(req.Filters.SourceCategory) > 0 { filter = append(filter, map[string]interface{}{ "terms": map[string]interface{}{"source_category": req.Filters.SourceCategory}, }) } if len(req.Filters.DocType) > 0 { filter = append(filter, map[string]interface{}{ "terms": map[string]interface{}{"doc_type": req.Filters.DocType}, }) } if len(req.Filters.SchoolLevel) > 0 { filter = append(filter, map[string]interface{}{ "terms": map[string]interface{}{"school_level": req.Filters.SchoolLevel}, }) } if len(req.Filters.Subjects) > 0 { filter = append(filter, map[string]interface{}{ "terms": map[string]interface{}{"subjects": req.Filters.Subjects}, }) } if len(req.Filters.State) > 0 { filter = append(filter, map[string]interface{}{ "terms": map[string]interface{}{"state": req.Filters.State}, }) } if req.Filters.MinTrustScore > 0 { filter = append(filter, map[string]interface{}{ "range": map[string]interface{}{ "trust_score": map[string]interface{}{"gte": req.Filters.MinTrustScore}, }, }) } if req.Filters.DateFrom != "" { filter = append(filter, map[string]interface{}{ "range": map[string]interface{}{ "fetch_time": map[string]interface{}{"gte": req.Filters.DateFrom}, }, }) } // Build bool query boolQuery := map[string]interface{}{} if len(must) > 0 { boolQuery["must"] = must } if len(filter) > 0 { boolQuery["filter"] = filter } // Construct full query query := map[string]interface{}{ "query": map[string]interface{}{ "bool": boolQuery, }, "from": req.Offset, "size": req.Limit, "_source": []string{ "doc_id", "title", "url", "domain", "language", "doc_type", "school_level", "subjects", "trust_score", "quality_score", "snippet_text", }, } // Add highlighting if requested if req.Include.Highlights { query["highlight"] = map[string]interface{}{ "fields": map[string]interface{}{ "title": map[string]interface{}{}, "content_text": map[string]interface{}{"fragment_size": 150, "number_of_fragments": 3}, }, } } // Add function score for trust/quality boosting query["query"] = map[string]interface{}{ "function_score": map[string]interface{}{ "query": query["query"], "functions": []map[string]interface{}{ { "field_value_factor": map[string]interface{}{ "field": "trust_score", "factor": 1.5, "modifier": "sqrt", "missing": 0.5, }, }, { "field_value_factor": map[string]interface{}{ "field": "quality_score", "factor": 1.0, "modifier": "sqrt", "missing": 0.5, }, }, }, "score_mode": "multiply", "boost_mode": "multiply", }, } return query } // buildSemanticQuery constructs a pure vector search query using k-NN func (s *Service) buildSemanticQuery(req *SearchRequest, embedding []float32) map[string]interface{} { filter := s.buildFilters(req) // k-NN query for semantic search knnQuery := map[string]interface{}{ "content_embedding": map[string]interface{}{ "vector": embedding, "k": req.Limit + req.Offset, // Get enough results for pagination }, } // Add filter if present if len(filter) > 0 { knnQuery["content_embedding"].(map[string]interface{})["filter"] = map[string]interface{}{ "bool": map[string]interface{}{ "filter": filter, }, } } query := map[string]interface{}{ "knn": knnQuery, "from": req.Offset, "size": req.Limit, "_source": []string{ "doc_id", "title", "url", "domain", "language", "doc_type", "school_level", "subjects", "trust_score", "quality_score", "snippet_text", }, } // Add highlighting if requested if req.Include.Highlights { query["highlight"] = map[string]interface{}{ "fields": map[string]interface{}{ "title": map[string]interface{}{}, "content_text": map[string]interface{}{"fragment_size": 150, "number_of_fragments": 3}, }, } } return query } // buildHybridQuery constructs a combined BM25 + vector search query func (s *Service) buildHybridQuery(req *SearchRequest, embedding []float32) map[string]interface{} { filter := s.buildFilters(req) // Build the bool query for BM25 must := []map[string]interface{}{} if req.Query != "" { must = append(must, map[string]interface{}{ "multi_match": map[string]interface{}{ "query": req.Query, "fields": []string{"title^3", "content_text"}, "type": "best_fields", }, }) } boolQuery := map[string]interface{}{} if len(must) > 0 { boolQuery["must"] = must } if len(filter) > 0 { boolQuery["filter"] = filter } // Convert embedding to []interface{} for JSON embeddingInterface := make([]interface{}, len(embedding)) for i, v := range embedding { embeddingInterface[i] = v } // Hybrid query using script_score to combine BM25 and cosine similarity // This is a simpler approach than OpenSearch's neural search plugin query := map[string]interface{}{ "query": map[string]interface{}{ "script_score": map[string]interface{}{ "query": map[string]interface{}{ "bool": boolQuery, }, "script": map[string]interface{}{ "source": "cosineSimilarity(params.query_vector, 'content_embedding') + 1.0 + _score * 0.5", "params": map[string]interface{}{ "query_vector": embeddingInterface, }, }, }, }, "from": req.Offset, "size": req.Limit, "_source": []string{ "doc_id", "title", "url", "domain", "language", "doc_type", "school_level", "subjects", "trust_score", "quality_score", "snippet_text", }, } // Add highlighting if requested if req.Include.Highlights { query["highlight"] = map[string]interface{}{ "fields": map[string]interface{}{ "title": map[string]interface{}{}, "content_text": map[string]interface{}{"fragment_size": 150, "number_of_fragments": 3}, }, } } return query } // buildFilters constructs the filter array for queries func (s *Service) buildFilters(req *SearchRequest) []map[string]interface{} { filter := []map[string]interface{}{} if len(req.Filters.Language) > 0 { filter = append(filter, map[string]interface{}{ "terms": map[string]interface{}{"language": req.Filters.Language}, }) } if len(req.Filters.CountryHint) > 0 { filter = append(filter, map[string]interface{}{ "terms": map[string]interface{}{"country_hint": req.Filters.CountryHint}, }) } if len(req.Filters.SourceCategory) > 0 { filter = append(filter, map[string]interface{}{ "terms": map[string]interface{}{"source_category": req.Filters.SourceCategory}, }) } if len(req.Filters.DocType) > 0 { filter = append(filter, map[string]interface{}{ "terms": map[string]interface{}{"doc_type": req.Filters.DocType}, }) } if len(req.Filters.SchoolLevel) > 0 { filter = append(filter, map[string]interface{}{ "terms": map[string]interface{}{"school_level": req.Filters.SchoolLevel}, }) } if len(req.Filters.Subjects) > 0 { filter = append(filter, map[string]interface{}{ "terms": map[string]interface{}{"subjects": req.Filters.Subjects}, }) } if len(req.Filters.State) > 0 { filter = append(filter, map[string]interface{}{ "terms": map[string]interface{}{"state": req.Filters.State}, }) } if req.Filters.MinTrustScore > 0 { filter = append(filter, map[string]interface{}{ "range": map[string]interface{}{ "trust_score": map[string]interface{}{"gte": req.Filters.MinTrustScore}, }, }) } if req.Filters.DateFrom != "" { filter = append(filter, map[string]interface{}{ "range": map[string]interface{}{ "fetch_time": map[string]interface{}{"gte": req.Filters.DateFrom}, }, }) } return filter } // hitToResult converts an OpenSearch hit to SearchResult func (s *Service) hitToResult(source map[string]interface{}, score float64, highlight map[string][]string, include SearchInclude) SearchResult { result := SearchResult{ DocID: getString(source, "doc_id"), Title: getString(source, "title"), URL: getString(source, "url"), Domain: getString(source, "domain"), Language: getString(source, "language"), DocType: getString(source, "doc_type"), SchoolLevel: getString(source, "school_level"), Subjects: getStringArray(source, "subjects"), Scores: Scores{ BM25: score, Trust: getFloat(source, "trust_score"), Quality: getFloat(source, "quality_score"), Final: score, // MVP: final = BM25 * trust * quality (via function_score) }, } if include.Snippets { result.Snippet = getString(source, "snippet_text") } if include.Highlights && highlight != nil { if h, ok := highlight["content_text"]; ok { result.Highlights = h } } return result }