Files
breakpilot-lehrer/edu-search-service/internal/search/search.go
Benjamin Admin 9ba420fa91
Some checks failed
CI / go-lint (push) Has been skipped
CI / python-lint (push) Has been skipped
CI / nodejs-lint (push) Has been skipped
CI / test-go-school (push) Successful in 42s
CI / test-go-edu-search (push) Successful in 34s
CI / test-python-klausur (push) Failing after 2m51s
CI / test-python-agent-core (push) Successful in 21s
CI / test-nodejs-website (push) Successful in 29s
Fix: Remove broken getKlausurApiUrl and clean up empty lines
sed replacement left orphaned hostname references in story page
and empty lines in getApiBase functions.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
2026-04-24 16:02:04 +02:00

244 lines
6.3 KiB
Go

package search
import (
"context"
"encoding/json"
"fmt"
"strings"
"github.com/opensearch-project/opensearch-go/v2"
"github.com/opensearch-project/opensearch-go/v2/opensearchapi"
)
// SearchRequest is the request payload consumed by Service.Search. The JSON
// names of the fields are given by their struct tags.
type SearchRequest struct {
// Query is the search text. In semantic and hybrid mode it is also the text
// handed to the embedding provider.
Query string `json:"q"`
// Mode selects the search strategy. Service.Search treats an empty value as
// "keyword" and any value other than "semantic" or "hybrid" as keyword search.
Mode string `json:"mode"` // keyword, semantic, hybrid
// Limit and Offset are echoed back in the response Pagination.
// NOTE(review): how they are applied to the OpenSearch query is decided by the
// query builders, which are not visible here - confirm there.
Limit int `json:"limit"`
Offset int `json:"offset"`
// Filters narrows the result set; see SearchFilters.
Filters SearchFilters `json:"filters"`
// Rerank is not read by Service.Search itself.
// NOTE(review): presumably requests a reranking pass - verify against the consumer.
Rerank bool `json:"rerank"`
// Include is forwarded to the hit conversion step to control optional result fields.
Include SearchInclude `json:"include"`
}
// SearchFilters holds the optional criteria a SearchRequest can use to narrow
// its results.
//
// NOTE(review): the code that turns these fields into OpenSearch clauses is not
// visible here; matching semantics (AND/OR between values, treatment of empty
// slices and zero values) should be confirmed against the query builders.
type SearchFilters struct {
Language []string `json:"language"`
CountryHint []string `json:"country_hint"`
SourceCategory []string `json:"source_category"`
DocType []string `json:"doc_type"`
SchoolLevel []string `json:"school_level"`
Subjects []string `json:"subjects"`
State []string `json:"state"`
// MinTrustScore presumably sets a lower bound on a document's trust score - TODO confirm.
MinTrustScore float64 `json:"min_trust_score"`
// DateFrom is carried as a plain string; the expected date format is not
// established in this file - TODO confirm.
DateFrom string `json:"date_from"`
}
// SearchInclude lists the optional parts a caller wants in each result.
// Service.Search passes it unchanged to the hit conversion step.
//
// NOTE(review): the exact effect of each flag is implemented outside the
// visible code - confirm there before relying on it.
type SearchInclude struct {
Snippets bool `json:"snippets"`
Highlights bool `json:"highlights"`
ContentText bool `json:"content_text"`
}
// SearchResult is one entry of SearchResponse.Results, built from a single
// OpenSearch hit.
type SearchResult struct {
DocID string `json:"doc_id"`
Title string `json:"title"`
URL string `json:"url"`
Domain string `json:"domain"`
Language string `json:"language"`
DocType string `json:"doc_type"`
SchoolLevel string `json:"school_level"`
Subjects []string `json:"subjects"`
// Scores groups the individual scoring components for this result.
Scores Scores `json:"scores"`
// Snippet and Highlights are left out of the JSON output when empty.
Snippet string `json:"snippet,omitempty"`
Highlights []string `json:"highlights,omitempty"`
}
// Scores groups the scoring components reported for a SearchResult.
//
// NOTE(review): how each component is computed and how Final is derived from
// the others is not established by the visible code - confirm in the hit
// conversion logic.
type Scores struct {
BM25 float64 `json:"bm25"`
Semantic float64 `json:"semantic"`
Rerank float64 `json:"rerank"`
Trust float64 `json:"trust"`
Quality float64 `json:"quality"`
Final float64 `json:"final"`
}
// SearchResponse is the value returned by Service.Search and serialized as the
// API response.
type SearchResponse struct {
// QueryID is a "q-" prefixed identifier derived from the "request_id" value
// found in the request context.
QueryID string `json:"query_id"`
// Results holds the converted hits in the order OpenSearch returned them.
Results []SearchResult `json:"results"`
// Pagination echoes the requested window together with the total hit count.
Pagination Pagination `json:"pagination"`
}
// Pagination describes the window of a SearchResponse.
type Pagination struct {
// Limit and Offset are copied from the SearchRequest as sent by the caller.
Limit int `json:"limit"`
Offset int `json:"offset"`
// TotalEstimate is the hits.total.value figure reported by OpenSearch.
TotalEstimate int `json:"total_estimate"`
}
// EmbeddingProvider abstracts the component that turns text into embedding
// vectors for semantic and hybrid search.
type EmbeddingProvider interface {
// Embed returns the embedding vector for text. Service.Search calls it with
// the query string and falls back to keyword search when it returns an error.
Embed(ctx context.Context, text string) ([]float32, error)
// IsEnabled reports whether the provider may be used; SetEmbeddingProvider
// ignores providers for which it returns false.
IsEnabled() bool
// Dimension is not called by the code visible in this file.
// NOTE(review): presumably the length of the vectors returned by Embed - confirm.
Dimension() int
}
// Service runs search queries against a single OpenSearch index. Create it
// with NewService; semantic search is off until SetEmbeddingProvider accepts
// an enabled provider.
type Service struct {
// client is the OpenSearch client every search request is sent through.
client *opensearch.Client
// indexName is the index searched by Search.
indexName string
// embeddingProvider produces query embeddings; nil until SetEmbeddingProvider
// accepts a provider.
embeddingProvider EmbeddingProvider
// semanticEnabled is set to true by SetEmbeddingProvider when it accepts a provider.
semanticEnabled bool
}
// NewService builds a Service that talks to the OpenSearch node at url with
// the given credentials and searches indexName. The returned Service has
// semantic search disabled and no embedding provider; call
// SetEmbeddingProvider to enable it. Any error from constructing the
// OpenSearch client is returned unchanged.
func NewService(url, username, password, indexName string) (*Service, error) {
	client, err := opensearch.NewClient(opensearch.Config{
		Addresses: []string{url},
		Username:  username,
		Password:  password,
	})
	if err != nil {
		return nil, err
	}

	// semanticEnabled and embeddingProvider keep their zero values (false, nil).
	svc := &Service{
		client:    client,
		indexName: indexName,
	}
	return svc, nil
}
// SetEmbeddingProvider installs provider and switches semantic search on.
// A nil provider, or one whose IsEnabled reports false, is ignored and leaves
// the Service unchanged.
func (s *Service) SetEmbeddingProvider(provider EmbeddingProvider) {
	if provider == nil {
		return
	}
	if !provider.IsEnabled() {
		return
	}

	s.embeddingProvider = provider
	s.semanticEnabled = true
}
// IsSemanticEnabled reports whether semantic search can be used: the feature
// flag must be set and an embedding provider must be present.
func (s *Service) IsSemanticEnabled() bool {
	if !s.semanticEnabled {
		return false
	}
	return s.embeddingProvider != nil
}
// Search runs req against the configured OpenSearch index and converts the
// hits into a SearchResponse.
//
// Mode handling: an empty mode means "keyword". "semantic" and "hybrid" need a
// query embedding; when semantic search is not enabled, or the embedding
// provider returns an error, the search degrades to keyword mode instead of
// failing. Any other mode value is treated as keyword search.
//
// An error is returned when the query cannot be serialized, the request cannot
// be sent, OpenSearch answers with an error status, or the response body
// cannot be decoded. All returned errors wrap the underlying cause.
func (s *Service) Search(ctx context.Context, req *SearchRequest) (*SearchResponse, error) {
	mode := req.Mode
	if mode == "" {
		mode = "keyword" // Default to keyword search
	}

	// For semantic/hybrid modes, generate the query embedding.
	var queryEmbedding []float32
	if mode == "semantic" || mode == "hybrid" {
		if s.IsSemanticEnabled() {
			var err error
			queryEmbedding, err = s.embeddingProvider.Embed(ctx, req.Query)
			if err != nil {
				// Deliberate best effort: an embedding failure must not fail
				// the whole search, so fall back to keyword mode.
				mode = "keyword"
			}
		} else {
			// Semantic requested but not available, fall back.
			mode = "keyword"
		}
	}

	// Build the OpenSearch query for the effective mode.
	var query map[string]interface{}
	switch mode {
	case "semantic":
		query = s.buildSemanticQuery(req, queryEmbedding)
	case "hybrid":
		query = s.buildHybridQuery(req, queryEmbedding)
	default:
		query = s.buildQuery(req)
	}

	queryJSON, err := json.Marshal(query)
	if err != nil {
		return nil, fmt.Errorf("marshaling search query: %w", err)
	}

	searchReq := opensearchapi.SearchRequest{
		Index: []string{s.indexName},
		Body:  strings.NewReader(string(queryJSON)),
	}
	res, err := searchReq.Do(ctx, s.client)
	if err != nil {
		return nil, fmt.Errorf("executing search on index %q: %w", s.indexName, err)
	}
	defer res.Body.Close()

	// A 4xx/5xx answer carries an error document, not hits. Without this check
	// it would decode into an empty, seemingly successful result.
	if res.IsError() {
		return nil, fmt.Errorf("search on index %q failed: %s", s.indexName, res.Status())
	}

	// Only the parts of the OpenSearch response that are used below.
	var osResponse struct {
		Hits struct {
			Total struct {
				Value int `json:"value"`
			} `json:"total"`
			Hits []struct {
				ID        string                 `json:"_id"`
				Score     float64                `json:"_score"`
				Source    map[string]interface{} `json:"_source"`
				Highlight map[string][]string    `json:"highlight,omitempty"`
			} `json:"hits"`
		} `json:"hits"`
	}
	if err := json.NewDecoder(res.Body).Decode(&osResponse); err != nil {
		return nil, fmt.Errorf("decoding search response: %w", err)
	}

	// Convert hits to SearchResults, keeping the order OpenSearch returned.
	results := make([]SearchResult, 0, len(osResponse.Hits.Hits))
	for _, hit := range osResponse.Hits.Hits {
		results = append(results, s.hitToResult(hit.Source, hit.Score, hit.Highlight, req.Include))
	}

	// The request ID is an untyped context value, so format it with %v: %d
	// yields "%!d(...)" noise for a missing or non-integer ID. Integer IDs
	// render exactly as before.
	// NOTE(review): the plain string context key is kept because the code that
	// stores "request_id" lives outside this block; a typed key would need both
	// sides changed together.
	queryID := "q-unknown"
	if id := ctx.Value("request_id"); id != nil {
		queryID = fmt.Sprintf("q-%v", id)
	}

	return &SearchResponse{
		QueryID: queryID,
		Results: results,
		Pagination: Pagination{
			Limit:         req.Limit,
			Offset:        req.Offset,
			TotalEstimate: osResponse.Hits.Total.Value,
		},
	}, nil
}
// Helper functions
// getString returns m[key] when it holds a string. A missing key, a nil map,
// or a value of any other type yields the empty string.
func getString(m map[string]interface{}, key string) string {
	switch val := m[key].(type) {
	case string:
		return val
	default:
		return ""
	}
}
// getFloat returns m[key] when it holds a float64. A missing key, a nil map,
// or a value of any other type (including integer types) yields 0.
func getFloat(m map[string]interface{}, key string) float64 {
	// A failed type assertion leaves num at the float64 zero value.
	num, _ := m[key].(float64)
	return num
}
// getStringArray returns the string elements of m[key] when it holds a
// []interface{}; elements of any other type are dropped. It returns nil when
// the key is missing or the value is not a []interface{} (a []string value
// does not qualify), and a non-nil empty slice for an empty list.
func getStringArray(m map[string]interface{}, key string) []string {
	items, isList := m[key].([]interface{})
	if !isList {
		return nil
	}

	out := make([]string, 0, len(items))
	for i := range items {
		str, isString := items[i].(string)
		if !isString {
			continue
		}
		out = append(out, str)
	}
	return out
}