feat: edu-search-service migriert, voice-service/geo-service entfernt
All checks were successful
CI / go-lint (push) Has been skipped
CI / python-lint (push) Has been skipped
CI / nodejs-lint (push) Has been skipped
CI / test-go-school (push) Successful in 28s
CI / test-go-edu-search (push) Successful in 27s
CI / test-python-klausur (push) Successful in 1m45s
CI / test-python-agent-core (push) Successful in 16s
CI / test-nodejs-website (push) Successful in 21s

- edu-search-service von breakpilot-pwa nach breakpilot-lehrer kopiert (ohne vendor)
- opensearch + edu-search-service in docker-compose.yml hinzugefuegt
- voice-service aus docker-compose.yml entfernt (jetzt in breakpilot-core)
- geo-service aus docker-compose.yml entfernt (nicht mehr benoetigt)
- CI/CD: edu-search-service zu Gitea Actions und Woodpecker hinzugefuegt
  (Go lint, test mit go mod download, build, SBOM)

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
Benjamin Boenisch
2026-02-15 18:36:38 +01:00
parent d4e1d6bab6
commit 414e0f5ec0
73 changed files with 23938 additions and 92 deletions

View File

@@ -0,0 +1,406 @@
package handlers
import (
"encoding/json"
"net/http"
"os"
"path/filepath"
"sync"
"time"
"github.com/gin-gonic/gin"
"github.com/google/uuid"
)
// SeedURL is one crawl seed together with its descriptive metadata. It is the
// value type of the SeedStore map and the element type of the JSON array that
// the store reads from and writes to its seeds file.
type SeedURL struct {
// ID is the key under which the store keeps the seed; the store assigns a UUID string.
ID string `json:"id"`
// URL is the seed address; the create handler rejects an empty value.
URL string `json:"url"`
// Category groups seeds; the built-in defaults use "federal", "science", "states" and "portals".
Category string `json:"category"`
Name string `json:"name"`
Description string `json:"description"`
// TrustBoost is a numeric weight; the built-in defaults range from 0.20 to 0.50.
// NOTE(review): how the value is applied during ranking is not visible here.
TrustBoost float64 `json:"trustBoost"`
// Enabled marks the seed as active.
Enabled bool `json:"enabled"`
// LastCrawled is optional and omitted from JSON when nil.
LastCrawled *string `json:"lastCrawled,omitempty"`
// DocumentCount is omitted from JSON when zero.
DocumentCount int `json:"documentCount,omitempty"`
// CreatedAt and UpdatedAt are set by the store, not by the client.
CreatedAt time.Time `json:"createdAt"`
UpdatedAt time.Time `json:"updatedAt"`
}
// CrawlStats is the JSON payload returned by the stats endpoint.
type CrawlStats struct {
TotalDocuments int `json:"totalDocuments"`
// TotalSeeds is filled by GetStats with the number of enabled seeds only.
TotalSeeds int `json:"totalSeeds"`
// LastCrawlTime is omitted from JSON while no crawl has finished (nil).
LastCrawlTime *string `json:"lastCrawlTime,omitempty"`
// CrawlStatus is "idle" or "running" in the code visible here.
CrawlStatus string `json:"crawlStatus"`
// DocumentsPerCategory maps a seed category name to a document count.
DocumentsPerCategory map[string]int `json:"documentsPerCategory"`
// DocumentsPerDocType maps a document type name to a document count.
DocumentsPerDocType map[string]int `json:"documentsPerDocType"`
AvgTrustScore float64 `json:"avgTrustScore"`
}
// SeedStore keeps seed URLs in an in-memory map and mirrors them to a JSON
// file. Because it contains a mutex it must be used through a pointer.
type SeedStore struct {
// seeds maps a seed ID to the seed.
seeds map[string]SeedURL
// mu guards seeds; the exported methods take it before touching the map.
mu sync.RWMutex
// filePath is the location of the JSON file the store loads from and saves to.
filePath string
}
// Package-level state shared by the admin handlers.
var (
	// seedStore is nil until InitSeedStore has been called.
	seedStore *SeedStore
	// crawlStatus is "idle" or "running".
	crawlStatus = "idle"
	// lastCrawlTime is nil until a crawl has completed; afterwards it holds
	// the completion time formatted as RFC 3339.
	lastCrawlTime *string
)
// InitSeedStore initializes the seed store
func InitSeedStore(seedsDir string) error {
seedStore = &SeedStore{
seeds: make(map[string]SeedURL),
filePath: filepath.Join(seedsDir, "seeds.json"),
}
// Try to load existing seeds from JSON file
if err := seedStore.loadFromFile(); err != nil {
// If file doesn't exist, load from txt files
return seedStore.loadFromTxtFiles(seedsDir)
}
return nil
}
// loadFromFile reads s.filePath, decodes it as a JSON array of SeedURL and
// stores every entry in the in-memory map under its ID. The read or decode
// error is returned unchanged; in that case the map is not modified.
func (s *SeedStore) loadFromFile() error {
	raw, readErr := os.ReadFile(s.filePath)
	if readErr != nil {
		return readErr
	}

	var loaded []SeedURL
	if decodeErr := json.Unmarshal(raw, &loaded); decodeErr != nil {
		return decodeErr
	}

	s.mu.Lock()
	defer s.mu.Unlock()
	for i := range loaded {
		s.seeds[loaded[i].ID] = loaded[i]
	}
	return nil
}
// loadFromTxtFiles fills the store with the built-in default seeds and writes
// them to the seeds file. Despite its name it does not read any txt files:
// the defaults are hard-coded below and seedsDir is currently unused. Every
// default gets a fresh UUID, is enabled, and shares one creation timestamp.
func (s *SeedStore) loadFromTxtFiles(seedsDir string) error {
	type defaultSeed struct {
		url, category, name, description string
		trustBoost                       float64
	}
	defaults := []defaultSeed{
		{"https://www.kmk.org", "federal", "Kultusministerkonferenz", "Beschlüsse und Bildungsstandards", 0.50},
		{"https://www.bildungsserver.de", "federal", "Deutscher Bildungsserver", "Zentrale Bildungsinformationen", 0.50},
		{"https://www.bpb.de", "federal", "Bundeszentrale politische Bildung", "Politische Bildung", 0.45},
		{"https://www.bmbf.de", "federal", "BMBF", "Bundesbildungsministerium", 0.50},
		{"https://www.iqb.hu-berlin.de", "federal", "IQB", "Institut Qualitätsentwicklung", 0.50},
		// Science
		{"https://www.bertelsmann-stiftung.de/de/themen/bildung", "science", "Bertelsmann Stiftung", "Bildungsstudien und Ländermonitor", 0.40},
		{"https://www.oecd.org/pisa", "science", "PISA-Studien", "Internationale Schulleistungsstudie", 0.45},
		{"https://www.iea.nl/studies/iea/pirls", "science", "IGLU/PIRLS", "Internationale Grundschul-Lese-Untersuchung", 0.45},
		{"https://www.iea.nl/studies/iea/timss", "science", "TIMSS", "Trends in International Mathematics and Science Study", 0.45},
		// Federal states
		{"https://www.km.bayern.de", "states", "Bayern Kultusministerium", "Lehrpläne Bayern", 0.45},
		{"https://www.schulministerium.nrw", "states", "NRW Schulministerium", "Lehrpläne NRW", 0.45},
		{"https://www.berlin.de/sen/bildung", "states", "Berlin Bildung", "Rahmenlehrpläne Berlin", 0.45},
		{"https://kultusministerium.hessen.de", "states", "Hessen Kultusministerium", "Kerncurricula Hessen", 0.45},
		// Portals
		{"https://www.lehrer-online.de", "portals", "Lehrer-Online", "Unterrichtsmaterialien", 0.20},
		{"https://www.4teachers.de", "portals", "4teachers", "Lehrercommunity", 0.20},
		{"https://www.zum.de", "portals", "ZUM", "Zentrale für Unterrichtsmedien", 0.25},
	}

	s.mu.Lock()
	defer s.mu.Unlock()

	stamp := time.Now()
	for _, d := range defaults {
		id := uuid.New().String()
		s.seeds[id] = SeedURL{
			ID:          id,
			URL:         d.url,
			Category:    d.category,
			Name:        d.name,
			Description: d.description,
			TrustBoost:  d.trustBoost,
			Enabled:     true,
			CreatedAt:   stamp,
			UpdatedAt:   stamp,
		}
	}
	return s.saveToFile()
}
// saveToFile writes all seeds as an indented JSON array to s.filePath with
// mode 0644. It does not take s.mu itself; the callers in this file invoke it
// while holding the write lock. Seeds are written in map iteration order, so
// the order of entries in the file is not stable between saves.
func (s *SeedStore) saveToFile() error {
	snapshot := make([]SeedURL, 0, len(s.seeds))
	for id := range s.seeds {
		snapshot = append(snapshot, s.seeds[id])
	}

	encoded, err := json.MarshalIndent(snapshot, "", " ")
	if err != nil {
		return err
	}
	return os.WriteFile(s.filePath, encoded, 0644)
}
// GetAllSeeds returns a copy of every stored seed. The result is never nil
// (an empty store yields an empty slice) and its order follows map iteration,
// so it is not stable between calls.
func (s *SeedStore) GetAllSeeds() []SeedURL {
	s.mu.RLock()
	defer s.mu.RUnlock()

	all := make([]SeedURL, 0, len(s.seeds))
	for id := range s.seeds {
		all = append(all, s.seeds[id])
	}
	return all
}
// GetSeed looks up the seed stored under id. The boolean reports whether the
// seed exists; when it is false the returned SeedURL is the zero value.
func (s *SeedStore) GetSeed(id string) (SeedURL, bool) {
	s.mu.RLock()
	found, exists := s.seeds[id]
	s.mu.RUnlock()
	return found, exists
}
// CreateSeed stores seed under a freshly generated UUID and persists the
// store. Any ID or timestamps supplied by the caller are overwritten.
// CreatedAt and UpdatedAt are set to the same instant so that a new record is
// recognisable as never modified.
//
// If persisting fails the seed is removed from memory again and the error is
// returned together with a zero SeedURL.
func (s *SeedStore) CreateSeed(seed SeedURL) (SeedURL, error) {
	s.mu.Lock()
	defer s.mu.Unlock()

	now := time.Now()
	seed.ID = uuid.New().String()
	seed.CreatedAt = now
	seed.UpdatedAt = now

	s.seeds[seed.ID] = seed
	if err := s.saveToFile(); err != nil {
		// Keep memory and disk in agreement: undo the insert.
		delete(s.seeds, seed.ID)
		return SeedURL{}, err
	}
	return seed, nil
}
// UpdateSeed applies updates to the seed stored under id and persists the
// store.
//
// URL, Name, Category and Description are only replaced when the
// corresponding field in updates is non-empty. TrustBoost and Enabled are
// always taken from updates, so a request that omits them resets them to 0
// and false. UpdatedAt is set to the current time.
//
// The boolean reports whether the seed exists. If persisting fails, the
// previous in-memory value is restored so memory and disk stay in agreement,
// and the error is returned with a zero SeedURL.
func (s *SeedStore) UpdateSeed(id string, updates SeedURL) (SeedURL, bool, error) {
	s.mu.Lock()
	defer s.mu.Unlock()

	previous, ok := s.seeds[id]
	if !ok {
		return SeedURL{}, false, nil
	}

	seed := previous
	if updates.URL != "" {
		seed.URL = updates.URL
	}
	if updates.Name != "" {
		seed.Name = updates.Name
	}
	if updates.Category != "" {
		seed.Category = updates.Category
	}
	if updates.Description != "" {
		seed.Description = updates.Description
	}
	seed.TrustBoost = updates.TrustBoost
	seed.Enabled = updates.Enabled
	seed.UpdatedAt = time.Now()

	s.seeds[id] = seed
	if err := s.saveToFile(); err != nil {
		// Roll back, mirroring what CreateSeed does on a failed save.
		s.seeds[id] = previous
		return SeedURL{}, true, err
	}
	return seed, true, nil
}
// DeleteSeed removes the seed stored under id from memory and rewrites the
// seeds file. It returns false when no such seed exists and true otherwise.
//
// NOTE(review): the result of saveToFile is discarded, so a failed write is
// not reported and the seed reappears after a restart. The bool-only return
// value leaves no way to surface the error without changing the signature.
func (s *SeedStore) DeleteSeed(id string) bool {
	s.mu.Lock()
	defer s.mu.Unlock()

	_, exists := s.seeds[id]
	if exists {
		delete(s.seeds, id)
		_ = s.saveToFile()
	}
	return exists
}
// Admin Handlers
// GetSeeds responds with a JSON array of all seeds, or with 500 if the seed
// store has not been initialized.
func (h *Handler) GetSeeds(c *gin.Context) {
	store := seedStore
	if store == nil {
		c.JSON(http.StatusInternalServerError, gin.H{"error": "Seed store not initialized"})
		return
	}
	c.JSON(http.StatusOK, store.GetAllSeeds())
}
// CreateSeed decodes a SeedURL from the request body and adds it to the
// store.
//
// Responses: 201 with the stored seed; 400 when the body is not valid JSON or
// the URL is empty; 500 when the store is not initialized or saving fails.
func (h *Handler) CreateSeed(c *gin.Context) {
	store := seedStore
	if store == nil {
		c.JSON(http.StatusInternalServerError, gin.H{"error": "Seed store not initialized"})
		return
	}

	var payload SeedURL
	bindErr := c.ShouldBindJSON(&payload)
	switch {
	case bindErr != nil:
		c.JSON(http.StatusBadRequest, gin.H{"error": "Invalid request body", "details": bindErr.Error()})
		return
	case payload.URL == "":
		c.JSON(http.StatusBadRequest, gin.H{"error": "URL is required"})
		return
	}

	stored, saveErr := store.CreateSeed(payload)
	if saveErr != nil {
		c.JSON(http.StatusInternalServerError, gin.H{"error": "Failed to create seed", "details": saveErr.Error()})
		return
	}
	c.JSON(http.StatusCreated, stored)
}
// UpdateSeed applies the SeedURL in the request body to the seed named by the
// ":id" path parameter.
//
// Responses: 200 with the updated seed; 400 when the id is empty or the body
// is not valid JSON; 404 when the seed does not exist; 500 when the store is
// not initialized or saving fails.
func (h *Handler) UpdateSeed(c *gin.Context) {
	store := seedStore
	if store == nil {
		c.JSON(http.StatusInternalServerError, gin.H{"error": "Seed store not initialized"})
		return
	}

	seedID := c.Param("id")
	if seedID == "" {
		c.JSON(http.StatusBadRequest, gin.H{"error": "Seed ID required"})
		return
	}

	var changes SeedURL
	if bindErr := c.ShouldBindJSON(&changes); bindErr != nil {
		c.JSON(http.StatusBadRequest, gin.H{"error": "Invalid request body", "details": bindErr.Error()})
		return
	}

	result, exists, saveErr := store.UpdateSeed(seedID, changes)
	switch {
	case saveErr != nil:
		c.JSON(http.StatusInternalServerError, gin.H{"error": "Failed to update seed", "details": saveErr.Error()})
	case !exists:
		c.JSON(http.StatusNotFound, gin.H{"error": "Seed not found"})
	default:
		c.JSON(http.StatusOK, result)
	}
}
// DeleteSeed removes the seed named by the ":id" path parameter.
//
// Responses: 200 with {"deleted": true, "id": <id>}; 400 when the id is
// empty; 404 when the seed does not exist; 500 when the store is not
// initialized.
func (h *Handler) DeleteSeed(c *gin.Context) {
	store := seedStore
	if store == nil {
		c.JSON(http.StatusInternalServerError, gin.H{"error": "Seed store not initialized"})
		return
	}

	seedID := c.Param("id")
	if seedID == "" {
		c.JSON(http.StatusBadRequest, gin.H{"error": "Seed ID required"})
		return
	}

	if removed := store.DeleteSeed(seedID); !removed {
		c.JSON(http.StatusNotFound, gin.H{"error": "Seed not found"})
		return
	}
	c.JSON(http.StatusOK, gin.H{"deleted": true, "id": seedID})
}
// crawlMu guards crawlStatus and lastCrawlTime. Both are written by
// StartCrawl and its background goroutine and read by GetStats, all of which
// run on different goroutines.
var crawlMu sync.RWMutex

// GetStats responds with crawl statistics.
//
// Document counts, per-category and per-type breakdowns and the average
// trust score are placeholders (all zero) until they are read from
// OpenSearch. TotalSeeds is the number of enabled seeds, or zero when the
// seed store is not initialized.
func (h *Handler) GetStats(c *gin.Context) {
	// TODO: Get real count from OpenSearch
	totalDocs := 0

	var seeds []SeedURL
	if seedStore != nil {
		seeds = seedStore.GetAllSeeds()
	}
	enabledSeeds := 0
	for _, seed := range seeds {
		if seed.Enabled {
			enabledSeeds++
		}
	}

	// Take a consistent snapshot of the crawl state.
	crawlMu.RLock()
	status, last := crawlStatus, lastCrawlTime
	crawlMu.RUnlock()

	stats := CrawlStats{
		TotalDocuments: totalDocs,
		TotalSeeds:     enabledSeeds,
		LastCrawlTime:  last,
		CrawlStatus:    status,
		DocumentsPerCategory: map[string]int{
			"federal":      0,
			"states":       0,
			"science":      0,
			"universities": 0,
			"portals":      0,
		},
		DocumentsPerDocType: map[string]int{
			"Lehrplan":           0,
			"Arbeitsblatt":       0,
			"Unterrichtsentwurf": 0,
			"Erlass_Verordnung":  0,
			"Pruefung_Abitur":    0,
			"Studie_Bericht":     0,
			"Sonstiges":          0,
		},
		AvgTrustScore: 0.0,
	}
	c.JSON(http.StatusOK, stats)
}

// StartCrawl initiates a crawl run.
//
// Responses: 202 when a run was started; 409 when one is already running.
// The status check and the transition to "running" happen under one lock, so
// two concurrent requests cannot both start a run.
func (h *Handler) StartCrawl(c *gin.Context) {
	crawlMu.Lock()
	if crawlStatus == "running" {
		crawlMu.Unlock()
		c.JSON(http.StatusConflict, gin.H{"error": "Crawl already running"})
		return
	}
	crawlStatus = "running"
	crawlMu.Unlock()

	// TODO: Start actual crawl in background goroutine
	go func() {
		time.Sleep(5 * time.Second) // Simulate crawl
		finished := time.Now().Format(time.RFC3339)

		crawlMu.Lock()
		lastCrawlTime = &finished
		crawlStatus = "idle"
		crawlMu.Unlock()
	}()

	c.JSON(http.StatusAccepted, gin.H{
		"status":  "started",
		"message": "Crawl initiated",
	})
}
// SetupAdminRoutes mounts the admin API under "/admin" on r: CRUD for seeds,
// the stats endpoint and the crawl trigger.
func SetupAdminRoutes(r *gin.RouterGroup, h *Handler) {
	adminGroup := r.Group("/admin")

	// Seeds CRUD
	adminGroup.GET("/seeds", h.GetSeeds)
	adminGroup.POST("/seeds", h.CreateSeed)
	adminGroup.PUT("/seeds/:id", h.UpdateSeed)
	adminGroup.DELETE("/seeds/:id", h.DeleteSeed)

	// Stats
	adminGroup.GET("/stats", h.GetStats)

	// Crawl control
	adminGroup.POST("/crawl/start", h.StartCrawl)
}

View File

@@ -0,0 +1,554 @@
package handlers
import (
"net/http"
"time"
"github.com/gin-gonic/gin"
"github.com/google/uuid"
"github.com/breakpilot/edu-search-service/internal/database"
)
// AIExtractionHandlers groups the HTTP endpoints used for AI-based profile
// extraction. They are designed for vast.ai or similar AI services to:
// 1. Get profile URLs that need extraction
// 2. Submit extracted data back
type AIExtractionHandlers struct {
// repo is the database repository used for all staff and department access.
repo *database.Repository
}
// NewAIExtractionHandlers returns handlers that read and write through repo.
func NewAIExtractionHandlers(repo *database.Repository) *AIExtractionHandlers {
	handlers := new(AIExtractionHandlers)
	handlers.repo = repo
	return handlers
}
// ProfileExtractionTask is one unit of work handed to the AI service: a staff
// profile URL to process, plus the data already known about that person.
type ProfileExtractionTask struct {
StaffID uuid.UUID `json:"staff_id"`
ProfileURL string `json:"profile_url"`
UniversityID uuid.UUID `json:"university_id"`
// UniversityURL and FullName are omitted from JSON when empty.
UniversityURL string `json:"university_url,omitempty"`
FullName string `json:"full_name,omitempty"`
// CurrentData carries the values already stored for the staff member; each
// field is omitted from JSON when empty.
CurrentData struct {
Email string `json:"email,omitempty"`
Phone string `json:"phone,omitempty"`
Office string `json:"office,omitempty"`
Position string `json:"position,omitempty"`
Department string `json:"department,omitempty"`
} `json:"current_data"`
}
// GetPendingProfiles returns staff profiles that need AI extraction: staff
// with a profile URL but no e-mail address.
// GET /api/v1/ai/extraction/pending?limit=10&university_id=...
//
// limit is clamped to 1..100; a non-positive value falls back to the default
// of 10. An unparsable university_id is ignored, in which case no university
// filter is applied. "tasks" is always a JSON array, never null.
func (h *AIExtractionHandlers) GetPendingProfiles(c *gin.Context) {
	limit := parseIntDefault(c.Query("limit"), 10)
	switch {
	case limit < 1:
		limit = 10
	case limit > 100:
		limit = 100
	}

	var universityID *uuid.UUID
	if uniIDStr := c.Query("university_id"); uniIDStr != "" {
		if id, err := uuid.Parse(uniIDStr); err == nil {
			universityID = &id
		}
	}

	// Fetch more rows than requested because the filter below drops some.
	params := database.StaffSearchParams{
		UniversityID: universityID,
		Limit:        limit * 2,
	}
	result, err := h.repo.SearchStaff(c.Request.Context(), params)
	if err != nil {
		c.JSON(http.StatusInternalServerError, gin.H{"error": err.Error()})
		return
	}

	// deref returns the pointed-to string, or "" for nil.
	deref := func(p *string) string {
		if p == nil {
			return ""
		}
		return *p
	}

	// Non-nil so that an empty result is encoded as [] rather than null.
	tasks := make([]ProfileExtractionTask, 0, limit)
	for _, staff := range result.Staff {
		if deref(staff.ProfileURL) == "" {
			continue // nothing to extract from
		}
		if deref(staff.Email) != "" {
			continue // already has the key datum
		}

		task := ProfileExtractionTask{
			StaffID:      staff.ID,
			ProfileURL:   *staff.ProfileURL,
			UniversityID: staff.UniversityID,
			FullName:     deref(staff.FullName),
		}
		// CurrentData.Email stays empty: an empty e-mail is what qualified
		// this record for extraction.
		task.CurrentData.Phone = deref(staff.Phone)
		task.CurrentData.Office = deref(staff.Office)
		task.CurrentData.Position = deref(staff.Position)
		task.CurrentData.Department = deref(staff.DepartmentName)

		tasks = append(tasks, task)
		if len(tasks) >= limit {
			break
		}
	}

	c.JSON(http.StatusOK, gin.H{
		"tasks": tasks,
		"total": len(tasks),
	})
}
// ExtractedProfileData is the payload the AI service submits for one staff
// profile. Only StaffID is required; every other field is optional and an
// empty value means "nothing extracted".
// NOTE(review): in the handlers visible in this file DepartmentName,
// TeachingTopics, InstituteURL, InstituteName and Confidence are accepted but
// never read.
type ExtractedProfileData struct {
StaffID uuid.UUID `json:"staff_id" binding:"required"`
// Contact info
Email string `json:"email,omitempty"`
Phone string `json:"phone,omitempty"`
Office string `json:"office,omitempty"`
// Professional info
Position string `json:"position,omitempty"`
PositionType string `json:"position_type,omitempty"` // professor, researcher, phd_student, staff
AcademicTitle string `json:"academic_title,omitempty"`
// IsProfessor is a pointer so that "not provided" (nil) can be told apart from false.
IsProfessor *bool `json:"is_professor,omitempty"`
DepartmentName string `json:"department_name,omitempty"`
// Hierarchy
// SupervisorName is resolved to a staff record by a name search within the same university.
SupervisorName string `json:"supervisor_name,omitempty"`
TeamRole string `json:"team_role,omitempty"` // leitung, mitarbeiter, sekretariat, hiwi, doktorand
// Research
ResearchInterests []string `json:"research_interests,omitempty"`
ResearchSummary string `json:"research_summary,omitempty"`
// Teaching (courses)
TeachingTopics []string `json:"teaching_topics,omitempty"`
// External profiles
ORCID string `json:"orcid,omitempty"`
GoogleScholarID string `json:"google_scholar_id,omitempty"`
ResearchgateURL string `json:"researchgate_url,omitempty"`
LinkedInURL string `json:"linkedin_url,omitempty"`
PersonalWebsite string `json:"personal_website,omitempty"`
PhotoURL string `json:"photo_url,omitempty"`
// Institute/Department links discovered
InstituteURL string `json:"institute_url,omitempty"`
InstituteName string `json:"institute_name,omitempty"`
// Confidence score (0-1)
Confidence float64 `json:"confidence,omitempty"`
}
// SubmitExtractedData merges AI-extracted data into one staff record.
// POST /api/v1/ai/extraction/submit
//
// Text fields and research interests are only filled when the stored value is
// empty; existing data is never overwritten. IsProfessor is the exception:
// when present in the request it always replaces the stored flag. A
// supervisor name is resolved by searching staff of the same university and
// taking the first hit; a failed or empty search is ignored.
//
// Responses: 200 with {"status", "updated", "staff_id"}; 400 for an invalid
// body; 404 when GetStaff returns any error; 500 when saving fails.
//
// NOTE(review): LastVerified is set on every call but the record is only
// written when some other field changed, so a no-op submission does not
// persist the new timestamp. Confirm whether that is intended.
func (h *AIExtractionHandlers) SubmitExtractedData(c *gin.Context) {
	var data ExtractedProfileData
	if bindErr := c.ShouldBindJSON(&data); bindErr != nil {
		c.JSON(http.StatusBadRequest, gin.H{"error": "Invalid request: " + bindErr.Error()})
		return
	}

	ctx := c.Request.Context()
	staff, err := h.repo.GetStaff(ctx, data.StaffID)
	if err != nil {
		c.JSON(http.StatusNotFound, gin.H{"error": "Staff not found"})
		return
	}

	updated := false
	// fillIfEmpty stores value in *dst when value is non-empty and the current
	// content of *dst is nil or "".
	fillIfEmpty := func(dst **string, value string) {
		if value == "" {
			return
		}
		if current := *dst; current != nil && *current != "" {
			return
		}
		v := value
		*dst = &v
		updated = true
	}

	fillIfEmpty(&staff.Email, data.Email)
	fillIfEmpty(&staff.Phone, data.Phone)
	fillIfEmpty(&staff.Office, data.Office)
	fillIfEmpty(&staff.Position, data.Position)
	fillIfEmpty(&staff.PositionType, data.PositionType)
	fillIfEmpty(&staff.AcademicTitle, data.AcademicTitle)
	if data.IsProfessor != nil {
		staff.IsProfessor = *data.IsProfessor
		updated = true
	}
	fillIfEmpty(&staff.TeamRole, data.TeamRole)
	if len(staff.ResearchInterests) == 0 && len(data.ResearchInterests) > 0 {
		staff.ResearchInterests = data.ResearchInterests
		updated = true
	}
	fillIfEmpty(&staff.ResearchSummary, data.ResearchSummary)
	fillIfEmpty(&staff.ORCID, data.ORCID)
	fillIfEmpty(&staff.GoogleScholarID, data.GoogleScholarID)
	fillIfEmpty(&staff.ResearchgateURL, data.ResearchgateURL)
	fillIfEmpty(&staff.LinkedInURL, data.LinkedInURL)
	fillIfEmpty(&staff.PersonalWebsite, data.PersonalWebsite)
	fillIfEmpty(&staff.PhotoURL, data.PhotoURL)

	// Resolve the supervisor by name within the same university.
	if staff.SupervisorID == nil && data.SupervisorName != "" {
		found, searchErr := h.repo.SearchStaff(ctx, database.StaffSearchParams{
			Query:        data.SupervisorName,
			UniversityID: &staff.UniversityID,
			Limit:        1,
		})
		if searchErr == nil && len(found.Staff) > 0 {
			staff.SupervisorID = &found.Staff[0].ID
			updated = true
		}
	}

	verifiedAt := time.Now()
	staff.LastVerified = &verifiedAt

	if updated {
		if saveErr := h.repo.CreateStaff(ctx, staff); saveErr != nil {
			c.JSON(http.StatusInternalServerError, gin.H{"error": "Failed to update: " + saveErr.Error()})
			return
		}
	}

	c.JSON(http.StatusOK, gin.H{
		"status":   "success",
		"updated":  updated,
		"staff_id": staff.ID,
	})
}
// SubmitBatchExtractedData merges AI-extracted data into several staff
// records in one request.
// POST /api/v1/ai/extraction/submit-batch
//
// Each item is processed independently with the same rules as the single
// submit endpoint: text fields and research interests are filled only when
// the stored value is empty, IsProfessor replaces the stored flag when
// present, and a supervisor name is resolved by a name search within the
// item's university. A failing item is reported in "results" and does not
// abort the batch.
//
// Responses: 200 with per-item results and success/error counters; 400 for an
// invalid body.
func (h *AIExtractionHandlers) SubmitBatchExtractedData(c *gin.Context) {
	var batch struct {
		Items []ExtractedProfileData `json:"items" binding:"required"`
	}
	if err := c.ShouldBindJSON(&batch); err != nil {
		c.JSON(http.StatusBadRequest, gin.H{"error": "Invalid request: " + err.Error()})
		return
	}

	ctx := c.Request.Context()
	results := make([]gin.H, 0, len(batch.Items))
	successCount, errorCount := 0, 0

	for i := range batch.Items {
		item := &batch.Items[i]

		staff, err := h.repo.GetStaff(ctx, item.StaffID)
		if err != nil {
			results = append(results, gin.H{
				"staff_id": item.StaffID,
				"status":   "error",
				"error":    "Staff not found",
			})
			errorCount++
			continue
		}

		updated := false
		// fillIfEmpty stores value in *dst when value is non-empty and the
		// current content of *dst is nil or "". It stores a private copy so
		// the record never points into the request payload.
		fillIfEmpty := func(dst **string, value string) {
			if value == "" {
				return
			}
			if current := *dst; current != nil && *current != "" {
				return
			}
			v := value
			*dst = &v
			updated = true
		}

		fillIfEmpty(&staff.Email, item.Email)
		fillIfEmpty(&staff.Phone, item.Phone)
		fillIfEmpty(&staff.Office, item.Office)
		fillIfEmpty(&staff.Position, item.Position)
		fillIfEmpty(&staff.PositionType, item.PositionType)
		fillIfEmpty(&staff.AcademicTitle, item.AcademicTitle)
		if item.IsProfessor != nil {
			staff.IsProfessor = *item.IsProfessor
			updated = true
		}
		fillIfEmpty(&staff.TeamRole, item.TeamRole)
		if len(item.ResearchInterests) > 0 && len(staff.ResearchInterests) == 0 {
			staff.ResearchInterests = item.ResearchInterests
			updated = true
		}
		fillIfEmpty(&staff.ResearchSummary, item.ResearchSummary)
		fillIfEmpty(&staff.ORCID, item.ORCID)
		fillIfEmpty(&staff.GoogleScholarID, item.GoogleScholarID)
		fillIfEmpty(&staff.ResearchgateURL, item.ResearchgateURL)
		fillIfEmpty(&staff.LinkedInURL, item.LinkedInURL)
		fillIfEmpty(&staff.PersonalWebsite, item.PersonalWebsite)
		fillIfEmpty(&staff.PhotoURL, item.PhotoURL)

		// Resolve the supervisor by name within the same university; a failed
		// or empty search leaves the supervisor unset.
		if item.SupervisorName != "" && staff.SupervisorID == nil {
			found, searchErr := h.repo.SearchStaff(ctx, database.StaffSearchParams{
				Query:        item.SupervisorName,
				UniversityID: &staff.UniversityID,
				Limit:        1,
			})
			if searchErr == nil && len(found.Staff) > 0 {
				supervisorID := found.Staff[0].ID
				staff.SupervisorID = &supervisorID
				updated = true
			}
		}

		now := time.Now()
		staff.LastVerified = &now

		if updated {
			if err := h.repo.CreateStaff(ctx, staff); err != nil {
				results = append(results, gin.H{
					"staff_id": item.StaffID,
					"status":   "error",
					"error":    err.Error(),
				})
				errorCount++
				continue
			}
		}

		results = append(results, gin.H{
			"staff_id": item.StaffID,
			"status":   "success",
			"updated":  updated,
		})
		successCount++
	}

	c.JSON(http.StatusOK, gin.H{
		"results":       results,
		"success_count": successCount,
		"error_count":   errorCount,
		"total":         len(batch.Items),
	})
}
// InstituteHierarchyTask is one institute page handed to the AI service for
// hierarchy crawling.
type InstituteHierarchyTask struct {
// InstituteURL is the page to crawl; GetInstitutePages fills it from a staff record's source URL.
InstituteURL string `json:"institute_url"`
// InstituteName is omitted from JSON when empty.
InstituteName string `json:"institute_name,omitempty"`
UniversityID uuid.UUID `json:"university_id"`
}
// GetInstitutePages returns institute pages that need hierarchy crawling.
// GET /api/v1/ai/extraction/institutes?university_id=...
//
// The pages are the distinct, non-empty source URLs of up to 1000 staff
// records, in the order they are first seen. An unparsable university_id is
// ignored, in which case no university filter is applied. "institutes" is
// always a JSON array, never null.
func (h *AIExtractionHandlers) GetInstitutePages(c *gin.Context) {
	var universityID *uuid.UUID
	if uniIDStr := c.Query("university_id"); uniIDStr != "" {
		if id, err := uuid.Parse(uniIDStr); err == nil {
			universityID = &id
		}
	}

	params := database.StaffSearchParams{
		UniversityID: universityID,
		Limit:        1000,
	}
	result, err := h.repo.SearchStaff(c.Request.Context(), params)
	if err != nil {
		c.JSON(http.StatusInternalServerError, gin.H{"error": err.Error()})
		return
	}

	// Source URLs are typically department pages; emit each one only once.
	seen := make(map[string]struct{})
	// Non-nil so that an empty result is encoded as [] rather than null.
	tasks := make([]InstituteHierarchyTask, 0)
	for _, staff := range result.Staff {
		if staff.SourceURL == nil || *staff.SourceURL == "" {
			continue
		}
		sourceURL := *staff.SourceURL
		if _, dup := seen[sourceURL]; dup {
			continue
		}
		seen[sourceURL] = struct{}{}
		tasks = append(tasks, InstituteHierarchyTask{
			InstituteURL: sourceURL,
			UniversityID: staff.UniversityID,
		})
	}

	c.JSON(http.StatusOK, gin.H{
		"institutes": tasks,
		"total":      len(tasks),
	})
}
// InstituteHierarchyData is the payload the AI service submits after crawling
// an institute page: who leads the institute and how its staff is grouped.
// InstituteURL and UniversityID are required by request binding.
type InstituteHierarchyData struct {
InstituteURL string `json:"institute_url" binding:"required"`
UniversityID uuid.UUID `json:"university_id" binding:"required"`
// InstituteName becomes the department name when the hierarchy is submitted.
InstituteName string `json:"institute_name,omitempty"`
// Leadership
// LeaderName is resolved to a staff record by a name search within the university.
LeaderName string `json:"leader_name,omitempty"`
LeaderTitle string `json:"leader_title,omitempty"` // e.g., "Professor", "Lehrstuhlinhaber"
// Staff organization
StaffGroups []struct {
Role string `json:"role"` // e.g., "Leitung", "Wissenschaftliche Mitarbeiter", "Sekretariat"
Members []string `json:"members"` // Names of people in this group
} `json:"staff_groups,omitempty"`
// Teaching info (courses)
// NOTE(review): TeachingCourses is accepted but not read by the submit handler in this file.
TeachingCourses []struct {
Title string `json:"title"`
Teacher string `json:"teacher,omitempty"`
} `json:"teaching_courses,omitempty"`
}
// SubmitInstituteHierarchy saves hierarchy data from an institute page.
// POST /api/v1/ai/extraction/institutes/submit
//
// Steps:
//  1. Create the department from the institute name and URL.
//  2. If a leader name is given and a name search within the university finds
//     someone, attach that person to the department with team role
//     "leitung", mark them as professor and apply the leader title.
//  3. For every member name in every staff group, take the first search hit,
//     attach it to the department with the group's role and, when a leader
//     was found, set the leader as supervisor.
//
// Names that cannot be found are skipped. A member whose record cannot be
// saved is counted in "members_failed" instead of "members_updated".
//
// Responses: 200 with department id, leader id and member counters; 400 for
// an invalid body; 500 when the department or the leader cannot be saved.
func (h *AIExtractionHandlers) SubmitInstituteHierarchy(c *gin.Context) {
	var data InstituteHierarchyData
	if err := c.ShouldBindJSON(&data); err != nil {
		c.JSON(http.StatusBadRequest, gin.H{"error": "Invalid request: " + err.Error()})
		return
	}
	ctx := c.Request.Context()

	// Find or create department
	dept := &database.Department{
		UniversityID: data.UniversityID,
		Name:         data.InstituteName,
	}
	if data.InstituteURL != "" {
		dept.URL = &data.InstituteURL
	}
	if err := h.repo.CreateDepartment(ctx, dept); err != nil {
		c.JSON(http.StatusInternalServerError, gin.H{"error": "Failed to create department: " + err.Error()})
		return
	}

	// Find the leader; they become supervisor of the other members.
	var leaderID *uuid.UUID
	if data.LeaderName != "" {
		result, err := h.repo.SearchStaff(ctx, database.StaffSearchParams{
			Query:        data.LeaderName,
			UniversityID: &data.UniversityID,
			Limit:        1,
		})
		if err == nil && len(result.Staff) > 0 {
			leader := &result.Staff[0]
			leader.DepartmentID = &dept.ID
			roleLeitung := "leitung"
			leader.TeamRole = &roleLeitung
			leader.IsProfessor = true
			if data.LeaderTitle != "" {
				leader.AcademicTitle = &data.LeaderTitle
			}
			if err := h.repo.CreateStaff(ctx, leader); err != nil {
				c.JSON(http.StatusInternalServerError, gin.H{"error": "Failed to update leader: " + err.Error()})
				return
			}
			id := leader.ID
			leaderID = &id
		}
	}

	// Process staff groups
	updatedCount, failedCount := 0, 0
	for _, group := range data.StaffGroups {
		// Private copy per group so the stored pointer does not alias the
		// loop variable.
		role := group.Role
		for _, memberName := range group.Members {
			result, err := h.repo.SearchStaff(ctx, database.StaffSearchParams{
				Query:        memberName,
				UniversityID: &data.UniversityID,
				Limit:        1,
			})
			if err != nil || len(result.Staff) == 0 {
				continue
			}

			member := &result.Staff[0]
			member.DepartmentID = &dept.ID
			member.TeamRole = &role
			// The leader does not supervise themselves.
			if leaderID != nil && member.ID != *leaderID {
				member.SupervisorID = leaderID
			}

			if err := h.repo.CreateStaff(ctx, member); err != nil {
				failedCount++
				continue
			}
			updatedCount++
		}
	}

	c.JSON(http.StatusOK, gin.H{
		"status":          "success",
		"department_id":   dept.ID,
		"leader_id":       leaderID,
		"members_updated": updatedCount,
		"members_failed":  failedCount,
	})
}
// RegisterRoutes mounts the AI extraction endpoints under "/ai/extraction"
// on r.
func (h *AIExtractionHandlers) RegisterRoutes(r *gin.RouterGroup) {
	extraction := r.Group("/ai/extraction")

	// Profile extraction endpoints
	extraction.GET("/pending", h.GetPendingProfiles)
	extraction.POST("/submit", h.SubmitExtractedData)
	extraction.POST("/submit-batch", h.SubmitBatchExtractedData)

	// Institute hierarchy endpoints
	extraction.GET("/institutes", h.GetInstitutePages)
	extraction.POST("/institutes/submit", h.SubmitInstituteHierarchy)
}

View File

@@ -0,0 +1,314 @@
package handlers
import (
"net/http"
"strconv"
"github.com/breakpilot/edu-search-service/internal/orchestrator"
"github.com/gin-gonic/gin"
"github.com/google/uuid"
)
// AudienceHandler serves the audience-related HTTP endpoints.
type AudienceHandler struct {
// repo is the audience repository all endpoints read from and write to.
repo orchestrator.AudienceRepository
}
// NewAudienceHandler returns a handler that operates on repo.
func NewAudienceHandler(repo orchestrator.AudienceRepository) *AudienceHandler {
	handler := new(AudienceHandler)
	handler.repo = repo
	return handler
}
// CreateAudienceRequest is the JSON body accepted when creating an audience.
// Only Name is required by request binding.
type CreateAudienceRequest struct {
Name string `json:"name" binding:"required"`
Description string `json:"description"`
// Filters defines which members belong to the audience.
Filters orchestrator.AudienceFilters `json:"filters"`
CreatedBy string `json:"created_by"`
}
// UpdateAudienceRequest is the JSON body accepted when updating an audience.
// Only Name is required by request binding.
type UpdateAudienceRequest struct {
Name string `json:"name" binding:"required"`
Description string `json:"description"`
Filters orchestrator.AudienceFilters `json:"filters"`
// IsActive is a plain bool, so a request that omits it decodes as false.
IsActive bool `json:"is_active"`
}
// CreateExportRequest is the JSON body accepted when creating an export.
// Only ExportType is required by request binding.
type CreateExportRequest struct {
ExportType string `json:"export_type" binding:"required"` // csv, json, email_list
Purpose string `json:"purpose"`
ExportedBy string `json:"exported_by"`
}
// ListAudiences responds with all audiences and their count. The query
// parameter active_only=true restricts the list to active audiences; any
// other value lists everything.
func (h *AudienceHandler) ListAudiences(c *gin.Context) {
	onlyActive := c.Query("active_only") == "true"

	list, listErr := h.repo.ListAudiences(c.Request.Context(), onlyActive)
	if listErr != nil {
		c.JSON(http.StatusInternalServerError, gin.H{"error": "Failed to list audiences", "details": listErr.Error()})
		return
	}

	payload := gin.H{
		"audiences": list,
		"count":     len(list),
	}
	c.JSON(http.StatusOK, payload)
}
// GetAudience responds with the audience named by the ":id" path parameter.
//
// Responses: 200 with the audience; 400 when the id is not a UUID; 404 when
// the repository returns any error.
func (h *AudienceHandler) GetAudience(c *gin.Context) {
	audienceID, parseErr := uuid.Parse(c.Param("id"))
	if parseErr != nil {
		c.JSON(http.StatusBadRequest, gin.H{"error": "Invalid audience ID"})
		return
	}

	found, lookupErr := h.repo.GetAudience(c.Request.Context(), audienceID)
	if lookupErr != nil {
		c.JSON(http.StatusNotFound, gin.H{"error": "Audience not found", "details": lookupErr.Error()})
		return
	}
	c.JSON(http.StatusOK, found)
}
// CreateAudience stores a new, active audience built from the request body
// and responds with it, including a freshly computed member count.
//
// Responses: 201 with the audience; 400 for an invalid body; 500 when the
// repository fails to create it.
func (h *AudienceHandler) CreateAudience(c *gin.Context) {
	var body CreateAudienceRequest
	if bindErr := c.ShouldBindJSON(&body); bindErr != nil {
		c.JSON(http.StatusBadRequest, gin.H{"error": "Invalid request body", "details": bindErr.Error()})
		return
	}

	ctx := c.Request.Context()
	created := new(orchestrator.Audience)
	created.Name = body.Name
	created.Description = body.Description
	created.Filters = body.Filters
	created.CreatedBy = body.CreatedBy
	created.IsActive = true

	if createErr := h.repo.CreateAudience(ctx, created); createErr != nil {
		c.JSON(http.StatusInternalServerError, gin.H{"error": "Failed to create audience", "details": createErr.Error()})
		return
	}

	// The count refresh is best effort: its error is discarded and whatever
	// count came back is reported.
	memberCount, _ := h.repo.UpdateAudienceCount(ctx, created.ID)
	created.MemberCount = memberCount

	c.JSON(http.StatusCreated, created)
}
// UpdateAudience replaces name, description, filters and active flag of the
// audience named by the ":id" path parameter and responds with the submitted
// values plus a freshly computed member count.
//
// Responses: 200 with the audience; 400 when the id is not a UUID or the body
// is invalid; 500 when the repository fails to update it.
func (h *AudienceHandler) UpdateAudience(c *gin.Context) {
	audienceID, parseErr := uuid.Parse(c.Param("id"))
	if parseErr != nil {
		c.JSON(http.StatusBadRequest, gin.H{"error": "Invalid audience ID"})
		return
	}

	var body UpdateAudienceRequest
	if bindErr := c.ShouldBindJSON(&body); bindErr != nil {
		c.JSON(http.StatusBadRequest, gin.H{"error": "Invalid request body", "details": bindErr.Error()})
		return
	}

	ctx := c.Request.Context()
	changed := new(orchestrator.Audience)
	changed.ID = audienceID
	changed.Name = body.Name
	changed.Description = body.Description
	changed.Filters = body.Filters
	changed.IsActive = body.IsActive

	if updateErr := h.repo.UpdateAudience(ctx, changed); updateErr != nil {
		c.JSON(http.StatusInternalServerError, gin.H{"error": "Failed to update audience", "details": updateErr.Error()})
		return
	}

	// The count refresh is best effort: its error is discarded and whatever
	// count came back is reported.
	memberCount, _ := h.repo.UpdateAudienceCount(ctx, changed.ID)
	changed.MemberCount = memberCount

	c.JSON(http.StatusOK, changed)
}
// DeleteAudience asks the repository to delete the audience named by the
// ":id" path parameter.
//
// Responses: 200 with {"deleted": true, "id": <id as given>}; 400 when the id
// is not a UUID; 500 when the repository fails.
func (h *AudienceHandler) DeleteAudience(c *gin.Context) {
	rawID := c.Param("id")
	audienceID, parseErr := uuid.Parse(rawID)
	if parseErr != nil {
		c.JSON(http.StatusBadRequest, gin.H{"error": "Invalid audience ID"})
		return
	}

	deleteErr := h.repo.DeleteAudience(c.Request.Context(), audienceID)
	if deleteErr != nil {
		c.JSON(http.StatusInternalServerError, gin.H{"error": "Failed to delete audience", "details": deleteErr.Error()})
		return
	}
	c.JSON(http.StatusOK, gin.H{"deleted": true, "id": rawID})
}
// GetAudienceMembers returns one page of the members matching the filters of
// the audience identified by the ":id" path parameter.
//
// Query parameters:
//   - limit:  page size, accepted when it is an integer in 1..500 (default 50)
//   - offset: number of members to skip, accepted when >= 0 (default 0)
//
// Values that do not parse or fall outside those ranges are ignored and the
// default is used instead. Responds 400 for a malformed UUID and 500 when the
// repository lookup fails.
func (h *AudienceHandler) GetAudienceMembers(c *gin.Context) {
	audienceID, parseErr := uuid.Parse(c.Param("id"))
	if parseErr != nil {
		c.JSON(http.StatusBadRequest, gin.H{"error": "Invalid audience ID"})
		return
	}

	pageSize, skip := 50, 0
	if raw := c.Query("limit"); raw != "" {
		n, convErr := strconv.Atoi(raw)
		if convErr == nil && n > 0 && n <= 500 {
			pageSize = n
		}
	}
	if raw := c.Query("offset"); raw != "" {
		n, convErr := strconv.Atoi(raw)
		if convErr == nil && n >= 0 {
			skip = n
		}
	}

	page, total, fetchErr := h.repo.GetAudienceMembers(c.Request.Context(), audienceID, pageSize, skip)
	if fetchErr != nil {
		c.JSON(http.StatusInternalServerError, gin.H{"error": "Failed to get members", "details": fetchErr.Error()})
		return
	}

	// "count" is the size of this page, "total_count" the size of the whole audience.
	body := gin.H{
		"members":     page,
		"count":       len(page),
		"total_count": total,
		"limit":       pageSize,
		"offset":      skip,
	}
	c.JSON(http.StatusOK, body)
}
// RefreshAudienceCount asks the repository to recalculate the member count of
// the audience identified by the ":id" path parameter and reports the result.
//
// Responds 400 for a malformed UUID and 500 when the recount fails.
func (h *AudienceHandler) RefreshAudienceCount(c *gin.Context) {
	rawID := c.Param("id")
	audienceID, parseErr := uuid.Parse(rawID)
	if parseErr != nil {
		c.JSON(http.StatusBadRequest, gin.H{"error": "Invalid audience ID"})
		return
	}

	memberCount, countErr := h.repo.UpdateAudienceCount(c.Request.Context(), audienceID)
	if countErr != nil {
		c.JSON(http.StatusInternalServerError, gin.H{"error": "Failed to refresh count", "details": countErr.Error()})
		return
	}

	body := gin.H{
		"audience_id":  rawID,
		"member_count": memberCount,
	}
	c.JSON(http.StatusOK, body)
}
// PreviewAudienceFilters accepts a set of audience filters without saving
// them. It currently only validates that the body decodes and echoes the
// filters back together with a notice; no members are looked up.
func (h *AudienceHandler) PreviewAudienceFilters(c *gin.Context) {
	var requested orchestrator.AudienceFilters
	bindErr := c.ShouldBindJSON(&requested)
	if bindErr != nil {
		c.JSON(http.StatusBadRequest, gin.H{"error": "Invalid request body", "details": bindErr.Error()})
		return
	}

	// Placeholder response until a real preview is implemented.
	echo := gin.H{
		"filters": requested,
		"message": "Preview functionality requires direct repository access",
	}
	c.JSON(http.StatusOK, echo)
}
// CreateExport records a new export for the audience identified by the ":id"
// path parameter. The export's record count is the audience's current total
// member count as reported by the repository.
//
// Responds 400 for a malformed UUID or request body, 500 when either the
// member lookup or the export insert fails, and 201 with the export record
// on success.
func (h *AudienceHandler) CreateExport(c *gin.Context) {
	audienceID, parseErr := uuid.Parse(c.Param("id"))
	if parseErr != nil {
		c.JSON(http.StatusBadRequest, gin.H{"error": "Invalid audience ID"})
		return
	}

	var payload CreateExportRequest
	if bindErr := c.ShouldBindJSON(&payload); bindErr != nil {
		c.JSON(http.StatusBadRequest, gin.H{"error": "Invalid request body", "details": bindErr.Error()})
		return
	}

	// Only the total is needed here, so fetch the smallest possible page.
	_, memberTotal, fetchErr := h.repo.GetAudienceMembers(c.Request.Context(), audienceID, 1, 0)
	if fetchErr != nil {
		c.JSON(http.StatusInternalServerError, gin.H{"error": "Failed to get members", "details": fetchErr.Error()})
		return
	}

	record := &orchestrator.AudienceExport{
		AudienceID:  audienceID,
		ExportType:  payload.ExportType,
		RecordCount: memberTotal,
		ExportedBy:  payload.ExportedBy,
		Purpose:     payload.Purpose,
	}
	if saveErr := h.repo.CreateExport(c.Request.Context(), record); saveErr != nil {
		c.JSON(http.StatusInternalServerError, gin.H{"error": "Failed to create export", "details": saveErr.Error()})
		return
	}

	c.JSON(http.StatusCreated, record)
}
// ListExports returns every export recorded for the audience identified by
// the ":id" path parameter, together with the number of entries.
//
// Responds 400 for a malformed UUID and 500 when the repository lookup fails.
func (h *AudienceHandler) ListExports(c *gin.Context) {
	audienceID, parseErr := uuid.Parse(c.Param("id"))
	if parseErr != nil {
		c.JSON(http.StatusBadRequest, gin.H{"error": "Invalid audience ID"})
		return
	}

	history, listErr := h.repo.ListExports(c.Request.Context(), audienceID)
	if listErr != nil {
		c.JSON(http.StatusInternalServerError, gin.H{"error": "Failed to list exports", "details": listErr.Error()})
		return
	}

	body := gin.H{
		"exports": history,
		"count":   len(history),
	}
	c.JSON(http.StatusOK, body)
}
// SetupAudienceRoutes mounts the audience API under "<r>/audiences".
//
// Registered endpoints (relative to the group):
//
//	GET    ""             list audiences
//	GET    /:id           fetch one audience
//	POST   ""             create an audience
//	PUT    /:id           update an audience
//	DELETE /:id           soft-delete an audience
//	GET    /:id/members   page through matching members
//	POST   /:id/refresh   recalculate the member count
//	GET    /:id/exports   list exports
//	POST   /:id/exports   create an export
//	POST   /preview       preview filters (no audience required)
func SetupAudienceRoutes(r *gin.RouterGroup, h *AudienceHandler) {
	group := r.Group("/audiences")

	// Audience CRUD
	group.GET("", h.ListAudiences)
	group.GET("/:id", h.GetAudience)
	group.POST("", h.CreateAudience)
	group.PUT("/:id", h.UpdateAudience)
	group.DELETE("/:id", h.DeleteAudience)

	// Membership
	group.GET("/:id/members", h.GetAudienceMembers)
	group.POST("/:id/refresh", h.RefreshAudienceCount)

	// Export bookkeeping
	group.GET("/:id/exports", h.ListExports)
	group.POST("/:id/exports", h.CreateExport)

	// Filter preview
	group.POST("/preview", h.PreviewAudienceFilters)
}

View File

@@ -0,0 +1,630 @@
package handlers
import (
"bytes"
"context"
"encoding/json"
"net/http"
"net/http/httptest"
"testing"
"time"
"github.com/breakpilot/edu-search-service/internal/orchestrator"
"github.com/gin-gonic/gin"
"github.com/google/uuid"
)
// MockAudienceRepository implements orchestrator.AudienceRepository for testing.
// All state lives in plain in-memory slices that tests may read or pre-populate
// directly.
type MockAudienceRepository struct {
// audiences holds every audience created through, or injected into, the mock.
audiences []orchestrator.Audience
// exports holds every export created through, or injected into, the mock.
exports []orchestrator.AudienceExport
// members is the single member list shared by all audiences.
members []orchestrator.AudienceMember
}
// NewMockAudienceRepository returns a mock whose audience, export and member
// lists are empty but non-nil.
func NewMockAudienceRepository() *MockAudienceRepository {
	repo := new(MockAudienceRepository)
	repo.audiences = []orchestrator.Audience{}
	repo.exports = []orchestrator.AudienceExport{}
	repo.members = []orchestrator.AudienceMember{}
	return repo
}
// CreateAudience assigns a fresh ID and creation/update timestamps to the
// given audience and stores a copy of it. It never fails.
func (m *MockAudienceRepository) CreateAudience(ctx context.Context, audience *orchestrator.Audience) error {
	audience.ID = uuid.New()
	audience.CreatedAt = time.Now()
	audience.UpdatedAt = time.Now()

	// Store by value so later changes through the caller's pointer do not
	// affect the stored record.
	stored := *audience
	m.audiences = append(m.audiences, stored)
	return nil
}
// GetAudience returns a pointer to the stored audience with the given ID.
// When no audience matches it returns context.DeadlineExceeded, which the
// mock uses as its stand-in "not found" error.
func (m *MockAudienceRepository) GetAudience(ctx context.Context, id uuid.UUID) (*orchestrator.Audience, error) {
	for idx := range m.audiences {
		candidate := &m.audiences[idx]
		if candidate.ID != id {
			continue
		}
		return candidate, nil
	}
	// simulate not found
	return nil, context.DeadlineExceeded
}
// ListAudiences returns all stored audiences, or only those with IsActive set
// when activeOnly is true. It never fails.
func (m *MockAudienceRepository) ListAudiences(ctx context.Context, activeOnly bool) ([]orchestrator.Audience, error) {
	if !activeOnly {
		return m.audiences, nil
	}

	// Stays nil when no audience is active.
	var filtered []orchestrator.Audience
	for idx := range m.audiences {
		if m.audiences[idx].IsActive {
			filtered = append(filtered, m.audiences[idx])
		}
	}
	return filtered, nil
}
// UpdateAudience copies name, description, filters and active flag from the
// given audience onto the first stored audience with the same ID, refreshes
// its UpdatedAt and mirrors that timestamp back to the caller. An unknown ID
// is silently ignored; the method never fails.
func (m *MockAudienceRepository) UpdateAudience(ctx context.Context, audience *orchestrator.Audience) error {
	for idx := range m.audiences {
		stored := &m.audiences[idx]
		if stored.ID != audience.ID {
			continue
		}
		stored.Name = audience.Name
		stored.Description = audience.Description
		stored.Filters = audience.Filters
		stored.IsActive = audience.IsActive
		stored.UpdatedAt = time.Now()
		audience.UpdatedAt = stored.UpdatedAt
		return nil
	}
	return nil
}
// DeleteAudience soft-deletes by clearing IsActive on the first stored
// audience with the given ID. An unknown ID is silently ignored; the method
// never fails.
func (m *MockAudienceRepository) DeleteAudience(ctx context.Context, id uuid.UUID) error {
	for idx := range m.audiences {
		if m.audiences[idx].ID != id {
			continue
		}
		m.audiences[idx].IsActive = false
		break
	}
	return nil
}
// GetAudienceMembers returns the page [offset, offset+limit) of the mock's
// member list together with the total number of members. The audience ID is
// not consulted. When the member list is empty it is first populated with two
// fixture members, so the call has a side effect on the mock.
func (m *MockAudienceRepository) GetAudienceMembers(ctx context.Context, id uuid.UUID, limit, offset int) ([]orchestrator.AudienceMember, int, error) {
	// Lazily seed the fixture members on first use.
	if len(m.members) == 0 {
		professor := orchestrator.AudienceMember{
			ID:               uuid.New(),
			Name:             "Prof. Dr. Test Person",
			Email:            "test@university.de",
			Position:         "professor",
			University:       "Test Universität",
			Department:       "Informatik",
			SubjectArea:      "Informatik",
			PublicationCount: 42,
		}
		researcher := orchestrator.AudienceMember{
			ID:               uuid.New(),
			Name:             "Dr. Another Person",
			Email:            "another@university.de",
			Position:         "researcher",
			University:       "Test Universität",
			Department:       "Mathematik",
			SubjectArea:      "Mathematik",
			PublicationCount: 15,
		}
		m.members = []orchestrator.AudienceMember{professor, researcher}
	}

	total := len(m.members)
	if offset >= total {
		return []orchestrator.AudienceMember{}, total, nil
	}

	// Clamp the upper bound of the page to the end of the list.
	upper := total
	if candidate := offset + limit; candidate <= total {
		upper = candidate
	}
	return m.members[offset:upper], total, nil
}
// UpdateAudienceCount reports the current length of the mock's member list
// and stores it, with a fresh LastCountUpdate timestamp, on every stored
// audience that has the given ID. It never fails.
func (m *MockAudienceRepository) UpdateAudienceCount(ctx context.Context, id uuid.UUID) (int, error) {
	total := len(m.members)
	for idx := range m.audiences {
		if m.audiences[idx].ID != id {
			continue
		}
		stamp := time.Now()
		m.audiences[idx].MemberCount = total
		m.audiences[idx].LastCountUpdate = &stamp
	}
	return total, nil
}
// CreateExport assigns a fresh ID and creation timestamp to the given export
// and stores a copy of it. It never fails.
func (m *MockAudienceRepository) CreateExport(ctx context.Context, export *orchestrator.AudienceExport) error {
	export.ID = uuid.New()
	export.CreatedAt = time.Now()

	stored := *export
	m.exports = append(m.exports, stored)
	return nil
}
// ListExports returns copies of all stored exports that belong to the given
// audience. The result is nil when there are none; the method never fails.
func (m *MockAudienceRepository) ListExports(ctx context.Context, audienceID uuid.UUID) ([]orchestrator.AudienceExport, error) {
	var matching []orchestrator.AudienceExport
	for idx := range m.exports {
		if m.exports[idx].AudienceID != audienceID {
			continue
		}
		matching = append(matching, m.exports[idx])
	}
	return matching, nil
}
// setupAudienceRouter builds a gin engine in test mode with the audience
// routes mounted under "/v1" and backed by the given mock repository.
func setupAudienceRouter(repo *MockAudienceRepository) *gin.Engine {
	gin.SetMode(gin.TestMode)

	engine := gin.New()
	SetupAudienceRoutes(engine.Group("/v1"), NewAudienceHandler(repo))
	return engine
}
func TestAudienceHandler_ListAudiences_Empty(t *testing.T) {
repo := NewMockAudienceRepository()
router := setupAudienceRouter(repo)
req := httptest.NewRequest(http.MethodGet, "/v1/audiences", nil)
w := httptest.NewRecorder()
router.ServeHTTP(w, req)
if w.Code != http.StatusOK {
t.Errorf("Expected status %d, got %d", http.StatusOK, w.Code)
}
var response struct {
Audiences []orchestrator.Audience `json:"audiences"`
Count int `json:"count"`
}
if err := json.Unmarshal(w.Body.Bytes(), &response); err != nil {
t.Fatalf("Failed to unmarshal response: %v", err)
}
if response.Count != 0 {
t.Errorf("Expected 0 audiences, got %d", response.Count)
}
}
// TestAudienceHandler_CreateAudience posts a valid audience and checks the
// 201 response, the echoed name, the active flag and that exactly one
// audience reached the repository.
func TestAudienceHandler_CreateAudience(t *testing.T) {
	store := NewMockAudienceRepository()
	engine := setupAudienceRouter(store)

	payload, _ := json.Marshal(CreateAudienceRequest{
		Name:        "Test Audience",
		Description: "A test audience for professors",
		Filters: orchestrator.AudienceFilters{
			PositionTypes: []string{"professor"},
			States:        []string{"BW", "BY"},
		},
		CreatedBy: "test-admin",
	})

	httpReq := httptest.NewRequest(http.MethodPost, "/v1/audiences", bytes.NewReader(payload))
	httpReq.Header.Set("Content-Type", "application/json")
	rec := httptest.NewRecorder()
	engine.ServeHTTP(rec, httpReq)

	if rec.Code != http.StatusCreated {
		t.Errorf("Expected status %d, got %d: %s", http.StatusCreated, rec.Code, rec.Body.String())
	}

	var created orchestrator.Audience
	if decodeErr := json.Unmarshal(rec.Body.Bytes(), &created); decodeErr != nil {
		t.Fatalf("Failed to unmarshal response: %v", decodeErr)
	}
	if created.Name != "Test Audience" {
		t.Errorf("Expected name 'Test Audience', got '%s'", created.Name)
	}
	if !created.IsActive {
		t.Errorf("Expected audience to be active")
	}
	if stored := len(store.audiences); stored != 1 {
		t.Errorf("Expected 1 audience in repo, got %d", stored)
	}
}
func TestAudienceHandler_CreateAudience_InvalidJSON(t *testing.T) {
repo := NewMockAudienceRepository()
router := setupAudienceRouter(repo)
req := httptest.NewRequest(http.MethodPost, "/v1/audiences", bytes.NewBuffer([]byte("invalid json")))
req.Header.Set("Content-Type", "application/json")
w := httptest.NewRecorder()
router.ServeHTTP(w, req)
if w.Code != http.StatusBadRequest {
t.Errorf("Expected status %d, got %d", http.StatusBadRequest, w.Code)
}
}
func TestAudienceHandler_CreateAudience_MissingName(t *testing.T) {
repo := NewMockAudienceRepository()
router := setupAudienceRouter(repo)
body := map[string]interface{}{
"description": "Missing name field",
}
bodyJSON, _ := json.Marshal(body)
req := httptest.NewRequest(http.MethodPost, "/v1/audiences", bytes.NewBuffer(bodyJSON))
req.Header.Set("Content-Type", "application/json")
w := httptest.NewRecorder()
router.ServeHTTP(w, req)
if w.Code != http.StatusBadRequest {
t.Errorf("Expected status %d, got %d", http.StatusBadRequest, w.Code)
}
}
// TestAudienceHandler_GetAudience seeds one audience directly into the mock
// and checks that fetching it by ID answers 200 with the stored name.
func TestAudienceHandler_GetAudience(t *testing.T) {
	store := NewMockAudienceRepository()
	engine := setupAudienceRouter(store)

	// Seed the repository without going through the API.
	fixture := orchestrator.Audience{
		ID:          uuid.New(),
		Name:        "Test Audience",
		Description: "Test description",
		IsActive:    true,
		CreatedAt:   time.Now(),
		UpdatedAt:   time.Now(),
	}
	store.audiences = append(store.audiences, fixture)

	target := "/v1/audiences/" + fixture.ID.String()
	rec := httptest.NewRecorder()
	engine.ServeHTTP(rec, httptest.NewRequest(http.MethodGet, target, nil))

	if rec.Code != http.StatusOK {
		t.Errorf("Expected status %d, got %d: %s", http.StatusOK, rec.Code, rec.Body.String())
	}

	var fetched orchestrator.Audience
	if decodeErr := json.Unmarshal(rec.Body.Bytes(), &fetched); decodeErr != nil {
		t.Fatalf("Failed to unmarshal response: %v", decodeErr)
	}
	if fetched.Name != "Test Audience" {
		t.Errorf("Expected name 'Test Audience', got '%s'", fetched.Name)
	}
}
func TestAudienceHandler_GetAudience_InvalidID(t *testing.T) {
repo := NewMockAudienceRepository()
router := setupAudienceRouter(repo)
req := httptest.NewRequest(http.MethodGet, "/v1/audiences/invalid-uuid", nil)
w := httptest.NewRecorder()
router.ServeHTTP(w, req)
if w.Code != http.StatusBadRequest {
t.Errorf("Expected status %d, got %d", http.StatusBadRequest, w.Code)
}
}
func TestAudienceHandler_GetAudience_NotFound(t *testing.T) {
repo := NewMockAudienceRepository()
router := setupAudienceRouter(repo)
req := httptest.NewRequest(http.MethodGet, "/v1/audiences/"+uuid.New().String(), nil)
w := httptest.NewRecorder()
router.ServeHTTP(w, req)
if w.Code != http.StatusNotFound {
t.Errorf("Expected status %d, got %d", http.StatusNotFound, w.Code)
}
}
// TestAudienceHandler_UpdateAudience seeds one audience, renames it via PUT
// and checks both the 200 response and the stored name.
func TestAudienceHandler_UpdateAudience(t *testing.T) {
	store := NewMockAudienceRepository()
	engine := setupAudienceRouter(store)

	// Seed the repository without going through the API.
	fixture := orchestrator.Audience{
		ID:          uuid.New(),
		Name:        "Old Name",
		Description: "Old description",
		IsActive:    true,
		CreatedAt:   time.Now(),
		UpdatedAt:   time.Now(),
	}
	store.audiences = append(store.audiences, fixture)

	payload, _ := json.Marshal(UpdateAudienceRequest{
		Name:        "New Name",
		Description: "New description",
		IsActive:    true,
	})
	target := "/v1/audiences/" + fixture.ID.String()
	httpReq := httptest.NewRequest(http.MethodPut, target, bytes.NewReader(payload))
	httpReq.Header.Set("Content-Type", "application/json")
	rec := httptest.NewRecorder()
	engine.ServeHTTP(rec, httpReq)

	if rec.Code != http.StatusOK {
		t.Errorf("Expected status %d, got %d: %s", http.StatusOK, rec.Code, rec.Body.String())
	}

	// The change must have reached the stored record.
	if stored := store.audiences[0].Name; stored != "New Name" {
		t.Errorf("Expected name 'New Name', got '%s'", stored)
	}
}
// TestAudienceHandler_DeleteAudience seeds one active audience, deletes it
// via the API and checks that it was only deactivated.
func TestAudienceHandler_DeleteAudience(t *testing.T) {
	store := NewMockAudienceRepository()
	engine := setupAudienceRouter(store)

	// Seed the repository without going through the API.
	fixture := orchestrator.Audience{
		ID:        uuid.New(),
		Name:      "To Delete",
		IsActive:  true,
		CreatedAt: time.Now(),
		UpdatedAt: time.Now(),
	}
	store.audiences = append(store.audiences, fixture)

	target := "/v1/audiences/" + fixture.ID.String()
	rec := httptest.NewRecorder()
	engine.ServeHTTP(rec, httptest.NewRequest(http.MethodDelete, target, nil))

	if rec.Code != http.StatusOK {
		t.Errorf("Expected status %d, got %d", http.StatusOK, rec.Code)
	}

	// Soft delete: the record stays but is flagged inactive.
	if store.audiences[0].IsActive {
		t.Errorf("Expected audience to be inactive after delete")
	}
}
// TestAudienceHandler_GetAudienceMembers requests the members of a seeded
// audience with default pagination and expects the mock's two fixture
// members to be reported as the total.
func TestAudienceHandler_GetAudienceMembers(t *testing.T) {
	store := NewMockAudienceRepository()
	engine := setupAudienceRouter(store)

	// Seed the repository without going through the API.
	fixture := orchestrator.Audience{
		ID:        uuid.New(),
		Name:      "Test Audience",
		IsActive:  true,
		CreatedAt: time.Now(),
		UpdatedAt: time.Now(),
	}
	store.audiences = append(store.audiences, fixture)

	target := "/v1/audiences/" + fixture.ID.String() + "/members"
	rec := httptest.NewRecorder()
	engine.ServeHTTP(rec, httptest.NewRequest(http.MethodGet, target, nil))

	if rec.Code != http.StatusOK {
		t.Errorf("Expected status %d, got %d: %s", http.StatusOK, rec.Code, rec.Body.String())
	}

	var page struct {
		Members    []orchestrator.AudienceMember `json:"members"`
		Count      int                           `json:"count"`
		TotalCount int                           `json:"total_count"`
	}
	if decodeErr := json.Unmarshal(rec.Body.Bytes(), &page); decodeErr != nil {
		t.Fatalf("Failed to unmarshal response: %v", decodeErr)
	}
	if page.TotalCount != 2 {
		t.Errorf("Expected 2 total members, got %d", page.TotalCount)
	}
}
// TestAudienceHandler_GetAudienceMembers_WithPagination requests a page of
// size one and checks that the response contains a single member and echoes
// the requested limit.
func TestAudienceHandler_GetAudienceMembers_WithPagination(t *testing.T) {
	store := NewMockAudienceRepository()
	engine := setupAudienceRouter(store)

	fixture := orchestrator.Audience{
		ID:        uuid.New(),
		Name:      "Test Audience",
		IsActive:  true,
		CreatedAt: time.Now(),
		UpdatedAt: time.Now(),
	}
	store.audiences = append(store.audiences, fixture)

	target := "/v1/audiences/" + fixture.ID.String() + "/members?limit=1&offset=0"
	rec := httptest.NewRecorder()
	engine.ServeHTTP(rec, httptest.NewRequest(http.MethodGet, target, nil))

	if rec.Code != http.StatusOK {
		t.Errorf("Expected status %d, got %d", http.StatusOK, rec.Code)
	}

	var page struct {
		Members []orchestrator.AudienceMember `json:"members"`
		Count   int                           `json:"count"`
		Limit   int                           `json:"limit"`
		Offset  int                           `json:"offset"`
	}
	if decodeErr := json.Unmarshal(rec.Body.Bytes(), &page); decodeErr != nil {
		t.Fatalf("Failed to unmarshal response: %v", decodeErr)
	}
	if page.Count != 1 {
		t.Errorf("Expected 1 member in response, got %d", page.Count)
	}
	if page.Limit != 1 {
		t.Errorf("Expected limit 1, got %d", page.Limit)
	}
}
// TestAudienceHandler_RefreshAudienceCount pre-loads two members into the
// mock and checks that the refresh endpoint reports a member count of two.
func TestAudienceHandler_RefreshAudienceCount(t *testing.T) {
	store := NewMockAudienceRepository()
	engine := setupAudienceRouter(store)

	fixture := orchestrator.Audience{
		ID:          uuid.New(),
		Name:        "Test Audience",
		IsActive:    true,
		MemberCount: 0,
		CreatedAt:   time.Now(),
		UpdatedAt:   time.Now(),
	}
	store.audiences = append(store.audiences, fixture)

	// The mock counts its member list as-is, so it has to be filled up front.
	store.members = []orchestrator.AudienceMember{
		{ID: uuid.New(), Name: "Test Person 1"},
		{ID: uuid.New(), Name: "Test Person 2"},
	}

	target := "/v1/audiences/" + fixture.ID.String() + "/refresh"
	rec := httptest.NewRecorder()
	engine.ServeHTTP(rec, httptest.NewRequest(http.MethodPost, target, nil))

	if rec.Code != http.StatusOK {
		t.Errorf("Expected status %d, got %d", http.StatusOK, rec.Code)
	}

	var refreshed struct {
		AudienceID  string `json:"audience_id"`
		MemberCount int    `json:"member_count"`
	}
	if decodeErr := json.Unmarshal(rec.Body.Bytes(), &refreshed); decodeErr != nil {
		t.Fatalf("Failed to unmarshal response: %v", decodeErr)
	}
	if refreshed.MemberCount != 2 {
		t.Errorf("Expected member_count 2, got %d", refreshed.MemberCount)
	}
}
// TestAudienceHandler_CreateExport posts an export for a seeded audience and
// checks the 201 response, the export type and that the record count equals
// the mock's two fixture members.
func TestAudienceHandler_CreateExport(t *testing.T) {
	store := NewMockAudienceRepository()
	engine := setupAudienceRouter(store)

	fixture := orchestrator.Audience{
		ID:        uuid.New(),
		Name:      "Test Audience",
		IsActive:  true,
		CreatedAt: time.Now(),
		UpdatedAt: time.Now(),
	}
	store.audiences = append(store.audiences, fixture)

	payload, _ := json.Marshal(CreateExportRequest{
		ExportType: "csv",
		Purpose:    "Newsletter December 2024",
		ExportedBy: "admin",
	})
	target := "/v1/audiences/" + fixture.ID.String() + "/exports"
	httpReq := httptest.NewRequest(http.MethodPost, target, bytes.NewReader(payload))
	httpReq.Header.Set("Content-Type", "application/json")
	rec := httptest.NewRecorder()
	engine.ServeHTTP(rec, httpReq)

	if rec.Code != http.StatusCreated {
		t.Errorf("Expected status %d, got %d: %s", http.StatusCreated, rec.Code, rec.Body.String())
	}

	var created orchestrator.AudienceExport
	if decodeErr := json.Unmarshal(rec.Body.Bytes(), &created); decodeErr != nil {
		t.Fatalf("Failed to unmarshal response: %v", decodeErr)
	}
	if created.ExportType != "csv" {
		t.Errorf("Expected export_type 'csv', got '%s'", created.ExportType)
	}
	if created.RecordCount != 2 {
		t.Errorf("Expected record_count 2, got %d", created.RecordCount)
	}
}
// TestAudienceHandler_ListExports seeds one audience with one export and
// checks that the list endpoint answers 200 with a count of one.
func TestAudienceHandler_ListExports(t *testing.T) {
	store := NewMockAudienceRepository()
	engine := setupAudienceRouter(store)

	fixture := orchestrator.Audience{
		ID:        uuid.New(),
		Name:      "Test Audience",
		IsActive:  true,
		CreatedAt: time.Now(),
		UpdatedAt: time.Now(),
	}
	store.audiences = append(store.audiences, fixture)

	// One export that belongs to the seeded audience.
	store.exports = append(store.exports, orchestrator.AudienceExport{
		ID:          uuid.New(),
		AudienceID:  fixture.ID,
		ExportType:  "csv",
		RecordCount: 100,
		Purpose:     "Test export",
		CreatedAt:   time.Now(),
	})

	target := "/v1/audiences/" + fixture.ID.String() + "/exports"
	rec := httptest.NewRecorder()
	engine.ServeHTTP(rec, httptest.NewRequest(http.MethodGet, target, nil))

	if rec.Code != http.StatusOK {
		t.Errorf("Expected status %d, got %d", http.StatusOK, rec.Code)
	}

	var listing struct {
		Exports []orchestrator.AudienceExport `json:"exports"`
		Count   int                           `json:"count"`
	}
	if decodeErr := json.Unmarshal(rec.Body.Bytes(), &listing); decodeErr != nil {
		t.Fatalf("Failed to unmarshal response: %v", decodeErr)
	}
	if listing.Count != 1 {
		t.Errorf("Expected 1 export, got %d", listing.Count)
	}
}
// TestAudienceHandler_ListAudiences_ActiveOnly seeds one active and one
// inactive audience and checks that "?active_only=true" returns only the
// active one.
func TestAudienceHandler_ListAudiences_ActiveOnly(t *testing.T) {
	repo := NewMockAudienceRepository()
	router := setupAudienceRouter(repo)

	// Add active and inactive audiences
	repo.audiences = []orchestrator.Audience{
		{ID: uuid.New(), Name: "Active", IsActive: true, CreatedAt: time.Now(), UpdatedAt: time.Now()},
		{ID: uuid.New(), Name: "Inactive", IsActive: false, CreatedAt: time.Now(), UpdatedAt: time.Now()},
	}

	req := httptest.NewRequest(http.MethodGet, "/v1/audiences?active_only=true", nil)
	w := httptest.NewRecorder()
	router.ServeHTTP(w, req)

	if w.Code != http.StatusOK {
		t.Errorf("Expected status %d, got %d", http.StatusOK, w.Code)
	}

	var response struct {
		Audiences []orchestrator.Audience `json:"audiences"`
		Count     int                     `json:"count"`
	}
	if err := json.Unmarshal(w.Body.Bytes(), &response); err != nil {
		t.Fatalf("Failed to unmarshal response: %v", err)
	}
	if response.Count != 1 {
		t.Errorf("Expected 1 active audience, got %d", response.Count)
	}

	// Stop here when the list does not hold exactly one entry: indexing an
	// empty slice below would panic instead of producing a test failure.
	if len(response.Audiences) != 1 {
		t.Fatalf("Expected exactly 1 audience in response, got %d", len(response.Audiences))
	}
	if response.Audiences[0].Name != "Active" {
		t.Errorf("Expected audience 'Active', got '%s'", response.Audiences[0].Name)
	}
}

View File

@@ -0,0 +1,146 @@
package handlers
import (
	"crypto/subtle"
	"net/http"

	"github.com/breakpilot/edu-search-service/internal/config"
	"github.com/breakpilot/edu-search-service/internal/indexer"
	"github.com/breakpilot/edu-search-service/internal/search"
	"github.com/gin-gonic/gin"
	"github.com/google/uuid"
)
// Handler contains all HTTP handlers of the search API together with the
// dependencies they share.
type Handler struct {
// cfg is the service configuration passed to NewHandler.
cfg *config.Config
// searchService executes the queries received by Search.
searchService *search.Service
// indexClient is used by Health to probe the OpenSearch backend.
indexClient *indexer.Client
}
// NewHandler wires the configuration, search service and index client into a
// new Handler. The arguments are stored as given; none is validated.
func NewHandler(cfg *config.Config, searchService *search.Service, indexClient *indexer.Client) *Handler {
	h := new(Handler)
	h.cfg = cfg
	h.searchService = searchService
	h.indexClient = indexClient
	return h
}
// Health reports the service status and the status of the OpenSearch backend.
//
// The response is always HTTP 200. When the backend probe fails, "status"
// becomes "degraded" and "opensearch" becomes "unreachable"; otherwise
// "status" is "ok" and "opensearch" carries the value returned by the probe.
func (h *Handler) Health(c *gin.Context) {
	overall := "ok"

	backend, probeErr := h.indexClient.Health(c.Request.Context())
	if probeErr != nil {
		overall, backend = "degraded", "unreachable"
	}

	report := gin.H{
		"status":     overall,
		"opensearch": backend,
		"service":    "edu-search-service",
		"version":    "0.1.0",
	}
	c.JSON(http.StatusOK, report)
}
// Search handles /v1/search requests.
//
// The JSON body is decoded into a search.SearchRequest. A limit that is not
// in 1..100 is replaced by 10 and an empty mode by "keyword". Each request
// gets a freshly generated query ID that is attached to the result.
//
// Responds 400 when the body cannot be decoded and 500 when the search
// service returns an error.
func (h *Handler) Search(c *gin.Context) {
	var query search.SearchRequest
	if bindErr := c.ShouldBindJSON(&query); bindErr != nil {
		c.JSON(http.StatusBadRequest, gin.H{"error": "Invalid request body", "details": bindErr.Error()})
		return
	}

	// Fill in defaults for missing or out-of-range values.
	if outOfRange := query.Limit <= 0 || query.Limit > 100; outOfRange {
		query.Limit = 10
	}
	if query.Mode == "" {
		query.Mode = "keyword" // MVP: only BM25
	}

	queryID := uuid.New().String()

	hits, searchErr := h.searchService.Search(c.Request.Context(), &query)
	if searchErr != nil {
		c.JSON(http.StatusInternalServerError, gin.H{"error": "Search failed", "details": searchErr.Error()})
		return
	}

	hits.QueryID = queryID
	c.JSON(http.StatusOK, hits)
}
// GetDocument is the endpoint for fetching a single document by its "doc_id"
// query parameter. Retrieval itself is not implemented yet: a missing
// parameter answers 400, everything else answers 501.
func (h *Handler) GetDocument(c *gin.Context) {
	if c.Query("doc_id") == "" {
		c.JSON(http.StatusBadRequest, gin.H{"error": "doc_id parameter required"})
		return
	}

	// TODO: Implement document retrieval
	c.JSON(http.StatusNotImplemented, gin.H{"error": "Not implemented yet"})
}
// AuthMiddleware returns a gin middleware that enforces bearer-token
// authentication against the given API key.
//
// Behaviour:
//   - Requests whose path is exactly "/v1/health" bypass authentication.
//   - A missing Authorization header, or one that does not start with
//     "Bearer ", is rejected with 401 and the handler chain is aborted.
//   - When apiKey is non-empty the presented token must equal it, otherwise
//     the request is rejected with 401.
//   - When apiKey is empty the key check is skipped and any well-formed
//     bearer token is accepted.
func AuthMiddleware(apiKey string) gin.HandlerFunc {
	const bearerPrefix = "Bearer "

	// Converted once so the per-request comparison does not re-allocate it.
	expected := []byte(apiKey)

	return func(c *gin.Context) {
		// Skip auth for health endpoint
		if c.Request.URL.Path == "/v1/health" {
			c.Next()
			return
		}

		authHeader := c.GetHeader("Authorization")
		if authHeader == "" {
			c.AbortWithStatusJSON(http.StatusUnauthorized, gin.H{"error": "Missing Authorization header"})
			return
		}

		// Extract Bearer token
		if len(authHeader) < len(bearerPrefix) || authHeader[:len(bearerPrefix)] != bearerPrefix {
			c.AbortWithStatusJSON(http.StatusUnauthorized, gin.H{"error": "Invalid Authorization format"})
			return
		}
		token := authHeader[len(bearerPrefix):]

		// Compare in constant time so the response latency does not reveal
		// how many leading bytes of a guessed key were correct.
		if apiKey != "" && subtle.ConstantTimeCompare([]byte(token), expected) != 1 {
			c.AbortWithStatusJSON(http.StatusUnauthorized, gin.H{"error": "Invalid API key"})
			return
		}

		c.Next()
	}
}
// RateLimitMiddleware is the hook for request rate limiting. It is currently
// a pass-through that forwards every request to the next handler unchanged.
func RateLimitMiddleware() gin.HandlerFunc {
	// TODO: Implement proper rate limiting with Redis
	passThrough := func(c *gin.Context) {
		c.Next()
	}
	return passThrough
}
// SetupRoutes registers every API route on the given engine.
//
// The health endpoint is registered directly on the engine, outside the
// authenticated group. All other routes live in the "/v1" group, which runs
// AuthMiddleware (with apiKey) followed by RateLimitMiddleware.
func SetupRoutes(r *gin.Engine, h *Handler, apiKey string) {
	// Health endpoint (no auth)
	r.GET("/v1/health", h.Health)

	// Everything below requires a bearer token.
	api := r.Group("/v1")
	api.Use(AuthMiddleware(apiKey), RateLimitMiddleware())

	api.POST("/search", h.Search)
	api.GET("/document", h.GetDocument)

	// Admin routes
	SetupAdminRoutes(api, h)
}

View File

@@ -0,0 +1,645 @@
package handlers
import (
"bytes"
"encoding/json"
"net/http"
"net/http/httptest"
"os"
"path/filepath"
"testing"
"github.com/gin-gonic/gin"
)
// init switches gin to test mode when the test binary's package is
// initialized, before any test function runs.
func init() {
gin.SetMode(gin.TestMode)
}
// setupTestRouter returns a fresh gin engine with all API routes registered
// for the given handler and API key.
func setupTestRouter(h *Handler, apiKey string) *gin.Engine {
	engine := gin.New()
	SetupRoutes(engine, h, apiKey)
	return engine
}
// setupTestSeedStore initializes the package-level seed store in a temporary
// directory owned by the test and returns that directory. The test is
// aborted when initialization fails.
func setupTestSeedStore(t *testing.T) string {
	t.Helper()

	tmpDir := t.TempDir()
	// InitSeedStore replaces the global seed store used by the handlers.
	if initErr := InitSeedStore(tmpDir); initErr != nil {
		t.Fatalf("Failed to initialize seed store: %v", initErr)
	}
	return tmpDir
}
// TestHealthEndpoint is a placeholder for testing the /v1/health route.
// It is skipped unconditionally until a mock indexer client exists.
func TestHealthEndpoint(t *testing.T) {
// The Health handler calls h.indexClient.Health, so it cannot run against
// a Handler without an index client.
// A full integration test would need a mock OpenSearch client.
t.Skip("Skipping: requires mock indexer client for full test")
}
func TestAuthMiddleware_NoAuth(t *testing.T) {
h := &Handler{}
router := setupTestRouter(h, "test-api-key")
// Request without auth header
req, _ := http.NewRequest("POST", "/v1/search", bytes.NewBufferString(`{"q":"test"}`))
req.Header.Set("Content-Type", "application/json")
w := httptest.NewRecorder()
router.ServeHTTP(w, req)
if w.Code != http.StatusUnauthorized {
t.Errorf("Expected status 401, got %d", w.Code)
}
}
func TestAuthMiddleware_InvalidFormat(t *testing.T) {
h := &Handler{}
router := setupTestRouter(h, "test-api-key")
// Request with wrong auth format
req, _ := http.NewRequest("POST", "/v1/search", bytes.NewBufferString(`{"q":"test"}`))
req.Header.Set("Content-Type", "application/json")
req.Header.Set("Authorization", "Basic dGVzdDp0ZXN0") // Basic auth instead of Bearer
w := httptest.NewRecorder()
router.ServeHTTP(w, req)
if w.Code != http.StatusUnauthorized {
t.Errorf("Expected status 401, got %d", w.Code)
}
}
func TestAuthMiddleware_InvalidKey(t *testing.T) {
h := &Handler{}
router := setupTestRouter(h, "test-api-key")
// Request with wrong API key
req, _ := http.NewRequest("POST", "/v1/search", bytes.NewBufferString(`{"q":"test"}`))
req.Header.Set("Content-Type", "application/json")
req.Header.Set("Authorization", "Bearer wrong-key")
w := httptest.NewRecorder()
router.ServeHTTP(w, req)
if w.Code != http.StatusUnauthorized {
t.Errorf("Expected status 401, got %d", w.Code)
}
}
func TestAuthMiddleware_ValidKey(t *testing.T) {
h := &Handler{}
router := setupTestRouter(h, "test-api-key")
// Request with correct API key (search will fail due to no search service, but auth should pass)
req, _ := http.NewRequest("GET", "/v1/document?doc_id=test", nil)
req.Header.Set("Authorization", "Bearer test-api-key")
w := httptest.NewRecorder()
router.ServeHTTP(w, req)
// Auth should pass, endpoint returns 501 (not implemented)
if w.Code == http.StatusUnauthorized {
t.Error("Expected auth to pass, got 401")
}
}
// TestAuthMiddleware_HealthNoAuth is a placeholder for verifying that the
// health endpoint is reachable without credentials. It is skipped
// unconditionally until a mock indexer client exists.
func TestAuthMiddleware_HealthNoAuth(t *testing.T) {
// Health endpoint requires indexClient for health check
// Skipping because route calls h.indexClient.Health() which panics with nil
t.Skip("Skipping: requires mock indexer client for full test")
}
func TestGetDocument_MissingDocID(t *testing.T) {
h := &Handler{}
router := setupTestRouter(h, "test-key")
req, _ := http.NewRequest("GET", "/v1/document", nil)
req.Header.Set("Authorization", "Bearer test-key")
w := httptest.NewRecorder()
router.ServeHTTP(w, req)
if w.Code != http.StatusBadRequest {
t.Errorf("Expected status 400, got %d", w.Code)
}
}
// Admin Handler Tests
// TestSeedStore_InitAndLoad checks that initializing the seed store in an
// empty directory creates seeds.json and leaves the store non-empty.
func TestSeedStore_InitAndLoad(t *testing.T) {
	tmpDir := t.TempDir()

	// First initialization should create default seeds
	if initErr := InitSeedStore(tmpDir); initErr != nil {
		t.Fatalf("InitSeedStore failed: %v", initErr)
	}

	// The backing file must exist afterwards.
	_, statErr := os.Stat(filepath.Join(tmpDir, "seeds.json"))
	if os.IsNotExist(statErr) {
		t.Error("seeds.json was not created")
	}

	// And the store must hold at least one seed.
	if len(seedStore.GetAllSeeds()) == 0 {
		t.Error("Expected default seeds to be loaded")
	}
}
// TestSeedStore_CreateSeed checks that creating a seed generates an ID, keeps
// the URL and sets the creation timestamp.
func TestSeedStore_CreateSeed(t *testing.T) {
	setupTestSeedStore(t)

	input := SeedURL{
		URL:         "https://test.example.com",
		Name:        "Test Seed",
		Category:    "test",
		Description: "A test seed",
		TrustBoost:  0.5,
		Enabled:     true,
	}

	stored, createErr := seedStore.CreateSeed(input)
	if createErr != nil {
		t.Fatalf("CreateSeed failed: %v", createErr)
	}

	if stored.ID == "" {
		t.Error("Expected generated ID")
	}
	if stored.URL != input.URL {
		t.Errorf("Expected URL %q, got %q", input.URL, stored.URL)
	}
	if stored.CreatedAt.IsZero() {
		t.Error("Expected CreatedAt to be set")
	}
}
// TestSeedStore_GetSeed creates a seed and checks that it can be fetched
// again by its generated ID with the URL intact.
func TestSeedStore_GetSeed(t *testing.T) {
	setupTestSeedStore(t)

	// Create a seed first
	newSeed := SeedURL{
		URL:      "https://get-test.example.com",
		Name:     "Get Test",
		Category: "test",
	}
	// A failed setup must stop the test here rather than surface later as a
	// misleading "not found".
	created, err := seedStore.CreateSeed(newSeed)
	if err != nil {
		t.Fatalf("CreateSeed failed: %v", err)
	}

	// Get the seed
	retrieved, found := seedStore.GetSeed(created.ID)
	if !found {
		t.Fatal("Seed not found")
	}
	if retrieved.URL != newSeed.URL {
		t.Errorf("Expected URL %q, got %q", newSeed.URL, retrieved.URL)
	}
}
// TestSeedStore_GetSeed_NotFound checks that looking up an unknown ID reports
// "not found".
func TestSeedStore_GetSeed_NotFound(t *testing.T) {
	setupTestSeedStore(t)

	if _, exists := seedStore.GetSeed("nonexistent-id"); exists {
		t.Error("Expected seed not to be found")
	}
}
// TestSeedStore_UpdateSeed creates a seed, applies a partial update and
// checks that the supplied fields changed while the URL, which the update
// leaves empty, keeps its original value.
func TestSeedStore_UpdateSeed(t *testing.T) {
	setupTestSeedStore(t)

	// Create a seed first
	original := SeedURL{
		URL:      "https://update-test.example.com",
		Name:     "Original Name",
		Category: "test",
		Enabled:  true,
	}
	// A failed setup must stop the test here rather than surface later as a
	// misleading "not found".
	created, err := seedStore.CreateSeed(original)
	if err != nil {
		t.Fatalf("CreateSeed failed: %v", err)
	}

	// Update the seed
	updates := SeedURL{
		Name:       "Updated Name",
		TrustBoost: 0.75,
		Enabled:    false,
	}
	updated, found, err := seedStore.UpdateSeed(created.ID, updates)
	if err != nil {
		t.Fatalf("UpdateSeed failed: %v", err)
	}
	if !found {
		t.Fatal("Seed not found for update")
	}

	if updated.Name != "Updated Name" {
		t.Errorf("Expected name 'Updated Name', got %q", updated.Name)
	}
	if updated.TrustBoost != 0.75 {
		t.Errorf("Expected TrustBoost 0.75, got %f", updated.TrustBoost)
	}
	if updated.Enabled {
		t.Error("Expected Enabled to be false")
	}
	// URL should remain unchanged since we didn't provide it
	if updated.URL != original.URL {
		t.Errorf("URL should remain unchanged, expected %q, got %q", original.URL, updated.URL)
	}
}
// TestSeedStore_UpdateSeed_NotFound checks that updating an unknown ID is not
// an error but reports "not found".
func TestSeedStore_UpdateSeed_NotFound(t *testing.T) {
	setupTestSeedStore(t)

	_, exists, updateErr := seedStore.UpdateSeed("nonexistent-id", SeedURL{Name: "New Name"})
	if updateErr != nil {
		t.Fatalf("Unexpected error: %v", updateErr)
	}
	if exists {
		t.Error("Expected seed not to be found")
	}
}
// TestSeedStore_DeleteSeed creates a seed, deletes it and checks that it can
// no longer be fetched.
func TestSeedStore_DeleteSeed(t *testing.T) {
	setupTestSeedStore(t)

	// Create a seed first
	newSeed := SeedURL{
		URL:      "https://delete-test.example.com",
		Name:     "Delete Test",
		Category: "test",
	}
	// A failed setup must stop the test here rather than surface later as a
	// misleading delete failure.
	created, err := seedStore.CreateSeed(newSeed)
	if err != nil {
		t.Fatalf("CreateSeed failed: %v", err)
	}

	// Delete the seed
	if deleted := seedStore.DeleteSeed(created.ID); !deleted {
		t.Error("Expected delete to succeed")
	}

	// Verify it's gone
	if _, found := seedStore.GetSeed(created.ID); found {
		t.Error("Seed should have been deleted")
	}
}
// TestSeedStore_DeleteSeed_NotFound checks that deleting an unknown ID
// reports false.
func TestSeedStore_DeleteSeed_NotFound(t *testing.T) {
	setupTestSeedStore(t)

	if removed := seedStore.DeleteSeed("nonexistent-id"); removed {
		t.Error("Expected delete to return false for nonexistent seed")
	}
}
// TestSeedStore_Persistence creates a seed, discards the in-memory store,
// re-initializes it from the same directory and checks that the seed is
// still there with the same URL.
func TestSeedStore_Persistence(t *testing.T) {
	dir := t.TempDir()

	// Create and populate seed store
	if err := InitSeedStore(dir); err != nil {
		t.Fatal(err)
	}
	newSeed := SeedURL{
		URL:      "https://persist-test.example.com",
		Name:     "Persistence Test",
		Category: "test",
	}
	created, err := seedStore.CreateSeed(newSeed)
	if err != nil {
		t.Fatal(err)
	}

	// Re-initialize from the same directory
	seedStore = nil
	if err := InitSeedStore(dir); err != nil {
		t.Fatal(err)
	}

	// Check if the seed persisted. A miss is fatal, as in
	// TestSeedStore_GetSeed, so the URL comparison below never runs on the
	// value returned for a missing seed.
	retrieved, found := seedStore.GetSeed(created.ID)
	if !found {
		t.Fatal("Seed should have persisted")
	}
	if retrieved.URL != newSeed.URL {
		t.Errorf("Persisted seed URL mismatch: expected %q, got %q", newSeed.URL, retrieved.URL)
	}
}
// TestAdminGetSeeds verifies that GET /v1/admin/seeds answers 200 with a
// non-empty JSON array of seeds.
func TestAdminGetSeeds(t *testing.T) {
	dir := setupTestSeedStore(t)
	h := &Handler{}
	router := gin.New()
	SetupRoutes(router, h, "test-key")

	// Initialize seed store for the test; a failure here would invalidate
	// every assertion below, so stop immediately.
	if err := InitSeedStore(dir); err != nil {
		t.Fatalf("InitSeedStore failed: %v", err)
	}

	req, _ := http.NewRequest("GET", "/v1/admin/seeds", nil)
	req.Header.Set("Authorization", "Bearer test-key")
	w := httptest.NewRecorder()
	router.ServeHTTP(w, req)

	if w.Code != http.StatusOK {
		t.Errorf("Expected status 200, got %d", w.Code)
	}

	var seeds []SeedURL
	if err := json.Unmarshal(w.Body.Bytes(), &seeds); err != nil {
		t.Fatalf("Failed to parse response: %v", err)
	}

	// Should have default seeds
	if len(seeds) == 0 {
		t.Error("Expected seeds to be returned")
	}
}
// TestAdminCreateSeed verifies that POST /v1/admin/seeds answers 201 and
// returns the created seed with a generated ID and the submitted URL.
func TestAdminCreateSeed(t *testing.T) {
	dir := setupTestSeedStore(t)
	h := &Handler{}
	router := gin.New()
	SetupRoutes(router, h, "test-key")

	// Abort early if the store cannot be initialised; the request below would
	// otherwise fail for an unrelated reason.
	if err := InitSeedStore(dir); err != nil {
		t.Fatalf("InitSeedStore failed: %v", err)
	}

	newSeed := map[string]interface{}{
		"url":         "https://new-seed.example.com",
		"name":        "New Seed",
		"category":    "test",
		"description": "Test description",
		"trustBoost":  0.5,
		"enabled":     true,
	}
	body, _ := json.Marshal(newSeed)

	req, _ := http.NewRequest("POST", "/v1/admin/seeds", bytes.NewBuffer(body))
	req.Header.Set("Content-Type", "application/json")
	req.Header.Set("Authorization", "Bearer test-key")
	w := httptest.NewRecorder()
	router.ServeHTTP(w, req)

	if w.Code != http.StatusCreated {
		t.Errorf("Expected status 201, got %d: %s", w.Code, w.Body.String())
	}

	var created SeedURL
	if err := json.Unmarshal(w.Body.Bytes(), &created); err != nil {
		t.Fatalf("Failed to parse response: %v", err)
	}
	if created.ID == "" {
		t.Error("Expected ID to be generated")
	}
	if created.URL != "https://new-seed.example.com" {
		t.Errorf("Expected URL to match, got %q", created.URL)
	}
}
// TestAdminCreateSeed_MissingURL verifies that POST /v1/admin/seeds rejects a
// payload without a "url" field with 400.
func TestAdminCreateSeed_MissingURL(t *testing.T) {
	dir := setupTestSeedStore(t)
	h := &Handler{}
	router := gin.New()
	SetupRoutes(router, h, "test-key")

	// A broken store must not be mistaken for a validation failure.
	if err := InitSeedStore(dir); err != nil {
		t.Fatalf("InitSeedStore failed: %v", err)
	}

	newSeed := map[string]interface{}{
		"name":     "No URL Seed",
		"category": "test",
	}
	body, _ := json.Marshal(newSeed)

	req, _ := http.NewRequest("POST", "/v1/admin/seeds", bytes.NewBuffer(body))
	req.Header.Set("Content-Type", "application/json")
	req.Header.Set("Authorization", "Bearer test-key")
	w := httptest.NewRecorder()
	router.ServeHTTP(w, req)

	if w.Code != http.StatusBadRequest {
		t.Errorf("Expected status 400 for missing URL, got %d", w.Code)
	}
}
// TestAdminUpdateSeed verifies that PUT /v1/admin/seeds/:id answers 200 and
// returns the seed with the updated name.
func TestAdminUpdateSeed(t *testing.T) {
	dir := setupTestSeedStore(t)
	h := &Handler{}
	router := gin.New()
	SetupRoutes(router, h, "test-key")
	if err := InitSeedStore(dir); err != nil {
		t.Fatalf("InitSeedStore failed: %v", err)
	}

	// Create a seed first. Without a stored seed the PUT below would target
	// an unusable ID, so a create failure is fatal.
	newSeed := SeedURL{
		URL:      "https://update-api-test.example.com",
		Name:     "API Update Test",
		Category: "test",
	}
	created, err := seedStore.CreateSeed(newSeed)
	if err != nil {
		t.Fatalf("CreateSeed failed: %v", err)
	}

	// Update via API
	updates := map[string]interface{}{
		"name":       "Updated via API",
		"trustBoost": 0.8,
	}
	body, _ := json.Marshal(updates)

	req, _ := http.NewRequest("PUT", "/v1/admin/seeds/"+created.ID, bytes.NewBuffer(body))
	req.Header.Set("Content-Type", "application/json")
	req.Header.Set("Authorization", "Bearer test-key")
	w := httptest.NewRecorder()
	router.ServeHTTP(w, req)

	if w.Code != http.StatusOK {
		t.Errorf("Expected status 200, got %d: %s", w.Code, w.Body.String())
	}

	var updated SeedURL
	if err := json.Unmarshal(w.Body.Bytes(), &updated); err != nil {
		t.Fatalf("Failed to parse response: %v", err)
	}
	if updated.Name != "Updated via API" {
		t.Errorf("Expected name 'Updated via API', got %q", updated.Name)
	}
}
// TestAdminDeleteSeed verifies that DELETE /v1/admin/seeds/:id answers 200 and
// removes the seed from the store.
func TestAdminDeleteSeed(t *testing.T) {
	dir := setupTestSeedStore(t)
	h := &Handler{}
	router := gin.New()
	SetupRoutes(router, h, "test-key")
	if err := InitSeedStore(dir); err != nil {
		t.Fatalf("InitSeedStore failed: %v", err)
	}

	// Create a seed first; the delete request is meaningless if this fails.
	newSeed := SeedURL{
		URL:      "https://delete-api-test.example.com",
		Name:     "API Delete Test",
		Category: "test",
	}
	created, err := seedStore.CreateSeed(newSeed)
	if err != nil {
		t.Fatalf("CreateSeed failed: %v", err)
	}

	// Delete via API
	req, _ := http.NewRequest("DELETE", "/v1/admin/seeds/"+created.ID, nil)
	req.Header.Set("Authorization", "Bearer test-key")
	w := httptest.NewRecorder()
	router.ServeHTTP(w, req)

	if w.Code != http.StatusOK {
		t.Errorf("Expected status 200, got %d", w.Code)
	}

	// Verify it's deleted
	if _, found := seedStore.GetSeed(created.ID); found {
		t.Error("Seed should have been deleted")
	}
}
// TestAdminDeleteSeed_NotFound verifies that deleting an unknown seed ID via
// the API answers 404.
func TestAdminDeleteSeed_NotFound(t *testing.T) {
	dir := setupTestSeedStore(t)
	h := &Handler{}
	router := gin.New()
	SetupRoutes(router, h, "test-key")

	// Make sure a store initialisation problem is not misread as a 404 case.
	if err := InitSeedStore(dir); err != nil {
		t.Fatalf("InitSeedStore failed: %v", err)
	}

	req, _ := http.NewRequest("DELETE", "/v1/admin/seeds/nonexistent-id", nil)
	req.Header.Set("Authorization", "Bearer test-key")
	w := httptest.NewRecorder()
	router.ServeHTTP(w, req)

	if w.Code != http.StatusNotFound {
		t.Errorf("Expected status 404, got %d", w.Code)
	}
}
// TestAdminGetStats verifies that GET /v1/admin/stats answers 200 with a
// CrawlStats payload whose status and per-category map are populated.
func TestAdminGetStats(t *testing.T) {
	dir := setupTestSeedStore(t)
	h := &Handler{}
	router := gin.New()
	SetupRoutes(router, h, "test-key")
	if err := InitSeedStore(dir); err != nil {
		t.Fatalf("InitSeedStore failed: %v", err)
	}

	req, _ := http.NewRequest("GET", "/v1/admin/stats", nil)
	req.Header.Set("Authorization", "Bearer test-key")
	w := httptest.NewRecorder()
	router.ServeHTTP(w, req)

	if w.Code != http.StatusOK {
		t.Errorf("Expected status 200, got %d", w.Code)
	}

	var stats CrawlStats
	if err := json.Unmarshal(w.Body.Bytes(), &stats); err != nil {
		t.Fatalf("Failed to parse response: %v", err)
	}

	// Check that stats structure is populated
	if stats.CrawlStatus == "" {
		t.Error("Expected CrawlStatus to be set")
	}
	if stats.DocumentsPerCategory == nil {
		t.Error("Expected DocumentsPerCategory to be set")
	}
}
// TestAdminStartCrawl verifies that POST /v1/admin/crawl/start answers 202
// with status "started" when the package-level crawl status is "idle".
func TestAdminStartCrawl(t *testing.T) {
	dir := setupTestSeedStore(t)
	h := &Handler{}
	router := gin.New()
	SetupRoutes(router, h, "test-key")
	if err := InitSeedStore(dir); err != nil {
		t.Fatalf("InitSeedStore failed: %v", err)
	}

	// Reset crawl status so the result does not depend on test ordering.
	crawlStatus = "idle"

	req, _ := http.NewRequest("POST", "/v1/admin/crawl/start", nil)
	req.Header.Set("Authorization", "Bearer test-key")
	w := httptest.NewRecorder()
	router.ServeHTTP(w, req)

	if w.Code != http.StatusAccepted {
		t.Errorf("Expected status 202, got %d: %s", w.Code, w.Body.String())
	}

	var response map[string]interface{}
	if err := json.Unmarshal(w.Body.Bytes(), &response); err != nil {
		t.Fatalf("Failed to parse response: %v", err)
	}
	if response["status"] != "started" {
		t.Errorf("Expected status 'started', got %v", response["status"])
	}
}
// TestAdminStartCrawl_AlreadyRunning verifies that starting a crawl while the
// package-level crawl status is "running" answers 409.
func TestAdminStartCrawl_AlreadyRunning(t *testing.T) {
	dir := setupTestSeedStore(t)
	h := &Handler{}
	router := gin.New()
	SetupRoutes(router, h, "test-key")
	if err := InitSeedStore(dir); err != nil {
		t.Fatalf("InitSeedStore failed: %v", err)
	}

	// Set crawl status to running. The reset is registered as a cleanup so the
	// shared global returns to "idle" for other tests even if this test
	// aborts early or panics.
	crawlStatus = "running"
	t.Cleanup(func() { crawlStatus = "idle" })

	req, _ := http.NewRequest("POST", "/v1/admin/crawl/start", nil)
	req.Header.Set("Authorization", "Bearer test-key")
	w := httptest.NewRecorder()
	router.ServeHTTP(w, req)

	if w.Code != http.StatusConflict {
		t.Errorf("Expected status 409, got %d", w.Code)
	}
}
// TestConcurrentSeedAccess runs five readers and five writers against the
// seed store at the same time. It makes no assertions; it passes when every
// goroutine finishes (run with -race to detect data races).
func TestConcurrentSeedAccess(t *testing.T) {
	setupTestSeedStore(t)

	const (
		readers = 5
		writers = 5
	)
	// Buffered to the total goroutine count so no sender ever blocks.
	finished := make(chan bool, readers+writers)

	for r := 0; r < readers; r++ {
		go func() {
			seedStore.GetAllSeeds()
			finished <- true
		}()
	}

	for w := 0; w < writers; w++ {
		go func(n int) {
			// 'A'+n yields a distinct letter per writer, so each URL is unique.
			seedStore.CreateSeed(SeedURL{
				URL:      "https://concurrent-" + string(rune('A'+n)) + ".example.com",
				Name:     "Concurrent Test",
				Category: "test",
			})
			finished <- true
		}(w)
	}

	// Wait for every reader and writer to signal completion.
	for pending := readers + writers; pending > 0; pending-- {
		<-finished
	}
}

View File

@@ -0,0 +1,207 @@
package handlers
import (
"net/http"
"github.com/breakpilot/edu-search-service/internal/orchestrator"
"github.com/gin-gonic/gin"
"github.com/google/uuid"
)
// OrchestratorHandler bundles the dependencies used by the crawl orchestrator
// HTTP endpoints (status, start/stop, queue management, pause/resume).
type OrchestratorHandler struct {
// orchestrator is the service the handler methods delegate to.
orchestrator *orchestrator.Orchestrator
// repo is stored by NewOrchestratorHandler; the handler methods defined
// alongside this type do not read it directly.
repo orchestrator.Repository
}
// NewOrchestratorHandler returns a handler wired to the given orchestrator and
// repository. Neither argument is validated.
func NewOrchestratorHandler(orch *orchestrator.Orchestrator, repo orchestrator.Repository) *OrchestratorHandler {
	handler := new(OrchestratorHandler)
	handler.orchestrator = orch
	handler.repo = repo
	return handler
}
// AddToQueueRequest is the JSON body accepted by the AddToQueue handler.
type AddToQueueRequest struct {
// UniversityID is required and must parse as a UUID.
UniversityID string `json:"university_id" binding:"required"`
// Priority is optional; AddToQueue replaces a value of 0 with 5.
Priority int `json:"priority"`
// InitiatedBy is optional; AddToQueue replaces an empty value with "api".
InitiatedBy string `json:"initiated_by"`
}
// GetStatus writes the orchestrator's current status as JSON with 200, or a
// 500 error object when the status cannot be obtained.
func (h *OrchestratorHandler) GetStatus(c *gin.Context) {
	current, statusErr := h.orchestrator.Status(c.Request.Context())
	if statusErr == nil {
		c.JSON(http.StatusOK, current)
		return
	}

	c.JSON(http.StatusInternalServerError, gin.H{
		"error":   "Failed to get status",
		"details": statusErr.Error(),
	})
}
// GetQueue writes every crawl queue item together with the item count as
// JSON ({"queue": [...], "count": n}), or a 500 error object on failure.
func (h *OrchestratorHandler) GetQueue(c *gin.Context) {
	entries, queueErr := h.orchestrator.GetQueue(c.Request.Context())
	if queueErr != nil {
		c.JSON(http.StatusInternalServerError, gin.H{
			"error":   "Failed to get queue",
			"details": queueErr.Error(),
		})
		return
	}

	payload := gin.H{
		"queue": entries,
		"count": len(entries),
	}
	c.JSON(http.StatusOK, payload)
}
// AddToQueue enqueues a university for crawling.
//
// The JSON body is bound to AddToQueueRequest. A malformed body or a
// university_id that is not a UUID yields 400. A priority of 0 is replaced by
// 5 and an empty initiated_by by "api". On success the created queue item is
// returned with 201; an orchestrator failure yields 500.
func (h *OrchestratorHandler) AddToQueue(c *gin.Context) {
	var payload AddToQueueRequest
	if bindErr := c.ShouldBindJSON(&payload); bindErr != nil {
		c.JSON(http.StatusBadRequest, gin.H{"error": "Invalid request body", "details": bindErr.Error()})
		return
	}

	parsedID, parseErr := uuid.Parse(payload.UniversityID)
	if parseErr != nil {
		c.JSON(http.StatusBadRequest, gin.H{"error": "Invalid university_id format"})
		return
	}

	// Fill in defaults for the optional fields.
	if payload.Priority == 0 {
		payload.Priority = 5
	}
	if payload.InitiatedBy == "" {
		payload.InitiatedBy = "api"
	}

	queued, addErr := h.orchestrator.AddUniversity(c.Request.Context(), parsedID, payload.Priority, payload.InitiatedBy)
	if addErr != nil {
		c.JSON(http.StatusInternalServerError, gin.H{"error": "Failed to add to queue", "details": addErr.Error()})
		return
	}

	c.JSON(http.StatusCreated, queued)
}
// RemoveFromQueue deletes the university named by the "id" path parameter
// from the crawl queue. A missing or non-UUID id yields 400, an orchestrator
// failure 500, and success 200 with {"deleted": true, "university_id": id}.
func (h *OrchestratorHandler) RemoveFromQueue(c *gin.Context) {
	rawID := c.Param("id")
	if len(rawID) == 0 {
		c.JSON(http.StatusBadRequest, gin.H{"error": "University ID required"})
		return
	}

	parsedID, parseErr := uuid.Parse(rawID)
	if parseErr != nil {
		c.JSON(http.StatusBadRequest, gin.H{"error": "Invalid university_id format"})
		return
	}

	removeErr := h.orchestrator.RemoveUniversity(c.Request.Context(), parsedID)
	if removeErr != nil {
		c.JSON(http.StatusInternalServerError, gin.H{"error": "Failed to remove from queue", "details": removeErr.Error()})
		return
	}

	c.JSON(http.StatusOK, gin.H{"deleted": true, "university_id": rawID})
}
// Start starts the orchestrator. Any error returned by the orchestrator is
// reported as 409 with the error text; success is reported as 200.
func (h *OrchestratorHandler) Start(c *gin.Context) {
	startErr := h.orchestrator.Start()
	if startErr != nil {
		c.JSON(http.StatusConflict, gin.H{"error": startErr.Error()})
		return
	}

	reply := gin.H{
		"status":  "started",
		"message": "Orchestrator started successfully",
	}
	c.JSON(http.StatusOK, reply)
}
// Stop stops the orchestrator. Any error returned by the orchestrator is
// reported as 409 with the error text; success is reported as 200.
func (h *OrchestratorHandler) Stop(c *gin.Context) {
	stopErr := h.orchestrator.Stop()
	if stopErr != nil {
		c.JSON(http.StatusConflict, gin.H{"error": stopErr.Error()})
		return
	}

	reply := gin.H{
		"status":  "stopped",
		"message": "Orchestrator stopped successfully",
	}
	c.JSON(http.StatusOK, reply)
}
// PauseUniversity pauses crawling for the university named by the "id" path
// parameter. A missing or non-UUID id yields 400, an orchestrator failure 500,
// and success 200 with {"status": "paused", "university_id": id}.
func (h *OrchestratorHandler) PauseUniversity(c *gin.Context) {
	rawID := c.Param("id")
	if len(rawID) == 0 {
		c.JSON(http.StatusBadRequest, gin.H{"error": "University ID required"})
		return
	}

	parsedID, parseErr := uuid.Parse(rawID)
	if parseErr != nil {
		c.JSON(http.StatusBadRequest, gin.H{"error": "Invalid university_id format"})
		return
	}

	pauseErr := h.orchestrator.PauseUniversity(c.Request.Context(), parsedID)
	if pauseErr != nil {
		c.JSON(http.StatusInternalServerError, gin.H{"error": "Failed to pause crawl", "details": pauseErr.Error()})
		return
	}

	reply := gin.H{
		"status":        "paused",
		"university_id": rawID,
	}
	c.JSON(http.StatusOK, reply)
}
// ResumeUniversity resumes crawling for the university named by the "id" path
// parameter. A missing or non-UUID id yields 400, an orchestrator failure 500,
// and success 200 with {"status": "resumed", "university_id": id}.
func (h *OrchestratorHandler) ResumeUniversity(c *gin.Context) {
	rawID := c.Param("id")
	if len(rawID) == 0 {
		c.JSON(http.StatusBadRequest, gin.H{"error": "University ID required"})
		return
	}

	parsedID, parseErr := uuid.Parse(rawID)
	if parseErr != nil {
		c.JSON(http.StatusBadRequest, gin.H{"error": "Invalid university_id format"})
		return
	}

	resumeErr := h.orchestrator.ResumeUniversity(c.Request.Context(), parsedID)
	if resumeErr != nil {
		c.JSON(http.StatusInternalServerError, gin.H{"error": "Failed to resume crawl", "details": resumeErr.Error()})
		return
	}

	reply := gin.H{
		"status":        "resumed",
		"university_id": rawID,
	}
	c.JSON(http.StatusOK, reply)
}
// SetupOrchestratorRoutes registers the orchestrator endpoints under a
// "/crawl" sub-group of r:
//
//	GET    /crawl/status            orchestrator status
//	POST   /crawl/start             start the orchestrator
//	POST   /crawl/stop              stop the orchestrator
//	GET    /crawl/queue             list queue items
//	POST   /crawl/queue             add a university
//	DELETE /crawl/queue/:id         remove a university
//	POST   /crawl/queue/:id/pause   pause a university
//	POST   /crawl/queue/:id/resume  resume a university
func SetupOrchestratorRoutes(r *gin.RouterGroup, h *OrchestratorHandler) {
	group := r.Group("/crawl")

	// Orchestrator control
	group.GET("/status", h.GetStatus)
	group.POST("/start", h.Start)
	group.POST("/stop", h.Stop)

	// Queue management
	group.GET("/queue", h.GetQueue)
	group.POST("/queue", h.AddToQueue)
	group.DELETE("/queue/:id", h.RemoveFromQueue)

	// Individual university control
	group.POST("/queue/:id/pause", h.PauseUniversity)
	group.POST("/queue/:id/resume", h.ResumeUniversity)
}

View File

@@ -0,0 +1,659 @@
package handlers
import (
"bytes"
"context"
"encoding/json"
"net/http"
"net/http/httptest"
"testing"
"time"
"github.com/breakpilot/edu-search-service/internal/orchestrator"
"github.com/gin-gonic/gin"
"github.com/google/uuid"
)
// init switches gin to test mode when the test binary is initialised.
func init() {
gin.SetMode(gin.TestMode)
}
// MockRepository implements orchestrator.Repository for testing.
// It keeps the queue in a plain slice and performs no locking.
type MockRepository struct {
// items is the in-memory queue, in insertion order.
items []orchestrator.CrawlQueueItem
// failOnAdd makes AddToQueue return context.DeadlineExceeded.
failOnAdd bool
// failOnUpdate makes UpdateQueueItem return context.DeadlineExceeded.
failOnUpdate bool
}
// NewMockRepository returns a mock repository with an empty, non-nil queue
// and both failure switches turned off.
func NewMockRepository() *MockRepository {
	repo := new(MockRepository)
	repo.items = []orchestrator.CrawlQueueItem{}
	return repo
}
// GetQueueItems returns the mock's internal queue slice itself (not a copy)
// and never fails. ctx is ignored.
func (m *MockRepository) GetQueueItems(ctx context.Context) ([]orchestrator.CrawlQueueItem, error) {
return m.items, nil
}
// GetNextInQueue returns a pointer to the first queue item whose phase is
// neither completed, failed nor paused. It returns (nil, nil) when no such
// item exists. The pointer refers to the element stored in the mock's slice.
func (m *MockRepository) GetNextInQueue(ctx context.Context) (*orchestrator.CrawlQueueItem, error) {
	for idx := range m.items {
		phase := m.items[idx].CurrentPhase
		skip := phase == orchestrator.PhaseCompleted ||
			phase == orchestrator.PhaseFailed ||
			phase == orchestrator.PhasePaused
		if skip {
			continue
		}
		return &m.items[idx], nil
	}
	return nil, nil
}
// AddToQueue appends a new pending item for universityID to the queue and
// returns a pointer to a copy of it. The queue position is the new length of
// the queue. When failOnAdd is set it returns context.DeadlineExceeded and
// leaves the queue untouched. ctx and initiatedBy are ignored.
func (m *MockRepository) AddToQueue(ctx context.Context, universityID uuid.UUID, priority int, initiatedBy string) (*orchestrator.CrawlQueueItem, error) {
	if m.failOnAdd {
		return nil, context.DeadlineExceeded
	}

	pos := len(m.items) + 1

	var entry orchestrator.CrawlQueueItem
	entry.ID = uuid.New()
	entry.UniversityID = universityID
	entry.QueuePosition = &pos
	entry.Priority = priority
	entry.CurrentPhase = orchestrator.PhasePending
	entry.CreatedAt = time.Now()
	entry.UpdatedAt = time.Now()

	m.items = append(m.items, entry)
	return &entry, nil
}
// RemoveFromQueue drops the first queue item belonging to universityID.
// It returns nil whether or not a matching item was found.
func (m *MockRepository) RemoveFromQueue(ctx context.Context, universityID uuid.UUID) error {
	for idx := range m.items {
		if m.items[idx].UniversityID != universityID {
			continue
		}
		m.items = append(m.items[:idx], m.items[idx+1:]...)
		return nil
	}
	return nil
}
// UpdateQueueItem overwrites the first stored item that has the same
// UniversityID as item. When failOnUpdate is set it returns
// context.DeadlineExceeded instead. A missing item is not an error.
func (m *MockRepository) UpdateQueueItem(ctx context.Context, item *orchestrator.CrawlQueueItem) error {
	if m.failOnUpdate {
		return context.DeadlineExceeded
	}

	for idx := range m.items {
		if m.items[idx].UniversityID != item.UniversityID {
			continue
		}
		m.items[idx] = *item
		return nil
	}
	return nil
}
// PauseQueueItem sets the phase of the first item belonging to universityID
// to paused, whatever its previous phase. A missing item is not an error.
func (m *MockRepository) PauseQueueItem(ctx context.Context, universityID uuid.UUID) error {
	for idx := range m.items {
		entry := &m.items[idx]
		if entry.UniversityID != universityID {
			continue
		}
		entry.CurrentPhase = orchestrator.PhasePaused
		return nil
	}
	return nil
}
// ResumeQueueItem moves the first item that belongs to universityID and is
// currently paused back to the pending phase. Items in any other phase are
// left unchanged, and a missing item is not an error.
func (m *MockRepository) ResumeQueueItem(ctx context.Context, universityID uuid.UUID) error {
	for idx := range m.items {
		entry := &m.items[idx]
		if entry.UniversityID != universityID || entry.CurrentPhase != orchestrator.PhasePaused {
			continue
		}
		entry.CurrentPhase = orchestrator.PhasePending
		return nil
	}
	return nil
}
// CompletePhase is a no-op in the mock: it ignores all arguments, does not
// touch the queue and always returns nil.
func (m *MockRepository) CompletePhase(ctx context.Context, universityID uuid.UUID, phase orchestrator.CrawlPhase, count int) error {
return nil
}
// FailPhase is a no-op in the mock: it ignores all arguments, does not touch
// the queue and always returns nil.
func (m *MockRepository) FailPhase(ctx context.Context, universityID uuid.UUID, phase orchestrator.CrawlPhase, errMsg string) error {
return nil
}
// GetCompletedTodayCount returns the number of completed items whose
// CompletedAt timestamp falls on or after the start of the current day in the
// local time zone. Items without a CompletedAt timestamp are not counted.
// It never returns an error; ctx is ignored.
func (m *MockRepository) GetCompletedTodayCount(ctx context.Context) (int, error) {
	// time.Time.Truncate works on absolute time since the zero time, so
	// Truncate(24*time.Hour) lands on a UTC day boundary rather than on local
	// midnight. Build the start of the local day explicitly instead.
	now := time.Now()
	startOfDay := time.Date(now.Year(), now.Month(), now.Day(), 0, 0, 0, 0, now.Location())

	count := 0
	for i := range m.items {
		item := &m.items[i]
		if item.CurrentPhase != orchestrator.PhaseCompleted || item.CompletedAt == nil {
			continue
		}
		// !Before also counts an item completed exactly at midnight.
		if !item.CompletedAt.Before(startOfDay) {
			count++
		}
	}
	return count, nil
}
// GetTotalProcessedCount returns how many queue items are in the completed
// phase. It never returns an error; ctx is ignored.
func (m *MockRepository) GetTotalProcessedCount(ctx context.Context) (int, error) {
	completed := 0
	for idx := range m.items {
		if m.items[idx].CurrentPhase != orchestrator.PhaseCompleted {
			continue
		}
		completed++
	}
	return completed, nil
}
// MockStaffCrawler implements orchestrator.StaffCrawlerInterface for tests.
// It is stateless; each method returns a fixed progress value and no error.
type MockStaffCrawler struct{}
// DiscoverSampleProfessor returns a fixed discovery-phase progress with one
// item found. Both arguments are ignored.
func (m *MockStaffCrawler) DiscoverSampleProfessor(ctx context.Context, universityID uuid.UUID) (*orchestrator.CrawlProgress, error) {
	progress := new(orchestrator.CrawlProgress)
	progress.Phase = orchestrator.PhaseDiscovery
	progress.ItemsFound = 1
	return progress, nil
}
// CrawlProfessors returns a fixed professors-phase progress with ten items
// found. Both arguments are ignored.
func (m *MockStaffCrawler) CrawlProfessors(ctx context.Context, universityID uuid.UUID) (*orchestrator.CrawlProgress, error) {
	progress := new(orchestrator.CrawlProgress)
	progress.Phase = orchestrator.PhaseProfessors
	progress.ItemsFound = 10
	return progress, nil
}
// CrawlAllStaff returns a fixed all-staff-phase progress with fifty items
// found. Both arguments are ignored.
func (m *MockStaffCrawler) CrawlAllStaff(ctx context.Context, universityID uuid.UUID) (*orchestrator.CrawlProgress, error) {
	progress := new(orchestrator.CrawlProgress)
	progress.Phase = orchestrator.PhaseAllStaff
	progress.ItemsFound = 50
	return progress, nil
}
// MockPubCrawler implements orchestrator.PublicationCrawlerInterface for
// tests. It is stateless and returns a fixed progress value and no error.
type MockPubCrawler struct{}
// CrawlPublicationsForUniversity returns a fixed publications-phase progress
// with one hundred items found. Both arguments are ignored.
func (m *MockPubCrawler) CrawlPublicationsForUniversity(ctx context.Context, universityID uuid.UUID) (*orchestrator.CrawlProgress, error) {
	progress := new(orchestrator.CrawlProgress)
	progress.Phase = orchestrator.PhasePublications
	progress.ItemsFound = 100
	return progress, nil
}
// setupOrchestratorTestRouter builds a gin engine that serves the
// orchestrator routes under /v1, protected by AuthMiddleware with apiKey.
func setupOrchestratorTestRouter(orch *orchestrator.Orchestrator, repo orchestrator.Repository, apiKey string) *gin.Engine {
	engine := gin.New()

	api := engine.Group("/v1")
	api.Use(AuthMiddleware(apiKey))
	SetupOrchestratorRoutes(api, NewOrchestratorHandler(orch, repo))

	return engine
}
// TestOrchestratorGetStatus checks that GET /v1/crawl/status answers 200 and
// reports a freshly constructed orchestrator as not running.
func TestOrchestratorGetStatus(t *testing.T) {
	repo := NewMockRepository()
	orch := orchestrator.NewOrchestrator(repo, &MockStaffCrawler{}, &MockPubCrawler{})
	router := setupOrchestratorTestRouter(orch, repo, "test-key")

	request, _ := http.NewRequest("GET", "/v1/crawl/status", nil)
	request.Header.Set("Authorization", "Bearer test-key")
	recorder := httptest.NewRecorder()
	router.ServeHTTP(recorder, request)

	if recorder.Code != http.StatusOK {
		t.Errorf("Expected status 200, got %d: %s", recorder.Code, recorder.Body.String())
	}

	var got orchestrator.OrchestratorStatus
	if err := json.Unmarshal(recorder.Body.Bytes(), &got); err != nil {
		t.Fatalf("Failed to parse response: %v", err)
	}
	if got.IsRunning {
		t.Error("Expected orchestrator to not be running initially")
	}
}
// TestOrchestratorGetQueue checks that GET /v1/crawl/queue answers 200 and
// reports a count of zero for an empty repository.
func TestOrchestratorGetQueue(t *testing.T) {
	repo := NewMockRepository()
	orch := orchestrator.NewOrchestrator(repo, &MockStaffCrawler{}, &MockPubCrawler{})
	router := setupOrchestratorTestRouter(orch, repo, "test-key")

	request, _ := http.NewRequest("GET", "/v1/crawl/queue", nil)
	request.Header.Set("Authorization", "Bearer test-key")
	recorder := httptest.NewRecorder()
	router.ServeHTTP(recorder, request)

	if recorder.Code != http.StatusOK {
		t.Errorf("Expected status 200, got %d: %s", recorder.Code, recorder.Body.String())
	}

	var got struct {
		Queue []orchestrator.CrawlQueueItem `json:"queue"`
		Count int                           `json:"count"`
	}
	if err := json.Unmarshal(recorder.Body.Bytes(), &got); err != nil {
		t.Fatalf("Failed to parse response: %v", err)
	}
	if got.Count != 0 {
		t.Errorf("Expected empty queue, got %d items", got.Count)
	}
}
// TestOrchestratorAddToQueue checks that POST /v1/crawl/queue answers 201 and
// echoes the submitted university ID and priority in the created item.
func TestOrchestratorAddToQueue(t *testing.T) {
	repo := NewMockRepository()
	orch := orchestrator.NewOrchestrator(repo, &MockStaffCrawler{}, &MockPubCrawler{})
	router := setupOrchestratorTestRouter(orch, repo, "test-key")

	wantID := uuid.New()
	payload, _ := json.Marshal(AddToQueueRequest{
		UniversityID: wantID.String(),
		Priority:     7,
		InitiatedBy:  "test-user",
	})

	request, _ := http.NewRequest("POST", "/v1/crawl/queue", bytes.NewBuffer(payload))
	request.Header.Set("Content-Type", "application/json")
	request.Header.Set("Authorization", "Bearer test-key")
	recorder := httptest.NewRecorder()
	router.ServeHTTP(recorder, request)

	if recorder.Code != http.StatusCreated {
		t.Errorf("Expected status 201, got %d: %s", recorder.Code, recorder.Body.String())
	}

	var got orchestrator.CrawlQueueItem
	if err := json.Unmarshal(recorder.Body.Bytes(), &got); err != nil {
		t.Fatalf("Failed to parse response: %v", err)
	}
	if got.UniversityID != wantID {
		t.Errorf("Expected universityID %s, got %s", wantID, got.UniversityID)
	}
	if got.Priority != 7 {
		t.Errorf("Expected priority 7, got %d", got.Priority)
	}
}
// TestOrchestratorAddToQueue_InvalidUUID checks that a university_id which is
// not a UUID is rejected with 400.
func TestOrchestratorAddToQueue_InvalidUUID(t *testing.T) {
	repo := NewMockRepository()
	orch := orchestrator.NewOrchestrator(repo, &MockStaffCrawler{}, &MockPubCrawler{})
	router := setupOrchestratorTestRouter(orch, repo, "test-key")

	payload, _ := json.Marshal(map[string]interface{}{
		"university_id": "not-a-valid-uuid",
		"priority":      5,
	})

	request, _ := http.NewRequest("POST", "/v1/crawl/queue", bytes.NewBuffer(payload))
	request.Header.Set("Content-Type", "application/json")
	request.Header.Set("Authorization", "Bearer test-key")
	recorder := httptest.NewRecorder()
	router.ServeHTTP(recorder, request)

	if recorder.Code != http.StatusBadRequest {
		t.Errorf("Expected status 400, got %d: %s", recorder.Code, recorder.Body.String())
	}
}
// TestOrchestratorAddToQueue_MissingUniversityID checks that a body without
// university_id is rejected with 400.
func TestOrchestratorAddToQueue_MissingUniversityID(t *testing.T) {
	repo := NewMockRepository()
	orch := orchestrator.NewOrchestrator(repo, &MockStaffCrawler{}, &MockPubCrawler{})
	router := setupOrchestratorTestRouter(orch, repo, "test-key")

	payload, _ := json.Marshal(map[string]interface{}{
		"priority": 5,
	})

	request, _ := http.NewRequest("POST", "/v1/crawl/queue", bytes.NewBuffer(payload))
	request.Header.Set("Content-Type", "application/json")
	request.Header.Set("Authorization", "Bearer test-key")
	recorder := httptest.NewRecorder()
	router.ServeHTTP(recorder, request)

	if recorder.Code != http.StatusBadRequest {
		t.Errorf("Expected status 400, got %d: %s", recorder.Code, recorder.Body.String())
	}
}
// TestOrchestratorRemoveFromQueue seeds the repository with one item and
// checks that DELETE /v1/crawl/queue/:id answers 200 and empties the queue.
func TestOrchestratorRemoveFromQueue(t *testing.T) {
	repo := NewMockRepository()
	orch := orchestrator.NewOrchestrator(repo, &MockStaffCrawler{}, &MockPubCrawler{})

	// Seed the queue directly through the repository.
	targetID := uuid.New()
	repo.AddToQueue(context.Background(), targetID, 5, "test")

	router := setupOrchestratorTestRouter(orch, repo, "test-key")

	request, _ := http.NewRequest("DELETE", "/v1/crawl/queue/"+targetID.String(), nil)
	request.Header.Set("Authorization", "Bearer test-key")
	recorder := httptest.NewRecorder()
	router.ServeHTTP(recorder, request)

	if recorder.Code != http.StatusOK {
		t.Errorf("Expected status 200, got %d: %s", recorder.Code, recorder.Body.String())
	}

	// The repository must no longer hold the item.
	remaining, _ := repo.GetQueueItems(context.Background())
	if n := len(remaining); n != 0 {
		t.Errorf("Expected queue to be empty, got %d items", n)
	}
}
// TestOrchestratorRemoveFromQueue_InvalidUUID checks that a non-UUID path
// parameter on the delete route is rejected with 400.
func TestOrchestratorRemoveFromQueue_InvalidUUID(t *testing.T) {
	repo := NewMockRepository()
	orch := orchestrator.NewOrchestrator(repo, &MockStaffCrawler{}, &MockPubCrawler{})
	router := setupOrchestratorTestRouter(orch, repo, "test-key")

	request, _ := http.NewRequest("DELETE", "/v1/crawl/queue/invalid-uuid", nil)
	request.Header.Set("Authorization", "Bearer test-key")
	recorder := httptest.NewRecorder()
	router.ServeHTTP(recorder, request)

	if recorder.Code != http.StatusBadRequest {
		t.Errorf("Expected status 400, got %d: %s", recorder.Code, recorder.Body.String())
	}
}
// TestOrchestratorStartStop drives the start/stop endpoints through the
// sequence start, start, stop, stop and expects 200, 409, 200, 409.
func TestOrchestratorStartStop(t *testing.T) {
	repo := NewMockRepository()
	orch := orchestrator.NewOrchestrator(repo, &MockStaffCrawler{}, &MockPubCrawler{})
	router := setupOrchestratorTestRouter(orch, repo, "test-key")

	// post issues an authenticated POST without a body and returns the
	// recorded response.
	post := func(path string) *httptest.ResponseRecorder {
		request, _ := http.NewRequest("POST", path, nil)
		request.Header.Set("Authorization", "Bearer test-key")
		recorder := httptest.NewRecorder()
		router.ServeHTTP(recorder, request)
		return recorder
	}

	// Start orchestrator
	if rec := post("/v1/crawl/start"); rec.Code != http.StatusOK {
		t.Errorf("Expected status 200 on start, got %d: %s", rec.Code, rec.Body.String())
	}

	// Try to start again (should fail)
	if rec := post("/v1/crawl/start"); rec.Code != http.StatusConflict {
		t.Errorf("Expected status 409 on duplicate start, got %d", rec.Code)
	}

	// Stop orchestrator
	if rec := post("/v1/crawl/stop"); rec.Code != http.StatusOK {
		t.Errorf("Expected status 200 on stop, got %d: %s", rec.Code, rec.Body.String())
	}

	// Try to stop again (should fail)
	if rec := post("/v1/crawl/stop"); rec.Code != http.StatusConflict {
		t.Errorf("Expected status 409 on duplicate stop, got %d", rec.Code)
	}
}
// TestOrchestratorPauseResume seeds one queue item, pauses it through the API
// and checks the stored phase, then resumes it and checks that the phase is no
// longer paused.
func TestOrchestratorPauseResume(t *testing.T) {
	repo := NewMockRepository()
	staffCrawler := &MockStaffCrawler{}
	pubCrawler := &MockPubCrawler{}
	orch := orchestrator.NewOrchestrator(repo, staffCrawler, pubCrawler)

	// Add an item first
	universityID := uuid.New()
	repo.AddToQueue(context.Background(), universityID, 5, "test")

	router := setupOrchestratorTestRouter(orch, repo, "test-key")

	// Pause university
	req, _ := http.NewRequest("POST", "/v1/crawl/queue/"+universityID.String()+"/pause", nil)
	req.Header.Set("Authorization", "Bearer test-key")
	w := httptest.NewRecorder()
	router.ServeHTTP(w, req)

	if w.Code != http.StatusOK {
		t.Errorf("Expected status 200 on pause, got %d: %s", w.Code, w.Body.String())
	}

	// Verify it's paused. The length is checked separately and fatally so that
	// items[0] is never evaluated on an empty slice (which would panic while
	// building the failure message).
	items, _ := repo.GetQueueItems(context.Background())
	if len(items) != 1 {
		t.Fatalf("Expected 1 item in queue after pause, got %d", len(items))
	}
	if items[0].CurrentPhase != orchestrator.PhasePaused {
		t.Errorf("Expected item to be paused, got phase %s", items[0].CurrentPhase)
	}

	// Resume university
	req, _ = http.NewRequest("POST", "/v1/crawl/queue/"+universityID.String()+"/resume", nil)
	req.Header.Set("Authorization", "Bearer test-key")
	w = httptest.NewRecorder()
	router.ServeHTTP(w, req)

	if w.Code != http.StatusOK {
		t.Errorf("Expected status 200 on resume, got %d: %s", w.Code, w.Body.String())
	}

	// Verify it's resumed, with the same guard against an empty slice.
	items, _ = repo.GetQueueItems(context.Background())
	if len(items) != 1 {
		t.Fatalf("Expected 1 item in queue after resume, got %d", len(items))
	}
	if items[0].CurrentPhase == orchestrator.PhasePaused {
		t.Errorf("Expected item to not be paused, got phase %s", items[0].CurrentPhase)
	}
}
// TestOrchestratorPause_InvalidUUID checks that a non-UUID path parameter on
// the pause route is rejected with 400.
func TestOrchestratorPause_InvalidUUID(t *testing.T) {
	repo := NewMockRepository()
	orch := orchestrator.NewOrchestrator(repo, &MockStaffCrawler{}, &MockPubCrawler{})
	router := setupOrchestratorTestRouter(orch, repo, "test-key")

	request, _ := http.NewRequest("POST", "/v1/crawl/queue/invalid-uuid/pause", nil)
	request.Header.Set("Authorization", "Bearer test-key")
	recorder := httptest.NewRecorder()
	router.ServeHTTP(recorder, request)

	if recorder.Code != http.StatusBadRequest {
		t.Errorf("Expected status 400, got %d: %s", recorder.Code, recorder.Body.String())
	}
}
// TestOrchestratorNoAuth checks that a request without an Authorization
// header is answered with 401.
func TestOrchestratorNoAuth(t *testing.T) {
	repo := NewMockRepository()
	orch := orchestrator.NewOrchestrator(repo, &MockStaffCrawler{}, &MockPubCrawler{})
	router := setupOrchestratorTestRouter(orch, repo, "test-key")

	// Deliberately no Authorization header.
	request, _ := http.NewRequest("GET", "/v1/crawl/status", nil)
	recorder := httptest.NewRecorder()
	router.ServeHTTP(recorder, request)

	if recorder.Code != http.StatusUnauthorized {
		t.Errorf("Expected status 401, got %d", recorder.Code)
	}
}
// TestOrchestratorDefaultPriority checks that a request whose priority is
// left at its zero value results in a queue item with priority 5.
func TestOrchestratorDefaultPriority(t *testing.T) {
	repo := NewMockRepository()
	orch := orchestrator.NewOrchestrator(repo, &MockStaffCrawler{}, &MockPubCrawler{})
	router := setupOrchestratorTestRouter(orch, repo, "test-key")

	// Only the university ID is set; Priority and InitiatedBy stay at their
	// zero values and are serialised as 0 and "".
	payload, _ := json.Marshal(AddToQueueRequest{
		UniversityID: uuid.New().String(),
	})

	request, _ := http.NewRequest("POST", "/v1/crawl/queue", bytes.NewBuffer(payload))
	request.Header.Set("Content-Type", "application/json")
	request.Header.Set("Authorization", "Bearer test-key")
	recorder := httptest.NewRecorder()
	router.ServeHTTP(recorder, request)

	if recorder.Code != http.StatusCreated {
		t.Errorf("Expected status 201, got %d: %s", recorder.Code, recorder.Body.String())
	}

	var got orchestrator.CrawlQueueItem
	if err := json.Unmarshal(recorder.Body.Bytes(), &got); err != nil {
		t.Fatalf("Failed to parse response: %v", err)
	}
	if got.Priority != 5 {
		t.Errorf("Expected default priority 5, got %d", got.Priority)
	}
}
// TestOrchestratorQueueWithNullableFields tests that queue items whose
// optional fields (UniversityShort, LastError) are empty strings survive the
// JSON round trip through GET /v1/crawl/queue. The empty strings stand in for
// NULL database values that the repository layer is expected to COALESCE to ''.
func TestOrchestratorQueueWithNullableFields(t *testing.T) {
	repo := NewMockRepository()
	staffCrawler := &MockStaffCrawler{}
	pubCrawler := &MockPubCrawler{}
	orch := orchestrator.NewOrchestrator(repo, staffCrawler, pubCrawler)

	// Add item with empty optional fields (simulates NULL from DB)
	universityID := uuid.New()
	item := orchestrator.CrawlQueueItem{
		ID:              uuid.New(),
		UniversityID:    universityID,
		UniversityName:  "Test Universität",
		UniversityShort: "", // Empty string (COALESCE converts NULL to '')
		CurrentPhase:    orchestrator.PhasePending,
		LastError:       "", // Empty string (COALESCE converts NULL to '')
		CreatedAt:       time.Now(),
		UpdatedAt:       time.Now(),
	}
	position := 1
	item.QueuePosition = &position
	repo.items = append(repo.items, item)

	router := setupOrchestratorTestRouter(orch, repo, "test-key")

	req, _ := http.NewRequest("GET", "/v1/crawl/queue", nil)
	req.Header.Set("Authorization", "Bearer test-key")
	w := httptest.NewRecorder()
	router.ServeHTTP(w, req)

	if w.Code != http.StatusOK {
		t.Errorf("Expected status 200, got %d: %s", w.Code, w.Body.String())
	}

	var response struct {
		Queue []orchestrator.CrawlQueueItem `json:"queue"`
		Count int                           `json:"count"`
	}
	if err := json.Unmarshal(w.Body.Bytes(), &response); err != nil {
		t.Fatalf("Failed to parse response: %v", err)
	}

	if response.Count != 1 {
		t.Errorf("Expected 1 item in queue, got %d", response.Count)
	}
	// Stop here if the array is empty (for example after an error response);
	// indexing Queue[0] below would otherwise panic.
	if len(response.Queue) != 1 {
		t.Fatalf("Expected 1 entry in queue array, got %d", len(response.Queue))
	}

	// Verify empty strings are preserved (not NULL)
	if response.Queue[0].UniversityShort != "" {
		t.Errorf("Expected empty UniversityShort, got %q", response.Queue[0].UniversityShort)
	}
	if response.Queue[0].LastError != "" {
		t.Errorf("Expected empty LastError, got %q", response.Queue[0].LastError)
	}
}
// TestOrchestratorQueueWithLastError tests that a failed queue item keeps its
// LastError message and UniversityShort value when returned by
// GET /v1/crawl/queue.
func TestOrchestratorQueueWithLastError(t *testing.T) {
	repo := NewMockRepository()
	staffCrawler := &MockStaffCrawler{}
	pubCrawler := &MockPubCrawler{}
	orch := orchestrator.NewOrchestrator(repo, staffCrawler, pubCrawler)

	// Add item with an error
	universityID := uuid.New()
	item := orchestrator.CrawlQueueItem{
		ID:              uuid.New(),
		UniversityID:    universityID,
		UniversityName:  "Test Universität mit Fehler",
		UniversityShort: "TUmF",
		CurrentPhase:    orchestrator.PhaseFailed,
		LastError:       "connection timeout after 30s",
		RetryCount:      3,
		MaxRetries:      3,
		CreatedAt:       time.Now(),
		UpdatedAt:       time.Now(),
	}
	position := 1
	item.QueuePosition = &position
	repo.items = append(repo.items, item)

	router := setupOrchestratorTestRouter(orch, repo, "test-key")

	req, _ := http.NewRequest("GET", "/v1/crawl/queue", nil)
	req.Header.Set("Authorization", "Bearer test-key")
	w := httptest.NewRecorder()
	router.ServeHTTP(w, req)

	if w.Code != http.StatusOK {
		t.Errorf("Expected status 200, got %d: %s", w.Code, w.Body.String())
	}

	var response struct {
		Queue []orchestrator.CrawlQueueItem `json:"queue"`
		Count int                           `json:"count"`
	}
	if err := json.Unmarshal(w.Body.Bytes(), &response); err != nil {
		t.Fatalf("Failed to parse response: %v", err)
	}

	if response.Count != 1 {
		t.Errorf("Expected 1 item in queue, got %d", response.Count)
	}
	// Stop here if the array is empty (for example after an error response);
	// indexing Queue[0] below would otherwise panic.
	if len(response.Queue) != 1 {
		t.Fatalf("Expected 1 entry in queue array, got %d", len(response.Queue))
	}

	// Verify error message is preserved
	if response.Queue[0].LastError != "connection timeout after 30s" {
		t.Errorf("Expected LastError to be 'connection timeout after 30s', got %q", response.Queue[0].LastError)
	}
	if response.Queue[0].UniversityShort != "TUmF" {
		t.Errorf("Expected UniversityShort 'TUmF', got %q", response.Queue[0].UniversityShort)
	}
}

View File

@@ -0,0 +1,700 @@
package handlers
import (
"net/http"
"time"
"github.com/breakpilot/edu-search-service/internal/policy"
"github.com/gin-gonic/gin"
"github.com/google/uuid"
)
// PolicyHandler contains all policy-related HTTP handlers.
// It holds the policy store and the enforcer built from that store, which the
// handler methods share.
type PolicyHandler struct {
// store is used by the handlers for list/get/create/update/delete calls.
store *policy.Store
// enforcer is used for audit logging (LogChange), PII detection and compliance checks.
enforcer *policy.Enforcer
}
// policyHandler is the package-level singleton instance.
// It stays nil until InitPolicyHandler has been called.
var policyHandler *PolicyHandler
// InitPolicyHandler builds the package-level policy handler from the given
// policy store. The enforcer is created from the same store. Calling it again
// replaces the previously installed handler.
func InitPolicyHandler(store *policy.Store) {
	h := new(PolicyHandler)
	h.store = store
	h.enforcer = policy.NewEnforcer(store)
	policyHandler = h
}
// GetPolicyHandler returns the policy handler instance.
// The result is nil if InitPolicyHandler has not been called yet.
func GetPolicyHandler() *PolicyHandler {
return policyHandler
}
// =============================================================================
// POLICIES
// =============================================================================
// ListPolicies responds with a page of source policies.
//
// Query parameters are bound into a policy.PolicyListFilter; a bind failure
// yields 400. A limit that is not positive or is greater than 100 is replaced
// by 50. On success the response carries the policies, the total reported by
// the store and the effective limit and offset.
func (h *PolicyHandler) ListPolicies(c *gin.Context) {
	filter := policy.PolicyListFilter{}
	if bindErr := c.ShouldBindQuery(&filter); bindErr != nil {
		c.JSON(http.StatusBadRequest, gin.H{
			"error":   "Invalid query parameters",
			"details": bindErr.Error(),
		})
		return
	}

	// Fall back to the default page size for missing or oversized limits.
	if filter.Limit > 100 || filter.Limit <= 0 {
		filter.Limit = 50
	}

	items, count, listErr := h.store.ListPolicies(c.Request.Context(), &filter)
	if listErr != nil {
		c.JSON(http.StatusInternalServerError, gin.H{
			"error":   "Failed to list policies",
			"details": listErr.Error(),
		})
		return
	}

	payload := gin.H{
		"policies": items,
		"total":    count,
		"limit":    filter.Limit,
		"offset":   filter.Offset,
	}
	c.JSON(http.StatusOK, payload)
}
// GetPolicy looks up one policy by the UUID in the ":id" path parameter.
//
// Responds 400 for a malformed ID, 500 when the store returns an error,
// 404 when the store returns no policy, and 200 with the policy otherwise.
func (h *PolicyHandler) GetPolicy(c *gin.Context) {
	policyID, parseErr := uuid.Parse(c.Param("id"))
	if parseErr != nil {
		c.JSON(http.StatusBadRequest, gin.H{"error": "Invalid policy ID"})
		return
	}

	found, getErr := h.store.GetPolicy(c.Request.Context(), policyID)
	switch {
	case getErr != nil:
		c.JSON(http.StatusInternalServerError, gin.H{
			"error":   "Failed to get policy",
			"details": getErr.Error(),
		})
	case found == nil:
		c.JSON(http.StatusNotFound, gin.H{"error": "Policy not found"})
	default:
		c.JSON(http.StatusOK, found)
	}
}
// CreatePolicy stores a new source policy from the JSON request body.
//
// Responds 400 when the body cannot be bound, 500 when the store fails, and
// 201 with the created policy otherwise. The creation is passed to the
// enforcer's change log together with the caller's email, if one is known.
func (h *PolicyHandler) CreatePolicy(c *gin.Context) {
	var body policy.CreateSourcePolicyRequest
	if bindErr := c.ShouldBindJSON(&body); bindErr != nil {
		c.JSON(http.StatusBadRequest, gin.H{
			"error":   "Invalid request body",
			"details": bindErr.Error(),
		})
		return
	}

	ctx := c.Request.Context()
	created, createErr := h.store.CreatePolicy(ctx, &body)
	if createErr != nil {
		c.JSON(http.StatusInternalServerError, gin.H{
			"error":   "Failed to create policy",
			"details": createErr.Error(),
		})
		return
	}

	// Audit entry: a create has no previous value.
	actor := getUserEmail(c)
	h.enforcer.LogChange(ctx, policy.AuditActionCreate, policy.AuditEntitySourcePolicy, &created.ID, nil, created, actor)

	c.JSON(http.StatusCreated, created)
}
// UpdatePolicy applies the JSON request body to the policy identified by the
// ":id" path parameter.
//
// The current policy is loaded first so the audit entry can record both the
// old and the new value. Responds 400 for a malformed ID or body, 404 when
// the policy does not exist, 500 on store errors, and 200 with the updated
// policy otherwise.
func (h *PolicyHandler) UpdatePolicy(c *gin.Context) {
	policyID, parseErr := uuid.Parse(c.Param("id"))
	if parseErr != nil {
		c.JSON(http.StatusBadRequest, gin.H{"error": "Invalid policy ID"})
		return
	}
	ctx := c.Request.Context()

	// Snapshot of the state before the update, used as the audit "old value".
	before, loadErr := h.store.GetPolicy(ctx, policyID)
	switch {
	case loadErr != nil:
		c.JSON(http.StatusInternalServerError, gin.H{
			"error":   "Failed to get policy",
			"details": loadErr.Error(),
		})
		return
	case before == nil:
		c.JSON(http.StatusNotFound, gin.H{"error": "Policy not found"})
		return
	}

	var body policy.UpdateSourcePolicyRequest
	if bindErr := c.ShouldBindJSON(&body); bindErr != nil {
		c.JSON(http.StatusBadRequest, gin.H{
			"error":   "Invalid request body",
			"details": bindErr.Error(),
		})
		return
	}

	after, updateErr := h.store.UpdatePolicy(ctx, policyID, &body)
	if updateErr != nil {
		c.JSON(http.StatusInternalServerError, gin.H{
			"error":   "Failed to update policy",
			"details": updateErr.Error(),
		})
		return
	}

	actor := getUserEmail(c)
	h.enforcer.LogChange(ctx, policy.AuditActionUpdate, policy.AuditEntitySourcePolicy, &after.ID, before, after, actor)

	c.JSON(http.StatusOK, after)
}
// =============================================================================
// SOURCES (WHITELIST)
// =============================================================================
// ListSources responds with a page of allowed sources.
//
// Query parameters are bound into a policy.SourceListFilter; a bind failure
// yields 400. A limit that is not positive or is greater than 100 is replaced
// by 50. On success the response carries the sources, the total reported by
// the store and the effective limit and offset.
func (h *PolicyHandler) ListSources(c *gin.Context) {
	filter := policy.SourceListFilter{}
	if bindErr := c.ShouldBindQuery(&filter); bindErr != nil {
		c.JSON(http.StatusBadRequest, gin.H{
			"error":   "Invalid query parameters",
			"details": bindErr.Error(),
		})
		return
	}

	// Fall back to the default page size for missing or oversized limits.
	if filter.Limit > 100 || filter.Limit <= 0 {
		filter.Limit = 50
	}

	items, count, listErr := h.store.ListSources(c.Request.Context(), &filter)
	if listErr != nil {
		c.JSON(http.StatusInternalServerError, gin.H{
			"error":   "Failed to list sources",
			"details": listErr.Error(),
		})
		return
	}

	payload := gin.H{
		"sources": items,
		"total":   count,
		"limit":   filter.Limit,
		"offset":  filter.Offset,
	}
	c.JSON(http.StatusOK, payload)
}
// GetSource looks up one allowed source by the UUID in the ":id" path parameter.
//
// Responds 400 for a malformed ID, 500 when the store returns an error,
// 404 when the store returns no source, and 200 with the source otherwise.
func (h *PolicyHandler) GetSource(c *gin.Context) {
	sourceID, parseErr := uuid.Parse(c.Param("id"))
	if parseErr != nil {
		c.JSON(http.StatusBadRequest, gin.H{"error": "Invalid source ID"})
		return
	}

	found, getErr := h.store.GetSource(c.Request.Context(), sourceID)
	switch {
	case getErr != nil:
		c.JSON(http.StatusInternalServerError, gin.H{
			"error":   "Failed to get source",
			"details": getErr.Error(),
		})
	case found == nil:
		c.JSON(http.StatusNotFound, gin.H{"error": "Source not found"})
	default:
		c.JSON(http.StatusOK, found)
	}
}
// CreateSource stores a new allowed source from the JSON request body.
//
// Responds 400 when the body cannot be bound, 500 when the store fails, and
// 201 with the created source otherwise. The creation is passed to the
// enforcer's change log together with the caller's email, if one is known.
func (h *PolicyHandler) CreateSource(c *gin.Context) {
	var body policy.CreateAllowedSourceRequest
	if bindErr := c.ShouldBindJSON(&body); bindErr != nil {
		c.JSON(http.StatusBadRequest, gin.H{
			"error":   "Invalid request body",
			"details": bindErr.Error(),
		})
		return
	}

	ctx := c.Request.Context()
	created, createErr := h.store.CreateSource(ctx, &body)
	if createErr != nil {
		c.JSON(http.StatusInternalServerError, gin.H{
			"error":   "Failed to create source",
			"details": createErr.Error(),
		})
		return
	}

	// Audit entry: a create has no previous value.
	actor := getUserEmail(c)
	h.enforcer.LogChange(ctx, policy.AuditActionCreate, policy.AuditEntityAllowedSource, &created.ID, nil, created, actor)

	c.JSON(http.StatusCreated, created)
}
// UpdateSource applies the JSON request body to the allowed source identified
// by the ":id" path parameter.
//
// The current source is loaded first so the audit entry can record both the
// old and the new value. Responds 400 for a malformed ID or body, 404 when
// the source does not exist, 500 on store errors, and 200 with the updated
// source otherwise.
func (h *PolicyHandler) UpdateSource(c *gin.Context) {
	sourceID, parseErr := uuid.Parse(c.Param("id"))
	if parseErr != nil {
		c.JSON(http.StatusBadRequest, gin.H{"error": "Invalid source ID"})
		return
	}
	ctx := c.Request.Context()

	// Snapshot of the state before the update, used as the audit "old value".
	before, loadErr := h.store.GetSource(ctx, sourceID)
	switch {
	case loadErr != nil:
		c.JSON(http.StatusInternalServerError, gin.H{
			"error":   "Failed to get source",
			"details": loadErr.Error(),
		})
		return
	case before == nil:
		c.JSON(http.StatusNotFound, gin.H{"error": "Source not found"})
		return
	}

	var body policy.UpdateAllowedSourceRequest
	if bindErr := c.ShouldBindJSON(&body); bindErr != nil {
		c.JSON(http.StatusBadRequest, gin.H{
			"error":   "Invalid request body",
			"details": bindErr.Error(),
		})
		return
	}

	after, updateErr := h.store.UpdateSource(ctx, sourceID, &body)
	if updateErr != nil {
		c.JSON(http.StatusInternalServerError, gin.H{
			"error":   "Failed to update source",
			"details": updateErr.Error(),
		})
		return
	}

	actor := getUserEmail(c)
	h.enforcer.LogChange(ctx, policy.AuditActionUpdate, policy.AuditEntityAllowedSource, &after.ID, before, after, actor)

	c.JSON(http.StatusOK, after)
}
// DeleteSource removes the allowed source identified by the ":id" path parameter.
//
// The source is loaded before deletion so the audit entry can record its last
// state. Responds 400 for a malformed ID, 404 when the source does not exist,
// 500 on store errors, and 200 with {"deleted": true, "id": <id>} otherwise.
func (h *PolicyHandler) DeleteSource(c *gin.Context) {
	sourceID, parseErr := uuid.Parse(c.Param("id"))
	if parseErr != nil {
		c.JSON(http.StatusBadRequest, gin.H{"error": "Invalid source ID"})
		return
	}
	ctx := c.Request.Context()

	// Keep the last known state for the audit trail.
	existing, loadErr := h.store.GetSource(ctx, sourceID)
	switch {
	case loadErr != nil:
		c.JSON(http.StatusInternalServerError, gin.H{
			"error":   "Failed to get source",
			"details": loadErr.Error(),
		})
		return
	case existing == nil:
		c.JSON(http.StatusNotFound, gin.H{"error": "Source not found"})
		return
	}

	if delErr := h.store.DeleteSource(ctx, sourceID); delErr != nil {
		c.JSON(http.StatusInternalServerError, gin.H{
			"error":   "Failed to delete source",
			"details": delErr.Error(),
		})
		return
	}

	// Audit entry: a delete has an old value but no new value.
	actor := getUserEmail(c)
	h.enforcer.LogChange(ctx, policy.AuditActionDelete, policy.AuditEntityAllowedSource, &sourceID, existing, nil, actor)

	c.JSON(http.StatusOK, gin.H{"deleted": true, "id": sourceID})
}
// =============================================================================
// OPERATIONS MATRIX
// =============================================================================
// GetOperationsMatrix responds with the operations matrix returned by the
// store under "sources", plus the list of operation names (lookup, RAG,
// training, export) under "operations". Store errors yield 500.
func (h *PolicyHandler) GetOperationsMatrix(c *gin.Context) {
	matrix, matrixErr := h.store.GetOperationsMatrix(c.Request.Context())
	if matrixErr != nil {
		c.JSON(http.StatusInternalServerError, gin.H{
			"error":   "Failed to get operations matrix",
			"details": matrixErr.Error(),
		})
		return
	}

	operationNames := []string{
		string(policy.OperationLookup),
		string(policy.OperationRAG),
		string(policy.OperationTraining),
		string(policy.OperationExport),
	}
	c.JSON(http.StatusOK, gin.H{
		"sources":    matrix,
		"operations": operationNames,
	})
}
// UpdateOperationPermission updates a single operation permission identified
// by the ":id" path parameter.
//
// Responds 400 for a malformed ID or body, 403 when the request would enable a
// training operation, 500 on store errors, and 200 with the updated permission
// otherwise. The change is passed to the enforcer's change log; no old value
// is recorded for it.
func (h *PolicyHandler) UpdateOperationPermission(c *gin.Context) {
	id, err := uuid.Parse(c.Param("id"))
	if err != nil {
		c.JSON(http.StatusBadRequest, gin.H{"error": "Invalid operation permission ID"})
		return
	}

	var req policy.UpdateOperationPermissionRequest
	if err := c.ShouldBindJSON(&req); err != nil {
		c.JSON(http.StatusBadRequest, gin.H{"error": "Invalid request body", "details": err.Error()})
		return
	}

	// SECURITY: Prevent enabling training
	if req.IsAllowed != nil && *req.IsAllowed {
		// NOTE(review): id is the operation-permission ID from the URL, but it is
		// passed to a lookup named GetOperationsBySourceID. If that method really
		// filters by source ID this check can never match — confirm against the
		// store and look the permission up by its own ID if so.
		ops, lookupErr := h.store.GetOperationsBySourceID(c.Request.Context(), id)
		if lookupErr != nil {
			// Fail closed: without the lookup result we cannot rule out that
			// this request enables a training operation.
			c.JSON(http.StatusInternalServerError, gin.H{"error": "Failed to verify operation permission", "details": lookupErr.Error()})
			return
		}
		for _, op := range ops {
			if op.ID == id && op.Operation == policy.OperationTraining {
				c.JSON(http.StatusForbidden, gin.H{
					"error":   "Training operations cannot be enabled",
					"message": "Training with external data is FORBIDDEN by policy",
				})
				return
			}
		}
	}

	op, err := h.store.UpdateOperationPermission(c.Request.Context(), id, &req)
	if err != nil {
		c.JSON(http.StatusInternalServerError, gin.H{"error": "Failed to update operation permission", "details": err.Error()})
		return
	}

	// Log audit
	userEmail := getUserEmail(c)
	h.enforcer.LogChange(c.Request.Context(), policy.AuditActionUpdate, policy.AuditEntityOperationPermission, &op.ID, nil, op, userEmail)

	c.JSON(http.StatusOK, op)
}
// =============================================================================
// PII RULES
// =============================================================================
// ListPIIRules responds with the PII detection rules from the store.
//
// The "active_only" query parameter is forwarded to the store as true only
// when its value is exactly "true". The response carries the rules and their
// count; store errors yield 500.
func (h *PolicyHandler) ListPIIRules(c *gin.Context) {
	onlyActive := c.Query("active_only") == "true"

	found, listErr := h.store.ListPIIRules(c.Request.Context(), onlyActive)
	if listErr != nil {
		c.JSON(http.StatusInternalServerError, gin.H{
			"error":   "Failed to list PII rules",
			"details": listErr.Error(),
		})
		return
	}

	payload := gin.H{
		"rules": found,
		"total": len(found),
	}
	c.JSON(http.StatusOK, payload)
}
// GetPIIRule looks up one PII rule by the UUID in the ":id" path parameter.
//
// Responds 400 for a malformed ID, 500 when the store returns an error,
// 404 when the store returns no rule, and 200 with the rule otherwise.
func (h *PolicyHandler) GetPIIRule(c *gin.Context) {
	ruleID, parseErr := uuid.Parse(c.Param("id"))
	if parseErr != nil {
		c.JSON(http.StatusBadRequest, gin.H{"error": "Invalid PII rule ID"})
		return
	}

	found, getErr := h.store.GetPIIRule(c.Request.Context(), ruleID)
	switch {
	case getErr != nil:
		c.JSON(http.StatusInternalServerError, gin.H{
			"error":   "Failed to get PII rule",
			"details": getErr.Error(),
		})
	case found == nil:
		c.JSON(http.StatusNotFound, gin.H{"error": "PII rule not found"})
	default:
		c.JSON(http.StatusOK, found)
	}
}
// CreatePIIRule stores a new PII detection rule from the JSON request body.
//
// Responds 400 when the body cannot be bound, 500 when the store fails, and
// 201 with the created rule otherwise. The creation is passed to the
// enforcer's change log together with the caller's email, if one is known.
func (h *PolicyHandler) CreatePIIRule(c *gin.Context) {
	var body policy.CreatePIIRuleRequest
	if bindErr := c.ShouldBindJSON(&body); bindErr != nil {
		c.JSON(http.StatusBadRequest, gin.H{
			"error":   "Invalid request body",
			"details": bindErr.Error(),
		})
		return
	}

	ctx := c.Request.Context()
	created, createErr := h.store.CreatePIIRule(ctx, &body)
	if createErr != nil {
		c.JSON(http.StatusInternalServerError, gin.H{
			"error":   "Failed to create PII rule",
			"details": createErr.Error(),
		})
		return
	}

	// Audit entry: a create has no previous value.
	actor := getUserEmail(c)
	h.enforcer.LogChange(ctx, policy.AuditActionCreate, policy.AuditEntityPIIRule, &created.ID, nil, created, actor)

	c.JSON(http.StatusCreated, created)
}
// UpdatePIIRule applies the JSON request body to the PII rule identified by
// the ":id" path parameter.
//
// The current rule is loaded first so the audit entry can record both the old
// and the new value. Responds 400 for a malformed ID or body, 404 when the
// rule does not exist, 500 on store errors, and 200 with the updated rule
// otherwise.
func (h *PolicyHandler) UpdatePIIRule(c *gin.Context) {
	ruleID, parseErr := uuid.Parse(c.Param("id"))
	if parseErr != nil {
		c.JSON(http.StatusBadRequest, gin.H{"error": "Invalid PII rule ID"})
		return
	}
	ctx := c.Request.Context()

	// Snapshot of the state before the update, used as the audit "old value".
	before, loadErr := h.store.GetPIIRule(ctx, ruleID)
	switch {
	case loadErr != nil:
		c.JSON(http.StatusInternalServerError, gin.H{
			"error":   "Failed to get PII rule",
			"details": loadErr.Error(),
		})
		return
	case before == nil:
		c.JSON(http.StatusNotFound, gin.H{"error": "PII rule not found"})
		return
	}

	var body policy.UpdatePIIRuleRequest
	if bindErr := c.ShouldBindJSON(&body); bindErr != nil {
		c.JSON(http.StatusBadRequest, gin.H{
			"error":   "Invalid request body",
			"details": bindErr.Error(),
		})
		return
	}

	after, updateErr := h.store.UpdatePIIRule(ctx, ruleID, &body)
	if updateErr != nil {
		c.JSON(http.StatusInternalServerError, gin.H{
			"error":   "Failed to update PII rule",
			"details": updateErr.Error(),
		})
		return
	}

	actor := getUserEmail(c)
	h.enforcer.LogChange(ctx, policy.AuditActionUpdate, policy.AuditEntityPIIRule, &after.ID, before, after, actor)

	c.JSON(http.StatusOK, after)
}
// DeletePIIRule removes the PII rule identified by the ":id" path parameter.
//
// The rule is loaded before deletion so the audit entry can record its last
// state. Responds 400 for a malformed ID, 404 when the rule does not exist,
// 500 on store errors, and 200 with {"deleted": true, "id": <id>} otherwise.
func (h *PolicyHandler) DeletePIIRule(c *gin.Context) {
	ruleID, parseErr := uuid.Parse(c.Param("id"))
	if parseErr != nil {
		c.JSON(http.StatusBadRequest, gin.H{"error": "Invalid PII rule ID"})
		return
	}
	ctx := c.Request.Context()

	// Keep the last known state for the audit trail.
	existing, loadErr := h.store.GetPIIRule(ctx, ruleID)
	switch {
	case loadErr != nil:
		c.JSON(http.StatusInternalServerError, gin.H{
			"error":   "Failed to get PII rule",
			"details": loadErr.Error(),
		})
		return
	case existing == nil:
		c.JSON(http.StatusNotFound, gin.H{"error": "PII rule not found"})
		return
	}

	if delErr := h.store.DeletePIIRule(ctx, ruleID); delErr != nil {
		c.JSON(http.StatusInternalServerError, gin.H{
			"error":   "Failed to delete PII rule",
			"details": delErr.Error(),
		})
		return
	}

	// Audit entry: a delete has an old value but no new value.
	actor := getUserEmail(c)
	h.enforcer.LogChange(ctx, policy.AuditActionDelete, policy.AuditEntityPIIRule, &ruleID, existing, nil, actor)

	c.JSON(http.StatusOK, gin.H{"deleted": true, "id": ruleID})
}
// TestPIIRules runs the enforcer's PII detection over the text supplied in the
// JSON request body and returns the detection result.
//
// Responds 400 when the body cannot be bound and 500 when detection fails.
func (h *PolicyHandler) TestPIIRules(c *gin.Context) {
	var body policy.PIITestRequest
	if bindErr := c.ShouldBindJSON(&body); bindErr != nil {
		c.JSON(http.StatusBadRequest, gin.H{
			"error":   "Invalid request body",
			"details": bindErr.Error(),
		})
		return
	}

	detection, detectErr := h.enforcer.DetectPII(c.Request.Context(), body.Text)
	if detectErr != nil {
		c.JSON(http.StatusInternalServerError, gin.H{
			"error":   "Failed to test PII detection",
			"details": detectErr.Error(),
		})
		return
	}

	c.JSON(http.StatusOK, detection)
}
// =============================================================================
// AUDIT & COMPLIANCE
// =============================================================================
// ListAuditLogs responds with a page of audit log entries.
//
// Query parameters are bound into a policy.AuditLogFilter; a bind failure
// yields 400. A limit that is not positive or is greater than 500 is replaced
// by 100. On success the response carries the entries, the total reported by
// the store and the effective limit and offset.
func (h *PolicyHandler) ListAuditLogs(c *gin.Context) {
	filter := policy.AuditLogFilter{}
	if bindErr := c.ShouldBindQuery(&filter); bindErr != nil {
		c.JSON(http.StatusBadRequest, gin.H{
			"error":   "Invalid query parameters",
			"details": bindErr.Error(),
		})
		return
	}

	// Fall back to the default page size for missing or oversized limits.
	if filter.Limit > 500 || filter.Limit <= 0 {
		filter.Limit = 100
	}

	entries, count, listErr := h.store.ListAuditLogs(c.Request.Context(), &filter)
	if listErr != nil {
		c.JSON(http.StatusInternalServerError, gin.H{
			"error":   "Failed to list audit logs",
			"details": listErr.Error(),
		})
		return
	}

	payload := gin.H{
		"logs":   entries,
		"total":  count,
		"limit":  filter.Limit,
		"offset": filter.Offset,
	}
	c.JSON(http.StatusOK, payload)
}
// ListBlockedContent responds with a page of blocked-content log entries.
//
// Query parameters are bound into a policy.BlockedContentFilter; a bind
// failure yields 400. A limit that is not positive or is greater than 500 is
// replaced by 100. On success the response carries the entries under
// "blocked", the total reported by the store and the effective limit and offset.
func (h *PolicyHandler) ListBlockedContent(c *gin.Context) {
	filter := policy.BlockedContentFilter{}
	if bindErr := c.ShouldBindQuery(&filter); bindErr != nil {
		c.JSON(http.StatusBadRequest, gin.H{
			"error":   "Invalid query parameters",
			"details": bindErr.Error(),
		})
		return
	}

	// Fall back to the default page size for missing or oversized limits.
	if filter.Limit > 500 || filter.Limit <= 0 {
		filter.Limit = 100
	}

	entries, count, listErr := h.store.ListBlockedContent(c.Request.Context(), &filter)
	if listErr != nil {
		c.JSON(http.StatusInternalServerError, gin.H{
			"error":   "Failed to list blocked content",
			"details": listErr.Error(),
		})
		return
	}

	payload := gin.H{
		"blocked": entries,
		"total":   count,
		"limit":   filter.Limit,
		"offset":  filter.Offset,
	}
	c.JSON(http.StatusOK, payload)
}
// CheckCompliance binds the JSON request body into a
// policy.CheckComplianceRequest, passes it to the enforcer and returns the
// enforcer's result.
//
// Responds 400 when the body cannot be bound and 500 when the check fails.
func (h *PolicyHandler) CheckCompliance(c *gin.Context) {
	var body policy.CheckComplianceRequest
	if bindErr := c.ShouldBindJSON(&body); bindErr != nil {
		c.JSON(http.StatusBadRequest, gin.H{
			"error":   "Invalid request body",
			"details": bindErr.Error(),
		})
		return
	}

	verdict, checkErr := h.enforcer.CheckCompliance(c.Request.Context(), &body)
	if checkErr != nil {
		c.JSON(http.StatusInternalServerError, gin.H{
			"error":   "Failed to check compliance",
			"details": checkErr.Error(),
		})
		return
	}

	c.JSON(http.StatusOK, verdict)
}
// GetPolicyStats responds with the statistics returned by the store's
// GetStats call, or 500 when that call fails.
func (h *PolicyHandler) GetPolicyStats(c *gin.Context) {
	summary, statsErr := h.store.GetStats(c.Request.Context())
	if statsErr != nil {
		c.JSON(http.StatusInternalServerError, gin.H{
			"error":   "Failed to get stats",
			"details": statsErr.Error(),
		})
		return
	}

	c.JSON(http.StatusOK, summary)
}
// GenerateComplianceReport generates an audit report.
//
// Query parameters:
//   - from, to: optional dates in YYYY-MM-DD format that bound the audit and
//     blocked-content filters. One day is added to "to" so the end date itself
//     is included. A value that does not parse yields 400 instead of being
//     silently ignored, which would otherwise produce an unfiltered report.
//   - format: when set to "download", the response carries a
//     Content-Disposition header with a dated file name.
//
// Both filters are capped at 10000 entries. Responds 500 when report
// generation fails and 200 with the report otherwise.
func (h *PolicyHandler) GenerateComplianceReport(c *gin.Context) {
	const dateLayout = "2006-01-02"

	var auditFilter policy.AuditLogFilter
	var blockedFilter policy.BlockedContentFilter

	// Parse date filters
	if fromStr := c.Query("from"); fromStr != "" {
		from, err := time.Parse(dateLayout, fromStr)
		if err != nil {
			c.JSON(http.StatusBadRequest, gin.H{"error": "Invalid 'from' date, expected YYYY-MM-DD", "details": err.Error()})
			return
		}
		auditFilter.FromDate = &from
		blockedFilter.FromDate = &from
	}
	if toStr := c.Query("to"); toStr != "" {
		to, err := time.Parse(dateLayout, toStr)
		if err != nil {
			c.JSON(http.StatusBadRequest, gin.H{"error": "Invalid 'to' date, expected YYYY-MM-DD", "details": err.Error()})
			return
		}
		// Add 1 day to include the end date
		to = to.Add(24 * time.Hour)
		auditFilter.ToDate = &to
		blockedFilter.ToDate = &to
	}

	// Reports are not paginated; use a large fixed cap per log type.
	auditFilter.Limit = 10000
	blockedFilter.Limit = 10000

	auditor := policy.NewAuditor(h.store)
	report, err := auditor.GenerateAuditReport(c.Request.Context(), &auditFilter, &blockedFilter)
	if err != nil {
		c.JSON(http.StatusInternalServerError, gin.H{"error": "Failed to generate report", "details": err.Error()})
		return
	}

	// Set filename for download
	if c.Query("format") == "download" {
		filename := "compliance-report-" + time.Now().Format(dateLayout) + ".json"
		c.Header("Content-Disposition", "attachment; filename="+filename)
		c.Header("Content-Type", "application/json")
	}

	c.JSON(http.StatusOK, report)
}
// =============================================================================
// HELPERS
// =============================================================================
// getUserEmail returns the email of the calling user, or nil when none is
// available.
//
// A non-empty "X-User-Email" request header wins. Otherwise the "user_email"
// context value is used, provided it exists and is a string.
func getUserEmail(c *gin.Context) *string {
	// Header variant (set by auth proxy).
	if fromHeader := c.GetHeader("X-User-Email"); fromHeader != "" {
		return &fromHeader
	}

	// Context variant (set by auth middleware).
	stored, found := c.Get("user_email")
	if !found {
		return nil
	}
	asString, isString := stored.(string)
	if !isString {
		return nil
	}
	return &asString
}
// =============================================================================
// ROUTE SETUP
// =============================================================================
// SetupPolicyRoutes registers every policy-related route on the given router
// group. It registers nothing when the package-level policy handler has not
// been initialized.
func SetupPolicyRoutes(r *gin.RouterGroup) {
	h := policyHandler
	if h == nil {
		return
	}

	// Routes are listed in registration order.
	routes := []struct {
		method  string
		path    string
		handler gin.HandlerFunc
	}{
		// Policies
		{http.MethodGet, "/policies", h.ListPolicies},
		{http.MethodGet, "/policies/:id", h.GetPolicy},
		{http.MethodPost, "/policies", h.CreatePolicy},
		{http.MethodPut, "/policies/:id", h.UpdatePolicy},

		// Sources (Whitelist)
		{http.MethodGet, "/sources", h.ListSources},
		{http.MethodGet, "/sources/:id", h.GetSource},
		{http.MethodPost, "/sources", h.CreateSource},
		{http.MethodPut, "/sources/:id", h.UpdateSource},
		{http.MethodDelete, "/sources/:id", h.DeleteSource},

		// Operations Matrix
		{http.MethodGet, "/operations-matrix", h.GetOperationsMatrix},
		{http.MethodPut, "/operations/:id", h.UpdateOperationPermission},

		// PII Rules
		{http.MethodGet, "/pii-rules", h.ListPIIRules},
		{http.MethodGet, "/pii-rules/:id", h.GetPIIRule},
		{http.MethodPost, "/pii-rules", h.CreatePIIRule},
		{http.MethodPut, "/pii-rules/:id", h.UpdatePIIRule},
		{http.MethodDelete, "/pii-rules/:id", h.DeletePIIRule},
		{http.MethodPost, "/pii-rules/test", h.TestPIIRules},

		// Audit & Compliance
		{http.MethodGet, "/policy-audit", h.ListAuditLogs},
		{http.MethodGet, "/blocked-content", h.ListBlockedContent},
		{http.MethodPost, "/check-compliance", h.CheckCompliance},
		{http.MethodGet, "/policy-stats", h.GetPolicyStats},
		{http.MethodGet, "/compliance-report", h.GenerateComplianceReport},
	}

	for _, route := range routes {
		r.Handle(route.method, route.path, route.handler)
	}
}

View File

@@ -0,0 +1,374 @@
package handlers
import (
	"context"
	"fmt"
	"log"
	"net/http"

	"github.com/gin-gonic/gin"
	"github.com/google/uuid"

	"github.com/breakpilot/edu-search-service/internal/database"
	"github.com/breakpilot/edu-search-service/internal/publications"
	"github.com/breakpilot/edu-search-service/internal/staff"
)
// StaffHandlers handles staff-related API endpoints.
// It holds the repository the handlers query and the two crawlers that the
// crawl-start endpoints run.
type StaffHandlers struct {
// repo is the database repository used for searches, lookups and crawl status.
repo *database.Repository
// crawler runs staff crawls for a university (see StartStaffCrawl).
crawler *staff.StaffCrawler
// pubCrawler runs publication crawls and DOI resolution.
pubCrawler *publications.PublicationCrawler
}
// NewStaffHandlers builds the staff handlers on top of repo. A staff crawler
// and a publication crawler are created from the same repository; email is
// handed to the publication crawler's constructor.
func NewStaffHandlers(repo *database.Repository, email string) *StaffHandlers {
	handlers := new(StaffHandlers)
	handlers.repo = repo
	handlers.crawler = staff.NewStaffCrawler(repo)
	handlers.pubCrawler = publications.NewPublicationCrawler(repo, email)
	return handlers
}
// SearchStaff searches for university staff.
// GET /api/v1/staff/search?q=...&university_id=...&state=...&position_type=...&is_professor=...
//
// "limit" defaults to 20 and "offset" to 0. The optional filters
// university_id, department_id, state, uni_type, position_type and
// is_professor are applied only when present; a university_id or
// department_id that is not a valid UUID is ignored. is_professor is true for
// the values "true" and "1" and false for any other non-empty value.
// Repository errors yield 500 with the error text.
func (h *StaffHandlers) SearchStaff(c *gin.Context) {
	params := database.StaffSearchParams{
		Query:  c.Query("q"),
		Limit:  parseIntDefault(c.Query("limit"), 20),
		Offset: parseIntDefault(c.Query("offset"), 0),
	}

	// UUID filters: silently skipped when the value does not parse.
	if raw := c.Query("university_id"); raw != "" {
		if parsed, parseErr := uuid.Parse(raw); parseErr == nil {
			params.UniversityID = &parsed
		}
	}
	if raw := c.Query("department_id"); raw != "" {
		if parsed, parseErr := uuid.Parse(raw); parseErr == nil {
			params.DepartmentID = &parsed
		}
	}

	// Plain string filters.
	if value := c.Query("state"); value != "" {
		params.State = &value
	}
	if value := c.Query("uni_type"); value != "" {
		params.UniType = &value
	}
	if value := c.Query("position_type"); value != "" {
		params.PositionType = &value
	}

	// Boolean filter.
	if raw := c.Query("is_professor"); raw != "" {
		flag := raw == "1" || raw == "true"
		params.IsProfessor = &flag
	}

	found, searchErr := h.repo.SearchStaff(c.Request.Context(), params)
	if searchErr != nil {
		c.JSON(http.StatusInternalServerError, gin.H{"error": searchErr.Error()})
		return
	}

	c.JSON(http.StatusOK, found)
}
// GetStaff returns a single staff member by ID.
// GET /api/v1/staff/:id
//
// Responds 400 for a malformed ID. Any error from the repository is reported
// as 404 "Staff not found".
func (h *StaffHandlers) GetStaff(c *gin.Context) {
	staffID, parseErr := uuid.Parse(c.Param("id"))
	if parseErr != nil {
		c.JSON(http.StatusBadRequest, gin.H{"error": "Invalid staff ID"})
		return
	}

	member, lookupErr := h.repo.GetStaff(c.Request.Context(), staffID)
	if lookupErr != nil {
		c.JSON(http.StatusNotFound, gin.H{"error": "Staff not found"})
		return
	}

	c.JSON(http.StatusOK, member)
}
// GetStaffPublications returns the publications of one staff member.
// GET /api/v1/staff/:id/publications
//
// Responds 400 for a malformed ID and 500 with the error text when the
// repository fails. The response carries the publications, their count and
// the staff ID.
func (h *StaffHandlers) GetStaffPublications(c *gin.Context) {
	staffID, parseErr := uuid.Parse(c.Param("id"))
	if parseErr != nil {
		c.JSON(http.StatusBadRequest, gin.H{"error": "Invalid staff ID"})
		return
	}

	publicationList, fetchErr := h.repo.GetStaffPublications(c.Request.Context(), staffID)
	if fetchErr != nil {
		c.JSON(http.StatusInternalServerError, gin.H{"error": fetchErr.Error()})
		return
	}

	payload := gin.H{
		"publications": publicationList,
		"total":        len(publicationList),
		"staff_id":     staffID,
	}
	c.JSON(http.StatusOK, payload)
}
// SearchPublications searches for publications.
// GET /api/v1/publications/search?q=...&year=...&pub_type=...
//
// "limit" defaults to 20 and "offset" to 0. The optional filters staff_id,
// year, year_from, year_to and pub_type are applied only when present. A
// staff_id that is not a valid UUID and year values that do not parse to a
// positive integer are ignored. Repository errors yield 500 with the error text.
func (h *StaffHandlers) SearchPublications(c *gin.Context) {
	params := database.PublicationSearchParams{
		Query:  c.Query("q"),
		Limit:  parseIntDefault(c.Query("limit"), 20),
		Offset: parseIntDefault(c.Query("offset"), 0),
	}

	if raw := c.Query("staff_id"); raw != "" {
		if parsed, parseErr := uuid.Parse(raw); parseErr == nil {
			params.StaffID = &parsed
		}
	}

	// positiveYear reads a query parameter and returns a pointer to its value
	// when it is a positive integer, or nil otherwise.
	positiveYear := func(key string) *int {
		raw := c.Query(key)
		if raw == "" {
			return nil
		}
		value := parseIntDefault(raw, 0)
		if value <= 0 {
			return nil
		}
		return &value
	}
	if y := positiveYear("year"); y != nil {
		params.Year = y
	}
	if y := positiveYear("year_from"); y != nil {
		params.YearFrom = y
	}
	if y := positiveYear("year_to"); y != nil {
		params.YearTo = y
	}

	if kind := c.Query("pub_type"); kind != "" {
		params.PubType = &kind
	}

	found, searchErr := h.repo.SearchPublications(c.Request.Context(), params)
	if searchErr != nil {
		c.JSON(http.StatusInternalServerError, gin.H{"error": searchErr.Error()})
		return
	}

	c.JSON(http.StatusOK, found)
}
// GetStaffStats returns the staff statistics provided by the repository.
// GET /api/v1/staff/stats
//
// Repository errors yield 500 with the error text.
func (h *StaffHandlers) GetStaffStats(c *gin.Context) {
	summary, statsErr := h.repo.GetStaffStats(c.Request.Context())
	if statsErr != nil {
		c.JSON(http.StatusInternalServerError, gin.H{"error": statsErr.Error()})
		return
	}

	c.JSON(http.StatusOK, summary)
}
// ListUniversities returns the universities provided by the repository
// together with their count.
// GET /api/v1/universities
//
// Repository errors yield 500 with the error text.
func (h *StaffHandlers) ListUniversities(c *gin.Context) {
	all, listErr := h.repo.ListUniversities(c.Request.Context())
	if listErr != nil {
		c.JSON(http.StatusInternalServerError, gin.H{"error": listErr.Error()})
		return
	}

	payload := gin.H{
		"universities": all,
		"total":        len(all),
	}
	c.JSON(http.StatusOK, payload)
}
// StartStaffCrawl starts a staff crawl for a university.
// POST /api/v1/admin/crawl/staff
//
// The JSON body must contain "university_id". Responds 400 when the body or
// the ID is invalid and 404 when the repository cannot load the university.
// Otherwise the crawl is started in a background goroutine and the handler
// returns 202 immediately; a crawl failure is written to the standard logger.
func (h *StaffHandlers) StartStaffCrawl(c *gin.Context) {
	var req struct {
		UniversityID string `json:"university_id"`
	}
	if err := c.ShouldBindJSON(&req); err != nil {
		c.JSON(http.StatusBadRequest, gin.H{"error": "Invalid request"})
		return
	}

	uniID, err := uuid.Parse(req.UniversityID)
	if err != nil {
		c.JSON(http.StatusBadRequest, gin.H{"error": "Invalid university ID"})
		return
	}

	uni, err := h.repo.GetUniversity(c.Request.Context(), uniID)
	if err != nil {
		c.JSON(http.StatusNotFound, gin.H{"error": "University not found"})
		return
	}

	// Start crawl in background.
	// The goroutine must not use c or c.Request.Context(): the request context
	// is canceled as soon as this handler returns, which would abort the crawl
	// right after the 202 response is sent. Use a context that is independent
	// of the request instead.
	go func() {
		if _, err := h.crawler.CrawlUniversity(context.Background(), uni); err != nil {
			log.Printf("staff crawl for university %s failed: %v", uniID, err)
		}
	}()

	c.JSON(http.StatusAccepted, gin.H{
		"status":        "started",
		"university_id": uniID,
		"message":       "Staff crawl started in background",
	})
}
// StartPublicationCrawl starts a publication crawl for a university.
// POST /api/v1/admin/crawl/publications
//
// The JSON body must contain "university_id" and may contain "limit"; a limit
// that is not positive is replaced by 50. Responds 400 when the body or the
// ID is invalid. Otherwise the crawl is started in a background goroutine and
// the handler returns 202 immediately; a crawl failure is written to the
// standard logger.
func (h *StaffHandlers) StartPublicationCrawl(c *gin.Context) {
	var req struct {
		UniversityID string `json:"university_id"`
		Limit        int    `json:"limit"`
	}
	if err := c.ShouldBindJSON(&req); err != nil {
		c.JSON(http.StatusBadRequest, gin.H{"error": "Invalid request"})
		return
	}

	uniID, err := uuid.Parse(req.UniversityID)
	if err != nil {
		c.JSON(http.StatusBadRequest, gin.H{"error": "Invalid university ID"})
		return
	}

	limit := req.Limit
	if limit <= 0 {
		limit = 50
	}

	// Start crawl in background.
	// The goroutine must not use c or c.Request.Context(): the request context
	// is canceled as soon as this handler returns, which would abort the crawl
	// right after the 202 response is sent. Use a context that is independent
	// of the request instead.
	go func() {
		if _, err := h.pubCrawler.CrawlForUniversity(context.Background(), uniID, limit); err != nil {
			log.Printf("publication crawl for university %s failed: %v", uniID, err)
		}
	}()

	c.JSON(http.StatusAccepted, gin.H{
		"status":        "started",
		"university_id": uniID,
		"message":       "Publication crawl started in background",
	})
}
// ResolveDOI resolves a DOI through the publication crawler and returns the
// resulting publication.
// POST /api/v1/publications/resolve-doi
//
// The JSON body must contain a non-empty "doi"; otherwise the response is 400.
// A resolver error yields 500 with the error text. When "staff_id" is present
// and is a valid UUID, the publication is linked to that staff member; an
// invalid staff_id is ignored and the outcome of the link call is not checked.
func (h *StaffHandlers) ResolveDOI(c *gin.Context) {
	var req struct {
		DOI     string `json:"doi"`
		StaffID string `json:"staff_id,omitempty"`
	}
	if bindErr := c.ShouldBindJSON(&req); bindErr != nil || req.DOI == "" {
		c.JSON(http.StatusBadRequest, gin.H{"error": "DOI is required"})
		return
	}

	ctx := c.Request.Context()
	pub, resolveErr := h.pubCrawler.ResolveDOI(ctx, req.DOI)
	if resolveErr != nil {
		c.JSON(http.StatusInternalServerError, gin.H{"error": resolveErr.Error()})
		return
	}

	// Optional: attach the publication to a staff member.
	if req.StaffID != "" {
		if staffID, parseErr := uuid.Parse(req.StaffID); parseErr == nil {
			h.repo.LinkStaffPublication(ctx, &database.StaffPublication{
				StaffID:       staffID,
				PublicationID: pub.ID,
			})
		}
	}

	c.JSON(http.StatusOK, pub)
}
// GetCrawlStatus returns the crawl status of a university.
// GET /api/v1/admin/crawl/status/:university_id
//
// Responds 400 for a malformed ID and 500 with the error text when the
// repository fails. When the repository has no status for the university, a
// placeholder with both crawl states set to "never" is returned.
func (h *StaffHandlers) GetCrawlStatus(c *gin.Context) {
	universityID, parseErr := uuid.Parse(c.Param("university_id"))
	if parseErr != nil {
		c.JSON(http.StatusBadRequest, gin.H{"error": "Invalid university ID"})
		return
	}

	current, statusErr := h.repo.GetCrawlStatus(c.Request.Context(), universityID)
	switch {
	case statusErr != nil:
		c.JSON(http.StatusInternalServerError, gin.H{"error": statusErr.Error()})
	case current == nil:
		// No status stored yet for this university.
		c.JSON(http.StatusOK, gin.H{
			"university_id":      universityID,
			"staff_crawl_status": "never",
			"pub_crawl_status":   "never",
		})
	default:
		c.JSON(http.StatusOK, current)
	}
}
// parseIntDefault scans a decimal integer from s and returns it. It returns
// def when s is empty or when no integer can be scanned from it.
func parseIntDefault(s string, def int) int {
	result := def
	if s != "" {
		var scanned int
		if _, scanErr := fmt.Sscanf(s, "%d", &scanned); scanErr == nil {
			result = scanned
		}
	}
	return result
}
// RegisterRoutes registers the staff, publication, university and crawl-admin
// routes of these handlers on the given router group.
func (h *StaffHandlers) RegisterRoutes(r *gin.RouterGroup) {
	// Routes are listed in registration order.
	routes := []struct {
		method  string
		path    string
		handler gin.HandlerFunc
	}{
		// Public endpoints
		{http.MethodGet, "/staff/search", h.SearchStaff},
		{http.MethodGet, "/staff/stats", h.GetStaffStats},
		{http.MethodGet, "/staff/:id", h.GetStaff},
		{http.MethodGet, "/staff/:id/publications", h.GetStaffPublications},
		{http.MethodGet, "/publications/search", h.SearchPublications},
		{http.MethodPost, "/publications/resolve-doi", h.ResolveDOI},
		{http.MethodGet, "/universities", h.ListUniversities},

		// Admin endpoints
		{http.MethodPost, "/admin/crawl/staff", h.StartStaffCrawl},
		{http.MethodPost, "/admin/crawl/publications", h.StartPublicationCrawl},
		{http.MethodGet, "/admin/crawl/status/:university_id", h.GetCrawlStatus},
	}

	for _, route := range routes {
		r.Handle(route.method, route.path, route.handler)
	}
}