All checks were successful
CI / go-lint (push) Has been skipped
CI / python-lint (push) Has been skipped
CI / nodejs-lint (push) Has been skipped
CI / test-go-school (push) Successful in 28s
CI / test-go-edu-search (push) Successful in 27s
CI / test-python-klausur (push) Successful in 1m45s
CI / test-python-agent-core (push) Successful in 16s
CI / test-nodejs-website (push) Successful in 21s
- edu-search-service von breakpilot-pwa nach breakpilot-lehrer kopiert (ohne vendor) - opensearch + edu-search-service in docker-compose.yml hinzugefuegt - voice-service aus docker-compose.yml entfernt (jetzt in breakpilot-core) - geo-service aus docker-compose.yml entfernt (nicht mehr benoetigt) - CI/CD: edu-search-service zu Gitea Actions und Woodpecker hinzugefuegt (Go lint, test mit go mod download, build, SBOM) Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
223 lines
4.9 KiB
Go
223 lines
4.9 KiB
Go
package scheduler
|
|
|
|
import (
|
|
"context"
|
|
"log"
|
|
"sync"
|
|
"time"
|
|
)
|
|
|
|
// CrawlFunc is the function signature for executing a crawl. The
// context carries the scheduler's timeout/cancellation; a non-nil
// error marks the run as "failed" in the scheduler status.
type CrawlFunc func(ctx context.Context) error
|
|
|
|
// Status represents the current scheduler status — a JSON-serializable
// snapshot of the Scheduler's internal state (see Scheduler.Status).
type Status struct {
	// Enabled mirrors the configured on/off switch.
	Enabled bool `json:"enabled"`
	// Running is true while a crawl is in progress.
	Running bool `json:"running"`
	// LastRun is the start time of the most recent crawl (zero if none yet).
	LastRun time.Time `json:"last_run,omitempty"`
	// LastRunStatus is "success" or "failed: <error>" for the last crawl.
	LastRunStatus string `json:"last_run_status,omitempty"`
	// NextRun is the projected start of the next crawl; only populated
	// after at least one run has completed.
	NextRun time.Time `json:"next_run,omitempty"`
	// Interval is the configured crawl interval as a Duration string.
	Interval string `json:"interval"`
}
|
|
|
|
// Scheduler handles automatic crawl scheduling. Mutable state is
// guarded by mu; stopChan/doneChan coordinate shutdown between the
// run loop and Stop.
type Scheduler struct {
	mu            sync.RWMutex  // guards running, lastRun, lastRunStatus
	enabled       bool          // set at construction; never mutated afterwards
	interval      time.Duration // time between scheduled crawls
	crawlFunc     CrawlFunc     // the actual crawl implementation
	running       bool          // true while a crawl is executing
	lastRun       time.Time     // start time of the most recent crawl
	lastRunStatus string        // "success" or "failed: <error>"
	stopChan      chan struct{} // closed by Stop to end the run loop
	doneChan      chan struct{} // closed by run() on exit; Stop waits on it
}
|
|
|
|
// Config holds scheduler configuration.
type Config struct {
	// Enabled turns the background scheduler on or off.
	Enabled bool
	// Interval is the time between scheduled crawls. Intervals of 24h
	// or more are anchored to 2:00 AM local time (see calculateNextRun).
	Interval time.Duration
}
|
|
|
|
// NewScheduler creates a new crawler scheduler
|
|
func NewScheduler(cfg Config, crawlFunc CrawlFunc) *Scheduler {
|
|
return &Scheduler{
|
|
enabled: cfg.Enabled,
|
|
interval: cfg.Interval,
|
|
crawlFunc: crawlFunc,
|
|
stopChan: make(chan struct{}),
|
|
doneChan: make(chan struct{}),
|
|
}
|
|
}
|
|
|
|
// Start begins the scheduler loop
|
|
func (s *Scheduler) Start() {
|
|
if !s.enabled {
|
|
log.Println("Scheduler is disabled")
|
|
return
|
|
}
|
|
|
|
log.Printf("Scheduler starting with interval: %v", s.interval)
|
|
|
|
go s.run()
|
|
}
|
|
|
|
// Stop gracefully stops the scheduler: it signals the run loop to exit
// and blocks until the loop has finished. It is a no-op when the
// scheduler is disabled.
//
// NOTE(review): Stop must be called at most once, and only after Start
// has actually launched the run loop — a second call panics on the
// double close of stopChan, and if run() was never started the receive
// on doneChan blocks forever. Confirm callers uphold this contract.
func (s *Scheduler) Stop() {
	s.mu.Lock()
	if !s.enabled {
		s.mu.Unlock()
		return
	}
	s.mu.Unlock()

	// Signal the run loop, then wait for it to acknowledge shutdown.
	close(s.stopChan)
	<-s.doneChan
	log.Println("Scheduler stopped")
}
|
|
|
|
// run is the main scheduler loop
|
|
func (s *Scheduler) run() {
|
|
defer close(s.doneChan)
|
|
|
|
// Calculate time until first run
|
|
// Default: run at 2:00 AM to minimize impact
|
|
now := time.Now()
|
|
nextRun := s.calculateNextRun(now)
|
|
|
|
log.Printf("Scheduler: first crawl scheduled for %v", nextRun)
|
|
|
|
timer := time.NewTimer(time.Until(nextRun))
|
|
defer timer.Stop()
|
|
|
|
for {
|
|
select {
|
|
case <-s.stopChan:
|
|
return
|
|
case <-timer.C:
|
|
s.executeCrawl()
|
|
// Schedule next run
|
|
nextRun = time.Now().Add(s.interval)
|
|
timer.Reset(s.interval)
|
|
}
|
|
}
|
|
}
|
|
|
|
// calculateNextRun determines when the next crawl should occur
|
|
func (s *Scheduler) calculateNextRun(from time.Time) time.Time {
|
|
// If interval is 24h or more, schedule for 2:00 AM
|
|
if s.interval >= 24*time.Hour {
|
|
next := time.Date(from.Year(), from.Month(), from.Day(), 2, 0, 0, 0, from.Location())
|
|
if next.Before(from) || next.Equal(from) {
|
|
next = next.Add(24 * time.Hour)
|
|
}
|
|
return next
|
|
}
|
|
|
|
// For shorter intervals, start immediately
|
|
return from.Add(1 * time.Minute)
|
|
}
|
|
|
|
// executeCrawl runs the crawl function
|
|
func (s *Scheduler) executeCrawl() {
|
|
s.mu.Lock()
|
|
if s.running {
|
|
s.mu.Unlock()
|
|
log.Println("Scheduler: crawl already running, skipping")
|
|
return
|
|
}
|
|
s.running = true
|
|
s.mu.Unlock()
|
|
|
|
log.Println("Scheduler: starting scheduled crawl")
|
|
startTime := time.Now()
|
|
|
|
ctx, cancel := context.WithTimeout(context.Background(), 4*time.Hour)
|
|
defer cancel()
|
|
|
|
err := s.crawlFunc(ctx)
|
|
|
|
s.mu.Lock()
|
|
s.running = false
|
|
s.lastRun = startTime
|
|
if err != nil {
|
|
s.lastRunStatus = "failed: " + err.Error()
|
|
log.Printf("Scheduler: crawl failed after %v: %v", time.Since(startTime), err)
|
|
} else {
|
|
s.lastRunStatus = "success"
|
|
log.Printf("Scheduler: crawl completed successfully in %v", time.Since(startTime))
|
|
}
|
|
s.mu.Unlock()
|
|
}
|
|
|
|
// TriggerCrawl manually triggers a crawl
|
|
func (s *Scheduler) TriggerCrawl() error {
|
|
s.mu.Lock()
|
|
if s.running {
|
|
s.mu.Unlock()
|
|
return ErrCrawlAlreadyRunning
|
|
}
|
|
s.running = true
|
|
s.mu.Unlock()
|
|
|
|
log.Println("Scheduler: manual crawl triggered")
|
|
|
|
go func() {
|
|
startTime := time.Now()
|
|
ctx, cancel := context.WithTimeout(context.Background(), 4*time.Hour)
|
|
defer cancel()
|
|
|
|
err := s.crawlFunc(ctx)
|
|
|
|
s.mu.Lock()
|
|
s.running = false
|
|
s.lastRun = startTime
|
|
if err != nil {
|
|
s.lastRunStatus = "failed: " + err.Error()
|
|
log.Printf("Scheduler: manual crawl failed after %v: %v", time.Since(startTime), err)
|
|
} else {
|
|
s.lastRunStatus = "success"
|
|
log.Printf("Scheduler: manual crawl completed successfully in %v", time.Since(startTime))
|
|
}
|
|
s.mu.Unlock()
|
|
}()
|
|
|
|
return nil
|
|
}
|
|
|
|
// Status returns the current scheduler status
|
|
func (s *Scheduler) Status() Status {
|
|
s.mu.RLock()
|
|
defer s.mu.RUnlock()
|
|
|
|
status := Status{
|
|
Enabled: s.enabled,
|
|
Running: s.running,
|
|
LastRun: s.lastRun,
|
|
LastRunStatus: s.lastRunStatus,
|
|
Interval: s.interval.String(),
|
|
}
|
|
|
|
if s.enabled && !s.lastRun.IsZero() {
|
|
status.NextRun = s.lastRun.Add(s.interval)
|
|
}
|
|
|
|
return status
|
|
}
|
|
|
|
// IsRunning returns true if a crawl is currently in progress
|
|
func (s *Scheduler) IsRunning() bool {
|
|
s.mu.RLock()
|
|
defer s.mu.RUnlock()
|
|
return s.running
|
|
}
|
|
|
|
// SchedulerError is a string-based error type used for the package's
// sentinel errors, allowing comparison with errors.Is.
type SchedulerError string

// Error implements the error interface.
func (e SchedulerError) Error() string {
	return string(e)
}

// Sentinel errors returned by the scheduler.
const (
	// ErrCrawlAlreadyRunning is returned by TriggerCrawl when a crawl
	// is already in progress.
	ErrCrawlAlreadyRunning = SchedulerError("crawl already running")
)
|