feat: edu-search-service migriert, voice-service/geo-service entfernt
All checks were successful
CI / go-lint (push) Has been skipped
CI / python-lint (push) Has been skipped
CI / nodejs-lint (push) Has been skipped
CI / test-go-school (push) Successful in 28s
CI / test-go-edu-search (push) Successful in 27s
CI / test-python-klausur (push) Successful in 1m45s
CI / test-python-agent-core (push) Successful in 16s
CI / test-nodejs-website (push) Successful in 21s

- edu-search-service von breakpilot-pwa nach breakpilot-lehrer kopiert (ohne vendor)
- opensearch + edu-search-service in docker-compose.yml hinzugefuegt
- voice-service aus docker-compose.yml entfernt (jetzt in breakpilot-core)
- geo-service aus docker-compose.yml entfernt (nicht mehr benoetigt)
- CI/CD: edu-search-service zu Gitea Actions und Woodpecker hinzugefuegt
  (Go lint, test mit go mod download, build, SBOM)

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
Benjamin Boenisch
2026-02-15 18:36:38 +01:00
parent d4e1d6bab6
commit 414e0f5ec0
73 changed files with 23938 additions and 92 deletions

View File

@@ -0,0 +1,222 @@
package scheduler
import (
"context"
"log"
"sync"
"time"
)
// CrawlFunc is the function signature for executing a crawl
type CrawlFunc func(ctx context.Context) error
// Status represents the current scheduler status
type Status struct {
Enabled bool `json:"enabled"`
Running bool `json:"running"`
LastRun time.Time `json:"last_run,omitempty"`
LastRunStatus string `json:"last_run_status,omitempty"`
NextRun time.Time `json:"next_run,omitempty"`
Interval string `json:"interval"`
}
// Scheduler handles automatic crawl scheduling
type Scheduler struct {
mu sync.RWMutex
enabled bool
interval time.Duration
crawlFunc CrawlFunc
running bool
lastRun time.Time
lastRunStatus string
stopChan chan struct{}
doneChan chan struct{}
}
// Config holds scheduler configuration
type Config struct {
Enabled bool
Interval time.Duration
}
// NewScheduler creates a new crawler scheduler
func NewScheduler(cfg Config, crawlFunc CrawlFunc) *Scheduler {
return &Scheduler{
enabled: cfg.Enabled,
interval: cfg.Interval,
crawlFunc: crawlFunc,
stopChan: make(chan struct{}),
doneChan: make(chan struct{}),
}
}
// Start begins the scheduler loop
func (s *Scheduler) Start() {
if !s.enabled {
log.Println("Scheduler is disabled")
return
}
log.Printf("Scheduler starting with interval: %v", s.interval)
go s.run()
}
// Stop gracefully stops the scheduler
func (s *Scheduler) Stop() {
s.mu.Lock()
if !s.enabled {
s.mu.Unlock()
return
}
s.mu.Unlock()
close(s.stopChan)
<-s.doneChan
log.Println("Scheduler stopped")
}
// run is the main scheduler loop
func (s *Scheduler) run() {
defer close(s.doneChan)
// Calculate time until first run
// Default: run at 2:00 AM to minimize impact
now := time.Now()
nextRun := s.calculateNextRun(now)
log.Printf("Scheduler: first crawl scheduled for %v", nextRun)
timer := time.NewTimer(time.Until(nextRun))
defer timer.Stop()
for {
select {
case <-s.stopChan:
return
case <-timer.C:
s.executeCrawl()
// Schedule next run
nextRun = time.Now().Add(s.interval)
timer.Reset(s.interval)
}
}
}
// calculateNextRun determines when the next crawl should occur
func (s *Scheduler) calculateNextRun(from time.Time) time.Time {
// If interval is 24h or more, schedule for 2:00 AM
if s.interval >= 24*time.Hour {
next := time.Date(from.Year(), from.Month(), from.Day(), 2, 0, 0, 0, from.Location())
if next.Before(from) || next.Equal(from) {
next = next.Add(24 * time.Hour)
}
return next
}
// For shorter intervals, start immediately
return from.Add(1 * time.Minute)
}
// executeCrawl runs the crawl function
func (s *Scheduler) executeCrawl() {
s.mu.Lock()
if s.running {
s.mu.Unlock()
log.Println("Scheduler: crawl already running, skipping")
return
}
s.running = true
s.mu.Unlock()
log.Println("Scheduler: starting scheduled crawl")
startTime := time.Now()
ctx, cancel := context.WithTimeout(context.Background(), 4*time.Hour)
defer cancel()
err := s.crawlFunc(ctx)
s.mu.Lock()
s.running = false
s.lastRun = startTime
if err != nil {
s.lastRunStatus = "failed: " + err.Error()
log.Printf("Scheduler: crawl failed after %v: %v", time.Since(startTime), err)
} else {
s.lastRunStatus = "success"
log.Printf("Scheduler: crawl completed successfully in %v", time.Since(startTime))
}
s.mu.Unlock()
}
// TriggerCrawl manually triggers a crawl
func (s *Scheduler) TriggerCrawl() error {
s.mu.Lock()
if s.running {
s.mu.Unlock()
return ErrCrawlAlreadyRunning
}
s.running = true
s.mu.Unlock()
log.Println("Scheduler: manual crawl triggered")
go func() {
startTime := time.Now()
ctx, cancel := context.WithTimeout(context.Background(), 4*time.Hour)
defer cancel()
err := s.crawlFunc(ctx)
s.mu.Lock()
s.running = false
s.lastRun = startTime
if err != nil {
s.lastRunStatus = "failed: " + err.Error()
log.Printf("Scheduler: manual crawl failed after %v: %v", time.Since(startTime), err)
} else {
s.lastRunStatus = "success"
log.Printf("Scheduler: manual crawl completed successfully in %v", time.Since(startTime))
}
s.mu.Unlock()
}()
return nil
}
// Status returns the current scheduler status
func (s *Scheduler) Status() Status {
s.mu.RLock()
defer s.mu.RUnlock()
status := Status{
Enabled: s.enabled,
Running: s.running,
LastRun: s.lastRun,
LastRunStatus: s.lastRunStatus,
Interval: s.interval.String(),
}
if s.enabled && !s.lastRun.IsZero() {
status.NextRun = s.lastRun.Add(s.interval)
}
return status
}
// IsRunning returns true if a crawl is currently in progress
func (s *Scheduler) IsRunning() bool {
s.mu.RLock()
defer s.mu.RUnlock()
return s.running
}
// Errors
type SchedulerError string
func (e SchedulerError) Error() string { return string(e) }
const (
ErrCrawlAlreadyRunning = SchedulerError("crawl already running")
)

View File

@@ -0,0 +1,294 @@
package scheduler
import (
"context"
"errors"
"sync/atomic"
"testing"
"time"
)
func TestNewScheduler(t *testing.T) {
callCount := int32(0)
crawlFunc := func(ctx context.Context) error {
atomic.AddInt32(&callCount, 1)
return nil
}
cfg := Config{
Enabled: true,
Interval: 1 * time.Hour,
}
scheduler := NewScheduler(cfg, crawlFunc)
if scheduler == nil {
t.Fatal("Expected non-nil scheduler")
}
if !scheduler.enabled {
t.Error("Expected scheduler to be enabled")
}
if scheduler.interval != 1*time.Hour {
t.Errorf("Expected interval 1h, got %v", scheduler.interval)
}
}
func TestScheduler_Disabled(t *testing.T) {
callCount := int32(0)
crawlFunc := func(ctx context.Context) error {
atomic.AddInt32(&callCount, 1)
return nil
}
cfg := Config{
Enabled: false,
Interval: 1 * time.Second,
}
scheduler := NewScheduler(cfg, crawlFunc)
scheduler.Start()
// Wait a bit - crawl should not run
time.Sleep(100 * time.Millisecond)
if atomic.LoadInt32(&callCount) != 0 {
t.Error("Crawl should not run when scheduler is disabled")
}
}
func TestScheduler_TriggerCrawl(t *testing.T) {
callCount := int32(0)
crawlFunc := func(ctx context.Context) error {
atomic.AddInt32(&callCount, 1)
time.Sleep(50 * time.Millisecond) // Simulate work
return nil
}
cfg := Config{
Enabled: false, // Disabled scheduler, but manual trigger should work
Interval: 24 * time.Hour,
}
scheduler := NewScheduler(cfg, crawlFunc)
// Trigger manual crawl
err := scheduler.TriggerCrawl()
if err != nil {
t.Fatalf("TriggerCrawl failed: %v", err)
}
// Wait for crawl to complete
time.Sleep(100 * time.Millisecond)
if atomic.LoadInt32(&callCount) != 1 {
t.Errorf("Expected 1 crawl, got %d", atomic.LoadInt32(&callCount))
}
}
func TestScheduler_TriggerCrawl_AlreadyRunning(t *testing.T) {
crawlFunc := func(ctx context.Context) error {
time.Sleep(200 * time.Millisecond)
return nil
}
cfg := Config{
Enabled: false,
Interval: 24 * time.Hour,
}
scheduler := NewScheduler(cfg, crawlFunc)
// First trigger
err := scheduler.TriggerCrawl()
if err != nil {
t.Fatalf("First TriggerCrawl failed: %v", err)
}
// Wait a bit for crawl to start
time.Sleep(10 * time.Millisecond)
// Second trigger should fail
err = scheduler.TriggerCrawl()
if err != ErrCrawlAlreadyRunning {
t.Errorf("Expected ErrCrawlAlreadyRunning, got %v", err)
}
// Wait for crawl to complete
time.Sleep(250 * time.Millisecond)
// Now trigger should work again
err = scheduler.TriggerCrawl()
if err != nil {
t.Errorf("Third TriggerCrawl should succeed: %v", err)
}
}
func TestScheduler_Status(t *testing.T) {
crawlFunc := func(ctx context.Context) error {
return nil
}
cfg := Config{
Enabled: true,
Interval: 24 * time.Hour,
}
scheduler := NewScheduler(cfg, crawlFunc)
status := scheduler.Status()
if !status.Enabled {
t.Error("Expected enabled=true")
}
if status.Running {
t.Error("Expected running=false initially")
}
if status.Interval != "24h0m0s" {
t.Errorf("Expected interval '24h0m0s', got '%s'", status.Interval)
}
}
func TestScheduler_Status_AfterCrawl(t *testing.T) {
crawlFunc := func(ctx context.Context) error {
return nil
}
cfg := Config{
Enabled: false,
Interval: 24 * time.Hour,
}
scheduler := NewScheduler(cfg, crawlFunc)
// Trigger and wait
scheduler.TriggerCrawl()
time.Sleep(50 * time.Millisecond)
status := scheduler.Status()
if status.LastRun.IsZero() {
t.Error("Expected LastRun to be set")
}
if status.LastRunStatus != "success" {
t.Errorf("Expected status 'success', got '%s'", status.LastRunStatus)
}
}
func TestScheduler_Status_FailedCrawl(t *testing.T) {
crawlFunc := func(ctx context.Context) error {
return errors.New("connection failed")
}
cfg := Config{
Enabled: false,
Interval: 24 * time.Hour,
}
scheduler := NewScheduler(cfg, crawlFunc)
// Trigger and wait
scheduler.TriggerCrawl()
time.Sleep(50 * time.Millisecond)
status := scheduler.Status()
if status.LastRunStatus != "failed: connection failed" {
t.Errorf("Expected failed status, got '%s'", status.LastRunStatus)
}
}
func TestScheduler_IsRunning(t *testing.T) {
crawlFunc := func(ctx context.Context) error {
time.Sleep(100 * time.Millisecond)
return nil
}
cfg := Config{
Enabled: false,
Interval: 24 * time.Hour,
}
scheduler := NewScheduler(cfg, crawlFunc)
if scheduler.IsRunning() {
t.Error("Should not be running initially")
}
scheduler.TriggerCrawl()
time.Sleep(10 * time.Millisecond)
if !scheduler.IsRunning() {
t.Error("Should be running after trigger")
}
time.Sleep(150 * time.Millisecond)
if scheduler.IsRunning() {
t.Error("Should not be running after completion")
}
}
func TestScheduler_CalculateNextRun_Daily(t *testing.T) {
crawlFunc := func(ctx context.Context) error { return nil }
cfg := Config{
Enabled: true,
Interval: 24 * time.Hour,
}
scheduler := NewScheduler(cfg, crawlFunc)
// Test at 1 AM - should schedule for 2 AM same day
from := time.Date(2024, 1, 15, 1, 0, 0, 0, time.UTC)
next := scheduler.calculateNextRun(from)
expectedHour := 2
if next.Hour() != expectedHour {
t.Errorf("Expected hour %d, got %d", expectedHour, next.Hour())
}
if next.Day() != 15 {
t.Errorf("Expected day 15, got %d", next.Day())
}
// Test at 3 AM - should schedule for 2 AM next day
from = time.Date(2024, 1, 15, 3, 0, 0, 0, time.UTC)
next = scheduler.calculateNextRun(from)
if next.Day() != 16 {
t.Errorf("Expected day 16, got %d", next.Day())
}
}
func TestScheduler_CalculateNextRun_Hourly(t *testing.T) {
crawlFunc := func(ctx context.Context) error { return nil }
cfg := Config{
Enabled: true,
Interval: 1 * time.Hour, // Less than 24h
}
scheduler := NewScheduler(cfg, crawlFunc)
from := time.Date(2024, 1, 15, 10, 30, 0, 0, time.UTC)
next := scheduler.calculateNextRun(from)
// Should start in about 1 minute
diff := next.Sub(from)
if diff < 30*time.Second || diff > 90*time.Second {
t.Errorf("Expected ~1 minute delay for short intervals, got %v", diff)
}
}
func TestSchedulerError(t *testing.T) {
err := ErrCrawlAlreadyRunning
if err.Error() != "crawl already running" {
t.Errorf("Unexpected error message: %s", err.Error())
}
}