fix: Restore all files lost during destructive rebase
A previous `git pull --rebase origin main` dropped 177 local commits,
losing 3400+ files across admin-v2, backend, studio-v2, website,
klausur-service, and many other services. The partial restore attempt
(660295e2) only recovered some files.
This commit restores all missing files from pre-rebase ref 98933f5e
while preserving post-rebase additions (night-scheduler, night-mode UI,
NightModeWidget dashboard integration).
Restored features include:
- AI Module Sidebar (FAB), OCR Labeling, OCR Compare
- GPU Dashboard, RAG Pipeline, Magic Help
- Klausur-Korrektur (8 files), Abitur-Archiv (5+ files)
- Companion, Zeugnisse-Crawler, Screen Flow
- Full backend, studio-v2, website, klausur-service
- All compliance SDKs, agent-core, voice-service
- CI/CD configs, documentation, scripts
Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
222
edu-search-service/internal/scheduler/scheduler.go
Normal file
222
edu-search-service/internal/scheduler/scheduler.go
Normal file
@@ -0,0 +1,222 @@
|
||||
package scheduler
|
||||
|
||||
import (
|
||||
"context"
|
||||
"log"
|
||||
"sync"
|
||||
"time"
|
||||
)
|
||||
|
||||
// CrawlFunc is the function signature for executing a crawl. The scheduler
// invokes it with a context that carries a 4-hour timeout; implementations
// should honor ctx cancellation and return a non-nil error on failure
// (recorded in the scheduler's last-run status).
type CrawlFunc func(ctx context.Context) error
|
||||
|
||||
// Status represents the current scheduler status as a JSON-serializable
// snapshot (returned by Scheduler.Status).
type Status struct {
	// Enabled reports whether automatic scheduling is turned on.
	Enabled bool `json:"enabled"`
	// Running is true while a crawl (scheduled or manual) is in progress.
	Running bool `json:"running"`
	// LastRun is the start time of the most recent crawl; zero if none yet.
	LastRun time.Time `json:"last_run,omitempty"`
	// LastRunStatus is "success" or "failed: <error>" for the last crawl.
	LastRunStatus string `json:"last_run_status,omitempty"`
	// NextRun is the projected start of the next scheduled crawl (only set
	// when the scheduler is enabled and at least one crawl has run).
	NextRun time.Time `json:"next_run,omitempty"`
	// Interval is the configured crawl interval (time.Duration.String form).
	Interval string `json:"interval"`
}
|
||||
|
||||
// Scheduler handles automatic crawl scheduling. A Scheduler must not be
// copied after first use (it contains a sync.RWMutex).
type Scheduler struct {
	mu sync.RWMutex // guards running, lastRun, lastRunStatus

	enabled   bool          // set once at construction; the automatic loop only runs when true
	interval  time.Duration // time between scheduled crawls
	crawlFunc CrawlFunc     // executed for each crawl, scheduled or manual

	running       bool      // true while a crawl is in progress
	lastRun       time.Time // start time of the most recent crawl
	lastRunStatus string    // "success" or "failed: <error>"

	stopChan chan struct{} // closed by Stop to end the run loop
	doneChan chan struct{} // closed by run when the loop exits
}
|
||||
|
||||
// Config holds scheduler configuration.
type Config struct {
	// Enabled turns the automatic scheduling loop on. Manual triggering via
	// TriggerCrawl works regardless of this flag.
	Enabled bool
	// Interval is the time between scheduled crawls. Intervals of 24h or
	// more are anchored to 2:00 AM local time (see calculateNextRun).
	Interval time.Duration
}
|
||||
|
||||
// NewScheduler creates a new crawler scheduler
|
||||
func NewScheduler(cfg Config, crawlFunc CrawlFunc) *Scheduler {
|
||||
return &Scheduler{
|
||||
enabled: cfg.Enabled,
|
||||
interval: cfg.Interval,
|
||||
crawlFunc: crawlFunc,
|
||||
stopChan: make(chan struct{}),
|
||||
doneChan: make(chan struct{}),
|
||||
}
|
||||
}
|
||||
|
||||
// Start begins the scheduler loop
|
||||
func (s *Scheduler) Start() {
|
||||
if !s.enabled {
|
||||
log.Println("Scheduler is disabled")
|
||||
return
|
||||
}
|
||||
|
||||
log.Printf("Scheduler starting with interval: %v", s.interval)
|
||||
|
||||
go s.run()
|
||||
}
|
||||
|
||||
// Stop gracefully stops the scheduler: it signals the run loop via stopChan
// and blocks until the loop has exited (doneChan closed). When the scheduler
// is disabled the loop was never started, so Stop returns immediately —
// waiting on doneChan would block forever.
//
// NOTE(review): calling Stop a second time panics on close(s.stopChan)
// (close of closed channel) — confirm callers stop at most once.
func (s *Scheduler) Stop() {
	s.mu.Lock()
	if !s.enabled {
		s.mu.Unlock()
		return
	}
	s.mu.Unlock()

	close(s.stopChan)
	<-s.doneChan
	log.Println("Scheduler stopped")
}
|
||||
|
||||
// run is the main scheduler loop
|
||||
func (s *Scheduler) run() {
|
||||
defer close(s.doneChan)
|
||||
|
||||
// Calculate time until first run
|
||||
// Default: run at 2:00 AM to minimize impact
|
||||
now := time.Now()
|
||||
nextRun := s.calculateNextRun(now)
|
||||
|
||||
log.Printf("Scheduler: first crawl scheduled for %v", nextRun)
|
||||
|
||||
timer := time.NewTimer(time.Until(nextRun))
|
||||
defer timer.Stop()
|
||||
|
||||
for {
|
||||
select {
|
||||
case <-s.stopChan:
|
||||
return
|
||||
case <-timer.C:
|
||||
s.executeCrawl()
|
||||
// Schedule next run
|
||||
nextRun = time.Now().Add(s.interval)
|
||||
timer.Reset(s.interval)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// calculateNextRun determines when the next crawl should occur
|
||||
func (s *Scheduler) calculateNextRun(from time.Time) time.Time {
|
||||
// If interval is 24h or more, schedule for 2:00 AM
|
||||
if s.interval >= 24*time.Hour {
|
||||
next := time.Date(from.Year(), from.Month(), from.Day(), 2, 0, 0, 0, from.Location())
|
||||
if next.Before(from) || next.Equal(from) {
|
||||
next = next.Add(24 * time.Hour)
|
||||
}
|
||||
return next
|
||||
}
|
||||
|
||||
// For shorter intervals, start immediately
|
||||
return from.Add(1 * time.Minute)
|
||||
}
|
||||
|
||||
// executeCrawl runs the crawl function
|
||||
func (s *Scheduler) executeCrawl() {
|
||||
s.mu.Lock()
|
||||
if s.running {
|
||||
s.mu.Unlock()
|
||||
log.Println("Scheduler: crawl already running, skipping")
|
||||
return
|
||||
}
|
||||
s.running = true
|
||||
s.mu.Unlock()
|
||||
|
||||
log.Println("Scheduler: starting scheduled crawl")
|
||||
startTime := time.Now()
|
||||
|
||||
ctx, cancel := context.WithTimeout(context.Background(), 4*time.Hour)
|
||||
defer cancel()
|
||||
|
||||
err := s.crawlFunc(ctx)
|
||||
|
||||
s.mu.Lock()
|
||||
s.running = false
|
||||
s.lastRun = startTime
|
||||
if err != nil {
|
||||
s.lastRunStatus = "failed: " + err.Error()
|
||||
log.Printf("Scheduler: crawl failed after %v: %v", time.Since(startTime), err)
|
||||
} else {
|
||||
s.lastRunStatus = "success"
|
||||
log.Printf("Scheduler: crawl completed successfully in %v", time.Since(startTime))
|
||||
}
|
||||
s.mu.Unlock()
|
||||
}
|
||||
|
||||
// TriggerCrawl manually triggers a crawl
|
||||
func (s *Scheduler) TriggerCrawl() error {
|
||||
s.mu.Lock()
|
||||
if s.running {
|
||||
s.mu.Unlock()
|
||||
return ErrCrawlAlreadyRunning
|
||||
}
|
||||
s.running = true
|
||||
s.mu.Unlock()
|
||||
|
||||
log.Println("Scheduler: manual crawl triggered")
|
||||
|
||||
go func() {
|
||||
startTime := time.Now()
|
||||
ctx, cancel := context.WithTimeout(context.Background(), 4*time.Hour)
|
||||
defer cancel()
|
||||
|
||||
err := s.crawlFunc(ctx)
|
||||
|
||||
s.mu.Lock()
|
||||
s.running = false
|
||||
s.lastRun = startTime
|
||||
if err != nil {
|
||||
s.lastRunStatus = "failed: " + err.Error()
|
||||
log.Printf("Scheduler: manual crawl failed after %v: %v", time.Since(startTime), err)
|
||||
} else {
|
||||
s.lastRunStatus = "success"
|
||||
log.Printf("Scheduler: manual crawl completed successfully in %v", time.Since(startTime))
|
||||
}
|
||||
s.mu.Unlock()
|
||||
}()
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
// Status returns the current scheduler status
|
||||
func (s *Scheduler) Status() Status {
|
||||
s.mu.RLock()
|
||||
defer s.mu.RUnlock()
|
||||
|
||||
status := Status{
|
||||
Enabled: s.enabled,
|
||||
Running: s.running,
|
||||
LastRun: s.lastRun,
|
||||
LastRunStatus: s.lastRunStatus,
|
||||
Interval: s.interval.String(),
|
||||
}
|
||||
|
||||
if s.enabled && !s.lastRun.IsZero() {
|
||||
status.NextRun = s.lastRun.Add(s.interval)
|
||||
}
|
||||
|
||||
return status
|
||||
}
|
||||
|
||||
// IsRunning returns true if a crawl is currently in progress
|
||||
func (s *Scheduler) IsRunning() bool {
|
||||
s.mu.RLock()
|
||||
defer s.mu.RUnlock()
|
||||
return s.running
|
||||
}
|
||||
|
||||
// SchedulerError is a string-based error type used for the package's
// sentinel errors, allowing them to be declared as constants.
type SchedulerError string

// Error implements the error interface.
func (e SchedulerError) Error() string { return string(e) }

const (
	// ErrCrawlAlreadyRunning is returned by TriggerCrawl when a crawl is
	// already in progress.
	ErrCrawlAlreadyRunning = SchedulerError("crawl already running")
)
|
||||
294
edu-search-service/internal/scheduler/scheduler_test.go
Normal file
294
edu-search-service/internal/scheduler/scheduler_test.go
Normal file
@@ -0,0 +1,294 @@
|
||||
package scheduler
|
||||
|
||||
import (
|
||||
"context"
|
||||
"errors"
|
||||
"sync/atomic"
|
||||
"testing"
|
||||
"time"
|
||||
)
|
||||
|
||||
func TestNewScheduler(t *testing.T) {
|
||||
callCount := int32(0)
|
||||
crawlFunc := func(ctx context.Context) error {
|
||||
atomic.AddInt32(&callCount, 1)
|
||||
return nil
|
||||
}
|
||||
|
||||
cfg := Config{
|
||||
Enabled: true,
|
||||
Interval: 1 * time.Hour,
|
||||
}
|
||||
|
||||
scheduler := NewScheduler(cfg, crawlFunc)
|
||||
|
||||
if scheduler == nil {
|
||||
t.Fatal("Expected non-nil scheduler")
|
||||
}
|
||||
|
||||
if !scheduler.enabled {
|
||||
t.Error("Expected scheduler to be enabled")
|
||||
}
|
||||
|
||||
if scheduler.interval != 1*time.Hour {
|
||||
t.Errorf("Expected interval 1h, got %v", scheduler.interval)
|
||||
}
|
||||
}
|
||||
|
||||
func TestScheduler_Disabled(t *testing.T) {
|
||||
callCount := int32(0)
|
||||
crawlFunc := func(ctx context.Context) error {
|
||||
atomic.AddInt32(&callCount, 1)
|
||||
return nil
|
||||
}
|
||||
|
||||
cfg := Config{
|
||||
Enabled: false,
|
||||
Interval: 1 * time.Second,
|
||||
}
|
||||
|
||||
scheduler := NewScheduler(cfg, crawlFunc)
|
||||
scheduler.Start()
|
||||
|
||||
// Wait a bit - crawl should not run
|
||||
time.Sleep(100 * time.Millisecond)
|
||||
|
||||
if atomic.LoadInt32(&callCount) != 0 {
|
||||
t.Error("Crawl should not run when scheduler is disabled")
|
||||
}
|
||||
}
|
||||
|
||||
func TestScheduler_TriggerCrawl(t *testing.T) {
|
||||
callCount := int32(0)
|
||||
crawlFunc := func(ctx context.Context) error {
|
||||
atomic.AddInt32(&callCount, 1)
|
||||
time.Sleep(50 * time.Millisecond) // Simulate work
|
||||
return nil
|
||||
}
|
||||
|
||||
cfg := Config{
|
||||
Enabled: false, // Disabled scheduler, but manual trigger should work
|
||||
Interval: 24 * time.Hour,
|
||||
}
|
||||
|
||||
scheduler := NewScheduler(cfg, crawlFunc)
|
||||
|
||||
// Trigger manual crawl
|
||||
err := scheduler.TriggerCrawl()
|
||||
if err != nil {
|
||||
t.Fatalf("TriggerCrawl failed: %v", err)
|
||||
}
|
||||
|
||||
// Wait for crawl to complete
|
||||
time.Sleep(100 * time.Millisecond)
|
||||
|
||||
if atomic.LoadInt32(&callCount) != 1 {
|
||||
t.Errorf("Expected 1 crawl, got %d", atomic.LoadInt32(&callCount))
|
||||
}
|
||||
}
|
||||
|
||||
// TestScheduler_TriggerCrawl_AlreadyRunning checks the mutual-exclusion
// contract of TriggerCrawl: a second trigger during an active crawl fails
// with ErrCrawlAlreadyRunning, and triggering works again once the crawl
// completes. The sleeps are calibrated against the 200ms crawl duration;
// keep their ordering and magnitudes in sync if the crawl time changes.
func TestScheduler_TriggerCrawl_AlreadyRunning(t *testing.T) {
	// Crawl long enough (200ms) that the second trigger lands mid-run.
	crawlFunc := func(ctx context.Context) error {
		time.Sleep(200 * time.Millisecond)
		return nil
	}

	cfg := Config{
		Enabled:  false,
		Interval: 24 * time.Hour,
	}

	scheduler := NewScheduler(cfg, crawlFunc)

	// First trigger
	err := scheduler.TriggerCrawl()
	if err != nil {
		t.Fatalf("First TriggerCrawl failed: %v", err)
	}

	// Wait a bit for crawl to start
	time.Sleep(10 * time.Millisecond)

	// Second trigger should fail
	err = scheduler.TriggerCrawl()
	if err != ErrCrawlAlreadyRunning {
		t.Errorf("Expected ErrCrawlAlreadyRunning, got %v", err)
	}

	// Wait for crawl to complete
	time.Sleep(250 * time.Millisecond)

	// Now trigger should work again
	err = scheduler.TriggerCrawl()
	if err != nil {
		t.Errorf("Third TriggerCrawl should succeed: %v", err)
	}
}
|
||||
|
||||
func TestScheduler_Status(t *testing.T) {
|
||||
crawlFunc := func(ctx context.Context) error {
|
||||
return nil
|
||||
}
|
||||
|
||||
cfg := Config{
|
||||
Enabled: true,
|
||||
Interval: 24 * time.Hour,
|
||||
}
|
||||
|
||||
scheduler := NewScheduler(cfg, crawlFunc)
|
||||
|
||||
status := scheduler.Status()
|
||||
|
||||
if !status.Enabled {
|
||||
t.Error("Expected enabled=true")
|
||||
}
|
||||
|
||||
if status.Running {
|
||||
t.Error("Expected running=false initially")
|
||||
}
|
||||
|
||||
if status.Interval != "24h0m0s" {
|
||||
t.Errorf("Expected interval '24h0m0s', got '%s'", status.Interval)
|
||||
}
|
||||
}
|
||||
|
||||
func TestScheduler_Status_AfterCrawl(t *testing.T) {
|
||||
crawlFunc := func(ctx context.Context) error {
|
||||
return nil
|
||||
}
|
||||
|
||||
cfg := Config{
|
||||
Enabled: false,
|
||||
Interval: 24 * time.Hour,
|
||||
}
|
||||
|
||||
scheduler := NewScheduler(cfg, crawlFunc)
|
||||
|
||||
// Trigger and wait
|
||||
scheduler.TriggerCrawl()
|
||||
time.Sleep(50 * time.Millisecond)
|
||||
|
||||
status := scheduler.Status()
|
||||
|
||||
if status.LastRun.IsZero() {
|
||||
t.Error("Expected LastRun to be set")
|
||||
}
|
||||
|
||||
if status.LastRunStatus != "success" {
|
||||
t.Errorf("Expected status 'success', got '%s'", status.LastRunStatus)
|
||||
}
|
||||
}
|
||||
|
||||
func TestScheduler_Status_FailedCrawl(t *testing.T) {
|
||||
crawlFunc := func(ctx context.Context) error {
|
||||
return errors.New("connection failed")
|
||||
}
|
||||
|
||||
cfg := Config{
|
||||
Enabled: false,
|
||||
Interval: 24 * time.Hour,
|
||||
}
|
||||
|
||||
scheduler := NewScheduler(cfg, crawlFunc)
|
||||
|
||||
// Trigger and wait
|
||||
scheduler.TriggerCrawl()
|
||||
time.Sleep(50 * time.Millisecond)
|
||||
|
||||
status := scheduler.Status()
|
||||
|
||||
if status.LastRunStatus != "failed: connection failed" {
|
||||
t.Errorf("Expected failed status, got '%s'", status.LastRunStatus)
|
||||
}
|
||||
}
|
||||
|
||||
func TestScheduler_IsRunning(t *testing.T) {
|
||||
crawlFunc := func(ctx context.Context) error {
|
||||
time.Sleep(100 * time.Millisecond)
|
||||
return nil
|
||||
}
|
||||
|
||||
cfg := Config{
|
||||
Enabled: false,
|
||||
Interval: 24 * time.Hour,
|
||||
}
|
||||
|
||||
scheduler := NewScheduler(cfg, crawlFunc)
|
||||
|
||||
if scheduler.IsRunning() {
|
||||
t.Error("Should not be running initially")
|
||||
}
|
||||
|
||||
scheduler.TriggerCrawl()
|
||||
time.Sleep(10 * time.Millisecond)
|
||||
|
||||
if !scheduler.IsRunning() {
|
||||
t.Error("Should be running after trigger")
|
||||
}
|
||||
|
||||
time.Sleep(150 * time.Millisecond)
|
||||
|
||||
if scheduler.IsRunning() {
|
||||
t.Error("Should not be running after completion")
|
||||
}
|
||||
}
|
||||
|
||||
func TestScheduler_CalculateNextRun_Daily(t *testing.T) {
|
||||
crawlFunc := func(ctx context.Context) error { return nil }
|
||||
|
||||
cfg := Config{
|
||||
Enabled: true,
|
||||
Interval: 24 * time.Hour,
|
||||
}
|
||||
|
||||
scheduler := NewScheduler(cfg, crawlFunc)
|
||||
|
||||
// Test at 1 AM - should schedule for 2 AM same day
|
||||
from := time.Date(2024, 1, 15, 1, 0, 0, 0, time.UTC)
|
||||
next := scheduler.calculateNextRun(from)
|
||||
|
||||
expectedHour := 2
|
||||
if next.Hour() != expectedHour {
|
||||
t.Errorf("Expected hour %d, got %d", expectedHour, next.Hour())
|
||||
}
|
||||
|
||||
if next.Day() != 15 {
|
||||
t.Errorf("Expected day 15, got %d", next.Day())
|
||||
}
|
||||
|
||||
// Test at 3 AM - should schedule for 2 AM next day
|
||||
from = time.Date(2024, 1, 15, 3, 0, 0, 0, time.UTC)
|
||||
next = scheduler.calculateNextRun(from)
|
||||
|
||||
if next.Day() != 16 {
|
||||
t.Errorf("Expected day 16, got %d", next.Day())
|
||||
}
|
||||
}
|
||||
|
||||
func TestScheduler_CalculateNextRun_Hourly(t *testing.T) {
|
||||
crawlFunc := func(ctx context.Context) error { return nil }
|
||||
|
||||
cfg := Config{
|
||||
Enabled: true,
|
||||
Interval: 1 * time.Hour, // Less than 24h
|
||||
}
|
||||
|
||||
scheduler := NewScheduler(cfg, crawlFunc)
|
||||
|
||||
from := time.Date(2024, 1, 15, 10, 30, 0, 0, time.UTC)
|
||||
next := scheduler.calculateNextRun(from)
|
||||
|
||||
// Should start in about 1 minute
|
||||
diff := next.Sub(from)
|
||||
if diff < 30*time.Second || diff > 90*time.Second {
|
||||
t.Errorf("Expected ~1 minute delay for short intervals, got %v", diff)
|
||||
}
|
||||
}
|
||||
|
||||
func TestSchedulerError(t *testing.T) {
|
||||
err := ErrCrawlAlreadyRunning
|
||||
|
||||
if err.Error() != "crawl already running" {
|
||||
t.Errorf("Unexpected error message: %s", err.Error())
|
||||
}
|
||||
}
|
||||
Reference in New Issue
Block a user