fix: Restore all files lost during destructive rebase

A previous `git pull --rebase origin main` dropped 177 local commits, losing 3400+ files across admin-v2, backend, studio-v2, website, klausur-service, and many other services. The partial restore attempt (660295e2) only recovered some files.

This commit restores all missing files from pre-rebase ref 98933f5e while preserving post-rebase additions (night-scheduler, night-mode UI, NightModeWidget dashboard integration).

Restored features include:
- AI Module Sidebar (FAB), OCR Labeling, OCR Compare
- GPU Dashboard, RAG Pipeline, Magic Help
- Klausur-Korrektur (8 files), Abitur-Archiv (5+ files)
- Companion, Zeugnisse-Crawler, Screen Flow
- Full backend, studio-v2, website, klausur-service
- All compliance SDKs, agent-core, voice-service
- CI/CD configs, documentation, scripts

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-02-09 09:51:32 +01:00
parent f7487ee240
commit 21a844cb8a
1986 changed files with 744143 additions and 1731 deletions
@@ -0,0 +1,222 @@
+package scheduler
+
+import (
+	"context"
+	"log"
+	"sync"
+	"time"
+)
+
+// CrawlFunc is the signature of the callback the scheduler invokes to perform
+// one crawl. The scheduler passes a context carrying a 4-hour timeout and
+// records a non-nil return value as a failed run.
+type CrawlFunc func(ctx context.Context) error
+
+// Status is a point-in-time snapshot of the scheduler, shaped for JSON
+// serialization.
+//
+// LastRun and NextRun use the `omitzero` option (honored by encoding/json
+// since Go 1.24): `omitempty` never omits a struct value such as time.Time,
+// so a scheduler that had not run yet serialized both fields as
+// "0001-01-01T00:00:00Z". Toolchains that predate `omitzero` ignore the
+// unknown option and keep emitting the zero time exactly as before.
+type Status struct {
+	Enabled       bool      `json:"enabled"`
+	Running       bool      `json:"running"`
+	LastRun       time.Time `json:"last_run,omitzero"`
+	LastRunStatus string    `json:"last_run_status,omitempty"`
+	NextRun       time.Time `json:"next_run,omitzero"`
+	Interval      string    `json:"interval"`
+}
+
+// Scheduler handles automatic crawl scheduling.
+//
+// The stop and done channels are created once in NewScheduler and closed at
+// most once, so a Scheduler cannot be restarted after Stop. All mutable
+// fields are guarded by mu.
+type Scheduler struct {
+	mu            sync.RWMutex
+	enabled       bool
+	interval      time.Duration
+	crawlFunc     CrawlFunc
+	running       bool
+	lastRun       time.Time
+	lastRunStatus string
+	stopChan      chan struct{}
+	doneChan      chan struct{}
+
+	// started records that Start launched the run loop, so Stop knows
+	// whether anything will ever close doneChan.
+	started bool
+
+	// stopOnce makes closing stopChan safe across repeated Stop calls.
+	stopOnce sync.Once
+}
+
+// Config holds scheduler configuration.
+type Config struct {
+	// Enabled controls whether Start launches the scheduling loop.
+	Enabled bool
+	// Interval is the time between scheduled crawls. It is stored as given;
+	// no validation is performed here.
+	Interval time.Duration
+}
+
+// NewScheduler creates a new crawler scheduler from cfg. crawlFunc is the
+// callback run for every scheduled or manually triggered crawl. The returned
+// scheduler is idle until Start is called.
+func NewScheduler(cfg Config, crawlFunc CrawlFunc) *Scheduler {
+	return &Scheduler{
+		enabled:   cfg.Enabled,
+		interval:  cfg.Interval,
+		crawlFunc: crawlFunc,
+		stopChan:  make(chan struct{}),
+		doneChan:  make(chan struct{}),
+	}
+}
+
+// Start begins the scheduler loop in a background goroutine.
+//
+// It is a no-op when the scheduler is disabled. It is also a no-op when the
+// loop was already started: a second loop would close doneChan a second time
+// and panic.
+func (s *Scheduler) Start() {
+	s.mu.Lock()
+	if !s.enabled {
+		s.mu.Unlock()
+		log.Println("Scheduler is disabled")
+		return
+	}
+	if s.started {
+		s.mu.Unlock()
+		log.Println("Scheduler already started")
+		return
+	}
+	s.started = true
+	s.mu.Unlock()
+
+	log.Printf("Scheduler starting with interval: %v", s.interval)
+
+	go s.run()
+}
+
+// Stop gracefully stops the scheduler and waits for the run loop to exit.
+//
+// It returns immediately if the loop was never started (disabled scheduler,
+// or Start not called); waiting on doneChan in that state would block
+// forever. Stop may be called more than once: only the first call closes
+// stopChan, later calls just wait for the loop to finish.
+func (s *Scheduler) Stop() {
+	s.mu.RLock()
+	started := s.started
+	s.mu.RUnlock()
+	if !started {
+		return
+	}
+
+	first := false
+	s.stopOnce.Do(func() {
+		close(s.stopChan)
+		first = true
+	})
+
+	<-s.doneChan
+	if first {
+		log.Println("Scheduler stopped")
+	}
+}
+
+// run is the main scheduler loop. It waits for the first slot returned by
+// calculateNextRun, executes a crawl each time the timer fires, and exits
+// when stopChan is closed, closing doneChan on the way out.
+func (s *Scheduler) run() {
+	defer close(s.doneChan)
+
+	// Calculate time until first run
+	// Default: run at 2:00 AM to minimize impact
+	nextRun := s.calculateNextRun(time.Now())
+
+	log.Printf("Scheduler: first crawl scheduled for %v", nextRun)
+
+	timer := time.NewTimer(time.Until(nextRun))
+	defer timer.Stop()
+
+	for {
+		select {
+		case <-s.stopChan:
+			return
+		case <-timer.C:
+			s.executeCrawl()
+
+			// Advance from the planned slot rather than from the moment the
+			// crawl finished; otherwise every run pushes the schedule later
+			// by the crawl's duration and the 2:00 AM slot drifts.
+			nextRun = nextRun.Add(s.interval)
+			if now := time.Now(); !nextRun.After(now) {
+				// The crawl overran its slot: fall back to a full interval
+				// from now instead of firing again immediately.
+				nextRun = now.Add(s.interval)
+			}
+			timer.Reset(time.Until(nextRun))
+		}
+	}
+}
+
+// calculateNextRun determines when the first crawl after from should occur.
+//
+// For intervals of 24h or more it returns the next 2:00 AM strictly after
+// from, in from's location. For shorter intervals it returns from plus a
+// one-minute start-up delay.
+func (s *Scheduler) calculateNextRun(from time.Time) time.Time {
+	if s.interval < 24*time.Hour {
+		// Short intervals: start after a one-minute delay.
+		return from.Add(1 * time.Minute)
+	}
+
+	next := time.Date(from.Year(), from.Month(), from.Day(), 2, 0, 0, 0, from.Location())
+	if !next.After(from) {
+		// Move by one calendar day rather than 24 hours so the wall-clock
+		// time stays at 2:00 AM across a daylight-saving transition.
+		next = next.AddDate(0, 0, 1)
+	}
+	return next
+}
+
+// executeCrawl performs one scheduled crawl synchronously. If a crawl is
+// already in progress it logs and returns without doing anything; otherwise
+// it runs crawlFunc with a 4-hour timeout and records the start time and
+// outcome.
+func (s *Scheduler) executeCrawl() {
+	s.mu.Lock()
+	busy := s.running
+	if !busy {
+		s.running = true
+	}
+	s.mu.Unlock()
+
+	if busy {
+		log.Println("Scheduler: crawl already running, skipping")
+		return
+	}
+
+	log.Println("Scheduler: starting scheduled crawl")
+	began := time.Now()
+
+	ctx, cancel := context.WithTimeout(context.Background(), 4*time.Hour)
+	defer cancel()
+
+	crawlErr := s.crawlFunc(ctx)
+
+	// Publish the result under the lock so readers see a consistent
+	// running/lastRun/lastRunStatus triple.
+	s.mu.Lock()
+	defer s.mu.Unlock()
+
+	s.running = false
+	s.lastRun = began
+	if crawlErr == nil {
+		s.lastRunStatus = "success"
+		log.Printf("Scheduler: crawl completed successfully in %v", time.Since(began))
+		return
+	}
+	s.lastRunStatus = "failed: " + crawlErr.Error()
+	log.Printf("Scheduler: crawl failed after %v: %v", time.Since(began), crawlErr)
+}
+
+// TriggerCrawl starts a crawl in a background goroutine, independent of the
+// schedule and of whether the scheduler is enabled. It returns
+// ErrCrawlAlreadyRunning if a crawl is in progress and nil once the crawl has
+// been launched; the crawl's own outcome is recorded in the scheduler state,
+// not returned.
+func (s *Scheduler) TriggerCrawl() error {
+	s.mu.Lock()
+	busy := s.running
+	if !busy {
+		// Claim the running flag before returning so a second caller is
+		// rejected even if the goroutine has not been scheduled yet.
+		s.running = true
+	}
+	s.mu.Unlock()
+
+	if busy {
+		return ErrCrawlAlreadyRunning
+	}
+
+	log.Println("Scheduler: manual crawl triggered")
+
+	go func() {
+		began := time.Now()
+		ctx, cancel := context.WithTimeout(context.Background(), 4*time.Hour)
+		defer cancel()
+
+		crawlErr := s.crawlFunc(ctx)
+
+		s.mu.Lock()
+		defer s.mu.Unlock()
+
+		s.running = false
+		s.lastRun = began
+		if crawlErr == nil {
+			s.lastRunStatus = "success"
+			log.Printf("Scheduler: manual crawl completed successfully in %v", time.Since(began))
+			return
+		}
+		s.lastRunStatus = "failed: " + crawlErr.Error()
+		log.Printf("Scheduler: manual crawl failed after %v: %v", time.Since(began), crawlErr)
+	}()
+
+	return nil
+}
+
+// Status returns the current scheduler status
+func (s *Scheduler) Status() Status {
+	s.mu.RLock()
+	defer s.mu.RUnlock()
+
+	status := Status{
+		Enabled:       s.enabled,
+		Running:       s.running,
+		LastRun:       s.lastRun,
+		LastRunStatus: s.lastRunStatus,
+		Interval:      s.interval.String(),
+	}
+
+	if s.enabled && !s.lastRun.IsZero() {
+		status.NextRun = s.lastRun.Add(s.interval)
+	}
+
+	return status
+}
+
+// IsRunning reports whether a crawl (scheduled or manual) is currently in
+// progress.
+func (s *Scheduler) IsRunning() bool {
+	s.mu.RLock()
+	active := s.running
+	s.mu.RUnlock()
+	return active
+}
+
+// SchedulerError is a string-backed error type, which lets the package's
+// sentinel errors be declared as constants.
+type SchedulerError string
+
+// Error implements the error interface by returning the underlying string.
+func (e SchedulerError) Error() string {
+	return string(e)
+}
+
+// ErrCrawlAlreadyRunning is returned by TriggerCrawl when a crawl is already
+// in progress.
+const ErrCrawlAlreadyRunning SchedulerError = "crawl already running"
@@ -0,0 +1,294 @@
+package scheduler
+
+import (
+	"context"
+	"errors"
+	"sync/atomic"
+	"testing"
+	"time"
+)
+
+func TestNewScheduler(t *testing.T) {
+	callCount := int32(0)
+	crawlFunc := func(ctx context.Context) error {
+		atomic.AddInt32(&callCount, 1)
+		return nil
+	}
+
+	cfg := Config{
+		Enabled:  true,
+		Interval: 1 * time.Hour,
+	}
+
+	scheduler := NewScheduler(cfg, crawlFunc)
+
+	if scheduler == nil {
+		t.Fatal("Expected non-nil scheduler")
+	}
+
+	if !scheduler.enabled {
+		t.Error("Expected scheduler to be enabled")
+	}
+
+	if scheduler.interval != 1*time.Hour {
+		t.Errorf("Expected interval 1h, got %v", scheduler.interval)
+	}
+}
+
+func TestScheduler_Disabled(t *testing.T) {
+	callCount := int32(0)
+	crawlFunc := func(ctx context.Context) error {
+		atomic.AddInt32(&callCount, 1)
+		return nil
+	}
+
+	cfg := Config{
+		Enabled:  false,
+		Interval: 1 * time.Second,
+	}
+
+	scheduler := NewScheduler(cfg, crawlFunc)
+	scheduler.Start()
+
+	// Wait a bit - crawl should not run
+	time.Sleep(100 * time.Millisecond)
+
+	if atomic.LoadInt32(&callCount) != 0 {
+		t.Error("Crawl should not run when scheduler is disabled")
+	}
+}
+
+func TestScheduler_TriggerCrawl(t *testing.T) {
+	callCount := int32(0)
+	crawlFunc := func(ctx context.Context) error {
+		atomic.AddInt32(&callCount, 1)
+		time.Sleep(50 * time.Millisecond) // Simulate work
+		return nil
+	}
+
+	cfg := Config{
+		Enabled:  false, // Disabled scheduler, but manual trigger should work
+		Interval: 24 * time.Hour,
+	}
+
+	scheduler := NewScheduler(cfg, crawlFunc)
+
+	// Trigger manual crawl
+	err := scheduler.TriggerCrawl()
+	if err != nil {
+		t.Fatalf("TriggerCrawl failed: %v", err)
+	}
+
+	// Wait for crawl to complete
+	time.Sleep(100 * time.Millisecond)
+
+	if atomic.LoadInt32(&callCount) != 1 {
+		t.Errorf("Expected 1 crawl, got %d", atomic.LoadInt32(&callCount))
+	}
+}
+
+// TestScheduler_TriggerCrawl_AlreadyRunning verifies that a second manual
+// trigger is rejected while a crawl is in flight and accepted again once it
+// has finished.
+//
+// The crawl is gated on a channel instead of sleeping, so the test does not
+// depend on scheduler timing.
+func TestScheduler_TriggerCrawl_AlreadyRunning(t *testing.T) {
+	release := make(chan struct{})
+	crawlFunc := func(ctx context.Context) error {
+		<-release // Block until the test lets the crawl finish.
+		return nil
+	}
+
+	cfg := Config{
+		Enabled:  false,
+		Interval: 24 * time.Hour,
+	}
+
+	scheduler := NewScheduler(cfg, crawlFunc)
+
+	// First trigger
+	if err := scheduler.TriggerCrawl(); err != nil {
+		t.Fatalf("First TriggerCrawl failed: %v", err)
+	}
+
+	// TriggerCrawl marks the scheduler as running before it returns, so the
+	// second trigger must fail without any waiting.
+	if err := scheduler.TriggerCrawl(); !errors.Is(err, ErrCrawlAlreadyRunning) {
+		t.Errorf("Expected ErrCrawlAlreadyRunning, got %v", err)
+	}
+
+	// Let the crawl complete and wait until the scheduler reports idle.
+	close(release)
+	deadline := time.Now().Add(2 * time.Second)
+	for scheduler.IsRunning() {
+		if time.Now().After(deadline) {
+			t.Fatal("crawl did not finish in time")
+		}
+		time.Sleep(time.Millisecond)
+	}
+
+	// Now trigger should work again
+	if err := scheduler.TriggerCrawl(); err != nil {
+		t.Errorf("Third TriggerCrawl should succeed: %v", err)
+	}
+}
+
+func TestScheduler_Status(t *testing.T) {
+	crawlFunc := func(ctx context.Context) error {
+		return nil
+	}
+
+	cfg := Config{
+		Enabled:  true,
+		Interval: 24 * time.Hour,
+	}
+
+	scheduler := NewScheduler(cfg, crawlFunc)
+
+	status := scheduler.Status()
+
+	if !status.Enabled {
+		t.Error("Expected enabled=true")
+	}
+
+	if status.Running {
+		t.Error("Expected running=false initially")
+	}
+
+	if status.Interval != "24h0m0s" {
+		t.Errorf("Expected interval '24h0m0s', got '%s'", status.Interval)
+	}
+}
+
+// TestScheduler_Status_AfterCrawl verifies that a successful manual crawl is
+// reflected in LastRun and LastRunStatus.
+func TestScheduler_Status_AfterCrawl(t *testing.T) {
+	crawlFunc := func(ctx context.Context) error {
+		return nil
+	}
+
+	cfg := Config{
+		Enabled:  false,
+		Interval: 24 * time.Hour,
+	}
+
+	scheduler := NewScheduler(cfg, crawlFunc)
+
+	if err := scheduler.TriggerCrawl(); err != nil {
+		t.Fatalf("TriggerCrawl failed: %v", err)
+	}
+
+	// The running flag is set before TriggerCrawl returns and cleared in the
+	// same critical section that records the result, so polling it is a
+	// reliable completion signal (unlike a fixed sleep).
+	deadline := time.Now().Add(2 * time.Second)
+	for scheduler.IsRunning() {
+		if time.Now().After(deadline) {
+			t.Fatal("crawl did not finish in time")
+		}
+		time.Sleep(time.Millisecond)
+	}
+
+	status := scheduler.Status()
+
+	if status.LastRun.IsZero() {
+		t.Error("Expected LastRun to be set")
+	}
+
+	if status.LastRunStatus != "success" {
+		t.Errorf("Expected status 'success', got '%s'", status.LastRunStatus)
+	}
+}
+
+// TestScheduler_Status_FailedCrawl verifies that a crawl error is surfaced in
+// LastRunStatus with the "failed: " prefix.
+func TestScheduler_Status_FailedCrawl(t *testing.T) {
+	crawlFunc := func(ctx context.Context) error {
+		return errors.New("connection failed")
+	}
+
+	cfg := Config{
+		Enabled:  false,
+		Interval: 24 * time.Hour,
+	}
+
+	scheduler := NewScheduler(cfg, crawlFunc)
+
+	if err := scheduler.TriggerCrawl(); err != nil {
+		t.Fatalf("TriggerCrawl failed: %v", err)
+	}
+
+	// The running flag is set before TriggerCrawl returns and cleared in the
+	// same critical section that records the result, so polling it is a
+	// reliable completion signal (unlike a fixed sleep).
+	deadline := time.Now().Add(2 * time.Second)
+	for scheduler.IsRunning() {
+		if time.Now().After(deadline) {
+			t.Fatal("crawl did not finish in time")
+		}
+		time.Sleep(time.Millisecond)
+	}
+
+	status := scheduler.Status()
+
+	if status.LastRunStatus != "failed: connection failed" {
+		t.Errorf("Expected failed status, got '%s'", status.LastRunStatus)
+	}
+}
+
+// TestScheduler_IsRunning verifies that IsRunning is false before a crawl,
+// true while one is in flight, and false again after it completes.
+//
+// The crawl is gated on a channel so the "in flight" window is controlled by
+// the test rather than by sleep durations.
+func TestScheduler_IsRunning(t *testing.T) {
+	release := make(chan struct{})
+	crawlFunc := func(ctx context.Context) error {
+		<-release // Block until the test lets the crawl finish.
+		return nil
+	}
+
+	cfg := Config{
+		Enabled:  false,
+		Interval: 24 * time.Hour,
+	}
+
+	scheduler := NewScheduler(cfg, crawlFunc)
+
+	if scheduler.IsRunning() {
+		t.Error("Should not be running initially")
+	}
+
+	if err := scheduler.TriggerCrawl(); err != nil {
+		t.Fatalf("TriggerCrawl failed: %v", err)
+	}
+
+	// TriggerCrawl sets the running flag before it returns.
+	if !scheduler.IsRunning() {
+		t.Error("Should be running after trigger")
+	}
+
+	close(release)
+
+	deadline := time.Now().Add(2 * time.Second)
+	for scheduler.IsRunning() {
+		if time.Now().After(deadline) {
+			t.Fatal("Should not be running after completion")
+		}
+		time.Sleep(time.Millisecond)
+	}
+}
+
+// TestScheduler_CalculateNextRun_Daily verifies that intervals of 24h or more
+// are scheduled for the next 2:00 AM strictly after the reference time. Each
+// case compares the full expected instant, so day, hour, and month rollover
+// are all checked.
+func TestScheduler_CalculateNextRun_Daily(t *testing.T) {
+	crawlFunc := func(ctx context.Context) error { return nil }
+
+	cfg := Config{
+		Enabled:  true,
+		Interval: 24 * time.Hour,
+	}
+
+	scheduler := NewScheduler(cfg, crawlFunc)
+
+	cases := []struct {
+		name string
+		from time.Time
+		want time.Time
+	}{
+		{
+			name: "1 AM schedules 2 AM same day",
+			from: time.Date(2024, 1, 15, 1, 0, 0, 0, time.UTC),
+			want: time.Date(2024, 1, 15, 2, 0, 0, 0, time.UTC),
+		},
+		{
+			name: "exactly 2 AM schedules 2 AM next day",
+			from: time.Date(2024, 1, 15, 2, 0, 0, 0, time.UTC),
+			want: time.Date(2024, 1, 16, 2, 0, 0, 0, time.UTC),
+		},
+		{
+			name: "3 AM schedules 2 AM next day",
+			from: time.Date(2024, 1, 15, 3, 0, 0, 0, time.UTC),
+			want: time.Date(2024, 1, 16, 2, 0, 0, 0, time.UTC),
+		},
+		{
+			name: "last day of month rolls into next month",
+			from: time.Date(2024, 1, 31, 3, 0, 0, 0, time.UTC),
+			want: time.Date(2024, 2, 1, 2, 0, 0, 0, time.UTC),
+		},
+	}
+
+	for _, tc := range cases {
+		got := scheduler.calculateNextRun(tc.from)
+		if !got.Equal(tc.want) {
+			t.Errorf("%s: from %v: expected %v, got %v", tc.name, tc.from, tc.want, got)
+		}
+	}
+}
+
+// TestScheduler_CalculateNextRun_Hourly verifies that intervals shorter than
+// 24h are first scheduled roughly one minute after the reference time.
+func TestScheduler_CalculateNextRun_Hourly(t *testing.T) {
+	// Less than 24h
+	s := NewScheduler(
+		Config{Enabled: true, Interval: 1 * time.Hour},
+		func(ctx context.Context) error { return nil },
+	)
+
+	from := time.Date(2024, 1, 15, 10, 30, 0, 0, time.UTC)
+	delay := s.calculateNextRun(from).Sub(from)
+
+	// Should start in about 1 minute
+	tooSoon := delay < 30*time.Second
+	tooLate := delay > 90*time.Second
+	if tooSoon || tooLate {
+		t.Errorf("Expected ~1 minute delay for short intervals, got %v", delay)
+	}
+}
+
+func TestSchedulerError(t *testing.T) {
+	err := ErrCrawlAlreadyRunning
+
+	if err.Error() != "crawl already running" {
+		t.Errorf("Unexpected error message: %s", err.Error())
+	}
+}