package handlers

import (
	"encoding/json"
	"errors"
	"fmt"
	"log"
	"net/http"
	"os"
	"path/filepath"
	"sort"
	"sync"
	"time"

	"github.com/gin-gonic/gin"
	"github.com/google/uuid"
)

// SeedURL represents a seed URL configuration.
type SeedURL struct {
	ID            string    `json:"id"`
	URL           string    `json:"url"`
	Category      string    `json:"category"`
	Name          string    `json:"name"`
	Description   string    `json:"description"`
	TrustBoost    float64   `json:"trustBoost"`
	Enabled       bool      `json:"enabled"`
	LastCrawled   *string   `json:"lastCrawled,omitempty"`
	DocumentCount int       `json:"documentCount,omitempty"`
	CreatedAt     time.Time `json:"createdAt"`
	UpdatedAt     time.Time `json:"updatedAt"`
}

// CrawlStats contains crawl statistics as returned by the stats endpoint.
type CrawlStats struct {
	TotalDocuments       int            `json:"totalDocuments"`
	TotalSeeds           int            `json:"totalSeeds"`
	LastCrawlTime        *string        `json:"lastCrawlTime,omitempty"`
	CrawlStatus          string         `json:"crawlStatus"`
	DocumentsPerCategory map[string]int `json:"documentsPerCategory"`
	DocumentsPerDocType  map[string]int `json:"documentsPerDocType"`
	AvgTrustScore        float64        `json:"avgTrustScore"`
}

// SeedStore manages seed URLs in memory and mirrors them to a JSON file.
// All exported methods are safe for concurrent use; mu guards seeds.
type SeedStore struct {
	seeds    map[string]SeedURL
	mu       sync.RWMutex
	filePath string
}

// Values held by crawlStatus.
const (
	crawlStatusIdle    = "idle"
	crawlStatusRunning = "running"
)

// seedStore is the package-wide store; it is nil until InitSeedStore runs.
var seedStore *SeedStore

// Crawl state is shared between request handlers and the background crawl
// goroutine, so every access to crawlStatus and lastCrawlTime must hold crawlMu.
var (
	crawlMu       sync.Mutex
	crawlStatus   = crawlStatusIdle
	lastCrawlTime *string
)

// InitSeedStore initializes the package-wide seed store backed by
// <seedsDir>/seeds.json.
//
// If the file exists it is loaded. If it does not exist, the built-in default
// seeds are installed and written to the file. Any other load failure (for
// example unreadable or malformed JSON) is returned as an error rather than
// being treated as "missing", so an existing file is never silently replaced
// with the defaults.
func InitSeedStore(seedsDir string) error {
	store := &SeedStore{
		seeds:    make(map[string]SeedURL),
		filePath: filepath.Join(seedsDir, "seeds.json"),
	}
	seedStore = store

	err := store.loadFromFile()
	switch {
	case err == nil:
		return nil
	case errors.Is(err, os.ErrNotExist):
		return store.loadFromTxtFiles(seedsDir)
	default:
		return fmt.Errorf("loading seeds: %w", err)
	}
}

// loadFromFile reads the JSON seed file and adds every entry to the store,
// keyed by ID. Nothing is added if the file cannot be read or parsed.
func (s *SeedStore) loadFromFile() error {
	data, err := os.ReadFile(s.filePath)
	if err != nil {
		return err
	}

	var seeds []SeedURL
	if err := json.Unmarshal(data, &seeds); err != nil {
		return fmt.Errorf("parsing seed file %q: %w", s.filePath, err)
	}

	s.mu.Lock()
	defer s.mu.Unlock()
	for _, seed := range seeds {
		s.seeds[seed.ID] = seed
	}
	return nil
}

// loadFromTxtFiles installs the built-in default seeds and persists them.
//
// Despite its name it does not currently read any files; seedsDir is unused
// and the defaults are hard-coded below. Each seed receives a fresh random ID
// and the same creation/update timestamp.
func (s *SeedStore) loadFromTxtFiles(seedsDir string) error {
	// Default seeds from category files.
	defaultSeeds := []SeedURL{
		// Federal
		{URL: "https://www.kmk.org", Category: "federal", Name: "Kultusministerkonferenz", Description: "Beschlüsse und Bildungsstandards", TrustBoost: 0.50, Enabled: true},
		{URL: "https://www.bildungsserver.de", Category: "federal", Name: "Deutscher Bildungsserver", Description: "Zentrale Bildungsinformationen", TrustBoost: 0.50, Enabled: true},
		{URL: "https://www.bpb.de", Category: "federal", Name: "Bundeszentrale politische Bildung", Description: "Politische Bildung", TrustBoost: 0.45, Enabled: true},
		{URL: "https://www.bmbf.de", Category: "federal", Name: "BMBF", Description: "Bundesbildungsministerium", TrustBoost: 0.50, Enabled: true},
		{URL: "https://www.iqb.hu-berlin.de", Category: "federal", Name: "IQB", Description: "Institut Qualitätsentwicklung", TrustBoost: 0.50, Enabled: true},
		// Science
		{URL: "https://www.bertelsmann-stiftung.de/de/themen/bildung", Category: "science", Name: "Bertelsmann Stiftung", Description: "Bildungsstudien und Ländermonitor", TrustBoost: 0.40, Enabled: true},
		{URL: "https://www.oecd.org/pisa", Category: "science", Name: "PISA-Studien", Description: "Internationale Schulleistungsstudie", TrustBoost: 0.45, Enabled: true},
		{URL: "https://www.iea.nl/studies/iea/pirls", Category: "science", Name: "IGLU/PIRLS", Description: "Internationale Grundschul-Lese-Untersuchung", TrustBoost: 0.45, Enabled: true},
		{URL: "https://www.iea.nl/studies/iea/timss", Category: "science", Name: "TIMSS", Description: "Trends in International Mathematics and Science Study", TrustBoost: 0.45, Enabled: true},
		// Federal states
		{URL: "https://www.km.bayern.de", Category: "states", Name: "Bayern Kultusministerium", Description: "Lehrpläne Bayern", TrustBoost: 0.45, Enabled: true},
		{URL: "https://www.schulministerium.nrw", Category: "states", Name: "NRW Schulministerium", Description: "Lehrpläne NRW", TrustBoost: 0.45, Enabled: true},
		{URL: "https://www.berlin.de/sen/bildung", Category: "states", Name: "Berlin Bildung", Description: "Rahmenlehrpläne Berlin", TrustBoost: 0.45, Enabled: true},
		{URL: "https://kultusministerium.hessen.de", Category: "states", Name: "Hessen Kultusministerium", Description: "Kerncurricula Hessen", TrustBoost: 0.45, Enabled: true},
		// Portals
		{URL: "https://www.lehrer-online.de", Category: "portals", Name: "Lehrer-Online", Description: "Unterrichtsmaterialien", TrustBoost: 0.20, Enabled: true},
		{URL: "https://www.4teachers.de", Category: "portals", Name: "4teachers", Description: "Lehrercommunity", TrustBoost: 0.20, Enabled: true},
		{URL: "https://www.zum.de", Category: "portals", Name: "ZUM", Description: "Zentrale für Unterrichtsmedien", TrustBoost: 0.25, Enabled: true},
	}

	s.mu.Lock()
	defer s.mu.Unlock()

	now := time.Now()
	for _, seed := range defaultSeeds {
		seed.ID = uuid.New().String()
		seed.CreatedAt = now
		seed.UpdatedAt = now
		s.seeds[seed.ID] = seed
	}
	return s.saveToFile()
}

// sortedSeedsLocked returns a copy of all seeds ordered by creation time and
// then by ID, so that listings and the persisted file do not depend on map
// iteration order. The caller must hold s.mu (read or write). The result is
// never nil, so it encodes as a JSON array even when the store is empty.
func (s *SeedStore) sortedSeedsLocked() []SeedURL {
	seeds := make([]SeedURL, 0, len(s.seeds))
	for _, seed := range s.seeds {
		seeds = append(seeds, seed)
	}
	sort.Slice(seeds, func(i, j int) bool {
		if !seeds[i].CreatedAt.Equal(seeds[j].CreatedAt) {
			return seeds[i].CreatedAt.Before(seeds[j].CreatedAt)
		}
		return seeds[i].ID < seeds[j].ID
	})
	return seeds
}

// saveToFile writes all seeds to the store's JSON file as an indented array.
// The caller must hold s.mu; this method does not lock.
func (s *SeedStore) saveToFile() error {
	data, err := json.MarshalIndent(s.sortedSeedsLocked(), "", " ")
	if err != nil {
		return err
	}
	return os.WriteFile(s.filePath, data, 0644)
}

// GetAllSeeds returns a copy of all seeds, ordered by creation time and then
// by ID.
func (s *SeedStore) GetAllSeeds() []SeedURL {
	s.mu.RLock()
	defer s.mu.RUnlock()
	return s.sortedSeedsLocked()
}

// GetSeed returns a single seed by ID. The boolean reports whether the ID
// exists in the store.
func (s *SeedStore) GetSeed(id string) (SeedURL, bool) {
	s.mu.RLock()
	defer s.mu.RUnlock()

	seed, ok := s.seeds[id]
	return seed, ok
}

// CreateSeed adds a new seed and persists the store.
//
// The ID and both timestamps of the argument are overwritten: the seed gets a
// fresh random ID and CreatedAt/UpdatedAt are set to the same instant. If
// persisting fails the seed is removed from memory again and the error is
// returned.
func (s *SeedStore) CreateSeed(seed SeedURL) (SeedURL, error) {
	s.mu.Lock()
	defer s.mu.Unlock()

	now := time.Now()
	seed.ID = uuid.New().String()
	seed.CreatedAt = now
	seed.UpdatedAt = now

	s.seeds[seed.ID] = seed
	if err := s.saveToFile(); err != nil {
		delete(s.seeds, seed.ID)
		return SeedURL{}, err
	}
	return seed, nil
}

// UpdateSeed updates an existing seed and persists the store.
//
// URL, Name, Category and Description are replaced only when the corresponding
// field in updates is non-empty. TrustBoost and Enabled are always taken from
// updates. The boolean reports whether the ID exists; when it does not, the
// store is left untouched and the error is nil. If persisting fails the
// previous seed is restored in memory and the error is returned.
//
// NOTE(review): because TrustBoost and Enabled are copied unconditionally, a
// request that omits them resets them to 0 / false — confirm this is the
// intended contract for partial updates.
func (s *SeedStore) UpdateSeed(id string, updates SeedURL) (SeedURL, bool, error) {
	s.mu.Lock()
	defer s.mu.Unlock()

	previous, ok := s.seeds[id]
	if !ok {
		return SeedURL{}, false, nil
	}

	seed := previous
	if updates.URL != "" {
		seed.URL = updates.URL
	}
	if updates.Name != "" {
		seed.Name = updates.Name
	}
	if updates.Category != "" {
		seed.Category = updates.Category
	}
	if updates.Description != "" {
		seed.Description = updates.Description
	}
	seed.TrustBoost = updates.TrustBoost
	seed.Enabled = updates.Enabled
	seed.UpdatedAt = time.Now()

	s.seeds[id] = seed
	if err := s.saveToFile(); err != nil {
		// Keep memory and file in agreement, mirroring CreateSeed's rollback.
		s.seeds[id] = previous
		return SeedURL{}, true, err
	}
	return seed, true, nil
}

// removeSeed deletes the seed with the given ID and persists the store.
// It returns (false, nil) when the ID is unknown. If persisting fails the seed
// is put back in memory and (false, err) is returned.
func (s *SeedStore) removeSeed(id string) (bool, error) {
	s.mu.Lock()
	defer s.mu.Unlock()

	seed, ok := s.seeds[id]
	if !ok {
		return false, nil
	}

	delete(s.seeds, id)
	if err := s.saveToFile(); err != nil {
		s.seeds[id] = seed
		return false, err
	}
	return true, nil
}

// DeleteSeed removes a seed and reports whether it was removed.
//
// It returns false both when the ID is unknown and when the change could not
// be persisted (in which case the seed is kept and the error is logged, since
// this signature has no way to return it).
func (s *SeedStore) DeleteSeed(id string) bool {
	deleted, err := s.removeSeed(id)
	if err != nil {
		log.Printf("seed store: deleting seed %q: %v", id, err)
		return false
	}
	return deleted
}

// beginCrawl atomically marks a crawl as running. It returns false, changing
// nothing, if a crawl is already running.
func beginCrawl() bool {
	crawlMu.Lock()
	defer crawlMu.Unlock()

	if crawlStatus == crawlStatusRunning {
		return false
	}
	crawlStatus = crawlStatusRunning
	return true
}

// finishCrawl records at (formatted as RFC 3339) as the last crawl time and
// marks the crawler idle.
func finishCrawl(at time.Time) {
	finished := at.Format(time.RFC3339)

	crawlMu.Lock()
	defer crawlMu.Unlock()
	lastCrawlTime = &finished
	crawlStatus = crawlStatusIdle
}

// crawlState returns the current crawl status and last crawl time. The
// returned pointer is never written through after publication; finishCrawl
// always installs a fresh string.
func crawlState() (status string, last *string) {
	crawlMu.Lock()
	defer crawlMu.Unlock()
	return crawlStatus, lastCrawlTime
}

// Admin Handlers

// GetSeeds returns all seed URLs as a JSON array.
// It responds 500 if the seed store has not been initialized.
func (h *Handler) GetSeeds(c *gin.Context) {
	if seedStore == nil {
		c.JSON(http.StatusInternalServerError, gin.H{"error": "Seed store not initialized"})
		return
	}

	c.JSON(http.StatusOK, seedStore.GetAllSeeds())
}

// CreateSeed adds a new seed URL from the JSON request body.
//
// It responds 400 for an unparsable body or a missing URL, 500 if the store is
// uninitialized or the seed cannot be persisted, and 201 with the stored seed
// otherwise. Any ID or timestamps in the request are replaced by the store.
func (h *Handler) CreateSeed(c *gin.Context) {
	if seedStore == nil {
		c.JSON(http.StatusInternalServerError, gin.H{"error": "Seed store not initialized"})
		return
	}

	var seed SeedURL
	if err := c.ShouldBindJSON(&seed); err != nil {
		c.JSON(http.StatusBadRequest, gin.H{"error": "Invalid request body", "details": err.Error()})
		return
	}
	if seed.URL == "" {
		c.JSON(http.StatusBadRequest, gin.H{"error": "URL is required"})
		return
	}

	created, err := seedStore.CreateSeed(seed)
	if err != nil {
		c.JSON(http.StatusInternalServerError, gin.H{"error": "Failed to create seed", "details": err.Error()})
		return
	}
	c.JSON(http.StatusCreated, created)
}

// UpdateSeed updates the seed identified by the :id path parameter from the
// JSON request body.
//
// It responds 400 for a missing ID or unparsable body, 404 for an unknown ID,
// 500 if the store is uninitialized or the change cannot be persisted, and 200
// with the updated seed otherwise.
func (h *Handler) UpdateSeed(c *gin.Context) {
	if seedStore == nil {
		c.JSON(http.StatusInternalServerError, gin.H{"error": "Seed store not initialized"})
		return
	}

	id := c.Param("id")
	if id == "" {
		c.JSON(http.StatusBadRequest, gin.H{"error": "Seed ID required"})
		return
	}

	var updates SeedURL
	if err := c.ShouldBindJSON(&updates); err != nil {
		c.JSON(http.StatusBadRequest, gin.H{"error": "Invalid request body", "details": err.Error()})
		return
	}

	updated, found, err := seedStore.UpdateSeed(id, updates)
	if err != nil {
		c.JSON(http.StatusInternalServerError, gin.H{"error": "Failed to update seed", "details": err.Error()})
		return
	}
	if !found {
		c.JSON(http.StatusNotFound, gin.H{"error": "Seed not found"})
		return
	}
	c.JSON(http.StatusOK, updated)
}

// DeleteSeed removes the seed identified by the :id path parameter.
//
// It responds 400 for a missing ID, 404 for an unknown ID, 500 if the store is
// uninitialized or the deletion cannot be persisted, and 200 on success.
func (h *Handler) DeleteSeed(c *gin.Context) {
	if seedStore == nil {
		c.JSON(http.StatusInternalServerError, gin.H{"error": "Seed store not initialized"})
		return
	}

	id := c.Param("id")
	if id == "" {
		c.JSON(http.StatusBadRequest, gin.H{"error": "Seed ID required"})
		return
	}

	deleted, err := seedStore.removeSeed(id)
	if err != nil {
		c.JSON(http.StatusInternalServerError, gin.H{"error": "Failed to delete seed", "details": err.Error()})
		return
	}
	if !deleted {
		c.JSON(http.StatusNotFound, gin.H{"error": "Seed not found"})
		return
	}
	c.JSON(http.StatusOK, gin.H{"deleted": true, "id": id})
}

// GetStats returns crawl statistics.
//
// Document counts, per-category/per-type breakdowns and the average trust
// score are placeholders (all zero) until they are wired to the search index.
// The handler works even when the seed store is uninitialized, in which case
// the seed count is zero.
func (h *Handler) GetStats(c *gin.Context) {
	// Get document count from OpenSearch
	totalDocs := 0
	// TODO: Get real count from OpenSearch

	enabledSeeds := 0
	if seedStore != nil {
		for _, seed := range seedStore.GetAllSeeds() {
			if seed.Enabled {
				enabledSeeds++
			}
		}
	}

	status, last := crawlState()

	stats := CrawlStats{
		TotalDocuments: totalDocs,
		// NOTE(review): TotalSeeds reports enabled seeds only — confirm intended.
		TotalSeeds:    enabledSeeds,
		LastCrawlTime: last,
		CrawlStatus:   status,
		DocumentsPerCategory: map[string]int{
			"federal":      0,
			"states":       0,
			"science":      0,
			"universities": 0,
			"portals":      0,
		},
		DocumentsPerDocType: map[string]int{
			"Lehrplan":           0,
			"Arbeitsblatt":       0,
			"Unterrichtsentwurf": 0,
			"Erlass_Verordnung":  0,
			"Pruefung_Abitur":    0,
			"Studie_Bericht":     0,
			"Sonstiges":          0,
		},
		AvgTrustScore: 0.0,
	}

	c.JSON(http.StatusOK, stats)
}

// StartCrawl initiates a crawl run.
//
// It responds 409 if a crawl is already running; the check and the transition
// to "running" happen atomically, so concurrent requests cannot both start a
// crawl. Otherwise it responds 202 and the crawl proceeds in the background.
func (h *Handler) StartCrawl(c *gin.Context) {
	if !beginCrawl() {
		c.JSON(http.StatusConflict, gin.H{"error": "Crawl already running"})
		return
	}

	// TODO: Start actual crawl in background goroutine
	go func() {
		time.Sleep(5 * time.Second) // Simulate crawl
		finishCrawl(time.Now())
	}()

	c.JSON(http.StatusAccepted, gin.H{
		"status":  "started",
		"message": "Crawl initiated",
	})
}

// SetupAdminRoutes configures admin API routes under the /admin group of r.
func SetupAdminRoutes(r *gin.RouterGroup, h *Handler) {
	admin := r.Group("/admin")
	{
		// Seeds CRUD
		admin.GET("/seeds", h.GetSeeds)
		admin.POST("/seeds", h.CreateSeed)
		admin.PUT("/seeds/:id", h.UpdateSeed)
		admin.DELETE("/seeds/:id", h.DeleteSeed)

		// Stats
		admin.GET("/stats", h.GetStats)

		// Crawl control
		admin.POST("/crawl/start", h.StartCrawl)
	}
}