All checks were successful
CI / go-lint (push) Has been skipped
CI / python-lint (push) Has been skipped
CI / nodejs-lint (push) Has been skipped
CI / test-go-school (push) Successful in 28s
CI / test-go-edu-search (push) Successful in 27s
CI / test-python-klausur (push) Successful in 1m45s
CI / test-python-agent-core (push) Successful in 16s
CI / test-nodejs-website (push) Successful in 21s
- edu-search-service von breakpilot-pwa nach breakpilot-lehrer kopiert (ohne vendor) - opensearch + edu-search-service in docker-compose.yml hinzugefuegt - voice-service aus docker-compose.yml entfernt (jetzt in breakpilot-core) - geo-service aus docker-compose.yml entfernt (nicht mehr benoetigt) - CI/CD: edu-search-service zu Gitea Actions und Woodpecker hinzugefuegt (Go lint, test mit go mod download, build, SBOM) Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
407 lines
12 KiB
Go
407 lines
12 KiB
Go
package handlers
|
|
|
|
import (
	"encoding/json"
	"log"
	"net/http"
	"os"
	"path/filepath"
	"sync"
	"time"

	"github.com/gin-gonic/gin"
	"github.com/google/uuid"
)
|
|
|
|
// SeedURL describes a single seed entry: the URL itself plus the metadata
// kept alongside it. The JSON tags define the shape used both in API
// responses and in the persisted seeds file.
type SeedURL struct {
	// ID is the map key of the seed inside SeedStore.
	ID string `json:"id"`
	// URL is the seed address.
	URL string `json:"url"`
	// Category is a free-form grouping label (the built-in defaults use
	// "federal", "science", "states" and "portals").
	Category string `json:"category"`
	// Name is a short human-readable title.
	Name string `json:"name"`
	// Description is a longer human-readable note.
	Description string `json:"description"`
	// TrustBoost is a numeric weight attached to the seed.
	// NOTE(review): presumably consumed by ranking code outside this file — confirm.
	TrustBoost float64 `json:"trustBoost"`
	// Enabled marks the seed as active.
	Enabled bool `json:"enabled"`
	// LastCrawled is left out of the JSON output when nil.
	LastCrawled *string `json:"lastCrawled,omitempty"`
	// DocumentCount is left out of the JSON output when zero.
	DocumentCount int `json:"documentCount,omitempty"`
	// CreatedAt is the creation timestamp.
	CreatedAt time.Time `json:"createdAt"`
	// UpdatedAt is the timestamp of the latest modification.
	UpdatedAt time.Time `json:"updatedAt"`
}
|
|
|
|
// CrawlStats is the JSON payload describing the crawler state and the
// aggregated document counters.
type CrawlStats struct {
	// TotalDocuments is the overall number of documents.
	TotalDocuments int `json:"totalDocuments"`
	// TotalSeeds is the number of seeds reported to the client.
	TotalSeeds int `json:"totalSeeds"`
	// LastCrawlTime is left out of the JSON output when nil.
	LastCrawlTime *string `json:"lastCrawlTime,omitempty"`
	// CrawlStatus is a status label such as "idle" or "running".
	CrawlStatus string `json:"crawlStatus"`
	// DocumentsPerCategory maps a category label to a document count.
	DocumentsPerCategory map[string]int `json:"documentsPerCategory"`
	// DocumentsPerDocType maps a document type label to a document count.
	DocumentsPerDocType map[string]int `json:"documentsPerDocType"`
	// AvgTrustScore is the average trust score.
	AvgTrustScore float64 `json:"avgTrustScore"`
}
|
|
|
|
// SeedStore manages seed URLs in memory and file
|
|
type SeedStore struct {
|
|
seeds map[string]SeedURL
|
|
mu sync.RWMutex
|
|
filePath string
|
|
}
|
|
|
|
var seedStore *SeedStore
|
|
var crawlStatus = "idle"
|
|
var lastCrawlTime *string
|
|
|
|
// InitSeedStore initializes the seed store
|
|
func InitSeedStore(seedsDir string) error {
|
|
seedStore = &SeedStore{
|
|
seeds: make(map[string]SeedURL),
|
|
filePath: filepath.Join(seedsDir, "seeds.json"),
|
|
}
|
|
|
|
// Try to load existing seeds from JSON file
|
|
if err := seedStore.loadFromFile(); err != nil {
|
|
// If file doesn't exist, load from txt files
|
|
return seedStore.loadFromTxtFiles(seedsDir)
|
|
}
|
|
return nil
|
|
}
|
|
|
|
func (s *SeedStore) loadFromFile() error {
|
|
data, err := os.ReadFile(s.filePath)
|
|
if err != nil {
|
|
return err
|
|
}
|
|
|
|
var seeds []SeedURL
|
|
if err := json.Unmarshal(data, &seeds); err != nil {
|
|
return err
|
|
}
|
|
|
|
s.mu.Lock()
|
|
defer s.mu.Unlock()
|
|
|
|
for _, seed := range seeds {
|
|
s.seeds[seed.ID] = seed
|
|
}
|
|
return nil
|
|
}
|
|
|
|
func (s *SeedStore) loadFromTxtFiles(seedsDir string) error {
|
|
// Default seeds from category files
|
|
defaultSeeds := []SeedURL{
|
|
{ID: uuid.New().String(), URL: "https://www.kmk.org", Category: "federal", Name: "Kultusministerkonferenz", Description: "Beschlüsse und Bildungsstandards", TrustBoost: 0.50, Enabled: true},
|
|
{ID: uuid.New().String(), URL: "https://www.bildungsserver.de", Category: "federal", Name: "Deutscher Bildungsserver", Description: "Zentrale Bildungsinformationen", TrustBoost: 0.50, Enabled: true},
|
|
{ID: uuid.New().String(), URL: "https://www.bpb.de", Category: "federal", Name: "Bundeszentrale politische Bildung", Description: "Politische Bildung", TrustBoost: 0.45, Enabled: true},
|
|
{ID: uuid.New().String(), URL: "https://www.bmbf.de", Category: "federal", Name: "BMBF", Description: "Bundesbildungsministerium", TrustBoost: 0.50, Enabled: true},
|
|
{ID: uuid.New().String(), URL: "https://www.iqb.hu-berlin.de", Category: "federal", Name: "IQB", Description: "Institut Qualitätsentwicklung", TrustBoost: 0.50, Enabled: true},
|
|
|
|
// Science
|
|
{ID: uuid.New().String(), URL: "https://www.bertelsmann-stiftung.de/de/themen/bildung", Category: "science", Name: "Bertelsmann Stiftung", Description: "Bildungsstudien und Ländermonitor", TrustBoost: 0.40, Enabled: true},
|
|
{ID: uuid.New().String(), URL: "https://www.oecd.org/pisa", Category: "science", Name: "PISA-Studien", Description: "Internationale Schulleistungsstudie", TrustBoost: 0.45, Enabled: true},
|
|
{ID: uuid.New().String(), URL: "https://www.iea.nl/studies/iea/pirls", Category: "science", Name: "IGLU/PIRLS", Description: "Internationale Grundschul-Lese-Untersuchung", TrustBoost: 0.45, Enabled: true},
|
|
{ID: uuid.New().String(), URL: "https://www.iea.nl/studies/iea/timss", Category: "science", Name: "TIMSS", Description: "Trends in International Mathematics and Science Study", TrustBoost: 0.45, Enabled: true},
|
|
|
|
// Bundesländer
|
|
{ID: uuid.New().String(), URL: "https://www.km.bayern.de", Category: "states", Name: "Bayern Kultusministerium", Description: "Lehrpläne Bayern", TrustBoost: 0.45, Enabled: true},
|
|
{ID: uuid.New().String(), URL: "https://www.schulministerium.nrw", Category: "states", Name: "NRW Schulministerium", Description: "Lehrpläne NRW", TrustBoost: 0.45, Enabled: true},
|
|
{ID: uuid.New().String(), URL: "https://www.berlin.de/sen/bildung", Category: "states", Name: "Berlin Bildung", Description: "Rahmenlehrpläne Berlin", TrustBoost: 0.45, Enabled: true},
|
|
{ID: uuid.New().String(), URL: "https://kultusministerium.hessen.de", Category: "states", Name: "Hessen Kultusministerium", Description: "Kerncurricula Hessen", TrustBoost: 0.45, Enabled: true},
|
|
|
|
// Portale
|
|
{ID: uuid.New().String(), URL: "https://www.lehrer-online.de", Category: "portals", Name: "Lehrer-Online", Description: "Unterrichtsmaterialien", TrustBoost: 0.20, Enabled: true},
|
|
{ID: uuid.New().String(), URL: "https://www.4teachers.de", Category: "portals", Name: "4teachers", Description: "Lehrercommunity", TrustBoost: 0.20, Enabled: true},
|
|
{ID: uuid.New().String(), URL: "https://www.zum.de", Category: "portals", Name: "ZUM", Description: "Zentrale für Unterrichtsmedien", TrustBoost: 0.25, Enabled: true},
|
|
}
|
|
|
|
s.mu.Lock()
|
|
defer s.mu.Unlock()
|
|
|
|
now := time.Now()
|
|
for _, seed := range defaultSeeds {
|
|
seed.CreatedAt = now
|
|
seed.UpdatedAt = now
|
|
s.seeds[seed.ID] = seed
|
|
}
|
|
|
|
return s.saveToFile()
|
|
}
|
|
|
|
func (s *SeedStore) saveToFile() error {
|
|
seeds := make([]SeedURL, 0, len(s.seeds))
|
|
for _, seed := range s.seeds {
|
|
seeds = append(seeds, seed)
|
|
}
|
|
|
|
data, err := json.MarshalIndent(seeds, "", " ")
|
|
if err != nil {
|
|
return err
|
|
}
|
|
|
|
return os.WriteFile(s.filePath, data, 0644)
|
|
}
|
|
|
|
// GetAllSeeds returns all seeds
|
|
func (s *SeedStore) GetAllSeeds() []SeedURL {
|
|
s.mu.RLock()
|
|
defer s.mu.RUnlock()
|
|
|
|
seeds := make([]SeedURL, 0, len(s.seeds))
|
|
for _, seed := range s.seeds {
|
|
seeds = append(seeds, seed)
|
|
}
|
|
return seeds
|
|
}
|
|
|
|
// GetSeed returns a single seed by ID
|
|
func (s *SeedStore) GetSeed(id string) (SeedURL, bool) {
|
|
s.mu.RLock()
|
|
defer s.mu.RUnlock()
|
|
seed, ok := s.seeds[id]
|
|
return seed, ok
|
|
}
|
|
|
|
// CreateSeed adds a new seed
|
|
func (s *SeedStore) CreateSeed(seed SeedURL) (SeedURL, error) {
|
|
s.mu.Lock()
|
|
defer s.mu.Unlock()
|
|
|
|
seed.ID = uuid.New().String()
|
|
seed.CreatedAt = time.Now()
|
|
seed.UpdatedAt = time.Now()
|
|
s.seeds[seed.ID] = seed
|
|
|
|
if err := s.saveToFile(); err != nil {
|
|
delete(s.seeds, seed.ID)
|
|
return SeedURL{}, err
|
|
}
|
|
|
|
return seed, nil
|
|
}
|
|
|
|
// UpdateSeed updates an existing seed
|
|
func (s *SeedStore) UpdateSeed(id string, updates SeedURL) (SeedURL, bool, error) {
|
|
s.mu.Lock()
|
|
defer s.mu.Unlock()
|
|
|
|
seed, ok := s.seeds[id]
|
|
if !ok {
|
|
return SeedURL{}, false, nil
|
|
}
|
|
|
|
// Update fields
|
|
if updates.URL != "" {
|
|
seed.URL = updates.URL
|
|
}
|
|
if updates.Name != "" {
|
|
seed.Name = updates.Name
|
|
}
|
|
if updates.Category != "" {
|
|
seed.Category = updates.Category
|
|
}
|
|
if updates.Description != "" {
|
|
seed.Description = updates.Description
|
|
}
|
|
seed.TrustBoost = updates.TrustBoost
|
|
seed.Enabled = updates.Enabled
|
|
seed.UpdatedAt = time.Now()
|
|
|
|
s.seeds[id] = seed
|
|
|
|
if err := s.saveToFile(); err != nil {
|
|
return SeedURL{}, true, err
|
|
}
|
|
|
|
return seed, true, nil
|
|
}
|
|
|
|
// DeleteSeed removes a seed
|
|
func (s *SeedStore) DeleteSeed(id string) bool {
|
|
s.mu.Lock()
|
|
defer s.mu.Unlock()
|
|
|
|
if _, ok := s.seeds[id]; !ok {
|
|
return false
|
|
}
|
|
|
|
delete(s.seeds, id)
|
|
s.saveToFile()
|
|
return true
|
|
}
|
|
|
|
// Admin Handlers
|
|
|
|
// GetSeeds returns all seed URLs
|
|
func (h *Handler) GetSeeds(c *gin.Context) {
|
|
if seedStore == nil {
|
|
c.JSON(http.StatusInternalServerError, gin.H{"error": "Seed store not initialized"})
|
|
return
|
|
}
|
|
|
|
seeds := seedStore.GetAllSeeds()
|
|
c.JSON(http.StatusOK, seeds)
|
|
}
|
|
|
|
// CreateSeed adds a new seed URL
|
|
func (h *Handler) CreateSeed(c *gin.Context) {
|
|
if seedStore == nil {
|
|
c.JSON(http.StatusInternalServerError, gin.H{"error": "Seed store not initialized"})
|
|
return
|
|
}
|
|
|
|
var seed SeedURL
|
|
if err := c.ShouldBindJSON(&seed); err != nil {
|
|
c.JSON(http.StatusBadRequest, gin.H{"error": "Invalid request body", "details": err.Error()})
|
|
return
|
|
}
|
|
|
|
if seed.URL == "" {
|
|
c.JSON(http.StatusBadRequest, gin.H{"error": "URL is required"})
|
|
return
|
|
}
|
|
|
|
created, err := seedStore.CreateSeed(seed)
|
|
if err != nil {
|
|
c.JSON(http.StatusInternalServerError, gin.H{"error": "Failed to create seed", "details": err.Error()})
|
|
return
|
|
}
|
|
|
|
c.JSON(http.StatusCreated, created)
|
|
}
|
|
|
|
// UpdateSeed updates an existing seed URL
|
|
func (h *Handler) UpdateSeed(c *gin.Context) {
|
|
if seedStore == nil {
|
|
c.JSON(http.StatusInternalServerError, gin.H{"error": "Seed store not initialized"})
|
|
return
|
|
}
|
|
|
|
id := c.Param("id")
|
|
if id == "" {
|
|
c.JSON(http.StatusBadRequest, gin.H{"error": "Seed ID required"})
|
|
return
|
|
}
|
|
|
|
var updates SeedURL
|
|
if err := c.ShouldBindJSON(&updates); err != nil {
|
|
c.JSON(http.StatusBadRequest, gin.H{"error": "Invalid request body", "details": err.Error()})
|
|
return
|
|
}
|
|
|
|
updated, found, err := seedStore.UpdateSeed(id, updates)
|
|
if err != nil {
|
|
c.JSON(http.StatusInternalServerError, gin.H{"error": "Failed to update seed", "details": err.Error()})
|
|
return
|
|
}
|
|
if !found {
|
|
c.JSON(http.StatusNotFound, gin.H{"error": "Seed not found"})
|
|
return
|
|
}
|
|
|
|
c.JSON(http.StatusOK, updated)
|
|
}
|
|
|
|
// DeleteSeed removes a seed URL
|
|
func (h *Handler) DeleteSeed(c *gin.Context) {
|
|
if seedStore == nil {
|
|
c.JSON(http.StatusInternalServerError, gin.H{"error": "Seed store not initialized"})
|
|
return
|
|
}
|
|
|
|
id := c.Param("id")
|
|
if id == "" {
|
|
c.JSON(http.StatusBadRequest, gin.H{"error": "Seed ID required"})
|
|
return
|
|
}
|
|
|
|
if !seedStore.DeleteSeed(id) {
|
|
c.JSON(http.StatusNotFound, gin.H{"error": "Seed not found"})
|
|
return
|
|
}
|
|
|
|
c.JSON(http.StatusOK, gin.H{"deleted": true, "id": id})
|
|
}
|
|
|
|
// GetStats returns crawl statistics
|
|
func (h *Handler) GetStats(c *gin.Context) {
|
|
// Get document count from OpenSearch
|
|
totalDocs := 0
|
|
// TODO: Get real count from OpenSearch
|
|
|
|
seeds := []SeedURL{}
|
|
if seedStore != nil {
|
|
seeds = seedStore.GetAllSeeds()
|
|
}
|
|
|
|
enabledSeeds := 0
|
|
for _, seed := range seeds {
|
|
if seed.Enabled {
|
|
enabledSeeds++
|
|
}
|
|
}
|
|
|
|
stats := CrawlStats{
|
|
TotalDocuments: totalDocs,
|
|
TotalSeeds: enabledSeeds,
|
|
LastCrawlTime: lastCrawlTime,
|
|
CrawlStatus: crawlStatus,
|
|
DocumentsPerCategory: map[string]int{
|
|
"federal": 0,
|
|
"states": 0,
|
|
"science": 0,
|
|
"universities": 0,
|
|
"portals": 0,
|
|
},
|
|
DocumentsPerDocType: map[string]int{
|
|
"Lehrplan": 0,
|
|
"Arbeitsblatt": 0,
|
|
"Unterrichtsentwurf": 0,
|
|
"Erlass_Verordnung": 0,
|
|
"Pruefung_Abitur": 0,
|
|
"Studie_Bericht": 0,
|
|
"Sonstiges": 0,
|
|
},
|
|
AvgTrustScore: 0.0,
|
|
}
|
|
|
|
c.JSON(http.StatusOK, stats)
|
|
}
|
|
|
|
// StartCrawl initiates a crawl run
|
|
func (h *Handler) StartCrawl(c *gin.Context) {
|
|
if crawlStatus == "running" {
|
|
c.JSON(http.StatusConflict, gin.H{"error": "Crawl already running"})
|
|
return
|
|
}
|
|
|
|
crawlStatus = "running"
|
|
|
|
// TODO: Start actual crawl in background goroutine
|
|
go func() {
|
|
time.Sleep(5 * time.Second) // Simulate crawl
|
|
now := time.Now().Format(time.RFC3339)
|
|
lastCrawlTime = &now
|
|
crawlStatus = "idle"
|
|
}()
|
|
|
|
c.JSON(http.StatusAccepted, gin.H{
|
|
"status": "started",
|
|
"message": "Crawl initiated",
|
|
})
|
|
}
|
|
|
|
// SetupAdminRoutes configures admin API routes
|
|
func SetupAdminRoutes(r *gin.RouterGroup, h *Handler) {
|
|
admin := r.Group("/admin")
|
|
{
|
|
// Seeds CRUD
|
|
admin.GET("/seeds", h.GetSeeds)
|
|
admin.POST("/seeds", h.CreateSeed)
|
|
admin.PUT("/seeds/:id", h.UpdateSeed)
|
|
admin.DELETE("/seeds/:id", h.DeleteSeed)
|
|
|
|
// Stats
|
|
admin.GET("/stats", h.GetStats)
|
|
|
|
// Crawl control
|
|
admin.POST("/crawl/start", h.StartCrawl)
|
|
}
|
|
}
|