All checks were successful
CI / go-lint (push) Has been skipped
CI / python-lint (push) Has been skipped
CI / nodejs-lint (push) Has been skipped
CI / test-go-school (push) Successful in 28s
CI / test-go-edu-search (push) Successful in 27s
CI / test-python-klausur (push) Successful in 1m45s
CI / test-python-agent-core (push) Successful in 16s
CI / test-nodejs-website (push) Successful in 21s
- edu-search-service von breakpilot-pwa nach breakpilot-lehrer kopiert (ohne vendor) - opensearch + edu-search-service in docker-compose.yml hinzugefuegt - voice-service aus docker-compose.yml entfernt (jetzt in breakpilot-core) - geo-service aus docker-compose.yml entfernt (nicht mehr benoetigt) - CI/CD: edu-search-service zu Gitea Actions und Woodpecker hinzugefuegt (Go lint, test mit go mod download, build, SBOM) Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
429 lines
11 KiB
Go
429 lines
11 KiB
Go
package crawler
|
|
|
|
import (
|
|
"context"
|
|
"encoding/json"
|
|
"net/http"
|
|
"net/http/httptest"
|
|
"testing"
|
|
"time"
|
|
)
|
|
|
|
func TestNewAPIClient(t *testing.T) {
|
|
client := NewAPIClient("http://backend:8000")
|
|
|
|
if client == nil {
|
|
t.Fatal("Expected non-nil client")
|
|
}
|
|
|
|
if client.baseURL != "http://backend:8000" {
|
|
t.Errorf("Expected baseURL 'http://backend:8000', got '%s'", client.baseURL)
|
|
}
|
|
|
|
if client.httpClient == nil {
|
|
t.Fatal("Expected non-nil httpClient")
|
|
}
|
|
}
|
|
|
|
func TestFetchSeeds_Success(t *testing.T) {
|
|
// Create mock server
|
|
mockResponse := SeedsExportResponse{
|
|
Seeds: []SeedFromAPI{
|
|
{
|
|
URL: "https://www.kmk.org",
|
|
Trust: 0.8,
|
|
Source: "GOV",
|
|
Scope: "FEDERAL",
|
|
State: "",
|
|
Depth: 3,
|
|
Category: "federal",
|
|
},
|
|
{
|
|
URL: "https://www.km-bw.de",
|
|
Trust: 0.7,
|
|
Source: "GOV",
|
|
Scope: "STATE",
|
|
State: "BW",
|
|
Depth: 2,
|
|
Category: "states",
|
|
},
|
|
},
|
|
Total: 2,
|
|
ExportedAt: "2025-01-17T10:00:00Z",
|
|
}
|
|
|
|
server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
|
|
// Verify request path
|
|
if r.URL.Path != "/v1/edu-search/seeds/export/for-crawler" {
|
|
t.Errorf("Expected path '/v1/edu-search/seeds/export/for-crawler', got '%s'", r.URL.Path)
|
|
}
|
|
|
|
// Verify headers
|
|
if r.Header.Get("Accept") != "application/json" {
|
|
t.Errorf("Expected Accept header 'application/json', got '%s'", r.Header.Get("Accept"))
|
|
}
|
|
|
|
w.Header().Set("Content-Type", "application/json")
|
|
json.NewEncoder(w).Encode(mockResponse)
|
|
}))
|
|
defer server.Close()
|
|
|
|
// Test
|
|
client := NewAPIClient(server.URL)
|
|
ctx, cancel := context.WithTimeout(context.Background(), 5*time.Second)
|
|
defer cancel()
|
|
|
|
result, err := client.FetchSeeds(ctx)
|
|
|
|
if err != nil {
|
|
t.Fatalf("Unexpected error: %v", err)
|
|
}
|
|
|
|
if result.Total != 2 {
|
|
t.Errorf("Expected 2 seeds, got %d", result.Total)
|
|
}
|
|
|
|
if len(result.Seeds) != 2 {
|
|
t.Fatalf("Expected 2 seeds in array, got %d", len(result.Seeds))
|
|
}
|
|
|
|
// Verify first seed
|
|
if result.Seeds[0].URL != "https://www.kmk.org" {
|
|
t.Errorf("Expected URL 'https://www.kmk.org', got '%s'", result.Seeds[0].URL)
|
|
}
|
|
|
|
if result.Seeds[0].Trust != 0.8 {
|
|
t.Errorf("Expected Trust 0.8, got %f", result.Seeds[0].Trust)
|
|
}
|
|
|
|
if result.Seeds[0].Source != "GOV" {
|
|
t.Errorf("Expected Source 'GOV', got '%s'", result.Seeds[0].Source)
|
|
}
|
|
|
|
// Verify second seed with state
|
|
if result.Seeds[1].State != "BW" {
|
|
t.Errorf("Expected State 'BW', got '%s'", result.Seeds[1].State)
|
|
}
|
|
}
|
|
|
|
func TestFetchSeeds_ServerError(t *testing.T) {
|
|
server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
|
|
w.WriteHeader(http.StatusInternalServerError)
|
|
w.Write([]byte("Internal server error"))
|
|
}))
|
|
defer server.Close()
|
|
|
|
client := NewAPIClient(server.URL)
|
|
ctx, cancel := context.WithTimeout(context.Background(), 5*time.Second)
|
|
defer cancel()
|
|
|
|
_, err := client.FetchSeeds(ctx)
|
|
|
|
if err == nil {
|
|
t.Fatal("Expected error for server error response")
|
|
}
|
|
}
|
|
|
|
func TestFetchSeeds_InvalidJSON(t *testing.T) {
|
|
server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
|
|
w.Header().Set("Content-Type", "application/json")
|
|
w.Write([]byte("not valid json"))
|
|
}))
|
|
defer server.Close()
|
|
|
|
client := NewAPIClient(server.URL)
|
|
ctx, cancel := context.WithTimeout(context.Background(), 5*time.Second)
|
|
defer cancel()
|
|
|
|
_, err := client.FetchSeeds(ctx)
|
|
|
|
if err == nil {
|
|
t.Fatal("Expected error for invalid JSON response")
|
|
}
|
|
}
|
|
|
|
func TestFetchSeeds_Timeout(t *testing.T) {
|
|
server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
|
|
// Simulate slow response
|
|
time.Sleep(2 * time.Second)
|
|
w.WriteHeader(http.StatusOK)
|
|
}))
|
|
defer server.Close()
|
|
|
|
client := NewAPIClient(server.URL)
|
|
// Very short timeout
|
|
ctx, cancel := context.WithTimeout(context.Background(), 100*time.Millisecond)
|
|
defer cancel()
|
|
|
|
_, err := client.FetchSeeds(ctx)
|
|
|
|
if err == nil {
|
|
t.Fatal("Expected timeout error")
|
|
}
|
|
}
|
|
|
|
func TestFetchSeeds_EmptyResponse(t *testing.T) {
|
|
mockResponse := SeedsExportResponse{
|
|
Seeds: []SeedFromAPI{},
|
|
Total: 0,
|
|
ExportedAt: "2025-01-17T10:00:00Z",
|
|
}
|
|
|
|
server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
|
|
w.Header().Set("Content-Type", "application/json")
|
|
json.NewEncoder(w).Encode(mockResponse)
|
|
}))
|
|
defer server.Close()
|
|
|
|
client := NewAPIClient(server.URL)
|
|
ctx, cancel := context.WithTimeout(context.Background(), 5*time.Second)
|
|
defer cancel()
|
|
|
|
result, err := client.FetchSeeds(ctx)
|
|
|
|
if err != nil {
|
|
t.Fatalf("Unexpected error: %v", err)
|
|
}
|
|
|
|
if result.Total != 0 {
|
|
t.Errorf("Expected 0 seeds, got %d", result.Total)
|
|
}
|
|
|
|
if len(result.Seeds) != 0 {
|
|
t.Errorf("Expected empty seeds array, got %d", len(result.Seeds))
|
|
}
|
|
}
|
|
|
|
// Tests for Crawl Status Reporting
|
|
|
|
func TestReportStatus_Success(t *testing.T) {
|
|
var receivedReport CrawlStatusReport
|
|
|
|
server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
|
|
// Verify request method and path
|
|
if r.Method != "POST" {
|
|
t.Errorf("Expected POST method, got %s", r.Method)
|
|
}
|
|
if r.URL.Path != "/v1/edu-search/seeds/crawl-status" {
|
|
t.Errorf("Expected path '/v1/edu-search/seeds/crawl-status', got '%s'", r.URL.Path)
|
|
}
|
|
if r.Header.Get("Content-Type") != "application/json" {
|
|
t.Errorf("Expected Content-Type 'application/json', got '%s'", r.Header.Get("Content-Type"))
|
|
}
|
|
|
|
// Parse body
|
|
json.NewDecoder(r.Body).Decode(&receivedReport)
|
|
|
|
// Send response
|
|
w.Header().Set("Content-Type", "application/json")
|
|
json.NewEncoder(w).Encode(CrawlStatusResponse{
|
|
Success: true,
|
|
SeedURL: receivedReport.SeedURL,
|
|
Message: "Status updated",
|
|
})
|
|
}))
|
|
defer server.Close()
|
|
|
|
client := NewAPIClient(server.URL)
|
|
ctx, cancel := context.WithTimeout(context.Background(), 5*time.Second)
|
|
defer cancel()
|
|
|
|
report := &CrawlStatusReport{
|
|
SeedURL: "https://www.kmk.org",
|
|
Status: "success",
|
|
DocumentsCrawled: 42,
|
|
CrawlDuration: 15.5,
|
|
}
|
|
|
|
err := client.ReportStatus(ctx, report)
|
|
|
|
if err != nil {
|
|
t.Fatalf("Unexpected error: %v", err)
|
|
}
|
|
|
|
// Verify the report was sent correctly
|
|
if receivedReport.SeedURL != "https://www.kmk.org" {
|
|
t.Errorf("Expected SeedURL 'https://www.kmk.org', got '%s'", receivedReport.SeedURL)
|
|
}
|
|
if receivedReport.Status != "success" {
|
|
t.Errorf("Expected Status 'success', got '%s'", receivedReport.Status)
|
|
}
|
|
if receivedReport.DocumentsCrawled != 42 {
|
|
t.Errorf("Expected DocumentsCrawled 42, got %d", receivedReport.DocumentsCrawled)
|
|
}
|
|
}
|
|
|
|
func TestReportStatus_ServerError(t *testing.T) {
|
|
server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
|
|
w.WriteHeader(http.StatusInternalServerError)
|
|
w.Write([]byte("Internal server error"))
|
|
}))
|
|
defer server.Close()
|
|
|
|
client := NewAPIClient(server.URL)
|
|
ctx, cancel := context.WithTimeout(context.Background(), 5*time.Second)
|
|
defer cancel()
|
|
|
|
report := &CrawlStatusReport{
|
|
SeedURL: "https://www.kmk.org",
|
|
Status: "success",
|
|
}
|
|
|
|
err := client.ReportStatus(ctx, report)
|
|
|
|
if err == nil {
|
|
t.Fatal("Expected error for server error response")
|
|
}
|
|
}
|
|
|
|
func TestReportStatus_NotFound(t *testing.T) {
|
|
server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
|
|
w.WriteHeader(http.StatusNotFound)
|
|
w.Write([]byte(`{"detail": "Seed nicht gefunden"}`))
|
|
}))
|
|
defer server.Close()
|
|
|
|
client := NewAPIClient(server.URL)
|
|
ctx, cancel := context.WithTimeout(context.Background(), 5*time.Second)
|
|
defer cancel()
|
|
|
|
report := &CrawlStatusReport{
|
|
SeedURL: "https://unknown.example.com",
|
|
Status: "error",
|
|
}
|
|
|
|
err := client.ReportStatus(ctx, report)
|
|
|
|
if err == nil {
|
|
t.Fatal("Expected error for 404 response")
|
|
}
|
|
}
|
|
|
|
func TestReportStatusBulk_Success(t *testing.T) {
|
|
server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
|
|
// Verify request method and path
|
|
if r.Method != "POST" {
|
|
t.Errorf("Expected POST method, got %s", r.Method)
|
|
}
|
|
if r.URL.Path != "/v1/edu-search/seeds/crawl-status/bulk" {
|
|
t.Errorf("Expected path '/v1/edu-search/seeds/crawl-status/bulk', got '%s'", r.URL.Path)
|
|
}
|
|
|
|
// Parse body
|
|
var payload struct {
|
|
Updates []*CrawlStatusReport `json:"updates"`
|
|
}
|
|
json.NewDecoder(r.Body).Decode(&payload)
|
|
|
|
// Send response
|
|
w.Header().Set("Content-Type", "application/json")
|
|
json.NewEncoder(w).Encode(BulkCrawlStatusResponse{
|
|
Updated: len(payload.Updates),
|
|
Failed: 0,
|
|
Errors: []string{},
|
|
})
|
|
}))
|
|
defer server.Close()
|
|
|
|
client := NewAPIClient(server.URL)
|
|
ctx, cancel := context.WithTimeout(context.Background(), 5*time.Second)
|
|
defer cancel()
|
|
|
|
reports := []*CrawlStatusReport{
|
|
{
|
|
SeedURL: "https://www.kmk.org",
|
|
Status: "success",
|
|
DocumentsCrawled: 42,
|
|
},
|
|
{
|
|
SeedURL: "https://www.km-bw.de",
|
|
Status: "partial",
|
|
DocumentsCrawled: 15,
|
|
},
|
|
}
|
|
|
|
result, err := client.ReportStatusBulk(ctx, reports)
|
|
|
|
if err != nil {
|
|
t.Fatalf("Unexpected error: %v", err)
|
|
}
|
|
|
|
if result.Updated != 2 {
|
|
t.Errorf("Expected 2 updated, got %d", result.Updated)
|
|
}
|
|
if result.Failed != 0 {
|
|
t.Errorf("Expected 0 failed, got %d", result.Failed)
|
|
}
|
|
}
|
|
|
|
func TestReportStatusBulk_PartialFailure(t *testing.T) {
|
|
server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
|
|
w.Header().Set("Content-Type", "application/json")
|
|
json.NewEncoder(w).Encode(BulkCrawlStatusResponse{
|
|
Updated: 1,
|
|
Failed: 1,
|
|
Errors: []string{"Seed nicht gefunden: https://unknown.example.com"},
|
|
})
|
|
}))
|
|
defer server.Close()
|
|
|
|
client := NewAPIClient(server.URL)
|
|
ctx, cancel := context.WithTimeout(context.Background(), 5*time.Second)
|
|
defer cancel()
|
|
|
|
reports := []*CrawlStatusReport{
|
|
{SeedURL: "https://www.kmk.org", Status: "success"},
|
|
{SeedURL: "https://unknown.example.com", Status: "error"},
|
|
}
|
|
|
|
result, err := client.ReportStatusBulk(ctx, reports)
|
|
|
|
if err != nil {
|
|
t.Fatalf("Unexpected error: %v", err)
|
|
}
|
|
|
|
if result.Updated != 1 {
|
|
t.Errorf("Expected 1 updated, got %d", result.Updated)
|
|
}
|
|
if result.Failed != 1 {
|
|
t.Errorf("Expected 1 failed, got %d", result.Failed)
|
|
}
|
|
if len(result.Errors) != 1 {
|
|
t.Errorf("Expected 1 error, got %d", len(result.Errors))
|
|
}
|
|
}
|
|
|
|
func TestCrawlStatusReport_Struct(t *testing.T) {
|
|
report := CrawlStatusReport{
|
|
SeedURL: "https://www.example.com",
|
|
Status: "success",
|
|
DocumentsCrawled: 100,
|
|
ErrorMessage: "",
|
|
CrawlDuration: 25.5,
|
|
}
|
|
|
|
// Test JSON marshaling
|
|
data, err := json.Marshal(report)
|
|
if err != nil {
|
|
t.Fatalf("Failed to marshal: %v", err)
|
|
}
|
|
|
|
var decoded CrawlStatusReport
|
|
if err := json.Unmarshal(data, &decoded); err != nil {
|
|
t.Fatalf("Failed to unmarshal: %v", err)
|
|
}
|
|
|
|
if decoded.SeedURL != report.SeedURL {
|
|
t.Errorf("SeedURL mismatch")
|
|
}
|
|
if decoded.Status != report.Status {
|
|
t.Errorf("Status mismatch")
|
|
}
|
|
if decoded.DocumentsCrawled != report.DocumentsCrawled {
|
|
t.Errorf("DocumentsCrawled mismatch")
|
|
}
|
|
if decoded.CrawlDuration != report.CrawlDuration {
|
|
t.Errorf("CrawlDuration mismatch")
|
|
}
|
|
}
|