package crawler

import (
	"context"
	"encoding/json"
	"net/http"
	"net/http/httptest"
	"testing"
	"time"
)

// TestNewAPIClient checks that NewAPIClient returns a client whose baseURL
// matches the argument and whose httpClient is initialized.
func TestNewAPIClient(t *testing.T) {
	client := NewAPIClient("http://backend:8000")

	if client == nil {
		t.Fatal("Expected non-nil client")
	}
	if client.baseURL != "http://backend:8000" {
		t.Errorf("Expected baseURL 'http://backend:8000', got '%s'", client.baseURL)
	}
	if client.httpClient == nil {
		t.Fatal("Expected non-nil httpClient")
	}
}

// TestFetchSeeds_Success checks that FetchSeeds requests the export endpoint
// with an Accept: application/json header and decodes the returned seeds.
func TestFetchSeeds_Success(t *testing.T) {
	// Canned payload served by the mock backend.
	mockResponse := SeedsExportResponse{
		Seeds: []SeedFromAPI{
			{
				URL:      "https://www.kmk.org",
				Trust:    0.8,
				Source:   "GOV",
				Scope:    "FEDERAL",
				State:    "",
				Depth:    3,
				Category: "federal",
			},
			{
				URL:      "https://www.km-bw.de",
				Trust:    0.7,
				Source:   "GOV",
				Scope:    "STATE",
				State:    "BW",
				Depth:    2,
				Category: "states",
			},
		},
		Total:      2,
		ExportedAt: "2025-01-17T10:00:00Z",
	}

	server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
		// Verify request path.
		if r.URL.Path != "/v1/edu-search/seeds/export/for-crawler" {
			t.Errorf("Expected path '/v1/edu-search/seeds/export/for-crawler', got '%s'", r.URL.Path)
		}
		// Verify headers.
		if r.Header.Get("Accept") != "application/json" {
			t.Errorf("Expected Accept header 'application/json', got '%s'", r.Header.Get("Accept"))
		}

		w.Header().Set("Content-Type", "application/json")
		// t.Errorf (not t.Fatalf) because the handler runs outside the test goroutine.
		if err := json.NewEncoder(w).Encode(mockResponse); err != nil {
			t.Errorf("Failed to encode mock response: %v", err)
		}
	}))
	defer server.Close()

	client := NewAPIClient(server.URL)
	ctx, cancel := context.WithTimeout(context.Background(), 5*time.Second)
	defer cancel()

	result, err := client.FetchSeeds(ctx)
	if err != nil {
		t.Fatalf("Unexpected error: %v", err)
	}

	if result.Total != 2 {
		t.Errorf("Expected 2 seeds, got %d", result.Total)
	}
	if len(result.Seeds) != 2 {
		t.Fatalf("Expected 2 seeds in array, got %d", len(result.Seeds))
	}

	// Verify first seed.
	if result.Seeds[0].URL != "https://www.kmk.org" {
		t.Errorf("Expected URL 'https://www.kmk.org', got '%s'", result.Seeds[0].URL)
	}
	if result.Seeds[0].Trust != 0.8 {
		t.Errorf("Expected Trust 0.8, got %f", result.Seeds[0].Trust)
	}
	if result.Seeds[0].Source != "GOV" {
		t.Errorf("Expected Source 'GOV', got '%s'", result.Seeds[0].Source)
	}

	// Verify second seed with state.
	if result.Seeds[1].State != "BW" {
		t.Errorf("Expected State 'BW', got '%s'", result.Seeds[1].State)
	}
}

// TestFetchSeeds_ServerError checks that FetchSeeds returns an error when the
// backend answers with HTTP 500.
func TestFetchSeeds_ServerError(t *testing.T) {
	server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
		w.WriteHeader(http.StatusInternalServerError)
		if _, err := w.Write([]byte("Internal server error")); err != nil {
			t.Errorf("Failed to write mock response: %v", err)
		}
	}))
	defer server.Close()

	client := NewAPIClient(server.URL)
	ctx, cancel := context.WithTimeout(context.Background(), 5*time.Second)
	defer cancel()

	_, err := client.FetchSeeds(ctx)
	if err == nil {
		t.Fatal("Expected error for server error response")
	}
}

// TestFetchSeeds_InvalidJSON checks that FetchSeeds returns an error when the
// response body is not valid JSON.
func TestFetchSeeds_InvalidJSON(t *testing.T) {
	server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
		w.Header().Set("Content-Type", "application/json")
		if _, err := w.Write([]byte("not valid json")); err != nil {
			t.Errorf("Failed to write mock response: %v", err)
		}
	}))
	defer server.Close()

	client := NewAPIClient(server.URL)
	ctx, cancel := context.WithTimeout(context.Background(), 5*time.Second)
	defer cancel()

	_, err := client.FetchSeeds(ctx)
	if err == nil {
		t.Fatal("Expected error for invalid JSON response")
	}
}

// TestFetchSeeds_Timeout checks that FetchSeeds returns an error when the
// context deadline expires before the server responds.
func TestFetchSeeds_Timeout(t *testing.T) {
	server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
		// Simulate a slow response, but stop waiting as soon as the request is
		// cancelled. server.Close blocks until in-flight handlers return, so an
		// unconditional sleep would make this test last the full two seconds.
		select {
		case <-r.Context().Done():
		case <-time.After(2 * time.Second):
		}
		w.WriteHeader(http.StatusOK)
	}))
	defer server.Close()

	client := NewAPIClient(server.URL)

	// Very short timeout, well below the handler's delay.
	ctx, cancel := context.WithTimeout(context.Background(), 100*time.Millisecond)
	defer cancel()

	_, err := client.FetchSeeds(ctx)
	if err == nil {
		t.Fatal("Expected timeout error")
	}
}

// TestFetchSeeds_EmptyResponse checks that FetchSeeds succeeds and yields an
// empty seed list when the backend reports zero seeds.
func TestFetchSeeds_EmptyResponse(t *testing.T) {
	mockResponse := SeedsExportResponse{
		Seeds:      []SeedFromAPI{},
		Total:      0,
		ExportedAt: "2025-01-17T10:00:00Z",
	}

	server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
		w.Header().Set("Content-Type", "application/json")
		if err := json.NewEncoder(w).Encode(mockResponse); err != nil {
			t.Errorf("Failed to encode mock response: %v", err)
		}
	}))
	defer server.Close()

	client := NewAPIClient(server.URL)
	ctx, cancel := context.WithTimeout(context.Background(), 5*time.Second)
	defer cancel()

	result, err := client.FetchSeeds(ctx)
	if err != nil {
		t.Fatalf("Unexpected error: %v", err)
	}

	if result.Total != 0 {
		t.Errorf("Expected 0 seeds, got %d", result.Total)
	}
	if len(result.Seeds) != 0 {
		t.Errorf("Expected empty seeds array, got %d", len(result.Seeds))
	}
}

// Tests for Crawl Status Reporting

// TestReportStatus_Success checks that ReportStatus POSTs the report as JSON
// to the crawl-status endpoint and returns no error on a successful response.
func TestReportStatus_Success(t *testing.T) {
	// Captures what the mock backend received so it can be inspected afterwards.
	var receivedReport CrawlStatusReport

	server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
		// Verify request method and path.
		if r.Method != http.MethodPost {
			t.Errorf("Expected POST method, got %s", r.Method)
		}
		if r.URL.Path != "/v1/edu-search/seeds/crawl-status" {
			t.Errorf("Expected path '/v1/edu-search/seeds/crawl-status', got '%s'", r.URL.Path)
		}
		if r.Header.Get("Content-Type") != "application/json" {
			t.Errorf("Expected Content-Type 'application/json', got '%s'", r.Header.Get("Content-Type"))
		}

		// Parse body; a malformed body is reported instead of being silently ignored.
		if err := json.NewDecoder(r.Body).Decode(&receivedReport); err != nil {
			t.Errorf("Failed to decode request body: %v", err)
		}

		// Send response.
		w.Header().Set("Content-Type", "application/json")
		resp := CrawlStatusResponse{
			Success: true,
			SeedURL: receivedReport.SeedURL,
			Message: "Status updated",
		}
		if err := json.NewEncoder(w).Encode(resp); err != nil {
			t.Errorf("Failed to encode mock response: %v", err)
		}
	}))
	defer server.Close()

	client := NewAPIClient(server.URL)
	ctx, cancel := context.WithTimeout(context.Background(), 5*time.Second)
	defer cancel()

	report := &CrawlStatusReport{
		SeedURL:          "https://www.kmk.org",
		Status:           "success",
		DocumentsCrawled: 42,
		CrawlDuration:    15.5,
	}

	err := client.ReportStatus(ctx, report)
	if err != nil {
		t.Fatalf("Unexpected error: %v", err)
	}

	// Verify the report was sent correctly.
	if receivedReport.SeedURL != "https://www.kmk.org" {
		t.Errorf("Expected SeedURL 'https://www.kmk.org', got '%s'", receivedReport.SeedURL)
	}
	if receivedReport.Status != "success" {
		t.Errorf("Expected Status 'success', got '%s'", receivedReport.Status)
	}
	if receivedReport.DocumentsCrawled != 42 {
		t.Errorf("Expected DocumentsCrawled 42, got %d", receivedReport.DocumentsCrawled)
	}
}

// TestReportStatus_ServerError checks that ReportStatus returns an error when
// the backend answers with HTTP 500.
func TestReportStatus_ServerError(t *testing.T) {
	server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
		w.WriteHeader(http.StatusInternalServerError)
		if _, err := w.Write([]byte("Internal server error")); err != nil {
			t.Errorf("Failed to write mock response: %v", err)
		}
	}))
	defer server.Close()

	client := NewAPIClient(server.URL)
	ctx, cancel := context.WithTimeout(context.Background(), 5*time.Second)
	defer cancel()

	report := &CrawlStatusReport{
		SeedURL: "https://www.kmk.org",
		Status:  "success",
	}

	err := client.ReportStatus(ctx, report)
	if err == nil {
		t.Fatal("Expected error for server error response")
	}
}

// TestReportStatus_NotFound checks that ReportStatus returns an error when the
// backend answers with HTTP 404.
func TestReportStatus_NotFound(t *testing.T) {
	server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
		w.WriteHeader(http.StatusNotFound)
		if _, err := w.Write([]byte(`{"detail": "Seed nicht gefunden"}`)); err != nil {
			t.Errorf("Failed to write mock response: %v", err)
		}
	}))
	defer server.Close()

	client := NewAPIClient(server.URL)
	ctx, cancel := context.WithTimeout(context.Background(), 5*time.Second)
	defer cancel()

	report := &CrawlStatusReport{
		SeedURL: "https://unknown.example.com",
		Status:  "error",
	}

	err := client.ReportStatus(ctx, report)
	if err == nil {
		t.Fatal("Expected error for 404 response")
	}
}

// TestReportStatusBulk_Success checks that ReportStatusBulk POSTs the reports
// to the bulk endpoint and returns the counts sent back by the backend.
func TestReportStatusBulk_Success(t *testing.T) {
	server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
		// Verify request method and path.
		if r.Method != http.MethodPost {
			t.Errorf("Expected POST method, got %s", r.Method)
		}
		if r.URL.Path != "/v1/edu-search/seeds/crawl-status/bulk" {
			t.Errorf("Expected path '/v1/edu-search/seeds/crawl-status/bulk', got '%s'", r.URL.Path)
		}

		// Parse body; the mock echoes back how many updates it received.
		var payload struct {
			Updates []*CrawlStatusReport `json:"updates"`
		}
		if err := json.NewDecoder(r.Body).Decode(&payload); err != nil {
			t.Errorf("Failed to decode request body: %v", err)
		}

		// Send response.
		w.Header().Set("Content-Type", "application/json")
		resp := BulkCrawlStatusResponse{
			Updated: len(payload.Updates),
			Failed:  0,
			Errors:  []string{},
		}
		if err := json.NewEncoder(w).Encode(resp); err != nil {
			t.Errorf("Failed to encode mock response: %v", err)
		}
	}))
	defer server.Close()

	client := NewAPIClient(server.URL)
	ctx, cancel := context.WithTimeout(context.Background(), 5*time.Second)
	defer cancel()

	reports := []*CrawlStatusReport{
		{
			SeedURL:          "https://www.kmk.org",
			Status:           "success",
			DocumentsCrawled: 42,
		},
		{
			SeedURL:          "https://www.km-bw.de",
			Status:           "partial",
			DocumentsCrawled: 15,
		},
	}

	result, err := client.ReportStatusBulk(ctx, reports)
	if err != nil {
		t.Fatalf("Unexpected error: %v", err)
	}

	if result.Updated != 2 {
		t.Errorf("Expected 2 updated, got %d", result.Updated)
	}
	if result.Failed != 0 {
		t.Errorf("Expected 0 failed, got %d", result.Failed)
	}
}

// TestReportStatusBulk_PartialFailure checks that ReportStatusBulk returns the
// backend's updated/failed counts and error list without treating a partial
// failure as a call error.
func TestReportStatusBulk_PartialFailure(t *testing.T) {
	server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
		w.Header().Set("Content-Type", "application/json")
		resp := BulkCrawlStatusResponse{
			Updated: 1,
			Failed:  1,
			Errors:  []string{"Seed nicht gefunden: https://unknown.example.com"},
		}
		if err := json.NewEncoder(w).Encode(resp); err != nil {
			t.Errorf("Failed to encode mock response: %v", err)
		}
	}))
	defer server.Close()

	client := NewAPIClient(server.URL)
	ctx, cancel := context.WithTimeout(context.Background(), 5*time.Second)
	defer cancel()

	reports := []*CrawlStatusReport{
		{SeedURL: "https://www.kmk.org", Status: "success"},
		{SeedURL: "https://unknown.example.com", Status: "error"},
	}

	result, err := client.ReportStatusBulk(ctx, reports)
	if err != nil {
		t.Fatalf("Unexpected error: %v", err)
	}

	if result.Updated != 1 {
		t.Errorf("Expected 1 updated, got %d", result.Updated)
	}
	if result.Failed != 1 {
		t.Errorf("Expected 1 failed, got %d", result.Failed)
	}
	if len(result.Errors) != 1 {
		t.Errorf("Expected 1 error, got %d", len(result.Errors))
	}
}

// TestCrawlStatusReport_Struct checks that a CrawlStatusReport survives a JSON
// marshal/unmarshal round trip with its compared fields intact.
func TestCrawlStatusReport_Struct(t *testing.T) {
	report := CrawlStatusReport{
		SeedURL:          "https://www.example.com",
		Status:           "success",
		DocumentsCrawled: 100,
		ErrorMessage:     "",
		CrawlDuration:    25.5,
	}

	// Test JSON marshaling.
	data, err := json.Marshal(report)
	if err != nil {
		t.Fatalf("Failed to marshal: %v", err)
	}

	var decoded CrawlStatusReport
	if err := json.Unmarshal(data, &decoded); err != nil {
		t.Fatalf("Failed to unmarshal: %v", err)
	}

	if decoded.SeedURL != report.SeedURL {
		t.Errorf("SeedURL mismatch")
	}
	if decoded.Status != report.Status {
		t.Errorf("Status mismatch")
	}
	if decoded.DocumentsCrawled != report.DocumentsCrawled {
		t.Errorf("DocumentsCrawled mismatch")
	}
	if decoded.CrawlDuration != report.CrawlDuration {
		t.Errorf("CrawlDuration mismatch")
	}
}