Files
breakpilot-lehrer/edu-search-service/internal/crawler/api_client_test.go
Benjamin Boenisch 414e0f5ec0
All checks were successful
CI / go-lint (push) Has been skipped
CI / python-lint (push) Has been skipped
CI / nodejs-lint (push) Has been skipped
CI / test-go-school (push) Successful in 28s
CI / test-go-edu-search (push) Successful in 27s
CI / test-python-klausur (push) Successful in 1m45s
CI / test-python-agent-core (push) Successful in 16s
CI / test-nodejs-website (push) Successful in 21s
feat: edu-search-service migriert, voice-service/geo-service entfernt
- edu-search-service von breakpilot-pwa nach breakpilot-lehrer kopiert (ohne vendor)
- opensearch + edu-search-service in docker-compose.yml hinzugefuegt
- voice-service aus docker-compose.yml entfernt (jetzt in breakpilot-core)
- geo-service aus docker-compose.yml entfernt (nicht mehr benoetigt)
- CI/CD: edu-search-service zu Gitea Actions und Woodpecker hinzugefuegt
  (Go lint, test mit go mod download, build, SBOM)

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-02-15 18:36:38 +01:00

429 lines
11 KiB
Go

package crawler
import (
"context"
"encoding/json"
"net/http"
"net/http/httptest"
"testing"
"time"
)
// TestNewAPIClient checks that NewAPIClient returns a non-nil client that
// keeps the base URL it was given and carries a non-nil HTTP client.
func TestNewAPIClient(t *testing.T) {
	const wantBaseURL = "http://backend:8000"

	c := NewAPIClient(wantBaseURL)
	if c == nil {
		t.Fatal("Expected non-nil client")
	}

	if got := c.baseURL; got != wantBaseURL {
		t.Errorf("Expected baseURL 'http://backend:8000', got '%s'", got)
	}

	if c.httpClient == nil {
		t.Fatal("Expected non-nil httpClient")
	}
}
// TestFetchSeeds_Success verifies that FetchSeeds calls the crawler export
// endpoint with an "Accept: application/json" header and returns the seeds
// served by the mock backend.
func TestFetchSeeds_Success(t *testing.T) {
	// Canned payload served by the mock server.
	mockResponse := SeedsExportResponse{
		Seeds: []SeedFromAPI{
			{
				URL:      "https://www.kmk.org",
				Trust:    0.8,
				Source:   "GOV",
				Scope:    "FEDERAL",
				State:    "",
				Depth:    3,
				Category: "federal",
			},
			{
				URL:      "https://www.km-bw.de",
				Trust:    0.7,
				Source:   "GOV",
				Scope:    "STATE",
				State:    "BW",
				Depth:    2,
				Category: "states",
			},
		},
		Total:      2,
		ExportedAt: "2025-01-17T10:00:00Z",
	}

	server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
		// Verify request path
		if r.URL.Path != "/v1/edu-search/seeds/export/for-crawler" {
			t.Errorf("Expected path '/v1/edu-search/seeds/export/for-crawler', got '%s'", r.URL.Path)
		}
		// Verify headers
		if accept := r.Header.Get("Accept"); accept != "application/json" {
			t.Errorf("Expected Accept header 'application/json', got '%s'", accept)
		}

		w.Header().Set("Content-Type", "application/json")
		// Report a broken mock directly instead of letting it surface as an
		// unrelated client-side decoding failure.
		if err := json.NewEncoder(w).Encode(mockResponse); err != nil {
			t.Errorf("Failed to encode mock response: %v", err)
		}
	}))
	defer server.Close()

	// Test
	client := NewAPIClient(server.URL)
	ctx, cancel := context.WithTimeout(context.Background(), 5*time.Second)
	defer cancel()

	result, err := client.FetchSeeds(ctx)
	if err != nil {
		t.Fatalf("Unexpected error: %v", err)
	}
	if result.Total != 2 {
		t.Errorf("Expected 2 seeds, got %d", result.Total)
	}
	// Fatal here: the element checks below index into the slice.
	if len(result.Seeds) != 2 {
		t.Fatalf("Expected 2 seeds in array, got %d", len(result.Seeds))
	}

	// Verify first seed
	if result.Seeds[0].URL != "https://www.kmk.org" {
		t.Errorf("Expected URL 'https://www.kmk.org', got '%s'", result.Seeds[0].URL)
	}
	if result.Seeds[0].Trust != 0.8 {
		t.Errorf("Expected Trust 0.8, got %f", result.Seeds[0].Trust)
	}
	if result.Seeds[0].Source != "GOV" {
		t.Errorf("Expected Source 'GOV', got '%s'", result.Seeds[0].Source)
	}

	// Verify second seed with state
	if result.Seeds[1].State != "BW" {
		t.Errorf("Expected State 'BW', got '%s'", result.Seeds[1].State)
	}
}
// TestFetchSeeds_ServerError checks that FetchSeeds returns an error when the
// backend answers with HTTP 500.
func TestFetchSeeds_ServerError(t *testing.T) {
	failing := func(rw http.ResponseWriter, _ *http.Request) {
		rw.WriteHeader(http.StatusInternalServerError)
		// The body is incidental to this test, so a failed write is ignored.
		_, _ = rw.Write([]byte("Internal server error"))
	}
	srv := httptest.NewServer(http.HandlerFunc(failing))
	defer srv.Close()

	api := NewAPIClient(srv.URL)
	reqCtx, stop := context.WithTimeout(context.Background(), 5*time.Second)
	defer stop()

	if _, err := api.FetchSeeds(reqCtx); err == nil {
		t.Fatal("Expected error for server error response")
	}
}
// TestFetchSeeds_InvalidJSON checks that FetchSeeds returns an error when the
// backend labels its response as JSON but sends a body that is not JSON.
func TestFetchSeeds_InvalidJSON(t *testing.T) {
	garbage := func(rw http.ResponseWriter, _ *http.Request) {
		rw.Header().Set("Content-Type", "application/json")
		// A failed write is ignored; the test only needs a non-JSON body.
		_, _ = rw.Write([]byte("not valid json"))
	}
	srv := httptest.NewServer(http.HandlerFunc(garbage))
	defer srv.Close()

	api := NewAPIClient(srv.URL)
	reqCtx, stop := context.WithTimeout(context.Background(), 5*time.Second)
	defer stop()

	if _, err := api.FetchSeeds(reqCtx); err == nil {
		t.Fatal("Expected error for invalid JSON response")
	}
}
// TestFetchSeeds_Timeout verifies that FetchSeeds returns an error once the
// request context expires, without waiting for the slow server to answer.
//
// The handler stalls for up to serverDelay but stops waiting as soon as the
// request is abandoned or the test is tearing down. httptest.Server.Close
// blocks until outstanding requests finish, so an unconditional sleep would
// make every run take the full delay.
func TestFetchSeeds_Timeout(t *testing.T) {
	const (
		serverDelay   = 2 * time.Second
		clientTimeout = 100 * time.Millisecond
	)

	release := make(chan struct{})
	server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
		// Simulate slow response
		select {
		case <-time.After(serverDelay):
		case <-r.Context().Done():
		case <-release:
		}
		w.WriteHeader(http.StatusOK)
	}))
	defer server.Close()
	// Deferred calls run last-in-first-out, so the handler is released
	// before server.Close starts waiting for it.
	defer close(release)

	client := NewAPIClient(server.URL)
	// Very short timeout
	ctx, cancel := context.WithTimeout(context.Background(), clientTimeout)
	defer cancel()

	start := time.Now()
	_, err := client.FetchSeeds(ctx)
	elapsed := time.Since(start)

	if err == nil {
		t.Fatal("Expected timeout error")
	}
	// If the call only returned after the server delay, the error came from
	// the late response rather than from the context deadline.
	if elapsed >= serverDelay {
		t.Errorf("Expected FetchSeeds to return before the %v server delay, took %v", serverDelay, elapsed)
	}
}
// TestFetchSeeds_EmptyResponse verifies that FetchSeeds succeeds and returns
// zero seeds when the backend exports an empty seed list.
func TestFetchSeeds_EmptyResponse(t *testing.T) {
	mockResponse := SeedsExportResponse{
		Seeds:      []SeedFromAPI{},
		Total:      0,
		ExportedAt: "2025-01-17T10:00:00Z",
	}

	server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
		w.Header().Set("Content-Type", "application/json")
		// Report a broken mock directly instead of letting it surface as an
		// unrelated client-side decoding failure.
		if err := json.NewEncoder(w).Encode(mockResponse); err != nil {
			t.Errorf("Failed to encode mock response: %v", err)
		}
	}))
	defer server.Close()

	client := NewAPIClient(server.URL)
	ctx, cancel := context.WithTimeout(context.Background(), 5*time.Second)
	defer cancel()

	result, err := client.FetchSeeds(ctx)
	if err != nil {
		t.Fatalf("Unexpected error: %v", err)
	}
	if result.Total != 0 {
		t.Errorf("Expected 0 seeds, got %d", result.Total)
	}
	if len(result.Seeds) != 0 {
		t.Errorf("Expected empty seeds array, got %d", len(result.Seeds))
	}
}
// Tests for Crawl Status Reporting
// TestReportStatus_Success verifies that ReportStatus sends the report as a
// JSON POST to the crawl-status endpoint and returns no error when the mock
// backend acknowledges it.
func TestReportStatus_Success(t *testing.T) {
	// Filled in by the handler so the test can inspect what was sent.
	var receivedReport CrawlStatusReport

	server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
		// Verify request method and path
		if r.Method != http.MethodPost {
			t.Errorf("Expected POST method, got %s", r.Method)
		}
		if r.URL.Path != "/v1/edu-search/seeds/crawl-status" {
			t.Errorf("Expected path '/v1/edu-search/seeds/crawl-status', got '%s'", r.URL.Path)
		}
		if contentType := r.Header.Get("Content-Type"); contentType != "application/json" {
			t.Errorf("Expected Content-Type 'application/json', got '%s'", contentType)
		}

		// Parse body. A malformed body is reported here rather than showing
		// up later as misleading empty-field mismatches.
		if err := json.NewDecoder(r.Body).Decode(&receivedReport); err != nil {
			t.Errorf("Failed to decode request body: %v", err)
			http.Error(w, "invalid request body", http.StatusBadRequest)
			return
		}

		// Send response
		w.Header().Set("Content-Type", "application/json")
		response := CrawlStatusResponse{
			Success: true,
			SeedURL: receivedReport.SeedURL,
			Message: "Status updated",
		}
		if err := json.NewEncoder(w).Encode(response); err != nil {
			t.Errorf("Failed to encode mock response: %v", err)
		}
	}))
	defer server.Close()

	client := NewAPIClient(server.URL)
	ctx, cancel := context.WithTimeout(context.Background(), 5*time.Second)
	defer cancel()

	report := &CrawlStatusReport{
		SeedURL:          "https://www.kmk.org",
		Status:           "success",
		DocumentsCrawled: 42,
		CrawlDuration:    15.5,
	}
	if err := client.ReportStatus(ctx, report); err != nil {
		t.Fatalf("Unexpected error: %v", err)
	}

	// Verify the report was sent correctly
	if receivedReport.SeedURL != "https://www.kmk.org" {
		t.Errorf("Expected SeedURL 'https://www.kmk.org', got '%s'", receivedReport.SeedURL)
	}
	if receivedReport.Status != "success" {
		t.Errorf("Expected Status 'success', got '%s'", receivedReport.Status)
	}
	if receivedReport.DocumentsCrawled != 42 {
		t.Errorf("Expected DocumentsCrawled 42, got %d", receivedReport.DocumentsCrawled)
	}
}
// TestReportStatus_ServerError checks that ReportStatus returns an error when
// the backend answers with HTTP 500.
func TestReportStatus_ServerError(t *testing.T) {
	failing := func(rw http.ResponseWriter, _ *http.Request) {
		rw.WriteHeader(http.StatusInternalServerError)
		// The body is incidental to this test, so a failed write is ignored.
		_, _ = rw.Write([]byte("Internal server error"))
	}
	srv := httptest.NewServer(http.HandlerFunc(failing))
	defer srv.Close()

	api := NewAPIClient(srv.URL)
	reqCtx, stop := context.WithTimeout(context.Background(), 5*time.Second)
	defer stop()

	status := &CrawlStatusReport{
		SeedURL: "https://www.kmk.org",
		Status:  "success",
	}
	if err := api.ReportStatus(reqCtx, status); err == nil {
		t.Fatal("Expected error for server error response")
	}
}
// TestReportStatus_NotFound checks that ReportStatus returns an error when
// the backend answers with HTTP 404.
func TestReportStatus_NotFound(t *testing.T) {
	notFound := func(rw http.ResponseWriter, _ *http.Request) {
		rw.WriteHeader(http.StatusNotFound)
		// The body is incidental to this test, so a failed write is ignored.
		_, _ = rw.Write([]byte(`{"detail": "Seed nicht gefunden"}`))
	}
	srv := httptest.NewServer(http.HandlerFunc(notFound))
	defer srv.Close()

	api := NewAPIClient(srv.URL)
	reqCtx, stop := context.WithTimeout(context.Background(), 5*time.Second)
	defer stop()

	unknownSeed := &CrawlStatusReport{
		SeedURL: "https://unknown.example.com",
		Status:  "error",
	}
	if err := api.ReportStatus(reqCtx, unknownSeed); err == nil {
		t.Fatal("Expected error for 404 response")
	}
}
// TestReportStatusBulk_Success verifies that ReportStatusBulk POSTs the
// reports to the bulk crawl-status endpoint and returns the counts from the
// backend response. The mock echoes the number of updates it decoded from the
// "updates" field, so the Updated check also covers the request payload.
func TestReportStatusBulk_Success(t *testing.T) {
	server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
		// Verify request method and path
		if r.Method != http.MethodPost {
			t.Errorf("Expected POST method, got %s", r.Method)
		}
		if r.URL.Path != "/v1/edu-search/seeds/crawl-status/bulk" {
			t.Errorf("Expected path '/v1/edu-search/seeds/crawl-status/bulk', got '%s'", r.URL.Path)
		}

		// Parse body. A malformed body is reported here rather than showing
		// up later as a confusing "0 updated" mismatch.
		var payload struct {
			Updates []*CrawlStatusReport `json:"updates"`
		}
		if err := json.NewDecoder(r.Body).Decode(&payload); err != nil {
			t.Errorf("Failed to decode request body: %v", err)
			http.Error(w, "invalid request body", http.StatusBadRequest)
			return
		}

		// Send response
		w.Header().Set("Content-Type", "application/json")
		response := BulkCrawlStatusResponse{
			Updated: len(payload.Updates),
			Failed:  0,
			Errors:  []string{},
		}
		if err := json.NewEncoder(w).Encode(response); err != nil {
			t.Errorf("Failed to encode mock response: %v", err)
		}
	}))
	defer server.Close()

	client := NewAPIClient(server.URL)
	ctx, cancel := context.WithTimeout(context.Background(), 5*time.Second)
	defer cancel()

	reports := []*CrawlStatusReport{
		{
			SeedURL:          "https://www.kmk.org",
			Status:           "success",
			DocumentsCrawled: 42,
		},
		{
			SeedURL:          "https://www.km-bw.de",
			Status:           "partial",
			DocumentsCrawled: 15,
		},
	}

	result, err := client.ReportStatusBulk(ctx, reports)
	if err != nil {
		t.Fatalf("Unexpected error: %v", err)
	}
	if result.Updated != 2 {
		t.Errorf("Expected 2 updated, got %d", result.Updated)
	}
	if result.Failed != 0 {
		t.Errorf("Expected 0 failed, got %d", result.Failed)
	}
}
// TestReportStatusBulk_PartialFailure verifies that ReportStatusBulk returns
// no error for a response that reports both updated and failed seeds, and
// that the updated/failed counts and the error list reach the caller.
func TestReportStatusBulk_PartialFailure(t *testing.T) {
	server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
		w.Header().Set("Content-Type", "application/json")
		response := BulkCrawlStatusResponse{
			Updated: 1,
			Failed:  1,
			Errors:  []string{"Seed nicht gefunden: https://unknown.example.com"},
		}
		// Report a broken mock directly instead of letting it surface as an
		// unrelated client-side decoding failure.
		if err := json.NewEncoder(w).Encode(response); err != nil {
			t.Errorf("Failed to encode mock response: %v", err)
		}
	}))
	defer server.Close()

	client := NewAPIClient(server.URL)
	ctx, cancel := context.WithTimeout(context.Background(), 5*time.Second)
	defer cancel()

	reports := []*CrawlStatusReport{
		{SeedURL: "https://www.kmk.org", Status: "success"},
		{SeedURL: "https://unknown.example.com", Status: "error"},
	}

	result, err := client.ReportStatusBulk(ctx, reports)
	if err != nil {
		t.Fatalf("Unexpected error: %v", err)
	}
	if result.Updated != 1 {
		t.Errorf("Expected 1 updated, got %d", result.Updated)
	}
	if result.Failed != 1 {
		t.Errorf("Expected 1 failed, got %d", result.Failed)
	}
	if len(result.Errors) != 1 {
		t.Errorf("Expected 1 error, got %d", len(result.Errors))
	}
}
// TestCrawlStatusReport_Struct round-trips a CrawlStatusReport through
// json.Marshal and json.Unmarshal and checks that SeedURL, Status,
// DocumentsCrawled and CrawlDuration come back unchanged.
func TestCrawlStatusReport_Struct(t *testing.T) {
	want := CrawlStatusReport{
		SeedURL:          "https://www.example.com",
		Status:           "success",
		DocumentsCrawled: 100,
		ErrorMessage:     "",
		CrawlDuration:    25.5,
	}

	// Test JSON marshaling
	encoded, marshalErr := json.Marshal(want)
	if marshalErr != nil {
		t.Fatalf("Failed to marshal: %v", marshalErr)
	}

	var got CrawlStatusReport
	unmarshalErr := json.Unmarshal(encoded, &got)
	if unmarshalErr != nil {
		t.Fatalf("Failed to unmarshal: %v", unmarshalErr)
	}

	// Each field is checked on its own so one run reports every mismatch.
	if got.SeedURL != want.SeedURL {
		t.Errorf("SeedURL mismatch")
	}
	if got.Status != want.Status {
		t.Errorf("Status mismatch")
	}
	if got.DocumentsCrawled != want.DocumentsCrawled {
		t.Errorf("DocumentsCrawled mismatch")
	}
	if got.CrawlDuration != want.CrawlDuration {
		t.Errorf("CrawlDuration mismatch")
	}
}