feat: BreakPilot PWA - Full codebase (clean push without large binaries)
Some checks failed
Tests / Go Tests (push) Has been cancelled
Tests / Python Tests (push) Has been cancelled
Tests / Integration Tests (push) Has been cancelled
Tests / Go Lint (push) Has been cancelled
Tests / Python Lint (push) Has been cancelled
Tests / Security Scan (push) Has been cancelled
Tests / All Checks Passed (push) Has been cancelled
Security Scanning / Secret Scanning (push) Has been cancelled
Security Scanning / Dependency Vulnerability Scan (push) Has been cancelled
Security Scanning / Go Security Scan (push) Has been cancelled
Security Scanning / Python Security Scan (push) Has been cancelled
Security Scanning / Node.js Security Scan (push) Has been cancelled
Security Scanning / Docker Image Security (push) Has been cancelled
Security Scanning / Security Summary (push) Has been cancelled
CI/CD Pipeline / Go Tests (push) Has been cancelled
CI/CD Pipeline / Python Tests (push) Has been cancelled
CI/CD Pipeline / Website Tests (push) Has been cancelled
CI/CD Pipeline / Linting (push) Has been cancelled
CI/CD Pipeline / Security Scan (push) Has been cancelled
CI/CD Pipeline / Docker Build & Push (push) Has been cancelled
CI/CD Pipeline / Integration Tests (push) Has been cancelled
CI/CD Pipeline / Deploy to Staging (push) Has been cancelled
CI/CD Pipeline / Deploy to Production (push) Has been cancelled
CI/CD Pipeline / CI Summary (push) Has been cancelled
ci/woodpecker/manual/build-ci-image Pipeline was successful
ci/woodpecker/manual/main Pipeline failed
Some checks failed
Tests / Go Tests (push) Has been cancelled
Tests / Python Tests (push) Has been cancelled
Tests / Integration Tests (push) Has been cancelled
Tests / Go Lint (push) Has been cancelled
Tests / Python Lint (push) Has been cancelled
Tests / Security Scan (push) Has been cancelled
Tests / All Checks Passed (push) Has been cancelled
Security Scanning / Secret Scanning (push) Has been cancelled
Security Scanning / Dependency Vulnerability Scan (push) Has been cancelled
Security Scanning / Go Security Scan (push) Has been cancelled
Security Scanning / Python Security Scan (push) Has been cancelled
Security Scanning / Node.js Security Scan (push) Has been cancelled
Security Scanning / Docker Image Security (push) Has been cancelled
Security Scanning / Security Summary (push) Has been cancelled
CI/CD Pipeline / Go Tests (push) Has been cancelled
CI/CD Pipeline / Python Tests (push) Has been cancelled
CI/CD Pipeline / Website Tests (push) Has been cancelled
CI/CD Pipeline / Linting (push) Has been cancelled
CI/CD Pipeline / Security Scan (push) Has been cancelled
CI/CD Pipeline / Docker Build & Push (push) Has been cancelled
CI/CD Pipeline / Integration Tests (push) Has been cancelled
CI/CD Pipeline / Deploy to Staging (push) Has been cancelled
CI/CD Pipeline / Deploy to Production (push) Has been cancelled
CI/CD Pipeline / CI Summary (push) Has been cancelled
ci/woodpecker/manual/build-ci-image Pipeline was successful
ci/woodpecker/manual/main Pipeline failed
All services: admin-v2, studio-v2, website, ai-compliance-sdk, consent-service, klausur-service, voice-service, and infrastructure. Large PDFs and compiled binaries excluded via .gitignore.
This commit is contained in:
183
edu-search-service/internal/crawler/api_client.go
Normal file
183
edu-search-service/internal/crawler/api_client.go
Normal file
@@ -0,0 +1,183 @@
|
||||
package crawler
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"context"
|
||||
"encoding/json"
|
||||
"fmt"
|
||||
"io"
|
||||
"net/http"
|
||||
"time"
|
||||
)
|
||||
|
||||
// SeedFromAPI is a single crawl seed as delivered by the Backend API.
// The wire name of every field is given by its JSON tag.
type SeedFromAPI struct {
	// URL is the address of the seed.
	URL string `json:"url"`
	// Trust is the "trust" value sent by the backend.
	// NOTE(review): presumably a score; the valid range is not visible here.
	Trust float64 `json:"trust"`
	// Source is the source class: GOV, EDU, UNI, etc.
	Source string `json:"source"`
	// Scope is the scope of the seed: FEDERAL, STATE, etc.
	Scope string `json:"scope"`
	// State is the state code: BW, BY, etc. Optional.
	State string `json:"state"`
	// Depth is the crawl depth for this seed.
	Depth int `json:"depth"`
	// Category is the category name.
	Category string `json:"category"`
}
|
||||
|
||||
// SeedsExportResponse represents the API response from /seeds/export/for-crawler
|
||||
type SeedsExportResponse struct {
|
||||
Seeds []SeedFromAPI `json:"seeds"`
|
||||
Total int `json:"total"`
|
||||
ExportedAt string `json:"exported_at"`
|
||||
}
|
||||
|
||||
// APIClient talks to the Python Backend over HTTP.
type APIClient struct {
	// baseURL is the prefix placed in front of every endpoint path.
	baseURL string
	// httpClient performs all requests issued by this client.
	httpClient *http.Client
}
|
||||
|
||||
// NewAPIClient creates a new API client for fetching seeds
|
||||
func NewAPIClient(backendURL string) *APIClient {
|
||||
return &APIClient{
|
||||
baseURL: backendURL,
|
||||
httpClient: &http.Client{
|
||||
Timeout: 30 * time.Second,
|
||||
},
|
||||
}
|
||||
}
|
||||
|
||||
// FetchSeeds retrieves enabled seeds from the Backend API
|
||||
func (c *APIClient) FetchSeeds(ctx context.Context) (*SeedsExportResponse, error) {
|
||||
url := fmt.Sprintf("%s/v1/edu-search/seeds/export/for-crawler", c.baseURL)
|
||||
|
||||
req, err := http.NewRequestWithContext(ctx, "GET", url, nil)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to create request: %w", err)
|
||||
}
|
||||
|
||||
req.Header.Set("Accept", "application/json")
|
||||
req.Header.Set("User-Agent", "EduSearchCrawler/1.0")
|
||||
|
||||
resp, err := c.httpClient.Do(req)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to fetch seeds: %w", err)
|
||||
}
|
||||
defer resp.Body.Close()
|
||||
|
||||
if resp.StatusCode != http.StatusOK {
|
||||
body, _ := io.ReadAll(resp.Body)
|
||||
return nil, fmt.Errorf("API returned status %d: %s", resp.StatusCode, string(body))
|
||||
}
|
||||
|
||||
body, err := io.ReadAll(resp.Body)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to read response: %w", err)
|
||||
}
|
||||
|
||||
var result SeedsExportResponse
|
||||
if err := json.Unmarshal(body, &result); err != nil {
|
||||
return nil, fmt.Errorf("failed to parse response: %w", err)
|
||||
}
|
||||
|
||||
return &result, nil
|
||||
}
|
||||
|
||||
// CrawlStatusReport is the crawl outcome for one seed, as sent to the Backend.
type CrawlStatusReport struct {
	// SeedURL is the URL of the seed the report refers to.
	SeedURL string `json:"seed_url"`
	// Status is one of "success", "error" or "partial".
	Status string `json:"status"`
	// DocumentsCrawled is the number of documents crawled.
	DocumentsCrawled int `json:"documents_crawled"`
	// ErrorMessage is left out of the JSON output when empty.
	ErrorMessage string `json:"error_message,omitempty"`
	// CrawlDuration is the crawl duration in seconds.
	CrawlDuration float64 `json:"crawl_duration_seconds"`
}
|
||||
|
||||
// CrawlStatusResponse is the JSON document returned by the crawl status
// endpoint.
type CrawlStatusResponse struct {
	// Success is the "success" flag reported by the backend.
	Success bool `json:"success"`
	// SeedURL is the "seed_url" value reported by the backend.
	SeedURL string `json:"seed_url"`
	// Message is the "message" text reported by the backend.
	Message string `json:"message"`
}
|
||||
|
||||
// BulkCrawlStatusResponse is the JSON document returned by the bulk crawl
// status endpoint.
type BulkCrawlStatusResponse struct {
	// Updated is the "updated" count reported by the backend.
	Updated int `json:"updated"`
	// Failed is the "failed" count reported by the backend.
	Failed int `json:"failed"`
	// Errors holds the "errors" strings reported by the backend.
	Errors []string `json:"errors"`
}
|
||||
|
||||
// ReportStatus sends crawl status for a single seed to the Backend
|
||||
func (c *APIClient) ReportStatus(ctx context.Context, report *CrawlStatusReport) error {
|
||||
url := fmt.Sprintf("%s/v1/edu-search/seeds/crawl-status", c.baseURL)
|
||||
|
||||
body, err := json.Marshal(report)
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed to marshal report: %w", err)
|
||||
}
|
||||
|
||||
req, err := http.NewRequestWithContext(ctx, "POST", url, bytes.NewReader(body))
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed to create request: %w", err)
|
||||
}
|
||||
|
||||
req.Header.Set("Content-Type", "application/json")
|
||||
req.Header.Set("Accept", "application/json")
|
||||
req.Header.Set("User-Agent", "EduSearchCrawler/1.0")
|
||||
|
||||
resp, err := c.httpClient.Do(req)
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed to report status: %w", err)
|
||||
}
|
||||
defer resp.Body.Close()
|
||||
|
||||
if resp.StatusCode != http.StatusOK {
|
||||
respBody, _ := io.ReadAll(resp.Body)
|
||||
return fmt.Errorf("API returned status %d: %s", resp.StatusCode, string(respBody))
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
// ReportStatusBulk sends crawl status for multiple seeds in one request
|
||||
func (c *APIClient) ReportStatusBulk(ctx context.Context, reports []*CrawlStatusReport) (*BulkCrawlStatusResponse, error) {
|
||||
url := fmt.Sprintf("%s/v1/edu-search/seeds/crawl-status/bulk", c.baseURL)
|
||||
|
||||
payload := struct {
|
||||
Updates []*CrawlStatusReport `json:"updates"`
|
||||
}{
|
||||
Updates: reports,
|
||||
}
|
||||
|
||||
body, err := json.Marshal(payload)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to marshal reports: %w", err)
|
||||
}
|
||||
|
||||
req, err := http.NewRequestWithContext(ctx, "POST", url, bytes.NewReader(body))
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to create request: %w", err)
|
||||
}
|
||||
|
||||
req.Header.Set("Content-Type", "application/json")
|
||||
req.Header.Set("Accept", "application/json")
|
||||
req.Header.Set("User-Agent", "EduSearchCrawler/1.0")
|
||||
|
||||
resp, err := c.httpClient.Do(req)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to report status: %w", err)
|
||||
}
|
||||
defer resp.Body.Close()
|
||||
|
||||
respBody, err := io.ReadAll(resp.Body)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to read response: %w", err)
|
||||
}
|
||||
|
||||
if resp.StatusCode != http.StatusOK {
|
||||
return nil, fmt.Errorf("API returned status %d: %s", resp.StatusCode, string(respBody))
|
||||
}
|
||||
|
||||
var result BulkCrawlStatusResponse
|
||||
if err := json.Unmarshal(respBody, &result); err != nil {
|
||||
return nil, fmt.Errorf("failed to parse response: %w", err)
|
||||
}
|
||||
|
||||
return &result, nil
|
||||
}
|
||||
Reference in New Issue
Block a user