feat: BreakPilot PWA - Full codebase (clean push without large binaries)
Some checks failed
Tests / Go Tests (push) Has been cancelled
Tests / Python Tests (push) Has been cancelled
Tests / Integration Tests (push) Has been cancelled
Tests / Go Lint (push) Has been cancelled
Tests / Python Lint (push) Has been cancelled
Tests / Security Scan (push) Has been cancelled
Tests / All Checks Passed (push) Has been cancelled
Security Scanning / Secret Scanning (push) Has been cancelled
Security Scanning / Dependency Vulnerability Scan (push) Has been cancelled
Security Scanning / Go Security Scan (push) Has been cancelled
Security Scanning / Python Security Scan (push) Has been cancelled
Security Scanning / Node.js Security Scan (push) Has been cancelled
Security Scanning / Docker Image Security (push) Has been cancelled
Security Scanning / Security Summary (push) Has been cancelled
CI/CD Pipeline / Go Tests (push) Has been cancelled
CI/CD Pipeline / Python Tests (push) Has been cancelled
CI/CD Pipeline / Website Tests (push) Has been cancelled
CI/CD Pipeline / Linting (push) Has been cancelled
CI/CD Pipeline / Security Scan (push) Has been cancelled
CI/CD Pipeline / Docker Build & Push (push) Has been cancelled
CI/CD Pipeline / Integration Tests (push) Has been cancelled
CI/CD Pipeline / Deploy to Staging (push) Has been cancelled
CI/CD Pipeline / Deploy to Production (push) Has been cancelled
CI/CD Pipeline / CI Summary (push) Has been cancelled
ci/woodpecker/manual/build-ci-image Pipeline was successful
ci/woodpecker/manual/main Pipeline failed
Some checks failed
Tests / Go Tests (push) Has been cancelled
Tests / Python Tests (push) Has been cancelled
Tests / Integration Tests (push) Has been cancelled
Tests / Go Lint (push) Has been cancelled
Tests / Python Lint (push) Has been cancelled
Tests / Security Scan (push) Has been cancelled
Tests / All Checks Passed (push) Has been cancelled
Security Scanning / Secret Scanning (push) Has been cancelled
Security Scanning / Dependency Vulnerability Scan (push) Has been cancelled
Security Scanning / Go Security Scan (push) Has been cancelled
Security Scanning / Python Security Scan (push) Has been cancelled
Security Scanning / Node.js Security Scan (push) Has been cancelled
Security Scanning / Docker Image Security (push) Has been cancelled
Security Scanning / Security Summary (push) Has been cancelled
CI/CD Pipeline / Go Tests (push) Has been cancelled
CI/CD Pipeline / Python Tests (push) Has been cancelled
CI/CD Pipeline / Website Tests (push) Has been cancelled
CI/CD Pipeline / Linting (push) Has been cancelled
CI/CD Pipeline / Security Scan (push) Has been cancelled
CI/CD Pipeline / Docker Build & Push (push) Has been cancelled
CI/CD Pipeline / Integration Tests (push) Has been cancelled
CI/CD Pipeline / Deploy to Staging (push) Has been cancelled
CI/CD Pipeline / Deploy to Production (push) Has been cancelled
CI/CD Pipeline / CI Summary (push) Has been cancelled
ci/woodpecker/manual/build-ci-image Pipeline was successful
ci/woodpecker/manual/main Pipeline failed
All services: admin-v2, studio-v2, website, ai-compliance-sdk, consent-service, klausur-service, voice-service, and infrastructure. Large PDFs and compiled binaries excluded via .gitignore.
This commit is contained in:
324
edu-search-service/internal/robots/robots_test.go
Normal file
324
edu-search-service/internal/robots/robots_test.go
Normal file
@@ -0,0 +1,324 @@
|
||||
package robots
|
||||
|
||||
import (
|
||||
"context"
|
||||
"net/http"
|
||||
"net/http/httptest"
|
||||
"testing"
|
||||
)
|
||||
|
||||
func TestNewChecker(t *testing.T) {
|
||||
checker := NewChecker("TestBot/1.0")
|
||||
if checker == nil {
|
||||
t.Fatal("Expected non-nil checker")
|
||||
}
|
||||
}
|
||||
|
||||
func TestIsAllowed_NoRobots(t *testing.T) {
|
||||
// Server that returns 404 for robots.txt
|
||||
server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
|
||||
w.WriteHeader(http.StatusNotFound)
|
||||
}))
|
||||
defer server.Close()
|
||||
|
||||
checker := NewChecker("TestBot/1.0")
|
||||
allowed, err := checker.IsAllowed(context.Background(), server.URL+"/some/page")
|
||||
|
||||
if err != nil {
|
||||
t.Errorf("Unexpected error: %v", err)
|
||||
}
|
||||
if !allowed {
|
||||
t.Error("Should be allowed when robots.txt doesn't exist")
|
||||
}
|
||||
}
|
||||
|
||||
func TestIsAllowed_AllowAll(t *testing.T) {
|
||||
robotsTxt := `User-agent: *
|
||||
Allow: /
|
||||
`
|
||||
server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
|
||||
if r.URL.Path == "/robots.txt" {
|
||||
w.Write([]byte(robotsTxt))
|
||||
return
|
||||
}
|
||||
w.WriteHeader(http.StatusOK)
|
||||
}))
|
||||
defer server.Close()
|
||||
|
||||
checker := NewChecker("TestBot/1.0")
|
||||
allowed, _ := checker.IsAllowed(context.Background(), server.URL+"/any/path")
|
||||
|
||||
if !allowed {
|
||||
t.Error("Should be allowed with Allow: /")
|
||||
}
|
||||
}
|
||||
|
||||
func TestIsAllowed_DisallowPath(t *testing.T) {
|
||||
robotsTxt := `User-agent: *
|
||||
Disallow: /private/
|
||||
Disallow: /admin/
|
||||
`
|
||||
server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
|
||||
if r.URL.Path == "/robots.txt" {
|
||||
w.Write([]byte(robotsTxt))
|
||||
return
|
||||
}
|
||||
w.WriteHeader(http.StatusOK)
|
||||
}))
|
||||
defer server.Close()
|
||||
|
||||
checker := NewChecker("TestBot/1.0")
|
||||
|
||||
// Should be disallowed
|
||||
allowed, _ := checker.IsAllowed(context.Background(), server.URL+"/private/secret")
|
||||
if allowed {
|
||||
t.Error("/private/secret should be disallowed")
|
||||
}
|
||||
|
||||
allowed, _ = checker.IsAllowed(context.Background(), server.URL+"/admin/users")
|
||||
if allowed {
|
||||
t.Error("/admin/users should be disallowed")
|
||||
}
|
||||
|
||||
// Should be allowed
|
||||
allowed, _ = checker.IsAllowed(context.Background(), server.URL+"/public/page")
|
||||
if !allowed {
|
||||
t.Error("/public/page should be allowed")
|
||||
}
|
||||
}
|
||||
|
||||
func TestIsAllowed_AllowTakesPrecedence(t *testing.T) {
|
||||
robotsTxt := `User-agent: *
|
||||
Disallow: /api/
|
||||
Allow: /api/public/
|
||||
`
|
||||
server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
|
||||
if r.URL.Path == "/robots.txt" {
|
||||
w.Write([]byte(robotsTxt))
|
||||
return
|
||||
}
|
||||
w.WriteHeader(http.StatusOK)
|
||||
}))
|
||||
defer server.Close()
|
||||
|
||||
checker := NewChecker("TestBot/1.0")
|
||||
|
||||
// Allow takes precedence
|
||||
allowed, _ := checker.IsAllowed(context.Background(), server.URL+"/api/public/docs")
|
||||
if !allowed {
|
||||
t.Error("/api/public/docs should be allowed (Allow takes precedence)")
|
||||
}
|
||||
|
||||
// Still disallowed
|
||||
allowed, _ = checker.IsAllowed(context.Background(), server.URL+"/api/internal")
|
||||
if allowed {
|
||||
t.Error("/api/internal should be disallowed")
|
||||
}
|
||||
}
|
||||
|
||||
func TestIsAllowed_SpecificUserAgent(t *testing.T) {
|
||||
robotsTxt := `User-agent: BadBot
|
||||
Disallow: /
|
||||
|
||||
User-agent: *
|
||||
Allow: /
|
||||
`
|
||||
server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
|
||||
if r.URL.Path == "/robots.txt" {
|
||||
w.Write([]byte(robotsTxt))
|
||||
return
|
||||
}
|
||||
w.WriteHeader(http.StatusOK)
|
||||
}))
|
||||
defer server.Close()
|
||||
|
||||
checker := NewChecker("GoodBot/1.0")
|
||||
allowed, _ := checker.IsAllowed(context.Background(), server.URL+"/page")
|
||||
|
||||
if !allowed {
|
||||
t.Error("GoodBot should be allowed")
|
||||
}
|
||||
}
|
||||
|
||||
func TestGetCrawlDelay(t *testing.T) {
|
||||
robotsTxt := `User-agent: *
|
||||
Crawl-delay: 5
|
||||
`
|
||||
server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
|
||||
if r.URL.Path == "/robots.txt" {
|
||||
w.Write([]byte(robotsTxt))
|
||||
return
|
||||
}
|
||||
w.WriteHeader(http.StatusOK)
|
||||
}))
|
||||
defer server.Close()
|
||||
|
||||
checker := NewChecker("TestBot/1.0")
|
||||
delay, err := checker.GetCrawlDelay(context.Background(), server.URL+"/page")
|
||||
|
||||
if err != nil {
|
||||
t.Errorf("Unexpected error: %v", err)
|
||||
}
|
||||
if delay != 5 {
|
||||
t.Errorf("Expected delay 5, got %d", delay)
|
||||
}
|
||||
}
|
||||
|
||||
func TestMatchPattern_Simple(t *testing.T) {
|
||||
tests := []struct {
|
||||
pattern string
|
||||
path string
|
||||
match bool
|
||||
}{
|
||||
{"/private/", "/private/secret", true},
|
||||
{"/private/", "/public/", false},
|
||||
{"/", "/anything", true},
|
||||
{"", "/anything", false},
|
||||
}
|
||||
|
||||
for _, tt := range tests {
|
||||
result := matchPattern(tt.pattern, tt.path)
|
||||
if result != tt.match {
|
||||
t.Errorf("Pattern '%s' vs Path '%s': expected %v, got %v",
|
||||
tt.pattern, tt.path, tt.match, result)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func TestMatchPattern_Wildcard(t *testing.T) {
|
||||
tests := []struct {
|
||||
pattern string
|
||||
path string
|
||||
match bool
|
||||
}{
|
||||
{"/*.pdf", "/document.pdf", true},
|
||||
{"/*.pdf", "/folder/doc.pdf", true},
|
||||
{"/*.pdf", "/document.html", false},
|
||||
{"/dir/*/page", "/dir/sub/page", true},
|
||||
{"/dir/*/page", "/dir/other/page", true},
|
||||
}
|
||||
|
||||
for _, tt := range tests {
|
||||
result := matchPattern(tt.pattern, tt.path)
|
||||
if result != tt.match {
|
||||
t.Errorf("Pattern '%s' vs Path '%s': expected %v, got %v",
|
||||
tt.pattern, tt.path, tt.match, result)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func TestMatchPattern_EndAnchor(t *testing.T) {
|
||||
tests := []struct {
|
||||
pattern string
|
||||
path string
|
||||
match bool
|
||||
}{
|
||||
{"/exact$", "/exact", true},
|
||||
{"/exact$", "/exactmore", false},
|
||||
{"/exact$", "/exact/more", false},
|
||||
}
|
||||
|
||||
for _, tt := range tests {
|
||||
result := matchPattern(tt.pattern, tt.path)
|
||||
if result != tt.match {
|
||||
t.Errorf("Pattern '%s' vs Path '%s': expected %v, got %v",
|
||||
tt.pattern, tt.path, tt.match, result)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func TestCacheStats(t *testing.T) {
|
||||
robotsTxt := `User-agent: *
|
||||
Allow: /
|
||||
`
|
||||
server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
|
||||
w.Write([]byte(robotsTxt))
|
||||
}))
|
||||
defer server.Close()
|
||||
|
||||
checker := NewChecker("TestBot/1.0")
|
||||
|
||||
// Initially empty
|
||||
count, _ := checker.CacheStats()
|
||||
if count != 0 {
|
||||
t.Errorf("Expected 0 cached entries, got %d", count)
|
||||
}
|
||||
|
||||
// Fetch robots.txt
|
||||
checker.IsAllowed(context.Background(), server.URL+"/page")
|
||||
|
||||
// Should have 1 entry
|
||||
count, hosts := checker.CacheStats()
|
||||
if count != 1 {
|
||||
t.Errorf("Expected 1 cached entry, got %d", count)
|
||||
}
|
||||
if len(hosts) != 1 {
|
||||
t.Errorf("Expected 1 host, got %v", hosts)
|
||||
}
|
||||
}
|
||||
|
||||
func TestClearCache(t *testing.T) {
|
||||
robotsTxt := `User-agent: *
|
||||
Allow: /
|
||||
`
|
||||
server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
|
||||
w.Write([]byte(robotsTxt))
|
||||
}))
|
||||
defer server.Close()
|
||||
|
||||
checker := NewChecker("TestBot/1.0")
|
||||
|
||||
// Populate cache
|
||||
checker.IsAllowed(context.Background(), server.URL+"/page")
|
||||
|
||||
count, _ := checker.CacheStats()
|
||||
if count != 1 {
|
||||
t.Errorf("Expected 1 cached entry, got %d", count)
|
||||
}
|
||||
|
||||
// Clear cache
|
||||
checker.ClearCache()
|
||||
|
||||
count, _ = checker.CacheStats()
|
||||
if count != 0 {
|
||||
t.Errorf("Expected 0 cached entries after clear, got %d", count)
|
||||
}
|
||||
}
|
||||
|
||||
func TestParseRobotsTxt_Comments(t *testing.T) {
|
||||
robotsTxt := `# This is a comment
|
||||
User-agent: *
|
||||
# Another comment
|
||||
Disallow: /private/ # inline comment
|
||||
Allow: /public/
|
||||
`
|
||||
server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
|
||||
if r.URL.Path == "/robots.txt" {
|
||||
w.Write([]byte(robotsTxt))
|
||||
return
|
||||
}
|
||||
w.WriteHeader(http.StatusOK)
|
||||
}))
|
||||
defer server.Close()
|
||||
|
||||
checker := NewChecker("TestBot/1.0")
|
||||
|
||||
allowed, _ := checker.IsAllowed(context.Background(), server.URL+"/public/page")
|
||||
if !allowed {
|
||||
t.Error("/public/page should be allowed")
|
||||
}
|
||||
|
||||
allowed, _ = checker.IsAllowed(context.Background(), server.URL+"/private/page")
|
||||
if allowed {
|
||||
t.Error("/private/page should be disallowed")
|
||||
}
|
||||
}
|
||||
|
||||
func TestIsAllowed_InvalidURL(t *testing.T) {
|
||||
checker := NewChecker("TestBot/1.0")
|
||||
|
||||
_, err := checker.IsAllowed(context.Background(), "not a valid url ://")
|
||||
if err == nil {
|
||||
t.Error("Expected error for invalid URL")
|
||||
}
|
||||
}
|
||||
Reference in New Issue
Block a user