package config import ( "os" "strconv" ) type Config struct { // Server Port string // OpenSearch OpenSearchURL string OpenSearchUsername string OpenSearchPassword string IndexName string // Crawler UserAgent string RateLimitPerSec float64 MaxDepth int MaxPagesPerRun int // Paths SeedsDir string RulesDir string // API APIKey string // Backend Integration BackendURL string // URL to Python Backend for Seeds API SeedsFromAPI bool // If true, fetch seeds from API instead of files // Embedding/Semantic Search EmbeddingProvider string // "openai", "ollama", or "none" OpenAIAPIKey string // API Key for OpenAI embeddings EmbeddingModel string // Model name (e.g., "text-embedding-3-small") EmbeddingDimension int // Vector dimension (1536 for OpenAI small) OllamaURL string // Ollama base URL for local embeddings SemanticSearchEnabled bool // Enable semantic search features // Scheduler SchedulerEnabled bool // Enable automatic crawl scheduling SchedulerInterval string // Crawl interval (e.g., "24h", "168h" for weekly) // PostgreSQL (for Staff/Publications database) DBHost string DBPort string DBUser string DBPassword string DBName string DBSSLMode string // Staff Crawler StaffCrawlerEmail string // Contact email for CrossRef polite pool } func Load() *Config { return &Config{ Port: getEnv("PORT", "8084"), OpenSearchURL: getEnv("OPENSEARCH_URL", "http://opensearch:9200"), OpenSearchUsername: getEnv("OPENSEARCH_USERNAME", "admin"), OpenSearchPassword: getEnv("OPENSEARCH_PASSWORD", "admin"), IndexName: getEnv("INDEX_NAME", "bp_documents_v1"), UserAgent: getEnv("USER_AGENT", "BreakpilotEduCrawler/1.0 (+contact: security@breakpilot.com)"), RateLimitPerSec: getEnvFloat("RATE_LIMIT_PER_SEC", 0.2), MaxDepth: getEnvInt("MAX_DEPTH", 4), MaxPagesPerRun: getEnvInt("MAX_PAGES_PER_RUN", 500), SeedsDir: getEnv("SEEDS_DIR", "./seeds"), RulesDir: getEnv("RULES_DIR", "./rules"), APIKey: getEnv("EDU_SEARCH_API_KEY", ""), BackendURL: getEnv("BACKEND_URL", "http://backend:8000"), SeedsFromAPI: getEnvBool("SEEDS_FROM_API", true), // Embedding/Semantic Search EmbeddingProvider: getEnv("EMBEDDING_PROVIDER", "none"), // "openai", "ollama", or "none" OpenAIAPIKey: getEnv("OPENAI_API_KEY", ""), EmbeddingModel: getEnv("EMBEDDING_MODEL", "text-embedding-3-small"), EmbeddingDimension: getEnvInt("EMBEDDING_DIMENSION", 1536), OllamaURL: getEnv("OLLAMA_URL", "http://ollama:11434"), SemanticSearchEnabled: getEnvBool("SEMANTIC_SEARCH_ENABLED", false), // Scheduler SchedulerEnabled: getEnvBool("SCHEDULER_ENABLED", false), SchedulerInterval: getEnv("SCHEDULER_INTERVAL", "24h"), // PostgreSQL DBHost: getEnv("DB_HOST", "postgres"), DBPort: getEnv("DB_PORT", "5432"), DBUser: getEnv("DB_USER", "postgres"), DBPassword: getEnv("DB_PASSWORD", "postgres"), DBName: getEnv("DB_NAME", "breakpilot"), DBSSLMode: getEnv("DB_SSLMODE", "disable"), // Staff Crawler StaffCrawlerEmail: getEnv("STAFF_CRAWLER_EMAIL", "crawler@breakpilot.de"), } } func getEnvBool(key string, fallback bool) bool { if value := os.Getenv(key); value != "" { return value == "true" || value == "1" || value == "yes" } return fallback } func getEnv(key, fallback string) string { if value := os.Getenv(key); value != "" { return value } return fallback } func getEnvInt(key string, fallback int) int { if value := os.Getenv(key); value != "" { if i, err := strconv.Atoi(value); err == nil { return i } } return fallback } func getEnvFloat(key string, fallback float64) float64 { if value := os.Getenv(key); value != "" { if f, err := strconv.ParseFloat(value, 64); err == nil { return f } } return fallback }