# Document Crawler - Auto-Onboarding Service
#
# Builds a python:3.11-slim based image that installs the dependencies listed
# in requirements.txt, copies the build context into /app and starts the ASGI
# application `main:app` with uvicorn on port 8098 as an unprivileged user.
FROM python:3.11-slim

LABEL maintainer="BreakPilot "
LABEL description="Document Crawler & Auto-Onboarding Service"

WORKDIR /app

# Install curl, which the HEALTHCHECK below uses to probe the service.
# The apt package lists are removed in the same layer so they never reach the image.
RUN apt-get update && apt-get install -y --no-install-recommends \
        curl \
    && rm -rf /var/lib/apt/lists/*

# Create an unprivileged account so the service does not run as root.
# A fixed numeric UID/GID lets orchestrators verify the non-root user.
# /data/crawl (the CRAWL_BASE_PATH default) is pre-created and handed to that
# account so the path exists and is writable when no volume is mounted there.
# NOTE(review): a bind mount at /data/crawl keeps the host's ownership; confirm
# it is accessible to UID 10001 in existing deployments.
RUN groupadd --system --gid 10001 app \
    && useradd --system --uid 10001 --gid app \
        --home-dir /app --no-create-home --shell /usr/sbin/nologin app \
    && mkdir -p /data/crawl \
    && chown app:app /app /data/crawl

# Install Python dependencies before copying the sources, so this layer stays
# cached until requirements.txt itself changes.
COPY requirements.txt .
RUN pip install --no-cache-dir -r requirements.txt

# Copy application, owned by the unprivileged account.
# NOTE(review): this copies the whole build context; make sure a .dockerignore
# excludes .git, .env files and other local artifacts.
COPY --chown=app:app . .

# Default runtime configuration; every value can be overridden at `docker run`
# / compose level.
# NOTE(review): DATABASE_URL embeds a username and password that end up in the
# image metadata. The default is kept so existing deployments keep working, but
# it should be supplied at runtime (env file / secret) and removed from here.
# NOTE(review): PORT is declared, yet CMD and HEALTHCHECK below use a literal
# 8098 - overriding PORT alone does not move the uvicorn listener.
ENV PORT=8098 \
    DATABASE_URL=postgresql://breakpilot:breakpilot123@bp-core-postgres:5432/breakpilot_db \
    LLM_GATEWAY_URL=http://ai-compliance-sdk:8090 \
    DSMS_GATEWAY_URL=http://dsms-gateway:8082 \
    CRAWL_BASE_PATH=/data/crawl \
    MAX_FILE_SIZE_MB=50

# Expose port (documentation only; publishing happens at run time).
EXPOSE 8098

# Health check: the container is reported unhealthy after 3 consecutive failed
# probes of /health. -sS hides the progress meter but still prints errors.
HEALTHCHECK --interval=30s --timeout=10s --start-period=15s --retries=3 \
    CMD curl -fsS http://localhost:8098/health || exit 1

# Drop root privileges for the running service; 8098 is an unprivileged port.
USER app

# Run application (exec form, so uvicorn is PID 1 and receives stop signals).
CMD ["uvicorn", "main:app", "--host", "0.0.0.0", "--port", "8098"]