# Document Crawler - Auto-Onboarding Service
FROM python:3.11-slim

# Image metadata (maintainer contact and a short description of the service)
LABEL maintainer="BreakPilot <dev@breakpilot.app>" \
      description="Document Crawler & Auto-Onboarding Service"

# Relative paths in the instructions below resolve against /app
WORKDIR /app

# curl is the only extra OS package; the HEALTHCHECK uses it to probe the service.
# The apt package lists are removed in the same layer so they do not end up in the image.
RUN apt-get update \
    && apt-get install --yes --no-install-recommends curl \
    && rm -rf /var/lib/apt/lists/*

# Python dependencies are installed from requirements.txt before the rest of
# the build context is copied, so source-only changes do not invalidate this layer.
COPY requirements.txt ./
RUN python -m pip install --no-cache-dir --requirement requirements.txt

# Copy application and drop root privileges.
# The service listens on an unprivileged port, so it does not need root at
# runtime. A fixed numeric UID/GID lets orchestrators verify the non-root user
# (e.g. Kubernetes runAsNonRoot). Any volume mounted into the container (such
# as the directory referenced by CRAWL_BASE_PATH) must be accessible to
# UID 10001.
RUN groupadd --gid 10001 app \
    && useradd --uid 10001 --gid app --home-dir /app --no-create-home \
        --no-log-init --shell /usr/sbin/nologin app \
    && chown app:app /app
COPY --chown=app:app . .
USER app

# Runtime configuration defaults (override per deployment).
# DATABASE_URL is intentionally NOT given a default here: ENV values are stored
# in the image metadata and are visible to anyone who can pull or inspect the
# image, so a connection string containing credentials must be injected at
# runtime (e.g. `docker run -e DATABASE_URL=...`, compose `environment:` /
# `env_file:`, or an orchestrator secret).
ENV PORT=8098 \
    LLM_GATEWAY_URL=http://ai-compliance-sdk:8090 \
    DSMS_GATEWAY_URL=http://dsms-gateway:8082 \
    CRAWL_BASE_PATH=/data/crawl \
    MAX_FILE_SIZE_MB=50

# The service listens on TCP 8098 (EXPOSE documents the port; it does not publish it)
EXPOSE 8098

# Health probe: request /health every 30s with a 10s timeout. The container is
# reported unhealthy after 3 consecutive failures; failures during the first
# 15s after start do not count towards that limit.
HEALTHCHECK --start-period=15s --interval=30s --timeout=10s --retries=3 \
  CMD curl --fail http://localhost:8098/health || exit 1

# Start uvicorn serving `app` from module `main` on all IPv4 interfaces
CMD ["uvicorn", "main:app", "--host", "0.0.0.0", "--port", "8098"]
