diff --git a/.gitea/workflows/rag-ingest.yaml b/.gitea/workflows/rag-ingest.yaml
index 1a55348..ef56aa9 100644
--- a/.gitea/workflows/rag-ingest.yaml
+++ b/.gitea/workflows/rag-ingest.yaml
@@ -20,49 +20,65 @@ on:
 jobs:
   ingest:
     runs-on: docker
-    container: python:3.12-slim
+    container: docker:27-cli
     steps:
       - name: Setup
         run: |
-          apt-get update -qq && apt-get install -y -qq git curl > /dev/null 2>&1
+          apk add --no-cache git curl bash python3 > /dev/null 2>&1
 
       - name: Checkout
         run: |
           git clone --depth 1 --branch main ${GITHUB_SERVER_URL}/${GITHUB_REPOSITORY}.git .
 
-      - name: Check RAG service
-        run: |
-          # RAG-Service laeuft auf dem Host, nicht im Container
-          # Qdrant ist extern erreichbar
-          echo "Checking Qdrant..."
-          curl -sf "${QDRANT_URL}/collections" > /dev/null 2>&1 \
-            && echo "Qdrant: OK" \
-            || echo "WARNUNG: Qdrant nicht erreichbar (${QDRANT_URL})"
-
-          echo "Checking RAG API..."
-          curl -sf -k "${RAG_URL}" -X POST 2>/dev/null | head -c 200 \
-            && echo "" && echo "RAG API: OK" \
-            || echo "WARNUNG: RAG API nicht erreichbar (${RAG_URL})"
-        env:
-          QDRANT_URL: "https://qdrant-dev.breakpilot.ai"
-          RAG_URL: "https://localhost:8097/api/v1/documents/upload"
-
       - name: Run Ingestion
         run: |
           set -euo pipefail
           PHASE="${{ github.event.inputs.phase }}"
+          DEPLOY_DIR="/opt/breakpilot-compliance"
 
           echo "=== RAG Ingestion: Phase ${PHASE} ==="
           echo ""
 
-          # Konfiguration fuer Hetzner
+          # Use the script from the deploy dir (latest state after git pull)
+          # NOTE(review): assumes ${DEPLOY_DIR} is mounted into the job container - confirm
+          cd "${DEPLOY_DIR}"
+
+          # RAG service runs in the Docker network as bp-core-rag-service
+          # Qdrant is reachable externally
           export WORK_DIR="/tmp/rag-ingestion"
-          export RAG_URL="https://localhost:8097/api/v1/documents/upload"
+          export RAG_URL="http://bp-core-rag-service:8097/api/v1/documents/upload"
           export QDRANT_URL="https://qdrant-dev.breakpilot.ai"
-          export SDK_URL="https://localhost:8093"
+          export SDK_URL="http://bp-compliance-ai-sdk:8090"
+
+          # Read DB_URL from .env if present
+          if [ -f .env ]; then
+            # Anchor on the exact key and take only the first match so that
+            # commented-out or similarly named entries cannot leak into DB_URL.
+            export DB_URL="$(grep -m1 '^COMPLIANCE_DATABASE_URL=' .env 2>/dev/null | cut -d= -f2- || echo "")"
+          fi
 
-          mkdir -p "$WORK_DIR"/{pdfs,repos,texts}
+          # Explicit paths instead of {a,b,c}: brace expansion is a bash feature
+          # and is not performed by POSIX sh / BusyBox ash.
+          mkdir -p "$WORK_DIR/pdfs" "$WORK_DIR/repos" "$WORK_DIR/texts"
 
+          echo "RAG API: $RAG_URL"
+          echo "Qdrant: $QDRANT_URL"
+          echo "Work Dir: $WORK_DIR"
+          echo ""
+
+          # Health check: is the RAG API reachable?
+          # The runner must be attached to the breakpilot-network to resolve
+          # bp-core-rag-service; otherwise fall back to the host network via
+          # host.docker.internal.
+          # Only curl's exit status is tested (no -f): any HTTP response, even a
+          # 4xx for this empty POST, proves the service is reachable. With -f an
+          # HTTP error yields no output and exit 22, wrongly forcing the fallback.
+          if ! curl -s -o /dev/null --connect-timeout 5 --max-time 15 -X POST "$RAG_URL" 2>/dev/null; then
+            echo "RAG API nicht im Container-Netzwerk erreichbar."
+            echo "Versuche Host-Netzwerk (host.docker.internal)..."
+            export RAG_URL="http://host.docker.internal:8097/api/v1/documents/upload"
+          fi
+
           if [ "$PHASE" = "all" ]; then
             bash scripts/ingest-legal-corpus.sh
           else