diff --git a/.gitea/workflows/rag-ingest.yaml b/.gitea/workflows/rag-ingest.yaml index 06c52b9..4ad92e8 100644 --- a/.gitea/workflows/rag-ingest.yaml +++ b/.gitea/workflows/rag-ingest.yaml @@ -30,6 +30,16 @@ jobs: run: | git clone --depth 1 --branch main ${GITHUB_SERVER_URL}/${GITHUB_REPOSITORY}.git . + - name: Join breakpilot-network + run: | + # Runner-Container ans breakpilot-network anhaengen, + # damit bp-core-rag-service erreichbar ist + CONTAINER_ID=$(cat /etc/hostname) + echo "Runner container: $CONTAINER_ID" + docker network connect breakpilot-network "$CONTAINER_ID" 2>/dev/null \ + && echo "Verbunden mit breakpilot-network" \ + || echo "WARNUNG: breakpilot-network nicht verfuegbar" + - name: Run Ingestion run: | set -euo pipefail @@ -38,46 +48,25 @@ jobs: echo "=== RAG Ingestion: Phase ${PHASE} ===" echo "" - # Code wurde im Checkout-Step ins aktuelle Verzeichnis geklont - # Kein cd noetig — wir sind bereits im Workspace - export WORK_DIR="/tmp/rag-ingestion" export QDRANT_URL="https://qdrant-dev.breakpilot.ai" - - # RAG-Service: Versuche Container-Netzwerk, dann Host-Netzwerk export RAG_URL="http://bp-core-rag-service:8097/api/v1/documents/upload" export SDK_URL="http://bp-compliance-ai-sdk:8090" - # Source .env aus Deploy-Dir falls vorhanden (fuer DB_URL) - DEPLOY_ENV="/opt/breakpilot-compliance/.env" - if [ -f "$DEPLOY_ENV" ]; then - export DB_URL=$(grep COMPLIANCE_DATABASE_URL "$DEPLOY_ENV" 2>/dev/null | cut -d= -f2- || echo "") - fi - mkdir -p "$WORK_DIR"/{pdfs,repos,texts} echo "RAG API: $RAG_URL" echo "Qdrant: $QDRANT_URL" echo "Work Dir: $WORK_DIR" - echo "PWD: $(pwd)" echo "" - # Health Check: RAG erreichbar? - # Runner muss im breakpilot-network sein fuer bp-core-rag-service - # Fallback: Host-Netzwerk via localhost (act_runner laeuft auf dem Host) + # Health Check: RAG ueber Container-Netzwerk erreichbar? if ! curl -sf "$RAG_URL" -X POST -o /dev/null 2>/dev/null; then - echo "RAG API nicht im Container-Netzwerk erreichbar." - echo "Versuche localhost:8097..." - export RAG_URL="http://localhost:8097/api/v1/documents/upload" - export SDK_URL="http://localhost:8090" - if ! curl -sf "$RAG_URL" -X POST -o /dev/null 2>/dev/null; then - echo "Versuche host.docker.internal:8097..." - export RAG_URL="http://host.docker.internal:8097/api/v1/documents/upload" - export SDK_URL="http://host.docker.internal:8090" - fi + echo "FEHLER: RAG API nicht erreichbar unter $RAG_URL" + echo "Stelle sicher, dass bp-core-rag-service laeuft und breakpilot-network existiert." + exit 1 fi - - echo "Finale RAG URL: $RAG_URL" + echo "RAG API erreichbar." echo "" if [ "$PHASE" = "all" ]; then diff --git a/scripts/ingest-legal-corpus.sh b/scripts/ingest-legal-corpus.sh index 4a0552e..843c17f 100755 --- a/scripts/ingest-legal-corpus.sh +++ b/scripts/ingest-legal-corpus.sh @@ -14,8 +14,8 @@ set -euo pipefail # --- Configuration ----------------------------------------------------------- WORK_DIR="${WORK_DIR:-$HOME/rag-ingestion}" -RAG_URL="https://localhost:8097/api/v1/documents/upload" -QDRANT_URL="http://localhost:6333" +RAG_URL="${RAG_URL:-https://localhost:8097/api/v1/documents/upload}" +QDRANT_URL="${QDRANT_URL:-http://localhost:6333}" SDK_URL="${SDK_URL:-https://localhost:8093}" DB_URL="${DB_URL:-postgresql://localhost:5432/breakpilot?search_path=compliance,core,public}" CURL_OPTS="-sk --connect-timeout 10 --max-time 300"