All checks were successful
CI/CD / go-lint (push) Has been skipped
CI/CD / python-lint (push) Has been skipped
CI/CD / nodejs-lint (push) Has been skipped
CI/CD / test-go-ai-compliance (push) Successful in 40s
CI/CD / test-python-backend-compliance (push) Successful in 44s
CI/CD / test-python-document-crawler (push) Successful in 29s
CI/CD / test-python-dsms-gateway (push) Successful in 23s
CI/CD / deploy-hetzner (push) Successful in 18s
The RAG workflow mounts scripts from /opt/breakpilot-compliance/scripts (deploy dir) but this may not have the latest fixes if CI hasn't deployed yet. Add explicit git pull before running ingestion. Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
106 lines
4.1 KiB
YAML
106 lines
4.1 KiB
YAML
# Gitea Actions — RAG Legal Corpus Ingestion
#
# Manually triggered workflow that ingests legal texts into Qdrant.
# Trigger: Gitea UI → Actions → "RAG Ingestion" → Run
#
# Phases: gesetze, eu, templates, datenschutz, verbraucherschutz, verify,
#         version, download, all
# For every phase except "all" and "download", the job runs the download
# phase first and then the requested phase.
#
# Prerequisite: the RAG service and Qdrant must be running on Hetzner.
# The BreakPilot services must be deployed (ci.yaml deploy-hetzner).

name: RAG Ingestion

on:
  workflow_dispatch:
    inputs:
      phase:
        description: 'Ingestion Phase (gesetze, eu, templates, datenschutz, verbraucherschutz, verify, version, download, all)'
        required: true
        default: 'verbraucherschutz'

# Single job: runs the legal-corpus ingestion script in a throwaway Alpine
# container attached to the Docker network of the running BreakPilot services.
jobs:
  ingest:
    runs-on: docker
    container: docker:27-cli
    steps:
      - name: Setup
        # Only stdout is silenced so that a failing install stays diagnosable.
        run: |
          apk add --no-cache git curl bash > /dev/null

      - name: Checkout
        run: |
          git clone --depth 1 --branch main "${GITHUB_SERVER_URL}/${GITHUB_REPOSITORY}.git" .

      - name: Run Ingestion
        env:
          # User input is handed over as an environment variable instead of
          # being interpolated into the script text (prevents shell injection).
          PHASE: "${{ github.event.inputs.phase }}"
          # Must be configured as a repository/organisation secret. The key that
          # used to be hard-coded here is exposed in history and should be rotated.
          QDRANT_API_KEY: "${{ secrets.QDRANT_API_KEY }}"
        run: |
          set -euo pipefail
          DEPLOY_DIR="/opt/breakpilot-compliance"

          echo "=== RAG Ingestion: Phase ${PHASE} ==="
          echo ""

          if [ -z "${QDRANT_API_KEY:-}" ]; then
            echo "FEHLER: Secret QDRANT_API_KEY ist nicht gesetzt."
            exit 1
          fi

          # Show which BreakPilot containers are running (informational only).
          echo "--- BreakPilot Container ---"
          docker ps --filter name=bp- --format "{{.Names}}: {{.Status}}" 2>/dev/null || true
          echo ""

          # Print the first Docker network a container is attached to, or
          # nothing if the container does not exist. One name per line plus
          # `head` keeps the result valid for multi-network containers.
          network_of() {
            docker inspect "$1" \
              --format '{{range $k, $v := .NetworkSettings.Networks}}{{println $k}}{{end}}' \
              2>/dev/null | head -n 1 || true
          }

          # Find the network the bp services run in; fall back to the
          # compliance backend if the RAG service container is not present.
          BP_NETWORK="$(network_of bp-core-rag-service)"
          if [ -z "${BP_NETWORK}" ]; then
            BP_NETWORK="$(network_of bp-compliance-backend)"
          fi

          if [ -z "${BP_NETWORK}" ]; then
            echo "FEHLER: Keine BreakPilot-Container gefunden."
            echo "Bitte zuerst deployen (CI/CD Pipeline oder manuell)."
            echo ""
            echo "Verfuegbare Container:"
            docker ps --format "  {{.Names}}" 2>/dev/null || true
            echo ""
            echo "Verfuegbare Netzwerke:"
            docker network ls --format "  {{.Name}}" 2>/dev/null || true
            exit 1
          fi

          echo "BreakPilot Netzwerk: ${BP_NETWORK}"
          echo ""

          # Best-effort refresh of the scripts in the deploy dir so the latest
          # fixes are available. Runs in a subshell so the working directory of
          # this step is untouched; a failure is reported but not fatal.
          # NOTE(review): this only updates the directory mounted below if the
          # runner exposes DEPLOY_DIR inside this job container — confirm.
          if ! ( cd "${DEPLOY_DIR}" && git pull --no-rebase origin main ); then
            echo "WARNUNG: git pull in ${DEPLOY_DIR} fehlgeschlagen - verwende vorhandene Scripts."
          fi

          # Run the ingestion in a container on the BP network with read-only
          # access to the scripts from the deploy dir. PHASE and QDRANT_API_KEY
          # are passed through by name, so their values never appear on the
          # command line. The inner script is single-quoted: nothing is expanded
          # by this shell, everything is resolved inside the container.
          docker run --rm \
            --network "${BP_NETWORK}" \
            -v "${DEPLOY_DIR}/scripts:/workspace/scripts:ro" \
            -e "WORK_DIR=/tmp/rag-ingestion" \
            -e "RAG_URL=http://bp-core-rag-service:8097/api/v1/documents/upload" \
            -e "QDRANT_URL=https://qdrant-dev.breakpilot.ai" \
            -e QDRANT_API_KEY \
            -e "SDK_URL=http://bp-compliance-ai-sdk:8090" \
            -e PHASE \
            alpine:3.19 \
            sh -c '
              if ! apk add --no-cache curl bash coreutils git python3 unzip > /dev/null; then
                echo "FEHLER: Paketinstallation im Ingestion-Container fehlgeschlagen."
                exit 1
              fi
              # busybox sh has no brace expansion, so list the directories.
              mkdir -p "${WORK_DIR}/pdfs" "${WORK_DIR}/repos" "${WORK_DIR}/texts"
              cd /workspace
              if [ "${PHASE}" = "all" ]; then
                bash scripts/ingest-legal-corpus.sh
              elif [ "${PHASE}" = "download" ]; then
                bash scripts/ingest-legal-corpus.sh --only download
              else
                # The download phase must always run first (creates text files).
                echo "=== Running download phase first ==="
                bash scripts/ingest-legal-corpus.sh --only download
                echo ""
                echo "=== Running phase: ${PHASE} ==="
                bash scripts/ingest-legal-corpus.sh --only "${PHASE}"
              fi
            '

          echo ""
          echo "=== Ingestion abgeschlossen ==="