All checks were successful
CI/CD / go-lint (push) Has been skipped
CI/CD / python-lint (push) Has been skipped
CI/CD / nodejs-lint (push) Has been skipped
CI/CD / test-go-ai-compliance (push) Successful in 40s
CI/CD / test-python-backend-compliance (push) Successful in 37s
CI/CD / test-python-document-crawler (push) Successful in 26s
CI/CD / test-python-dsms-gateway (push) Successful in 23s
CI/CD / deploy-hetzner (push) Successful in 20s
The gesetze phase failed because it expects text files created by the download phase. Now the workflow automatically runs download first for any phase that depends on it. Also adds git and python3 to the alpine container for repo cloning and text extraction. Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
100 lines
3.8 KiB
YAML
100 lines
3.8 KiB
YAML
# Gitea Actions — RAG Legal Corpus Ingestion
#
# Manually triggered workflow that ingests legal texts into Qdrant.
# Trigger: Gitea UI → Actions → "RAG Ingestion" → Run
#
# Phases: download, gesetze, eu, templates, datenschutz, verbraucherschutz, verify, version, all
#
# Prerequisite: the RAG service and Qdrant must be running on Hetzner.
# The BreakPilot services must be deployed (ci.yaml deploy-hetzner).

name: RAG Ingestion

# Manual trigger only: the run is started from the UI with the phase to execute.
on:
  workflow_dispatch:
    inputs:
      phase:
        # `all` runs the ingestion script without `--only`; any other value is
        # forwarded to it as `--only <phase>`. `download` is accepted as well.
        description: 'Ingestion Phase (download, gesetze, eu, templates, datenschutz, verbraucherschutz, verify, version, all)'
        required: true
        default: 'verbraucherschutz'

jobs:
  # Runs the legal-corpus ingestion script in a throw-away Alpine container
  # attached to the Docker network of the running BreakPilot services.
  ingest:
    runs-on: docker
    container: docker:27-cli
    steps:
      # Install the tools the following steps call.
      - name: Setup
        run: |
          apk add --no-cache git curl bash > /dev/null 2>&1

      # Shallow-clone the main branch of this repository into the workspace.
      - name: Checkout
        run: |
          git clone --depth 1 --branch main "${GITHUB_SERVER_URL}/${GITHUB_REPOSITORY}.git" .

      - name: Run Ingestion
        env:
          # The user-supplied input reaches the script as data through the
          # environment; it is never pasted into the script text itself.
          PHASE: ${{ github.event.inputs.phase }}
        run: |
          set -euo pipefail
          PHASE="${PHASE:-}"
          DEPLOY_DIR="/opt/breakpilot-compliance"

          # Reject empty values and anything that is not a plain phase name
          # before it is used in any command.
          case "$PHASE" in
            ''|*[!a-z0-9_-]*)
              echo "FEHLER: Ungueltige Phase '${PHASE}' (erlaubt sind nur a-z, 0-9, '-' und '_')."
              exit 1
              ;;
          esac

          echo "=== RAG Ingestion: Phase ${PHASE} ==="
          echo ""

          # Show which BreakPilot containers are running (informational only).
          echo "--- BreakPilot Container ---"
          docker ps --filter name=bp- --format "{{.Names}}: {{.Status}}" 2>/dev/null || true
          echo ""

          # Print the first Docker network the given container is attached to,
          # or nothing if the container does not exist. One name is printed per
          # line so that a container on several networks cannot yield a
          # concatenated, non-existent network name.
          find_network() {
            docker inspect "$1" \
              --format '{{range $k, $v := .NetworkSettings.Networks}}{{println $k}}{{end}}' \
              2>/dev/null | head -n 1 || true
          }

          # Find the network the bp services run in.
          BP_NETWORK=$(find_network bp-core-rag-service)
          if [ -z "$BP_NETWORK" ]; then
            # Fallback: use the network of the compliance backend.
            BP_NETWORK=$(find_network bp-compliance-backend)
          fi

          if [ -z "$BP_NETWORK" ]; then
            echo "FEHLER: Keine BreakPilot-Container gefunden."
            echo "Bitte zuerst deployen (CI/CD Pipeline oder manuell)."
            echo ""
            echo "Verfuegbare Container:"
            docker ps --format " {{.Names}}" 2>/dev/null || true
            echo ""
            echo "Verfuegbare Netzwerke:"
            docker network ls --format " {{.Name}}" 2>/dev/null || true
            exit 1
          fi

          echo "BreakPilot Netzwerk: $BP_NETWORK"
          echo ""

          # Run the ingestion in a container on the BP network, with read-only
          # access to the scripts from the deploy directory. The phase is
          # passed as a positional argument ($1) of the inner shell, so it is
          # neither interpolated into the script text nor added to the
          # environment seen by the ingestion script.
          docker run --rm \
            --network "$BP_NETWORK" \
            -v "${DEPLOY_DIR}/scripts:/workspace/scripts:ro" \
            -e "WORK_DIR=/tmp/rag-ingestion" \
            -e "RAG_URL=http://bp-core-rag-service:8097/api/v1/documents/upload" \
            -e "QDRANT_URL=https://qdrant-dev.breakpilot.ai" \
            -e "SDK_URL=http://bp-compliance-ai-sdk:8090" \
            alpine:3.19 \
            sh -c '
              phase="$1"
              apk add --no-cache curl bash coreutils git python3 > /dev/null 2>&1
              # This shell is the Alpine sh, which has no brace expansion,
              # so every directory is listed explicitly.
              mkdir -p /tmp/rag-ingestion/pdfs /tmp/rag-ingestion/repos /tmp/rag-ingestion/texts
              cd /workspace
              if [ "$phase" = "all" ]; then
                bash scripts/ingest-legal-corpus.sh
              elif [ "$phase" = "download" ]; then
                bash scripts/ingest-legal-corpus.sh --only download
              else
                # The download phase must always run first (it creates the text files).
                echo "=== Running download phase first ==="
                bash scripts/ingest-legal-corpus.sh --only download
                echo ""
                echo "=== Running phase: ${phase} ==="
                bash scripts/ingest-legal-corpus.sh --only "$phase"
              fi
            ' rag-ingest "$PHASE"

          echo ""
          echo "=== Ingestion abgeschlossen ==="