Files
breakpilot-compliance/.gitea/workflows/rag-ingest.yaml
Benjamin Admin b4d2be83eb
All checks were successful
CI/CD / go-lint (push) Has been skipped
CI/CD / python-lint (push) Has been skipped
CI/CD / nodejs-lint (push) Has been skipped
CI/CD / test-go-ai-compliance (push) Successful in 40s
CI/CD / test-python-backend-compliance (push) Successful in 39s
CI/CD / test-python-document-crawler (push) Successful in 30s
CI/CD / test-python-dsms-gateway (push) Successful in 24s
CI/CD / validate-canonical-controls (push) Successful in 15s
CI/CD / Deploy (push) Successful in 3s
Merge gitea/main: resolve ci.yaml conflict, keep Coolify deploy
Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-03-13 13:26:17 +01:00

116 lines
4.4 KiB
YAML

# Gitea Actions — RAG Legal Corpus Ingestion
#
# Manuell triggerbarer Workflow zur Ingestion von Rechtstexten in Qdrant.
# Trigger: Gitea UI → Actions → "RAG Ingestion" → Run
#
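# Phasen: gesetze, eu, templates, datenschutz, verbraucherschutz, dach, security, verify, version, all
# (zusaetzlich: download — laedt nur die Quellen, wird vor jeder Einzelphase automatisch ausgefuehrt)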
#
# Voraussetzung: RAG-Service und Qdrant muessen auf Coolify laufen.
# Die BreakPilot-Services muessen deployed sein (ci.yaml deploy-coolify).
# Workflow name.
name: RAG Ingestion

# Manual trigger only: the workflow starts on workflow_dispatch and takes a
# single required input naming the ingestion phase to run.
on:
  workflow_dispatch:
    inputs:
      phase:
        description: "Ingestion Phase (gesetze, eu, templates, datenschutz, verbraucherschutz, dach, security, verify, version, all)"
        required: true
        default: "verbraucherschutz"
jobs:
  # Single job: runs scripts/ingest-legal-corpus.sh from this repository inside
  # a throwaway alpine container that is attached to the Docker network of the
  # already running BreakPilot services.
  ingest:
    runs-on: docker
    container: docker:27-cli
    steps:
      # Install git, curl and bash into the job container.
      - name: Setup
        run: |
          apk add --no-cache git curl bash > /dev/null 2>&1

      # Shallow clone of the main branch into the current working directory.
      - name: Checkout
        run: |
          git clone --depth 1 --branch main "${GITHUB_SERVER_URL}/${GITHUB_REPOSITORY}.git" .

      - name: Run Ingestion
        env:
          # Hand the user-supplied input to the script as data. Expanding
          # ${{ ... }} directly inside the script text would let a crafted
          # phase value inject shell commands.
          PHASE: ${{ github.event.inputs.phase }}
          # The Qdrant API key must be configured as a repository/organisation
          # secret named QDRANT_API_KEY. The key that used to be hard-coded in
          # this file has to be treated as leaked and rotated.
          QDRANT_API_KEY: ${{ secrets.QDRANT_API_KEY }}
        run: |
          set -euo pipefail

          # Accept only plain phase tokens (lower-case letters, digits, '_'
          # and '-'); everything else is rejected before it reaches a shell.
          case "${PHASE:-}" in
            '' | *[!a-z0-9_-]*)
              echo "FEHLER: Ungueltige Phase: '${PHASE:-}'"
              exit 1
              ;;
          esac

          if [ -z "${QDRANT_API_KEY:-}" ]; then
            echo "FEHLER: Secret QDRANT_API_KEY ist nicht konfiguriert."
            exit 1
          fi

          echo "=== RAG Ingestion: Phase ${PHASE} ==="
          echo ""

          # Show which BreakPilot containers are running (informational only).
          echo "--- BreakPilot Container ---"
          docker ps --filter name=bp- --format "{{.Names}}: {{.Status}}" 2>/dev/null || true
          echo ""

          # Print the first network a container is attached to, or nothing if
          # the container does not exist. One name per line + head avoids the
          # names being glued together when a container has several networks.
          # NOTE(review): with several networks the first one (template range
          # order) is used — confirm that this is the network shared by the
          # bp-* services.
          first_network() {
            docker inspect "$1" \
              --format '{{range $k, $v := .NetworkSettings.Networks}}{{println $k}}{{end}}' \
              2>/dev/null | head -n 1 || true
          }

          # Find the network the bp services run in: try the RAG service
          # first, then fall back to the compliance backend.
          BP_NETWORK=$(first_network bp-core-rag-service)
          if [ -z "$BP_NETWORK" ]; then
            BP_NETWORK=$(first_network bp-compliance-backend)
          fi

          if [ -z "$BP_NETWORK" ]; then
            echo "FEHLER: Keine BreakPilot-Container gefunden."
            echo "Bitte zuerst deployen (CI/CD Pipeline oder manuell)."
            echo ""
            echo "Verfuegbare Container:"
            docker ps --format " {{.Names}}" 2>/dev/null || true
            echo ""
            echo "Verfuegbare Netzwerke:"
            docker network ls --format " {{.Name}}" 2>/dev/null || true
            exit 1
          fi
          echo "BreakPilot Netzwerk: $BP_NETWORK"
          echo ""

          # Always remove the ingestion container on exit, including the case
          # where set -e aborts the step between "docker create" and the end.
          CONTAINER_ID=""
          cleanup() {
            if [ -n "$CONTAINER_ID" ]; then
              docker rm -f "$CONTAINER_ID" > /dev/null 2>&1 || true
            fi
          }
          trap cleanup EXIT

          # Create the ingestion container without starting it, then copy the
          # scripts from the checkout into it with docker cp. That way the
          # scripts always come from the fresh clone, independent of any
          # deploy directory on the host.
          #
          # "-e QDRANT_API_KEY" (no value) forwards the variable from this
          # step's environment, so the key never appears in the command line.
          # The inner script is single-quoted: nothing is expanded by this
          # shell; the phase is read inside the container from
          # RAG_INGEST_PHASE. Directories are listed explicitly because the
          # container's sh does not perform brace expansion.
          CONTAINER_ID=$(docker create \
            --network "$BP_NETWORK" \
            -e "WORK_DIR=/tmp/rag-ingestion" \
            -e "RAG_URL=http://bp-core-rag-service:8097/api/v1/documents/upload" \
            -e "QDRANT_URL=https://qdrant-dev.breakpilot.ai" \
            -e QDRANT_API_KEY \
            -e "SDK_URL=http://bp-compliance-ai-sdk:8090" \
            -e "RAG_INGEST_PHASE=${PHASE}" \
            alpine:3.19 \
            sh -c '
              apk add --no-cache curl bash coreutils git python3 unzip > /dev/null 2>&1
              mkdir -p /tmp/rag-ingestion/pdfs /tmp/rag-ingestion/repos /tmp/rag-ingestion/texts
              mkdir -p /workspace/scripts
              cp -r /workspace_scripts/* /workspace/scripts/ 2>/dev/null || true
              cd /workspace
              if [ "$RAG_INGEST_PHASE" = "all" ]; then
                bash scripts/ingest-legal-corpus.sh
              elif [ "$RAG_INGEST_PHASE" = "download" ]; then
                bash scripts/ingest-legal-corpus.sh --only download
              else
                echo "=== Running download phase first ==="
                bash scripts/ingest-legal-corpus.sh --only download
                echo ""
                echo "=== Running phase: $RAG_INGEST_PHASE ==="
                bash scripts/ingest-legal-corpus.sh --only "$RAG_INGEST_PHASE"
              fi
            ')
          echo "Container: $CONTAINER_ID"

          # Copy the scripts directory of the checkout into the container.
          docker cp scripts "${CONTAINER_ID}:/workspace_scripts"
          echo "Scripts kopiert (aus Git-Checkout)"

          # Start the container and stream its output; remember a failure
          # instead of aborting so the closing message is still printed.
          EXITCODE=0
          docker start -a "${CONTAINER_ID}" || EXITCODE=$?

          echo ""
          echo "=== Ingestion abgeschlossen ==="

          # Propagate the container's exit code (the EXIT trap removes the
          # container).
          exit "$EXITCODE"