Some checks failed
CI/CD / go-lint (push) Has been skipped
CI/CD / python-lint (push) Has been skipped
CI/CD / nodejs-lint (push) Has been skipped
CI/CD / test-go-ai-compliance (push) Successful in 39s
CI/CD / test-python-backend-compliance (push) Successful in 44s
CI/CD / test-python-document-crawler (push) Successful in 31s
CI/CD / test-python-dsms-gateway (push) Successful in 26s
CI/CD / deploy-hetzner (push) Failing after 2s
The runner container doesn't always have /opt/breakpilot-compliance mounted. Use the git-cloned workspace (current dir) and add multi-fallback for RAG API URL (container network → localhost → host.docker.internal). Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
91 lines
3.1 KiB
YAML
91 lines
3.1 KiB
YAML
# Gitea Actions — RAG Legal Corpus Ingestion
#
# Manuell triggerbarer Workflow zur Ingestion von Rechtstexten in Qdrant.
# Trigger: Gitea UI → Actions → "RAG Ingestion" → Run
#
# Phasen: gesetze, eu, templates, datenschutz, verbraucherschutz, verify, version, all
#
# Voraussetzung: RAG-Service und Qdrant muessen auf Hetzner laufen.
|
|
|
|
name: RAG Ingestion

# Manual trigger only. The phase is offered as a fixed choice list so that a
# typo in the phase name cannot start a run with an unknown phase.
on:
  workflow_dispatch:
    inputs:
      phase:
        description: 'Ingestion Phase (gesetze, eu, templates, datenschutz, verbraucherschutz, verify, version, all)'
        required: true
        default: 'verbraucherschutz'
        type: choice
        options:
          - gesetze
          - eu
          - templates
          - datenschutz
          - verbraucherschutz
          - verify
          - version
          - all
|
|
|
|
jobs:
  ingest:
    # Picked up by a runner carrying the "docker" label; all steps execute
    # inside the Docker CLI image declared below.
    runs-on: docker
    container:
      image: docker:27-cli
    steps:
|
|
- name: Setup
  run: |
    # git, curl and bash are used by the following steps; python3 is
    # presumably needed by the ingestion script (verify against the script).
    # --quiet suppresses progress output but, unlike redirecting stderr to
    # /dev/null, keeps error messages visible when the install fails.
    apk add --no-cache --quiet git curl bash python3
|
|
|
|
- name: Checkout
  run: |
    # Shallow-clone the repository into the current workspace directory.
    # The ref the run was dispatched for is used so that the scripts match
    # the workflow file being executed; falls back to main when the runner
    # does not provide GITHUB_REF_NAME.
    git clone --depth 1 --branch "${GITHUB_REF_NAME:-main}" \
      "${GITHUB_SERVER_URL}/${GITHUB_REPOSITORY}.git" .
|
|
|
|
- name: Run Ingestion
  # Steps in a job container run under `sh` by default (BusyBox ash on this
  # Alpine-based image). The script is written for bash, which the Setup
  # step installs, so request it explicitly.
  shell: bash
  env:
    # Handed over through the environment instead of being interpolated into
    # the script text, so the input value is never parsed as shell code.
    PHASE: ${{ github.event.inputs.phase }}
  run: |
    set -euo pipefail

    if [ -z "${PHASE:-}" ]; then
      echo "Keine Phase angegeben." >&2
      exit 2
    fi

    echo "=== RAG Ingestion: Phase ${PHASE} ==="
    echo ""

    # The Checkout step cloned the repository into the current directory,
    # so no cd is needed; we are already in the workspace.

    export WORK_DIR="/tmp/rag-ingestion"
    export QDRANT_URL="https://qdrant-dev.breakpilot.ai"

    # RAG service: try the container network first, then the host network.
    export RAG_URL="http://bp-core-rag-service:8097/api/v1/documents/upload"
    export SDK_URL="http://bp-compliance-ai-sdk:8090"

    # Read DB_URL from the deploy directory's .env when that directory is
    # mounted into the runner container. Only the first real assignment line
    # is used (comment lines and similarly named keys are ignored); a missing
    # key yields an empty DB_URL rather than aborting the run.
    DEPLOY_ENV="/opt/breakpilot-compliance/.env"
    if [ -f "$DEPLOY_ENV" ]; then
      DB_URL="$(grep -m1 -E '^(export[[:space:]]+)?COMPLIANCE_DATABASE_URL=' "$DEPLOY_ENV" | cut -d= -f2- || true)"
      export DB_URL
    fi

    # Listed explicitly instead of using {a,b} brace expansion, which is not
    # available in every shell the step might be run with.
    mkdir -p "$WORK_DIR/pdfs" "$WORK_DIR/repos" "$WORK_DIR/texts"

    echo "RAG API: $RAG_URL"
    echo "Qdrant: $QDRANT_URL"
    echo "Work Dir: $WORK_DIR"
    echo "PWD: $(pwd)"
    echo ""

    # Succeeds when the server behind URL $1 answers with any HTTP response.
    # --fail is deliberately not used: a body-less POST to an upload endpoint
    # may well be answered with a 4xx status, which still proves that the
    # service is reachable. The timeouts keep a black-holed address from
    # stalling the job.
    rag_reachable() {
      curl -s -o /dev/null --connect-timeout 5 --max-time 15 -X POST "$1"
    }

    # Health check: is the RAG service reachable?
    # The runner must be attached to the breakpilot network to resolve
    # bp-core-rag-service. Fallbacks: localhost, then host.docker.internal.
    if ! rag_reachable "$RAG_URL"; then
      echo "RAG API nicht im Container-Netzwerk erreichbar."
      echo "Versuche localhost:8097..."
      export RAG_URL="http://localhost:8097/api/v1/documents/upload"
      export SDK_URL="http://localhost:8090"
      if ! rag_reachable "$RAG_URL"; then
        echo "Versuche host.docker.internal:8097..."
        export RAG_URL="http://host.docker.internal:8097/api/v1/documents/upload"
        export SDK_URL="http://host.docker.internal:8090"
        # Only warn: the ingestion script is still started with the last
        # candidate, exactly as before, but the log now states that no
        # address answered.
        if ! rag_reachable "$RAG_URL"; then
          echo "WARNUNG: RAG API unter keiner Adresse erreichbar." >&2
        fi
      fi
    fi

    echo "Finale RAG URL: $RAG_URL"
    echo ""

    if [ "$PHASE" = "all" ]; then
      bash scripts/ingest-legal-corpus.sh
    else
      bash scripts/ingest-legal-corpus.sh --only "$PHASE"
    fi

    echo ""
    echo "=== Ingestion abgeschlossen ==="
|