NOTE(review): this patch was reconstructed from a copy whose newlines and
indentation had been collapsed. The leading whitespace below is a best guess;
confirm it against the target file, or apply with
`git apply --ignore-whitespace`.
NOTE(review): the mkdir change is an addition to the original patch, so the
post-image hash on the `index` line no longer describes the resulting file.

diff --git a/.gitea/workflows/rag-ingest.yaml b/.gitea/workflows/rag-ingest.yaml
index 7c1c534..c7d50c5 100644
--- a/.gitea/workflows/rag-ingest.yaml
+++ b/.gitea/workflows/rag-ingest.yaml
@@ -78,12 +78,20 @@ jobs:
             -e "SDK_URL=http://bp-compliance-ai-sdk:8090" \
             alpine:3.19 \
             sh -c "
-              apk add --no-cache curl bash coreutils > /dev/null 2>&1
-              mkdir -p /tmp/rag-ingestion/{pdfs,repos,texts}
+              apk add --no-cache curl bash coreutils git python3 > /dev/null 2>&1
+              # BusyBox sh performs no brace expansion, so each directory is listed explicitly
+              mkdir -p /tmp/rag-ingestion/pdfs /tmp/rag-ingestion/repos /tmp/rag-ingestion/texts
               cd /workspace
               if [ '${PHASE}' = 'all' ]; then
                 bash scripts/ingest-legal-corpus.sh
+              elif [ '${PHASE}' = 'download' ]; then
+                bash scripts/ingest-legal-corpus.sh --only download
               else
+                # The download phase must always run first (it creates the text files)
+                echo '=== Running download phase first ==='
+                bash scripts/ingest-legal-corpus.sh --only download
+                echo ''
+                echo '=== Running phase: ${PHASE} ==='
                 bash scripts/ingest-legal-corpus.sh --only '${PHASE}'
               fi
             "