From 0b47612272db1290150373acaa1bda6cfaf6098c Mon Sep 17 00:00:00 2001
From: Benjamin Admin
Date: Wed, 11 Mar 2026 23:13:33 +0100
Subject: [PATCH] fix(rag): Always run download phase before ingestion phases

The gesetze phase failed because it expects text files created by the
download phase. Now the workflow automatically runs download first for
any phase that depends on it.

Also adds git and python3 to the alpine container for repo cloning and
text extraction.

Also spells out the three working directories in the mkdir call: the
script runs under BusyBox sh in the alpine image, which does not perform
brace expansion, so the previous form created a single directory
literally named "{pdfs,repos,texts}".

Co-Authored-By: Claude Opus 4.6
---
Review notes (ignored by git am):
- The "index" blob ids and the commit id in the first line still belong to
  the original commit; regenerate the patch with git format-patch after
  amending.
- Leading whitespace of the hunk lines was reconstructed; if the context
  does not match, apply with "git apply --ignore-whitespace".

 .gitea/workflows/rag-ingest.yaml | 11 +++++++++--
 1 file changed, 9 insertions(+), 2 deletions(-)

diff --git a/.gitea/workflows/rag-ingest.yaml b/.gitea/workflows/rag-ingest.yaml
index 7c1c534..c7d50c5 100644
--- a/.gitea/workflows/rag-ingest.yaml
+++ b/.gitea/workflows/rag-ingest.yaml
@@ -78,12 +78,19 @@ jobs:
             -e "SDK_URL=http://bp-compliance-ai-sdk:8090" \
             alpine:3.19 \
             sh -c "
-              apk add --no-cache curl bash coreutils > /dev/null 2>&1
-              mkdir -p /tmp/rag-ingestion/{pdfs,repos,texts}
+              apk add --no-cache curl bash coreutils git python3 > /dev/null 2>&1
+              mkdir -p /tmp/rag-ingestion/pdfs /tmp/rag-ingestion/repos /tmp/rag-ingestion/texts
               cd /workspace
               if [ '${PHASE}' = 'all' ]; then
                 bash scripts/ingest-legal-corpus.sh
+              elif [ '${PHASE}' = 'download' ]; then
+                bash scripts/ingest-legal-corpus.sh --only download
               else
+                # The download phase must always run first (it creates the text files)
+                echo '=== Running download phase first ==='
+                bash scripts/ingest-legal-corpus.sh --only download
+                echo ''
+                echo '=== Running phase: ${PHASE} ==='
                 bash scripts/ingest-legal-corpus.sh --only '${PHASE}'
               fi
             "