From 87d06c8b204e3ab76a8984fa1b467843ddbe00ba Mon Sep 17 00:00:00 2001 From: Benjamin Admin Date: Wed, 11 Mar 2026 23:33:28 +0100 Subject: [PATCH] fix(rag): Handle large file uploads + don't abort on individual failures - Extended timeout (15 min) for files > 500KB (BGB is 1.5MB) - upload_file returns 0 even on failure so set -e doesn't kill script - Failed uploads are still counted and reported in summary Co-Authored-By: Claude Opus 4.6 --- scripts/ingest-legal-corpus.sh | 18 +++++++++++++----- 1 file changed, 13 insertions(+), 5 deletions(-) diff --git a/scripts/ingest-legal-corpus.sh b/scripts/ingest-legal-corpus.sh index 843c17f..7dc5ec5 100755 --- a/scripts/ingest-legal-corpus.sh +++ b/scripts/ingest-legal-corpus.sh @@ -19,6 +19,7 @@ QDRANT_URL="${QDRANT_URL:-http://localhost:6333}" SDK_URL="${SDK_URL:-https://localhost:8093}" DB_URL="${DB_URL:-postgresql://localhost:5432/breakpilot?search_path=compliance,core,public}" CURL_OPTS="-sk --connect-timeout 10 --max-time 300" +CURL_OPTS_LARGE="-sk --connect-timeout 10 --max-time 900" # Counters UPLOADED=0 @@ -60,7 +61,7 @@ upload_file() { if [[ ! -f "$file" ]]; then warn "File not found: $file" FAILED=$((FAILED + 1)) - return 1 + return 0 # Don't abort script fi local filesize @@ -68,13 +69,20 @@ upload_file() { if [[ "$filesize" -lt 100 ]]; then warn "File too small (${filesize}B), skipping: $label" SKIPPED=$((SKIPPED + 1)) - return 1 + return 0 # Don't abort script fi log "Uploading: $label → $collection ($(( filesize / 1024 ))KB)" + # Use longer timeout for large files (>500KB) + local curl_opts="$CURL_OPTS" + if [[ "$filesize" -gt 512000 ]]; then + curl_opts="$CURL_OPTS_LARGE" + log " (large file, using extended timeout)" + fi + local response - response=$(curl $CURL_OPTS -X POST "$RAG_URL" \ + response=$(curl $curl_opts -X POST "$RAG_URL" \ -F "file=@${file}" \ -F "collection=${collection}" \ -F "data_type=${data_type}" \ @@ -98,9 +106,9 @@ upload_file() { UPLOADED=$((UPLOADED + 1)) else fail "Upload failed: $label" - fail "Response: $response" + fail "Response: ${response:0:200}" FAILED=$((FAILED + 1)) - return 1 + return 0 # Don't abort script on individual upload failure fi }