fix(rag): Handle large file uploads + don't abort on individual failures
All checks were successful
CI/CD / go-lint (push) Has been skipped
CI/CD / python-lint (push) Has been skipped
CI/CD / nodejs-lint (push) Has been skipped
CI/CD / test-go-ai-compliance (push) Successful in 1m5s
CI/CD / test-python-backend-compliance (push) Successful in 43s
CI/CD / test-python-document-crawler (push) Successful in 33s
CI/CD / test-python-dsms-gateway (push) Successful in 27s
CI/CD / deploy-hetzner (push) Successful in 17s
All checks were successful
CI/CD / go-lint (push) Has been skipped
CI/CD / python-lint (push) Has been skipped
CI/CD / nodejs-lint (push) Has been skipped
CI/CD / test-go-ai-compliance (push) Successful in 1m5s
CI/CD / test-python-backend-compliance (push) Successful in 43s
CI/CD / test-python-document-crawler (push) Successful in 33s
CI/CD / test-python-dsms-gateway (push) Successful in 27s
CI/CD / deploy-hetzner (push) Successful in 17s
- Extended timeout (15 min) for files > 500KB (BGB is 1.5MB)
- upload_file returns 0 even on failure so set -e doesn't kill the script
- Failed uploads are still counted and reported in the summary

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -19,6 +19,7 @@ QDRANT_URL="${QDRANT_URL:-http://localhost:6333}"
|
||||
SDK_URL="${SDK_URL:-https://localhost:8093}"
|
||||
DB_URL="${DB_URL:-postgresql://localhost:5432/breakpilot?search_path=compliance,core,public}"
|
||||
CURL_OPTS="-sk --connect-timeout 10 --max-time 300"
|
||||
CURL_OPTS_LARGE="-sk --connect-timeout 10 --max-time 900"
|
||||
|
||||
# Counters
|
||||
UPLOADED=0
|
||||
@@ -60,7 +61,7 @@ upload_file() {
|
||||
if [[ ! -f "$file" ]]; then
|
||||
warn "File not found: $file"
|
||||
FAILED=$((FAILED + 1))
|
||||
return 1
|
||||
return 0 # Don't abort script
|
||||
fi
|
||||
|
||||
local filesize
|
||||
@@ -68,13 +69,20 @@ upload_file() {
|
||||
if [[ "$filesize" -lt 100 ]]; then
|
||||
warn "File too small (${filesize}B), skipping: $label"
|
||||
SKIPPED=$((SKIPPED + 1))
|
||||
return 1
|
||||
return 0 # Don't abort script
|
||||
fi
|
||||
|
||||
log "Uploading: $label → $collection ($(( filesize / 1024 ))KB)"
|
||||
|
||||
# Use longer timeout for large files (>500KB)
|
||||
local curl_opts="$CURL_OPTS"
|
||||
if [[ "$filesize" -gt 512000 ]]; then
|
||||
curl_opts="$CURL_OPTS_LARGE"
|
||||
log " (large file, using extended timeout)"
|
||||
fi
|
||||
|
||||
local response
|
||||
response=$(curl $CURL_OPTS -X POST "$RAG_URL" \
|
||||
response=$(curl $curl_opts -X POST "$RAG_URL" \
|
||||
-F "file=@${file}" \
|
||||
-F "collection=${collection}" \
|
||||
-F "data_type=${data_type}" \
|
||||
@@ -98,9 +106,9 @@ upload_file() {
|
||||
UPLOADED=$((UPLOADED + 1))
|
||||
else
|
||||
fail "Upload failed: $label"
|
||||
fail "Response: $response"
|
||||
fail "Response: ${response:0:200}"
|
||||
FAILED=$((FAILED + 1))
|
||||
return 1
|
||||
return 0 # Don't abort script on individual upload failure
|
||||
fi
|
||||
}
|
||||
|
||||
|
||||
Reference in New Issue
Block a user