fix(rag): Handle large file uploads + don't abort on individual failures
All checks were successful
CI/CD / go-lint (push) Has been skipped
CI/CD / python-lint (push) Has been skipped
CI/CD / nodejs-lint (push) Has been skipped
CI/CD / test-go-ai-compliance (push) Successful in 1m5s
CI/CD / test-python-backend-compliance (push) Successful in 43s
CI/CD / test-python-document-crawler (push) Successful in 33s
CI/CD / test-python-dsms-gateway (push) Successful in 27s
CI/CD / deploy-hetzner (push) Successful in 17s
All checks were successful
CI/CD / go-lint (push) Has been skipped
CI/CD / python-lint (push) Has been skipped
CI/CD / nodejs-lint (push) Has been skipped
CI/CD / test-go-ai-compliance (push) Successful in 1m5s
CI/CD / test-python-backend-compliance (push) Successful in 43s
CI/CD / test-python-document-crawler (push) Successful in 33s
CI/CD / test-python-dsms-gateway (push) Successful in 27s
CI/CD / deploy-hetzner (push) Successful in 17s
- Extended timeout (15 min) for files > 500KB (BGB is 1.5MB)
- upload_file returns 0 even on failure so set -e doesn't kill the script
- Failed uploads are still counted and reported in summary

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -19,6 +19,7 @@ QDRANT_URL="${QDRANT_URL:-http://localhost:6333}"
|
|||||||
SDK_URL="${SDK_URL:-https://localhost:8093}"
|
SDK_URL="${SDK_URL:-https://localhost:8093}"
|
||||||
DB_URL="${DB_URL:-postgresql://localhost:5432/breakpilot?search_path=compliance,core,public}"
|
DB_URL="${DB_URL:-postgresql://localhost:5432/breakpilot?search_path=compliance,core,public}"
|
||||||
CURL_OPTS="-sk --connect-timeout 10 --max-time 300"
|
CURL_OPTS="-sk --connect-timeout 10 --max-time 300"
|
||||||
|
CURL_OPTS_LARGE="-sk --connect-timeout 10 --max-time 900"
|
||||||
|
|
||||||
# Counters
|
# Counters
|
||||||
UPLOADED=0
|
UPLOADED=0
|
||||||
@@ -60,7 +61,7 @@ upload_file() {
|
|||||||
if [[ ! -f "$file" ]]; then
|
if [[ ! -f "$file" ]]; then
|
||||||
warn "File not found: $file"
|
warn "File not found: $file"
|
||||||
FAILED=$((FAILED + 1))
|
FAILED=$((FAILED + 1))
|
||||||
return 1
|
return 0 # Don't abort script
|
||||||
fi
|
fi
|
||||||
|
|
||||||
local filesize
|
local filesize
|
||||||
@@ -68,13 +69,20 @@ upload_file() {
|
|||||||
if [[ "$filesize" -lt 100 ]]; then
|
if [[ "$filesize" -lt 100 ]]; then
|
||||||
warn "File too small (${filesize}B), skipping: $label"
|
warn "File too small (${filesize}B), skipping: $label"
|
||||||
SKIPPED=$((SKIPPED + 1))
|
SKIPPED=$((SKIPPED + 1))
|
||||||
return 1
|
return 0 # Don't abort script
|
||||||
fi
|
fi
|
||||||
|
|
||||||
log "Uploading: $label → $collection ($(( filesize / 1024 ))KB)"
|
log "Uploading: $label → $collection ($(( filesize / 1024 ))KB)"
|
||||||
|
|
||||||
|
# Use longer timeout for large files (>500KB)
|
||||||
|
local curl_opts="$CURL_OPTS"
|
||||||
|
if [[ "$filesize" -gt 512000 ]]; then
|
||||||
|
curl_opts="$CURL_OPTS_LARGE"
|
||||||
|
log " (large file, using extended timeout)"
|
||||||
|
fi
|
||||||
|
|
||||||
local response
|
local response
|
||||||
response=$(curl $CURL_OPTS -X POST "$RAG_URL" \
|
response=$(curl $curl_opts -X POST "$RAG_URL" \
|
||||||
-F "file=@${file}" \
|
-F "file=@${file}" \
|
||||||
-F "collection=${collection}" \
|
-F "collection=${collection}" \
|
||||||
-F "data_type=${data_type}" \
|
-F "data_type=${data_type}" \
|
||||||
@@ -98,9 +106,9 @@ upload_file() {
|
|||||||
UPLOADED=$((UPLOADED + 1))
|
UPLOADED=$((UPLOADED + 1))
|
||||||
else
|
else
|
||||||
fail "Upload failed: $label"
|
fail "Upload failed: $label"
|
||||||
fail "Response: $response"
|
fail "Response: ${response:0:200}"
|
||||||
FAILED=$((FAILED + 1))
|
FAILED=$((FAILED + 1))
|
||||||
return 1
|
return 0 # Don't abort script on individual upload failure
|
||||||
fi
|
fi
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user