Some checks failed
Tests / Go Tests (push) Has been cancelled
Tests / Python Tests (push) Has been cancelled
Tests / Integration Tests (push) Has been cancelled
Tests / Go Lint (push) Has been cancelled
Tests / Python Lint (push) Has been cancelled
Tests / Security Scan (push) Has been cancelled
Tests / All Checks Passed (push) Has been cancelled
Security Scanning / Secret Scanning (push) Has been cancelled
Security Scanning / Dependency Vulnerability Scan (push) Has been cancelled
Security Scanning / Go Security Scan (push) Has been cancelled
Security Scanning / Python Security Scan (push) Has been cancelled
Security Scanning / Node.js Security Scan (push) Has been cancelled
Security Scanning / Docker Image Security (push) Has been cancelled
Security Scanning / Security Summary (push) Has been cancelled
CI/CD Pipeline / Go Tests (push) Has been cancelled
CI/CD Pipeline / Python Tests (push) Has been cancelled
CI/CD Pipeline / Website Tests (push) Has been cancelled
CI/CD Pipeline / Linting (push) Has been cancelled
CI/CD Pipeline / Security Scan (push) Has been cancelled
CI/CD Pipeline / Docker Build & Push (push) Has been cancelled
CI/CD Pipeline / Integration Tests (push) Has been cancelled
CI/CD Pipeline / Deploy to Staging (push) Has been cancelled
CI/CD Pipeline / Deploy to Production (push) Has been cancelled
CI/CD Pipeline / CI Summary (push) Has been cancelled
ci/woodpecker/manual/build-ci-image Pipeline was successful
ci/woodpecker/manual/main Pipeline failed
All services: admin-v2, studio-v2, website, ai-compliance-sdk, consent-service, klausur-service, voice-service, and infrastructure. Large PDFs and compiled binaries excluded via .gitignore.
92 lines
3.6 KiB
Bash
92 lines
3.6 KiB
Bash
#!/bin/bash
#
# Full Compliance Update Script
#
# Waits for a running legal-corpus re-ingestion, re-ingests TDDDG, runs the
# compliance pipeline, re-seeds the compliance database and prints the
# dashboard statistics.
#
# Run on Mac Mini in background:
#   nohup ./run_full_compliance_update.sh > /tmp/compliance_update.nohup.out 2>&1 &
#
# The script appends to $LOG_FILE itself via `tee -a`, so nohup's output must
# go to a DIFFERENT file: sending it to the log file as well duplicates every
# line and can overwrite what `tee -a` has already appended.

# -e: abort on the first failing command.
# -u: abort on use of an unset variable.
# pipefail: a failure left of `| tee` is no longer hidden by tee's status.
set -euo pipefail

# Log destination; can be overridden from the environment.
LOG_FILE="${LOG_FILE:-/tmp/compliance_update.log}"

# Start time of this run, captured once.
TIMESTAMP=$(date +"%Y-%m-%d %H:%M:%S")

#######################################
# Write one timestamped line to stdout and append it to the log file.
# Globals:   LOG_FILE (read) - file the line is appended to
# Arguments: $1 - message text (may be empty)
# Outputs:   "[YYYY-MM-DD HH:MM:SS] <message>" on stdout and in $LOG_FILE
# Returns:   exit status of tee
#######################################
log() {
  # Read the clock on every call: a timestamp captured once at start-up
  # would stamp every line of a long run with the same instant.
  local now
  now=$(date +"%Y-%m-%d %H:%M:%S")
  # Quoted so a log path containing spaces stays a single tee argument.
  printf '[%s] %s\n' "$now" "${1-}" | tee -a "$LOG_FILE"
}

# Opening banner: pipeline title and the start time of this run.
banner_rule="=============================================="
for banner_line in \
  "$banner_rule" \
  "FULL COMPLIANCE UPDATE PIPELINE" \
  "Started at: $TIMESTAMP" \
  "$banner_rule"; do
  log "$banner_line"
done

# Step 1: Wait for ongoing re-ingestion to complete.
# Polls the klausur-service container once a minute until no
# legal_corpus_ingestion.py process is found, logging the Qdrant point count
# of bp_legal_corpus on each round.
log ""
log "Step 1: Checking if re-ingestion is still running..."

while true; do
  # pgrep exits non-zero when nothing matches; a failing `docker exec`
  # ends the wait the same way.
  if ! /usr/local/bin/docker exec breakpilot-pwa-klausur-service \
    pgrep -f "legal_corpus_ingestion.py" > /dev/null 2>&1; then
    break
  fi

  log " Re-ingestion still running, waiting 60 seconds..."

  # Best-effort progress figure: any curl or JSON failure is reported as 0.
  CURRENT_COUNT=$(
    curl -s http://localhost:6333/collections/bp_legal_corpus 2> /dev/null \
      | python3 -c "import sys, json; print(json.load(sys.stdin).get('result',{}).get('points_count',0))" 2> /dev/null \
      || echo "0"
  )
  log " Current chunk count: $CURRENT_COUNT"

  sleep 60
done

log " Re-ingestion complete!"

# Step 1b: Re-run TDDDG with PDF support
log ""
log "Step 1b: Re-ingesting TDDDG with PDF support..."

# The subshell turns on pipefail locally so the pipeline reports the exit
# status of `docker exec` rather than that of `tee`; without it a failed
# ingestion would still be logged as complete.
if ! (
  set -o pipefail
  /usr/local/bin/docker exec \
    -e QDRANT_HOST=qdrant \
    -e EMBEDDING_SERVICE_URL=http://embedding-service:8087 \
    breakpilot-pwa-klausur-service python -c "
import asyncio
from legal_corpus_ingestion import LegalCorpusIngestion

async def main():
    ingestion = LegalCorpusIngestion()
    await ingestion.ingest_single('TDDDG', force=True)

asyncio.run(main())
" 2>&1 | tee -a "$LOG_FILE"
); then
  log " ERROR: TDDDG re-ingestion failed!"
  exit 1
fi
log " TDDDG re-ingestion complete!"

# Step 2: Check Qdrant chunk count
log ""
log "Step 2: Checking Qdrant collection status..."

# CHUNK_COUNT is reported again in the final summary. The assignment is
# tested explicitly so the failure reason is written to the log before the
# script stops; under `set -e` alone the run would end with nothing but a
# Python traceback when the response has no result.points_count.
if ! CHUNK_COUNT=$(
  curl -s http://localhost:6333/collections/bp_legal_corpus \
    | python3 -c "import sys, json; print(json.load(sys.stdin)['result']['points_count'])"
); then
  log " ERROR: Could not read points_count of bp_legal_corpus from Qdrant!"
  exit 1
fi
log " Total chunks in bp_legal_corpus: $CHUNK_COUNT"

# Step 3: Run compliance pipeline (checkpoint extraction + control generation)
log ""
log "Step 3: Running compliance pipeline..."

# pipefail is enabled inside the subshell so that a non-zero exit of the
# pipeline script is not masked by the exit status of `tee`.
if ! (
  set -o pipefail
  /usr/local/bin/docker exec breakpilot-pwa-klausur-service \
    python /app/full_compliance_pipeline.py 2>&1 | tee -a "$LOG_FILE"
); then
  log " ERROR: Compliance pipeline failed!"
  exit 1
fi

# Step 4: Check if compliance output was generated.
# Looks for statistics.json inside the container and prints it when present;
# a missing file is only logged, the run continues.
log ""
log "Step 4: Checking compliance output..."

stats_path=/tmp/compliance_output/statistics.json
if ! /usr/local/bin/docker exec breakpilot-pwa-klausur-service \
  ls "$stats_path" > /dev/null 2>&1; then
  log " ERROR: Compliance output not found!"
else
  log " Compliance output generated successfully!"
  /usr/local/bin/docker exec breakpilot-pwa-klausur-service cat "$stats_path"
fi

# Step 5: Re-seed compliance database with new data
log ""
log "Step 5: Updating compliance database..."

# POST to the backend seed endpoint. If curl itself fails, a placeholder
# JSON error is appended to the captured output so the run can continue.
SEED_RESULT=$(
  curl -s -X POST http://localhost:8000/api/v1/compliance/seed \
    -H "Content-Type: application/json" \
    -d '{"force": true}' \
    || echo '{"error": "API call failed"}'
)
log " Seed result: $SEED_RESULT"

# Step 6: Update statistics.
# Fetches the dashboard statistics from the backend and prints them.
log ""
log "Step 6: Updating dashboard statistics..."

STATS=$(
  curl -s http://localhost:8000/api/v1/compliance/dashboard/statistics \
    || echo '{"error": "API call failed"}'
)
log " Dashboard statistics:"

# Pretty-print when the payload parses as JSON, otherwise show it verbatim.
if ! echo "$STATS" | python3 -m json.tool 2> /dev/null; then
  echo "$STATS"
fi

# Final Summary: closing banner with the finish time, the chunk count
# gathered in step 2, and pointers to the detailed results.
finished_at=$(date +"%Y-%m-%d %H:%M:%S")
summary_rule="=============================================="
summary_lines=(
  ""
  "$summary_rule"
  "PIPELINE COMPLETE"
  "Finished at: $finished_at"
  "$summary_rule"
  ""
  "Results:"
  " - Chunks in RAG: $CHUNK_COUNT"
  " - Check /tmp/compliance_output/ for detailed results"
  " - View Frontend at: https://macmini:3002/compliance"
  " - View RAG Status at: https://macmini:3002/ai/rag"
  ""
)
for summary_line in "${summary_lines[@]}"; do
  log "$summary_line"
done