#!/bin/bash
#
# Full Compliance Update Script
#
# Steps:
#   1.  Wait for a running legal-corpus re-ingestion to finish
#   1b. Re-ingest TDDDG with PDF support
#   2.  Read the chunk count of the bp_legal_corpus Qdrant collection
#   3.  Run the compliance pipeline (checkpoint extraction + control generation)
#   4.  Check that the pipeline produced statistics.json
#   5.  Re-seed the compliance database through the backend API
#   6.  Fetch the dashboard statistics
#
# Run on Mac Mini in background:
#   nohup ./run_full_compliance_update.sh > /tmp/compliance_update.log 2>&1 &
#
# Exit status: 0 on success; non-zero when a step fails or the compliance
# output is missing.

# -e: abort on unhandled errors; -u: reject unset variables;
# pipefail: a failing `docker exec` is no longer hidden behind a succeeding tee.
set -euo pipefail

readonly LOG_FILE="/tmp/compliance_update.log"
readonly DOCKER="/usr/local/bin/docker"
readonly CONTAINER="breakpilot-pwa-klausur-service"
readonly QDRANT_COLLECTION_URL="http://localhost:6333/collections/bp_legal_corpus"
readonly BACKEND_API_URL="http://localhost:8000/api/v1/compliance"
readonly STATISTICS_FILE="/tmp/compliance_output/statistics.json"

#######################################
# Print the current local time as "YYYY-MM-DD HH:MM:SS".
# Outputs: the timestamp on stdout
#######################################
now_stamp() {
  date +"%Y-%m-%d %H:%M:%S"
}

#######################################
# Copy stdin to stdout and append it to LOG_FILE.
# When stdout already is LOG_FILE (the documented nohup redirect), only
# stdout is written; a second append would duplicate every line.
# Globals: LOG_FILE (read)
#######################################
tee_log() {
  if [[ /dev/fd/1 -ef "$LOG_FILE" ]]; then
    cat
  else
    tee -a "$LOG_FILE"
  fi
}

#######################################
# Log one message, prefixed with the time at which it is emitted.
# Arguments: $1 - message text (may be empty)
#######################################
log() {
  printf '[%s] %s\n' "$(now_stamp)" "${1-}" | tee_log
}

#######################################
# Print points_count of the Qdrant collection.
# Globals: QDRANT_COLLECTION_URL (read)
# Returns: non-zero when the request fails or the response lacks the field
#######################################
qdrant_chunk_count() {
  curl -sS "$QDRANT_COLLECTION_URL" \
    | python3 -c "import sys, json; print(json.load(sys.stdin)['result']['points_count'])"
}

# Record where the script died before `set -e` terminates it. The pipeline
# steps stay at top level (not inside a function) so this trap sees them.
trap 'log "  ERROR: command failed with exit code $? at line $LINENO"' ERR

# Becomes 1 when a non-fatal check fails; used as the final exit status.
exit_status=0

log "=============================================="
log "FULL COMPLIANCE UPDATE PIPELINE"
log "Started at: $(now_stamp)"
log "=============================================="

# Step 1: Wait for ongoing re-ingestion to complete
# NOTE(review): any non-zero status of `docker exec` ends the wait, so an
# unreachable container looks the same as "no ingestion running" -- confirm
# that this is acceptable.
log ""
log "Step 1: Checking if re-ingestion is still running..."

while "$DOCKER" exec "$CONTAINER" pgrep -f "legal_corpus_ingestion.py" > /dev/null 2>&1; do
  log "  Re-ingestion still running, waiting 60 seconds..."
  # Progress display only: fall back to 0 when Qdrant cannot be queried.
  current_count=$(qdrant_chunk_count 2> /dev/null) || current_count="0"
  log "  Current chunk count: $current_count"
  sleep 60
done

log "  Re-ingestion complete!"

# Step 1b: Re-run TDDDG with PDF support
log ""
log "Step 1b: Re-ingesting TDDDG with PDF support..."
"$DOCKER" exec \
  -e QDRANT_HOST=qdrant \
  -e EMBEDDING_SERVICE_URL=http://embedding-service:8087 \
  "$CONTAINER" python -c "
import asyncio
from legal_corpus_ingestion import LegalCorpusIngestion

async def main():
    ingestion = LegalCorpusIngestion()
    await ingestion.ingest_single('TDDDG', force=True)

asyncio.run(main())
" 2>&1 | tee_log

log "  TDDDG re-ingestion complete!"

# Step 2: Check Qdrant chunk count (a failure here aborts the script)
log ""
log "Step 2: Checking Qdrant collection status..."
chunk_count=$(qdrant_chunk_count)
log "  Total chunks in bp_legal_corpus: $chunk_count"

# Step 3: Run compliance pipeline (checkpoint extraction + control generation)
log ""
log "Step 3: Running compliance pipeline..."
"$DOCKER" exec "$CONTAINER" python /app/full_compliance_pipeline.py 2>&1 | tee_log

# Step 4: Check if compliance output was generated
log ""
log "Step 4: Checking compliance output..."
if "$DOCKER" exec "$CONTAINER" ls "$STATISTICS_FILE" > /dev/null 2>&1; then
  log "  Compliance output generated successfully!"
  "$DOCKER" exec "$CONTAINER" cat "$STATISTICS_FILE" | tee_log
else
  log "  ERROR: Compliance output not found!"
  # Keep going with the remaining steps, but report the failure on exit.
  exit_status=1
fi

# Step 5: Re-seed compliance database with new data
log ""
log "Step 5: Updating compliance database..."

# Call the backend API to re-seed compliance data (best effort)
seed_result=$(curl -s -X POST "$BACKEND_API_URL/seed" \
  -H "Content-Type: application/json" \
  -d '{"force": true}' || echo '{"error": "API call failed"}')
log "  Seed result: $seed_result"

# Step 6: Update statistics
log ""
log "Step 6: Updating dashboard statistics..."
stats=$(curl -s "$BACKEND_API_URL/dashboard/statistics" || echo '{"error": "API call failed"}')
log "  Dashboard statistics:"
# Pretty-print when the response is valid JSON, otherwise show it verbatim.
{
  printf '%s\n' "$stats" | python3 -m json.tool 2> /dev/null \
    || printf '%s\n' "$stats"
} | tee_log

# Final Summary
log ""
log "=============================================="
log "PIPELINE COMPLETE"
log "Finished at: $(now_stamp)"
log "=============================================="
log ""
log "Results:"
log "  - Chunks in RAG: $chunk_count"
log "  - Check /tmp/compliance_output/ for detailed results"
log "  - View Frontend at: https://macmini:3002/compliance"
log "  - View RAG Status at: https://macmini:3002/ai/rag"
if [[ "$exit_status" -ne 0 ]]; then
  log "  - WARNING: compliance output was missing (see Step 4)"
fi
log ""

exit "$exit_status"