2f4a3f2ea2
_SECTION_NUMBER_RE only had patterns for §/Art/Section/Kapitel/Annex but missed NIST-style identifiers (AC-1, GV.OC-01, 3.1, A01:2021). This caused 0% section rate for all NIST/BSI/ENISA documents even though sections were correctly detected — the section NUMBER wasn't extracted from the header. Also adds: - reupload_legal_strategy.py: re-upload with legal chunking - extract_and_upload_nist.py: local PDF extraction workaround - qdrant-snapshot.sh: backup mechanism for Qdrant collections Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
66 lines
2.2 KiB
Bash
Executable File
66 lines
2.2 KiB
Bash
Executable File
#!/bin/bash
|
|
# Qdrant Snapshot — creates snapshots of all collections
#
# Usage:
#   bash scripts/qdrant-snapshot.sh                    # Create snapshots
#   bash scripts/qdrant-snapshot.sh --list             # List existing snapshots
#   bash scripts/qdrant-snapshot.sh --restore <file>   # Restore (interactive)
#
# NOTE: --restore is documented above, but no restore handler is present in
# this script.
#
# Snapshots are stored in the Qdrant volume under /qdrant/storage/snapshots/.
# They are additionally copied to ./backups/qdrant/ (override with BACKUP_DIR).
|
|
|
|
# Strict mode: abort on unhandled command failures, on use of unset
# variables, and when any stage of a pipeline fails.
set -euo pipefail

# Base URL of the Qdrant REST API; override via the QDRANT_URL env variable.
QDRANT_URL="${QDRANT_URL:-http://localhost:6333}"
# Drop a single trailing slash so "$QDRANT_URL/collections" never contains "//".
QDRANT_URL="${QDRANT_URL%/}"

# Local directory that receives the downloaded snapshot files;
# override via the BACKUP_DIR env variable.
BACKUP_DIR="${BACKUP_DIR:-./backups/qdrant}"

# Run timestamp (YYYYmmdd_HHMMSS) used in the names of the downloaded files.
TIMESTAMP=$(date +%Y%m%d_%H%M%S)
|
|
|
|
# --- Reject the unimplemented --restore mode ---
# --restore appears in the usage text, but this script has no restore logic.
# Without this guard the argument would fall through to the "create" path and
# silently produce new snapshots instead of restoring anything.
if [[ "${1:-}" == "--restore" ]]; then
  echo "ERROR: --restore is not implemented by this script" >&2
  exit 2
fi

# --- List existing snapshots ---
# Prints, for every collection reported by Qdrant, the snapshots stored on the
# server together with their size in MB.
# Exit status: 0 on success, 1 if Qdrant could not be queried or the snapshot
# list of at least one collection could not be retrieved.
if [[ "${1:-}" == "--list" ]]; then
  echo "=== Qdrant Snapshots ==="

  # Fetch and parse in separate steps: a command substitution used directly in
  # a `for` word list does not trigger `set -e`, so a failed request would
  # otherwise look like "no collections" and still exit 0.
  if ! collections_json=$(curl -sSf "$QDRANT_URL/collections"); then
    echo "ERROR: could not query collections from $QDRANT_URL" >&2
    exit 1
  fi

  if ! collection_names=$(python3 -c '
import sys, json
for c in json.load(sys.stdin)["result"]["collections"]:
    print(c["name"])
' <<<"$collections_json"); then
    echo "ERROR: unexpected response from $QDRANT_URL/collections" >&2
    exit 1
  fi

  list_rc=0
  while IFS= read -r coll; do
    # An empty collection list still yields one empty line from the here-string.
    [[ -n "$coll" ]] || continue

    echo ""
    echo "Collection: $coll"

    # A failure for one collection must not hide the remaining collections.
    if ! snaps_json=$(curl -sSf "$QDRANT_URL/collections/$coll/snapshots"); then
      echo "ERROR: could not list snapshots for collection '$coll'" >&2
      list_rc=1
      continue
    fi

    python3 -c '
import sys, json
snaps = json.load(sys.stdin).get("result", [])
if not snaps:
    print(" (no snapshots)")
else:
    for s in snaps:
        size_mb = s.get("size", 0) / (1024 * 1024)
        print(" {} size={:.1f}MB".format(s["name"], size_mb))
' <<<"$snaps_json" || list_rc=1
  done <<<"$collection_names"

  exit "$list_rc"
fi
|
|
|
|
# --- Create snapshots ---
# For every collection: ask Qdrant to create a snapshot, then download it to
# $BACKUP_DIR/<collection>_<TIMESTAMP>.snapshot.
# A failure for one collection is reported and the remaining collections are
# still processed; the script exits with status 1 at the end if any failed.
echo "=== Creating Qdrant Snapshots ($TIMESTAMP) ==="
mkdir -p "$BACKUP_DIR"

if ! collections_json=$(curl -sSf "$QDRANT_URL/collections"); then
  echo "ERROR: could not query collections from $QDRANT_URL" >&2
  exit 1
fi

if ! COLLECTIONS=$(python3 -c '
import sys, json
for c in json.load(sys.stdin)["result"]["collections"]:
    print(c["name"])
' <<<"$collections_json"); then
  echo "ERROR: unexpected response from $QDRANT_URL/collections" >&2
  exit 1
fi

failed=0
while IFS= read -r coll; do
  # An empty collection list still yields one empty line from the here-string.
  [[ -n "$coll" ]] || continue

  echo ""
  echo "[$coll] Creating snapshot..."

  # Under `set -e` / `pipefail` a bare `SNAP=$(curl … | python3 …)` would abort
  # the whole script on failure and the error branch below would never run.
  # Catch both the HTTP failure and a malformed response explicitly.
  SNAP=""
  if snap_json=$(curl -sSf -X POST "$QDRANT_URL/collections/$coll/snapshots"); then
    SNAP=$(python3 -c '
import sys, json
print(json.load(sys.stdin)["result"]["name"])
' <<<"$snap_json") || SNAP=""
  fi

  if [[ -z "$SNAP" ]]; then
    echo "[$coll] ERROR: snapshot creation failed" >&2
    failed=$((failed + 1))
    continue
  fi

  echo "[$coll] Snapshot: $SNAP"

  # Download snapshot to backup dir
  OUTFILE="$BACKUP_DIR/${coll}_${TIMESTAMP}.snapshot"
  if ! curl -sSf "$QDRANT_URL/collections/$coll/snapshots/$SNAP" -o "$OUTFILE"; then
    # Never leave a truncated file that could be mistaken for a valid backup.
    rm -f -- "$OUTFILE"
    echo "[$coll] ERROR: snapshot download failed" >&2
    failed=$((failed + 1))
    continue
  fi

  SIZE=$(du -h "$OUTFILE" | cut -f1)
  echo "[$coll] Saved: $OUTFILE ($SIZE)"
done <<<"$COLLECTIONS"

echo ""
echo "=== Done ==="
# The glob matches only files written by this run; stderr is silenced because
# an unmatched glob is reported through the fallback message instead.
ls -lh "$BACKUP_DIR"/*_"${TIMESTAMP}".snapshot 2>/dev/null || echo "No snapshots created"

if ((failed > 0)); then
  echo "ERROR: $failed collection(s) could not be backed up" >&2
  exit 1
fi
|