#!/bin/bash
# NiBiS Ingestion Pipeline Runner
# Extracts ZIPs and indexes Erwartungshorizonte in Qdrant
#
# Usage: <this script> [--dry-run] [--year YEAR] [--subject SUBJ]
#                      [--all-docs] [--manifest FILE]
#
# Environment (only displayed here, not modified):
#   QDRANT_URL      - shown with the fallback http://localhost:6333 when unset
#   OPENAI_API_KEY  - shown as "[SET]" when non-empty, blank otherwise
#
# Every recognised option is forwarded to nibis_ingestion.py, which is run
# from the backend directory.

set -e

SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
BACKEND_DIR="$SCRIPT_DIR/../backend"

# Print the command-line help text to stdout.
usage() {
  echo "Usage: $0 [options]"
  echo ""
  echo "Options:"
  echo "  --dry-run         Only analyze, don't index"
  echo "  --year YEAR       Filter by year (e.g., 2024)"
  echo "  --subject SUBJ    Filter by subject (e.g., Deutsch)"
  echo "  --all-docs        Include all documents, not just EWH"
  echo "  --manifest FILE   Create manifest JSON file"
}

# Abort with a diagnostic when an option that takes a value is the last word
# on the command line. Without this check `shift 2` would fail and `set -e`
# would terminate the script without any message.
# Arguments: the remaining positional parameters ("$@"); $1 is the option.
require_value() {
  if (( $# < 2 )); then
    echo "Option $1 requires an argument" >&2
    echo ""
    usage
    exit 1
  fi
}

echo "=============================================="
echo "NiBiS Ingestion Pipeline"
echo "=============================================="

# Check if we're in Docker or local
if [[ -f /.dockerenv ]]; then
  echo "Running in Docker container"
  cd /app/backend
else
  echo "Running locally"
  cd "$BACKEND_DIR"
  # Activate venv if exists
  if [[ -d "venv" ]]; then
    # shellcheck disable=SC1091 -- the venv only exists at runtime
    source venv/bin/activate
  fi
fi

# Check environment
echo ""
echo "Environment:"
echo "  QDRANT_URL: ${QDRANT_URL:-http://localhost:6333}"
# Only reveal whether the key is set, never its value.
echo "  OPENAI_API_KEY: ${OPENAI_API_KEY:+[SET]}"
echo ""

# Parse arguments.
# Each option gets its own array so that (a) values containing whitespace or
# glob characters reach Python as a single, unmodified argument, (b) a repeated
# option keeps last-one-wins semantics, and (c) the forwarding order stays
# fixed regardless of the order given on the command line.
dry_run_args=()
year_args=()
subject_args=()
all_docs_args=()
manifest_args=()

while [[ $# -gt 0 ]]; do
  case "$1" in
    --dry-run)
      dry_run_args=(--dry-run)
      shift
      ;;
    --year)
      require_value "$@"
      year_args=(--year "$2")
      shift 2
      ;;
    --subject)
      require_value "$@"
      subject_args=(--subject "$2")
      shift 2
      ;;
    --all-docs)
      all_docs_args=(--all-docs)
      shift
      ;;
    --manifest)
      require_value "$@"
      manifest_args=(--manifest "$2")
      shift 2
      ;;
    *)
      echo "Unknown option: $1"
      echo ""
      usage
      exit 1
      ;;
  esac
done

# Run ingestion
echo "Starting ingestion..."
# An empty array expands to zero words, so unset options add nothing.
python3 nibis_ingestion.py \
  "${dry_run_args[@]}" \
  "${year_args[@]}" \
  "${subject_args[@]}" \
  "${all_docs_args[@]}" \
  "${manifest_args[@]}"

echo ""
echo "Done!"