#!/usr/bin/env bash
# =============================================================================
# BreakPilot Compliance — Phase H RAG Ingestion
#
# Downloads and ingests ~35 new legal sources into Qdrant:
#   - 16 German laws (gesetze-im-internet.de)   → bp_compliance_gesetze
#   - 16 EU regulations/directives (EUR-Lex)    → bp_compliance_ce
#   - 3 framework docs (NIST, HLEG)             → bp_compliance_datenschutz
#
# Run on Mac Mini:
#   bash ~/Projekte/breakpilot-compliance/scripts/ingest-phase-h.sh
#
# Environment overrides:
#   WORK_DIR           working directory for downloaded files
#   RAG_URL            upload endpoint of the RAG service
#   QDRANT_URL         base URL of the Qdrant instance used for the dedup check
#   DOWNLOAD_INSECURE  set to 1 to disable TLS verification for downloads
#
# Exit status: 0 when no upload failed, 1 otherwise.
# =============================================================================
set -euo pipefail

WORK_DIR="${WORK_DIR:-$HOME/rag-ingestion}"
RAG_URL="${RAG_URL:-https://localhost:8097/api/v1/documents/upload}"
QDRANT_URL="${QDRANT_URL:-http://localhost:6333}"

# Per-request time limits in seconds (regular / large transfers).
readonly CURL_TIMEOUT=300
readonly CURL_TIMEOUT_LARGE=900

# Options are kept in arrays so they expand as separate words without relying
# on unquoted word-splitting.
# Uploads go to the RAG endpoint (https://localhost by default), so certificate
# verification stays disabled there, as before.
CURL_LOCAL_OPTS=(-sk --connect-timeout 10)
# Downloads come from the public internet: follow redirects, treat HTTP errors
# as failures (-f) and verify TLS unless explicitly disabled.
CURL_DOWNLOAD_OPTS=(-sfL --connect-timeout 10)
if [[ "${DOWNLOAD_INSECURE:-0}" == "1" ]]; then
  CURL_DOWNLOAD_OPTS+=(-k)
fi
readonly CURL_LOCAL_OPTS CURL_DOWNLOAD_OPTS

readonly EURLEX_PDF_BASE="https://eur-lex.europa.eu/legal-content/DE/TXT/PDF/?uri=CELEX:"
readonly GESETZE_BASE="https://www.gesetze-im-internet.de"

# Result counters, updated by upload_file and reported by main.
UPLOADED=0
FAILED=0
SKIPPED=0

# --- Logging helpers (timestamped; warn/fail write to stderr) -----------------
log()  { printf '[%s] %s\n' "$(date '+%H:%M:%S')" "$*"; }
ok()   { printf '[%s] ok %s\n' "$(date '+%H:%M:%S')" "$*"; }
warn() { printf '[%s] WARN %s\n' "$(date '+%H:%M:%S')" "$*" >&2; }
fail() { printf '[%s] FAIL %s\n' "$(date '+%H:%M:%S')" "$*" >&2; }

#######################################
# Print the size of a file in bytes (0 if it cannot be read).
# Arguments: $1 - file path
# Outputs:   byte count on stdout
#######################################
file_size() {
  local bytes
  # wc -c works the same on macOS and Linux; BSD wc pads its output with
  # spaces, which the arithmetic expansion below strips.
  bytes=$({ wc -c <"$1"; } 2>/dev/null) || bytes=0
  printf '%d\n' "$(( ${bytes:-0} ))"
}

#######################################
# Check whether a file carries a PDF signature near its start.
# Arguments: $1 - file path
# Returns:   0 if "%PDF-" occurs within the first 1024 bytes, non-zero otherwise
#######################################
is_pdf() {
  head -c 1024 "$1" 2>/dev/null | LC_ALL=C grep -aq '%PDF-'
}

#######################################
# Download a URL to a target path, accepting it only if it is a plausible PDF.
# The transfer goes to "<target>.part" and is moved into place after
# validation, so an interrupted run never leaves a partial file that a later
# run would mistake for a finished download.
# Arguments: $1 - URL
#            $2 - target path
#            $3 - curl --max-time value in seconds
#            $4 - label used in the "Download failed" warning
#            $5 - label used in the validation warnings
# Returns:   0 if the file was stored at the target path, 1 otherwise
#######################################
fetch_pdf() {
  local url="$1"
  local target="$2"
  local max_time="$3"
  local fail_label="$4"
  local check_label="$5"
  local tmp="${target}.part"

  if ! curl "${CURL_DOWNLOAD_OPTS[@]}" --max-time "$max_time" \
      "$url" -o "$tmp" 2>/dev/null; then
    warn "Download failed: $fail_label"
    rm -f -- "$tmp"
    return 1
  fi

  local fsize
  fsize=$(file_size "$tmp")
  if [[ "$fsize" -lt 1000 ]]; then
    warn "Download too small (${fsize}B): $check_label"
    rm -f -- "$tmp"
    return 1
  fi

  # Landing pages and error documents are HTML even when the HTTP status is
  # fine; do not store them under a .pdf name.
  if ! is_pdf "$tmp"; then
    warn "Download is not a PDF: $check_label"
    rm -f -- "$tmp"
    return 1
  fi

  mv -f -- "$tmp" "$target"
}

#######################################
# Download a PDF unless the target file already exists.
# Best-effort: a failed download is logged and the function still returns 0;
# the missing file is reported later by upload_file.
# Arguments: $1 - URL
#            $2 - target path
#######################################
download_pdf() {
  local url="$1"
  local target="$2"
  local name
  name=$(basename "$target")

  if [[ -f "$target" ]]; then
    log "PDF exists: $name (skipping download)"
    return 0
  fi

  log "Downloading: $name"
  fetch_pdf "$url" "$target" "$CURL_TIMEOUT" "$url" "$name" || true
}

#######################################
# Download the complete-text PDF ("gesamt.pdf") of a German law from
# gesetze-im-internet.de unless the target file already exists.
# Best-effort, like download_pdf; uses the larger time limit.
# Arguments: $1 - law id as used in the gesetze-im-internet.de URL path
#            $2 - target path
#######################################
download_gesetz_pdf() {
  local law_id="$1"
  local target="$2"

  if [[ -f "$target" ]]; then
    log "PDF exists: $(basename "$target") (skipping download)"
    return 0
  fi

  log "Downloading: $law_id (gesetze-im-internet.de)"
  if fetch_pdf "${GESETZE_BASE}/${law_id}/gesamt.pdf" "$target" \
      "$CURL_TIMEOUT_LARGE" "$law_id" "$law_id"; then
    log " Downloaded: $(( $(file_size "$target") / 1024 ))KB"
  fi
}

#######################################
# Upload one document to the RAG service and update the result counters.
# A document whose regulation_id is already present in the target Qdrant
# collection is skipped.
# Globals:   RAG_URL, QDRANT_URL (read); UPLOADED, FAILED, SKIPPED (written)
# Arguments: $1 - file path
#            $2 - collection name
#            $3 - data_type form field
#            $4 - use_case form field
#            $5 - year form field
#            $6 - metadata as a JSON object string
#            $7 - display label (optional, defaults to the file's basename)
# Returns:   always 0; the outcome is recorded in the counters
#######################################
upload_file() {
  local file="$1"
  local collection="$2"
  local data_type="$3"
  local use_case="$4"
  local year="$5"
  local metadata_json="$6"
  local label="${7:-$(basename "$file")}"

  if [[ ! -f "$file" ]]; then
    warn "File not found: $file"
    FAILED=$((FAILED + 1))
    return 0
  fi

  # Dedup check: ask Qdrant for one point carrying the same regulation_id.
  # Any error along the way (python3 missing, Qdrant unreachable, bad JSON)
  # counts as "not present", so the upload proceeds.
  local reg_id
  reg_id=$(printf '%s' "$metadata_json" \
    | python3 -c "import sys,json; print(json.load(sys.stdin).get('regulation_id',''))" \
      2>/dev/null || echo "")
  if [[ -n "$reg_id" ]]; then
    local existing
    existing=$(curl -sk --max-time 5 -X POST \
        "${QDRANT_URL}/collections/${collection}/points/scroll" \
        -H "Content-Type: application/json" \
        -d "{\"filter\":{\"must\":[{\"key\":\"regulation_id\",\"match\":{\"value\":\"$reg_id\"}}]},\"limit\":1}" \
        2>/dev/null \
      | python3 -c "import sys,json; r=json.load(sys.stdin).get('result',{}); print(len(r.get('points',[])))" \
        2>/dev/null || echo "0")
    # Only compare when the answer is a plain number.
    if [[ "$existing" =~ ^[0-9]+$ ]] && [[ "$existing" -gt 0 ]]; then
      log "SKIP (already in Qdrant): $label [regulation_id=$reg_id]"
      SKIPPED=$((SKIPPED + 1))
      return 0
    fi
  fi

  local filesize
  filesize=$(file_size "$file")
  if [[ "$filesize" -lt 100 ]]; then
    warn "File too small (${filesize}B): $label"
    SKIPPED=$((SKIPPED + 1))
    return 0
  fi

  log "Uploading: $label -> $collection ($(( filesize / 1024 ))KB)"

  # Files above 256000 bytes get the longer time limit.
  local max_time="$CURL_TIMEOUT"
  if [[ "$filesize" -gt 256000 ]]; then
    max_time="$CURL_TIMEOUT_LARGE"
  fi

  # --form-string sends text fields literally; plain -F would interpret a
  # leading '@' or '<' and ';type=' sequences inside the value.
  local response=""
  response=$(curl "${CURL_LOCAL_OPTS[@]}" --max-time "$max_time" -X POST "$RAG_URL" \
    -F "file=@${file}" \
    --form-string "collection=${collection}" \
    --form-string "data_type=${data_type}" \
    --form-string "use_case=${use_case}" \
    --form-string "year=${year}" \
    --form-string "chunk_strategy=recursive" \
    --form-string "chunk_size=1024" \
    --form-string "chunk_overlap=128" \
    --form-string "metadata_json=${metadata_json}" \
    2>/dev/null) || true

  # Match inside the shell instead of `echo | grep -q`: under pipefail that
  # pipeline can report failure when grep exits before echo finishes writing.
  if [[ "$response" == *'"chunks_count"'* || "$response" == *'"vectors_indexed"'* ]]; then
    local chunks
    chunks=$(printf '%s' "$response" \
      | python3 -c "import sys,json; d=json.load(sys.stdin); print(d.get('chunks_count', d.get('vectors_indexed',0)))" \
        2>/dev/null || echo "?")
    ok "$label -> $chunks chunks"
    UPLOADED=$((UPLOADED + 1))
  else
    fail "Upload failed: $label"
    fail "Response: ${response:0:200}"
    FAILED=$((FAILED + 1))
  fi
}

# =============================================================================
# PHASE H-1: Downloads
# =============================================================================
phase_h_download() {
  log "=========================================="
  log "PHASE H-1: Downloads"
  log "=========================================="

  mkdir -p "$WORK_DIR/pdfs" "$WORK_DIR/texts"

  local entry

  # --- EU Regulations (EUR-Lex) ---
  # Each entry: "<CELEX number> <target file name>"
  log "--- EUR-Lex PDFs ---"
  local -a eurlex_docs=(
    "32016R0679 dsgvo_2016_679.pdf"
    "32024R2847 cra_2024_2847.pdf"
    "32024R1689 ai_act_2024_1689.pdf"
    "32022L2555 nis2_2022_2555.pdf"
    "32022R1925 dma_2022_1925.pdf"
    "32011L0083 consumer_rights_2011_83.pdf"
    "32019L0770 digital_content_2019_770.pdf"
    "32019L0771 sale_of_goods_2019_771.pdf"
    "32000L0031 ecommerce_2000_31.pdf"
    "31993L0013 unfair_terms_93_13.pdf"
    "32005L0029 unfair_practices_2005_29.pdf"
    "31998L0006 price_indication_98_6.pdf"
    "32019L2161 omnibus_2019_2161.pdf"
    "32023R0988 gpsr_2023_988.pdf"
    "31985L0374 product_liability_85_374.pdf"
    "32023R1542 battery_2023_1542.pdf"
  )
  for entry in "${eurlex_docs[@]}"; do
    download_pdf "${EURLEX_PDF_BASE}${entry%% *}" "$WORK_DIR/pdfs/${entry#* }"
  done

  # --- German Laws (gesetze-im-internet.de) ---
  # Each entry: "<law id in the URL> <target file name>"
  log "--- Deutsche Gesetze (PDFs) ---"
  local -a gesetze_docs=(
    "pangv pangv.pdf"
    "vsbg vsbg.pdf"
    "prodhaftg prodhaftg.pdf"
    "verpackg verpackg.pdf"
    "elektrog elektrog.pdf"
    "battg_2009 battdg.pdf"
    "bfsg bfsg.pdf"
    "uwg_2004 uwg.pdf"
    "bdsg_2018 bdsg.pdf"
    "ddg ddg.pdf"
    "tkg_2021 tkg.pdf"
    "hgb hgb.pdf"
    "ao_1977 ao.pdf"
    "gewo gewo.pdf"
    "bgb bgb_komplett.pdf"
    "bgbeg egbgb.pdf"
  )
  for entry in "${gesetze_docs[@]}"; do
    download_gesetz_pdf "${entry%% *}" "$WORK_DIR/pdfs/${entry#* }"
  done

  # --- NIST & HLEG ---
  log "--- Frameworks (NIST, HLEG) ---"
  download_pdf "https://nvlpubs.nist.gov/nistpubs/CSWP/NIST.CSWP.29.pdf" \
    "$WORK_DIR/pdfs/nist_csf_2_0.pdf"
  download_pdf "https://nvlpubs.nist.gov/nistpubs/CSWP/NIST.CSWP.01162020.pdf" \
    "$WORK_DIR/pdfs/nist_privacy_framework.pdf"
  # This URL is a publication-detail page; if it does not deliver a PDF the
  # download is rejected and phase_h_datenschutz reports the file as missing.
  download_pdf "https://op.europa.eu/en/publication-detail/-/publication/d3988569-0434-11ea-8c1f-01aa75ed71a1" \
    "$WORK_DIR/pdfs/hleg_trustworthy_ai.pdf"

  log "Downloads abgeschlossen."
}

# =============================================================================
# PHASE H-2: German laws → bp_compliance_gesetze
# =============================================================================
phase_h_gesetze() {
  log "=========================================="
  log "PHASE H-2: Deutsche Gesetze -> bp_compliance_gesetze"
  log "=========================================="

  local col="bp_compliance_gesetze"

  upload_file "$WORK_DIR/pdfs/pangv.pdf" "$col" "compliance" "legal_reference" "2022" \
    '{"regulation_id":"pangv","regulation_name_de":"Preisangabenverordnung (PAngV)","category":"verbraucherschutz","license":"public_law","source":"gesetze-im-internet.de"}' \
    "PAngV (Preisangabenverordnung)"

  upload_file "$WORK_DIR/pdfs/vsbg.pdf" "$col" "compliance" "legal_reference" "2016" \
    '{"regulation_id":"vsbg","regulation_name_de":"Verbraucherstreitbeilegungsgesetz (VSBG)","category":"verbraucherschutz","license":"public_law","source":"gesetze-im-internet.de"}' \
    "VSBG (Verbraucherstreitbeilegung)"

  upload_file "$WORK_DIR/pdfs/prodhaftg.pdf" "$col" "compliance" "legal_reference" "1989" \
    '{"regulation_id":"prodhaftg","regulation_name_de":"Produkthaftungsgesetz (ProdHaftG)","category":"produkthaftung","license":"public_law","source":"gesetze-im-internet.de"}' \
    "ProdHaftG (Produkthaftung)"

  upload_file "$WORK_DIR/pdfs/verpackg.pdf" "$col" "compliance" "legal_reference" "2017" \
    '{"regulation_id":"verpackg","regulation_name_de":"Verpackungsgesetz (VerpackG)","category":"umwelt","license":"public_law","source":"gesetze-im-internet.de"}' \
    "VerpackG (Verpackungsgesetz)"

  upload_file "$WORK_DIR/pdfs/elektrog.pdf" "$col" "compliance" "legal_reference" "2015" \
    '{"regulation_id":"elektrog","regulation_name_de":"Elektro- und Elektronikgeraetegesetz (ElektroG)","category":"umwelt","license":"public_law","source":"gesetze-im-internet.de"}' \
    "ElektroG (WEEE)"

  upload_file "$WORK_DIR/pdfs/battdg.pdf" "$col" "compliance" "legal_reference" "2009" \
    '{"regulation_id":"battdg","regulation_name_de":"Batteriegesetz (BattG)","category":"umwelt","license":"public_law","source":"gesetze-im-internet.de"}' \
    "BattG (Batteriegesetz)"

  upload_file "$WORK_DIR/pdfs/bfsg.pdf" "$col" "compliance" "legal_reference" "2021" \
    '{"regulation_id":"bfsg","regulation_name_de":"Barrierefreiheitsstaerkungsgesetz (BFSG)","category":"barrierefreiheit","license":"public_law","source":"gesetze-im-internet.de"}' \
    "BFSG (Barrierefreiheit)"

  upload_file "$WORK_DIR/pdfs/uwg.pdf" "$col" "compliance" "legal_reference" "2004" \
    '{"regulation_id":"uwg","regulation_name_de":"Gesetz gegen den unlauteren Wettbewerb (UWG)","category":"wettbewerb","license":"public_law","source":"gesetze-im-internet.de"}' \
    "UWG (Unlauterer Wettbewerb)"

  upload_file "$WORK_DIR/pdfs/bdsg.pdf" "$col" "compliance" "legal_reference" "2018" \
    '{"regulation_id":"bdsg","regulation_name_de":"Bundesdatenschutzgesetz (BDSG)","category":"datenschutz","license":"public_law","source":"gesetze-im-internet.de"}' \
    "BDSG (Bundesdatenschutzgesetz)"

  upload_file "$WORK_DIR/pdfs/ddg.pdf" "$col" "compliance" "legal_reference" "2024" \
    '{"regulation_id":"ddg","regulation_name_de":"Digitale-Dienste-Gesetz (DDG)","category":"plattformen","license":"public_law","source":"gesetze-im-internet.de"}' \
    "DDG (Digitale-Dienste-Gesetz, komplett)"

  upload_file "$WORK_DIR/pdfs/tkg.pdf" "$col" "compliance" "legal_reference" "2021" \
    '{"regulation_id":"tkg","regulation_name_de":"Telekommunikationsgesetz (TKG)","category":"telekommunikation","license":"public_law","source":"gesetze-im-internet.de"}' \
    "TKG (Telekommunikationsgesetz)"

  upload_file "$WORK_DIR/pdfs/hgb.pdf" "$col" "compliance" "legal_reference" "1897" \
    '{"regulation_id":"hgb","regulation_name_de":"Handelsgesetzbuch (HGB)","category":"handelsrecht","license":"public_law","source":"gesetze-im-internet.de"}' \
    "HGB (Handelsgesetzbuch)"

  upload_file "$WORK_DIR/pdfs/ao.pdf" "$col" "compliance" "legal_reference" "1977" \
    '{"regulation_id":"ao","regulation_name_de":"Abgabenordnung (AO)","category":"steuerrecht","license":"public_law","source":"gesetze-im-internet.de"}' \
    "AO (Abgabenordnung)"

  upload_file "$WORK_DIR/pdfs/gewo.pdf" "$col" "compliance" "legal_reference" "1999" \
    '{"regulation_id":"gewo","regulation_name_de":"Gewerbeordnung (GewO)","category":"gewerberecht","license":"public_law","source":"gesetze-im-internet.de"}' \
    "GewO (Gewerbeordnung)"

  upload_file "$WORK_DIR/pdfs/bgb_komplett.pdf" "$col" "compliance" "legal_reference" "2002" \
    '{"regulation_id":"bgb_komplett","regulation_name_de":"Buergerliches Gesetzbuch (BGB, komplett)","category":"zivilrecht","license":"public_law","source":"gesetze-im-internet.de"}' \
    "BGB (komplett als PDF)"

  upload_file "$WORK_DIR/pdfs/egbgb.pdf" "$col" "compliance" "legal_reference" "1896" \
    '{"regulation_id":"egbgb_komplett","regulation_name_de":"EGBGB (komplett)","category":"zivilrecht","license":"public_law","source":"gesetze-im-internet.de"}' \
    "EGBGB (komplett als PDF)"
}

# =============================================================================
# PHASE H-3: EU legal texts → bp_compliance_ce
# =============================================================================
phase_h_eu() {
  log "=========================================="
  log "PHASE H-3: EU-Rechtstexte -> bp_compliance_ce"
  log "=========================================="

  local col="bp_compliance_ce"

  upload_file "$WORK_DIR/pdfs/dsgvo_2016_679.pdf" "$col" "compliance_ce" "legal_reference" "2016" \
    '{"regulation_id":"eu_2016_679","regulation_name_de":"Datenschutz-Grundverordnung (DSGVO)","regulation_name_en":"General Data Protection Regulation","regulation_short":"DSGVO","category":"datenschutz","celex":"32016R0679","source":"eur-lex","license":"public_law"}' \
    "DSGVO (EU) 2016/679"

  upload_file "$WORK_DIR/pdfs/cra_2024_2847.pdf" "$col" "compliance_ce" "legal_reference" "2024" \
    '{"regulation_id":"eu_2024_2847","regulation_name_de":"Cyber Resilience Act (CRA)","regulation_name_en":"Cyber Resilience Act","regulation_short":"CRA","category":"cybersecurity","celex":"32024R2847","source":"eur-lex","license":"public_law"}' \
    "Cyber Resilience Act (EU) 2024/2847"

  upload_file "$WORK_DIR/pdfs/ai_act_2024_1689.pdf" "$col" "compliance_ce" "legal_reference" "2024" \
    '{"regulation_id":"eu_2024_1689","regulation_name_de":"KI-Verordnung (AI Act)","regulation_name_en":"Artificial Intelligence Act","regulation_short":"AI Act","category":"ki_regulierung","celex":"32024R1689","source":"eur-lex","license":"public_law"}' \
    "AI Act (EU) 2024/1689"

  upload_file "$WORK_DIR/pdfs/nis2_2022_2555.pdf" "$col" "compliance_ce" "legal_reference" "2022" \
    '{"regulation_id":"eu_2022_2555","regulation_name_de":"NIS2-Richtlinie","regulation_name_en":"NIS2 Directive","regulation_short":"NIS2","category":"cybersecurity","celex":"32022L2555","source":"eur-lex","license":"public_law"}' \
    "NIS2 Directive (EU) 2022/2555"

  upload_file "$WORK_DIR/pdfs/dma_2022_1925.pdf" "$col" "compliance_ce" "legal_reference" "2022" \
    '{"regulation_id":"eu_2022_1925","regulation_name_de":"Digital Markets Act (DMA)","regulation_name_en":"Digital Markets Act","regulation_short":"DMA","category":"plattformregulierung","celex":"32022R1925","source":"eur-lex","license":"public_law"}' \
    "Digital Markets Act (EU) 2022/1925"

  upload_file "$WORK_DIR/pdfs/consumer_rights_2011_83.pdf" "$col" "compliance_ce" "legal_reference" "2011" \
    '{"regulation_id":"eu_2011_83","regulation_name_de":"Verbraucherrechte-Richtlinie","regulation_name_en":"Consumer Rights Directive","regulation_short":"CRD","category":"verbraucherschutz","celex":"32011L0083","source":"eur-lex","license":"public_law"}' \
    "Consumer Rights Directive 2011/83/EU"

  upload_file "$WORK_DIR/pdfs/digital_content_2019_770.pdf" "$col" "compliance_ce" "legal_reference" "2019" \
    '{"regulation_id":"eu_2019_770","regulation_name_de":"Digitale-Inhalte-Richtlinie","regulation_name_en":"Digital Content Directive","regulation_short":"DCD","category":"verbraucherschutz","celex":"32019L0770","source":"eur-lex","license":"public_law"}' \
    "Digital Content Directive 2019/770"

  upload_file "$WORK_DIR/pdfs/sale_of_goods_2019_771.pdf" "$col" "compliance_ce" "legal_reference" "2019" \
    '{"regulation_id":"eu_2019_771","regulation_name_de":"Warenkauf-Richtlinie","regulation_name_en":"Sale of Goods Directive","regulation_short":"SGD","category":"verbraucherschutz","celex":"32019L0771","source":"eur-lex","license":"public_law"}' \
    "Sale of Goods Directive 2019/771"

  upload_file "$WORK_DIR/pdfs/ecommerce_2000_31.pdf" "$col" "compliance_ce" "legal_reference" "2000" \
    '{"regulation_id":"eu_2000_31","regulation_name_de":"E-Commerce-Richtlinie","regulation_name_en":"E-Commerce Directive","regulation_short":"ECD","category":"plattformregulierung","celex":"32000L0031","source":"eur-lex","license":"public_law"}' \
    "E-Commerce Directive 2000/31/EC"

  upload_file "$WORK_DIR/pdfs/unfair_terms_93_13.pdf" "$col" "compliance_ce" "legal_reference" "1993" \
    '{"regulation_id":"eu_1993_13","regulation_name_de":"Klausel-Richtlinie","regulation_name_en":"Unfair Contract Terms Directive","regulation_short":"UCTD","category":"verbraucherschutz","celex":"31993L0013","source":"eur-lex","license":"public_law"}' \
    "Unfair Contract Terms Directive 93/13/EEC"

  upload_file "$WORK_DIR/pdfs/unfair_practices_2005_29.pdf" "$col" "compliance_ce" "legal_reference" "2005" \
    '{"regulation_id":"eu_2005_29","regulation_name_de":"UGP-Richtlinie","regulation_name_en":"Unfair Commercial Practices Directive","regulation_short":"UCPD","category":"verbraucherschutz","celex":"32005L0029","source":"eur-lex","license":"public_law"}' \
    "Unfair Commercial Practices Directive 2005/29/EC"

  upload_file "$WORK_DIR/pdfs/price_indication_98_6.pdf" "$col" "compliance_ce" "legal_reference" "1998" \
    '{"regulation_id":"eu_1998_6","regulation_name_de":"Preisangaben-Richtlinie","regulation_name_en":"Price Indication Directive","regulation_short":"PID","category":"verbraucherschutz","celex":"31998L0006","source":"eur-lex","license":"public_law"}' \
    "Price Indication Directive 98/6/EC"

  upload_file "$WORK_DIR/pdfs/omnibus_2019_2161.pdf" "$col" "compliance_ce" "legal_reference" "2019" \
    '{"regulation_id":"eu_2019_2161","regulation_name_de":"Omnibus-Richtlinie","regulation_name_en":"Omnibus Directive","regulation_short":"Omnibus","category":"verbraucherschutz","celex":"32019L2161","source":"eur-lex","license":"public_law"}' \
    "Omnibus Directive 2019/2161"

  upload_file "$WORK_DIR/pdfs/gpsr_2023_988.pdf" "$col" "compliance_ce" "legal_reference" "2023" \
    '{"regulation_id":"eu_2023_988","regulation_name_de":"Allgemeine Produktsicherheitsverordnung (GPSR)","regulation_name_en":"General Product Safety Regulation","regulation_short":"GPSR","category":"produktsicherheit","celex":"32023R0988","source":"eur-lex","license":"public_law"}' \
    "GPSR (EU) 2023/988"

  upload_file "$WORK_DIR/pdfs/product_liability_85_374.pdf" "$col" "compliance_ce" "legal_reference" "1985" \
    '{"regulation_id":"eu_1985_374","regulation_name_de":"Produkthaftungsrichtlinie","regulation_name_en":"Product Liability Directive","regulation_short":"PLD","category":"produkthaftung","celex":"31985L0374","source":"eur-lex","license":"public_law"}' \
    "Product Liability Directive 85/374/EEC"

  upload_file "$WORK_DIR/pdfs/battery_2023_1542.pdf" "$col" "compliance_ce" "legal_reference" "2023" \
    '{"regulation_id":"eu_2023_1542","regulation_name_de":"Batterieverordnung","regulation_name_en":"Battery Regulation","regulation_short":"BattVO","category":"umwelt","celex":"32023R1542","source":"eur-lex","license":"public_law"}' \
    "Batterieverordnung (EU) 2023/1542"
}

# =============================================================================
# PHASE H-4: Data-protection frameworks → bp_compliance_datenschutz
# =============================================================================
phase_h_datenschutz() {
  log "=========================================="
  log "PHASE H-4: Frameworks -> bp_compliance_datenschutz"
  log "=========================================="

  local col="bp_compliance_datenschutz"

  upload_file "$WORK_DIR/pdfs/nist_csf_2_0.pdf" "$col" "compliance" "framework" "2024" \
    '{"regulation_id":"nist_csf_2_0","regulation_name_de":"NIST Cybersecurity Framework 2.0","regulation_name_en":"NIST Cybersecurity Framework 2.0","regulation_short":"NIST CSF 2.0","category":"security","license":"public_domain_us","source":"nist.gov"}' \
    "NIST Cybersecurity Framework 2.0"

  upload_file "$WORK_DIR/pdfs/nist_privacy_framework.pdf" "$col" "compliance" "framework" "2020" \
    '{"regulation_id":"nist_privacy_1_0","regulation_name_de":"NIST Privacy Framework 1.0","regulation_name_en":"NIST Privacy Framework 1.0","regulation_short":"NIST PF 1.0","category":"datenschutz","license":"public_domain_us","source":"nist.gov"}' \
    "NIST Privacy Framework 1.0"

  # HLEG may need special handling - the op.europa.eu link may redirect.
  # A missing file is only warned about here, not counted as a failed upload.
  if [[ -f "$WORK_DIR/pdfs/hleg_trustworthy_ai.pdf" ]]; then
    upload_file "$WORK_DIR/pdfs/hleg_trustworthy_ai.pdf" "$col" "compliance" "framework" "2019" \
      '{"regulation_id":"hleg_trustworthy_ai","regulation_name_de":"HLEG Ethik-Leitlinien Vertrauenswuerdige KI","regulation_name_en":"HLEG Ethics Guidelines for Trustworthy AI","regulation_short":"HLEG AI","category":"ki_ethik","license":"cc_by_4","source":"ec.europa.eu"}' \
      "HLEG Ethics Guidelines Trustworthy AI"
  else
    warn "HLEG PDF not available (download may have failed)"
  fi
}

# =============================================================================
# MAIN
# =============================================================================
#######################################
# Run all phases in order and print a summary of the counters.
# Returns: 0 if no upload failed, 1 otherwise (so schedulers and wrapper
#          scripts can detect an incomplete ingestion).
#######################################
main() {
  log "=========================================="
  log "PHASE H: RAG Ingestion — ~35 neue Dokumente"
  log "=========================================="
  log "Work dir: $WORK_DIR"
  log "RAG URL: $RAG_URL"

  phase_h_download
  phase_h_gesetze
  phase_h_eu
  phase_h_datenschutz

  log "=========================================="
  log "PHASE H ABGESCHLOSSEN"
  log " Hochgeladen: $UPLOADED"
  log " Uebersprungen: $SKIPPED"
  log " Fehlgeschlagen: $FAILED"
  log "=========================================="

  if [[ "$FAILED" -gt 0 ]]; then
    return 1
  fi
}

main "$@"