Files
breakpilot-compliance/scripts/populate-rag-originals.sh
Benjamin Admin f7c5effb9f fix: correct EDPB/ENISA/EDPS PDF download URLs
EDPB migrated from /sites/default/files/ to /system/files/YYYY-MM/.
Updated all URLs to current working paths.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-03-01 11:17:12 +01:00

127 lines
7.2 KiB
Bash
Executable File

#!/bin/bash
#
# populate-rag-originals.sh
#
# Laedt Original-PDFs der Regulierungen in ~/rag-originals/
# Dient als Referenz fuer die QA-Split-View im Chunk-Browser.
#
# Ausfuehrung auf dem Mac Mini:
# chmod +x scripts/populate-rag-originals.sh
# ./scripts/populate-rag-originals.sh
#
# Browser-like User-Agent string handed to curl (-A) for every download.
UA="Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36"

# Directory that collects all original PDFs; create it unless it is already there.
RAG_DIR="${HOME}/rag-originals"
[ -d "$RAG_DIR" ] || mkdir -p "$RAG_DIR"
#######################################
# Download a file into $RAG_DIR unless a file of that name already exists.
#
# The payload is first written to "<filename>.part" and only renamed to its
# final name after curl succeeded and produced a non-empty file. An aborted
# or failed transfer therefore never leaves something behind that a later
# run would skip as "exists".
#
# Globals:   RAG_DIR (read) - target directory
#            UA      (read) - User-Agent string sent with the request
# Arguments: $1 - target filename inside $RAG_DIR
#            $2 - source URL
# Outputs:   progress ([skip] / [download] / [WARN]) on stdout;
#            curl's own error message on stderr
# Returns:   0 in all cases; a failed download is reported via the [WARN] line
#######################################
download_if_missing() {
  local filename="$1"
  local url="$2"
  local target="$RAG_DIR/$filename"
  local partial="${target}.part"

  if [ -f "$target" ]; then
    echo " [skip] $filename (exists)"
    return
  fi

  echo " [download] $filename ..."
  # --fail: an HTTP error status (e.g. 404) makes curl exit non-zero instead
  #         of saving the server's error page under the PDF's name.
  # --silent --show-error: no progress meter, but keep the failure reason.
  if curl --fail --silent --show-error --location -A "$UA" -o "$partial" "$url" \
    && [ -s "$partial" ] \
    && mv -f -- "$partial" "$target"; then
    return 0
  fi

  echo " [WARN] Download fehlgeschlagen: $filename"
  rm -f -- "$partial"
}
echo "=== EU Verordnungen ==="
# EUR-Lex PDFs, one entry per document: "<target filename> <language> <CELEX id>".
# The download URL is assembled from the language code and the CELEX id.
# SCC_DE.pdf and SCC_FULL_TEXT_DE.pdf are both fetched from the same CELEX id.
eurlex_docs=(
  "GDPR_DE.pdf DE 32016R0679"
  "AIACT_DE.pdf DE 32024R1689"
  "CRA_DE.pdf DE 32024R2847"
  "NIS2_DE.pdf DE 32022L2555"
  "DSA_DE.pdf DE 32022R2065"
  "DMA_DE.pdf DE 32022R1925"
  "DGA_DE.pdf DE 32022R0868"
  "DATAACT_DE.pdf DE 32023R2854"
  "DORA_DE.pdf DE 32022R2554"
  "PSD2_DE.pdf DE 32015L2366"
  "AMLR_DE.pdf DE 32024R1624"
  "MiCA_DE.pdf DE 32023R1114"
  "EHDS_DE.pdf DE 32025R0327"
  "EUCSA_DE.pdf DE 32019R0881"
  "DPF_DE.pdf DE 32023D1795"
  "GPSR_DE.pdf DE 32023R0988"
  "EPRIVACY_DE.pdf DE 32002L0058"
  "SCC_DE.pdf DE 32021D0914"
  "SCC_FULL_TEXT_DE.pdf DE 32021D0914"
  "PLD_DE.pdf DE 31985L0374"
  "E_COMMERCE_RL_DE.pdf DE 32000L0031"
  "VERBRAUCHERRECHTE_RL_DE.pdf DE 32011L0083"
  "DIGITALE_INHALTE_RL_DE.pdf DE 32019L0770"
  "EAA_DE.pdf DE 32019L0882"
  "DSM_DE.pdf DE 32019L0790"
  "EU_IFRS_DE.pdf DE 32023R1803"
  "EU_IFRS_EN.pdf EN 32023R1803"
)
for eurlex_entry in "${eurlex_docs[@]}"; do
  # Split the entry into its three whitespace-separated fields.
  read -r eurlex_file eurlex_lang eurlex_celex <<<"$eurlex_entry"
  download_if_missing "$eurlex_file" \
    "https://eur-lex.europa.eu/legal-content/${eurlex_lang}/TXT/PDF/?uri=CELEX:${eurlex_celex}"
done
echo ""
echo "=== EDPB Guidelines ==="
# Flat list of (target filename, source URL) pairs, consumed two at a time.
# EDPB switched from /sites/default/files/ to /system/files/YYYY-MM/ around 2021;
# EDPB_GUIDELINES_4_2019 is the one entry here still using the older path, and
# EDPB_DPIA_LIST is fetched from ec.europa.eu rather than edpb.europa.eu.
edpb_docs=(
  "EDPB_GUIDELINES_1_2020.pdf"
  "https://www.edpb.europa.eu/system/files/2021-03/edpb_guidelines_202001_connected_vehicles_v2.0_adopted_en.pdf"
  "EDPB_GUIDELINES_1_2022.pdf"
  "https://www.edpb.europa.eu/system/files/2023-04/edpb_guidelines_202201_data_subject_rights_access_v2_en.pdf"
  "EDPB_GUIDELINES_2_2023.pdf"
  "https://www.edpb.europa.eu/system/files/2024-10/edpb_guidelines_202302_technical_scope_art_53_eprivacydirective_v2_en_0.pdf"
  "EDPB_GUIDELINES_2_2024.pdf"
  "https://www.edpb.europa.eu/system/files/2025-06/edpb_guidelines_202402_article48_v2_en.pdf"
  "EDPB_GUIDELINES_4_2019.pdf"
  "https://www.edpb.europa.eu/sites/default/files/files/file1/edpb_guidelines_201904_dataprotection_by_design_and_by_default_v2.0_en.pdf"
  "EDPB_GUIDELINES_9_2022.pdf"
  "https://www.edpb.europa.eu/system/files/2023-04/edpb_guidelines_202209_personal_data_breach_notification_v2.0_en.pdf"
  "EDPB_DPIA_LIST.pdf"
  "https://ec.europa.eu/newsroom/document.cfm?doc_id=47711"
  "EDPB_LEGITIMATE_INTEREST.pdf"
  "https://www.edpb.europa.eu/system/files/2024-10/edpb_guidelines_202401_legitimateinterest_en.pdf"
)
for ((edpb_idx = 0; edpb_idx < ${#edpb_docs[@]}; edpb_idx += 2)); do
  download_if_missing "${edpb_docs[edpb_idx]}" "${edpb_docs[edpb_idx + 1]}"
done
echo ""
echo "=== EDPS ==="
download_if_missing \
  "EDPS_DPIA_LIST.pdf" \
  "https://www.edps.europa.eu/sites/default/files/publication/19-07-16_edps_dpia_list_en.pdf"
echo ""

echo "=== ENISA (zusaetzlich) ==="
# All three ENISA documents share this URL prefix; the file names keep their
# percent-encoded spaces (%20) exactly as they appear in the source URLs.
enisa_files="https://www.enisa.europa.eu/sites/default/files"
download_if_missing "ENISA_THREAT_LANDSCAPE.pdf" \
  "${enisa_files}/2024-11/ENISA%20Threat%20Landscape%202024_0.pdf"
download_if_missing "ENISA_ICS_SCADA.pdf" \
  "${enisa_files}/publications/WP2016%203-1%202%20ICS%20SCADA%20Dependencies.pdf"
download_if_missing "ENISA_CYBERSECURITY_2024.pdf" \
  "${enisa_files}/2024-11/CSPA%20-%20NIS%20Investments%20-%202024_0.pdf"
echo ""
#######################################
# Print one "FEHLEND" line for every given document that has no PDF in
# $RAG_DIR. Shared by all sections below whose files cannot be downloaded
# automatically and have to be exported by hand.
#
# Globals:   RAG_DIR (read) - directory that is checked
# Arguments: document base names, without the ".pdf" extension
# Outputs:   " FEHLEND: <name>.pdf" on stdout per missing file
#######################################
report_missing_pdfs() {
  local name
  for name in "$@"; do
    if [ ! -f "$RAG_DIR/$name.pdf" ]; then
      echo " FEHLEND: $name.pdf"
    fi
  done
}

echo "=== DE Gesetze (gesetze-im-internet.de) ==="
echo " [info] DE Gesetze muessen manuell als PDF aus gesetze-im-internet.de exportiert werden."
echo " [info] Benoetigte Dateien:"
report_missing_pdfs \
  TDDDG_DE BDSG_FULL_DE DE_DDG DE_BGB_AGB DE_EGBGB DE_HGB_RET DE_AO_RET \
  DE_UWG DE_TKG DE_PANGV DE_DLINFOV DE_BETRVG DE_GESCHGEHG DE_BSIG \
  DE_USTG_RET TMG_KOMPLETT DE_URHG
echo ""

echo "=== AT Gesetze (ris.bka.gv.at) ==="
echo " [info] AT Gesetze muessen manuell aus RIS exportiert werden."
report_missing_pdfs \
  AT_DSG AT_DSG_FULL AT_ECG AT_TKG AT_KSCHG AT_FAGG AT_UGB_RET AT_BAO_RET \
  AT_MEDIENG AT_ABGB_AGB AT_UWG
echo ""

echo "=== CH Gesetze (fedlex.data.admin.ch) ==="
echo " [info] CH Gesetze muessen manuell aus Fedlex exportiert werden."
report_missing_pdfs \
  CH_DSG CH_DSV CH_OR_AGB CH_UWG CH_FMG CH_GEBUV CH_ZERTES CH_ZGB_PERS
echo ""

echo "=== BSI Standards ==="
echo " [info] BSI TRs muessen manuell von bsi.bund.de heruntergeladen werden."
report_missing_pdfs "BSI-TR-03161-1" "BSI-TR-03161-2" "BSI-TR-03161-3"
echo ""
#######################################
# Count the regular files matching *.pdf directly inside a directory.
# Uses a glob instead of parsing `ls` output, so file names with unusual
# characters are counted once and a directory named "*.pdf" is not expanded
# into its contents.
#
# Arguments: $1 - directory to inspect
# Outputs:   the count on stdout (0 when nothing matches)
#######################################
count_pdfs() {
  local dir="$1"
  local pdf
  local count=0
  for pdf in "$dir"/*.pdf; do
    # Also false for the unexpanded pattern left over when nothing matches.
    if [ -f "$pdf" ]; then
      count=$((count + 1))
    fi
  done
  echo "$count"
}

echo "=== Zusammenfassung ==="
TOTAL=$(count_pdfs "$RAG_DIR")
echo "$TOTAL PDFs in $RAG_DIR"
echo ""
echo "Fertig. Fehlende PDFs bitte manuell herunterladen."