Script downloads original regulation PDFs from EUR-Lex into ~/rag-originals/ for use with the RAG QA Split-View Chunk-Browser. Lists missing national law PDFs that require manual download. Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
105 lines
5.2 KiB
Bash
Executable File
105 lines
5.2 KiB
Bash
Executable File
#!/bin/bash
#
# populate-rag-originals.sh
#
# Downloads the original regulation PDFs into ~/rag-originals/.
# They serve as the reference for the QA split view in the chunk browser.
#
# Usage on the Mac Mini:
#   chmod +x scripts/populate-rag-originals.sh
#   ./scripts/populate-rag-originals.sh
#

# Target directory for all original PDFs. Everything below reads and writes
# files relative to this path.
RAG_DIR="$HOME/rag-originals"

# Abort early when the directory cannot be created: without it every
# subsequent download would fail anyway. mkdir reports the reason on stderr.
mkdir -p "$RAG_DIR" || exit 1

# Browser-like User-Agent string handed to curl (-A) for each download.
UA='Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36'

#######################################
# Download a PDF into $RAG_DIR unless it is already present.
#
# The payload is first written to "<target>.part" and only moved to its final
# name after it has been verified, so an aborted or failed transfer can never
# leave a file behind that a later run would skip as "exists".
#
# Globals:
#   RAG_DIR (read) - directory that receives the file
#   UA      (read) - User-Agent string sent with the request
# Arguments:
#   $1 - file name inside $RAG_DIR (e.g. GDPR_DE.pdf)
#   $2 - URL to fetch
# Outputs:
#   Progress / skip / warning lines on stdout; curl errors on stderr.
# Returns:
#   0 if the file already existed or was downloaded successfully,
#   1 if the download failed or did not yield a PDF.
#######################################
download_if_missing() {
  local filename="$1"
  local url="$2"
  local target="$RAG_DIR/$filename"
  local partial="$target.part"

  if [ -f "$target" ]; then
    echo " [skip] $filename (exists)"
    return 0
  fi

  echo " [download] $filename ..."

  # -f : treat HTTP errors (4xx/5xx) as failures instead of saving the error page
  # -sS: no progress meter, but still print the actual error message
  # -L : follow redirects
  # A PDF carries the "%PDF-" marker near its start; anything else (HTML error
  # or challenge page, empty body) is rejected.
  if curl -fsSL -A "$UA" -o "$partial" "$url" \
    && [ -s "$partial" ] \
    && head -c 1024 "$partial" | LC_ALL=C grep -aq '%PDF-' \
    && mv -f "$partial" "$target"; then
    return 0
  fi

  echo " [WARN] Download fehlgeschlagen: $filename"
  rm -f "$partial"
  return 1
}

# Build the EUR-Lex PDF URL for a CELEX number and pass it on to
# download_if_missing.
#   $1 - target file name
#   $2 - CELEX number
#   $3 - language segment of the URL path (defaults to DE)
fetch_eurlex_pdf() {
  download_if_missing "$1" "https://eur-lex.europa.eu/legal-content/${3:-DE}/TXT/PDF/?uri=CELEX:$2"
}

echo "=== EU Verordnungen ==="
fetch_eurlex_pdf "GDPR_DE.pdf" 32016R0679
fetch_eurlex_pdf "AIACT_DE.pdf" 32024R1689
fetch_eurlex_pdf "CRA_DE.pdf" 32024R2847
fetch_eurlex_pdf "NIS2_DE.pdf" 32022L2555
fetch_eurlex_pdf "DSA_DE.pdf" 32022R2065
fetch_eurlex_pdf "DMA_DE.pdf" 32022R1925
fetch_eurlex_pdf "DGA_DE.pdf" 32022R0868
fetch_eurlex_pdf "DATAACT_DE.pdf" 32023R2854
fetch_eurlex_pdf "DORA_DE.pdf" 32022R2554
fetch_eurlex_pdf "PSD2_DE.pdf" 32015L2366
fetch_eurlex_pdf "AMLR_DE.pdf" 32024R1624
fetch_eurlex_pdf "MiCA_DE.pdf" 32023R1114
fetch_eurlex_pdf "EHDS_DE.pdf" 32025R0327
fetch_eurlex_pdf "EUCSA_DE.pdf" 32019R0881
fetch_eurlex_pdf "DPF_DE.pdf" 32023D1795
fetch_eurlex_pdf "GPSR_DE.pdf" 32023R0988
fetch_eurlex_pdf "EPRIVACY_DE.pdf" 32002L0058
# NOTE(review): the next two entries use the same CELEX number, so the same
# document is stored under two file names - presumably intentional; confirm.
fetch_eurlex_pdf "SCC_DE.pdf" 32021D0914
fetch_eurlex_pdf "SCC_FULL_TEXT_DE.pdf" 32021D0914
fetch_eurlex_pdf "PLD_DE.pdf" 31985L0374
fetch_eurlex_pdf "E_COMMERCE_RL_DE.pdf" 32000L0031
fetch_eurlex_pdf "VERBRAUCHERRECHTE_RL_DE.pdf" 32011L0083
fetch_eurlex_pdf "DIGITALE_INHALTE_RL_DE.pdf" 32019L0770
fetch_eurlex_pdf "EAA_DE.pdf" 32019L0882
fetch_eurlex_pdf "DSM_DE.pdf" 32019L0790
# Same regulation in two languages: German and English.
fetch_eurlex_pdf "EU_IFRS_DE.pdf" 32023R1803
fetch_eurlex_pdf "EU_IFRS_EN.pdf" 32023R1803 EN

# German national laws are not downloaded by this script; print the section
# header and name every expected PDF that is not yet present in $RAG_DIR.
printf '%s\n' \
  "" \
  "=== DE Gesetze (gesetze-im-internet.de) ===" \
  " [info] DE Gesetze muessen manuell als PDF aus gesetze-im-internet.de exportiert werden." \
  " [info] Benoetigte Dateien:"

de_laws=(
  TDDDG_DE BDSG_FULL_DE DE_DDG DE_BGB_AGB DE_EGBGB
  DE_HGB_RET DE_AO_RET DE_UWG DE_TKG DE_PANGV
  DE_DLINFOV DE_BETRVG DE_GESCHGEHG DE_BSIG DE_USTG_RET
)
for law in "${de_laws[@]}"; do
  [ -f "$RAG_DIR/$law.pdf" ] || echo " FEHLEND: $law.pdf"
done

# Austrian laws are not downloaded by this script; print the section header
# and name every expected PDF that is not yet present in $RAG_DIR.
printf '%s\n' \
  "" \
  "=== AT Gesetze (ris.bka.gv.at) ===" \
  " [info] AT Gesetze muessen manuell aus RIS exportiert werden."

at_laws=(
  AT_DSG AT_DSG_FULL AT_ECG AT_TKG AT_KSCHG AT_FAGG
  AT_UGB_RET AT_BAO_RET AT_MEDIENG AT_ABGB_AGB AT_UWG
)
for law in "${at_laws[@]}"; do
  [ -f "$RAG_DIR/$law.pdf" ] || echo " FEHLEND: $law.pdf"
done

# Swiss laws are not downloaded by this script; print the section header and
# name every expected PDF that is not yet present in $RAG_DIR.
printf '%s\n' \
  "" \
  "=== CH Gesetze (fedlex.data.admin.ch) ===" \
  " [info] CH Gesetze muessen manuell aus Fedlex exportiert werden."

ch_laws=(
  CH_DSG CH_DSV CH_OR_AGB CH_UWG
  CH_FMG CH_GEBUV CH_ZERTES CH_ZGB_PERS
)
for law in "${ch_laws[@]}"; do
  [ -f "$RAG_DIR/$law.pdf" ] || echo " FEHLEND: $law.pdf"
done

# BSI technical guidelines are not downloaded by this script; print the
# section header and name every expected PDF not yet present in $RAG_DIR.
printf '%s\n' \
  "" \
  "=== BSI Standards ===" \
  " [info] BSI TRs muessen manuell von bsi.bund.de heruntergeladen werden."

bsi_docs=("BSI-TR-03161-1" "BSI-TR-03161-2" "BSI-TR-03161-3")
for doc in "${bsi_docs[@]}"; do
  [ -f "$RAG_DIR/$doc.pdf" ] || echo " FEHLEND: $doc.pdf"
done

# Print the number of regular files matching *.pdf directly inside a directory.
# Counts via the shell glob instead of parsing `ls` output, so file names with
# newlines are counted once and directories named *.pdf are not expanded.
#   $1 - directory to inspect
count_pdfs() {
  local dir="$1"
  local count=0
  local pdf
  for pdf in "$dir"/*.pdf; do
    # An unmatched glob stays literal and fails the -f test, yielding 0.
    if [ -f "$pdf" ]; then
      count=$((count + 1))
    fi
  done
  echo "$count"
}

echo ""
echo "=== Zusammenfassung ==="
TOTAL=$(count_pdfs "$RAG_DIR")
echo "$TOTAL PDFs in $RAG_DIR"
echo ""
echo "Fertig. Fehlende PDFs bitte manuell herunterladen."