feat(rag): Expand Phase H to Layer 1 Safe Core (~60 documents)
Some checks failed
CI/CD / go-lint (push) Has been skipped
CI/CD / python-lint (push) Has been skipped
CI/CD / nodejs-lint (push) Has been skipped
CI/CD / test-go-ai-compliance (push) Successful in 40s
CI/CD / test-python-backend-compliance (push) Successful in 39s
CI/CD / test-python-document-crawler (push) Successful in 29s
CI/CD / test-python-dsms-gateway (push) Successful in 25s
CI/CD / deploy-hetzner (push) Failing after 1s
Some checks failed
CI/CD / go-lint (push) Has been skipped
CI/CD / python-lint (push) Has been skipped
CI/CD / nodejs-lint (push) Has been skipped
CI/CD / test-go-ai-compliance (push) Successful in 40s
CI/CD / test-python-backend-compliance (push) Successful in 39s
CI/CD / test-python-document-crawler (push) Successful in 29s
CI/CD / test-python-dsms-gateway (push) Successful in 25s
CI/CD / deploy-hetzner (push) Failing after 1s
Phase H now includes:
- 16 German laws (PAngV, VSBG, ProdHaftG, BDSG, HGB, AO, DDG, TKG, etc.)
- 15 EUR-Lex EU laws (DSGVO, Consumer Rights Dir, Sale of Goods Dir, E-Commerce Dir, Unfair Terms Dir, DMA, NIS2, Product Liability Dir, etc.)
- 2 NIST frameworks (CSF 2.0, Privacy Framework 1.0)
- 1 HLEG Ethics Guidelines

Updated rag-sources.md with complete inventory of already-ingested vs new documents, plus Layer 2-5 TODO roadmap.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -763,38 +763,55 @@ phase_datenschutz() {
|
||||
}
|
||||
|
||||
# =============================================================================
|
||||
# PHASE H: Verbraucherschutz & AI Act
|
||||
# PHASE H: Layer 1 Safe Core — Verbraucherschutz, EU-Recht, NIST
|
||||
# =============================================================================
|
||||
# ~60 Dokumente: EUR-Lex (CC BY 4.0), gesetze-im-internet.de (Public Domain),
|
||||
# NIST (Public Domain), HLEG (CC BY 4.0)
|
||||
# =============================================================================
|
||||
phase_verbraucherschutz() {
|
||||
log "=========================================="
|
||||
log "PHASE H: Verbraucherschutz & AI Act"
|
||||
log "PHASE H: Layer 1 Safe Core (~60 Dokumente)"
|
||||
log "=========================================="
|
||||
|
||||
mkdir -p "$WORK_DIR"/{pdfs,texts}
|
||||
|
||||
# --- H1: Deutsche Verbraucherschutz-Gesetze → bp_compliance_gesetze ---
|
||||
# =========================================================================
|
||||
# H1: Deutsche Gesetze → bp_compliance_gesetze
|
||||
# Quelle: gesetze-im-internet.de (Public Domain, § 5 UrhG)
|
||||
# =========================================================================
|
||||
local col="bp_compliance_gesetze"
|
||||
local before
|
||||
before=$(collection_count "$col")
|
||||
log "Collection $col: $before chunks (before)"
|
||||
log "--- H1: Deutsche Gesetze → $col ($before chunks) ---"
|
||||
|
||||
# Download + Ingest deutsche Gesetze (gesetze-im-internet.de, Public Domain § 5 UrhG)
|
||||
local -a verbraucherschutz_gesetze=(
|
||||
"pangv_2022/PAngV:PAngV:Preisangabenverordnung"
|
||||
"vsbg/VSBG:VSBG:Verbraucherstreitbeilegungsgesetz"
|
||||
"prodhaftg/ProdHaftG:ProdHaftG:Produkthaftungsgesetz"
|
||||
"verpackg/VerpackG:VerpackG:Verpackungsgesetz"
|
||||
"elektrog_2015/ElektroG:ElektroG:Elektro- und Elektronikgeraetegesetz"
|
||||
"battdg/BattDG:BattDG:Batteriegesetz"
|
||||
"bfsg/BFSG:BFSG:Barrierefreiheitsstaerkungsgesetz"
|
||||
"uwg_2004/UWG:UWG:Gesetz gegen den unlauteren Wettbewerb"
|
||||
# Verbraucherschutz-Gesetze
|
||||
local -a de_gesetze=(
|
||||
"pangv_2022/PAngV:PAngV:Preisangabenverordnung:verbraucherschutz"
|
||||
"vsbg/VSBG:VSBG:Verbraucherstreitbeilegungsgesetz:verbraucherschutz"
|
||||
"prodhaftg/ProdHaftG:ProdHaftG:Produkthaftungsgesetz:verbraucherschutz"
|
||||
"verpackg/VerpackG:VerpackG:Verpackungsgesetz:verbraucherschutz"
|
||||
"elektrog_2015/ElektroG:ElektroG:Elektro- und Elektronikgeraetegesetz:verbraucherschutz"
|
||||
"battdg/BattDG:BattDG:Batteriegesetz:verbraucherschutz"
|
||||
"bfsg/BFSG:BFSG:Barrierefreiheitsstaerkungsgesetz:verbraucherschutz"
|
||||
"uwg_2004/UWG:UWG:Gesetz gegen den unlauteren Wettbewerb:verbraucherschutz"
|
||||
# Datenschutz + IT
|
||||
"bdsg_2018/BDSG:BDSG:Bundesdatenschutzgesetz:datenschutz"
|
||||
"ddg/DDG:DDG:Digitale-Dienste-Gesetz:ecommerce"
|
||||
"tkg_2021/TKG:TKG:Telekommunikationsgesetz:datenschutz"
|
||||
# Handels-/Steuerrecht (Loeschfristen)
|
||||
"hgb/HGB:HGB:Handelsgesetzbuch:aufbewahrung"
|
||||
"ao_1977/AO:AO:Abgabenordnung:aufbewahrung"
|
||||
# Gewerberecht
|
||||
"gewo/GewO:GewO:Gewerbeordnung:gewerberecht"
|
||||
)
|
||||
|
||||
for entry in "${verbraucherschutz_gesetze[@]}"; do
|
||||
for entry in "${de_gesetze[@]}"; do
|
||||
local path="${entry%%:*}"
|
||||
local rest="${entry#*:}"
|
||||
local short="${rest%%:*}"
|
||||
local fullname="${rest#*:}"
|
||||
rest="${rest#*:}"
|
||||
local fullname="${rest%%:*}"
|
||||
local category="${rest#*:}"
|
||||
local pdf_file="$WORK_DIR/pdfs/${short}.pdf"
|
||||
|
||||
download_pdf \
|
||||
@@ -803,97 +820,127 @@ phase_verbraucherschutz() {
|
||||
|
||||
if [[ -f "$pdf_file" ]]; then
|
||||
upload_file "$pdf_file" "$col" "compliance" "legal_reference" "2025" \
|
||||
"{\"regulation_id\":\"${short,,}\",\"regulation_name_de\":\"$fullname ($short)\",\"category\":\"verbraucherschutz\",\"license\":\"public_domain_§5_UrhG\",\"source\":\"gesetze-im-internet.de\"}" \
|
||||
"{\"regulation_id\":\"${short,,}\",\"regulation_name_de\":\"$fullname ($short)\",\"category\":\"$category\",\"license\":\"public_domain_§5_UrhG\",\"source\":\"gesetze-im-internet.de\"}" \
|
||||
"$short ($fullname)"
|
||||
fi
|
||||
done
|
||||
|
||||
# BGB komplett (Fernabsatz §§ 312-312k, Digitale Inhalte §§ 327-327u, Kaufrecht §§ 433-480)
|
||||
# BGB komplett (Fernabsatz, Digitale Inhalte, Kaufrecht, AGB-Recht)
|
||||
download_pdf \
|
||||
"https://www.gesetze-im-internet.de/bgb/BGB.pdf" \
|
||||
"$WORK_DIR/pdfs/BGB_full.pdf"
|
||||
|
||||
if [[ -f "$WORK_DIR/pdfs/BGB_full.pdf" ]]; then
|
||||
upload_file "$WORK_DIR/pdfs/BGB_full.pdf" "$col" "compliance" "legal_reference" "2025" \
|
||||
'{"regulation_id":"bgb_fernabsatz","regulation_name_de":"BGB (Fernabsatz, Digitale Inhalte, Kaufrecht)","category":"verbraucherschutz","license":"public_domain_§5_UrhG","source":"gesetze-im-internet.de"}' \
|
||||
"BGB (Fernabsatz/Digitale Inhalte/Kaufrecht)"
|
||||
'{"regulation_id":"bgb_komplett","regulation_name_de":"BGB (komplett: AGB-Recht, Fernabsatz, Digitale Inhalte, Kaufrecht)","category":"vertragsrecht","license":"public_domain_§5_UrhG","source":"gesetze-im-internet.de"}' \
|
||||
"BGB komplett"
|
||||
fi
|
||||
|
||||
# EGBGB fuer Muster-Widerrufsbelehrung (Anlage 1+2 zu Art. 246a)
|
||||
# EGBGB (Muster-Widerrufsbelehrung Anlage 1+2)
|
||||
download_pdf \
|
||||
"https://www.gesetze-im-internet.de/bgbeg/BGBEG.pdf" \
|
||||
"$WORK_DIR/pdfs/BGBEG.pdf"
|
||||
|
||||
if [[ -f "$WORK_DIR/pdfs/BGBEG.pdf" ]]; then
|
||||
upload_file "$WORK_DIR/pdfs/BGBEG.pdf" "$col" "compliance" "legal_reference" "2025" \
|
||||
'{"regulation_id":"egbgb_muster_widerruf","regulation_name_de":"EGBGB (Muster-Widerrufsbelehrung, Anlage 1+2 zu Art. 246a)","category":"verbraucherschutz","license":"public_domain_§5_UrhG","source":"gesetze-im-internet.de"}' \
|
||||
'{"regulation_id":"egbgb","regulation_name_de":"EGBGB (Muster-Widerrufsbelehrung, Informationspflichten)","category":"verbraucherschutz","license":"public_domain_§5_UrhG","source":"gesetze-im-internet.de"}' \
|
||||
"EGBGB (Muster-Widerrufsbelehrung)"
|
||||
fi
|
||||
|
||||
local after
|
||||
after=$(collection_count "$col")
|
||||
log "Collection $col: $before → $after chunks"
|
||||
log "Collection $col: $before → $after chunks (+$((after - before)))"
|
||||
|
||||
# --- H2: EU-Verordnungen → bp_compliance_ce ---
|
||||
# =========================================================================
|
||||
# H2: EU-Recht → bp_compliance_ce
|
||||
# Quelle: EUR-Lex (CC BY 4.0, Wiederverwendung erlaubt)
|
||||
# URL-Muster: /legal-content/DE/TXT/PDF/?uri=CELEX:{id}
|
||||
# =========================================================================
|
||||
col="bp_compliance_ce"
|
||||
before=$(collection_count "$col")
|
||||
log "Collection $col: $before chunks (before)"
|
||||
log "--- H2: EU-Recht → $col ($before chunks) ---"
|
||||
|
||||
# GPSR (EU 2023/988) - Produktsicherheit
|
||||
download_pdf \
|
||||
"https://eur-lex.europa.eu/legal-content/DE/TXT/PDF/?uri=CELEX:32023R0988" \
|
||||
"$WORK_DIR/pdfs/GPSR_2023_988.pdf"
|
||||
# Array: CELEX_ID:filename:short:name_de:name_en:category:year
|
||||
local -a eu_gesetze=(
|
||||
# --- Datenschutz ---
|
||||
"32016R0679:DSGVO_2016_679:DSGVO:Datenschutz-Grundverordnung:General Data Protection Regulation:datenschutz:2016"
|
||||
# --- Verbraucherschutz (Kernbestand) ---
|
||||
"32011L0083:Consumer_Rights_2011_83:CRD:Verbraucherrechte-Richtlinie:Consumer Rights Directive:verbraucherschutz:2011"
|
||||
"32019L0770:Digital_Content_2019_770:DCD:Richtlinie digitale Inhalte:Digital Content Directive:verbraucherschutz:2019"
|
||||
"32019L0771:Sale_of_Goods_2019_771:SGD:Warenkauf-Richtlinie:Sale of Goods Directive:verbraucherschutz:2019"
|
||||
"32000L0031:ECommerce_2000_31:ECD:E-Commerce-Richtlinie:E-Commerce Directive:ecommerce:2000"
|
||||
"31993L0013:Unfair_Terms_93_13:UCTD:Klausel-Richtlinie:Unfair Contract Terms Directive:verbraucherschutz:1993"
|
||||
"32005L0029:Unfair_Practices_2005_29:UCPD:Richtlinie unlautere Geschaeftspraktiken:Unfair Commercial Practices Directive:verbraucherschutz:2005"
|
||||
"31998L0006:Price_Indication_98_6:PID:Preisangaben-Richtlinie:Price Indication Directive:verbraucherschutz:1998"
|
||||
"32019L2161:Omnibus_2019_2161:OMN:Omnibus-Richtlinie (Modernisierung Verbraucherschutz):Omnibus Directive:verbraucherschutz:2019"
|
||||
# --- Plattformregulierung ---
|
||||
"32022R1925:DMA_2022_1925:DMA:Digital Markets Act:Digital Markets Act:plattformregulierung:2022"
|
||||
# --- KI + Sicherheit ---
|
||||
"32024R1689:AI_Act_2024_1689:AI_Act:KI-Verordnung:Artificial Intelligence Act:ki_regulierung:2024"
|
||||
"32022L2555:NIS2_2022_2555:NIS2:NIS-2-Richtlinie:NIS2 Directive:it_sicherheit:2022"
|
||||
# --- Produktsicherheit + Haftung ---
|
||||
"32023R0988:GPSR_2023_988:GPSR:Allgemeine Produktsicherheitsverordnung:General Product Safety Regulation:produktsicherheit:2023"
|
||||
"31985L0374:Product_Liability_85_374:PLD:Produkthaftungs-Richtlinie:Product Liability Directive:produkthaftung:1985"
|
||||
"32023R1542:Batterie_VO_2023_1542:BattVO:Batterieverordnung:Battery Regulation:produktsicherheit:2023"
|
||||
# --- Datentransfer ---
|
||||
# SCC bereits in Phase C, hier nicht duplizieren
|
||||
)
|
||||
|
||||
if [[ -f "$WORK_DIR/pdfs/GPSR_2023_988.pdf" ]]; then
|
||||
upload_file "$WORK_DIR/pdfs/GPSR_2023_988.pdf" "$col" "compliance_ce" "legal_reference" "2024" \
|
||||
'{"regulation_id":"gpsr","regulation_name_de":"Allgemeine Produktsicherheitsverordnung (GPSR)","regulation_name_en":"General Product Safety Regulation","regulation_short":"GPSR","celex":"32023R0988","category":"produktsicherheit","license":"CC_BY_4.0","source":"eur-lex"}' \
|
||||
"GPSR (EU) 2023/988"
|
||||
fi
|
||||
for entry in "${eu_gesetze[@]}"; do
|
||||
IFS=':' read -r celex filename short name_de name_en category year <<< "$entry"
|
||||
local pdf_file="$WORK_DIR/pdfs/${filename}.pdf"
|
||||
|
||||
# AI Act (EU 2024/1689)
|
||||
download_pdf \
|
||||
"https://eur-lex.europa.eu/legal-content/DE/TXT/PDF/?uri=OJ:L_202401689" \
|
||||
"$WORK_DIR/pdfs/AI_Act_2024_1689.pdf"
|
||||
# AI Act hat spezielle URL (OJ statt CELEX)
|
||||
if [[ "$celex" == "32024R1689" ]]; then
|
||||
download_pdf \
|
||||
"https://eur-lex.europa.eu/legal-content/DE/TXT/PDF/?uri=OJ:L_202401689" \
|
||||
"$pdf_file"
|
||||
else
|
||||
download_pdf \
|
||||
"https://eur-lex.europa.eu/legal-content/DE/TXT/PDF/?uri=CELEX:${celex}" \
|
||||
"$pdf_file"
|
||||
fi
|
||||
|
||||
if [[ -f "$WORK_DIR/pdfs/AI_Act_2024_1689.pdf" ]]; then
|
||||
upload_file "$WORK_DIR/pdfs/AI_Act_2024_1689.pdf" "$col" "compliance_ce" "legal_reference" "2024" \
|
||||
'{"regulation_id":"ai_act","regulation_name_de":"KI-Verordnung (AI Act)","regulation_name_en":"Artificial Intelligence Act","regulation_short":"AI Act","celex":"32024R1689","category":"ki_regulierung","license":"CC_BY_4.0","source":"eur-lex"}' \
|
||||
"AI Act (EU) 2024/1689"
|
||||
fi
|
||||
|
||||
# EU Batterieverordnung (EU 2023/1542)
|
||||
download_pdf \
|
||||
"https://eur-lex.europa.eu/legal-content/DE/TXT/PDF/?uri=CELEX:32023R1542" \
|
||||
"$WORK_DIR/pdfs/Batterie_VO_2023_1542.pdf"
|
||||
|
||||
if [[ -f "$WORK_DIR/pdfs/Batterie_VO_2023_1542.pdf" ]]; then
|
||||
upload_file "$WORK_DIR/pdfs/Batterie_VO_2023_1542.pdf" "$col" "compliance_ce" "legal_reference" "2024" \
|
||||
'{"regulation_id":"batterie_vo","regulation_name_de":"Batterieverordnung","regulation_name_en":"Battery Regulation","regulation_short":"BattVO","celex":"32023R1542","category":"produktsicherheit","license":"CC_BY_4.0","source":"eur-lex"}' \
|
||||
"EU Batterieverordnung (EU) 2023/1542"
|
||||
fi
|
||||
|
||||
# Digitale-Inhalte-Richtlinie (EU 2019/770)
|
||||
download_pdf \
|
||||
"https://eur-lex.europa.eu/legal-content/DE/TXT/PDF/?uri=CELEX:32019L0770" \
|
||||
"$WORK_DIR/pdfs/Digitale_Inhalte_RL_2019_770.pdf"
|
||||
|
||||
if [[ -f "$WORK_DIR/pdfs/Digitale_Inhalte_RL_2019_770.pdf" ]]; then
|
||||
upload_file "$WORK_DIR/pdfs/Digitale_Inhalte_RL_2019_770.pdf" "$col" "compliance_ce" "legal_reference" "2019" \
|
||||
'{"regulation_id":"digitale_inhalte_rl","regulation_name_de":"Richtlinie ueber digitale Inhalte","regulation_name_en":"Digital Content Directive","regulation_short":"DCD","celex":"32019L0770","category":"verbraucherschutz","license":"CC_BY_4.0","source":"eur-lex"}' \
|
||||
"Digitale-Inhalte-RL (EU) 2019/770"
|
||||
fi
|
||||
if [[ -f "$pdf_file" ]]; then
|
||||
upload_file "$pdf_file" "$col" "compliance_ce" "legal_reference" "$year" \
|
||||
"{\"regulation_id\":\"${short,,}\",\"regulation_name_de\":\"$name_de\",\"regulation_name_en\":\"$name_en\",\"regulation_short\":\"$short\",\"celex\":\"$celex\",\"category\":\"$category\",\"license\":\"CC_BY_4.0\",\"source\":\"eur-lex\"}" \
|
||||
"$short — $name_de"
|
||||
fi
|
||||
done
|
||||
|
||||
after=$(collection_count "$col")
|
||||
log "Collection $col: $before → $after chunks"
|
||||
log "Collection $col: $before → $after chunks (+$((after - before)))"
|
||||
|
||||
# --- H3: HLEG Ethics Guidelines → bp_compliance_datenschutz ---
|
||||
# =========================================================================
|
||||
# H3: NIST Frameworks + HLEG Ethics Guidelines → bp_compliance_datenschutz
|
||||
# Quelle: nist.gov (Public Domain, US Government Work), op.europa.eu (HLEG, CC BY 4.0)
|
||||
# =========================================================================
|
||||
col="bp_compliance_datenschutz"
|
||||
before=$(collection_count "$col")
|
||||
log "--- H3: NIST + Ethics → $col ($before chunks) ---"
|
||||
|
||||
# NIST Cybersecurity Framework 2.0
|
||||
download_pdf \
|
||||
"https://nvlpubs.nist.gov/nistpubs/CSWP/NIST.CSWP.29.pdf" \
|
||||
"$WORK_DIR/pdfs/NIST_CSF_2.0.pdf"
|
||||
if [[ -f "$WORK_DIR/pdfs/NIST_CSF_2.0.pdf" ]]; then
|
||||
upload_file "$WORK_DIR/pdfs/NIST_CSF_2.0.pdf" "$col" "compliance_datenschutz" "guidance" "2024" \
|
||||
'{"source_id":"nist","doc_type":"framework","guideline_name":"NIST Cybersecurity Framework 2.0","license":"public_domain_us_gov","attribution":"National Institute of Standards and Technology (NIST)","source":"nist.gov"}' \
|
||||
"NIST Cybersecurity Framework 2.0"
|
||||
fi
|
||||
|
||||
# NIST Privacy Framework 1.0
|
||||
download_pdf \
|
||||
"https://nvlpubs.nist.gov/nistpubs/CSWP/NIST.CSWP.01162020.pdf" \
|
||||
"$WORK_DIR/pdfs/NIST_Privacy_Framework.pdf"
|
||||
if [[ -f "$WORK_DIR/pdfs/NIST_Privacy_Framework.pdf" ]]; then
|
||||
upload_file "$WORK_DIR/pdfs/NIST_Privacy_Framework.pdf" "$col" "compliance_datenschutz" "guidance" "2020" \
|
||||
'{"source_id":"nist","doc_type":"framework","guideline_name":"NIST Privacy Framework 1.0","license":"public_domain_us_gov","attribution":"National Institute of Standards and Technology (NIST)","source":"nist.gov"}' \
|
||||
"NIST Privacy Framework 1.0"
|
||||
fi
|
||||
|
||||
# HLEG Ethics Guidelines for Trustworthy AI
|
||||
download_pdf \
|
||||
"https://op.europa.eu/en/publication-detail/-/publication/d3988569-0434-11ea-8c1f-01aa75ed71a1/language-en/format-PDF" \
|
||||
"$WORK_DIR/pdfs/hleg_trustworthy_ai.pdf"
|
||||
|
||||
if [[ -f "$WORK_DIR/pdfs/hleg_trustworthy_ai.pdf" ]]; then
|
||||
upload_file "$WORK_DIR/pdfs/hleg_trustworthy_ai.pdf" "$col" "compliance_datenschutz" "guidance" "2019" \
|
||||
'{"source_id":"hleg","doc_type":"ethics_guidelines","guideline_name":"Ethics Guidelines for Trustworthy AI","license":"CC_BY_4.0","attribution":"High-Level Expert Group on AI (HLEG)","source":"op.europa.eu"}' \
|
||||
@@ -901,7 +948,18 @@ phase_verbraucherschutz() {
|
||||
fi
|
||||
|
||||
after=$(collection_count "$col")
|
||||
log "Collection $col: $before → $after chunks"
|
||||
log "Collection $col: $before → $after chunks (+$((after - before)))"
|
||||
|
||||
# =========================================================================
|
||||
# Summary
|
||||
# =========================================================================
|
||||
echo ""
|
||||
log "Phase H abgeschlossen."
|
||||
log "Naechste Schritte (TODO — separate Phasen):"
|
||||
log " Layer 2: Nationale Gesetze EU/EWR (FR, ES, IT, AT, NL, UK) — Portal-Recherche noetig"
|
||||
log " Layer 3: DPA Guidance (CNIL, AEPD, Garante, AP, IMY) — Einzel-URLs recherchieren"
|
||||
log " Layer 4: OWASP Top 10, offene Security-Frameworks"
|
||||
log " Layer 5: EuGH + BGH Leitentscheidungen"
|
||||
}
|
||||
|
||||
# =============================================================================
|
||||
|
||||
Reference in New Issue
Block a user