feat(rag): Phase I DACH-Erweiterung — Gesetze, Templates, Urteile
All checks were successful
CI/CD / go-lint (push) Has been skipped
CI/CD / python-lint (push) Has been skipped
CI/CD / nodejs-lint (push) Has been skipped
CI/CD / test-go-ai-compliance (push) Successful in 56s
CI/CD / test-python-backend-compliance (push) Successful in 49s
CI/CD / test-python-document-crawler (push) Successful in 32s
CI/CD / test-python-dsms-gateway (push) Successful in 25s
CI/CD / deploy-hetzner (push) Successful in 17s

New ingestion phase 'dach' adds missing documents from DACH catalog:

I1: UStG (Retention), MStV (Impressum)
I2: DSK Muster-VVT, DSK KP5 DSFA, BfDI Beispiel-VVT (DL-DE/BY-2.0)
I3: BSI IT-Grundschutz Kompendium 2024 (CC BY-SA 4.0)
I4: 7 Gerichtsentscheidungen as Praxisanker:
  - DE: LG Bonn 1&1, BGH Planet49, BGH Art.82 (2x)
  - AT: OGH Schutzzweck, OGH Art.15+82 EuGH-Vorlage
  - CH: BVGer DSG-Auskunft, BGer Datensperre

Trigger: workflow_dispatch phase=dach

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
Benjamin Admin
2026-03-12 14:36:59 +01:00
parent 9945a62a50
commit 42ec3cad6d
2 changed files with 253 additions and 1 deletions

View File

@@ -1183,6 +1183,255 @@ with open('$WORK_DIR/pdfs/EGBGB_Widerruf.txt', 'w') as f:
log " Layer 5: EuGH + BGH Leitentscheidungen"
}
# =============================================================================
# PHASE I: DACH-Erweiterung (Quellenkatalog)
# DSK-Templates, Gerichtsentscheidungen, fehlende Gesetze
# Lizenzen: §5 UrhG (DE), §7 UrhG (AT), Art.5 URG (CH), DL-DE/BY-2.0, CC BY 4.0
# =============================================================================
phase_dach() {
log "=========================================="
log "PHASE I: DACH-Erweiterung"
log "=========================================="
# =========================================================================
# I1: Fehlende DE-Gesetze → bp_compliance_gesetze
# =========================================================================
local col="bp_compliance_gesetze"
local before
before=$(collection_count "$col")
log "--- I1: Fehlende DE-Gesetze → $col ($before chunks) ---"
# UStG (Umsatzsteuergesetz) — Retention/Rechnungspflichten
download_pdf "https://www.gesetze-im-internet.de/ustg_1980/UStG.pdf" "$WORK_DIR/pdfs/UStG.pdf"
upload_file "$WORK_DIR/pdfs/UStG.pdf" "$col" "compliance" "legal_reference" "2025" \
'{"regulation_id":"ustg","regulation_name_de":"Umsatzsteuergesetz (UStG)","category":"steuerrecht","license":"public_domain_§5_UrhG","source":"gesetze-im-internet.de"}' \
"UStG (Umsatzsteuergesetz)"
# MStV § 18 — Medienstaatsvertrag (Impressumspflicht)
# Nicht als PDF auf gesetze-im-internet.de, daher von Bayern
curl -sL "https://www.gesetze-bayern.de/Content/Pdf/MStV" -o "$WORK_DIR/pdfs/MStV.pdf" 2>/dev/null || true
if [[ -f "$WORK_DIR/pdfs/MStV.pdf" && $(stat -f%z "$WORK_DIR/pdfs/MStV.pdf" 2>/dev/null || stat -c%s "$WORK_DIR/pdfs/MStV.pdf" 2>/dev/null || echo 0) -gt 1000 ]]; then
upload_file "$WORK_DIR/pdfs/MStV.pdf" "$col" "compliance" "legal_reference" "2025" \
'{"regulation_id":"mstv","regulation_name_de":"Medienstaatsvertrag (MStV)","category":"medienrecht","license":"public_domain_§5_UrhG","source":"gesetze-bayern.de"}' \
"MStV (Medienstaatsvertrag)"
else
warn "MStV PDF download failed — skipping"
fi
local after
after=$(collection_count "$col")
local diff=$(( after - before ))
log "Collection $col: ${before:-?}${after:-?} chunks (+${diff})"
# =========================================================================
# I2: DSK Templates & BfDI → bp_legal_templates
# Lizenz: DL-DE/BY-2.0 (kommerziell mit Attribution)
# =========================================================================
col="bp_legal_templates"
before=$(collection_count "$col")
log "--- I2: DSK/BfDI Templates → $col ($before chunks) ---"
# DSK Muster-VVT (Art. 30 DSGVO)
download_pdf \
"https://www.datenschutzkonferenz-online.de/media/ah/201802_ah_muster_verantwortliche.pdf" \
"$WORK_DIR/pdfs/DSK_Muster_VVT.pdf"
upload_file "$WORK_DIR/pdfs/DSK_Muster_VVT.pdf" "$col" "compliance_template" "template" "2018" \
'{"regulation_id":"dsk_muster_vvt","regulation_name_de":"DSK Muster-VVT (Art. 30 DSGVO)","category":"datenschutz","license":"DL-DE/BY-2.0","attribution":"Datenschutzkonferenz (DSK)","source":"datenschutzkonferenz-online.de"}' \
"DSK Muster-VVT"
# DSK Kurzpapier Nr. 5 DSFA
download_pdf \
"https://www.datenschutzkonferenz-online.de/media/kp/dsk_kpnr_5.pdf" \
"$WORK_DIR/pdfs/DSK_KP5_DSFA.pdf"
upload_file "$WORK_DIR/pdfs/DSK_KP5_DSFA.pdf" "$col" "compliance_template" "guidance" "2018" \
'{"regulation_id":"dsk_kp5_dsfa","regulation_name_de":"DSK Kurzpapier Nr. 5 Datenschutz-Folgenabschaetzung","category":"datenschutz","license":"DL-DE/BY-2.0","attribution":"Datenschutzkonferenz (DSK)","source":"datenschutzkonferenz-online.de"}' \
"DSK Kurzpapier Nr. 5 DSFA"
# BfDI Beispiel-VVT
download_pdf \
"https://www.bfdi.bund.de/SharedDocs/Downloads/DE/DokumenteBfDI/AccessForAll/2023/2023_Verzeichnis-Verarbeitungst%C3%A4tigkeiten.pdf?__blob=publicationFile&v=2" \
"$WORK_DIR/pdfs/BfDI_Beispiel_VVT.pdf"
upload_file "$WORK_DIR/pdfs/BfDI_Beispiel_VVT.pdf" "$col" "compliance_template" "template" "2023" \
'{"regulation_id":"bfdi_beispiel_vvt","regulation_name_de":"BfDI Beispiel-VVT mit Loeschfristen","category":"datenschutz","license":"DL-DE/BY-2.0","attribution":"Bundesbeauftragter fuer den Datenschutz und die Informationsfreiheit (BfDI)","source":"bfdi.bund.de"}' \
"BfDI Beispiel-VVT"
after=$(collection_count "$col")
diff=$(( after - before ))
log "Collection $col: ${before:-?}${after:-?} chunks (+${diff})"
# =========================================================================
# I3: BSI Stand-der-Technik (OSCAL) → bp_compliance_gesetze
# Lizenz: CC BY-SA 4.0
# =========================================================================
col="bp_compliance_gesetze"
before=$(collection_count "$col")
log "--- I3: BSI OSCAL → $col ($before chunks) ---"
# BSI IT-Grundschutz Kompendium (PDF)
download_pdf \
"https://www.bsi.bund.de/SharedDocs/Downloads/DE/BSI/Grundschutz/Kompendium/IT_Grundschutz_Kompendium_Edition2024.pdf?__blob=publicationFile&v=4" \
"$WORK_DIR/pdfs/BSI_Grundschutz_2024.pdf"
if [[ -f "$WORK_DIR/pdfs/BSI_Grundschutz_2024.pdf" ]]; then
upload_file "$WORK_DIR/pdfs/BSI_Grundschutz_2024.pdf" "$col" "compliance" "guidance" "2024" \
'{"regulation_id":"bsi_grundschutz_2024","regulation_name_de":"BSI IT-Grundschutz Kompendium 2024","category":"informationssicherheit","license":"CC_BY-SA_4.0","attribution":"Bundesamt fuer Sicherheit in der Informationstechnik (BSI)","source":"bsi.bund.de"}' \
"BSI IT-Grundschutz Kompendium 2024"
fi
after=$(collection_count "$col")
diff=$(( after - before ))
log "Collection $col: ${before:-?}${after:-?} chunks (+${diff})"
# =========================================================================
# I4: Gerichtsentscheidungen → bp_compliance_datenschutz
# Lizenz: §5 UrhG (DE), §7 UrhG (AT), Art.5 URG (CH) — amtliche Werke
# =========================================================================
col="bp_compliance_datenschutz"
before=$(collection_count "$col")
log "--- I4: Gerichtsentscheidungen → $col ($before chunks) ---"
# DE: LG Bonn 29 OWi 1/20 (1&1 Bussgeldbescheid, Art. 32)
curl -sL "https://www.justiz.nrw.de/nrwe/lgs/bonn/lg_bonn/j2020/29OWi1_20_Urteil_20201111.html" \
-o "$WORK_DIR/texts/LG_Bonn_29OWi1_20.html" 2>/dev/null || true
if [[ -f "$WORK_DIR/texts/LG_Bonn_29OWi1_20.html" && $(wc -c < "$WORK_DIR/texts/LG_Bonn_29OWi1_20.html") -gt 500 ]]; then
# Convert HTML to text
python3 -c "
import re
with open('$WORK_DIR/texts/LG_Bonn_29OWi1_20.html') as f:
html = f.read()
text = re.sub(r'<[^>]+>', ' ', html)
text = re.sub(r'\s+', ' ', text).strip()
with open('$WORK_DIR/texts/LG_Bonn_29OWi1_20.txt', 'w') as f:
f.write('LG Bonn, Urteil vom 11.11.2020, 29 OWi 1/20 (1&1 Telecom)\n\n' + text)
" 2>/dev/null
upload_file "$WORK_DIR/texts/LG_Bonn_29OWi1_20.txt" "$col" "compliance_datenschutz" "case_law" "2020" \
'{"regulation_id":"lg_bonn_29owi1_20","doc_type":"court_decision","court":"LG Bonn","case_number":"29 OWi 1/20","date":"2020-11-11","topic":"Art. 32 DSGVO Authentifizierung Bussgeld","country":"DE","license":"public_domain_§5_UrhG","source":"justiz.nrw.de"}' \
"LG Bonn 29 OWi 1/20 (Art. 32 Bussgeld)"
fi
# DE: BGH I ZR 7/16 (Planet49 Cookie-Einwilligung)
curl -sL "https://www.bundesgerichtshof.de/SharedDocs/Pressemitteilungen/DE/2020/2020067.html" \
-o "$WORK_DIR/texts/BGH_Planet49.html" 2>/dev/null || true
if [[ -f "$WORK_DIR/texts/BGH_Planet49.html" && $(wc -c < "$WORK_DIR/texts/BGH_Planet49.html") -gt 500 ]]; then
python3 -c "
import re
with open('$WORK_DIR/texts/BGH_Planet49.html') as f:
html = f.read()
text = re.sub(r'<[^>]+>', ' ', html)
text = re.sub(r'\s+', ' ', text).strip()
with open('$WORK_DIR/texts/BGH_Planet49.txt', 'w') as f:
f.write('BGH, Urteil vom 28.05.2020, I ZR 7/16 (Planet49 Cookie-Einwilligung)\n\n' + text)
" 2>/dev/null
upload_file "$WORK_DIR/texts/BGH_Planet49.txt" "$col" "compliance_datenschutz" "case_law" "2020" \
'{"regulation_id":"bgh_i_zr_7_16","doc_type":"court_decision","court":"BGH","case_number":"I ZR 7/16","date":"2020-05-28","topic":"Cookie-Einwilligung Opt-in Planet49","country":"DE","license":"public_domain_§5_UrhG","source":"bundesgerichtshof.de"}' \
"BGH I ZR 7/16 (Planet49 Cookie-Einwilligung)"
fi
# DE: BGH Pressemitteilung 218/2024 (Art. 82 DSGVO Schadensersatz)
curl -sL "https://www.bundesgerichtshof.de/SharedDocs/Pressemitteilungen/DE/2024/2024218.html" \
-o "$WORK_DIR/texts/BGH_Art82_2024.html" 2>/dev/null || true
if [[ -f "$WORK_DIR/texts/BGH_Art82_2024.html" && $(wc -c < "$WORK_DIR/texts/BGH_Art82_2024.html") -gt 500 ]]; then
python3 -c "
import re
with open('$WORK_DIR/texts/BGH_Art82_2024.html') as f:
html = f.read()
text = re.sub(r'<[^>]+>', ' ', html)
text = re.sub(r'\s+', ' ', text).strip()
with open('$WORK_DIR/texts/BGH_Art82_2024.txt', 'w') as f:
f.write('BGH, Pressemitteilung 218/2024, Art. 82 DSGVO Schadensersatz\n\n' + text)
" 2>/dev/null
upload_file "$WORK_DIR/texts/BGH_Art82_2024.txt" "$col" "compliance_datenschutz" "case_law" "2024" \
'{"regulation_id":"bgh_art82_2024_218","doc_type":"court_decision","court":"BGH","date":"2024-11-18","topic":"Art. 82 DSGVO immaterieller Schadensersatz Bemessung","country":"DE","license":"public_domain_§5_UrhG","source":"bundesgerichtshof.de"}' \
"BGH PM 218/2024 (Art. 82 Schadensersatz)"
fi
# DE: BGH VI ZR 396/24 (Art. 82 Konkretisierung)
curl -sL "https://www.bundesgerichtshof.de/SharedDocs/Entscheidungen/DE/2025/2025-11-11-VIZR396_24.html" \
-o "$WORK_DIR/texts/BGH_VI_ZR_396_24.html" 2>/dev/null || true
if [[ -f "$WORK_DIR/texts/BGH_VI_ZR_396_24.html" && $(wc -c < "$WORK_DIR/texts/BGH_VI_ZR_396_24.html") -gt 500 ]]; then
python3 -c "
import re
with open('$WORK_DIR/texts/BGH_VI_ZR_396_24.html') as f:
html = f.read()
text = re.sub(r'<[^>]+>', ' ', html)
text = re.sub(r'\s+', ' ', text).strip()
with open('$WORK_DIR/texts/BGH_VI_ZR_396_24.txt', 'w') as f:
f.write('BGH, Urteil vom 11.11.2025, VI ZR 396/24 (Art. 82 DSGVO Konkretisierung)\n\n' + text)
" 2>/dev/null
upload_file "$WORK_DIR/texts/BGH_VI_ZR_396_24.txt" "$col" "compliance_datenschutz" "case_law" "2025" \
'{"regulation_id":"bgh_vi_zr_396_24","doc_type":"court_decision","court":"BGH","case_number":"VI ZR 396/24","date":"2025-11-11","topic":"Art. 82 DSGVO Darlegungslast Schadensbemessung","country":"DE","license":"public_domain_§5_UrhG","source":"bundesgerichtshof.de"}' \
"BGH VI ZR 396/24 (Art. 82 Konkretisierung)"
fi
# AT: OGH 6 Ob 70/24y (Schutzzweck/Kausalitaet)
curl -sL "https://www.ogh.gv.at/entscheidungen/entscheidungen-ogh/datenschutzrecht-zum-schutzzweck-der-datenschutz-grundverordnung-dsgvo-und-des-datenschutzgesetzes-dsg/" \
-o "$WORK_DIR/texts/OGH_6Ob70_24y.html" 2>/dev/null || true
if [[ -f "$WORK_DIR/texts/OGH_6Ob70_24y.html" && $(wc -c < "$WORK_DIR/texts/OGH_6Ob70_24y.html") -gt 500 ]]; then
python3 -c "
import re
with open('$WORK_DIR/texts/OGH_6Ob70_24y.html') as f:
html = f.read()
text = re.sub(r'<[^>]+>', ' ', html)
text = re.sub(r'\s+', ' ', text).strip()
with open('$WORK_DIR/texts/OGH_6Ob70_24y.txt', 'w') as f:
f.write('OGH, 6 Ob 70/24y, 15.05.2024 (Schutzzweck DSGVO/DSG)\n\n' + text)
" 2>/dev/null
upload_file "$WORK_DIR/texts/OGH_6Ob70_24y.txt" "$col" "compliance_datenschutz" "case_law" "2024" \
'{"regulation_id":"ogh_6ob70_24y","doc_type":"court_decision","court":"OGH","case_number":"6 Ob 70/24y","date":"2024-05-15","topic":"Schutzzweck DSGVO Kausalitaet Schadensersatz","country":"AT","license":"public_domain_§7_UrhG_AT","source":"ogh.gv.at"}' \
"OGH 6 Ob 70/24y (Schutzzweck DSGVO)"
fi
# AT: OGH 6 Ob 102/24d (Art. 15 Herkunft + Art. 82)
curl -sL "https://www.ogh.gv.at/entscheidungen/vorabentscheidungsersuchen-eugh/vorabentscheidungsersuchen-zum-auskunftsrecht-nach-art-15-dsgvo-und-dem-recht-auf-schadenersatz-nach-art-82-dsgvo/" \
-o "$WORK_DIR/texts/OGH_6Ob102_24d.html" 2>/dev/null || true
if [[ -f "$WORK_DIR/texts/OGH_6Ob102_24d.html" && $(wc -c < "$WORK_DIR/texts/OGH_6Ob102_24d.html") -gt 500 ]]; then
python3 -c "
import re
with open('$WORK_DIR/texts/OGH_6Ob102_24d.html') as f:
html = f.read()
text = re.sub(r'<[^>]+>', ' ', html)
text = re.sub(r'\s+', ' ', text).strip()
with open('$WORK_DIR/texts/OGH_6Ob102_24d.txt', 'w') as f:
f.write('OGH, 6 Ob 102/24d, 18.02.2025 (EuGH-Vorlage Art. 15 + Art. 82)\n\n' + text)
" 2>/dev/null
upload_file "$WORK_DIR/texts/OGH_6Ob102_24d.txt" "$col" "compliance_datenschutz" "case_law" "2025" \
'{"regulation_id":"ogh_6ob102_24d","doc_type":"court_decision","court":"OGH","case_number":"6 Ob 102/24d","date":"2025-02-18","topic":"Art. 15 Auskunftsrecht Herkunft Art. 82 Schadensersatz EuGH-Vorlage","country":"AT","license":"public_domain_§7_UrhG_AT","source":"ogh.gv.at"}' \
"OGH 6 Ob 102/24d (Art. 15 + Art. 82 EuGH-Vorlage)"
fi
# CH: BVGer B-915/2022 (BVGE 2024 IV/2 — DSG-Auskunft vs. Akteneinsicht)
download_pdf \
"https://jurispub.admin.ch/publiws/download?decisionId=ed176fe0-fb98-425f-8ded-ca9da49c816b" \
"$WORK_DIR/pdfs/BVGE_2024_IV_2.pdf"
upload_file "$WORK_DIR/pdfs/BVGE_2024_IV_2.pdf" "$col" "compliance_datenschutz" "case_law" "2024" \
'{"regulation_id":"bvge_2024_iv_2","doc_type":"court_decision","court":"BVGer","case_number":"B-915/2022","date":"2024-04-03","topic":"DSG-Auskunft vs. Akteneinsicht CH","country":"CH","license":"public_domain_Art5_URG","source":"jurispub.admin.ch"}' \
"BVGer B-915/2022 (DSG-Auskunft vs. Akteneinsicht)"
# CH: BGer 1C 562/2024 (Datensperre/Anonymisierung)
curl -sL "https://relevancy.bger.ch/cgi-bin/JumpCGI?id=13.01.2025_1C_562%2F2024" \
-o "$WORK_DIR/texts/BGer_1C_562_2024.html" 2>/dev/null || true
if [[ -f "$WORK_DIR/texts/BGer_1C_562_2024.html" && $(wc -c < "$WORK_DIR/texts/BGer_1C_562_2024.html") -gt 500 ]]; then
python3 -c "
import re
with open('$WORK_DIR/texts/BGer_1C_562_2024.html') as f:
html = f.read()
text = re.sub(r'<[^>]+>', ' ', html)
text = re.sub(r'\s+', ' ', text).strip()
with open('$WORK_DIR/texts/BGer_1C_562_2024.txt', 'w') as f:
f.write('BGer, 1C 562/2024, 13.01.2025 (Datensperre/Anonymisierung)\n\n' + text)
" 2>/dev/null
upload_file "$WORK_DIR/texts/BGer_1C_562_2024.txt" "$col" "compliance_datenschutz" "case_law" "2025" \
'{"regulation_id":"bger_1c_562_2024","doc_type":"court_decision","court":"BGer","case_number":"1C 562/2024","date":"2025-01-13","topic":"Datensperre Anonymisierung DSG","country":"CH","license":"public_domain_Art5_URG","source":"bger.ch"}' \
"BGer 1C 562/2024 (Datensperre/Anonymisierung)"
fi
after=$(collection_count "$col")
diff=$(( after - before ))
log "Collection $col: ${before:-?}${after:-?} chunks (+${diff})"
echo ""
log "Phase I abgeschlossen."
}
# =============================================================================
# PHASE F: Verifizierung
# =============================================================================
@@ -1378,6 +1627,7 @@ main() {
templates) phase_templates ;;
datenschutz) phase_datenschutz ;;
verbraucherschutz) phase_verbraucherschutz ;;
dach) phase_dach ;;
verify) phase_verify ;;
version) phase_register_version ;;
*) fail "Unknown phase: $ONLY_PHASE"; exit 1 ;;
@@ -1399,6 +1649,8 @@ main() {
echo ""
phase_verbraucherschutz
echo ""
phase_dach
echo ""
phase_verify
echo ""
phase_register_version