feat(scope): Split HT-H01 B2B/B2C + register Verbraucherschutz document types + RAG ingestion
Some checks failed
CI/CD / go-lint (push) Has been skipped
CI/CD / python-lint (push) Has been skipped
CI/CD / nodejs-lint (push) Has been skipped
CI/CD / test-go-ai-compliance (push) Successful in 38s
CI/CD / test-python-backend-compliance (push) Successful in 39s
CI/CD / test-python-document-crawler (push) Successful in 27s
CI/CD / test-python-dsms-gateway (push) Successful in 24s
CI/CD / deploy-hetzner (push) Has been cancelled

- Split HT-H01 into HT-H01a (B2C/Hybrid mit Verbraucherschutzpflichten) und
  HT-H01b (reiner B2B mit Basis-Pflichten). B2B-Webshops bekommen keine
  Widerrufsbelehrung/Preisangaben/Fernabsatz mehr.
- Add excludeWhen/requireWhen to HardTriggerRule for conditional trigger logic
- Register 6 neue ScopeDocumentType: widerrufsbelehrung, preisangaben,
  fernabsatz_info, streitbeilegung, produktsicherheit, ai_act_doku
- Full DOCUMENT_SCOPE_MATRIX L1-L4 for all new types
- Align HardTriggerRule interface with actual engine field names
- Add Phase H (Verbraucherschutz) to RAG ingestion script:
  10 deutsche Gesetze + 4 EU-Rechtsakte (3 Verordnungen, 1 Richtlinie) + HLEG Ethics Guidelines
- Add scripts/rag-sources.md with license documentation
- 9 new tests for B2B/B2C trigger split, all 326 tests pass

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
Benjamin Admin
2026-03-11 16:03:49 +01:00
parent cb48b8289e
commit 7f38df9d9c
5 changed files with 666 additions and 20 deletions

View File

@@ -35,7 +35,7 @@ while [[ $# -gt 0 ]]; do
--only) ONLY_PHASE="$2"; shift 2 ;;
-h|--help)
echo "Usage: $0 [--skip-download] [--only PHASE]"
echo "Phases: download, gesetze, eu, templates, datenschutz, verify, version"
echo "Phases: download, gesetze, eu, templates, datenschutz, verbraucherschutz, verify, version"
exit 0
;;
*) echo "Unknown option: $1"; exit 1 ;;
@@ -762,6 +762,148 @@ phase_datenschutz() {
log "Collection $col: $before$after chunks"
}
# =============================================================================
# PHASE H: Verbraucherschutz & AI Act
# =============================================================================
# -----------------------------------------------------------------------------
# Helper for phase_verbraucherschutz: download a single PDF and, if the file is
# present on disk afterwards, pass it on to upload_file.
# Arguments:
#   $1   - source URL
#   $2   - local target path for the PDF
#   $3.. - remaining upload_file arguments, forwarded unchanged
# -----------------------------------------------------------------------------
_verbraucherschutz_ingest_pdf() {
  local url="$1"
  local pdf_file="$2"
  shift 2

  download_pdf "$url" "$pdf_file"

  # Only upload when the PDF actually exists; otherwise skip this document.
  if [[ -f "$pdf_file" ]]; then
    upload_file "$pdf_file" "$@"
  fi
}

# -----------------------------------------------------------------------------
# Phase H: ingest consumer-protection and AI Act sources.
#   H1: German statutes         -> collection bp_compliance_gesetze
#   H2: EU legal acts           -> collection bp_compliance_ce
#   H3: HLEG Ethics Guidelines  -> collection bp_compliance_datenschutz
# For each collection the chunk count is logged before and after ingestion.
# Globals:   WORK_DIR (read)
# Calls:     log, collection_count, download_pdf, upload_file (defined elsewhere
#            in this script)
# Arguments: none
# -----------------------------------------------------------------------------
phase_verbraucherschutz() {
  log "=========================================="
  log "PHASE H: Verbraucherschutz & AI Act"
  log "=========================================="

  mkdir -p "$WORK_DIR"/{pdfs,texts}

  local pdf_dir="$WORK_DIR/pdfs"
  local col before after

  # --- H1: German consumer-protection statutes -> bp_compliance_gesetze ---
  col="bp_compliance_gesetze"
  before=$(collection_count "$col")
  log "Collection $col: $before chunks (before)"

  # German statutes from gesetze-im-internet.de (public domain, § 5 UrhG).
  # Entry format: "<url path without .pdf>:<short name>:<full name>"
  local -a verbraucherschutz_gesetze=(
    "pangv_2022/PAngV:PAngV:Preisangabenverordnung"
    "vsbg/VSBG:VSBG:Verbraucherstreitbeilegungsgesetz"
    "prodhaftg/ProdHaftG:ProdHaftG:Produkthaftungsgesetz"
    "verpackg/VerpackG:VerpackG:Verpackungsgesetz"
    "elektrog_2015/ElektroG:ElektroG:Elektro- und Elektronikgeraetegesetz"
    "battdg/BattDG:BattDG:Batteriegesetz"
    "bfsg/BFSG:BFSG:Barrierefreiheitsstaerkungsgesetz"
    "uwg_2004/UWG:UWG:Gesetz gegen den unlauteren Wettbewerb"
  )

  # Declared local so the loop variables do not leak into the global scope.
  local entry path rest short fullname
  for entry in "${verbraucherschutz_gesetze[@]}"; do
    path="${entry%%:*}"     # everything before the first colon
    rest="${entry#*:}"      # everything after the first colon
    short="${rest%%:*}"
    fullname="${rest#*:}"

    # regulation_id is the lower-cased short name (${short,,} needs bash 4+).
    _verbraucherschutz_ingest_pdf \
      "https://www.gesetze-im-internet.de/${path}.pdf" \
      "$pdf_dir/${short}.pdf" \
      "$col" "compliance" "legal_reference" "2025" \
      "{\"regulation_id\":\"${short,,}\",\"regulation_name_de\":\"$fullname ($short)\",\"category\":\"verbraucherschutz\",\"license\":\"public_domain_§5_UrhG\",\"source\":\"gesetze-im-internet.de\"}" \
      "$short ($fullname)"
  done

  # Complete BGB (distance selling §§ 312-312k, digital content §§ 327-327u,
  # sales law §§ 433-480)
  _verbraucherschutz_ingest_pdf \
    "https://www.gesetze-im-internet.de/bgb/BGB.pdf" \
    "$pdf_dir/BGB_full.pdf" \
    "$col" "compliance" "legal_reference" "2025" \
    '{"regulation_id":"bgb_fernabsatz","regulation_name_de":"BGB (Fernabsatz, Digitale Inhalte, Kaufrecht)","category":"verbraucherschutz","license":"public_domain_§5_UrhG","source":"gesetze-im-internet.de"}' \
    "BGB (Fernabsatz/Digitale Inhalte/Kaufrecht)"

  # EGBGB for the model withdrawal instruction (Annex 1+2 to Art. 246a)
  _verbraucherschutz_ingest_pdf \
    "https://www.gesetze-im-internet.de/bgbeg/BGBEG.pdf" \
    "$pdf_dir/BGBEG.pdf" \
    "$col" "compliance" "legal_reference" "2025" \
    '{"regulation_id":"egbgb_muster_widerruf","regulation_name_de":"EGBGB (Muster-Widerrufsbelehrung, Anlage 1+2 zu Art. 246a)","category":"verbraucherschutz","license":"public_domain_§5_UrhG","source":"gesetze-im-internet.de"}' \
    "EGBGB (Muster-Widerrufsbelehrung)"

  after=$(collection_count "$col")
  # Separator between the two counts keeps the numbers distinguishable.
  log "Collection $col: $before → $after chunks"

  # --- H2: EU legal acts -> bp_compliance_ce ---
  col="bp_compliance_ce"
  before=$(collection_count "$col")
  log "Collection $col: $before chunks (before)"

  # GPSR (EU 2023/988) - product safety
  _verbraucherschutz_ingest_pdf \
    "https://eur-lex.europa.eu/legal-content/DE/TXT/PDF/?uri=CELEX:32023R0988" \
    "$pdf_dir/GPSR_2023_988.pdf" \
    "$col" "compliance_ce" "legal_reference" "2024" \
    '{"regulation_id":"gpsr","regulation_name_de":"Allgemeine Produktsicherheitsverordnung (GPSR)","regulation_name_en":"General Product Safety Regulation","regulation_short":"GPSR","celex":"32023R0988","category":"produktsicherheit","license":"CC_BY_4.0","source":"eur-lex"}' \
    "GPSR (EU) 2023/988"

  # AI Act (EU 2024/1689)
  _verbraucherschutz_ingest_pdf \
    "https://eur-lex.europa.eu/legal-content/DE/TXT/PDF/?uri=OJ:L_202401689" \
    "$pdf_dir/AI_Act_2024_1689.pdf" \
    "$col" "compliance_ce" "legal_reference" "2024" \
    '{"regulation_id":"ai_act","regulation_name_de":"KI-Verordnung (AI Act)","regulation_name_en":"Artificial Intelligence Act","regulation_short":"AI Act","celex":"32024R1689","category":"ki_regulierung","license":"CC_BY_4.0","source":"eur-lex"}' \
    "AI Act (EU) 2024/1689"

  # EU Battery Regulation (EU 2023/1542)
  _verbraucherschutz_ingest_pdf \
    "https://eur-lex.europa.eu/legal-content/DE/TXT/PDF/?uri=CELEX:32023R1542" \
    "$pdf_dir/Batterie_VO_2023_1542.pdf" \
    "$col" "compliance_ce" "legal_reference" "2024" \
    '{"regulation_id":"batterie_vo","regulation_name_de":"Batterieverordnung","regulation_name_en":"Battery Regulation","regulation_short":"BattVO","celex":"32023R1542","category":"produktsicherheit","license":"CC_BY_4.0","source":"eur-lex"}' \
    "EU Batterieverordnung (EU) 2023/1542"

  # Digital Content Directive (EU 2019/770)
  _verbraucherschutz_ingest_pdf \
    "https://eur-lex.europa.eu/legal-content/DE/TXT/PDF/?uri=CELEX:32019L0770" \
    "$pdf_dir/Digitale_Inhalte_RL_2019_770.pdf" \
    "$col" "compliance_ce" "legal_reference" "2019" \
    '{"regulation_id":"digitale_inhalte_rl","regulation_name_de":"Richtlinie ueber digitale Inhalte","regulation_name_en":"Digital Content Directive","regulation_short":"DCD","celex":"32019L0770","category":"verbraucherschutz","license":"CC_BY_4.0","source":"eur-lex"}' \
    "Digitale-Inhalte-RL (EU) 2019/770"

  after=$(collection_count "$col")
  log "Collection $col: $before → $after chunks"

  # --- H3: HLEG Ethics Guidelines -> bp_compliance_datenschutz ---
  col="bp_compliance_datenschutz"
  before=$(collection_count "$col")
  log "Collection $col: $before chunks (before)"

  # NOTE(review): this is a publication-detail URL; confirm it delivers the PDF
  # itself rather than an HTML landing page.
  _verbraucherschutz_ingest_pdf \
    "https://op.europa.eu/en/publication-detail/-/publication/d3988569-0434-11ea-8c1f-01aa75ed71a1/language-en/format-PDF" \
    "$pdf_dir/hleg_trustworthy_ai.pdf" \
    "$col" "compliance_datenschutz" "guidance" "2019" \
    '{"source_id":"hleg","doc_type":"ethics_guidelines","guideline_name":"Ethics Guidelines for Trustworthy AI","license":"CC_BY_4.0","attribution":"High-Level Expert Group on AI (HLEG)","source":"op.europa.eu"}' \
    "HLEG Ethics Guidelines Trustworthy AI"

  after=$(collection_count "$col")
  log "Collection $col: $before → $after chunks"
}
# =============================================================================
# PHASE F: Verifizierung
# =============================================================================
@@ -809,6 +951,36 @@ try:
for r in results[:3]:
print(f' [{r.get(\"score\",0):.3f}] {r.get(\"regulation_code\",\"?\")} - {r.get(\"content\",\"\")[:80]}...')
except: print(' (parse error)')
" 2>/dev/null || echo " (search failed)"
log "Suche: 'Widerrufsbelehrung Fernabsatz' in bp_compliance_gesetze"
curl $CURL_OPTS -X POST "https://localhost:8097/api/v1/search" \
-H 'Content-Type: application/json' \
-d '{"query":"Widerrufsbelehrung Fernabsatz Widerrufsfrist","regulation_codes":null,"limit":3,"min_score":0.5}' 2>/dev/null \
| python3 -c "
import sys,json
try:
data = json.load(sys.stdin)
results = data.get('results', [])
print(f' Treffer: {len(results)}')
for r in results[:3]:
print(f' [{r.get(\"score\",0):.3f}] {r.get(\"regulation_code\",\"?\")} - {r.get(\"content\",\"\")[:80]}...')
except: print(' (parse error)')
" 2>/dev/null || echo " (search failed)"
log "Suche: 'AI Act Hochrisiko Konformitaet' in bp_compliance_ce"
curl $CURL_OPTS -X POST "https://localhost:8097/api/v1/search" \
-H 'Content-Type: application/json' \
-d '{"query":"AI Act Hochrisiko Konformitaetsbewertung","regulation_codes":null,"limit":3,"min_score":0.5}' 2>/dev/null \
| python3 -c "
import sys,json
try:
data = json.load(sys.stdin)
results = data.get('results', [])
print(f' Treffer: {len(results)}')
for r in results[:3]:
print(f' [{r.get(\"score\",0):.3f}] {r.get(\"regulation_code\",\"?\")} - {r.get(\"content\",\"\")[:80]}...')
except: print(' (parse error)')
" 2>/dev/null || echo " (search failed)"
log "Suche: 'Privacy Policy Template GDPR' in bp_legal_templates"
@@ -925,9 +1097,10 @@ main() {
gesetze) phase_gesetze ;;
eu) phase_eu ;;
templates) phase_templates ;;
datenschutz) phase_datenschutz ;;
verify) phase_verify ;;
version) phase_register_version ;;
datenschutz) phase_datenschutz ;;
verbraucherschutz) phase_verbraucherschutz ;;
verify) phase_verify ;;
version) phase_register_version ;;
*) fail "Unknown phase: $ONLY_PHASE"; exit 1 ;;
esac
else
@@ -945,6 +1118,8 @@ main() {
echo ""
phase_datenschutz
echo ""
phase_verbraucherschutz
echo ""
phase_verify
echo ""
phase_register_version

77
scripts/rag-sources.md Normal file
View File

@@ -0,0 +1,77 @@
# RAG-Quellennachweis — BreakPilot Compliance
Stand: 2026-03-11
## Collection: bp_compliance_gesetze
| # | Dokument | Quelle | Lizenz |
|---|----------|--------|--------|
| 1 | DDG § 5 (Impressum) | gesetze-im-internet.de | Public Domain (§ 5 UrhG) |
| 2 | TDDDG § 25 (Cookies) | gesetze-im-internet.de | Public Domain (§ 5 UrhG) |
| 3 | UrhG § 5 (Amtliche Werke) | gesetze-im-internet.de | Public Domain (§ 5 UrhG) |
| 4 | EGBGB Muster-Widerrufsbelehrung | gesetze-im-internet.de | Public Domain (§ 5 UrhG) |
| 5 | BGB (komplett) | github.com/bundestag/gesetze | Unlicense |
| 6 | UrhG (komplett) | github.com/bundestag/gesetze | Unlicense |
| 7 | TMG (komplett) | github.com/bundestag/gesetze | Unlicense |
| 8 | PAngV (Preisangabenverordnung) | gesetze-im-internet.de | Public Domain (§ 5 UrhG) |
| 9 | VSBG (Verbraucherstreitbeilegungsgesetz) | gesetze-im-internet.de | Public Domain (§ 5 UrhG) |
| 10 | ProdHaftG (Produkthaftungsgesetz) | gesetze-im-internet.de | Public Domain (§ 5 UrhG) |
| 11 | VerpackG (Verpackungsgesetz) | gesetze-im-internet.de | Public Domain (§ 5 UrhG) |
| 12 | ElektroG (WEEE) | gesetze-im-internet.de | Public Domain (§ 5 UrhG) |
| 13 | BattDG (Batterierecht) | gesetze-im-internet.de | Public Domain (§ 5 UrhG) |
| 14 | BFSG (Barrierefreiheit) | gesetze-im-internet.de | Public Domain (§ 5 UrhG) |
| 15 | UWG (Unlauterer Wettbewerb) | gesetze-im-internet.de | Public Domain (§ 5 UrhG) |
| 16 | BGB (Fernabsatz/Digitale Inhalte/Kaufrecht) | gesetze-im-internet.de | Public Domain (§ 5 UrhG) |
| 17 | EGBGB (Muster-Widerrufsbelehrung Anlage 1+2) | gesetze-im-internet.de | Public Domain (§ 5 UrhG) |
## Collection: bp_compliance_ce
| # | Dokument | Quelle | Lizenz |
|---|----------|--------|--------|
| 1 | Digital Services Act (EU) 2022/2065 | eur-lex.europa.eu | CC BY 4.0 |
| 2 | ePrivacy-Richtlinie 2002/58/EC | eur-lex.europa.eu | CC BY 4.0 |
| 3 | Standardvertragsklauseln (EU) 2021/914 | eur-lex.europa.eu | CC BY 4.0 |
| 4 | GPSR (EU) 2023/988 | eur-lex.europa.eu | CC BY 4.0 |
| 5 | AI Act (EU) 2024/1689 | eur-lex.europa.eu | CC BY 4.0 |
| 6 | Batterieverordnung (EU) 2023/1542 | eur-lex.europa.eu | CC BY 4.0 |
| 7 | Digitale-Inhalte-RL (EU) 2019/770 | eur-lex.europa.eu | CC BY 4.0 |
## Collection: bp_legal_templates
| # | Dokument | Quelle | Lizenz |
|---|----------|--------|--------|
| 1 | GitHub Site Policy | github.com/github/site-policy | CC0 |
| 2 | OpenGov Site Policy | github.com/opengovfoundation/site-policy | CC0 |
| 3 | CC Legal Tools | github.com/creativecommons/cc-legal-tools-data | CC0 |
| 4 | opr.vc DSGVO-Mustertexte | github.com/oprvc/oprvc.github.io | CC0 |
| 5 | webflorist Privacy Policy Text | github.com/webflorist/privacy-policy-text | MIT |
| 6 | Tempest Privacy Policy Generator | github.com/Tempest-Solutions-Company | MIT |
| 7 | Tempest Terms of Service Generator | github.com/Tempest-Solutions-Company | MIT |
| 8 | Tempest Cookie Banner | github.com/Tempest-Solutions-Company | MIT |
| 9 | CookieConsent (orestbida) | github.com/orestbida/cookieconsent | MIT |
| 10 | CommonPaper CSA/SLA/PSA | github.com/CommonPaper | CC BY 4.0 |
| 11 | Datennutzungsklauseln | gitlab.opencode.de/wernerth | CC BY 4.0 |
## Collection: bp_compliance_datenschutz
| # | Dokument | Quelle | Lizenz |
|---|----------|--------|--------|
| 1 | EDPB Guidelines 05/2020 Consent | edpb.europa.eu | Reuse Notice |
| 2 | EDPB Guidelines 4/2019 Privacy by Design | edpb.europa.eu | Reuse Notice |
| 3 | EDPB Guidelines 03/2022 Dark Patterns | edpb.europa.eu | Reuse Notice |
| 4 | EDPB Guidelines 8/2020 Social Media Targeting | edpb.europa.eu | Reuse Notice |
| 5 | EDPB Cookie Banner Taskforce Report 2023 | edpb.europa.eu | Reuse Notice |
| 6 | EDPB Guidelines 2/2023 ePrivacy Art. 5(3) | edpb.europa.eu | Reuse Notice |
| 7 | EDPB Guidelines 1/2024 Legitimate Interest | edpb.europa.eu | Reuse Notice |
| 8 | EDPB DPO Enforcement Report 2024 | edpb.europa.eu | Reuse Notice |
| 9 | EDPS GenAI Orientations 2024 | edps.europa.eu | Reuse Notice |
| 10 | EDPS Digital Ethics Report 2018 | edps.europa.eu | Reuse Notice |
| 11 | HLEG Ethics Guidelines Trustworthy AI | op.europa.eu | CC BY 4.0 |
## Lizenz-Hinweise
- **Public Domain (§ 5 UrhG):** Deutsche amtliche Werke (Gesetze, Verordnungen) sind gemeinfrei.
- **CC BY 4.0:** EU-Rechtstexte und EU-Publikationen. Attribution: "European Union, https://eur-lex.europa.eu"
- **CC0:** Public-Domain-Widmung, keine Einschraenkungen.
- **MIT:** Permissive Open-Source-Lizenz, kommerzielle Nutzung erlaubt.
- **Reuse Notice:** EDPB/EDPS-Dokumente duerfen unter Quellenangabe wiederverwendet werden.