Compare commits
2 Commits
87d06c8b20
...
cf60c39658
| Author | SHA1 | Date | |
|---|---|---|---|
| | cf60c39658 | | |
| | c88653b221 | | |
@@ -78,7 +78,7 @@ jobs:
|
||||
-e "SDK_URL=http://bp-compliance-ai-sdk:8090" \
|
||||
alpine:3.19 \
|
||||
sh -c "
|
||||
apk add --no-cache curl bash coreutils git python3 > /dev/null 2>&1
|
||||
apk add --no-cache curl bash coreutils git python3 unzip > /dev/null 2>&1
|
||||
mkdir -p /tmp/rag-ingestion/{pdfs,repos,texts}
|
||||
cd /workspace
|
||||
if [ '${PHASE}' = 'all' ]; then
|
||||
|
||||
@@ -265,6 +265,42 @@ describe('buildDocumentScope', () => {
|
||||
})
|
||||
})
|
||||
|
||||
it('normalizes UPPERCASE trigger doc names to lowercase ScopeDocumentType', () => {
  // The trigger lists its mandatory documents with UPPERCASE names; the
  // resulting scope entries are expected under their lowercase keys.
  const t = trigger('HT-test', 'L2', {
    category: 'test',
    mandatoryDocuments: ['VVT', 'TOM', 'DSFA'],
  })
  const docs = complianceScopeEngine.buildDocumentScope('L2', [t], [])

  // No `any` on the callbacks: the element type is inferred from `docs`, so
  // the compiler checks the documentType literals.
  const vvt = docs.find((d) => d.documentType === 'vvt')
  const tom = docs.find((d) => d.documentType === 'tom')
  const dsfa = docs.find((d) => d.documentType === 'dsfa')

  expect(vvt).toBeDefined()
  expect(vvt?.requirement).toBe('mandatory')
  expect(vvt?.triggeredBy).toContain('HT-test')
  expect(tom).toBeDefined()
  expect(tom?.requirement).toBe('mandatory')
  expect(dsfa).toBeDefined()
  expect(dsfa?.requirement).toBe('mandatory')
})
|
||||
|
||||
it('normalizes aliased doc names (DSE→dsi, LOESCHKONZEPT→lf)', () => {
  // These trigger names differ from their scope keys by more than letter
  // case: DSE→dsi, LOESCHKONZEPT→lf, DSR_PROZESS→betroffenenrechte.
  const t = trigger('HT-alias', 'L2', {
    category: 'test',
    mandatoryDocuments: ['DSE', 'LOESCHKONZEPT', 'DSR_PROZESS'],
  })
  const docs = complianceScopeEngine.buildDocumentScope('L2', [t], [])

  // No `any` on the callbacks: the element type is inferred from `docs`, so
  // the compiler checks the documentType literals.
  const dsi = docs.find((d) => d.documentType === 'dsi')
  const lf = docs.find((d) => d.documentType === 'lf')
  const betroffenenrechte = docs.find((d) => d.documentType === 'betroffenenrechte')

  expect(dsi).toBeDefined()
  expect(dsi?.requirement).toBe('mandatory')
  expect(dsi?.triggeredBy).toContain('HT-alias')
  expect(lf).toBeDefined()
  expect(lf?.requirement).toBe('mandatory')
  expect(betroffenenrechte).toBeDefined()
  expect(betroffenenrechte?.requirement).toBe('mandatory')
})
|
||||
|
||||
it('documents sorted: mandatory first', () => {
|
||||
const decision = complianceScopeEngine.evaluate([
|
||||
ans('data_art9', ['gesundheit']),
|
||||
|
||||
@@ -1328,6 +1328,38 @@ export class ComplianceScopeEngine {
|
||||
return maxDepthLevel(levelFromScore, maxTriggerLevel)
|
||||
}
|
||||
|
||||
/**
 * Lookup table from the UPPERCASE document identifiers used in hard-trigger
 * rules to the lowercase ScopeDocumentType keys. Several identifiers share a
 * target key (e.g. COOKIE_BANNER and EINWILLIGUNGEN both map to
 * 'einwilligung'). Held in a Map so that a lookup can never resolve to a
 * member inherited from Object.prototype.
 */
private static readonly TRIGGER_DOC_TYPE_ALIASES: ReadonlyMap<string, ScopeDocumentType> =
  new Map<string, ScopeDocumentType>([
    ['VVT', 'vvt'],
    ['TOM', 'tom'],
    ['DSFA', 'dsfa'],
    ['DSE', 'dsi'],
    ['AGB', 'vertragsmanagement'],
    ['AVV', 'av_vertrag'],
    ['COOKIE_BANNER', 'einwilligung'],
    ['EINWILLIGUNGEN', 'einwilligung'],
    ['TRANSFER_DOKU', 'daten_transfer'],
    ['AUDIT_CHECKLIST', 'audit_log'],
    ['VENDOR_MANAGEMENT', 'vertragsmanagement'],
    ['LOESCHKONZEPT', 'lf'],
    ['DSR_PROZESS', 'betroffenenrechte'],
    ['NOTFALLPLAN', 'notfallplan'],
    ['AI_ACT_DOKU', 'ai_act_doku'],
    ['WIDERRUFSBELEHRUNG', 'widerrufsbelehrung'],
    ['PREISANGABEN', 'preisangaben'],
    ['FERNABSATZ_INFO', 'fernabsatz_info'],
    ['STREITBEILEGUNG', 'streitbeilegung'],
    ['PRODUKTSICHERHEIT', 'produktsicherheit'],
  ])

/**
 * Normalizes a document-type identifier from a hard-trigger rule to its
 * lowercase ScopeDocumentType key.
 *
 * @param raw - Identifier as written in the rule: either an UPPERCASE alias
 *   or a string that already is a key of DOCUMENT_SCOPE_MATRIX.
 * @returns The matching ScopeDocumentType, or null when the identifier is
 *   neither a known alias nor a DOCUMENT_SCOPE_MATRIX key.
 */
private normalizeDocType(raw: string): ScopeDocumentType | null {
  // raw is already a valid ScopeDocumentType. Use an own-property check:
  // the `in` operator would also accept inherited names such as 'toString'.
  if (Object.prototype.hasOwnProperty.call(DOCUMENT_SCOPE_MATRIX, raw)) {
    return raw as ScopeDocumentType
  }
  return ComplianceScopeEngine.TRIGGER_DOC_TYPE_ALIASES.get(raw) ?? null
}
|
||||
|
||||
/**
|
||||
* Baut den Dokumenten-Scope basierend auf Level und Triggers
|
||||
*/
|
||||
@@ -1338,11 +1370,18 @@ export class ComplianceScopeEngine {
|
||||
): RequiredDocument[] {
|
||||
const requiredDocs: RequiredDocument[] = []
|
||||
const mandatoryFromTriggers = new Set<ScopeDocumentType>()
|
||||
// Mapping: normalisierter DocType → original Rule-Strings (fuer triggeredBy Lookup)
|
||||
const triggerDocOrigins = new Map<ScopeDocumentType, string[]>()
|
||||
|
||||
// Sammle mandatory docs aus Triggern
|
||||
// Sammle mandatory docs aus Triggern (normalisiert)
|
||||
for (const trigger of triggers) {
|
||||
for (const doc of trigger.mandatoryDocuments) {
|
||||
mandatoryFromTriggers.add(doc as ScopeDocumentType)
|
||||
const normalized = this.normalizeDocType(doc)
|
||||
if (normalized) {
|
||||
mandatoryFromTriggers.add(normalized)
|
||||
if (!triggerDocOrigins.has(normalized)) triggerDocOrigins.set(normalized, [])
|
||||
triggerDocOrigins.get(normalized)!.push(doc)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -1352,6 +1391,7 @@ export class ComplianceScopeEngine {
|
||||
const isMandatoryFromTrigger = mandatoryFromTriggers.has(docType)
|
||||
|
||||
if (requirement === 'mandatory' || isMandatoryFromTrigger) {
|
||||
const originDocs = triggerDocOrigins.get(docType) ?? []
|
||||
requiredDocs.push({
|
||||
documentType: docType,
|
||||
label: DOCUMENT_TYPE_LABELS[docType],
|
||||
@@ -1361,7 +1401,7 @@ export class ComplianceScopeEngine {
|
||||
sdkStepUrl: DOCUMENT_SDK_STEP_MAP[docType],
|
||||
triggeredBy: isMandatoryFromTrigger
|
||||
? triggers
|
||||
.filter((t) => t.mandatoryDocuments.includes(docType as any))
|
||||
.filter((t) => t.mandatoryDocuments.some((d) => originDocs.includes(d)))
|
||||
.map((t) => t.ruleId)
|
||||
: [],
|
||||
})
|
||||
@@ -1410,29 +1450,33 @@ export class ComplianceScopeEngine {
|
||||
* Schätzt den Aufwand für ein Dokument (in Stunden)
|
||||
*/
|
||||
private estimateEffort(docType: ScopeDocumentType): number {
|
||||
const effortMap: Record<ScopeDocumentType, number> = {
|
||||
VVT: 8,
|
||||
TOM: 12,
|
||||
DSFA: 16,
|
||||
AVV: 4,
|
||||
DSE: 6,
|
||||
EINWILLIGUNGEN: 6,
|
||||
LOESCHKONZEPT: 10,
|
||||
TRANSFER_DOKU: 8,
|
||||
DSR_PROZESS: 8,
|
||||
NOTFALLPLAN: 12,
|
||||
COOKIE_BANNER: 4,
|
||||
AGB: 6,
|
||||
WIDERRUFSBELEHRUNG: 3,
|
||||
PREISANGABEN: 2,
|
||||
FERNABSATZ_INFO: 4,
|
||||
STREITBEILEGUNG: 1,
|
||||
PRODUKTSICHERHEIT: 8,
|
||||
AI_ACT_DOKU: 12,
|
||||
AUDIT_CHECKLIST: 8,
|
||||
VENDOR_MANAGEMENT: 10,
|
||||
const effortMap: Partial<Record<ScopeDocumentType, number>> = {
|
||||
vvt: 8,
|
||||
tom: 12,
|
||||
dsfa: 16,
|
||||
av_vertrag: 4,
|
||||
dsi: 6,
|
||||
einwilligung: 6,
|
||||
lf: 10,
|
||||
daten_transfer: 8,
|
||||
betroffenenrechte: 8,
|
||||
notfallplan: 12,
|
||||
vertragsmanagement: 10,
|
||||
audit_log: 8,
|
||||
risikoanalyse: 6,
|
||||
schulung: 4,
|
||||
datenpannen: 6,
|
||||
zertifizierung: 8,
|
||||
datenschutzmanagement: 12,
|
||||
iace_ce_assessment: 8,
|
||||
widerrufsbelehrung: 3,
|
||||
preisangaben: 2,
|
||||
fernabsatz_info: 4,
|
||||
streitbeilegung: 1,
|
||||
produktsicherheit: 8,
|
||||
ai_act_doku: 12,
|
||||
}
|
||||
return effortMap[docType] || 6
|
||||
return effortMap[docType] ?? 6
|
||||
}
|
||||
|
||||
/**
|
||||
|
||||
@@ -64,6 +64,22 @@ upload_file() {
|
||||
return 0 # Don't abort script
|
||||
fi
|
||||
|
||||
# Dedup-Check: Prüfe ob regulation_id bereits in Qdrant vorhanden ist
|
||||
local reg_id
|
||||
reg_id=$(echo "$metadata_json" | python3 -c "import sys,json; print(json.load(sys.stdin).get('regulation_id',''))" 2>/dev/null || echo "")
|
||||
if [[ -n "$reg_id" && -n "${QDRANT_URL:-}" ]]; then
|
||||
local existing
|
||||
existing=$(curl -sk --max-time 5 -X POST "${QDRANT_URL}/collections/${collection}/points/scroll" \
|
||||
-H "Content-Type: application/json" \
|
||||
-d "{\"filter\":{\"must\":[{\"key\":\"regulation_id\",\"match\":{\"value\":\"$reg_id\"}}]},\"limit\":1}" \
|
||||
2>/dev/null | python3 -c "import sys,json; r=json.load(sys.stdin).get('result',{}); print(len(r.get('points',[])))" 2>/dev/null || echo "0")
|
||||
if [[ "$existing" -gt 0 ]] 2>/dev/null; then
|
||||
log "⏭ Skip (already in Qdrant): $label [regulation_id=$reg_id]"
|
||||
SKIPPED=$((SKIPPED + 1))
|
||||
return 0
|
||||
fi
|
||||
fi
|
||||
|
||||
local filesize
|
||||
filesize=$(stat -f%z "$file" 2>/dev/null || stat -c%s "$file" 2>/dev/null || echo 0)
|
||||
if [[ "$filesize" -lt 100 ]]; then
|
||||
@@ -76,7 +92,7 @@ upload_file() {
|
||||
|
||||
# Use longer timeout for large files (>500KB)
|
||||
local curl_opts="$CURL_OPTS"
|
||||
if [[ "$filesize" -gt 512000 ]]; then
|
||||
if [[ "$filesize" -gt 384000 ]]; then
|
||||
curl_opts="$CURL_OPTS_LARGE"
|
||||
log " (large file, using extended timeout)"
|
||||
fi
|
||||
@@ -833,29 +849,210 @@ phase_verbraucherschutz() {
|
||||
fi
|
||||
done
|
||||
|
||||
# BGB komplett (Fernabsatz, Digitale Inhalte, Kaufrecht, AGB-Recht)
|
||||
download_pdf \
|
||||
"https://www.gesetze-im-internet.de/bgb/BGB.pdf" \
|
||||
"$WORK_DIR/pdfs/BGB_full.pdf"
|
||||
if [[ -f "$WORK_DIR/pdfs/BGB_full.pdf" ]]; then
|
||||
upload_file "$WORK_DIR/pdfs/BGB_full.pdf" "$col" "compliance" "legal_reference" "2025" \
|
||||
'{"regulation_id":"bgb_komplett","regulation_name_de":"BGB (komplett: AGB-Recht, Fernabsatz, Digitale Inhalte, Kaufrecht)","category":"vertragsrecht","license":"public_domain_§5_UrhG","source":"gesetze-im-internet.de"}' \
|
||||
"BGB komplett"
|
||||
# BGB in Teilen statt komplett (2.7MB PDF ist zu gross fuer CPU-Embeddings)
|
||||
# gesetze-im-internet.de bietet XML-Download pro Gesetz
|
||||
local bgb_xml="$WORK_DIR/pdfs/bgb_xml.zip"
|
||||
curl -sL "https://www.gesetze-im-internet.de/bgb/xml.zip" -o "$bgb_xml" 2>/dev/null
|
||||
if [[ -f "$bgb_xml" && $(stat -f%z "$bgb_xml" 2>/dev/null || stat -c%s "$bgb_xml" 2>/dev/null || echo 0) -gt 1000 ]]; then
|
||||
local bgb_extract="$WORK_DIR/pdfs/bgb_xml"
|
||||
mkdir -p "$bgb_extract"
|
||||
unzip -qo "$bgb_xml" -d "$bgb_extract" 2>/dev/null || true
|
||||
|
||||
# Relevante BGB-Abschnitte als Text extrahieren und einzeln uploaden
|
||||
# Die XML-Datei hat <norm> Elemente mit <metadaten><enbez>§ 305</enbez>
|
||||
local bgb_xmlfile
|
||||
bgb_xmlfile=$(find "$bgb_extract" -name "*.xml" | head -1)
|
||||
if [[ -n "$bgb_xmlfile" ]]; then
|
||||
# BGB Teil 1: AGB-Recht §§ 305-310
|
||||
python3 -c "
|
||||
import xml.etree.ElementTree as ET, sys, re
|
||||
tree = ET.parse('$bgb_xmlfile')
|
||||
root = tree.getroot()
|
||||
ns = {'': root.tag.split('}')[0].lstrip('{') if '}' in root.tag else ''}
|
||||
text_parts = []
|
||||
capture = False
|
||||
for norm in root.iter():
|
||||
if norm.tag.endswith('norm'):
|
||||
enbez = norm.find('.//' + ('{' + ns[''] + '}' if ns[''] else '') + 'enbez')
|
||||
if enbez is not None and enbez.text:
|
||||
num = re.search(r'§\s*(\d+)', enbez.text)
|
||||
if num:
|
||||
n = int(num.group(1))
|
||||
capture = 305 <= n <= 310
|
||||
else:
|
||||
capture = False
|
||||
if capture:
|
||||
for t in norm.itertext():
|
||||
text_parts.append(t.strip())
|
||||
with open('$WORK_DIR/pdfs/BGB_AGB_305_310.txt', 'w') as f:
|
||||
f.write('BGB AGB-Recht §§ 305-310\n\n' + '\n'.join(p for p in text_parts if p))
|
||||
" 2>/dev/null
|
||||
if [[ -f "$WORK_DIR/pdfs/BGB_AGB_305_310.txt" && $(wc -c < "$WORK_DIR/pdfs/BGB_AGB_305_310.txt") -gt 100 ]]; then
|
||||
upload_file "$WORK_DIR/pdfs/BGB_AGB_305_310.txt" "$col" "compliance" "legal_reference" "2025" \
|
||||
'{"regulation_id":"bgb_agb","regulation_name_de":"BGB AGB-Recht (§§ 305-310)","category":"vertragsrecht","license":"public_domain_§5_UrhG","source":"gesetze-im-internet.de"}' \
|
||||
"BGB AGB-Recht §§ 305-310"
|
||||
fi
|
||||
|
||||
# BGB Teil 2: Fernabsatzrecht §§ 312-312k
|
||||
python3 -c "
|
||||
import xml.etree.ElementTree as ET, sys, re
|
||||
tree = ET.parse('$bgb_xmlfile')
|
||||
root = tree.getroot()
|
||||
text_parts = []
|
||||
capture = False
|
||||
for norm in root.iter():
|
||||
if norm.tag.endswith('norm'):
|
||||
enbez = norm.find('.//' + ('{' + root.tag.split('}')[0].lstrip('{') + '}' if '}' in root.tag else '') + 'enbez')
|
||||
if enbez is not None and enbez.text:
|
||||
if re.search(r'§\s*312', enbez.text):
|
||||
capture = True
|
||||
elif re.search(r'§\s*31[3-9]|§\s*32', enbez.text):
|
||||
capture = False
|
||||
else:
|
||||
if capture and not any(norm.itertext()):
|
||||
capture = False
|
||||
if capture:
|
||||
for t in norm.itertext():
|
||||
text_parts.append(t.strip())
|
||||
with open('$WORK_DIR/pdfs/BGB_Fernabsatz_312.txt', 'w') as f:
|
||||
f.write('BGB Fernabsatzrecht §§ 312-312k\n\n' + '\n'.join(p for p in text_parts if p))
|
||||
" 2>/dev/null
|
||||
if [[ -f "$WORK_DIR/pdfs/BGB_Fernabsatz_312.txt" && $(wc -c < "$WORK_DIR/pdfs/BGB_Fernabsatz_312.txt") -gt 100 ]]; then
|
||||
upload_file "$WORK_DIR/pdfs/BGB_Fernabsatz_312.txt" "$col" "compliance" "legal_reference" "2025" \
|
||||
'{"regulation_id":"bgb_fernabsatz","regulation_name_de":"BGB Fernabsatzrecht (§§ 312-312k)","category":"verbraucherschutz","license":"public_domain_§5_UrhG","source":"gesetze-im-internet.de"}' \
|
||||
"BGB Fernabsatzrecht §§ 312-312k"
|
||||
fi
|
||||
|
||||
# BGB Teil 3: Kaufrecht + Gewährleistung §§ 433-480
|
||||
python3 -c "
|
||||
import xml.etree.ElementTree as ET, sys, re
|
||||
tree = ET.parse('$bgb_xmlfile')
|
||||
root = tree.getroot()
|
||||
text_parts = []
|
||||
capture = False
|
||||
for norm in root.iter():
|
||||
if norm.tag.endswith('norm'):
|
||||
enbez = norm.find('.//' + ('{' + root.tag.split('}')[0].lstrip('{') + '}' if '}' in root.tag else '') + 'enbez')
|
||||
if enbez is not None and enbez.text:
|
||||
num = re.search(r'§\s*(\d+)', enbez.text)
|
||||
if num:
|
||||
n = int(num.group(1))
|
||||
capture = 433 <= n <= 480
|
||||
else:
|
||||
capture = False
|
||||
if capture:
|
||||
for t in norm.itertext():
|
||||
text_parts.append(t.strip())
|
||||
with open('$WORK_DIR/pdfs/BGB_Kaufrecht_433_480.txt', 'w') as f:
|
||||
f.write('BGB Kaufrecht §§ 433-480\n\n' + '\n'.join(p for p in text_parts if p))
|
||||
" 2>/dev/null
|
||||
if [[ -f "$WORK_DIR/pdfs/BGB_Kaufrecht_433_480.txt" && $(wc -c < "$WORK_DIR/pdfs/BGB_Kaufrecht_433_480.txt") -gt 100 ]]; then
|
||||
upload_file "$WORK_DIR/pdfs/BGB_Kaufrecht_433_480.txt" "$col" "compliance" "legal_reference" "2025" \
|
||||
'{"regulation_id":"bgb_kaufrecht","regulation_name_de":"BGB Kaufrecht + Gewaehrleistung (§§ 433-480)","category":"verbraucherschutz","license":"public_domain_§5_UrhG","source":"gesetze-im-internet.de"}' \
|
||||
"BGB Kaufrecht §§ 433-480"
|
||||
fi
|
||||
|
||||
# BGB Teil 4: Widerrufsrecht §§ 355-361
|
||||
python3 -c "
|
||||
import xml.etree.ElementTree as ET, sys, re
|
||||
tree = ET.parse('$bgb_xmlfile')
|
||||
root = tree.getroot()
|
||||
text_parts = []
|
||||
capture = False
|
||||
for norm in root.iter():
|
||||
if norm.tag.endswith('norm'):
|
||||
enbez = norm.find('.//' + ('{' + root.tag.split('}')[0].lstrip('{') + '}' if '}' in root.tag else '') + 'enbez')
|
||||
if enbez is not None and enbez.text:
|
||||
num = re.search(r'§\s*(\d+)', enbez.text)
|
||||
if num:
|
||||
n = int(num.group(1))
|
||||
capture = 355 <= n <= 361
|
||||
else:
|
||||
capture = False
|
||||
if capture:
|
||||
for t in norm.itertext():
|
||||
text_parts.append(t.strip())
|
||||
with open('$WORK_DIR/pdfs/BGB_Widerruf_355_361.txt', 'w') as f:
|
||||
f.write('BGB Widerrufsrecht §§ 355-361\n\n' + '\n'.join(p for p in text_parts if p))
|
||||
" 2>/dev/null
|
||||
if [[ -f "$WORK_DIR/pdfs/BGB_Widerruf_355_361.txt" && $(wc -c < "$WORK_DIR/pdfs/BGB_Widerruf_355_361.txt") -gt 100 ]]; then
|
||||
upload_file "$WORK_DIR/pdfs/BGB_Widerruf_355_361.txt" "$col" "compliance" "legal_reference" "2025" \
|
||||
'{"regulation_id":"bgb_widerruf","regulation_name_de":"BGB Widerrufsrecht (§§ 355-361)","category":"verbraucherschutz","license":"public_domain_§5_UrhG","source":"gesetze-im-internet.de"}' \
|
||||
"BGB Widerrufsrecht §§ 355-361"
|
||||
fi
|
||||
|
||||
# BGB Teil 5: Digitale Produkte §§ 327-327u
|
||||
python3 -c "
|
||||
import xml.etree.ElementTree as ET, sys, re
|
||||
tree = ET.parse('$bgb_xmlfile')
|
||||
root = tree.getroot()
|
||||
text_parts = []
|
||||
capture = False
|
||||
for norm in root.iter():
|
||||
if norm.tag.endswith('norm'):
|
||||
enbez = norm.find('.//' + ('{' + root.tag.split('}')[0].lstrip('{') + '}' if '}' in root.tag else '') + 'enbez')
|
||||
if enbez is not None and enbez.text:
|
||||
if re.search(r'§\s*327', enbez.text):
|
||||
capture = True
|
||||
elif re.search(r'§\s*328', enbez.text):
|
||||
capture = False
|
||||
if capture:
|
||||
for t in norm.itertext():
|
||||
text_parts.append(t.strip())
|
||||
with open('$WORK_DIR/pdfs/BGB_Digital_327.txt', 'w') as f:
|
||||
f.write('BGB Digitale Produkte §§ 327-327u\n\n' + '\n'.join(p for p in text_parts if p))
|
||||
" 2>/dev/null
|
||||
if [[ -f "$WORK_DIR/pdfs/BGB_Digital_327.txt" && $(wc -c < "$WORK_DIR/pdfs/BGB_Digital_327.txt") -gt 100 ]]; then
|
||||
upload_file "$WORK_DIR/pdfs/BGB_Digital_327.txt" "$col" "compliance" "legal_reference" "2025" \
|
||||
'{"regulation_id":"bgb_digital","regulation_name_de":"BGB Digitale Produkte (§§ 327-327u)","category":"verbraucherschutz","license":"public_domain_§5_UrhG","source":"gesetze-im-internet.de"}' \
|
||||
"BGB Digitale Produkte §§ 327-327u"
|
||||
fi
|
||||
else
|
||||
warn "BGB XML file not found in archive"
|
||||
fi
|
||||
else
|
||||
warn "BGB XML download failed"
|
||||
fi
|
||||
|
||||
# EGBGB (Muster-Widerrufsbelehrung Anlage 1+2)
|
||||
download_pdf \
|
||||
"https://www.gesetze-im-internet.de/bgbeg/BGBEG.pdf" \
|
||||
"$WORK_DIR/pdfs/BGBEG.pdf"
|
||||
if [[ -f "$WORK_DIR/pdfs/BGBEG.pdf" ]]; then
|
||||
upload_file "$WORK_DIR/pdfs/BGBEG.pdf" "$col" "compliance" "legal_reference" "2025" \
|
||||
'{"regulation_id":"egbgb","regulation_name_de":"EGBGB (Muster-Widerrufsbelehrung, Informationspflichten)","category":"verbraucherschutz","license":"public_domain_§5_UrhG","source":"gesetze-im-internet.de"}' \
|
||||
"EGBGB (Muster-Widerrufsbelehrung)"
|
||||
# EGBGB — XML statt PDF (BGBEG.pdf war leer)
|
||||
local egbgb_xml="$WORK_DIR/pdfs/bgbeg_xml.zip"
|
||||
curl -sL "https://www.gesetze-im-internet.de/bgbeg/xml.zip" -o "$egbgb_xml" 2>/dev/null
|
||||
if [[ -f "$egbgb_xml" && $(stat -f%z "$egbgb_xml" 2>/dev/null || stat -c%s "$egbgb_xml" 2>/dev/null || echo 0) -gt 1000 ]]; then
|
||||
local egbgb_extract="$WORK_DIR/pdfs/egbgb_xml"
|
||||
mkdir -p "$egbgb_extract"
|
||||
unzip -qo "$egbgb_xml" -d "$egbgb_extract" 2>/dev/null || true
|
||||
local egbgb_xmlfile
|
||||
egbgb_xmlfile=$(find "$egbgb_extract" -name "*.xml" | head -1)
|
||||
if [[ -n "$egbgb_xmlfile" ]]; then
|
||||
# Art. 246a EGBGB (Informationspflichten Fernabsatz) + Anlage 1+2 (Widerrufsbelehrung)
|
||||
python3 -c "
|
||||
import xml.etree.ElementTree as ET
|
||||
tree = ET.parse('$egbgb_xmlfile')
|
||||
root = tree.getroot()
|
||||
text_parts = []
|
||||
for norm in root.iter():
|
||||
if norm.tag.endswith('norm'):
|
||||
# Capture all text — EGBGB is not too large
|
||||
parts = [t.strip() for t in norm.itertext() if t.strip()]
|
||||
if any('246' in p or 'Anlage' in p or 'Widerruf' in p or 'Muster' in p for p in parts):
|
||||
text_parts.extend(parts)
|
||||
with open('$WORK_DIR/pdfs/EGBGB_Widerruf.txt', 'w') as f:
|
||||
f.write('EGBGB - Informationspflichten und Muster-Widerrufsbelehrung\n\n' + '\n'.join(text_parts))
|
||||
" 2>/dev/null
|
||||
if [[ -f "$WORK_DIR/pdfs/EGBGB_Widerruf.txt" && $(wc -c < "$WORK_DIR/pdfs/EGBGB_Widerruf.txt") -gt 100 ]]; then
|
||||
upload_file "$WORK_DIR/pdfs/EGBGB_Widerruf.txt" "$col" "compliance" "legal_reference" "2025" \
|
||||
'{"regulation_id":"egbgb","regulation_name_de":"EGBGB (Muster-Widerrufsbelehrung, Art. 246a + Anlage 1+2)","category":"verbraucherschutz","license":"public_domain_§5_UrhG","source":"gesetze-im-internet.de"}' \
|
||||
"EGBGB Muster-Widerrufsbelehrung"
|
||||
fi
|
||||
fi
|
||||
else
|
||||
warn "EGBGB XML download failed"
|
||||
fi
|
||||
|
||||
local after
|
||||
after=$(collection_count "$col")
|
||||
log "Collection $col: $before → $after chunks (+$((after - before)))"
|
||||
local diff=$(( ${after:-0} - ${before:-0} )) 2>/dev/null || diff="?"
|
||||
log "Collection $col: ${before:-?} → ${after:-?} chunks (+${diff})"
|
||||
|
||||
# =========================================================================
|
||||
# H2: EU-Recht → bp_compliance_ce
|
||||
@@ -915,7 +1112,8 @@ phase_verbraucherschutz() {
|
||||
done
|
||||
|
||||
after=$(collection_count "$col")
|
||||
log "Collection $col: $before → $after chunks (+$((after - before)))"
|
||||
local diff=$(( ${after:-0} - ${before:-0} )) 2>/dev/null || diff="?"
|
||||
log "Collection $col: ${before:-?} → ${after:-?} chunks (+${diff})"
|
||||
|
||||
# =========================================================================
|
||||
# H3: NIST Security Frameworks → bp_compliance_security
|
||||
@@ -956,7 +1154,8 @@ phase_verbraucherschutz() {
|
||||
fi
|
||||
|
||||
after=$(collection_count "$col")
|
||||
log "Collection $col: $before → $after chunks (+$((after - before)))"
|
||||
local diff=$(( ${after:-0} - ${before:-0} )) 2>/dev/null || diff="?"
|
||||
log "Collection $col: ${before:-?} → ${after:-?} chunks (+${diff})"
|
||||
|
||||
# =========================================================================
|
||||
# Summary
|
||||
|
||||
Reference in New Issue
Block a user