From 9496e758fcb6d89a762c89fd3bec5b9fb9530586 Mon Sep 17 00:00:00 2001 From: Benjamin Admin Date: Sat, 28 Feb 2026 01:56:04 +0100 Subject: [PATCH] feat: EU-IFRS 2023/1803 + EFRAG Endorsement ingestion & system prompt MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Ingestion script: Add 3 new PDFs (IFRS DE/EN, EFRAG Endorsement Status) to ingest-industry-compliance.sh (7 → 10 documents total) - System prompt: Add EU-IFRS and EFRAG to competence area, add mandatory IFRS endorsement warning section for all IFRS/IAS queries Co-Authored-By: Claude Opus 4.6 --- .../api/sdk/compliance-advisor/chat/route.ts | 16 + scripts/ingest-industry-compliance.sh | 443 ++++++++++++++++++ 2 files changed, 459 insertions(+) create mode 100755 scripts/ingest-industry-compliance.sh diff --git a/admin-compliance/app/api/sdk/compliance-advisor/chat/route.ts b/admin-compliance/app/api/sdk/compliance-advisor/chat/route.ts index 66e72bd..044ba09 100644 --- a/admin-compliance/app/api/sdk/compliance-advisor/chat/route.ts +++ b/admin-compliance/app/api/sdk/compliance-advisor/chat/route.ts @@ -59,6 +59,22 @@ offiziellen Quellen und gibst praxisnahe Hinweise. - WP29/WP248 (Art.-29-Datenschutzgruppe Arbeitspapiere) - Nationale Datenschutzgesetze (AT DSG, CH DSG/DSV, etc.) - EU-Verordnungen (DORA, MiCA, Data Act, EHDS, PSD2, AMLR, etc.) 
+- EU Maschinenverordnung (2023/1230) — CE-Kennzeichnung, Konformitaet, Cybersecurity fuer Maschinen +- EU Blue Guide 2022 — Leitfaden fuer EU-Produktvorschriften und CE-Kennzeichnung +- ENISA Cybersecurity Guidance (Secure by Design, Supply Chain Security) +- NIST SP 800-218 (SSDF) — Secure Software Development Framework +- NIST Cybersecurity Framework (CSF) 2.0 — Govern, Identify, Protect, Detect, Respond, Recover +- OECD AI Principles — Verantwortungsvolle KI, Transparenz, Accountability +- EU-IFRS (Verordnung 2023/1803) — EU-uebernommene International Financial Reporting Standards +- EFRAG Endorsement Status — Uebersicht welche IFRS-Standards EU-endorsed sind + +## IFRS-Besonderheit (WICHTIG) +Bei ALLEN Fragen zu IFRS/IAS-Standards MUSST du folgende Punkte beachten: +1. Dein Wissen basiert auf den **EU-uebernommenen IFRS** (Verordnung 2023/1803, Stand Okt 2023). +2. Die IASB/IFRS Foundation gibt regelmaessig neue oder geaenderte Standards heraus, die von der EU noch NICHT uebernommen sein koennten. +3. Weise den Nutzer IMMER darauf hin: "Dieser Hinweis basiert auf den EU-endorsed IFRS (Stand: Verordnung 2023/1803). Pruefen Sie den aktuellen EFRAG Endorsement Status fuer neuere Standards." +4. Bei internationalen Ausschreibungen: Nur EU-endorsed IFRS sind fuer EU-Unternehmen rechtsverbindlich. +5. Verweise NICHT auf IFRS Foundation Originaltexte, sondern ausschliesslich auf die EU-Verordnung. 
## RAG-Nutzung Nutze das gesamte RAG-Corpus fuer Kontext und Quellenangaben — ausgenommen sind diff --git a/scripts/ingest-industry-compliance.sh b/scripts/ingest-industry-compliance.sh new file mode 100755 index 0000000..a0a961d --- /dev/null +++ b/scripts/ingest-industry-compliance.sh @@ -0,0 +1,443 @@ +#!/usr/bin/env bash +# ============================================================================= +# BreakPilot Compliance — Industry Compliance Ingestion +# +# Laedt 10 freie Industrie-Compliance-Dokumente herunter und ingestiert sie +# in Qdrant via die Core RAG-API (Port 8097). +# +# Dokumente: +# 1. EU Machinery Regulation 2023/1230 → bp_compliance_ce +# 2. EU Blue Guide 2022 → bp_compliance_ce +# 3. ENISA Secure by Design → bp_compliance_datenschutz +# 4. ENISA Supply Chain Security → bp_compliance_datenschutz +# 5. NIST SP 800-218 (SSDF) → bp_compliance_datenschutz +# 6. NIST Cybersecurity Framework 2.0 → bp_compliance_datenschutz +# 7. OECD AI Principles → bp_compliance_datenschutz +# 8. EU-IFRS Regulation 2023/1803 (DE) → bp_compliance_ce +# 9. EU-IFRS Regulation 2023/1803 (EN) → bp_compliance_ce +# 10. 
#     EFRAG Endorsement Status Report → bp_compliance_datenschutz
#
# Run on the Mac Mini:
#   ~/rag-ingestion/ingest-industry-compliance.sh [--skip-download] [--only PHASE]
#
# Phases: download, ce, datenschutz, verify
#
# Environment:
#   WORK_DIR                 working directory (default: $HOME/rag-ingestion)
#   CURL_INSECURE_DOWNLOADS  set to 1 to disable TLS verification for the
#                            public document downloads (off by default)
# =============================================================================
set -euo pipefail

# --- Configuration -----------------------------------------------------------
WORK_DIR="${WORK_DIR:-$HOME/rag-ingestion}"
RAG_BASE_URL="https://localhost:8097"
RAG_URL="${RAG_BASE_URL}/api/v1/documents/upload"
SEARCH_URL="${RAG_BASE_URL}/api/v1/search"
QDRANT_URL="http://localhost:6333"

# curl options are kept in arrays so they expand safely without relying on
# word-splitting. The RAG API is addressed via https://localhost, so -k
# (skip certificate verification) is kept for it only.
CURL_LOCAL_OPTS=(-sk --connect-timeout 10 --max-time 300)
# Public downloads verify TLS and fail on HTTP errors (--fail) so that an
# error page is never stored as if it were the requested PDF.
CURL_DOWNLOAD_OPTS=(-s --fail --connect-timeout 10 --max-time 300)
if [[ "${CURL_INSECURE_DOWNLOADS:-0}" == "1" ]]; then
  CURL_DOWNLOAD_OPTS+=(-k)
fi

# Counters (updated by upload_file, reported at the end of main)
UPLOADED=0
FAILED=0
SKIPPED=0

# CLI state (set by parse_args)
SKIP_DOWNLOAD=false
ONLY_PHASE=""

# Python snippet that summarises a search response read from stdin: prints the
# number of hits and, for up to three hits, the score and the first 80
# characters of the content. Prints " (parse error)" on any failure.
SEARCH_SUMMARY_PY='
import sys, json
try:
    data = json.load(sys.stdin)
    results = data.get("results", [])
    print(" Treffer: {}".format(len(results)))
    for r in results[:3]:
        print(" [{:.3f}] {}...".format(r.get("score", 0), r.get("content", "")[:80]))
except Exception:
    print(" (parse error)")
'

# --- Helpers -----------------------------------------------------------------
log()  { echo "[$(date '+%H:%M:%S')] $*"; }
ok()   { echo "[$(date '+%H:%M:%S')] ✓ $*"; }
warn() { echo "[$(date '+%H:%M:%S')] ⚠ $*" >&2; }
fail() { echo "[$(date '+%H:%M:%S')] ✗ $*" >&2; }

usage() {
  echo "Usage: $0 [--skip-download] [--only PHASE]"
  echo "Phases: download, ce, datenschutz, verify"
}

#######################################
# Parse command-line options.
# Globals:   SKIP_DOWNLOAD (written), ONLY_PHASE (written)
# Arguments: the script's positional parameters
# Returns:   exits 0 after --help, exits 1 on an unknown option or when
#            --only is given without a value
#######################################
parse_args() {
  while [[ $# -gt 0 ]]; do
    case "$1" in
      --skip-download)
        SKIP_DOWNLOAD=true
        shift
        ;;
      --only)
        # Without this check "$2" would trip `set -u` with an opaque message.
        if [[ $# -lt 2 ]]; then
          fail "--only requires a PHASE argument"
          exit 1
        fi
        ONLY_PHASE="$2"
        shift 2
        ;;
      -h | --help)
        usage
        exit 0
        ;;
      *)
        echo "Unknown option: $1" >&2
        exit 1
        ;;
    esac
  done
}

#######################################
# Upload one file to the RAG API and update the result counters.
# Globals:   RAG_URL, CURL_LOCAL_OPTS (read); UPLOADED, FAILED, SKIPPED (written)
# Arguments: $1 - path of the file to upload
#            $2 - target collection
#            $3 - data_type form field
#            $4 - use_case form field
#            $5 - year form field
#            $6 - metadata as a JSON string
#            $7 - display label (optional, defaults to the file's basename)
# Returns:   0 if the response contains "chunks_count" or "vectors_indexed",
#            1 if the file is missing, smaller than 100 bytes, or the upload
#            response contains neither key
#######################################
upload_file() {
  local file="$1"
  local collection="$2"
  local data_type="$3"
  local use_case="$4"
  local year="$5"
  local metadata_json="$6"
  local label="${7:-$(basename "$file")}"

  if [[ ! -f "$file" ]]; then
    warn "File not found: $file"
    FAILED=$((FAILED + 1))
    return 1
  fi

  # Try the BSD/macOS stat syntax first, then the GNU one.
  local filesize
  filesize=$(stat -f%z "$file" 2>/dev/null || stat -c%s "$file" 2>/dev/null || echo 0)
  if [[ "$filesize" -lt 100 ]]; then
    warn "File too small (${filesize}B), skipping: $label"
    SKIPPED=$((SKIPPED + 1))
    return 1
  fi

  log "Uploading: $label → $collection ($((filesize / 1024))KB)"

  # A failing curl leaves $response empty and is reported through the
  # "Upload failed" branch below instead of aborting the script.
  local response
  response=$(curl "${CURL_LOCAL_OPTS[@]}" -X POST "$RAG_URL" \
    -F "file=@${file}" \
    -F "collection=${collection}" \
    -F "data_type=${data_type}" \
    -F "use_case=${use_case}" \
    -F "year=${year}" \
    -F "chunk_strategy=recursive" \
    -F "chunk_size=512" \
    -F "chunk_overlap=50" \
    -F "metadata_json=${metadata_json}" \
    2>/dev/null) || true

  local read_field='import sys, json; print(json.load(sys.stdin).get(sys.argv[1], 0))'
  if grep -q '"chunks_count"' <<<"$response"; then
    local chunks
    chunks=$(python3 -c "$read_field" chunks_count <<<"$response" 2>/dev/null || echo "?")
    ok "$label → $chunks chunks"
    UPLOADED=$((UPLOADED + 1))
  elif grep -q '"vectors_indexed"' <<<"$response"; then
    local vectors
    vectors=$(python3 -c "$read_field" vectors_indexed <<<"$response" 2>/dev/null || echo "?")
    ok "$label → $vectors vectors"
    UPLOADED=$((UPLOADED + 1))
  else
    fail "Upload failed: $label"
    fail "Response: $response"
    FAILED=$((FAILED + 1))
    return 1
  fi
}

#######################################
# Download a PDF unless the target file already exists.
# The download goes to "<target>.part" and is renamed only on success, so an
# interrupted or failed transfer is never mistaken for a finished file on the
# next run.
# Globals:   CURL_DOWNLOAD_OPTS (read)
# Arguments: $1 - source URL
#            $2 - target path
# Returns:   0 if the file exists or was downloaded, 1 if the download failed
#######################################
download_pdf() {
  local url="$1"
  local target="$2"
  local partial="${target}.part"

  if [[ -f "$target" ]]; then
    log "PDF exists: $(basename "$target") (skipping)"
    return 0
  fi

  log "Downloading: $(basename "$target")"
  if ! curl "${CURL_DOWNLOAD_OPTS[@]}" -L "$url" -o "$partial" 2>/dev/null; then
    rm -f -- "$partial"
    warn "Download failed: $url"
    return 1
  fi
  mv -- "$partial" "$target"
}

#######################################
# Print the points_count that Qdrant reports for a collection.
# Globals:   QDRANT_URL (read)
# Arguments: $1 - collection name
# Outputs:   the count, or "?" if the request or the JSON parsing fails
#######################################
collection_count() {
  local col="$1"
  curl -s --connect-timeout 10 --max-time 30 "${QDRANT_URL}/collections/${col}" 2>/dev/null \
    | python3 -c 'import sys, json; print(json.load(sys.stdin)["result"]["points_count"])' 2>/dev/null \
    || echo "?"
}

#######################################
# Run one test search against the RAG API and print a short result summary.
# Globals:   SEARCH_URL, CURL_LOCAL_OPTS, SEARCH_SUMMARY_PY (read)
# Arguments: $1 - query text (must not contain characters that need JSON
#                 escaping; all callers pass fixed literals)
#            $2 - collection name
#######################################
run_test_search() {
  local query="$1"
  local collection="$2"
  local payload
  payload=$(printf '{"query":"%s","collection":"%s","top_k":3}' "$query" "$collection")

  log "Suche: '${query}' in ${collection}"
  curl "${CURL_LOCAL_OPTS[@]}" -X POST "$SEARCH_URL" \
    -H 'Content-Type: application/json' \
    -d "$payload" 2>/dev/null \
    | python3 -c "$SEARCH_SUMMARY_PY" 2>/dev/null || echo " (search failed)"
}

# =============================================================================
# PHASE A: Downloads (10 PDFs)
# =============================================================================
phase_download() {
  log "=========================================="
  log "PHASE A: Downloads (10 Industry Compliance PDFs)"
  log "=========================================="

  mkdir -p "$WORK_DIR/pdfs"

  # Each download is allowed to fail ("|| true"): download_pdf already warns,
  # and a missing file is counted as FAILED by upload_file later. Without this
  # a single unreachable source would abort the whole run under `set -e`.

  # --- A1: EUR-Lex ---
  log "--- EUR-Lex: Machinery Regulation ---"
  download_pdf \
    "https://eur-lex.europa.eu/legal-content/DE/TXT/PDF/?uri=CELEX:32023R1230" \
    "$WORK_DIR/pdfs/machinery_regulation_2023_1230.pdf" || true

  # --- A2: EU Blue Guide 2022 ---
  log "--- EU Blue Guide 2022 ---"
  download_pdf \
    "https://eur-lex.europa.eu/legal-content/DE/TXT/PDF/?uri=CELEX:52022XC0629(04)" \
    "$WORK_DIR/pdfs/blue_guide_2022.pdf" || true

  # --- A3: ENISA Publications ---
  log "--- ENISA Publications ---"
  download_pdf \
    "https://www.enisa.europa.eu/publications/secure-development-best-practices/@@download/fullReport" \
    "$WORK_DIR/pdfs/enisa_secure_by_design.pdf" || true

  download_pdf \
    "https://www.enisa.europa.eu/publications/threat-landscape-for-supply-chain-attacks/@@download/fullReport" \
    "$WORK_DIR/pdfs/enisa_supply_chain_security.pdf" || true

  # --- A4: NIST Publications ---
  log "--- NIST Publications ---"
  download_pdf \
    "https://nvlpubs.nist.gov/nistpubs/SpecialPublications/NIST.SP.800-218.pdf" \
    "$WORK_DIR/pdfs/nist_sp_800_218_ssdf.pdf" || true

  download_pdf \
    "https://nvlpubs.nist.gov/nistpubs/CSWP/NIST.CSWP.29.pdf" \
    "$WORK_DIR/pdfs/nist_csf_2_0.pdf" || true

  # --- A5: OECD AI Principles ---
  log "--- OECD AI Principles ---"
  download_pdf \
    "https://legalinstruments.oecd.org/api/print?ids=648&lang=en" \
    "$WORK_DIR/pdfs/oecd_ai_principles.pdf" || true

  # --- A6: EUR-Lex IFRS (DE + EN) ---
  log "--- EUR-Lex: EU-IFRS Regulation 2023/1803 ---"
  download_pdf \
    "https://eur-lex.europa.eu/legal-content/DE/TXT/PDF/?uri=CELEX:32023R1803" \
    "$WORK_DIR/pdfs/ifrs_regulation_2023_1803_de.pdf" || true

  download_pdf \
    "https://eur-lex.europa.eu/legal-content/EN/TXT/PDF/?uri=CELEX:32023R1803" \
    "$WORK_DIR/pdfs/ifrs_regulation_2023_1803_en.pdf" || true

  # --- A7: EFRAG Endorsement Status Report ---
  log "--- EFRAG Endorsement Status Report ---"
  download_pdf \
    "https://www.efrag.org/sites/default/files/media/document/2025-12/EFRAG%20Endorsement%20Status%20Report%2018%20December%202025.pdf" \
    "$WORK_DIR/pdfs/efrag_endorsement_status_2025.pdf" || true

  log "Downloads complete."
}

# =============================================================================
# PHASE B: EU legal texts → bp_compliance_ce
# =============================================================================
phase_ce() {
  log "=========================================="
  log "PHASE B: EU-Rechtstexte → bp_compliance_ce"
  log "=========================================="

  local col="bp_compliance_ce"
  local before
  before=$(collection_count "$col")
  log "Collection $col: $before chunks (before)"

  # upload_file records its own outcome in UPLOADED/FAILED/SKIPPED; "|| true"
  # keeps one bad document from aborting the remaining uploads under `set -e`.

  # 1. Machinery Regulation (EU) 2023/1230
  upload_file "$WORK_DIR/pdfs/machinery_regulation_2023_1230.pdf" "$col" "compliance_ce" "legal_reference" "2023" \
    '{"regulation_id":"eu_2023_1230","regulation_name_de":"Maschinenverordnung","regulation_name_en":"Machinery Regulation","regulation_short":"MACHINERY_REG","category":"maschinensicherheit","celex":"32023R1230","source":"eur-lex","license":"public_law"}' \
    "Maschinenverordnung (EU) 2023/1230" || true

  # 2. Blue Guide 2022
  upload_file "$WORK_DIR/pdfs/blue_guide_2022.pdf" "$col" "compliance_ce" "legal_reference" "2022" \
    '{"regulation_id":"eu_blue_guide_2022","regulation_name_de":"Leitfaden fuer die Umsetzung der Produktvorschriften (Blue Guide)","regulation_name_en":"Blue Guide on EU Product Rules","regulation_short":"BLUE_GUIDE","category":"produktregulierung","celex":"52022XC0629(04)","source":"eur-lex","license":"public_law"}' \
    "Blue Guide 2022 — EU-Produktvorschriften" || true

  # 8. EU-IFRS Regulation 2023/1803 (DE)
  upload_file "$WORK_DIR/pdfs/ifrs_regulation_2023_1803_de.pdf" "$col" "compliance_ce" "legal_reference" "2023" \
    '{"regulation_id":"eu_2023_1803","regulation_name_de":"IFRS-Uebernahmeverordnung","regulation_name_en":"IFRS Adoption Regulation","regulation_short":"EU_IFRS","category":"rechnungslegung","celex":"32023R1803","source":"eur-lex","license":"public_law","language":"de","endorsement_note":"Nur EU-endorsed IFRS. Neuere IASB-Standards sind moeglicherweise noch nicht uebernommen."}' \
    "EU-IFRS Regulation 2023/1803 (DE)" || true

  # 9. EU-IFRS Regulation 2023/1803 (EN)
  upload_file "$WORK_DIR/pdfs/ifrs_regulation_2023_1803_en.pdf" "$col" "compliance_ce" "legal_reference" "2023" \
    '{"regulation_id":"eu_2023_1803","regulation_name_de":"IFRS-Uebernahmeverordnung","regulation_name_en":"IFRS Adoption Regulation","regulation_short":"EU_IFRS","category":"rechnungslegung","celex":"32023R1803","source":"eur-lex","license":"public_law","language":"en","endorsement_note":"EU-endorsed IFRS only. Newer IASB standards may not yet be adopted by the EU."}' \
    "EU-IFRS Regulation 2023/1803 (EN)" || true

  local after
  after=$(collection_count "$col")
  log "Collection $col: $before → $after chunks"
}

# =============================================================================
# PHASE C: Frameworks/Guidance → bp_compliance_datenschutz
# =============================================================================
phase_datenschutz() {
  log "=========================================="
  log "PHASE C: Frameworks/Guidance → bp_compliance_datenschutz"
  log "=========================================="

  local col="bp_compliance_datenschutz"
  local before
  before=$(collection_count "$col")
  log "Collection $col: $before chunks (before)"

  # upload_file records its own outcome in UPLOADED/FAILED/SKIPPED; "|| true"
  # keeps one bad document from aborting the remaining uploads under `set -e`.

  # 3. ENISA Secure by Design
  upload_file "$WORK_DIR/pdfs/enisa_secure_by_design.pdf" "$col" "compliance_datenschutz" "guidance" "2023" \
    '{"source_id":"enisa","doc_type":"guidance","guideline_name":"Secure Software Development — Best Practices","license":"reuse_notice","attribution":"European Union Agency for Cybersecurity (ENISA)","source":"enisa.europa.eu"}' \
    "ENISA: Secure by Design Best Practices" || true

  # 4. ENISA Supply Chain Security
  upload_file "$WORK_DIR/pdfs/enisa_supply_chain_security.pdf" "$col" "compliance_datenschutz" "guidance" "2021" \
    '{"source_id":"enisa","doc_type":"guidance","guideline_name":"Threat Landscape for Supply Chain Attacks","license":"reuse_notice","attribution":"European Union Agency for Cybersecurity (ENISA)","source":"enisa.europa.eu"}' \
    "ENISA: Supply Chain Security Threat Landscape" || true

  # 5. NIST SP 800-218 (SSDF)
  upload_file "$WORK_DIR/pdfs/nist_sp_800_218_ssdf.pdf" "$col" "compliance_datenschutz" "guidance" "2022" \
    '{"source_id":"nist","doc_type":"framework","guideline_name":"Secure Software Development Framework (SSDF) SP 800-218","license":"public_domain","attribution":"National Institute of Standards and Technology (NIST)","source":"nist.gov"}' \
    "NIST SP 800-218 — Secure Software Development Framework" || true

  # 6. NIST Cybersecurity Framework 2.0
  upload_file "$WORK_DIR/pdfs/nist_csf_2_0.pdf" "$col" "compliance_datenschutz" "guidance" "2024" \
    '{"source_id":"nist","doc_type":"framework","guideline_name":"NIST Cybersecurity Framework (CSF) 2.0","license":"public_domain","attribution":"National Institute of Standards and Technology (NIST)","source":"nist.gov"}' \
    "NIST Cybersecurity Framework 2.0" || true

  # 7. OECD AI Principles
  upload_file "$WORK_DIR/pdfs/oecd_ai_principles.pdf" "$col" "compliance_datenschutz" "guidance" "2024" \
    '{"source_id":"oecd","doc_type":"guidance","guideline_name":"OECD Recommendation on Artificial Intelligence (AI Principles)","license":"reuse_notice","attribution":"Organisation for Economic Co-operation and Development (OECD)","source":"oecd.org"}' \
    "OECD AI Principles (Recommendation on AI)" || true

  # 10. EFRAG Endorsement Status Report
  upload_file "$WORK_DIR/pdfs/efrag_endorsement_status_2025.pdf" "$col" "compliance_datenschutz" "guidance" "2025" \
    '{"source_id":"efrag","doc_type":"guidance","guideline_name":"EFRAG Endorsement Status Report (Dec 2025)","license":"reuse_notice","attribution":"European Financial Reporting Advisory Group (EFRAG)","source":"efrag.org"}' \
    "EFRAG Endorsement Status Report (Dec 2025)" || true

  local after
  after=$(collection_count "$col")
  log "Collection $col: $before → $after chunks"
}

# =============================================================================
# PHASE D: Verification
# =============================================================================
phase_verify() {
  log "=========================================="
  log "PHASE D: Verifizierung"
  log "=========================================="

  echo ""
  echo "=== Collection Stats ==="
  local col count
  for col in bp_compliance_ce bp_compliance_datenschutz; do
    count=$(collection_count "$col")
    printf " %-30s %s chunks\n" "$col" "$count"
  done

  echo ""
  echo "=== Test-Suchen ==="

  run_test_search "Maschinenverordnung CE-Kennzeichnung" "bp_compliance_ce"
  run_test_search "Supply Chain Cybersecurity ENISA" "bp_compliance_datenschutz"
  run_test_search "NIST Cybersecurity Framework Governance" "bp_compliance_datenschutz"
  run_test_search "OECD AI Principles transparency accountability" "bp_compliance_datenschutz"
  run_test_search "IFRS Rechnungslegung EU endorsed" "bp_compliance_ce"
  run_test_search "EFRAG endorsement status IFRS 18" "bp_compliance_datenschutz"
}

# =============================================================================
# MAIN
# =============================================================================
#######################################
# Entry point: parse options, run the selected phase (or all phases) and
# print the upload summary.
# Globals:   SKIP_DOWNLOAD, ONLY_PHASE, UPLOADED, FAILED, SKIPPED (read)
# Arguments: the script's positional parameters
# Returns:   exits 1 if the phase name is unknown or any upload failed
#######################################
main() {
  parse_args "$@"

  log "============================================================"
  log "BreakPilot Industry Compliance Ingestion"
  log "Work dir: $WORK_DIR"
  log "RAG API: $RAG_URL"
  log "============================================================"

  if [[ -n "$ONLY_PHASE" ]]; then
    case "$ONLY_PHASE" in
      download) phase_download ;;
      ce) phase_ce ;;
      datenschutz) phase_datenschutz ;;
      verify) phase_verify ;;
      *)
        fail "Unknown phase: $ONLY_PHASE"
        exit 1
        ;;
    esac
  else
    if [[ "$SKIP_DOWNLOAD" == "false" ]]; then
      phase_download
    else
      log "(Skipping downloads)"
    fi

    echo ""
    phase_ce

    echo ""
    phase_datenschutz

    echo ""
    phase_verify
  fi

  echo ""
  log "============================================================"
  log "DONE — Uploaded: $UPLOADED | Failed: $FAILED | Skipped: $SKIPPED"
  log "============================================================"

  if [[ "$FAILED" -gt 0 ]]; then
    exit 1
  fi
}

main "$@"