From 755ea443430d8f75af5351cbd8d74db0b0dc70eb Mon Sep 17 00:00:00 2001 From: Benjamin Admin Date: Fri, 12 Jun 2026 09:18:03 +0200 Subject: [PATCH] feat(iace): refresh architecture tab + data-flow diagram + E1 ingest script MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - architecture.go: DataSources now reflect the real ingested set (ESAW 2023, BLS CFOI, OSHA OTM, PRISM, cobot CC-BY, HSE) with their RAG collections; risk stage cites BLS + the searchable RAG layer; matrix stage now mentions the distance-benchmark dimension. - Architektur & Datenfluss tab: new DataFlowDiagram — 4 lanes (input → knowledge/RAG-evidence → deterministic engine → outputs) with live counts. - scripts/ingest_iace_kb.sh: idempotent E1 ingest — creates the 2 collections and uploads the 6 datasources docs against a configurable RAG_URL (for prod Qdrant), with retry. Co-Authored-By: Claude Opus 4.7 --- .../_components/DataFlowDiagram.tsx | 98 +++++++++++++++++++ .../sdk/iace/[projectId]/architektur/page.tsx | 4 + .../internal/iace/architecture.go | 25 ++--- ai-compliance-sdk/scripts/ingest_iace_kb.sh | 79 +++++++++++++++ 4 files changed, 195 insertions(+), 11 deletions(-) create mode 100644 admin-compliance/app/sdk/iace/[projectId]/architektur/_components/DataFlowDiagram.tsx create mode 100755 ai-compliance-sdk/scripts/ingest_iace_kb.sh diff --git a/admin-compliance/app/sdk/iace/[projectId]/architektur/_components/DataFlowDiagram.tsx b/admin-compliance/app/sdk/iace/[projectId]/architektur/_components/DataFlowDiagram.tsx new file mode 100644 index 00000000..3d67adc5 --- /dev/null +++ b/admin-compliance/app/sdk/iace/[projectId]/architektur/_components/DataFlowDiagram.tsx @@ -0,0 +1,98 @@ +'use client' + +import type { ReactNode } from 'react' +import type { Architecture } from '../_hooks/useArchitecture' + +function Box({ title, sub, accent }: { title: string; sub?: string; accent?: 'purple' | 'amber' | 'green' | 'gray' }) { + const c = + accent === 
'purple' + ? 'border-purple-300 bg-purple-50/60 dark:border-purple-700 dark:bg-purple-900/20' + : accent === 'amber' + ? 'border-amber-300 bg-amber-50/60 dark:border-amber-700 dark:bg-amber-900/20' + : accent === 'green' + ? 'border-green-300 bg-green-50/60 dark:border-green-700 dark:bg-green-900/20' + : 'border-gray-200 bg-white dark:border-gray-700 dark:bg-gray-800' + return ( +
+
{title}
+ {sub &&
{sub}
} +
+ ) +} + +function Lane({ label, children }: { label: string; children: ReactNode }) { + return ( +
+
{label}
+
{children}
+
+ ) +} + +// Arrow between lanes: horizontal on desktop, down-chevron when wrapped. +function Arrow() { + return ( +
+ + +
+ ) +} + +/** + * Audit data-flow diagram: where every datum enters, how it is processed and + * where it lands. Four lanes (input → knowledge/evidence → deterministic engine + * → outputs); counts are live from the architecture endpoint. + */ +export function DataFlowDiagram({ data }: { data: Architecture }) { + const libCount = (needle: string) => data.libraries.find((l) => l.name.includes(needle))?.count + const stages = data.stages + + return ( +
+

Datenfluss (Überblick)

+
+
+ {/* 1 — Input */} + + + + + + {/* 2 — Knowledge + evidence */} + + + + + + + + {/* 3 — Deterministic engine */} + +
+ {stages.map((s) => ( +
+ {s.title} +
+ ))} +
+
+ + + {/* 4 — Outputs */} + + + + + + + +
+

+ Deterministische Engine (links→rechts) = reproduzierbar ohne LLM. Die RAG-Evidenz verankert/belegt die + Risiko-Zahlen, ersetzt aber nicht die Tier-Logik. Norm-Tabellen werden nie reproduziert. +

+
+
+ ) +} diff --git a/admin-compliance/app/sdk/iace/[projectId]/architektur/page.tsx b/admin-compliance/app/sdk/iace/[projectId]/architektur/page.tsx index e8adf243..3458a7df 100644 --- a/admin-compliance/app/sdk/iace/[projectId]/architektur/page.tsx +++ b/admin-compliance/app/sdk/iace/[projectId]/architektur/page.tsx @@ -2,6 +2,7 @@ import { useState } from 'react' import { useArchitecture, type ArchStage } from './_hooks/useArchitecture' +import { DataFlowDiagram } from './_components/DataFlowDiagram' export default function ArchitekturPage() { const { data, loading } = useArchitecture() @@ -26,6 +27,9 @@ export default function ArchitekturPage() {

+ {/* Data-flow overview diagram */} + + {/* Pipeline flow */}

Deterministische Pipeline

diff --git a/ai-compliance-sdk/internal/iace/architecture.go b/ai-compliance-sdk/internal/iace/architecture.go index 0cac3701..014b1dcf 100644 --- a/ai-compliance-sdk/internal/iace/architecture.go +++ b/ai-compliance-sdk/internal/iace/architecture.go @@ -115,8 +115,8 @@ func BuildArchitecture() Architecture { ID: "risk", Title: "8 · Risiko (S/F/W/P + Konfidenz)", Summary: "Konfidenz-bewusste Risikoschätzung je Gefährdung — als Bereich, nicht Punktwert.", Input: "Gefährdungskategorie + Szenario (Kontaktart) + Lebensphasen", - Logic: "EstimateSeverity/Frequency/ProbabilityW/AvoidabilityP → R = S×(F+W+P), Band + Bereich (±1 je validierter Genauigkeit) + Konfidenz (Verletzungsmechanismus eindeutig?). W verankert am ESAW-Kontaktmodus-Ranking; eigenes Modell, KEINE Norm-Tabelle.", - DataSource: "risk_estimation.go + risk_data_sources.go (ESAW, CC BY 4.0)", + Logic: "EstimateSeverity/Frequency/ProbabilityW/AvoidabilityP → R = S×(F+W+P), Band + Bereich (±1 je validierter Genauigkeit) + Konfidenz (Verletzungsmechanismus eindeutig?). W verankert am ESAW-2023-Kontaktmodus-Ranking, Schwere zusätzlich an BLS-CFOI; eigenes Modell, KEINE Norm-Tabelle. Belege durchsuchbar im RAG (bp_iace_accident_stats).", + DataSource: "risk_estimation.go + risk_data_sources.go (ESAW hsw_ph3_08 2023 + BLS CFOI) + RAG bp_iace_accident_stats", Example: "Elektrischer Schlag: R≈32 (Bereich 21–45, mittel–kritisch), Konfidenz hoch.", }, { @@ -129,11 +129,11 @@ func BuildArchitecture() Architecture { }, { ID: "matrix", Title: "10 · Risiko-Matrix / GT-Benchmark", - Summary: "Projektweite Risiko-Matrix (Schwere × Wahrscheinlichkeit) und Abgleich gegen Experten-Ground-Truth.", + Summary: "Projektweite Risiko-Matrix (Schwere × Wahrscheinlichkeit), Abgleich gegen Experten-GT inkl. 
Abstands-/Geschwindigkeits-Maße.", Input: "Alle Gefährdungen + (optional) GT-Projekt", - Logic: "BuildRiskMatrix aggregiert je Zelle; Benchmark vergleicht Tool-S/F/W/P + Fine-Kinney gegen Fachmann-GT (Übereinstimmung within±1, Rang-Konkordanz).", - DataSource: "risk_matrix.go + risk_benchmark.go", - Example: "Kistenhub vs. eigene GT: S±1 94 %, Ranking 86 %.", + Logic: "BuildRiskMatrix aggregiert je Zelle; Benchmark vergleicht Tool-S/F/W/P + Fine-Kinney gegen Fachmann-GT (within±1, Rang-Konkordanz) UND die mm-/mm-s-Maße (CompareDistances: matched / Lücken / Extras + Agreement-%).", + DataSource: "risk_matrix.go + risk_benchmark.go + distance_benchmark.go", + Example: "Kistenhub: Gefährdungs-Coverage hoch; Abstands-Maße nach Lückenfüllung 4/4 (100 %).", }, }, Libraries: []ArchLibrary{ @@ -149,11 +149,14 @@ func BuildArchitecture() Architecture { {Name: "OSHA-Mindestabstände", Count: len(GetOSHAMinimumDistances()), SourceFile: "minimum_distances.go", Description: "OSHA 29 CFR 1910 Sicherheitsabstände (Public Domain) + Maßnahmen-Verknüpfung; EU-Normen nur referenziert."}, }, DataSources: []ArchDataSource{ - {Name: "Eurostat ESAW (Kontaktmodus-Unfallstatistik)", License: "CC BY 4.0", Usage: "Anker für Wahrscheinlichkeits-Tiers (W) + zitierbare Quoten", Status: "verwendet"}, - {Name: "US BLS / OSHA (Arbeitsunfälle)", License: "Public Domain", Usage: "Ergänzende Häufigkeits-/Schwere-Anker + OSHA-Maßnahmen", Status: "verwendet"}, - {Name: "UK HSE (RIDDOR)", License: "Open Government Licence v3", Usage: "Zulässige Ergänzung (Attribution)", Status: "verwendet"}, - {Name: "DGUV-Statistik", License: "nur redaktionell, keine Bearbeitung", Usage: "—", Status: "ausgeschlossen"}, - {Name: "DIN/Beuth/ISO/IEC Risikograph-Tabellen", License: "urheberrechtlich", Usage: "Nur als Referenz genannt, NIE reproduziert/re-implementiert", Status: "ausgeschlossen"}, + {Name: "Eurostat ESAW (Kontaktmodus, hsw_ph3_08, 2023)", License: "CC BY 4.0", Usage: "Anker für W-Tiers + zitierbare Quoten → 
RAG bp_iace_accident_stats", Status: "verwendet"}, + {Name: "US BLS CFOI (tödliche Arbeitsunfälle 2023–24)", License: "Public Domain", Usage: "US-Schwere-Anker (Contact = führend in Manufacturing) → RAG bp_iace_accident_stats", Status: "verwendet"}, + {Name: "OSHA Technical Manual / eTools (Roboter, Mindestabstände)", License: "Public Domain", Usage: "250 mm/s Teach-Anker + 29 CFR 1910 Sicherheitsabstände → minimum_distances.go + RAG bp_iace_safety_kb", Status: "verwendet"}, + {Name: "OPSS PRISM (Severity × Probability Risikomatrix)", License: "Open Government Licence v3", Usage: "Methodik-Anker für S×W → Risiko-Level (RAPEX-aligned) → RAG bp_iace_safety_kb", Status: "verwendet"}, + {Name: "Cobot-Schmerzschwellen (Behrens 2022 / Park 2019)", License: "CC BY 4.0", Usage: "Kraft/Druck-Limits je Körperregion (Daten hinter ISO/TS 15066, nicht die Norm) → RAG bp_iace_safety_kb", Status: "verwendet"}, + {Name: "UK HSE Beispiel-Risikobeurteilungen", License: "Open Government Licence v3", Usage: "qualitative Gefährdung→Maßnahme-Struktur → RAG bp_iace_safety_kb", Status: "verwendet"}, + {Name: "DGUV/IFA-Statistik & -Tabellen", License: "nur redaktionell, keine Bearbeitung", Usage: "—", Status: "ausgeschlossen"}, + {Name: "DIN/Beuth/ISO/IEC Norm- & Risikograph-Tabellen", License: "urheberrechtlich", Usage: "Nur als Referenz genannt, NIE reproduziert (ISO-15066-Werte nur via CC-BY-Papers)", Status: "ausgeschlossen"}, }, NormMatching: []string{ "C-Normen (maschinenspezifisch): Match nur über die kanonische Maschinentyp-FAMILIE — `canonicalMachineType` faltet das feingranulare Normen-Vokabular (455 Keys: welding_machine, band_saw, mobile_crane …) auf die 68 Dropdown-Keys. 
Ohne Familien-Match wird die C-Norm verworfen (kein Tag/Kategorie-Fallback → keine Fremd-Domänen-Normen).", diff --git a/ai-compliance-sdk/scripts/ingest_iace_kb.sh b/ai-compliance-sdk/scripts/ingest_iace_kb.sh new file mode 100755 index 00000000..5afabfe9 --- /dev/null +++ b/ai-compliance-sdk/scripts/ingest_iace_kb.sh @@ -0,0 +1,79 @@ +#!/usr/bin/env bash +# Ingest the IACE open-source knowledge-base documents into a RAG/Qdrant target. +# +# Idempotent: creates the two collections (1024-dim, BGE-M3) if missing and +# uploads each versioned source doc with retry. Use it to populate a fresh +# Qdrant (e.g. production) from the repo — the docs under datasources/ are the +# single source of truth. +# +# Usage: +# RAG_URL=https://rag.prod.example ./scripts/ingest_iace_kb.sh +# ./scripts/ingest_iace_kb.sh # defaults to https://127.0.0.1:8097 +# +# Env: +# RAG_URL base URL of the RAG service (default https://127.0.0.1:8097) +# INSECURE set to 0 to disable curl -k (default 1, for self-signed dev certs) +set -uo pipefail + +RAG_URL="${RAG_URL:-https://127.0.0.1:8097}" +INSECURE="${INSECURE:-1}" +VECTOR_SIZE=1024 + +CURL=(curl -sS --max-time 120) +[ "$INSECURE" = "1" ] && CURL+=(-k) + +# Resolve the datasources dir relative to this script (repo-portable). +SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" +DS_DIR="$SCRIPT_DIR/../internal/iace/datasources" + +# doc -> collection mapping (parallel arrays; macOS bash 3.2 compatible). 
+# Source documents, one pipe-delimited record per entry:
+#   file | target collection | title | license
+# A single indexed array of records is used (no associative array), which
+# keeps the script runnable on bash 3.2.
+DOCS=(
+  "esaw_accident_stats_2023.md|bp_iace_accident_stats|Accidents at work - ESAW 2023|CC BY 4.0"
+  "bls_cfoi_fatal_2024.md|bp_iace_accident_stats|BLS CFOI fatal injuries 2023-24|US Public Domain"
+  "prism_risk_methodology.md|bp_iace_safety_kb|PRISM risk methodology|OGL v3"
+  "cobot_biomech_limits.md|bp_iace_safety_kb|Cobot biomechanical pain limits|CC BY 4.0"
+  "hse_example_risk_assessments.md|bp_iace_safety_kb|HSE example risk assessments|OGL v3"
+  "osha_robot_safety.md|bp_iace_safety_kb|OSHA industrial robot safety|US Public Domain"
+)
+
+# Ask the RAG service to create collection $1 with VECTOR_SIZE dimensions.
+# Best effort: this never fails the run. curl is invoked without --fail, so a
+# reply carrying an HTTP error status still counts as "reached the service";
+# only a transport-level failure (unreachable host, TLS error, timeout)
+# triggers the warning below.
+create_collection() {
+  local name="$1"
+  echo " • ensure collection $name (${VECTOR_SIZE}d)"
+  if ! "${CURL[@]}" -X POST "$RAG_URL/api/v1/collections" \
+    -H 'Content-Type: application/json' \
+    -d "{\"name\":\"$name\",\"vector_size\":$VECTOR_SIZE}" >/dev/null 2>&1; then
+    echo " ! could not reach $RAG_URL to ensure $name (continuing)" >&2
+  fi
+  return 0
+}
+
+# Upload one datasource document.
+#   $1 file name under DS_DIR   $2 collection   $3 title   $4 license
+# Makes up to three attempts; an attempt counts as successful when the reply
+# contains "chunks_count". Returns 0 on success, 1 when the file is missing or
+# every attempt failed.
+upload() {
+  local file="$1" collection="$2" title="$3" license="$4"
+  local path="$DS_DIR/$file"
+  if [ ! -f "$path" ]; then echo " ✗ MISSING: $path"; return 1; fi
+  # title/license are spliced in verbatim, so they must not contain '"' or '\'.
+  local meta="{\"title\":\"$title\",\"license\":\"$license\",\"source\":\"iace_kb\"}"
+  local try resp=""
+  for try in 1 2 3; do
+    # NOTE(review): year=2024 is sent for every document, including the
+    # ESAW 2023 one — confirm that this is intended.
+    resp="$("${CURL[@]}" -X POST "$RAG_URL/api/v1/documents/upload" \
+      -F "file=@$path" -F "collection=$collection" \
+      -F 'data_type=safety_kb' -F 'use_case=iace_risk' -F 'year=2024' \
+      -F "metadata_json=$meta" 2>&1)"
+    # Test the variable directly instead of `echo | grep -q`: with pipefail
+    # enabled, grep -q exiting at the first match can make echo die of SIGPIPE,
+    # turning a successful upload into a spurious retry (and a re-upload).
+    case "$resp" in
+      *chunks_count*)
+        echo " ✓ $file -> $collection ($(grep -o '"chunks_count":[0-9]*' <<<"$resp"))"
+        return 0
+        ;;
+    esac
+    # Back off before the next attempt, but do not stall after the last one.
+    if [ "$try" -lt 3 ]; then sleep 4; fi
+  done
+  echo " ✗ FAILED $file: $(head -c 120 <<<"$resp")"
+  return 1
+}
+
+echo "Ingesting IACE KB into $RAG_URL"
+# Unique collections first.
+for c in bp_iace_accident_stats bp_iace_safety_kb; do create_collection "$c"; done
+
+# Upload every document; record a failure but keep going so that one bad
+# document does not block the remaining ones.
+rc=0
+for entry in "${DOCS[@]}"; do
+  IFS='|' read -r file collection title license <<<"$entry"
+  upload "$file" "$collection" "$title" "$license" || rc=1
+done
+
+echo "Done (exit $rc)."
+exit $rc