feat(iace): refresh architecture tab + data-flow diagram + E1 ingest script
- architecture.go: DataSources now reflect the real ingested set (ESAW 2023, BLS CFOI, OSHA OTM, PRISM, cobot CC-BY, HSE) with their RAG collections; the risk stage cites BLS and the searchable RAG layer; the matrix stage now mentions the distance-benchmark dimension.
- Architektur & Datenfluss tab: new DataFlowDiagram with 4 lanes (input → knowledge/RAG evidence → deterministic engine → outputs) and live counts.
- scripts/ingest_iace_kb.sh: idempotent E1 ingest that creates the 2 collections and uploads the 6 datasources docs against a configurable RAG_URL (for prod Qdrant), with retry.

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
This commit is contained in:
@@ -0,0 +1,98 @@
|
||||
'use client'
|
||||
|
||||
import type { ReactNode } from 'react'
|
||||
import type { Architecture } from '../_hooks/useArchitecture'
|
||||
|
||||
type BoxAccent = 'purple' | 'amber' | 'green' | 'gray'

/**
 * Border/background classes for a box accent. Anything other than purple,
 * amber or green (including an omitted accent) gets the neutral gray look.
 * The class names are kept as complete string literals.
 */
function boxAccentClasses(accent?: BoxAccent): string {
  switch (accent) {
    case 'purple':
      return 'border-purple-300 bg-purple-50/60 dark:border-purple-700 dark:bg-purple-900/20'
    case 'amber':
      return 'border-amber-300 bg-amber-50/60 dark:border-amber-700 dark:bg-amber-900/20'
    case 'green':
      return 'border-green-300 bg-green-50/60 dark:border-green-700 dark:bg-green-900/20'
    default:
      return 'border-gray-200 bg-white dark:border-gray-700 dark:bg-gray-800'
  }
}

/**
 * Small bordered card used as a node in the diagram.
 *
 * @param title  Main label, always rendered.
 * @param sub    Optional secondary line; rendered only when truthy.
 * @param accent Optional colour accent; defaults to the neutral gray style.
 */
function Box({ title, sub, accent }: { title: string; sub?: string; accent?: BoxAccent }) {
  const accentClasses = boxAccentClasses(accent)
  const subLine = sub && <div className="text-[10px] text-gray-500 leading-tight mt-0.5">{sub}</div>

  return (
    <div className={`rounded-lg border ${accentClasses} px-2.5 py-1.5`}>
      <div className="text-[11px] font-medium text-gray-800 dark:text-gray-200 leading-tight">{title}</div>
      {subLine}
    </div>
  )
}
|
||||
|
||||
interface LaneProps {
  /** Heading shown above the lane (rendered uppercase via CSS). */
  label: string
  /** Lane content, stacked vertically. */
  children: ReactNode
}

/**
 * One column of the diagram: a small centered heading with its children
 * stacked underneath.
 */
function Lane(props: LaneProps) {
  const { label, children } = props

  const heading = (
    <div className="text-[10px] font-semibold uppercase tracking-wide text-gray-400 text-center">{label}</div>
  )

  return (
    <div className="flex-1 min-w-[150px] space-y-2">
      {heading}
      <div className="space-y-1.5">{children}</div>
    </div>
  )
}
|
||||
|
||||
/**
 * Connector drawn between two lanes. Renders both glyphs and lets the `lg`
 * breakpoint pick one: "→" at `lg` and above (lanes side by side), "↓" below
 * it (lanes stacked).
 */
function Arrow() {
  const wideGlyph = <span className="hidden lg:block text-lg">→</span>
  const narrowGlyph = <span className="lg:hidden text-sm">↓</span>

  return (
    <div className="flex items-center justify-center text-gray-300 dark:text-gray-600 shrink-0 px-0.5">
      {wideGlyph}
      {narrowGlyph}
    </div>
  )
}
|
||||
|
||||
/**
 * Data-flow overview for audits: shows where data enters, which knowledge
 * bases and evidence collections it passes, the engine stages that process
 * it and the resulting outputs, laid out as four lanes.
 *
 * Library counts and the engine stage titles are read from `data`; every
 * other label is static text.
 *
 * @param data Architecture description providing `libraries` and `stages`.
 */
export function DataFlowDiagram({ data }: { data: Architecture }) {
  const { libraries, stages } = data

  // Count of the first library whose name contains `needle`; an en dash when
  // no library matches or the match carries no count.
  const countLabel = (needle: string) => libraries.find((lib) => lib.name.includes(needle))?.count ?? '–'

  const librarySummary = [
    `Patterns ${countLabel('Pattern')}`,
    `Maßnahmen ${countLabel('Maßnahmen')}`,
    `Normen ${countLabel('Normen')}`,
    `OSHA-Abstände ${countLabel('OSHA')}`,
  ].join(' · ')

  // Static result boxes of the last lane, in display order.
  const outputs = [
    { title: 'Gefährdungen', sub: 'Szenario/Trigger/Harm/Zone' },
    { title: 'Maßnahmen', sub: '+ OSHA-Mindestabstand' },
    { title: 'Risiko', sub: 'S/F/W/P + Konfidenz (Bereich)' },
    { title: 'Normen', sub: 'A/B/C, DIN↔OSHA' },
    { title: 'Benchmark', sub: 'Coverage + Abstands-Agreement %' },
  ]

  return (
    <section className="space-y-2">
      <h2 className="text-sm font-semibold text-gray-700 dark:text-gray-300">Datenfluss (Überblick)</h2>
      <div className="rounded-xl border border-gray-200 dark:border-gray-700 bg-gray-50/50 dark:bg-gray-900/20 p-3 overflow-x-auto">
        <div className="flex flex-col lg:flex-row gap-1.5 lg:items-stretch min-w-[280px]">
          {/* Lane 1: input */}
          <Lane label="Eingabe">
            <Box title="Grenzen-Formular" sub="17 Felder, EN ISO 12100" accent="purple" />
          </Lane>
          <Arrow />

          {/* Lane 2: knowledge bases and evidence collections */}
          <Lane label="Wissensbasen + Evidenz">
            <Box title="Code-Bibliotheken" sub={librarySummary} />
            <Box title="RAG bp_iace_accident_stats" sub="ESAW 2023 + BLS CFOI (Risiko-Anker)" accent="amber" />
            <Box title="RAG bp_iace_safety_kb" sub="PRISM · Cobot · HSE · OSHA" accent="amber" />
          </Lane>
          <Arrow />

          {/* Lane 3: engine stages, one line per stage title */}
          <Lane label="Deterministische Engine">
            <div className="rounded-lg border border-gray-200 dark:border-gray-700 bg-white dark:bg-gray-800 p-1.5 space-y-1">
              {stages.map((stage) => (
                <div key={stage.id} className="text-[10px] text-gray-600 dark:text-gray-300 leading-tight">
                  {stage.title}
                </div>
              ))}
            </div>
          </Lane>
          <Arrow />

          {/* Lane 4: outputs */}
          <Lane label="Ausgaben">
            {outputs.map((output) => (
              <Box key={output.title} title={output.title} sub={output.sub} accent="green" />
            ))}
          </Lane>
        </div>
        <p className="text-[10px] text-gray-400 mt-2">
          Deterministische Engine (links→rechts) = reproduzierbar ohne LLM. Die RAG-Evidenz verankert/belegt die
          Risiko-Zahlen, ersetzt aber nicht die Tier-Logik. Norm-Tabellen werden nie reproduziert.
        </p>
      </div>
    </section>
  )
}
|
||||
@@ -2,6 +2,7 @@
|
||||
|
||||
import { useState } from 'react'
|
||||
import { useArchitecture, type ArchStage } from './_hooks/useArchitecture'
|
||||
import { DataFlowDiagram } from './_components/DataFlowDiagram'
|
||||
|
||||
export default function ArchitekturPage() {
|
||||
const { data, loading } = useArchitecture()
|
||||
@@ -26,6 +27,9 @@ export default function ArchitekturPage() {
|
||||
</p>
|
||||
</div>
|
||||
|
||||
{/* Data-flow overview diagram */}
|
||||
<DataFlowDiagram data={data} />
|
||||
|
||||
{/* Pipeline flow */}
|
||||
<section className="space-y-2">
|
||||
<h2 className="text-sm font-semibold text-gray-700 dark:text-gray-300">Deterministische Pipeline</h2>
|
||||
|
||||
@@ -115,8 +115,8 @@ func BuildArchitecture() Architecture {
|
||||
ID: "risk", Title: "8 · Risiko (S/F/W/P + Konfidenz)",
|
||||
Summary: "Konfidenz-bewusste Risikoschätzung je Gefährdung — als Bereich, nicht Punktwert.",
|
||||
Input: "Gefährdungskategorie + Szenario (Kontaktart) + Lebensphasen",
|
||||
Logic: "EstimateSeverity/Frequency/ProbabilityW/AvoidabilityP → R = S×(F+W+P), Band + Bereich (±1 je validierter Genauigkeit) + Konfidenz (Verletzungsmechanismus eindeutig?). W verankert am ESAW-Kontaktmodus-Ranking; eigenes Modell, KEINE Norm-Tabelle.",
|
||||
DataSource: "risk_estimation.go + risk_data_sources.go (ESAW, CC BY 4.0)",
|
||||
Logic: "EstimateSeverity/Frequency/ProbabilityW/AvoidabilityP → R = S×(F+W+P), Band + Bereich (±1 je validierter Genauigkeit) + Konfidenz (Verletzungsmechanismus eindeutig?). W verankert am ESAW-2023-Kontaktmodus-Ranking, Schwere zusätzlich an BLS-CFOI; eigenes Modell, KEINE Norm-Tabelle. Belege durchsuchbar im RAG (bp_iace_accident_stats).",
|
||||
DataSource: "risk_estimation.go + risk_data_sources.go (ESAW hsw_ph3_08 2023 + BLS CFOI) + RAG bp_iace_accident_stats",
|
||||
Example: "Elektrischer Schlag: R≈32 (Bereich 21–45, mittel–kritisch), Konfidenz hoch.",
|
||||
},
|
||||
{
|
||||
@@ -129,11 +129,11 @@ func BuildArchitecture() Architecture {
|
||||
},
|
||||
{
|
||||
ID: "matrix", Title: "10 · Risiko-Matrix / GT-Benchmark",
|
||||
Summary: "Projektweite Risiko-Matrix (Schwere × Wahrscheinlichkeit) und Abgleich gegen Experten-Ground-Truth.",
|
||||
Summary: "Projektweite Risiko-Matrix (Schwere × Wahrscheinlichkeit), Abgleich gegen Experten-GT inkl. Abstands-/Geschwindigkeits-Maße.",
|
||||
Input: "Alle Gefährdungen + (optional) GT-Projekt",
|
||||
Logic: "BuildRiskMatrix aggregiert je Zelle; Benchmark vergleicht Tool-S/F/W/P + Fine-Kinney gegen Fachmann-GT (Übereinstimmung within±1, Rang-Konkordanz).",
|
||||
DataSource: "risk_matrix.go + risk_benchmark.go",
|
||||
Example: "Kistenhub vs. eigene GT: S±1 94 %, Ranking 86 %.",
|
||||
Logic: "BuildRiskMatrix aggregiert je Zelle; Benchmark vergleicht Tool-S/F/W/P + Fine-Kinney gegen Fachmann-GT (within±1, Rang-Konkordanz) UND die mm-/mm-s-Maße (CompareDistances: matched / Lücken / Extras + Agreement-%).",
|
||||
DataSource: "risk_matrix.go + risk_benchmark.go + distance_benchmark.go",
|
||||
Example: "Kistenhub: Gefährdungs-Coverage hoch; Abstands-Maße nach Lückenfüllung 4/4 (100 %).",
|
||||
},
|
||||
},
|
||||
Libraries: []ArchLibrary{
|
||||
@@ -149,11 +149,14 @@ func BuildArchitecture() Architecture {
|
||||
{Name: "OSHA-Mindestabstände", Count: len(GetOSHAMinimumDistances()), SourceFile: "minimum_distances.go", Description: "OSHA 29 CFR 1910 Sicherheitsabstände (Public Domain) + Maßnahmen-Verknüpfung; EU-Normen nur referenziert."},
|
||||
},
|
||||
DataSources: []ArchDataSource{
|
||||
{Name: "Eurostat ESAW (Kontaktmodus-Unfallstatistik)", License: "CC BY 4.0", Usage: "Anker für Wahrscheinlichkeits-Tiers (W) + zitierbare Quoten", Status: "verwendet"},
|
||||
{Name: "US BLS / OSHA (Arbeitsunfälle)", License: "Public Domain", Usage: "Ergänzende Häufigkeits-/Schwere-Anker + OSHA-Maßnahmen", Status: "verwendet"},
|
||||
{Name: "UK HSE (RIDDOR)", License: "Open Government Licence v3", Usage: "Zulässige Ergänzung (Attribution)", Status: "verwendet"},
|
||||
{Name: "DGUV-Statistik", License: "nur redaktionell, keine Bearbeitung", Usage: "—", Status: "ausgeschlossen"},
|
||||
{Name: "DIN/Beuth/ISO/IEC Risikograph-Tabellen", License: "urheberrechtlich", Usage: "Nur als Referenz genannt, NIE reproduziert/re-implementiert", Status: "ausgeschlossen"},
|
||||
{Name: "Eurostat ESAW (Kontaktmodus, hsw_ph3_08, 2023)", License: "CC BY 4.0", Usage: "Anker für W-Tiers + zitierbare Quoten → RAG bp_iace_accident_stats", Status: "verwendet"},
|
||||
{Name: "US BLS CFOI (tödliche Arbeitsunfälle 2023–24)", License: "Public Domain", Usage: "US-Schwere-Anker (Contact = führend in Manufacturing) → RAG bp_iace_accident_stats", Status: "verwendet"},
|
||||
{Name: "OSHA Technical Manual / eTools (Roboter, Mindestabstände)", License: "Public Domain", Usage: "250 mm/s Teach-Anker + 29 CFR 1910 Sicherheitsabstände → minimum_distances.go + RAG bp_iace_safety_kb", Status: "verwendet"},
|
||||
{Name: "OPSS PRISM (Severity × Probability Risikomatrix)", License: "Open Government Licence v3", Usage: "Methodik-Anker für S×W → Risiko-Level (RAPEX-aligned) → RAG bp_iace_safety_kb", Status: "verwendet"},
|
||||
{Name: "Cobot-Schmerzschwellen (Behrens 2022 / Park 2019)", License: "CC BY 4.0", Usage: "Kraft/Druck-Limits je Körperregion (Daten hinter ISO/TS 15066, nicht die Norm) → RAG bp_iace_safety_kb", Status: "verwendet"},
|
||||
{Name: "UK HSE Beispiel-Risikobeurteilungen", License: "Open Government Licence v3", Usage: "qualitative Gefährdung→Maßnahme-Struktur → RAG bp_iace_safety_kb", Status: "verwendet"},
|
||||
{Name: "DGUV/IFA-Statistik & -Tabellen", License: "nur redaktionell, keine Bearbeitung", Usage: "—", Status: "ausgeschlossen"},
|
||||
{Name: "DIN/Beuth/ISO/IEC Norm- & Risikograph-Tabellen", License: "urheberrechtlich", Usage: "Nur als Referenz genannt, NIE reproduziert (ISO-15066-Werte nur via CC-BY-Papers)", Status: "ausgeschlossen"},
|
||||
},
|
||||
NormMatching: []string{
|
||||
"C-Normen (maschinenspezifisch): Match nur über die kanonische Maschinentyp-FAMILIE — `canonicalMachineType` faltet das feingranulare Normen-Vokabular (455 Keys: welding_machine, band_saw, mobile_crane …) auf die 68 Dropdown-Keys. Ohne Familien-Match wird die C-Norm verworfen (kein Tag/Kategorie-Fallback → keine Fremd-Domänen-Normen).",
|
||||
|
||||
Executable
+79
@@ -0,0 +1,79 @@
|
||||
#!/usr/bin/env bash
|
||||
# Ingest the IACE open-source knowledge-base documents into a RAG/Qdrant target.
|
||||
#
|
||||
# Idempotent: creates the two collections (1024-dim, BGE-M3) if missing and
|
||||
# uploads each versioned source doc with retry. Use it to populate a fresh
|
||||
# Qdrant (e.g. production) from the repo — the docs under datasources/ are the
|
||||
# single source of truth.
|
||||
#
|
||||
# Usage:
|
||||
# RAG_URL=https://rag.prod.example ./scripts/ingest_iace_kb.sh
|
||||
# ./scripts/ingest_iace_kb.sh # defaults to https://127.0.0.1:8097
|
||||
#
|
||||
# Env:
|
||||
# RAG_URL base URL of the RAG service (default https://127.0.0.1:8097)
|
||||
# INSECURE set to 0 to disable curl -k (default 1, for self-signed dev certs)
|
||||
# No `-e`: per-document failures are collected into an exit code at the end of
# the script instead of aborting on the first error.
set -uo pipefail

RAG_URL="${RAG_URL:-https://127.0.0.1:8097}"
# Drop one trailing slash so "$RAG_URL/api/..." never contains "//".
RAG_URL="${RAG_URL%/}"
INSECURE="${INSECURE:-1}"
VECTOR_SIZE=1024

# Base curl invocation shared by all requests: quiet, but show errors, and
# give up after 120 s per request.
CURL=(curl -sS --max-time 120)
if [ "$INSECURE" = "1" ]; then
  CURL+=(-k)
  # -k disables TLS certificate verification. That is expected for a local
  # dev service with a self-signed certificate; for any other host make the
  # risk visible instead of skipping verification silently.
  case "$RAG_URL" in
    *://127.0.0.1|*://127.0.0.1[:/]*|*://localhost|*://localhost[:/]*) ;;
    *) echo "WARNING: TLS certificate verification is disabled (INSECURE=1) for $RAG_URL; set INSECURE=0 if the server has a trusted certificate." >&2 ;;
  esac
fi

# Resolve the datasources dir relative to this script (repo-portable).
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
DS_DIR="$SCRIPT_DIR/../internal/iace/datasources"

# One record per document, pipe-delimited: file|collection|title|license.
# Plain strings are used instead of an associative array so the script also
# runs on macOS bash 3.2. Fields must not contain '|'.
DOCS=(
  "esaw_accident_stats_2023.md|bp_iace_accident_stats|Accidents at work - ESAW 2023|CC BY 4.0"
  "bls_cfoi_fatal_2024.md|bp_iace_accident_stats|BLS CFOI fatal injuries 2023-24|US Public Domain"
  "prism_risk_methodology.md|bp_iace_safety_kb|PRISM risk methodology|OGL v3"
  "cobot_biomech_limits.md|bp_iace_safety_kb|Cobot biomechanical pain limits|CC BY 4.0"
  "hse_example_risk_assessments.md|bp_iace_safety_kb|HSE example risk assessments|OGL v3"
  "osha_robot_safety.md|bp_iace_safety_kb|OSHA industrial robot safety|US Public Domain"
)
|
||||
|
||||
# Ask the RAG service to create a collection with VECTOR_SIZE dimensions.
#
# Args: $1 collection name.
# Best-effort: always returns 0, and the response body is discarded, so an
# error response from the server (e.g. for an already existing collection)
# stays silent. A transport-level curl failure (service unreachable, TLS
# error, timeout) is reported on stderr instead of being swallowed, because
# the subsequent uploads are then going to fail as well.
create_collection() {
  local name="$1" err
  echo " • ensure collection $name (${VECTOR_SIZE}d)"
  if ! err="$("${CURL[@]}" -o /dev/null -X POST "$RAG_URL/api/v1/collections" \
      -H 'Content-Type: application/json' \
      -d "{\"name\":\"$name\",\"vector_size\":$VECTOR_SIZE}" 2>&1)"; then
    echo " ! could not reach $RAG_URL to ensure collection $name: $err" >&2
  fi
  return 0
}
|
||||
|
||||
# Upload one datasource document into a collection, with up to three attempts.
#
# Args: $1 file name below $DS_DIR, $2 target collection, $3 title, $4 license.
# Returns 0 as soon as a response containing "chunks_count" is received;
# returns 1 when the file is missing or every attempt fails.
# NOTE: title and license are interpolated into the metadata JSON unescaped,
# so they must not contain double quotes or backslashes.
upload() {
  local file="$1" collection="$2" title="$3" license="$4"
  local path="$DS_DIR/$file"
  if [ ! -f "$path" ]; then echo " ✗ MISSING: $path"; return 1; fi
  local meta="{\"title\":\"$title\",\"license\":\"$license\",\"source\":\"iace_kb\"}"
  local attempts=3 retry_delay=4 try resp=""
  for ((try = 1; try <= attempts; try++)); do
    # Text fields use --form-string so curl sends them literally; -F would
    # interpret a leading '@'/'<' and ';type=' style suffixes in the value.
    resp="$("${CURL[@]}" -X POST "$RAG_URL/api/v1/documents/upload" \
      -F "file=@$path" \
      --form-string "collection=$collection" \
      --form-string 'data_type=safety_kb' \
      --form-string 'use_case=iace_risk' \
      --form-string 'year=2024' \
      --form-string "metadata_json=$meta" 2>&1)"
    # Match in the shell rather than `echo | grep -q`: under `pipefail` that
    # pipeline can report failure when grep exits before echo finished writing.
    case "$resp" in
      *chunks_count*)
        echo " ✓ $file -> $collection ($(grep -o '"chunks_count":[0-9]*' <<<"$resp"))"
        return 0
        ;;
    esac
    # Back off only when another attempt follows.
    if [ "$try" -lt "$attempts" ]; then sleep "$retry_delay"; fi
  done
  echo " ✗ FAILED $file: ${resp:0:120}"
  return 1
}
|
||||
|
||||
echo "Ingesting IACE KB into $RAG_URL"

# Ensure every collection referenced in DOCS exists, once each and in
# first-seen order. Deriving the list from DOCS keeps it the single place to
# edit when a document or collection is added.
seen_collections="|"
for entry in "${DOCS[@]}"; do
  IFS='|' read -r file collection rest <<<"$entry"
  case "$seen_collections" in
    *"|$collection|"*) ;;
    *)
      seen_collections="$seen_collections$collection|"
      create_collection "$collection"
      ;;
  esac
done

# Upload all documents; keep going after a failure and report it through the
# exit code.
rc=0
for entry in "${DOCS[@]}"; do
  IFS='|' read -r file collection title license <<<"$entry"
  upload "$file" "$collection" "$title" "$license" || rc=1
done

echo "Done (exit $rc)."
exit "$rc"
|
||||
Reference in New Issue
Block a user