feat(iace): add data-driven Architektur & Datenfluss explainer tab

Adds an auditor-facing view of the IACE engine: a clickable 10-stage pipeline flow (Grenzen-Formular → ParseNarrative → Pattern-Gates → Relevanz → Caps → Gefährdungen → Maßnahmen → Risiko → Normen → Matrix), plus live library counts, the data-source/license register (incl. the DIN/Beuth + DGUV exclusions), and the norm-matching logic that reconciles DIN/ISO/OSHA machine-type vocabulary via canonicalMachineType folding. Backend: BuildArchitecture() with LIVE counts so the diagram can never drift; GET /iace/architecture; collectAllNorms() extracted from SuggestNorms as the single source of truth for the norm-library count. Frontend: useArchitecture hook + page + new IACE nav tab. Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
2026-06-11 09:35:37 +02:00
parent 05a1795ea8
commit 32ba8d16b1
8 changed files with 522 additions and 1 deletions
@@ -58,3 +58,11 @@ func (h *IACEHandler) GetRiskDataSources(c *gin.Context) {
 		"evidence": iace.AllRiskEvidence(),
 	})
 }
+
+// GetArchitecture serves GET /architecture.
+// It answers with HTTP 200 and the JSON encoding of iace.BuildArchitecture():
+// the data-driven self-description of the IACE engine (pipeline stages,
+// libraries with LIVE counts, data sources + licenses, norm-matching logic)
+// shown in the "Architektur & Datenfluss" auditability tab.
+func (h *IACEHandler) GetArchitecture(c *gin.Context) {
+	arch := iace.BuildArchitecture()
+	c.JSON(http.StatusOK, arch)
+}
@@ -73,6 +73,7 @@ func registerIACERoutes(v1 *gin.RouterGroup, h *handlers.IACEHandler) {
 		iaceRoutes.GET("/projects/:id/risk-summary", h.GetRiskSummary)
 		iaceRoutes.GET("/projects/:id/risk-matrix", h.GetRiskMatrix)
 		iaceRoutes.GET("/risk-data-sources", h.GetRiskDataSources)
+		iaceRoutes.GET("/architecture", h.GetArchitecture)
 		iaceRoutes.GET("/projects/:id/suggested-norms", h.SuggestProjectNorms)
 		iaceRoutes.POST("/projects/:id/hazards/:hid/reassess", h.ReassessRisk)

@@ -0,0 +1,166 @@
+package iace
+
+// Data-driven self-description of the IACE engine for the "Architektur &
+// Datenfluss" explainer. Counts are LIVE (derived from the running engine) so
+// the diagram can never drift from reality; the stage/source prose is curated.
+// Purpose: let an auditor see WHERE each datum comes from and HOW a risk
+// assessment is reached — every gate, library and data source, in order.
+
+// ArchStage is one step of the deterministic pipeline.
+//
+// All fields are curated prose for the explainer: ID is the stage identifier
+// (e.g. "grenzen"), Title the numbered heading, Summary a short description,
+// Input what the stage consumes, Logic how it decides, DataSource the
+// code/library it draws from, and Example a sample case.
+type ArchStage struct {
+	ID         string `json:"id"`
+	Title      string `json:"title"`
+	Summary    string `json:"summary"`
+	Input      string `json:"input"`
+	Logic      string `json:"logic"`
+	DataSource string `json:"data_source"` // code/library it draws from
+	Example    string `json:"example"`
+}
+
+// ArchLibrary is one knowledge base with a LIVE entry count.
+//
+// Count is filled in by BuildArchitecture at call time (via len(...) of the
+// corresponding engine accessor or table) rather than being a curated number.
+// SourceFile names the Go file, or file glob, the library lives in.
+type ArchLibrary struct {
+	Name        string `json:"name"`
+	Count       int    `json:"count"`
+	SourceFile  string `json:"source_file"`
+	Description string `json:"description"`
+}
+
+// ArchDataSource is an external statistic/standard with its license + status.
+//
+// Status is one of two German literals: "verwendet" (used) or
+// "ausgeschlossen" (excluded). Usage describes what the source is used for.
+type ArchDataSource struct {
+	Name    string `json:"name"`
+	License string `json:"license"`
+	Usage   string `json:"usage"`
+	Status  string `json:"status"` // "verwendet" | "ausgeschlossen"
+}
+
+// Architecture is the full self-description returned by GET /iace/architecture.
+//
+// Stages lists the pipeline steps in order, Libraries the knowledge bases with
+// their entry counts, DataSources the license register, NormMatching the prose
+// statements explaining norm selection, and Evidence the risk evidence entries
+// (BuildArchitecture fills it from AllRiskEvidence).
+type Architecture struct {
+	Stages       []ArchStage      `json:"stages"`
+	Libraries    []ArchLibrary    `json:"libraries"`
+	DataSources  []ArchDataSource `json:"data_sources"`
+	NormMatching []string         `json:"norm_matching"`
+	Evidence     []RiskEvidence   `json:"evidence"`
+}
+
+// distinctDomainGates reports how many different dom_* capability tags appear
+// among the entries yielded by ranging over domainGateTerms; repeated tags are
+// counted once.
+func distinctDomainGates() int {
+	unique := map[string]struct{}{}
+	for _, gate := range domainGateTerms {
+		unique[gate] = struct{}{}
+	}
+	return len(unique)
+}
+
+// BuildArchitecture assembles the engine self-description with live counts.
+//
+// The result combines curated prose (Stages, DataSources, NormMatching) with
+// values computed on every call: each library Count is a len(...) over the
+// engine's own accessors/tables, and Evidence comes from AllRiskEvidence().
+// It takes no input and returns the Architecture by value.
+func BuildArchitecture() Architecture {
+	return Architecture{
+		// Curated prose, one entry per pipeline step, in pipeline order
+		// (the numbered titles run from 1 to 10).
+		// NOTE(review): figures inside the prose (e.g. "17 Felder") are
+		// hard-coded text, not computed — confirm they are kept in sync.
+		Stages: []ArchStage{
+			{
+				ID: "grenzen", Title: "1 · Grenzen-Formular",
+				Summary:    "Maschinenbeschreibung nach EN ISO 12100 (Verwendungs-, räumliche, zeitliche Grenzen).",
+				Input:      "17 Felder: Beschreibung, Verwendung, Fehlanwendung, Schnittstellen (elektrisch/mechanisch/pneumatisch-hydraulisch), Umgebung, Personen …",
+				Logic:      "Alle Felder werden zu einer Narrative zusammengeführt (kein Whitelist — jedes Feld ist eine potenzielle Gefährdungsquelle).",
+				DataSource: "project.metadata.limits_form",
+				Example:    "„Hubantrieb über Kette … 230 V … keine pneumatischen Schnittstellen.“",
+			},
+			{
+				ID: "parse", Title: "2 · ParseNarrative",
+				Summary:    "Deterministische Extraktion von Komponenten, Energiequellen, Domänen-Tags und Negationen.",
+				Input:      "Narrative-Text + Maschinentyp",
+				Logic:      "Keyword-Wörterbuch (Substring, umlaut-normalisiert) → Komponenten + Energie + dom_*-Tags. Negation („keine Pneumatik“) ⇒ Komponente als verneint markiert, liefert KEINE Tags.",
+				DataSource: "keyword_dictionary.go",
+				Example:    "„Kette“→Komponente, „230 V“→electrical_energy, „Presse“→dom_press.",
+			},
+			{
+				ID: "match", Title: "3 · Pattern-Engine (Gates)",
+				Summary:    "Jedes Gefährdungsmuster wird gegen die Maschine geprüft — harte UND-Gates.",
+				Input:      "Komponenten-Tags, Energie-Tags, Lebensphasen, Maschinentyp, dom_*-Tags",
+				Logic:      "patternMatches: MachineType ∧ Required-Component-Tags ∧ Required-Energy-Tags ∧ Lifecycle ∧ Operational-States. Capability-Domain-Gates (dom_*) verhindern Cross-Domänen-Leaks (z. B. Schwimmbad-Muster feuert nicht für eine Presse). Default-open, wenn ein Gate-Input leer ist.",
+				DataSource: "pattern_engine.go + pattern_domain_gates.go + hazard_patterns_*.go",
+				Example:    "Presse-Muster feuert nur, wenn machine_type∈Presse-Familie UND high_force-Tag vorhanden.",
+			},
+			{
+				ID: "relevance", Title: "4 · Relevanz-Backstop",
+				Summary:    "Generischer Filter gegen Rest-Leaks ungegateter Muster.",
+				Input:      "Gefeuertes Muster + Narrative + Komponenten-Namen",
+				Logic:      "IsPatternRelevant: Token-Grenzen + Stoppwort-Liste — ein Muster wird verworfen, wenn sein spezifischer Anker nicht in der Narrative vorkommt.",
+				DataSource: "pattern_relevance.go",
+				Example:    "Verwirft „Bandsäge“-Hazard, wenn die Narrative keine Säge nennt.",
+			},
+			{
+				ID: "caps", Title: "5 · Kategorie-Caps",
+				Summary:    "Begrenzung der Gefährdungen je Kategorie (skaliert mit Komponentenzahl).",
+				Input:      "Gefeuerte Muster je Gefährdungskategorie",
+				Logic:      "categoryHazardCap: pro Kategorie ein Maximum (verhindert Über-Flutung); Dedupe über Kategorie × Zone.",
+				DataSource: "iace_handler_init.go",
+				Example:    "max. N mechanical_hazard-Gefährdungen je Projekt.",
+			},
+			{
+				ID: "hazards", Title: "6 · Gefährdungen",
+				Summary:    "Die erzeugten Gefährdungen (Szenario, Auslöser, Schaden, Zone, betroffene Person).",
+				Input:      "Überlebende Muster + zugeordnete Komponente",
+				Logic:      "Pro Muster: Szenario/Trigger/Harm/Zone aus dem Muster; Komponentenzuordnung tag-basiert (pickComponentForPattern).",
+				DataSource: "iace_hazards (DB)",
+				Example:    "„Quetschen im Werkzeugeinbauraum zwischen Ober- und Unterwerkzeug.“",
+			},
+			{
+				ID: "measures", Title: "7 · Maßnahmen",
+				Summary:    "Schutzmaßnahmen je Gefährdung — kategorie-gefiltert, KEINE generischen Defaults.",
+				Input:      "Gefährdung + musterspezifische Suggested-Measure-IDs",
+				Logic:      "Nur Maßnahmen, deren Kategorie zur Gefährdung passt (isCategoryCompatible). Ohne passende Maßnahme ⇒ 0 Maßnahmen + Coverage-Gap (ehrlich, statt Unsinn).",
+				DataSource: "measures_library*.go",
+				Example:    "Sharp-edge-Gefährdung ⇒ keine „Rotation vermeiden“-Maßnahme.",
+			},
+			{
+				ID: "risk", Title: "8 · Risiko (S/F/W/P + Konfidenz)",
+				Summary:    "Konfidenz-bewusste Risikoschätzung je Gefährdung — als Bereich, nicht Punktwert.",
+				Input:      "Gefährdungskategorie + Szenario (Kontaktart) + Lebensphasen",
+				Logic:      "EstimateSeverity/Frequency/ProbabilityW/AvoidabilityP → R = S×(F+W+P), Band + Bereich (±1 je validierter Genauigkeit) + Konfidenz (Verletzungsmechanismus eindeutig?). W verankert am ESAW-Kontaktmodus-Ranking; eigenes Modell, KEINE Norm-Tabelle.",
+				DataSource: "risk_estimation.go + risk_data_sources.go (ESAW, CC BY 4.0)",
+				Example:    "Elektrischer Schlag: R≈32 (Bereich 21–45, mittel–kritisch), Konfidenz hoch.",
+			},
+			{
+				ID: "norms", Title: "9 · Normen (A/B/C + Familien-Matching)",
+				Summary:    "Passende Normen je Maschinentyp und Gefährdung; DIN/ISO/OSHA-Vokabular versöhnt.",
+				Input:      "Maschinentyp + Gefährdungskategorien + Tags",
+				Logic:      "SuggestNorms: C-Normen exakt per Maschinentyp-FAMILIE (canonicalMachineType: welding_machine→welding); B-Normen per Gefährdungskategorie/Tags; A-Normen gelten immer. Normen werden nur referenziert, Tabellen nie reproduziert.",
+				DataSource: "norms_engine.go + machine_type_families.go + norms_library*.go",
+				Example:    "Schweißanlage ⇒ EN 60974-x (Lichtbogenschweißen), obwohl Norm auf „welding_machine“ getaggt.",
+			},
+			{
+				ID: "matrix", Title: "10 · Risiko-Matrix / GT-Benchmark",
+				Summary:    "Projektweite Risiko-Matrix (Schwere × Wahrscheinlichkeit) und Abgleich gegen Experten-Ground-Truth.",
+				Input:      "Alle Gefährdungen + (optional) GT-Projekt",
+				Logic:      "BuildRiskMatrix aggregiert je Zelle; Benchmark vergleicht Tool-S/F/W/P + Fine-Kinney gegen Fachmann-GT (Übereinstimmung within±1, Rang-Konkordanz).",
+				DataSource: "risk_matrix.go + risk_benchmark.go",
+				Example:    "Kistenhub vs. eigene GT: S±1 94 %, Ranking 86 %.",
+			},
+		},
+		// Every Count below is computed at call time from the engine's own
+		// accessors/tables; only the names, file hints and descriptions are curated.
+		Libraries: []ArchLibrary{
+			{Name: "Hazard-Pattern-Bibliothek", Count: len(AllPatterns()), SourceFile: "hazard_patterns_*.go", Description: "Gefährdungsmuster mit Gates (MachineType/Tags/Energy/Lifecycle) + Szenario/Trigger/Harm/Zone."},
+			{Name: "Maßnahmen-Bibliothek", Count: len(GetProtectiveMeasureLibrary()), SourceFile: "measures_library*.go", Description: "Schutzmaßnahmen mit Reduktionstyp + Norm-Referenzen, kategorie-gefiltert."},
+			{Name: "Normen-Bibliothek (A/B/C)", Count: len(collectAllNorms()), SourceFile: "norms_library*.go", Description: "A-/B-/C-Normen mit Maschinentypen, Gefährdungskategorien und Tags."},
+			{Name: "Komponenten-Bibliothek", Count: len(GetComponentLibrary()), SourceFile: "component_library.go", Description: "Bauteiltypen mit Capability-Tags für das Pattern-Gating."},
+			{Name: "Energiequellen", Count: len(GetEnergySources()), SourceFile: "component_library.go", Description: "Energiearten (elektrisch/pneumatisch/hydraulisch …) für Energie-Gates."},
+			{Name: "Maschinentyp-Vokabular", Count: len(MachineTypeVocabulary()), SourceFile: "machine_types.go", Description: "Kanonische Dropdown-Maschinentypen, auf die Patterns gaten."},
+			{Name: "Domänen-Capability-Gates", Count: distinctDomainGates(), SourceFile: "pattern_domain_gates.go", Description: "dom_*-Tags, die domänenspezifische Muster auf ihre echte Maschine begrenzen (Leak-Schutz)."},
+			{Name: "Kontaktmodus-Tiers", Count: len(contactModeTable), SourceFile: "risk_estimation.go", Description: "Verletzungsmechanismen mit W/P/S-Tiers (ESAW-verankert, GT-kalibriert)."},
+			{Name: "Kontaktmodus-Evidenz", Count: len(contactModeEvidence), SourceFile: "risk_data_sources.go", Description: "Belegte öffentliche Statistik-Quoten (ESAW) als Zitat-/Audit-Schicht."},
+		},
+		// Curated license register. Status is "verwendet" (used) or
+		// "ausgeschlossen" (excluded); excluded sources are listed on purpose.
+		DataSources: []ArchDataSource{
+			{Name: "Eurostat ESAW (Kontaktmodus-Unfallstatistik)", License: "CC BY 4.0", Usage: "Anker für Wahrscheinlichkeits-Tiers (W) + zitierbare Quoten", Status: "verwendet"},
+			{Name: "US BLS / OSHA (Arbeitsunfälle)", License: "Public Domain", Usage: "Ergänzende Häufigkeits-/Schwere-Anker + OSHA-Maßnahmen", Status: "verwendet"},
+			{Name: "UK HSE (RIDDOR)", License: "Open Government Licence v3", Usage: "Zulässige Ergänzung (Attribution)", Status: "verwendet"},
+			{Name: "DGUV-Statistik", License: "nur redaktionell, keine Bearbeitung", Usage: "—", Status: "ausgeschlossen"},
+			{Name: "DIN/Beuth/ISO/IEC Risikograph-Tabellen", License: "urheberrechtlich", Usage: "Nur als Referenz genannt, NIE reproduziert/re-implementiert", Status: "ausgeschlossen"},
+		},
+		// NOTE(review): the figures "455 Keys" and "68 Dropdown-Keys" in the
+		// first entry are hard-coded prose, unlike the computed Counts in
+		// Libraries — confirm they still match the vocabularies when those change.
+		NormMatching: []string{
+			"C-Normen (maschinenspezifisch): Match nur über die kanonische Maschinentyp-FAMILIE — `canonicalMachineType` faltet das feingranulare Normen-Vokabular (455 Keys: welding_machine, band_saw, mobile_crane …) auf die 68 Dropdown-Keys. Ohne Familien-Match wird die C-Norm verworfen (kein Tag/Kategorie-Fallback → keine Fremd-Domänen-Normen).",
+			"B-Normen (gefährdungsspezifisch): Match über Gefährdungskategorie und Komponenten-/Energie-Tags.",
+			"A-Normen (Grundnormen): gelten immer (z. B. EN ISO 12100).",
+			"DIN/ISO/OSHA-Versöhnung: Normen tragen teils OSHA-/ISO-/DIN-nahe Maschinen-Keys; die Familien-Faltung sorgt dafür, dass z. B. eine „welding_machine“-Norm für eine „welding“-Maschine matched.",
+			"Lizenz-Leitplanke: Norm-Tabellen/Risikographen werden NIE reproduziert — nur Norm-Referenzen ausgegeben.",
+		},
+		// Same evidence list that the risk-data-sources endpoint exposes.
+		Evidence: AllRiskEvidence(),
+	}
+}
@@ -0,0 +1,88 @@
+package iace
+
+import (
+	"strings"
+	"testing"
+)
+
+// BuildArchitecture is the data-driven engine self-description rendered in the
+// "Architektur & Datenfluss" auditability tab. These tests pin its shape and,
+// crucially, that the library counts are LIVE (non-zero, drawn from the running
+// engine) — a zero count would mean the diagram silently drifted from reality.
+
+// TestBuildArchitecture_Shape pins the overall shape of the self-description:
+// ten stages in audit order ("grenzen" first, "matrix" last), non-empty
+// required prose on every stage, and non-empty norm-matching and evidence lists.
+func TestBuildArchitecture_Shape(t *testing.T) {
+	a := BuildArchitecture()
+
+	if len(a.Stages) != 10 {
+		t.Errorf("expected 10 pipeline stages, got %d", len(a.Stages))
+	}
+	// Stage order is the audit narrative — first is the limits form, last the matrix.
+	// Both lookups are guarded: indexing an empty slice would panic and abort
+	// the test run instead of reporting the failures above and below.
+	if n := len(a.Stages); n > 0 {
+		if first := a.Stages[0]; first.ID != "grenzen" {
+			t.Errorf("first stage should be grenzen, got %q", first.ID)
+		}
+		if last := a.Stages[n-1]; last.ID != "matrix" {
+			t.Errorf("last stage should be matrix, got %q", last.ID)
+		}
+	}
+	for _, s := range a.Stages {
+		if s.Title == "" || s.Summary == "" || s.Logic == "" || s.DataSource == "" {
+			t.Errorf("stage %q has empty required prose: %+v", s.ID, s)
+		}
+	}
+
+	if len(a.NormMatching) == 0 {
+		t.Error("norm_matching explanation must not be empty")
+	}
+	if len(a.Evidence) == 0 {
+		t.Error("evidence (ESAW citations) must not be empty")
+	}
+}
+
+// TestBuildArchitecture_LiveLibraryCounts verifies that at least one library is
+// reported and that each one carries a name, a source file and a positive count.
+func TestBuildArchitecture_LiveLibraryCounts(t *testing.T) {
+	libs := BuildArchitecture().Libraries
+	if len(libs) == 0 {
+		t.Fatal("no libraries reported")
+	}
+
+	for i := range libs {
+		lib := libs[i]
+		if lib.Name == "" || lib.SourceFile == "" {
+			t.Errorf("library missing name/source: %+v", lib)
+		}
+		if lib.Count > 0 {
+			continue
+		}
+		t.Errorf("library %q has non-live count %d (expected >0 from running engine)", lib.Name, lib.Count)
+	}
+}
+
+// TestBuildArchitecture_DataSourcesIncludeExclusions checks the license
+// register: every source has a known status and a license, at least one source
+// is used, the DIN/Beuth norm tables are listed as an explicit exclusion, and
+// the norm-matching prose states that norm tables are never reproduced.
+func TestBuildArchitecture_DataSourcesIncludeExclusions(t *testing.T) {
+	a := BuildArchitecture()
+
+	var hasUsed, hasExcludedNormTables bool
+	for _, d := range a.DataSources {
+		switch d.Status {
+		case "verwendet":
+			hasUsed = true
+		case "ausgeschlossen":
+			// Only the DIN entry counts here: another excluded source (e.g.
+			// the DGUV statistics) must not mask removal of the norm tables.
+			if strings.Contains(d.Name, "DIN") {
+				hasExcludedNormTables = true
+			}
+		default:
+			t.Errorf("data source %q has unexpected status %q", d.Name, d.Status)
+		}
+		if d.License == "" {
+			t.Errorf("data source %q missing license", d.Name)
+		}
+	}
+	if !hasUsed {
+		t.Error("expected at least one used data source")
+	}
+	// The copyright guardrail is auditable only if the EXCLUDED norm tables are
+	// shown as deliberately not-reproduced — not silently omitted.
+	if !hasExcludedNormTables {
+		t.Error("expected DIN/Beuth norm tables to appear as an explicit exclusion")
+	}
+
+	// The licensing guardrail must be spelled out in the norm-matching prose:
+	// norm tables are referenced, never reproduced.
+	joined := strings.ToLower(strings.Join(a.NormMatching, " "))
+	if !strings.Contains(joined, "reproduziert") {
+		t.Error("norm-matching prose should state norm tables are never reproduced")
+	}
+}
@@ -23,7 +23,9 @@ type NormSuggestionResult struct {
 // identified hazard categories, and component/energy tags.
 // A-norms are always included (they apply universally). B/C norms are matched
 // by machine type (confidence 0.9), hazard category (0.8), or tag (0.7).
-func SuggestNorms(machineType string, hazardCategories []string, tags []string) *NormSuggestionResult {
+
+// collectAllNorms aggregates every A/B/C norm source into one slice. Single
+// source of truth used by SuggestNorms and the architecture self-description.
+func collectAllNorms() []NormReference {
 	allNorms := GetNormsLibrary()
 	allNorms = append(allNorms, GetExtendedB2Norms()...)
 	allNorms = append(allNorms, GetCNormsLibrary()...)
@@ -48,6 +50,11 @@ func SuggestNorms(machineType string, hazardCategories []string, tags []string)
 	allNorms = append(allNorms, GetWave3dExtCNorms()...)
 	allNorms = append(allNorms, GetWave3dHvacCNorms()...)
 	allNorms = append(allNorms, GetFinalCNorms()...)
+	return allNorms
+}
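+
+// SuggestNorms suggests norms for the given machine type, identified hazard
+// categories, and component/energy tags, matching against the combined norm
+// list returned by collectAllNorms.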
+func SuggestNorms(machineType string, hazardCategories []string, tags []string) *NormSuggestionResult {
+	allNorms := collectAllNorms()

 	// Build lookup sets for efficient matching
 	hazardSet := toSet(hazardCategories)