5e18df63b1
Executes the accident-statistics pipeline for the risk anchors: - Refresh contactModeEvidence with real Eurostat ESAW figures (dataset hsw_ph3_08, reference year 2023): impact 24.0%/21.4%, struck-by 13.0%/23.8%, sharp 14.5%, trapped/crushed 13.8% (fatal), + new physical/mental-stress mode 24.7% → ergonomic. GT-calibrated tier VALUES unchanged; the real data confirms the ordering. - Add the versioned source document (datasources/esaw_accident_stats_2023.md, ESAW CC BY 4.0 + OSHA public-domain context) that is ingested into the core RAG collection bp_iace_accident_stats for searchable evidence. - Whitelist bp_iace_accident_stats in the RAG search handler so seeding can full-text search the statistics with citation at seed time. Two-layer design: the small license-tagged code table stays the deterministic tier/citation lookup; the RAG holds the searchable source evidence. Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
79 lines
3.9 KiB
Go
79 lines
3.9 KiB
Go
package iace
|
|
|
|
import "sort"
|
|
|
|
// Evidence / citation layer for the risk-frequency anchors. Each entry is an
// aggregate, permissively-licensed public statistic (Eurostat ESAW, CC BY 4.0)
// that anchors the RELATIVE ordering of a contact mode's probability tier. The
// tier VALUES in contactModeTable are BreakPilot's own GT-calibrated numbers —
// this table only carries the provenance so generated risk numbers are
// auditable and correctly attributed. No raw dataset is vendored; only these
// aggregate facts. Excluded by license: DGUV, DIN/Beuth/ISO/IEC. See
// DATA_SOURCES.md.
//
// Two-layer design: this small license-tagged CODE table is the deterministic
// tier/citation lookup (fast, stable, no nondeterminism). The underlying SOURCE
// documents are additionally ingested into the core RAG collection
// `bp_iace_accident_stats` so the seeding UI / an auditor can full-text search
// the evidence and pull the original figure — the RAG is the evidence/search
// layer, not the tier lookup.
//
// Figures below are the EU aggregate shares from Eurostat ESAW dataset
// hsw_ph3_08, reference year 2023 (Figure 7, "contact - mode of injury").
|
|
|
|
// RiskEvidence is the public-statistics provenance record for one contact
// mode. All fields are plain strings; the struct tags give the lower-case JSON
// key used for each field.
type RiskEvidence struct {
	Mode        string `json:"mode"`        // contact-mode key, e.g. "struck_by"
	Label       string `json:"label"`       // German contact-mode label
	Stat        string `json:"stat"`        // the cited aggregate figure
	Source      string `json:"source"`      // e.g. "Eurostat (ESAW, hsw_ph3_08, 2023)"
	License     string `json:"license"`     // e.g. "CC BY 4.0"
	Attribution string `json:"attribution"` // ready-to-print citation line
	Retrieved   string `json:"retrieved"`   // retrieval month, e.g. "2026-06"
}
|
|
|
|
// Shared provenance values applied to every figure cited from Eurostat ESAW
// dataset hsw_ph3_08, reference year 2023. esawAttribution is the German
// citation line printed alongside a figure; esawRetrieved is the retrieval
// month.
const (
	esawSource      = "Eurostat (ESAW, hsw_ph3_08, 2023)"
	esawLicense     = "CC BY 4.0"
	esawAttribution = "Quelle: Eurostat (ESAW) hsw_ph3_08, Bezugsjahr 2023, CC BY 4.0"
	esawRetrieved   = "2026-06"
)
|
|
|
|
func esawEvidence(mode, label, stat string) RiskEvidence {
|
|
return RiskEvidence{Mode: mode, Label: label, Stat: stat, Source: esawSource,
|
|
License: esawLicense, Attribution: esawAttribution, Retrieved: esawRetrieved}
|
|
}
|
|
|
|
// contactModeEvidence holds only the contact modes for which a specific public
|
|
// figure is documented; other modes are anchored by the ESAW ordering and
|
|
// GT-calibrated without a single citable share, so they carry no fabricated stat.
|
|
var contactModeEvidence = map[string]RiskEvidence{
|
|
"impact_stationary": esawEvidence("impact_stationary", "Anstoßen an ruhendem Objekt", "24,0 % (nicht-tödlich) / 21,4 % (tödlich)"),
|
|
"struck_by": esawEvidence("struck_by", "Getroffen von bewegtem Objekt", "13,0 % (nicht-tödlich) / 23,8 % (tödlich)"),
|
|
"crushing": esawEvidence("crushing", "Eingeklemmt / zerquetscht", "13,8 % (tödlich)"),
|
|
"cutting": esawEvidence("cutting", "Kontakt mit scharfem/spitzem Agens", "14,5 % (nicht-tödlich)"),
|
|
"ergonomic": esawEvidence("ergonomic", "Physische/psychische Belastung", "24,7 % (nicht-tödlich)"),
|
|
}
|
|
|
|
// RiskEvidenceFor returns the documented public statistic for a contact mode.
|
|
func RiskEvidenceFor(mode string) (RiskEvidence, bool) {
|
|
e, ok := contactModeEvidence[mode]
|
|
return e, ok
|
|
}
|
|
|
|
// AllRiskEvidence returns the full evidence register (sorted), for a
|
|
// "Datenquellen" panel / risk-assessment export attribution.
|
|
func AllRiskEvidence() []RiskEvidence {
|
|
out := make([]RiskEvidence, 0, len(contactModeEvidence))
|
|
for _, e := range contactModeEvidence {
|
|
out = append(out, e)
|
|
}
|
|
sort.Slice(out, func(i, j int) bool { return out[i].Mode < out[j].Mode })
|
|
return out
|
|
}
|
|
|
|
// RiskDataSourcesNote is the overall attribution shown wherever engine risk
|
|
// numbers appear, satisfying the ESAW CC BY 4.0 source-acknowledgement.
|
|
const RiskDataSourcesNote = "Häufigkeits-/Wahrscheinlichkeits-Tiers verankert am öffentlichen Kontaktmodus-Ranking von " +
|
|
esawSource + " (" + esawLicense + "), kalibriert an BreakPilot-Ground-Truth. Keine Norm-Tabelle reproduziert; DGUV/DIN/ISO ausgeschlossen."
|