Files
breakpilot-compliance/ai-compliance-sdk/internal/iace/risk_data_sources.go
T
Benjamin Admin ee64b7e95c feat(iace): cite ESAW source + license on risk-frequency anchors
Surfaces the public-statistics provenance for the contact-mode probability
tiers so generated risk numbers are auditable and attributed (not RAG —
~a dozen stable aggregate facts are better as a license-tagged code table).

- risk_data_sources.go: RiskEvidence register (Eurostat ESAW figures + CC BY
  4.0 attribution) for the documented contact modes; RiskDataSourcesNote.
- risk_suggestion.go: the W justification now cites the actual ESAW share +
  license where documented; RiskSuggestion gains a data_source field.
- GET /iace/risk-data-sources returns the evidence register + attribution.

Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
2026-06-11 09:14:36 +02:00

70 lines
3.3 KiB
Go

package iace
import "sort"
// Evidence / citation layer for the risk-frequency anchors. Each entry is an
// aggregate, permissively-licensed public statistic (Eurostat ESAW, CC BY 4.0)
// that anchors the RELATIVE ordering of a contact mode's probability tier. The
// tier VALUES in contactModeTable are BreakPilot's own GT-calibrated numbers —
// this table only carries the provenance so generated risk numbers are
// auditable and correctly attributed. No raw dataset is vendored; only these
// aggregate facts. Excluded by license: DGUV, DIN/Beuth/ISO/IEC. See
// DATA_SOURCES.md. RAG/Qdrant ingestion is deliberately NOT used here: ~a dozen
// stable aggregate facts are better served by a license-tagged code table than
// by vector retrieval.
// RiskEvidence describes the public-statistics provenance attached to a
// single contact mode. All fields are plain strings and are serialised to
// JSON under the lower-case keys given in their tags.
type RiskEvidence struct {
	// Mode is the contact-mode identifier.
	Mode string `json:"mode"`
	// Label is the German contact-mode label.
	Label string `json:"label"`
	// Stat is the cited aggregate figure.
	Stat string `json:"stat"`
	// Source names the publisher of the figure, e.g. "Eurostat (ESAW)".
	Source string `json:"source"`
	// License is the license the figure is cited under, e.g. "CC BY 4.0".
	License string `json:"license"`
	// Attribution is a ready-to-print citation line.
	Attribution string `json:"attribution"`
	// Retrieved is the month in which the figure was retrieved.
	Retrieved string `json:"retrieved"`
}
// Provenance values shared by every ESAW-backed evidence entry.
const (
	// esawSource names the publisher and dataset of the cited figures.
	esawSource = "Eurostat (ESAW)"
	// esawLicense is the license the figures are cited under.
	esawLicense = "CC BY 4.0"
	// esawRetrieved is the retrieval month, written as YYYY-MM.
	esawRetrieved = "2026-06"
	// esawAttribution is the German citation line, assembled from the source
	// and license above so the three values cannot drift apart.
	esawAttribution = "Quelle: " + esawSource + ", " + esawLicense
)
// esawEvidence assembles the RiskEvidence entry for one contact mode. The
// caller supplies the mode identifier, its label and the cited figure; the
// source, license, attribution and retrieval month are taken from the shared
// esaw* constants.
func esawEvidence(mode, label, stat string) RiskEvidence {
	// Start from the provenance that is identical for every ESAW entry.
	ev := RiskEvidence{
		Source:      esawSource,
		License:     esawLicense,
		Attribution: esawAttribution,
		Retrieved:   esawRetrieved,
	}
	ev.Mode, ev.Label, ev.Stat = mode, label, stat
	return ev
}
// contactModeEvidence maps a contact-mode identifier to its documented public
// figure. Only modes with a specific citable figure are listed; the remaining
// modes are anchored by the ESAW ordering and GT-calibrated without a single
// citable share, so no stat is invented for them.
//
// The map is keyed by each entry's own Mode, so key and Mode always agree.
var contactModeEvidence = func() map[string]RiskEvidence {
	documented := []RiskEvidence{
		esawEvidence("impact_stationary", "Anstoßen an ruhendem Objekt", "~24 % der Arbeitsunfälle"),
		esawEvidence("struck_by", "Getroffen von bewegtem Objekt", "~13 % (nicht-tödlich) / ~24 % (tödlich)"),
		esawEvidence("crushing", "Quetschen / Einklemmen", "~14 % der tödlichen Arbeitsunfälle"),
		esawEvidence("cutting", "Kontakt mit scharfem Gegenstand", "~15 % der Arbeitsunfälle"),
	}
	byMode := make(map[string]RiskEvidence, len(documented))
	for _, ev := range documented {
		byMode[ev.Mode] = ev
	}
	return byMode
}()
// RiskEvidenceFor looks up the documented public statistic for the given
// contact mode. The boolean reports whether the mode has an entry; when it is
// false the returned RiskEvidence is the zero value.
func RiskEvidenceFor(mode string) (RiskEvidence, bool) {
	if entry, found := contactModeEvidence[mode]; found {
		return entry, true
	}
	return RiskEvidence{}, false
}
// AllRiskEvidence returns a fresh slice holding every entry of the evidence
// register, ordered ascending by Mode, e.g. for a "Datenquellen" panel or the
// attribution section of a risk-assessment export.
func AllRiskEvidence() []RiskEvidence {
	register := make([]RiskEvidence, len(contactModeEvidence))
	next := 0
	for _, entry := range contactModeEvidence {
		register[next] = entry
		next++
	}
	// Map iteration order is random; sort so the output is deterministic.
	modeAscending := func(a, b int) bool {
		return register[a].Mode < register[b].Mode
	}
	sort.Slice(register, modeAscending)
	return register
}
// RiskDataSourcesNote is the overall German attribution text to display
// wherever engine risk numbers appear. It names the ESAW source and its
// license, and is intended to cover the CC BY 4.0 source acknowledgement.
const RiskDataSourcesNote = "Häufigkeits-/Wahrscheinlichkeits-Tiers verankert am öffentlichen " +
	"Kontaktmodus-Ranking von " + esawSource + " (" + esawLicense + "), " +
	"kalibriert an BreakPilot-Ground-Truth. " +
	"Keine Norm-Tabelle reproduziert; DGUV/DIN/ISO ausgeschlossen."