feat(iace): FMEA source-document register + Anthropic extraction (Haiku)
Quote-verifiable failure extraction via Claude (Haiku 4.5): PDF sent directly, tool-schema forces verbatim source quotes + applicable flag + confidence — replaces the unreliable local llama run. Only applicable=true tuples ingest into bp_iace_failure_kb; every processed doc lands in the source manifest. Frontend: FMEA tab now shows a "Quelldokumente" register (every document we use, with source + licence + link + what was extracted) served from the embedded manifest via GET /iace/failure-knowledge/sources. Manifest is placeholder until the 100-doc Haiku run is folded in. Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
This commit is contained in:
@@ -0,0 +1,6 @@
|
||||
{
|
||||
"generated": "pending-first-run",
|
||||
"model": "claude-haiku-4-5-20251001",
|
||||
"count": 0,
|
||||
"documents": []
|
||||
}
|
||||
@@ -0,0 +1,46 @@
|
||||
package iace
|
||||
|
||||
import (
|
||||
_ "embed"
|
||||
"encoding/json"
|
||||
)
|
||||
|
||||
// Source-document register for the FMEA failure-knowledge corpus. The manifest
|
||||
// is generated by the Anthropic extraction run (scripts/fmea_anthropic_extract.py)
|
||||
// and committed here, so the FMEA frontend can ALWAYS show every source document
|
||||
// we use — with its source + licence — straight from the deployed binary.
|
||||
// Each entry is auditable: a public URL + the licence under which we use it.
|
||||
|
||||
//go:embed datasources/nasa_failure_sources.json
|
||||
var failureSourcesJSON []byte
|
||||
|
||||
// FailureSourceDoc describes a single source document listed in the
// failure-knowledge register, together with what was extracted from it.
type FailureSourceDoc struct {
	// ID is the numeric identifier of the document within the manifest.
	ID int64 `json:"id"`

	// Title is the document title.
	Title string `json:"title"`

	// Source names where the document comes from.
	Source string `json:"source"`

	// License is the licence under which the document is used.
	License string `json:"license"`

	// URL is the public link to the document.
	URL string `json:"url"`

	// Used is true when the document yielded an applicable failure that was
	// ingested into the corpus.
	Used bool `json:"used"`

	// Component is the extracted component; it is empty if the document was
	// not used.
	Component string `json:"component"`

	// FailureMode is the extracted failure mode.
	FailureMode string `json:"failure_mode"`

	// Confidence is the confidence value recorded for the extraction.
	// NOTE(review): the set of allowed values is not visible here.
	Confidence string `json:"confidence"`
}
|
||||
|
||||
// FailureSources is the full register manifest.
|
||||
type FailureSources struct {
|
||||
Generated string `json:"generated"`
|
||||
Model string `json:"model"`
|
||||
Count int `json:"count"`
|
||||
Documents []FailureSourceDoc `json:"documents"`
|
||||
}
|
||||
|
||||
// GetFailureSources returns the embedded source-document register.
|
||||
func GetFailureSources() FailureSources {
|
||||
var fs FailureSources
|
||||
_ = json.Unmarshal(failureSourcesJSON, &fs)
|
||||
if fs.Documents == nil {
|
||||
fs.Documents = []FailureSourceDoc{}
|
||||
}
|
||||
return fs
|
||||
}
|
||||
@@ -0,0 +1,20 @@
|
||||
package iace
|
||||
|
||||
import (
	"encoding/json"
	"testing"
)
|
||||
|
||||
// The embedded manifest must always parse and every document must carry a
|
||||
// source + licence (the register is the auditable provenance shown in the UI).
|
||||
func TestGetFailureSources_ParsesAndAttributed(t *testing.T) {
|
||||
fs := GetFailureSources()
|
||||
if fs.Documents == nil {
|
||||
t.Fatal("documents must never be nil")
|
||||
}
|
||||
for _, d := range fs.Documents {
|
||||
if d.Source == "" || d.License == "" {
|
||||
t.Errorf("doc %d missing source/license: %+v", d.ID, d)
|
||||
}
|
||||
if !FailureKnowledgeLicenseAllowed(d.License) {
|
||||
t.Errorf("doc %d carries a non-allowed licence %q", d.ID, d.License)
|
||||
}
|
||||
}
|
||||
}
|
||||
Reference in New Issue
Block a user