d27c1b9e7d
Stage 1 of the FailureKnowledge bulk loader: harvest NASA NTRS lessons-learned with a strict public-reuse gate (NTRSUsable: public release, not export-controlled/EAR/ITAR, not CUI, PUBLIC_USE_PERMITTED, no third-party copyright). NTRSPDFURL prefers the PDF download for downstream text/OCR extraction. GET /iace/failure-knowledge/ntrs runs the live harvest and returns only the licence-clean records. Pure parse/gate helpers are fixture-tested (usable vs ITAR / third-party / restricted / video-only); accepted licences also pass the FK allowlist. Next: tuple extraction (abstract -> FailureKnowledge) + Playwright/OCR for scanned PDFs -> bp_iace_failure_kb. Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
67 lines
3.1 KiB
Go
67 lines
3.1 KiB
Go
package iace
|
|
|
|
import "testing"
|
|
|
|
// ntrsFixture mirrors the real NTRS /search response shape (copyright is an
// OBJECT, not a plain string). It holds five records, in this order:
//
//  0. id 20205010628: PUBLIC, no export control, no CUI, with a PDF download
//  1. id 1001: export-controlled (isExportControl and itar are "YES")
//  2. id 1002: containsThirdPartyMaterial is true
//  3. id 1003: distribution is RESTRICTED
//  4. id 1004: same clean flags as record 0, but its only download is video/mp4
//
// The literal must stay valid JSON: nothing but the JSON text itself may
// appear between the backticks.
const ntrsFixture = `{"results":[
{"id":20205010628,"title":"Lessons Learned from Large-Scale Aerospace Structural Testing","abstract":"A bracket fractured under load.",
"distribution":"PUBLIC","isLessonsLearned":true,
"copyright":{"determinationType":"GOV_PUBLIC_USE_PERMITTED","containsThirdPartyMaterial":false,"licenseType":"NO"},
"exportControl":{"isExportControl":"NO","ear":"NO","itar":"NO"},"cui":{"isCui":false},
"downloads":[{"mimetype":"application/pdf","links":{"original":"/api/citations/20205010628/downloads/paper.pdf"}}]},
{"id":1001,"title":"ITAR controlled","abstract":"x","distribution":"PUBLIC","isLessonsLearned":true,
"copyright":{"determinationType":"GOV_PUBLIC_USE_PERMITTED","containsThirdPartyMaterial":false},
"exportControl":{"isExportControl":"YES","ear":"NO","itar":"YES"},"cui":null,
"downloads":[{"mimetype":"application/pdf","links":{"original":"/x.pdf"}}]},
{"id":1002,"title":"Third party","abstract":"x","distribution":"PUBLIC","isLessonsLearned":true,
"copyright":{"determinationType":"PUBLIC_USE_PERMITTED","containsThirdPartyMaterial":true},
"exportControl":{"isExportControl":"NO"},"cui":null,"downloads":[]},
{"id":1003,"title":"Restricted dist","abstract":"x","distribution":"RESTRICTED",
"copyright":{"determinationType":"GOV_PUBLIC_USE_PERMITTED"},"exportControl":{"isExportControl":"NO"}},
{"id":1004,"title":"Video only","abstract":"x","distribution":"PUBLIC","isLessonsLearned":true,
"copyright":{"determinationType":"PUBLIC_USE_PERMITTED","containsThirdPartyMaterial":false},
"exportControl":{"isExportControl":"NO"},"cui":{"isCui":false},
"downloads":[{"mimetype":"video/mp4","links":{"original":"/v.mp4"}}]}
]}`
|
|
|
|
func TestParseNTRSSearch(t *testing.T) {
|
|
ls, err := parseNTRSSearch([]byte(ntrsFixture))
|
|
if err != nil {
|
|
t.Fatalf("parse: %v", err)
|
|
}
|
|
if len(ls) != 5 {
|
|
t.Fatalf("expected 5 results, got %d", len(ls))
|
|
}
|
|
if ls[0].ID != 20205010628 || ls[0].Copyright.DeterminationType != "GOV_PUBLIC_USE_PERMITTED" {
|
|
t.Errorf("first record parsed wrong: %+v", ls[0])
|
|
}
|
|
}
|
|
|
|
func TestNTRSUsable_Gate(t *testing.T) {
|
|
ls, _ := parseNTRSSearch([]byte(ntrsFixture))
|
|
want := []bool{true, false, false, false, true} // ok, ITAR, third-party, restricted, ok(video)
|
|
for i, l := range ls {
|
|
ok, lic := NTRSUsable(l)
|
|
if ok != want[i] {
|
|
t.Errorf("record %d (%q): usable=%v, want %v", l.ID, l.Title, ok, want[i])
|
|
}
|
|
if ok && lic == "" {
|
|
t.Errorf("record %d usable but empty licence", l.ID)
|
|
}
|
|
// Every accepted record must also pass the failure-knowledge allowlist.
|
|
if ok && !FailureKnowledgeLicenseAllowed(lic) {
|
|
t.Errorf("record %d licence %q not allowed by FK allowlist", l.ID, lic)
|
|
}
|
|
}
|
|
}
|
|
|
|
func TestNTRSPDFURL(t *testing.T) {
|
|
ls, _ := parseNTRSSearch([]byte(ntrsFixture))
|
|
if got := NTRSPDFURL(ls[0]); got != "https://ntrs.nasa.gov/api/citations/20205010628/downloads/paper.pdf" {
|
|
t.Errorf("pdf url = %q", got)
|
|
}
|
|
if got := NTRSPDFURL(ls[4]); got != "" { // video-only → no PDF
|
|
t.Errorf("video-only should have no PDF url, got %q", got)
|
|
}
|
|
}
|