package iace

import (
	"context"
	"encoding/json"
	"fmt"
	"io"
	"net/http"
	"net/url"
	"strings"
	"time"
)

// NTRS (NASA Technical Reports Server) harvester — Stage 1 of the FailureKnowledge
// bulk loader. Fetches lessons-learned / failure reports, applies a strict
// public-reuse licence gate (public release, not export-controlled, not CUI,
// public-use-permitted, no embedded third-party copyright), and exposes the
// readable metadata (title/abstract) + the PDF link for downstream extraction.
// NASA NTRS documents that pass the gate are US-Government public domain.

// ntrsSearchURL is the citation search endpoint queried by FetchNTRSLessons.
const ntrsSearchURL = "https://ntrs.nasa.gov/api/citations/search"

// ntrsCopyright mirrors the "copyright" object of an NTRS record.
type ntrsCopyright struct {
	// DeterminationType is matched case-insensitively by NTRSUsable against
	// PUBLIC_USE_PERMITTED and GOV_PUBLIC_USE_PERMITTED.
	DeterminationType string `json:"determinationType"`
	// ContainsThirdPartyMaterial, when true, makes NTRSUsable reject the record.
	ContainsThirdPartyMaterial bool `json:"containsThirdPartyMaterial"`
	// LicenseType is decoded but not consulted by the helpers in this file.
	LicenseType string `json:"licenseType"`
}

// ntrsExport mirrors the "exportControl" object of an NTRS record. The flags
// are decoded as strings; NTRSUsable rejects a record when any of them equals
// "YES" (case-insensitive).
type ntrsExport struct {
	IsExportControl string `json:"isExportControl"`
	EAR             string `json:"ear"`
	ITAR            string `json:"itar"`
}

// ntrsCui mirrors the "cui" object of an NTRS record.
type ntrsCui struct {
	IsCui bool `json:"isCui"`
}

// ntrsDownload mirrors one entry of a record's "downloads" array.
type ntrsDownload struct {
	// Mimetype is searched for the substring "pdf" by NTRSPDFURL.
	Mimetype string `json:"mimetype"`
	Links    struct {
		// Original is the link NTRSPDFURL builds the download URL from.
		Original string `json:"original"`
	} `json:"links"`
}

// NTRSLesson is one harvested NTRS record (only the fields we use).
type NTRSLesson struct {
	ID               int64         `json:"id"`
	Title            string        `json:"title"`
	Abstract         string        `json:"abstract"`
	Distribution     string        `json:"distribution"`
	IsLessonsLearned bool          `json:"isLessonsLearned"`
	Copyright        ntrsCopyright `json:"copyright"`
	ExportControl    ntrsExport    `json:"exportControl"`
	// Cui is a pointer so that a missing or null "cui" object stays nil;
	// NTRSUsable only rejects when it is non-nil and IsCui is true.
	Cui       *ntrsCui       `json:"cui"`
	Downloads []ntrsDownload `json:"downloads"`
}

// ntrsSearchResponse is the envelope of a search response; only the "results"
// array is decoded.
type ntrsSearchResponse struct {
	Results []NTRSLesson `json:"results"`
}

// NTRSUsable reports whether a lesson is publicly + commercially reusable and
// returns the licence string. The gate is conservative: any export-control, CUI,
// third-party copyright, non-public distribution, or non-public-use copyright
// determination disqualifies the record. The licence string is empty whenever
// the record is rejected.
func NTRSUsable(l NTRSLesson) (bool, string) { if !strings.EqualFold(l.Distribution, "PUBLIC") { return false, "" } if strings.EqualFold(l.ExportControl.IsExportControl, "YES") || strings.EqualFold(l.ExportControl.EAR, "YES") || strings.EqualFold(l.ExportControl.ITAR, "YES") { return false, "" } if l.Cui != nil && l.Cui.IsCui { return false, "" } if l.Copyright.ContainsThirdPartyMaterial { return false, "" } switch strings.ToUpper(l.Copyright.DeterminationType) { case "PUBLIC_USE_PERMITTED", "GOV_PUBLIC_USE_PERMITTED": return true, "Public Domain (NASA NTRS, " + l.Copyright.DeterminationType + ")" } return false, "" } // NTRSPDFURL returns the absolute URL of the first PDF download, or "". func NTRSPDFURL(l NTRSLesson) string { for _, d := range l.Downloads { if strings.Contains(strings.ToLower(d.Mimetype), "pdf") && d.Links.Original != "" { return "https://ntrs.nasa.gov" + d.Links.Original } } return "" } // parseNTRSSearch parses an NTRS /search response body. func parseNTRSSearch(body []byte) ([]NTRSLesson, error) { var r ntrsSearchResponse if err := json.Unmarshal(body, &r); err != nil { return nil, err } return r.Results, nil } // FetchNTRSLessons queries the NTRS search API. Network call; the parsing/gating // helpers above are pure and unit-tested. func FetchNTRSLessons(ctx context.Context, query string, limit int) ([]NTRSLesson, error) { if limit <= 0 || limit > 100 { limit = 25 } u := fmt.Sprintf("%s?q=%s&page.size=%d&highlight=false", ntrsSearchURL, url.QueryEscape(query), limit) req, err := http.NewRequestWithContext(ctx, http.MethodGet, u, nil) if err != nil { return nil, err } resp, err := (&http.Client{Timeout: 30 * time.Second}).Do(req) if err != nil { return nil, err } defer resp.Body.Close() if resp.StatusCode != http.StatusOK { return nil, fmt.Errorf("ntrs status %d", resp.StatusCode) } b, err := io.ReadAll(io.LimitReader(resp.Body, 8<<20)) if err != nil { return nil, err } return parseNTRSSearch(b) }