feat(ai-sdk): runnable iace-audit propose CLI + live LLM wiring (P2 slice 3)
Makes the offline proposer runnable end-to-end.
- BuildProposerInput (proposer_input.go): non-test engine->hazards path. The
PatternMatch->Hazard converter is lifted out of the GT test files into
production scope so both the tests and the CLI share one pipeline.
- iace-audit propose <narrative.json> [<ground-truth.json>]: detect candidates ->
GT-screen survivors (when a ground truth is given) -> judge (HeuristicJudge by
default, LLMJudge over ollama when IACE_PROPOSE_LLM=1) -> write the human-review
queue to audit-reports/proposals.{md,json}. Propose-only.
Smoke run on a dishwasher narrative: 32 fired -> 3 candidates -> queue with a
confident duplicate, a confident distinct, and one punted to the LLM judge; GT
wall recall-safe. Live qwen is opt-in via env; the heuristic default keeps the
tool runnable (and CI deterministic) without a model. Proposal types 2-4
(foreign-framing gates, vocab->tag, coverage blind spots) remain for slice 4.
Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
This commit is contained in:
@@ -34,6 +34,8 @@ func main() {
|
|||||||
cmdEcho(os.Args[2:])
|
cmdEcho(os.Args[2:])
|
||||||
case "hierarchy":
|
case "hierarchy":
|
||||||
cmdHierarchy(os.Args[2:])
|
cmdHierarchy(os.Args[2:])
|
||||||
|
case "propose":
|
||||||
|
cmdPropose(os.Args[2:])
|
||||||
default:
|
default:
|
||||||
usage()
|
usage()
|
||||||
os.Exit(2)
|
os.Exit(2)
|
||||||
@@ -41,7 +43,7 @@ func main() {
|
|||||||
}
|
}
|
||||||
|
|
||||||
func usage() {
|
func usage() {
|
||||||
fmt.Fprintln(os.Stderr, "Usage: iace-audit <reachability|consistency|vocabulary|echo|hierarchy> [args]")
|
fmt.Fprintln(os.Stderr, "Usage: iace-audit <reachability|consistency|vocabulary|echo|hierarchy|propose> [args]")
|
||||||
}
|
}
|
||||||
|
|
||||||
func cmdReachability(_ []string) {
|
func cmdReachability(_ []string) {
|
||||||
|
|||||||
@@ -0,0 +1,141 @@
|
|||||||
|
package main
|
||||||
|
|
||||||
|
import (
|
||||||
|
"context"
|
||||||
|
"encoding/json"
|
||||||
|
"fmt"
|
||||||
|
"os"
|
||||||
|
"strconv"
|
||||||
|
|
||||||
|
"github.com/breakpilot/ai-compliance-sdk/internal/iace"
|
||||||
|
"github.com/breakpilot/ai-compliance-sdk/internal/llm"
|
||||||
|
)
|
||||||
|
|
||||||
|
// narrativeInput is the JSON document read from the <narrative.json> argument
// of the propose command.
type narrativeInput struct {
	// MachineType is the primary machine type; cmdPropose forwards it to the
	// engine, the judge selection and the rendered report.
	MachineType string `json:"machine_type"`

	// Narrative is the free-text machine description. cmdPropose refuses to
	// run when it is empty.
	Narrative string `json:"narrative"`

	// MachineTypes optionally lists further machine types that are handed to
	// the engine alongside MachineType.
	MachineTypes []string `json:"machine_types,omitempty"`
}
|
||||||
|
|
||||||
|
// cmdPropose — Method P: offline dedup-candidate proposer.
|
||||||
|
//
|
||||||
|
// iace-audit propose <narrative.json> [<ground-truth.json>]
|
||||||
|
//
|
||||||
|
// Detect near-duplicate patterns, screen survivors against a ground truth (if
|
||||||
|
// given), judge them (heuristic by default, LLM when enabled), and write the
|
||||||
|
// human-review queue to audit-reports/proposals.{md,json}. Propose-only — it
|
||||||
|
// writes a report and never mutates the pattern library.
|
||||||
|
//
|
||||||
|
// Env:
|
||||||
|
//
|
||||||
|
// IACE_PROPOSE_THRESHOLD candidate score threshold (default 0.30)
|
||||||
|
// IACE_PROPOSE_LLM=1 use the offline LLM judge instead of the heuristic
|
||||||
|
// OLLAMA_URL ollama base URL (default http://localhost:11434)
|
||||||
|
// SELF_HOSTED_LLM_MODEL model name (default qwen2.5:32b-instruct)
|
||||||
|
func cmdPropose(args []string) {
|
||||||
|
if len(args) < 1 {
|
||||||
|
fmt.Fprintln(os.Stderr, "propose: usage: iace-audit propose <narrative.json> [<ground-truth.json>]")
|
||||||
|
os.Exit(2)
|
||||||
|
}
|
||||||
|
|
||||||
|
var in narrativeInput
|
||||||
|
must(readJSONFile(args[0], &in))
|
||||||
|
if in.Narrative == "" {
|
||||||
|
fmt.Fprintln(os.Stderr, "propose: narrative is empty")
|
||||||
|
os.Exit(2)
|
||||||
|
}
|
||||||
|
|
||||||
|
var gt *iace.GroundTruth
|
||||||
|
if len(args) >= 2 {
|
||||||
|
var g iace.GroundTruth
|
||||||
|
must(readJSONFile(args[1], &g))
|
||||||
|
gt = &g
|
||||||
|
}
|
||||||
|
|
||||||
|
threshold := envFloat("IACE_PROPOSE_THRESHOLD", 0.30)
|
||||||
|
hazards, mits, fired := iace.BuildProposerInput(in.Narrative, in.MachineType, in.MachineTypes)
|
||||||
|
candidates := iace.FindDedupCandidates(fired, threshold)
|
||||||
|
|
||||||
|
byID := make(map[string]iace.PatternMatch, len(fired))
|
||||||
|
for _, pm := range fired {
|
||||||
|
byID[pm.PatternID] = pm
|
||||||
|
}
|
||||||
|
|
||||||
|
judge := selectJudge(in.MachineType)
|
||||||
|
ctx := context.Background()
|
||||||
|
|
||||||
|
var proposals []iace.JudgedProposal
|
||||||
|
blocked := 0
|
||||||
|
for _, c := range candidates {
|
||||||
|
var sr iace.ScreenResult
|
||||||
|
if gt != nil {
|
||||||
|
sr = iace.ScreenSupersession(gt, hazards, mits, c.KeepHazardName, c.DropName)
|
||||||
|
if sr.RecallAfter < sr.RecallBefore || sr.DistinctGT {
|
||||||
|
blocked++
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
}
|
||||||
|
v, conf, rat := judge.Judge(ctx, c, byID[c.KeepPattern], byID[c.DropPattern])
|
||||||
|
proposals = append(proposals, iace.JudgedProposal{
|
||||||
|
Candidate: c, Screen: sr, Verdict: v, Confidence: conf, Rationale: rat, Judge: judge.Name(),
|
||||||
|
})
|
||||||
|
}
|
||||||
|
|
||||||
|
writeText("audit-reports/proposals.md", iace.RenderProposalQueue(in.MachineType, proposals))
|
||||||
|
writeJSON("audit-reports/proposals.json", proposals)
|
||||||
|
|
||||||
|
printSummary("Method P — Dedup Proposer ("+judge.Name()+")", map[string]int{
|
||||||
|
"fired_patterns": len(fired),
|
||||||
|
"candidates": len(candidates),
|
||||||
|
"in_queue": len(proposals),
|
||||||
|
"gt_blocked": blocked,
|
||||||
|
})
|
||||||
|
if gt == nil {
|
||||||
|
fmt.Fprintln(os.Stderr, "note: no ground truth provided — GT wall NOT applied (candidates not recall-screened)")
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func selectJudge(machineClass string) iace.CandidateJudge {
|
||||||
|
if os.Getenv("IACE_PROPOSE_LLM") != "1" {
|
||||||
|
return iace.HeuristicJudge{}
|
||||||
|
}
|
||||||
|
base := envStr("OLLAMA_URL", "http://localhost:11434")
|
||||||
|
model := envStr("SELF_HOSTED_LLM_MODEL", "qwen2.5:32b-instruct")
|
||||||
|
reg := llm.NewProviderRegistry("ollama", "")
|
||||||
|
reg.Register(llm.NewOllamaAdapter(base, model))
|
||||||
|
fmt.Printf("using LLM judge (ollama %s, model %s)\n", base, model)
|
||||||
|
return iace.LLMJudge{Completer: iace.NewRegistryCompleter(reg, model), MachineClass: machineClass}
|
||||||
|
}
|
||||||
|
|
||||||
|
// readJSONFile reads the file at path and decodes its JSON content into v.
// v must be a non-nil pointer, as required by json.Unmarshal.
//
// A read failure is returned unchanged (errors from os.ReadFile already name
// the file). A decode failure is wrapped with the path so the caller can tell
// which input file is malformed; the underlying error remains reachable via
// errors.Is / errors.As.
func readJSONFile(path string, v any) error {
	raw, err := os.ReadFile(path)
	if err != nil {
		return err
	}
	if err := json.Unmarshal(raw, v); err != nil {
		return fmt.Errorf("decoding %q: %w", path, err)
	}
	return nil
}
|
||||||
|
|
||||||
|
// writeText writes content to path, creating the audit-reports directory
// first. It is best-effort: failures are reported as warnings on stderr and
// never abort the program; on success the written path is echoed to stdout.
func writeText(path, content string) {
	// Report a failed directory creation instead of discarding it, but still
	// attempt the write: path is not required to live under audit-reports.
	if err := os.MkdirAll("audit-reports", 0o755); err != nil {
		fmt.Fprintln(os.Stderr, "warn: could not create audit-reports", err)
	}
	if err := os.WriteFile(path, []byte(content), 0o644); err != nil {
		fmt.Fprintln(os.Stderr, "warn: could not write", path, err)
		return
	}
	fmt.Println("→ wrote", path)
}
|
||||||
|
|
||||||
|
// envStr looks up the environment variable key and returns its value; when
// the variable is unset or empty it returns def instead.
func envStr(key, def string) string {
	value := os.Getenv(key)
	if value == "" {
		return def
	}
	return value
}
|
||||||
|
|
||||||
|
// envFloat returns the environment variable key parsed as a float64.
//
// def is returned when the variable is unset or empty. It is also returned,
// together with a warning on stderr, when the value is not a valid number or
// is not finite (NaN / ±Inf), so a typo is never silently ignored and a
// nonsensical threshold never reaches the caller.
func envFloat(key string, def float64) float64 {
	raw := os.Getenv(key)
	if raw == "" {
		return def
	}

	f, err := strconv.ParseFloat(raw, 64)
	if err != nil {
		fmt.Fprintf(os.Stderr, "warn: %s=%q is not a valid number, using default %g\n", key, raw, def)
		return def
	}
	// ParseFloat accepts "NaN" and "Inf" without error; f-f is zero only for
	// finite values (it is NaN for NaN and ±Inf).
	if f-f != 0 {
		fmt.Fprintf(os.Stderr, "warn: %s=%q is not a finite number, using default %g\n", key, raw, def)
		return def
	}
	return f
}
|
||||||
@@ -7,8 +7,6 @@ import (
|
|||||||
"path/filepath"
|
"path/filepath"
|
||||||
"sort"
|
"sort"
|
||||||
"testing"
|
"testing"
|
||||||
|
|
||||||
"github.com/google/uuid"
|
|
||||||
)
|
)
|
||||||
|
|
||||||
// TestKistenhub_GTCoverage runs the Kistenhubgeraet ground truth (37 entries)
|
// TestKistenhub_GTCoverage runs the Kistenhubgeraet ground truth (37 entries)
|
||||||
@@ -110,65 +108,6 @@ func TestKistenhub_GTCoverage(t *testing.T) {
|
|||||||
// patternsToHazardsAndMitigations converts a pattern match output into the
|
// patternsToHazardsAndMitigations converts a pattern match output into the
|
||||||
// Hazard/Mitigation shapes that CompareBenchmark expects. Mirrors what
|
// Hazard/Mitigation shapes that CompareBenchmark expects. Mirrors what
|
||||||
// iace_handler_init.go does in production but without DB writes.
|
// iace_handler_init.go does in production but without DB writes.
|
||||||
func patternsToHazardsAndMitigations(out *MatchOutput) ([]Hazard, []Mitigation) {
|
|
||||||
hazards := make([]Hazard, 0, len(out.MatchedPatterns))
|
|
||||||
patternToHazard := make(map[string]uuid.UUID, len(out.MatchedPatterns))
|
|
||||||
|
|
||||||
for _, pm := range out.MatchedPatterns {
|
|
||||||
cat := ""
|
|
||||||
if len(pm.HazardCats) > 0 {
|
|
||||||
cat = pm.HazardCats[0]
|
|
||||||
}
|
|
||||||
zone := pm.ZoneDE
|
|
||||||
lifecycle := ""
|
|
||||||
if len(pm.ApplicableLifecycles) > 0 {
|
|
||||||
lifecycle = pm.ApplicableLifecycles[0]
|
|
||||||
}
|
|
||||||
h := Hazard{
|
|
||||||
ID: uuid.New(),
|
|
||||||
Name: pm.ScenarioDE,
|
|
||||||
Category: cat,
|
|
||||||
Description: pm.ScenarioDE,
|
|
||||||
Scenario: pm.ScenarioDE,
|
|
||||||
TriggerEvent: pm.TriggerDE,
|
|
||||||
PossibleHarm: pm.HarmDE,
|
|
||||||
AffectedPerson: pm.AffectedDE,
|
|
||||||
HazardousZone: zone,
|
|
||||||
LifecyclePhase: lifecycle,
|
|
||||||
}
|
|
||||||
if h.Name == "" {
|
|
||||||
h.Name = pm.PatternName
|
|
||||||
}
|
|
||||||
hazards = append(hazards, h)
|
|
||||||
patternToHazard[pm.PatternID] = h.ID
|
|
||||||
}
|
|
||||||
|
|
||||||
measureNames := make(map[string]string)
|
|
||||||
for _, m := range GetProtectiveMeasureLibrary() {
|
|
||||||
measureNames[m.ID] = m.Name
|
|
||||||
}
|
|
||||||
|
|
||||||
var mitigations []Mitigation
|
|
||||||
for _, sm := range out.SuggestedMeasures {
|
|
||||||
name := measureNames[sm.MeasureID]
|
|
||||||
if name == "" {
|
|
||||||
name = sm.MeasureID
|
|
||||||
}
|
|
||||||
for _, srcPattern := range sm.SourcePatterns {
|
|
||||||
hid, ok := patternToHazard[srcPattern]
|
|
||||||
if !ok {
|
|
||||||
continue
|
|
||||||
}
|
|
||||||
mitigations = append(mitigations, Mitigation{
|
|
||||||
ID: uuid.New(),
|
|
||||||
HazardID: hid,
|
|
||||||
Name: name,
|
|
||||||
})
|
|
||||||
}
|
|
||||||
}
|
|
||||||
return hazards, mitigations
|
|
||||||
}
|
|
||||||
|
|
||||||
func abbrev(s string, max int) string {
|
func abbrev(s string, max int) string {
|
||||||
if len(s) <= max {
|
if len(s) <= max {
|
||||||
return s
|
return s
|
||||||
|
|||||||
@@ -0,0 +1,123 @@
|
|||||||
|
package iace
|
||||||
|
|
||||||
|
import "github.com/google/uuid"
|
||||||
|
|
||||||
|
// Non-test plumbing for the offline proposer (P2 slice 3): run the engine for a
|
||||||
|
// narrative and produce the fired patterns + the engine-built hazards/mitigations
|
||||||
|
// the dedup proposer and GT screen consume. This is the same pipeline the GT
|
||||||
|
// benchmark tests use, lifted out of test scope so the dev-time CLI can call it.
|
||||||
|
|
||||||
|
// universalLifecyclePhases is appended to the lifecycle phases parsed from the
// narrative so that patterns gated to a specific lifecycle (maintenance,
// cleaning, setup, fault clearing) still fire — the proposer wants the full
// hazard picture, not only normal-operation hazards.
var universalLifecyclePhases = []string{
	"normal_operation",
	"maintenance",
	"cleaning",
	"setup",
	"fault_clearing",
}
|
||||||
|
|
||||||
|
// BuildProposerInput parses a narrative, runs the pattern engine, keeps the
|
||||||
|
// narrative-relevant patterns, and returns the hazards, mitigations and fired
|
||||||
|
// patterns. NOTE: it does not apply the CE cyber-category skip, so the proposer
|
||||||
|
// view may include cyber/AI hazards that the CE log excludes — harmless for the
|
||||||
|
// GT recall screen (they match no CE ground-truth entry).
|
||||||
|
func BuildProposerInput(narrative, machineType string, extraMachineTypes []string) ([]Hazard, []Mitigation, []PatternMatch) {
|
||||||
|
res := ParseNarrative(narrative, machineType)
|
||||||
|
|
||||||
|
var compIDs, compNames, energyIDs []string
|
||||||
|
for _, c := range res.Components {
|
||||||
|
if c.Negated {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
compIDs = append(compIDs, c.LibraryID)
|
||||||
|
compNames = append(compNames, c.NameDE)
|
||||||
|
}
|
||||||
|
for _, e := range res.EnergySources {
|
||||||
|
energyIDs = append(energyIDs, e.SourceID)
|
||||||
|
}
|
||||||
|
|
||||||
|
machineTypes := append([]string{}, extraMachineTypes...)
|
||||||
|
if machineType != "" {
|
||||||
|
machineTypes = append(machineTypes, machineType)
|
||||||
|
}
|
||||||
|
lifecycles := append(append([]string{}, res.LifecyclePhases...), universalLifecyclePhases...)
|
||||||
|
|
||||||
|
out := NewPatternEngine().Match(MatchInput{
|
||||||
|
ComponentLibraryIDs: compIDs,
|
||||||
|
EnergySourceIDs: energyIDs,
|
||||||
|
LifecyclePhases: lifecycles,
|
||||||
|
CustomTags: res.CustomTags,
|
||||||
|
OperationalStates: res.OperationalStates,
|
||||||
|
StateTransitions: res.StateTransitions,
|
||||||
|
HumanRoles: res.Roles,
|
||||||
|
MachineTypes: machineTypes,
|
||||||
|
})
|
||||||
|
|
||||||
|
kept := make([]PatternMatch, 0, len(out.MatchedPatterns))
|
||||||
|
for _, pm := range out.MatchedPatterns {
|
||||||
|
if IsPatternRelevant(pm, narrative, compNames) {
|
||||||
|
kept = append(kept, pm)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
filtered := *out
|
||||||
|
filtered.MatchedPatterns = kept
|
||||||
|
hazards, mits := patternsToHazardsAndMitigations(&filtered)
|
||||||
|
return hazards, mits, kept
|
||||||
|
}
|
||||||
|
|
||||||
|
// patternsToHazardsAndMitigations converts engine output into the hazard/mitigation
|
||||||
|
// entities the benchmark + proposer compare on. Simplified vs InitializeProject
|
||||||
|
// (no risk estimation, no norm refs) — it only needs category/zone/scenario/measures.
|
||||||
|
func patternsToHazardsAndMitigations(out *MatchOutput) ([]Hazard, []Mitigation) {
|
||||||
|
hazards := make([]Hazard, 0, len(out.MatchedPatterns))
|
||||||
|
patternToHazard := make(map[string]uuid.UUID, len(out.MatchedPatterns))
|
||||||
|
|
||||||
|
for _, pm := range out.MatchedPatterns {
|
||||||
|
cat := ""
|
||||||
|
if len(pm.HazardCats) > 0 {
|
||||||
|
cat = pm.HazardCats[0]
|
||||||
|
}
|
||||||
|
lifecycle := ""
|
||||||
|
if len(pm.ApplicableLifecycles) > 0 {
|
||||||
|
lifecycle = pm.ApplicableLifecycles[0]
|
||||||
|
}
|
||||||
|
h := Hazard{
|
||||||
|
ID: uuid.New(),
|
||||||
|
Name: pm.ScenarioDE,
|
||||||
|
Category: cat,
|
||||||
|
Description: pm.ScenarioDE,
|
||||||
|
Scenario: pm.ScenarioDE,
|
||||||
|
TriggerEvent: pm.TriggerDE,
|
||||||
|
PossibleHarm: pm.HarmDE,
|
||||||
|
AffectedPerson: pm.AffectedDE,
|
||||||
|
HazardousZone: pm.ZoneDE,
|
||||||
|
LifecyclePhase: lifecycle,
|
||||||
|
}
|
||||||
|
if h.Name == "" {
|
||||||
|
h.Name = pm.PatternName
|
||||||
|
}
|
||||||
|
hazards = append(hazards, h)
|
||||||
|
patternToHazard[pm.PatternID] = h.ID
|
||||||
|
}
|
||||||
|
|
||||||
|
measureNames := make(map[string]string)
|
||||||
|
for _, m := range GetProtectiveMeasureLibrary() {
|
||||||
|
measureNames[m.ID] = m.Name
|
||||||
|
}
|
||||||
|
|
||||||
|
var mitigations []Mitigation
|
||||||
|
for _, sm := range out.SuggestedMeasures {
|
||||||
|
name := measureNames[sm.MeasureID]
|
||||||
|
if name == "" {
|
||||||
|
name = sm.MeasureID
|
||||||
|
}
|
||||||
|
for _, srcPattern := range sm.SourcePatterns {
|
||||||
|
hid, ok := patternToHazard[srcPattern]
|
||||||
|
if !ok {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
mitigations = append(mitigations, Mitigation{
|
||||||
|
ID: uuid.New(),
|
||||||
|
HazardID: hid,
|
||||||
|
Name: name,
|
||||||
|
})
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return hazards, mitigations
|
||||||
|
}
|
||||||
@@ -0,0 +1,25 @@
|
|||||||
|
package iace
|
||||||
|
|
||||||
|
import "testing"
|
||||||
|
|
||||||
|
func TestBuildProposerInput_WarewashingFires(t *testing.T) {
|
||||||
|
hazards, _, fired := BuildProposerInput(
|
||||||
|
warewashingNarrative,
|
||||||
|
"Gewerbliche Untertisch-Geschirrspuelmaschine (vernetzt)",
|
||||||
|
[]string{"food_processing"},
|
||||||
|
)
|
||||||
|
if len(fired) == 0 || len(hazards) == 0 {
|
||||||
|
t.Fatalf("want fired patterns + hazards, got %d patterns / %d hazards", len(fired), len(hazards))
|
||||||
|
}
|
||||||
|
has := func(id string) bool {
|
||||||
|
for _, pm := range fired {
|
||||||
|
if pm.PatternID == id {
|
||||||
|
return true
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return false
|
||||||
|
}
|
||||||
|
if !has("HP2201") {
|
||||||
|
t.Errorf("warewashing-specific HP2201 must fire via BuildProposerInput")
|
||||||
|
}
|
||||||
|
}
|
||||||
Reference in New Issue
Block a user