feat(ai-sdk): runnable iace-audit propose CLI + live LLM wiring (P2 slice 3)

Makes the offline proposer runnable end-to-end.

- BuildProposerInput (proposer_input.go): non-test engine->hazards path. The PatternMatch->Hazard converter is lifted out of the GT test files into production scope so both the tests and the CLI share one pipeline.
- iace-audit propose <narrative.json> [<ground-truth.json>]: detect candidates -> GT-screen survivors (when a ground truth is given) -> judge (HeuristicJudge by default, LLMJudge over ollama when IACE_PROPOSE_LLM=1) -> write the human-review queue to audit-reports/proposals.{md,json}. Propose-only.

Smoke run on a dishwasher narrative: 32 fired -> 3 candidates -> queue with a confident duplicate, a confident distinct, and one punted to the LLM judge; GT wall recall-safe.

Live qwen is opt-in via env; the heuristic default keeps the tool runnable (and CI deterministic) without a model.

Proposal types 2-4 (foreign-framing gates, vocab->tag, coverage blind spots) remain for slice 4.

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
2026-06-25 09:10:43 +02:00
parent 0ce4794767
commit 8440ddfecb
5 changed files with 292 additions and 62 deletions
@@ -34,6 +34,8 @@ func main() {
 		cmdEcho(os.Args[2:])
 	case "hierarchy":
 		cmdHierarchy(os.Args[2:])
 	case "propose":
 		cmdPropose(os.Args[2:])
 	default:
 		usage()
 		os.Exit(2)
@@ -41,7 +43,7 @@ func main() {
 }
 // usage prints the one-line synopsis listing the available subcommands to stderr.
 func usage() {
-	fmt.Fprintln(os.Stderr, "Usage: iace-audit <reachability|consistency|vocabulary|echo|hierarchy> [args]")
+	fmt.Fprintln(os.Stderr, "Usage: iace-audit <reachability|consistency|vocabulary|echo|hierarchy|propose> [args]")
 }
 func cmdReachability(_ []string) {
@@ -0,0 +1,141 @@
 package main
 import (
 	"context"
 	"encoding/json"
 	"fmt"
 	"math"
 	"os"
 	"path/filepath"
 	"strconv"

 	"github.com/breakpilot/ai-compliance-sdk/internal/iace"
 	"github.com/breakpilot/ai-compliance-sdk/internal/llm"
 )
 // narrativeInput is the JSON shape of the <narrative.json> file read by the
 // propose subcommand.
 type narrativeInput struct {
 	// MachineType is passed to the engine, the judge selection and the report renderer.
 	MachineType  string   `json:"machine_type"`
 	// Narrative is the free-text machine description; cmdPropose rejects an empty one.
 	Narrative    string   `json:"narrative"`
 	// MachineTypes are optional extra machine types handed to BuildProposerInput.
 	MachineTypes []string `json:"machine_types,omitempty"`
 }
 // cmdPropose — Method P: offline dedup-candidate proposer.
 //
 //	iace-audit propose <narrative.json> [<ground-truth.json>]
 //
 // Pipeline: build the fired patterns and hazards for the narrative, detect
 // near-duplicate pattern candidates, screen them against a ground truth (only
 // when one is given), judge the survivors (heuristic by default, LLM when
 // enabled), and write the human-review queue to
 // audit-reports/proposals.{md,json}. Propose-only — it writes a report and
 // never mutates the pattern library.
 //
 // Exits with status 2 when the narrative argument is missing or the narrative
 // text is empty.
 //
 // Env:
 //
 //	IACE_PROPOSE_THRESHOLD  candidate score threshold (default 0.30)
 //	IACE_PROPOSE_LLM=1      use the offline LLM judge instead of the heuristic
 //	OLLAMA_URL              ollama base URL (default http://localhost:11434)
 //	SELF_HOSTED_LLM_MODEL   model name (default qwen2.5:32b-instruct)
 func cmdPropose(args []string) {
 	if len(args) < 1 {
 		fmt.Fprintln(os.Stderr, "propose: usage: iace-audit propose <narrative.json> [<ground-truth.json>]")
 		os.Exit(2)
 	}
 	var in narrativeInput
 	must(readJSONFile(args[0], &in))
 	if in.Narrative == "" {
 		fmt.Fprintln(os.Stderr, "propose: narrative is empty")
 		os.Exit(2)
 	}
 	// gt stays nil when no ground-truth file was given; the screen is then skipped.
 	var gt *iace.GroundTruth
 	if len(args) >= 2 {
 		var g iace.GroundTruth
 		must(readJSONFile(args[1], &g))
 		gt = &g
 	}
 	threshold := envFloat("IACE_PROPOSE_THRESHOLD", 0.30)
 	hazards, mits, fired := iace.BuildProposerInput(in.Narrative, in.MachineType, in.MachineTypes)
 	candidates := iace.FindDedupCandidates(fired, threshold)
 	// Index fired patterns by ID so the judge gets the full keep/drop patterns.
 	byID := make(map[string]iace.PatternMatch, len(fired))
 	for _, pm := range fired {
 		byID[pm.PatternID] = pm
 	}
 	judge := selectJudge(in.MachineType)
 	ctx := context.Background()
 	// Non-nil and pre-sized: an empty queue should serialize as [] rather than
 	// null in proposals.json (assuming writeJSON encodes with encoding/json).
 	proposals := make([]iace.JudgedProposal, 0, len(candidates))
 	blocked := 0
 	for _, c := range candidates {
 		// sr keeps its zero value when no ground truth is available.
 		var sr iace.ScreenResult
 		if gt != nil {
 			sr = iace.ScreenSupersession(gt, hazards, mits, c.KeepHazardName, c.DropName)
 			// GT wall: drop candidates that would lower recall or that the
 			// screen flags as distinct in the ground truth.
 			if sr.RecallAfter < sr.RecallBefore || sr.DistinctGT {
 				blocked++
 				continue
 			}
 		}
 		v, conf, rat := judge.Judge(ctx, c, byID[c.KeepPattern], byID[c.DropPattern])
 		proposals = append(proposals, iace.JudgedProposal{
 			Candidate: c, Screen: sr, Verdict: v, Confidence: conf, Rationale: rat, Judge: judge.Name(),
 		})
 	}
 	writeText("audit-reports/proposals.md", iace.RenderProposalQueue(in.MachineType, proposals))
 	writeJSON("audit-reports/proposals.json", proposals)
 	printSummary("Method P — Dedup Proposer ("+judge.Name()+")", map[string]int{
 		"fired_patterns": len(fired),
 		"candidates":     len(candidates),
 		"in_queue":       len(proposals),
 		"gt_blocked":     blocked,
 	})
 	if gt == nil {
 		fmt.Fprintln(os.Stderr, "note: no ground truth provided — GT wall NOT applied (candidates not recall-screened)")
 	}
 }
 // selectJudge returns the judge used for dedup candidates. Only when
 // IACE_PROPOSE_LLM is exactly "1" is an ollama-backed LLMJudge built (URL and
 // model taken from OLLAMA_URL / SELF_HOSTED_LLM_MODEL, with defaults); any
 // other value yields the HeuristicJudge.
 func selectJudge(machineClass string) iace.CandidateJudge {
 	if os.Getenv("IACE_PROPOSE_LLM") == "1" {
 		ollamaURL := envStr("OLLAMA_URL", "http://localhost:11434")
 		modelName := envStr("SELF_HOSTED_LLM_MODEL", "qwen2.5:32b-instruct")
 		registry := llm.NewProviderRegistry("ollama", "")
 		registry.Register(llm.NewOllamaAdapter(ollamaURL, modelName))
 		fmt.Printf("using LLM judge (ollama %s, model %s)\n", ollamaURL, modelName)
 		return iace.LLMJudge{
 			Completer:    iace.NewRegistryCompleter(registry, modelName),
 			MachineClass: machineClass,
 		}
 	}
 	return iace.HeuristicJudge{}
 }
 // readJSONFile reads the file at path and decodes its JSON content into v,
 // which must be a pointer as required by json.Unmarshal.
 //
 // A read failure is returned unchanged (os errors already name the path). A
 // decode failure is wrapped with the path, because the propose command reads
 // more than one JSON file and a bare syntax error would not say which one is
 // malformed. The underlying error stays reachable via errors.Is / errors.As.
 func readJSONFile(path string, v any) error {
 	raw, err := os.ReadFile(path)
 	if err != nil {
 		return err
 	}
 	if err := json.Unmarshal(raw, v); err != nil {
 		return fmt.Errorf("parsing %q: %w", path, err)
 	}
 	return nil
 }
 // writeText writes content to path, creating the parent directory of path
 // first. Failures are best-effort: they are reported as a warning on stderr
 // and the function returns without writing. On success the written path is
 // printed to stdout.
 func writeText(path, content string) {
 	// Create the directory the file actually lives in instead of a fixed name,
 	// so the helper works for any target path.
 	if err := os.MkdirAll(filepath.Dir(path), 0o755); err != nil {
 		fmt.Fprintln(os.Stderr, "warn: could not create directory for", path, err)
 		return
 	}
 	if err := os.WriteFile(path, []byte(content), 0o644); err != nil {
 		fmt.Fprintln(os.Stderr, "warn: could not write", path, err)
 		return
 	}
 	fmt.Println("→ wrote", path)
 }
 // envStr returns the value of the environment variable key, or def when the
 // variable is unset or set to the empty string.
 func envStr(key, def string) string {
 	val := os.Getenv(key)
 	if val == "" {
 		return def
 	}
 	return val
 }
 // envFloat returns the environment variable key parsed as a float64, or def
 // when the variable is unset or empty.
 //
 // A value that is set but unusable also yields def, with a warning on stderr
 // so that a typo (for example a decimal comma, "0,3") does not silently
 // change the configuration. Non-finite values are rejected as well:
 // strconv.ParseFloat accepts "NaN" and "Inf" without error.
 func envFloat(key string, def float64) float64 {
 	raw := os.Getenv(key)
 	if raw == "" {
 		return def
 	}
 	f, err := strconv.ParseFloat(raw, 64)
 	switch {
 	case err != nil:
 		fmt.Fprintf(os.Stderr, "warn: %s=%q is not a valid number, using default %v\n", key, raw, def)
 		return def
 	case math.IsNaN(f) || math.IsInf(f, 0):
 		fmt.Fprintf(os.Stderr, "warn: %s=%q is not a finite number, using default %v\n", key, raw, def)
 		return def
 	}
 	return f
 }
@@ -7,8 +7,6 @@ import (
 	"path/filepath"
 	"sort"
 	"testing"
 	"github.com/google/uuid"
 )
 // TestKistenhub_GTCoverage runs the Kistenhubgeraet ground truth (37 entries)
@@ -110,65 +108,6 @@ func TestKistenhub_GTCoverage(t *testing.T) {
 // patternsToHazardsAndMitigations converts a pattern match output into the
 // Hazard/Mitigation shapes that CompareBenchmark expects. Mirrors what
 // iace_handler_init.go does in production but without DB writes.
 //
 // One Hazard is built per matched pattern, and one Mitigation per
 // (suggested measure, source pattern) pair whose source pattern produced a
 // hazard. All IDs are freshly generated UUIDs.
 func patternsToHazardsAndMitigations(out *MatchOutput) ([]Hazard, []Mitigation) {
 	hazards := make([]Hazard, 0, len(out.MatchedPatterns))
 	// Pattern ID -> ID of the hazard built from it, used to link mitigations below.
 	patternToHazard := make(map[string]uuid.UUID, len(out.MatchedPatterns))
 	for _, pm := range out.MatchedPatterns {
 		// Only the first hazard category of the pattern is used.
 		cat := ""
 		if len(pm.HazardCats) > 0 {
 			cat = pm.HazardCats[0]
 		}
 		zone := pm.ZoneDE
 		// Only the first applicable lifecycle of the pattern is used.
 		lifecycle := ""
 		if len(pm.ApplicableLifecycles) > 0 {
 			lifecycle = pm.ApplicableLifecycles[0]
 		}
 		h := Hazard{
 			ID:             uuid.New(),
 			Name:           pm.ScenarioDE,
 			Category:       cat,
 			Description:    pm.ScenarioDE,
 			Scenario:       pm.ScenarioDE,
 			TriggerEvent:   pm.TriggerDE,
 			PossibleHarm:   pm.HarmDE,
 			AffectedPerson: pm.AffectedDE,
 			HazardousZone:  zone,
 			LifecyclePhase: lifecycle,
 		}
 		// Fall back to the pattern name when the pattern has no scenario text.
 		if h.Name == "" {
 			h.Name = pm.PatternName
 		}
 		hazards = append(hazards, h)
 		patternToHazard[pm.PatternID] = h.ID
 	}
 	// Measure ID -> display name from the protective measure library.
 	measureNames := make(map[string]string)
 	for _, m := range GetProtectiveMeasureLibrary() {
 		measureNames[m.ID] = m.Name
 	}
 	var mitigations []Mitigation
 	for _, sm := range out.SuggestedMeasures {
 		// Measures without a library name are labelled with their ID.
 		name := measureNames[sm.MeasureID]
 		if name == "" {
 			name = sm.MeasureID
 		}
 		for _, srcPattern := range sm.SourcePatterns {
 			hid, ok := patternToHazard[srcPattern]
 			// Skip source patterns that are not among the matched patterns.
 			if !ok {
 				continue
 			}
 			mitigations = append(mitigations, Mitigation{
 				ID:       uuid.New(),
 				HazardID: hid,
 				Name:     name,
 			})
 		}
 	}
 	return hazards, mitigations
 }
 func abbrev(s string, max int) string {
 	if len(s) <= max {
 		return s
@@ -0,0 +1,123 @@
 package iace
 import "github.com/google/uuid"
 // Non-test plumbing for the offline proposer (P2 slice 3): run the engine for a
 // narrative and produce the fired patterns + the engine-built hazards/mitigations
 // the dedup proposer and GT screen consume. This is the same pipeline the GT
 // benchmark tests use, lifted out of test scope so the dev-time CLI can call it.
 // universalLifecyclePhases are appended so patterns gated to a specific lifecycle
 // (maintenance/cleaning/setup/fault clearing) still fire — the proposer wants the
 // full hazard picture, not only normal-operation hazards. BuildProposerInput adds
 // them after the phases parsed from the narrative without de-duplicating, so a
 // phase may appear twice in the engine input.
 var universalLifecyclePhases = []string{"normal_operation", "maintenance", "cleaning", "setup", "fault_clearing"}
 // BuildProposerInput runs the proposer pipeline for one narrative: parse it,
 // feed the parsed facts to the pattern engine, keep only the matches that
 // IsPatternRelevant accepts, and convert those into hazards and mitigations.
 // It returns the hazards, the mitigations and the kept (fired) patterns.
 //
 // Negated components are left out of the engine input. machineType, when
 // non-empty, is added after extraMachineTypes; the universal lifecycle phases
 // are added after the parsed ones.
 //
 // NOTE: it does not apply the CE cyber-category skip, so the proposer view may
 // include cyber/AI hazards that the CE log excludes — harmless for the GT
 // recall screen (they match no CE ground-truth entry).
 func BuildProposerInput(narrative, machineType string, extraMachineTypes []string) ([]Hazard, []Mitigation, []PatternMatch) {
 	parsed := ParseNarrative(narrative, machineType)
 	var componentIDs, componentNames []string
 	for _, comp := range parsed.Components {
 		if !comp.Negated {
 			componentIDs = append(componentIDs, comp.LibraryID)
 			componentNames = append(componentNames, comp.NameDE)
 		}
 	}
 	var energySourceIDs []string
 	for _, src := range parsed.EnergySources {
 		energySourceIDs = append(energySourceIDs, src.SourceID)
 	}
 	// Copy into fresh slices so the caller's and the parser's slices are never aliased.
 	allMachineTypes := make([]string, 0, len(extraMachineTypes)+1)
 	allMachineTypes = append(allMachineTypes, extraMachineTypes...)
 	if machineType != "" {
 		allMachineTypes = append(allMachineTypes, machineType)
 	}
 	phases := make([]string, 0, len(parsed.LifecyclePhases)+len(universalLifecyclePhases))
 	phases = append(phases, parsed.LifecyclePhases...)
 	phases = append(phases, universalLifecyclePhases...)
 	input := MatchInput{
 		ComponentLibraryIDs: componentIDs,
 		EnergySourceIDs:     energySourceIDs,
 		LifecyclePhases:     phases,
 		CustomTags:          parsed.CustomTags,
 		OperationalStates:   parsed.OperationalStates,
 		StateTransitions:    parsed.StateTransitions,
 		HumanRoles:          parsed.Roles,
 		MachineTypes:        allMachineTypes,
 	}
 	matched := NewPatternEngine().Match(input)
 	fired := make([]PatternMatch, 0, len(matched.MatchedPatterns))
 	for _, candidate := range matched.MatchedPatterns {
 		if !IsPatternRelevant(candidate, narrative, componentNames) {
 			continue
 		}
 		fired = append(fired, candidate)
 	}
 	// Shallow copy of the engine output with only the relevant patterns left in.
 	relevantOnly := *matched
 	relevantOnly.MatchedPatterns = fired
 	hazards, mitigations := patternsToHazardsAndMitigations(&relevantOnly)
 	return hazards, mitigations, fired
 }
 // patternsToHazardsAndMitigations turns engine output into the hazard and
 // mitigation entities the benchmark and the proposer compare on. It is a
 // simplified variant of InitializeProject (no risk estimation, no norm refs):
 // only category, zone, scenario texts and measures are filled in.
 //
 // Each matched pattern yields one Hazard; each suggested measure yields one
 // Mitigation per source pattern that produced a hazard. IDs are new UUIDs.
 func patternsToHazardsAndMitigations(out *MatchOutput) ([]Hazard, []Mitigation) {
 	// firstOf returns the first element of a list, or "" for an empty list.
 	firstOf := func(values []string) string {
 		if len(values) == 0 {
 			return ""
 		}
 		return values[0]
 	}
 	hazardIDByPattern := make(map[string]uuid.UUID, len(out.MatchedPatterns))
 	hazards := make([]Hazard, 0, len(out.MatchedPatterns))
 	for _, match := range out.MatchedPatterns {
 		// The scenario text doubles as the hazard name; the pattern name is the fallback.
 		hazardName := match.ScenarioDE
 		if hazardName == "" {
 			hazardName = match.PatternName
 		}
 		hazard := Hazard{
 			ID:             uuid.New(),
 			Name:           hazardName,
 			Category:       firstOf(match.HazardCats),
 			Description:    match.ScenarioDE,
 			Scenario:       match.ScenarioDE,
 			TriggerEvent:   match.TriggerDE,
 			PossibleHarm:   match.HarmDE,
 			AffectedPerson: match.AffectedDE,
 			HazardousZone:  match.ZoneDE,
 			LifecyclePhase: firstOf(match.ApplicableLifecycles),
 		}
 		hazardIDByPattern[match.PatternID] = hazard.ID
 		hazards = append(hazards, hazard)
 	}
 	library := GetProtectiveMeasureLibrary()
 	nameByMeasureID := make(map[string]string, len(library))
 	for _, measure := range library {
 		nameByMeasureID[measure.ID] = measure.Name
 	}
 	var mitigations []Mitigation
 	for _, suggestion := range out.SuggestedMeasures {
 		// Label the mitigation with the library name, or the raw ID if none is known.
 		label := suggestion.MeasureID
 		if known := nameByMeasureID[suggestion.MeasureID]; known != "" {
 			label = known
 		}
 		for _, patternID := range suggestion.SourcePatterns {
 			if hazardID, found := hazardIDByPattern[patternID]; found {
 				mitigations = append(mitigations, Mitigation{
 					ID:       uuid.New(),
 					HazardID: hazardID,
 					Name:     label,
 				})
 			}
 		}
 	}
 	return hazards, mitigations
 }
@@ -0,0 +1,25 @@
 package iace
 import "testing"
 // TestBuildProposerInput_WarewashingFires checks that the non-test pipeline
 // produces fired patterns and hazards for the warewashing narrative and that
 // pattern HP2201 is among the fired ones.
 func TestBuildProposerInput_WarewashingFires(t *testing.T) {
 	const machineType = "Gewerbliche Untertisch-Geschirrspuelmaschine (vernetzt)"
 	hazards, _, fired := BuildProposerInput(warewashingNarrative, machineType, []string{"food_processing"})
 	if len(fired) == 0 || len(hazards) == 0 {
 		t.Fatalf("want fired patterns + hazards, got %d patterns / %d hazards", len(fired), len(hazards))
 	}
 	for _, pm := range fired {
 		if pm.PatternID == "HP2201" {
 			return
 		}
 	}
 	t.Errorf("warewashing-specific HP2201 must fire via BuildProposerInput")
 }