4d225f73a8
Completes the proposer's four types.
- FindCoverageGaps (proposer_coverage.go): deterministic — which EN ISO 12100
hazard groups A-G did the engine leave with zero hazards for this machine? An
empty group is a structural blind-spot signal (the machine may truly lack it,
or a pattern/GT case is missing). Useful with no model at all.
- ProposeMissingHazards + BuildCoveragePrompt: optional LLM expansion of each gap
into specific expected-but-missing hazards a safety assessor would name
(propose-only, reuses LLMCompleter, degrades to nil on any error).
- Wired into iace-audit propose -> audit-reports/coverage.{md,json}.
On the dishwasher: D. Pneumatik (truly absent — nothing invented), E. Laerm
(borderline), F. Ergonomie (a genuine gap: manual loading the engine did not
produce). P3 (pin an accepted proposal into a GT case) remains as a
human-in-the-loop follow-up.
Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
189 lines
6.4 KiB
Go
189 lines
6.4 KiB
Go
package main
|
|
|
|
import (
	"context"
	"encoding/json"
	"fmt"
	"os"
	"path/filepath"
	"strconv"
	"strings"

	"github.com/breakpilot/ai-compliance-sdk/internal/iace"
	"github.com/breakpilot/ai-compliance-sdk/internal/iace/audit"
	"github.com/breakpilot/ai-compliance-sdk/internal/llm"
)
|
|
|
|
// narrativeInput is the JSON document decoded from the <narrative.json>
// argument of the propose command.
type narrativeInput struct {
	// MachineType is the machine label. cmdPropose hands it to the proposer
	// input builder, the judge selection and every report renderer.
	MachineType string `json:"machine_type"`
	// Narrative is the free-text description; cmdPropose rejects an empty one.
	Narrative string `json:"narrative"`
	// MachineTypes is an optional list forwarded to the proposer input builder
	// together with MachineType.
	MachineTypes []string `json:"machine_types,omitempty"`
}
|
|
|
|
// cmdPropose — Method P: offline dedup-candidate proposer.
|
|
//
|
|
// iace-audit propose <narrative.json> [<ground-truth.json>]
|
|
//
|
|
// Detect near-duplicate patterns, screen survivors against a ground truth (if
|
|
// given), judge them (heuristic by default, LLM when enabled), and write the
|
|
// human-review queue to audit-reports/proposals.{md,json}. Propose-only — it
|
|
// writes a report and never mutates the pattern library.
|
|
//
|
|
// Env:
|
|
//
|
|
// IACE_PROPOSE_THRESHOLD candidate score threshold (default 0.30)
|
|
// IACE_PROPOSE_LLM=1 use the offline LLM judge instead of the heuristic
|
|
// OLLAMA_URL ollama base URL (default http://localhost:11434)
|
|
// SELF_HOSTED_LLM_MODEL model name (default qwen2.5:32b-instruct)
|
|
func cmdPropose(args []string) {
|
|
if len(args) < 1 {
|
|
fmt.Fprintln(os.Stderr, "propose: usage: iace-audit propose <narrative.json> [<ground-truth.json>]")
|
|
os.Exit(2)
|
|
}
|
|
|
|
var in narrativeInput
|
|
must(readJSONFile(args[0], &in))
|
|
if in.Narrative == "" {
|
|
fmt.Fprintln(os.Stderr, "propose: narrative is empty")
|
|
os.Exit(2)
|
|
}
|
|
|
|
var gt *iace.GroundTruth
|
|
if len(args) >= 2 {
|
|
var g iace.GroundTruth
|
|
must(readJSONFile(args[1], &g))
|
|
gt = &g
|
|
}
|
|
|
|
threshold := envFloat("IACE_PROPOSE_THRESHOLD", 0.30)
|
|
hazards, mits, fired := iace.BuildProposerInput(in.Narrative, in.MachineType, in.MachineTypes)
|
|
candidates := iace.FindDedupCandidates(fired, threshold)
|
|
|
|
byID := make(map[string]iace.PatternMatch, len(fired))
|
|
for _, pm := range fired {
|
|
byID[pm.PatternID] = pm
|
|
}
|
|
|
|
judge := selectJudge(in.MachineType)
|
|
ctx := context.Background()
|
|
|
|
var proposals []iace.JudgedProposal
|
|
blocked := 0
|
|
for _, c := range candidates {
|
|
var sr iace.ScreenResult
|
|
if gt != nil {
|
|
sr = iace.ScreenSupersession(gt, hazards, mits, c.KeepHazardName, c.DropName)
|
|
if sr.RecallAfter < sr.RecallBefore || sr.DistinctGT {
|
|
blocked++
|
|
continue
|
|
}
|
|
}
|
|
v, conf, rat := judge.Judge(ctx, c, byID[c.KeepPattern], byID[c.DropPattern])
|
|
proposals = append(proposals, iace.JudgedProposal{
|
|
Candidate: c, Screen: sr, Verdict: v, Confidence: conf, Rationale: rat, Judge: judge.Name(),
|
|
})
|
|
}
|
|
|
|
writeText("audit-reports/proposals.md", iace.RenderProposalQueue(in.MachineType, proposals))
|
|
writeJSON("audit-reports/proposals.json", proposals)
|
|
|
|
// Type 2: foreign-framing candidates (zone terms with no narrative echo).
|
|
framing := iace.FindFramingCandidates(fired, in.Narrative, envFloat("IACE_FRAMING_MIN_ORPHAN", 0.6))
|
|
writeText("audit-reports/framing.md", iace.RenderFramingQueue(in.MachineType, framing))
|
|
writeJSON("audit-reports/framing.json", framing)
|
|
|
|
// Type 3: vocab->tag proposals (unknown narrative tokens that pattern text
|
|
// names as a whole word, with a dominant shared required tag).
|
|
vocab := audit.RunVocabulary(map[string]any{"narrative": in.Narrative})
|
|
var vgaps []audit.DictionarySuggestion
|
|
for _, s := range vocab.SuggestedDictionaryEntries {
|
|
if len(s.SuggestedTags) > 0 {
|
|
vgaps = append(vgaps, s)
|
|
}
|
|
}
|
|
writeText("audit-reports/vocab.md", renderVocabQueue(in.MachineType, vgaps))
|
|
writeJSON("audit-reports/vocab.json", vgaps)
|
|
|
|
// Type 4: coverage blind-spots (empty ISO 12100 groups A-G) + LLM expansion.
|
|
gaps := iace.FindCoverageGaps(hazards)
|
|
var missing []iace.MissingHazard
|
|
if lj, ok := judge.(iace.LLMJudge); ok {
|
|
missing = iace.ProposeMissingHazards(ctx, lj.Completer, in.MachineType, in.Narrative, hazards, gaps)
|
|
}
|
|
writeText("audit-reports/coverage.md", iace.RenderCoverageQueue(in.MachineType, gaps, missing))
|
|
writeJSON("audit-reports/coverage.json", gaps)
|
|
|
|
printSummary("Method P — Dedup Proposer ("+judge.Name()+")", map[string]int{
|
|
"fired_patterns": len(fired),
|
|
"candidates": len(candidates),
|
|
"in_queue": len(proposals),
|
|
"gt_blocked": blocked,
|
|
"framing_flags": len(framing),
|
|
"vocab_gaps": len(vgaps),
|
|
"coverage_gaps": len(gaps),
|
|
})
|
|
if gt == nil {
|
|
fmt.Fprintln(os.Stderr, "note: no ground truth provided — GT wall NOT applied (candidates not recall-screened)")
|
|
}
|
|
}
|
|
|
|
func selectJudge(machineClass string) iace.CandidateJudge {
|
|
if os.Getenv("IACE_PROPOSE_LLM") != "1" {
|
|
return iace.HeuristicJudge{}
|
|
}
|
|
base := envStr("OLLAMA_URL", "http://localhost:11434")
|
|
model := envStr("SELF_HOSTED_LLM_MODEL", "qwen2.5:32b-instruct")
|
|
reg := llm.NewProviderRegistry("ollama", "")
|
|
reg.Register(llm.NewOllamaAdapter(base, model))
|
|
fmt.Printf("using LLM judge (ollama %s, model %s)\n", base, model)
|
|
return iace.LLMJudge{Completer: iace.NewRegistryCompleter(reg, model), MachineClass: machineClass}
|
|
}
|
|
|
|
// readJSONFile reads the file at path and decodes its JSON content into v.
// v is passed straight to json.Unmarshal, so it must be a non-nil pointer.
//
// A read failure is returned unchanged; os.ReadFile already reports the
// operation and the path. A decode failure is wrapped with the path, because
// the bare encoding/json error does not say which file was malformed. The
// wrap uses %w, so errors.Is and errors.As still reach the underlying error.
func readJSONFile(path string, v any) error {
	raw, err := os.ReadFile(path)
	if err != nil {
		return err
	}
	if err := json.Unmarshal(raw, v); err != nil {
		return fmt.Errorf("decoding %s: %w", path, err)
	}
	return nil
}
|
|
|
|
// writeText writes content to path, creating the parent directory of path
// first when it does not exist yet.
//
// Writing a report is best-effort: a failure is reported as a warning on
// stderr and the function returns without aborting the program. On success
// the written path is announced on stdout.
func writeText(path, content string) {
	// Create the directory the file actually lives in rather than a fixed
	// name, so the helper also works for paths outside audit-reports/.
	if err := os.MkdirAll(filepath.Dir(path), 0o755); err != nil {
		fmt.Fprintln(os.Stderr, "warn: could not create directory for", path, err)
		return
	}
	if err := os.WriteFile(path, []byte(content), 0o644); err != nil {
		fmt.Fprintln(os.Stderr, "warn: could not write", path, err)
		return
	}
	fmt.Println("→ wrote", path)
}
|
|
|
|
// envStr returns the value of the environment variable key, or def when the
// variable is unset or set to the empty string.
func envStr(key, def string) string {
	value := os.Getenv(key)
	if value == "" {
		return def
	}
	return value
}
|
|
|
|
// envFloat returns the environment variable key parsed as a float64, or def
// when the variable is unset or empty.
//
// A value that is set but unusable (not a number, out of range, or NaN) also
// yields def, and a warning naming the variable is printed to stderr so a
// typo such as "0,3" does not silently run with the default.
func envFloat(key string, def float64) float64 {
	raw := os.Getenv(key)
	if raw == "" {
		return def
	}
	f, err := strconv.ParseFloat(raw, 64)
	if err != nil {
		fmt.Fprintf(os.Stderr, "warn: %s=%q is not a valid number, using default %v\n", key, raw, def)
		return def
	}
	// ParseFloat accepts "NaN". NaN compares false against every value, so it
	// can never act as a usable numeric setting; f != f is true only for NaN.
	if f != f {
		fmt.Fprintf(os.Stderr, "warn: %s=%q is not a usable number, using default %v\n", key, raw, def)
		return def
	}
	return f
}
|
|
|
|
func renderVocabQueue(machine string, entries []audit.DictionarySuggestion) string {
|
|
var b strings.Builder
|
|
fmt.Fprintf(&b, "# Vocab→tag review queue — %s\n\n", machine)
|
|
fmt.Fprintf(&b, "%d unknown token(s) appear in pattern text but map to no dictionary tag. Propose-only — a human (or the LLM) confirms the tag, then adds a keyword_dictionary entry and pins a GT case.\n\n", len(entries))
|
|
for i, s := range entries {
|
|
tag := "<tag>"
|
|
if len(s.SuggestedTags) > 0 {
|
|
tag = s.SuggestedTags[0]
|
|
}
|
|
fmt.Fprintf(&b, "## %d. \"%s\" → suggested tag(s): %s\n", i+1, s.Token, strings.Join(s.SuggestedTags, ", "))
|
|
fmt.Fprintf(&b, "- named by %d pattern(s): %s\n", len(s.PatternIDs), strings.Join(s.PatternIDs, ", "))
|
|
fmt.Fprintf(&b, "- suggested action: add keyword_dictionary entry {%q → %s} so narratives mentioning it trigger those patterns; human confirms\n\n", s.Token, tag)
|
|
}
|
|
return b.String()
|
|
}
|