// breakpilot-compliance/ai-compliance-sdk/cmd/iace-audit/propose.go

package main

import (
	"context"
	"encoding/json"
	"fmt"
	"os"
	"strconv"
	"strings"

	"github.com/breakpilot/ai-compliance-sdk/internal/iace"
	"github.com/breakpilot/ai-compliance-sdk/internal/iace/audit"
	"github.com/breakpilot/ai-compliance-sdk/internal/llm"
)

// narrativeInput is the JSON document cmdPropose decodes from its first
// argument (the <narrative.json> file).
//
//   - MachineType is handed to the proposer-input builder, to judge selection
//     and to the report renderers.
//   - Narrative is the text that gets analysed; cmdPropose refuses to run when
//     it is empty.
//   - MachineTypes is an optional list handed to the proposer-input builder
//     together with MachineType.
type narrativeInput struct {
	MachineType  string   `json:"machine_type"`
	Narrative    string   `json:"narrative"`
	MachineTypes []string `json:"machine_types,omitempty"`
}

// cmdPropose — Method P: offline dedup-candidate proposer.
//
//	iace-audit propose <narrative.json> [<ground-truth.json>]
//
// Propose-only: every result is a human-review queue written under
// audit-reports/; the pattern library is never mutated. Three queues are
// produced:
//
//   - proposals.{md,json}: near-duplicate patterns. Candidates are screened
//     against the ground truth (if given), then judged (heuristic by default,
//     LLM when enabled).
//   - framing.{md,json}: foreign-framing candidates found in the fired patterns.
//   - vocab.{md,json}: vocabulary suggestions that carry at least one suggested
//     tag.
//
// The process exits with status 2 on a usage error or when the narrative is
// empty or whitespace-only.
//
// Env:
//
//	IACE_PROPOSE_THRESHOLD   candidate score threshold (default 0.30)
//	IACE_FRAMING_MIN_ORPHAN  cutoff passed to the framing finder (default 0.6)
//	IACE_PROPOSE_LLM=1       use the offline LLM judge instead of the heuristic
//	OLLAMA_URL               ollama base URL (default http://localhost:11434)
//	SELF_HOSTED_LLM_MODEL    model name (default qwen2.5:32b-instruct)
func cmdPropose(args []string) {
	if len(args) < 1 {
		fmt.Fprintln(os.Stderr, "propose: usage: iace-audit propose <narrative.json> [<ground-truth.json>]")
		os.Exit(2)
	}

	var in narrativeInput
	must(readJSONFile(args[0], &in))
	// Whitespace-only text carries no more signal than an empty string, so
	// reject both instead of producing empty reports.
	if strings.TrimSpace(in.Narrative) == "" {
		fmt.Fprintln(os.Stderr, "propose: narrative is empty")
		os.Exit(2)
	}

	// gt stays nil when no ground-truth file was given; the screening step in
	// the loop below is skipped in that case.
	var gt *iace.GroundTruth
	if len(args) >= 2 {
		var g iace.GroundTruth
		must(readJSONFile(args[1], &g))
		gt = &g
	}

	threshold := envFloat("IACE_PROPOSE_THRESHOLD", 0.30)
	hazards, mits, fired := iace.BuildProposerInput(in.Narrative, in.MachineType, in.MachineTypes)
	candidates := iace.FindDedupCandidates(fired, threshold)

	// Index the fired patterns by ID so the judge can be given both sides of a
	// candidate pair.
	byID := make(map[string]iace.PatternMatch, len(fired))
	for _, pm := range fired {
		byID[pm.PatternID] = pm
	}

	judge := selectJudge(in.MachineType)
	ctx := context.Background()

	// Non-nil on purpose: encoding/json encodes a nil slice as null, and an
	// empty review queue should read as [] (assumes writeJSON marshals with
	// encoding/json — confirm). Pre-sized because at most every candidate
	// survives screening.
	proposals := make([]iace.JudgedProposal, 0, len(candidates))
	blocked := 0
	for _, c := range candidates {
		var sr iace.ScreenResult
		if gt != nil {
			sr = iace.ScreenSupersession(gt, hazards, mits, c.KeepHazardName, c.DropName)
			// Drop the candidate when screening reports a recall loss or flags
			// DistinctGT; it never reaches the judge.
			if sr.RecallAfter < sr.RecallBefore || sr.DistinctGT {
				blocked++
				continue
			}
		}
		v, conf, rat := judge.Judge(ctx, c, byID[c.KeepPattern], byID[c.DropPattern])
		proposals = append(proposals, iace.JudgedProposal{
			Candidate: c, Screen: sr, Verdict: v, Confidence: conf, Rationale: rat, Judge: judge.Name(),
		})
	}

	writeText("audit-reports/proposals.md", iace.RenderProposalQueue(in.MachineType, proposals))
	writeJSON("audit-reports/proposals.json", proposals)

	// Type 2: foreign-framing candidates (zone terms with no narrative echo).
	framing := iace.FindFramingCandidates(fired, in.Narrative, envFloat("IACE_FRAMING_MIN_ORPHAN", 0.6))
	writeText("audit-reports/framing.md", iace.RenderFramingQueue(in.MachineType, framing))
	writeJSON("audit-reports/framing.json", framing)

	// Type 3: vocab->tag proposals (unknown narrative tokens that pattern text
	// names as a whole word, with a dominant shared required tag).
	vocab := audit.RunVocabulary(map[string]any{"narrative": in.Narrative})
	// Only suggestions that come with at least one tag are actionable; kept
	// non-nil for the same JSON reason as proposals.
	vgaps := make([]audit.DictionarySuggestion, 0, len(vocab.SuggestedDictionaryEntries))
	for _, s := range vocab.SuggestedDictionaryEntries {
		if len(s.SuggestedTags) > 0 {
			vgaps = append(vgaps, s)
		}
	}
	writeText("audit-reports/vocab.md", renderVocabQueue(in.MachineType, vgaps))
	writeJSON("audit-reports/vocab.json", vgaps)

	printSummary("Method P — Dedup Proposer ("+judge.Name()+")", map[string]int{
		"fired_patterns": len(fired),
		"candidates":     len(candidates),
		"in_queue":       len(proposals),
		"gt_blocked":     blocked,
		"framing_flags":  len(framing),
		"vocab_gaps":     len(vgaps),
	})
	if gt == nil {
		fmt.Fprintln(os.Stderr, "note: no ground truth provided — GT wall NOT applied (candidates not recall-screened)")
	}
}

// selectJudge picks the judge used for dedup candidates.
//
// Unless IACE_PROPOSE_LLM is exactly "1" the heuristic judge is returned.
// Otherwise an Ollama adapter is registered using OLLAMA_URL and
// SELF_HOSTED_LLM_MODEL (each with a built-in default), the choice is
// announced on stdout, and an LLM judge carrying machineClass is returned.
func selectJudge(machineClass string) iace.CandidateJudge {
	llmEnabled := os.Getenv("IACE_PROPOSE_LLM") == "1"
	if !llmEnabled {
		return iace.HeuristicJudge{}
	}

	ollamaURL := envStr("OLLAMA_URL", "http://localhost:11434")
	modelName := envStr("SELF_HOSTED_LLM_MODEL", "qwen2.5:32b-instruct")

	registry := llm.NewProviderRegistry("ollama", "")
	registry.Register(llm.NewOllamaAdapter(ollamaURL, modelName))
	fmt.Printf("using LLM judge (ollama %s, model %s)\n", ollamaURL, modelName)

	completer := iace.NewRegistryCompleter(registry, modelName)
	return iace.LLMJudge{Completer: completer, MachineClass: machineClass}
}

// readJSONFile reads the file at path and decodes its JSON content into v,
// which must be a non-nil pointer as json.Unmarshal requires.
//
// A read failure is returned unchanged; errors from os.ReadFile already name
// the path. A decode failure is wrapped with the path, because an
// encoding/json error alone does not say which input file is malformed. The
// underlying error stays reachable through errors.Is and errors.As.
func readJSONFile(path string, v any) error {
	raw, err := os.ReadFile(path)
	if err != nil {
		return err
	}
	if err := json.Unmarshal(raw, v); err != nil {
		return fmt.Errorf("parsing %s: %w", path, err)
	}
	return nil
}

// writeText stores content at path and reports the outcome: a confirmation on
// stdout when the file was written, a warning on stderr when it was not. A
// failed write is never fatal.
func writeText(path, content string) {
	// Best effort only: the reports directory is created up front and the
	// result is deliberately ignored, since a failed write below is reported
	// to the user anyway.
	_ = os.MkdirAll("audit-reports", 0o755)

	err := os.WriteFile(path, []byte(content), 0o644)
	if err == nil {
		fmt.Println("→ wrote", path)
		return
	}
	fmt.Fprintln(os.Stderr, "warn: could not write", path, err)
}

// envStr returns the value of the environment variable key, or def when the
// variable is unset or set to the empty string.
func envStr(key, def string) string {
	value := os.Getenv(key)
	if value == "" {
		return def
	}
	return value
}

// envFloat returns the environment variable key parsed as a float64.
//
// def is returned when the variable is unset, blank, or not parseable by
// strconv.ParseFloat. Surrounding whitespace is ignored. An unparseable value
// additionally triggers a warning on stderr, so a typo in a tuning knob does
// not silently run with the default.
func envFloat(key string, def float64) float64 {
	raw := strings.TrimSpace(os.Getenv(key))
	if raw == "" {
		return def
	}
	f, err := strconv.ParseFloat(raw, 64)
	if err != nil {
		fmt.Fprintf(os.Stderr, "warn: ignoring %s=%q (not a number), using default %g\n", key, raw, def)
		return def
	}
	return f
}

// renderVocabQueue renders the vocab→tag review queue as Markdown: a title
// naming machine, an intro line with the number of entries, then one numbered
// section per entry listing its token, its suggested tags, the patterns that
// name it and a suggested dictionary action.
func renderVocabQueue(machine string, entries []audit.DictionarySuggestion) string {
	var out strings.Builder
	fmt.Fprintf(&out, "# Vocab→tag review queue — %s\n\n", machine)
	fmt.Fprintf(&out, "%d unknown token(s) appear in pattern text but map to no dictionary tag. Propose-only — a human (or the LLM) confirms the tag, then adds a keyword_dictionary entry and pins a GT case.\n\n", len(entries))

	for idx := range entries {
		entry := entries[idx]
		number := idx + 1
		allTags := strings.Join(entry.SuggestedTags, ", ")
		patternList := strings.Join(entry.PatternIDs, ", ")

		// The suggested action names the first tag only; a placeholder stands
		// in when the entry has none.
		primary := "<tag>"
		if tags := entry.SuggestedTags; len(tags) != 0 {
			primary = tags[0]
		}

		fmt.Fprintf(&out, "## %d. \"%s\"  → suggested tag(s): %s\n", number, entry.Token, allTags)
		fmt.Fprintf(&out, "- named by %d pattern(s): %s\n", len(entry.PatternIDs), patternList)
		fmt.Fprintf(&out, "- suggested action: add keyword_dictionary entry {%q → %s} so narratives mentioning it trigger those patterns; human confirms\n\n", entry.Token, primary)
	}
	return out.String()
}