Merge pull request 'fix(onboarding): review decisions — ISO13485 + patch rationale + summary counter' (#50) from feat/review-decisions into main

fix(onboarding): apply hypothesis/vocabulary review decisions (ISO13485, patch-policy rationale, summary)
Two reviewed knowledge decisions (2026-06-28) + the deferred cosmetic counter, before #59. 1. ISO13485 removed from the incident_management hypothesis. ISO 13485 CAPA / quality-safety incident handling is NOT security incident management — the mapping was too broad and would seed false hypotheses for the empirical loop. A dedicated manage_quality_and_safety_incidents capability can come later IF a target needs it; not forced now. (ISO27001/TISAX/IEC62443 keep incident_management.) 2. patch_policy_doc -> secure_signed_update_distribution stays `partial`, but the curated rationale is sharpened: "indicates update governance, does not evidence signed distribution" (a patch policy is not proof of SIGNED distribution). New optional SignalMapping.rationale field carries the curated note. (github_actions_ci -> SDL and dependency_scanning -> vuln-mgmt reviewed and APPROVED as-is.) 3. Cosmetic (folded in since we touched the file): the silent-intake summary now counts detected and indications SEPARATELY ("N automatisch erkannt, M Indikation(en)") instead of lumping partial signals into "automatisch erkannt" — consistent with the three-state model just shipped. Tests: ISO13485 no longer resolves to incident_management; summary counts split correctly. 29 onboarding tests pass, mypy --strict clean, demo runs, check-loc 0. Runtime-visible (hypothesis resolution + summary text) -> deploy + smoke.
2026-06-28 16:18:32 +02:00 · 2026-06-28 16:18:28 +02:00 · 2026-06-28 16:02:55 +02:00 · 2026-06-28 16:02:35 +02:00 · 2026-06-28 15:53:10 +02:00 · 2026-06-28 15:52:50 +02:00
231 changed files with 25366 additions and 130 deletions
@@ -1,4 +1,6 @@
 # Build stage
 # ci-retrigger 2026-06-27: transient registry.meghsakha.com 502 on push (Runde 1) + last-build
 # tag-bug skipped the rerun (Runde 2). No logic change — forces detect-changes to rebuild ai-sdk.
 FROM golang:1.24-alpine AS builder
 WORKDIR /app
@@ -34,6 +34,8 @@ func main() {
 		cmdEcho(os.Args[2:])
 	case "hierarchy":
 		cmdHierarchy(os.Args[2:])
 	case "propose":
 		cmdPropose(os.Args[2:])
 	default:
 		usage()
 		os.Exit(2)
@@ -41,7 +43,7 @@ func main() {
 }
 func usage() {
-	fmt.Fprintln(os.Stderr, "Usage: iace-audit <reachability|consistency|vocabulary|echo|hierarchy> [args]")
+	fmt.Fprintln(os.Stderr, "Usage: iace-audit <reachability|consistency|vocabulary|echo|hierarchy|propose> [args]")
 }
 func cmdReachability(_ []string) {
@@ -0,0 +1,188 @@
 package main
 import (
 	"context"
 	"encoding/json"
 	"fmt"
 	"os"
 	"strconv"
 	"strings"
 	"github.com/breakpilot/ai-compliance-sdk/internal/iace"
 	"github.com/breakpilot/ai-compliance-sdk/internal/iace/audit"
 	"github.com/breakpilot/ai-compliance-sdk/internal/llm"
 )
 // narrativeInput is the JSON document read from the <narrative.json> argument
 // of the propose subcommand.
 //
 // MachineType labels the rendered review queues and is handed to the proposer
 // input builder and the judge selection. Narrative is the free-text machine
 // description; cmdPropose exits when it is empty. MachineTypes is optional and
 // is passed through to iace.BuildProposerInput unchanged.
 type narrativeInput struct {
 	MachineType  string   `json:"machine_type"`
 	Narrative    string   `json:"narrative"`
 	MachineTypes []string `json:"machine_types,omitempty"`
 }
 // cmdPropose — Method P: offline review-queue proposer.
 //
 //	iace-audit propose <narrative.json> [<ground-truth.json>]
 //
 // Runs four propose-only passes over the patterns fired for the narrative and
 // writes one markdown + JSON pair per pass under audit-reports/:
 //
 //	proposals.{md,json}  near-duplicate (dedup) candidates, screened against the
 //	                     ground truth (if given) and judged (heuristic by
 //	                     default, LLM when enabled)
 //	framing.{md,json}    foreign-framing candidates
 //	vocab.{md,json}      vocabulary suggestions that carry at least one
 //	                     suggested tag
 //	coverage.{md,json}   ISO 12100 groups without a hazard; the markdown also
 //	                     lists LLM-proposed missing hazards when the LLM judge
 //	                     is active, the JSON holds the gaps only
 //
 // Exits with status 2 when the narrative argument is missing or the narrative
 // text is empty. Propose-only — it writes reports and never mutates the
 // pattern library.
 //
 // Env:
 //
 //	IACE_PROPOSE_THRESHOLD   candidate score threshold (default 0.30)
 //	IACE_FRAMING_MIN_ORPHAN  threshold passed to the framing pass (default 0.6)
 //	IACE_PROPOSE_LLM=1       use the offline LLM judge instead of the heuristic
 //	OLLAMA_URL               ollama base URL (default http://localhost:11434)
 //	SELF_HOSTED_LLM_MODEL    model name (default qwen2.5:32b-instruct)
 func cmdPropose(args []string) {
 	if len(args) < 1 {
 		fmt.Fprintln(os.Stderr, "propose: usage: iace-audit propose <narrative.json> [<ground-truth.json>]")
 		os.Exit(2)
 	}
 	var in narrativeInput
 	must(readJSONFile(args[0], &in))
 	if in.Narrative == "" {
 		fmt.Fprintln(os.Stderr, "propose: narrative is empty")
 		os.Exit(2)
 	}
 	// The ground truth is optional; gt stays nil when no second argument is given.
 	var gt *iace.GroundTruth
 	if len(args) >= 2 {
 		var g iace.GroundTruth
 		must(readJSONFile(args[1], &g))
 		gt = &g
 	}
 	threshold := envFloat("IACE_PROPOSE_THRESHOLD", 0.30)
 	hazards, mits, fired := iace.BuildProposerInput(in.Narrative, in.MachineType, in.MachineTypes)
 	candidates := iace.FindDedupCandidates(fired, threshold)
 	// Index the fired patterns so the judge can be handed both sides of a candidate.
 	byID := make(map[string]iace.PatternMatch, len(fired))
 	for _, pm := range fired {
 		byID[pm.PatternID] = pm
 	}
 	judge := selectJudge(in.MachineType)
 	ctx := context.Background()
 	var proposals []iace.JudgedProposal
 	blocked := 0
 	for _, c := range candidates {
 		// Without a ground truth the screen result stays at its zero value.
 		var sr iace.ScreenResult
 		if gt != nil {
 			sr = iace.ScreenSupersession(gt, hazards, mits, c.KeepHazardName, c.DropName)
 			// A candidate that lowers recall or touches distinct GT entries is
 			// counted as blocked and never reaches the judge.
 			if sr.RecallAfter < sr.RecallBefore || sr.DistinctGT {
 				blocked++
 				continue
 			}
 		}
 		v, conf, rat := judge.Judge(ctx, c, byID[c.KeepPattern], byID[c.DropPattern])
 		proposals = append(proposals, iace.JudgedProposal{
 			Candidate: c, Screen: sr, Verdict: v, Confidence: conf, Rationale: rat, Judge: judge.Name(),
 		})
 	}
 	writeText("audit-reports/proposals.md", iace.RenderProposalQueue(in.MachineType, proposals))
 	writeJSON("audit-reports/proposals.json", proposals)
 	// Type 2: foreign-framing candidates (zone terms with no narrative echo).
 	framing := iace.FindFramingCandidates(fired, in.Narrative, envFloat("IACE_FRAMING_MIN_ORPHAN", 0.6))
 	writeText("audit-reports/framing.md", iace.RenderFramingQueue(in.MachineType, framing))
 	writeJSON("audit-reports/framing.json", framing)
 	// Type 3: vocab->tag proposals (unknown narrative tokens that pattern text
 	// names as a whole word, with a dominant shared required tag).
 	vocab := audit.RunVocabulary(map[string]any{"narrative": in.Narrative})
 	var vgaps []audit.DictionarySuggestion
 	for _, s := range vocab.SuggestedDictionaryEntries {
 		if len(s.SuggestedTags) > 0 {
 			vgaps = append(vgaps, s)
 		}
 	}
 	writeText("audit-reports/vocab.md", renderVocabQueue(in.MachineType, vgaps))
 	writeJSON("audit-reports/vocab.json", vgaps)
 	// Type 4: coverage blind-spots (empty ISO 12100 groups A-G) + LLM expansion.
 	gaps := iace.FindCoverageGaps(hazards)
 	var missing []iace.MissingHazard
 	// The LLM expansion only runs when the selected judge is the LLM judge, whose
 	// completer is reused here.
 	if lj, ok := judge.(iace.LLMJudge); ok {
 		missing = iace.ProposeMissingHazards(ctx, lj.Completer, in.MachineType, in.Narrative, hazards, gaps)
 	}
 	writeText("audit-reports/coverage.md", iace.RenderCoverageQueue(in.MachineType, gaps, missing))
 	writeJSON("audit-reports/coverage.json", gaps)
 	printSummary("Method P — Dedup Proposer ("+judge.Name()+")", map[string]int{
 		"fired_patterns": len(fired),
 		"candidates":     len(candidates),
 		"in_queue":       len(proposals),
 		"gt_blocked":     blocked,
 		"framing_flags":  len(framing),
 		"vocab_gaps":     len(vgaps),
 		"coverage_gaps":  len(gaps),
 	})
 	if gt == nil {
 		fmt.Fprintln(os.Stderr, "note: no ground truth provided — GT wall NOT applied (candidates not recall-screened)")
 	}
 }
 // selectJudge picks the candidate judge for this run. The heuristic judge is
 // the default; the LLM judge is built only when IACE_PROPOSE_LLM is exactly
 // "1", using the ollama URL and model taken from the environment. machineClass
 // is stored on the LLM judge.
 func selectJudge(machineClass string) iace.CandidateJudge {
 	if os.Getenv("IACE_PROPOSE_LLM") == "1" {
 		ollamaURL := envStr("OLLAMA_URL", "http://localhost:11434")
 		modelName := envStr("SELF_HOSTED_LLM_MODEL", "qwen2.5:32b-instruct")
 		registry := llm.NewProviderRegistry("ollama", "")
 		registry.Register(llm.NewOllamaAdapter(ollamaURL, modelName))
 		// Announce the choice so the operator can see which backend judged the run.
 		fmt.Printf("using LLM judge (ollama %s, model %s)\n", ollamaURL, modelName)
 		return iace.LLMJudge{
 			Completer:    iace.NewRegistryCompleter(registry, modelName),
 			MachineClass: machineClass,
 		}
 	}
 	return iace.HeuristicJudge{}
 }
 // readJSONFile reads the file at path and decodes its JSON content into v.
 //
 // v must be a pointer accepted by json.Unmarshal. A read failure is returned
 // unchanged (the os error already names the file). A decode failure is wrapped
 // with the path, because a bare JSON syntax error does not say which input file
 // was malformed; the underlying error stays reachable via errors.Is/errors.As.
 func readJSONFile(path string, v any) error {
 	raw, err := os.ReadFile(path)
 	if err != nil {
 		return err
 	}
 	if err := json.Unmarshal(raw, v); err != nil {
 		return fmt.Errorf("parsing %s: %w", path, err)
 	}
 	return nil
 }
 // writeText stores content at path and reports the outcome: a confirmation on
 // stdout when the file was written, a warning on stderr otherwise. It never
 // aborts the run.
 func writeText(path, content string) {
 	// Best effort: a failure to create the report directory surfaces through the
 	// write error below.
 	_ = os.MkdirAll("audit-reports", 0o755)
 	err := os.WriteFile(path, []byte(content), 0o644)
 	if err == nil {
 		fmt.Println("→ wrote", path)
 		return
 	}
 	fmt.Fprintln(os.Stderr, "warn: could not write", path, err)
 }
 // envStr returns the value of the environment variable key, or def when the
 // variable is unset or empty.
 func envStr(key, def string) string {
 	value := os.Getenv(key)
 	if value == "" {
 		return def
 	}
 	return value
 }
 // envFloat returns the environment variable key parsed as a float64, or def
 // when the variable is unset, empty, not a number, or NaN.
 //
 // A set-but-unusable value is reported on stderr instead of being dropped
 // silently, so a mistyped threshold does not quietly run with the default.
 func envFloat(key string, def float64) float64 {
 	raw := os.Getenv(key)
 	if raw == "" {
 		return def
 	}
 	f, err := strconv.ParseFloat(raw, 64)
 	// f != f holds only for NaN, which ParseFloat accepts but which makes every
 	// ordered comparison against it false.
 	if err != nil || f != f {
 		fmt.Fprintf(os.Stderr, "warn: ignoring %s=%q (not a usable number), using default %v\n", key, raw, def)
 		return def
 	}
 	return f
 }
 // renderVocabQueue renders the vocab→tag suggestions as a markdown review
 // queue: a header naming the machine, a count line, then one numbered section
 // per suggestion with its suggested tags, the naming patterns and a proposed
 // action. The action line uses the first suggested tag, or the placeholder
 // "<tag>" when a suggestion carries none.
 func renderVocabQueue(machine string, entries []audit.DictionarySuggestion) string {
 	var md strings.Builder
 	fmt.Fprintf(&md, "# Vocab→tag review queue — %s\n\n", machine)
 	fmt.Fprintf(&md, "%d unknown token(s) appear in pattern text but map to no dictionary tag. Propose-only — a human (or the LLM) confirms the tag, then adds a keyword_dictionary entry and pins a GT case.\n\n", len(entries))
 	for idx := range entries {
 		entry := entries[idx]
 		primary := "<tag>"
 		if len(entry.SuggestedTags) > 0 {
 			primary = entry.SuggestedTags[0]
 		}
 		allTags := strings.Join(entry.SuggestedTags, ", ")
 		namedBy := strings.Join(entry.PatternIDs, ", ")
 		fmt.Fprintf(&md, "## %d. \"%s\"  → suggested tag(s): %s\n", idx+1, entry.Token, allTags)
 		fmt.Fprintf(&md, "- named by %d pattern(s): %s\n", len(entry.PatternIDs), namedBy)
 		fmt.Fprintf(&md, "- suggested action: add keyword_dictionary entry {%q → %s} so narratives mentioning it trigger those patterns; human confirms\n\n", entry.Token, primary)
 	}
 	return md.String()
 }
@@ -36,6 +36,10 @@ type DictionarySuggestion struct {
 	Token      string   `json:"token"`
 	Field      string   `json:"field"`
 	PatternIDs []string `json:"pattern_ids"`
 	// SuggestedTags are the RequiredComponentTags shared by the naming patterns,
 	// ranked by frequency — the candidate tags a keyword_dictionary entry for this
 	// token would emit so narratives mentioning it can trigger those patterns.
 	SuggestedTags []string `json:"suggested_tags,omitempty"`
 }
 type VocabularyReport struct {
@@ -66,14 +66,19 @@ func runVocabulary(form map[string]any) VocabularyReport {
 	// For each unknown token check if any pattern names it
 	patterns := iace.AllPatterns()
 	byID := make(map[string]iace.HazardPattern, len(patterns))
 	for _, p := range patterns {
 		byID[p.ID] = p
 	}
 	for _, tok := range report.UnknownTokens {
 		hits := patternsMentioning(tok, patterns)
 		if len(hits) == 0 {
 			continue
 		}
 		report.SuggestedDictionaryEntries = append(report.SuggestedDictionaryEntries, DictionarySuggestion{
-			Token:      tok,
+			Token:         tok,
-			PatternIDs: hits,
+			PatternIDs:    hits,
 			SuggestedTags: suggestTagsFor(hits, byID),
 		})
 	}
 	sort.Slice(report.SuggestedDictionaryEntries, func(i, j int) bool {
@@ -129,18 +134,24 @@ func dictTokenHit(tok string, dict map[string]bool) bool {
 	return false
 }
-// patternsMentioning returns up to 8 pattern IDs whose scenario/trigger/
+// patternsMentioning returns up to 8 pattern IDs whose scenario/trigger/harm/
-// harm/zone text contains the token (case-insensitive substring).
+// zone text names the token as a WHOLE WORD. Whole-word (not substring) matching
 // is essential: a substring match flags common fragments like "stehen" inside
 // "entstehen", producing spurious hits and nonsensical tag suggestions.
 func patternsMentioning(tok string, patterns []iace.HazardPattern) []string {
 	tokLower := strings.ToLower(tok)
 	seen := map[string]bool{}
 	var out []string
 	for _, p := range patterns {
 		hay := strings.ToLower(p.ScenarioDE + " " + p.TriggerDE + " " + p.HarmDE + " " + p.ZoneDE + " " + p.NameDE)
-		if !strings.Contains(hay, tokLower) {
+		matched := false
-			continue
+		for _, w := range tokenRE.FindAllString(hay, -1) {
 			if w == tokLower {
 				matched = true
 				break
 			}
 		}
-		if seen[p.ID] {
+		if !matched || seen[p.ID] {
 			continue
 		}
 		seen[p.ID] = true
@@ -151,3 +162,57 @@ func patternsMentioning(tok string, patterns []iace.HazardPattern) []string {
 	}
 	return out
 }
 // suggestTagsFor proposes the tags a dictionary entry for a token should emit.
 //
 // It counts, over the patterns in ids that exist in byID, how many patterns
 // require each RequiredComponentTag (a tag repeated within one pattern counts
 // once). Tags are ranked by that count, ties broken alphabetically, and at most
 // three are returned. A tag is only kept when at least 40% of the known
 // patterns require it, so diffuse tokens end up with no suggestion. The result
 // is nil when none of the ids is known.
 func suggestTagsFor(ids []string, byID map[string]iace.HazardPattern) []string {
 	const (
 		minShare = 0.4
 		maxTags  = 3
 	)
 	counts := make(map[string]int)
 	known := 0
 	for _, id := range ids {
 		pattern, found := byID[id]
 		if !found {
 			continue
 		}
 		known++
 		// Count each tag at most once per pattern.
 		counted := make(map[string]bool)
 		for _, tag := range pattern.RequiredComponentTags {
 			if !counted[tag] {
 				counted[tag] = true
 				counts[tag]++
 			}
 		}
 	}
 	if known == 0 {
 		return nil
 	}
 	type tagCount struct {
 		tag string
 		n   int
 	}
 	ranking := make([]tagCount, 0, len(counts))
 	for tag, n := range counts {
 		ranking = append(ranking, tagCount{tag: tag, n: n})
 	}
 	sort.Slice(ranking, func(a, b int) bool {
 		if ranking[a].n == ranking[b].n {
 			return ranking[a].tag < ranking[b].tag
 		}
 		return ranking[a].n > ranking[b].n
 	})
 	var suggested []string
 	for _, entry := range ranking {
 		// The ranking is descending, so the first tag below the share floor ends
 		// the scan; the cap ends it once enough tags were collected.
 		if float64(entry.n)/float64(known) < minShare {
 			break
 		}
 		suggested = append(suggested, entry.tag)
 		if len(suggested) >= maxTags {
 			break
 		}
 	}
 	return suggested
 }
@@ -0,0 +1,36 @@
 package audit
 import (
 	"testing"
 	"github.com/breakpilot/ai-compliance-sdk/internal/iace"
 )
 // TestSuggestTagsFor_RanksSharedRequiredTags checks that the tag required by
 // the most patterns is ranked first.
 func TestSuggestTagsFor_RanksSharedRequiredTags(t *testing.T) {
 	patterns := map[string]iace.HazardPattern{
 		"P1": {ID: "P1", RequiredComponentTags: []string{"backflow_risk", "dom_warewashing"}},
 		"P2": {ID: "P2", RequiredComponentTags: []string{"backflow_risk"}},
 		"P3": {ID: "P3", RequiredComponentTags: []string{"sharp_edge"}},
 	}
 	ranked := suggestTagsFor([]string{"P1", "P2", "P3"}, patterns)
 	if len(ranked) > 0 && ranked[0] == "backflow_risk" {
 		return
 	}
 	t.Fatalf("want backflow_risk ranked first (2 patterns), got %v", ranked)
 }
 // TestSuggestTagsFor_TopThreeStableAlpha checks that equally frequent tags are
 // returned in alphabetical order and capped at three.
 func TestSuggestTagsFor_TopThreeStableAlpha(t *testing.T) {
 	patterns := map[string]iace.HazardPattern{
 		"P1": {ID: "P1", RequiredComponentTags: []string{"d", "b", "a", "c"}},
 	}
 	got := suggestTagsFor([]string{"P1"}, patterns)
 	want := [...]string{"a", "b", "c"}
 	match := len(got) == len(want)
 	for i := 0; match && i < len(want); i++ {
 		match = got[i] == want[i]
 	}
 	if !match {
 		t.Fatalf("want stable alpha top-3 [a b c], got %v", got)
 	}
 }
 func TestSuggestTagsFor_UnknownPatternIgnored(t *testing.T) {
 	byID := map[string]iace.HazardPattern{}
 	if got := suggestTagsFor([]string{"missing"}, byID); len(got) != 0 {
 		t.Fatalf("want empty for unknown patterns, got %v", got)
 	}
 }
@@ -7,8 +7,6 @@ import (
 	"path/filepath"
 	"sort"
 	"testing"
 	"github.com/google/uuid"
 )
 // TestKistenhub_GTCoverage runs the Kistenhubgeraet ground truth (37 entries)
@@ -110,65 +108,6 @@ func TestKistenhub_GTCoverage(t *testing.T) {
 // patternsToHazardsAndMitigations turns a pattern match output into the
 // Hazard/Mitigation values CompareBenchmark consumes, without any DB writes.
 //
 // Every matched pattern yields one hazard with a fresh ID; its category and
 // lifecycle phase are the first entries of the pattern's lists (empty when the
 // list is empty) and its name falls back to the pattern name when the scenario
 // text is empty. Every suggested measure yields one mitigation per source
 // pattern that produced a hazard, named after the protective-measure library
 // entry or, when the library has no name for it, after the measure ID.
 func patternsToHazardsAndMitigations(out *MatchOutput) ([]Hazard, []Mitigation) {
 	hazards := make([]Hazard, 0, len(out.MatchedPatterns))
 	hazardIDByPattern := make(map[string]uuid.UUID, len(out.MatchedPatterns))
 	for _, match := range out.MatchedPatterns {
 		var category, phase string
 		if len(match.HazardCats) > 0 {
 			category = match.HazardCats[0]
 		}
 		if len(match.ApplicableLifecycles) > 0 {
 			phase = match.ApplicableLifecycles[0]
 		}
 		hz := Hazard{
 			ID:             uuid.New(),
 			Name:           match.ScenarioDE,
 			Category:       category,
 			Description:    match.ScenarioDE,
 			Scenario:       match.ScenarioDE,
 			TriggerEvent:   match.TriggerDE,
 			PossibleHarm:   match.HarmDE,
 			AffectedPerson: match.AffectedDE,
 			HazardousZone:  match.ZoneDE,
 			LifecyclePhase: phase,
 		}
 		if hz.Name == "" {
 			hz.Name = match.PatternName
 		}
 		hazards = append(hazards, hz)
 		hazardIDByPattern[match.PatternID] = hz.ID
 	}
 	libraryNames := make(map[string]string)
 	for _, measure := range GetProtectiveMeasureLibrary() {
 		libraryNames[measure.ID] = measure.Name
 	}
 	var mitigations []Mitigation
 	for _, suggested := range out.SuggestedMeasures {
 		label := libraryNames[suggested.MeasureID]
 		if label == "" {
 			label = suggested.MeasureID
 		}
 		for _, patternID := range suggested.SourcePatterns {
 			// Source patterns that produced no hazard are skipped.
 			hazardID, linked := hazardIDByPattern[patternID]
 			if !linked {
 				continue
 			}
 			mitigations = append(mitigations, Mitigation{
 				ID:       uuid.New(),
 				HazardID: hazardID,
 				Name:     label,
 			})
 		}
 	}
 	return hazards, mitigations
 }
 func abbrev(s string, max int) string {
 	if len(s) <= max {
 		return s
@@ -1,6 +1,7 @@
 package iace
 import (
 	"context"
 	"encoding/json"
 	"os"
 	"path/filepath"
@@ -45,7 +46,7 @@ var warewashingCyberCategories = map[string]bool{
 // warewashingEngineOutput runs the production chain and returns the filtered
 // hazards/mitigations the user would see for the UC-M.
-func warewashingEngineOutput() ([]Hazard, []Mitigation, int) {
+func warewashingEngineOutput() ([]Hazard, []Mitigation, []PatternMatch) {
 	res := ParseNarrative(warewashingNarrative, "Gewerbliche Untertisch-Geschirrspuelmaschine (vernetzt)")
 	var compIDs, compNames []string
@@ -94,7 +95,7 @@ func warewashingEngineOutput() ([]Hazard, []Mitigation, int) {
 	filtered := *out
 	filtered.MatchedPatterns = kept
 	hazards, mitigations := patternsToHazardsAndMitigations(&filtered)
-	return hazards, mitigations, len(kept)
+	return hazards, mitigations, kept
 }
 func TestWarewashing_GTCoverage(t *testing.T) {
@@ -119,8 +120,8 @@ func TestWarewashing_GTCoverage(t *testing.T) {
 		t.Logf("Parsed components: %v", cn)
 	}
-	hazards, mitigations, nPatterns := warewashingEngineOutput()
+	hazards, mitigations, keptPatterns := warewashingEngineOutput()
-	t.Logf("Engine: %d patterns kept (relevance+cyber filter) -> %d hazards", nPatterns, len(hazards))
+	t.Logf("Engine: %d patterns kept (relevance+cyber filter) -> %d hazards", len(keptPatterns), len(hazards))
 	result := CompareBenchmark(&gt, hazards, mitigations)
 	precision := 0.0
@@ -180,3 +181,57 @@ func TestWarewashing_GTCoverage(t *testing.T) {
 		t.Errorf("warewashing recall below 40%% floor: %.1f%%", result.CoverageScore*100)
 	}
 }
 // TestWarewashing_DedupProposer runs the offline dedup-candidate proposer on
 // the real warewashing engine output: it detects candidates, screens each one
 // against the ground truth and logs the resulting human-review queue. The only
 // assertion is that the wall is self-consistent — a candidate that is neither
 // recall-load-bearing nor GT-distinct must be reported as Safe by the screen.
 func TestWarewashing_DedupProposer(t *testing.T) {
 	gtPath := filepath.Join("testdata", "ground_truth_warewashing.json")
 	raw, err := os.ReadFile(gtPath)
 	if err != nil {
 		t.Fatalf("read GT: %v", err)
 	}
 	var gt GroundTruth
 	if err = json.Unmarshal(raw, &gt); err != nil {
 		t.Fatalf("parse GT: %v", err)
 	}
 	hazards, mits, kept := warewashingEngineOutput()
 	patternByID := make(map[string]PatternMatch, len(kept))
 	for i := range kept {
 		patternByID[kept[i].PatternID] = kept[i]
 	}
 	// The 0.25 threshold is permissive on purpose: the proposer should
 	// over-surface, and the deterministic GT wall below (plus a human and the LLM
 	// judge) acts as the precision filter — not the detector.
 	candidates := FindDedupCandidates(kept, 0.25)
 	t.Logf("Proposer: %d dedup candidate(s) from %d fired patterns", len(candidates), len(kept))
 	// The test uses the deterministic judge; the dev-time CLI swaps in LLMJudge.
 	judge := HeuristicJudge{}
 	ctx := context.Background()
 	var queue []JudgedProposal
 	blocked := 0
 	for _, cand := range candidates {
 		screen := ScreenSupersession(&gt, hazards, mits, cand.KeepHazardName, cand.DropName)
 		if screen.RecallAfter < screen.RecallBefore {
 			t.Logf("[BLOCK recall-load-bearing] keep %s / drop %s", cand.KeepPattern, cand.DropPattern)
 			blocked++
 			continue
 		}
 		if screen.DistinctGT {
 			t.Logf("[BLOCK distinct GT %s vs %s] keep %s / drop %s", screen.KeepGT, screen.DropGT, cand.KeepPattern, cand.DropPattern)
 			blocked++
 			continue
 		}
 		if !screen.Safe {
 			t.Errorf("RECALL-SAFE branch but ScreenResult.Safe=false for drop %s", cand.DropPattern)
 		}
 		verdict, confidence, rationale := judge.Judge(ctx, cand, patternByID[cand.KeepPattern], patternByID[cand.DropPattern])
 		queue = append(queue, JudgedProposal{
 			Candidate:  cand,
 			Screen:     screen,
 			Verdict:    verdict,
 			Confidence: confidence,
 			Rationale:  rationale,
 			Judge:      judge.Name(),
 		})
 	}
 	t.Logf("\n%s", RenderProposalQueue("Gewerbliche Geschirrspuelmaschine (vernetzt)", queue))
 	t.Logf("Proposer summary: %d candidate(s) in queue (judge=%s), %d BLOCKED by the GT wall — propose-only, nothing auto-applied",
 		len(queue), judge.Name(), blocked)
 }
@@ -0,0 +1,50 @@
 package iace
 import "sort"
 // isoCategoryRank assigns every known hazard category a sort rank that follows
 // the EN ISO 12100 hazard groups (A. Mechanisch, B. Elektrisch, C. Thermisch,
 // …), matching the frontend CATEGORY_LABELS. The hazard log uses it so the
 // entries read top-down by type instead of in pattern-firing order. The tens
 // digit selects the group, the units digit orders categories inside it.
 var isoCategoryRank = map[string]int{
 	// A. Mechanical
 	"mechanical_hazard": 10, "mechanical": 10, "maintenance_hazard": 11,
 	// B. Electrical
 	"electrical_hazard": 20, "electrical": 20, "emc_hazard": 21,
 	// C. Thermal
 	"thermal_hazard": 30, "thermal": 30, "high_temperature": 31, "fire_explosion": 32,
 	// D. Pneumatics / hydraulics
 	"pneumatic_hydraulic": 40,
 	// E. Noise / vibration
 	"noise_hazard": 50, "noise_vibration": 50, "vibration_hazard": 51,
 	// F. Ergonomics
 	"ergonomic_hazard": 60, "ergonomic": 60,
 	// G. Substances / environment
 	"material_environmental": 70, "chemical_risk": 71, "radiation_hazard": 72,
 	// H. Software / control (functional safety)
 	"software_control": 80, "software_fault": 80, "safety_function_failure": 81,
 	"configuration_error": 82, "sensor_fault": 83, "hmi_error": 84, "mode_confusion": 85,
 	"communication_failure": 86, "update_failure": 87,
 	// I. Cyber / network (listed for ordering completeness; excluded from the CE log)
 	"unauthorized_access": 90, "firmware_corruption": 91, "cyber_resilience": 92,
 	"cyber_network": 93, "logging_audit_failure": 94, "sensor_spoofing": 95,
 	// J. AI-specific
 	"ai_specific": 100, "ai_misclassification": 100, "false_classification": 100,
 	"model_drift": 100, "data_poisoning": 100, "unintended_bias": 100,
 }
 // categoryRank returns the sort rank of a hazard category. Categories missing
 // from isoCategoryRank get 999 so they sort after every known group.
 func categoryRank(cat string) int {
 	const unknownRank = 999
 	rank, known := isoCategoryRank[cat]
 	if !known {
 		return unknownRank
 	}
 	return rank
 }
 // SortHazardsByISO12100 reorders hazards in place by the rank of their
 // category. The sort is stable, so hazards with the same rank keep the order
 // they arrived in.
 func SortHazardsByISO12100(hazards []Hazard) {
 	byGroupRank := func(a, b int) bool {
 		rankA := categoryRank(hazards[a].Category)
 		rankB := categoryRank(hazards[b].Category)
 		return rankA < rankB
 	}
 	sort.SliceStable(hazards, byGroupRank)
 }
@@ -157,7 +157,7 @@ func GetGTBremseHazardPatterns() []HazardPattern {
 		// ════════════════════════════════════════════════════════════════
 		{
 			ID: "HP1717", NameDE: "Verletzung durch unvermittelt austretende pneumatische Restenergie", NameEN: "Injury from unexpectedly released pneumatic stored energy",
-			RequiredComponentTags: []string{"stored_energy"},
+			RequiredComponentTags: []string{"pneumatic_part"},
 			RequiredEnergyTags:    []string{"pneumatic_pressure"},
 			GeneratedHazardCats:   []string{"mechanical_hazard"},
 			SuggestedMeasureIDs:   []string{"M485", "M534", "M527"},
@@ -375,7 +375,7 @@ func GetSpecificMachinePatterns() []HazardPattern {
 		// ================================================================
 		{
 			ID: "HP753", NameDE: "Thermal Runaway bei Lithium-Batterie", NameEN: "Thermal runaway of lithium battery",
-			RequiredComponentTags: []string{"stored_energy", "high_temperature"},
+			RequiredComponentTags: []string{"battery", "high_temperature"},
 			RequiredEnergyTags:    []string{"electrical_energy", "thermal"},
 			GeneratedHazardCats:   []string{"thermal_hazard", "electrical_hazard"},
 			SuggestedMeasureIDs:   []string{"M005", "M141"},
@@ -390,7 +390,7 @@ func GetSpecificMachinePatterns() []HazardPattern {
 		},
 		{
 			ID: "HP754", NameDE: "Ausgasung giftiger Daempfe aus Batterie", NameEN: "Toxic gas emission from battery",
-			RequiredComponentTags: []string{"stored_energy", "chemical_risk"},
+			RequiredComponentTags: []string{"battery", "chemical_risk"},
 			RequiredEnergyTags:    []string{},
 			GeneratedHazardCats:   []string{"material_environmental"},
 			SuggestedMeasureIDs:   []string{"M005", "M141"},
@@ -405,7 +405,7 @@ func GetSpecificMachinePatterns() []HazardPattern {
 		},
 		{
 			ID: "HP755", NameDE: "Elektrischer Schlag an Hochvolt-Batteriespeicher", NameEN: "Electric shock from high-voltage battery storage",
-			RequiredComponentTags: []string{"stored_energy", "electrical_part"},
+			RequiredComponentTags: []string{"battery", "electrical_part"},
 			RequiredEnergyTags:    []string{"electrical_energy"},
 			GeneratedHazardCats:   []string{"electrical_hazard"},
 			SuggestedMeasureIDs:   []string{"M082", "M141"},
@@ -137,7 +137,7 @@ func GetKeywordDictionary() []KeywordEntry {
 		{Keywords: []string{"kreiselmaeher", "scheibenmaeher", "maehwerk"}, ExtraTags: []string{"agri_mower"}},
 		{Keywords: []string{"spruehduese", "spritzduese", "spruehkopf"}, ExtraTags: []string{"spray_nozzle"}},
 		{Keywords: []string{"galvanikbad", "tauchbad", "beizbad", "chemiebad"}, ExtraTags: []string{"chemical_bath"}},
-		{Keywords: []string{"batterie", "akku", "akkumulator", "traktionsbatterie"}, ExtraTags: []string{"battery"}},
+		{Keywords: []string{"batterie", "akku", "akkumulator", "traktionsbatterie", "lithium", "batteriespeicher", "hochvoltbatterie", "lithium-batterie"}, ExtraTags: []string{"battery"}},
 		{Keywords: []string{"heizelement", "heizpatrone", "heizband"}, ExtraTags: []string{"heating_element"}},
 		{Keywords: []string{"uv-lampe", "uv-strahler", "uv-c-strahler"}, ExtraTags: []string{"uv_source"}},
 		{Keywords: []string{"roentgen", "radioaktiv", "strahlenquelle", "gammastrahl", "isotop"}, ExtraTags: []string{"radiation_source"}},
@@ -42,3 +42,29 @@ func guardedLifecycles(p HazardPattern, tagSet map[string]bool) []string {
 	}
 	return p.ApplicableLifecycles
 }
 // Domain-specific supersession.
 //
 // A generic pattern that fires through a broad tag (e.g. high_temperature) can
 // duplicate a domain-specific pattern describing the same hazard more
 // precisely. When the domain is present the specific pattern wins and the
 // generic duplicate is dropped. The rule is scoped to the domain tag, so
 // machines outside the domain keep the generic pattern.
 //
 //	HP016 (generic hot surfaces)  -> HP2201 (boiler/tank/wash chamber)
 //	HP018 (actuator burn)         -> HP2201 (same contact-burn hazard)
 //	HP013 (stored electrical NRG) -> HP144  (residual voltage; HP013's zone is
 //	                                 framed for battery compartments/UPS units a
 //	                                 dishwasher does not have, HP144 is the
 //	                                 frequency-converter/DC-link variant)
 var genericSupersededByWarewashing = map[string]bool{
 	"HP016": true,
 	"HP018": true,
 	"HP013": true,
 }
 // supersededByDomainSpecific reports whether p is a generic pattern that the
 // project's domain already replaces with a more precise equivalent. It only
 // answers true when the dom_warewashing tag is set and p is listed in
 // genericSupersededByWarewashing.
 func supersededByDomainSpecific(p HazardPattern, tagSet map[string]bool) bool {
 	if !tagSet["dom_warewashing"] {
 		return false
 	}
 	return genericSupersededByWarewashing[p.ID]
 }
@@ -416,6 +416,11 @@ func patternMatches(p HazardPattern, tagSet map[string]bool, input MatchInput) b
 		return false
 	}
 	// Domain-specific supersession (generic duplicate replaced by a precise one).
 	if supersededByDomainSpecific(p, tagSet) {
 		return false
 	}
 	return true
 }
@@ -0,0 +1,143 @@
 package iace
 import (
 	"context"
 	"encoding/json"
 	"fmt"
 	"strings"
 )
 // Coverage blind-spot proposer (P2 slice 6, type 4). DEV-TIME, propose-only.
 //
 // Deterministic skeleton: which EN ISO 12100 hazard groups (A-G, the classic CE
 // groups; H-J are control/CRA and routinely routed elsewhere) did the engine
 // leave with ZERO hazards for this machine? An empty group is a structural
 // blind-spot signal — either the machine genuinely lacks that hazard or a
 // pattern is missing. The LLM then expands each gap into the specific
 // expected-but-missing hazards a safety assessor would name, for a human to
 // confirm into a new pattern or GT case. The gaps are useful on their own,
 // without any model.
 //
 // isoGroup describes one hazard group: Key is the identifier, Label the
 // display name, Cats the hazard categories that count as covering the group.
 type isoGroup struct {
 	Key   string
 	Label string
 	Cats  []string
 }
 // iso12100Groups lists the groups A-G in report order.
 var iso12100Groups = []isoGroup{
 	{Key: "mechanical", Label: "A. Mechanisch", Cats: []string{"mechanical_hazard", "mechanical", "maintenance_hazard"}},
 	{Key: "electrical", Label: "B. Elektrisch", Cats: []string{"electrical_hazard", "electrical", "emc_hazard"}},
 	{Key: "thermal", Label: "C. Thermisch", Cats: []string{"thermal_hazard", "thermal", "high_temperature", "fire_explosion"}},
 	{Key: "pneumatic_hydraulic", Label: "D. Pneumatik/Hydraulik", Cats: []string{"pneumatic_hydraulic"}},
 	{Key: "noise_vibration", Label: "E. Laerm/Vibration", Cats: []string{"noise_hazard", "noise_vibration", "vibration_hazard"}},
 	{Key: "ergonomic", Label: "F. Ergonomie", Cats: []string{"ergonomic_hazard", "ergonomic"}},
 	{Key: "material", Label: "G. Stoffe/Umwelt", Cats: []string{"material_environmental", "chemical_risk", "radiation_hazard"}},
 }
 // CoverageGap is an ISO 12100 hazard group with no engine hazard.
 //
 // As filled by FindCoverageGaps, Group carries the group's display label, Key
 // its identifier, and Note a fixed hint asking the reviewer to verify whether
 // the machine truly lacks the hazard or a pattern is missing.
 type CoverageGap struct {
 	Group string `json:"group"`
 	Key   string `json:"key"`
 	Note  string `json:"note"`
 }
 // FindCoverageGaps reports, in group order A-G, every ISO 12100 hazard group
 // for which none of the given hazards carries one of the group's categories.
 // The result is nil when every group is covered.
 func FindCoverageGaps(hazards []Hazard) []CoverageGap {
 	seenCats := make(map[string]bool, len(hazards))
 	for i := range hazards {
 		seenCats[hazards[i].Category] = true
 	}
 	var gaps []CoverageGap
 groups:
 	for _, group := range iso12100Groups {
 		for _, cat := range group.Cats {
 			if seenCats[cat] {
 				// One matching category is enough to cover the group.
 				continue groups
 			}
 		}
 		gaps = append(gaps, CoverageGap{
 			Group: group.Label,
 			Key:   group.Key,
 			Note:  "no engine hazard in this ISO 12100 group — verify the machine truly lacks it, or a pattern is missing",
 		})
 	}
 	return gaps
 }
 // MissingHazard is an LLM-proposed hazard a safety assessor would expect.
 //
 // The fields mirror the JSON objects the coverage prompt asks the model to
 // return: Group names the hazard group, Hazard the expected hazard, and Why
 // the model's justification.
 type MissingHazard struct {
 	Group  string `json:"group"`
 	Hazard string `json:"hazard"`
 	Why    string `json:"why"`
 }
 // ProposeMissingHazards asks the LLM to expand the empty hazard groups into
 // the specific hazards an assessor would expect. It is propose-only and never
 // breaks the run: it returns nil when there is no completer, when there are no
 // gaps, or when the completion fails.
 func ProposeMissingHazards(ctx context.Context, completer LLMCompleter, machineClass, narrative string, produced []Hazard, gaps []CoverageGap) []MissingHazard {
 	if len(gaps) == 0 || completer == nil {
 		return nil
 	}
 	sysPrompt, userPrompt := BuildCoveragePrompt(machineClass, narrative, produced, gaps)
 	if answer, err := completer.Complete(ctx, sysPrompt, userPrompt); err == nil {
 		return parseMissingHazards(answer)
 	}
 	return nil
 }
 // BuildCoveragePrompt builds the system and user prompt for the "what is
 // missing?" question. The system prompt fixes the assessor role and demands a
 // JSON array as the only output. The user prompt lists the machine class, the
 // narrative, the distinct non-empty categories already produced (in first-seen
 // order) and the labels of the empty groups.
 func BuildCoveragePrompt(machineClass, narrative string, produced []Hazard, gaps []CoverageGap) (system, user string) {
 	var categories []string
 	listed := make(map[string]bool)
 	for _, hz := range produced {
 		cat := hz.Category
 		if cat == "" || listed[cat] {
 			continue
 		}
 		listed[cat] = true
 		categories = append(categories, cat)
 	}
 	var groupLabels []string
 	for _, gap := range gaps {
 		groupLabels = append(groupLabels, gap.Group)
 	}
 	system = "Du bist Sachverstaendiger fuer Maschinensicherheit nach EN ISO 12100. " +
 		"Dir werden eine Maschine, die bereits erkannten Gefaehrdungen und Gefaehrdungsgruppen OHNE Eintrag genannt. " +
 		"Nenne nur Gefaehrdungen, die ein Sachverstaendiger fuer DIESE Maschine ERWARTET, die aber FEHLEN. " +
 		"Erfinde nichts Maschinenfremdes. Antworte AUSSCHLIESSLICH als JSON-Array: " +
 		`[{"group":"...","hazard":"...","why":"..."}].`
 	user = fmt.Sprintf(
 		"Maschinenklasse: %s\n\nBeschreibung:\n%s\n\nBereits erkannte Kategorien: %s\n\nGruppen OHNE Eintrag (Fokus): %s\n\nWelche erwarteten Gefaehrdungen fehlen?",
 		machineClass,
 		narrative,
 		strings.Join(categories, ", "),
 		strings.Join(groupLabels, ", "),
 	)
 	return system, user
 }
 // parseMissingHazards extracts the JSON array of missing hazards from an LLM
 // answer that may wrap it in free text.
 //
 // It tries every "[" in raw, left to right, as the start of the array and
 // returns the first one that decodes into []MissingHazard. A streaming decoder
 // is used so that text after the array — including further brackets such as a
 // trailing "(siehe [1])" — does not invalidate it, and a bracket in the
 // preamble is skipped instead of spoiling the whole answer. Returns nil when
 // no position yields a decodable array.
 func parseMissingHazards(raw string) []MissingHazard {
 	rest := raw
 	for {
 		i := strings.Index(rest, "[")
 		if i < 0 {
 			return nil
 		}
 		rest = rest[i:]
 		// A fresh slice per attempt, so a failed partial decode leaves nothing behind.
 		var out []MissingHazard
 		if err := json.NewDecoder(strings.NewReader(rest)).Decode(&out); err == nil {
 			return out
 		}
 		// Not an array of hazards at this bracket; continue with the next one.
 		rest = rest[1:]
 	}
 }
 // RenderCoverageQueue renders the coverage review queue as markdown: a header
 // naming the machine, a count line, one bullet per deterministic gap and —
 // only when there are any — a numbered section with the LLM-proposed missing
 // hazards.
 func RenderCoverageQueue(machine string, gaps []CoverageGap, missing []MissingHazard) string {
 	var md strings.Builder
 	fmt.Fprintf(&md, "# Coverage blind-spot queue — %s\n\n", machine)
 	fmt.Fprintf(&md, "%d ISO 12100 group(s) (A-G) have no engine hazard. Propose-only — a human confirms whether the machine truly lacks it or a pattern/GT case is missing.\n\n", len(gaps))
 	for i := range gaps {
 		fmt.Fprintf(&md, "- **%s** — %s\n", gaps[i].Group, gaps[i].Note)
 	}
 	if len(missing) == 0 {
 		return md.String()
 	}
 	fmt.Fprintf(&md, "\n## LLM-proposed expected-but-missing hazards (%d)\n\n", len(missing))
 	for idx, proposal := range missing {
 		fmt.Fprintf(&md, "%d. [%s] %s\n   - why: %s\n", idx+1, proposal.Group, proposal.Hazard, proposal.Why)
 	}
 	return md.String()
 }
@@ -0,0 +1,59 @@
 package iace
 import (
 	"context"
 	"strings"
 	"testing"
 )
 // TestFindCoverageGaps feeds four produced categories into FindCoverageGaps and
 // checks which gap keys are reported and which are not.
 func TestFindCoverageGaps(t *testing.T) {
 	produced := []Hazard{
 		{Category: "mechanical_hazard"},
 		{Category: "thermal_hazard"},
 		{Category: "electrical_hazard"},
 		{Category: "material_environmental"},
 	}
 	reported := make(map[string]bool)
 	for _, gap := range FindCoverageGaps(produced) {
 		reported[gap.Key] = true
 	}
 	expectations := []struct {
 		key     string
 		wantGap bool
 	}{
 		{"pneumatic_hydraulic", true},
 		{"noise_vibration", true},
 		{"ergonomic", true},
 		{"mechanical", false},
 		{"thermal", false},
 		{"electrical", false},
 		{"material", false},
 	}
 	for _, e := range expectations {
 		switch {
 		case e.wantGap && !reported[e.key]:
 			t.Errorf("expected gap %s", e.key)
 		case !e.wantGap && reported[e.key]:
 			t.Errorf("did not expect gap %s (covered)", e.key)
 		}
 	}
 }
 func TestBuildCoveragePrompt_ContainsContext(t *testing.T) {
 	produced := []Hazard{{Category: "thermal_hazard"}}
 	gaps := []CoverageGap{{Group: "F. Ergonomie", Key: "ergonomic"}}
 	system, user := BuildCoveragePrompt("Geschirrspuelmaschine", "Eine Spuelmaschine mit Tank.", produced, gaps)
 	if !strings.Contains(system, "EN ISO 12100") || !strings.Contains(system, "JSON") {
 		t.Errorf("system prompt missing framing")
 	}
 	for _, want := range []string{"Geschirrspuelmaschine", "thermal_hazard", "F. Ergonomie", "Spuelmaschine mit Tank"} {
 		if !strings.Contains(user, want) {
 			t.Errorf("user prompt missing %q", want)
 		}
 	}
 }
 // TestProposeMissingHazards_ParsesAndDegrades covers the happy path (JSON array
 // wrapped in chatter) and the two degraded paths (nil completer, completer error),
 // both of which must yield nil.
 func TestProposeMissingHazards_ParsesAndDegrades(t *testing.T) {
 	ctx := context.Background()
 	gaps := []CoverageGap{{Group: "F. Ergonomie", Key: "ergonomic"}}
 	chatty := fakeCompleter{out: `Hier: [{"group":"F. Ergonomie","hazard":"Heben schwerer Koerbe","why":"manuelles Beladen"}] fertig`}
 	proposed := ProposeMissingHazards(ctx, chatty, "x", "n", nil, gaps)
 	if len(proposed) != 1 || proposed[0].Hazard != "Heben schwerer Koerbe" {
 		t.Fatalf("parse: got %+v", proposed)
 	}
 	if res := ProposeMissingHazards(ctx, nil, "x", "n", nil, gaps); res != nil {
 		t.Errorf("nil completer must return nil")
 	}
 	failing := fakeCompleter{err: context.DeadlineExceeded}
 	if res := ProposeMissingHazards(ctx, failing, "x", "n", nil, gaps); res != nil {
 		t.Errorf("error must return nil")
 	}
 }
@@ -0,0 +1,152 @@
 package iace
 import (
 	"fmt"
 	"math"
 	"regexp"
 	"sort"
 	"strings"
 )
 // Offline dedup-candidate proposer (P2, type 1). DEV-TIME ONLY.
 //
 // It inspects the patterns that fired for one machine and proposes which look
 // like duplicates, so a human (later an LLM) can decide a supersession/merge. It
 // NEVER mutates the pattern library or the runtime — it only surfaces candidates.
 // The deterministic GT screen (ScreenSupersession, proposer_screen.go) is the
 // wall that proves a proposal is safe before a human ever sees it.
 //
 // Detection here is purely structural (category + zone + measure + scenario
 // overlap) and therefore reproducible. Two safety rules bake in what P1 taught
 // us about the dishwasher review:
 //   - only patterns with the SAME primary category are ever compared;
 //   - a pair with DIFFERENT operational states is NEVER proposed, because
 //     normal-operation and maintenance are legitimately distinct contexts with
 //     different protective measures (e.g. HP011 vs HP077). Merging them would
 //     erase the maintenance view.
 // DedupCandidate is a proposed near-duplicate pattern pair for one machine class.
 //
 // FindDedupCandidates fills it: Keep*/Drop* identify the surviving and the
 // superseded pattern, the three *Jaccard fields and Score hold the overlap
 // values rounded to two decimals, and Rationale is a human-readable summary of
 // why the pair was proposed.
 type DedupCandidate struct {
 	KeepPattern     string  `json:"keep_pattern"` // higher-priority survivor
 	DropPattern     string  `json:"drop_pattern"` // supersession target
 	KeepName        string  `json:"keep_name"`
 	KeepHazardName  string  `json:"keep_hazard_name"` // keep pattern ScenarioDE (for the GT-distinctness screen)
 	DropName        string  `json:"drop_name"`        // == generated hazard Name (ScenarioDE) of the drop pattern
 	Category        string  `json:"category"`
 	ZoneJaccard     float64 `json:"zone_jaccard"`
 	MeasureJaccard  float64 `json:"measure_jaccard"`
 	ScenarioJaccard float64 `json:"scenario_jaccard"`
 	Score           float64 `json:"score"`
 	Rationale       string  `json:"rationale"`
 }
 // FindDedupCandidates walks every unordered pair of fired patterns and proposes
 // the pairs that look like near-duplicates.
 //
 // A pair is only scored when both patterns share the same non-empty primary
 // category and the same set of operational states. The combined score is
 // 0.4*measure + 0.3*zone + 0.3*scenario Jaccard overlap; pairs scoring below
 // threshold are dropped. The pattern with the strictly higher priority is kept
 // (on a tie, the one that fired first). The result is ordered by score
 // descending, then by drop-pattern id.
 func FindDedupCandidates(fired []PatternMatch, threshold float64) []DedupCandidate {
 	var candidates []DedupCandidate
 	for i, first := range fired {
 		category := primaryCat(first)
 		if category == "" {
 			continue // without a primary category nothing can be compared
 		}
 		for _, second := range fired[i+1:] {
 			if primaryCat(second) != category {
 				continue
 			}
 			// Differing operational states mark legitimate lifecycle variants — never propose a merge.
 			if !sameOpStateSet(first.OperationalStates, second.OperationalStates) {
 				continue
 			}
 			zoneSim := tokenJaccard(zoneTokenSet(first.ZoneDE), zoneTokenSet(second.ZoneDE))
 			measureSim := tokenJaccard(toSet(first.SuggestedMeasureIDs), toSet(second.SuggestedMeasureIDs))
 			scenarioSim := tokenJaccard(wordTokenSet(first.ScenarioDE), wordTokenSet(second.ScenarioDE))
 			combined := 0.4*measureSim + 0.3*zoneSim + 0.3*scenarioSim
 			if combined < threshold {
 				continue
 			}
 			survivor, dropped := first, second
 			if second.Priority > first.Priority {
 				survivor, dropped = second, first
 			}
 			rationale := fmt.Sprintf(
 				"same category %q · measure overlap %.0f%% · zone overlap %.0f%% · scenario overlap %.0f%% → keep %s (P%d), supersede %s (P%d)",
 				category, measureSim*100, zoneSim*100, scenarioSim*100,
 				survivor.PatternID, survivor.Priority, dropped.PatternID, dropped.Priority)
 			candidates = append(candidates, DedupCandidate{
 				KeepPattern:     survivor.PatternID,
 				DropPattern:     dropped.PatternID,
 				KeepName:        survivor.PatternName,
 				KeepHazardName:  survivor.ScenarioDE,
 				DropName:        dropped.ScenarioDE,
 				Category:        category,
 				ZoneJaccard:     round2(zoneSim),
 				MeasureJaccard:  round2(measureSim),
 				ScenarioJaccard: round2(scenarioSim),
 				Score:           round2(combined),
 				Rationale:       rationale,
 			})
 		}
 	}
 	sort.SliceStable(candidates, func(x, y int) bool {
 		left, right := candidates[x], candidates[y]
 		if left.Score == right.Score {
 			return left.DropPattern < right.DropPattern
 		}
 		return left.Score > right.Score
 	})
 	return candidates
 }
 // primaryCat returns the first hazard category of pm, or "" when pm has none.
 func primaryCat(pm PatternMatch) string {
 	if cats := pm.HazardCats; len(cats) > 0 {
 		return cats[0]
 	}
 	return ""
 }
 // sameOpStateSet reports whether a and b contain the same operational states
 // when both are converted to sets via toSet (order and duplicates are ignored).
 func sameOpStateSet(a, b []string) bool {
 	left, right := toSet(a), toSet(b)
 	equal := len(left) == len(right)
 	if equal {
 		// Equal sizes plus "every left member is in right" implies set equality.
 		for state := range left {
 			if !right[state] {
 				equal = false
 				break
 			}
 		}
 	}
 	return equal
 }
 // proposerWordSplit matches any run of characters that are not Unicode letters;
 // splitting on it breaks free text into letter-only words.
 var proposerWordSplit = regexp.MustCompile(`[^\p{L}]+`)
 // zoneTokenSet lowercases a comma-separated zone string and returns its terms as
 // a set. Each term is trimmed of surrounding whitespace; terms shorter than
 // three runes are discarded. The result is never nil.
 func zoneTokenSet(zone string) map[string]bool {
 	terms := make(map[string]bool)
 	lowered := strings.ToLower(zone)
 	for _, raw := range strings.Split(lowered, ",") {
 		term := strings.TrimSpace(raw)
 		if len([]rune(term)) < 3 {
 			continue
 		}
 		terms[term] = true
 	}
 	return terms
 }
 // wordTokenSet lowercases s, splits it on proposerWordSplit and returns the set
 // of resulting words that are at least four runes long (shorter words, mostly
 // connectives, are dropped). The result is never nil.
 func wordTokenSet(s string) map[string]bool {
 	words := make(map[string]bool)
 	for _, token := range proposerWordSplit.Split(strings.ToLower(s), -1) {
 		if len([]rune(token)) < 4 {
 			continue
 		}
 		words[token] = true
 	}
 	return words
 }
 // tokenJaccard returns the Jaccard index |a ∩ b| / |a ∪ b| of two token sets.
 // A key of a counts as shared when b maps it to true. When the union is empty
 // (both sets empty) the result is 0.
 func tokenJaccard(a, b map[string]bool) float64 {
 	shared := 0
 	for token := range a {
 		if b[token] {
 			shared++
 		}
 	}
 	total := len(a) + len(b) - shared
 	if total == 0 {
 		return 0
 	}
 	return float64(shared) / float64(total)
 }
 // round2 rounds x to two decimal places using math.Round (half away from zero).
 func round2(x float64) float64 {
 	const scale = 100
 	return math.Round(x*scale) / scale
 }
@@ -0,0 +1,67 @@
 package iace
 import "testing"
 // mkPM builds a PatternMatch fixture. The id doubles as the pattern name and cat
 // becomes the single (primary) hazard category.
 func mkPM(id, cat, zone, scenario string, prio int, measures, opstates []string) PatternMatch {
 	var pm PatternMatch
 	pm.PatternID = id
 	pm.PatternName = id
 	pm.Priority = prio
 	pm.HazardCats = []string{cat}
 	pm.ZoneDE = zone
 	pm.ScenarioDE = scenario
 	pm.SuggestedMeasureIDs = measures
 	pm.OperationalStates = opstates
 	return pm
 }
 func TestFindDedupCandidates_FindsOverlappingPair(t *testing.T) {
 	fired := []PatternMatch{
 		mkPM("HPa", "update_failure", "Steuerung, SPS", "Software-Update der Steuerung scheitert nach Abbruch", 80,
 			[]string{"M138", "M146"}, nil),
 		mkPM("HPb", "update_failure", "Steuerung, Antriebsregler", "Software-Update der Steuerung schlaegt fehl", 75,
 			[]string{"M138", "M146", "M141"}, nil),
 		mkPM("HPc", "mechanical_hazard", "Tuer", "Quetschen der Finger an der Tuer", 70,
 			[]string{"M003"}, nil),
 	}
 	got := FindDedupCandidates(fired, 0.4)
 	if len(got) != 1 {
 		t.Fatalf("want 1 candidate, got %d: %+v", len(got), got)
 	}
 	// Higher-priority pattern survives, lower one is the drop target.
 	if got[0].KeepPattern != "HPa" || got[0].DropPattern != "HPb" {
 		t.Errorf("want keep HPa / drop HPb, got keep %s / drop %s", got[0].KeepPattern, got[0].DropPattern)
 	}
 	if got[0].DropName != "Software-Update der Steuerung schlaegt fehl" {
 		t.Errorf("DropName must equal drop pattern ScenarioDE, got %q", got[0].DropName)
 	}
 }
 func TestFindDedupCandidates_LifecycleGuard(t *testing.T) {
 	// Same category, zone and measures — but normal-operation vs maintenance.
 	// These are legitimate variants (HP011 vs HP077) and must NOT be proposed.
 	fired := []PatternMatch{
 		mkPM("HP011", "electrical_hazard", "Schaltschrank, Klemmenkasten", "Person beruehrt spannungsfuehrende Teile", 95,
 			[]string{"M481", "M482"}, nil),
 		mkPM("HP077", "electrical_hazard", "Schaltschrank, Klemmenkasten", "Person beruehrt spannungsfuehrende Teile", 80,
 			[]string{"M481", "M482"}, []string{"maintenance"}),
 	}
 	if got := FindDedupCandidates(fired, 0.4); len(got) != 0 {
 		t.Fatalf("lifecycle guard failed: want 0 candidates, got %d: %+v", len(got), got)
 	}
 }
 func TestFindDedupCandidates_DifferentCategoryIgnored(t *testing.T) {
 	fired := []PatternMatch{
 		mkPM("HPa", "thermal_hazard", "Boiler", "Heisse Oberflaeche am Boiler", 80, []string{"M071"}, nil),
 		mkPM("HPb", "mechanical_hazard", "Boiler", "Heisse Oberflaeche am Boiler", 80, []string{"M071"}, nil),
 	}
 	if got := FindDedupCandidates(fired, 0.3); len(got) != 0 {
 		t.Fatalf("cross-category pair must not be proposed, got %d", len(got))
 	}
 }
 func TestFindDedupCandidates_BelowThresholdDropped(t *testing.T) {
 	fired := []PatternMatch{
 		mkPM("HPa", "mechanical_hazard", "Tuer", "Quetschen an der Tuer", 80, []string{"M003"}, nil),
 		mkPM("HPb", "mechanical_hazard", "Foerderband", "Einzug am Foerderband", 80, []string{"M540"}, nil),
 	}
 	if got := FindDedupCandidates(fired, 0.4); len(got) != 0 {
 		t.Fatalf("disjoint pair must be below threshold, got %d: %+v", len(got), got)
 	}
 }
@@ -0,0 +1,154 @@
 package iace
 import (
 	"fmt"
 	"sort"
 	"strings"
 )
 // Foreign-framing proposer (P2 slice 4, type 2). DEV-TIME, propose-only.
 //
 // A pattern can fire for a machine yet describe its hazard with a zone text
 // framed for a DIFFERENT machine (e.g. a dishwasher hazard whose zone names
 // "Walzen, Transportbaender" or "Bearbeitungszone"). Such foreign framing leaks
 // through terms that are NOT yet in domainGateTerms — once a term is a gate term,
 // the ghost-pattern invariant already fences the pattern out. So we surface the
 // candidates structurally: zone terms a fired pattern names that the machine's
 // narrative never mentions (minus generic hazard-location vocabulary). A human
 // (or the LLM) then decides: add a dom_* gate term, or re-frame the zone text.
 //
 // This OVER-surfaces by design — the human/LLM is the precision filter, not the
 // detector (same contract as the dedup proposer).
 // genericHazardStop lists hazard-LOCATION words (lowercase German) that
 // legitimately appear in zone texts without being echoed in a narrative. They
 // are not evidence of foreign framing: partEchoed treats a zone part that
 // contains one of these words as echoed.
 var genericHazardStop = map[string]bool{
 	"quetschstelle": true, "einzugstelle": true, "einzugsstelle": true, "scherstelle": true,
 	"schneidstelle": true, "stossstelle": true, "fangstelle": true, "klemmstelle": true,
 	"gefahrbereich": true, "gefahrenbereich": true, "gefahrstelle": true, "gefahrenstelle": true,
 	"arbeitsbereich": true, "wirkbereich": true, "schutzbereich": true, "umgebung": true,
 	"bereich": true, "zugang": true, "oberflaeche": true, "oberflaechen": true,
 	"gehaeuse": true, "bauteil": true, "bauteile": true, "komponente": true, "maschine": true,
 }
 // FramingCandidate is a fired pattern whose zone text looks foreign for the machine.
 //
 // FindFramingCandidates fills it: OrphanTerms are the zone parts with no echo in
 // the narrative, OrphanFraction is their share of all zone parts (rounded to two
 // decimals), Verdict is the heuristic lean and Evidence a human-readable summary.
 type FramingCandidate struct {
 	Pattern        string   `json:"pattern"`
 	Name           string   `json:"name"`
 	Category       string   `json:"category"`
 	Zone           string   `json:"zone"`
 	OrphanTerms    []string `json:"orphan_terms"`
 	OrphanFraction float64  `json:"orphan_fraction"`
 	Verdict        string   `json:"verdict"` // heuristic lean: foreign | plausible
 	Evidence       string   `json:"evidence"`
 }
 // FindFramingCandidates flags fired patterns whose zone text is largely absent
 // from the narrative.
 //
 // For each pattern the zone is split with zoneParts; a part without a narrative
 // echo (see partEchoed) is an orphan. A pattern is reported when it has at least
 // one orphan and its orphan share is >= minFraction. Patterns without any
 // significant zone part are skipped. The result is ordered by (rounded) orphan
 // fraction descending, then by pattern id.
 func FindFramingCandidates(fired []PatternMatch, narrative string, minFraction float64) []FramingCandidate {
 	lowered := strings.ToLower(narrative)
 	// Narrative words of five or more runes serve as stems for compound matching.
 	var stems []string
 	for _, word := range proposerWordSplit.Split(lowered, -1) {
 		if len([]rune(word)) < 5 {
 			continue
 		}
 		stems = append(stems, word)
 	}
 	var flagged []FramingCandidate
 	for _, pm := range fired {
 		terms := zoneParts(pm.ZoneDE)
 		if len(terms) == 0 {
 			continue
 		}
 		var unechoed []string
 		for _, term := range terms {
 			if partEchoed(term, lowered, stems) {
 				continue
 			}
 			unechoed = append(unechoed, term)
 		}
 		if len(unechoed) == 0 {
 			continue
 		}
 		fraction := float64(len(unechoed)) / float64(len(terms))
 		if fraction < minFraction {
 			continue
 		}
 		evidence := fmt.Sprintf("%d/%d zone terms have no narrative echo: %s",
 			len(unechoed), len(terms), strings.Join(unechoed, ", "))
 		flagged = append(flagged, FramingCandidate{
 			Pattern:        pm.PatternID,
 			Name:           pm.PatternName,
 			Category:       primaryCat(pm),
 			Zone:           pm.ZoneDE,
 			OrphanTerms:    unechoed,
 			OrphanFraction: round2(fraction),
 			Verdict:        framingHeuristicVerdict(fraction),
 			Evidence:       evidence,
 		})
 	}
 	sort.SliceStable(flagged, func(x, y int) bool {
 		left, right := flagged[x], flagged[y]
 		if left.OrphanFraction == right.OrphanFraction {
 			return left.Pattern < right.Pattern
 		}
 		return left.OrphanFraction > right.OrphanFraction
 	})
 	return flagged
 }
 // framingHeuristicVerdict maps an orphan fraction to the heuristic lean:
 // "foreign" when (practically) nothing in the zone is echoed by the narrative
 // (frac >= 0.99), otherwise "plausible" — a partial echo, likely generic
 // vocabulary, for a human to confirm.
 func framingHeuristicVerdict(frac float64) string {
 	verdict := "plausible"
 	if frac >= 0.99 {
 		verdict = "foreign"
 	}
 	return verdict
 }
 // zoneParts lowercases a zone string and splits it on commas, slashes,
 // semicolons and parentheses. Each piece is trimmed of surrounding whitespace;
 // pieces shorter than four runes are discarded. Order of appearance is kept.
 // It returns nil when no piece qualifies.
 func zoneParts(zone string) []string {
 	isSeparator := func(r rune) bool {
 		switch r {
 		case ',', '/', ';', '(', ')':
 			return true
 		}
 		return false
 	}
 	var terms []string
 	for _, field := range strings.FieldsFunc(strings.ToLower(zone), isSeparator) {
 		term := strings.TrimSpace(field)
 		if len([]rune(term)) < 4 {
 			continue
 		}
 		terms = append(terms, term)
 	}
 	return terms
 }
 // partEchoed reports whether a zone part is reflected in the narrative. The part
 // is split on whitespace and is echoed as soon as one of its words
 //   - is a generic hazard-location term (genericHazardStop), or
 //   - has at least four runes and is a substring of the narrative, or
 //   - has at least four runes and contains one of the narrative stems.
 //
 // The last rule makes matching bidirectional so it survives German compounding:
 // narrative "Steuerung" echoes zone "Steuerungssystem". narrative and narStems
 // are expected to be lowercased already, as is part.
 func partEchoed(part, narrative string, narStems []string) bool {
 	for _, word := range strings.Fields(part) {
 		switch {
 		case genericHazardStop[word]:
 			return true
 		case len([]rune(word)) < 4:
 			continue // too short to be a meaningful match
 		case strings.Contains(narrative, word):
 			return true
 		}
 		for _, stem := range narStems {
 			if strings.Contains(word, stem) {
 				return true
 			}
 		}
 	}
 	return false
 }
 // RenderFramingQueue formats foreign-framing candidates as a markdown review
 // queue: a heading for machine, a one-line summary with the candidate count and
 // one numbered section per candidate (verdict, orphan percentage, category,
 // zone, orphan terms and the suggested action for the verdict).
 func RenderFramingQueue(machine string, candidates []FramingCandidate) string {
 	var sb strings.Builder
 	fmt.Fprintf(&sb, "# Foreign-framing review queue — %s\n\n", machine)
 	fmt.Fprintf(&sb, "%d fired pattern(s) name zone terms the narrative never mentions. Propose-only — a human (or the LLM) decides: add a dom_* gate term, or re-frame the zone.\n\n", len(candidates))
 	for idx := range candidates {
 		cand := candidates[idx]
 		orphanPercent := cand.OrphanFraction * 100
 		orphanList := strings.Join(cand.OrphanTerms, ", ")
 		fmt.Fprintf(&sb, "## %d. %s — %s  [%s, orphan %.0f%%]\n", idx+1, cand.Pattern, cand.Name, cand.Verdict, orphanPercent)
 		fmt.Fprintf(&sb, "- category: %s\n- zone: %s\n", cand.Category, cand.Zone)
 		fmt.Fprintf(&sb, "- orphan terms (no narrative echo): %s\n", orphanList)
 		fmt.Fprintf(&sb, "- suggested action: %s\n\n", framingAction(cand.Verdict))
 	}
 	return sb.String()
 }
 // framingAction returns the suggested reviewer action for a framing verdict:
 // a gate-term/re-framing proposal for "foreign", a confirm-only note for
 // anything else.
 func framingAction(verdict string) string {
 	switch verdict {
 	case "foreign":
 		return "likely foreign-framed — propose a dom_* gate term for the orphan term(s), or re-frame the zone; human confirms + commits + pins a GT case"
 	default:
 		return "partial echo — likely generic vocabulary; human to confirm whether any orphan term is a foreign-machine component"
 	}
 }
@@ -0,0 +1,33 @@
 package iace
 import "testing"
 func TestFindFramingCandidates_FlagsForeignZone(t *testing.T) {
 	narrative := "Gewerbliche Geschirrspuelmaschine mit Boiler und Tank. Die Tuer ist verriegelt."
 	fired := []PatternMatch{
 		mkPM("HPforeign", "mechanical_hazard", "Walzen, Transportbaender, Bearbeitungszone", "Einzug", 80, nil, nil),
 		mkPM("HPlocal", "thermal_hazard", "Boiler, Tank, Tuer", "Verbrennung", 80, nil, nil),
 		mkPM("HPgeneric", "mechanical_hazard", "Quetschstelle, Gefahrbereich", "Quetschen", 80, nil, nil),
 	}
 	got := FindFramingCandidates(fired, narrative, 0.6)
 	if len(got) != 1 || got[0].Pattern != "HPforeign" {
 		t.Fatalf("want only HPforeign flagged, got %+v", got)
 	}
 	if got[0].Verdict != "foreign" {
 		t.Errorf("fully-orphan zone should be 'foreign', got %s", got[0].Verdict)
 	}
 }
 func TestFindFramingCandidates_PartialEchoIsPlausible(t *testing.T) {
 	narrative := "Maschine mit Boiler und Tank."
 	fired := []PatternMatch{
 		mkPM("HPx", "thermal_hazard", "Boiler, Tank, Auspuffleitung", "x", 80, nil, nil),
 	}
 	got := FindFramingCandidates(fired, narrative, 0.3)
 	if len(got) != 1 {
 		t.Fatalf("want 1 candidate (1/3 orphan >= 0.3), got %d", len(got))
 	}
 	if got[0].Verdict != "plausible" || len(got[0].OrphanTerms) != 1 || got[0].OrphanTerms[0] != "auspuffleitung" {
 		t.Errorf("want plausible + orphan [auspuffleitung], got %s %v", got[0].Verdict, got[0].OrphanTerms)
 	}
 }
@@ -0,0 +1,123 @@
 package iace
 import "github.com/google/uuid"
 // Non-test plumbing for the offline proposer (P2 slice 3): run the engine for a
 // narrative and produce the fired patterns + the engine-built hazards/mitigations
 // the dedup proposer and GT screen consume. This is the same pipeline the GT
 // benchmark tests use, lifted out of test scope so the dev-time CLI can call it.
 // universalLifecyclePhases is appended to the lifecycle phases parsed from the
 // narrative (see BuildProposerInput) so that patterns gated to a specific
 // lifecycle (maintenance/cleaning/setup/fault clearing) still fire — the
 // proposer wants the full hazard picture, not only normal-operation hazards.
 var universalLifecyclePhases = []string{"normal_operation", "maintenance", "cleaning", "setup", "fault_clearing"}
 // BuildProposerInput runs the offline proposer pipeline for one narrative:
 //  1. parse the narrative (negated components are ignored),
 //  2. match the pattern engine against the parsed components, energy sources,
 //     tags, states, roles, machine types and the parsed lifecycle phases plus
 //     universalLifecyclePhases,
 //  3. keep only the patterns IsPatternRelevant accepts for this narrative,
 //  4. convert the kept patterns into hazards and mitigations.
 //
 // It returns the hazards, the mitigations and the kept patterns. NOTE: the CE
 // cyber-category skip is not applied, so the proposer view may include cyber/AI
 // hazards that the CE log excludes — harmless for the GT recall screen (they
 // match no CE ground-truth entry).
 func BuildProposerInput(narrative, machineType string, extraMachineTypes []string) ([]Hazard, []Mitigation, []PatternMatch) {
 	parsed := ParseNarrative(narrative, machineType)
 	var (
 		componentIDs   []string
 		componentNames []string
 		energyIDs      []string
 	)
 	for _, comp := range parsed.Components {
 		if !comp.Negated {
 			componentIDs = append(componentIDs, comp.LibraryID)
 			componentNames = append(componentNames, comp.NameDE)
 		}
 	}
 	for _, source := range parsed.EnergySources {
 		energyIDs = append(energyIDs, source.SourceID)
 	}
 	// Copy the caller's slice so appending machineType never touches its backing array.
 	machineTypes := make([]string, 0, len(extraMachineTypes)+1)
 	machineTypes = append(machineTypes, extraMachineTypes...)
 	if machineType != "" {
 		machineTypes = append(machineTypes, machineType)
 	}
 	lifecycles := make([]string, 0, len(parsed.LifecyclePhases)+len(universalLifecyclePhases))
 	lifecycles = append(lifecycles, parsed.LifecyclePhases...)
 	lifecycles = append(lifecycles, universalLifecyclePhases...)
 	matched := NewPatternEngine().Match(MatchInput{
 		ComponentLibraryIDs: componentIDs,
 		EnergySourceIDs:     energyIDs,
 		LifecyclePhases:     lifecycles,
 		CustomTags:          parsed.CustomTags,
 		OperationalStates:   parsed.OperationalStates,
 		StateTransitions:    parsed.StateTransitions,
 		HumanRoles:          parsed.Roles,
 		MachineTypes:        machineTypes,
 	})
 	relevant := make([]PatternMatch, 0, len(matched.MatchedPatterns))
 	for _, pm := range matched.MatchedPatterns {
 		if !IsPatternRelevant(pm, narrative, componentNames) {
 			continue
 		}
 		relevant = append(relevant, pm)
 	}
 	// Work on a shallow copy so the engine output itself is left untouched.
 	narrowed := *matched
 	narrowed.MatchedPatterns = relevant
 	hazards, mitigations := patternsToHazardsAndMitigations(&narrowed)
 	return hazards, mitigations, relevant
 }
 // patternsToHazardsAndMitigations turns engine output into the hazard and
 // mitigation entities the benchmark and the proposer compare on.
 //
 // Each matched pattern yields one Hazard with a fresh UUID; its name is the
 // pattern scenario, falling back to the pattern name when the scenario is empty,
 // and category/lifecycle are the pattern's first entries (if any). Each
 // suggested measure yields one Mitigation per source pattern that produced a
 // hazard, named after the protective-measure library entry or, if unknown, the
 // measure id. Simplified vs InitializeProject (no risk estimation, no norm refs).
 func patternsToHazardsAndMitigations(out *MatchOutput) ([]Hazard, []Mitigation) {
 	patterns := out.MatchedPatterns
 	hazards := make([]Hazard, 0, len(patterns))
 	hazardByPattern := make(map[string]uuid.UUID, len(patterns))
 	for _, pm := range patterns {
 		hazard := Hazard{
 			ID:             uuid.New(),
 			Name:           pm.ScenarioDE,
 			Description:    pm.ScenarioDE,
 			Scenario:       pm.ScenarioDE,
 			TriggerEvent:   pm.TriggerDE,
 			PossibleHarm:   pm.HarmDE,
 			AffectedPerson: pm.AffectedDE,
 			HazardousZone:  pm.ZoneDE,
 		}
 		if len(pm.HazardCats) > 0 {
 			hazard.Category = pm.HazardCats[0]
 		}
 		if len(pm.ApplicableLifecycles) > 0 {
 			hazard.LifecyclePhase = pm.ApplicableLifecycles[0]
 		}
 		if hazard.Name == "" {
 			hazard.Name = pm.PatternName
 		}
 		hazards = append(hazards, hazard)
 		hazardByPattern[pm.PatternID] = hazard.ID
 	}
 	nameByMeasure := make(map[string]string)
 	for _, measure := range GetProtectiveMeasureLibrary() {
 		nameByMeasure[measure.ID] = measure.Name
 	}
 	var mitigations []Mitigation
 	for _, suggestion := range out.SuggestedMeasures {
 		label := nameByMeasure[suggestion.MeasureID]
 		if label == "" {
 			label = suggestion.MeasureID
 		}
 		for _, source := range suggestion.SourcePatterns {
 			// Measures whose source pattern produced no hazard are skipped.
 			if hazardID, found := hazardByPattern[source]; found {
 				mitigations = append(mitigations, Mitigation{
 					ID:       uuid.New(),
 					HazardID: hazardID,
 					Name:     label,
 				})
 			}
 		}
 	}
 	return hazards, mitigations
 }
@@ -0,0 +1,25 @@
 package iace
 import "testing"
 // TestBuildProposerInput_WarewashingFires runs the proposer pipeline on the
 // warewashing narrative and expects fired patterns, hazards and, specifically,
 // pattern HP2201 among the fired ones.
 func TestBuildProposerInput_WarewashingFires(t *testing.T) {
 	hazards, _, fired := BuildProposerInput(
 		warewashingNarrative,
 		"Gewerbliche Untertisch-Geschirrspuelmaschine (vernetzt)",
 		[]string{"food_processing"},
 	)
 	if len(fired) == 0 || len(hazards) == 0 {
 		t.Fatalf("want fired patterns + hazards, got %d patterns / %d hazards", len(fired), len(hazards))
 	}
 	firedIDs := make(map[string]bool, len(fired))
 	for _, pm := range fired {
 		firedIDs[pm.PatternID] = true
 	}
 	if !firedIDs["HP2201"] {
 		t.Errorf("warewashing-specific HP2201 must fire via BuildProposerInput")
 	}
 }
@@ -0,0 +1,174 @@
 package iace
 import (
 	"context"
 	"encoding/json"
 	"fmt"
 	"strings"
 	"github.com/breakpilot/ai-compliance-sdk/internal/llm"
 )
 // Semantic judgement over RECALL-SAFE dedup candidates (P2 slice 2). DEV-TIME,
 // propose-only. The deterministic GT wall (proposer_screen.go) has already
 // removed candidates that would drop recall or that credit different GT entries;
 // the judge only adds an opinion on whether the survivors are truly the same
 // hazard, plus a rationale, for the human review queue. It NEVER mutates anything.
 //
 // The judge is pluggable behind CandidateJudge so the runtime/tests stay
 // deterministic (HeuristicJudge) while the dev-time CLI can plug in the
 // non-deterministic LLM (LLMJudge over the shared llm.ProviderRegistry).
 // Verdict values a CandidateJudge can return for a dedup candidate. They are
 // also the only verdict strings parseJudgeJSON accepts from the LLM; anything
 // else is normalised to VerdictUncertain.
 const (
 	VerdictDuplicate = "duplicate"
 	VerdictDistinct  = "distinct"
 	VerdictUncertain = "uncertain"
 )
 // JudgedProposal is one candidate with its GT-wall result and the judge's opinion.
 //
 // Candidate and Screen carry the structural proposal and the deterministic screen
 // outcome; Verdict, Confidence and Rationale are the three values returned by
 // CandidateJudge.Judge. Judge names the judge that produced them — presumably
 // CandidateJudge.Name(); the code that fills it is not part of this file.
 type JudgedProposal struct {
 	Candidate  DedupCandidate `json:"candidate"`
 	Screen     ScreenResult   `json:"screen"`
 	Verdict    string         `json:"verdict"`
 	Confidence string         `json:"confidence"`
 	Rationale  string         `json:"rationale"`
 	Judge      string         `json:"judge"`
 }
 // CandidateJudge decides whether two near-duplicate patterns are the same hazard.
 //
 // Name returns a short identifier for the judge. Judge receives the candidate c
 // together with the two patterns a and b it was built from and returns a verdict
 // (one of the Verdict* constants), a confidence label and a free-text rationale.
 type CandidateJudge interface {
 	Name() string
 	Judge(ctx context.Context, c DedupCandidate, a, b PatternMatch) (verdict, confidence, rationale string)
 }
 // HeuristicJudge is the deterministic default/fallback judge. It is a
 // placeholder for the LLM: it only ever reports "low" confidence and
 // deliberately answers "uncertain" on the hard cases (low text overlap, shared
 // measures), so the queue shows exactly where the LLM earns its keep.
 type HeuristicJudge struct{}
 // Name returns the identifier of the heuristic judge.
 func (HeuristicJudge) Name() string {
 	return "heuristic"
 }
 // Judge classifies a candidate purely from its overlap figures; the context and
 // the two patterns are ignored. Confidence is always "low".
 //   - duplicate: scenario overlap >= 0.5, or zone and measure overlap both >= 0.5
 //   - distinct:  measures (practically) identical, zero zone overlap and
 //     scenario overlap < 0.3
 //   - uncertain: everything else
 func (HeuristicJudge) Judge(_ context.Context, c DedupCandidate, _, _ PatternMatch) (string, string, string) {
 	const confidence = "low"
 	scenarioHigh := c.ScenarioJaccard >= 0.5
 	zoneAndMeasureHigh := c.ZoneJaccard >= 0.5 && c.MeasureJaccard >= 0.5
 	if scenarioHigh || zoneAndMeasureHigh {
 		return VerdictDuplicate, confidence, "structural: high scenario, or combined zone+measure, overlap"
 	}
 	if c.MeasureJaccard >= 0.99 && c.ZoneJaccard == 0 && c.ScenarioJaccard < 0.3 {
 		return VerdictDistinct, confidence, "structural: identical measures but no zone/scenario overlap — likely distinct hazards sharing generic measures"
 	}
 	return VerdictUncertain, confidence, "structural signal inconclusive — needs the LLM judge"
 }
 // LLMJudge lets an offline model make the semantic same-vs-distinct call. The
 // answer is non-deterministic, so this judge belongs to the dev-time tool only,
 // never to tests or the runtime. Any transport or parse error degrades to
 // "uncertain" — the judge must never break the run.
 type LLMJudge struct {
 	Completer    LLMCompleter
 	MachineClass string
 }
 // Name returns the identifier of the LLM judge.
 func (LLMJudge) Name() string {
 	return "llm"
 }
 // Judge builds the judge prompt for patterns a and b, sends it through the
 // configured completer and parses the JSON answer into verdict, confidence and
 // rationale. The candidate c is unused; it is part of the CandidateJudge
 // signature.
 //
 // The call never fails hard: a missing completer or a completion error yields
 // (VerdictUncertain, "low", "LLM error: …"), and unparseable output is degraded
 // by parseJudgeJSON.
 func (j LLMJudge) Judge(ctx context.Context, c DedupCandidate, a, b PatternMatch) (string, string, string) {
 	// A zero-value LLMJudge has no completer; degrade instead of panicking on
 	// the nil interface call.
 	if j.Completer == nil {
 		return VerdictUncertain, "low", "LLM error: no completer configured"
 	}
 	system, user := BuildJudgePrompt(j.MachineClass, a, b)
 	raw, err := j.Completer.Complete(ctx, system, user)
 	if err != nil {
 		return VerdictUncertain, "low", "LLM error: " + err.Error()
 	}
 	return parseJudgeJSON(raw)
 }
 // BuildJudgePrompt is the real LLM artifact — built and unit-tested deterministically
 // even though the call itself is not. It frames the ISO 12100 same-vs-distinct
 // question and forces a JSON answer.
 //
 // The system prompt (German) sets the expert role, defines when two hazards count
 // as distinct and fixes the JSON answer schema. The user prompt lists, for the
 // given machine class, id, name, primary category, zone, scenario, trigger, harm
 // and suggested measure ids of pattern a and of pattern b.
 func BuildJudgePrompt(machineClass string, a, b PatternMatch) (system, user string) {
 	system = "Du bist Sachverstaendiger fuer Maschinensicherheit nach EN ISO 12100. " +
 		"Entscheide, ob zwei generierte Gefaehrdungen fuer DIESE Maschine DIESELBE Gefaehrdung " +
 		"beschreiben (Dublette) oder fachlich VERSCHIEDENE Gefaehrdungen sind, die nur zufaellig " +
 		"dieselben Schutzmassnahmen teilen. Verschieden, wenn Wirkort, Ausloeser oder " +
 		"Schadensmechanismus abweichen — auch bei gleicher Kategorie und gleichen Massnahmen. " +
 		"Antworte AUSSCHLIESSLICH als JSON: " +
 		`{"verdict":"duplicate|distinct|uncertain","confidence":"high|medium|low","rationale":"..."}.`
 	// The template is a raw string: its line breaks and leading spaces are part of the prompt.
 	user = fmt.Sprintf(`Maschinenklasse: %s
 Gefaehrdung A (%s):
  Name: %s
  Kategorie: %s
  Zone: %s
  Szenario: %s
  Ausloeser: %s
  Schaden: %s
  Massnahmen: %s
 Gefaehrdung B (%s):
  Name: %s
  Kategorie: %s
  Zone: %s
  Szenario: %s
  Ausloeser: %s
  Schaden: %s
  Massnahmen: %s
 Sind A und B dieselbe Gefaehrdung fuer diese Maschine?`,
 		machineClass,
 		a.PatternID, a.PatternName, primaryCat(a), a.ZoneDE, a.ScenarioDE, a.TriggerDE, a.HarmDE, strings.Join(a.SuggestedMeasureIDs, ", "),
 		b.PatternID, b.PatternName, primaryCat(b), b.ZoneDE, b.ScenarioDE, b.TriggerDE, b.HarmDE, strings.Join(b.SuggestedMeasureIDs, ", "))
 	return system, user
 }
 // parseJudgeJSON extracts the judge answer from raw model output. It decodes the
 // span from the first "{" to the last "}" (so surrounding chatter is tolerated)
 // and normalises the result:
 //   - verdict is trimmed and lowercased; anything other than the three Verdict*
 //     values becomes VerdictUncertain;
 //   - confidence is trimmed and lowercased; anything other than "high",
 //     "medium" or "low" (including empty) becomes "low", so free-form model text
 //     never leaks into the review queue;
 //   - rationale is passed through unchanged.
 //
 // When no JSON object is found or it cannot be decoded, the result is
 // (VerdictUncertain, "low", <explanation>).
 func parseJudgeJSON(raw string) (verdict, confidence, rationale string) {
 	start, end := strings.Index(raw, "{"), strings.LastIndex(raw, "}")
 	if start < 0 || end <= start {
 		return VerdictUncertain, "low", "unparseable LLM output"
 	}
 	var v struct {
 		Verdict    string `json:"verdict"`
 		Confidence string `json:"confidence"`
 		Rationale  string `json:"rationale"`
 	}
 	if err := json.Unmarshal([]byte(raw[start:end+1]), &v); err != nil {
 		return VerdictUncertain, "low", "unparseable LLM JSON: " + err.Error()
 	}
 	// Models frequently vary case or pad values; accept those spellings
 	// instead of downgrading an otherwise valid answer.
 	verdict = strings.ToLower(strings.TrimSpace(v.Verdict))
 	switch verdict {
 	case VerdictDuplicate, VerdictDistinct, VerdictUncertain:
 	default:
 		verdict = VerdictUncertain
 	}
 	confidence = strings.ToLower(strings.TrimSpace(v.Confidence))
 	switch confidence {
 	case "high", "medium", "low":
 	default:
 		confidence = "low"
 	}
 	return verdict, confidence, v.Rationale
 }
 // LLMCompleter is the minimal text-in/text-out the LLM judge needs. Tests pass a
 // stub; the dev-time tool passes a registry-backed adapter (NewRegistryCompleter).
 //
 // Complete sends one system and one user message and returns the model's text
 // answer, or an error when the completion failed.
 type LLMCompleter interface {
 	Complete(ctx context.Context, system, user string) (string, error)
 }
 // registryCompleter implements LLMCompleter on top of an llm.ProviderRegistry;
 // model is the model name sent with every chat request.
 type registryCompleter struct {
 	reg   *llm.ProviderRegistry
 	model string
 }
 // NewRegistryCompleter wraps the shared llm.ProviderRegistry in an LLMCompleter
 // that always requests the given model, so the proposer can reuse the platform's
 // offline model wiring (e.g. self-hosted qwen).
 func NewRegistryCompleter(reg *llm.ProviderRegistry, model string) LLMCompleter {
 	completer := new(registryCompleter)
 	completer.reg = reg
 	completer.model = model
 	return completer
 }
 // Complete sends the system and user text as a two-message chat request
 // (temperature 0) for the configured model and returns the content of the
 // response message. A registry error is returned unchanged with an empty string.
 func (rc *registryCompleter) Complete(ctx context.Context, system, user string) (string, error) {
 	request := llm.ChatRequest{
 		Model: rc.model,
 		Messages: []llm.Message{
 			{Role: "system", Content: system},
 			{Role: "user", Content: user},
 		},
 		Temperature: 0,
 	}
 	response, err := rc.reg.Chat(ctx, &request)
 	if err != nil {
 		return "", err
 	}
 	return response.Message.Content, nil
 }
@@ -0,0 +1,104 @@
 package iace
 import (
 	"context"
 	"errors"
 	"strings"
 	"testing"
 )
 // TestHeuristicJudge_Verdicts pins the verdict for each structural overlap
 // constellation and checks that the heuristic never reports more than "low"
 // confidence.
 func TestHeuristicJudge_Verdicts(t *testing.T) {
 	type verdictCase struct {
 		label                   string
 		zone, measure, scenario float64
 		want                    string
 	}
 	cases := []verdictCase{
 		{"high scenario overlap -> duplicate", 0, 0.3, 0.6, VerdictDuplicate},
 		{"high zone+measure -> duplicate", 0.6, 0.6, 0.1, VerdictDuplicate},
 		{"identical measures, no text -> distinct", 0, 1.0, 0.0, VerdictDistinct},
 		{"shared measures, low text -> uncertain", 0, 0.67, 0.19, VerdictUncertain},
 	}
 	judge := HeuristicJudge{}
 	for _, tc := range cases {
 		t.Run(tc.label, func(t *testing.T) {
 			candidate := DedupCandidate{ZoneJaccard: tc.zone, MeasureJaccard: tc.measure, ScenarioJaccard: tc.scenario}
 			verdict, confidence, _ := judge.Judge(context.Background(), candidate, PatternMatch{}, PatternMatch{})
 			if verdict != tc.want {
 				t.Errorf("verdict: want %s, got %s", tc.want, verdict)
 			}
 			if confidence != "low" {
 				t.Errorf("heuristic confidence must be low, got %s", confidence)
 			}
 		})
 	}
 }
 // TestBuildJudgePrompt_ContainsKeyFacts checks that the system prompt carries the
 // standard/JSON framing and that the user prompt names the machine class and the
 // key facts of both patterns.
 func TestBuildJudgePrompt_ContainsKeyFacts(t *testing.T) {
 	boiler := PatternMatch{
 		PatternID: "HPa", PatternName: "Heisse Flaeche", HazardCats: []string{"thermal_hazard"},
 		ZoneDE: "Boiler", ScenarioDE: "Beruehrung heisser Boiler", SuggestedMeasureIDs: []string{"M071"},
 	}
 	dishes := PatternMatch{
 		PatternID: "HPb", PatternName: "Heisses Spuelgut", HazardCats: []string{"thermal_hazard"},
 		ZoneDE: "Spuelgut", ScenarioDE: "Beruehrung heisses Geschirr", SuggestedMeasureIDs: []string{"M071"},
 	}
 	system, user := BuildJudgePrompt("Geschirrspuelmaschine", boiler, dishes)
 	for _, fragment := range []string{"EN ISO 12100", "JSON", "verdict"} {
 		if strings.Contains(system, fragment) {
 			continue
 		}
 		t.Errorf("system prompt missing %q", fragment)
 	}
 	for _, fragment := range []string{"Geschirrspuelmaschine", "HPa", "HPb", "Boiler", "Spuelgut", "thermal_hazard"} {
 		if strings.Contains(user, fragment) {
 			continue
 		}
 		t.Errorf("user prompt missing %q", fragment)
 	}
 }
 // fakeCompleter is a canned completer stub for tests: it holds the text and the
 // error to hand back.
 type fakeCompleter struct {
 	out string
 	err error
 }
 // Complete ignores the context and both prompts and returns the canned pair.
 func (f fakeCompleter) Complete(_ context.Context, _, _ string) (string, error) {
 	return f.out, f.err
 }
 // TestLLMJudge_ParsesAndDegrades covers the LLM judge's parse path and its three
 // degradation paths (unknown verdict, transport error, non-JSON output).
 func TestLLMJudge_ParsesAndDegrades(t *testing.T) {
 	ctx := context.Background()
 	cand := DedupCandidate{KeepPattern: "HPa", DropPattern: "HPb"}
 	var none PatternMatch
 	judgeWith := func(c fakeCompleter, machineClass string) (string, string, string) {
 		return LLMJudge{Completer: c, MachineClass: machineClass}.Judge(ctx, cand, none, none)
 	}
 	// Well-formed JSON parses even when wrapped in chatter.
 	verdict, confidence, rationale := judgeWith(
 		fakeCompleter{out: "Sicher. {\"verdict\":\"distinct\",\"confidence\":\"high\",\"rationale\":\"andere Wirkorte\"}"}, "x")
 	if verdict != VerdictDistinct || confidence != "high" || rationale != "andere Wirkorte" {
 		t.Errorf("parse: got %s/%s/%q", verdict, confidence, rationale)
 	}
 	// An unknown verdict value is normalised to uncertain.
 	verdict, _, _ = judgeWith(fakeCompleter{out: `{"verdict":"maybe","confidence":"medium","rationale":"x"}`}, "")
 	if verdict != VerdictUncertain {
 		t.Errorf("unknown verdict must normalise to uncertain, got %s", verdict)
 	}
 	// A transport error degrades gracefully and never panics.
 	verdict, _, rationale = judgeWith(fakeCompleter{err: errors.New("connection refused")}, "")
 	if verdict != VerdictUncertain || !strings.Contains(rationale, "LLM error") {
 		t.Errorf("error path: got %s / %q", verdict, rationale)
 	}
 	// Garbage without any JSON degrades to uncertain.
 	verdict, _, _ = judgeWith(fakeCompleter{out: "no json here"}, "")
 	if verdict != VerdictUncertain {
 		t.Errorf("garbage must degrade to uncertain, got %s", verdict)
 	}
 }
 // TestRenderProposalQueue_ShowsActions renders one duplicate proposal and checks
 // that pattern ids, category, the supersession action and the propose-only
 // notice all appear in the markdown.
 func TestRenderProposalQueue_ShowsActions(t *testing.T) {
 	proposal := JudgedProposal{
 		Candidate:  DedupCandidate{KeepPattern: "HP807", DropPattern: "HP033", Category: "update_failure", Score: 0.32},
 		Screen:     ScreenResult{RecallBefore: 1, RecallAfter: 1},
 		Verdict:    VerdictDuplicate,
 		Confidence: "medium",
 		Rationale:  "same update failure",
 		Judge:      "llm",
 	}
 	rendered := RenderProposalQueue("Geschirrspuelmaschine", []JudgedProposal{proposal})
 	for _, fragment := range []string{"HP807", "HP033", "update_failure", "supersession", "Propose-only"} {
 		if strings.Contains(rendered, fragment) {
 			continue
 		}
 		t.Errorf("queue missing %q\n%s", fragment, rendered)
 	}
 }
@@ -0,0 +1,47 @@
 package iace
 import (
 	"fmt"
 	"strings"
 )
 // RenderProposalQueue renders the judged dedup proposals for one machine as the
 // markdown human-review queue: a title, the number of surviving candidates, then
 // one numbered section per proposal in input order. Deterministic. It only
 // formats text — no change is applied here; every entry is a suggestion for a
 // human to confirm, edit, commit, and pin with a GT case.
 func RenderProposalQueue(machine string, proposals []JudgedProposal) string {
 	var queue strings.Builder
 	queue.WriteString(fmt.Sprintf("# Dedup proposal queue — %s\n\n", machine))
 	queue.WriteString(fmt.Sprintf("%d candidate(s) survived the deterministic GT wall. Propose-only — nothing is applied automatically.\n\n", len(proposals)))
 	for idx := range proposals {
 		entry := proposals[idx]
 		cand := entry.Candidate
 		// Heading: 1-based ordinal, keep/drop pattern pair, then judge, verdict and confidence.
 		fmt.Fprintf(&queue, "## %d. keep %s  ⊃  drop %s   [%s → %s (%s)]\n",
 			idx+1, cand.KeepPattern, cand.DropPattern, entry.Judge, entry.Verdict, entry.Confidence)
 		// The three Jaccard fields are multiplied by 100 for display as percentages.
 		fmt.Fprintf(&queue, "- category %s · score %.2f (measures %.0f%%, zone %.0f%%, scenario %.0f%%)\n",
 			cand.Category, cand.Score, cand.MeasureJaccard*100, cand.ZoneJaccard*100, cand.ScenarioJaccard*100)
 		fmt.Fprintf(&queue, "- GT recall %.1f%% → %.1f%% when %s is dropped (wall: %s)\n",
 			entry.Screen.RecallBefore*100, entry.Screen.RecallAfter*100, cand.DropPattern, wallNote(entry.Screen))
 		fmt.Fprintf(&queue, "- keep: %s\n- drop: %s\n", cand.KeepHazardName, cand.DropName)
 		fmt.Fprintf(&queue, "- judge rationale: %s\n", entry.Rationale)
 		fmt.Fprintf(&queue, "- suggested action: %s\n\n", suggestedAction(entry))
 	}
 	return queue.String()
 }
 // wallNote summarises the deterministic screen for the queue entry: "recall-safe"
 // unless keep and drop credit different GT entries, in which case both are named.
 func wallNote(s ScreenResult) string {
 	if !s.DistinctGT {
 		return "recall-safe"
 	}
 	keepEntry, dropEntry := s.KeepGT, s.DropGT
 	return fmt.Sprintf("distinct GT %s vs %s", keepEntry, dropEntry)
 }
 // suggestedAction maps the judge verdict to the next step shown to the reviewer.
 // Only a duplicate verdict names the drop pattern; any verdict other than
 // duplicate or distinct falls through to "needs review".
 func suggestedAction(p JudgedProposal) string {
 	if p.Verdict == VerdictDuplicate {
 		return fmt.Sprintf("add %s to a supersession set, then a human confirms + commits + pins a GT case", p.Candidate.DropPattern)
 	}
 	if p.Verdict == VerdictDistinct {
 		return "keep both — judge considers them distinct hazards"
 	}
 	return "needs human (or higher-confidence LLM) review — no automatic action"
 }
@@ -0,0 +1,61 @@
 package iace
 import "github.com/google/uuid"
 // ScreenResult is the deterministic GT verdict for one proposed supersession.
 // It is produced by ScreenSupersession from two CompareBenchmark runs: one on
 // the full hazard set and one with the drop hazard (and its mitigations) removed.
 type ScreenResult struct {
 	// CoverageScore of the benchmark run on the full hazard set.
 	RecallBefore float64 `json:"recall_before"`
 	// CoverageScore of the benchmark run after the drop hazard was removed.
 	RecallAfter  float64 `json:"recall_after"`
 	KeepGT       string  `json:"keep_gt,omitempty"` // GT entry the keeper credits (if any)
 	DropGT       string  `json:"drop_gt,omitempty"` // GT entry the drop credits (if any)
 	DistinctGT   bool    `json:"distinct_gt"`       // keep & drop credit DIFFERENT GT entries -> distinct hazards
 	Safe         bool    `json:"safe"`              // recall preserved AND not distinct
 }
 // ScreenSupersession is the deterministic gate between "propose" and "decide"
 // for one keep/drop pair. It benchmarks the full hazard set, then benchmarks
 // again with every hazard named dropHazardName (and the mitigations attached to
 // those hazards) removed. A proposal is Safe only when BOTH hold:
 //
 //  1. RECALL does not fall in the second run — otherwise the drop is
 //     load-bearing for GT coverage.
 //  2. Keep and drop do NOT credit DIFFERENT ground-truth entries. Recall alone is
 //     necessary but not sufficient: two genuinely distinct hazards that share the
 //     same measures (e.g. hot boiler surface vs hot ware on unloading) keep
 //     recall at 100% when one is dropped, yet must NOT be merged.
 //
 // A Safe result is still only RECALL-SAFE — a candidate for a human (and in
 // slice 2, an LLM) to confirm semantically. Reuses CompareBenchmark; touches
 // neither the library nor the runtime.
 func ScreenSupersession(gt *GroundTruth, hazards []Hazard, mits []Mitigation, keepHazardName, dropHazardName string) ScreenResult {
 	baseline := CompareBenchmark(gt, hazards, mits)
 	// Hazard name -> number of the GT entry it is matched to in the baseline run.
 	creditedGT := map[string]string{}
 	for _, pair := range baseline.MatchedPairs {
 		creditedGT[pair.EngineHazard.Name] = pair.GTEntry.Nr
 	}
 	res := ScreenResult{
 		RecallBefore: baseline.CoverageScore,
 		KeepGT:       creditedGT[keepHazardName],
 		DropGT:       creditedGT[dropHazardName],
 	}
 	// Distinct only when both sides are matched at all and to different entries.
 	res.DistinctGT = res.KeepGT != "" && res.DropGT != "" && res.KeepGT != res.DropGT
 	// Split the hazards: survivors stay, removed ones are remembered by ID so
 	// their mitigations can be filtered out as well.
 	survivors := make([]Hazard, 0, len(hazards))
 	removedIDs := map[uuid.UUID]bool{}
 	for i := range hazards {
 		if hazards[i].Name != dropHazardName {
 			survivors = append(survivors, hazards[i])
 			continue
 		}
 		removedIDs[hazards[i].ID] = true
 	}
 	survivingMits := make([]Mitigation, 0, len(mits))
 	for i := range mits {
 		if removedIDs[mits[i].HazardID] {
 			continue
 		}
 		survivingMits = append(survivingMits, mits[i])
 	}
 	rerun := CompareBenchmark(gt, survivors, survivingMits)
 	res.RecallAfter = rerun.CoverageScore
 	res.Safe = rerun.CoverageScore >= baseline.CoverageScore && !res.DistinctGT
 	return res
 }
@@ -160,6 +160,7 @@ func (s *Store) ListHazards(ctx context.Context, projectID uuid.UUID) ([]Hazard,
 		hazards = append(hazards, h)
 	}
 	SortHazardsByISO12100(hazards)
 	return hazards, nil
 }
@@ -110,9 +110,10 @@ type domainDef struct {
 // Deterministic order (slice, not map) — important for stable classification + tests.
 var domains = []domainDef{
 	{"data_protection",
-		[]string{"DSGVO", "GDPR", "BDSG", "EDPB", "DSK", "BfDI", "BayLfD", "DPF"},
+		[]string{"DSGVO", "GDPR", "BDSG", "TDDDG", "TTDSG", "EDPB", "DSK", "BfDI", "BayLfD", "DPF"},
 		[]string{"personenbezogen", "betroffene", "datenschutz", "datenschutzbeauftrag", "dsb",
-			"datenpanne", "auskunft", "loesch", "lösch", "einwilligung", "besondere kategorien", "auftragsverarbeit"}},
+			"datenpanne", "auskunft", "loesch", "lösch", "einwilligung", "besondere kategorien", "auftragsverarbeit",
 			"cookie", "endeinrichtung", "endgerät", "endgeraet", "tracking"}},
 	{"cyber",
 		[]string{"CRA", "NIS2", "NIS-2", "ENISA", "DORA", "EUCC"},
 		[]string{"security update", "sicherheitsupdate", "sicherheitsaktualisierung", "schwachstelle", "sbom",
@@ -200,6 +201,11 @@ var topics = []topicDef{
 	{[]string{"bussgeld", "geldbusse"}, []string{"Art. 83"}},
 	{[]string{"security update", "sicherheitsupdate", "schwachstelle", "sbom", "cybersicherheitsanforderung"}, []string{"CRA Anhang I"}},
 	{[]string{"meldepflicht", "sicherheitsvorfall"}, []string{"Art. 14 CRA"}},
 	// ePrivacy / cookies: § 25 TDDDG (ex-TTDSG) is lex specialis for terminal-equipment access /
 	// cookie consent. It is co-primary on a cookie/tracking query, so the subsidiarity rule does
 	// NOT demote it the way it demotes general data-protection DE law that is subsidiary to the
 	// DSGVO. Keywords are cookie-specific (NOT bare "Einwilligung") so a general consent question
 	// still resolves to Art. 7 DSGVO.
 	{[]string{"cookie", "endeinrichtung", "endgerät", "endgeraet", "tracking", "speicherung von informationen", "zugriff auf informationen"}, []string{"§ 25 TDDDG"}},
 }
 // resultMatchesTopic reports whether the result is a preferred norm of a topic the query hits.
@@ -123,6 +123,28 @@ func TestRerankByAuthority_Acceptance(t *testing.T) {
 		}
 	})
 	t.Run("ePrivacy: a cookie query lifts §25 TDDDG above DSGVO consent (lex specialis topic)", func(t *testing.T) {
 		in := []LegalSearchResult{
 			bindingRes("Art. 7 DSGVO", "DSGVO", "EU", 0.70), // higher semantic
 			bindingRes("§ 25 TDDDG", "TDDDG", "DE", 0.66),
 		}
 		out := rerankByAuthority("Wann ist eine Einwilligung fuer das Speichern von Cookies auf Endgeraeten erforderlich?", in)
 		if out[0].RegulationShort != "TDDDG" {
 			t.Fatalf("§25 TDDDG must win a cookie question (lex specialis topic), got %q", out[0].ArticleLabel)
 		}
 	})
 	t.Run("a general consent question still resolves to DSGVO, not §25 TDDDG", func(t *testing.T) {
 		in := []LegalSearchResult{
 			bindingRes("§ 25 TDDDG", "TDDDG", "DE", 0.70), // higher semantic but no cookie topic
 			bindingRes("Art. 7 DSGVO", "DSGVO", "EU", 0.66),
 		}
 		out := rerankByAuthority("Welche Anforderungen gelten an eine wirksame Einwilligung?", in)
 		if out[0].RegulationShort != "DSGVO" {
 			t.Fatalf("a general consent question must resolve to DSGVO (TDDDG demoted), got %q", out[0].ArticleLabel)
 		}
 	})
 	t.Run("co-primary dp_01: BDSG §38 stays top on a DSB question (national special rule)", func(t *testing.T) {
 		in := []LegalSearchResult{
 			bindingRes("§ 38 BDSG", "BDSG", "DE", 0.66),
@@ -77,6 +77,8 @@ _ROUTER_MODULES = [
    "licenses_routes",
    "template_rule_routes",
    "specialist_agent_routes",
    "reasoning_routes",
    "onboarding_routes",
 ]
 _loaded_count = 0
@@ -0,0 +1,74 @@
 """Onboarding Advisor endpoint — exposes the existing Smart Onboarding Advisor at runtime.
 This adds NO new reasoning logic. It exposes the already-built, tested orchestration (Signal Producers
 -> Normalizer -> Silent Knowledge Pass -> Advisor) through one runtime endpoint. No DB, no persistence.
  POST /onboarding/advisor-start  — (company + certs + target + scanner findings) -> advisory payload
  GET  /onboarding/targets        — the supported target ids
 """
 import logging
 from typing import List, Optional
 from fastapi import APIRouter, HTTPException
 from pydantic import BaseModel, Field
 from compliance.onboarding import (
    AdvisorMeasure,
    AdvisorQuestion,
    InferredAssumption,
    ProducedSignal,
    RejectedAssumption,
 )
 from compliance.services.onboarding_service import run_advisor, supported_targets
 logger = logging.getLogger(__name__)
 router = APIRouter(prefix="/onboarding", tags=["onboarding"])
 # Request body of POST /onboarding/advisor-start. Every field has a default, so an
 # empty JSON object is accepted. All fields except `target` are forwarded unchanged
 # to run_advisor by the endpoint; `target` is first checked against supported_targets().
 class OnboardingAdvisorRequest(BaseModel):
    company: str = ""
    # Optional on the wire; the endpoint passes "" to run_advisor when it is missing/None.
    industry: Optional[str] = None
    products: List[str] = Field(default_factory=list)
    markets: List[str] = Field(default_factory=list)
    certifications: List[str] = Field(default_factory=list)
    known_evidence: List[str] = Field(default_factory=list)
    # Target id; the endpoint answers 404 when it is not in supported_targets().
    target: str = "CRA"
    scanner_findings: List[ProducedSignal] = Field(default_factory=list)   # adapters upstream produced these
 # Response body of POST /onboarding/advisor-start. The endpoint fills
 # `silent_intake_summary` from the second value returned by run_advisor and every
 # other field from the advisor result object.
 class AdvisorResponse(BaseModel):
    silent_intake_summary: str = ""
    headline: str = ""
    auto_detected: List[str] = Field(default_factory=list)
    indications: List[str] = Field(default_factory=list)        # partial signal: raises strength, still asked
    inferred_assumptions: List[InferredAssumption] = Field(default_factory=list)
    rejected_assumptions: List[RejectedAssumption] = Field(default_factory=list)
    # Populated from the advisor result's `next_best_questions`.
    top_5_questions: List[AdvisorQuestion] = Field(default_factory=list)
    capability_delta: List[str] = Field(default_factory=list)
    top_measures: List[AdvisorMeasure] = Field(default_factory=list)
    evidence_requests: List[str] = Field(default_factory=list)
    unsupported_domains: List[str] = Field(default_factory=list)
    completeness_summary: str = ""
@router.get("/targets")
 def list_targets() -> dict:
    # Wrap the supported target ids in a single-key object for the JSON response.
    targets = supported_targets()
    return {"targets": targets}
@router.post("/advisor-start", response_model=AdvisorResponse)
 def advisor_start_endpoint(req: OnboardingAdvisorRequest) -> AdvisorResponse:
    # Refuse unknown targets up front; the error detail lists the supported ids.
    if req.target not in supported_targets():
        detail = "unsupported target '%s'; supported: %s" % (req.target, supported_targets())
        raise HTTPException(status_code=404, detail=detail)
    # `industry` may be None on the wire; the service is handed "" in that case.
    result, si_summary = run_advisor(
        company=req.company,
        certifications=req.certifications,
        target=req.target,
        signals=req.scanner_findings,
        known_evidence=req.known_evidence,
        products=req.products,
        markets=req.markets,
        industry=req.industry or "",
    )
    # Field-by-field copy; only `top_5_questions` is renamed (from `next_best_questions`).
    return AdvisorResponse(
        silent_intake_summary=si_summary,
        headline=result.headline,
        auto_detected=result.auto_detected,
        indications=result.indications,
        inferred_assumptions=result.inferred_assumptions,
        rejected_assumptions=result.rejected_assumptions,
        top_5_questions=result.next_best_questions,
        capability_delta=result.capability_delta,
        top_measures=result.top_measures,
        evidence_requests=result.evidence_requests,
        unsupported_domains=result.unsupported_domains,
        completeness_summary=result.completeness_summary,
    )
@@ -0,0 +1,98 @@
 """HTTP endpoints for the Regulatory Reasoning Engine (spec §7).
 Thin handlers — all reasoning lives in `compliance.reasoning.*`. No DB, no RAG;
 pure deterministic rule evaluation.
    POST /reasoning/scope                      -> which regulations apply + missing facts
    POST /reasoning/obligations                -> obligations, overlaps, multi-evidence
    POST /reasoning/implementation-reasoning   -> claim->obligation mapping (Welt 1, no verdict)
    POST /reasoning/interpretation-assessment  -> verdict on a customer interpretation
    POST /reasoning/product-scope              -> gate on facts, else run discover_scope once
    POST /reasoning/regulatory-map             -> customer-readable read-model over the scope
    POST /reasoning/interpretation-in-map      -> judge a customer interpretation within the map
 """
 from __future__ import annotations
 from fastapi import APIRouter
 from compliance.interpretation_map import (
    InterpretationInMapRequest,
    InterpretationInMapResult,
    interpret_in_map,
 )
 from compliance.product_scope import (
    ProductScopeRequest,
    ProductScopeResponse,
    resolve_product_scope,
 )
 from compliance.regulatory_map import RegulatoryMap, RegulatoryMapRequest, render_regulatory_map
 from compliance.reasoning import (
    assess_interpretation,
    derive_obligations,
    discover_scope,
    reason_implementation_claim,
 )
 from compliance.reasoning.schemas import (
    ImplementationReasoningRequest,
    ImplementationReasoningResponse,
    InterpretationRequest,
    InterpretationResponse,
    ObligationsRequest,
    ObligationsResponse,
    ScopeRequest,
    ScopeResponse,
 )
 router = APIRouter(prefix="/reasoning", tags=["reasoning"])
@router.post("/scope", response_model=ScopeResponse)
 def scope_discovery(req: ScopeRequest) -> ScopeResponse:
    # Discover once, then surface the scope plus two of its own attributes at top level.
    discovered = discover_scope(req.product_profile)
    payload = {
        "regulatory_scope": discovered,
        "missing_facts": discovered.missing_facts,
        "confidence": discovered.confidence,
    }
    return ScopeResponse(**payload)
@router.post("/obligations", response_model=ObligationsResponse)
 def applicable_obligations(req: ObligationsRequest) -> ObligationsResponse:
    # Thin pass-through: the reasoning layer builds the response object itself.
    profile, scope = req.product_profile, req.regulatory_scope
    return derive_obligations(profile, scope)
@router.post("/implementation-reasoning", response_model=ImplementationReasoningResponse)
 def implementation_reasoning(req: ImplementationReasoningRequest) -> ImplementationReasoningResponse:
    # Thin pass-through of profile + customer claim to the reasoning layer.
    profile, claim = req.product_profile, req.customer_claim
    return reason_implementation_claim(profile, claim)
@router.post("/product-scope", response_model=ProductScopeResponse)
 def product_scope(req: ProductScopeRequest) -> ProductScopeResponse:
    # Delegates entirely to resolve_product_scope; nothing is computed in the handler.
    resolved = resolve_product_scope(req.product_profile)
    return resolved
@router.post("/regulatory-map", response_model=RegulatoryMap)
 def regulatory_map(req: RegulatoryMapRequest) -> RegulatoryMap:
    # Delegates entirely to render_regulatory_map for the given product profile.
    rendered = render_regulatory_map(req.product_profile)
    return rendered
@router.post("/interpretation-in-map", response_model=InterpretationInMapResult)
 def interpretation_in_map(req: InterpretationInMapRequest) -> InterpretationInMapResult:
    # Two steps: render the map for the profile, then judge the interpretation against it.
    return interpret_in_map(
        render_regulatory_map(req.product_profile),
        req.customer_interpretation,
    )
@router.post("/interpretation-assessment", response_model=InterpretationResponse)
 def interpretation_assessment(req: InterpretationRequest) -> InterpretationResponse:
    verdict = assess_interpretation(req.customer_interpretation, req.product_profile)
    # The response mirrors these attributes of the assessment result one-to-one.
    mirrored = (
        "assessment",
        "affected_regulations",
        "affected_obligations",
        "corrected_interpretation",
        "risks",
        "legal_basis_refs",
        "explanation",
        "confidence",
    )
    return InterpretationResponse(**{field: getattr(verdict, field) for field in mirrored})
@@ -0,0 +1,70 @@
 """Master Capability Registry v0 (Phase 2C) — Compliance Execution domain.
 Registry + minting layer for Master Capabilities — the third instance of the
 identity-machine pattern (Master Controls, Master Obligations, Master Capabilities).
 STORED: identities, sources, relationship types, policy versions, lifecycle events,
 provenance. DERIVED (never stored): confidence, coverage, gap.
 v0 scope: types + minting + typed relations + versioned policy + identity lifecycle.
 NOT here: Company-Gap, real ISO/cert mappings, certification derivations, UI, RAG,
 new meta-model class, generic canonicalization engine.
 """
 from __future__ import annotations
 from .engine import (
    CapabilityRegistry,
    deprecate_capability,
    evaluate_relation,
    merge_capabilities,
    mint_capability,
    resolve,
    split_capability,
 )
 from .policy import DEFAULT_POLICY, assert_no_certification_confirms
 from .schemas import (
    AssertionStatus,
    CapabilityCandidate,
    CapabilityRelation,
    Confidence,
    DerivedAssessment,
    EvidenceKind,
    IdentityLifecycleEvent,
    LifecycleEventType,
    LifecycleState,
    MasterCapability,
    PolicyRule,
    PolicyVersion,
    Provenance,
    RelationType,
 )
 __all__ = [
    # engine
    "CapabilityRegistry",
    "mint_capability",
    "evaluate_relation",
    "resolve",
    "deprecate_capability",
    "merge_capabilities",
    "split_capability",
    # policy
    "DEFAULT_POLICY",
    "assert_no_certification_confirms",
    # schemas
    "MasterCapability",
    "CapabilityCandidate",
    "CapabilityRelation",
    "RelationType",
    "EvidenceKind",
    "AssertionStatus",
    "Confidence",
    "PolicyRule",
    "PolicyVersion",
    "IdentityLifecycleEvent",
    "LifecycleEventType",
    "LifecycleState",
    "Provenance",
    "DerivedAssessment",
 ]
@@ -0,0 +1,191 @@
 """Master Capability Registry v0 — minting, derivation, identity lifecycle.
 STORED on the registry: identities, sources, relation types, policy versions,
 lifecycle events, provenance. DERIVED (never stored): confidence/status, via
 `evaluate_relation` under a versioned policy.
 Python 3.9 compatible (no `|` unions).
 """
 from __future__ import annotations
 from typing import Dict, List, Optional, Set
 from pydantic import BaseModel, Field
 from .policy import DEFAULT_POLICY
 from .schemas import (
    AssertionStatus,
    CapabilityCandidate,
    CapabilityRelation,
    Confidence,
    DerivedAssessment,
    IdentityLifecycleEvent,
    LifecycleEventType,
    LifecycleState,
    MasterCapability,
    PolicyVersion,
    Provenance,
 )
 # In-memory registry state that the functions in this module read and mutate.
 class CapabilityRegistry(BaseModel):
    # NOTE: no confidence/coverage field anywhere — those are DERIVED, never stored.
    # Keyed by capability id (mint_capability stores each capability under its MCAP id).
    capabilities: Dict[str, MasterCapability] = Field(default_factory=dict)
    relations: List[CapabilityRelation] = Field(default_factory=list)
    # Append-only in this module: deprecate/merge/split each append one event.
    lifecycle_events: List[IdentityLifecycleEvent] = Field(default_factory=list)
    # NOTE(review): the factory returns the module-level DEFAULT_POLICY object itself,
    # not a copy — confirm that registries sharing one policy instance is intended.
    policy: PolicyVersion = Field(default_factory=lambda: DEFAULT_POLICY)
    # Serial used for the next minted id; mint_capability increments it after registering.
    next_serial: int = 1
 def _mcap_id(serial: int) -> str:
    return "MCAP-%05d" % serial
 def _next_event_id(registry: "CapabilityRegistry") -> str:
    return "evt-%d" % (len(registry.lifecycle_events) + 1)
 def mint_capability(
    registry: CapabilityRegistry,
    candidate: CapabilityCandidate,
    provenance: Optional[Provenance] = None,
    name: str = "",
    definition: str = "",
    category: str = "",
    domains: Optional[List[str]] = None,
 ) -> MasterCapability:
    """Mint a capability from a candidate under the registry's next MCAP id.
    The name falls back to the candidate's normalized term, then its raw term. When no
    provenance is passed, a "system" provenance naming the raw term is attached. The
    capability is stored in the registry and the serial counter advances by one.
    """
    minted_id = _mcap_id(registry.next_serial)
    display_name = name or candidate.normalized or candidate.raw_term
    origin = provenance
    if not origin:
        origin = Provenance(author="system", basis="minted from candidate '%s'" % candidate.raw_term)
    minted = MasterCapability(
        capability_id=minted_id,
        name=display_name,
        definition=definition,
        category=category,
        domains=domains or [],
        provenance=origin,
    )
    # Register only after construction succeeded, then consume the serial.
    registry.capabilities[minted_id] = minted
    registry.next_serial += 1
    return minted
 def evaluate_relation(
    relation: CapabilityRelation, policy: Optional[PolicyVersion] = None
 ) -> DerivedAssessment:
    """Derive the assessment for one relation under a versioned policy.
    The first policy rule whose (relationship_type, evidence_kind) equals the relation's
    decides status and confidence; without a match the result is UNKNOWN/LOW and the
    explanation ends in " (no rule)". DEFAULT_POLICY is used when `policy` is None.
    The assessment is returned to the caller and not stored anywhere by this function.
    """
    active = DEFAULT_POLICY if policy is None else policy
    hit = next(
        (
            rule
            for rule in active.rules
            if rule.relationship_type == relation.relationship_type
            and rule.evidence_kind == relation.evidence_kind
        ),
        None,
    )
    if hit is None:
        status, confidence, suffix = AssertionStatus.UNKNOWN, Confidence.LOW, " (no rule)"
    else:
        status, confidence, suffix = hit.status, hit.confidence, ""
    parts = (
        relation.relationship_type.value,
        relation.evidence_kind.value,
        active.policy_version,
        status.value,
        confidence.value,
        suffix,
    )
    return DerivedAssessment(
        target_capability_id=relation.target_capability_id,
        status=status,
        confidence=confidence,
        policy_version=active.policy_version,
        explanation="%s + %s under %s -> %s/%s%s" % parts,
    )
 def resolve(
    registry: CapabilityRegistry, capability_id: str, _seen: Optional[Set[str]] = None
 ) -> Optional[MasterCapability]:
    """Walk the redirect chain from `capability_id` to the capability it ends at.
    Returns None when an id on the chain is unknown, when the chain revisits an id
    (redirect cycle), or when the chain ends at a capability that is not ACTIVE.
    `_seen` collects the visited ids and is mutated when supplied.
    """
    visited = set() if _seen is None else _seen
    current = capability_id
    while True:
        if current in visited:
            return None  # redirect cycle guard
        visited.add(current)
        cap = registry.capabilities.get(current)
        if cap is None:
            return None
        if not cap.redirect_to:
            # terminal: only an ACTIVE capability resolves; a deprecated dead-end -> None
            return cap if cap.state == LifecycleState.ACTIVE else None
        current = cap.redirect_to
 def deprecate_capability(
    registry: CapabilityRegistry,
    capability_id: str,
    redirect_to: Optional[str] = None,
    provenance: Optional[Provenance] = None,
 ) -> IdentityLifecycleEvent:
    """Deprecate a capability, optionally redirecting its id to another capability.
    Records a REDIRECT event when `redirect_to` is given, otherwise a DEPRECATE event,
    appends it to the registry's lifecycle events and returns it. Without explicit
    provenance a "system" provenance is attached to the event.
    Raises:
        KeyError: `capability_id` is not in the registry.
        ValueError: `redirect_to` equals `capability_id`. A self-redirect is a
            one-element cycle, so the id could never resolve again; it is rejected
            before any state is changed.
    """
    cap = registry.capabilities.get(capability_id)
    if cap is None:
        raise KeyError(capability_id)
    if redirect_to is not None and redirect_to == capability_id:
        raise ValueError("cannot redirect %s to itself" % capability_id)
    cap.state = LifecycleState.DEPRECATED
    cap.redirect_to = redirect_to
    event = IdentityLifecycleEvent(
        event_id=_next_event_id(registry),
        event_type=LifecycleEventType.REDIRECT if redirect_to else LifecycleEventType.DEPRECATE,
        from_ids=[capability_id],
        to_ids=[redirect_to] if redirect_to else [],
        provenance=provenance or Provenance(author="system", basis="deprecate %s" % capability_id),
    )
    registry.lifecycle_events.append(event)
    return event
 def merge_capabilities(
    registry: CapabilityRegistry,
    from_id: str,
    into_id: str,
    provenance: Optional[Provenance] = None,
 ) -> IdentityLifecycleEvent:
    """Merge `from_id` into `into_id`: deprecate `from_id` with a redirect to `into_id`.
    Appends a MERGE event to the registry's lifecycle events and returns it. Without
    explicit provenance a "system" provenance is attached to the event.
    Raises:
        KeyError: either id is not in the registry.
        ValueError: `from_id` equals `into_id`. Merging a capability into itself would
            deprecate it and redirect it to itself, leaving the id unresolvable; it is
            rejected before any state is changed.
    """
    if from_id not in registry.capabilities or into_id not in registry.capabilities:
        raise KeyError("%s or %s" % (from_id, into_id))
    if from_id == into_id:
        raise ValueError("cannot merge %s into itself" % from_id)
    frm = registry.capabilities[from_id]
    frm.state = LifecycleState.DEPRECATED
    frm.redirect_to = into_id
    event = IdentityLifecycleEvent(
        event_id=_next_event_id(registry),
        event_type=LifecycleEventType.MERGE,
        from_ids=[from_id],
        to_ids=[into_id],
        provenance=provenance or Provenance(author="system", basis="merge %s -> %s" % (from_id, into_id)),
    )
    registry.lifecycle_events.append(event)
    return event
 def split_capability(
    registry: CapabilityRegistry,
    from_id: str,
    into_ids: List[str],
    primary: Optional[str] = None,
    provenance: Optional[Provenance] = None,
 ) -> IdentityLifecycleEvent:
    """Split `from_id` into several capabilities. The old id deprecates; it redirects
    to `primary` only if one is given (else it resolves to None — split is ambiguous).
    Appends a SPLIT event listing `into_ids` to the registry's lifecycle events and
    returns it. Without explicit provenance a "system" provenance is attached.
    Raises:
        KeyError: `from_id` is not in the registry.
        ValueError: `primary` equals `from_id`. That would redirect the deprecated id
            to itself, leaving it unresolvable; it is rejected before any state is
            changed.
    """
    if from_id not in registry.capabilities:
        raise KeyError(from_id)
    if primary is not None and primary == from_id:
        raise ValueError("cannot use %s as the primary of its own split" % from_id)
    frm = registry.capabilities[from_id]
    frm.state = LifecycleState.DEPRECATED
    frm.redirect_to = primary
    event = IdentityLifecycleEvent(
        event_id=_next_event_id(registry),
        event_type=LifecycleEventType.SPLIT,
        from_ids=[from_id],
        to_ids=list(into_ids),
        provenance=provenance or Provenance(author="system", basis="split %s" % from_id),
    )
    registry.lifecycle_events.append(event)
    return event
@@ -0,0 +1,65 @@
 """Derivation policy v0 for the Master Capability Registry.
 Confidence + status are DERIVED from (relationship_type, evidence_kind) under a
 versioned policy — never stored. HARD RULE baked in and structurally guarded: a
 CERTIFICATION is a claim, never proof — no certification-backed rule may yield
 CONFIRMED. CONFIRMED requires a CONFIRMS relation backed by a concrete ARTIFACT
 (or an EXPERT assertion).
 Python 3.9 compatible (no `|` unions).
 """
 from __future__ import annotations
 from .schemas import (
    AssertionStatus,
    Confidence,
    EvidenceKind,
    PolicyRule,
    PolicyVersion,
    RelationType,
 )
 def _rule(rt: RelationType, ek: EvidenceKind, st: AssertionStatus, cf: Confidence) -> PolicyRule:
    return PolicyRule(relationship_type=rt, evidence_kind=ek, status=st, confidence=cf)
 # (relationship_type, evidence_kind) -> (status, confidence)
 # One row per rule, built with _rule(relationship, evidence, status, confidence).
 # No row pairs CERTIFICATION evidence with CONFIRMED; the guard at the bottom of this
 # module enforces that for the shipped default.
 _V0_RULES = [
    # concrete artifact / expert confirming the capability -> CONFIRMED
    _rule(RelationType.CONFIRMS, EvidenceKind.ARTIFACT, AssertionStatus.CONFIRMED, Confidence.HIGH),
    _rule(RelationType.CONFIRMS, EvidenceKind.EXPERT, AssertionStatus.CONFIRMED, Confidence.MEDIUM),
    # equivalent capability — certificate or artifact behind it -> INFERRED (never confirmed)
    _rule(RelationType.EQUIVALENT, EvidenceKind.CERTIFICATION, AssertionStatus.INFERRED, Confidence.HIGH),
    _rule(RelationType.EQUIVALENT, EvidenceKind.ARTIFACT, AssertionStatus.INFERRED, Confidence.HIGH),
    # supports — weaker
    _rule(RelationType.SUPPORTS, EvidenceKind.CERTIFICATION, AssertionStatus.INFERRED, Confidence.LOW),
    _rule(RelationType.SUPPORTS, EvidenceKind.ARTIFACT, AssertionStatus.INFERRED, Confidence.MEDIUM),
    # requires = an obligation NEEDS the capability (relevance, not possession)
    _rule(RelationType.REQUIRES, EvidenceKind.NONE, AssertionStatus.UNKNOWN, Confidence.LOW),
    # broader/narrower certificate -> weak inference
    _rule(RelationType.BROADER_THAN, EvidenceKind.CERTIFICATION, AssertionStatus.INFERRED, Confidence.LOW),
    _rule(RelationType.NARROWER_THAN, EvidenceKind.CERTIFICATION, AssertionStatus.INFERRED, Confidence.LOW),
    _rule(RelationType.RELATED_TO, EvidenceKind.CERTIFICATION, AssertionStatus.UNKNOWN, Confidence.LOW),
 ]
 # The policy shipped by default; its version string is what assessments record.
 DEFAULT_POLICY = PolicyVersion(
    policy_version="capability-policy-v0",
    description="v0: certification never yields CONFIRMED; only CONFIRMS + ARTIFACT/EXPERT does.",
    rules=_V0_RULES,
 )
 def assert_no_certification_confirms(policy: PolicyVersion) -> None:
    """Enforce the hard rule on a policy: certification evidence never yields CONFIRMED.
    Raises ValueError for the first rule that pairs CERTIFICATION evidence with a
    CONFIRMED status, naming the policy version and the rule's relationship type.
    Returns None when no rule violates it.
    """
    for rule in policy.rules:
        backed_by_cert = rule.evidence_kind == EvidenceKind.CERTIFICATION
        if not (backed_by_cert and rule.status == AssertionStatus.CONFIRMED):
            continue
        offender = (policy.policy_version, rule.relationship_type.value)
        raise ValueError("policy %s violates hard rule: certification -> confirmed (%s)" % offender)
 # fail fast at import: the shipped default must satisfy the hard rule
 assert_no_certification_confirms(DEFAULT_POLICY)
@@ -0,0 +1,150 @@
 """Master Capability Registry v0 — Compliance Execution domain (Phase 2C).
 Built from the Reasoning session per user directive, but this IS the Compliance
 Execution model (Execution owns Capability). Third real instance of the
 identity-machine pattern (after Master Controls and Master Obligations):
    Candidate -> Normalization -> Dedup -> Stable Identity (MCAP) -> Typed Relations
 KEY SENTENCE (stored vs derived):
    STORED  : identities, sources, relationship types, policy versions, lifecycle
              events, provenance.
    DERIVED : confidence, coverage and gap statements — computed on demand, NEVER
              stored (see policy.py / engine.evaluate_relation).
 These are APPLICATION/registry types, NOT compliance-meta-model classes. In
 particular `CapabilityRelation` is relation METADATA inside the registry — it does
 NOT introduce a new meta-model class. Whether a reified relation must enter the
 frozen meta-model is a Meta-Model-Owner decision (architecture freeze v1.0),
 deferred until a demonstrable failure case exists.
 Self-contained (no Reasoning import — Reasoning consumes Capability, not the other
 way round). Python 3.9 compatible (no `|` unions).
 """
 from __future__ import annotations
 from enum import Enum
 from typing import List, Optional
 from pydantic import BaseModel, Field
 # Three-level confidence scale. str-mixin enum: each member is also its lowercase string.
 class Confidence(str, Enum):
    HIGH = "high"
    MEDIUM = "medium"
    LOW = "low"
 class AssertionStatus(str, Enum):
    """How well-established a capability claim is. A numeric score is presentation;
    THIS type is the truth (derived from relationship type + evidence + policy)."""
    DECLARED = "declared"
    INFERRED = "inferred"
    # In the v0 policy only CONFIRMS + ARTIFACT/EXPERT rules produce this status.
    CONFIRMED = "confirmed"
    # Also the status evaluate_relation falls back to when no policy rule matches.
    UNKNOWN = "unknown"
 # Kind of link between an external source and a capability; together with the
 # evidence kind it is the lookup key of a policy rule.
 class RelationType(str, Enum):
    EQUIVALENT = "equivalent"
    SUPPORTS = "supports"
    # Per the v0 policy: an obligation NEEDS the capability (relevance, not possession).
    REQUIRES = "requires"
    CONFIRMS = "confirms"
    BROADER_THAN = "broader_than"
    NARROWER_THAN = "narrower_than"
    RELATED_TO = "related_to"
 # What backs a relation; the second half of a policy rule's lookup key.
 class EvidenceKind(str, Enum):
    CERTIFICATION = "certification"  # a held certificate — a CLAIM, never proof
    ARTIFACT = "artifact"  # concrete doc/config/test/log
    EXPERT = "expert"  # human expert assertion
    NONE = "none"  # the default evidence kind of a CapabilityRelation
 # State of a capability identity. New capabilities default to ACTIVE; the engine's
 # deprecate/merge/split operations set DEPRECATED.
 class LifecycleState(str, Enum):
    ACTIVE = "active"
    DEPRECATED = "deprecated"
 # Type tag of an IdentityLifecycleEvent.
 class LifecycleEventType(str, Enum):
    MERGE = "merge"  # recorded by merge_capabilities
    SPLIT = "split"  # recorded by split_capability
    DEPRECATE = "deprecate"  # recorded by deprecate_capability without a redirect target
    REDIRECT = "redirect"  # recorded by deprecate_capability with a redirect target
 class Provenance(BaseModel):
    """Every CURATED atom carries its own provenance (who / when / on what basis)."""
    # Who asserted it; the engine uses "system" when the caller passes no provenance.
    author: str = ""
    asserted_at: Optional[str] = None  # ISO timestamp passed in; never generated here
    # Free-text justification, e.g. the engine's "merge A -> B" notes.
    basis: str = ""
 # ── stored: identity ──────────────────────────────────────────────────────
 # A registered capability identity. Only `capability_id` is required; there is
 # no confidence/coverage field here (those are derived, never stored).
 class MasterCapability(BaseModel):
    capability_id: str  # stable MCAP-xxxxx
    name: str = ""
    definition: str = ""
    category: str = ""
    domains: List[str] = Field(default_factory=list)
    typical_evidence: List[str] = Field(default_factory=list)
    version: int = 1
    state: LifecycleState = LifecycleState.ACTIVE
    # Id the engine's resolve() follows next; None marks the end of a redirect chain.
    redirect_to: Optional[str] = None  # set on merge/deprecate
    provenance: Provenance = Field(default_factory=Provenance)
 # A not-yet-minted capability term; input to mint_capability.
 class CapabilityCandidate(BaseModel):
    raw_term: str  # e.g. "Patch Management"
    source: str = ""  # e.g. "CRA:Annex I (2)(d)"
    # Preferred over raw_term as the minted capability's name when non-empty.
    normalized: str = ""
 # ── stored: typed relation metadata (NOT a meta-model class) ──────────────
 # One typed link from an external source to a capability. It stores the relation
 # type and evidence kind only; status/confidence are derived by evaluate_relation.
 class CapabilityRelation(BaseModel):
    relation_id: str
    source: str  # external term/obligation/certification id, e.g. "certification:ISO27001"
    target_capability_id: str  # MCAP-...
    relationship_type: RelationType
    evidence_kind: EvidenceKind = EvidenceKind.NONE
    provenance: Provenance = Field(default_factory=Provenance)
 # ── stored: versioned derivation policy ───────────────────────────────────
 # One derivation rule: the first two fields are the match key, the last two the outcome.
 class PolicyRule(BaseModel):
    relationship_type: RelationType
    evidence_kind: EvidenceKind
    status: AssertionStatus
    confidence: Confidence
 class PolicyVersion(BaseModel):
    """A versioned derivation policy. `policy_version` is recorded with every
    assessment so "why did you say X last year" is answerable with the policy
    as-of-then. Without this, `derived` and `auditable/reproducible` contradict."""
    policy_version: str
    description: str = ""
    # Order matters: evaluate_relation stops at the first rule that matches.
    rules: List[PolicyRule] = Field(default_factory=list)
 # ── stored: identity lifecycle ────────────────────────────────────────────
 # Audit record of one identity change (merge / split / deprecate / redirect).
 class IdentityLifecycleEvent(BaseModel):
    event_id: str  # the engine assigns "evt-<n>" ids
    event_type: LifecycleEventType
    from_ids: List[str] = Field(default_factory=list)
    to_ids: List[str] = Field(default_factory=list)
    # Optional timestamp string; the engine operations do not set it.
    at: Optional[str] = None
    provenance: Provenance = Field(default_factory=Provenance)
 # ── DERIVED — never stored ────────────────────────────────────────────────
 # Result object returned by evaluate_relation: the derived status/confidence for
 # one relation plus the policy version that produced it.
 class DerivedAssessment(BaseModel):
    target_capability_id: str
    status: AssertionStatus
    confidence: Confidence
    policy_version: str
    # Human-readable derivation trace; ends in " (no rule)" when no policy rule matched.
    explanation: str = ""
@@ -0,0 +1,46 @@
 """Company Intelligence (Phase 2A) — Company Capability Profile foundation.
 The HEAD of the spine Company -> Capability -> Product -> Regulation -> Obligation
 -> Procedure -> Evidence. Builds a CompanyContext into a CompanyCapabilityProfile
 with a four-state trust model (declared/inferred/confirmed/unknown). A certification
 yields at most an INFERRED candidate — never "erfuellt".
 Reasoning OWNS the container + trust-state; it CONSUMES the Certification->Capability
 mapping (Execution-owned) via an injected contract — no mapping data in product code.
 """
 from __future__ import annotations
 from .contract import CapabilityMappingEntry, CertificationCapabilityMap, EMPTY_MAPPING
 from .engine import build_company_profile
 from .schemas import (
    CapabilityEvidence,
    Certification,
    CompanyCapabilityProfile,
    CompanyContext,
    Declaration,
    ExistingEvidence,
    ExistingProcess,
    ExistingSystem,
    OperationalCapability,
    OperationalCapabilityCandidate,
    VerificationStatus,
 )
 __all__ = [
    "build_company_profile",
    "CompanyContext",
    "CompanyCapabilityProfile",
    "Certification",
    "Declaration",
    "ExistingProcess",
    "ExistingSystem",
    "ExistingEvidence",
    "CapabilityEvidence",
    "OperationalCapabilityCandidate",
    "OperationalCapability",
    "VerificationStatus",
    "CapabilityMappingEntry",
    "CertificationCapabilityMap",
    "EMPTY_MAPPING",
 ]
@@ -0,0 +1,43 @@
 """Consumption contract for the Certification -> Capability mapping.
 OWNERSHIP BOUNDARY (hard): the Capability Registry, CapabilityDefinition and the
 Certification->Capability / Feature->Capability mapping RULES live in the Compliance
 Execution domain. This Reasoning layer defines ONLY the shape it consumes and never
 ships mapping DATA in product code — tests inject mocks, so the real table can only
 ever live in Execution.
 Execution will eventually provide CapabilityRegistry / CapabilityMapping /
 CapabilityDefinition; Reasoning consumes exactly `OperationalCapabilityCandidate`
 {capability_id, source, confidence, verification_status} (see schemas.py) and the
 minimal mapping SHAPE below — nothing more.
 Python 3.9 compatible (no `|` unions).
 """
 from __future__ import annotations
 from typing import Dict, List
 from pydantic import BaseModel, Field
 from compliance.reasoning.enums import Confidence
 class CapabilityMappingEntry(BaseModel):
    """One mapping rule SHAPE: a certification implies candidate capabilities.
    Contract type only. The actual table (which capabilities ISO27001 implies) is
    Execution's DATA and MUST NOT be hard-coded here or anywhere in product code.
    """
    # Capability ids implied by the certification this entry is keyed under;
    # the engine emits one INFERRED candidate per id. Empty by default.
    capability_ids: List[str] = Field(default_factory=list)
    # Copied by the engine onto every candidate inferred from this entry.
    confidence: Confidence = Confidence.MEDIUM
 # certification_id -> entry. Injected at call time; product code holds NO entries.
 CertificationCapabilityMap = Dict[str, CapabilityMappingEntry]
 # Intentionally empty: without an injected mapping there are zero inferred
 # candidates. This is the architectural guarantee that the registry lives only in
 # the Compliance Execution domain.
 EMPTY_MAPPING: CertificationCapabilityMap = {}
@@ -0,0 +1,114 @@
 """Company Intelligence engine (Phase 2A) — build the Company Capability Profile.
 Deterministic, no LLM/RAG. Turns a raw CompanyContext into capability evidence,
 candidates and (only via explicit verification) confirmed capabilities.
 HARD RULE enforced here: a certification yields at most an INFERRED candidate; it
 can NEVER produce a CONFIRMED capability on its own. Only real ExistingEvidence
 (`proves_capability_id`) promotes a capability to CONFIRMED. Certifications without
 a known mapping yield evidence-of-claim but NO inferred capability (the mapping is
 Execution's data, injected — never hard-coded here).
 Python 3.9 compatible (no `|` unions).
 """
 from __future__ import annotations
 from typing import Dict, List, Optional, Tuple
 from compliance.reasoning.enums import Confidence
 from .contract import EMPTY_MAPPING, CertificationCapabilityMap
 from .schemas import (
    CapabilityEvidence,
    CompanyCapabilityProfile,
    CompanyContext,
    OperationalCapability,
    OperationalCapabilityCandidate,
    VerificationStatus,
 )
 def _declared(context: CompanyContext) -> List[OperationalCapabilityCandidate]:
    """Turn every customer declaration into a DECLARED candidate.

    Each candidate keeps the declared capability id, is tagged with the source
    ``declaration:<company_id>`` and gets MEDIUM confidence. Order follows
    ``context.declarations``.
    """
    origin = "declaration:%s" % context.company_id
    return [
        OperationalCapabilityCandidate(
            capability_id=declaration.capability_id,
            source=origin,
            confidence=Confidence.MEDIUM,
            verification_status=VerificationStatus.DECLARED,
        )
        for declaration in context.declarations
    ]
 def _from_certifications(
    context: CompanyContext, mapping: CertificationCapabilityMap
 ) -> Tuple[List[CapabilityEvidence], List[OperationalCapabilityCandidate]]:
    """Derive evidence-of-claim and INFERRED candidates from the certifications.

    Every certification yields exactly one ``CapabilityEvidence`` claim. Only
    certifications that have an entry in the injected ``mapping`` additionally
    yield one INFERRED candidate per mapped capability id, carrying the entry's
    confidence. Returns ``(evidence, inferred_candidates)``.
    """
    claims: List[CapabilityEvidence] = []
    candidates: List[OperationalCapabilityCandidate] = []
    for certification in context.certifications:
        cert_id = certification.certification_id
        origin = "certification:%s" % cert_id
        # The claim is recorded whether or not a mapping exists for this certification.
        claims.append(
            CapabilityEvidence(
                source=origin,
                claim="Company holds %s" % (certification.name or cert_id),
                certification_id=cert_id,
            )
        )
        rule = mapping.get(cert_id)
        if rule is not None:
            # A known mapping produces candidates; an unknown one produces none.
            candidates.extend(
                OperationalCapabilityCandidate(
                    capability_id=capability_id,
                    source=origin,
                    confidence=rule.confidence,
                    verification_status=VerificationStatus.INFERRED,
                )
                for capability_id in rule.capability_ids
            )
    return claims, candidates
 def _confirmed_from_evidence(context: CompanyContext) -> List[OperationalCapability]:
    """Build CONFIRMED capabilities from evidence that names the capability it proves.

    Evidence items without ``proves_capability_id`` are ignored. Evidence ids are
    grouped per capability (first-seen order) and become that capability's
    ``sources``; every result has HIGH confidence.
    """
    evidence_by_capability: Dict[str, List[str]] = {}
    for item in context.evidence:
        capability_id = item.proves_capability_id
        if capability_id:
            evidence_by_capability.setdefault(capability_id, []).append(item.evidence_id)
    confirmed: List[OperationalCapability] = []
    for capability_id, evidence_ids in evidence_by_capability.items():
        confirmed.append(
            OperationalCapability(
                capability_id=capability_id,
                verification_status=VerificationStatus.CONFIRMED,
                confidence=Confidence.HIGH,
                sources=evidence_ids,
            )
        )
    return confirmed
 def build_company_profile(
    context: CompanyContext, mapping: Optional[CertificationCapabilityMap] = None
 ) -> CompanyCapabilityProfile:
    """Assemble the Company Capability Profile for one company.

    ``mapping`` is the injected certification -> capability table; when omitted
    the empty mapping is used, so no candidate is inferred from certifications.
    Declared and inferred candidates whose capability is already confirmed by
    evidence are dropped from the candidate list.
    """
    effective_mapping = mapping if mapping is not None else EMPTY_MAPPING
    claims, inferred = _from_certifications(context, effective_mapping)
    declared = _declared(context)
    confirmed = _confirmed_from_evidence(context)
    already_confirmed = {capability.capability_id for capability in confirmed}
    remaining = []
    for candidate in declared + inferred:
        # Once confirmed, a capability is no longer listed as a mere candidate.
        if candidate.capability_id in already_confirmed:
            continue
        remaining.append(candidate)
    return CompanyCapabilityProfile(
        company_id=context.company_id,
        capability_evidence=claims,
        candidate_capabilities=remaining,
        confirmed_capabilities=confirmed,
    )
@@ -0,0 +1,150 @@
 """Company Intelligence (Phase 2A) — Company Capability Profile (domain objects).
 This is the HEAD of the spine
    Company -> (Operational) Capability -> Product -> Applicable Regulation ->
    Obligation -> Procedure -> Evidence
 and answers a DIFFERENT question than Regulatory Intelligence: not "which laws
 apply to my product" but "which capabilities does my company already have, and
 which regulatory obligations might they already cover".
 HARD RULE (structural, not convention): a capability derived from a certification
 is at most INFERRED — never CONFIRMED, never "erfuellt". A certification produces
 EVIDENCE for a capability, an inference produces a CANDIDATE, and only checked
 evidence produces a CONFIRMED capability. This keeps the company side inside
 Welt 1 (potential), mirroring `ClaimCoverage` on the obligation side; it is NOT a
 conformity verdict (`ComplianceStatus`, Welt 2, owned by Compliance Execution).
 OWNERSHIP: Reasoning OWNS this CompanyContext container + the trust-state machine.
 It does NOT own the Certification->Capability mapping RULES — those are the same
 kind of rule as Feature->Capability and belong to the Compliance Execution
 Capability Registry. This layer only CONSUMES `OperationalCapabilityCandidate`
 {capability_id, source, confidence, verification_status} via an injected mapping
 (see contract.py). No mapping DATA lives in product code (tests inject mocks).
 Application/reasoning types, NOT compliance-meta-model classes (architecture
 freeze v1.0 untouched). Python 3.9 compatible (no `|` unions).
 """
 from __future__ import annotations
 from enum import Enum
 from typing import List, Optional
 from pydantic import BaseModel, Field
 from compliance.reasoning.enums import Confidence
 class VerificationStatus(str, Enum):
    """Trust state of an operational capability — a FOURTH vocabulary.
    Disjoint from ClaimCoverage (Welt 1, customer claim vs obligation),
    ComplianceStatus (Welt 2, verified conformity) and DeltaType (RCI). It says
    only how well-established a company CAPABILITY is, never whether an obligation
    is met. Progression: DECLARED (customer says) -> INFERRED (a certification
    implies it) -> CONFIRMED (checked against real evidence); UNKNOWN = no signal.
    """
    DECLARED = "declared"  # the customer states they have the capability
    INFERRED = "inferred"  # implied by a certification via the injected mapping
    CONFIRMED = "confirmed"  # backed by evidence that names the capability
    UNKNOWN = "unknown"  # no signal at all
 # ── raw company inputs (the CompanyContext children) ──────────────────────
 # A certification the company states it holds (raw input).
 class Certification(BaseModel):
    certification_id: str  # e.g. "ISO27001"; the key looked up in the injected mapping
    name: str = ""  # display name used in the evidence claim; the id is used when empty
    scope: str = ""  # what the cert covers, customer-stated
 class Declaration(BaseModel):
    """A customer statement that they have a capability ("we do patch management")."""
    capability_id: str  # the capability being claimed; becomes a DECLARED candidate
    statement: str = ""  # optional free text; engine.py only reads capability_id
 # An existing company process (raw input). Carried on CompanyContext.processes;
 # the engine functions in engine.py do not read it.
 class ExistingProcess(BaseModel):
    process_id: str
    name: str = ""  # optional display name
 # An existing company system (raw input). Carried on CompanyContext.systems;
 # the engine functions in engine.py do not read it.
 class ExistingSystem(BaseModel):
    system_id: str
    name: str = ""  # optional display name
 class ExistingEvidence(BaseModel):
    """A concrete artefact the company already holds (policy, audit log, SBOM ...).
    `proves_capability_id` is the ONLY thing that may lift a capability to
    CONFIRMED — and only when a human/engine has attached real evidence.
    """
    evidence_id: str  # listed in `sources` of the capability it confirms
    evidence_type: str = ""  # config_export/test_report/policy/audit_log/...
    proves_capability_id: Optional[str] = None  # None or empty: this evidence confirms nothing
 # ── intermediate: certification -> evidence-of-capability (refinement 1) ──
 class CapabilityEvidence(BaseModel):
    """A certification does not yield a capability directly — only EVIDENCE for one.
    "Company holds a certified ISMS" is the evidence/claim; capabilities are then
    INFERRED from it via the injected (Execution-owned) mapping, never directly.
    """
    source: str  # provenance, e.g. "certification:ISO27001"
    claim: str = ""  # human-readable claim; the engine writes "Company holds <name or id>"
    certification_id: str = ""  # id of the certification the claim stems from
 # ── consumed contract type (refinement 2) ─────────────────────────────────
 class OperationalCapabilityCandidate(BaseModel):
    """The ONLY thing Reasoning consumes from Execution's capability mapping.
    Named "operational" (organisational ability) to stay distinct from later
    Product/AI/Safety capabilities. A candidate is always Welt 1 — DECLARED or
    INFERRED — and never CONFIRMED on its own.
    """
    capability_id: str
    source: str  # the engine writes "declaration:<company_id>" or "certification:<certification_id>"
    confidence: Confidence = Confidence.MEDIUM
    # NOTE: the field type does not forbid CONFIRMED/UNKNOWN; the engine only ever
    # sets DECLARED or INFERRED here.
    verification_status: VerificationStatus = VerificationStatus.INFERRED
 class OperationalCapability(BaseModel):
    """A capability the company actually has, CONFIRMED against real evidence."""
    capability_id: str
    verification_status: VerificationStatus  # required; the engine always passes CONFIRMED
    confidence: Confidence = Confidence.MEDIUM  # the engine passes HIGH for confirmed capabilities
    sources: List[str] = Field(default_factory=list)  # evidence ids that prove this capability
 # ── the container Reasoning OWNS (raw inputs) ─────────────────────────────
 # Raw, customer-supplied company inputs. Only company_id is required; every
 # list defaults to empty.
 class CompanyContext(BaseModel):
    company_id: str
    certifications: List[Certification] = Field(default_factory=list)  # -> evidence claims + INFERRED candidates
    declarations: List[Declaration] = Field(default_factory=list)  # -> DECLARED candidates
    processes: List[ExistingProcess] = Field(default_factory=list)  # not read by the engine functions in engine.py
    systems: List[ExistingSystem] = Field(default_factory=list)  # not read by the engine functions in engine.py
    evidence: List[ExistingEvidence] = Field(default_factory=list)  # -> CONFIRMED capabilities
 # ── derived view (the Company Capability Profile) ─────────────────────────
 class CompanyCapabilityProfile(BaseModel):
    """Derived: capability evidence + candidates (declared/inferred) + confirmed.
    `candidate_capabilities` NEVER auto-promote to `confirmed_capabilities`; only
    explicit ExistingEvidence does that. The hard rule is enforced in engine.py.
    """
    company_id: str
    capability_evidence: List[CapabilityEvidence] = Field(default_factory=list)  # one claim per certification
    # Declared candidates first, then inferred ones; capabilities that are already
    # confirmed are filtered out by build_company_profile.
    candidate_capabilities: List[OperationalCapabilityCandidate] = Field(default_factory=list)
    confirmed_capabilities: List[OperationalCapability] = Field(default_factory=list)  # one per evidence-proven capability
@@ -0,0 +1,24 @@
 """Regulatory Completeness — auditable knowledge coverage, not confidence.
 An internal quality machine: for an assessment it reports identified vs assessed regulations and
 justifies every open or excluded domain (corpus gap -> future_corpus; applicability uncertain ->
 query_required). The metric is counts, never a single percentage. The product never claims full
 coverage — it makes its own knowledge state transparent and auditable. Deterministic, no LLM, no
 new corpus/meta-model class (freeze v1.0).
 """
 from __future__ import annotations
 from .engine import assess_completeness
 from .schemas import (
    Assumption, CompletenessReport, CorpusStatus, DomainCoverage, Exclusion,
 )
 __all__ = [
    "assess_completeness",
    "CompletenessReport",
    "CorpusStatus",
    "DomainCoverage",
    "Exclusion",
    "Assumption",
 ]
@@ -0,0 +1,89 @@
 """Regulatory Completeness Engine — measure auditable knowledge coverage for an assessment.
 Separates what we IDENTIFIED (triggered regulations) from what we ASSESSED (validated corpus AND
 determined applicability), and justifies every gap. Two kinds of „open":
  - corpus gap        — no validated corpus yet (e.g. Environmental)            -> future_corpus
  - applicability open — corpus exists but applicability is uncertain (Data Act) -> query_required
 The metric is COUNTS, never a single percentage. The audit statement says plainly „wir bewerteten M
 von N Domänen; K sind nicht im validierten Korpus und wurden bewusst nicht bewertet".
 Deterministic, computed-not-stored, no LLM, no new corpus/meta-model class (freeze v1.0). Python 3.9.
 """
 from __future__ import annotations
 from typing import Any, Dict, List, Optional
 from .schemas import (
    Assumption, CompletenessReport, CorpusStatus, DomainCoverage, Exclusion,
 )
 _VALID = {s.value for s in CorpusStatus}
 def _status(corpus_status: Dict[str, str], reg: str) -> CorpusStatus:
    """Resolve a regulation's corpus status from the injected registry.

    A regulation missing from the registry, or registered with a value that is
    not a ``CorpusStatus`` value, resolves to ``CorpusStatus.UNKNOWN``.
    """
    value = corpus_status.get(reg, "unknown")
    if value in _VALID:
        return CorpusStatus(value)
    return CorpusStatus.UNKNOWN
 def _as_text(value: Any, default: str = "") -> str:
    """Render an optional report field as text, treating an explicit ``None`` as missing."""
    return default if value is None else str(value)

 def assess_completeness(
    identified_regulations: List[str],
    corpus_status: Dict[str, str],
    uncertain: Optional[List[Dict[str, Any]]] = None,
    assumptions: Optional[List[Dict[str, Any]]] = None,
    assessed_obligations: int = 0,
 ) -> CompletenessReport:
    """Build the auditable coverage report.

    Args:
        identified_regulations: regulations triggered/identified for this product;
            duplicates are collapsed and the result is sorted.
        corpus_status: regulation -> one of validated/draft/unsupported/unknown
            (curated/injected corpus registry). Missing or unrecognised values
            count as ``unknown``.
        uncertain: applicability-uncertain regulations, each a dict
            ``{regulation | subject, deciding_question, reason}``. Entries without a
            subject produce no exclusion but still count in ``uncertainties_count``.
        assumptions: ``[{key, value, note}]`` dicts, copied into the report as text.
        assessed_obligations: count from Execution (injected, default 0).

    Returns:
        A ``CompletenessReport``. A regulation is *assessed* only when its corpus is
        validated AND it is not listed as uncertain; every other identified
        regulation is *open* and receives exactly one justified exclusion
        (``query_required`` for uncertain ones, ``in_review`` for draft corpora,
        ``future_corpus`` otherwise).
    """
    ids = sorted(set(identified_regulations))
    unc = uncertain or []
    # Resolve each regulation's status once; all lists below are derived from this map.
    status_of = {r: _status(corpus_status, r) for r in ids}
    unc_subjects = {str(u.get("regulation") or u.get("subject")) for u in unc if (u.get("regulation") or u.get("subject"))}
    coverage = [DomainCoverage(regulation=r, status=status_of[r]) for r in ids]
    assessed = [r for r in ids if status_of[r] == CorpusStatus.VALIDATED and r not in unc_subjects]
    assessed_set = set(assessed)
    open_regs = [r for r in ids if r not in assessed_set]
    open_corpora = [r for r in ids if status_of[r] in (CorpusStatus.UNSUPPORTED, CorpusStatus.UNKNOWN)]
    exclusions: List[Exclusion] = []
    for u in unc:
        subj = str(u.get("regulation") or u.get("subject") or "")
        if not subj:
            continue
        # A missing, None or empty reason falls back to the default text, so the
        # report never shows the literal "None" and every exclusion has a reason.
        exclusions.append(Exclusion(
            subject=subj, reason=_as_text(u.get("reason")) or "Anwendbarkeit unsicher",
            deciding_question=_as_text(u.get("deciding_question")), resolution="query_required"))
    for r in open_regs:
        if r in unc_subjects:
            continue  # already justified above as query_required
        if status_of[r] == CorpusStatus.DRAFT:
            exclusions.append(Exclusion(subject=r, reason="Korpus in Bearbeitung (draft)", resolution="in_review"))
        else:
            exclusions.append(Exclusion(subject=r, reason="nicht im validierten Korpus", resolution="future_corpus"))
    covered_subjects = {e.subject for e in exclusions}
    justification = (not open_regs) or set(open_regs) <= covered_subjects
    # None means "not provided"; other falsy values such as 0 or False are kept as text.
    assumptions_m = [
        Assumption(key=_as_text(a.get("key")), value=_as_text(a.get("value")), note=_as_text(a.get("note")))
        for a in (assumptions or [])
    ]
    summary = "Identifiziert %d · bewertet %d · offen %d · Unsicherheiten %d · Begründung %s" % (
        len(ids), len(assessed), len(open_regs), len(unc), "ja" if justification else "nein")
    if open_regs:
        audit = (
            "Für dieses Produkt konnten wir %d von %d identifizierten regulatorischen Domänen vollständig "
            "bewerten. %d weitere %s noch nicht Bestandteil des validierten Korpus bzw. anwendungsunsicher "
            "und wurden deshalb bewusst nicht bewertet." % (
                len(assessed), len(ids), len(open_regs), "ist" if len(open_regs) == 1 else "sind"))
    else:
        audit = "Für dieses Produkt konnten wir alle %d identifizierten regulatorischen Domänen vollständig bewerten." % len(ids)
    return CompletenessReport(
        identified_regulations=ids, assessed_regulations=assessed, open_regulations=open_regs,
        open_corpora=open_corpora, coverage=coverage, assumptions=assumptions_m, exclusions=exclusions,
        uncertainties_count=len(unc), assessed_obligations=assessed_obligations,
        justification_present=justification, completeness_summary=summary, audit_statement=audit,
    )
@@ -0,0 +1,62 @@
 """Schemas for the Regulatory Completeness Engine — auditable knowledge-coverage, not confidence.
 For an assessment it answers „wie sicher sind wir, dass diese Antwort VOLLSTÄNDIG ist?" by separating
 IDENTIFIED regulations from ASSESSED ones (those in the validated corpus) and listing every open or
 excluded domain WITH a reason. The metric is counts, never a single „87%". This is an internal quality
 machine: the product never claims full coverage — it makes its own knowledge state transparent.
 Deterministic, computed-not-stored, no new meta-model class (freeze v1.0). Python 3.9 compatible.
 """
 from __future__ import annotations
 from enum import Enum
 from typing import List
 from pydantic import BaseModel, Field
 class CorpusStatus(str, Enum):
    """The maturity of our knowledge corpus for a regulation/domain."""
    VALIDATED = "validated"      # we can fully assess this
    DRAFT = "draft"             # partial / under review; the engine excludes it as "in_review"
    UNSUPPORTED = "unsupported"  # triggered but no corpus yet
    UNKNOWN = "unknown"          # not in our registry at all; also the fallback for unrecognised values
 # Corpus status of one identified regulation; the engine emits one entry per
 # identified regulation.
 class DomainCoverage(BaseModel):
    regulation: str
    status: CorpusStatus = CorpusStatus.UNKNOWN
    note: str = ""  # optional free text; the engine leaves it empty
 class Exclusion(BaseModel):
    """A domain/regulation DELIBERATELY not assessed — always with a reason (the heart of the engine)."""
    subject: str                                # the regulation/domain that was not assessed
    reason: str                                 # required: why it was not assessed
    deciding_question: str = ""                 # what would resolve it (if a query)
    # The engine emits query_required (applicability uncertain), in_review (draft
    # corpus) and future_corpus (no validated corpus). not_applicable is listed as a
    # value but is not produced by the engine.
    resolution: str = "future_corpus"           # query_required | future_corpus | in_review | not_applicable
 # One assumption the assessment was made under; the engine stores all three
 # fields as text.
 class Assumption(BaseModel):
    key: str
    value: str = ""
    note: str = ""
 class CompletenessReport(BaseModel):
    """The auditable coverage report for one assessment — counts + justification, NO single percentage."""
    identified_regulations: List[str] = Field(default_factory=list)    # de-duplicated, sorted
    assessed_regulations: List[str] = Field(default_factory=list)      # validated corpus AND not applicability-uncertain
    open_regulations: List[str] = Field(default_factory=list)          # identified but not assessed (incl. uncertain ones)
    open_corpora: List[str] = Field(default_factory=list)             # identified with status unsupported/unknown
    coverage: List[DomainCoverage] = Field(default_factory=list)       # corpus status per identified regulation
    assumptions: List[Assumption] = Field(default_factory=list)
    exclusions: List[Exclusion] = Field(default_factory=list)          # one justified entry per open/uncertain subject
    uncertainties_count: int = 0                                       # number of `uncertain` entries passed in
    assessed_obligations: int = 0                                      # injected (Execution-owned)
    justification_present: bool = False                                # True when every open regulation has an exclusion
    completeness_summary: str = ""                                    # "Identifiziert N · bewertet M · offen K · ..."
    audit_statement: str = ""                                         # the honest narrative sentence
@@ -0,0 +1,18 @@
 """Interpretation-in-Map — evaluate a customer interpretation within the map.
 Thin adapter over the existing `assess_interpretation`: it judges the customer's
 reading against the regulations/obligations actually present in the product's
 RegulatoryMap, and flags touched unsupported domains as future_corpus_needed
 instead of pseudo-evaluating them. No new legal reasoning, no RCI, no UI.
 """
 from __future__ import annotations
 from .adapter import interpret_in_map
 from .schemas import InterpretationInMapRequest, InterpretationInMapResult
 __all__ = [
    "interpret_in_map",
    "InterpretationInMapRequest",
    "InterpretationInMapResult",
 ]
@@ -0,0 +1,90 @@
 """Interpretation-in-Map adapter (step 5).
 Evaluates a customer interpretation WITHIN the already-built RegulatoryMap. It
 reuses the existing `assess_interpretation` (no new legal engine), restricts the
 affected regulations/obligations to those present in the map, and reports any
 touched unsupported domain (wastewater/chemicals/...) as future_corpus_needed
 rather than pseudo-evaluating it.
 """
 from __future__ import annotations
 from typing import Dict, List
 from compliance.reasoning.enums import InterpretationVerdict
 from compliance.reasoning.interpretation_engine import assess_interpretation
 from compliance.regulatory_map.schemas import RegulatoryMap
 from .schemas import InterpretationInMapResult
 # German display label per verdict, inserted into the explanation sentence.
 # Looked up with `_LABEL[a.assessment]`, so every verdict needs an entry here;
 # a missing one raises KeyError.
 _LABEL: Dict[InterpretationVerdict, str] = {
    InterpretationVerdict.PLAUSIBLE: "plausibel",
    InterpretationVerdict.TOO_NARROW: "zu eng",
    InterpretationVerdict.TOO_BROAD: "zu weit",
    InterpretationVerdict.PARTIALLY_CORRECT: "teilweise korrekt",
    InterpretationVerdict.UNSUPPORTED: "nicht belegt",
    InterpretationVerdict.UNCERTAIN: "unsicher",
 }
 # Per (uncovered) domain: the lowercase keywords whose presence in an
 # interpretation signals that the interpretation is ABOUT that domain.
 _ENV_KEYWORDS: Dict[str, List[str]] = {
    "environment_water": ["abwasser", "wastewater", "gewässer", "gewaesser", "einleitung", "abfluss"],
    "chemicals": ["chemikalie", "reach", "clp", "reinigungsmittel", "biozid", "gefahrstoff", "detergenz", "lösemittel", "loesemittel"],
    "environment_air": ["luft", "emission", "voc", "immission", "abluft", "verbrennung"],
    "waste": ["abfall", "entsorgung", "weee", "recycling"],
    "energy_resources": ["energie", "ökodesign", "oekodesign", "verbrauch"],
 }
 def _touches(text: str, domain: str) -> bool:
    """Tell whether ``text`` mentions ``domain``.

    The check is a case-insensitive substring search for any of the domain's
    keywords. A domain without keywords never matches.
    """
    haystack = text.lower()
    for keyword in _ENV_KEYWORDS.get(domain, []):
        if keyword in haystack:
            return True
    return False
 def _explain(label: str, detail: str, affected_regs: List[str], future_domains: List[str], in_scope: bool) -> str:
    """Compose the German explanation sentence(s) for an interpretation verdict.

    Always starts with the verdict label, then appends, in order: the engine's
    detail text (if any), the affected regulations found in the map (if any),
    and either the note about domains without a rule corpus or, when nothing in
    the map is touched and no such domain is involved, the out-of-scope note.
    """
    sentences = ["Ihre Interpretation ist wahrscheinlich %s." % label]
    if detail:
        sentences.append(detail)
    if affected_regs:
        sentences.append("Betroffen in Ihrer Map: %s." % ", ".join(affected_regs))
    if future_domains:
        sentences.append(
            "Für %s liegt noch kein Regelkorpus vor — diese Aspekte werden nicht bewertet (future_corpus_needed)."
            % ", ".join(future_domains)
        )
    elif not in_scope:
        sentences.append("Diese Auslegung betrifft kein Regelwerk Ihrer aktuellen Produkt-Map.")
    return " ".join(sentences)
 def interpret_in_map(reg_map: RegulatoryMap, interpretation: str) -> InterpretationInMapResult:
    """Evaluate a customer interpretation within an already-built RegulatoryMap.

    The verdict comes from the existing ``assess_interpretation``; this adapter
    only narrows its affected regulations/obligations to those present in the
    map (applicable, uncertain or excluded regulations; obligations of
    applicable regulations) and lists unsupported domains the text mentions as
    future-corpus domains instead of evaluating them.
    """
    verdict = assess_interpretation(interpretation)  # existing engine — no new reasoning
    applicable_ids = set()
    obligation_ids = set()
    for view in reg_map.applicable_regulations:
        applicable_ids.add(view.regulation_id)
        for obligation in view.obligations:
            obligation_ids.add(obligation.obligation_id)
    uncertain_ids = {view.regulation_id for view in reg_map.uncertain_regulations}
    excluded_ids = {view.regulation_id for view in reg_map.excluded_regulations}
    known_reg_ids = applicable_ids | uncertain_ids | excluded_ids

    regs_in_map = [reg for reg in verdict.affected_regulations if reg in known_reg_ids]
    obs_in_map = [ob for ob in verdict.affected_obligations if ob in obligation_ids]
    uncertain_hits = [reg for reg in verdict.affected_regulations if reg in uncertain_ids]
    uncovered = [dom for dom in reg_map.unsupported_domains if _touches(interpretation, dom.domain)]
    touches_map = bool(regs_in_map or obs_in_map)
    text = _explain(
        _LABEL[verdict.assessment],
        verdict.explanation,
        regs_in_map,
        [dom.domain for dom in uncovered],
        touches_map,
    )
    return InterpretationInMapResult(
        raw_interpretation=interpretation,
        assessment=verdict.assessment,
        in_scope_of_map=touches_map,
        affected_regulations=regs_in_map,
        affected_obligations=obs_in_map,
        related_uncertainties=uncertain_hits,
        future_corpus_domains=uncovered,
        corrected_interpretation=verdict.corrected_interpretation,
        risks=verdict.risks,
        legal_basis_refs=verdict.legal_basis_refs,
        explanation=text,
        confidence=verdict.confidence,
    )
@@ -0,0 +1,36 @@
 """Schemas for Interpretation-in-Map (step 5).
 A thin adapter that evaluates a customer interpretation WITHIN the already-built
 RegulatoryMap — it does not assess abstract legal questions. Application types
 only; no compliance-meta-model classes (freeze v1.0 untouched).
 """
 from __future__ import annotations
 from typing import List
 from pydantic import BaseModel, Field
 from compliance.product_scope.schemas import UnsupportedDomain
 from compliance.profile.canonical import CanonicalProductRegulatoryProfile
 from compliance.reasoning.enums import Confidence, InterpretationVerdict
 # Request payload: the product profile plus the customer's interpretation text.
 # NOTE(review): presumably the caller builds the RegulatoryMap from product_profile
 # before calling interpret_in_map — the adapter itself takes a RegulatoryMap.
 class InterpretationInMapRequest(BaseModel):
    product_profile: CanonicalProductRegulatoryProfile
    customer_interpretation: str
 # Result of evaluating one customer interpretation against a RegulatoryMap.
 class InterpretationInMapResult(BaseModel):
    raw_interpretation: str  # the interpretation text exactly as passed in
    assessment: InterpretationVerdict  # verdict taken over from assess_interpretation
    in_scope_of_map: bool  # True if it touches a regulation/obligation present in the map
    affected_regulations: List[str] = Field(default_factory=list)  # intersected with the map
    affected_obligations: List[str] = Field(default_factory=list)  # intersected (registry-linked)
    related_uncertainties: List[str] = Field(default_factory=list)  # map-uncertain regs it touches
    future_corpus_domains: List[UnsupportedDomain] = Field(default_factory=list)  # NOT evaluated
    corrected_interpretation: str = ""  # passed through from the underlying assessment
    risks: List[str] = Field(default_factory=list)  # passed through from the underlying assessment
    legal_basis_refs: List[str] = Field(default_factory=list)  # passed through from the underlying assessment
    explanation: str = ""  # German text composed by the adapter from the verdict label and map context
    confidence: Confidence = Confidence.MEDIUM  # the adapter passes the underlying assessment's confidence
@@ -0,0 +1,30 @@
 """Journey Matcher — the Delta -> Journey function of the Capability Delta Engine.
 The third independent function of the pipeline (after Company 2A `Evidence -> Capability` and RS-005
 `Capability -> Delta`): given ONLY the Capability Delta, rank the known journeys that best EXPLAIN it.
 A Journey is an EXPLANATION of the delta, not its cause — order is `Goal -> Required -> Delta -> Journey`.
 Deliberately dumb + deterministic (pure set overlap; no ML/embeddings/LLM), fully auditable, signatures
 INJECTED (certificate-agnostic capability clusters). No new corpus, no graph (freeze v1.0). The Matcher
 is sanctioned as the last architectural building block; everything after is knowledge work.
 """
 from __future__ import annotations
 from .engine import match_journeys
 from .schemas import (
    JourneyMatch,
    JourneyMatchReason,
    JourneyMatchResult,
    JourneySignature,
    MatchContext,
 )
 __all__ = [
    "match_journeys",
    "JourneySignature",
    "MatchContext",
    "JourneyMatch",
    "JourneyMatchReason",
    "JourneyMatchResult",
 ]
@@ -0,0 +1,94 @@
 """Journey Matcher — the Delta -> Journey function of the Capability Delta Engine.
 Three INDEPENDENT functions now compose the pipeline, each a different problem, all interchangeable:
  1. Evidence   -> Capability   (Company 2A)
  2. Capability -> Delta        (RS-005, transition_reasoning)
  3. Delta      -> Journey      (THIS module)
 The paradigm shift: a Journey is no longer the CAUSE (Goal -> Journey -> Delta) but the EXPLANATION
 (Goal -> Required -> Delta -> Journey). The matcher does NOT look at certifications, regulations,
 tenders, OEM specs or the goal — it looks ONLY at the Capability Delta and asks: which known journeys
 describe exactly this delta? Output is a ranked, auditable explanation ("Journey A explains 82% of the
 delta, because 8 of 10 missing capabilities are identical, same target type, ...").
 Deliberately DUMB and deterministic: pure set overlap, NO ML, NO embeddings, NO LLM. A learning ranker
 can be layered ON TOP later; this core stays auditable. Journey signatures are INJECTED (certificate-
 agnostic capability clusters), never loaded here — the engine stays hermetic. No new corpus, no
 graph/meta-model class (freeze v1.0). Python 3.9 compatible.
 Honesty: `score` is the share of the DELTA a journey explains (recall over the customer's missing
 capabilities), never a "fit" or a compliance verdict. `journey_only` documents where a journey reaches
 BEYOND this delta, so a broad journey that explains everything is not silently preferred.
 """
 from __future__ import annotations
 from typing import List, Optional, Sequence
 from .schemas import (
    JourneyMatch,
    JourneyMatchReason,
    JourneyMatchResult,
    JourneySignature,
    MatchContext,
 )
 def _context_signals(journey: JourneySignature, context: Optional[MatchContext]) -> List[str]:
    """List the context attributes on which journey and customer context agree.

    Purely corroborating: the returned German labels are documented next to a
    match and never feed into its score. An attribute counts only when it is set
    on both sides and equal. Without a context the list is empty.
    """
    if context is None:
        return []
    # Fixed order: target type, industry, product type.
    comparisons = (
        ("target_type", "gleiche Zielart"),
        ("industry", "gleiche Branche"),
        ("product_type", "gleicher Produkttyp"),
    )
    agreed: List[str] = []
    for attribute, text in comparisons:
        wanted = getattr(context, attribute)
        offered = getattr(journey, attribute)
        if wanted and offered and wanted == offered:
            agreed.append(text)
    return agreed
 def match_journeys(
    delta: Sequence[str],
    journeys: Sequence[JourneySignature],
    context: Optional[MatchContext] = None,
 ) -> JourneyMatchResult:
    """Rank known journeys by the share of the Capability Delta they EXPLAIN.

    Args:
        delta: the customer's MISSING capabilities (from RS-005); duplicates are
            collapsed before matching.
        journeys: injected, certificate-agnostic signatures; only their
            ``capability_pattern`` takes part in the match.
        context: optional corroborating context. It never changes the score —
            only the documented reasons and the tie-break.

    Returns:
        A ``JourneyMatchResult`` with one match per journey.
        ``score = |delta INTERSECT pattern| / |delta|``, rounded to two decimals
        for display (0.0 for an empty delta). Ranking is deterministic: exact
        overlap count desc, then context-signal count desc, then journey_id asc.
        ``best`` is the top match if it explains at least one capability,
        otherwise ``None``. Pure; no I/O; computed-not-stored.
    """
    delta_set = set(delta)
    n = len(delta_set)
    matches: List[JourneyMatch] = []
    for j in journeys:
        pattern = set(j.capability_pattern)
        matched = sorted(delta_set & pattern)
        score = (len(matched) / n) if n else 0.0
        signals = _context_signals(j, context)
        reason = JourneyMatchReason(
            matched_capabilities=matched,
            unexplained_delta=sorted(delta_set - pattern),
            journey_only=sorted(pattern - delta_set),
            context_signals=signals,
        )
        matches.append(
            JourneyMatch(
                journey_id=j.journey_id,
                label=j.label,
                score=round(score, 2),
                explains="%d von %d fehlenden Capabilities" % (len(matched), n),
                reason=reason,
            )
        )
    # Rank on the exact overlap count, not on the rounded display score: all
    # journeys share the same denominator, so the count orders them exactly as the
    # unrounded score would, and rounding can neither create ties nor turn a small
    # but real overlap into "explains nothing".
    matches.sort(
        key=lambda m: (-len(m.reason.matched_capabilities), -len(m.reason.context_signals), m.journey_id)
    )
    explaining = [m for m in matches if m.reason.matched_capabilities]
    # After the sort, the first explaining match (if any) is matches[0].
    best = explaining[0] if explaining else None
    headline = (
        "%d Journeys erklaeren das Delta; beste: %s (%d%% des Deltas)"
        % (len(explaining), best.label, round(best.score * 100))
        if best
        else "Keine bekannte Journey erklaert dieses Delta (neue Journey-Kandidatin)"
    )
    return JourneyMatchResult(delta_size=n, matches=matches, best=best, headline=headline)
@@ -0,0 +1,66 @@
 """Schemas for the Journey Matcher — the Delta -> Journey function of the Capability Delta Engine.
 Derived views (computed-not-stored): nothing here is persisted; every match is recomputed from the
 input delta + injected journey signatures each call. No new corpus, no graph (freeze v1.0).
 Python 3.9 compatible (no `|` unions).
 """
 from __future__ import annotations
 from typing import List, Optional
 from pydantic import BaseModel, Field
 class JourneySignature(BaseModel):
    """A known journey described ONLY by its capability pattern (Input cluster -> Output cluster).
    Deliberately certificate-/regulation-agnostic: the match uses `capability_pattern` alone. `label`
    and the context fields exist for the human-auditable explanation, NEVER for the score. (Today the
    signatures are derived from the transition patterns; the IDs like "ISO27001->CRA" are just one way
    to describe the clusters — the matcher never reads them.)
    """
    # Identifier copied onto each JourneyMatch; match_journeys also uses it as the last ranking tie-breaker.
    journey_id: str
    # Human-readable name; copied onto the match and quoted in the result headline.
    label: str
    capability_pattern: List[str] = Field(default_factory=list)     # OUTPUT cluster: the delta this journey is about
    assumed_capabilities: List[str] = Field(default_factory=list)   # INPUT cluster: typically already present
    # The three fields below are explanation-only context (see class docstring).
    industry: Optional[str] = None
    product_type: Optional[str] = None
    target_type: Optional[str] = None        # context only: regulation / certification / contract / environmental
 class MatchContext(BaseModel):
    """Optional corroborating context — surfaced as documented reasons, never part of the score."""
    # NOTE(review): the field names mirror JourneySignature.industry / product_type / target_type;
    # presumably the matcher's context helper compares them pairwise — that helper is not visible here.
    industry: Optional[str] = None
    product_type: Optional[str] = None
    target_type: Optional[str] = None
 class JourneyMatchReason(BaseModel):
    """The auditable WHY behind one match — everything a reviewer needs, no opaque score.
    match_journeys fills the three capability lists as sorted set operations between the delta and
    the journey's capability pattern, so together they partition both sides of the comparison.
    """
    matched_capabilities: List[str] = Field(default_factory=list)   # delta INTERSECT pattern (what it explains)
    unexplained_delta: List[str] = Field(default_factory=list)      # delta - pattern (what it does NOT explain)
    journey_only: List[str] = Field(default_factory=list)           # pattern - delta (journey covers, not needed here)
    context_signals: List[str] = Field(default_factory=list)        # "gleiche Zielart", "gleiche Branche", ...
 class JourneyMatch(BaseModel):
    """One known journey, ranked by how much of the delta it EXPLAINS (not how well it 'fits')."""
    journey_id: str                          # copied from the JourneySignature
    label: str                               # copied from the JourneySignature
    # match_journeys stores the score rounded to two decimals.
    score: float = 0.0                       # |delta INTERSECT pattern| / |delta|, 0..1: share of the delta explained
    explains: str = ""                       # "8 von 10 fehlenden Capabilities"
    reason: JourneyMatchReason               # the auditable breakdown behind the score
 class JourneyMatchResult(BaseModel):
    """Ranked known journeys that EXPLAIN a Capability Delta. Journey = explanation, not cause."""
    delta_size: int = 0                                             # number of DISTINCT capabilities in the delta
    matches: List[JourneyMatch] = Field(default_factory=list)       # ranked desc by score
    best: Optional[JourneyMatch] = None                             # top-ranked match, or None when its score is 0
    headline: str = ""                                              # one-line German summary of the ranking
@@ -0,0 +1,23 @@
 """Knowledge Intake — classify an incoming document and assess its impact on existing knowledge.
 The stage BEFORE the parser: no content extraction, only Einordnung. Intersects a document's signals
 (regulations + keywords) with an index of the existing knowledge to emit a `KnowledgePackage` — which
 capabilities / playbooks / patterns / reference scenarios / obligations it probably touches, whether
 it is a new domain, and how much review it warrants. Deterministic, no LLM, no new corpus (freeze v1.0).
 """
 from __future__ import annotations
 from .engine import assess_document_impact, build_knowledge_index
 from .schemas import (
    DocumentDescriptor, ImpactLevel, KnowledgeIndex, KnowledgePackage,
 )
 # Public API of the knowledge-intake package: the two engine entry points plus the schema types
 # that are imported above from .engine and .schemas.
 __all__ = [
    "build_knowledge_index",
    "assess_document_impact",
    "DocumentDescriptor",
    "KnowledgeIndex",
    "KnowledgePackage",
    "ImpactLevel",
 ]
@@ -0,0 +1,111 @@
 """Knowledge Intake — classify a document and assess its impact on existing knowledge.
 The real Knowledge Production is not writing — it is TARGETED UPDATING: when 20 documents arrive,
 which 5 actually change our knowledge and which 15 are ignorable? Intake answers this deterministically
 by intersecting a document's signals (declared regulations + keywords) with an index of the existing
 knowledge (capabilities, playbooks, transition patterns, reference scenarios, injected obligations).
 It performs NO content extraction (that is the later parser stage) and uses NO LLM.
 Pipeline: Knowledge Intake -> Knowledge Package -> Parser -> Draft Generator -> Review -> Published.
 Pure, deterministic, computed-not-stored. No new corpus/meta-model class (freeze v1.0). Python 3.9.
 """
 from __future__ import annotations
 from typing import Any, Dict, List, Optional, Set
 from .schemas import DocumentDescriptor, ImpactLevel, KnowledgeIndex, KnowledgePackage
 def _targets(goal_to: Any) -> List[str]:
    """Extract target regulations from a transition_goal.to (single dict OR list of targets)."""
    out: List[str] = []
    items = goal_to if isinstance(goal_to, list) else [goal_to]
    for it in items:
        if isinstance(it, dict):
            reg = it.get("regulation") or it.get("target") or it.get("framework")
            if reg:
                out.append(str(reg))
    return out
 def build_knowledge_index(
    patterns: List[Dict[str, Any]],
    playbooks: List[Dict[str, Any]],
    reference_scenarios: List[Dict[str, Any]],
    obligation_index: Optional[Dict[str, List[str]]] = None,
 ) -> KnowledgeIndex:
    """Assemble the matching index from already-loaded knowledge dicts (file I/O stays in the caller).
    Keys that are present but explicitly null (what an empty YAML/JSON value loads as) are treated
    exactly like absent keys, so a sparse pattern or scenario neither crashes the build nor registers
    the literal string "None" as an id.
    Args:
        patterns: pattern dicts; reads `id`, `transition_goal.to`, `likely_covered` and
            `delta_requirements` (each item: `capability`, optional `covers_targets`).
        playbooks: playbook dicts; only `capability_id` is read.
        reference_scenarios: scenario dicts; reads `id` and `transition_goal.to`.
        obligation_index: optional injected mapping regulation -> obligation ids.
    Returns:
        A KnowledgeIndex whose `regulations` is the sorted union of every regulation seen in the
        patterns, scenarios, capability coverage and the obligation index keys.
    """
    def _ident(raw: Any) -> str:
        # A null id means "no id"; str(None) would otherwise register the bogus key "None".
        return "" if raw is None else str(raw)
    def _goal_targets(entry: Dict[str, Any]) -> List[str]:
        # `transition_goal: null` must behave like a missing goal instead of raising on `.get`.
        return _targets((entry.get("transition_goal") or {}).get("to"))
    tp: Dict[str, List[str]] = {}
    cap_regs: Dict[str, List[str]] = {}
    for p in patterns:
        pid = _ident(p.get("id"))
        targets = _goal_targets(p)
        if pid:
            tp[pid] = targets
        for item in list(p.get("likely_covered") or []) + list(p.get("delta_requirements") or []):
            cap = item.get("capability")
            if not cap:
                continue
            # An item without its own covers_targets inherits the pattern's targets.
            regs = [str(t) for t in (item.get("covers_targets") or [])] or targets
            cap_regs[str(cap)] = sorted(set(cap_regs.get(str(cap), [])) | set(regs))
    rts: Dict[str, List[str]] = {}
    for r in reference_scenarios:
        rid = _ident(r.get("id"))
        if rid:
            rts[rid] = _goal_targets(r)
    obl = obligation_index or {}
    regulations = sorted(
        {t for ts in tp.values() for t in ts}
        | {t for ts in rts.values() for t in ts}
        | {t for ts in cap_regs.values() for t in ts}
        | set(obl.keys())
    )
    return KnowledgeIndex(
        regulations=regulations, capability_regulations=cap_regs,
        playbook_capabilities=sorted({_ident(pb.get("capability_id")) for pb in playbooks} - {""}),
        transition_patterns=tp, reference_scenarios=rts, obligation_index=dict(obl),
    )
 def _kw_match(keywords: Set[str], capability: str) -> bool:
    tokens = set(capability.lower().split("_"))
    return bool(keywords & tokens) or capability.lower() in keywords
 def assess_document_impact(descriptor: DocumentDescriptor, index: KnowledgeIndex) -> KnowledgePackage:
    """Intersect a document's declared signals with the knowledge index and grade the impact.
    Args:
        descriptor: classification inputs; only `document_id`, `regulations`, `keywords` and
            `document_type` are read.
        index: the index of existing knowledge to match against.
    Returns:
        A KnowledgePackage listing (each sorted) the capabilities, playbooks, transition patterns,
        reference scenarios and obligations the document touches, together with the impact level,
        a German summary line and a German recommendation.
    """
    declared = set(descriptor.regulations)
    corpus_regs = set(index.regulations)
    # "New domain": the document names regulations and not a single one is known to the corpus.
    is_new_domain = bool(declared) and declared.isdisjoint(corpus_regs)
    topics = {word.lower() for word in descriptor.keywords}
    def _cites(regs: List[str]) -> bool:
        # True when at least one of `regs` is declared by the document.
        return not declared.isdisjoint(regs)
    # Capabilities are hit through a shared regulation OR a keyword; everything else by regulation only.
    hit_caps = sorted(
        cap for cap, regs in index.capability_regulations.items()
        if _cites(regs) or _kw_match(topics, cap)
    )
    hit_playbooks = sorted(set(hit_caps).intersection(index.playbook_capabilities))
    hit_patterns = sorted(pid for pid, regs in index.transition_patterns.items() if _cites(regs))
    hit_scenarios = sorted(rid for rid, regs in index.reference_scenarios.items() if _cites(regs))
    hit_obligations = sorted(
        {obligation for reg in declared for obligation in index.obligation_index.get(reg, [])}
    )
    counts = (len(hit_caps), len(hit_playbooks), len(hit_patterns), len(hit_scenarios), len(hit_obligations))
    if is_new_domain:
        level = ImpactLevel.NEW_DOMAIN
        rec = "Neue Domäne — Corpus-Intake nötig (kein bestehendes Wissen betroffen)."
    elif sum(counts) == 0:
        level = ImpactLevel.NONE
        rec = "Wahrscheinlich ignorierbar — betrifft keinen bekannten Wissensbaustein."
    elif counts[0] >= 3 or hit_playbooks or counts[4] >= 5:
        level = ImpactLevel.HIGH
        rec = "Gezielter Review priorisieren — hoher Impact auf bestehendes Wissen."
    else:
        level = ImpactLevel.LOW
        rec = "Gezielter Review — geringer, eingegrenzter Impact."
    domain_note = "NEUE Domäne" if is_new_domain else "keine neue Domäne"
    summary = (
        "Betrifft %d Capabilities, %d Playbooks, %d Patterns, %d Reference Scenarios, %d Obligations; %s."
        % (counts + (domain_note,))
    )
    doc_types = [descriptor.document_type] if descriptor.document_type else []
    return KnowledgePackage(
        document_id=descriptor.document_id,
        classification={
            "regulations": sorted(declared),
            "keywords": sorted(topics),
            "document_type": doc_types,
        },
        new_domain=is_new_domain,
        unknown_regulations=sorted(declared - corpus_regs),
        affected_capabilities=hit_caps,
        affected_playbooks=hit_playbooks,
        affected_transition_patterns=hit_patterns,
        affected_reference_scenarios=hit_scenarios,
        affected_obligations=hit_obligations,
        impact_level=level,
        impact_summary=summary,
        recommendation=rec,
    )
@@ -0,0 +1,62 @@
 """Schemas for Knowledge Intake — classify a new document and assess its IMPACT (no extraction yet).
 Before the parser/draft stages, Intake answers „welche Teile unseres Wissensbestands sind überhaupt
 betroffen?". It does NOT extract content — it only classifies the document and intersects its signals
 with an index of the existing knowledge (capabilities, playbooks, transition patterns, reference
 scenarios, injected obligations) to emit a `KnowledgePackage` (an impact analysis). Deterministic,
 computed-not-stored, no new corpus, no new meta-model class (freeze v1.0). Python 3.9 compatible.
 """
 from __future__ import annotations
 from enum import Enum
 from typing import Dict, List
 from pydantic import BaseModel, Field
 class ImpactLevel(str, Enum):
    """Impact grade that assess_document_impact assigns to an incoming document."""
    NONE = "none"                # touches nothing known -> likely ignorable
    LOW = "low"                  # touches a little -> targeted review
    HIGH = "high"                # touches a lot -> prioritise review
    NEW_DOMAIN = "new_domain"    # references only unknown regulations -> domain intake
 class DocumentDescriptor(BaseModel):
    """Lightweight signals of an incoming document — NO content body, only classification inputs.
    assess_document_impact reads `document_id`, `document_type`, `regulations` and `keywords`;
    `title`, `source` and `product_types` are carried but not evaluated by it.
    """
    document_id: str                                      # echoed onto the resulting KnowledgePackage
    title: str = ""
    source: str = ""                                      # e.g. BSI, ENISA, EU
    document_type: str = ""                               # e.g. guidance, faq, regulation, recommendation
    regulations: List[str] = Field(default_factory=list)  # declared regulations it references
    keywords: List[str] = Field(default_factory=list)     # lightweight topic signals (e.g. sbom)
    product_types: List[str] = Field(default_factory=list)
 class KnowledgeIndex(BaseModel):
    """A deterministic index of the EXISTING knowledge to match an incoming document against.
    Produced by build_knowledge_index from already-loaded pattern / playbook / scenario dicts plus an
    optional injected obligation mapping; consumed by assess_document_impact.
    """
    regulations: List[str] = Field(default_factory=list)               # all regulations the corpus knows
    capability_regulations: Dict[str, List[str]] = Field(default_factory=dict)   # capability -> covers_targets
    playbook_capabilities: List[str] = Field(default_factory=list)     # capabilities that HAVE a playbook
    transition_patterns: Dict[str, List[str]] = Field(default_factory=dict)      # pattern_id -> target regulations
    reference_scenarios: Dict[str, List[str]] = Field(default_factory=dict)      # rts_id -> regulations
    obligation_index: Dict[str, List[str]] = Field(default_factory=dict)         # regulation -> obligation ids (INJECTED)
 class KnowledgePackage(BaseModel):
    """The impact analysis for one document — what of our knowledge it probably touches, and how much.
    assess_document_impact fills every `affected_*` list and `unknown_regulations` in sorted order.
    """
    document_id: str
    classification: Dict[str, List[str]] = Field(default_factory=dict)   # echoed regulations/keywords/types
    new_domain: bool = False                                # True when regulations are declared and none is known
    unknown_regulations: List[str] = Field(default_factory=list)          # declared regulations absent from the index
    affected_capabilities: List[str] = Field(default_factory=list)        # hit via shared regulation OR keyword
    affected_playbooks: List[str] = Field(default_factory=list)           # affected capabilities that have a playbook
    affected_transition_patterns: List[str] = Field(default_factory=list)
    affected_reference_scenarios: List[str] = Field(default_factory=list)
    affected_obligations: List[str] = Field(default_factory=list)         # obligations of the declared regulations
    impact_level: ImpactLevel = ImpactLevel.NONE
    impact_summary: str = ""                                # German sentence with the five counts
    recommendation: str = ""                                # German next-step hint matching impact_level
@@ -0,0 +1,19 @@
 """Knowledge Production — deterministically prepare the corpus, then curate it.
 The corpus is not written by hand: the Playbook Draft Generator structures drafts from data the
 software already owns (Transition Pattern + leverage + injected Execution controls), leaving the
 practitioner know-how as TODO for expert review. Mirrors the legal pipeline (Parser -> Review).
 Deterministic, no LLM in core, no new corpus, no new meta-model class (freeze v1.0).
 """
 from __future__ import annotations
 from .engine import drafts_from_pattern, generate_playbook_draft
 from .schemas import DraftStatus, PlaybookDraft
 # Public API of the knowledge-production package: the two draft generators from .engine and the
 # draft schema types from .schemas (both imported above).
 __all__ = [
    "generate_playbook_draft",
    "drafts_from_pattern",
    "PlaybookDraft",
    "DraftStatus",
 ]
@@ -0,0 +1,91 @@
 """Knowledge Production — the Playbook Draft Generator (deterministic assembly + expert review).
 Mirrors the legal pipeline (Gesetz -> Parser -> Obligation -> Review) for BreakPilot's OWN knowledge:
 new Capability -> Registry -> Transition Pattern -> **Playbook Draft Generator** -> Expert Review ->
 versioned Playbook. The generator does not WRITE playbooks — it STRUCTURES drafts from data the
 software already owns (a transition/convergence pattern's delta requirement: why_asked, covers_targets,
 expected_evidence) plus injected Execution controls. The practitioner know-how (tools / process steps /
 how others do it) is left as an explicit TODO for the expert (or a separate offline-propose step).
 Fully deterministic, NO LLM in the core (deterministic-first: any model enrichment is offline,
 advisory, never in this assembly). No new corpus, no new meta-model class (freeze v1.0). Python 3.9.
 """
 from __future__ import annotations
 from typing import Any, Dict, List, Optional
 from .schemas import DraftStatus, PlaybookDraft
 # Draft fields that are never filled from owned data; generate_playbook_draft appends them to the
 # `todo` list of every draft it produces.
 _SOFT_FIELDS = ["tools", "process_steps", "how_others_do_it"]   # practitioner know-how — expert/offline-propose
 # German disclaimer attached to every generated draft: it is a machine-assembled draft, not a
 # normative requirement, and needs expert curation plus the status transitions named in the text.
 _DISCLAIMER = (
    "Maschinell assemblierter ENTWURF aus vorhandenen Daten (Transition Pattern + Leverage + "
    "injizierte Controls). KEINE normative Anforderung; erfordert fachliche Kuratierung (TODO-Felder) "
    "und Statuswechsel draft_generated -> reviewed -> validated."
 )
 def generate_playbook_draft(
    capability_id: str,
    requirement: Optional[Dict[str, Any]] = None,
    control_links: Optional[List[str]] = None,
 ) -> PlaybookDraft:
    """Assemble a playbook draft for ONE capability from a pattern delta requirement (deterministic).
    Args:
        capability_id: the capability the draft is for; the title is this id with underscores
            replaced by spaces.
        requirement: a delta_requirement dict. `why_asked` (falling back to `missing_because`),
            `covers_targets` and `expected_evidence` are read; missing or null keys count as empty.
        control_links: injected Execution controls (default empty — no Execution data in the
            draft generator).
    Returns:
        A PlaybookDraft in status DRAFT_GENERATED. `provenance` names, per filled field, the source
        it was actually taken from; `todo` lists `why` / `expected_evidence` when they could not be
        filled, followed by the always expert-owned soft fields.
    """
    req = requirement or {}
    asked = req.get("why_asked")
    why = str(asked or req.get("missing_because") or "")
    # `or []` keeps an explicit null (e.g. an empty YAML value) from raising during iteration.
    closes = sorted({str(t) for t in (req.get("covers_targets") or [])})
    evidence = [str(e) for e in (req.get("expected_evidence") or [])]
    controls = list(control_links or [])
    provenance: Dict[str, str] = {}
    todo: List[str] = []
    if why:
        # Record the key that really supplied the text; the fallback must not be labelled why_asked.
        provenance["why"] = "transition_pattern:" + ("why_asked" if asked else "missing_because")
    else:
        todo.append("why")
    if closes:
        provenance["closes_regulations"] = "leverage:covers_targets"
    if evidence:
        provenance["expected_evidence"] = "transition_pattern:expected_evidence"
    else:
        todo.append("expected_evidence")
    if controls:
        provenance["typical_controls"] = "execution:control_links"
    todo.extend(_SOFT_FIELDS)   # always expert-owned
    return PlaybookDraft(
        capability_id=capability_id,
        status=DraftStatus.DRAFT_GENERATED,
        title=capability_id.replace("_", " "),
        why=why,
        closes_regulations=closes,
        expected_evidence=evidence,
        typical_controls=controls,
        provenance=provenance,
        todo=todo,
        disclaimer=_DISCLAIMER,
    )
 def drafts_from_pattern(
    pattern: Dict[str, Any],
    control_links_by_cap: Optional[Dict[str, List[str]]] = None,
 ) -> List[PlaybookDraft]:
    """Produce one playbook draft for every delta requirement of a pattern that names a capability.
    Args:
        pattern: a transition/convergence pattern dict; only `delta_requirements` is read.
        control_links_by_cap: optional mapping capability id -> injected control links.
    Returns:
        The drafts in the order of the pattern's delta requirements; requirements without a
        (truthy) `capability` are skipped.
    """
    controls_for = control_links_by_cap or {}
    named = (
        (str(req.get("capability")), req)
        for req in pattern.get("delta_requirements", [])
        if req.get("capability")
    )
    return [generate_playbook_draft(cap, req, controls_for.get(cap)) for cap, req in named]
@@ -0,0 +1,46 @@
 """Schemas for Knowledge Production — deterministic draft assembly + lifecycle.
 The corpus is no longer written by hand: it is deterministically PREPARED from data the software
 already owns (Capability, Transition Pattern, Controls, Evidence, leverage), then curated by an
 expert. A `PlaybookDraft` is a machine-assembled skeleton with per-field provenance and an explicit
 TODO list of what still needs human (or offline-propose) input. No LLM in the deterministic core.
 Python 3.9 compatible (no `|` unions).
 """
 from __future__ import annotations
 from enum import Enum
 from typing import Dict, List
 from pydantic import BaseModel, Field
 class DraftStatus(str, Enum):
    """Release status (Freigabestatus) — the knowledge lifecycle from machine draft to proven.
    Mirrors the transition-pattern / playbook maturity, with a machine-assembled pre-stage;
    generate_playbook_draft always emits DRAFT_GENERATED.
    """
    DRAFT_GENERATED = "draft_generated"   # machine-assembled, NOT yet expert-touched
    IN_REVIEW = "in_review"               # an expert is curating it
    REVIEWED = "reviewed"                 # internally reviewed
    VALIDATED = "validated"               # domain expert confirmed
    PROVEN = "proven"                     # confirmed in the field
 class PlaybookDraft(BaseModel):
    """A deterministically assembled playbook draft for one capability.
    Owned fields (why / closes_regulations / expected_evidence / typical_controls) are filled from
    existing data with provenance; the practitioner know-how (tools / process_steps / how_others)
    is left as TODO. The expert reviews a draft instead of writing from a blank page.
    """
    capability_id: str                                     # the capability this draft belongs to
    status: DraftStatus = DraftStatus.DRAFT_GENERATED
    title: str = ""                                        # generator sets it to capability_id with "_" -> " "
    why: str = ""                                          # from the transition pattern (why_asked/missing_because)
    closes_regulations: List[str] = Field(default_factory=list)   # from leverage (covers_targets)
    expected_evidence: List[str] = Field(default_factory=list)    # from the transition pattern
    typical_controls: List[str] = Field(default_factory=list)     # injected from Execution (may be empty)
    provenance: Dict[str, str] = Field(default_factory=dict)      # field -> source it was assembled from
    todo: List[str] = Field(default_factory=list)          # fields the expert/offline-propose must still add
    disclaimer: str = ""                                   # machine draft, requires expert curation
@@ -0,0 +1,29 @@
 """Product Regulatory Navigator — thin missing-facts layer.
 Sits above the CanonicalProductRegulatoryProfile (prefilled from company-profile /
 ProductWizard) and reports only which facts are still missing + prioritized
 questions to collect them. It decides which facts are needed, NOT what regulation
 applies — that stays with the Scope Engine (step 3). No regulation logic, no UI,
 no Go, no RAG.
 """
 from __future__ import annotations
 from .engine import CompletenessSummary, NavigatorResult, apply_answers, navigate
 from .questions import (
    QUESTION_CATALOG,
    AnswerType,
    NavigatorQuestion,
    QuestionPriority,
 )
 # Public API of the navigator package: the two engine entry points with their result models
 # (from .engine) and the question types plus the static catalog (from .questions).
 __all__ = [
    "navigate",
    "apply_answers",
    "NavigatorResult",
    "CompletenessSummary",
    "NavigatorQuestion",
    "AnswerType",
    "QuestionPriority",
    "QUESTION_CATALOG",
 ]
@@ -0,0 +1,116 @@
 """Product Regulatory Navigator engine — missing-facts only.
 `navigate(profile)` reports which canonical fields are still unknown and the
 prioritized questions to fill them. `apply_answers(profile, answers)` returns the
 updated profile. It NEVER decides what applies — that is the Scope Engine (step 3).
 Pure field-presence checking; no scope-engine import, no regulation evaluation.
 """
 from __future__ import annotations
 from typing import Any, Dict, List, Type
 from pydantic import BaseModel, Field
 from compliance.profile.canonical import (
    CanonicalLifecyclePhase,
    CanonicalProductRegulatoryProfile,
    EconomicOperatorRole,
    ProductComponent,
 )
 from .questions import QUESTION_CATALOG, NavigatorQuestion, QuestionPriority
 # Target fields whose raw answer must be converted into an enum member; _coerce calls the mapped
 # class with the raw value for exactly these fields.
 _ENUM_FIELDS: Dict[str, Type[Any]] = {
    "economic_operator_role": EconomicOperatorRole,
    "lifecycle_phase": CanonicalLifecyclePhase,
 }
 class CompletenessSummary(BaseModel):
    """Counts describing how much of the question catalog is already answered by the profile."""
    total_relevant: int  # navigate passes the size of QUESTION_CATALOG
    answered: int  # total_relevant minus missing
    missing: int  # number of catalog questions whose target field is still unknown
    missing_by_priority: Dict[str, int] = Field(default_factory=dict)  # priority value (e.g. "P0") -> open count
    ready_for_scope: bool  # True once no P0 fact is missing
    note: str = ""  # German one-line summary built by navigate
 class NavigatorResult(BaseModel):
    """Outcome of navigate(): the open facts, the questions that collect them, and the counts.
    navigate fills `missing_facts` and `suggested_questions` from the same priority-sorted list, so
    the two are index-aligned.
    """
    missing_facts: List[str] = Field(default_factory=list)  # canonical target fields
    suggested_questions: List[NavigatorQuestion] = Field(default_factory=list)
    completeness_summary: CompletenessSummary
 def _value(profile: CanonicalProductRegulatoryProfile, dotted: str) -> Any:
    if "." in dotted:
        head, tail = dotted.split(".", 1)
        return getattr(getattr(profile, head), tail, None)
    return getattr(profile, dotted, None)
 def _is_unknown(profile: CanonicalProductRegulatoryProfile, q: NavigatorQuestion) -> bool:
    """Report whether the fact behind question `q` is still missing from the profile.
    A fact counts as missing when its target field resolves to None or to an empty list; any other
    value — including False — counts as answered.
    """
    current = _value(profile, q.target_field)
    return current is None or (isinstance(current, list) and len(current) == 0)
 def navigate(profile: CanonicalProductRegulatoryProfile) -> NavigatorResult:
    """List the catalog questions whose facts the profile still lacks, most important first.
    Args:
        profile: the canonical product profile to inspect (not modified).
    Returns:
        A NavigatorResult with the open target fields, the matching questions in priority order
        (catalog order within one priority) and a completeness summary. `ready_for_scope` is True
        exactly when no P0 question is open.
    """
    open_questions = sorted(
        (question for question in QUESTION_CATALOG if _is_unknown(profile, question)),
        key=lambda question: question.order(),
    )
    per_priority: Dict[str, int] = {}
    for question in open_questions:
        bucket = question.priority.value
        per_priority.setdefault(bucket, 0)
        per_priority[bucket] += 1
    catalog_size = len(QUESTION_CATALOG)
    open_count = len(open_questions)
    done_count = catalog_size - open_count
    scope_ready = QuestionPriority.P0.value not in per_priority
    readiness = "ja" if scope_ready else "nein (P0 fehlt)"
    note = "%d von %d Fakten vorhanden; %d offen. Scope-Engine startklar: %s." % (
        done_count,
        catalog_size,
        open_count,
        readiness,
    )
    return NavigatorResult(
        missing_facts=[question.target_field for question in open_questions],
        suggested_questions=open_questions,
        completeness_summary=CompletenessSummary(
            total_relevant=catalog_size,
            answered=done_count,
            missing=open_count,
            missing_by_priority=per_priority,
            ready_for_scope=scope_ready,
            note=note,
        ),
    )
 def _coerce(q: NavigatorQuestion, value: Any) -> Any:
    """Convert a raw answer into the type the question's target field expects.
    Checked in this order: enum-typed fields are built through their enum class; `components`
    becomes a list of ProductComponent (dict entries are expanded as keyword arguments);
    country-list / multiselect answers become a list; bool answers go through `bool()`; everything
    else is passed through unchanged.
    """
    field = q.target_field
    enum_cls = _ENUM_FIELDS.get(field)
    if enum_cls is not None:
        return enum_cls(value)
    if field == "components":
        entries = value or []
        return [
            entry if isinstance(entry, ProductComponent) else ProductComponent(**entry)
            for entry in entries
        ]
    kind = q.answer_type.value
    if kind in ("country_list", "multiselect"):
        return list(value or [])
    # NOTE(review): bool() is plain truthiness, so a non-empty string such as "false" becomes True;
    # callers need to pass real booleans for bool questions.
    return bool(value) if kind == "bool" else value
 def apply_answers(
    profile: CanonicalProductRegulatoryProfile, answers: Dict[str, Any]
 ) -> CanonicalProductRegulatoryProfile:
    """Return a deep copy of the profile with the given answers written into their target fields.
    Args:
        profile: the profile to start from; it is copied, not mutated.
        answers: mapping question_id -> raw answer. Unknown question ids and None answers are
            ignored; every other answer is coerced via `_coerce` before being stored.
    Returns:
        The updated copy. For a dotted target field the value is set on the sub-object named by
        the part before the first dot.
    """
    result = profile.model_copy(deep=True)
    catalog = {question.question_id: question for question in QUESTION_CATALOG}
    for key, raw_answer in answers.items():
        question = catalog.get(key)
        if raw_answer is None or question is None:
            continue
        coerced = _coerce(question, raw_answer)
        owner_name, dot, attr = question.target_field.partition(".")
        if dot:
            # NOTE(review): assumes the nested object already exists on the profile — confirm.
            setattr(getattr(result, owner_name), attr, coerced)
        else:
            setattr(result, owner_name, coerced)
    return result
@@ -0,0 +1,171 @@
 """Product Regulatory Navigator — question catalog.
 The Navigator is a THIN missing-facts layer over CanonicalProductRegulatoryProfile.
 It does NOT decide what applies — `regulatory_domains_unblocked` is static metadata
 (which domains a fact would help the Scope Engine decide later), never an
 evaluation. No regulation logic, no UI, no Go, no RAG.
 `NavigatorQuestion` is an interaction type, NOT a compliance-meta-model class
 (architecture freeze v1.0 untouched).
 """
 from __future__ import annotations
 from enum import Enum
 from typing import List
 from pydantic import BaseModel, Field
 from compliance.profile.canonical import CanonicalLifecyclePhase, EconomicOperatorRole
 class AnswerType(str, Enum):
    """Kind of answer a navigator question expects; the string values are what the engine's
    answer coercion compares against."""
    BOOL = "bool"
    ENUM = "enum"
    MULTISELECT = "multiselect"
    TEXT = "text"
    COUNTRY_LIST = "country_list"
    COMPONENT_LIST = "component_list"
 class QuestionPriority(str, Enum):
    """Urgency tier of a navigator question; P0 questions are the ones that gate scope readiness."""
    P0 = "P0"  # blocks scope: EU-vs-not, role, lifecycle, machine/component
    P1 = "P1"  # unblocks a specific domain: RED, Data Act, environment, security
    P2 = "P2"  # refinement: structured BOM
 # Numeric sort rank per priority (lower sorts first); read by NavigatorQuestion.order().
 _PRIORITY_ORDER = {QuestionPriority.P0: 0, QuestionPriority.P1: 1, QuestionPriority.P2: 2}
 class NavigatorQuestion(BaseModel):
    """One catalog question that collects a single missing fact for the canonical profile."""
    question_id: str  # key under which an answer for this question is submitted
    target_field: str  # dotted path into the canonical profile
    label: str  # the question text (German in the catalog)
    why_needed: str  # short rationale for asking (German in the catalog)
    regulatory_domains_unblocked: List[str] = Field(default_factory=list)  # static metadata, never evaluated
    answer_type: AnswerType
    options: List[str] = Field(default_factory=list)  # allowed values; the catalog sets it for ENUM questions
    priority: QuestionPriority
    def order(self) -> int:
        """Return the numeric sort rank of this question's priority (P0=0, P1=1, P2=2)."""
        return _PRIORITY_ORDER[self.priority]
 # Option lists for the two ENUM questions, taken from the enum members' values so the catalog
 # always offers exactly what the canonical profile enums define.
 _ROLE_OPTIONS = [e.value for e in EconomicOperatorRole]
 _PHASE_OPTIONS = [e.value for e in CanonicalLifecyclePhase]
 # Static catalog of every navigator question, written in priority groups P0 -> P1 -> P2. Each entry
 # maps one question_id to the profile field it fills; labels and rationales are German UI text.
 QUESTION_CATALOG: List[NavigatorQuestion] = [
    # ── P0: block the scope decision itself ───────────────────────────
    NavigatorQuestion(
        question_id="markets",
        target_field="markets",
        label="In welche Märkte / Länder liefern Sie das Produkt?",
        why_needed="Bestimmt EU- vs. Nicht-EU-Anwendbarkeit und nationale Pflichten.",
        regulatory_domains_unblocked=["cyber", "machine_safety", "data", "radio", "emv", "environment"],
        answer_type=AnswerType.COUNTRY_LIST,
        priority=QuestionPriority.P0,
    ),
    NavigatorQuestion(
        question_id="economic_operator_role",
        target_field="economic_operator_role",
        label="Welche Rolle nehmen Sie ein?",
        why_needed="Pflichten hängen von der Rolle ab (Hersteller/Importeur/Händler/Betreiber/Service).",
        regulatory_domains_unblocked=["cyber", "machine_safety", "data"],
        answer_type=AnswerType.ENUM,
        options=_ROLE_OPTIONS,
        priority=QuestionPriority.P0,
    ),
    NavigatorQuestion(
        question_id="lifecycle_phase",
        target_field="lifecycle_phase",
        label="In welcher Lebenszyklusphase betrachten Sie das Produkt?",
        why_needed="Manche Pflichten greifen nur beim Inverkehrbringen oder in der Wartung.",
        regulatory_domains_unblocked=["cyber", "machine_safety"],
        answer_type=AnswerType.ENUM,
        options=_PHASE_OPTIONS,
        priority=QuestionPriority.P0,
    ),
    NavigatorQuestion(
        question_id="is_machine",
        target_field="is_machine",
        label="Ist das Produkt eine (vollständige) Maschine?",
        why_needed="Entscheidet die Anwendbarkeit der Maschinenverordnung.",
        regulatory_domains_unblocked=["machine_safety"],
        answer_type=AnswerType.BOOL,
        priority=QuestionPriority.P0,
    ),
    NavigatorQuestion(
        question_id="is_component",
        target_field="is_component",
        label="Ist das Produkt ein Bauteil / eine unvollständige Maschine?",
        why_needed="Sicherheitsbauteil vs. vollständige Maschine ändert die Pflichten.",
        regulatory_domains_unblocked=["machine_safety"],
        answer_type=AnswerType.BOOL,
        priority=QuestionPriority.P0,
    ),
    # ── P1: unblock one specific domain ───────────────────────────────
    NavigatorQuestion(
        question_id="has_radio_module",
        target_field="has_radio_module",
        label="Enthält das Produkt ein Funkmodul (WLAN/Bluetooth/Mobilfunk)?",
        why_needed="Ein Funkmodul löst die Funkanlagen-Richtlinie (RED) aus.",
        regulatory_domains_unblocked=["radio"],
        answer_type=AnswerType.BOOL,
        priority=QuestionPriority.P1,
    ),
    NavigatorQuestion(
        question_id="generates_usage_data",
        target_field="generates_usage_data",
        label="Erzeugt das vernetzte Produkt nutzbare Produkt-/Nutzungsdaten?",
        why_needed="Erzeugte Nutzungsdaten entscheiden über Data-Act-Pflichten.",
        regulatory_domains_unblocked=["data"],
        answer_type=AnswerType.BOOL,
        priority=QuestionPriority.P1,
    ),
    NavigatorQuestion(
        question_id="has_security_function",
        target_field="has_security_function",
        label="Hat das Produkt eine dedizierte Security-Funktion (gegen böswillige Akteure)?",
        why_needed="Trennt Security- von Safety-Funktion (CRA vs. MaschinenVO).",
        regulatory_domains_unblocked=["cyber", "machine_safety"],
        answer_type=AnswerType.BOOL,
        priority=QuestionPriority.P1,
    ),
    NavigatorQuestion(
        question_id="env_wastewater",
        target_field="environmental.discharges_to_wastewater",
        label="Gibt das Produkt Stoffe an Wasser / Abwasser ab?",
        why_needed="Abwassereinleitung löst Abwasser-/Gewässerrecht aus.",
        regulatory_domains_unblocked=["environment_water"],
        answer_type=AnswerType.BOOL,
        priority=QuestionPriority.P1,
    ),
    NavigatorQuestion(
        question_id="env_air",
        target_field="environmental.emits_to_air",
        label="Entstehen Luftemissionen (VOC, Staub, Verbrennung, Aerosole)?",
        why_needed="Luftemissionen lösen Immissionsschutzrecht aus.",
        regulatory_domains_unblocked=["environment_air"],
        answer_type=AnswerType.BOOL,
        priority=QuestionPriority.P1,
    ),
    NavigatorQuestion(
        question_id="env_chemicals",
        target_field="environmental.uses_cleaning_chemicals",
        label="Werden Reinigungs-, Desinfektions- oder Biozidmittel verwendet/mitgeliefert?",
        why_needed="Chemikalien lösen REACH/CLP/Detergenzien-/Biozidrecht aus.",
        regulatory_domains_unblocked=["chemicals"],
        answer_type=AnswerType.BOOL,
        priority=QuestionPriority.P1,
    ),
    # ── P2: refinement ────────────────────────────────────────────────
    NavigatorQuestion(
        question_id="components",
        target_field="components",
        label="Aus welchen wesentlichen Komponenten besteht das Produkt?",
        why_needed="Eine strukturierte Stückliste verfeinert komponenten-abgeleitete Pflichten.",
        regulatory_domains_unblocked=["radio", "emv", "environment_water", "chemicals"],
        answer_type=AnswerType.COMPONENT_LIST,
        priority=QuestionPriority.P2,
    ),
 ]
@@ -0,0 +1,72 @@
 """Smart Onboarding Advisor — the onboarding runtime step (orchestration over existing engines).
 Turns (company + products + certifications + target) into inferred assumptions, the next best questions
 (<=5, each self-explaining), the capability delta, top measures, evidence requests and completeness —
 with NO sales interpretation and NO regulation picking. Orchestrator only: no new engine/registry/
 meta-model; certificate->capability hypotheses and target requirements are INJECTED.
 """
 from __future__ import annotations
 from .engine import advisor_start, apply_answer
 from .hypotheses import (
    CapabilityHypothesis,
    inferred_hypotheses,
    resolve_for_certifications,
 )
 from .observations import (
    Observation,
    ObservationType,
    empirical_confidence,
    empirical_distribution,
    reviewed,
 )
 from .signals import (
    ProducedSignal,
    SignalVocabularyEntry,
    normalize_signals,
 )
 from .silent_intake import (
    DetectedCapability,
    IntakeSignal,
    ProductFact,
    SignalMapping,
    SilentIntakeResult,
    silent_intake,
 )
 from .schemas import (
    AdvisorMeasure,
    AdvisorQuestion,
    AdvisorResult,
    InferredAssumption,
    OnboardingInput,
    RejectedAssumption,
 )
 # Public API of the onboarding package: every name listed here is imported above from the
 # engine / hypotheses / observations / signals / silent_intake / schemas submodules.
 __all__ = [
    "advisor_start",
    "apply_answer",
    "OnboardingInput",
    "AdvisorResult",
    "AdvisorQuestion",
    "AdvisorMeasure",
    "InferredAssumption",
    "RejectedAssumption",
    "CapabilityHypothesis",
    "inferred_hypotheses",
    "resolve_for_certifications",
    "Observation",
    "ObservationType",
    "empirical_distribution",
    "empirical_confidence",
    "reviewed",
    "silent_intake",
    "IntakeSignal",
    "SignalMapping",
    "DetectedCapability",
    "ProductFact",
    "SilentIntakeResult",
    "ProducedSignal",
    "SignalVocabularyEntry",
    "normalize_signals",
 ]
@@ -0,0 +1,159 @@
 """Smart Onboarding Advisor — orchestration over the existing engines (the onboarding runtime step).
 The point of the whole platform, made usable: the user types company + products + certifications +
 target, and the system does the rest — no sales interpretation, no regulation picking. This is an
 ORCHESTRATOR, not a new engine: it wires Company 2A (Evidence -> Capability), RS-005 (Capability ->
 Delta), optimization (Delta -> Roadmap) and completeness into one onboarding flow.
 Three principles it must honour (acceptance criteria):
  - Multi-cert works; a profile is built from ALL certificates.
  - relevance(evidence, target): ISO 14001 is NOT falsely relevant to the CRA; ISO 27001/TISAX REDUCE
    questions but satisfy NOTHING automatically (Welt-1 -> verification_required).
  - Only the NEXT BEST questions (<= 5), each explaining WHY; every answer updates the profile.
 Certificate -> probable-capability hypotheses and the target's required capabilities are INJECTED (the
 hypotheses are curated knowledge, not in this code). No corpus loaded here. Python 3.9 compatible.
 """
 from __future__ import annotations
 from typing import Dict, List, Optional, Sequence
 from ..company import (
    CapabilityMappingEntry,
    Certification,
    CompanyCapabilityProfile,
    CompanyContext,
    build_company_profile,
 )
 from ..completeness import assess_completeness
 from ..optimization import roadmap_from_delta
 from ..reasoning.enums import Confidence
 from ..transition_reasoning import (
    CoverageStatus,
    TargetRequirement,
    TransitionContext,
    TransitionGoal,
    assess_transition,
 )
 from .schemas import (
    AdvisorMeasure,
    AdvisorQuestion,
    AdvisorResult,
    InferredAssumption,
    OnboardingInput,
    RejectedAssumption,
 )
 # Question-ranking weights used by advisor_start.
 # _GAIN is looked up by a question request's `information_gain.value` (fallback 1 for an unlisted value).
 _GAIN = {"high": 3, "medium": 2, "low": 1}
 # _RISK is looked up by a question request's `priority.value` (fallback 0 for an unlisted value).
 _RISK = {"high": 2, "medium": 1, "low": 0}
 def _profile(
    inp: OnboardingInput, cert_hypotheses: Dict[str, List[str]],
    detected: Optional[Sequence[str]] = None,
 ) -> CompanyCapabilityProfile:
    """Build the company capability profile from held certificates plus silently detected capabilities.

    Only hypotheses for certificates listed in `inp.certifications` (and with a non-empty capability
    list) are used; each is mapped with MEDIUM confidence. If `detected` is non-empty, its de-duplicated
    ids are added under the synthetic certificate id "__detected__" with HIGH confidence.
    """
    mapping: Dict[str, CapabilityMappingEntry] = {}
    for cert_id, cap_ids in cert_hypotheses.items():
        # skip hypotheses for certificates the company does not hold, and empty hypotheses
        if cert_id not in inp.certifications or not cap_ids:
            continue
        mapping[cert_id] = CapabilityMappingEntry(
            capability_ids=list(cap_ids), confidence=Confidence.MEDIUM)
    held = [Certification(certification_id=cert_id) for cert_id in mapping]
    if detected:
        # Silent Pass: concrete findings are registered as a pseudo-certificate with HIGH confidence
        unique_detected = list(dict.fromkeys(detected))
        mapping["__detected__"] = CapabilityMappingEntry(
            capability_ids=unique_detected, confidence=Confidence.HIGH)
        held.append(Certification(certification_id="__detected__"))
    context = CompanyContext(company_id=inp.company or "company", certifications=held)
    return build_company_profile(context, mapping)
 def advisor_start(
    inp: OnboardingInput,
    cert_hypotheses: Dict[str, List[str]],
    target_requirements: Sequence[TargetRequirement],
    target_id: str = "target",
    covers_targets: Optional[Dict[str, List[str]]] = None,
    corpus_status: Optional[Dict[str, str]] = None,
    uncertain: Optional[List[Dict[str, str]]] = None,
    detected_capabilities: Optional[Sequence[str]] = None,
    indicative_capabilities: Optional[Sequence[str]] = None,
 ) -> AdvisorResult:
    """Run the onboarding flow: (silent intake +) certs -> profile -> delta -> ranked questions + measures.

    Pure orchestration over the injected inputs; nothing is loaded here.

    Args:
        inp: the onboarding input (company, certifications, known evidence, target ...).
        cert_hypotheses: certificate id -> probable capability ids (curated, injected).
        target_requirements: the capabilities the target requires (injected).
        target_id: id used for the transition goal and as completeness fallback when `inp.target` is empty.
        covers_targets: capability id -> requirements it closes; its length is the leverage term of the
            question score and it is passed on to the roadmap for the missing capabilities.
        corpus_status / uncertain: passed through to the completeness assessment.
        detected_capabilities: capabilities recognised WITHOUT asking (Silent Knowledge Pass); they are
            fed into the profile, so they shrink the delta and remove questions.
        indicative_capabilities: capabilities backed only by a partial signal; they are NOT fed into the
            profile (the gap stays open and is still asked) and are only surfaced as `indications`.

    Returns:
        An AdvisorResult with inferred/rejected assumptions, auto-detected capabilities, indications,
        at most 5 next best questions, the capability delta, at most 5 measures, evidence requests,
        unsupported domains, the completeness summary and a headline.
    """
    covers_targets = covers_targets or {}
    required = {r.capability_id for r in target_requirements}
    profile = _profile(inp, cert_hypotheses, detected_capabilities)
    auto_detected = sorted(set(detected_capabilities or []) & required)
    # built once and reused below (was rebuilt for every element of the `probably` filter)
    auto_detected_set = set(auto_detected)
    # partial/indicative signals raise assumption strength but are NOT fed into the profile -> the gap
    # stays open and is still asked. Surface only those still relevant and NOT already auto-detected.
    indications = sorted((set(indicative_capabilities or []) & required) - auto_detected_set)
    assess = assess_transition(
        TransitionContext(company_id=inp.company or "company", target=TransitionGoal(target_id=target_id)),
        list(target_requirements), profile)
    # inferred (Welt-1): per cert, the caps it probably provides that are RELEVANT to this target
    inferred: List[InferredAssumption] = []
    rejected: List[RejectedAssumption] = []
    for cert in inp.certifications:
        caps = set(cert_hypotheses.get(cert, []))
        relevant = sorted(caps & required)
        if relevant:
            inferred.append(InferredAssumption(
                certification=cert, capabilities=relevant,
                statement="%s legt %d relevante Fähigkeit(en) nahe — Verifikation erforderlich, nicht automatisch erfüllt"
                % (cert, len(relevant))))
        elif caps:
            # the cert has hypotheses, but none of them touches a required capability
            rejected.append(RejectedAssumption(
                certification=cert,
                statement="%s ist für dieses Ziel nicht relevant" % cert,
                reason="relevance(evidence, target) = 0 — keine geforderte Fähigkeit abgedeckt"))
    # next best questions (<=5): re-rank the RS-005 requests by info gain + leverage + risk + evidence-gap
    known_ev = set(inp.known_evidence)
    scored = []
    for q in assess.question_requests:
        lev = len(covers_targets.get(q.capability_id, []))
        # +1 when evidence is expected but none of it is among the already known evidence
        ev_missing = 1 if (q.expected_evidence and not (set(q.expected_evidence) & known_ev)) else 0
        score = _GAIN.get(q.information_gain.value, 1) + lev + _RISK.get(q.priority.value, 0) + ev_missing
        scored.append((score, q))
    # highest score first; ties broken by capability id so the order is deterministic
    scored.sort(key=lambda x: (-x[0], x[1].capability_id))
    next_q = [
        AdvisorQuestion(capability_id=q.capability_id, question_intent=q.question_intent, why=q.reason,
                        information_value=float(s), priority=q.priority.value)
        for s, q in scored[:5]
    ]
    delta = sorted({c.capability_id for c in assess.coverage if c.status == CoverageStatus.MISSING})
    plan = roadmap_from_delta(assess, {c: covers_targets.get(c, []) for c in delta})
    measures = [AdvisorMeasure(capability_id=m.capability_id, leverage=m.leverage, closes=m.covers)
                for m in plan.ranked_measures[:5]]
    evidence = sorted({e for q in assess.question_requests for e in q.expected_evidence})
    applicable = list(inp.target) or [target_id]
    rep = assess_completeness(applicable, corpus_status or {}, uncertain=uncertain or [])
    unsupported = [e.subject for e in rep.exclusions]
    # "probably covered" excludes what the silent intake already detected, so the headline counts stay disjoint
    probably = [c for c in assess.summary.probably_covered if c not in auto_detected_set]
    return AdvisorResult(
        inferred_assumptions=inferred, rejected_assumptions=rejected, auto_detected=auto_detected,
        indications=indications,
        next_best_questions=next_q, capability_delta=delta, top_measures=measures,
        evidence_requests=evidence, unsupported_domains=unsupported,
        completeness_summary=rep.completeness_summary,
        headline="%d Anforderungen erkannt · %d automatisch erkannt (Intake) · %d wahrscheinlich (Zertifikate) · %d zu klären"
        % (len(assess.coverage), len(auto_detected), len(probably), len(next_q)))
 def apply_answer(known_capabilities: Sequence[str], capability_id: str, answer: str) -> List[str]:
    """Fold one answer into the known-capability list and return the new list.

    The input is de-duplicated (first occurrence kept, order preserved). Only the answer "confirmed"
    adds `capability_id` (appended at the end, unless already present); any other answer, e.g.
    "rejected" or "unknown", leaves the set as it is, so the gap stays open on the next run.
    """
    merged = [*dict.fromkeys(known_capabilities)]
    if answer != "confirmed":
        return merged
    if capability_id in merged:
        return merged
    return merged + [capability_id]
@@ -0,0 +1,54 @@
 """Certification Capability Hypotheses — capability-centric, with EMPIRICAL (computed) confidence.
 Each hypothesis is its own knowledge object: "IF a company holds one of `supported_by` certs, we EXPECT
 `capability` (verification required)" — Welt-1, never "erfüllt". Written ONCE per capability with a list
 of supporting certs (reuse, not redundancy), so multi-certification merges AUTOMATICALLY.
 `confidence` is NOT an expert/LLM score: it is COMPUTED from REVIEWED real-onboarding observations
 ((confirmed + 0.5*partial) / (confirmed+partial+refuted); see observations.py), `None` until any are
 seen. This is the empirical learning loop — the long-term moat. The library is DATA, loaded outside
 this module and injected. Python 3.9 compatible.
 """
 from __future__ import annotations
 from typing import Dict, List, Sequence
 from pydantic import BaseModel, Field
 # One curated "certificate(s) -> expected capability" statement. The functions in this module read
 # only `capability` and `supported_by`; the remaining fields are carried as data for consumers.
 class CapabilityHypothesis(BaseModel):
    """Curated knowledge only. Confidence is NOT stored here — it is computed from the reviewed
    observation stream (see observations.py); a raw answer never changes a hypothesis (review gate)."""
    id: str                                                      # hypothesis id
    capability: str                                              # the capability id this hypothesis expects
    supported_by: List[str] = Field(default_factory=list)        # certifications that suggest this capability
    relationship: str = "supports"                               # supports / partially_supports
    verification_required: bool = True                           # Welt-1: never auto-satisfied
    question_intent: str = "verify_existence"
    expected_evidence: List[str] = Field(default_factory=list)
    kind: str = "shared"                                         # shared / specific
 def inferred_hypotheses(
    certifications: Sequence[str], library: Sequence[CapabilityHypothesis]
 ) -> List[CapabilityHypothesis]:
    """Select the hypotheses supported by at least one held certificate (the automatic multi-cert merge).

    Library order is preserved; a hypothesis is returned once even if several held certificates
    support it.
    """
    held = set(certifications)
    matching: List[CapabilityHypothesis] = []
    for hypothesis in library:
        # keep the hypothesis as soon as one supporting certificate is held
        if not held.isdisjoint(hypothesis.supported_by):
            matching.append(hypothesis)
    return matching
 def resolve_for_certifications(
    certifications: Sequence[str], library: Sequence[CapabilityHypothesis]
 ) -> Dict[str, List[str]]:
    """Turn the capability-centric library into the Advisor's `cert -> [capability]` mapping.

    Only held certificates appear as keys (sorted); each value lists the suggested capabilities
    without duplicates, in library order.
    """
    held = set(certifications)
    per_cert: Dict[str, List[str]] = {}
    for hypothesis in library:
        for cert in hypothesis.supported_by:
            if cert not in held:
                continue
            suggested = per_cert.setdefault(cert, [])
            if hypothesis.capability not in suggested:
                suggested.append(hypothesis.capability)
    # keys are unique, so sorting the items orders the result by certificate id only
    return dict(sorted(per_cert.items()))
@@ -0,0 +1,85 @@
 """Observation Model — the empirical learning unit (Task 59a: model BEFORE persistence/API).
 The learning point is NOT the hypothesis, it is the QUESTION. A hypothesis ("ISO 27001 suggests supplier
 management") produces a question ("Is there a documented supplier-security process?"), and the answer is
 rarely binary — "yes" / "no" / "partial, only critical suppliers" / "certified but not lived" are very
 different observations. So the chain is:
    Hypothesis -> Question -> Observation -> (Review) -> Hypothesis
 Two principles (durable):
  - Richer than confirmed/refuted: an Observation carries an `observation_type` (confirmed / partial /
    refuted / not_applicable / unknown), a free-text answer, a scope_note ("only critical suppliers"),
    and whether evidence was uploaded.
  - REVIEW GATE: a raw answer NEVER changes a hypothesis directly. Only REVIEWED observations calibrate;
    otherwise the system learns from outliers. Hypotheses stay curated knowledge; confidence is COMPUTED
    from the reviewed observation stream (keyed by hypothesis id), not stored on the hypothesis.
 This module defines the model + the deterministic statistics it enables (a DISTRIBUTION, not a single
 %). Persistence (store), aggregation across customers and hypothesis calibration are later tasks
 (59b/c/d). Pure, no I/O. Python 3.9 compatible.
 """
 from __future__ import annotations
 from enum import Enum
 from typing import Dict, List, Optional, Sequence
 from pydantic import BaseModel, Field
 # Classification of one observation. Mixing in `str` makes each member compare equal to its string
 # value; the `.value` strings are the keys of the dict returned by empirical_distribution.
 class ObservationType(str, Enum):
    CONFIRMED = "confirmed"
    PARTIAL = "partial"
    REFUTED = "refuted"
    NOT_APPLICABLE = "not_applicable"       # excluded from the confidence denominator
    UNKNOWN = "unknown"                     # excluded from the confidence denominator; the Observation default
 # The statistics functions below read only `observation_type` and `reviewed`; the other fields
 # carry context for review and aggregation.
 class Observation(BaseModel):
    """One real-onboarding answer to one hypothesis-driven question. The raw empirical unit."""
    hypothesis_id: str                                  # id of the hypothesis that produced the question
    capability: str = ""                                # denormalised for convenient aggregation
    question: str = ""                                  # the question that was actually asked
    answer: str = ""                                    # the customer's raw answer (free text)
    observation_type: ObservationType = ObservationType.UNKNOWN
    scope_note: Optional[str] = None                    # "only critical suppliers" / "only DE" / "not lived"
    evidence_uploaded: bool = False
    reviewed: bool = False                              # the review gate: only reviewed obs calibrate
    reviewed_by: Optional[str] = None
 # observation types that count as evidence for/against the capability (n/a + unknown do not)
 # NOTE(review): not referenced in the visible part of this module — empirical_confidence spells out
 # the same three types by hand; confirm whether another caller uses this tuple.
 _FOR_AGAINST = (ObservationType.CONFIRMED, ObservationType.PARTIAL, ObservationType.REFUTED)
 def empirical_distribution(
    observations: Sequence[Observation], reviewed_only: bool = True
 ) -> Dict[str, int]:
    """Tally observations by type and return the full DISTRIBUTION rather than a single percentage.

    The result has one key per ObservationType value (zero when nothing was seen). With
    `reviewed_only` (the default) unreviewed observations are ignored — the review gate.
    """
    counts = dict.fromkeys((kind.value for kind in ObservationType), 0)
    for obs in observations:
        if reviewed_only and not obs.reviewed:
            continue                                    # unreviewed answers never calibrate
        counts[obs.observation_type.value] += 1
    return counts
 def empirical_confidence(
    observations: Sequence[Observation], reviewed_only: bool = True
 ) -> Optional[float]:
    """Empirical confidence: (confirmed + 0.5*partial) / (confirmed + partial + refuted), rounded to 2 places.

    `not_applicable` and `unknown` observations are not evidence either way and stay out of the
    denominator. Returns `None` while no for/against observation has been counted.
    """
    dist = empirical_distribution(observations, reviewed_only)
    confirmed = dist[ObservationType.CONFIRMED.value]
    partial = dist[ObservationType.PARTIAL.value]
    refuted = dist[ObservationType.REFUTED.value]
    decisive = confirmed + partial + refuted
    if decisive == 0:
        return None
    # a partial observation counts as half a confirmation
    return round((confirmed + 0.5 * partial) / decisive, 2)
 def reviewed(observations: Sequence[Observation]) -> List[Observation]:
    """Return the calibration set: the observations whose `reviewed` flag is set, in input order."""
    return list(filter(lambda obs: obs.reviewed, observations))
@@ -0,0 +1,64 @@
 """Schemas for the Smart Onboarding Advisor — the onboarding RUNTIME step.
 DTOs only. The Advisor ORCHESTRATES the existing engines (Company 2A, RS-005, optimization,
 completeness) — no new reasoning engine, no new capability registry, no new meta-model. Welt-1
 discipline: a certificate yields PROBABLE capabilities (verification required), never "erfüllt".
 Python 3.9 compatible (no `|` unions).
 """
 from __future__ import annotations
 from typing import List, Optional
 from pydantic import BaseModel, Field
 # What the user types in at onboarding. Every field has a default, so an empty input is valid.
 class OnboardingInput(BaseModel):
    company: str = ""
    industry: Optional[str] = None
    products: List[str] = Field(default_factory=list)
    markets: List[str] = Field(default_factory=list)
    certifications: List[str] = Field(default_factory=list)  # held certificate ids
    known_evidence: List[str] = Field(default_factory=list)  # evidence ids already in hand
    target: List[str] = Field(default_factory=list)          # informational; the delta uses injected requirements
 # A certificate that probably provides target-relevant capabilities (an assumption, not a proof).
 class InferredAssumption(BaseModel):
    certification: str
    capabilities: List[str] = Field(default_factory=list)    # RELEVANT-to-target caps the cert probably provides
    verification_required: bool = True                       # Welt-1: never auto-satisfied
    statement: str = ""                                      # human-readable sentence for the UI
 # A certificate judged not relevant to the target, with the reason why.
 class RejectedAssumption(BaseModel):
    certification: Optional[str] = None
    statement: str = ""                                      # human-readable sentence for the UI
    reason: str = ""                                         # e.g. "relevance(evidence, target) = 0"
 # One of the "next best questions" shown to the user.
 class AdvisorQuestion(BaseModel):
    capability_id: str                                       # the capability the question is about
    question_intent: str
    why: str                                                 # every question explains itself
    information_value: float = 0.0                           # deterministic rank score
    priority: str = "medium"
 # One recommended measure: a capability to implement, with its leverage and what it closes.
 class AdvisorMeasure(BaseModel):
    capability_id: str
    leverage: int = 0
    closes: List[str] = Field(default_factory=list)
 # Everything the Advisor returns for one onboarding run. All fields default to empty.
 class AdvisorResult(BaseModel):
    inferred_assumptions: List[InferredAssumption] = Field(default_factory=list)
    rejected_assumptions: List[RejectedAssumption] = Field(default_factory=list)
    auto_detected: List[str] = Field(default_factory=list)                     # detected (concrete artifact): recognised w/o asking
    indications: List[str] = Field(default_factory=list)                       # partial signal: raises assumption strength, STILL asked
    next_best_questions: List[AdvisorQuestion] = Field(default_factory=list)   # max 5
    capability_delta: List[str] = Field(default_factory=list)                  # capability ids still missing
    top_measures: List[AdvisorMeasure] = Field(default_factory=list)
    evidence_requests: List[str] = Field(default_factory=list)
    unsupported_domains: List[str] = Field(default_factory=list)
    completeness_summary: str = ""
    headline: str = ""                                       # "N Anforderungen erkannt · A automatisch erkannt (Intake) · M wahrscheinlich (Zertifikate) · K zu klären"
@@ -0,0 +1,73 @@
 """Signal Producer interface + Normalizer — one signal language, but TWO signal KINDS.
 The platform already HAS scanners (website, repo/code, SBOM, security headers, TLS, SPF/DKIM/DMARC,
 document analysis, RAG over uploads, product classification). The Silent Pass does not want a
 WebsiteScanner or a RepoScanner — it wants their UNIFIED output. So every source (a scanner, a PDF
 parser, a tender parser, an OEM spec, an API, or the user) emits the SAME `ProducedSignal`
 {signal_id, source_type, kind, confidence, evidence, provenance}, and `normalize_signals` reduces
 producer-specific ids to ONE canonical signal via a vocabulary (id + aliases + kind) — exactly the
 Requirement-Source / MCAP / regulation-alias pattern. The Silent Pass then never gets per-scanner logic.
 CRITICAL — a signal is one of two KINDS, and they NEVER substitute for each other:
  observation = "I SAW X"          — a repo with an SBOM, a published security.txt, a risk-assessment PDF.
  requirement = "someone DEMANDS X" — a tender clause `requires_sbom`, an OEM spec `supplier_requires_psirt`.
 A demanded SBOM is NOT a present SBOM. `kind` is carried on the canonical VOCABULARY entry (authoritative),
 so even a mislabelled producer signal cannot collapse the two. The Silent Pass consumes ONLY observations;
 requirement signals are preserved and feed the required-set / prioritisation later. This Observation-vs-
 Requirement split is the very one the Requirements Verification Platform rests on: Observations (reality)
 vs Requirements (targets); their comparison IS the delta. Pure, deterministic, no I/O. Python 3.9 compatible.
 """
 from __future__ import annotations
 from typing import Dict, List, Optional, Sequence
 from pydantic import BaseModel, Field
 from .silent_intake import IntakeSignal
 # Input of normalize_signals: `signal_id` is resolved to its canonical id, the other fields are
 # copied onto the resulting IntakeSignal.
 class ProducedSignal(BaseModel):
    """What ANY signal producer emits — the common interface every source agrees on."""
    signal_id: str                              # raw or canonical id the producer used
    source_type: str = ""                       # website / repository / document / product / tender / oem / user / api
    kind: str = ""                              # "observation" | "requirement"; empty -> resolved from the vocabulary
    confidence: float = 1.0
    evidence: Optional[str] = None              # the artifact found (already in hand)
    provenance: str = ""                        # url / filename / tender clause / "customer statement"
 # Vocabulary row used by normalize_signals: both `id` and every alias resolve to `id`.
 class SignalVocabularyEntry(BaseModel):
    """One canonical signal + its aliases + its KIND (the authoritative observation/requirement label)."""
    id: str                                     # canonical signal id
    kind: str = "observation"                   # "observation" (I saw X) | "requirement" (someone DEMANDS X)
    aliases: List[str] = Field(default_factory=list)   # producer-specific ids that map to `id`
 def normalize_signals(
    produced: Sequence[ProducedSignal], vocabulary: Sequence[SignalVocabularyEntry]
 ) -> List[IntakeSignal]:
    """Map producer-specific signals onto the canonical IntakeSignal stream (alias resolution).

    The kind of the canonical vocabulary entry wins over whatever the producer declared, so a
    requirement can never be relabelled as an observation. A signal id the vocabulary does not know
    passes through unchanged and keeps the producer-declared kind (falling back to "observation").
    Output order follows `produced`; confidence, evidence and provenance are carried over.
    """
    canonical_of: Dict[str, str] = {}
    vocab_kind: Dict[str, str] = {}
    for entry in vocabulary:
        canonical_of[entry.id] = entry.id
        vocab_kind[entry.id] = entry.kind
        canonical_of.update(dict.fromkeys(entry.aliases, entry.id))
    normalized: List[IntakeSignal] = []
    for signal in produced:
        canonical_id = canonical_of.get(signal.signal_id, signal.signal_id)
        resolved_kind = vocab_kind.get(canonical_id)
        if not resolved_kind:
            # unknown to the vocabulary (or empty kind there): trust the producer, default observation
            resolved_kind = signal.kind or "observation"
        normalized.append(IntakeSignal(
            source=signal.source_type, signal=canonical_id, kind=resolved_kind,
            confidence=signal.confidence, evidence=signal.evidence, provenance=signal.provenance))
    return normalized
@@ -0,0 +1,124 @@
 """Silent Knowledge Pass — recognise everything possible BEFORE asking a single question (Phase 0).
 The Advisor can say "I need 5 answers" but does not yet decide WHAT it can find out by itself. The Silent
 Pass runs first: from signals that existing scanners/parsers already produce (website, repository,
 documents, product data) it deterministically derives capabilities the company demonstrably HAS and
 product facts that drive scope — so every recognised item shrinks the delta and removes a question.
 The customer then experiences "we already recognised 11 of 17 — only these 4 remain" instead of a
 question wall. This is NOT new architecture: it is one orchestration step in front of the Advisor
  Company -> Silent Intake -> Company Profile -> Hypotheses -> Delta -> Top Questions
 All building blocks already exist. SIGNALS are INJECTED (the scanners produce them); the signal->capability
 map is curated DATA, also injected. Pure, deterministic, no I/O. Python 3.9 compatible.
 """
 from __future__ import annotations
 from typing import Dict, List, Optional, Sequence, Set
 from pydantic import BaseModel, Field
 # silent_intake reads `signal` (mapping lookup), `kind` (observation vs. requirement), `source`,
 # `confidence` and `provenance`.
 class IntakeSignal(BaseModel):
    """A CANONICAL signal the Silent Pass consumes. Producer-agnostic: the same `signal` may have come
    from a website, a repo, a PDF, a tender or the user — normalize_signals() unified them (see signals.py)."""
    source: str                                 # source_type: website / repository / document / product / tender / user
    signal: str                                 # CANONICAL signal id, e.g. "sbom_present"
    kind: str = "observation"                   # "observation" (I saw X) | "requirement" (someone DEMANDS X)
    confidence: float = 1.0                     # carried from the producer
    evidence: Optional[str] = None              # the artifact already in hand
    provenance: str = ""                        # where it came from (url / filename / tender clause) — audit trail
    detail: str = ""                            # free-text (kept for back-compat)
 # NOTE: silent_intake checks `capability` and `product_fact` independently, so a mapping that sets
 # both contributes both.
 class SignalMapping(BaseModel):
    """Curated: what a signal lets us conclude. A signal yields a capability OR a product fact."""
    signal: str                                 # canonical signal id this mapping applies to
    capability: Optional[str] = None            # capability the signal evidences
    relationship: str = "detected"              # detected (concrete artifact) / partial (indicative)
    evidence: Optional[str] = None              # the artifact found (already in hand -> no upload needed)
    product_fact: Optional[str] = None          # e.g. "connected_to_internet"
    fact_value: str = "true"                    # value recorded for `product_fact`
    rationale: str = ""                         # curated note: WHY only indicative (esp. for partial mappings)
 # One capability derived by the Silent Pass, either concretely detected or merely indicated.
 class DetectedCapability(BaseModel):
    capability: str
    relationship: str = "detected"              # copied from the mapping: "detected" or an indicative value such as "partial"
    source: str = ""                            # which signal/source detected it (audit trail)
    evidence: Optional[str] = None              # the mapping's evidence artifact, if any
    confidence: float = 1.0                     # carried from the producing signal
    provenance: str = ""                        # where the signal came from
 # A scope-driving product fact derived from a signal (key/value, both strings).
 class ProductFact(BaseModel):
    key: str                                    # the mapping's `product_fact`
    value: str = "true"                         # the mapping's `fact_value`
    source: str = ""                            # source of the signal that produced the fact
 class SilentIntakeResult(BaseModel):
    detected_capabilities: List[DetectedCapability] = Field(default_factory=list)
    product_facts: List[ProductFact] = Field(default_factory=list)
    evidence_found: List[str] = Field(default_factory=list)
    requirements_seen: List[str] = Field(default_factory=list)   # requirement-kind signals — preserved, NOT present
    summary: str = ""

    def capability_ids(self) -> List[str]:
        """Sorted, unique ids of the capabilities whose relationship is exactly "detected".

        These are the ones handed to the Advisor as already present (they reduce the delta and are
        not asked). Merely partial/indicative entries are left out — see indicative_capability_ids.
        """
        concrete: Set[str] = set()
        for item in self.detected_capabilities:
            if item.relationship == "detected":
                concrete.add(item.capability)
        return sorted(concrete)

    def indicative_capability_ids(self) -> List[str]:
        """Sorted, unique ids of the capabilities whose relationship is anything other than "detected".

        They strengthen an assumption but never replace a question: the gap stays open and is still
        asked, just with the indication shown.
        """
        indicative: Set[str] = set()
        for item in self.detected_capabilities:
            if item.relationship != "detected":
                indicative.add(item.capability)
        return sorted(indicative)
 def silent_intake(
    signals: Sequence[IntakeSignal], signal_map: Sequence[SignalMapping]
 ) -> SilentIntakeResult:
    """Derive capabilities + product facts from injected scanner signals (deterministic, no questions).

    Each observation-kind signal is matched to the curated mappings by `signal` id. A mapping may
    contribute a capability (plus optional evidence already in hand) and/or a product fact.
    Signals of any other kind are only recorded in `requirements_seen` and never become a capability.

    Per capability the first matching mapping is kept, with one exception: a "detected" mapping
    replaces an earlier non-detected (partial/indicative) entry. A concrete artifact therefore wins
    regardless of signal order, and a capability is reported as an indication only when NO detected
    mapping backs it. For product facts the last matching mapping wins.

    Args:
        signals: canonical intake signals (see normalize_signals).
        signal_map: curated signal -> capability / product-fact mappings.

    Returns:
        SilentIntakeResult with capabilities and facts sorted by id/key, sorted evidence and
        requirement ids, and a German summary line with the separate counts.
    """
    by_signal: Dict[str, List[SignalMapping]] = {}
    for m in signal_map:
        by_signal.setdefault(m.signal, []).append(m)
    caps: Dict[str, DetectedCapability] = {}
    facts: Dict[str, ProductFact] = {}
    evidence: Set[str] = set()
    requirements: Set[str] = set()
    for s in signals:
        if s.kind != "observation":             # a requirement describes a TARGET, never the present state
            requirements.add(s.signal)          # preserved + visible, but NEVER turned into a capability
            continue
        for m in by_signal.get(s.signal, []):
            if m.capability:
                known = caps.get(m.capability)
                # a concrete artifact upgrades an entry that so far was only indicative
                is_upgrade = (
                    known is not None
                    and known.relationship != "detected"
                    and m.relationship == "detected"
                )
                if known is None or is_upgrade:
                    caps[m.capability] = DetectedCapability(
                        capability=m.capability, relationship=m.relationship,
                        source="%s:%s" % (s.source, s.signal), evidence=m.evidence,
                        confidence=s.confidence, provenance=s.provenance)
                    if m.evidence:
                        evidence.add(m.evidence)
            if m.product_fact:
                facts[m.product_fact] = ProductFact(key=m.product_fact, value=m.fact_value, source=s.source)
    detected = [caps[k] for k in sorted(caps)]
    product_facts = [facts[k] for k in sorted(facts)]
    requirements_seen = sorted(requirements)
    n_detected = sum(1 for d in detected if d.relationship == "detected")   # concrete artifacts -> auto-detected
    n_indication = len(detected) - n_detected                               # partial -> indication, still asked
    summary = (
        "Stille Vorbefüllung: %d Fähigkeit(en) automatisch erkannt, %d Indikation(en), %d Produktfakt(en), "
        "%d Nachweis(e) bereits vorhanden, %d Anforderung(en) erkannt (nicht als vorhanden gewertet)."
        % (n_detected, n_indication, len(product_facts), len(evidence), len(requirements_seen))
    )
    return SilentIntakeResult(
        detected_capabilities=detected, product_facts=product_facts,
        evidence_found=sorted(evidence), requirements_seen=requirements_seen, summary=summary)
@@ -0,0 +1,21 @@
 """Regulatory Optimization — the Roadmap / Management renderer of the Capability Delta Engine.
 Ranks the OPEN Capability Delta (from RS-005) by regulatory leverage: which measure closes the
 most regulatory requirements at once. Answers the Geschäftsführer question "Womit anfangen?".
 Pure, deterministic, computed-not-stored. Consumes the RS-005 delta (acyclic dependency); the
 delta engine stays hermetic. No new corpus, no new meta-model class (freeze v1.0).
 """
 from __future__ import annotations
 from .engine import regulatory_leverage, roadmap_from_delta, select_within_budget
 from .schemas import BudgetPlan, OptimizationPlan, RankedMeasure
 # Public API of the optimization package: the three engine functions and the three schema classes
 # imported above.
 __all__ = [
    "regulatory_leverage",
    "select_within_budget",
    "roadmap_from_delta",
    "OptimizationPlan",
    "RankedMeasure",
    "BudgetPlan",
 ]
@@ -0,0 +1,134 @@
 """Regulatory Optimization — the Roadmap / Management RENDERER of the Capability Delta Engine.
 GAP analysis and measure-prioritisation are TWO VIEWS OF THE SAME COMPUTATION. The Capability
 Delta Engine (`compliance/transition_reasoning`, RS-005) computes Required - Known = the
 Capability Delta once. Renderers read that ONE delta:
  - Interview Renderer  (missing INFORMATION -> questions)    = `TransitionQuestionRequest` (built)
  - Roadmap / Management Renderer (missing CAPABILITIES -> measures by leverage) = THIS module
  - Evidence Renderer   (missing EVIDENCE -> upload requests) = later
 There is one truth, not a Gap engine and a separate Roadmap engine.
 A measure (a capability to implement) has *regulatory leverage* = the number of distinct
 regulatory requirements it closes AT ONCE (e.g. patch management closes a CRA, a MaschinenVO,
 an IEC 62443 and an ISO 27001 requirement -> leverage 4). The product turns from "you have N
 obligations" into "of N identified requirements you only need M measures — and these K first".
 Fully deterministic, computed-not-stored, NO new corpus. `regulatory_leverage`/`select_within_budget`
 are pure math over `capability -> requirements`; `roadmap_from_delta` binds them to the RS-005
 delta (dependency optimization -> transition_reasoning, acyclic; the delta engine stays hermetic).
 No new graph/meta-model class (freeze v1.0). Python 3.9 compatible.
 Honesty (Welt-1): the percentages are exact count ratios over the IDENTIFIED requirements from
 the known patterns — never "% gesetzeskonform". Label outputs as "der identifizierten Anforderungen".
 """
 from __future__ import annotations
 from typing import Dict, List, Optional
 from ..transition_reasoning import CoverageStatus, TransitionAssessment
 from .schemas import BudgetPlan, OptimizationPlan, RankedMeasure
 def _ranked(
    capability_requirements: Dict[str, List[str]], in_scope: Optional[List[str]]
 ) -> List[RankedMeasure]:
    """Build the ordered measure list that both public entry points share.
    Each capability becomes one `RankedMeasure` whose `covers` are its distinct requirement keys
    that fall inside the scope (`in_scope`, or every key seen in the input when `in_scope` is
    None). Capabilities that cover nothing in scope are left out. Ordering is highest leverage
    first, ties broken by ascending `capability_id`, so the result is deterministic. The running
    totals (`cumulative_requirements`, `cumulative_coverage`) are filled in along that order.
    """
    if in_scope is None:
        allowed = {key for keys in capability_requirements.values() for key in keys}
    else:
        allowed = set(in_scope)
    ranked: List[RankedMeasure] = []
    for capability_id, keys in capability_requirements.items():
        closed = sorted(allowed.intersection(keys))
        if closed:  # a capability that closes nothing in scope is not a measure here
            ranked.append(
                RankedMeasure(capability_id=capability_id, covers=closed, leverage=len(closed))
            )
    ranked.sort(key=lambda measure: (-measure.leverage, measure.capability_id))
    grand_total = sum(measure.leverage for measure in ranked)
    closed_so_far = 0
    for measure in ranked:
        closed_so_far += measure.leverage
        measure.cumulative_requirements = closed_so_far
        measure.cumulative_coverage = (closed_so_far / grand_total) if grand_total else 0.0
    return ranked
 def regulatory_leverage(
    capability_requirements: Dict[str, List[str]], in_scope: Optional[List[str]] = None
 ) -> OptimizationPlan:
    """Produce the leverage-ranked plan and its "requirements -> measures" headline.
    `capability_requirements` maps a measure (capability_id) to the requirement keys it satisfies.
    `in_scope` optionally restricts which requirement keys are counted; when omitted, every key
    that appears in the mapping is in scope. The returned plan lists the sorted in-scope keys,
    the ranked measures, the number of measures and the sum of their leverages, plus a German
    headline that also reports the average leverage per measure.
    """
    ranking = _ranked(capability_requirements, in_scope)
    if in_scope is not None:
        scope_keys = set(in_scope)
    else:
        scope_keys = {key for keys in capability_requirements.values() for key in keys}
    ordered_scope = sorted(scope_keys)
    requirement_count = sum(measure.leverage for measure in ranking)
    measure_count = len(ranking)
    mean_leverage = (requirement_count / measure_count) if ranking else 0.0
    headline = (
        "%d identifizierte Anforderungen aus %d Regelwerken -> %d Massnahmen (Ø Hebel %.1f)."
        % (requirement_count, len(ordered_scope), measure_count, mean_leverage)
    )
    return OptimizationPlan(
        in_scope_requirements=ordered_scope,
        total_measures=measure_count,
        total_requirements=requirement_count,
        ranked_measures=ranking,
        headline=headline,
    )
 def select_within_budget(
    capability_requirements: Dict[str, List[str]],
    budget: int,
    in_scope: Optional[List[str]] = None,
 ) -> BudgetPlan:
    """Answer the budget question: which `budget` measures to take, and how much they close.
    Selection is simply the first `budget` entries of the leverage ranking (a negative budget
    selects nothing). Coverage is counted per (measure, requirement key) pair, so the closed count
    is the running total stored on the last selected measure and the denominator is the sum of
    all leverages. The `budget` value is echoed back unchanged in the result.
    """
    ranking = _ranked(capability_requirements, in_scope)
    requirement_count = sum(measure.leverage for measure in ranking)
    chosen = ranking[: max(0, budget)]
    if chosen:
        closed_count = chosen[-1].cumulative_requirements
    else:
        closed_count = 0
    share = (closed_count / requirement_count) if requirement_count else 0.0
    headline = (
        "Mit den Top-%d Massnahmen (nach regulatorischem Hebel) schliessen Sie %d von %d "
        "identifizierten Anforderungen (%.0f%%)."
        % (len(chosen), closed_count, requirement_count, share * 100)
    )
    return BudgetPlan(
        budget=budget,
        selected_capabilities=[measure.capability_id for measure in chosen],
        requirements_closed=closed_count,
        total_requirements=requirement_count,
        coverage_ratio=share,
        headline=headline,
    )
 def roadmap_from_delta(
    assessment: TransitionAssessment,
    capability_requirements: Dict[str, List[str]],
    in_scope: Optional[List[str]] = None,
    open_statuses: Optional[List[CoverageStatus]] = None,
 ) -> OptimizationPlan:
    """Turn a Capability Delta (`TransitionAssessment`) into the leverage-ranked roadmap.
    Only coverage entries whose status is "open" are kept — `CoverageStatus.MISSING` unless the
    caller passes `open_statuses`. Each kept capability is paired with its requirement keys from
    `capability_requirements` (none if the capability is unknown there) and the result is handed
    to `regulatory_leverage`.
    """
    if open_statuses is None:
        wanted = {CoverageStatus.MISSING}
    else:
        wanted = set(open_statuses)
    delta_reqs: Dict[str, List[str]] = {}
    for entry in assessment.coverage:
        if entry.status in wanted:
            delta_reqs[entry.capability_id] = capability_requirements.get(entry.capability_id, [])
    return regulatory_leverage(delta_reqs, in_scope)
@@ -0,0 +1,48 @@
 """Schemas for the Regulatory Optimization Engine.
 These DTOs are *derived views* (computed-not-stored): nothing here is persisted; every value
 is recomputed from the input each call. No new meta-model class, no graph (freeze v1.0).
 Python 3.9 compatible (no `|` unions).
 """
 from __future__ import annotations
 from typing import List
 from pydantic import BaseModel, Field
 class RankedMeasure(BaseModel):
    """One measure (a capability to implement) ranked by its regulatory leverage."""
    capability_id: str                                      # the capability this measure implements
    covers: List[str] = Field(default_factory=list)        # the in-scope requirement keys it satisfies
    leverage: int = 0                                       # = len(covers): how many it closes at once
    cumulative_requirements: int = 0                        # running sum of leverage up to and including this rank
    cumulative_coverage: float = 0.0                        # cumulative_requirements / sum of all leverages (0..1)
 class OptimizationPlan(BaseModel):
    """Measures ranked by regulatory leverage — greatest regulatory effect first.
    `total_requirements` counts the IDENTIFIED requirements in scope (the known delta from the
    patterns), NOT a company's total legal duties. The percentages are exact count ratios over
    this identified set — never a compliance verdict (Welt-1 discipline).
    """
    in_scope_requirements: List[str] = Field(default_factory=list)   # the distinct requirement keys counted (sorted by the engine)
    total_measures: int = 0                                          # number of ranked measures (delta capabilities)
    total_requirements: int = 0                                      # sum of leverage over all ranked measures
    ranked_measures: List[RankedMeasure] = Field(default_factory=list)   # leverage desc, then capability_id asc
    headline: str = ""                                               # "N identifizierte Anforderungen -> M Massnahmen ..."
 class BudgetPlan(BaseModel):
    """The budget answer: with a budget of K measures, which K and how much do they close?"""
    budget: int = 0                                                  # the budget as requested by the caller
    selected_capabilities: List[str] = Field(default_factory=list)   # capability ids of the chosen measures, in rank order
    requirements_closed: int = 0                                     # summed leverage of the chosen measures
    total_requirements: int = 0                                      # summed leverage of all ranked measures
    coverage_ratio: float = 0.0                                      # requirements_closed / total_requirements (0..1)
    headline: str = ""                                               # human-readable German summary sentence
@@ -0,0 +1,20 @@
 """Implementation Playbook — the Berater renderer ("wie komme ich dort hin?").
 For one capability it assembles the full implementation journey (why / closes which regulations /
 tools / process / evidence / controls) from curated playbook knowledge + regulatory leverage +
 injected Execution links. `playbooks_for_plan` chains the Optimization Roadmap into per-measure
 playbooks. Pure, deterministic, computed-not-stored. No new corpus, no new meta-model class
 (freeze v1.0). Curated content = expert draft, never normative.
 """
 from __future__ import annotations
 from .engine import build_playbook, playbooks_for_plan
 from .schemas import Playbook, PlaybookStep
 __all__ = [
    "build_playbook",
    "playbooks_for_plan",
    "Playbook",
    "PlaybookStep",
 ]
@@ -0,0 +1,96 @@
 """Implementation Playbook — the Berater renderer ("wie komme ich dort hin?").
 After the Capability Delta Engine says WHAT is missing and the Optimization renderer says WHICH
 measure first, the Playbook renderer says HOW to implement it. For one capability it assembles the
 full journey from three sources:
  - curated playbook KNOWLEDGE (why / tools / process steps / evidence / how others do it) — the
    Reasoning Knowledge Acquisition layer under `knowledge/implementation_playbooks/`,
  - the regulatory LEVERAGE (which regulations a delivered capability closes) — reused from the
    Optimization renderer,
  - injected Procedure/Control/Evidence links (Execution-owned; empty until linked).
 Pure, deterministic, computed-not-stored. Chains optimization -> playbook (acyclic). No new corpus,
 no new meta-model class (freeze v1.0). Python 3.9 compatible.
 The curated content is an EXPERT DRAFT, never a normative requirement. When no playbook knowledge
 exists for a capability yet, the renderer emits a `status: missing` stub — the honest signal that
 the bottleneck is CONTENT (Knowledge Acquisition), not software.
 """
 from __future__ import annotations
 from typing import Any, Dict, List, Optional
 from ..optimization import OptimizationPlan
 from .schemas import Playbook, PlaybookStep
 # Placeholder "why" text used for the stub playbook when no curated knowledge exists.
 _MISSING_WHY = "(Playbook-Inhalt fehlt — Knowledge Acquisition offen.)"
 # Default disclaimer attached to playbooks: the curated content is an expert draft and not a
 # normative requirement; controls are injected from the Execution layer.
 _DRAFT_DISCLAIMER = (
    "Kuratiertes Experten-Wissen (Erstentwurf), KEINE normative Anforderung. Tools/Schritte sind "
    "Empfehlungen, kein Pflichtkatalog; Controls werden aus der Execution-Schicht injiziert."
 )
 def _steps(raw: Any) -> List[PlaybookStep]:
    """Convert the curated step entries into `PlaybookStep`s numbered from 1.
    A falsy `raw` (None, empty list) yields no steps. Each entry is read via `.get("title")` and
    `.get("detail")`, so entries are presumably dict-like — verify against the knowledge files.
    """
    return [
        PlaybookStep(
            order=position,
            title=str(entry.get("title", "")),
            detail=str(entry.get("detail", "")),
        )
        for position, entry in enumerate(raw or [], start=1)
    ]
 def build_playbook(
    capability_id: str,
    knowledge: Optional[Dict[str, Any]] = None,
    closes_regulations: Optional[List[str]] = None,
    control_links: Optional[List[str]] = None,
 ) -> Playbook:
    """Assemble the implementation journey for ONE capability.
    `knowledge`: the curated playbook dict; None or empty produces a `status="missing"` stub whose
    title is the capability id. `closes_regulations`: the regulations a delivered capability
    closes; they are de-duplicated and sorted, and `leverage` is their count. `control_links`:
    Execution-owned control refs, injected by the caller (default empty).
    A key that is present in `knowledge` but holds a null value is treated exactly like a missing
    key: list fields become empty and text fields fall back to their default, instead of raising
    `TypeError` or rendering the literal text "None".
    """
    closes = sorted(set(closes_regulations or []))
    controls = list(control_links or [])
    if not knowledge:
        return Playbook(
            capability_id=capability_id, title=capability_id, why=_MISSING_WHY,
            closes_regulations=closes, leverage=len(closes), controls=controls,
            status="missing", disclaimer=_DRAFT_DISCLAIMER,
        )
    data: Dict[str, Any] = knowledge
    def _text(key: str, default: str = "") -> str:
        # Only None counts as "absent"; any other value (including "") is kept and stringified.
        value = data.get(key)
        return default if value is None else str(value)
    return Playbook(
        capability_id=capability_id,
        title=_text("title", capability_id),
        why=_text("why"),
        closes_regulations=closes,
        leverage=len(closes),
        # `or []` mirrors the null-tolerance `_steps` already applies to process_steps.
        tools=list(data.get("tools") or []),
        process_steps=_steps(data.get("process_steps")),
        expected_evidence=list(data.get("expected_evidence") or []),
        controls=controls,
        how_others_do_it=_text("how_others_do_it"),
        status=_text("status", "draft"),
        disclaimer=_text("disclaimer", _DRAFT_DISCLAIMER),
    )
 def playbooks_for_plan(
    plan: OptimizationPlan,
    knowledge_by_cap: Dict[str, Dict[str, Any]],
    top_k: Optional[int] = None,
    control_links_by_cap: Optional[Dict[str, List[str]]] = None,
 ) -> List[Playbook]:
    """Build one playbook per ranked measure of an `OptimizationPlan`, in ranking order.
    With `top_k` set, only the first `top_k` measures are rendered (a negative value renders
    none); otherwise all of them. Each playbook uses the measure's own `covers` as the
    regulations it closes, the curated knowledge looked up by capability id (absent knowledge
    leads to a `missing` stub), and the control links looked up the same way.
    """
    links = control_links_by_cap or {}
    if top_k is None:
        chosen = plan.ranked_measures
    else:
        chosen = plan.ranked_measures[: max(0, top_k)]
    rendered: List[Playbook] = []
    for measure in chosen:
        cap = measure.capability_id
        rendered.append(
            build_playbook(
                cap,
                knowledge_by_cap.get(cap),
                closes_regulations=measure.covers,
                control_links=links.get(cap),
            )
        )
    return rendered
@@ -0,0 +1,45 @@
 """Schemas for the Implementation Playbook renderer.
 A Playbook is a *derived view* (computed-not-stored): it assembles, for one capability, the full
 "wie komme ich dort hin?" journey from (a) curated playbook KNOWLEDGE, (b) the regulatory leverage
 (which regulations a delivered capability closes), and (c) injected Procedure/Control/Evidence links
 (Execution-owned). Nothing here is persisted. No new meta-model class, no graph (freeze v1.0).
 Python 3.9 compatible (no `|` unions).
 """
 from __future__ import annotations
 from typing import List
 from pydantic import BaseModel, Field
 class PlaybookStep(BaseModel):
    """One step in the recommended way to stand up a capability."""
    order: int          # 1-based position of the step (assigned by the engine's enumeration)
    title: str          # short step heading
    detail: str = ""    # optional longer explanation
 class Playbook(BaseModel):
    """The complete implementation journey for ONE capability — the Berater view.
    Answers, in order: Warum? -> Welche Regelwerke schliesst das? -> Welche Tools? -> Welche
    Prozesse? -> Welche Nachweise? -> Welche Controls? The curated parts (why/tools/steps/evidence/
    how-others) are an EXPERT DRAFT, not a normative requirement; controls are injected from
    Execution (may be empty until linked).
    """
    capability_id: str                                     # the capability this playbook is for
    title: str = ""                                        # display title (the engine falls back to the capability id)
    why: str = ""                                          # why this is required (regulatory rationale)
    closes_regulations: List[str] = Field(default_factory=list)   # leverage: regulations a delivered cap closes
    leverage: int = 0                                      # = len(closes_regulations)
    tools: List[str] = Field(default_factory=list)         # typical tooling (curated knowledge)
    process_steps: List[PlaybookStep] = Field(default_factory=list)   # how to stand it up
    expected_evidence: List[str] = Field(default_factory=list)        # artifacts that prove it
    controls: List[str] = Field(default_factory=list)      # control refs (injected from Execution; may be empty)
    how_others_do_it: str = ""                             # "wie machen das andere?" (curated)
    status: str = "draft"                                  # draft -> reviewed -> validated -> proven; the engine also emits "missing"
    disclaimer: str = ""                                   # expert draft, not a normative requirement
@@ -0,0 +1,26 @@
 """Product-scope orchestration (step 3).
 Connects the Navigator's fact-gate to the existing reasoning `discover_scope`:
 decide regulatory scope only once the minimum (P0) facts are present, otherwise
 return the missing facts. Reuses discover_scope unchanged — no new scope logic.
 """
 from __future__ import annotations
 from .orchestrator import resolve_product_scope
 from .schemas import (
    ProductScopeRequest,
    ProductScopeResponse,
    RegulatoryScopeResult,
    ScopeStatus,
    UnsupportedDomain,
 )
 __all__ = [
    "resolve_product_scope",
    "ProductScopeRequest",
    "ProductScopeResponse",
    "RegulatoryScopeResult",
    "UnsupportedDomain",
    "ScopeStatus",
 ]
@@ -0,0 +1,77 @@
 """Product-scope orchestrator (step 3) — gate, then reuse discover_scope.
 THE rule: the Scope Engine decides only once the Navigator has released the
 minimum facts. If P0 facts are missing, return the missing facts/questions and do
 NOT run discover_scope. Otherwise project the canonical into the reasoning profile
 and run the EXISTING `discover_scope` exactly once.
 No new scope rules, no new regulations, no environmental-law evaluation (those
 domains are surfaced only as unsupported_domains / future_corpus_needed).
 """
 from __future__ import annotations
 from typing import List, Tuple
 from compliance.navigator.engine import navigate
 from compliance.profile.canonical import CanonicalProductRegulatoryProfile
 from compliance.profile.to_reasoning import to_reasoning_profile
 from compliance.reasoning.scope_engine import discover_scope
 from .schemas import (
    ProductScopeResponse,
    RegulatoryScopeResult,
    ScopeStatus,
    UnsupportedDomain,
 )
 # environmental trigger field -> (domain, note). Transparency only — not a verdict.
 # Lookup table: (environmental trigger field, unsupported domain, user-facing note).
 # Transparency only — not a verdict. Several fields map to the same domain; the order of the
 # rows matters because only the first triggered row per domain is reported.
 _ENV_DOMAINS: List[Tuple[str, str, str]] = [
    ("discharges_to_wastewater", "environment_water", "Abwasser-/Gewässerrecht (z. B. AbwV, WRRL) — noch nicht im Korpus."),
    ("has_cooling_or_spraying_water", "environment_water", "Wasserbezogene Anforderungen — noch nicht im Korpus."),
    ("emits_to_air", "environment_air", "Immissionsschutz-/Luftreinhalterecht (z. B. BImSchG, IED) — noch nicht im Korpus."),
    ("uses_solvents", "environment_air", "Lösemittel-/VOC-Recht (z. B. 31. BImSchV) — noch nicht im Korpus."),
    ("uses_cleaning_chemicals", "chemicals", "Chemikalienrecht (REACH/CLP/Detergenzien/Biozide) — noch nicht im Korpus."),
    ("supplies_chemicals", "chemicals", "Chemikalienrecht (REACH/CLP) — noch nicht im Korpus."),
    ("contains_restricted_substances", "chemicals", "Stoffbeschränkungen (REACH/RoHS) — noch nicht im Korpus."),
    ("creates_waste", "waste", "Abfall-/Entsorgungsrecht (u. a. WEEE) — noch nicht im Korpus."),
    ("consumes_energy_or_water", "energy_resources", "Energie-/Ökodesign-Recht — noch nicht im Korpus."),
 ]
 def _unsupported_domains(profile: CanonicalProductRegulatoryProfile) -> List[UnsupportedDomain]:
    """List the not-yet-covered domains that the profile's environmental flags trigger.
    Walks `_ENV_DOMAINS` in table order. A row fires only when the flag is exactly True (None and
    False do not count). Each domain is reported once, carrying the trigger field and note of the
    first row that fired for it.
    """
    env = profile.environmental
    reported: Dict[str, UnsupportedDomain] = {}  # domain -> first hit, kept in discovery order
    for field, domain, note in _ENV_DOMAINS:
        triggered = getattr(env, field) is True
        if triggered and domain not in reported:
            reported[domain] = UnsupportedDomain(domain=domain, trigger=field, note=note)
    return list(reported.values())
 def resolve_product_scope(profile: CanonicalProductRegulatoryProfile) -> ProductScopeResponse:
    """Gate on the Navigator, then decide the regulatory scope at most once.
    If the Navigator reports the profile is not ready for scope, the response has status
    NEEDS_FACTS and carries the missing facts and suggested questions; `discover_scope` is not
    called. Otherwise the profile is projected to the reasoning profile, `discover_scope` runs
    once, and its verdict is returned together with the unsupported environmental domains.
    """
    nav = navigate(profile)
    completeness = nav.completeness_summary
    if completeness.ready_for_scope:
        verdict = discover_scope(to_reasoning_profile(profile))  # exactly once
        return ProductScopeResponse(
            status=ScopeStatus.RESOLVED,
            completeness_summary=completeness,
            regulatory_scope=RegulatoryScopeResult(
                applicable_regulations=verdict.applicable_regulations,
                excluded_regulations=verdict.excluded_regulations,
                uncertain_regulations=verdict.uncertain_regulations,
                unsupported_domains=_unsupported_domains(profile),
                reasoning_summary=verdict.reasoning_summary,
                confidence=verdict.confidence,
            ),
        )
    return ProductScopeResponse(
        status=ScopeStatus.NEEDS_FACTS,
        completeness_summary=completeness,
        missing_facts=nav.missing_facts,
        suggested_questions=nav.suggested_questions,
    )
@@ -0,0 +1,63 @@
 """Response schemas for the product-scope orchestrator (step 3).
 These are application/API types — NOT compliance-meta-model classes (architecture
 freeze v1.0 untouched). The scope verdict itself is produced by the existing
 `discover_scope`; nothing here adds scope rules.
 """
 from __future__ import annotations
 from enum import Enum
 from typing import List, Optional
 from pydantic import BaseModel, Field
 from compliance.navigator.engine import CompletenessSummary
 from compliance.navigator.questions import NavigatorQuestion
 from compliance.profile.canonical import CanonicalProductRegulatoryProfile
 from compliance.reasoning.enums import Confidence
 from compliance.reasoning.schemas import (
    ApplicableRegulation,
    ExcludedRegulation,
    UncertainRegulation,
 )
 # Outcome of the product-scope orchestrator; a str-valued enum.
 class ScopeStatus(str, Enum):
    NEEDS_FACTS = "needs_facts"  # P0 facts missing -> ask, do not decide
    RESOLVED = "resolved"  # minimum facts present -> scope decided
 class UnsupportedDomain(BaseModel):
    """A domain the product triggers but the corpus does not yet cover.
    Surfaced for transparency (no false completeness) — NEVER a legal evaluation.
    """
    domain: str                             # domain key, e.g. "environment_water"
    trigger: str                            # name of the profile field that triggered the domain
    status: str = "future_corpus_needed"    # fixed marker: no corpus for this domain yet
    note: str = ""                          # human-readable hint on the affected body of law
 # The resolved scope verdict as returned to API callers: the three regulation lists, summary and
 # confidence are copied from the `discover_scope` result; `unsupported_domains` is added by the
 # orchestrator.
 class RegulatoryScopeResult(BaseModel):
    applicable_regulations: List[ApplicableRegulation] = Field(default_factory=list)
    excluded_regulations: List[ExcludedRegulation] = Field(default_factory=list)
    uncertain_regulations: List[UncertainRegulation] = Field(default_factory=list)
    unsupported_domains: List[UnsupportedDomain] = Field(default_factory=list)  # triggered but not in the corpus
    reasoning_summary: str = ""
    confidence: Confidence = Confidence.MEDIUM  # default when none is supplied
 # Request wrapper: carries the canonical product profile whose scope should be resolved.
 class ProductScopeRequest(BaseModel):
    product_profile: CanonicalProductRegulatoryProfile
 # Response of the product-scope orchestrator. `status` tells which of the two groups of
 # fields below is populated; `completeness_summary` is always present.
 class ProductScopeResponse(BaseModel):
    status: ScopeStatus
    completeness_summary: CompletenessSummary
    # Populated when status is NEEDS_FACTS: what is still missing and what to ask.
    missing_facts: List[str] = Field(default_factory=list)
    suggested_questions: List[NavigatorQuestion] = Field(default_factory=list)
    # Populated when status is RESOLVED: the scope verdict (None otherwise).
    regulatory_scope: Optional[RegulatoryScopeResult] = None
@@ -0,0 +1,38 @@
 """Product profile convergence layer.
 ONE canonical product profile (`CanonicalProductRegulatoryProfile`) that the Go
 gap engine and the Python reasoning engine both project from — so "SPS mit
 Remote Access" means the same thing everywhere. gap.ProductProfile leads; the
 reasoning ProductProfile is an adapter/DTO. Types + mappers only — no regulation
 logic, no UI, no new questions.
 """
 from __future__ import annotations
 from .canonical import (
    CanonicalLifecyclePhase,
    CanonicalProductRegulatoryProfile,
    CanonicalProductType,
    ComponentKind,
    EconomicOperatorRole,
    EnvironmentalImpact,
    ProductComponent,
 )
 from .from_company_profile import from_company_profile
 from .from_product_wizard import from_product_wizard
 from .to_gap import to_gap_profile
 from .to_reasoning import to_reasoning_profile
 __all__ = [
    "CanonicalProductRegulatoryProfile",
    "CanonicalProductType",
    "EconomicOperatorRole",
    "CanonicalLifecyclePhase",
    "ComponentKind",
    "ProductComponent",
    "EnvironmentalImpact",
    "from_product_wizard",
    "from_company_profile",
    "to_gap_profile",
    "to_reasoning_profile",
 ]
@@ -0,0 +1,158 @@
 """CanonicalProductRegulatoryProfile — the single semantic product profile.
 Convergence layer (spec 2026-06-26): instead of letting the Go `gap.ProductProfile`
 and the Python reasoning `ProductProfile` drift, ONE canonical type is the source
 of truth. The Go gap engine LEADS (it carries real engine logic), so the canonical
 mirrors gap's field names and adds the Navigator gaps the audit found missing
 (economic-operator role, radio module, generates_usage_data, lifecycle phase,
 structured BOM, safety-vs-security split, machine-vs-component) plus a
 forward-looking Environmental-Impact domain.
 No regulation logic lives here — types only. Mappers live in sibling modules.
 Python 3.9 compatible (no `|` unions).
 """
 from __future__ import annotations
 from enum import Enum
 from typing import List, Optional
 from pydantic import BaseModel, Field
 # Product category of the canonical profile; a str-valued enum.
 class CanonicalProductType(str, Enum):  # mirrors gap.ProductType
    SOFTWARE = "software"
    HARDWARE = "hardware"
    IOT = "iot"
    SAAS = "saas"
    EXCHANGE = "exchange"
    MEDICAL_DEVICE = "medical_device"
    MACHINERY = "machinery"
    OTHER = "other"
 # Role of the company with respect to the product; a str-valued enum.
 class EconomicOperatorRole(str, Enum):  # CE/CRA role — gap.ProductProfile has none
    MANUFACTURER = "manufacturer"
    IMPORTER = "importer"
    DISTRIBUTOR = "distributor"
    INTEGRATOR = "integrator"
    OPERATOR = "operator"
    SERVICE_PROVIDER = "service_provider"
 # Lifecycle phase the product is currently in; a str-valued enum.
 class CanonicalLifecyclePhase(str, Enum):
    DEVELOPMENT = "development"
    PLACING_ON_MARKET = "placing_on_market"
    OPERATION = "operation"
    MAINTENANCE = "maintenance"
    UPDATE = "update"
    END_OF_LIFE = "end_of_life"
 # Kind of a structured BOM node (`ProductComponent.kind`); a str-valued enum.
 # OTHER is the default kind on `ProductComponent`.
 class ComponentKind(str, Enum):
    MOTOR = "motor"
    PUMP = "pump"
    HEATING = "heating"
    COOLING = "cooling"
    CONTROLLER = "controller"
    PLC = "plc"
    HMI = "hmi"
    SENSOR = "sensor"
    ACTUATOR = "actuator"
    CAMERA = "camera"
    NETWORK_INTERFACE = "network_interface"
    RADIO_MODULE = "radio_module"
    CHEMICAL_DOSING = "chemical_dosing"
    WATER_INLET = "water_inlet"
    WASTEWATER_OUTLET = "wastewater_outlet"
    BATTERY = "battery"
    OTHER = "other"
 class ProductComponent(BaseModel):
    """One structured BOM node — these nodes are what later trigger domains."""
    name: str                                    # component name (required)
    kind: ComponentKind = ComponentKind.OTHER    # category; OTHER when not specified
    notes: Optional[str] = None                  # optional free-text remark
 class EnvironmentalImpact(BaseModel):
    """Forward-looking Umweltmedien-Trigger (own Navigator domain).
    No regulation logic consumes these yet — profile fields only, so the model
    is not blind to wastewater/air/chemicals/waste questions when that domain
    is wired later (AbwV/WRRL/REACH/CLP/IED/BImSchG ...).
    """
    # Every flag defaults to None. The scope orchestrator's `_unsupported_domains` reports a
    # domain only when a flag is exactly True.
    discharges_to_wastewater: Optional[bool] = None
    uses_cleaning_chemicals: Optional[bool] = None
    supplies_chemicals: Optional[bool] = None
    emits_to_air: Optional[bool] = None
    uses_solvents: Optional[bool] = None
    creates_waste: Optional[bool] = None
    contains_restricted_substances: Optional[bool] = None
    consumes_energy_or_water: Optional[bool] = None
    has_cooling_or_spraying_water: Optional[bool] = None
 # The single canonical product profile. Types only — every field has a default, so an empty
 # profile can be constructed and filled incrementally by the mappers. Boolean facts are
 # Optional: None means "unknown", which is distinct from an explicit False.
 class CanonicalProductRegulatoryProfile(BaseModel):
    # --- identity ---
    name: str = ""
    description: str = ""
    product_type: Optional[CanonicalProductType] = None
    product_profile_id: Optional[str] = None
    tenant_id: Optional[str] = None
    iace_project_id: Optional[str] = None
    # --- gap-native lists ---
    technologies: List[str] = Field(default_factory=list)
    data_processing: List[str] = Field(default_factory=list)
    markets: List[str] = Field(default_factory=list)  # real list — never hardcoded ['EU']
    existing_certifications: List[str] = Field(default_factory=list)
    applied_norms: List[str] = Field(default_factory=list)
    # --- gap-native product / IST-state booleans (tri-state: None = unknown) ---
    connected_to_internet: Optional[bool] = None
    has_software_updates: Optional[bool] = None
    uses_ai: Optional[bool] = None
    processes_personal_data: Optional[bool] = None
    is_critical_infra_supplier: Optional[bool] = None
    has_risk_assessment: Optional[bool] = None
    has_technical_file: Optional[bool] = None
    has_operating_manual: Optional[bool] = None
    has_sbom: Optional[bool] = None
    has_vuln_management: Optional[bool] = None
    has_update_mechanism: Optional[bool] = None
    has_incident_response: Optional[bool] = None
    has_supply_chain_mgmt: Optional[bool] = None
    ce_marking_since: Optional[str] = None  # free-form string; format not enforced here
    product_age: Optional[str] = None  # free-form string; format not enforced here
    # --- NEW Navigator-gap fields (audit 2026-06-26) ---
    economic_operator_role: Optional[EconomicOperatorRole] = None
    has_radio_module: Optional[bool] = None
    generates_usage_data: Optional[bool] = None
    lifecycle_phase: Optional[CanonicalLifecyclePhase] = None
    components: List[ProductComponent] = Field(default_factory=list)  # structured BOM
    has_safety_function: Optional[bool] = None
    safety_function_description: Optional[str] = None
    has_security_function: Optional[bool] = None  # safety vs security split
    has_remote_access: Optional[bool] = None
    has_embedded_software: Optional[bool] = None
    is_machine: Optional[bool] = None
    is_component: Optional[bool] = None
    is_spare_part: Optional[bool] = None
    # --- company / market context (NIS2 + scope; from company-profile) ---
    b2b_or_b2c: Optional[str] = None
    sector_industry: Optional[str] = None
    company_size: Optional[str] = None
    primary_jurisdiction: Optional[str] = None
    # --- AI context (classification stays delegated to ai-act/ucca) ---
    ai_integration_type: List[str] = Field(default_factory=list)
    human_oversight_level: Optional[str] = None
    # --- forward-looking environmental domain ---
    environmental: EnvironmentalImpact = Field(default_factory=EnvironmentalImpact)  # fresh all-None block per profile
@@ -0,0 +1,59 @@
 """company-profile -> CanonicalProductRegulatoryProfile (prefill, acceptance #2).
 Pulls master data (industry, business model, size, markets) and the conditional
 `machine_builder` block (camelCase JSONB keys, defined frontend-side) so the user
 re-answers nothing. The machineBuilder block is the richest product/safety/
 connectivity source — note it is industry-gated in the UI, so a prefill may find
 it empty; that is fine (fields stay None = unknown).
 """
 from __future__ import annotations
 from typing import Any, Dict, List
 from .canonical import CanonicalProductRegulatoryProfile
 # Market/jurisdiction codes that hint at an EU/EEA context.
 # NOTE(review): no function visible in this module reads this set — confirm whether it is
 # used elsewhere or is left over.
 _EU_MEMBER_HINTS = {"DE", "AT", "FR", "IT", "NL", "LU", "LI", "EU", "EWR", "EEA", "DACH"}
 def _markets(p: Dict[str, Any], mb: Dict[str, Any]) -> List[str]:
    out: List[str] = []
    for source in (p.get("target_markets"), mb.get("exportMarkets"), [p.get("primary_jurisdiction")], [p.get("headquarters_country")]):
        for m in source or []:
            if m and m not in out:
                out.append(m)
    return out
 def _is_machine(mb: Dict[str, Any]) -> Any:
    types = mb.get("productTypes")
    if types:
        return True
    return None
 def from_company_profile(profile: Dict[str, Any]) -> CanonicalProductRegulatoryProfile:
    """Prefill a canonical profile from a company-profile dict.
    Company-level master data (industry, business model, size, jurisdiction, markets) comes from
    the top level; product/safety/connectivity facts come from the optional `machine_builder`
    block (camelCase keys). A missing block is treated as empty, so those facts stay None.
    Free-text values that are empty are stored as None.
    """
    machine = profile.get("machine_builder") or {}
    # The machine-builder answer wins whenever it is set (even to False); otherwise fall back
    # to the company-level flag.
    ai_flag = machine.get("containsAI")
    if ai_flag is None:
        ai_flag = profile.get("uses_ai")
    return CanonicalProductRegulatoryProfile(
        # company / market context
        sector_industry=profile.get("industry") or None,
        b2b_or_b2c=profile.get("business_model") or None,
        company_size=profile.get("company_size") or None,
        primary_jurisdiction=profile.get("primary_jurisdiction") or None,
        markets=_markets(profile, machine),
        # product description and AI context
        description=machine.get("productDescription") or "",
        uses_ai=ai_flag,
        ai_integration_type=list(machine.get("aiIntegrationType") or []),
        human_oversight_level=machine.get("humanOversightLevel") or None,
        # product / safety / connectivity facts (passed through as-is, None = unknown)
        is_machine=_is_machine(machine),
        has_embedded_software=machine.get("containsFirmware"),
        has_safety_function=machine.get("hasSafetyFunction"),
        safety_function_description=machine.get("safetyFunctionDescription") or None,
        has_remote_access=machine.get("hasRemoteAccess"),
        connected_to_internet=machine.get("isNetworked"),
        has_software_updates=machine.get("hasOTAUpdates"),
        has_risk_assessment=machine.get("hasRiskAssessment"),
        is_critical_infra_supplier=machine.get("criticalSectorClients"),
    )
@@ -0,0 +1,50 @@
 """ProductWizard payload -> CanonicalProductRegulatoryProfile (lossless).
 The gap-analysis ProductWizard POSTs exactly the gap.ProductProfile JSON shape
 (see admin-compliance/.../ProductWizard.tsx handleSubmit). This mapper copies
 every gap field verbatim so that `to_gap_profile(from_product_wizard(p))`
 reproduces the gap subset of `p` byte-for-byte (acceptance #1). New Navigator
 fields the wizard does not ask stay None.
 """
 from __future__ import annotations
 from typing import Any, Dict, Optional
 from .canonical import CanonicalProductRegulatoryProfile, CanonicalProductType
 def _as_product_type(value: Any) -> Optional[CanonicalProductType]:
     """Coerce a raw wizard value into a CanonicalProductType; values the enum rejects yield None."""
     try:
         member: Optional[CanonicalProductType] = CanonicalProductType(value)
     except ValueError:
         member = None
     return member
 def from_product_wizard(payload: Dict[str, Any]) -> CanonicalProductRegulatoryProfile:
     """Map a ProductWizard payload (gap.ProductProfile JSON shape) onto the canonical profile.
     Text and list fields are normalised so that both a missing key and an
     explicit JSON ``null`` become the empty value (``""`` / ``[]``); boolean
     and date-like fields are passed through unchanged so ``None`` keeps its
     "unknown" meaning. ``product_type`` values the enum does not know become
     None.
     """
     g = payload.get
     return CanonicalProductRegulatoryProfile(
         # `or ""` (not a .get default) so an explicit null is treated like a missing key,
         # matching how the list fields below and from_company_profile normalise input.
         name=g("name") or "",
         description=g("description") or "",
         product_type=_as_product_type(g("product_type")),
         technologies=list(g("technologies") or []),
         data_processing=list(g("data_processing") or []),
         markets=list(g("markets") or []),
         existing_certifications=list(g("existing_certifications") or []),
         applied_norms=list(g("applied_norms") or []),
         connected_to_internet=g("connected_to_internet"),
         has_software_updates=g("has_software_updates"),
         uses_ai=g("uses_ai"),
         processes_personal_data=g("processes_personal_data"),
         is_critical_infra_supplier=g("is_critical_infra_supplier"),
         has_risk_assessment=g("has_risk_assessment"),
         has_technical_file=g("has_technical_file"),
         has_operating_manual=g("has_operating_manual"),
         has_sbom=g("has_sbom"),
         has_vuln_management=g("has_vuln_management"),
         has_update_mechanism=g("has_update_mechanism"),
         has_incident_response=g("has_incident_response"),
         has_supply_chain_mgmt=g("has_supply_chain_mgmt"),
         ce_marking_since=g("ce_marking_since"),
         product_age=g("product_age"),
     )
@@ -0,0 +1,41 @@
 """CanonicalProductRegulatoryProfile -> gap.ProductProfile JSON shape.
 Emits exactly the keys the Go gap engine already consumes (gap/models.go json
 tags), so the gap engine runs UNCHANGED — the canonical is a superset and gap is
 its lossless projection. Canonical-only fields (role/radio/components/...) are
 intentionally not emitted here; they reach the reasoning side via to_reasoning.
 """
 from __future__ import annotations
 from typing import Any, Dict
 from .canonical import CanonicalProductRegulatoryProfile
 def to_gap_profile(c: CanonicalProductRegulatoryProfile) -> Dict[str, Any]:
     """Project the canonical profile onto the gap.ProductProfile key set.
     Lists are copied, tri-state flags are collapsed with ``bool()`` (so None
     becomes False), and unset ``product_type`` / ``ce_marking_since`` /
     ``product_age`` are emitted as empty strings. Key order is fixed.
     """
     list_fields = (
         "technologies",
         "data_processing",
         "markets",
         "existing_certifications",
         "applied_norms",
     )
     flag_fields = (
         "connected_to_internet",
         "has_software_updates",
         "uses_ai",
         "processes_personal_data",
         "is_critical_infra_supplier",
         "has_risk_assessment",
         "has_technical_file",
         "has_operating_manual",
         "has_sbom",
         "has_vuln_management",
         "has_update_mechanism",
         "has_incident_response",
         "has_supply_chain_mgmt",
     )
     text_fields = ("ce_marking_since", "product_age")
     projected: Dict[str, Any] = {
         "name": c.name,
         "description": c.description,
         "product_type": c.product_type.value if c.product_type else "",
     }
     for field in list_fields:
         projected[field] = list(getattr(c, field))
     for field in flag_fields:
         projected[field] = bool(getattr(c, field))
     for field in text_fields:
         raw = getattr(c, field)
         projected[field] = "" if raw is None else raw
     return projected
@@ -0,0 +1,88 @@
 """CanonicalProductRegulatoryProfile -> reasoning ProductProfile (adapter/DTO).
 The reasoning engine stays the consumer, never the source of truth (spec): the
 canonical leads, this projects it into the Python reasoning ProductProfile so the
 Reasoning engine and the Go gap engine run off ONE semantic profile (acceptance
 #10). AI classification is NOT done here — only `uses_ai` is forwarded; risk
 classification stays delegated to ai-act/ucca (acceptance #3).
 This is the ONLY one-way coupling profile -> reasoning; reasoning never imports
 profile, so the reasoning layer stays hermetic.
 """
 from __future__ import annotations
 from typing import List, Optional
 from compliance.reasoning.enums import ManufacturerRole, MarketModel, ProductLifecyclePhase
 from compliance.reasoning.schemas import ProductProfile
 from .canonical import CanonicalProductRegulatoryProfile, CanonicalProductType
 # Product types that by themselves count as a "has software" signal (see _has_software).
 _SOFTWARE_TYPES = {CanonicalProductType.SOFTWARE, CanonicalProductType.SAAS, CanonicalProductType.IOT}
 # Technology tags that count as a "has software" signal when present in the profile.
 _SOFTWARE_TECH = {"ai", "api", "database", "encryption", "ota_updates", "cloud", "blockchain"}
 # Market tokens that mark a product as placed on the EU/EEA market (see _eu_market).
 # NOTE(review): this is a hint list, not every member state — markets such as "ES" or
 # "PL" alone currently evaluate to eu_market=False; confirm that is intended.
 _EU_HINTS = {"DE", "AT", "FR", "IT", "NL", "LU", "LI", "EU", "EWR", "EEA", "DACH"}
 # Canonical business-model strings -> reasoning MarketModel; unknown strings map to None via .get().
 _B2X = {"B2B": MarketModel.B2B, "B2C": MarketModel.B2C, "B2B_B2C": MarketModel.BOTH, "B2B2C": MarketModel.BOTH}
 def _or_none(*values: Optional[bool]) -> Optional[bool]:
    """True if any value is truthy; None if all are None/absent; else False."""
    if any(v is True for v in values):
        return True
    if all(v is None for v in values):
        return None
    return False
 def _has_software(c: CanonicalProductRegulatoryProfile) -> Optional[bool]:
     """Derive the tri-state "product contains software" flag.
     Combines the explicit embedded-software, software-update and AI flags with
     two derived signals (software-like product type, software-related
     technology tag). Derived signals can only contribute True, never False.
     """
     signals = [c.has_embedded_software, c.has_software_updates, c.uses_ai]
     signals.append(True if c.product_type in _SOFTWARE_TYPES else None)
     signals.append(True if (_SOFTWARE_TECH & set(c.technologies)) else None)
     return _or_none(*signals)
 def _eu_market(markets: List[str]) -> Optional[bool]:
     """Tell whether any listed market is an EU/EEA hint token.
     Returns None when no markets are known, True when at least one market
     matches ``_EU_HINTS``, otherwise False. Market strings are compared
     case-insensitively and ignoring surrounding whitespace, so ``"de"`` or
     ``" EU "`` are recognised like ``"DE"`` / ``"EU"``; non-string entries
     never match.
     """
     if not markets:
         return None
     # The hint tokens are upper-case; normalise the input instead of reporting a
     # definite False for a differently-cased or padded code.
     normalized = {m.strip().upper() for m in markets if isinstance(m, str)}
     return bool(normalized & _EU_HINTS)
 def _has_radio(c: CanonicalProductRegulatoryProfile) -> Optional[bool]:
     """Resolve the radio-module flag: an explicit answer wins, otherwise a
     component of kind ``radio_module`` implies True, otherwise unknown (None)."""
     explicit = c.has_radio_module
     if explicit is not None:
         return explicit
     for component in c.components:
         if component.kind.value == "radio_module":
             return True
     return None
 def to_reasoning_profile(c: CanonicalProductRegulatoryProfile) -> ProductProfile:
     """Build the reasoning-side ProductProfile from the canonical profile.
     Enum-typed canonical fields are re-created in the reasoning enums via
     their raw values; a few flags are derived when the canonical leaves them
     unset (machine from product type, usage data from a ``telemetry`` entry,
     cloud connection from a ``cloud`` technology tag). ``uses_ai`` is only
     forwarded, never classified.
     """
     role = None
     if c.economic_operator_role:
         role = ManufacturerRole(c.economic_operator_role.value)
     phase = None
     if c.lifecycle_phase:
         phase = ProductLifecyclePhase(c.lifecycle_phase.value)
     market_model = None
     if c.b2b_or_b2c:
         # Unknown business-model strings resolve to None.
         market_model = _B2X.get(c.b2b_or_b2c)
     machine_flag = c.is_machine
     if machine_flag is None and c.product_type == CanonicalProductType.MACHINERY:
         machine_flag = True
     usage_data_flag = c.generates_usage_data
     if usage_data_flag is None and "telemetry" in c.data_processing:
         usage_data_flag = True
     type_list = [c.product_type.value] if c.product_type else []
     cloud_flag = True if "cloud" in c.technologies else None
     return ProductProfile(
         product_name=c.name or "Produkt",
         product_profile_id=c.product_profile_id,
         manufacturer_role=role,
         product_type=type_list,
         has_software=_has_software(c),
         has_embedded_software=c.has_embedded_software,
         has_remote_access=c.has_remote_access,
         has_cloud_connection=cloud_flag,
         has_ai_functionality=c.uses_ai,
         has_radio_module=_has_radio(c),
         has_safety_function=c.has_safety_function,
         generates_usage_data=usage_data_flag,
         is_machine=machine_flag,
         is_component=c.is_component,
         is_spare_part=c.is_spare_part,
         eu_market=_eu_market(c.markets),
         b2b_or_b2c=market_model,
         lifecycle_phase=phase,
         company_size=c.company_size,
         sector=c.sector_industry,
     )
@@ -0,0 +1,34 @@
 """Regulatory Change Intelligence (RCI) — delta layer over the product-first map.
 Answers "what changes relative to my existing Regulatory Map?" — NOT "what does
 the new law say in general". Snapshot the pipeline into a ComplianceBaseline, then
 assess a (simulated/provided) RegulatoryChange into per-obligation deltas + a
 management ChangeImpactSummary. Read/reasoning only — no UI, no ingestion, no RAG,
 no new regulations/controls, no legal evaluation outside the stored map.
 """
 from __future__ import annotations
 from .baseline import create_baseline
 from .delta_engine import assess_change
 from .schemas import (
    ChangeAssessment,
    ChangeImpactSummary,
    ChangeType,
    ComplianceBaseline,
    DeltaType,
    ObligationDelta,
    RegulatoryChange,
 )
 # Public surface of the RCI package: the two entry points followed by the
 # schema types they accept and return.
 __all__ = [
     "create_baseline",
     "assess_change",
     "ComplianceBaseline",
     "RegulatoryChange",
     "ObligationDelta",
     "ChangeImpactSummary",
     "ChangeAssessment",
     "DeltaType",
     "ChangeType",
 ]
@@ -0,0 +1,44 @@
 """Snapshot the current product-first pipeline into a ComplianceBaseline.
 This is the ONLY place RCI runs the pipeline — to freeze a point-in-time map +
 registry-linked obligations + their required evidence. Everything downstream
 (delta computation) works purely against this snapshot, never re-evaluating.
 """
 from __future__ import annotations
 from typing import Dict, List, Optional
 from compliance.profile.canonical import CanonicalProductRegulatoryProfile
 from compliance.profile.to_reasoning import to_reasoning_profile
 from compliance.reasoning.obligation_engine import derive_obligations
 from compliance.regulatory_map.renderer import render_regulatory_map
 from .schemas import ComplianceBaseline
 def create_baseline(
     profile: CanonicalProductRegulatoryProfile,
     evidence_refs: Optional[Dict[str, List[str]]] = None,
     baseline_id: str = "baseline",
     created_at: Optional[str] = None,
 ) -> ComplianceBaseline:
     """Freeze the current pipeline result for ``profile`` into a ComplianceBaseline.
     Renders the regulatory map, derives the obligations, and keeps only those
     obligations that carry a registry anchor together with a copy of their
     required evidence. ``evidence_refs`` (evidence already held, per
     obligation id) is stored as a shallow copy; None means no evidence.
     NOTE(review): the profile object itself is passed through as the snapshot —
     whether it is copied depends on the model configuration; confirm.
     """
     rendered_map = render_regulatory_map(profile)
     derived = derive_obligations(to_reasoning_profile(profile))
     # Only registry-linked obligations enter the baseline.
     anchored = [ob for ob in derived.applicable_obligations if ob.registry_anchor]
     obligation_ids: List[str] = [ob.obligation_id for ob in anchored]
     evidence_needed: Dict[str, List[str]] = {
         ob.obligation_id: list(ob.required_evidence) for ob in anchored
     }
     held_evidence = dict(evidence_refs or {})
     return ComplianceBaseline(
         baseline_id=baseline_id,
         product_profile_snapshot=profile,
         regulatory_map_snapshot=rendered_map,
         applicable_obligations=obligation_ids,
         obligation_evidence_required=evidence_needed,
         evidence_refs=held_evidence,
         created_at=created_at,
     )
@@ -0,0 +1,114 @@
 """RCI delta engine — assess a RegulatoryChange against a ComplianceBaseline.
 Answers "what changes relative to my existing Map?" deterministically, working
 ONLY against the stored baseline (no re-evaluation of scope, no new legal
 assessment outside the map). Per-obligation classification -> ObligationDelta;
 aggregate -> ChangeImpactSummary.
 """
 from __future__ import annotations
 from typing import List, Tuple
 from compliance.reasoning.enums import Confidence
 from .schemas import (
    ChangeAssessment,
    ChangeImpactSummary,
    ChangeType,
    ComplianceBaseline,
    DeltaType,
    ObligationDelta,
    RegulatoryChange,
 )
 # Delta types that require action from the customer: only for these is
 # missing evidence computed, and they feed `what_matters_for_this_product`.
 _ACTION = {DeltaType.NEW, DeltaType.CHANGED, DeltaType.NEEDS_REVIEW}
 def _classify(
     in_base: bool, has_ev: bool, change_type: ChangeType, rel_app: bool, rel_unc: bool
 ) -> Tuple[DeltaType, str, Confidence]:
     """Classify one obligation touched by a change into (delta type, reason, confidence).
     ``in_base``: obligation is part of the baseline; ``has_ev``: evidence is
     already on file for it; ``rel_app`` / ``rel_unc``: the change touches an
     applicable / an uncertain regulation of the stored map.
     """
     if not rel_app:
         if not rel_unc:
             # Change touches nothing in the stored map.
             return DeltaType.NOT_APPLICABLE, "Die Änderung betrifft kein Regelwerk Ihrer Map.", Confidence.HIGH
         # Only an uncertain regulation is touched: applicability must be settled first.
         return (
             DeltaType.NEEDS_REVIEW,
             "Betrifft ein für Ihr Produkt noch UNSICHERES Regelwerk — erst Anwendbarkeit klären.",
             Confidence.LOW,
         )
     # From here on the change touches at least one applicable regulation.
     if change_type == ChangeType.REPEAL:
         if not in_base:
             return DeltaType.NOT_APPLICABLE, "Aufhebung betrifft keine Ihrer bestehenden Pflichten.", Confidence.HIGH
         return DeltaType.REMOVED, "Regelwerk/Pflicht aufgehoben — entfällt für Ihr Produkt.", Confidence.HIGH
     if not in_base:
         return DeltaType.NEW, "Neue Pflicht durch die Änderung — bisher nicht in Ihrer Map.", Confidence.MEDIUM
     if change_type != ChangeType.GUIDANCE_UPDATE:
         return DeltaType.CHANGED, "Bestehende Pflicht inhaltlich geändert — Umsetzung und Nachweis prüfen.", Confidence.MEDIUM
     if not has_ev:
         return DeltaType.NEEDS_REVIEW, "Bestehende Pflicht ohne Nachweis — Leitlinien-Update prüfen.", Confidence.MEDIUM
     return (
         DeltaType.ALREADY_COVERED,
         "Bestehende Pflicht mit vorhandenen Nachweisen — Leitlinien-Update vermutlich abgedeckt.",
         Confidence.MEDIUM,
     )
 def assess_change(baseline: ComplianceBaseline, change: RegulatoryChange) -> ChangeAssessment:
     """Assess ``change`` against the stored ``baseline`` only.
     Produces one ObligationDelta per entry in ``change.affected_obligations``
     plus an aggregated summary. The change affects the product when it touches
     an applicable or an uncertain regulation of the baseline map. Missing
     evidence is listed only for delta types that require action.
     """
     snapshot = baseline.regulatory_map_snapshot
     touched_regs = set(change.affected_regulations)
     applicable_ids = {verdict.regulation_id for verdict in snapshot.applicable_regulations}
     uncertain_ids = {verdict.regulation_id for verdict in snapshot.uncertain_regulations}
     hits_applicable = not touched_regs.isdisjoint(applicable_ids)
     hits_uncertain = not touched_regs.isdisjoint(uncertain_ids)
     known_obligations = set(baseline.applicable_obligations)
     deltas: List[ObligationDelta] = []
     for obligation_id in change.affected_obligations:
         on_file = baseline.evidence_refs.get(obligation_id, [])
         delta_type, reason, confidence = _classify(
             obligation_id in known_obligations,
             bool(on_file),
             change.change_type,
             hits_applicable,
             hits_uncertain,
         )
         if delta_type in _ACTION:
             needed = baseline.obligation_evidence_required.get(obligation_id, [])
             outstanding = [item for item in needed if item not in on_file]
         else:
             outstanding = []
         delta = ObligationDelta(
             obligation_id=obligation_id,
             delta_type=delta_type,
             reason=reason,
             affected_evidence=list(on_file),
             missing_evidence=outstanding,
             confidence=confidence,
         )
         deltas.append(delta)
     unsupported = [entry.domain for entry in snapshot.unsupported_domains]
     return ChangeAssessment(
         change_id=change.change_id,
         affects_product=hits_applicable or hits_uncertain,
         deltas=deltas,
         summary=_summary(deltas, unsupported),
     )
 def _ids(deltas: List[ObligationDelta], *types: DeltaType) -> List[str]:
    wanted = set(types)
    return [d.obligation_id for d in deltas if d.delta_type in wanted]
 def _summary(deltas: List[ObligationDelta], unsupported: List[str]) -> ChangeImpactSummary:
     """Aggregate per-obligation deltas into the management summary.
     Note that CHANGED obligations are listed (and counted) both as "geändert"
     and under needs-review, so the counts in ``what_changed`` can add up to
     more than ``len(deltas)``.
     """
     added = _ids(deltas, DeltaType.NEW)
     modified = _ids(deltas, DeltaType.CHANGED)
     dropped = _ids(deltas, DeltaType.REMOVED)
     covered = _ids(deltas, DeltaType.ALREADY_COVERED)
     to_review = _ids(deltas, DeltaType.NEEDS_REVIEW, DeltaType.CHANGED)
     irrelevant = _ids(deltas, DeltaType.NOT_APPLICABLE)
     counts = (len(added), len(modified), len(dropped), len(covered), len(to_review), len(irrelevant))
     headline = "%d neu, %d geändert, %d entfällt, %d bereits abgedeckt, %d zu prüfen, %d nicht relevant." % counts
     return ChangeImpactSummary(
         what_changed=headline,
         what_matters_for_this_product=_ids(deltas, *_ACTION),
         already_covered=covered,
         needs_review=to_review,
         not_relevant=irrelevant,
         unsupported_domains=unsupported,
     )
@@ -0,0 +1,92 @@
 """Regulatory Change Intelligence (RCI) — domain objects.
 RCI is a read-/reasoning layer ON TOP of the product-first pipeline. It answers
 "what changes relative to my existing Regulatory Map?" — NOT "what does the new
 law say in general". A RegulatoryChange is simulated/provided INPUT (no ingestion,
 no newsletter/mailbox, no RAG); the delta is computed against a stored
 ComplianceBaseline (snapshot of the map).
 `delta_type` is a THIRD vocabulary — distinct from `ClaimCoverage` (Welt 1, what
 the customer claims) and `ComplianceStatus` (Welt 2, verified evidence). The three
 must never be conflated. These are application/reasoning types, NOT
 compliance-meta-model classes (architecture freeze v1.0 untouched).
 """
 from __future__ import annotations
 from enum import Enum
 from typing import Dict, List, Optional
 from pydantic import BaseModel, Field
 from compliance.profile.canonical import CanonicalProductRegulatoryProfile
 from compliance.reasoning.enums import AuthorityLevel, Confidence
 from compliance.regulatory_map.schemas import RegulatoryMap
 # Outcome of comparing one obligation touched by a change against the baseline.
 # String-valued enum: members are `str` instances carrying the values below.
 class DeltaType(str, Enum):
     NEW = "new"  # obligation now applies that was not in the baseline
     CHANGED = "changed"  # existing obligation substantively modified
     REMOVED = "removed"  # obligation no longer applies (repeal)
     ALREADY_COVERED = "already_covered"  # existing obligation, evidence likely suffices
     NEEDS_REVIEW = "needs_review"  # a human must check
     NOT_APPLICABLE = "not_applicable"  # change does not touch this product's map
 # Kind of regulatory change supplied as input (string-valued enum).
 class ChangeType(str, Enum):
     NEW_REGULATION = "new_regulation"
     AMENDMENT = "amendment"
     REPEAL = "repeal"
     GUIDANCE_UPDATE = "guidance_update"
 # ── stored snapshot ──────────────────────────────────────────────────────
 # Point-in-time snapshot that change assessments are computed against:
 # the profile, its rendered regulatory map, the obligation ids in scope and the
 # evidence required / already held per obligation.
 class ComplianceBaseline(BaseModel):
     baseline_id: str
     product_profile_snapshot: CanonicalProductRegulatoryProfile
     regulatory_map_snapshot: RegulatoryMap
     applicable_obligations: List[str] = Field(default_factory=list)  # registry-linked obligation_ids
     # required evidence per obligation (derived) — to compute missing_evidence
     obligation_evidence_required: Dict[str, List[str]] = Field(default_factory=dict)
     # evidence the customer ALREADY has, per obligation (provided)
     evidence_refs: Dict[str, List[str]] = Field(default_factory=dict)
     # free-form timestamp string; no format is enforced by this model
     created_at: Optional[str] = None
 # ── simulated/provided change (INPUT — never ingested) ───────────────────
 # A regulatory change handed in by the caller. Only `change_id` and
 # `change_type` are mandatory; every other field has a default.
 class RegulatoryChange(BaseModel):
     change_id: str
     source: str = "simulated"
     affected_regulations: List[str] = Field(default_factory=list)  # regulation ids the change touches
     affected_obligations: List[str] = Field(default_factory=list)  # obligation ids the change touches
     change_type: ChangeType
     effective_date: Optional[str] = None
     authority_level: AuthorityLevel = AuthorityLevel.LEGAL_TEXT
     summary: str = ""
 # ── per-obligation delta ─────────────────────────────────────────────────
 # Result of assessing a single obligation against a change: its delta type,
 # a human-readable reason, the evidence picture and the confidence of the verdict.
 class ObligationDelta(BaseModel):
     obligation_id: str
     delta_type: DeltaType
     reason: str
     affected_evidence: List[str] = Field(default_factory=list)  # evidence already present for it
     missing_evidence: List[str] = Field(default_factory=list)  # required but not yet present
     confidence: Confidence
 # ── management-level summary ──────────────────────────────────────────────
 # Aggregated view over all deltas of one assessment. The list fields hold
 # obligation ids grouped by delta type; `what_changed` is a one-line text summary.
 class ChangeImpactSummary(BaseModel):
     what_changed: str = ""
     what_matters_for_this_product: List[str] = Field(default_factory=list)  # need action
     already_covered: List[str] = Field(default_factory=list)
     needs_review: List[str] = Field(default_factory=list)
     not_relevant: List[str] = Field(default_factory=list)
     unsupported_domains: List[str] = Field(default_factory=list)
 # Complete result of assessing one RegulatoryChange: whether the product is
 # affected at all, the per-obligation deltas and their aggregated summary.
 class ChangeAssessment(BaseModel):
     change_id: str
     affects_product: bool
     deltas: List[ObligationDelta] = Field(default_factory=list)
     summary: ChangeImpactSummary
@@ -0,0 +1,27 @@
 """Regulatory Reasoning Engine.
 A deterministic reasoning layer ON TOP of the Legal Knowledge Graph (obligation
 registry) and the Compliance Execution Graph (control mapping / evidence). It
 answers, for a concrete product: which regulations apply, which obligations
 follow, whether the customer's implementation covers them, and whether a
 customer interpretation is legally sound.
 No new RAG, no new controls, no DB schema changes — scope & reasoning metamodel
 only (spec §14).
 """
 from __future__ import annotations
 from .claim_normalizer import normalize_claim
 from .implementation_engine import reason_implementation_claim
 from .interpretation_engine import assess_interpretation
 from .obligation_engine import derive_obligations
 from .scope_engine import discover_scope
 # Public entry points of the reasoning package, re-exported from its engine modules.
 __all__ = [
     "discover_scope",
     "derive_obligations",
     "normalize_claim",
     "reason_implementation_claim",
     "assess_interpretation",
 ]
@@ -0,0 +1,45 @@
 """Customer implementation claim normaliser (spec §4.6).
 Turns a free-text statement ("Wir haben einen Update-Prozess.") into structured
 capabilities + related topics + weakness qualifiers. Deterministic substring
 matching — the claim_id is a stable hash so the same statement always maps to
 the same id (no randomness, replay-safe).
 """
 from __future__ import annotations
 import hashlib
 from typing import List, Optional
 from .schemas import CustomerImplementationClaim
 from .taxonomy_claims import match_capabilities, match_qualifiers, topics_for
 def _claim_id(raw_statement: str) -> str:
    digest = hashlib.sha1(raw_statement.strip().lower().encode("utf-8")).hexdigest()
    return "claim_%s" % digest[:10]
 def _normalized(capabilities: List[str], qualifiers: List[str]) -> str:
    if not capabilities:
        return "Keine bekannte Compliance-Fähigkeit aus der Aussage ableitbar."
    text = "Fähigkeiten: " + ", ".join(capabilities)
    if qualifiers:
        text += " | Einschränkungen: " + ", ".join(qualifiers)
    return text
 def normalize_claim(
     raw_statement: str, claim_id: Optional[str] = None, evidence_refs: Optional[List[str]] = None
 ) -> CustomerImplementationClaim:
     """Turn a free-text customer statement into a structured claim.
     Capabilities and qualifiers are matched from the text; related topics are
     looked up from the capabilities. A falsy ``claim_id`` is replaced by the
     hash-based id of the statement, a falsy ``evidence_refs`` by an empty list.
     """
     found_capabilities = match_capabilities(raw_statement)
     found_qualifiers = match_qualifiers(raw_statement)
     identifier = claim_id if claim_id else _claim_id(raw_statement)
     summary_line = _normalized(found_capabilities, found_qualifiers)
     topics = topics_for(found_capabilities)
     references = evidence_refs if evidence_refs else []
     return CustomerImplementationClaim(
         claim_id=identifier,
         raw_statement=raw_statement,
         normalized_claim=summary_line,
         claimed_capability=found_capabilities,
         related_topics=topics,
         qualifiers=found_qualifiers,
         evidence_refs=references,
     )
@@ -0,0 +1,92 @@
 """Enumerations for the Regulatory Reasoning Engine.
 Kept dependency-free and Python 3.9 compatible (str-Enums, no `|` unions).
 The reasoning layer sits ON TOP of the Legal Knowledge Graph (obligation
 registry) and the Compliance Execution Graph (control mapping / evidence).
 See memory `project_compliance_graph.md` for the cross-session contract.
 """
 from __future__ import annotations
 from enum import Enum
 # Role an economic operator takes for a product (string-valued enum:
 # members are `str` instances carrying the values below).
 class ManufacturerRole(str, Enum):
     MANUFACTURER = "manufacturer"
     IMPORTER = "importer"
     DISTRIBUTOR = "distributor"
     INTEGRATOR = "integrator"
     OPERATOR = "operator"
     SERVICE_PROVIDER = "service_provider"
 # Phase of the product lifecycle a profile refers to (string-valued enum).
 class ProductLifecyclePhase(str, Enum):
     DEVELOPMENT = "development"
     PLACING_ON_MARKET = "placing_on_market"
     OPERATION = "operation"
     MAINTENANCE = "maintenance"
     UPDATE = "update"
     END_OF_LIFE = "end_of_life"
 # Customer segment the product is sold to (string-valued enum); BOTH covers
 # mixed business/consumer models.
 class MarketModel(str, Enum):
     B2B = "b2b"
     B2C = "b2c"
     BOTH = "both"
 # Whether a regulation or obligation applies to a product (string-valued enum).
 class ApplicabilityStatus(str, Enum):
     APPLICABLE = "applicable"
     PARTIALLY_APPLICABLE = "partially_applicable"
     UNCERTAIN = "uncertain"
     NOT_APPLICABLE = "not_applicable"
 # Three-step confidence scale attached to reasoning results (string-valued enum).
 class Confidence(str, Enum):
     HIGH = "high"
     MEDIUM = "medium"
     LOW = "low"
 # Source category of a statement (string-valued enum).
 class AuthorityLevel(str, Enum):
     """How binding a statement is — answers MUST visibly separate these."""
     LEGAL_TEXT = "legal_text"
     RECITAL = "recital"
     GUIDANCE = "guidance"
     HARMONIZED_STANDARD = "harmonized_standard"
     TECHNICAL_STANDARD = "technical_standard"
     BEST_PRACTICE = "best_practice"
     INTERNAL_INTERPRETATION = "internal_interpretation"
 # Relationship categories between two overlapping items (string-valued enum).
 # NOTE(review): no consumer is visible in this part of the file — presumably
 # used to relate obligations across regulations; confirm against callers.
 class OverlapType(str, Enum):
     IDENTICAL = "identical"
     SIMILAR = "similar"
     COMPLEMENTARY = "complementary"
     CONFLICTING = "conflicting"
     DIFFERENT_SCOPE = "different_scope"
 # String-valued enum; members are `str` instances carrying the values below.
 class ClaimCoverage(str, Enum):
     """How a customer's *claim* relates to an obligation — Welt 1 (reasoning).
     This is NOT a conformity verdict. It judges only the customer's statement,
     never whether the obligation is actually met. The real compliance verdict
     (erfüllt/offen/unklar from verified evidence) is `ComplianceStatus`, owned by
     the Compliance Execution Graph — the two must never be conflated.
     """
     POTENTIALLY_ADDRESSES = "potentially_addresses"  # claim names every required capability
     PARTIALLY_ADDRESSES = "partially_addresses"  # claim names some of them, or is only reactive
     DOES_NOT_ADDRESS = "does_not_address"  # no overlap, or the measure is absent/only planned
     INSUFFICIENT_INFORMATION = "insufficient_information"  # nothing to compare the claim against
 # Verdict on a customer's legal interpretation (string-valued enum).
 # UNCERTAIN is what the interpretation engine returns when no known pattern matches.
 class InterpretationVerdict(str, Enum):
     PLAUSIBLE = "plausible"
     TOO_NARROW = "too_narrow"
     TOO_BROAD = "too_broad"
     PARTIALLY_CORRECT = "partially_correct"
     UNSUPPORTED = "unsupported"
     UNCERTAIN = "uncertain"
@@ -0,0 +1,158 @@
 """Implementation reasoning (spec Modus 3) — Welt 1 only.
 Maps a free-text claim ("Wir haben SBOMs und machen Updates, wenn Kunden Fehler
 melden.") onto the product's applicable obligations and reports, per obligation,
 whether the *claim* potentially/partially/does-not address it — plus the
 evidence that WOULD be needed to prove real implementation.
 This is NOT a conformity verdict. It judges the customer's statement, never
 whether the obligation is met. The real verdict (ComplianceStatus: erfüllt/
 offen/unklar from verified evidence) lives in the Compliance Execution Graph.
 The four reasoning layers: claim -> interpretation (capabilities/topics on the
 claim) -> potential obligation coverage (`claim_coverage`) -> evidence required.
 """
 from __future__ import annotations
 from typing import Dict, List
 from .claim_normalizer import normalize_claim
 from .enums import ClaimCoverage, Confidence
 from .obligation_engine import derive_obligations
 from .schemas import (
    ClaimObligationMapping,
    CustomerImplementationClaim,
    ImplementationReasoningResponse,
    ProductProfile,
 )
 from .taxonomy_claims import topics_for
 # Fixed German disclaimer attached to every implementation-reasoning response:
 # the result interprets the customer's statement only and is not a conformity verdict.
 DISCLAIMER = (
     "Diese Auswertung interpretiert ausschließlich die Kundenaussage (ClaimCoverage, Welt 1). "
     "Sie ist KEINE Konformitätsaussage — der tatsächliche Compliance-Status (ComplianceStatus, "
     "Welt 2) ergibt sich erst aus geprüften Nachweisen im Compliance Execution Graph."
 )
 # Typical sub-elements a capability still misses when only partially claimed.
 # Keyed by capability id; values are German, user-facing gap descriptions that are
 # reported as `missing_elements`. Capabilities without an entry contribute no gaps.
 STANDARD_GAPS: Dict[str, List[str]] = {
     "software_bill_of_materials": [
         "Vulnerability-Monitoring der Komponenten",
         "Bewertung betroffener Komponenten",
         "Lieferantenprozess",
     ],
     "secure_updates": [
         "aktive Schwachstellenüberwachung",
         "Patch-Bewertung",
         "Fristen und Verantwortlichkeiten",
         "Nachweis der Updatefähigkeit",
     ],
     "vulnerability_management": [
         "definierter Vulnerability-Handling-Prozess",
         "Priorisierung und Fristen",
     ],
     "authentication": ["MFA für privilegierte Zugänge", "keine Standard-Zugangsdaten"],
     "security_logging": ["Schutz der Logs vor Manipulation", "Monitoring/Alerting"],
     "software_integrity": ["Signierung der Updates", "Verifikation der Update-Signatur"],
     "secure_by_default": ["Härtung der Auslieferungskonfiguration", "Minimierung der Angriffsfläche"],
     "secure_communication": ["verschlüsselte Übertragung", "Integritätsschutz der Verbindung"],
     "risk_assessment": ["dokumentierte Risikobewertung", "Aufnahme in die technische Doku"],
     "technical_documentation": ["vollständige technische Unterlagen", "Aktualisierung über den Lebenszyklus"],
 }
 def _missing_for(capabilities: List[str]) -> List[str]:
     """List the standard gaps of the given capabilities, de-duplicated in first-seen order."""
     # A dict keeps insertion order and drops repeats in one pass.
     ordered_gaps: Dict[str, None] = {}
     for capability in capabilities:
         for gap in STANDARD_GAPS.get(capability, []):
             ordered_gaps.setdefault(gap, None)
     return list(ordered_gaps)
 def _coverage(required: List[str], claimed: List[str], qualifiers: List[str]) -> ClaimCoverage:
     """Rate how far the claimed capabilities address an obligation's required ones.
     No required capabilities -> insufficient information. No overlap, or a
     claim qualified as ``absent``/``planned`` -> does not address. A
     ``reactive`` claim that overlaps on updates or vulnerability management is
     capped at partial. Otherwise: potentially addresses when every required
     capability is claimed, partial when only some are.
     """
     if not required:
         return ClaimCoverage.INSUFFICIENT_INFORMATION
     needed = set(required)
     offered = set(claimed)
     overlap = needed.intersection(offered)
     not_in_place = "absent" in qualifiers or "planned" in qualifiers
     if not overlap or not_in_place:
         return ClaimCoverage.DOES_NOT_ADDRESS
     reactive_sensitive = {"secure_updates", "vulnerability_management"}
     if "reactive" in qualifiers and not overlap.isdisjoint(reactive_sensitive):
         return ClaimCoverage.PARTIALLY_ADDRESSES
     if needed.issubset(offered):
         return ClaimCoverage.POTENTIALLY_ADDRESSES
     return ClaimCoverage.PARTIALLY_ADDRESSES
 def reason_implementation_claim(
     profile: ProductProfile, customer_claim: str
 ) -> ImplementationReasoningResponse:
     """Map a free-text customer claim onto the product's applicable obligations.
     The claim is normalised into capabilities/topics/qualifiers; every
     applicable obligation that shares a capability or topic with the claim gets
     a ClaimObligationMapping with its claim coverage, the typical missing
     elements and the evidence that would be required. Obligations unrelated to
     the claim are left out. ``missing_evidence`` collects, de-duplicated and in
     first-seen order, the required evidence of all mapped obligations that the
     claim does not fully address. The result is never a conformity verdict.
     """
     # Function-local import kept lazy (presumably to avoid an import cycle — confirm),
     # but resolved once per call instead of once per obligation.
     from .rules_obligations import obligation_rule
     claim = normalize_claim(customer_claim)
     obligations = derive_obligations(profile).applicable_obligations
     claimed = claim.claimed_capability
     # Capabilities also count as topics, so a direct capability hit is always a topic hit.
     claim_topics = set(claim.related_topics) | set(claimed)
     mappings: List[ClaimObligationMapping] = []
     missing_evidence: List[str] = []
     for ob in obligations:
         rule = obligation_rule(ob.obligation_id)
         required_caps = rule.required_capabilities if rule else []
         ob_topics = set(topics_for(required_caps)) | set(required_caps)
         if not (ob_topics & claim_topics):
             continue  # unrelated to the claim -> don't reason about it
         coverage = _coverage(required_caps, claimed, claim.qualifiers)
         if coverage == ClaimCoverage.POTENTIALLY_ADDRESSES:
             missing: List[str] = []
         else:
             missing = _missing_for(required_caps)
             for ev in ob.required_evidence:
                 if ev not in missing_evidence:
                     missing_evidence.append(ev)
         mappings.append(
             ClaimObligationMapping(
                 claim_id=claim.claim_id,
                 obligation_id=ob.obligation_id,
                 claim_coverage=coverage,
                 missing_elements=missing,
                 required_evidence=ob.required_evidence,
                 explanation=_explain(coverage, ob.title, claim.qualifiers),
                 confidence=Confidence.MEDIUM,
             )
         )
     return ImplementationReasoningResponse(
         claim=claim,
         mappings=mappings,
         missing_evidence=missing_evidence,
         summary=_summary(claim, mappings),
         disclaimer=DISCLAIMER,
     )
 def _explain(coverage: ClaimCoverage, title: str, qualifiers: List[str]) -> str:
     """Produce the German one-sentence explanation for a coverage verdict on the obligation ``title``."""
     if coverage == ClaimCoverage.DOES_NOT_ADDRESS:
         return "Die Aussage adressiert die Pflicht '%s' nicht." % title
     if coverage == ClaimCoverage.PARTIALLY_ADDRESSES:
         # Reactive processes get an extra hint appended to the partial verdict.
         if "reactive" in qualifiers:
             suffix = " Der beschriebene Prozess wirkt reaktiv."
         else:
             suffix = ""
         return "Die Aussage adressiert die Pflicht '%s' nur teilweise.%s" % (title, suffix)
     if coverage == ClaimCoverage.POTENTIALLY_ADDRESSES:
         return "Die Aussage adressiert die Pflicht '%s' direkt — Nachweise erforderlich für eine Bewertung der Umsetzung." % title
     return "Zur Pflicht '%s' liegen zu wenige Angaben für eine Einordnung vor." % title
 def _summary(claim: CustomerImplementationClaim, mappings: List[ClaimObligationMapping]) -> str:
     """Summarise the mappings as one German sentence with the number of fully,
     partially and not addressed obligations; a claim without any recognised
     capability yields a request to be more specific instead."""
     if not claim.claimed_capability:
         return "Die Aussage ist zu unspezifisch — bitte konkretisieren, was umgesetzt wurde."

     def _count(kind: ClaimCoverage) -> int:
         return len([m for m in mappings if m.claim_coverage == kind])

     counts = (
         _count(ClaimCoverage.POTENTIALLY_ADDRESSES),
         _count(ClaimCoverage.PARTIALLY_ADDRESSES),
         _count(ClaimCoverage.DOES_NOT_ADDRESS),
     )
     template = (
         "Die beschriebene Maßnahme adressiert wahrscheinlich %d Pflicht(en) direkt und %d "
         "teilweise; %d werden durch die Aussage nicht berührt. Für eine Bewertung der tatsächlichen "
         "Umsetzung sind Nachweise erforderlich. Dies ist keine Konformitätsaussage."
     )
     return template % counts
@@ -0,0 +1,65 @@
 """Interpretation review engine (spec Modus 4).
 Evaluates whether a customer's legal interpretation is plausible, too narrow,
 too broad, etc. Matches the interpretation against a curated pattern library;
 no match -> `uncertain` plus a request for the missing context (never invent a
 verdict, spec §6.3).
 """
 from __future__ import annotations
 import hashlib
 from typing import Optional
 from .enums import Confidence, InterpretationVerdict
 from .schemas import InterpretationAssessment, ProductProfile
 from .taxonomy_interpretations import INTERPRETATION_PATTERNS, InterpretationPattern
 def _interpretation_id(raw: str) -> str:
    digest = hashlib.sha1(raw.strip().lower().encode("utf-8")).hexdigest()
    return "interp_%s" % digest[:10]
 def _best_match(text: str) -> Optional[InterpretationPattern]:
    """Pick the library pattern with the most trigger hits in `text`.
    A trigger counts as a hit when it occurs as a substring of the lower-cased
    text. On a tie the pattern that appears first in INTERPRETATION_PATTERNS
    wins; when no trigger matches at all, None is returned.
    """
    haystack = text.lower()
    winner: Optional[InterpretationPattern] = None
    top_hits = 0
    for candidate in INTERPRETATION_PATTERNS:
        hits = len([trigger for trigger in candidate.triggers if trigger in haystack])
        # Strictly greater only: zero-hit patterns never win and ties keep the earlier one.
        if hits <= top_hits:
            continue
        winner = candidate
        top_hits = hits
    return winner
 def assess_interpretation(
    raw_interpretation: str, profile: Optional[ProductProfile] = None
 ) -> InterpretationAssessment:
    """Assess a customer's legal interpretation against the pattern library.
    When a pattern matches, its verdict, risks, corrected wording, legal basis,
    explanation and confidence are copied into the assessment. Without a match
    the result is UNCERTAIN with LOW confidence and a request for more context.
    `profile` is accepted but not read by this function.
    """
    # Fields shared by both outcomes.
    common = {
        "interpretation_id": _interpretation_id(raw_interpretation),
        "raw_interpretation": raw_interpretation,
    }
    matched = _best_match(raw_interpretation)
    if matched is not None:
        return InterpretationAssessment(
            affected_regulations=matched.affected_regulations,
            affected_obligations=matched.affected_obligations,
            assessment=matched.verdict,
            risks=matched.risks,
            corrected_interpretation=matched.corrected_interpretation,
            legal_basis_refs=matched.legal_basis_refs,
            explanation=matched.explanation,
            confidence=matched.confidence,
            **common,
        )
    # No known pattern: report uncertainty instead of inventing a verdict.
    return InterpretationAssessment(
        assessment=InterpretationVerdict.UNCERTAIN,
        corrected_interpretation=(
            "Diese Auslegung lässt sich ohne weitere Angaben nicht bewerten. Bitte Produkt, "
            "Rolle, Marktzugang und die konkret betroffene Pflicht benennen."
        ),
        explanation="Kein bekanntes Auslegungsmuster erkannt — bewusst keine Scheinsicherheit.",
        confidence=Confidence.LOW,
        **common,
    )
@@ -0,0 +1,116 @@
 """Applicable-obligation engine (spec Modus 2).
 Maps a product profile (optionally a precomputed scope) to the concrete legal
 obligations, the overlaps between them, and which evidence types satisfy more
 than one obligation at once (the core USP, spec §16).
 """
 from __future__ import annotations
 from typing import Dict, List, Optional
 from .predicates import evaluate, true_leaves
 from .rules_obligations import ALL_OBLIGATIONS
 from .rules_overlaps import OVERLAP_GROUPS
 from .rules_regulations import FIELD_LABELS
 from .rules_types import ObligationRule
 from .schemas import (
    ApplicableObligation,
    ObligationOverlap,
    ObligationsResponse,
    ProductProfile,
    RegulatoryScope,
 )
 from .scope_engine import discover_scope
 def _applicable_regulation_ids(profile: ProductProfile, scope: Optional[RegulatoryScope]) -> List[str]:
    if scope is None:
        scope = discover_scope(profile)
    return [r.regulation_id for r in scope.applicable_regulations]
 def _applies_because(rule: ObligationRule, profile: ProductProfile) -> List[str]:
    """List the human-readable reasons why `rule` applies to `profile`.
    Every leaf of `rule.applies_if` that evaluated True is translated through
    FIELD_LABELS (keyed by the leaf's field name); duplicates and fields
    without a label are dropped. If nothing remains, a generic sentence naming
    the source regulation is returned instead.
    """
    reasons: List[str] = []
    for fact_leaf in true_leaves(rule.applies_if, profile):
        text = FIELD_LABELS.get(fact_leaf[0])
        if not text or text in reasons:
            continue
        reasons.append(text)
    if reasons:
        return reasons
    return ["%s ist für dieses Produkt anwendbar." % rule.source_regulation]
 def _role_ok(rule: ObligationRule, profile: ProductProfile) -> bool:
    role = profile.manufacturer_role
    if role is None:
        return True  # unknown role -> do not exclude
    return role.value in rule.applies_to_role
 def derive_obligations(profile: ProductProfile, scope: Optional[RegulatoryScope] = None) -> ObligationsResponse:
    """Derive the obligations that apply to `profile`.
    Only rules whose source regulation is applicable in the (given or
    discovered) scope are considered. A rule is skipped when its
    `applies_unless` condition is definitely True. A rule whose `applies_if`
    is definitely False is recorded in `excluded_obligations`; an unknown
    verdict or a role mismatch drops the rule without recording it. The
    response also carries the overlap groups and the evidence types shared by
    more than one applicable obligation.
    """
    in_scope = set(_applicable_regulation_ids(profile, scope))
    response = ObligationsResponse()
    applied_ids: List[str] = []
    for rule in ALL_OBLIGATIONS:
        if rule.source_regulation not in in_scope:
            continue
        carve_out = rule.applies_unless
        if carve_out is not None and evaluate(carve_out, profile) is True:
            continue
        verdict = evaluate(rule.applies_if, profile)
        if verdict is False:
            # Definitely not applicable: remember the exclusion.
            response.excluded_obligations.append(rule.obligation_id)
            continue
        if verdict is not True or not _role_ok(rule, profile):
            # Unknown verdict or wrong economic-operator role: silently skipped.
            continue
        entry = ApplicableObligation(
            obligation_id=rule.obligation_id,
            title=rule.title,
            source_regulation=rule.source_regulation,
            legal_basis_refs=rule.legal_basis_refs,
            obligation_text=rule.obligation_text,
            authority_level=rule.authority_level,
            applies_because=_applies_because(rule, profile),
            applies_to_role=rule.applies_to_role,
            lifecycle_phase=rule.lifecycle_phase,
            overlap_group_id=rule.overlap_group_id,
            required_evidence=rule.required_evidence,
            confidence=rule.base_confidence,
            registry_anchor=rule.registry_anchor,
            proposed=rule.proposed,
        )
        applied_ids.append(rule.obligation_id)
        response.applicable_obligations.append(entry)
    response.overlaps = _overlaps(applied_ids)
    response.evidence_for_multiple = _evidence_for_multiple(response.applicable_obligations)
    return response
 def _overlaps(applied_ids: List[str]) -> List[ObligationOverlap]:
    """Build the overlap entries for the obligations that actually apply.
    A group from OVERLAP_GROUPS is reported only when at least two of its
    members are among `applied_ids`; the entry lists just those members, in
    the order the group declares them.
    """
    applicable = set(applied_ids)
    found: List[ObligationOverlap] = []
    for group in OVERLAP_GROUPS:
        hit_members = [member for member in group.members if member in applicable]
        if len(hit_members) < 2:
            continue
        overlap = ObligationOverlap(
            overlap_group_id=group.overlap_group_id,
            obligations=hit_members,
            overlap_type=group.overlap_type,
            canonical_obligation_id=group.canonical_obligation_id,
            explanation=group.explanation,
        )
        found.append(overlap)
    return found
 def _evidence_for_multiple(obligations: List[ApplicableObligation]) -> Dict[str, List[str]]:
    by_evidence: Dict[str, List[str]] = {}
    for ob in obligations:
        for ev in ob.required_evidence:
            by_evidence.setdefault(ev, []).append(ob.obligation_id)
    return {ev: ids for ev, ids in by_evidence.items() if len(ids) > 1}
@@ -0,0 +1,100 @@
 """Safe, tri-state condition evaluator for applicability rules.
 Conditions are plain data (no `eval`): a *leaf* is a 3-tuple
 ``(field, op, value)``; a *composite* is ``{"all": [...]}`` or
 ``{"any": [...]}``. Evaluation is tri-state — ``True`` / ``False`` /
 ``None`` (unknown) — so a missing product fact yields *uncertain*, never a
 false negative.
 """
 from __future__ import annotations
 from enum import Enum
 from typing import Any, Dict, List, Optional, Tuple, Union
 Leaf = Tuple[str, str, Any]
 Condition = Union[Leaf, Dict[str, Any]]
 def _attr(profile: Any, field: str) -> Any:
    value = getattr(profile, field, None)
    if isinstance(value, Enum):
        return value.value
    return value
 def _eval_leaf(leaf: Leaf, profile: Any) -> Optional[bool]:
    field, op, expected = leaf
    actual = _attr(profile, field)
    if op == "not_none":
        return actual is not None
    if op == "is_none":
        return actual is None
    if op == "contains_any":
        # list-valued field (e.g. product_type); empty list = known-empty.
        items = actual or []
        hay = " ".join(str(x).lower() for x in items)
        return any(str(k).lower() in hay for k in expected)
    if actual is None:
        return None  # unknown fact -> unknown result
    if op == "eq":
        return bool(actual == expected)
    if op == "ne":
        return bool(actual != expected)
    if op == "truthy":
        return bool(actual)
    if op == "falsy":
        return not bool(actual)
    if op == "in":
        return bool(actual in expected)
    if op == "not_in":
        return bool(actual not in expected)
    if op == "date_after":
        return bool(actual > expected)
    raise ValueError("unknown predicate op: %r" % (op,))
 def evaluate(condition: Optional[Condition], profile: Any) -> Optional[bool]:
    """Evaluate a condition tree to True, False or None (unknown).
    A missing condition (None) counts as True and a tuple is evaluated as a
    leaf. For ``{"all": [...]}`` any False child decides False, otherwise any
    unknown child makes the result unknown, otherwise True. ``{"any": [...]}``
    mirrors that with True as the deciding value. Every child is evaluated
    before the result is decided. Raises ValueError for a mapping with neither
    key.
    """
    if condition is None:
        return True
    if isinstance(condition, tuple):
        return _eval_leaf(condition, profile)
    # (key, deciding value): one deciding child settles the composite.
    for key, decisive in (("all", False), ("any", True)):
        if key not in condition:
            continue
        outcomes = [evaluate(child, profile) for child in condition[key]]
        if any(outcome is decisive for outcome in outcomes):
            return decisive
        if any(outcome is None for outcome in outcomes):
            return None
        return not decisive
    raise ValueError("malformed condition: %r" % (condition,))
 def true_leaves(condition: Optional[Condition], profile: Any) -> List[Leaf]:
    """Return every leaf in the tree that evaluates True (for trigger_facts).
    Leaves are collected depth-first in declaration order, regardless of the
    verdict of the composite that contains them. A mapping contributes the
    children of its ``"all"`` list, or of ``"any"`` when ``"all"`` is missing
    or empty.
    """
    if condition is None:
        return []
    if isinstance(condition, tuple):
        if _eval_leaf(condition, profile) is True:
            return [condition]
        return []
    children = condition.get("all") or condition.get("any") or []
    return [leaf for child in children for leaf in true_leaves(child, profile)]
 def unknown_fields(fields: List[str], profile: Any) -> List[str]:
    """Return, in input order, the names from `fields` that read as None on `profile`."""
    missing: List[str] = []
    for name in fields:
        if _attr(profile, name) is None:
            missing.append(name)
    return missing
@@ -0,0 +1,23 @@
 """Aggregated obligation scope rules + lookup helpers."""
 from __future__ import annotations
 from typing import Dict, List, Optional
 from .rules_obligations_cra import CRA_OBLIGATIONS
 from .rules_obligations_machine_data import DATA_ACT_OBLIGATIONS, MACHINE_OBLIGATIONS
 from .rules_types import ObligationRule
 # Every obligation rule known to the engine: CRA first, then MaschinenVO, then Data Act.
 ALL_OBLIGATIONS: List[ObligationRule] = (
    CRA_OBLIGATIONS + MACHINE_OBLIGATIONS + DATA_ACT_OBLIGATIONS
 )
 # Lookup index by obligation_id. If two rules shared an id, the later one in
 # ALL_OBLIGATIONS would replace the earlier one in this dict.
 _BY_ID: Dict[str, ObligationRule] = {o.obligation_id: o for o in ALL_OBLIGATIONS}
 def obligation_rule(obligation_id: str) -> Optional[ObligationRule]:
    """Look up a rule by its obligation id; None when the id is not registered."""
    return _BY_ID[obligation_id] if obligation_id in _BY_ID else None
 def obligations_for_regulation(regulation_id: str) -> List[ObligationRule]:
    """Return all rules whose `source_regulation` equals `regulation_id`, in registry order."""
    selected: List[ObligationRule] = []
    for rule in ALL_OBLIGATIONS:
        if rule.source_regulation == regulation_id:
            selected.append(rule)
    return selected
@@ -0,0 +1,271 @@
 """CRA obligation scope rules.
 `obligation_id`s in the six CRA-P1 families (sbom/vuln/authentication/logging/
 remote_access/updates) are RE-USED verbatim from the Legal-KG registry
 (`obligations/obligation_join_keys.json`) — never re-minted (control_uuid trap,
 memory `project_compliance_graph.md`). Cross-cutting CRA *process* obligations
 (risk assessment, technical documentation, CE, instructions, secure-by-design
 umbrella) are not yet in the registry and are flagged `proposed=True`.
 """
 from __future__ import annotations
 from typing import List
 from .enums import AuthorityLevel, Confidence
 from .rules_types import ObligationRule
 # Shared applicability conditions in the predicate format ``(field, op, value)``.
 # Leaf: the profile's `has_software` fact equals True.
 _HAS_SW = ("has_software", "eq", True)
 # Leaf: the profile's `eu_market` fact equals True.
 _EU = ("eu_market", "eq", True)
 # Composite: at least one of `has_remote_access` / `has_cloud_connection` equals True.
 _REMOTE_OR_CLOUD = {"any": [("has_remote_access", "eq", True), ("has_cloud_connection", "eq", True)]}
 # Short alias for the authority level used by every rule in this module.
 _LM = AuthorityLevel.LEGAL_TEXT
 # CRA obligation rules as plain data, grouped by `family`. Every rule requires
 # the EU-market leaf (_EU); the remote_access family additionally requires
 # _REMOTE_OR_CLOUD, all other families require _HAS_SW.
 # NOTE(review): the Annex I citations use several notations ("Annex I (2)(c)",
 # "Annex I (1)(3)(f)", "Annex I Part I (2)(k)", "Annex I Part II (1)") —
 # confirm each against the regulation text and settle on one style.
 CRA_OBLIGATIONS: List[ObligationRule] = [
    # --- family: sbom ---------------------------------------------------------
    ObligationRule(
        obligation_id="sbom_creation",
        title="Software Bill of Materials erstellen",
        source_regulation="CRA",
        obligation_text="Eine SBOM erstellen, die mindestens die obersten Abhängigkeiten des Produkts dokumentiert.",
        legal_basis_refs=["CRA Annex I Part II (1)"],
        authority_level=_LM,
        family="sbom",
        applies_if={"all": [_HAS_SW, _EU]},
        required_capabilities=["software_bill_of_materials"],
        required_evidence=["sbom", "repo_scan"],
        lifecycle_phase=["development", "placing_on_market", "maintenance"],
        registry_anchor=True,
    ),
    # --- family: updates ------------------------------------------------------
    ObligationRule(
        obligation_id="provide_security_updates",
        title="Sicherheitsupdates bereitstellen",
        source_regulation="CRA",
        obligation_text="Sicherheitsrelevante Updates zeitnah und über den Supportzeitraum bereitstellen.",
        legal_basis_refs=["CRA Annex I (2)(c)", "CRA Art. 13"],
        authority_level=_LM,
        family="updates",
        applies_if={"all": [_HAS_SW, _EU]},
        required_capabilities=["secure_updates"],
        required_evidence=["policy", "ticket", "test_report"],
        lifecycle_phase=["maintenance", "update"],
        overlap_group_id="SECURITY_UPDATES",
        registry_anchor=True,
    ),
    ObligationRule(
        obligation_id="support_period_maintenance",
        title="Supportzeitraum definieren und einhalten",
        source_regulation="CRA",
        obligation_text="Einen angemessenen Supportzeitraum festlegen, in dem Schwachstellen behandelt werden.",
        legal_basis_refs=["CRA Art. 13(8)"],
        authority_level=_LM,
        family="updates",
        applies_if={"all": [_HAS_SW, _EU]},
        required_capabilities=["secure_updates"],
        required_evidence=["policy"],
        lifecycle_phase=["placing_on_market", "maintenance", "update"],
        registry_anchor=True,
    ),
    ObligationRule(
        obligation_id="signed_update_integrity",
        title="Integrität von Updates sicherstellen",
        source_regulation="CRA",
        obligation_text="Updates signieren und ihre Integrität bei der Verteilung verifizieren.",
        legal_basis_refs=["CRA Annex I (1)(3)(f)"],
        authority_level=_LM,
        family="updates",
        applies_if={"all": [_HAS_SW, _EU]},
        required_capabilities=["software_integrity"],
        required_evidence=["config_export", "test_report"],
        lifecycle_phase=["development", "maintenance", "update"],
        overlap_group_id="SECURITY_UPDATES",
        registry_anchor=True,
    ),
    # --- family: vuln ---------------------------------------------------------
    ObligationRule(
        obligation_id="vuln_handling_process",
        title="Schwachstellenbehandlungs-Prozess",
        source_regulation="CRA",
        obligation_text="Einen dokumentierten Prozess zur Identifikation, Bewertung und Behebung von Schwachstellen betreiben.",
        legal_basis_refs=["CRA Art. 13(8)", "CRA Annex VII"],
        authority_level=_LM,
        family="vuln",
        applies_if={"all": [_HAS_SW, _EU]},
        required_capabilities=["vulnerability_management"],
        required_evidence=["policy", "ticket"],
        lifecycle_phase=["development", "operation", "maintenance"],
        overlap_group_id="VULNERABILITY_HANDLING",
        registry_anchor=True,
    ),
    ObligationRule(
        obligation_id="coordinated_vulnerability_disclosure",
        title="Coordinated Vulnerability Disclosure",
        source_regulation="CRA",
        obligation_text="Eine Richtlinie zur koordinierten Offenlegung von Schwachstellen bereitstellen.",
        legal_basis_refs=["CRA Annex I Part II (5)"],
        authority_level=_LM,
        family="vuln",
        applies_if={"all": [_HAS_SW, _EU]},
        required_capabilities=["coordinated_disclosure"],
        required_evidence=["policy"],
        lifecycle_phase=["operation", "maintenance"],
        overlap_group_id="VULNERABILITY_HANDLING",
        registry_anchor=True,
    ),
    ObligationRule(
        obligation_id="exploited_vuln_reporting_authorities",
        title="Meldung aktiv ausgenutzter Schwachstellen / Vorfälle",
        source_regulation="CRA",
        obligation_text="Aktiv ausgenutzte Schwachstellen und schwerwiegende Vorfälle an die zuständigen Behörden melden.",
        legal_basis_refs=["CRA Art. 14", "CRA Art. 16"],
        authority_level=_LM,
        family="vuln",
        applies_if={"all": [_HAS_SW, _EU]},
        required_capabilities=["incident_reporting"],
        required_evidence=["policy", "ticket"],
        lifecycle_phase=["operation", "maintenance"],
        registry_anchor=True,
    ),
    # --- family: authentication -----------------------------------------------
    ObligationRule(
        obligation_id="user_authentication_required",
        title="Authentifizierung vorsehen",
        source_regulation="CRA",
        obligation_text="Den Zugang über einen geeigneten Authentifizierungsmechanismus schützen.",
        legal_basis_refs=["CRA Annex I (2)(d)"],
        authority_level=_LM,
        family="authentication",
        applies_if={"all": [_HAS_SW, _EU]},
        required_capabilities=["authentication"],
        required_evidence=["config_export", "pentest"],
        lifecycle_phase=["development", "operation"],
        registry_anchor=True,
    ),
    ObligationRule(
        obligation_id="no_default_credentials",
        title="Keine unveränderlichen Standard-Zugangsdaten",
        source_regulation="CRA",
        obligation_text="Sichere Standardkonfiguration; keine fest hinterlegten oder unveränderlichen Standard-Passwörter.",
        legal_basis_refs=["CRA Annex I (2)(a)", "CRA Annex I (2)(b)"],
        authority_level=_LM,
        family="authentication",
        applies_if={"all": [_HAS_SW, _EU]},
        required_capabilities=["secure_by_default"],
        required_evidence=["config_export", "test_report"],
        lifecycle_phase=["development", "placing_on_market"],
        registry_anchor=True,
    ),
    # --- family: logging ------------------------------------------------------
    ObligationRule(
        obligation_id="event_logging_security_events",
        title="Sicherheitsrelevante Ereignisse protokollieren",
        source_regulation="CRA",
        obligation_text="Sicherheitsrelevante Ereignisse und Zugriffe aufzeichnen, um Vorfälle nachvollziehen zu können.",
        legal_basis_refs=["CRA Annex I Part I (2)(k)"],
        authority_level=_LM,
        family="logging",
        applies_if={"all": [_HAS_SW, _EU]},
        required_capabilities=["security_logging"],
        required_evidence=["config_export", "audit_log"],
        lifecycle_phase=["operation", "maintenance"],
        registry_anchor=True,
    ),
    # --- family: remote_access (gated on _REMOTE_OR_CLOUD, not _HAS_SW) -------
    ObligationRule(
        obligation_id="remote_access_attack_surface_min",
        title="Angriffsfläche minimieren",
        source_regulation="CRA",
        obligation_text="Die Angriffsfläche begrenzen, insbesondere exponierte Remote-/Cloud-Schnittstellen.",
        legal_basis_refs=["CRA Annex I (1)(2)(a)"],
        authority_level=_LM,
        family="remote_access",
        applies_if={"all": [_REMOTE_OR_CLOUD, _EU]},
        required_capabilities=["secure_by_default"],
        required_evidence=["config_export", "repo_scan", "pentest"],
        lifecycle_phase=["development", "operation"],
        registry_anchor=True,
    ),
    ObligationRule(
        obligation_id="remote_access_confidentiality_integrity",
        title="Vertraulichkeit/Integrität der Fernverbindung",
        source_regulation="CRA",
        obligation_text="Daten bei Fernzugriff/Cloud-Anbindung verschlüsselt und integritätsgeschützt übertragen.",
        legal_basis_refs=["CRA Annex I (1)(2)(b)", "CRA Annex I (1)(2)(c)"],
        authority_level=_LM,
        family="remote_access",
        applies_if={"all": [_REMOTE_OR_CLOUD, _EU]},
        required_capabilities=["secure_communication"],
        required_evidence=["config_export", "pentest"],
        lifecycle_phase=["operation"],
        registry_anchor=True,
    ),
    # --- Cross-cutting CRA process obligations (not yet in registry) ---------
    # These carry proposed=True instead of registry_anchor=True.
    ObligationRule(
        obligation_id="cra_secure_by_design",
        title="Security by Design",
        source_regulation="CRA",
        obligation_text="Das Produkt so entwerfen, entwickeln und herstellen, dass ein angemessenes Cybersicherheitsniveau gewährleistet ist.",
        legal_basis_refs=["CRA Annex I Part I (1)"],
        authority_level=_LM,
        family="cra_process",
        applies_if={"all": [_HAS_SW, _EU]},
        required_capabilities=["secure_by_default", "risk_assessment"],
        required_evidence=["policy", "test_report"],
        lifecycle_phase=["development", "placing_on_market"],
        proposed=True,
    ),
    ObligationRule(
        obligation_id="cra_risk_assessment",
        title="Cybersicherheits-Risikobewertung",
        source_regulation="CRA",
        obligation_text="Eine Cybersicherheits-Risikobewertung durchführen und dokumentieren; in die technische Dokumentation aufnehmen.",
        legal_basis_refs=["CRA Art. 13(2)", "CRA Annex I Part I (1)"],
        authority_level=_LM,
        family="cra_process",
        applies_if={"all": [_HAS_SW, _EU]},
        required_capabilities=["risk_assessment"],
        required_evidence=["policy"],
        lifecycle_phase=["development", "placing_on_market"],
        overlap_group_id="RISK_ASSESSMENT",
        proposed=True,
    ),
    ObligationRule(
        obligation_id="cra_technical_documentation",
        title="Technische Dokumentation",
        source_regulation="CRA",
        obligation_text="Technische Dokumentation erstellen und aktuell halten, die Konformität mit den Anforderungen belegt.",
        legal_basis_refs=["CRA Art. 31", "CRA Annex VII"],
        authority_level=_LM,
        family="cra_process",
        applies_if={"all": [_HAS_SW, _EU]},
        required_capabilities=["technical_documentation"],
        required_evidence=["policy"],
        lifecycle_phase=["placing_on_market", "maintenance"],
        overlap_group_id="TECHNICAL_DOCUMENTATION",
        proposed=True,
    ),
    ObligationRule(
        obligation_id="cra_ce_conformity_assessment",
        title="Konformitätsbewertung / CE-Kennzeichnung",
        source_regulation="CRA",
        obligation_text="Vor dem Inverkehrbringen das passende Konformitätsbewertungsverfahren durchlaufen und CE kennzeichnen.",
        legal_basis_refs=["CRA Art. 32", "CRA Art. 28"],
        authority_level=_LM,
        family="cra_process",
        applies_if={"all": [_HAS_SW, _EU]},
        required_capabilities=["conformity_assessment"],
        required_evidence=["test_report", "policy"],
        lifecycle_phase=["placing_on_market"],
        overlap_group_id="CE_CONFORMITY",
        proposed=True,
    ),
    ObligationRule(
        obligation_id="cra_instructions_for_use",
        title="Informationen und Anweisungen für Nutzer",
        source_regulation="CRA",
        obligation_text="Nutzern verständliche Sicherheitsinformationen und -anweisungen bereitstellen (z. B. zu Updates und Support-Ende).",
        legal_basis_refs=["CRA Annex II"],
        authority_level=_LM,
        family="cra_process",
        applies_if={"all": [_HAS_SW, _EU]},
        required_capabilities=["technical_documentation"],
        required_evidence=["policy"],
        lifecycle_phase=["placing_on_market"],
        overlap_group_id="INSTRUCTIONS_FOR_USE",
        proposed=True,
    ),
 ]
@@ -0,0 +1,139 @@
 """MaschinenVO and Data Act obligation scope rules.
 These regulations are NOT yet in the Legal-KG registry (which currently covers
 the six CRA-P1 families). Every obligation here is therefore `proposed=True`:
 the reasoning layer proposes the snake_case id, the Obligation Registry session
 remains the only authority that may canonicalise it (re-link, never re-mint).
 """
 from __future__ import annotations
 from typing import List
 from .enums import AuthorityLevel, Confidence
 from .rules_types import ObligationRule
 # Shared applicability leaves in the predicate format ``(field, op, value)``.
 # Leaf: the profile's `eu_market` fact equals True.
 _EU = ("eu_market", "eq", True)
 # Leaf: the profile's `is_machine` fact equals True.
 _IS_MACHINE = ("is_machine", "eq", True)
 # Short alias for the authority level used by every rule in this module.
 _LM = AuthorityLevel.LEGAL_TEXT
 # MaschinenVO obligation rules. Every rule requires _IS_MACHINE and _EU and is
 # flagged proposed=True; two of them additionally require a safety function.
 # NOTE(review): some secondary legal_basis_refs omit the regulation prefix
 # (e.g. "Anhang IV") — confirm whether consumers need self-contained citations.
 MACHINE_OBLIGATIONS: List[ObligationRule] = [
    ObligationRule(
        obligation_id="machine_risk_assessment",
        title="Maschinen-Risikobeurteilung",
        source_regulation="MaschinenVO",
        obligation_text="Eine Risikobeurteilung der Maschine durchführen, um Gefährdungen zu ermitteln und zu mindern.",
        legal_basis_refs=["MaschinenVO (EU) 2023/1230 Anhang III (1.1.1)", "EN ISO 12100"],
        authority_level=_LM,
        family="machine_safety",
        applies_if={"all": [_IS_MACHINE, _EU]},
        required_capabilities=["risk_assessment"],
        required_evidence=["policy"],
        lifecycle_phase=["development", "placing_on_market"],
        overlap_group_id="RISK_ASSESSMENT",
        proposed=True,
    ),
    ObligationRule(
        obligation_id="machine_safety_control_systems",
        title="Sichere Steuerungssysteme",
        source_regulation="MaschinenVO",
        obligation_text="Sicherheitsbezogene Teile der Steuerung so auslegen, dass Ausfälle nicht zu gefährlichen Zuständen führen.",
        legal_basis_refs=["MaschinenVO (EU) 2023/1230 Anhang III (1.2.1)", "EN ISO 13849-1"],
        authority_level=_LM,
        family="machine_safety",
        applies_if={"all": [_IS_MACHINE, ("has_safety_function", "eq", True), _EU]},
        required_capabilities=["functional_safety"],
        required_evidence=["test_report", "policy"],
        lifecycle_phase=["development", "placing_on_market"],
        proposed=True,
    ),
    ObligationRule(
        obligation_id="machine_protection_against_corruption",
        title="Schutz gegen Korrumpierung sicherheitsrelevanter Funktionen",
        source_regulation="MaschinenVO",
        obligation_text="Sicherstellen, dass eine (auch beabsichtigte) Korrumpierung der Software/Verbindung keine gefährliche Situation auslöst.",
        legal_basis_refs=["MaschinenVO (EU) 2023/1230 Anhang III (1.1.9)"],
        authority_level=_LM,
        family="machine_safety",
        # Machine with a safety function AND (remote access OR software), on the EU market.
        applies_if={
            "all": [
                _IS_MACHINE,
                ("has_safety_function", "eq", True),
                {"any": [("has_remote_access", "eq", True), ("has_software", "eq", True)]},
                _EU,
            ]
        },
        required_capabilities=["software_integrity", "secure_by_default"],
        required_evidence=["test_report", "config_export"],
        lifecycle_phase=["development", "operation", "maintenance"],
        overlap_group_id="VULNERABILITY_HANDLING",
        proposed=True,
    ),
    ObligationRule(
        obligation_id="machine_instructions_for_use",
        title="Betriebsanleitung",
        source_regulation="MaschinenVO",
        obligation_text="Eine vollständige Betriebsanleitung mit Sicherheitshinweisen bereitstellen.",
        legal_basis_refs=["MaschinenVO (EU) 2023/1230 Anhang III (1.7.4)"],
        authority_level=_LM,
        family="machine_safety",
        applies_if={"all": [_IS_MACHINE, _EU]},
        required_capabilities=["technical_documentation"],
        required_evidence=["policy"],
        lifecycle_phase=["placing_on_market"],
        overlap_group_id="INSTRUCTIONS_FOR_USE",
        proposed=True,
    ),
    ObligationRule(
        obligation_id="machine_ce_conformity",
        title="Konformitätsbewertung / CE (Maschine)",
        source_regulation="MaschinenVO",
        obligation_text="Das passende Konformitätsbewertungsverfahren der MaschinenVO durchlaufen und CE kennzeichnen.",
        legal_basis_refs=["MaschinenVO (EU) 2023/1230 Art. 25", "Anhang IV"],
        authority_level=_LM,
        family="machine_safety",
        applies_if={"all": [_IS_MACHINE, _EU]},
        required_capabilities=["conformity_assessment"],
        required_evidence=["test_report", "policy"],
        lifecycle_phase=["placing_on_market"],
        overlap_group_id="CE_CONFORMITY",
        proposed=True,
    ),
 ]
 # Data Act obligation rules. Both require that the product generates usage
 # data and is on the EU market, and both are flagged proposed=True.
 DATA_ACT_OBLIGATIONS: List[ObligationRule] = [
    ObligationRule(
        obligation_id="data_act_data_access_by_design",
        title="Datenzugang by design",
        source_regulation="DataAct",
        obligation_text="Vernetzte Produkte so gestalten, dass die erzeugten Produktdaten standardmäßig zugänglich sind.",
        legal_basis_refs=["Data Act (EU) 2023/2854 Art. 3"],
        authority_level=_LM,
        family="data_act",
        # Usage data AND (cloud connection OR remote access), on the EU market.
        applies_if={
            "all": [
                ("generates_usage_data", "eq", True),
                {"any": [("has_cloud_connection", "eq", True), ("has_remote_access", "eq", True)]},
                _EU,
            ]
        },
        required_capabilities=["data_access_provision"],
        required_evidence=["config_export", "policy"],
        lifecycle_phase=["development", "placing_on_market"],
        proposed=True,
    ),
    ObligationRule(
        obligation_id="data_act_user_data_access",
        title="Datenzugang für Nutzer",
        source_regulation="DataAct",
        obligation_text="Nutzern Zugang zu den von ihnen erzeugten Daten gewähren und Weitergabe an Dritte ermöglichen.",
        legal_basis_refs=["Data Act (EU) 2023/2854 Art. 4", "Art. 5"],
        authority_level=_LM,
        family="data_act",
        applies_if={"all": [("generates_usage_data", "eq", True), _EU]},
        required_capabilities=["data_access_provision"],
        required_evidence=["policy"],
        lifecycle_phase=["operation"],
        proposed=True,
    ),
 ]
@@ -0,0 +1,91 @@
 """Obligation overlap groups (spec §4.5 / Modus 2).
 Overlaps are emitted only for the members that are actually applicable to the
 product. `canonical_obligation_id` points at the strongest / most specific
 obligation in the group (preferring a registry-anchored CRA id).
 """
 from __future__ import annotations
 from dataclasses import dataclass
 from typing import List
 from .enums import OverlapType
@dataclass(frozen=True)
 class OverlapGroup:
    """Static definition of one group of overlapping obligations."""
    # Identifier of the group, copied into the emitted overlap entry.
    overlap_group_id: str
    # Obligation ids that belong to the group.
    members: List[str]
    # Kind of overlap between the members (an OverlapType value).
    overlap_type: OverlapType
    # The strongest / most specific obligation of the group (see module docstring).
    canonical_obligation_id: str
    # Human-readable (German) explanation of how the members relate.
    explanation: str
 # Curated overlap groups. Member ids refer to obligation ids declared in the
 # CRA / MaschinenVO rule modules.
 OVERLAP_GROUPS: List[OverlapGroup] = [
    OverlapGroup(
        overlap_group_id="VULNERABILITY_HANDLING",
        members=[
            "vuln_handling_process",
            "coordinated_vulnerability_disclosure",
            "machine_protection_against_corruption",
        ],
        overlap_type=OverlapType.COMPLEMENTARY,
        canonical_obligation_id="vuln_handling_process",
        explanation=(
            "CRA adressiert die Schwachstellenbehandlung des Produkts. Die MaschinenVO wird "
            "komplementär relevant, sobald eine Cyber-Schwachstelle eine Sicherheitsfunktion "
            "beeinflussen kann (Anhang III 1.1.9). Nicht identisch, aber gemeinsam zu erfüllen."
        ),
    ),
    OverlapGroup(
        overlap_group_id="SECURITY_UPDATES",
        members=["provide_security_updates", "signed_update_integrity"],
        overlap_type=OverlapType.COMPLEMENTARY,
        canonical_obligation_id="provide_security_updates",
        explanation=(
            "Updates bereitstellen und ihre Integrität sichern sind zwei Seiten desselben "
            "Update-Prozesses; ein Nachweis (Update-Policy, Release Notes) deckt teils beide ab."
        ),
    ),
    OverlapGroup(
        overlap_group_id="RISK_ASSESSMENT",
        members=["cra_risk_assessment", "machine_risk_assessment"],
        overlap_type=OverlapType.DIFFERENT_SCOPE,
        canonical_obligation_id="cra_risk_assessment",
        explanation=(
            "Zwei getrennte Risikobetrachtungen: CRA = Cybersicherheits-Risiko, MaschinenVO = "
            "Sicherheits-/Gefährdungsbeurteilung. Methodisch verwandt, inhaltlich unterschiedlich."
        ),
    ),
    # NOTE(review): the second member is "machine_risk_assessment", whose rule
    # declares overlap_group_id="RISK_ASSESSMENT", and no MaschinenVO
    # technical-documentation obligation is defined. Confirm whether the risk
    # assessment is an intentional stand-in here or a copy-paste slip.
    OverlapGroup(
        overlap_group_id="TECHNICAL_DOCUMENTATION",
        members=["cra_technical_documentation", "machine_risk_assessment"],
        overlap_type=OverlapType.SIMILAR,
        canonical_obligation_id="cra_technical_documentation",
        explanation=(
            "Beide Regime verlangen eine technische Dokumentation; Teile (Risikobetrachtung, "
            "Konstruktionsunterlagen) lassen sich in einem konsolidierten technischen Dossier führen."
        ),
    ),
    OverlapGroup(
        overlap_group_id="CE_CONFORMITY",
        members=["cra_ce_conformity_assessment", "machine_ce_conformity"],
        overlap_type=OverlapType.COMPLEMENTARY,
        canonical_obligation_id="machine_ce_conformity",
        explanation=(
            "Ein Produkt kann zwei CE-Regime gleichzeitig erfüllen müssen (MaschinenVO + CRA). "
            "Eine gemeinsame CE-Kennzeichnung, aber getrennte Konformitätsbewertungen."
        ),
    ),
    OverlapGroup(
        overlap_group_id="INSTRUCTIONS_FOR_USE",
        members=["cra_instructions_for_use", "machine_instructions_for_use"],
        overlap_type=OverlapType.SIMILAR,
        canonical_obligation_id="machine_instructions_for_use",
        explanation=(
            "Betriebsanleitung (MaschinenVO) und Sicherheitsinformationen (CRA) überschneiden sich; "
            "ein integriertes Anleitungsdokument kann beide Pflichten bedienen."
        ),
    ),
 ]
@@ -0,0 +1,160 @@
 """Regulation-level applicability trigger rules (scope discovery, spec Modus 1).
 Each rule is pure data consumed by `scope_engine`. Triggers reference
 `ProductProfile` fields through the safe predicate evaluator. `required_facts`
 that are unknown turn the verdict *uncertain* and surface `fact_prompts`.
 """
 from __future__ import annotations
 from dataclasses import dataclass, field
 from typing import Dict, List, Optional
 from .enums import Confidence
 from .predicates import Condition
 # Positive, human-readable (German) label per profile fact, keyed by the
 # ProductProfile field name. The scope engine looks up the field of every leaf
 # returned by `true_leaves` in this table to build `trigger_facts`; a field
 # without an entry here is left out of that list.
 FIELD_LABELS: Dict[str, str] = {
    "has_software": "Produkt enthält Software / digitale Elemente",
    "has_embedded_software": "Produkt enthält eingebettete Software",
    "has_remote_access": "Produkt besitzt Fernzugriff / Fernwartung",
    "has_cloud_connection": "Produkt ist mit einer Cloud verbunden",
    "has_radio_module": "Produkt enthält ein Funkmodul",
    "has_safety_function": "Produkt erfüllt eine Sicherheitsfunktion",
    "generates_usage_data": "Vernetztes Produkt erzeugt nutzbare Produktdaten",
    "is_machine": "Produkt ist eine Maschine",
    "is_component": "Produkt ist ein (Sicherheits-)Bauteil",
    "eu_market": "Produkt wird auf dem EU-Markt bereitgestellt",
    "is_essential_or_important_entity": "Unternehmen ist wesentliche/wichtige Einrichtung",
    "manufacturer_role": "Wirtschaftsakteur-Rolle (Hersteller/Importeur/Händler)",
 }
@dataclass(frozen=True)
 class RegulationRule:
    """Immutable, data-only applicability rule for one regulation.

    The scope engine evaluates `trigger` against a product profile and files the
    regulation as applicable, uncertain or excluded depending on the result.
    """
    # Stable identifier of the regulation (e.g. "CRA"); used for lookups.
    regulation_id: str
    # Full display name of the regulation.
    name: str
    # Condition tree (predicate tuples and "all"/"any" dicts) over profile fields.
    trigger: Condition
    # Profile fields that must be known; while the trigger is true, any of them
    # still unknown downgrades the confidence by one step.
    required_facts: List[str]
    # Profile field name -> question shown to the user when that fact is unknown.
    fact_prompts: Dict[str, str]
    # Citations (articles / annexes) copied into the applicable-regulation output.
    legal_basis_refs: List[str]
    # One-sentence explanation reused as the `explanation` of the verdict.
    summary: str
    # Confidence reported when the trigger is true and the rule is not inferred.
    confidence_when_applicable: Confidence = Confidence.HIGH
    # NOTE(review): no code in the visible scope engine reads `exclusion` —
    # confirm whether it is evaluated elsewhere or still to be wired in.
    exclusion: Optional[Condition] = None
    # Status is downgraded to PARTIALLY_APPLICABLE / MEDIUM when the trigger
    # fires only via inference rather than a directly stated fact.
    inferred: bool = False
    # Role values for which the regulation is excluded without looking at the trigger.
    excludable_roles: List[str] = field(default_factory=list)
 # Economic-operator role values. NOTE(review): nothing in the visible part of
 # this module reads this constant — confirm it is still needed.
 _ECONOMIC_ROLES = ["manufacturer", "importer", "distributor"]
 # Applicability rules, one per regulation. The scope engine walks this list in
 # order, so the order here is the order of the regulations in its output.
 REGULATION_RULES: List[RegulationRule] = [
    RegulationRule(
        regulation_id="CRA",
        name="Cyber Resilience Act (EU) 2024/2847",
        # (software OR embedded software) AND placed on the EU market.
        trigger={
            "all": [
                {"any": [("has_software", "eq", True), ("has_embedded_software", "eq", True)]},
                ("eu_market", "eq", True),
            ]
        },
        required_facts=["has_software", "eu_market", "manufacturer_role"],
        fact_prompts={
            "has_software": "Enthält das Produkt Software / digitale Elemente?",
            "eu_market": "Wird das Produkt auf dem EU-Markt bereitgestellt oder in Verkehr gebracht?",
            "manufacturer_role": "Welche Rolle nehmen Sie ein (Hersteller / Importeur / Händler)?",
        },
        legal_basis_refs=["CRA Art. 2(1)", "CRA Art. 3(1)"],
        summary="Produkte mit digitalen Elementen, die auf dem EU-Markt bereitgestellt werden.",
        confidence_when_applicable=Confidence.HIGH,
        # A profile whose role value is "operator" is excluded before the trigger counts.
        excludable_roles=["operator"],
    ),
    RegulationRule(
        regulation_id="MaschinenVO",
        name="Maschinenverordnung (EU) 2023/1230",
        # A machine, OR a component that fulfils a safety function.
        trigger={
            "any": [
                ("is_machine", "eq", True),
                {"all": [("is_component", "eq", True), ("has_safety_function", "eq", True)]},
            ]
        },
        # NOTE(review): "eu_market" is required here but is neither part of the
        # trigger nor covered by fact_prompts, and "is_component" has no prompt
        # either — an unknown value for them cannot be asked for by prompt.
        required_facts=["is_machine", "eu_market"],
        fact_prompts={
            "is_machine": "Ist das Produkt eine Maschine oder ein Sicherheitsbauteil?",
            "has_safety_function": "Erfüllt das Bauteil eine Sicherheitsfunktion?",
        },
        legal_basis_refs=["MaschinenVO (EU) 2023/1230 Art. 2", "Anhang III"],
        summary="Maschinen oder Sicherheitsbauteile, ggf. mit sicherheitsrelevanter Steuerung.",
        confidence_when_applicable=Confidence.MEDIUM,
    ),
    RegulationRule(
        regulation_id="RED",
        name="Radio Equipment Directive 2014/53/EU",
        # Single-predicate trigger: the product contains a radio module.
        trigger=("has_radio_module", "eq", True),
        required_facts=["has_radio_module"],
        fact_prompts={
            "has_radio_module": "Besitzt das Produkt ein Funkmodul (WLAN, Bluetooth, Mobilfunk)?",
        },
        legal_basis_refs=["RED 2014/53/EU Art. 1", "Art. 3(3)(d-f)"],
        summary="Funkanlagen; Art. 3(3) deckt zusätzlich Cybersecurity-Anforderungen ab.",
        confidence_when_applicable=Confidence.HIGH,
    ),
    RegulationRule(
        regulation_id="EMV",
        name="EMV-Richtlinie 2014/30/EU",
        # Inferred rule: any digital element or radio module is taken as a sign
        # of an electrical / electronic product.
        trigger={
            "any": [
                ("has_software", "eq", True),
                ("has_embedded_software", "eq", True),
                ("has_radio_module", "eq", True),
            ]
        },
        required_facts=[],
        # NOTE(review): "is_electrical" is neither a required fact nor a trigger
        # field (and ProductProfile has no such field), so the visible engine
        # never looks this prompt up; none of the three trigger fields has a
        # prompt in this rule.
        fact_prompts={
            "is_electrical": "Ist das Produkt ein elektrisches / elektronisches Betriebsmittel?",
        },
        legal_basis_refs=["EMV-RL 2014/30/EU Art. 2"],
        summary="Elektrische/elektronische Betriebsmittel (hier aus den digitalen Elementen abgeleitet).",
        confidence_when_applicable=Confidence.MEDIUM,
        inferred=True,
    ),
    RegulationRule(
        regulation_id="DataAct",
        name="Data Act (EU) 2023/2854",
        # (cloud connection OR remote access) AND the product generates usage data.
        trigger={
            "all": [
                {"any": [("has_cloud_connection", "eq", True), ("has_remote_access", "eq", True)]},
                ("generates_usage_data", "eq", True),
            ]
        },
        required_facts=["generates_usage_data"],
        # NOTE(review): the two connectivity fields used by the trigger have no prompt.
        fact_prompts={
            "generates_usage_data": "Erzeugt das vernetzte Produkt nutzbare Produkt-/Nutzungsdaten?",
        },
        legal_basis_refs=["Data Act (EU) 2023/2854 Art. 2(5)", "Art. 3-5"],
        summary="Vernetzte Produkte, die Nutzungsdaten erzeugen und zugänglich machen.",
        confidence_when_applicable=Confidence.HIGH,
    ),
    RegulationRule(
        regulation_id="NIS2",
        name="NIS2-Richtlinie (EU) 2022/2555",
        # Organisation-level trigger; company_size and sector are only required
        # facts and do not take part in the trigger itself.
        trigger=("is_essential_or_important_entity", "eq", True),
        required_facts=["company_size", "sector", "is_essential_or_important_entity"],
        fact_prompts={
            "company_size": "Unternehmensgröße (Mitarbeiterzahl / Umsatz)?",
            "sector": "In welchem Sektor ist das Unternehmen tätig (Anhang I/II)?",
            "is_essential_or_important_entity": "Fällt das Unternehmen als wesentliche/wichtige Einrichtung unter NIS2?",
        },
        legal_basis_refs=["NIS2-RL (EU) 2022/2555 Art. 2", "Art. 3"],
        summary="Adressiert die ORGANISATION (Größe/Sektor/Rolle), nicht das Produkt.",
        confidence_when_applicable=Confidence.MEDIUM,
    ),
 ]
 def regulation_rule(regulation_id: str) -> Optional[RegulationRule]:
    """Return the first rule in REGULATION_RULES whose id equals `regulation_id`.

    The comparison is an exact, case-sensitive string match. None is returned
    when no rule carries that id.
    """
    matching = (candidate for candidate in REGULATION_RULES if candidate.regulation_id == regulation_id)
    return next(matching, None)
@@ -0,0 +1,58 @@
 """Shared types for obligation scope rules.
 `required_evidence` MUST draw from the framework-AGNOSTIC evidence catalog
 owned by the Compliance Execution Graph (memory `project_compliance_graph.md`,
 user directive of 2026-06-25). Do not invent framework-specific evidence types.
 """
 from __future__ import annotations
 from dataclasses import dataclass, field
 from typing import List, Optional
 from .enums import AuthorityLevel, Confidence
 from .predicates import Condition
 # Closed, framework-agnostic vocabulary of evidence types. ObligationRule
 # rejects any `required_evidence` token that is not listed here.
 EVIDENCE_CATALOG = frozenset(
    (
        "config_export",
        "test_report",
        "repo_scan",
        "sbom",
        "policy",
        "audit_log",
        "pentest",
        "ticket",
    )
 )
@dataclass(frozen=True)
class ObligationRule:
    """Immutable, data-only rule describing one obligation and when it applies.

    Construction fails with ValueError if `required_evidence` contains a token
    that is not part of EVIDENCE_CATALOG.
    """

    # Identity and provenance of the obligation.
    obligation_id: str
    title: str
    source_regulation: str
    obligation_text: str
    legal_basis_refs: List[str]
    authority_level: AuthorityLevel
    family: str
    # Condition under which the obligation applies.
    applies_if: Condition
    required_capabilities: List[str]
    # Evidence tokens; every entry must come from EVIDENCE_CATALOG.
    required_evidence: List[str]
    base_confidence: Confidence = Confidence.HIGH
    # Optional counter-condition.
    applies_unless: Optional[Condition] = None
    lifecycle_phase: List[str] = field(default_factory=list)
    # Defaults to manufacturer and importer; each instance gets its own list.
    applies_to_role: List[str] = field(default_factory=lambda: ["manufacturer", "importer"])
    overlap_group_id: Optional[str] = None
    # True => obligation_id is owned by the Legal-KG registry (re-link, never re-mint).
    registry_anchor: bool = False
    # True => Machine/Data-Act obligation the registry has not canonicalised yet.
    proposed: bool = False

    def __post_init__(self) -> None:
        """Reject evidence tokens that are outside the shared catalog."""
        off_catalog = list(
            filter(lambda token: token not in EVIDENCE_CATALOG, self.required_evidence)
        )
        if not off_catalog:
            return
        raise ValueError(
            "obligation %s uses non-catalog evidence %r" % (self.obligation_id, off_catalog)
        )
@@ -0,0 +1,226 @@
 """Pydantic domain objects for the Regulatory Reasoning Engine.
 Trigger facts that drive scope are tri-state (`Optional[bool] = None`): `None`
 means "fact unknown" and produces an *uncertain* verdict plus a concrete
 missing-fact prompt — never silent false security (spec §6.3).
 """
 from __future__ import annotations
 from datetime import date
 from typing import Dict, List, Optional
 from pydantic import BaseModel, Field
 from .enums import (
    ApplicabilityStatus,
    AuthorityLevel,
    ClaimCoverage,
    Confidence,
    InterpretationVerdict,
    ManufacturerRole,
    MarketModel,
    OverlapType,
    ProductLifecyclePhase,
 )
 # ---------------------------------------------------------------------------
 # Input
 # ---------------------------------------------------------------------------
 # Input model describing the product under assessment. Only `product_name` is
 # mandatory; every other field defaults to None (or an empty list), so an
 # unanswered question stays distinguishable from an explicit False.
 class ProductProfile(BaseModel):
    """The customer's product / system. Tri-state booleans => unknown facts."""
    product_name: str
    product_profile_id: Optional[str] = None
    manufacturer_role: Optional[ManufacturerRole] = None
    product_type: List[str] = Field(default_factory=list)
    # Tri-state product facts: True / False / None (= not yet known).
    has_software: Optional[bool] = None
    has_embedded_software: Optional[bool] = None
    has_remote_access: Optional[bool] = None
    has_cloud_connection: Optional[bool] = None
    has_ai_functionality: Optional[bool] = None
    has_radio_module: Optional[bool] = None
    has_safety_function: Optional[bool] = None
    generates_usage_data: Optional[bool] = None
    is_machine: Optional[bool] = None
    is_component: Optional[bool] = None
    is_spare_part: Optional[bool] = None
    # Market placement and usage context.
    placed_on_market_after: Optional[date] = None
    intended_use: Optional[str] = None
    eu_market: Optional[bool] = None
    b2b_or_b2c: Optional[MarketModel] = None
    lifecycle_phase: Optional[ProductLifecyclePhase] = None
    # Organisation context — only needed for NIS2 (not a product fact).
    company_size: Optional[str] = None
    sector: Optional[str] = None
    is_essential_or_important_entity: Optional[bool] = None
 # ---------------------------------------------------------------------------
 # Scope
 # ---------------------------------------------------------------------------
 # A regulation whose trigger evaluated to true for the profile, together with
 # the facts that fired it, its legal basis and the resulting confidence.
 class ApplicableRegulation(BaseModel):
    regulation_id: str
    name: str
    applicability_status: ApplicabilityStatus
    # Human-readable labels of the profile facts that satisfied the trigger.
    trigger_facts: List[str] = Field(default_factory=list)
    legal_basis_refs: List[str] = Field(default_factory=list)
    confidence: Confidence
    explanation: str
 # A regulation ruled out for the profile, with the human-readable reason
 # (excluded role, or trigger not met by the known facts).
 class ExcludedRegulation(BaseModel):
    regulation_id: str
    name: str
    reason: str
 # A regulation that could not be decided because trigger facts are unknown.
 class UncertainRegulation(BaseModel):
    regulation_id: str
    name: str
    # Prompts for the facts that have to be supplied to reach a verdict.
    missing_facts: List[str] = Field(default_factory=list)
    explanation: str
 # Result of scope discovery: every rule ends up in exactly one of the three
 # regulation lists; `missing_facts` collects the de-duplicated prompts.
 class RegulatoryScope(BaseModel):
    product_profile_id: Optional[str] = None
    applicable_regulations: List[ApplicableRegulation] = Field(default_factory=list)
    excluded_regulations: List[ExcludedRegulation] = Field(default_factory=list)
    uncertain_regulations: List[UncertainRegulation] = Field(default_factory=list)
    missing_facts: List[str] = Field(default_factory=list)
    # Overall confidence; the scope engine overwrites this default when it finishes.
    confidence: Confidence = Confidence.MEDIUM
    reasoning_summary: str = ""
 # ---------------------------------------------------------------------------
 # Obligations
 # ---------------------------------------------------------------------------
 # Output model for one obligation that applies to the profile. Most fields
 # share their names with the ObligationRule dataclass; `applies_because`
 # carries the reasons for applicability.
 class ApplicableObligation(BaseModel):
    obligation_id: str
    title: str
    source_regulation: str
    legal_basis_refs: List[str] = Field(default_factory=list)
    obligation_text: str
    authority_level: AuthorityLevel
    applies_because: List[str] = Field(default_factory=list)
    applies_to_role: List[str] = Field(default_factory=list)
    lifecycle_phase: List[str] = Field(default_factory=list)
    # Set when the obligation belongs to a cross-regulation overlap group.
    overlap_group_id: Optional[str] = None
    required_evidence: List[str] = Field(default_factory=list)
    confidence: Confidence
    # True only when obligation_id is owned by the Legal-KG registry (CRA P1).
    registry_anchor: bool = False
    # Machine/Data-Act obligations the registry has not canonicalised yet.
    proposed: bool = False
 # A group of obligations from different regulations that overlap, with the
 # overlap type, the obligation chosen as canonical and an explanation.
 class ObligationOverlap(BaseModel):
    overlap_group_id: str
    obligations: List[str] = Field(default_factory=list)
    overlap_type: OverlapType
    canonical_obligation_id: str
    explanation: str
 # ---------------------------------------------------------------------------
 # Customer claims & assessments
 # ---------------------------------------------------------------------------
 # A customer statement about their implementation: the raw text plus the
 # normalised capabilities, topics and qualifiers derived from it.
 class CustomerImplementationClaim(BaseModel):
    claim_id: str
    raw_statement: str
    normalized_claim: str = ""
    claimed_capability: List[str] = Field(default_factory=list)
    related_topics: List[str] = Field(default_factory=list)
    qualifiers: List[str] = Field(default_factory=list)
    evidence_refs: List[str] = Field(default_factory=list)
 # Links one claim to one obligation; see the docstring for the layering.
 class ClaimObligationMapping(BaseModel):
    """One row of Welt-1 reasoning: how a customer claim relates to an obligation.
    Layers (spec / architect): claim -> interpretation (on the claim object) ->
    *potential* obligation coverage (`claim_coverage`) -> evidence required.
    Carries NO compliance verdict.
    """
    claim_id: str
    obligation_id: str
    # Potential coverage of the obligation by the claim (not a compliance status).
    claim_coverage: ClaimCoverage
    missing_elements: List[str] = Field(default_factory=list)
    required_evidence: List[str] = Field(default_factory=list)
    explanation: str
    confidence: Confidence
 # Assessment of a customer's reading of the law: the verdict, the regulations
 # and obligations it touches, the risks and a corrected interpretation.
 class InterpretationAssessment(BaseModel):
    interpretation_id: str
    raw_interpretation: str
    affected_regulations: List[str] = Field(default_factory=list)
    affected_obligations: List[str] = Field(default_factory=list)
    assessment: InterpretationVerdict
    risks: List[str] = Field(default_factory=list)
    corrected_interpretation: str = ""
    legal_basis_refs: List[str] = Field(default_factory=list)
    explanation: str
    confidence: Confidence
 # ---------------------------------------------------------------------------
 # API request / response envelopes
 # ---------------------------------------------------------------------------
 # Request envelope: the product profile to run scope discovery on.
 class ScopeRequest(BaseModel):
    product_profile: ProductProfile
 # Response envelope for scope discovery. NOTE(review): `missing_facts` and
 # `confidence` also exist on RegulatoryScope — presumably mirrored here for
 # convenience; confirm the API layer keeps both in sync.
 class ScopeResponse(BaseModel):
    regulatory_scope: RegulatoryScope
    missing_facts: List[str] = Field(default_factory=list)
    confidence: Confidence
 # Request envelope for obligation discovery; the scope is optional.
 class ObligationsRequest(BaseModel):
    product_profile: ProductProfile
    regulatory_scope: Optional[RegulatoryScope] = None
 # Response envelope for obligation discovery: applicable obligations, their
 # overlaps, the ids of excluded obligations and a string -> list-of-strings map.
 class ObligationsResponse(BaseModel):
    applicable_obligations: List[ApplicableObligation] = Field(default_factory=list)
    overlaps: List[ObligationOverlap] = Field(default_factory=list)
    excluded_obligations: List[str] = Field(default_factory=list)
    # presumably evidence token -> obligation ids it serves; verify against the producer.
    evidence_for_multiple: Dict[str, List[str]] = Field(default_factory=dict)
 # Request envelope: a product profile plus one free-text customer claim.
 class ImplementationReasoningRequest(BaseModel):
    product_profile: ProductProfile
    customer_claim: str
 # Response envelope for claim reasoning: the normalised claim, its mappings to
 # obligations, the evidence still missing, a summary and a disclaimer.
 class ImplementationReasoningResponse(BaseModel):
    claim: CustomerImplementationClaim
    mappings: List[ClaimObligationMapping] = Field(default_factory=list)
    missing_evidence: List[str] = Field(default_factory=list)
    summary: str = ""
    # Makes the Welt-1 boundary explicit: this is advisory claim-mapping, not a
    # conformity verdict (that is ComplianceStatus in the Execution Graph).
    disclaimer: str = ""
 # Request envelope: the customer's interpretation text; the profile is optional.
 class InterpretationRequest(BaseModel):
    product_profile: Optional[ProductProfile] = None
    customer_interpretation: str
 # Response envelope for an interpretation check. Only `assessment` is
 # mandatory; all other fields have empty defaults and confidence defaults to MEDIUM.
 class InterpretationResponse(BaseModel):
    assessment: InterpretationVerdict
    affected_regulations: List[str] = Field(default_factory=list)
    affected_obligations: List[str] = Field(default_factory=list)
    corrected_interpretation: str = ""
    risks: List[str] = Field(default_factory=list)
    legal_basis_refs: List[str] = Field(default_factory=list)
    explanation: str = ""
    confidence: Confidence = Confidence.MEDIUM
@@ -0,0 +1,136 @@
 """Scope discovery engine (spec Modus 1).
 Answers "which regulations apply to my product?" — and, crucially, never says
 "X applies" without the triggers, and never hides a missing fact behind a false
 verdict. Pure rule evaluation, deterministic.
 """
 from __future__ import annotations
 from typing import List, Optional
 from .enums import ApplicabilityStatus, Confidence
 from .predicates import Condition, evaluate, true_leaves, unknown_fields
 from .rules_regulations import REGULATION_RULES, FIELD_LABELS, RegulationRule
 from .schemas import (
    ApplicableRegulation,
    ExcludedRegulation,
    ProductProfile,
    RegulatoryScope,
    UncertainRegulation,
 )
 # One-step confidence downgrade (HIGH -> MEDIUM -> LOW; LOW stays LOW), applied
 # when a rule's trigger is true but some of its required facts are still unknown.
 _DOWNGRADE = {Confidence.HIGH: Confidence.MEDIUM, Confidence.MEDIUM: Confidence.LOW, Confidence.LOW: Confidence.LOW}
 def _fields_in(condition: Optional[Condition]) -> List[str]:
    if condition is None:
        return []
    if isinstance(condition, tuple):
        return [condition[0]]
    out: List[str] = []
    for c in condition.get("all") or condition.get("any") or []:
        out.extend(_fields_in(c))
    return out
 def _trigger_facts(rule: RegulationRule, profile: ProductProfile) -> List[str]:
    labels: List[str] = []
    for leaf in true_leaves(rule.trigger, profile):
        label = FIELD_LABELS.get(leaf[0])
        if label and label not in labels:
            labels.append(label)
    return labels
 def _missing_prompts(rule: RegulationRule, profile: ProductProfile) -> List[str]:
    fields = list(dict.fromkeys(rule.required_facts + _fields_in(rule.trigger)))
    unknown = unknown_fields(fields, profile)
    prompts: List[str] = []
    for f in unknown:
        prompt = rule.fact_prompts.get(f)
        if prompt and prompt not in prompts:
            prompts.append(prompt)
    return prompts
 def discover_scope(profile: ProductProfile) -> RegulatoryScope:
    """Sort every regulation rule into applicable / uncertain / excluded.

    For each rule in REGULATION_RULES, in list order:

    * a profile role listed in the rule's `excludable_roles` excludes the
      regulation outright, without evaluating the trigger;
    * trigger is True  -> applicable. Inferred rules are reported as
      PARTIALLY_APPLICABLE with MEDIUM confidence. Required facts that are
      still unknown downgrade the confidence by one step and are added to the
      scope-level `missing_facts`;
    * trigger is None  -> uncertain, with the prompts from `_missing_prompts`;
    * otherwise        -> excluded by the known facts.

    Returns the populated RegulatoryScope, including the overall confidence and
    a one-line reasoning summary.
    """
    scope = RegulatoryScope(product_profile_id=profile.product_profile_id)
    # The role is a property of the profile, not of the rule: resolve it once.
    role_value = profile.manufacturer_role.value if profile.manufacturer_role is not None else None
    for rule in REGULATION_RULES:
        if role_value is not None and role_value in rule.excludable_roles:
            scope.excluded_regulations.append(
                ExcludedRegulation(
                    regulation_id=rule.regulation_id,
                    name=rule.name,
                    reason="Rolle '%s' ist von dieser Regulierung nicht unmittelbar adressiert." % role_value,
                )
            )
            continue
        trig = evaluate(rule.trigger, profile)
        if trig is True:
            conf = Confidence.MEDIUM if rule.inferred else rule.confidence_when_applicable
            status = (
                ApplicabilityStatus.PARTIALLY_APPLICABLE if rule.inferred else ApplicabilityStatus.APPLICABLE
            )
            unresolved = unknown_fields(rule.required_facts, profile)
            if unresolved:
                conf = _DOWNGRADE[conf]
                for f in unresolved:
                    # A required fact without a curated prompt must still be
                    # reported, otherwise the downgrade above is unexplained:
                    # fall back to its label, then to the bare field name.
                    prompt = rule.fact_prompts.get(f) or FIELD_LABELS.get(f, f)
                    if prompt not in scope.missing_facts:
                        scope.missing_facts.append(prompt)
            scope.applicable_regulations.append(
                ApplicableRegulation(
                    regulation_id=rule.regulation_id,
                    name=rule.name,
                    applicability_status=status,
                    trigger_facts=_trigger_facts(rule, profile),
                    legal_basis_refs=rule.legal_basis_refs,
                    confidence=conf,
                    explanation=rule.summary,
                )
            )
        elif trig is None:
            # The prompts are only needed for the uncertain verdict.
            missing = _missing_prompts(rule, profile)
            scope.uncertain_regulations.append(
                UncertainRegulation(
                    regulation_id=rule.regulation_id,
                    name=rule.name,
                    missing_facts=missing,
                    explanation=rule.summary,
                )
            )
            for m in missing:
                if m not in scope.missing_facts:
                    scope.missing_facts.append(m)
        else:  # trig is False -> definitively excluded by a known fact
            scope.excluded_regulations.append(
                ExcludedRegulation(
                    regulation_id=rule.regulation_id,
                    name=rule.name,
                    reason="Auslösende Voraussetzungen sind anhand der bekannten Fakten nicht erfüllt.",
                )
            )
    scope.confidence = _overall_confidence(scope)
    scope.reasoning_summary = _summary(scope)
    return scope
 def _overall_confidence(scope: RegulatoryScope) -> Confidence:
    if scope.applicable_regulations and not scope.uncertain_regulations and not scope.missing_facts:
        return Confidence.HIGH
    if scope.applicable_regulations:
        return Confidence.MEDIUM
    return Confidence.LOW
 def _summary(scope: RegulatoryScope) -> str:
    applicable = ", ".join(r.regulation_id for r in scope.applicable_regulations) or "—"
    uncertain = ", ".join(r.regulation_id for r in scope.uncertain_regulations) or "—"
    return "Wahrscheinlich anwendbar: %s. Unsicher (fehlende Fakten): %s." % (applicable, uncertain)
@@ -0,0 +1,104 @@
 """Deterministic taxonomy for normalising free-text customer claims.
 Capability names echo the planned Obligation -> Capability layer of the
 Compliance Execution Graph (memory `project_compliance_graph.md`), so the
 reasoning layer's claim capabilities line up with the registry's capabilities.
 Matching is lowercase substring matching — deterministic, no LLM, no RAG.
 """
 from __future__ import annotations
 from typing import Dict, List
 # capability -> trigger substrings (German + English), matched lowercase.
 # A capability matches as soon as ANY of its keywords occurs anywhere in the
 # lower-cased text, so keywords act as stems ("signier", "aktualisier") and
 # umlauts are listed in both spellings ("ä" / "ae").
 # NOTE(review): plain substring matching also fires inside unrelated words —
 # e.g. "ssl" occurs in "verlässlich" and "logs" in "blogs"; the trailing space
 # in "log " and "sil " narrows those two but cannot match at the end of the
 # text. Several entries are redundant under substring matching ("vuln" covers
 # "vulnerab", "schwachstelle" covers "schwachstellenmanagement", "behörde"
 # covers "an die behörde", "disclosure" covers "responsible disclosure").
 CAPABILITY_KEYWORDS: Dict[str, List[str]] = {
    "software_bill_of_materials": [
        "sbom", "stückliste", "stueckliste", "bill of materials", "komponentenliste",
    ],
    "secure_updates": ["update", "patch", "aktualisier", "release", "rollout"],
    "software_integrity": ["signier", "signatur", "signed", "integrität", "integritaet", "hash"],
    "vulnerability_management": [
        "schwachstelle", "vulnerab", "cve", "schwachstellenmanagement", "vuln",
    ],
    "coordinated_disclosure": [
        "disclosure", "offenlegung", "security.txt", "responsible disclosure",
    ],
    "incident_reporting": [
        "incident", "vorfall", "behörde", "behoerde", "csirt", "meldepflicht", "an die behörde",
    ],
    "authentication": [
        "authentifizier", "login", "passwort", "password", "mfa", "2fa", "anmeldung",
    ],
    "secure_by_default": [
        "härtung", "haertung", "hardening", "default", "standardkonfig",
        "sichere konfiguration", "angriffsfläche", "angriffsflaeche",
    ],
    "security_logging": ["logging", "log ", "logs", "protokoll", "audit-trail", "ereignisprotokoll"],
    "secure_communication": ["verschlüssel", "verschluessel", "encryption", "tls", "vpn", "ssl"],
    "risk_assessment": [
        "risikoanalyse", "risikobeurteil", "risk assessment", "gefährdungsbeurteil",
        "gefaehrdungsbeurteil", "bedrohungsanalyse", "threat model",
    ],
    "technical_documentation": [
        "dokumentation", "technische unterlagen", "betriebsanleitung", "handbuch", "documentation",
    ],
    "conformity_assessment": ["konformität", "konformitaet", "conformity", "baumuster", "ce-kenn"],
    "functional_safety": [
        "performance level", "sil ", "iso 13849", "funktionale sicherheit", "safety control",
    ],
    "data_access_provision": [
        "datenzugang", "data access", "datenportabilität", "datenexport", "data export",
    ],
 }
 # capability -> broader compliance topics it touches (spec related_topics).
 # The keys are the same fifteen capabilities as in CAPABILITY_KEYWORDS; a
 # capability missing from this table simply contributes no topics.
 CAPABILITY_TOPICS: Dict[str, List[str]] = {
    "software_bill_of_materials": ["component_transparency", "supply_chain", "vulnerability_management"],
    "secure_updates": ["secure_updates", "vulnerability_remediation", "release_management"],
    "software_integrity": ["secure_updates", "supply_chain", "tamper_protection"],
    "vulnerability_management": ["vulnerability_handling", "monitoring", "patch_management"],
    "coordinated_disclosure": ["vulnerability_handling", "transparency"],
    "incident_reporting": ["incident_handling", "authority_notification"],
    "authentication": ["access_control", "identity"],
    "secure_by_default": ["hardening", "attack_surface", "configuration"],
    "security_logging": ["monitoring", "forensics", "incident_handling"],
    "secure_communication": ["confidentiality", "integrity", "remote_access"],
    "risk_assessment": ["risk_management", "secure_by_design"],
    "technical_documentation": ["documentation", "conformity"],
    "conformity_assessment": ["conformity", "ce_marking"],
    "functional_safety": ["machine_safety", "control_systems"],
    "data_access_provision": ["data_sharing", "portability"],
 }
 # qualifier -> substrings that signal a weak/incomplete implementation.
 # Matched the same way as capabilities: lower-cased text, any substring hit.
 # NOTE(review): the bare "keine" under "absent" fires on every sentence that
 # contains the word (e.g. "keine bekannten Schwachstellen") and already covers
 # "keinen prozess"; likewise "künftig" covers "zukünftig". Confirm that this
 # breadth is intended.
 QUALIFIER_KEYWORDS: Dict[str, List[str]] = {
    "reactive": [
        "wenn kunden", "wenn ein kunde", "nach meldung", "auf anfrage", "auf nachfrage",
        "nur wenn", "reaktiv", "wenn fehler", "when customers", "on request", "when reported",
        "ad hoc", "ad-hoc", "bei bedarf",
    ],
    "manual": ["manuell", "von hand", "manual", "händisch", "haendisch"],
    "planned": [
        "geplant", "in planung", "wollen wir", "planen wir", "noch nicht", "zukünftig", "künftig",
    ],
    "absent": ["haben wir nicht", "gibt es nicht", "nicht vorhanden", "keinen prozess", "keine"],
 }
 def match_capabilities(text: str) -> List[str]:
    """Return the capabilities whose keywords occur in `text`.

    The text is lower-cased and every keyword is tested as a plain substring.
    Capabilities come back in the order of CAPABILITY_KEYWORDS, each at most once.
    """
    haystack = text.lower()
    hits: List[str] = []
    for capability, needles in CAPABILITY_KEYWORDS.items():
        for needle in needles:
            if needle in haystack:
                hits.append(capability)
                break  # one hit per capability is enough
    return hits
 def match_qualifiers(text: str) -> List[str]:
    """Return the qualifiers whose keywords occur in `text`.

    The text is lower-cased and every keyword is tested as a plain substring.
    Qualifiers come back in the order of QUALIFIER_KEYWORDS, each at most once.
    """
    haystack = text.lower()
    found: List[str] = []
    for qualifier, needles in QUALIFIER_KEYWORDS.items():
        for needle in needles:
            if needle in haystack:
                found.append(qualifier)
                break  # one hit per qualifier is enough
    return found
 def topics_for(capabilities: List[str]) -> List[str]:
    """Return the topics of the given capabilities, without duplicates.

    Topics are looked up in CAPABILITY_TOPICS and kept in order of first
    appearance; capabilities that are not in the table contribute nothing.
    """
    flattened = (
        topic
        for capability in capabilities
        for topic in CAPABILITY_TOPICS.get(capability, [])
    )
    # dict.fromkeys keeps insertion order, which gives an order-preserving de-duplication.
    return list(dict.fromkeys(flattened))
--- a/Show More
+++ b/Show More