Merge pull request 'POST /onboarding/advisor-start — expose the Advisor at runtime (#58)' (#47) from feat/onboarding-advisor-endpoint into main

feat: POST /onboarding/advisor-start — expose the Smart Onboarding Advisor at runtime (#58)
This exposes the existing Smart Onboarding Advisor through a runtime endpoint; it does not add new reasoning logic. Tightly scoped: adapter boundary + endpoint, no big frontend, no persistence, no empirical learning, no new scanners, no LLM. POST /onboarding/advisor-start : (company + certifications + target + scanner_findings[ProducedSignal]) -> Normalizer -> Silent Knowledge Pass -> Advisor -> { silent_intake_summary, inferred_assumptions, rejected_assumptions, top_5_questions, capability_delta, top_measures, evidence_requests, completeness_summary, auto_detected, headline } GET /onboarding/targets : the supported target ids (CRA, TISAX, MDR, Environmental) compliance/services/onboarding_service.py is the app-caller: it loads the curated knowledge (hypothesis library, signal vocabulary + map, the target's required capabilities) once and calls the pure, tested orchestration (normalize_signals -> silent_intake -> advisor_start). The scanner ADAPTER boundary is the ProducedSignal format the request carries — existing scanners emit it, no new scanners. Thin handler (<30 LOC), registered in the auto-load list. No DB. Additive to the OpenAPI contract (contract test is additive-friendly; baseline regenerates on CI/py3.12). First deployable runtime feature -> dev deploy + smoke. mypy --strict clean, 22 onboarding tests pass, check-loc 0.
2026-06-28 15:14:05 +02:00 · 2026-06-28 15:14:00 +02:00 · 2026-06-28 14:51:08 +02:00 · 2026-06-28 14:49:57 +02:00 · 2026-06-28 14:34:31 +02:00 · 2026-06-28 14:34:27 +02:00
260 changed files with 38297 additions and 144 deletions
@@ -1,4 +1,6 @@
 # Build stage
 # ci-retrigger 2026-06-27: transient registry.meghsakha.com 502 on push (Runde 1) + last-build
 # tag-bug skipped the rerun (Runde 2). No logic change — forces detect-changes to rebuild ai-sdk.
 FROM golang:1.24-alpine AS builder
 WORKDIR /app
@@ -33,6 +35,14 @@ COPY migrations/ ./migrations/
 # Copy policy files (YAML rules)
 COPY policies/ ./policies/
 # Copy Compliance Execution Graph data (file-backed: Registry join-key copy + accepted control
 # mappings + evidence requirements) consumed by GET /sdk/v1/compliance/obligation-status.
 # data/obligations/obligation_join_keys.json is a synced copy of the repo-root Registry contract
 # (the Obligation Registry owns the canonical file) — re-sync it when the Registry grows.
 COPY data/control_mappings/ ./data/control_mappings/
 COPY data/evidence_requirements/ ./data/evidence_requirements/
 COPY data/obligations/ ./data/obligations/
 # Create non-root user
 RUN adduser -D -u 1000 appuser
 USER appuser
@@ -34,6 +34,8 @@ func main() {
 		cmdEcho(os.Args[2:])
 	case "hierarchy":
 		cmdHierarchy(os.Args[2:])
 	case "propose":
 		cmdPropose(os.Args[2:])
 	default:
 		usage()
 		os.Exit(2)
@@ -41,7 +43,7 @@ func main() {
 }
 func usage() {
-	fmt.Fprintln(os.Stderr, "Usage: iace-audit <reachability|consistency|vocabulary|echo|hierarchy> [args]")
+	fmt.Fprintln(os.Stderr, "Usage: iace-audit <reachability|consistency|vocabulary|echo|hierarchy|propose> [args]")
 }
 func cmdReachability(_ []string) {
@@ -0,0 +1,188 @@
 package main
 import (
 	"context"
 	"encoding/json"
 	"fmt"
 	"os"
 	"strconv"
 	"strings"
 	"github.com/breakpilot/ai-compliance-sdk/internal/iace"
 	"github.com/breakpilot/ai-compliance-sdk/internal/iace/audit"
 	"github.com/breakpilot/ai-compliance-sdk/internal/llm"
 )
 // narrativeInput is the JSON shape of the <narrative.json> file that
 // cmdPropose decodes as its first argument.
 type narrativeInput struct {
 	// MachineType is passed to the proposer input builder, the judge
 	// selection and every report renderer.
 	MachineType  string   `json:"machine_type"`
 	// Narrative is the text that is analysed; cmdPropose exits with status 2
 	// when it is empty.
 	Narrative    string   `json:"narrative"`
 	// MachineTypes is optional and forwarded to iace.BuildProposerInput.
 	// NOTE(review): presumably additional machine classes — confirm.
 	MachineTypes []string `json:"machine_types,omitempty"`
 }
 // cmdPropose — Method P: offline dedup-candidate proposer.
 //
 //	iace-audit propose <narrative.json> [<ground-truth.json>]
 //
 // Detect near-duplicate patterns, screen survivors against a ground truth (if
 // given), judge them (heuristic by default, LLM when enabled), and write the
 // human-review queue to audit-reports/proposals.{md,json}. Propose-only — it
 // writes a report and never mutates the pattern library.
 //
 // Besides the dedup queue, the same run also writes three further review
 // queues into audit-reports/:
 //
 //	framing.{md,json}   foreign-framing candidates
 //	vocab.{md,json}     vocabulary suggestions that carry at least one tag
 //	coverage.{md,json}  coverage gaps (the .md also lists LLM-proposed hazards)
 //
 // Exits with status 2 when no narrative file is given or the narrative is
 // empty. When no ground truth is given, candidates are not recall-screened
 // and a note saying so is printed to stderr at the end.
 //
 // Env:
 //
 //	IACE_PROPOSE_THRESHOLD   candidate score threshold (default 0.30)
 //	IACE_FRAMING_MIN_ORPHAN  threshold passed to iace.FindFramingCandidates (default 0.6)
 //	IACE_PROPOSE_LLM=1       use the offline LLM judge instead of the heuristic
 //	OLLAMA_URL               ollama base URL (default http://localhost:11434)
 //	SELF_HOSTED_LLM_MODEL    model name (default qwen2.5:32b-instruct)
 func cmdPropose(args []string) {
 	if len(args) < 1 {
 		fmt.Fprintln(os.Stderr, "propose: usage: iace-audit propose <narrative.json> [<ground-truth.json>]")
 		os.Exit(2)
 	}
 	var in narrativeInput
 	// NOTE(review): must is defined elsewhere in this package; presumably it
 	// aborts the command on a non-nil error — confirm.
 	must(readJSONFile(args[0], &in))
 	if in.Narrative == "" {
 		fmt.Fprintln(os.Stderr, "propose: narrative is empty")
 		os.Exit(2)
 	}
 	// gt stays nil when no ground-truth file was given; the screening step
 	// below is skipped in that case.
 	var gt *iace.GroundTruth
 	if len(args) >= 2 {
 		var g iace.GroundTruth
 		must(readJSONFile(args[1], &g))
 		gt = &g
 	}
 	threshold := envFloat("IACE_PROPOSE_THRESHOLD", 0.30)
 	hazards, mits, fired := iace.BuildProposerInput(in.Narrative, in.MachineType, in.MachineTypes)
 	candidates := iace.FindDedupCandidates(fired, threshold)
 	// Index the fired patterns by ID so the judge can be handed the full
 	// keep/drop pattern matches for each candidate.
 	byID := make(map[string]iace.PatternMatch, len(fired))
 	for _, pm := range fired {
 		byID[pm.PatternID] = pm
 	}
 	judge := selectJudge(in.MachineType)
 	ctx := context.Background()
 	var proposals []iace.JudgedProposal
 	blocked := 0
 	for _, c := range candidates {
 		var sr iace.ScreenResult
 		if gt != nil {
 			sr = iace.ScreenSupersession(gt, hazards, mits, c.KeepHazardName, c.DropName)
 			// A candidate is blocked (counted, never judged) when the screen
 			// reports lower recall afterwards or flags DistinctGT.
 			if sr.RecallAfter < sr.RecallBefore || sr.DistinctGT {
 				blocked++
 				continue
 			}
 		}
 		v, conf, rat := judge.Judge(ctx, c, byID[c.KeepPattern], byID[c.DropPattern])
 		proposals = append(proposals, iace.JudgedProposal{
 			Candidate: c, Screen: sr, Verdict: v, Confidence: conf, Rationale: rat, Judge: judge.Name(),
 		})
 	}
 	writeText("audit-reports/proposals.md", iace.RenderProposalQueue(in.MachineType, proposals))
 	writeJSON("audit-reports/proposals.json", proposals)
 	// Type 2: foreign-framing candidates (zone terms with no narrative echo).
 	framing := iace.FindFramingCandidates(fired, in.Narrative, envFloat("IACE_FRAMING_MIN_ORPHAN", 0.6))
 	writeText("audit-reports/framing.md", iace.RenderFramingQueue(in.MachineType, framing))
 	writeJSON("audit-reports/framing.json", framing)
 	// Type 3: vocab->tag proposals (unknown narrative tokens that pattern text
 	// names as a whole word, with a dominant shared required tag).
 	vocab := audit.RunVocabulary(map[string]any{"narrative": in.Narrative})
 	// Keep only suggestions that come with at least one suggested tag.
 	var vgaps []audit.DictionarySuggestion
 	for _, s := range vocab.SuggestedDictionaryEntries {
 		if len(s.SuggestedTags) > 0 {
 			vgaps = append(vgaps, s)
 		}
 	}
 	writeText("audit-reports/vocab.md", renderVocabQueue(in.MachineType, vgaps))
 	writeJSON("audit-reports/vocab.json", vgaps)
 	// Type 4: coverage blind-spots (empty ISO 12100 groups A-G) + LLM expansion.
 	gaps := iace.FindCoverageGaps(hazards)
 	// The expansion runs only when the selected judge is an iace.LLMJudge;
 	// otherwise missing stays nil.
 	var missing []iace.MissingHazard
 	if lj, ok := judge.(iace.LLMJudge); ok {
 		missing = iace.ProposeMissingHazards(ctx, lj.Completer, in.MachineType, in.Narrative, hazards, gaps)
 	}
 	writeText("audit-reports/coverage.md", iace.RenderCoverageQueue(in.MachineType, gaps, missing))
 	// Only gaps go into the JSON file; missing is rendered in coverage.md only.
 	writeJSON("audit-reports/coverage.json", gaps)
 	printSummary("Method P — Dedup Proposer ("+judge.Name()+")", map[string]int{
 		"fired_patterns": len(fired),
 		"candidates":     len(candidates),
 		"in_queue":       len(proposals),
 		"gt_blocked":     blocked,
 		"framing_flags":  len(framing),
 		"vocab_gaps":     len(vgaps),
 		"coverage_gaps":  len(gaps),
 	})
 	if gt == nil {
 		fmt.Fprintln(os.Stderr, "note: no ground truth provided — GT wall NOT applied (candidates not recall-screened)")
 	}
 }
 // selectJudge returns the candidate judge for this run.
 //
 // Unless IACE_PROPOSE_LLM is exactly "1" the heuristic judge is returned.
 // Otherwise an ollama adapter is registered using OLLAMA_URL and
 // SELF_HOSTED_LLM_MODEL (with their defaults), the choice is announced on
 // stdout, and an LLM judge bound to machineClass is returned.
 func selectJudge(machineClass string) iace.CandidateJudge {
 	if llmEnabled := os.Getenv("IACE_PROPOSE_LLM") == "1"; !llmEnabled {
 		return iace.HeuristicJudge{}
 	}
 	ollamaURL := envStr("OLLAMA_URL", "http://localhost:11434")
 	modelName := envStr("SELF_HOSTED_LLM_MODEL", "qwen2.5:32b-instruct")
 	registry := llm.NewProviderRegistry("ollama", "")
 	registry.Register(llm.NewOllamaAdapter(ollamaURL, modelName))
 	fmt.Printf("using LLM judge (ollama %s, model %s)\n", ollamaURL, modelName)
 	completer := iace.NewRegistryCompleter(registry, modelName)
 	return iace.LLMJudge{
 		Completer:    completer,
 		MachineClass: machineClass,
 	}
 }
 // readJSONFile reads the file at path and decodes its JSON content into v.
 //
 // v must be a non-nil pointer, as required by json.Unmarshal. A read failure
 // is returned unchanged (the os.ReadFile error already names the path). A
 // decode failure is wrapped with the path so the caller can tell which input
 // file was malformed; the underlying error stays reachable via errors.As.
 func readJSONFile(path string, v any) error {
 	raw, err := os.ReadFile(path)
 	if err != nil {
 		return err
 	}
 	if err := json.Unmarshal(raw, v); err != nil {
 		return fmt.Errorf("parsing %s: %w", path, err)
 	}
 	return nil
 }
 // writeText writes content to path and reports the outcome.
 //
 // The audit-reports directory is created first on a best-effort basis; a
 // failure there is ignored because the write below fails and warns anyway.
 // On success a "wrote" line goes to stdout, on failure a warning goes to
 // stderr. The function never aborts the program.
 func writeText(path, content string) {
 	_ = os.MkdirAll("audit-reports", 0o755)
 	writeErr := os.WriteFile(path, []byte(content), 0o644)
 	if writeErr == nil {
 		fmt.Println("→ wrote", path)
 		return
 	}
 	fmt.Fprintln(os.Stderr, "warn: could not write", path, writeErr)
 }
 // envStr returns the value of the environment variable key, or def when the
 // variable is unset or empty.
 func envStr(key, def string) string {
 	value := os.Getenv(key)
 	if value == "" {
 		return def
 	}
 	return value
 }
 // envFloat returns the environment variable key parsed as a float64.
 //
 // def is returned when the variable is unset, empty or blank. Surrounding
 // whitespace is ignored. A value that does not parse (for example "0,3" with
 // a decimal comma) also yields def, but a warning is printed to stderr so a
 // mistyped threshold does not silently run with the default.
 func envFloat(key string, def float64) float64 {
 	v := strings.TrimSpace(os.Getenv(key))
 	if v == "" {
 		return def
 	}
 	f, err := strconv.ParseFloat(v, 64)
 	if err != nil {
 		fmt.Fprintf(os.Stderr, "warn: ignoring invalid %s=%q, using default %v\n", key, v, def)
 		return def
 	}
 	return f
 }
 // renderVocabQueue renders the vocab→tag review queue as Markdown.
 //
 // The output starts with a heading naming machine and a summary line with the
 // number of entries, followed by one numbered section per entry listing the
 // token, all suggested tags, the pattern IDs that name it, and a suggested
 // action. The action uses the first suggested tag, or the placeholder
 // "<tag>" when an entry has none.
 func renderVocabQueue(machine string, entries []audit.DictionarySuggestion) string {
 	var out strings.Builder
 	fmt.Fprintf(&out, "# Vocab→tag review queue — %s\n\n", machine)
 	fmt.Fprintf(&out, "%d unknown token(s) appear in pattern text but map to no dictionary tag. Propose-only — a human (or the LLM) confirms the tag, then adds a keyword_dictionary entry and pins a GT case.\n\n", len(entries))
 	for idx := range entries {
 		entry := entries[idx]
 		primaryTag := "<tag>"
 		if len(entry.SuggestedTags) != 0 {
 			primaryTag = entry.SuggestedTags[0]
 		}
 		allTags := strings.Join(entry.SuggestedTags, ", ")
 		patternList := strings.Join(entry.PatternIDs, ", ")
 		fmt.Fprintf(&out, "## %d. \"%s\"  → suggested tag(s): %s\n", idx+1, entry.Token, allTags)
 		fmt.Fprintf(&out, "- named by %d pattern(s): %s\n", len(entry.PatternIDs), patternList)
 		fmt.Fprintf(&out, "- suggested action: add keyword_dictionary entry {%q → %s} so narratives mentioning it trigger those patterns; human confirms\n\n", entry.Token, primaryTag)
 	}
 	return out.String()
 }
@@ -0,0 +1,8 @@
 // Control-Mapping: CRA Annex I -> NIST SP 800-53 Rev. 5. Eine Zeile = ein Mapping (Schema: ControlMapping).
 // Reviewt 2026-06-25 (benjamin): 3 accepted, mapping_type=primary_implementation (kanonische Primaer-Control je Anforderung).
 // Heimat der OWASP-Rejects (2)(e)/(2)(l)/(2)(i): dort war OWASP nicht der Zielstandard ("Mapping ueber NIST/BSI erforderlich").
 // related-Controls (SC-3(3), RA-5, AC-6, SI-16, ...) folgen separat als mapping_type=supports — hier nur der kanonische Einstieg.
 // obligation_id (Registry-Handoff #4 adoptiert, #6 auf CORE re-pointet 2026-06-26): SI-7->software_integrity_protection (CORE (2)(f)), SI-2->provide_security_updates, CM-7->attack_surface_minimization (CORE (2)(j)). Join exakt. Die domaenen-scoped IDs (signed_update_integrity, remote_access_attack_surface_min) bleiben gueltige Obligations und zeigen per specializes->CORE auf diese Ziele.
 {"source_norm": "CRA Annex I Part I (2)(e) — Integritaet", "source_role": "operational_requirement", "target_framework": "NIST SP 800-53", "target_control": "SI-7", "mapping_type": "primary_implementation", "mapping_status": "accepted", "provenance": "human_curated", "rationale": "NIST SI-7 = Software, Firmware, and Information Integrity — kanonische Integritaetskontrolle (Signaturpruefung, Manipulationserkennung).", "reviewed_by": "benjamin", "review_date": "2026-06-25", "review_reason": "Primaere Implementierung der CRA-Integritaetsanforderung; OWASP war hier kein passender Treffer. Related (spaeter, supports): SA-10, CM-14.", "version": "2026-06-25", "obligation_id": "software_integrity_protection"}
 {"source_norm": "CRA Annex I Part I (2)(l) — Sichere Updates", "source_role": "operational_requirement", "target_framework": "NIST SP 800-53", "target_control": "SI-2", "mapping_type": "primary_implementation", "mapping_status": "accepted", "provenance": "human_curated", "rationale": "NIST SI-2 = Flaw Remediation — kanonische Update-/Patch-Kontrolle.", "reviewed_by": "benjamin", "review_date": "2026-06-25", "review_reason": "Primaere Implementierung der CRA-Update-Anforderung. Related (spaeter, supports): RA-5, CM-3, SA-11.", "version": "2026-06-25", "obligation_id": "provide_security_updates"}
 {"source_norm": "CRA Annex I Part I (2)(i) — Angriffsflaeche minimieren", "source_role": "operational_requirement", "target_framework": "NIST SP 800-53", "target_control": "CM-7", "mapping_type": "primary_implementation", "mapping_status": "accepted", "provenance": "human_curated", "rationale": "NIST CM-7 = Least Functionality — Deaktivierung nicht benoetigter Ports/Dienste/Funktionen.", "reviewed_by": "benjamin", "review_date": "2026-06-25", "review_reason": "CM-7 als Primaer-Control fuer Angriffsflaeche (nicht SC-3(3)). Related (spaeter, supports): SC-3(3), AC-6, SI-16.", "version": "2026-06-25", "obligation_id": "attack_surface_minimization"}
@@ -2,13 +2,13 @@
 // Reviewt 2026-06-25 (benjamin): 7 accepted, 13 rejected. accepted = Audit-Wahrheit (Advisor nutzt acceptedOnly).
 // rejected bleiben als Audit-Spur ("warum verworfen"). KEIN confidence — kuratiert = fachliche Feststellung.
 // Architekturbeweis: CRA -> OWASP fuer AppSec/Auth/Crypto/Logging; Ops/Update/Attack-Surface/Integritaet -> NIST/BSI.
-{"source_norm": "CRA Annex I Part I (2)(c) — Schutz vor unbefugtem Zugriff", "source_role": "operational_requirement", "target_framework": "OWASP ASVS", "target_control": "V6.3.1", "mapping_type": "supports", "mapping_status": "accepted", "provenance": "human_curated", "rationale": "V6 = Authentication.", "reviewed_by": "benjamin", "review_date": "2026-06-25", "review_reason": "V6 = Authentication, sauberer Treffer fuer Zugriffsschutz/Authentisierung.", "version": "2026-06-25"}
+{"source_norm": "CRA Annex I Part I (2)(c) — Schutz vor unbefugtem Zugriff", "source_role": "operational_requirement", "target_framework": "OWASP ASVS", "target_control": "V6.3.1", "mapping_type": "supports", "mapping_status": "accepted", "provenance": "human_curated", "rationale": "V6 = Authentication.", "reviewed_by": "benjamin", "review_date": "2026-06-25", "review_reason": "V6 = Authentication, sauberer Treffer fuer Zugriffsschutz/Authentisierung.", "version": "2026-06-25", "obligation_id": "user_authentication_required"}
-{"source_norm": "CRA Annex I Part I (2)(c) — Schutz vor unbefugtem Zugriff", "source_role": "operational_requirement", "target_framework": "OWASP ASVS", "target_control": "V6.1.1", "mapping_type": "supports", "mapping_status": "accepted", "provenance": "human_curated", "rationale": "V6 = Authentication.", "reviewed_by": "benjamin", "review_date": "2026-06-25", "review_reason": "V6 = Authentication, sauberer Treffer fuer Zugriffsschutz/Authentisierung.", "version": "2026-06-25"}
+{"source_norm": "CRA Annex I Part I (2)(c) — Schutz vor unbefugtem Zugriff", "source_role": "operational_requirement", "target_framework": "OWASP ASVS", "target_control": "V6.1.1", "mapping_type": "supports", "mapping_status": "accepted", "provenance": "human_curated", "rationale": "V6 = Authentication.", "reviewed_by": "benjamin", "review_date": "2026-06-25", "review_reason": "V6 = Authentication, sauberer Treffer fuer Zugriffsschutz/Authentisierung.", "version": "2026-06-25", "obligation_id": "user_authentication_required"}
-{"source_norm": "CRA Annex I Part I (2)(d) — Vertraulichkeit / Verschluesselung", "source_role": "operational_requirement", "target_framework": "OWASP ASVS", "target_control": "V11.2.1", "mapping_type": "supports", "mapping_status": "accepted", "provenance": "human_curated", "rationale": "V11 = Cryptography.", "reviewed_by": "benjamin", "review_date": "2026-06-25", "review_reason": "Korrektur von V14: V11 = Cryptography, richtiger Bereich fuer Verschluesselung.", "version": "2026-06-25"}
+{"source_norm": "CRA Annex I Part I (2)(d) — Vertraulichkeit / Verschluesselung", "source_role": "operational_requirement", "target_framework": "OWASP ASVS", "target_control": "V11.2.1", "mapping_type": "supports", "mapping_status": "accepted", "provenance": "human_curated", "rationale": "V11 = Cryptography.", "reviewed_by": "benjamin", "review_date": "2026-06-25", "review_reason": "Korrektur von V14: V11 = Cryptography, richtiger Bereich fuer Verschluesselung.", "version": "2026-06-25", "obligation_id": "credential_confidentiality_protection"}
-{"source_norm": "CRA Annex I Part I (2)(d) — Vertraulichkeit / Verschluesselung", "source_role": "operational_requirement", "target_framework": "OWASP ASVS", "target_control": "V11.7.1", "mapping_type": "supports", "mapping_status": "accepted", "provenance": "human_curated", "rationale": "V11.7 = Key Management.", "reviewed_by": "benjamin", "review_date": "2026-06-25", "review_reason": "Korrektur von V14: V11.7 = Key Management fuer Verschluesselung/Schluesselverwaltung.", "version": "2026-06-25"}
+{"source_norm": "CRA Annex I Part I (2)(d) — Vertraulichkeit / Verschluesselung", "source_role": "operational_requirement", "target_framework": "OWASP ASVS", "target_control": "V11.7.1", "mapping_type": "supports", "mapping_status": "accepted", "provenance": "human_curated", "rationale": "V11.7 = Key Management.", "reviewed_by": "benjamin", "review_date": "2026-06-25", "review_reason": "Korrektur von V14: V11.7 = Key Management fuer Verschluesselung/Schluesselverwaltung.", "version": "2026-06-25", "obligation_id": "auth_key_management"}
-{"source_norm": "CRA Annex I Part I (2)(k) — Sicherheitsrelevante Ereignisse / Logging", "source_role": "operational_requirement", "target_framework": "OWASP ASVS", "target_control": "V16.3.3", "mapping_type": "supports", "mapping_status": "accepted", "provenance": "human_curated", "rationale": "V16 = Security Logging.", "reviewed_by": "benjamin", "review_date": "2026-06-25", "review_reason": "V16 = Logging, sauberer Treffer fuer sicherheitsrelevante Ereignisse.", "version": "2026-06-25"}
+{"source_norm": "CRA Annex I Part I (2)(k) — Sicherheitsrelevante Ereignisse / Logging", "source_role": "operational_requirement", "target_framework": "OWASP ASVS", "target_control": "V16.3.3", "mapping_type": "supports", "mapping_status": "accepted", "provenance": "human_curated", "rationale": "V16 = Security Logging.", "reviewed_by": "benjamin", "review_date": "2026-06-25", "review_reason": "V16 = Logging, sauberer Treffer fuer sicherheitsrelevante Ereignisse.", "version": "2026-06-25", "obligation_id": "event_logging_security_events"}
-{"source_norm": "CRA Annex I Part I (2)(k) — Sicherheitsrelevante Ereignisse / Logging", "source_role": "operational_requirement", "target_framework": "OWASP ASVS", "target_control": "V16.3.4", "mapping_type": "supports", "mapping_status": "accepted", "provenance": "human_curated", "rationale": "V16 = Security Logging.", "reviewed_by": "benjamin", "review_date": "2026-06-25", "review_reason": "V16 = Logging, sauberer Treffer fuer sicherheitsrelevante Ereignisse.", "version": "2026-06-25"}
+{"source_norm": "CRA Annex I Part I (2)(k) — Sicherheitsrelevante Ereignisse / Logging", "source_role": "operational_requirement", "target_framework": "OWASP ASVS", "target_control": "V16.3.4", "mapping_type": "supports", "mapping_status": "accepted", "provenance": "human_curated", "rationale": "V16 = Security Logging.", "reviewed_by": "benjamin", "review_date": "2026-06-25", "review_reason": "V16 = Logging, sauberer Treffer fuer sicherheitsrelevante Ereignisse.", "version": "2026-06-25", "obligation_id": "event_logging_security_events"}
-{"source_norm": "CRA Annex I Part I (2)(k) — Sicherheitsrelevante Ereignisse / Logging", "source_role": "operational_requirement", "target_framework": "OWASP ASVS", "target_control": "V16.1.1", "mapping_type": "supports", "mapping_status": "accepted", "provenance": "human_curated", "rationale": "V16 = Security Logging.", "reviewed_by": "benjamin", "review_date": "2026-06-25", "review_reason": "V16 = Logging, sauberer Treffer fuer sicherheitsrelevante Ereignisse.", "version": "2026-06-25"}
+{"source_norm": "CRA Annex I Part I (2)(k) — Sicherheitsrelevante Ereignisse / Logging", "source_role": "operational_requirement", "target_framework": "OWASP ASVS", "target_control": "V16.1.1", "mapping_type": "supports", "mapping_status": "accepted", "provenance": "human_curated", "rationale": "V16 = Security Logging.", "reviewed_by": "benjamin", "review_date": "2026-06-25", "review_reason": "V16 = Logging, sauberer Treffer fuer sicherheitsrelevante Ereignisse.", "version": "2026-06-25", "obligation_id": "event_logging_security_events"}
 {"source_norm": "CRA Annex I Part I (2)(c) — Schutz vor unbefugtem Zugriff", "source_role": "operational_requirement", "target_framework": "OWASP ASVS", "target_control": "V14.2.4", "mapping_type": "related", "mapping_status": "rejected", "provenance": "human_curated", "rationale": "Retriever-Kandidat.", "reviewed_by": "benjamin", "review_date": "2026-06-25", "review_reason": "V14 = Config, kein Auth — verworfen.", "version": "2026-06-25"}
 {"source_norm": "CRA Annex I Part I (2)(d) — Vertraulichkeit / Verschluesselung", "source_role": "operational_requirement", "target_framework": "OWASP ASVS", "target_control": "V14.2.4", "mapping_type": "related", "mapping_status": "rejected", "provenance": "human_curated", "rationale": "Retriever-Kandidat.", "reviewed_by": "benjamin", "review_date": "2026-06-25", "review_reason": "V14 = Config, Crypto gehoert zu V11 — verworfen.", "version": "2026-06-25"}
 {"source_norm": "CRA Annex I Part I (2)(d) — Vertraulichkeit / Verschluesselung", "source_role": "operational_requirement", "target_framework": "OWASP ASVS", "target_control": "V14.3.2", "mapping_type": "related", "mapping_status": "rejected", "provenance": "human_curated", "rationale": "Retriever-Kandidat.", "reviewed_by": "benjamin", "review_date": "2026-06-25", "review_reason": "V14 = Config, Crypto gehoert zu V11 — verworfen.", "version": "2026-06-25"}
@@ -0,0 +1,10 @@
 // Evidence-Requirements je NIST-SP-800-53-Control (Schema: EvidenceRequirement). Eine Zeile = eine geforderte Evidenz.
 // WICHTIG: evidence_type ist FRAMEWORK-AGNOSTISCH (geteilter Katalog config_export/test_report/repo_scan/sbom/...) —
 // dieselben Typen tragen CRA, NIST, ISO 27001, IEC 62443, BSI. (framework, control) ist nur der Verweis, nicht der Typ.
 // Stand 2026-06-25, Basis: die 3 accepted CRA->NIST primary_implementation-Mappings (SI-7 Integritaet, SI-2 Updates, CM-7 Angriffsflaeche).
 {"framework": "NIST SP 800-53", "control": "SI-7", "evidence_type": "sbom", "evidence_source": "ci", "freshness_requirement": "per_release", "required": true, "rationale": "SBOM weist die Integritaet/Herkunft der Software-Bestandteile nach (bekannte, unmanipulierte Komponenten).", "version": "2026-06-25"}
 {"framework": "NIST SP 800-53", "control": "SI-7", "evidence_type": "config_export", "evidence_source": "github", "freshness_requirement": "per_release", "required": true, "rationale": "Secure-Boot-/Code-Signing-Konfiguration als Nachweis der Integritaetspruefung.", "version": "2026-06-25"}
 {"framework": "NIST SP 800-53", "control": "SI-2", "evidence_type": "config_export", "evidence_source": "github", "freshness_requirement": "per_release", "required": true, "rationale": "Konfiguration des sicheren Update-/Patch-Mechanismus (signierte/automatische Updates) als technischer Nachweis.", "version": "2026-06-25"}
 {"framework": "NIST SP 800-53", "control": "SI-2", "evidence_type": "test_report", "evidence_source": "ci", "freshness_requirement": "per_release", "required": true, "rationale": "Update-/Patch-Verifikationstest (CI) belegt, dass Sicherheitsupdates greifen.", "version": "2026-06-25"}
 {"framework": "NIST SP 800-53", "control": "CM-7", "evidence_type": "config_export", "evidence_source": "github", "freshness_requirement": "per_release", "required": true, "rationale": "Konfiguration deaktivierter Ports/Dienste/Funktionen als Nachweis minimierter Angriffsflaeche.", "version": "2026-06-25"}
 {"framework": "NIST SP 800-53", "control": "CM-7", "evidence_type": "repo_scan", "evidence_source": "scanner", "freshness_requirement": "per_release", "required": true, "rationale": "Angriffsflaechen-Scan (offene Ports/Dienste) als Nachweis tatsaechlich minimierter Angriffsflaeche.", "version": "2026-06-25"}
@@ -0,0 +1,846 @@
 {
 "schema_version": "obligation_join_keys_v1",
 "contract": "obligation_id ist der stabile Join-Key. Legal Knowledge Graph haengt citation_spans an obligation_id; Compliance Execution Graph mappt control_mapping.source_norm -> obligation_id. Interim-Bruecke = citation_units. obligation_id NIE neu vergeben (re-link).",
 "count": 95,
 "obligation_ids": [
  {
   "obligation_id": "sbom_creation",
   "regulation": "CRA",
   "family": "sbom",
   "tier": "LEGAL_MINIMUM",
   "citation_units": [
    "Annex I Part II (1)"
   ],
   "source_role": "LEGAL_BASIS"
  },
  {
   "obligation_id": "sbom_dependency_coverage",
   "regulation": "CRA",
   "family": "sbom",
   "tier": "LEGAL_MINIMUM",
   "citation_units": [
    "Art. 3(36) i.V.m. Annex I Part II (1)"
   ],
   "source_role": "LEGAL_BASIS"
  },
  {
   "obligation_id": "sbom_format_standard",
   "regulation": "CRA",
   "family": "sbom",
   "tier": "LEGAL_MINIMUM",
   "citation_units": [
    "Annex I Part II (1)"
   ],
   "source_role": "LEGAL_BASIS"
  },
  {
   "obligation_id": "sbom_maintenance_update",
   "regulation": "CRA",
   "family": "sbom",
   "tier": "LEGAL_MINIMUM",
   "citation_units": [
    "Annex I Part II (1)"
   ],
   "source_role": "LEGAL_BASIS"
  },
  {
   "obligation_id": "sbom_completeness_verification",
   "regulation": "CRA",
   "family": "sbom",
   "tier": "BEST_PRACTICE",
   "citation_units": [],
   "source_role": "GUIDANCE"
  },
  {
   "obligation_id": "sbom_tooling_automation",
   "regulation": "CRA",
   "family": "sbom",
   "tier": "BEST_PRACTICE",
   "citation_units": [],
   "source_role": "IMPLEMENTATION"
  },
  {
   "obligation_id": "sbom_access_provision",
   "regulation": "CRA",
   "family": "sbom",
   "tier": "BEST_PRACTICE",
   "citation_units": [],
   "source_role": "GUIDANCE"
  },
  {
   "obligation_id": "sbom_authority_provision",
   "regulation": "CRA",
   "family": "sbom",
   "tier": "LEGAL_MINIMUM",
   "citation_units": [
    "Art. 31 / Annex I Part II (1)"
   ],
   "source_role": "LEGAL_BASIS"
  },
  {
   "obligation_id": "sbom_confidentiality",
   "regulation": "CRA",
   "family": "sbom",
   "tier": "LEGAL_MINIMUM",
   "citation_units": [
    "Art. 31(4)"
   ],
   "source_role": "LEGAL_BASIS"
  },
  {
   "obligation_id": "sbom_supply_chain_contracts",
   "regulation": "CRA",
   "family": "sbom",
   "tier": "BEST_PRACTICE",
   "citation_units": [],
   "source_role": "GUIDANCE"
  },
  {
   "obligation_id": "sbom_technical_documentation",
   "regulation": "CRA",
   "family": "sbom",
   "tier": "LEGAL_MINIMUM",
   "citation_units": [
    "Art. 31 i.V.m. Annex VII"
   ],
   "source_role": "EVIDENCE"
  },
  {
   "obligation_id": "vuln_identification_inventory",
   "regulation": "CRA",
   "family": "vuln",
   "tier": "LEGAL_MINIMUM",
   "citation_units": [
    "Annex I Part II (1)"
   ],
   "source_role": "LEGAL_BASIS"
  },
  {
   "obligation_id": "vuln_assessment_prioritization",
   "regulation": "CRA",
   "family": "vuln",
   "tier": "LEGAL_MINIMUM",
   "citation_units": [
    "Annex I Part II (1)"
   ],
   "source_role": "LEGAL_BASIS"
  },
  {
   "obligation_id": "vuln_remediation_patching",
   "regulation": "CRA",
   "family": "vuln",
   "tier": "LEGAL_MINIMUM",
   "citation_units": [
    "Annex I Part II (2) & (8)"
   ],
   "source_role": "LEGAL_BASIS"
  },
  {
   "obligation_id": "vuln_handling_process",
   "regulation": "CRA",
   "family": "vuln",
   "tier": "LEGAL_MINIMUM",
   "citation_units": [
    "Article 13(8) & Annex VII"
   ],
   "source_role": "LEGAL_BASIS"
  },
  {
   "obligation_id": "coordinated_vulnerability_disclosure",
   "regulation": "CRA",
   "family": "vuln",
   "tier": "LEGAL_MINIMUM",
   "citation_units": [
    "Annex I Part II (5)"
   ],
   "source_role": "LEGAL_BASIS"
  },
  {
   "obligation_id": "exploited_vuln_reporting_authorities",
   "regulation": "CRA",
   "family": "vuln",
   "tier": "LEGAL_MINIMUM",
   "citation_units": [
    "Article 14 & Article 16"
   ],
   "source_role": "LEGAL_BASIS"
  },
  {
   "obligation_id": "vuln_info_dissemination_users",
   "regulation": "CRA",
   "family": "vuln",
   "tier": "LEGAL_MINIMUM",
   "citation_units": [
    "Annex I Part II (4) & (6)"
   ],
   "source_role": "LEGAL_BASIS"
  },
  {
   "obligation_id": "attack_surface_minimization",
   "regulation": "CRA",
   "family": "core",
   "tier": "LEGAL_MINIMUM",
   "citation_units": [
    "Annex I Part I (2)(j)"
   ],
   "source_role": "LEGAL_BASIS"
  },
  {
   "obligation_id": "software_integrity_protection",
   "regulation": "CRA",
   "family": "core",
   "tier": "LEGAL_MINIMUM",
   "citation_units": [
    "Annex I Part I (2)(f)"
   ],
   "source_role": "LEGAL_BASIS"
  },
  {
   "obligation_id": "user_authentication_required",
   "regulation": "CRA",
   "family": "authentication",
   "tier": "LEGAL_MINIMUM",
   "citation_units": [
    "Annex I (2)(d)"
   ],
   "source_role": "LEGAL_BASIS"
  },
  {
   "obligation_id": "authentication_policy_documented",
   "regulation": "CRA",
   "family": "authentication",
   "tier": "BEST_PRACTICE",
   "citation_units": [],
   "source_role": "GUIDANCE"
  },
  {
   "obligation_id": "auth_exceptions_documented",
   "regulation": "CRA",
   "family": "authentication",
   "tier": "BEST_PRACTICE",
   "citation_units": [],
   "source_role": "GUIDANCE"
  },
  {
   "obligation_id": "mfa_required",
   "regulation": "CRA",
   "family": "authentication",
   "tier": "BEST_PRACTICE",
   "citation_units": [],
   "source_role": "GUIDANCE"
  },
  {
   "obligation_id": "step_up_authentication",
   "regulation": "CRA",
   "family": "authentication",
   "tier": "BEST_PRACTICE",
   "citation_units": [],
   "source_role": "GUIDANCE"
  },
  {
   "obligation_id": "privileged_op_reauth",
   "regulation": "CRA",
   "family": "authentication",
   "tier": "BEST_PRACTICE",
   "citation_units": [],
   "source_role": "GUIDANCE"
  },
  {
   "obligation_id": "strong_crypto_authentication",
   "regulation": "CRA",
   "family": "authentication",
   "tier": "LEGAL_MINIMUM",
   "citation_units": [
    "Annex I (2)(e)"
   ],
   "source_role": "LEGAL_BASIS"
  },
  {
   "obligation_id": "credential_lifecycle_management",
   "regulation": "CRA",
   "family": "authentication",
   "tier": "BEST_PRACTICE",
   "citation_units": [],
   "source_role": "GUIDANCE"
  },
  {
   "obligation_id": "credential_confidentiality_protection",
   "regulation": "CRA",
   "family": "authentication",
   "tier": "LEGAL_MINIMUM",
   "citation_units": [
    "Annex I (2)(e)"
   ],
   "source_role": "LEGAL_BASIS"
  },
  {
   "obligation_id": "password_policy",
   "regulation": "CRA",
   "family": "authentication",
   "tier": "BEST_PRACTICE",
   "citation_units": [],
   "source_role": "GUIDANCE"
  },
  {
   "obligation_id": "no_default_credentials",
   "regulation": "CRA",
   "family": "authentication",
   "tier": "LEGAL_MINIMUM",
   "citation_units": [
    "Annex I (2)(a)"
   ],
   "source_role": "LEGAL_BASIS"
  },
  {
   "obligation_id": "account_lockout_failed_attempts",
   "regulation": "CRA",
   "family": "authentication",
   "tier": "BEST_PRACTICE",
   "citation_units": [],
   "source_role": "GUIDANCE"
  },
  {
   "obligation_id": "server_side_validation",
   "regulation": "CRA",
   "family": "authentication",
   "tier": "BEST_PRACTICE",
   "citation_units": [],
   "source_role": "GUIDANCE"
  },
  {
   "obligation_id": "session_binding_management",
   "regulation": "CRA",
   "family": "authentication",
   "tier": "BEST_PRACTICE",
   "citation_units": [],
   "source_role": "GUIDANCE"
  },
  {
   "obligation_id": "reauth_after_inactivity",
   "regulation": "CRA",
   "family": "authentication",
   "tier": "BEST_PRACTICE",
   "citation_units": [],
   "source_role": "GUIDANCE"
  },
  {
   "obligation_id": "token_validation_lifecycle",
   "regulation": "CRA",
   "family": "authentication",
   "tier": "BEST_PRACTICE",
   "citation_units": [],
   "source_role": "GUIDANCE"
  },
  {
   "obligation_id": "mutual_authentication",
   "regulation": "CRA",
   "family": "authentication",
   "tier": "BEST_PRACTICE",
   "citation_units": [],
   "source_role": "GUIDANCE"
  },
  {
   "obligation_id": "revocation_check",
   "regulation": "CRA",
   "family": "authentication",
   "tier": "BEST_PRACTICE",
   "citation_units": [],
   "source_role": "GUIDANCE"
  },
  {
   "obligation_id": "encrypted_auth_channel",
   "regulation": "CRA",
   "family": "authentication",
   "tier": "LEGAL_MINIMUM",
   "citation_units": [
    "Annex I (2)(e)"
   ],
   "source_role": "LEGAL_BASIS"
  },
  {
   "obligation_id": "tls_certificate_auth",
   "regulation": "CRA",
   "family": "authentication",
   "tier": "BEST_PRACTICE",
   "citation_units": [],
   "source_role": "GUIDANCE"
  },
  {
   "obligation_id": "service_to_service_auth",
   "regulation": "CRA",
   "family": "authentication",
   "tier": "BEST_PRACTICE",
   "citation_units": [],
   "source_role": "GUIDANCE"
  },
  {
   "obligation_id": "auth_key_management",
   "regulation": "CRA",
   "family": "authentication",
   "tier": "BEST_PRACTICE",
   "citation_units": [],
   "source_role": "GUIDANCE"
  },
  {
   "obligation_id": "biometric_authentication",
   "regulation": "CRA",
   "family": "authentication",
   "tier": "BEST_PRACTICE",
   "citation_units": [],
   "source_role": "GUIDANCE"
  },
  {
   "obligation_id": "federated_auth_assertions",
   "regulation": "CRA",
   "family": "authentication",
   "tier": "BEST_PRACTICE",
   "citation_units": [],
   "source_role": "GUIDANCE"
  },
  {
   "obligation_id": "separate_authn_authz",
   "regulation": "CRA",
   "family": "authentication",
   "tier": "BEST_PRACTICE",
   "citation_units": [],
   "source_role": "GUIDANCE"
  },
  {
   "obligation_id": "remote_access_authentication",
   "regulation": "CRA",
   "family": "authentication",
   "tier": "BEST_PRACTICE",
   "citation_units": [],
   "source_role": "GUIDANCE"
  },
  {
   "obligation_id": "supplier_access_auth",
   "regulation": "CRA",
   "family": "authentication",
   "tier": "BEST_PRACTICE",
   "citation_units": [],
   "source_role": "GUIDANCE"
  },
  {
   "obligation_id": "personal_admin_accounts",
   "regulation": "CRA",
   "family": "authentication",
   "tier": "BEST_PRACTICE",
   "citation_units": [],
   "source_role": "GUIDANCE"
  },
  {
   "obligation_id": "firmware_software_authentication",
   "regulation": "CRA",
   "family": "authentication",
   "tier": "LEGAL_MINIMUM",
   "citation_units": [
    "Annex I (2)(c)"
   ],
   "source_role": "LEGAL_BASIS"
  },
  {
   "obligation_id": "event_logging_security_events",
   "regulation": "CRA",
   "family": "logging",
   "tier": "LEGAL_MINIMUM",
   "citation_units": [
    "Annex I Part I (2)(k)"
   ],
   "source_role": "LEGAL_BASIS"
  },
  {
   "obligation_id": "access_control_event_logging",
   "regulation": "CRA",
   "family": "logging",
   "tier": "LEGAL_MINIMUM",
   "citation_units": [
    "Annex I Part I (2)(k)"
   ],
   "source_role": "LEGAL_BASIS"
  },
  {
   "obligation_id": "audit_trail_admin_actions",
   "regulation": "CRA",
   "family": "logging",
   "tier": "LEGAL_MINIMUM",
   "citation_units": [
    "Annex I Part I (2)(k)"
   ],
   "source_role": "LEGAL_BASIS"
  },
  {
   "obligation_id": "log_integrity_immutability",
   "regulation": "CRA",
   "family": "logging",
   "tier": "LEGAL_MINIMUM",
   "citation_units": [
    "Annex I Part I (2)(k)"
   ],
   "source_role": "LEGAL_BASIS"
  },
  {
   "obligation_id": "log_access_control_protection",
   "regulation": "CRA",
   "family": "logging",
   "tier": "LEGAL_MINIMUM",
   "citation_units": [
    "Annex I Part I (2)(k)"
   ],
   "source_role": "LEGAL_BASIS"
  },
  {
   "obligation_id": "log_retention_archival",
   "regulation": "CRA",
   "family": "logging",
   "tier": "BEST_PRACTICE",
   "citation_units": [],
   "source_role": "GUIDANCE"
  },
  {
   "obligation_id": "centralized_log_management",
   "regulation": "CRA",
   "family": "logging",
   "tier": "BEST_PRACTICE",
   "citation_units": [],
   "source_role": "GUIDANCE"
  },
  {
   "obligation_id": "log_monitoring_alerting",
   "regulation": "CRA",
   "family": "logging",
   "tier": "LEGAL_MINIMUM",
   "citation_units": [
    "Annex I Part I (2)(k)"
   ],
   "source_role": "LEGAL_BASIS"
  },
  {
   "obligation_id": "log_data_minimization_privacy",
   "regulation": "CRA",
   "family": "logging",
   "tier": "BEST_PRACTICE",
   "citation_units": [],
   "source_role": "GUIDANCE"
  },
  {
   "obligation_id": "log_format_standardization",
   "regulation": "CRA",
   "family": "logging",
   "tier": "BEST_PRACTICE",
   "citation_units": [],
   "source_role": "GUIDANCE"
  },
  {
   "obligation_id": "log_timestamp_synchronization",
   "regulation": "CRA",
   "family": "logging",
   "tier": "BEST_PRACTICE",
   "citation_units": [],
   "source_role": "GUIDANCE"
  },
  {
   "obligation_id": "logging_availability_resilience",
   "regulation": "CRA",
   "family": "logging",
   "tier": "BEST_PRACTICE",
   "citation_units": [],
   "source_role": "GUIDANCE"
  },
  {
   "obligation_id": "logging_thread_safety_correctness",
   "regulation": "CRA",
   "family": "logging",
   "tier": "BEST_PRACTICE",
   "citation_units": [],
   "source_role": "IMPLEMENTATION"
  },
  {
   "obligation_id": "logging_library_supply_chain",
   "regulation": "CRA",
   "family": "logging",
   "tier": "BEST_PRACTICE",
   "citation_units": [],
   "source_role": "GUIDANCE"
  },
  {
   "obligation_id": "logging_config_management",
   "regulation": "CRA",
   "family": "logging",
   "tier": "BEST_PRACTICE",
   "citation_units": [],
   "source_role": "GUIDANCE"
  },
  {
   "obligation_id": "logging_governance_roles",
   "regulation": "CRA",
   "family": "logging",
   "tier": "BEST_PRACTICE",
   "citation_units": [],
   "source_role": "GUIDANCE"
  },
  {
   "obligation_id": "incident_response_logging",
   "regulation": "CRA",
   "family": "logging",
   "tier": "BEST_PRACTICE",
   "citation_units": [],
   "source_role": "GUIDANCE"
  },
  {
   "obligation_id": "log_transmission_security",
   "regulation": "CRA",
   "family": "logging",
   "tier": "BEST_PRACTICE",
   "citation_units": [],
   "source_role": "GUIDANCE"
  },
  {
   "obligation_id": "network_traffic_logging",
   "regulation": "CRA",
   "family": "logging",
   "tier": "BEST_PRACTICE",
   "citation_units": [],
   "source_role": "GUIDANCE"
  },
  {
   "obligation_id": "remote_access_control_least_privilege",
   "regulation": "CRA",
   "family": "remote_access",
   "tier": "LEGAL_MINIMUM",
   "citation_units": [
    "Annex I (1)(2)(d)"
   ],
   "source_role": "LEGAL_BASIS"
  },
  {
   "obligation_id": "remote_access_confidentiality_integrity",
   "regulation": "CRA",
   "family": "remote_access",
   "tier": "LEGAL_MINIMUM",
   "citation_units": [
    "Annex I (1)(2)(b)(c)"
   ],
   "source_role": "LEGAL_BASIS"
  },
  {
   "obligation_id": "remote_session_management",
   "regulation": "CRA",
   "family": "remote_access",
   "tier": "BEST_PRACTICE",
   "citation_units": [],
   "source_role": "GUIDANCE"
  },
  {
   "obligation_id": "remote_access_mfa",
   "regulation": "CRA",
   "family": "remote_access",
   "tier": "BEST_PRACTICE",
   "citation_units": [],
   "source_role": "GUIDANCE"
  },
  {
   "obligation_id": "remote_access_encryption",
   "regulation": "CRA",
   "family": "remote_access",
   "tier": "BEST_PRACTICE",
   "citation_units": [],
   "source_role": "GUIDANCE"
  },
  {
   "obligation_id": "reject_insecure_remote_protocols",
   "regulation": "CRA",
   "family": "remote_access",
   "tier": "BEST_PRACTICE",
   "citation_units": [],
   "source_role": "GUIDANCE"
  },
  {
   "obligation_id": "remote_access_logging_audit",
   "regulation": "CRA",
   "family": "remote_access",
   "tier": "LEGAL_MINIMUM",
   "citation_units": [
    "Annex I (1)(2)(g)"
   ],
   "source_role": "LEGAL_BASIS"
  },
  {
   "obligation_id": "remote_access_user_validation_ot",
   "regulation": "CRA",
   "family": "remote_access",
   "tier": "BEST_PRACTICE",
   "citation_units": [],
   "source_role": "GUIDANCE"
  },
  {
   "obligation_id": "remote_access_training",
   "regulation": "CRA",
   "family": "remote_access",
   "tier": "BEST_PRACTICE",
   "citation_units": [],
   "source_role": "GUIDANCE"
  },
  {
   "obligation_id": "remote_access_architecture_design",
   "regulation": "CRA",
   "family": "remote_access",
   "tier": "BEST_PRACTICE",
   "citation_units": [],
   "source_role": "GUIDANCE"
  },
  {
   "obligation_id": "remote_access_attack_surface_min",
   "regulation": "CRA",
   "family": "remote_access",
   "tier": "LEGAL_MINIMUM",
   "citation_units": [
    "Annex I (1)(2)(a)"
   ],
   "source_role": "LEGAL_BASIS"
  },
  {
   "obligation_id": "remote_access_vuln_patch_mgmt",
   "regulation": "CRA",
   "family": "remote_access",
   "tier": "LEGAL_MINIMUM",
   "citation_units": [
    "Annex I (2)(1)"
   ],
   "source_role": "LEGAL_BASIS"
  },
  {
   "obligation_id": "remote_access_threat_detection",
   "regulation": "CRA",
   "family": "remote_access",
   "tier": "BEST_PRACTICE",
   "citation_units": [],
   "source_role": "GUIDANCE"
  },
  {
   "obligation_id": "remote_maintenance_governance",
   "regulation": "CRA",
   "family": "remote_access",
   "tier": "BEST_PRACTICE",
   "citation_units": [],
   "source_role": "GUIDANCE"
  },
  {
   "obligation_id": "temporary_remote_access_mgmt",
   "regulation": "CRA",
   "family": "remote_access",
   "tier": "BEST_PRACTICE",
   "citation_units": [],
   "source_role": "GUIDANCE"
  },
  {
   "obligation_id": "remote_access_data_export_protection",
   "regulation": "CRA",
   "family": "remote_access",
   "tier": "BEST_PRACTICE",
   "citation_units": [],
   "source_role": "GUIDANCE"
  },
  {
   "obligation_id": "component_remote_interface_security",
   "regulation": "CRA",
   "family": "remote_access",
   "tier": "BEST_PRACTICE",
   "citation_units": [],
   "source_role": "GUIDANCE"
  },
  {
   "obligation_id": "remote_access_fallback_concept",
   "regulation": "CRA",
   "family": "remote_access",
   "tier": "BEST_PRACTICE",
   "citation_units": [],
   "source_role": "GUIDANCE"
  },
  {
   "obligation_id": "provide_security_updates",
   "regulation": "CRA",
   "family": "updates",
   "tier": "LEGAL_MINIMUM",
   "citation_units": [
    "Annex I (2)(c)",
    "Art. 13"
   ],
   "source_role": "LEGAL_BASIS"
  },
  {
   "obligation_id": "support_period_maintenance",
   "regulation": "CRA",
   "family": "updates",
   "tier": "LEGAL_MINIMUM",
   "citation_units": [
    "Art. 13(8)"
   ],
   "source_role": "LEGAL_BASIS"
  },
  {
   "obligation_id": "signed_update_integrity",
   "regulation": "CRA",
   "family": "updates",
   "tier": "LEGAL_MINIMUM",
   "citation_units": [
    "Annex I (1)(3)(f)"
   ],
   "source_role": "LEGAL_BASIS"
  },
  {
   "obligation_id": "trusted_update_source",
   "regulation": "CRA",
   "family": "updates",
   "tier": "LEGAL_MINIMUM",
   "citation_units": [
    "Annex I (1)(3)(d)"
   ],
   "source_role": "LEGAL_BASIS"
  },
  {
   "obligation_id": "update_testing_validation",
   "regulation": "CRA",
   "family": "updates",
   "tier": "BEST_PRACTICE",
   "citation_units": [],
   "source_role": "GUIDANCE"
  },
  {
   "obligation_id": "update_rollback",
   "regulation": "CRA",
   "family": "updates",
   "tier": "BEST_PRACTICE",
   "citation_units": [],
   "source_role": "GUIDANCE"
  },
  {
   "obligation_id": "automatic_updates_optout",
   "regulation": "CRA",
   "family": "updates",
   "tier": "LEGAL_MINIMUM",
   "citation_units": [
    "Annex I (2)(c)"
   ],
   "source_role": "LEGAL_BASIS"
  },
  {
   "obligation_id": "update_risk_assessment",
   "regulation": "CRA",
   "family": "updates",
   "tier": "LEGAL_MINIMUM",
   "citation_units": [
    "Annex I (1)(2)"
   ],
   "source_role": "LEGAL_BASIS"
  },
  {
   "obligation_id": "secure_modification_control",
   "regulation": "CRA",
   "family": "updates",
   "tier": "BEST_PRACTICE",
   "citation_units": [],
   "source_role": "IMPLEMENTATION"
  }
 ]
 }
@@ -0,0 +1,126 @@
 package handlers
 import (
 	"net/http"
 	"strings"
 	"github.com/gin-gonic/gin"
 	"github.com/breakpilot/ai-compliance-sdk/internal/ucca"
 )
 // ComplianceGraphHandlers serves the read-only Compliance Execution Graph
 // (Regulation -> Obligation -> Control -> Evidence) over the file-backed bridge artifacts.
 // It is intentionally SEPARATE from the DB-backed ObligationsHandlers: this is the curated
 // cross-session graph (Registry join keys + accepted control mappings + evidence requirements),
 // loaded once at startup. Fail-closed: if the graph could not load, every request answers 503.
 //
 // Fields:
 //   - joins:    Registry join keys; consulted to decide whether an obligation_id is known.
 //   - mappings: accepted control mappings, handed to the status assessment.
 //   - evidence: evidence requirements, handed to the status assessment.
 //   - loadErr:  error returned by ucca.LoadComplianceGraph at construction; nil when the
 //     graph loaded. A non-nil value makes ObligationStatus answer 503.
 type ComplianceGraphHandlers struct {
 	joins    *ucca.ObligationJoinKeys
 	mappings *ucca.ControlMappingSet
 	evidence *ucca.EvidenceRequirementSet
 	loadErr  error
 }
 // NewComplianceGraphHandlers builds the handler set by loading the compliance graph once.
 // It always returns a handler: when loading fails, the error is kept on the handler (see
 // LoadError) and reported to clients as 503 on each request, mirroring the
 // load-warn-continue startup used elsewhere in the codebase.
 func NewComplianceGraphHandlers() *ComplianceGraphHandlers {
 	h := &ComplianceGraphHandlers{}
 	h.joins, h.mappings, h.evidence, h.loadErr = ucca.LoadComplianceGraph()
 	return h
 }
 // LoadError reports the error, if any, recorded while loading the compliance graph at
 // construction time, so the wiring code can log a startup warning. It returns nil when
 // the graph loaded.
 func (h *ComplianceGraphHandlers) LoadError() error {
 	return h.loadErr
 }
 // RegisterRoutes attaches the compliance-graph endpoints to r inside a "/compliance"
 // sub-group. At present that is the single GET /compliance/obligation-status route.
 func (h *ComplianceGraphHandlers) RegisterRoutes(r *gin.RouterGroup) {
 	r.Group("/compliance").GET("/obligation-status", h.ObligationStatus)
 }
 // cgControlDTO is the JSON shape of one control mapped to an obligation.
 //
 // Framework, Control and MappingType are copied from the assessed control. EvidenceRequired
 // lists the EvidenceType of each evidence item the control requires, and EvidenceStatus is
 // the coverage summary computed by cgEvidenceStatus from the required and missing counts.
 type cgControlDTO struct {
 	Framework        string   `json:"framework"`
 	Control          string   `json:"control"`
 	MappingType      string   `json:"mapping_type"`
 	EvidenceRequired []string `json:"evidence_required"`
 	EvidenceStatus   string   `json:"evidence_status"` // missing | partial | present | none_required
 }
 // cgStatusResponse is the JSON body returned by ObligationStatus for every 200 answer.
 //
 // ObligationID echoes the trimmed query parameter. LegalBasis is taken from the assessment
 // and left out of the JSON when empty. Controls is initialised to an empty slice by the
 // handler, so it encodes as [] rather than null when no control is mapped. Note carries a
 // human-readable explanation of the overall status and is left out when empty.
 type cgStatusResponse struct {
 	ObligationID  string         `json:"obligation_id"`
 	OverallStatus string         `json:"overall_status"` // unknown_obligation | unmapped | not_assessed | open | met
 	LegalBasis    []string       `json:"legal_basis,omitempty"`
 	CitationSpans string         `json:"citation_spans"` // "pending" until the Legal-KG attaches spans
 	Controls      []cgControlDTO `json:"controls"`
 	Note          string         `json:"note,omitempty"`
 }
 // ObligationStatus handles GET /sdk/v1/compliance/obligation-status?obligation_id=...
 //
 // The handler never claims that an obligation is fulfilled. No evidence collection is wired
 // in the MVP, so the assessment is run with a nil evidence lookup and a mapped obligation is
 // always reported as "not_assessed": the response shows which evidence is required, not
 // that a document exists.
 //
 // Outcomes, in the order they are checked:
 //   - graph failed to load at startup   -> 503
 //   - obligation_id missing or blank    -> 400
 //   - id not in the Registry            -> 200 overall_status=unknown_obligation
 //   - in the Registry, no control yet   -> 200 overall_status=unmapped
 //   - at least one mapped control       -> 200 overall_status=not_assessed
 func (h *ComplianceGraphHandlers) ObligationStatus(c *gin.Context) {
 	if err := h.loadErr; err != nil {
 		c.JSON(http.StatusServiceUnavailable, gin.H{
 			"error":  "compliance graph unavailable",
 			"detail": err.Error(),
 		})
 		return
 	}
 	id := strings.TrimSpace(c.Query("obligation_id"))
 	if id == "" {
 		c.JSON(http.StatusBadRequest, gin.H{"error": "obligation_id query parameter required"})
 		return
 	}
 	out := cgStatusResponse{ObligationID: id, CitationSpans: "pending", Controls: []cgControlDTO{}}
 	// finish stamps the final status and note on the response and sends it as 200.
 	finish := func(status, note string) {
 		out.OverallStatus = status
 		out.Note = note
 		c.JSON(http.StatusOK, out)
 	}
 	if h.joins.FindObligation(id) == nil {
 		finish("unknown_obligation", "obligation_id not in the Registry join-key contract")
 		return
 	}
 	// MVP: a nil evidence lookup means no collection is wired, so required evidence is
 	// expected to be reported as missing by the assessment.
 	assessed := ucca.AssessObligationStatus(h.joins, h.mappings, h.evidence, id, nil)
 	out.LegalBasis = assessed.LegalBasis
 	if len(assessed.Controls) == 0 {
 		finish("unmapped", "no accepted control maps to this obligation yet")
 		return
 	}
 	for _, ctl := range assessed.Controls {
 		evidenceTypes := make([]string, 0, len(ctl.RequiredEvidence))
 		for _, req := range ctl.RequiredEvidence {
 			evidenceTypes = append(evidenceTypes, req.EvidenceType)
 		}
 		dto := cgControlDTO{
 			Framework:        ctl.Framework,
 			Control:          ctl.Control,
 			MappingType:      ctl.MappingType,
 			EvidenceRequired: evidenceTypes,
 			EvidenceStatus:   cgEvidenceStatus(len(ctl.RequiredEvidence), len(ctl.MissingEvidence)),
 		}
 		out.Controls = append(out.Controls, dto)
 	}
 	// Without real evidence collection there is nothing to base a fulfillment claim on.
 	finish("not_assessed", "evidence collection not wired (MVP) — fulfillment not asserted")
 }
 // cgEvidenceStatus summarises evidence coverage for one control from two counts: how many
 // evidence items are required and how many of those are missing. It returns
 // "none_required" when nothing is required, "present" when nothing is missing, "missing"
 // when everything required is missing, and "partial" in every other case.
 func cgEvidenceStatus(required, missing int) string {
 	if required == 0 {
 		return "none_required"
 	}
 	if missing == 0 {
 		return "present"
 	}
 	if missing != required {
 		return "partial"
 	}
 	return "missing"
 }
@@ -0,0 +1,133 @@
 package handlers
 import (
 	"encoding/json"
 	"net/http"
 	"net/http/httptest"
 	"testing"
 	"github.com/gin-gonic/gin"
 )
 // newComplianceGraphTestRouter returns a gin engine in test mode with the compliance-graph
 // routes mounted under /sdk/v1. The calling test is aborted if the graph fails to load.
 func newComplianceGraphTestRouter(t *testing.T) *gin.Engine {
 	t.Helper()
 	gin.SetMode(gin.TestMode)
 	cg := NewComplianceGraphHandlers()
 	if loadErr := cg.LoadError(); loadErr != nil {
 		t.Fatalf("compliance graph failed to load (candidate paths): %v", loadErr)
 	}
 	engine := gin.New()
 	v1 := engine.Group("/sdk/v1")
 	cg.RegisterRoutes(v1)
 	return engine
 }
 // getObligationStatus issues GET /sdk/v1/compliance/obligation-status with the given raw
 // query suffix (e.g. "?obligation_id=foo", or "" for none) against r and returns the HTTP
 // status code together with the decoded body.
 //
 // The body is decoded only for 200 responses; for any other status the returned
 // cgStatusResponse is the zero value. A request that cannot be built or a 200 body that
 // cannot be decoded fails the test immediately.
 func getObligationStatus(t *testing.T, r *gin.Engine, query string) (int, cgStatusResponse) {
 	t.Helper()
 	req, err := http.NewRequest(http.MethodGet, "/sdk/v1/compliance/obligation-status"+query, nil)
 	if err != nil {
 		// A malformed query would otherwise hand a nil request to ServeHTTP and panic.
 		t.Fatalf("build request for query %q: %v", query, err)
 	}
 	w := httptest.NewRecorder()
 	r.ServeHTTP(w, req)
 	var resp cgStatusResponse
 	if w.Code == http.StatusOK {
 		if err := json.Unmarshal(w.Body.Bytes(), &resp); err != nil {
 			t.Fatalf("decode body %q: %v", w.Body.String(), err)
 		}
 	}
 	return w.Code, resp
 }
 // TestObligationStatus drives the obligation-status endpoint through its main outcomes:
 // missing parameter, unknown obligation, mapped obligations and a registered-but-unmapped
 // obligation. For every 200 answer it checks the overall status, whether controls are
 // present, and that citation spans are still reported as pending.
 func TestObligationStatus(t *testing.T) {
 	type scenario struct {
 		name         string
 		query        string
 		wantHTTP     int
 		wantOverall  string
 		wantControls bool // true: at least one control expected; false: none expected
 	}
 	router := newComplianceGraphTestRouter(t)
 	scenarios := []scenario{
 		{name: "missing param -> 400", query: "", wantHTTP: http.StatusBadRequest},
 		{name: "unknown id -> unknown_obligation", query: "?obligation_id=does_not_exist", wantHTTP: http.StatusOK, wantOverall: "unknown_obligation"},
 		{name: "mapped (OWASP V6) -> not_assessed", query: "?obligation_id=user_authentication_required", wantHTTP: http.StatusOK, wantOverall: "not_assessed", wantControls: true},
 		{name: "NIST adopted (SI-2) -> not_assessed", query: "?obligation_id=provide_security_updates", wantHTTP: http.StatusOK, wantOverall: "not_assessed", wantControls: true},
 		{name: "CORE attack_surface_minimization -> CM-7", query: "?obligation_id=attack_surface_minimization", wantHTTP: http.StatusOK, wantOverall: "not_assessed", wantControls: true},
 		{name: "CORE software_integrity_protection -> SI-7", query: "?obligation_id=software_integrity_protection", wantHTTP: http.StatusOK, wantOverall: "not_assessed", wantControls: true},
 		{name: "in registry, no control -> unmapped", query: "?obligation_id=sbom_creation", wantHTTP: http.StatusOK, wantOverall: "unmapped"},
 	}
 	for _, sc := range scenarios {
 		t.Run(sc.name, func(t *testing.T) {
 			status, body := getObligationStatus(t, router, sc.query)
 			if status != sc.wantHTTP {
 				t.Fatalf("http %d, want %d", status, sc.wantHTTP)
 			}
 			// Non-200 answers carry no decoded body, so there is nothing further to check.
 			if sc.wantHTTP != http.StatusOK {
 				return
 			}
 			if body.OverallStatus != sc.wantOverall {
 				t.Errorf("overall_status=%q, want %q", body.OverallStatus, sc.wantOverall)
 			}
 			switch n := len(body.Controls); {
 			case sc.wantControls && n == 0:
 				t.Error("expected >=1 control")
 			case !sc.wantControls && n != 0:
 				t.Errorf("expected 0 controls, got %d", n)
 			}
 			if body.CitationSpans != "pending" {
 				t.Errorf("citation_spans=%q, want pending", body.CitationSpans)
 			}
 		})
 	}
 }
 // TestObligationStatus_NoFulfillmentClaim guards the MVP rule that fulfillment is never
 // asserted automatically: with no evidence collection wired, the overall status must not be
 // a "fulfilled" value and every control that requires evidence must report it as "missing".
 func TestObligationStatus_NoFulfillmentClaim(t *testing.T) {
 	router := newComplianceGraphTestRouter(t)
 	status, body := getObligationStatus(t, router, "?obligation_id=user_authentication_required")
 	if status != http.StatusOK {
 		t.Fatalf("http %d", status)
 	}
 	switch body.OverallStatus {
 	case "met", "erfuellt":
 		t.Fatalf("MVP must not assert fulfillment, got overall_status=%q", body.OverallStatus)
 	}
 	for _, ctl := range body.Controls {
 		// Controls without required evidence, or already reported as missing, are fine.
 		if len(ctl.EvidenceRequired) == 0 || ctl.EvidenceStatus == "missing" {
 			continue
 		}
 		t.Errorf("control %s/%s evidence_status=%q, want missing (no collection wired)", ctl.Framework, ctl.Control, ctl.EvidenceStatus)
 	}
 }
 // TestObligationStatus_NISTEvidenceTypes pins the curated evidence_required set per NIST
 // obligation. A required:false row silently drops from evidence_required, which a test that
 // only counts controls would miss.
 //
 // Each obligation runs as its own subtest so that one failure does not hide the others
 // (map iteration order is random), and the HTTP status is checked first so that a non-200
 // answer is reported as such instead of as a misleading "got 0 controls".
 func TestObligationStatus_NISTEvidenceTypes(t *testing.T) {
 	r := newComplianceGraphTestRouter(t)
 	want := map[string][]string{
 		"attack_surface_minimization":   {"config_export", "repo_scan"},
 		"software_integrity_protection": {"sbom", "config_export"},
 		"provide_security_updates":      {"config_export", "test_report"},
 	}
 	for ob, exp := range want {
 		t.Run(ob, func(t *testing.T) {
 			code, resp := getObligationStatus(t, r, "?obligation_id="+ob)
 			if code != http.StatusOK {
 				t.Fatalf("%s: http %d, want %d", ob, code, http.StatusOK)
 			}
 			if len(resp.Controls) != 1 {
 				t.Fatalf("%s: want 1 control, got %d", ob, len(resp.Controls))
 			}
 			if got := resp.Controls[0].EvidenceRequired; !sameStringSet(got, exp) {
 				t.Errorf("%s evidence_required = %v, want %v", ob, got, exp)
 			}
 		})
 	}
 }
 // sameStringSet reports whether a and b hold the same strings regardless of order.
 //
 // Multiplicity is respected: each occurrence in b must be matched by its own occurrence
 // in a. A plain membership map would accept {"x","y"} vs {"x","x"} because the lengths
 // agree and every element of b is found in a, so occurrences are counted instead.
 // Nil and empty slices compare equal.
 func sameStringSet(a, b []string) bool {
 	if len(a) != len(b) {
 		return false
 	}
 	remaining := make(map[string]int, len(a))
 	for _, x := range a {
 		remaining[x]++
 	}
 	for _, x := range b {
 		if remaining[x] == 0 {
 			return false
 		}
 		remaining[x]--
 	}
 	// Equal lengths plus every element of b consumed from a means nothing is left over.
 	return true
 }
@@ -153,6 +153,12 @@ func buildRouter(cfg *config.Config, pool *pgxpool.Pool) *gin.Engine {
 	ragHandlers := handlers.NewRAGHandlers(corpusVersionStore)
 	obligationsHandlers := handlers.NewObligationsHandlersWithStore(obligationsStore)
 	// Compliance Execution Graph (file-backed: Registry join keys + accepted control mappings + evidence)
 	complianceGraphHandlers := handlers.NewComplianceGraphHandlers()
 	if err := complianceGraphHandlers.LoadError(); err != nil {
 		log.Printf("WARNING: compliance graph not loaded (obligation-status -> 503): %v", err)
 	}
 	// Regulatory News
 	allV2Regs, err := ucca.LoadAllV2Regulations()
 	if err != nil {
@@ -201,7 +207,8 @@ func buildRouter(cfg *config.Config, pool *pgxpool.Pool) *gin.Engine {
 		uccaHandlers, escalationHandlers, obligationsHandlers, ragHandlers,
 		roadmapHandlers, workshopHandlers, portfolioHandlers,
 		academyHandlers, trainingHandlers, whistleblowerHandlers, iaceHandler,
-		gapHandler, maximizerHandlers, regulatoryNewsHandlers, useCaseHandler)
+		gapHandler, maximizerHandlers, regulatoryNewsHandlers, useCaseHandler,
 		complianceGraphHandlers)
 	return router
 }
@@ -30,6 +30,7 @@ func registerRoutes(
 	maximizerHandlers *handlers.MaximizerHandlers,
 	regulatoryNewsHandlers *handlers.RegulatoryNewsHandlers,
 	useCaseHandler *handlers.UseCaseHandler,
 	complianceGraphHandlers *handlers.ComplianceGraphHandlers,
 ) {
 	v1 := router.Group("/sdk/v1")
 	{
@@ -54,6 +55,7 @@ func registerRoutes(
 		registerMaximizerRoutes(v1, maximizerHandlers)
 		registerUseCaseRoutes(v1, useCaseHandler)
 		v1.GET("/regulatory-news", regulatoryNewsHandlers.GetNews)
 		complianceGraphHandlers.RegisterRoutes(v1)
 	}
 }
@@ -36,6 +36,10 @@ type DictionarySuggestion struct {
 	Token      string   `json:"token"`
 	Field      string   `json:"field"`
 	PatternIDs []string `json:"pattern_ids"`
 	// SuggestedTags are the RequiredComponentTags shared by the naming patterns,
 	// ranked by frequency — the candidate tags a keyword_dictionary entry for this
 	// token would emit so narratives mentioning it can trigger those patterns.
 	SuggestedTags []string `json:"suggested_tags,omitempty"`
 }
 type VocabularyReport struct {
@@ -66,6 +66,10 @@ func runVocabulary(form map[string]any) VocabularyReport {
 	// For each unknown token check if any pattern names it
 	patterns := iace.AllPatterns()
 	byID := make(map[string]iace.HazardPattern, len(patterns))
 	for _, p := range patterns {
 		byID[p.ID] = p
 	}
 	for _, tok := range report.UnknownTokens {
 		hits := patternsMentioning(tok, patterns)
 		if len(hits) == 0 {
@@ -74,6 +78,7 @@ func runVocabulary(form map[string]any) VocabularyReport {
 		report.SuggestedDictionaryEntries = append(report.SuggestedDictionaryEntries, DictionarySuggestion{
 			Token:         tok,
 			PatternIDs:    hits,
 			SuggestedTags: suggestTagsFor(hits, byID),
 		})
 	}
 	sort.Slice(report.SuggestedDictionaryEntries, func(i, j int) bool {
@@ -129,18 +134,24 @@ func dictTokenHit(tok string, dict map[string]bool) bool {
 	return false
 }
-// patternsMentioning returns up to 8 pattern IDs whose scenario/trigger/
+// patternsMentioning returns up to 8 pattern IDs whose scenario/trigger/harm/
-// harm/zone text contains the token (case-insensitive substring).
+// zone text names the token as a WHOLE WORD. Whole-word (not substring) matching
 // is essential: a substring match flags common fragments like "stehen" inside
 // "entstehen", producing spurious hits and nonsensical tag suggestions.
 func patternsMentioning(tok string, patterns []iace.HazardPattern) []string {
 	tokLower := strings.ToLower(tok)
 	seen := map[string]bool{}
 	var out []string
 	for _, p := range patterns {
 		hay := strings.ToLower(p.ScenarioDE + " " + p.TriggerDE + " " + p.HarmDE + " " + p.ZoneDE + " " + p.NameDE)
-		if !strings.Contains(hay, tokLower) {
+		matched := false
-			continue
+		for _, w := range tokenRE.FindAllString(hay, -1) {
 			if w == tokLower {
 				matched = true
 				break
 			}
-		if seen[p.ID] {
+		}
 		if !matched || seen[p.ID] {
 			continue
 		}
 		seen[p.ID] = true
@@ -151,3 +162,57 @@ func patternsMentioning(tok string, patterns []iace.HazardPattern) []string {
 	}
 	return out
 }
 // suggestTagsFor proposes up to three RequiredComponentTags for a token, given the IDs of
 // the patterns that name it. Tags are ranked by the number of those patterns requiring
 // them (a tag counts once per pattern), with ties broken alphabetically. Only tags required
 // by at least 40% of the resolved patterns are kept, so a token spread thinly across many
 // unrelated patterns yields no suggestion. IDs absent from byID are ignored; nil is
 // returned when none of the IDs resolve.
 func suggestTagsFor(ids []string, byID map[string]iace.HazardPattern) []string {
 	counts := make(map[string]int)
 	resolved := 0
 	for _, id := range ids {
 		pattern, found := byID[id]
 		if !found {
 			continue
 		}
 		resolved++
 		// Count each tag at most once per pattern, even if the pattern lists it twice.
 		counted := make(map[string]bool)
 		for _, tag := range pattern.RequiredComponentTags {
 			if !counted[tag] {
 				counted[tag] = true
 				counts[tag]++
 			}
 		}
 	}
 	if resolved == 0 {
 		return nil
 	}
 	type tagCount struct {
 		tag   string
 		count int
 	}
 	ranked := make([]tagCount, 0, len(counts))
 	for tag, count := range counts {
 		ranked = append(ranked, tagCount{tag: tag, count: count})
 	}
 	// Most frequent first; equal counts fall back to the tag name for a stable order.
 	sort.Slice(ranked, func(i, j int) bool {
 		a, b := ranked[i], ranked[j]
 		if a.count == b.count {
 			return a.tag < b.tag
 		}
 		return a.count > b.count
 	})
 	// ranked is in descending count order, so the first tag under the 40% share ends the scan.
 	var suggestions []string
 	for _, rc := range ranked {
 		if len(suggestions) >= 3 || float64(rc.count)/float64(resolved) < 0.4 {
 			break
 		}
 		suggestions = append(suggestions, rc.tag)
 	}
 	return suggestions
 }
@@ -0,0 +1,36 @@
 package audit
 import (
 	"testing"
 	"github.com/breakpilot/ai-compliance-sdk/internal/iace"
 )
 // TestSuggestTagsFor_RanksSharedRequiredTags checks that the tag required by the most
 // patterns comes first: backflow_risk is required by two of the three patterns.
 func TestSuggestTagsFor_RanksSharedRequiredTags(t *testing.T) {
 	patterns := make(map[string]iace.HazardPattern, 3)
 	patterns["P1"] = iace.HazardPattern{ID: "P1", RequiredComponentTags: []string{"backflow_risk", "dom_warewashing"}}
 	patterns["P2"] = iace.HazardPattern{ID: "P2", RequiredComponentTags: []string{"backflow_risk"}}
 	patterns["P3"] = iace.HazardPattern{ID: "P3", RequiredComponentTags: []string{"sharp_edge"}}
 	ids := []string{"P1", "P2", "P3"}
 	got := suggestTagsFor(ids, patterns)
 	first := ""
 	if len(got) > 0 {
 		first = got[0]
 	}
 	if first != "backflow_risk" {
 		t.Fatalf("want backflow_risk ranked first (2 patterns), got %v", got)
 	}
 }
 // TestSuggestTagsFor_TopThreeStableAlpha checks that equally frequent tags are returned in
 // alphabetical order and that the result is capped at three entries.
 func TestSuggestTagsFor_TopThreeStableAlpha(t *testing.T) {
 	patterns := map[string]iace.HazardPattern{
 		"P1": {ID: "P1", RequiredComponentTags: []string{"d", "b", "a", "c"}},
 	}
 	want := []string{"a", "b", "c"}
 	got := suggestTagsFor([]string{"P1"}, patterns)
 	match := len(got) == len(want)
 	for i := 0; match && i < len(want); i++ {
 		match = got[i] == want[i]
 	}
 	if !match {
 		t.Fatalf("want stable alpha top-3 [a b c], got %v", got)
 	}
 }
 func TestSuggestTagsFor_UnknownPatternIgnored(t *testing.T) {
 	byID := map[string]iace.HazardPattern{}
 	if got := suggestTagsFor([]string{"missing"}, byID); len(got) != 0 {
 		t.Fatalf("want empty for unknown patterns, got %v", got)
 	}
 }
@@ -7,8 +7,6 @@ import (
 	"path/filepath"
 	"sort"
 	"testing"
 	"github.com/google/uuid"
 )
 // TestKistenhub_GTCoverage runs the Kistenhubgeraet ground truth (37 entries)
@@ -110,65 +108,6 @@ func TestKistenhub_GTCoverage(t *testing.T) {
 // patternsToHazardsAndMitigations turns a pattern-match result into the Hazard and
 // Mitigation slices that CompareBenchmark expects, without any DB writes (the original
 // note says this mirrors what iace_handler_init.go does in production).
 //
 // Every matched pattern becomes one Hazard with a fresh UUID; its name falls back to the
 // pattern name when the scenario text is empty. Every suggested measure becomes one
 // Mitigation per source pattern that produced a hazard, named after the protective-measure
 // library entry or, if the library has no name for it, after the measure ID.
 func patternsToHazardsAndMitigations(out *MatchOutput) ([]Hazard, []Mitigation) {
 	hazardByPattern := make(map[string]uuid.UUID, len(out.MatchedPatterns))
 	hazards := make([]Hazard, 0, len(out.MatchedPatterns))
 	for _, pm := range out.MatchedPatterns {
 		// Only the first category and the first lifecycle phase are carried over.
 		var category, lifecycle string
 		if len(pm.HazardCats) > 0 {
 			category = pm.HazardCats[0]
 		}
 		if len(pm.ApplicableLifecycles) > 0 {
 			lifecycle = pm.ApplicableLifecycles[0]
 		}
 		hz := Hazard{
 			ID:             uuid.New(),
 			Name:           pm.ScenarioDE,
 			Category:       category,
 			Description:    pm.ScenarioDE,
 			Scenario:       pm.ScenarioDE,
 			TriggerEvent:   pm.TriggerDE,
 			PossibleHarm:   pm.HarmDE,
 			AffectedPerson: pm.AffectedDE,
 			HazardousZone:  pm.ZoneDE,
 			LifecyclePhase: lifecycle,
 		}
 		if hz.Name == "" {
 			hz.Name = pm.PatternName
 		}
 		hazards = append(hazards, hz)
 		hazardByPattern[pm.PatternID] = hz.ID
 	}
 	nameByMeasure := make(map[string]string)
 	for _, measure := range GetProtectiveMeasureLibrary() {
 		nameByMeasure[measure.ID] = measure.Name
 	}
 	var mitigations []Mitigation
 	for _, sm := range out.SuggestedMeasures {
 		label := nameByMeasure[sm.MeasureID]
 		if label == "" {
 			label = sm.MeasureID
 		}
 		for _, patternID := range sm.SourcePatterns {
 			// Source patterns that produced no hazard above are skipped.
 			if hazardID, ok := hazardByPattern[patternID]; ok {
 				mitigations = append(mitigations, Mitigation{
 					ID:       uuid.New(),
 					HazardID: hazardID,
 					Name:     label,
 				})
 			}
 		}
 	}
 	return hazards, mitigations
 }
 func abbrev(s string, max int) string {
 	if len(s) <= max {
 		return s
@@ -1,6 +1,7 @@
 package iace
 import (
 	"context"
 	"encoding/json"
 	"os"
 	"path/filepath"
@@ -45,7 +46,7 @@ var warewashingCyberCategories = map[string]bool{
 // warewashingEngineOutput runs the production chain and returns the filtered
 // hazards/mitigations the user would see for the UC-M.
-func warewashingEngineOutput() ([]Hazard, []Mitigation, int) {
+func warewashingEngineOutput() ([]Hazard, []Mitigation, []PatternMatch) {
 	res := ParseNarrative(warewashingNarrative, "Gewerbliche Untertisch-Geschirrspuelmaschine (vernetzt)")
 	var compIDs, compNames []string
@@ -94,7 +95,7 @@ func warewashingEngineOutput() ([]Hazard, []Mitigation, int) {
 	filtered := *out
 	filtered.MatchedPatterns = kept
 	hazards, mitigations := patternsToHazardsAndMitigations(&filtered)
-	return hazards, mitigations, len(kept)
+	return hazards, mitigations, kept
 }
 func TestWarewashing_GTCoverage(t *testing.T) {
@@ -119,8 +120,8 @@ func TestWarewashing_GTCoverage(t *testing.T) {
 		t.Logf("Parsed components: %v", cn)
 	}
-	hazards, mitigations, nPatterns := warewashingEngineOutput()
+	hazards, mitigations, keptPatterns := warewashingEngineOutput()
-	t.Logf("Engine: %d patterns kept (relevance+cyber filter) -> %d hazards", nPatterns, len(hazards))
+	t.Logf("Engine: %d patterns kept (relevance+cyber filter) -> %d hazards", len(keptPatterns), len(hazards))
 	result := CompareBenchmark(&gt, hazards, mitigations)
 	precision := 0.0
@@ -180,3 +181,57 @@ func TestWarewashing_GTCoverage(t *testing.T) {
 		t.Errorf("warewashing recall below 40%% floor: %.1f%%", result.CoverageScore*100)
 	}
 }
 // TestWarewashing_DedupProposer drives the offline dedup-candidate proposer
 // end-to-end against the real warewashing engine output: it detects candidates,
 // screens each one against the ground truth, hands the survivors to the judge
 // and logs the resulting human-review queue. The one hard assertion is the
 // wall's self-consistency: a candidate that is neither recall-reducing nor
 // GT-distinct must come back with ScreenResult.Safe set.
 func TestWarewashing_DedupProposer(t *testing.T) {
 	gtPath := filepath.Join("testdata", "ground_truth_warewashing.json")
 	gtBytes, readErr := os.ReadFile(gtPath)
 	if readErr != nil {
 		t.Fatalf("read GT: %v", readErr)
 	}
 	var gt GroundTruth
 	if parseErr := json.Unmarshal(gtBytes, &gt); parseErr != nil {
 		t.Fatalf("parse GT: %v", parseErr)
 	}
 	hazards, mits, kept := warewashingEngineOutput()
 	// Index the fired patterns so the judge can be given both sides of a pair.
 	patternByID := make(map[string]PatternMatch, len(kept))
 	for _, fired := range kept {
 		patternByID[fired.PatternID] = fired
 	}
 	// 0.25 is a deliberately permissive candidate threshold: the proposer is meant
 	// to over-surface, because the deterministic GT wall below (and a human, and the
 	// LLM judge) is the precision filter — not the detector.
 	candidates := FindDedupCandidates(kept, 0.25)
 	t.Logf("Proposer: %d dedup candidate(s) from %d fired patterns", len(candidates), len(kept))
 	// Deterministic judge in the test; the dev-time CLI swaps in LLMJudge.
 	judge := HeuristicJudge{}
 	var (
 		queue   []JudgedProposal
 		blocked int
 	)
 	for _, cand := range candidates {
 		screen := ScreenSupersession(&gt, hazards, mits, cand.KeepHazardName, cand.DropName)
 		// Wall check 1: dropping the pattern must not cost recall.
 		if screen.RecallAfter < screen.RecallBefore {
 			t.Logf("[BLOCK recall-load-bearing] keep %s / drop %s", cand.KeepPattern, cand.DropPattern)
 			blocked++
 			continue
 		}
 		// Wall check 2: keep and drop must not map to distinct GT entries.
 		if screen.DistinctGT {
 			t.Logf("[BLOCK distinct GT %s vs %s] keep %s / drop %s", screen.KeepGT, screen.DropGT, cand.KeepPattern, cand.DropPattern)
 			blocked++
 			continue
 		}
 		// Past both checks the screen itself has to agree the proposal is safe.
 		if !screen.Safe {
 			t.Errorf("RECALL-SAFE branch but ScreenResult.Safe=false for drop %s", cand.DropPattern)
 		}
 		verdict, confidence, rationale := judge.Judge(context.Background(), cand, patternByID[cand.KeepPattern], patternByID[cand.DropPattern])
 		queue = append(queue, JudgedProposal{
 			Candidate:  cand,
 			Screen:     screen,
 			Verdict:    verdict,
 			Confidence: confidence,
 			Rationale:  rationale,
 			Judge:      judge.Name(),
 		})
 	}
 	t.Logf("\n%s", RenderProposalQueue("Gewerbliche Geschirrspuelmaschine (vernetzt)", queue))
 	t.Logf("Proposer summary: %d candidate(s) in queue (judge=%s), %d BLOCKED by the GT wall — propose-only, nothing auto-applied",
 		len(queue), judge.Name(), blocked)
 }
@@ -0,0 +1,50 @@
 package iace
 import "sort"
 // isoCategoryRank maps a hazard category id to its sort rank in the hazard log,
 // following the EN ISO 12100 hazard-group ordering. Without it the log is
 // returned in pattern-firing order, which reads as a jumble. Ranks are banded
 // per group (A = 10s, B = 20s, C = 30s, …) so the log reads top-down by hazard
 // type, matching the frontend CATEGORY_LABELS. Several categories deliberately
 // share a rank (aliases of the same group entry).
 var isoCategoryRank = map[string]int{
 	// A. Mechanical
 	"mechanical_hazard": 10, "mechanical": 10, "maintenance_hazard": 11,
 	// B. Electrical
 	"electrical_hazard": 20, "electrical": 20, "emc_hazard": 21,
 	// C. Thermal
 	"thermal_hazard": 30, "thermal": 30, "high_temperature": 31, "fire_explosion": 32,
 	// D. Pneumatics / hydraulics
 	"pneumatic_hydraulic": 40,
 	// E. Noise / vibration
 	"noise_hazard": 50, "noise_vibration": 50, "vibration_hazard": 51,
 	// F. Ergonomics
 	"ergonomic_hazard": 60, "ergonomic": 60,
 	// G. Substances / environment
 	"material_environmental": 70, "chemical_risk": 71, "radiation_hazard": 72,
 	// H. Software / control (functional safety)
 	"software_control": 80, "software_fault": 80, "safety_function_failure": 81,
 	"configuration_error": 82, "sensor_fault": 83, "hmi_error": 84, "mode_confusion": 85,
 	"communication_failure": 86, "update_failure": 87,
 	// I. Cyber / network (listed for ordering completeness; excluded from the CE log)
 	"unauthorized_access": 90, "firmware_corruption": 91, "cyber_resilience": 92,
 	"cyber_network": 93, "logging_audit_failure": 94, "sensor_spoofing": 95,
 	// J. AI-specific (all categories share one rank)
 	"ai_specific": 100, "ai_misclassification": 100, "false_classification": 100,
 	"model_drift": 100, "data_poisoning": 100, "unintended_bias": 100,
 }
 // categoryRank returns the sort rank registered for cat in isoCategoryRank.
 // Categories that are not registered get 999 so they sort after every known
 // group.
 func categoryRank(cat string) int {
 	rank, known := isoCategoryRank[cat]
 	if !known {
 		return 999 // unknown categories last
 	}
 	return rank
 }
 // SortHazardsByISO12100 reorders hazards in place so they are grouped by their
 // ISO 12100 hazard-group rank (see categoryRank). The sort is stable, so hazards
 // with the same rank keep the relative order they arrived in (the engine's
 // creation/priority order).
 func SortHazardsByISO12100(hazards []Hazard) {
 	lowerGroupFirst := func(i, j int) bool {
 		left := categoryRank(hazards[i].Category)
 		right := categoryRank(hazards[j].Category)
 		return left < right
 	}
 	sort.SliceStable(hazards, lowerGroupFirst)
 }
@@ -157,7 +157,7 @@ func GetGTBremseHazardPatterns() []HazardPattern {
 		// ════════════════════════════════════════════════════════════════
 		{
 			ID: "HP1717", NameDE: "Verletzung durch unvermittelt austretende pneumatische Restenergie", NameEN: "Injury from unexpectedly released pneumatic stored energy",
-			RequiredComponentTags: []string{"stored_energy"},
+			RequiredComponentTags: []string{"pneumatic_part"},
 			RequiredEnergyTags:    []string{"pneumatic_pressure"},
 			GeneratedHazardCats:   []string{"mechanical_hazard"},
 			SuggestedMeasureIDs:   []string{"M485", "M534", "M527"},
@@ -375,7 +375,7 @@ func GetSpecificMachinePatterns() []HazardPattern {
 		// ================================================================
 		{
 			ID: "HP753", NameDE: "Thermal Runaway bei Lithium-Batterie", NameEN: "Thermal runaway of lithium battery",
-			RequiredComponentTags: []string{"stored_energy", "high_temperature"},
+			RequiredComponentTags: []string{"battery", "high_temperature"},
 			RequiredEnergyTags:    []string{"electrical_energy", "thermal"},
 			GeneratedHazardCats:   []string{"thermal_hazard", "electrical_hazard"},
 			SuggestedMeasureIDs:   []string{"M005", "M141"},
@@ -390,7 +390,7 @@ func GetSpecificMachinePatterns() []HazardPattern {
 		},
 		{
 			ID: "HP754", NameDE: "Ausgasung giftiger Daempfe aus Batterie", NameEN: "Toxic gas emission from battery",
-			RequiredComponentTags: []string{"stored_energy", "chemical_risk"},
+			RequiredComponentTags: []string{"battery", "chemical_risk"},
 			RequiredEnergyTags:    []string{},
 			GeneratedHazardCats:   []string{"material_environmental"},
 			SuggestedMeasureIDs:   []string{"M005", "M141"},
@@ -405,7 +405,7 @@ func GetSpecificMachinePatterns() []HazardPattern {
 		},
 		{
 			ID: "HP755", NameDE: "Elektrischer Schlag an Hochvolt-Batteriespeicher", NameEN: "Electric shock from high-voltage battery storage",
-			RequiredComponentTags: []string{"stored_energy", "electrical_part"},
+			RequiredComponentTags: []string{"battery", "electrical_part"},
 			RequiredEnergyTags:    []string{"electrical_energy"},
 			GeneratedHazardCats:   []string{"electrical_hazard"},
 			SuggestedMeasureIDs:   []string{"M082", "M141"},
@@ -137,7 +137,7 @@ func GetKeywordDictionary() []KeywordEntry {
 		{Keywords: []string{"kreiselmaeher", "scheibenmaeher", "maehwerk"}, ExtraTags: []string{"agri_mower"}},
 		{Keywords: []string{"spruehduese", "spritzduese", "spruehkopf"}, ExtraTags: []string{"spray_nozzle"}},
 		{Keywords: []string{"galvanikbad", "tauchbad", "beizbad", "chemiebad"}, ExtraTags: []string{"chemical_bath"}},
-		{Keywords: []string{"batterie", "akku", "akkumulator", "traktionsbatterie"}, ExtraTags: []string{"battery"}},
+		{Keywords: []string{"batterie", "akku", "akkumulator", "traktionsbatterie", "lithium", "batteriespeicher", "hochvoltbatterie", "lithium-batterie"}, ExtraTags: []string{"battery"}},
 		{Keywords: []string{"heizelement", "heizpatrone", "heizband"}, ExtraTags: []string{"heating_element"}},
 		{Keywords: []string{"uv-lampe", "uv-strahler", "uv-c-strahler"}, ExtraTags: []string{"uv_source"}},
 		{Keywords: []string{"roentgen", "radioaktiv", "strahlenquelle", "gammastrahl", "isotop"}, ExtraTags: []string{"radiation_source"}},
@@ -42,3 +42,29 @@ func guardedLifecycles(p HazardPattern, tagSet map[string]bool) []string {
 	}
 	return p.ApplicableLifecycles
 }
 // Domain-specific supersession.
 //
 // A generic pattern that fires via a broad tag (e.g. high_temperature) can
 // duplicate a domain-specific pattern that describes the same hazard more
 // precisely. When the domain is present, the specific pattern wins and the
 // generic duplicate is dropped. Scoped to the domain tag, so machines outside
 // the domain keep the generic pattern — regression-safe by construction.
 //
 //	HP016 (generic hot surfaces)  -> HP2201 (boiler / tank / wash chamber)
 //	HP018 (actuator burn)         -> HP2201 (same contact-burn hazard)
 //	HP013 (stored electrical NRG) -> HP144  (residual voltage; HP013's zone is
 //	                                 framed for battery compartments / UPS
 //	                                 systems a dishwasher does not have, HP144
 //	                                 is the frequency-converter / DC-link
 //	                                 variant)
 //
 // genericSupersededByWarewashing is the set of generic pattern IDs (the
 // left-hand side of the table above) that are suppressed for warewashing
 // machines. The replacing pattern IDs are documentation only; they are not
 // stored in this map.
 var genericSupersededByWarewashing = map[string]bool{
 	"HP016": true,
 	"HP018": true,
 	"HP013": true,
 }
 // supersededByDomainSpecific reports whether generic pattern p should be dropped
 // because the project's domain already supplies a more precise equivalent.
 // Currently only the warewashing domain (tag "dom_warewashing") defines such
 // replacements; without that tag no pattern is ever superseded.
 func supersededByDomainSpecific(p HazardPattern, tagSet map[string]bool) bool {
 	if !tagSet["dom_warewashing"] {
 		return false
 	}
 	return genericSupersededByWarewashing[p.ID]
 }
@@ -416,6 +416,11 @@ func patternMatches(p HazardPattern, tagSet map[string]bool, input MatchInput) b
 		return false
 	}
 	// Domain-specific supersession (generic duplicate replaced by a precise one).
 	if supersededByDomainSpecific(p, tagSet) {
 		return false
 	}
 	return true
 }
@@ -0,0 +1,143 @@
 package iace
 import (
 	"context"
 	"encoding/json"
 	"fmt"
 	"strings"
 )
 // Coverage blind-spot proposer (P2 slice 6, type 4). DEV-TIME, propose-only.
 //
 // Deterministic skeleton: which EN ISO 12100 hazard groups (A-G, the classic CE
 // groups; H-J are control/CRA and routinely routed elsewhere) did the engine
 // leave with ZERO hazards for this machine? An empty group is a structural
 // blind-spot signal — the machine may genuinely lack that hazard, or a pattern
 // may be missing. The LLM then expands each gap into specific expected-but-missing
 // hazards a safety assessor would name, for a human to confirm into a new pattern
 // or GT case. The gaps alone are useful without any model.
 // isoGroup describes one EN ISO 12100 hazard group for the coverage check.
 // Field order matters: iso12100Groups initialises values positionally.
 type isoGroup struct {
 	// Key is the stable machine-readable group id (copied into CoverageGap.Key).
 	Key   string
 	// Label is the human-readable group heading (copied into CoverageGap.Group).
 	Label string
 	// Cats lists the hazard category ids that count as belonging to this group.
 	Cats  []string
 }
 // iso12100Groups lists the hazard groups A-G that FindCoverageGaps checks, in
 // report order. Each entry is {Key, Label, Cats}; a group counts as covered as
 // soon as any one of its Cats appears among the engine's hazards. Labels are
 // German because they are emitted verbatim into the review queue and prompt.
 var iso12100Groups = []isoGroup{
 	{"mechanical", "A. Mechanisch", []string{"mechanical_hazard", "mechanical", "maintenance_hazard"}},
 	{"electrical", "B. Elektrisch", []string{"electrical_hazard", "electrical", "emc_hazard"}},
 	{"thermal", "C. Thermisch", []string{"thermal_hazard", "thermal", "high_temperature", "fire_explosion"}},
 	{"pneumatic_hydraulic", "D. Pneumatik/Hydraulik", []string{"pneumatic_hydraulic"}},
 	{"noise_vibration", "E. Laerm/Vibration", []string{"noise_hazard", "noise_vibration", "vibration_hazard"}},
 	{"ergonomic", "F. Ergonomie", []string{"ergonomic_hazard", "ergonomic"}},
 	{"material", "G. Stoffe/Umwelt", []string{"material_environmental", "chemical_risk", "radiation_hazard"}},
 }
 // CoverageGap is an ISO 12100 hazard group with no engine hazard.
 type CoverageGap struct {
 	// Group is the human-readable group label (isoGroup.Label).
 	Group string `json:"group"`
 	// Key is the machine-readable group id (isoGroup.Key).
 	Key   string `json:"key"`
 	// Note is the reviewer hint explaining what the empty group may mean.
 	Note  string `json:"note"`
 }
 // FindCoverageGaps reports every A-G hazard group from iso12100Groups for which
 // none of the group's categories appears in hazards. Gaps come back in the
 // order of iso12100Groups; the result is nil when every group is covered.
 func FindCoverageGaps(hazards []Hazard) []CoverageGap {
 	seenCats := make(map[string]bool, len(hazards))
 	for i := range hazards {
 		seenCats[hazards[i].Category] = true
 	}
 	var gaps []CoverageGap
 groups:
 	for _, grp := range iso12100Groups {
 		for _, cat := range grp.Cats {
 			if seenCats[cat] {
 				continue groups // one matching category is enough to cover the group
 			}
 		}
 		gaps = append(gaps, CoverageGap{
 			Group: grp.Label,
 			Key:   grp.Key,
 			Note:  "no engine hazard in this ISO 12100 group — verify the machine truly lacks it, or a pattern is missing",
 		})
 	}
 	return gaps
 }
 // MissingHazard is an LLM-proposed hazard a safety assessor would expect.
 // The JSON tags match the array-of-objects shape requested in the system
 // prompt built by BuildCoveragePrompt.
 type MissingHazard struct {
 	// Group is the hazard group the model assigned the proposal to.
 	Group  string `json:"group"`
 	// Hazard is the model's description of the expected-but-missing hazard.
 	Hazard string `json:"hazard"`
 	// Why is the model's justification for expecting it on this machine.
 	Why    string `json:"why"`
 }
 // ProposeMissingHazards asks the LLM to turn the empty hazard groups into
 // concrete hazards an assessor would expect for this machine.
 //
 // It is strictly best-effort: with no completer, with no gaps, when the
 // completion call fails, or when the reply cannot be parsed, it returns nil
 // instead of an error so the surrounding run never breaks.
 func ProposeMissingHazards(ctx context.Context, completer LLMCompleter, machineClass, narrative string, produced []Hazard, gaps []CoverageGap) []MissingHazard {
 	if len(gaps) == 0 || completer == nil {
 		return nil
 	}
 	systemPrompt, userPrompt := BuildCoveragePrompt(machineClass, narrative, produced, gaps)
 	reply, callErr := completer.Complete(ctx, systemPrompt, userPrompt)
 	if callErr != nil {
 		// Propose-only: a failed completion just means "no proposals".
 		return nil
 	}
 	return parseMissingHazards(reply)
 }
 // BuildCoveragePrompt assembles the system and user prompts for the "what is
 // missing?" question. The system prompt fixes the assessor role and demands a
 // JSON array reply; the user prompt carries the machine class, the narrative,
 // the distinct non-empty categories already produced (first-seen order) and the
 // labels of the empty groups to focus on.
 func BuildCoveragePrompt(machineClass, narrative string, produced []Hazard, gaps []CoverageGap) (system, user string) {
 	// Distinct categories already covered, in order of first appearance.
 	var knownCats []string
 	listed := make(map[string]bool)
 	for i := range produced {
 		cat := produced[i].Category
 		if cat == "" || listed[cat] {
 			continue
 		}
 		listed[cat] = true
 		knownCats = append(knownCats, cat)
 	}
 	var emptyGroups []string
 	for i := range gaps {
 		emptyGroups = append(emptyGroups, gaps[i].Group)
 	}
 	user = fmt.Sprintf("Maschinenklasse: %s\n\nBeschreibung:\n%s\n\nBereits erkannte Kategorien: %s\n\nGruppen OHNE Eintrag (Fokus): %s\n\nWelche erwarteten Gefaehrdungen fehlen?",
 		machineClass, narrative, strings.Join(knownCats, ", "), strings.Join(emptyGroups, ", "))
 	system = "Du bist Sachverstaendiger fuer Maschinensicherheit nach EN ISO 12100. " +
 		"Dir werden eine Maschine, die bereits erkannten Gefaehrdungen und Gefaehrdungsgruppen OHNE Eintrag genannt. " +
 		"Nenne nur Gefaehrdungen, die ein Sachverstaendiger fuer DIESE Maschine ERWARTET, die aber FEHLEN. " +
 		"Erfinde nichts Maschinenfremdes. Antworte AUSSCHLIESSLICH als JSON-Array: " +
 		`[{"group":"...","hazard":"...","why":"..."}].`
 	return system, user
 }
 // parseMissingHazards extracts the JSON array from an LLM reply that may wrap it
 // in free text: it takes everything from the first '[' to the last ']' and
 // decodes that span. It returns nil when no such span exists or decoding fails.
 func parseMissingHazards(raw string) []MissingHazard {
 	open := strings.Index(raw, "[")
 	if open < 0 {
 		return nil
 	}
 	closing := strings.LastIndex(raw, "]")
 	if closing <= open {
 		return nil
 	}
 	var parsed []MissingHazard
 	if json.Unmarshal([]byte(raw[open:closing+1]), &parsed) != nil {
 		return nil
 	}
 	return parsed
 }
 // RenderCoverageQueue formats the review queue as markdown: a title naming the
 // machine, a summary line with the gap count, one bullet per gap, and — only
 // when there are any — a numbered section of LLM-proposed missing hazards.
 func RenderCoverageQueue(machine string, gaps []CoverageGap, missing []MissingHazard) string {
 	var md strings.Builder
 	md.WriteString(fmt.Sprintf("# Coverage blind-spot queue — %s\n\n", machine))
 	md.WriteString(fmt.Sprintf("%d ISO 12100 group(s) (A-G) have no engine hazard. Propose-only — a human confirms whether the machine truly lacks it or a pattern/GT case is missing.\n\n", len(gaps)))
 	for i := range gaps {
 		md.WriteString(fmt.Sprintf("- **%s** — %s\n", gaps[i].Group, gaps[i].Note))
 	}
 	if len(missing) == 0 {
 		return md.String()
 	}
 	md.WriteString(fmt.Sprintf("\n## LLM-proposed expected-but-missing hazards (%d)\n\n", len(missing)))
 	for idx := range missing {
 		entry := missing[idx]
 		md.WriteString(fmt.Sprintf("%d. [%s] %s\n   - why: %s\n", idx+1, entry.Group, entry.Hazard, entry.Why))
 	}
 	return md.String()
 }
@@ -0,0 +1,59 @@
 package iace
 import (
 	"context"
 	"strings"
 	"testing"
 )
 // TestFindCoverageGaps feeds hazards from four groups and checks that exactly
 // the untouched groups are reported as gaps while the covered ones are not.
 func TestFindCoverageGaps(t *testing.T) {
 	var hazards []Hazard
 	for _, cat := range []string{"mechanical_hazard", "thermal_hazard", "electrical_hazard", "material_environmental"} {
 		hazards = append(hazards, Hazard{Category: cat})
 	}
 	reported := map[string]bool{}
 	for _, gap := range FindCoverageGaps(hazards) {
 		reported[gap.Key] = true
 	}
 	checks := []struct {
 		key     string
 		wantGap bool
 	}{
 		{"pneumatic_hydraulic", true},
 		{"noise_vibration", true},
 		{"ergonomic", true},
 		{"mechanical", false},
 		{"thermal", false},
 		{"electrical", false},
 		{"material", false},
 	}
 	for _, c := range checks {
 		switch {
 		case c.wantGap && !reported[c.key]:
 			t.Errorf("expected gap %s", c.key)
 		case !c.wantGap && reported[c.key]:
 			t.Errorf("did not expect gap %s (covered)", c.key)
 		}
 	}
 }
 func TestBuildCoveragePrompt_ContainsContext(t *testing.T) {
 	produced := []Hazard{{Category: "thermal_hazard"}}
 	gaps := []CoverageGap{{Group: "F. Ergonomie", Key: "ergonomic"}}
 	system, user := BuildCoveragePrompt("Geschirrspuelmaschine", "Eine Spuelmaschine mit Tank.", produced, gaps)
 	if !strings.Contains(system, "EN ISO 12100") || !strings.Contains(system, "JSON") {
 		t.Errorf("system prompt missing framing")
 	}
 	for _, want := range []string{"Geschirrspuelmaschine", "thermal_hazard", "F. Ergonomie", "Spuelmaschine mit Tank"} {
 		if !strings.Contains(user, want) {
 			t.Errorf("user prompt missing %q", want)
 		}
 	}
 }
 // TestProposeMissingHazards_ParsesAndDegrades covers the happy path (a JSON
 // array embedded in chatter is parsed) and the two degrade paths (nil completer
 // and a failing completer both yield nil).
 func TestProposeMissingHazards_ParsesAndDegrades(t *testing.T) {
 	ctx := context.Background()
 	gaps := []CoverageGap{{Group: "F. Ergonomie", Key: "ergonomic"}}
 	reply := `Hier: [{"group":"F. Ergonomie","hazard":"Heben schwerer Koerbe","why":"manuelles Beladen"}] fertig`
 	proposals := ProposeMissingHazards(ctx, fakeCompleter{out: reply}, "x", "n", nil, gaps)
 	if len(proposals) != 1 || proposals[0].Hazard != "Heben schwerer Koerbe" {
 		t.Fatalf("parse: got %+v", proposals)
 	}
 	if got := ProposeMissingHazards(ctx, nil, "x", "n", nil, gaps); got != nil {
 		t.Errorf("nil completer must return nil")
 	}
 	failing := fakeCompleter{err: context.DeadlineExceeded}
 	if got := ProposeMissingHazards(ctx, failing, "x", "n", nil, gaps); got != nil {
 		t.Errorf("error must return nil")
 	}
 }
@@ -0,0 +1,152 @@
 package iace
 import (
 	"fmt"
 	"math"
 	"regexp"
 	"sort"
 	"strings"
 )
 // Offline dedup-candidate proposer (P2, type 1). DEV-TIME ONLY.
 //
 // It inspects the patterns that fired for one machine and proposes which look
 // like duplicates, so a human (later an LLM) can decide a supersession/merge. It
 // NEVER mutates the pattern library or the runtime — it only surfaces candidates.
 // The deterministic GT screen (ScreenSupersession, proposer_screen.go) is the
 // wall that proves a proposal is safe before a human ever sees it.
 //
 // Detection here is purely structural (category + zone + measure + scenario
 // overlap) and therefore reproducible. Two safety rules bake in what P1 taught
 // us about the dishwasher review:
 //   - only patterns with the SAME primary category are ever compared;
 //   - a pair with DIFFERENT operational states is NEVER proposed, because
 //     normal-operation and maintenance are legitimately distinct contexts with
 //     different protective measures (e.g. HP011 vs HP077). Merging them would
 //     erase the maintenance view.
 // DedupCandidate is a proposed near-duplicate pattern pair for one machine class.
 // FindDedupCandidates fills it; the three Jaccard values and Score are stored
 // rounded to two decimals.
 type DedupCandidate struct {
 	KeepPattern     string  `json:"keep_pattern"` // higher-priority survivor
 	DropPattern     string  `json:"drop_pattern"` // supersession target
 	// KeepName is the PatternName of the surviving pattern.
 	KeepName        string  `json:"keep_name"`
 	KeepHazardName  string  `json:"keep_hazard_name"` // keep pattern ScenarioDE (for the GT-distinctness screen)
 	DropName        string  `json:"drop_name"`        // == generated hazard Name (ScenarioDE) of the drop pattern
 	// Category is the primary hazard category both patterns share.
 	Category        string  `json:"category"`
 	// ZoneJaccard is the overlap of the comma-separated zone terms.
 	ZoneJaccard     float64 `json:"zone_jaccard"`
 	// MeasureJaccard is the overlap of the suggested measure IDs.
 	MeasureJaccard  float64 `json:"measure_jaccard"`
 	// ScenarioJaccard is the overlap of the scenario word tokens.
 	ScenarioJaccard float64 `json:"scenario_jaccard"`
 	// Score is the weighted combination 0.4*measure + 0.3*zone + 0.3*scenario.
 	Score           float64 `json:"score"`
 	// Rationale is a one-line human-readable summary of the overlap figures.
 	Rationale       string  `json:"rationale"`
 }
 // FindDedupCandidates examines every unordered pair of fired patterns and
 // proposes the pairs that look like duplicates.
 //
 // A pair is only considered when both patterns have the same non-empty primary
 // category and the same set of operational states. Its score is
 // 0.4*measure + 0.3*zone + 0.3*scenario Jaccard overlap (shared measures weigh
 // most); pairs scoring below threshold are discarded. The higher-priority
 // pattern is kept, and on a priority tie the earlier one in fired is kept.
 //
 // The result is ordered by rounded score descending, then by drop-pattern id;
 // remaining ties keep discovery order (stable sort).
 func FindDedupCandidates(fired []PatternMatch, threshold float64) []DedupCandidate {
 	var proposals []DedupCandidate
 	for i := range fired {
 		first := fired[i]
 		cat := primaryCat(first)
 		if cat == "" {
 			continue // nothing to compare on
 		}
 		for _, second := range fired[i+1:] {
 			if primaryCat(second) != cat {
 				continue
 			}
 			if !sameOpStateSet(first.OperationalStates, second.OperationalStates) {
 				continue // legitimate lifecycle variants — never propose a merge
 			}
 			zoneSim := tokenJaccard(zoneTokenSet(first.ZoneDE), zoneTokenSet(second.ZoneDE))
 			measureSim := tokenJaccard(toSet(first.SuggestedMeasureIDs), toSet(second.SuggestedMeasureIDs))
 			scenarioSim := tokenJaccard(wordTokenSet(first.ScenarioDE), wordTokenSet(second.ScenarioDE))
 			combined := 0.4*measureSim + 0.3*zoneSim + 0.3*scenarioSim
 			if combined < threshold {
 				continue
 			}
 			// The higher priority survives; a tie keeps the earlier pattern.
 			survivor, dropped := first, second
 			if second.Priority > first.Priority {
 				survivor, dropped = second, first
 			}
 			rationale := fmt.Sprintf(
 				"same category %q · measure overlap %.0f%% · zone overlap %.0f%% · scenario overlap %.0f%% → keep %s (P%d), supersede %s (P%d)",
 				cat, measureSim*100, zoneSim*100, scenarioSim*100, survivor.PatternID, survivor.Priority, dropped.PatternID, dropped.Priority)
 			proposals = append(proposals, DedupCandidate{
 				KeepPattern:     survivor.PatternID,
 				DropPattern:     dropped.PatternID,
 				KeepName:        survivor.PatternName,
 				KeepHazardName:  survivor.ScenarioDE,
 				DropName:        dropped.ScenarioDE,
 				Category:        cat,
 				ZoneJaccard:     round2(zoneSim),
 				MeasureJaccard:  round2(measureSim),
 				ScenarioJaccard: round2(scenarioSim),
 				Score:           round2(combined),
 				Rationale:       rationale,
 			})
 		}
 	}
 	sort.SliceStable(proposals, func(x, y int) bool {
 		left, right := proposals[x], proposals[y]
 		if left.Score == right.Score {
 			return left.DropPattern < right.DropPattern
 		}
 		return left.Score > right.Score
 	})
 	return proposals
 }
 // primaryCat returns the first hazard category of pm, or "" when pm has none.
 func primaryCat(pm PatternMatch) string {
 	if cats := pm.HazardCats; len(cats) > 0 {
 		return cats[0]
 	}
 	return ""
 }
 // sameOpStateSet reports whether a and b contain the same operational states
 // when compared as sets (order and duplicates are ignored; two empty lists are
 // equal).
 func sameOpStateSet(a, b []string) bool {
 	left, right := toSet(a), toSet(b)
 	if len(left) != len(right) {
 		return false
 	}
 	// Equal sizes plus "every left member is in right" implies set equality.
 	for state := range left {
 		if present := right[state]; !present {
 			return false
 		}
 	}
 	return true
 }
 // proposerWordSplit matches any run of characters that are not Unicode letters
 // (digits, punctuation, whitespace, hyphens); it is the separator used to split
 // free text into word tokens.
 var proposerWordSplit = regexp.MustCompile(`[^\p{L}]+`)
 // zoneTokenSet turns a comma-separated zone string into a set of its terms.
 // Terms are lowercased and trimmed; terms shorter than three runes are dropped.
 // The result is never nil.
 func zoneTokenSet(zone string) map[string]bool {
 	terms := make(map[string]bool)
 	lowered := strings.ToLower(zone)
 	for _, piece := range strings.Split(lowered, ",") {
 		term := strings.TrimSpace(piece)
 		if len([]rune(term)) < 3 {
 			continue
 		}
 		terms[term] = true
 	}
 	return terms
 }
 // wordTokenSet lowercases s, splits it on non-letter runs and returns the set of
 // resulting words that are at least four runes long (shorter words, mostly
 // connectives, are dropped). The result is never nil.
 func wordTokenSet(s string) map[string]bool {
 	tokens := make(map[string]bool)
 	words := proposerWordSplit.Split(strings.ToLower(s), -1)
 	for i := range words {
 		if len([]rune(words[i])) < 4 {
 			continue
 		}
 		tokens[words[i]] = true
 	}
 	return tokens
 }
 // tokenJaccard returns the Jaccard similarity |a ∩ b| / |a ∪ b| of two token
 // sets, a value in [0, 1].
 //
 // Two empty sets yield 0, not 1: missing text on both sides is treated as "no
 // evidence of overlap" rather than as a perfect match. Once that case is
 // excluded the union is always at least 1, so no further zero-division guard is
 // needed.
 func tokenJaccard(a, b map[string]bool) float64 {
 	if len(a) == 0 && len(b) == 0 {
 		return 0
 	}
 	inter := 0
 	for k := range a {
 		if b[k] {
 			inter++
 		}
 	}
 	// |a ∪ b| = |a| + |b| - |a ∩ b|; strictly positive here because at least one
 	// of the sets is non-empty.
 	return float64(inter) / float64(len(a)+len(b)-inter)
 }
 // round2 rounds x to two decimal places using math.Round (halves round away
 // from zero).
 func round2(x float64) float64 {
 	hundredths := math.Round(x * 100)
 	return hundredths / 100
 }
@@ -0,0 +1,67 @@
 package iace
 import "testing"
 // mkPM builds a PatternMatch fixture for the proposer tests. The pattern id is
 // reused as the pattern name and cat becomes the single hazard category.
 func mkPM(id, cat, zone, scenario string, prio int, measures, opstates []string) PatternMatch {
 	pm := PatternMatch{PatternID: id, PatternName: id, Priority: prio}
 	pm.HazardCats = []string{cat}
 	pm.ZoneDE = zone
 	pm.ScenarioDE = scenario
 	pm.SuggestedMeasureIDs = measures
 	pm.OperationalStates = opstates
 	return pm
 }
 func TestFindDedupCandidates_FindsOverlappingPair(t *testing.T) {
 	fired := []PatternMatch{
 		mkPM("HPa", "update_failure", "Steuerung, SPS", "Software-Update der Steuerung scheitert nach Abbruch", 80,
 			[]string{"M138", "M146"}, nil),
 		mkPM("HPb", "update_failure", "Steuerung, Antriebsregler", "Software-Update der Steuerung schlaegt fehl", 75,
 			[]string{"M138", "M146", "M141"}, nil),
 		mkPM("HPc", "mechanical_hazard", "Tuer", "Quetschen der Finger an der Tuer", 70,
 			[]string{"M003"}, nil),
 	}
 	got := FindDedupCandidates(fired, 0.4)
 	if len(got) != 1 {
 		t.Fatalf("want 1 candidate, got %d: %+v", len(got), got)
 	}
 	// Higher-priority pattern survives, lower one is the drop target.
 	if got[0].KeepPattern != "HPa" || got[0].DropPattern != "HPb" {
 		t.Errorf("want keep HPa / drop HPb, got keep %s / drop %s", got[0].KeepPattern, got[0].DropPattern)
 	}
 	if got[0].DropName != "Software-Update der Steuerung schlaegt fehl" {
 		t.Errorf("DropName must equal drop pattern ScenarioDE, got %q", got[0].DropName)
 	}
 }
 func TestFindDedupCandidates_LifecycleGuard(t *testing.T) {
 	// Same category, zone and measures — but normal-operation vs maintenance.
 	// These are legitimate variants (HP011 vs HP077) and must NOT be proposed.
 	fired := []PatternMatch{
 		mkPM("HP011", "electrical_hazard", "Schaltschrank, Klemmenkasten", "Person beruehrt spannungsfuehrende Teile", 95,
 			[]string{"M481", "M482"}, nil),
 		mkPM("HP077", "electrical_hazard", "Schaltschrank, Klemmenkasten", "Person beruehrt spannungsfuehrende Teile", 80,
 			[]string{"M481", "M482"}, []string{"maintenance"}),
 	}
 	if got := FindDedupCandidates(fired, 0.4); len(got) != 0 {
 		t.Fatalf("lifecycle guard failed: want 0 candidates, got %d: %+v", len(got), got)
 	}
 }
 // TestFindDedupCandidates_DifferentCategoryIgnored checks that patterns with
 // otherwise identical text and measures are never paired across categories.
 func TestFindDedupCandidates_DifferentCategoryIgnored(t *testing.T) {
 	var fired []PatternMatch
 	fired = append(fired, mkPM("HPa", "thermal_hazard", "Boiler", "Heisse Oberflaeche am Boiler", 80, []string{"M071"}, nil))
 	fired = append(fired, mkPM("HPb", "mechanical_hazard", "Boiler", "Heisse Oberflaeche am Boiler", 80, []string{"M071"}, nil))
 	got := FindDedupCandidates(fired, 0.3)
 	if len(got) != 0 {
 		t.Fatalf("cross-category pair must not be proposed, got %d", len(got))
 	}
 }
 func TestFindDedupCandidates_BelowThresholdDropped(t *testing.T) {
 	fired := []PatternMatch{
 		mkPM("HPa", "mechanical_hazard", "Tuer", "Quetschen an der Tuer", 80, []string{"M003"}, nil),
 		mkPM("HPb", "mechanical_hazard", "Foerderband", "Einzug am Foerderband", 80, []string{"M540"}, nil),
 	}
 	if got := FindDedupCandidates(fired, 0.4); len(got) != 0 {
 		t.Fatalf("disjoint pair must be below threshold, got %d: %+v", len(got), got)
 	}
 }
@@ -0,0 +1,154 @@
 package iace
 import (
 	"fmt"
 	"sort"
 	"strings"
 )
 // Foreign-framing proposer (P2 slice 4, type 2). DEV-TIME, propose-only.
 //
 // A pattern can fire for a machine yet describe its hazard with a zone text
 // framed for a DIFFERENT machine (e.g. a dishwasher hazard whose zone names
 // "Walzen, Transportbaender" or "Bearbeitungszone"). Such foreign framing leaks
 // through terms that are NOT yet in domainGateTerms — once a term is a gate term,
 // the ghost-pattern invariant already fences the pattern out. So we surface the
 // candidates structurally: zone terms a fired pattern names that the machine's
 // narrative never mentions (minus generic hazard-location vocabulary). A human
 // (or the LLM) then decides: add a dom_* gate term, or re-frame the zone text.
 //
 // This OVER-surfaces by design — the human/LLM is the precision filter, not the
 // detector (same contract as the dedup proposer).
 // genericHazardStop are hazard-LOCATION words that legitimately appear in zones
 // without being echoed in a narrative — they are not evidence of foreign framing.
 // Keys are lowercase German terms (crush point, draw-in point, danger zone,
 // working area, surface, housing, component, machine, …) and are compared
 // against lowercased zone words, so they must stay lowercase.
 var genericHazardStop = map[string]bool{
 	"quetschstelle": true, "einzugstelle": true, "einzugsstelle": true, "scherstelle": true,
 	"schneidstelle": true, "stossstelle": true, "fangstelle": true, "klemmstelle": true,
 	"gefahrbereich": true, "gefahrenbereich": true, "gefahrstelle": true, "gefahrenstelle": true,
 	"arbeitsbereich": true, "wirkbereich": true, "schutzbereich": true, "umgebung": true,
 	"bereich": true, "zugang": true, "oberflaeche": true, "oberflaechen": true,
 	"gehaeuse": true, "bauteil": true, "bauteile": true, "komponente": true, "maschine": true,
 }
 // FramingCandidate is a fired pattern whose zone text looks foreign for the machine.
 type FramingCandidate struct {
 	// Pattern is the id of the fired pattern.
 	Pattern        string   `json:"pattern"`
 	// Name is the pattern's display name.
 	Name           string   `json:"name"`
 	// Category is the pattern's primary hazard category.
 	Category       string   `json:"category"`
 	// Zone is the pattern's original (unsplit) zone text.
 	Zone           string   `json:"zone"`
 	// OrphanTerms are the lowercased zone terms with no echo in the narrative.
 	OrphanTerms    []string `json:"orphan_terms"`
 	// OrphanFraction is orphan terms / all significant zone terms, rounded to
 	// two decimals.
 	OrphanFraction float64  `json:"orphan_fraction"`
 	Verdict        string   `json:"verdict"` // heuristic lean: foreign | plausible
 	// Evidence is a one-line human-readable summary of the orphan count and terms.
 	Evidence       string   `json:"evidence"`
 }
 // FindFramingCandidates flags fired patterns whose zone text is largely not
 // reflected in the machine narrative.
 //
 // For each pattern the zone is split into significant terms (zoneParts); a term
 // is an orphan when partEchoed finds no echo of it in the lowercased narrative.
 // A pattern is reported when it has at least one orphan and the orphan share of
 // its terms is at least minFraction. Patterns without significant zone terms
 // are skipped. Results are sorted by rounded orphan fraction descending, then by
 // pattern id, so the output is deterministic.
 func FindFramingCandidates(fired []PatternMatch, narrative string, minFraction float64) []FramingCandidate {
 	lowered := strings.ToLower(narrative)
 	// Narrative words of five or more runes serve as stems for compound matching.
 	var stems []string
 	for _, word := range proposerWordSplit.Split(lowered, -1) {
 		if len([]rune(word)) < 5 {
 			continue
 		}
 		stems = append(stems, word)
 	}
 	var flagged []FramingCandidate
 	for _, match := range fired {
 		terms := zoneParts(match.ZoneDE)
 		if len(terms) == 0 {
 			continue
 		}
 		var orphans []string
 		for _, term := range terms {
 			if partEchoed(term, lowered, stems) {
 				continue
 			}
 			orphans = append(orphans, term)
 		}
 		if len(orphans) == 0 {
 			continue
 		}
 		share := float64(len(orphans)) / float64(len(terms))
 		if share < minFraction {
 			continue
 		}
 		evidence := fmt.Sprintf("%d/%d zone terms have no narrative echo: %s", len(orphans), len(terms), strings.Join(orphans, ", "))
 		flagged = append(flagged, FramingCandidate{
 			Pattern:        match.PatternID,
 			Name:           match.PatternName,
 			Category:       primaryCat(match),
 			Zone:           match.ZoneDE,
 			OrphanTerms:    orphans,
 			OrphanFraction: round2(share),
 			Verdict:        framingHeuristicVerdict(share),
 			Evidence:       evidence,
 		})
 	}
 	sort.SliceStable(flagged, func(x, y int) bool {
 		left, right := flagged[x], flagged[y]
 		if left.OrphanFraction == right.OrphanFraction {
 			return left.Pattern < right.Pattern
 		}
 		return left.OrphanFraction > right.OrphanFraction
 	})
 	return flagged
 }
 // framingHeuristicVerdict turns an orphan fraction into the heuristic lean:
 // "foreign" when (practically) every zone term is an orphan, otherwise
 // "plausible".
 func framingHeuristicVerdict(frac float64) string {
 	switch {
 	case frac >= 0.99:
 		// nothing in the zone is echoed by the narrative
 		return "foreign"
 	default:
 		// partial echo — likely generic vocabulary, human to confirm
 		return "plausible"
 	}
 }
 // zoneParts breaks a zone string into its significant terms. The lowercased
 // text is split on commas, slashes, semicolons and parentheses; each piece is
 // trimmed and kept only if it is at least four runes long. Returns nil when no
 // term qualifies.
 func zoneParts(zone string) []string {
 	isSeparator := func(r rune) bool {
 		switch r {
 		case ',', '/', ';', '(', ')':
 			return true
 		}
 		return false
 	}
 	var terms []string
 	for _, piece := range strings.FieldsFunc(strings.ToLower(zone), isSeparator) {
 		term := strings.TrimSpace(piece)
 		if len([]rune(term)) < 4 {
 			continue
 		}
 		terms = append(terms, term)
 	}
 	return terms
 }
 // partEchoed reports whether a zone part is reflected in the narrative. The part
 // is examined word by word, and one echoing word is enough. A word echoes when
 //   - it is a generic hazard-location term (genericHazardStop), or
 //   - it has at least four runes and occurs as a substring of the narrative, or
 //   - it has at least four runes and contains one of the narrative stems.
 //
 // The last rule makes matching bidirectional so it survives German compounding:
 // narrative "Steuerung" echoes zone "Steuerungssystem". Both part and narrative
 // are expected to be lowercased by the caller.
 func partEchoed(part, narrative string, narStems []string) bool {
 	for _, word := range strings.Fields(part) {
 		switch {
 		case genericHazardStop[word]:
 			return true
 		case len([]rune(word)) < 4:
 			continue // too short to be meaningful evidence
 		case strings.Contains(narrative, word):
 			return true
 		}
 		for _, stem := range narStems {
 			if strings.Contains(word, stem) {
 				return true
 			}
 		}
 	}
 	return false
 }
 // RenderFramingQueue formats the foreign-framing candidates as a markdown review
 // queue: a title naming the machine, a summary line with the candidate count,
 // then one numbered section per candidate with its category, zone, orphan terms
 // and the suggested action for its verdict.
 func RenderFramingQueue(machine string, candidates []FramingCandidate) string {
 	var md strings.Builder
 	md.WriteString(fmt.Sprintf("# Foreign-framing review queue — %s\n\n", machine))
 	md.WriteString(fmt.Sprintf("%d fired pattern(s) name zone terms the narrative never mentions. Propose-only — a human (or the LLM) decides: add a dom_* gate term, or re-frame the zone.\n\n", len(candidates)))
 	for idx := range candidates {
 		cand := candidates[idx]
 		orphanList := strings.Join(cand.OrphanTerms, ", ")
 		md.WriteString(fmt.Sprintf("## %d. %s — %s  [%s, orphan %.0f%%]\n", idx+1, cand.Pattern, cand.Name, cand.Verdict, cand.OrphanFraction*100))
 		md.WriteString(fmt.Sprintf("- category: %s\n- zone: %s\n", cand.Category, cand.Zone))
 		md.WriteString(fmt.Sprintf("- orphan terms (no narrative echo): %s\n", orphanList))
 		md.WriteString(fmt.Sprintf("- suggested action: %s\n\n", framingAction(cand.Verdict)))
 	}
 	return md.String()
 }
 // framingAction returns the reviewer guidance shown for a verdict: a gate-term /
 // re-framing proposal for "foreign", and a confirm-only note for anything else.
 func framingAction(verdict string) string {
 	switch verdict {
 	case "foreign":
 		return "likely foreign-framed — propose a dom_* gate term for the orphan term(s), or re-frame the zone; human confirms + commits + pins a GT case"
 	default:
 		return "partial echo — likely generic vocabulary; human to confirm whether any orphan term is a foreign-machine component"
 	}
 }
@@ -0,0 +1,33 @@
 package iace
 import "testing"
 // TestFindFramingCandidates_FlagsForeignZone checks that, at a 0.6 threshold, only
 // the pattern whose zone terms are entirely absent from the narrative is flagged,
 // and that it is classified "foreign".
 func TestFindFramingCandidates_FlagsForeignZone(t *testing.T) {
 	const narrative = "Gewerbliche Geschirrspuelmaschine mit Boiler und Tank. Die Tuer ist verriegelt."
 	var fired []PatternMatch
 	fired = append(fired,
 		mkPM("HPforeign", "mechanical_hazard", "Walzen, Transportbaender, Bearbeitungszone", "Einzug", 80, nil, nil),
 		mkPM("HPlocal", "thermal_hazard", "Boiler, Tank, Tuer", "Verbrennung", 80, nil, nil),
 		mkPM("HPgeneric", "mechanical_hazard", "Quetschstelle, Gefahrbereich", "Quetschen", 80, nil, nil),
 	)
 	candidates := FindFramingCandidates(fired, narrative, 0.6)
 	if n := len(candidates); n != 1 || candidates[0].Pattern != "HPforeign" {
 		t.Fatalf("want only HPforeign flagged, got %+v", candidates)
 	}
 	if verdict := candidates[0].Verdict; verdict != "foreign" {
 		t.Errorf("fully-orphan zone should be 'foreign', got %s", verdict)
 	}
 }
 // TestFindFramingCandidates_PartialEchoIsPlausible checks that a zone with one of
 // three terms missing from the narrative is reported at a 0.3 threshold, with the
 // verdict "plausible" and exactly that one term listed as orphan.
 func TestFindFramingCandidates_PartialEchoIsPlausible(t *testing.T) {
 	const narrative = "Maschine mit Boiler und Tank."
 	fired := []PatternMatch{mkPM("HPx", "thermal_hazard", "Boiler, Tank, Auspuffleitung", "x", 80, nil, nil)}
 	candidates := FindFramingCandidates(fired, narrative, 0.3)
 	if count := len(candidates); count != 1 {
 		t.Fatalf("want 1 candidate (1/3 orphan >= 0.3), got %d", count)
 	}
 	only := candidates[0]
 	verdictOK := only.Verdict == "plausible"
 	orphansOK := len(only.OrphanTerms) == 1 && only.OrphanTerms[0] == "auspuffleitung"
 	if !(verdictOK && orphansOK) {
 		t.Errorf("want plausible + orphan [auspuffleitung], got %s %v", only.Verdict, only.OrphanTerms)
 	}
 }
@@ -0,0 +1,123 @@
 package iace
 import "github.com/google/uuid"
 // Non-test plumbing for the offline proposer (P2 slice 3): run the engine for a
 // narrative and produce the fired patterns + the engine-built hazards/mitigations
 // the dedup proposer and GT screen consume. This is the same pipeline the GT
 // benchmark tests use, lifted out of test scope so the dev-time CLI can call it.
 // universalLifecyclePhases is the fixed set of lifecycle phases that
 // BuildProposerInput appends to the phases parsed from the narrative. The intent
 // is that patterns gated to a specific lifecycle (maintenance, cleaning, setup,
 // fault clearing) still fire — the proposer wants the full hazard picture, not
 // only normal-operation hazards.
 var universalLifecyclePhases = []string{"normal_operation", "maintenance", "cleaning", "setup", "fault_clearing"}
 // BuildProposerInput runs the offline proposer pipeline for one narrative.
 //
 // It parses the narrative with ParseNarrative, collects the library IDs and NameDE
 // names of all non-negated components plus the energy source IDs, and matches them
 // with a fresh pattern engine. The machine types passed to the engine are
 // extraMachineTypes followed by machineType (when non-empty); the lifecycle phases
 // are the parsed ones followed by universalLifecyclePhases. Only patterns accepted
 // by IsPatternRelevant are kept.
 //
 // It returns the hazards and mitigations built from the kept patterns, and the
 // kept patterns themselves.
 //
 // NOTE: it does not apply the CE cyber-category skip, so the proposer view may
 // include cyber/AI hazards that the CE log excludes — harmless for the GT recall
 // screen (they match no CE ground-truth entry).
 func BuildProposerInput(narrative, machineType string, extraMachineTypes []string) ([]Hazard, []Mitigation, []PatternMatch) {
 	parsed := ParseNarrative(narrative, machineType)
 	var (
 		componentIDs   []string
 		componentNames []string
 		energyIDs      []string
 	)
 	for _, comp := range parsed.Components {
 		if !comp.Negated {
 			componentIDs = append(componentIDs, comp.LibraryID)
 			componentNames = append(componentNames, comp.NameDE)
 		}
 	}
 	for _, src := range parsed.EnergySources {
 		energyIDs = append(energyIDs, src.SourceID)
 	}
 	// Copy the caller's slice so appending machineType never writes into it.
 	machineTypes := make([]string, 0, len(extraMachineTypes)+1)
 	machineTypes = append(machineTypes, extraMachineTypes...)
 	if machineType != "" {
 		machineTypes = append(machineTypes, machineType)
 	}
 	lifecycles := make([]string, 0, len(parsed.LifecyclePhases)+len(universalLifecyclePhases))
 	lifecycles = append(lifecycles, parsed.LifecyclePhases...)
 	lifecycles = append(lifecycles, universalLifecyclePhases...)
 	input := MatchInput{
 		ComponentLibraryIDs: componentIDs,
 		EnergySourceIDs:     energyIDs,
 		LifecyclePhases:     lifecycles,
 		CustomTags:          parsed.CustomTags,
 		OperationalStates:   parsed.OperationalStates,
 		StateTransitions:    parsed.StateTransitions,
 		HumanRoles:          parsed.Roles,
 		MachineTypes:        machineTypes,
 	}
 	matched := NewPatternEngine().Match(input)
 	relevant := make([]PatternMatch, 0, len(matched.MatchedPatterns))
 	for _, candidate := range matched.MatchedPatterns {
 		if !IsPatternRelevant(candidate, narrative, componentNames) {
 			continue
 		}
 		relevant = append(relevant, candidate)
 	}
 	// Work on a shallow copy so the engine's own output keeps its full pattern list.
 	narrowed := *matched
 	narrowed.MatchedPatterns = relevant
 	hazards, mitigations := patternsToHazardsAndMitigations(&narrowed)
 	return hazards, mitigations, relevant
 }
 // patternsToHazardsAndMitigations converts engine output into the hazard and
 // mitigation entities the benchmark and proposer compare on. It is simplified
 // versus InitializeProject (no risk estimation, no norm refs): it only carries
 // category, zone, scenario and measures.
 //
 // One Hazard is created per matched pattern, using the pattern's first hazard
 // category and first applicable lifecycle (empty when there are none) and the
 // scenario text as name, falling back to the pattern name when the scenario is
 // empty. One Mitigation is created per (suggested measure, source pattern) pair
 // whose source pattern produced a hazard; its name is looked up in the protective
 // measure library, falling back to the measure ID.
 func patternsToHazardsAndMitigations(out *MatchOutput) ([]Hazard, []Mitigation) {
 	firstOrEmpty := func(values []string) string {
 		if len(values) == 0 {
 			return ""
 		}
 		return values[0]
 	}
 	hazards := make([]Hazard, 0, len(out.MatchedPatterns))
 	hazardByPattern := make(map[string]uuid.UUID, len(out.MatchedPatterns))
 	for _, pattern := range out.MatchedPatterns {
 		displayName := pattern.ScenarioDE
 		if displayName == "" {
 			displayName = pattern.PatternName
 		}
 		hazard := Hazard{
 			ID:             uuid.New(),
 			Name:           displayName,
 			Category:       firstOrEmpty(pattern.HazardCats),
 			Description:    pattern.ScenarioDE,
 			Scenario:       pattern.ScenarioDE,
 			TriggerEvent:   pattern.TriggerDE,
 			PossibleHarm:   pattern.HarmDE,
 			AffectedPerson: pattern.AffectedDE,
 			HazardousZone:  pattern.ZoneDE,
 			LifecyclePhase: firstOrEmpty(pattern.ApplicableLifecycles),
 		}
 		hazards = append(hazards, hazard)
 		hazardByPattern[pattern.PatternID] = hazard.ID
 	}
 	libraryNames := make(map[string]string)
 	for _, measure := range GetProtectiveMeasureLibrary() {
 		libraryNames[measure.ID] = measure.Name
 	}
 	var mitigations []Mitigation
 	for _, suggestion := range out.SuggestedMeasures {
 		label, known := libraryNames[suggestion.MeasureID]
 		if !known || label == "" {
 			label = suggestion.MeasureID
 		}
 		for _, patternID := range suggestion.SourcePatterns {
 			// Measures pointing at patterns without a hazard are skipped.
 			if hazardID, found := hazardByPattern[patternID]; found {
 				mitigations = append(mitigations, Mitigation{
 					ID:       uuid.New(),
 					HazardID: hazardID,
 					Name:     label,
 				})
 			}
 		}
 	}
 	return hazards, mitigations
 }
@@ -0,0 +1,25 @@
 package iace
 import "testing"
 // TestBuildProposerInput_WarewashingFires checks that the non-test pipeline yields
 // fired patterns and hazards for the warewashing narrative, and that the
 // warewashing-specific pattern HP2201 is among the fired patterns.
 func TestBuildProposerInput_WarewashingFires(t *testing.T) {
 	const machineType = "Gewerbliche Untertisch-Geschirrspuelmaschine (vernetzt)"
 	hazards, _, fired := BuildProposerInput(warewashingNarrative, machineType, []string{"food_processing"})
 	if len(fired) == 0 || len(hazards) == 0 {
 		t.Fatalf("want fired patterns + hazards, got %d patterns / %d hazards", len(fired), len(hazards))
 	}
 	sawHP2201 := false
 	for i := range fired {
 		if fired[i].PatternID == "HP2201" {
 			sawHP2201 = true
 			break
 		}
 	}
 	if !sawHP2201 {
 		t.Errorf("warewashing-specific HP2201 must fire via BuildProposerInput")
 	}
 }
@@ -0,0 +1,174 @@
 package iace
 import (
 	"context"
 	"encoding/json"
 	"fmt"
 	"strings"
 	"github.com/breakpilot/ai-compliance-sdk/internal/llm"
 )
 // Semantic judgement over RECALL-SAFE dedup candidates (P2 slice 2). DEV-TIME,
 // propose-only. The deterministic GT wall (proposer_screen.go) has already
 // removed candidates that would drop recall or that credit different GT entries;
 // the judge only adds an opinion on whether the survivors are truly the same
 // hazard, plus a rationale, for the human review queue. It NEVER mutates anything.
 //
 // The judge is pluggable behind CandidateJudge so the runtime/tests stay
 // deterministic (HeuristicJudge) while the dev-time CLI can plug in the
 // non-deterministic LLM (LLMJudge over the shared llm.ProviderRegistry).
 const (
 	VerdictDuplicate = "duplicate" // the two patterns describe the same hazard
 	VerdictDistinct  = "distinct"  // the two patterns describe different hazards
 	VerdictUncertain = "uncertain" // no decision; also the fallback on LLM errors and unrecognised LLM verdicts
 )
 // JudgedProposal is one dedup candidate together with its deterministic GT-wall
 // result and the judge's opinion. It is what RenderProposalQueue renders; nothing
 // in it is applied automatically.
 type JudgedProposal struct {
 	Candidate  DedupCandidate `json:"candidate"`  // the proposed keep/drop pair and its overlap scores
 	Screen     ScreenResult   `json:"screen"`     // GT-wall outcome (recall before/after, distinct-GT flag)
 	Verdict    string         `json:"verdict"`    // one of VerdictDuplicate, VerdictDistinct, VerdictUncertain
 	Confidence string         `json:"confidence"` // judge confidence; the LLM prompt asks for high|medium|low
 	Rationale  string         `json:"rationale"`  // free-text explanation from the judge
 	Judge      string         `json:"judge"`      // presumably the CandidateJudge.Name() that produced the verdict — confirm at the call site
 }
 // CandidateJudge decides whether two near-duplicate patterns are the same hazard.
 // Implementations in this file are HeuristicJudge (deterministic) and LLMJudge
 // (model-backed).
 type CandidateJudge interface {
 	// Name returns a short identifier for the judge (e.g. "heuristic", "llm").
 	Name() string
 	// Judge gives an opinion on candidate c, whose two patterns are a and b. It
 	// returns a verdict (one of the Verdict* constants), a confidence label and a
 	// human-readable rationale.
 	Judge(ctx context.Context, c DedupCandidate, a, b PatternMatch) (verdict, confidence, rationale string)
 }
 // HeuristicJudge is the deterministic default/fallback judge. It looks only at
 // the candidate's Jaccard scores, always reports "low" confidence, and punts to
 // "uncertain" whenever the structural signal is not clear-cut, so the review
 // queue shows where the LLM judge is needed.
 type HeuristicJudge struct{}
 // Name returns the fixed identifier "heuristic".
 func (HeuristicJudge) Name() string { return "heuristic" }
 // Judge classifies the candidate from its overlap scores alone; the context and
 // the two patterns are ignored.
 //
 //   - duplicate: scenario overlap >= 0.5, or zone and measure overlap both >= 0.5
 //   - distinct:  measure overlap >= 0.99 with zero zone overlap and scenario overlap < 0.3
 //   - uncertain: everything else
 func (HeuristicJudge) Judge(_ context.Context, c DedupCandidate, _, _ PatternMatch) (string, string, string) {
 	const confidence = "low"
 	scenarioHigh := c.ScenarioJaccard >= 0.5
 	zoneAndMeasureHigh := c.ZoneJaccard >= 0.5 && c.MeasureJaccard >= 0.5
 	if scenarioHigh || zoneAndMeasureHigh {
 		return VerdictDuplicate, confidence, "structural: high scenario, or combined zone+measure, overlap"
 	}
 	sameMeasuresOnly := c.MeasureJaccard >= 0.99 && c.ZoneJaccard == 0 && c.ScenarioJaccard < 0.3
 	if sameMeasuresOnly {
 		return VerdictDistinct, confidence, "structural: identical measures but no zone/scenario overlap — likely distinct hazards sharing generic measures"
 	}
 	return VerdictUncertain, confidence, "structural signal inconclusive — needs the LLM judge"
 }
 // LLMJudge asks an offline model to make the semantic call. Non-deterministic, so
 // it lives only in the dev-time tool, never in tests or the runtime. It degrades
 // to "uncertain" on a missing completer and on any transport or parse error — it
 // must never break the run.
 type LLMJudge struct {
 	Completer    LLMCompleter // model backend; when nil, Judge returns "uncertain" instead of panicking
 	MachineClass string       // machine class inserted into the prompt
 }
 // Name returns the fixed identifier "llm".
 func (LLMJudge) Name() string { return "llm" }
 // Judge builds the prompt for patterns a and b, sends it to the completer and
 // parses the JSON answer. The candidate itself is not used. It returns a verdict
 // (one of the Verdict* constants), a confidence label and a rationale; every
 // failure path yields VerdictUncertain with "low" confidence and a rationale that
 // names the problem.
 func (j LLMJudge) Judge(ctx context.Context, c DedupCandidate, a, b PatternMatch) (string, string, string) {
 	// A zero-value LLMJudge has a nil Completer; calling it would panic.
 	if j.Completer == nil {
 		return VerdictUncertain, "low", "LLM error: no completer configured"
 	}
 	system, user := BuildJudgePrompt(j.MachineClass, a, b)
 	raw, err := j.Completer.Complete(ctx, system, user)
 	if err != nil {
 		return VerdictUncertain, "low", "LLM error: " + err.Error()
 	}
 	return parseJudgeJSON(raw)
 }
 // BuildJudgePrompt is the real LLM artifact — built and unit-tested deterministically
 // even though the call itself is not. It frames the ISO 12100 same-vs-distinct
 // question and forces a JSON answer.
 //
 // machineClass is inserted into the user message; a and b are the two patterns to
 // compare. It returns the system prompt (German instructions plus the required
 // JSON answer schema) and the user prompt (machine class followed by the fields of
 // A and B).
 func BuildJudgePrompt(machineClass string, a, b PatternMatch) (system, user string) {
 	// System prompt: role, decision criteria, and the exact JSON shape that
 	// parseJudgeJSON expects (verdict / confidence / rationale).
 	system = "Du bist Sachverstaendiger fuer Maschinensicherheit nach EN ISO 12100. " +
 		"Entscheide, ob zwei generierte Gefaehrdungen fuer DIESE Maschine DIESELBE Gefaehrdung " +
 		"beschreiben (Dublette) oder fachlich VERSCHIEDENE Gefaehrdungen sind, die nur zufaellig " +
 		"dieselben Schutzmassnahmen teilen. Verschieden, wenn Wirkort, Ausloeser oder " +
 		"Schadensmechanismus abweichen — auch bei gleicher Kategorie und gleichen Massnahmen. " +
 		"Antworte AUSSCHLIESSLICH als JSON: " +
 		`{"verdict":"duplicate|distinct|uncertain","confidence":"high|medium|low","rationale":"..."}.`
 	// User prompt: per pattern, in order — ID, name, category (via primaryCat, a
 	// helper defined elsewhere; presumably the first hazard category — confirm),
 	// zone, scenario, trigger, harm, and the comma-joined suggested measure IDs.
 	// The raw string's whitespace is part of the prompt; do not re-indent it.
 	user = fmt.Sprintf(`Maschinenklasse: %s
 Gefaehrdung A (%s):
  Name: %s
  Kategorie: %s
  Zone: %s
  Szenario: %s
  Ausloeser: %s
  Schaden: %s
  Massnahmen: %s
 Gefaehrdung B (%s):
  Name: %s
  Kategorie: %s
  Zone: %s
  Szenario: %s
  Ausloeser: %s
  Schaden: %s
  Massnahmen: %s
 Sind A und B dieselbe Gefaehrdung fuer diese Maschine?`,
 		machineClass,
 		a.PatternID, a.PatternName, primaryCat(a), a.ZoneDE, a.ScenarioDE, a.TriggerDE, a.HarmDE, strings.Join(a.SuggestedMeasureIDs, ", "),
 		b.PatternID, b.PatternName, primaryCat(b), b.ZoneDE, b.ScenarioDE, b.TriggerDE, b.HarmDE, strings.Join(b.SuggestedMeasureIDs, ", "))
 	return system, user
 }
 // parseJudgeJSON extracts the judge's answer from raw model output. The model may
 // wrap the JSON in chatter, so the span from the first "{" to the last "}" is
 // decoded.
 //
 // Verdict and confidence are trimmed and lower-cased before validation, so
 // harmless variants such as "Duplicate" or " HIGH " are accepted. A verdict
 // outside duplicate|distinct|uncertain becomes VerdictUncertain; a confidence
 // outside high|medium|low (including empty) becomes "low". The rationale is
 // passed through unchanged. Output without a JSON object, or with invalid JSON,
 // yields VerdictUncertain / "low" and a rationale describing the failure.
 func parseJudgeJSON(raw string) (verdict, confidence, rationale string) {
 	start, end := strings.Index(raw, "{"), strings.LastIndex(raw, "}")
 	if start < 0 || end <= start {
 		return VerdictUncertain, "low", "unparseable LLM output"
 	}
 	var v struct {
 		Verdict    string `json:"verdict"`
 		Confidence string `json:"confidence"`
 		Rationale  string `json:"rationale"`
 	}
 	if err := json.Unmarshal([]byte(raw[start:end+1]), &v); err != nil {
 		return VerdictUncertain, "low", "unparseable LLM JSON: " + err.Error()
 	}
 	verdict = strings.ToLower(strings.TrimSpace(v.Verdict))
 	switch verdict {
 	case VerdictDuplicate, VerdictDistinct, VerdictUncertain:
 	default:
 		verdict = VerdictUncertain
 	}
 	// Validate confidence the same way as the verdict, so the review queue only
 	// ever shows the three labels the prompt asks for.
 	confidence = strings.ToLower(strings.TrimSpace(v.Confidence))
 	switch confidence {
 	case "high", "medium", "low":
 	default:
 		confidence = "low"
 	}
 	return verdict, confidence, v.Rationale
 }
 // LLMCompleter is the minimal text-in/text-out the LLM judge needs. Tests pass a
 // stub; the dev-time tool passes a registry-backed adapter (NewRegistryCompleter).
 type LLMCompleter interface {
 	// Complete sends one system prompt and one user prompt to the model and
 	// returns its text reply, or an error if the call failed.
 	Complete(ctx context.Context, system, user string) (string, error)
 }
 // registryCompleter implements LLMCompleter on top of an llm.ProviderRegistry,
 // always addressing one fixed model.
 type registryCompleter struct {
 	reg   *llm.ProviderRegistry
 	model string
 }
 // NewRegistryCompleter adapts the shared llm.ProviderRegistry to LLMCompleter so
 // the proposer can reuse the platform's offline model wiring (e.g. self-hosted
 // qwen). model is the model name sent with every request.
 func NewRegistryCompleter(reg *llm.ProviderRegistry, model string) LLMCompleter {
 	adapter := new(registryCompleter)
 	adapter.reg = reg
 	adapter.model = model
 	return adapter
 }
 // Complete sends the system and user prompts as a two-message chat request with
 // temperature 0 and returns the content of the reply message. Registry errors are
 // returned unchanged with an empty string.
 func (rc *registryCompleter) Complete(ctx context.Context, system, user string) (string, error) {
 	request := llm.ChatRequest{
 		Model:       rc.model,
 		Temperature: 0,
 		Messages: []llm.Message{
 			{Role: "system", Content: system},
 			{Role: "user", Content: user},
 		},
 	}
 	reply, err := rc.reg.Chat(ctx, &request)
 	if err != nil {
 		return "", err
 	}
 	return reply.Message.Content, nil
 }
@@ -0,0 +1,104 @@
 package iace
 import (
 	"context"
 	"errors"
 	"strings"
 	"testing"
 )
 func TestHeuristicJudge_Verdicts(t *testing.T) {
 	tests := []struct {
 		name        string
 		zone, meas  float64
 		scenario    float64
 		wantVerdict string
 	}{
 		{"high scenario overlap -> duplicate", 0, 0.3, 0.6, VerdictDuplicate},
 		{"high zone+measure -> duplicate", 0.6, 0.6, 0.1, VerdictDuplicate},
 		{"identical measures, no text -> distinct", 0, 1.0, 0.0, VerdictDistinct},
 		{"shared measures, low text -> uncertain", 0, 0.67, 0.19, VerdictUncertain},
 	}
 	for _, tt := range tests {
 		t.Run(tt.name, func(t *testing.T) {
 			c := DedupCandidate{ZoneJaccard: tt.zone, MeasureJaccard: tt.meas, ScenarioJaccard: tt.scenario}
 			v, conf, _ := HeuristicJudge{}.Judge(context.Background(), c, PatternMatch{}, PatternMatch{})
 			if v != tt.wantVerdict {
 				t.Errorf("verdict: want %s, got %s", tt.wantVerdict, v)
 			}
 			if conf != "low" {
 				t.Errorf("heuristic confidence must be low, got %s", conf)
 			}
 		})
 	}
 }
 // TestBuildJudgePrompt_ContainsKeyFacts checks that the system prompt names the
 // standard and the JSON contract, and that the user prompt carries the machine
 // class plus the identifying facts of both patterns.
 func TestBuildJudgePrompt_ContainsKeyFacts(t *testing.T) {
 	hotSurface := PatternMatch{
 		PatternID:           "HPa",
 		PatternName:         "Heisse Flaeche",
 		HazardCats:          []string{"thermal_hazard"},
 		ZoneDE:              "Boiler",
 		ScenarioDE:          "Beruehrung heisser Boiler",
 		SuggestedMeasureIDs: []string{"M071"},
 	}
 	hotWare := PatternMatch{
 		PatternID:           "HPb",
 		PatternName:         "Heisses Spuelgut",
 		HazardCats:          []string{"thermal_hazard"},
 		ZoneDE:              "Spuelgut",
 		ScenarioDE:          "Beruehrung heisses Geschirr",
 		SuggestedMeasureIDs: []string{"M071"},
 	}
 	system, user := BuildJudgePrompt("Geschirrspuelmaschine", hotSurface, hotWare)
 	systemFacts := []string{"EN ISO 12100", "JSON", "verdict"}
 	for i := range systemFacts {
 		if strings.Contains(system, systemFacts[i]) {
 			continue
 		}
 		t.Errorf("system prompt missing %q", systemFacts[i])
 	}
 	userFacts := []string{"Geschirrspuelmaschine", "HPa", "HPb", "Boiler", "Spuelgut", "thermal_hazard"}
 	for i := range userFacts {
 		if strings.Contains(user, userFacts[i]) {
 			continue
 		}
 		t.Errorf("user prompt missing %q", userFacts[i])
 	}
 }
 // fakeCompleter is a canned completer for tests: it ignores its inputs and
 // returns the configured output and error.
 type fakeCompleter struct {
 	out string
 	err error
 }
 // Complete returns the preset reply and error regardless of context or prompts.
 func (f fakeCompleter) Complete(_ context.Context, _, _ string) (string, error) {
 	reply, failure := f.out, f.err
 	return reply, failure
 }
 // TestLLMJudge_ParsesAndDegrades covers the LLM judge's four paths: well-formed
 // JSON wrapped in chatter, an unknown verdict value, a transport error, and
 // output without any JSON.
 func TestLLMJudge_ParsesAndDegrades(t *testing.T) {
 	ctx := context.Background()
 	cand := DedupCandidate{KeepPattern: "HPa", DropPattern: "HPb"}
 	ask := func(judge LLMJudge) (string, string, string) {
 		return judge.Judge(ctx, cand, PatternMatch{}, PatternMatch{})
 	}
 	// Well-formed JSON, even wrapped in chatter, parses.
 	wrapped := fakeCompleter{out: `Sicher. {"verdict":"distinct","confidence":"high","rationale":"andere Wirkorte"}`}
 	verdict, confidence, rationale := ask(LLMJudge{Completer: wrapped, MachineClass: "x"})
 	if verdict != VerdictDistinct || confidence != "high" || rationale != "andere Wirkorte" {
 		t.Errorf("parse: got %s/%s/%q", verdict, confidence, rationale)
 	}
 	// Unknown verdict value normalises to uncertain.
 	unknown := fakeCompleter{out: `{"verdict":"maybe","confidence":"medium","rationale":"x"}`}
 	verdict, _, _ = ask(LLMJudge{Completer: unknown})
 	if verdict != VerdictUncertain {
 		t.Errorf("unknown verdict must normalise to uncertain, got %s", verdict)
 	}
 	// Transport error degrades gracefully, never panics.
 	failing := fakeCompleter{err: errors.New("connection refused")}
 	verdict, _, rationale = ask(LLMJudge{Completer: failing})
 	if verdict != VerdictUncertain || !strings.Contains(rationale, "LLM error") {
 		t.Errorf("error path: got %s / %q", verdict, rationale)
 	}
 	// Garbage (no JSON) degrades to uncertain.
 	garbage := fakeCompleter{out: "no json here"}
 	verdict, _, _ = ask(LLMJudge{Completer: garbage})
 	if verdict != VerdictUncertain {
 		t.Errorf("garbage must degrade to uncertain, got %s", verdict)
 	}
 }
 // TestRenderProposalQueue_ShowsActions renders a single duplicate-verdict
 // proposal and checks that both pattern IDs, the category, the supersession
 // advice and the propose-only banner appear in the markdown.
 func TestRenderProposalQueue_ShowsActions(t *testing.T) {
 	proposal := JudgedProposal{
 		Candidate:  DedupCandidate{KeepPattern: "HP807", DropPattern: "HP033", Category: "update_failure", Score: 0.32},
 		Screen:     ScreenResult{RecallBefore: 1, RecallAfter: 1},
 		Verdict:    VerdictDuplicate,
 		Confidence: "medium",
 		Rationale:  "same update failure",
 		Judge:      "llm",
 	}
 	rendered := RenderProposalQueue("Geschirrspuelmaschine", []JudgedProposal{proposal})
 	expected := []string{"HP807", "HP033", "update_failure", "supersession", "Propose-only"}
 	for i := range expected {
 		if strings.Contains(rendered, expected[i]) {
 			continue
 		}
 		t.Errorf("queue missing %q\n%s", expected[i], rendered)
 	}
 }
@@ -0,0 +1,47 @@
 package iace
 import (
 	"fmt"
 	"strings"
 )
 // RenderProposalQueue formats judged dedup proposals as the markdown queue a
 // human reviews. The output depends only on the arguments. Nothing here applies a
 // change — each entry is a suggestion to confirm, edit, commit, and pin with a GT
 // case.
 //
 // Each entry shows the keep/drop pattern pair with the judge's verdict and
 // confidence, the category and overlap scores, GT recall before and after the
 // drop, both hazard names, the rationale and a suggested action.
 func RenderProposalQueue(machine string, proposals []JudgedProposal) string {
 	var md strings.Builder
 	fmt.Fprintf(&md, "# Dedup proposal queue — %s\n\n", machine)
 	fmt.Fprintf(&md, "%d candidate(s) survived the deterministic GT wall. Propose-only — nothing is applied automatically.\n\n", len(proposals))
 	for idx := range proposals {
 		proposal := proposals[idx]
 		cand, screen := proposal.Candidate, proposal.Screen
 		fmt.Fprintf(&md, "## %d. keep %s  ⊃  drop %s   [%s → %s (%s)]\n",
 			idx+1, cand.KeepPattern, cand.DropPattern, proposal.Judge, proposal.Verdict, proposal.Confidence)
 		fmt.Fprintf(&md, "- category %s · score %.2f (measures %.0f%%, zone %.0f%%, scenario %.0f%%)\n",
 			cand.Category, cand.Score, cand.MeasureJaccard*100, cand.ZoneJaccard*100, cand.ScenarioJaccard*100)
 		fmt.Fprintf(&md, "- GT recall %.1f%% → %.1f%% when %s is dropped (wall: %s)\n",
 			screen.RecallBefore*100, screen.RecallAfter*100, cand.DropPattern, wallNote(screen))
 		fmt.Fprintf(&md, "- keep: %s\n- drop: %s\n", cand.KeepHazardName, cand.DropName)
 		fmt.Fprintf(&md, "- judge rationale: %s\n", proposal.Rationale)
 		fmt.Fprintf(&md, "- suggested action: %s\n\n", suggestedAction(proposal))
 	}
 	return md.String()
 }
 // wallNote summarises the GT-wall outcome for the queue: "recall-safe" unless the
 // keep and drop hazards credit different ground-truth entries, in which case both
 // entries are named.
 func wallNote(s ScreenResult) string {
 	if !s.DistinctGT {
 		return "recall-safe"
 	}
 	return fmt.Sprintf("distinct GT %s vs %s", s.KeepGT, s.DropGT)
 }
 // suggestedAction turns the judge's verdict into the action line of a queue
 // entry: supersede the drop pattern for a duplicate, keep both for distinct, and
 // ask for further review for anything else.
 func suggestedAction(p JudgedProposal) string {
 	if p.Verdict == VerdictDuplicate {
 		return fmt.Sprintf("add %s to a supersession set, then a human confirms + commits + pins a GT case", p.Candidate.DropPattern)
 	}
 	if p.Verdict == VerdictDistinct {
 		return "keep both — judge considers them distinct hazards"
 	}
 	return "needs human (or higher-confidence LLM) review — no automatic action"
 }
@@ -0,0 +1,61 @@
 package iace
 import "github.com/google/uuid"
 // ScreenResult is the deterministic GT verdict for one proposed supersession, as
 // computed by ScreenSupersession.
 type ScreenResult struct {
 	RecallBefore float64 `json:"recall_before"`     // benchmark coverage score with all hazards present
 	RecallAfter  float64 `json:"recall_after"`      // benchmark coverage score after removing the drop hazard and its mitigations
 	KeepGT       string  `json:"keep_gt,omitempty"` // GT entry the keeper credits (if any)
 	DropGT       string  `json:"drop_gt,omitempty"` // GT entry the drop credits (if any)
 	DistinctGT   bool    `json:"distinct_gt"`       // keep & drop credit DIFFERENT GT entries -> distinct hazards
 	Safe         bool    `json:"safe"`              // recall preserved AND not distinct
 }
 // ScreenSupersession is the WALL between "propose" and "decide". It evaluates the
 // proposal "keep keepHazardName, drop dropHazardName" against the ground truth and
 // marks it safe only if BOTH deterministic checks pass:
 //
 //  1. RECALL does not fall when every hazard named dropHazardName (and each
 //     mitigation attached to it) is removed — otherwise the drop is load-bearing
 //     for GT coverage.
 //  2. Keep and drop do NOT credit DIFFERENT ground-truth entries. Recall alone is
 //     necessary but not sufficient: two genuinely distinct hazards sharing the
 //     same measures (e.g. hot boiler surface vs hot ware on unloading) keep
 //     recall at 100% when one is dropped, yet must NOT be merged.
 //
 // Hazards are identified by name; the GT entry credited to a name is taken from
 // the matched pairs of the baseline benchmark run. A proposal that survives is
 // still only RECALL-SAFE — a candidate for a human (and an LLM judge) to confirm
 // semantically. The function only reads its inputs and calls CompareBenchmark.
 func ScreenSupersession(gt *GroundTruth, hazards []Hazard, mits []Mitigation, keepHazardName, dropHazardName string) ScreenResult {
 	baseline := CompareBenchmark(gt, hazards, mits)
 	creditedGT := make(map[string]string)
 	for _, pair := range baseline.MatchedPairs {
 		creditedGT[pair.EngineHazard.Name] = pair.GTEntry.Nr
 	}
 	result := ScreenResult{
 		RecallBefore: baseline.CoverageScore,
 		KeepGT:       creditedGT[keepHazardName],
 		DropGT:       creditedGT[dropHazardName],
 	}
 	result.DistinctGT = result.KeepGT != "" && result.DropGT != "" && result.KeepGT != result.DropGT
 	// Simulate the drop: remove the hazard(s) with the drop name and remember
 	// their IDs so the attached mitigations can be removed too.
 	remaining := make([]Hazard, 0, len(hazards))
 	removedIDs := make(map[uuid.UUID]bool)
 	for i := range hazards {
 		if hazards[i].Name != dropHazardName {
 			remaining = append(remaining, hazards[i])
 			continue
 		}
 		removedIDs[hazards[i].ID] = true
 	}
 	remainingMits := make([]Mitigation, 0, len(mits))
 	for i := range mits {
 		if removedIDs[mits[i].HazardID] {
 			continue
 		}
 		remainingMits = append(remainingMits, mits[i])
 	}
 	reduced := CompareBenchmark(gt, remaining, remainingMits)
 	result.RecallAfter = reduced.CoverageScore
 	result.Safe = !result.DistinctGT && result.RecallAfter >= result.RecallBefore
 	return result
 }
@@ -160,6 +160,7 @@ func (s *Store) ListHazards(ctx context.Context, projectID uuid.UUID) ([]Hazard,
 		hazards = append(hazards, h)
 	}
 	SortHazardsByISO12100(hazards)
 	return hazards, nil
 }
@@ -110,9 +110,10 @@ type domainDef struct {
 // Deterministic order (slice, not map) — important for stable classification + tests.
 var domains = []domainDef{
 	{"data_protection",
-		[]string{"DSGVO", "GDPR", "BDSG", "EDPB", "DSK", "BfDI", "BayLfD", "DPF"},
+		[]string{"DSGVO", "GDPR", "BDSG", "TDDDG", "TTDSG", "EDPB", "DSK", "BfDI", "BayLfD", "DPF"},
 		[]string{"personenbezogen", "betroffene", "datenschutz", "datenschutzbeauftrag", "dsb",
-			"datenpanne", "auskunft", "loesch", "lösch", "einwilligung", "besondere kategorien", "auftragsverarbeiter"}},
+			"datenpanne", "auskunft", "loesch", "lösch", "einwilligung", "besondere kategorien", "auftragsverarbeit",
 			"cookie", "endeinrichtung", "endgerät", "endgeraet", "tracking"}},
 	{"cyber",
 		[]string{"CRA", "NIS2", "NIS-2", "ENISA", "DORA", "EUCC"},
 		[]string{"security update", "sicherheitsupdate", "sicherheitsaktualisierung", "schwachstelle", "sbom",
@@ -126,6 +127,16 @@ var domains = []domainDef{
 		nil},
 }
 // euPrimaryDomains are domains whose PRIMARY binding act is an EU regulation/directive
 // (DSGVO, CRA/NIS2, AI Act, MaschinenVO). In these domains a NATIONAL implementing law
 // (e.g. BDSG) is subsidiary for general questions: authorityScore soft-demotes such
 // results by the subsidiarityPen constant unless they match a topic of the query.
 // Keys are domain names as returned by queryDomain.
 var euPrimaryDomains = map[string]bool{
 	"data_protection": true,
 	"cyber":           true,
 	"ai":              true,
 	"product_safety":  true,
 }
 func queryDomain(query string) string {
 	ql := strings.ToLower(query)
 	for _, d := range domains {
@@ -135,6 +146,16 @@ func queryDomain(query string) string {
 			}
 		}
 	}
 	// Fallback: an explicit regulation mention (e.g. "DSGVO", "BDSG", "CRA") also signals the
 	// domain — so a question phrased around the act ("... gilt die DSGVO ...") is scoped even
 	// without a topical keyword. Keyword match wins first (more specific).
 	for _, d := range domains {
 		for _, reg := range d.regs {
 			if strings.Contains(ql, strings.ToLower(reg)) {
 				return d.name
 			}
 		}
 	}
 	return ""
 }
@@ -180,6 +201,11 @@ var topics = []topicDef{
 	{[]string{"bussgeld", "geldbusse"}, []string{"Art. 83"}},
 	{[]string{"security update", "sicherheitsupdate", "schwachstelle", "sbom", "cybersicherheitsanforderung"}, []string{"CRA Anhang I"}},
 	{[]string{"meldepflicht", "sicherheitsvorfall"}, []string{"Art. 14 CRA"}},
 	// ePrivacy / cookies: § 25 TDDDG (ex-TTDSG) is lex specialis for terminal-equipment access /
 	// cookie consent. It is co-primary on a cookie/tracking query, so the subsidiarity rule does
 	// NOT demote it the way it demotes general data-protection DE law that is subsidiary to the
 	// DSGVO. Keywords are cookie-specific (NOT bare "Einwilligung") so a general consent question
 	// still resolves to Art. 7 DSGVO.
 	{[]string{"cookie", "endeinrichtung", "endgerät", "endgeraet", "tracking", "speicherung von informationen", "zugriff auf informationen"}, []string{"§ 25 TDDDG"}},
 }
 // resultMatchesTopic reports whether the result is a preferred norm of a topic the query hits.
@@ -14,6 +14,7 @@ const (
 	domainMatchGain   = 0.15
 	offDomainPenalty  = 0.10 // off-domain binding (demoted, not removed)
 	scopePenalty      = 0.25 // BDSG Teil 3 (law enforcement) on a general DP question
 	subsidiarityPen   = 0.18 // national implementing law (BDSG) on a general EU-primary question: SOFT demote, not exclusion
 	topicGain         = 0.18 // amplifier only
 	supersededPenalty = 0.50 // superseded Alt-Quelle (pre-eu-v1): demoted, nicht versteckt
 	intentLiftGain    = 0.10 // epsilon a qualifying interpretative source is lifted ABOVE the best binding
@@ -102,6 +103,15 @@ func authorityScore(query string, r LegalSearchResult, qDomain string, qForeign
 	if qDomain == "data_protection" && scopeClass(r) == "law_enforcement" {
 		score -= scopePenalty
 	}
 	// Subsidiarity: a national implementing law (DE binding, e.g. BDSG) is subsidiary to the
 	// primary EU act for GENERAL questions in an EU-primary domain — UNLESS the query hits a
 	// topic where the national norm is co-primary (DSB §38, special categories §22, ...). The
 	// topic boost below lifts those; here we only SOFT-demote the non-topic national norm, so
 	// it stays visible and can still win on a strongly matching topic. No hard exclusion.
 	if euPrimaryDomains[qDomain] && info.sourceClass == "binding_law" &&
 		info.jurisdiction == "DE" && !resultMatchesTopic(query, r) {
 		score -= subsidiarityPen
 	}
 	if resultMatchesTopic(query, r) {
 		score += topicGain // Verstaerker, kein Override
 	}
@@ -72,6 +72,95 @@ func TestRerankByAuthority_Acceptance(t *testing.T) {
 		}
 	})
 	// Subsidiarity (KB-2026.1 BDSG-pilot regression): a national implementing § that is NOT a
 	// co-primary topic norm must not outrank the primary DSGVO article on a general question.
 	t.Run("subsidiarity dp_05: BDSG §23 below DSGVO Art.6 (Rechtsgrundlage)", func(t *testing.T) {
 		in := []LegalSearchResult{
 			bindingRes("§ 23 BDSG", "BDSG", "DE", 0.70),
 			bindingRes("Art. 6 DSGVO", "DSGVO", "EU", 0.66),
 		}
 		out := rerankByAuthority("Welche Rechtsgrundlagen erlauben eine Verarbeitung personenbezogener Daten?", in)
 		if out[0].RegulationShort != "DSGVO" {
 			t.Fatalf("DSGVO Art.6 must beat general BDSG §, got %q", out[0].ArticleLabel)
 		}
 		if len(out) != 2 {
 			t.Fatalf("BDSG must stay visible (soft demote), got len=%d", len(out))
 		}
 	})
 	t.Run("subsidiarity dp_08: BDSG §70 below DSGVO Art.28 (Auftragsverarbeitung)", func(t *testing.T) {
 		in := []LegalSearchResult{
 			bindingRes("§ 70 BDSG", "BDSG", "DE", 0.70), // Teil 3 → scope + subsidiarity
 			bindingRes("Art. 28 DSGVO", "DSGVO", "EU", 0.66),
 		}
 		out := rerankByAuthority("Was muss ein Auftragsverarbeitungsvertrag enthalten?", in)
 		if out[0].RegulationShort != "DSGVO" {
 			t.Fatalf("DSGVO Art.28 must beat BDSG §70, got %q", out[0].ArticleLabel)
 		}
 	})
 	t.Run("subsidiarity dp_11: BDSG §22 below DSGVO Art.32 on a TOM question", func(t *testing.T) {
 		in := []LegalSearchResult{
 			bindingRes("§ 22 BDSG", "BDSG", "DE", 0.70),
 			bindingRes("Art. 32 DSGVO", "DSGVO", "EU", 0.66),
 		}
 		out := rerankByAuthority("Welche technischen und organisatorischen Massnahmen verlangt das Datenschutzrecht?", in)
 		if out[0].RegulationShort != "DSGVO" {
 			t.Fatalf("DSGVO Art.32 must beat BDSG §22 on a non-topic TOM question, got %q", out[0].ArticleLabel)
 		}
 	})
 	t.Run("cr_07: a 'DSGVO' mention scopes the domain so BDSG Teil-3 §64 is demoted", func(t *testing.T) {
 		in := []LegalSearchResult{
 			bindingRes("§ 64 BDSG", "BDSG", "DE", 0.70), // Teil 3 (law enforcement)
 			bindingRes("Art. 32 DSGVO", "DSGVO", "EU", 0.66),
 		}
 		// Query has no DP keyword but names the DSGVO → domain fallback scopes it data_protection,
 		// so scope+subsidiarity demote the law-enforcement § below the primary norm.
 		out := rerankByAuthority("Welche rechtliche Grundlage gilt fuer technische und organisatorische Massnahmen - DSGVO oder ein Standard?", in)
 		if out[0].RegulationShort != "DSGVO" {
 			t.Fatalf("DSGVO must win on a DSGVO-mention question, got %q", out[0].ArticleLabel)
 		}
 	})
 	t.Run("ePrivacy: a cookie query lifts §25 TDDDG above DSGVO consent (lex specialis topic)", func(t *testing.T) {
 		in := []LegalSearchResult{
 			bindingRes("Art. 7 DSGVO", "DSGVO", "EU", 0.70), // higher semantic
 			bindingRes("§ 25 TDDDG", "TDDDG", "DE", 0.66),
 		}
 		out := rerankByAuthority("Wann ist eine Einwilligung fuer das Speichern von Cookies auf Endgeraeten erforderlich?", in)
 		if out[0].RegulationShort != "TDDDG" {
 			t.Fatalf("§25 TDDDG must win a cookie question (lex specialis topic), got %q", out[0].ArticleLabel)
 		}
 	})
 	t.Run("a general consent question still resolves to DSGVO, not §25 TDDDG", func(t *testing.T) {
 		in := []LegalSearchResult{
 			bindingRes("§ 25 TDDDG", "TDDDG", "DE", 0.70), // higher semantic but no cookie topic
 			bindingRes("Art. 7 DSGVO", "DSGVO", "EU", 0.66),
 		}
 		out := rerankByAuthority("Welche Anforderungen gelten an eine wirksame Einwilligung?", in)
 		if out[0].RegulationShort != "DSGVO" {
 			t.Fatalf("a general consent question must resolve to DSGVO (TDDDG demoted), got %q", out[0].ArticleLabel)
 		}
 	})
 	t.Run("co-primary dp_01: BDSG §38 stays top on a DSB question (national special rule)", func(t *testing.T) {
 		in := []LegalSearchResult{
 			bindingRes("§ 38 BDSG", "BDSG", "DE", 0.66),
 			bindingRes("Art. 37 DSGVO", "DSGVO", "EU", 0.64),
 		}
 		out := rerankByAuthority("Ab wann muss ein Datenschutzbeauftragter benannt werden?", in)
 		// DSB topic → §38 is co-primary (topic-matched, NOT subsidiarity-demoted) and keeps its
 		// semantic lead; Art. 37 stays a close second. Both remain top-2.
 		if out[0].RegulationShort != "BDSG" {
 			t.Fatalf("BDSG §38 (DSB co-primary) must stay top, got %q", out[0].ArticleLabel)
 		}
 		if out[1].RegulationShort != "DSGVO" {
 			t.Fatalf("Art. 37 DSGVO must stay co-primary second, got %q", out[1].ArticleLabel)
 		}
 	})
 	t.Run("nothing is dropped and topic amplifies", func(t *testing.T) {
 		in := []LegalSearchResult{
 			guidanceRes("ENISA", "ENISA", 0.72),
@@ -0,0 +1,89 @@
 package ucca
 import (
 	"fmt"
 	"os"
 	"path/filepath"
 	"runtime"
 )
 // graphCallerRel joins rel onto the directory that held THIS source file at build time. That
 // location does not depend on the process working directory, which keeps the graph data
 // findable under `go test` (cwd = package dir). It returns "" when the runtime cannot report
 // the caller. In a built container the source is gone, so the cwd-relative candidates carry
 // the load there instead.
 func graphCallerRel(rel string) string {
 	if _, srcFile, _, ok := runtime.Caller(0); ok {
 		return filepath.Join(filepath.Dir(srcFile), rel)
 	}
 	return ""
 }
 // firstExisting walks candidates in order and returns the first path that os.Stat resolves
 // and whose kind matches wantDir (true = directory, false = not a directory). Empty
 // candidates (e.g. unset env overrides) are ignored. It returns "" when nothing qualifies.
 func firstExisting(candidates []string, wantDir bool) string {
 	for _, candidate := range candidates {
 		if candidate == "" {
 			continue
 		}
 		if fi, statErr := os.Stat(candidate); statErr == nil && fi.IsDir() == wantDir {
 			return candidate
 		}
 	}
 	return ""
 }
 // LoadComplianceGraph assembles the file-backed Compliance Execution Graph from three sources:
 // the Registry join-key contract (obligations/obligation_join_keys.json — owned by the
 // Obligation session), our curated, accepted control mappings, and the evidence requirements.
 //
 // Every source is located by probing an ordered candidate list; the first existing entry wins.
 // Each list starts with an optional env override (BP_OBLIGATION_JOIN_KEYS,
 // BP_CONTROL_MAPPINGS_DIR, BP_EVIDENCE_DIR), followed by cwd-relative and source-relative
 // (graphCallerRel) paths that cover three layouts: dev (cwd = ai-compliance-sdk/, canonical
 // contract at ../obligations), container (WORKDIR /app, data/ copied in incl. a synced
 // data/obligations/ copy) and `go test` (cwd = package dir).
 //
 // Fail-closed: when a source cannot be located or fails to load, the three graph results are
 // nil and the error is non-nil, so the handler serves 503 — never a half-built graph.
 //
 // NOTE: data/obligations/obligation_join_keys.json is a SYNCED COPY of the repo-root contract
 // (the canonical owner is the Obligation session). Re-sync it when the Registry grows; dev/test
 // prefer the canonical repo-root path, only the container falls back to the copy.
 func LoadComplianceGraph() (*ObligationJoinKeys, *ControlMappingSet, *EvidenceRequirementSet, error) {
 	joinCandidates := []string{
 		os.Getenv("BP_OBLIGATION_JOIN_KEYS"),
 		"../obligations/obligation_join_keys.json",
 		graphCallerRel("../../../obligations/obligation_join_keys.json"),
 		"data/obligations/obligation_join_keys.json",
 		graphCallerRel("../../data/obligations/obligation_join_keys.json"),
 	}
 	joinPath := firstExisting(joinCandidates, false)
 	if joinPath == "" {
 		return nil, nil, nil, fmt.Errorf("obligation_join_keys.json not found in any candidate path")
 	}
 	mappingCandidates := []string{
 		os.Getenv("BP_CONTROL_MAPPINGS_DIR"),
 		"data/control_mappings",
 		graphCallerRel("../../data/control_mappings"),
 	}
 	mapDir := firstExisting(mappingCandidates, true)
 	if mapDir == "" {
 		return nil, nil, nil, fmt.Errorf("control_mappings dir not found in any candidate path")
 	}
 	evidenceCandidates := []string{
 		os.Getenv("BP_EVIDENCE_DIR"),
 		"data/evidence_requirements",
 		graphCallerRel("../../data/evidence_requirements"),
 	}
 	evDir := firstExisting(evidenceCandidates, true)
 	if evDir == "" {
 		return nil, nil, nil, fmt.Errorf("evidence_requirements dir not found in any candidate path")
 	}
 	// All three locations are known; load them in order and stop at the first failure.
 	joinKeys, joinErr := LoadObligationJoinKeys(joinPath)
 	if joinErr != nil {
 		return nil, nil, nil, fmt.Errorf("load join keys (%s): %w", joinPath, joinErr)
 	}
 	controlMappings, mapErr := LoadControlMappings(mapDir)
 	if mapErr != nil {
 		return nil, nil, nil, fmt.Errorf("load control mappings (%s): %w", mapDir, mapErr)
 	}
 	evidenceReqs, evErr := LoadEvidenceRequirements(evDir)
 	if evErr != nil {
 		return nil, nil, nil, fmt.Errorf("load evidence (%s): %w", evDir, evErr)
 	}
 	return joinKeys, controlMappings, evidenceReqs, nil
 }
@@ -0,0 +1,71 @@
 package ucca
 // ObligationStatus is the Advisor's vertical slice over the compliance graph for ONE legal
 // obligation: which accepted controls satisfy it, what evidence they require, what's missing,
 // and the resulting status. The point is "the required evidence is (not) present", not "a
 // document exists". citation_spans is pending until the Legal-Knowledge-Graph session attaches
 // them to the obligation (the upper half of the bridge).
 type ObligationStatus struct {
 	// ObligationID echoes the obligation_id the assessment was requested for.
 	ObligationID  string                    `json:"obligation_id"`
 	// LegalBasis is copied from the registry entry's CitationUnits.
 	LegalBasis    []string                  `json:"legal_basis"` // the obligation's citation_units
 	// Status is the assessment outcome as computed by AssessObligationStatus.
 	Status        string                    `json:"status"`      // erfuellt | offen | unklar
 	// Controls holds one entry per accepted control that reaches the obligation.
 	Controls      []ObligationControlStatus `json:"controls"`
 	// CitationSpans is a placeholder; AssessObligationStatus always sets it to "pending".
 	CitationSpans string                    `json:"citation_spans"` // "pending" until the registry fills them
 }
 // ObligationControlStatus is one control under an obligation with its evidence picture.
 // Framework, Control and MappingType are copied from the accepted ControlMapping
 // (TargetFramework, TargetControl, MappingType).
 type ObligationControlStatus struct {
 	Framework        string                `json:"framework"`
 	Control          string                `json:"control"`
 	MappingType      string                `json:"mapping_type"`
 	// RequiredEvidence is what the evidence set requires for (Framework, Control).
 	RequiredEvidence []EvidenceRequirement `json:"required_evidence"`
 	// MissingEvidence is the subset of RequiredEvidence not reported as collected.
 	MissingEvidence  []EvidenceRequirement `json:"missing_evidence"`
 }
 // AssessObligationStatus traverses obligation_id -> (citation_unit) -> accepted Controls ->
 // required Evidence -> Status for a single obligation.
 //
 // hasEvidence reports whether a given (framework, control, evidence_type) is already
 // collected; pass nil in the MVP (no collection yet), in which case every requirement counts
 // as missing.
 //
 // Resulting status:
 //   - "unklar": a graph part is nil, the obligation is unknown, no accepted control reaches
 //     it, or the reaching controls have no evidence requirement at all — there is nothing to
 //     assess against, so the obligation is never reported as fulfilled by default.
 //   - "offen": at least one required evidence item is missing.
 //   - "erfuellt": evidence is required and every required item is present.
 //
 // LegalBasis, Controls and the per-control evidence slices are non-nil on every path, so they
 // always encode as JSON arrays ([]) and never as null.
 func AssessObligationStatus(joins *ObligationJoinKeys, mappings *ControlMappingSet, evidence *EvidenceRequirementSet, obligationID string, hasEvidence func(framework, control, evidenceType string) bool) ObligationStatus {
 	st := ObligationStatus{
 		ObligationID:  obligationID,
 		LegalBasis:    []string{},
 		Status:        "unklar",
 		Controls:      []ObligationControlStatus{},
 		CitationSpans: "pending",
 	}
 	if joins == nil || mappings == nil || evidence == nil {
 		return st // fail closed instead of dereferencing a missing graph part
 	}
 	ob := joins.FindObligation(obligationID)
 	if ob == nil {
 		return st // unknown obligation_id
 	}
 	if ob.CitationUnits != nil {
 		st.LegalBasis = ob.CitationUnits
 	}
 	ctrls := AcceptedControlsForObligation(*ob, mappings)
 	if len(ctrls) == 0 {
 		return st // no accepted control reaches it — we cannot assess
 	}
 	totalRequired, totalMissing := 0, 0
 	for _, m := range ctrls {
 		req := evidence.RequiredFor(m.TargetFramework, m.TargetControl)
 		// Copy into fresh slices so both fields encode as [] even when nothing is required.
 		required := make([]EvidenceRequirement, 0, len(req))
 		missing := make([]EvidenceRequirement, 0, len(req))
 		for _, e := range req {
 			required = append(required, e)
 			if hasEvidence == nil || !hasEvidence(e.Framework, e.Control, e.EvidenceType) {
 				missing = append(missing, e)
 			}
 		}
 		totalRequired += len(required)
 		totalMissing += len(missing)
 		st.Controls = append(st.Controls, ObligationControlStatus{
 			Framework:        m.TargetFramework,
 			Control:          m.TargetControl,
 			MappingType:      m.MappingType,
 			RequiredEvidence: required,
 			MissingEvidence:  missing,
 		})
 	}
 	switch {
 	case totalRequired == 0:
 		// Controls exist but no evidence requirement is defined for any of them: "nothing
 		// missing" would be vacuous, so the status stays "unklar".
 	case totalMissing > 0:
 		st.Status = "offen"
 	default:
 		st.Status = "erfuellt"
 	}
 	return st
 }
@@ -0,0 +1,59 @@
 package ucca
 import "testing"
 // loadGraph reads the three graph sources from their package-relative locations and aborts
 // the calling test on the first load failure.
 func loadGraph(t *testing.T) (*ObligationJoinKeys, *ControlMappingSet, *EvidenceRequirementSet) {
 	t.Helper()
 	const (
 		joinKeysPath = "../../../obligations/obligation_join_keys.json"
 		mappingsDir  = "../../data/control_mappings"
 		evidenceDir  = "../../data/evidence_requirements"
 	)
 	joinKeys, joinErr := LoadObligationJoinKeys(joinKeysPath)
 	if joinErr != nil {
 		t.Fatalf("join keys: %v", joinErr)
 	}
 	controlMappings, mapErr := LoadControlMappings(mappingsDir)
 	if mapErr != nil {
 		t.Fatalf("mappings: %v", mapErr)
 	}
 	evidenceReqs, evErr := LoadEvidenceRequirements(evidenceDir)
 	if evErr != nil {
 		t.Fatalf("evidence: %v", evErr)
 	}
 	return joinKeys, controlMappings, evidenceReqs
 }
 // TestAssessObligationStatus walks the vertical slice for one covered obligation (nothing
 // collected -> offen, everything collected -> erfuellt) and checks that uncovered and unknown
 // obligations both come back as unklar.
 func TestAssessObligationStatus(t *testing.T) {
 	joins, maps, ev := loadGraph(t)
 	const covered = "user_authentication_required"
 	// covered obligation, no evidence collected yet (MVP: nil hasEvidence) -> offen
 	pending := AssessObligationStatus(joins, maps, ev, covered, nil)
 	if got := pending.Status; got != "offen" {
 		t.Errorf("want offen, got %q", got)
 	}
 	if len(pending.Controls) == 0 {
 		t.Fatal("expected controls for a covered obligation")
 	}
 	for i := range pending.Controls {
 		if ctl := pending.Controls[i]; len(ctl.MissingEvidence) != len(ctl.RequiredEvidence) {
 			t.Error("MVP: all required evidence should be missing")
 		}
 	}
 	t.Logf("DURCHSTICH user_authentication_required: status=%s legal_basis=%v citation_spans=%s",
 		pending.Status, pending.LegalBasis, pending.CitationSpans)
 	for _, ctl := range pending.Controls {
 		t.Logf("  %s %s (%s): %d required evidence, %d missing", ctl.Framework, ctl.Control, ctl.MappingType, len(ctl.RequiredEvidence), len(ctl.MissingEvidence))
 	}
 	// every evidence lookup succeeds -> erfuellt
 	alwaysPresent := func(string, string, string) bool { return true }
 	fulfilled := AssessObligationStatus(joins, maps, ev, covered, alwaysPresent)
 	if fulfilled.Status != "erfuellt" {
 		t.Errorf("want erfuellt with all evidence present, got %q", fulfilled.Status)
 	}
 	// uncovered obligation (no accepted control reaches it) -> unklar
 	uncovered := AssessObligationStatus(joins, maps, ev, "sbom_creation", nil)
 	if uncovered.Status != "unklar" {
 		t.Errorf("uncovered sbom_creation: want unklar, got %q", uncovered.Status)
 	}
 	// unknown obligation_id -> unklar
 	unknown := AssessObligationStatus(joins, maps, ev, "does_not_exist", nil)
 	if unknown.Status != "unklar" {
 		t.Errorf("unknown obligation: want unklar, got %q", unknown.Status)
 	}
 }
@@ -23,9 +23,10 @@ type ControlMapping struct {
 	SourceRole      string `json:"source_role"`             // source_role of the norm (operational_requirement, ...)
 	TargetFramework string `json:"target_framework"`        // e.g. "OWASP ASVS"
 	TargetControl   string `json:"target_control"`          // e.g. "V6.3.1"
-	MappingType     string `json:"mapping_type"`     // supports | partially_supports | implements | related | contradicts
+	MappingType     string `json:"mapping_type"`            // primary_implementation | implements | supports | partially_supports | related | contradicts
 	MappingStatus   string `json:"mapping_status"`          // candidate | accepted | rejected | superseded
 	Provenance      string `json:"provenance"`              // retriever_candidate | human_curated | rule_based
 	ObligationID    string `json:"obligation_id,omitempty"` // stable cross-session join key (Obligation Registry); empty until adopted, citation_unit is the interim bridge
 	Rationale       string `json:"rationale"`
 	ReviewedBy      string `json:"reviewed_by,omitempty"` // who decided (human or rule id)
 	ReviewDate      string `json:"review_date,omitempty"` // YYYY-MM-DD
@@ -35,7 +36,7 @@ type ControlMapping struct {
 // Allowed enum values — the deterministic "rule" layer that keeps the curated store clean.
 var (
-	mappingTypeValues   = map[string]bool{"supports": true, "partially_supports": true, "implements": true, "related": true, "contradicts": true}
+	mappingTypeValues   = map[string]bool{"primary_implementation": true, "implements": true, "supports": true, "partially_supports": true, "related": true, "contradicts": true}
 	mappingStatusValues = map[string]bool{"candidate": true, "accepted": true, "rejected": true, "superseded": true}
 	provenanceValues    = map[string]bool{"retriever_candidate": true, "human_curated": true, "rule_based": true}
 )
@@ -0,0 +1,172 @@
 package ucca
 import (
 	"encoding/json"
 	"os"
 	"regexp"
 	"strings"
 )
 // ObligationKey is one entry of the Obligation Registry's cross-session contract
 // (obligations/obligation_join_keys.json). obligation_id is the STABLE join key — assigned
 // only by the Registry, never minted here. citation_units are the interim bridge until our
 // ControlMapping adopts obligation_id directly.
 type ObligationKey struct {
 	// ObligationID is the stable join key compared against ControlMapping.ObligationID.
 	ObligationID  string   `json:"obligation_id"`
 	Regulation    string   `json:"regulation"`
 	// Family is carried through into the coverage report rows.
 	Family        string   `json:"family"`
 	Tier          string   `json:"tier"`
 	// CitationUnits are normalized with citationUnitKey for the interim join.
 	CitationUnits []string `json:"citation_units"`
 	SourceRole    string   `json:"source_role"`
 }
 // ObligationJoinKeys is the loaded contract + a citation-unit index for the interim join.
 // The exported fields are decoded from the contract JSON; byCitationKey is not part of the
 // file and is built by LoadObligationJoinKeys.
 type ObligationJoinKeys struct {
 	SchemaVersion string          `json:"schema_version"`
 	// Count is the entry count as declared in the file (not recomputed on load).
 	Count         int             `json:"count"`
 	ObligationIDs []ObligationKey `json:"obligation_ids"`
 	// byCitationKey maps a citationUnitKey to the obligation_ids whose citation units share it.
 	byCitationKey map[string][]string
 }
 // citationRefRe captures every parenthesised alphanumeric reference, e.g. "(2)" or "(c)".
 var citationRefRe = regexp.MustCompile(`\(([0-9a-zA-Z]+)\)`)
 // citationUnitKey normalizes a CRA Annex I reference for the INTERIM citation_unit join, so
 // our "CRA Annex I Part I (2)(c)" and the Registry's "Annex I (2)(c)" collapse to the same
 // key ("i:2.c"). The key is "<part>:<refs>": part is "ii", "i" or empty, refs are the
 // lower-cased parenthesised references joined by ".". Interim only — superseded by the stable
 // obligation_id once adopted.
 func citationUnitKey(cu string) string {
 	lowered := strings.ToLower(cu)
 	part := ""
 	if strings.Contains(lowered, "part ii") {
 		part = "ii"
 	} else if strings.Contains(lowered, "part i") || strings.Contains(lowered, "(2)") {
 		part = "i" // CRA Annex I Part I = the (2)(x) essential requirements
 	}
 	matches := citationRefRe.FindAllStringSubmatch(cu, -1)
 	refs := make([]string, len(matches))
 	for i, match := range matches {
 		refs[i] = strings.ToLower(match[1])
 	}
 	return part + ":" + strings.Join(refs, ".")
 }
 // LoadObligationJoinKeys reads the Registry contract at path, decodes it and builds the
 // citation-unit index used by the interim join. Read and decode errors are returned as-is.
 func LoadObligationJoinKeys(path string) (*ObligationJoinKeys, error) {
 	data, readErr := os.ReadFile(path)
 	if readErr != nil {
 		return nil, readErr
 	}
 	contract := new(ObligationJoinKeys)
 	if decodeErr := json.Unmarshal(data, contract); decodeErr != nil {
 		return nil, decodeErr
 	}
 	index := make(map[string][]string)
 	for i := range contract.ObligationIDs {
 		entry := &contract.ObligationIDs[i]
 		for _, unit := range entry.CitationUnits {
 			key := citationUnitKey(unit)
 			index[key] = append(index[key], entry.ObligationID)
 		}
 	}
 	contract.byCitationKey = index
 	return contract, nil
 }
 // ObligationsForCitation looks up the obligation_ids that join (interim) to a citation
 // reference such as a control_mapping.source_norm. The reference is normalized with
 // citationUnitKey first; an unmatched reference yields a nil slice.
 func (o *ObligationJoinKeys) ObligationsForCitation(citationRef string) []string {
 	key := citationUnitKey(citationRef)
 	return o.byCitationKey[key]
 }
 // FindObligation scans the contract for obligationID and returns a pointer to the matching
 // registry entry inside ObligationIDs, or nil if the id is unknown.
 func (o *ObligationJoinKeys) FindObligation(obligationID string) *ObligationKey {
 	for i := 0; i < len(o.ObligationIDs); i++ {
 		if entry := &o.ObligationIDs[i]; entry.ObligationID == obligationID {
 			return entry
 		}
 	}
 	return nil
 }
 // mappingReaches reports whether a control mapping reaches an obligation. A mapping that has
 // adopted an obligation_id matches EXACTLY on that id (semantic, preferred) and its coarse
 // citation_unit is ignored; a not-yet-adopted mapping falls back to the interim citation_unit
 // join via citationKeys. That is how the semantic join replaces the structural one (e.g.
 // V11.2.1 crypto no longer rides (2)(d) into user_authentication_required — it goes to
 // credential_confidentiality_protection).
 func mappingReaches(m ControlMapping, ob ObligationKey, citationKeys map[string]bool) bool {
 	if m.ObligationID == "" {
 		return citationKeys[citationUnitKey(m.SourceNorm)]
 	}
 	return m.ObligationID == ob.ObligationID
 }
 // AcceptedControlsForObligation collects our accepted control mappings that reach ob —
 // obligation_id-exact where adopted, citation_unit otherwise (see mappingReaches). Results keep
 // the order of mappings.All and are deduped by "framework:control"; the first mapping per
 // target control wins. The returned slice is never nil.
 func AcceptedControlsForObligation(ob ObligationKey, mappings *ControlMappingSet) []ControlMapping {
 	citationKeys := make(map[string]bool, len(ob.CitationUnits))
 	for i := range ob.CitationUnits {
 		citationKeys[citationUnitKey(ob.CitationUnits[i])] = true
 	}
 	accepted := make([]ControlMapping, 0)
 	taken := make(map[string]bool)
 	for _, candidate := range mappings.All {
 		if candidate.IsAccepted() && mappingReaches(candidate, ob, citationKeys) {
 			target := candidate.TargetFramework + ":" + candidate.TargetControl
 			if !taken[target] {
 				taken[target] = true
 				accepted = append(accepted, candidate)
 			}
 		}
 	}
 	return accepted
 }
 // ObligationCoverage is one row of the cross-session coverage report, produced per registry
 // obligation by ComputeObligationCoverage.
 type ObligationCoverage struct {
 	ObligationID     string   `json:"obligation_id"`
 	Family           string   `json:"family"`
 	Status           string   `json:"status"` // covered | mapped_rejected | uncovered
 	// AcceptedControls lists the reaching accepted controls as "framework:control", deduped.
 	AcceptedControls []string `json:"accepted_controls"`
 	// EvidenceCount sums the required-evidence entries over the AcceptedControls.
 	EvidenceCount    int      `json:"evidence_count"`
 }
 // ComputeObligationCoverage joins the Registry obligations to our control mappings — exact via
 // obligation_id where adopted, else via the interim citation_unit join — and reports per
 // obligation: covered (>=1 accepted control reaches it), mapped_rejected (only rejected
 // mappings reach it), or uncovered. The signal back to the Obligation session.
 //
 // One row is returned per registry obligation, in registry order. AcceptedControls is always a
 // non-nil slice so that uncovered and mapped_rejected rows encode as "accepted_controls": []
 // rather than null, matching the other graph builders in this package.
 func ComputeObligationCoverage(joins *ObligationJoinKeys, mappings *ControlMappingSet, evidence *EvidenceRequirementSet) []ObligationCoverage {
 	out := make([]ObligationCoverage, 0, len(joins.ObligationIDs))
 	for _, ob := range joins.ObligationIDs {
 		keys := make(map[string]bool, len(ob.CitationUnits))
 		for _, cu := range ob.CitationUnits {
 			keys[citationUnitKey(cu)] = true
 		}
 		cov := ObligationCoverage{
 			ObligationID:     ob.ObligationID,
 			Family:           ob.Family,
 			AcceptedControls: []string{},
 		}
 		seen := map[string]bool{}
 		rejected := false
 		for _, m := range mappings.All {
 			if !mappingReaches(m, ob, keys) {
 				continue
 			}
 			if m.IsAccepted() {
 				// Count each target control once, even if several mappings point at it.
 				ck := m.TargetFramework + ":" + m.TargetControl
 				if !seen[ck] {
 					seen[ck] = true
 					cov.AcceptedControls = append(cov.AcceptedControls, ck)
 					cov.EvidenceCount += len(evidence.RequiredFor(m.TargetFramework, m.TargetControl))
 				}
 			} else if m.MappingStatus == "rejected" {
 				rejected = true
 			}
 		}
 		switch {
 		case len(cov.AcceptedControls) > 0:
 			cov.Status = "covered"
 		case rejected:
 			cov.Status = "mapped_rejected"
 		default:
 			cov.Status = "uncovered"
 		}
 		out = append(out, cov)
 	}
 	return out
 }
@@ -0,0 +1,61 @@
 package ucca
 import "testing"
 // TestCitationUnitKey_Join pins the interim join: our source_norm and the registry
 // citation_unit must normalize to the same key, and Part II must stay separate from Part I.
 func TestCitationUnitKey_Join(t *testing.T) {
 	// our source_norm and the registry citation_unit must collapse to the SAME key.
 	// The failure message prints exactly the two keys that were compared.
 	ourKey := citationUnitKey("CRA Annex I Part I (2)(c) — Schutz vor unbefugtem Zugriff")
 	registryKey := citationUnitKey("Annex I (2)(c)")
 	if ourKey != registryKey {
 		t.Errorf("interim join broken: %q vs %q", ourKey, registryKey)
 	}
 	// Part II must NOT collide with Part I.
 	if citationUnitKey("Annex I Part II (1)") == citationUnitKey("CRA Annex I Part I (2)(c)") {
 		t.Error("Part II must not join to Part I")
 	}
 }
 // TestLoadObligationJoinKeys loads the canonical contract and checks that the declared count
 // matches the entries, that the contract is non-empty, and that (2)(c) joins to something.
 func TestLoadObligationJoinKeys(t *testing.T) {
 	contract, loadErr := LoadObligationJoinKeys("../../../obligations/obligation_join_keys.json")
 	if loadErr != nil {
 		t.Fatalf("load: %v", loadErr)
 	}
 	entries := len(contract.ObligationIDs)
 	if contract.Count != entries {
 		t.Errorf("count %d != len %d", contract.Count, entries)
 	}
 	if entries == 0 {
 		t.Fatal("empty contract")
 	}
 	joined := contract.ObligationsForCitation("CRA Annex I Part I (2)(c)")
 	if len(joined) == 0 {
 		t.Error("expected an obligation joined to (2)(c)")
 	}
 }
 // TestObligationCoverage_Report computes the coverage report over the real graph data, fails
 // only if a source cannot be loaded or no rows come back, and logs the per-status totals plus
 // every row that is not uncovered.
 func TestObligationCoverage_Report(t *testing.T) {
 	joinKeys, joinErr := LoadObligationJoinKeys("../../../obligations/obligation_join_keys.json")
 	if joinErr != nil {
 		t.Fatalf("join keys: %v", joinErr)
 	}
 	controlMappings, mapErr := LoadControlMappings("../../data/control_mappings")
 	if mapErr != nil {
 		t.Fatalf("mappings: %v", mapErr)
 	}
 	evidenceReqs, evErr := LoadEvidenceRequirements("../../data/evidence_requirements")
 	if evErr != nil {
 		t.Fatalf("evidence: %v", evErr)
 	}
 	rows := ComputeObligationCoverage(joinKeys, controlMappings, evidenceReqs)
 	if len(rows) == 0 {
 		t.Fatal("no coverage computed")
 	}
 	totals := make(map[string]int)
 	for i := range rows {
 		totals[rows[i].Status]++
 	}
 	t.Logf("COVERAGE: %d Obligations | covered=%d mapped_rejected=%d uncovered=%d",
 		len(rows), totals["covered"], totals["mapped_rejected"], totals["uncovered"])
 	for _, row := range rows {
 		if row.Status == "uncovered" {
 			continue
 		}
 		t.Logf("  %-15s %-36s controls=%v evidence=%d", row.Status, row.ObligationID, row.AcceptedControls, row.EvidenceCount)
 	}
 }
@@ -77,6 +77,8 @@ _ROUTER_MODULES = [
    "licenses_routes",
    "template_rule_routes",
    "specialist_agent_routes",
    "reasoning_routes",
    "onboarding_routes",
 ]
 _loaded_count = 0
@@ -0,0 +1,72 @@
 """Onboarding Advisor endpoint — exposes the existing Smart Onboarding Advisor at runtime.
 This adds NO new reasoning logic. It exposes the already-built, tested orchestration (Signal Producers
 -> Normalizer -> Silent Knowledge Pass -> Advisor) through one runtime endpoint. No DB, no persistence.
  POST /onboarding/advisor-start  — (company + certs + target + scanner findings) -> advisory payload
  GET  /onboarding/targets        — the supported target ids
 """
 import logging
 from typing import List, Optional
 from fastapi import APIRouter, HTTPException
 from pydantic import BaseModel, Field
 from compliance.onboarding import (
    AdvisorMeasure,
    AdvisorQuestion,
    InferredAssumption,
    ProducedSignal,
    RejectedAssumption,
 )
 from compliance.services.onboarding_service import run_advisor, supported_targets
 logger = logging.getLogger(__name__)
 router = APIRouter(prefix="/onboarding", tags=["onboarding"])
 # Request body of POST /onboarding/advisor-start. Every field has a default, so an empty JSON
 # object is accepted. All fields are passed to run_advisor by the handler.
 class OnboardingAdvisorRequest(BaseModel):
    company: str = ""
    # Optional; the handler passes "" to run_advisor when this is None or empty.
    industry: Optional[str] = None
    products: List[str] = Field(default_factory=list)
    markets: List[str] = Field(default_factory=list)
    certifications: List[str] = Field(default_factory=list)
    known_evidence: List[str] = Field(default_factory=list)
    # Must be one of supported_targets(); otherwise the handler answers 404.
    target: str = "CRA"
    scanner_findings: List[ProducedSignal] = Field(default_factory=list)   # adapters upstream produced these
 # Response body of POST /onboarding/advisor-start. silent_intake_summary is the second value
 # returned by run_advisor; every other field is copied from the advisor result by the handler.
 class AdvisorResponse(BaseModel):
    silent_intake_summary: str = ""
    headline: str = ""
    auto_detected: List[str] = Field(default_factory=list)
    inferred_assumptions: List[InferredAssumption] = Field(default_factory=list)
    rejected_assumptions: List[RejectedAssumption] = Field(default_factory=list)
    # Filled from the advisor result's next_best_questions attribute.
    top_5_questions: List[AdvisorQuestion] = Field(default_factory=list)
    capability_delta: List[str] = Field(default_factory=list)
    top_measures: List[AdvisorMeasure] = Field(default_factory=list)
    evidence_requests: List[str] = Field(default_factory=list)
    unsupported_domains: List[str] = Field(default_factory=list)
    completeness_summary: str = ""
@router.get("/targets")
 def list_targets() -> dict:
    # Wrap the supported target ids in a {"targets": ...} envelope.
    targets = supported_targets()
    return {"targets": targets}
@router.post("/advisor-start", response_model=AdvisorResponse)
 def advisor_start_endpoint(req: OnboardingAdvisorRequest) -> AdvisorResponse:
    # Reject targets the service does not know before running any orchestration.
    target_is_known = req.target in supported_targets()
    if not target_is_known:
        detail = "unsupported target '%s'; supported: %s" % (req.target, supported_targets())
        raise HTTPException(status_code=404, detail=detail)
    # Hand the request fields to the service; a missing industry is passed as "".
    advisor_kwargs = dict(
        company=req.company,
        certifications=req.certifications,
        target=req.target,
        signals=req.scanner_findings,
        known_evidence=req.known_evidence,
        products=req.products,
        markets=req.markets,
        industry=req.industry or "",
    )
    result, si_summary = run_advisor(**advisor_kwargs)
    # Map the advisor result onto the response contract (next_best_questions -> top_5_questions).
    payload = {
        "silent_intake_summary": si_summary,
        "headline": result.headline,
        "auto_detected": result.auto_detected,
        "inferred_assumptions": result.inferred_assumptions,
        "rejected_assumptions": result.rejected_assumptions,
        "top_5_questions": result.next_best_questions,
        "capability_delta": result.capability_delta,
        "top_measures": result.top_measures,
        "evidence_requests": result.evidence_requests,
        "unsupported_domains": result.unsupported_domains,
        "completeness_summary": result.completeness_summary,
    }
    return AdvisorResponse(**payload)
@@ -0,0 +1,98 @@
 """HTTP endpoints for the Regulatory Reasoning Engine (spec §7).
 Thin handlers — all reasoning lives in `compliance.reasoning.*`. No DB, no RAG;
 pure deterministic rule evaluation.
    POST /reasoning/scope                      -> which regulations apply + missing facts
    POST /reasoning/obligations                -> obligations, overlaps, multi-evidence
    POST /reasoning/implementation-reasoning   -> claim->obligation mapping (Welt 1, no verdict)
    POST /reasoning/interpretation-assessment  -> verdict on a customer interpretation
    POST /reasoning/product-scope              -> gate on facts, else run discover_scope once
    POST /reasoning/regulatory-map             -> customer-readable read-model over the scope
    POST /reasoning/interpretation-in-map      -> judge a customer interpretation within the map
 """
 from __future__ import annotations
 from fastapi import APIRouter
 from compliance.interpretation_map import (
    InterpretationInMapRequest,
    InterpretationInMapResult,
    interpret_in_map,
 )
 from compliance.product_scope import (
    ProductScopeRequest,
    ProductScopeResponse,
    resolve_product_scope,
 )
 from compliance.regulatory_map import RegulatoryMap, RegulatoryMapRequest, render_regulatory_map
 from compliance.reasoning import (
    assess_interpretation,
    derive_obligations,
    discover_scope,
    reason_implementation_claim,
 )
 from compliance.reasoning.schemas import (
    ImplementationReasoningRequest,
    ImplementationReasoningResponse,
    InterpretationRequest,
    InterpretationResponse,
    ObligationsRequest,
    ObligationsResponse,
    ScopeRequest,
    ScopeResponse,
 )
 router = APIRouter(prefix="/reasoning", tags=["reasoning"])
@router.post("/scope", response_model=ScopeResponse)
 def scope_discovery(req: ScopeRequest) -> ScopeResponse:
    # Run scope discovery once, then surface its missing facts and confidence next to the scope.
    discovered = discover_scope(req.product_profile)
    response_fields = {
        "regulatory_scope": discovered,
        "missing_facts": discovered.missing_facts,
        "confidence": discovered.confidence,
    }
    return ScopeResponse(**response_fields)
@router.post("/obligations", response_model=ObligationsResponse)
 def applicable_obligations(req: ObligationsRequest) -> ObligationsResponse:
    # Thin handler: delegate to derive_obligations with the request's profile and scope.
    profile, scope = req.product_profile, req.regulatory_scope
    return derive_obligations(profile, scope)
@router.post("/implementation-reasoning", response_model=ImplementationReasoningResponse)
 def implementation_reasoning(req: ImplementationReasoningRequest) -> ImplementationReasoningResponse:
    # Thin handler: delegate to reason_implementation_claim with the profile and the claim.
    profile = req.product_profile
    claim = req.customer_claim
    return reason_implementation_claim(profile, claim)
@router.post("/product-scope", response_model=ProductScopeResponse)
 def product_scope(req: ProductScopeRequest) -> ProductScopeResponse:
    # Thin handler: delegate to resolve_product_scope with the request's product profile.
    profile = req.product_profile
    return resolve_product_scope(profile)
@router.post("/regulatory-map", response_model=RegulatoryMap)
 def regulatory_map(req: RegulatoryMapRequest) -> RegulatoryMap:
    # Thin handler: render the regulatory map for the request's product profile.
    profile = req.product_profile
    return render_regulatory_map(profile)
@router.post("/interpretation-in-map", response_model=InterpretationInMapResult)
 def interpretation_in_map(req: InterpretationInMapRequest) -> InterpretationInMapResult:
    # Render the map for the profile first, then judge the customer interpretation within it.
    return interpret_in_map(
        render_regulatory_map(req.product_profile),
        req.customer_interpretation,
    )
@router.post("/interpretation-assessment", response_model=InterpretationResponse)
 def interpretation_assessment(req: InterpretationRequest) -> InterpretationResponse:
    # Assess the interpretation, then copy the verdict's fields one-to-one into the response.
    verdict = assess_interpretation(req.customer_interpretation, req.product_profile)
    copied_fields = (
        "assessment",
        "affected_regulations",
        "affected_obligations",
        "corrected_interpretation",
        "risks",
        "legal_basis_refs",
        "explanation",
        "confidence",
    )
    return InterpretationResponse(**{name: getattr(verdict, name) for name in copied_fields})
@@ -0,0 +1,70 @@
 """Master Capability Registry v0 (Phase 2C) — Compliance Execution domain.
 Registry + minting layer for Master Capabilities — the third instance of the
 identity-machine pattern (Master Controls, Master Obligations, Master Capabilities).
 STORED: identities, sources, relationship types, policy versions, lifecycle events,
 provenance. DERIVED (never stored): confidence, coverage, gap.
 v0 scope: types + minting + typed relations + versioned policy + identity lifecycle.
 NOT here: Company-Gap, real ISO/cert mappings, certification derivations, UI, RAG,
 new meta-model class, generic canonicalization engine.
 """
 from __future__ import annotations
 from .engine import (
    CapabilityRegistry,
    deprecate_capability,
    evaluate_relation,
    merge_capabilities,
    mint_capability,
    resolve,
    split_capability,
 )
 from .policy import DEFAULT_POLICY, assert_no_certification_confirms
 from .schemas import (
    AssertionStatus,
    CapabilityCandidate,
    CapabilityRelation,
    Confidence,
    DerivedAssessment,
    EvidenceKind,
    IdentityLifecycleEvent,
    LifecycleEventType,
    LifecycleState,
    MasterCapability,
    PolicyRule,
    PolicyVersion,
    Provenance,
    RelationType,
 )
 # Public API of the package: the names re-exported from .engine, .policy and .schemas,
 # grouped by the module they come from.
 __all__ = [
    # engine
    "CapabilityRegistry",
    "mint_capability",
    "evaluate_relation",
    "resolve",
    "deprecate_capability",
    "merge_capabilities",
    "split_capability",
    # policy
    "DEFAULT_POLICY",
    "assert_no_certification_confirms",
    # schemas
    "MasterCapability",
    "CapabilityCandidate",
    "CapabilityRelation",
    "RelationType",
    "EvidenceKind",
    "AssertionStatus",
    "Confidence",
    "PolicyRule",
    "PolicyVersion",
    "IdentityLifecycleEvent",
    "LifecycleEventType",
    "LifecycleState",
    "Provenance",
    "DerivedAssessment",
 ]
@@ -0,0 +1,191 @@
 """Master Capability Registry v0 — minting, derivation, identity lifecycle.
 STORED on the registry: identities, sources, relation types, policy versions,
 lifecycle events, provenance. DERIVED (never stored): confidence/status, via
 `evaluate_relation` under a versioned policy.
 Python 3.9 compatible (no `|` unions).
 """
 from __future__ import annotations
 from typing import Dict, List, Optional, Set
 from pydantic import BaseModel, Field
 from .policy import DEFAULT_POLICY
 from .schemas import (
    AssertionStatus,
    CapabilityCandidate,
    CapabilityRelation,
    Confidence,
    DerivedAssessment,
    IdentityLifecycleEvent,
    LifecycleEventType,
    LifecycleState,
    MasterCapability,
    PolicyVersion,
    Provenance,
 )
 # In-memory registry state that the functions in this module read and mutate.
 class CapabilityRegistry(BaseModel):
    # NOTE: no confidence/coverage field anywhere — those are DERIVED, never stored.
    # Keyed by capability_id (mint_capability stores each new capability under its MCAP id).
    capabilities: Dict[str, MasterCapability] = Field(default_factory=dict)
    relations: List[CapabilityRelation] = Field(default_factory=list)
    # Appended to by deprecate_capability, merge_capabilities and split_capability.
    lifecycle_events: List[IdentityLifecycleEvent] = Field(default_factory=list)
    # NOTE(review): the factory returns the module-level DEFAULT_POLICY object itself, not a
    # copy — confirm that nothing mutates registry.policy in place.
    policy: PolicyVersion = Field(default_factory=lambda: DEFAULT_POLICY)
    # Serial used for the next minted id; mint_capability increments it after each mint.
    next_serial: int = 1
 def _mcap_id(serial: int) -> str:
    return "MCAP-%05d" % serial
 def _next_event_id(registry: "CapabilityRegistry") -> str:
    return "evt-%d" % (len(registry.lifecycle_events) + 1)
 def mint_capability(
    registry: CapabilityRegistry,
    candidate: CapabilityCandidate,
    provenance: Optional[Provenance] = None,
    name: str = "",
    definition: str = "",
    category: str = "",
    domains: Optional[List[str]] = None,
 ) -> MasterCapability:
    """Mint a MasterCapability for `candidate` under the next stable MCAP id and register it.

    The name falls back to the candidate's normalized term, then to its raw term. When no
    provenance is given, a "system" provenance naming the raw term is recorded. The registry's
    next_serial is advanced by one. Returns the newly registered capability.
    """
    assigned_id = _mcap_id(registry.next_serial)
    display_name = name or candidate.normalized or candidate.raw_term
    if not provenance:
        provenance = Provenance(
            author="system", basis="minted from candidate '%s'" % candidate.raw_term
        )
    minted = MasterCapability(
        capability_id=assigned_id,
        name=display_name,
        definition=definition,
        category=category,
        domains=domains or [],
        provenance=provenance,
    )
    registry.capabilities[assigned_id] = minted
    registry.next_serial += 1
    return minted
 def evaluate_relation(
    relation: CapabilityRelation, policy: Optional[PolicyVersion] = None
 ) -> DerivedAssessment:
    """Derive (status, confidence) for `relation` under a versioned policy.

    The first policy rule whose relationship_type and evidence_kind both equal the relation's
    decides the outcome; with no matching rule the result is UNKNOWN / LOW and the explanation
    ends in " (no rule)". DEFAULT_POLICY is used when `policy` is None. Deterministic; the
    assessment is returned, never stored.
    """
    active_policy = DEFAULT_POLICY if policy is None else policy
    matched = next(
        (
            rule
            for rule in active_policy.rules
            if rule.relationship_type == relation.relationship_type
            and rule.evidence_kind == relation.evidence_kind
        ),
        None,
    )
    if matched is None:
        status, confidence, note = AssertionStatus.UNKNOWN, Confidence.LOW, " (no rule)"
    else:
        status, confidence, note = matched.status, matched.confidence, ""
    explanation = "%s + %s under %s -> %s/%s%s" % (
        relation.relationship_type.value,
        relation.evidence_kind.value,
        active_policy.policy_version,
        status.value,
        confidence.value,
        note,
    )
    return DerivedAssessment(
        target_capability_id=relation.target_capability_id,
        status=status,
        confidence=confidence,
        policy_version=active_policy.policy_version,
        explanation=explanation,
    )
 def resolve(
    registry: CapabilityRegistry, capability_id: str, _seen: Optional[Set[str]] = None
 ) -> Optional[MasterCapability]:
    """Follow redirects (from merge/deprecate) to the current canonical capability.

    Returns None when the id is unknown, when the redirect chain loops, or when the chain ends
    at a capability that is not ACTIVE. Visited ids are added to `_seen` when one is passed in.
    """
    visited = _seen if _seen is not None else set()
    current_id = capability_id
    while True:
        if current_id in visited:
            return None  # redirect cycle guard
        visited.add(current_id)
        cap = registry.capabilities.get(current_id)
        if cap is None:
            return None
        if not cap.redirect_to:
            # terminal: only an ACTIVE capability resolves; a deprecated dead-end -> None
            return cap if cap.state == LifecycleState.ACTIVE else None
        current_id = cap.redirect_to
 def deprecate_capability(
    registry: CapabilityRegistry,
    capability_id: str,
    redirect_to: Optional[str] = None,
    provenance: Optional[Provenance] = None,
 ) -> IdentityLifecycleEvent:
    """Mark a capability DEPRECATED, optionally redirecting it, and log the lifecycle event.

    The event type is REDIRECT (to_ids = [redirect_to]) when a redirect is given, otherwise
    DEPRECATE with empty to_ids. A "system" provenance is recorded when none is supplied.
    Raises KeyError(capability_id) if the capability is not registered.
    """
    target = registry.capabilities.get(capability_id)
    if target is None:
        raise KeyError(capability_id)
    target.state = LifecycleState.DEPRECATED
    target.redirect_to = redirect_to
    if redirect_to:
        kind, successors = LifecycleEventType.REDIRECT, [redirect_to]
    else:
        kind, successors = LifecycleEventType.DEPRECATE, []
    if not provenance:
        provenance = Provenance(author="system", basis="deprecate %s" % capability_id)
    recorded = IdentityLifecycleEvent(
        event_id=_next_event_id(registry),
        event_type=kind,
        from_ids=[capability_id],
        to_ids=successors,
        provenance=provenance,
    )
    registry.lifecycle_events.append(recorded)
    return recorded
 def merge_capabilities(
    registry: CapabilityRegistry,
    from_id: str,
    into_id: str,
    provenance: Optional[Provenance] = None,
 ) -> IdentityLifecycleEvent:
    """Merge `from_id` into `into_id`: deprecate `from_id` with a redirect to `into_id`.

    Records a MERGE lifecycle event (from_ids=[from_id], to_ids=[into_id]) and returns it. A
    "system" provenance is recorded when none is supplied.

    Raises:
        KeyError: `from_id` or `into_id` is not registered.
        ValueError: `from_id` equals `into_id`. The registry is left untouched.
    """
    if from_id not in registry.capabilities or into_id not in registry.capabilities:
        raise KeyError("%s or %s" % (from_id, into_id))
    if from_id == into_id:
        # A self-merge would deprecate the capability and redirect it to itself; resolve()
        # would then stop at its cycle guard and the identity could never be resolved again.
        raise ValueError("cannot merge capability %s into itself" % from_id)
    frm = registry.capabilities[from_id]
    frm.state = LifecycleState.DEPRECATED
    frm.redirect_to = into_id
    event = IdentityLifecycleEvent(
        event_id=_next_event_id(registry),
        event_type=LifecycleEventType.MERGE,
        from_ids=[from_id],
        to_ids=[into_id],
        provenance=provenance or Provenance(author="system", basis="merge %s -> %s" % (from_id, into_id)),
    )
    registry.lifecycle_events.append(event)
    return event
 def split_capability(
    registry: CapabilityRegistry,
    from_id: str,
    into_ids: List[str],
    primary: Optional[str] = None,
    provenance: Optional[Provenance] = None,
 ) -> IdentityLifecycleEvent:
    """Split `from_id` into several capabilities and log a SPLIT lifecycle event.

    The old id is deprecated. It redirects to `primary` only when one is given; otherwise it
    has no redirect and resolves to None, because a split is ambiguous. The event's to_ids is
    a copy of `into_ids`. Raises KeyError(from_id) if `from_id` is not registered.
    """
    if from_id not in registry.capabilities:
        raise KeyError(from_id)
    retired = registry.capabilities[from_id]
    retired.state = LifecycleState.DEPRECATED
    retired.redirect_to = primary
    if not provenance:
        provenance = Provenance(author="system", basis="split %s" % from_id)
    recorded = IdentityLifecycleEvent(
        event_id=_next_event_id(registry),
        event_type=LifecycleEventType.SPLIT,
        from_ids=[from_id],
        to_ids=list(into_ids),
        provenance=provenance,
    )
    registry.lifecycle_events.append(recorded)
    return recorded
@@ -0,0 +1,65 @@
 """Derivation policy v0 for the Master Capability Registry.
 Confidence + status are DERIVED from (relationship_type, evidence_kind) under a
 versioned policy — never stored. HARD RULE baked in and structurally guarded: a
 CERTIFICATION is a claim, never proof — no certification-backed rule may yield
 CONFIRMED. CONFIRMED requires a CONFIRMS relation backed by a concrete ARTIFACT
 (or an EXPERT assertion).
 Python 3.9 compatible (no `|` unions).
 """
 from __future__ import annotations
 from .schemas import (
    AssertionStatus,
    Confidence,
    EvidenceKind,
    PolicyRule,
    PolicyVersion,
    RelationType,
 )
 def _rule(rt: RelationType, ek: EvidenceKind, st: AssertionStatus, cf: Confidence) -> PolicyRule:
    """Positional shorthand for one policy row: (relation, evidence) -> (status, confidence)."""
    row = {
        "relationship_type": rt,
        "evidence_kind": ek,
        "status": st,
        "confidence": cf,
    }
    return PolicyRule(**row)
 # Policy table v0. Each row reads (relationship_type, evidence_kind, status, confidence)
 # and is expanded into a PolicyRule through _rule(); row order is the rule order.
 _V0_RULES = [
    _rule(*row)
    for row in (
        # a concrete artifact or an expert confirming the capability -> CONFIRMED
        (RelationType.CONFIRMS, EvidenceKind.ARTIFACT, AssertionStatus.CONFIRMED, Confidence.HIGH),
        (RelationType.CONFIRMS, EvidenceKind.EXPERT, AssertionStatus.CONFIRMED, Confidence.MEDIUM),
        # equivalent capability, certificate or artifact behind it -> INFERRED (never confirmed)
        (RelationType.EQUIVALENT, EvidenceKind.CERTIFICATION, AssertionStatus.INFERRED, Confidence.HIGH),
        (RelationType.EQUIVALENT, EvidenceKind.ARTIFACT, AssertionStatus.INFERRED, Confidence.HIGH),
        # supports: a weaker link than equivalence
        (RelationType.SUPPORTS, EvidenceKind.CERTIFICATION, AssertionStatus.INFERRED, Confidence.LOW),
        (RelationType.SUPPORTS, EvidenceKind.ARTIFACT, AssertionStatus.INFERRED, Confidence.MEDIUM),
        # requires: an obligation NEEDS the capability (relevance, not possession)
        (RelationType.REQUIRES, EvidenceKind.NONE, AssertionStatus.UNKNOWN, Confidence.LOW),
        # broader / narrower / merely related certificate -> weak inference or no statement
        (RelationType.BROADER_THAN, EvidenceKind.CERTIFICATION, AssertionStatus.INFERRED, Confidence.LOW),
        (RelationType.NARROWER_THAN, EvidenceKind.CERTIFICATION, AssertionStatus.INFERRED, Confidence.LOW),
        (RelationType.RELATED_TO, EvidenceKind.CERTIFICATION, AssertionStatus.UNKNOWN, Confidence.LOW),
    )
 ]
 # The policy shipped by default. Its `policy_version` string identifies the rule set
 # (_V0_RULES) that a derived status/confidence was computed under.
 DEFAULT_POLICY = PolicyVersion(
    policy_version="capability-policy-v0",
    description="v0: certification never yields CONFIRMED; only CONFIRMS + ARTIFACT/EXPERT does.",
    rules=_V0_RULES,
 )
 def assert_no_certification_confirms(policy: PolicyVersion) -> None:
    """Enforce the hard rule structurally: certification evidence never yields CONFIRMED.
    Scans `policy.rules` in order and raises on the first rule whose evidence kind is
    CERTIFICATION while its status is CONFIRMED. Returns None when the policy is clean.
    Raises:
        ValueError: naming the policy version and the offending relationship type.
    """
    offender = next(
        (
            rule
            for rule in policy.rules
            if rule.evidence_kind == EvidenceKind.CERTIFICATION and rule.status == AssertionStatus.CONFIRMED
        ),
        None,
    )
    if offender is None:
        return
    details = (policy.policy_version, offender.relationship_type.value)
    raise ValueError("policy %s violates hard rule: certification -> confirmed (%s)" % details)
 # Fail fast at import time: run the structural guard against the shipped default, so a
 # rule set that violates the hard rule raises ValueError as soon as this module loads.
 assert_no_certification_confirms(DEFAULT_POLICY)
@@ -0,0 +1,150 @@
 """Master Capability Registry v0 — Compliance Execution domain (Phase 2C).
 Built from the Reasoning session per user directive, but this IS the Compliance
 Execution model (Execution owns Capability). Third real instance of the
 identity-machine pattern (after Master Controls and Master Obligations):
    Candidate -> Normalization -> Dedup -> Stable Identity (MCAP) -> Typed Relations
 KEY SENTENCE (stored vs derived):
    STORED  : identities, sources, relationship types, policy versions, lifecycle
              events, provenance.
    DERIVED : confidence, coverage and gap statements — computed on demand, NEVER
              stored (see policy.py / engine.evaluate_relation).
 These are APPLICATION/registry types, NOT compliance-meta-model classes. In
 particular `CapabilityRelation` is relation METADATA inside the registry — it does
 NOT introduce a new meta-model class. Whether a reified relation must enter the
 frozen meta-model is a Meta-Model-Owner decision (architecture freeze v1.0),
 deferred until a demonstrable failure case exists.
 Self-contained (no Reasoning import — Reasoning consumes Capability, not the other
 way round). Python 3.9 compatible (no `|` unions).
 """
 from __future__ import annotations
 from enum import Enum
 from typing import List, Optional
 from pydantic import BaseModel, Field
 class Confidence(str, Enum):
    """Coarse three-step confidence level carried by policy rules and derived assessments."""
    HIGH = "high"
    MEDIUM = "medium"
    LOW = "low"
 class AssertionStatus(str, Enum):
    """How well-established a capability claim is. A numeric score is presentation;
    THIS type is the truth (derived from relationship type + evidence + policy)."""
    DECLARED = "declared"
    INFERRED = "inferred"
    # under the v0 policy only a CONFIRMS relation backed by ARTIFACT or EXPERT evidence yields this
    CONFIRMED = "confirmed"
    UNKNOWN = "unknown"
 class RelationType(str, Enum):
    """Type of the link between an external source and a master capability.
    Together with the evidence kind it is the lookup key of the derivation policy."""
    EQUIVALENT = "equivalent"
    SUPPORTS = "supports"
    REQUIRES = "requires"  # an obligation needs the capability (relevance, not possession)
    CONFIRMS = "confirms"  # the only relation the v0 policy maps to CONFIRMED
    BROADER_THAN = "broader_than"
    NARROWER_THAN = "narrower_than"
    RELATED_TO = "related_to"
 class EvidenceKind(str, Enum):
    """What kind of evidence stands behind a capability relation."""
    CERTIFICATION = "certification"  # a held certificate — a CLAIM, never proof
    ARTIFACT = "artifact"  # concrete doc/config/test/log
    EXPERT = "expert"  # human expert assertion
    NONE = "none"  # no evidence attached; the default on CapabilityRelation
 class LifecycleState(str, Enum):
    """Lifecycle state of a master capability identity."""
    ACTIVE = "active"  # default state of a MasterCapability
    DEPRECATED = "deprecated"  # set on the old id by the merge and split operations
 class LifecycleEventType(str, Enum):
    """Kind of identity lifecycle event recorded in the registry's event log."""
    MERGE = "merge"  # emitted by merge_capabilities
    SPLIT = "split"  # emitted by split_capability
    DEPRECATE = "deprecate"
    REDIRECT = "redirect"  # NOTE(review): no emitter visible in this change — confirm usage
 class Provenance(BaseModel):
    """Every CURATED atom carries its own provenance (who / when / on what basis)."""
    author: str = ""  # who asserted it; the lifecycle helpers use "system" when none is supplied
    asserted_at: Optional[str] = None  # ISO timestamp passed in; never generated here
    basis: str = ""  # free-text justification, e.g. "merge A -> B"
 # ── stored: identity ──────────────────────────────────────────────────────
 class MasterCapability(BaseModel):
    """A stored master capability identity (MCAP) with its descriptive metadata and
    lifecycle state. Confidence and coverage are not fields here — they are derived."""
    capability_id: str  # stable MCAP-xxxxx
    name: str = ""
    definition: str = ""
    category: str = ""
    domains: List[str] = Field(default_factory=list)
    typical_evidence: List[str] = Field(default_factory=list)
    version: int = 1
    state: LifecycleState = LifecycleState.ACTIVE
    # set on merge/deprecate; split sets it to the chosen primary successor (None if no primary was given)
    redirect_to: Optional[str] = None
    provenance: Provenance = Field(default_factory=Provenance)
 class CapabilityCandidate(BaseModel):
    """A raw capability term before it has a stable identity — the "Candidate" stage of
    the Candidate -> Normalization -> Dedup -> Stable Identity pipeline."""
    raw_term: str  # e.g. "Patch Management"
    source: str = ""  # e.g. "CRA:Annex I (2)(d)"
    normalized: str = ""  # presumably the normalised form of raw_term, empty until set — TODO confirm
 # ── stored: typed relation metadata (NOT a meta-model class) ──────────────
 class CapabilityRelation(BaseModel):
    """Stored, typed link from an external source to a master capability.
    Holds only the relationship type and evidence kind; status and confidence are
    derived from those two under a policy and are not stored on the relation."""
    relation_id: str
    source: str  # external term/obligation/certification id, e.g. "certification:ISO27001"
    target_capability_id: str  # MCAP-...
    relationship_type: RelationType
    evidence_kind: EvidenceKind = EvidenceKind.NONE
    provenance: Provenance = Field(default_factory=Provenance)
 # ── stored: versioned derivation policy ───────────────────────────────────
 class PolicyRule(BaseModel):
    """One row of a derivation policy: (relationship_type, evidence_kind) -> (status, confidence)."""
    relationship_type: RelationType  # lookup key, part 1
    evidence_kind: EvidenceKind  # lookup key, part 2
    status: AssertionStatus  # derived status for this key
    confidence: Confidence  # derived confidence for this key
 class PolicyVersion(BaseModel):
    """A versioned derivation policy. `policy_version` is recorded with every
    assessment so "why did you say X last year" is answerable with the policy
    as-of-then. Without this, `derived` and `auditable/reproducible` contradict."""
    policy_version: str  # identifier such as "capability-policy-v0"
    description: str = ""  # human-readable summary of what this version guarantees
    rules: List[PolicyRule] = Field(default_factory=list)  # the rule rows of this version
 # ── stored: identity lifecycle ────────────────────────────────────────────
 class IdentityLifecycleEvent(BaseModel):
    """One entry of the registry's identity lifecycle log (merge / split / deprecate / redirect)."""
    event_id: str
    event_type: LifecycleEventType
    from_ids: List[str] = Field(default_factory=list)  # ids being retired or changed
    to_ids: List[str] = Field(default_factory=list)  # successor ids; may be empty
    # when the event happened; the merge/split helpers leave it unset.
    # Presumably an ISO timestamp like Provenance.asserted_at — TODO confirm.
    at: Optional[str] = None
    provenance: Provenance = Field(default_factory=Provenance)
 # ── DERIVED — never stored ────────────────────────────────────────────────
 class DerivedAssessment(BaseModel):
    """Result of evaluating a relation under a policy. Derived on demand, never stored;
    it carries the policy version it was computed with so the result stays explainable."""
    target_capability_id: str
    status: AssertionStatus
    confidence: Confidence
    policy_version: str  # the PolicyVersion.policy_version used for the derivation
    explanation: str = ""
@@ -0,0 +1,46 @@
 """Company Intelligence (Phase 2A) — Company Capability Profile foundation.
 The HEAD of the spine Company -> Capability -> Product -> Regulation -> Obligation
 -> Procedure -> Evidence. Builds a CompanyContext into a CompanyCapabilityProfile
 with a four-state trust model (declared/inferred/confirmed/unknown). A certification
 yields at most an INFERRED candidate — never "erfuellt".
 Reasoning OWNS the container + trust-state; it CONSUMES the Certification->Capability
 mapping (Execution-owned) via an injected contract — no mapping data in product code.
 """
 from __future__ import annotations
 from .contract import CapabilityMappingEntry, CertificationCapabilityMap, EMPTY_MAPPING
 from .engine import build_company_profile
 from .schemas import (
    CapabilityEvidence,
    Certification,
    CompanyCapabilityProfile,
    CompanyContext,
    Declaration,
    ExistingEvidence,
    ExistingProcess,
    ExistingSystem,
    OperationalCapability,
    OperationalCapabilityCandidate,
    VerificationStatus,
 )
 # Public surface of the package: the profile builder, the domain schemas and the
 # mapping consumption contract.
 __all__ = [
    "build_company_profile",
    "CompanyContext",
    "CompanyCapabilityProfile",
    "Certification",
    "Declaration",
    "ExistingProcess",
    "ExistingSystem",
    "ExistingEvidence",
    "CapabilityEvidence",
    "OperationalCapabilityCandidate",
    "OperationalCapability",
    "VerificationStatus",
    "CapabilityMappingEntry",
    "CertificationCapabilityMap",
    "EMPTY_MAPPING",
 ]
@@ -0,0 +1,43 @@
 """Consumption contract for the Certification -> Capability mapping.
 OWNERSHIP BOUNDARY (hard): the Capability Registry, CapabilityDefinition and the
 Certification->Capability / Feature->Capability mapping RULES live in the Compliance
 Execution domain. This Reasoning layer defines ONLY the shape it consumes and never
 ships mapping DATA in product code — tests inject mocks, so the real table can only
 ever live in Execution.
 Execution will eventually provide CapabilityRegistry / CapabilityMapping /
 CapabilityDefinition; Reasoning consumes exactly `OperationalCapabilityCandidate`
 {capability_id, source, confidence, verification_status} (see schemas.py) and the
 minimal mapping SHAPE below — nothing more.
 Python 3.9 compatible (no `|` unions).
 """
 from __future__ import annotations
 from typing import Dict, List
 from pydantic import BaseModel, Field
 from compliance.reasoning.enums import Confidence
 class CapabilityMappingEntry(BaseModel):
    """One mapping rule SHAPE: a certification implies candidate capabilities.
    Contract type only. The actual table (which capabilities ISO27001 implies) is
    Execution's DATA and MUST NOT be hard-coded here or anywhere in product code.
    """
    capability_ids: List[str] = Field(default_factory=list)  # capabilities the certification implies as candidates
    confidence: Confidence = Confidence.MEDIUM  # copied onto every candidate inferred from this entry
 # certification_id -> entry. Injected at call time; product code holds NO entries.
 CertificationCapabilityMap = Dict[str, CapabilityMappingEntry]
 # Intentionally empty: without an injected mapping there are zero inferred
 # candidates. This is the architectural guarantee that the registry lives only in
 # the Compliance Execution domain.
 # NOTE(review): this is a plain, mutable module-level dict that build_company_profile
 # uses as its default — never add entries to it; pass a separate mapping instead.
 EMPTY_MAPPING: CertificationCapabilityMap = {}
@@ -0,0 +1,114 @@
 """Company Intelligence engine (Phase 2A) — build the Company Capability Profile.
 Deterministic, no LLM/RAG. Turns a raw CompanyContext into capability evidence,
 candidates and (only via explicit verification) confirmed capabilities.
 HARD RULE enforced here: a certification yields at most an INFERRED candidate; it
 can NEVER produce a CONFIRMED capability on its own. Only real ExistingEvidence
 (`proves_capability_id`) promotes a capability to CONFIRMED. Certifications without
 a known mapping yield evidence-of-claim but NO inferred capability (the mapping is
 Execution's data, injected — never hard-coded here).
 Python 3.9 compatible (no `|` unions).
 """
 from __future__ import annotations
 from typing import Dict, List, Optional, Tuple
 from compliance.reasoning.enums import Confidence
 from .contract import EMPTY_MAPPING, CertificationCapabilityMap
 from .schemas import (
    CapabilityEvidence,
    CompanyCapabilityProfile,
    CompanyContext,
    OperationalCapability,
    OperationalCapabilityCandidate,
    VerificationStatus,
 )
 def _declared(context: CompanyContext) -> List[OperationalCapabilityCandidate]:
    out: List[OperationalCapabilityCandidate] = []
    for d in context.declarations:
        out.append(
            OperationalCapabilityCandidate(
                capability_id=d.capability_id,
                source="declaration:%s" % context.company_id,
                confidence=Confidence.MEDIUM,
                verification_status=VerificationStatus.DECLARED,
            )
        )
    return out
 def _from_certifications(
    context: CompanyContext, mapping: CertificationCapabilityMap
 ) -> Tuple[List[CapabilityEvidence], List[OperationalCapabilityCandidate]]:
    # refinement 1: certification -> evidence-of-capability (claim) -> inferred candidate
    evidence: List[CapabilityEvidence] = []
    inferred: List[OperationalCapabilityCandidate] = []
    for cert in context.certifications:
        source = "certification:%s" % cert.certification_id
        evidence.append(
            CapabilityEvidence(
                source=source,
                claim="Company holds %s" % (cert.name or cert.certification_id),
                certification_id=cert.certification_id,
            )
        )
        entry = mapping.get(cert.certification_id)
        if entry is None:
            continue  # no mapping known -> NO inferred capability (data is Execution's)
        for cap_id in entry.capability_ids:
            inferred.append(
                OperationalCapabilityCandidate(
                    capability_id=cap_id,
                    source=source,
                    confidence=entry.confidence,
                    verification_status=VerificationStatus.INFERRED,
                )
            )
    return evidence, inferred
 def _confirmed_from_evidence(context: CompanyContext) -> List[OperationalCapability]:
    proven: Dict[str, List[str]] = {}
    for ev in context.evidence:
        cap = ev.proves_capability_id
        if not cap:
            continue
        proven.setdefault(cap, []).append(ev.evidence_id)
    return [
        OperationalCapability(
            capability_id=cap,
            verification_status=VerificationStatus.CONFIRMED,
            confidence=Confidence.HIGH,
            sources=sources,
        )
        for cap, sources in proven.items()
    ]
 def build_company_profile(
    context: CompanyContext, mapping: Optional[CertificationCapabilityMap] = None
 ) -> CompanyCapabilityProfile:
    """Assemble the Company Capability Profile from raw context and an injected mapping.
    With `mapping` omitted the empty mapping is used, so certifications produce claim
    evidence but no inferred candidates. Candidates are the declared ones followed by
    the inferred ones, minus every capability that real evidence already confirms.
    """
    effective_mapping = mapping if mapping is not None else EMPTY_MAPPING
    claims, inferred = _from_certifications(context, effective_mapping)
    declared = _declared(context)
    confirmed = _confirmed_from_evidence(context)
    proven_ids = {capability.capability_id for capability in confirmed}
    # a confirmed capability is no longer a mere candidate
    open_candidates = []
    for candidate in declared + inferred:
        if candidate.capability_id in proven_ids:
            continue
        open_candidates.append(candidate)
    return CompanyCapabilityProfile(
        company_id=context.company_id,
        capability_evidence=claims,
        candidate_capabilities=open_candidates,
        confirmed_capabilities=confirmed,
    )
@@ -0,0 +1,150 @@
 """Company Intelligence (Phase 2A) — Company Capability Profile (domain objects).
 This is the HEAD of the spine
    Company -> (Operational) Capability -> Product -> Applicable Regulation ->
    Obligation -> Procedure -> Evidence
 and answers a DIFFERENT question than Regulatory Intelligence: not "which laws
 apply to my product" but "which capabilities does my company already have, and
 which regulatory obligations might they already cover".
 HARD RULE (structural, not convention): a capability derived from a certification
 is at most INFERRED — never CONFIRMED, never "erfuellt". A certification produces
 EVIDENCE for a capability, an inference produces a CANDIDATE, and only checked
 evidence produces a CONFIRMED capability. This keeps the company side inside
 Welt 1 (potential), mirroring `ClaimCoverage` on the obligation side; it is NOT a
 conformity verdict (`ComplianceStatus`, Welt 2, owned by Compliance Execution).
 OWNERSHIP: Reasoning OWNS this CompanyContext container + the trust-state machine.
 It does NOT own the Certification->Capability mapping RULES — those are the same
 kind of rule as Feature->Capability and belong to the Compliance Execution
 Capability Registry. This layer only CONSUMES `OperationalCapabilityCandidate`
 {capability_id, source, confidence, verification_status} via an injected mapping
 (see contract.py). No mapping DATA lives in product code (tests inject mocks).
 Application/reasoning types, NOT compliance-meta-model classes (architecture
 freeze v1.0 untouched). Python 3.9 compatible (no `|` unions).
 """
 from __future__ import annotations
 from enum import Enum
 from typing import List, Optional
 from pydantic import BaseModel, Field
 from compliance.reasoning.enums import Confidence
 class VerificationStatus(str, Enum):
    """Trust state of an operational capability — a FOURTH vocabulary.
    Disjoint from ClaimCoverage (Welt 1, customer claim vs obligation),
    ComplianceStatus (Welt 2, verified conformity) and DeltaType (RCI). It says
    only how well-established a company CAPABILITY is, never whether an obligation
    is met. Progression: DECLARED (customer says) -> INFERRED (a certification
    implies it) -> CONFIRMED (checked against real evidence); UNKNOWN = no signal.
    """
    DECLARED = "declared"  # from a customer Declaration
    INFERRED = "inferred"  # implied by a certification through the injected mapping
    CONFIRMED = "confirmed"  # backed by ExistingEvidence.proves_capability_id
    UNKNOWN = "unknown"  # no signal
 # ── raw company inputs (the CompanyContext children) ──────────────────────
 class Certification(BaseModel):
    """A certificate the company states it holds. It is a claim: the engine turns it
    into evidence-of-claim and, with a mapping entry, into INFERRED candidates only."""
    certification_id: str  # e.g. "ISO27001"
    name: str = ""  # display name; the engine falls back to certification_id when empty
    scope: str = ""  # what the cert covers, customer-stated
 class Declaration(BaseModel):
    """A customer statement that they have a capability ("we do patch management")."""
    capability_id: str  # the capability being declared; becomes a DECLARED candidate
    statement: str = ""  # the customer's wording
 class ExistingProcess(BaseModel):
    """A process the company already runs (raw CompanyContext input)."""
    process_id: str
    name: str = ""
 class ExistingSystem(BaseModel):
    """A system the company already operates (raw CompanyContext input)."""
    system_id: str
    name: str = ""
 class ExistingEvidence(BaseModel):
    """A concrete artefact the company already holds (policy, audit log, SBOM ...).
    `proves_capability_id` is the ONLY thing that may lift a capability to
    CONFIRMED — and only when a human/engine has attached real evidence.
    """
    evidence_id: str  # listed in the confirmed capability's `sources`
    evidence_type: str = ""  # config_export/test_report/policy/audit_log/...
    proves_capability_id: Optional[str] = None  # None/empty: the evidence confirms nothing
 # ── intermediate: certification -> evidence-of-capability (refinement 1) ──
 class CapabilityEvidence(BaseModel):
    """A certification does not yield a capability directly — only EVIDENCE for one.
    "Company holds a certified ISMS" is the evidence/claim; capabilities are then
    INFERRED from it via the injected (Execution-owned) mapping, never directly.
    """
    source: str  # provenance, e.g. "certification:ISO27001"
    claim: str = ""  # human-readable claim, e.g. "Company holds ISO27001"
    certification_id: str = ""  # the certification the claim rests on
 # ── consumed contract type (refinement 2) ─────────────────────────────────
 class OperationalCapabilityCandidate(BaseModel):
    """The ONLY thing Reasoning consumes from Execution's capability mapping.
    Named "operational" (organisational ability) to stay distinct from later
    Product/AI/Safety capabilities. A candidate is always Welt 1 — DECLARED or
    INFERRED — and never CONFIRMED on its own.
    """
    capability_id: str
    source: str  # where the candidate came from: "declaration:<company>" or "certification:<id>"
    confidence: Confidence = Confidence.MEDIUM
    verification_status: VerificationStatus = VerificationStatus.INFERRED
 class OperationalCapability(BaseModel):
    """A capability the company actually has, CONFIRMED against real evidence."""
    capability_id: str
    verification_status: VerificationStatus  # required; the engine always passes CONFIRMED
    confidence: Confidence = Confidence.MEDIUM  # the engine passes HIGH for evidence-backed capabilities
    sources: List[str] = Field(default_factory=list)  # ids of the evidence items proving the capability
 # ── the container Reasoning OWNS (raw inputs) ─────────────────────────────
 class CompanyContext(BaseModel):
    """Raw company inputs from which the Company Capability Profile is built."""
    company_id: str
    certifications: List[Certification] = Field(default_factory=list)  # -> claim evidence (+ inferred candidates)
    declarations: List[Declaration] = Field(default_factory=list)  # -> declared candidates
    # NOTE(review): processes and systems are not read by the profile builder in this change
    processes: List[ExistingProcess] = Field(default_factory=list)
    systems: List[ExistingSystem] = Field(default_factory=list)
    evidence: List[ExistingEvidence] = Field(default_factory=list)  # -> confirmed capabilities
 # ── derived view (the Company Capability Profile) ─────────────────────────
 class CompanyCapabilityProfile(BaseModel):
    """Derived: capability evidence + candidates (declared/inferred) + confirmed.
    `candidate_capabilities` NEVER auto-promote to `confirmed_capabilities`; only
    explicit ExistingEvidence does that. The hard rule is enforced in engine.py.
    """
    company_id: str
    capability_evidence: List[CapabilityEvidence] = Field(default_factory=list)  # one claim per certification
    # declared first, then inferred; capabilities that are already confirmed are left out
    candidate_capabilities: List[OperationalCapabilityCandidate] = Field(default_factory=list)
    confirmed_capabilities: List[OperationalCapability] = Field(default_factory=list)  # evidence-backed only
@@ -0,0 +1,24 @@
 """Regulatory Completeness — auditable knowledge coverage, not confidence.
 An internal quality machine: for an assessment it reports identified vs assessed regulations and
 justifies every open or excluded domain (corpus gap -> future_corpus; applicability uncertain ->
 query_required). The metric is counts, never a single percentage. The product never claims full
 coverage — it makes its own knowledge state transparent and auditable. Deterministic, no LLM, no
 new corpus/meta-model class (freeze v1.0).
 """
 from __future__ import annotations
 from .engine import assess_completeness
 from .schemas import (
    Assumption, CompletenessReport, CorpusStatus, DomainCoverage, Exclusion,
 )
 # Public surface of the package: the engine entry point plus the report schemas.
 __all__ = [
    "assess_completeness",
    "CompletenessReport",
    "CorpusStatus",
    "DomainCoverage",
    "Exclusion",
    "Assumption",
 ]
@@ -0,0 +1,89 @@
 """Regulatory Completeness Engine — measure auditable knowledge coverage for an assessment.
 Separates what we IDENTIFIED (triggered regulations) from what we ASSESSED (validated corpus AND
 determined applicability), and justifies every gap. Two kinds of „open":
  - corpus gap        — no validated corpus yet (e.g. Environmental)            -> future_corpus
  - applicability open — corpus exists but applicability is uncertain (Data Act) -> query_required
 The metric is COUNTS, never a single percentage. The audit statement says plainly „wir bewerteten M
 von N Domänen; K sind nicht im validierten Korpus und wurden bewusst nicht bewertet".
 Deterministic, computed-not-stored, no LLM, no new corpus/meta-model class (freeze v1.0). Python 3.9.
 """
 from __future__ import annotations
 from typing import Any, Dict, List, Optional
 from .schemas import (
    Assumption, CompletenessReport, CorpusStatus, DomainCoverage, Exclusion,
 )
 # every raw string that names a CorpusStatus member
 _VALID = set(member.value for member in CorpusStatus)
 def _status(corpus_status: Dict[str, str], reg: str) -> CorpusStatus:
    """Look up the corpus status of `reg`; a missing or unrecognised value maps to UNKNOWN."""
    declared = corpus_status.get(reg, "unknown")
    if declared not in _VALID:
        return CorpusStatus.UNKNOWN
    return CorpusStatus(declared)
 def _str_or_default(value: Any, default: str = "") -> str:
    """Render an injected field as text; None (key missing or explicit null) becomes `default`."""
    if value is None:
        return default
    return str(value)
 def assess_completeness(
    identified_regulations: List[str],
    corpus_status: Dict[str, str],
    uncertain: Optional[List[Dict[str, Any]]] = None,
    assumptions: Optional[List[Dict[str, Any]]] = None,
    assessed_obligations: int = 0,
 ) -> CompletenessReport:
    """Build the auditable coverage report.
    `identified_regulations`: triggered/identified for this product. `corpus_status`: regulation ->
    one of validated/draft/unsupported/unknown (curated/injected corpus registry). `uncertain`:
    applicability-uncertain regulations [{regulation, deciding_question, reason}]; `subject` is
    accepted as an alias for `regulation`, entries naming neither produce no exclusion. `assumptions`:
    [{key, value, note}]. `assessed_obligations`: count from Execution (injected, default 0).
    A regulation counts as ASSESSED only when its corpus is validated AND it is not listed as
    applicability-uncertain; every other identified regulation is OPEN and receives an Exclusion
    (query_required for uncertain, in_review for a draft corpus, future_corpus otherwise).
    An explicit None in an injected dict field (reason, deciding_question, key, value, note) is
    treated like a missing field, so the report never carries the literal text "None".
    Returns the CompletenessReport: counts plus justification, never a single percentage.
    """
    ids = sorted(set(identified_regulations))
    unc = uncertain or []
    # resolve each regulation's corpus status once; every list below reads this map
    status_of = {r: _status(corpus_status, r) for r in ids}
    unc_subjects = {str(u.get("regulation") or u.get("subject")) for u in unc if (u.get("regulation") or u.get("subject"))}
    coverage = [DomainCoverage(regulation=r, status=status_of[r]) for r in ids]
    assessed = [r for r in ids if status_of[r] == CorpusStatus.VALIDATED and r not in unc_subjects]
    assessed_set = set(assessed)
    open_regs = [r for r in ids if r not in assessed_set]
    open_corpora = [r for r in ids if status_of[r] in (CorpusStatus.UNSUPPORTED, CorpusStatus.UNKNOWN)]
    exclusions: List[Exclusion] = []
    # 1) applicability-uncertain entries: a deciding question has to be answered first
    for u in unc:
        subj = str(u.get("regulation") or u.get("subject") or "")
        if not subj:
            continue
        exclusions.append(Exclusion(
            subject=subj, reason=_str_or_default(u.get("reason"), "Anwendbarkeit unsicher"),
            deciding_question=_str_or_default(u.get("deciding_question")), resolution="query_required"))
    # 2) remaining open regulations: the gap is in the corpus itself
    for r in open_regs:
        if r in unc_subjects:
            continue
        if status_of[r] == CorpusStatus.DRAFT:
            exclusions.append(Exclusion(subject=r, reason="Korpus in Bearbeitung (draft)", resolution="in_review"))
        else:
            exclusions.append(Exclusion(subject=r, reason="nicht im validierten Korpus", resolution="future_corpus"))
    covered_subjects = {e.subject for e in exclusions}
    justification = (not open_regs) or set(open_regs) <= covered_subjects
    assumptions_m = [
        Assumption(
            key=_str_or_default(a.get("key")),
            value=_str_or_default(a.get("value")),
            note=_str_or_default(a.get("note")),
        )
        for a in (assumptions or [])
    ]
    summary = "Identifiziert %d · bewertet %d · offen %d · Unsicherheiten %d · Begründung %s" % (
        len(ids), len(assessed), len(open_regs), len(unc), "ja" if justification else "nein")
    if open_regs:
        audit = (
            "Für dieses Produkt konnten wir %d von %d identifizierten regulatorischen Domänen vollständig "
            "bewerten. %d weitere %s noch nicht Bestandteil des validierten Korpus bzw. anwendungsunsicher "
            "und wurden deshalb bewusst nicht bewertet." % (
                len(assessed), len(ids), len(open_regs), "ist" if len(open_regs) == 1 else "sind"))
    else:
        audit = "Für dieses Produkt konnten wir alle %d identifizierten regulatorischen Domänen vollständig bewerten." % len(ids)
    return CompletenessReport(
        identified_regulations=ids, assessed_regulations=assessed, open_regulations=open_regs,
        open_corpora=open_corpora, coverage=coverage, assumptions=assumptions_m, exclusions=exclusions,
        uncertainties_count=len(unc), assessed_obligations=assessed_obligations,
        justification_present=justification, completeness_summary=summary, audit_statement=audit,
    )
@@ -0,0 +1,62 @@
 """Schemas for the Regulatory Completeness Engine — auditable knowledge-coverage, not confidence.
 For an assessment it answers „wie sicher sind wir, dass diese Antwort VOLLSTÄNDIG ist?" by separating
 IDENTIFIED regulations from ASSESSED ones (those in the validated corpus) and listing every open or
 excluded domain WITH a reason. The metric is counts, never a single „87%". This is an internal quality
 machine: the product never claims full coverage — it makes its own knowledge state transparent.
 Deterministic, computed-not-stored, no new meta-model class (freeze v1.0). Python 3.9 compatible.
 """
 from __future__ import annotations
 from enum import Enum
 from typing import List
 from pydantic import BaseModel, Field
 class CorpusStatus(str, Enum):
    """The maturity of our knowledge corpus for a regulation/domain."""
    VALIDATED = "validated"      # we can fully assess this
    DRAFT = "draft"             # partial / under review; reported as an "in_review" exclusion
    UNSUPPORTED = "unsupported"  # triggered but no corpus yet
    UNKNOWN = "unknown"          # not in our registry at all; also the fallback for unrecognised values
 class DomainCoverage(BaseModel):
    """Corpus status of one identified regulation (one row of the coverage table)."""
    regulation: str
    status: CorpusStatus = CorpusStatus.UNKNOWN
    note: str = ""  # optional free text; the engine leaves it empty
 class Exclusion(BaseModel):
    """A domain/regulation DELIBERATELY not assessed — always with a reason (the heart of the engine)."""
    subject: str
    reason: str
    deciding_question: str = ""                 # what would resolve it (if a query)
    # query_required | in_review | future_corpus | not_applicable
    # (the engine emits the first three; in_review marks a draft corpus)
    resolution: str = "future_corpus"
 class Assumption(BaseModel):
    """An assumption the assessment rests on, carried into the report as key/value text."""
    key: str
    value: str = ""  # stringified by the engine
    note: str = ""
 class CompletenessReport(BaseModel):
    """The auditable coverage report for one assessment — counts + justification, NO single percentage."""
    identified_regulations: List[str] = Field(default_factory=list)    # sorted and de-duplicated
    assessed_regulations: List[str] = Field(default_factory=list)      # validated corpus AND applicability not uncertain
    open_regulations: List[str] = Field(default_factory=list)          # identified but not assessed (corpus gap, draft or uncertain)
    open_corpora: List[str] = Field(default_factory=list)             # corpus unsupported/unknown: missing domains worth building
    coverage: List[DomainCoverage] = Field(default_factory=list)      # corpus status per identified regulation
    assumptions: List[Assumption] = Field(default_factory=list)
    exclusions: List[Exclusion] = Field(default_factory=list)         # one reasoned entry per deliberately unassessed subject
    uncertainties_count: int = 0                                       # number of applicability-uncertain entries passed in
    assessed_obligations: int = 0                                      # injected (Execution-owned)
    justification_present: bool = False                                # True when every open regulation has an exclusion
    completeness_summary: str = ""                                    # "Identifiziert N · bewertet M · offen K · ..."
    audit_statement: str = ""                                         # the honest narrative sentence
@@ -0,0 +1,18 @@
 """Interpretation-in-Map — evaluate a customer interpretation within the map.
 Thin adapter over the existing `assess_interpretation`: it judges the customer's
 reading against the regulations/obligations actually present in the product's
 RegulatoryMap, and flags touched unsupported domains as future_corpus_needed
 instead of pseudo-evaluating them. No new legal reasoning, no RCI, no UI.
 """
 from __future__ import annotations
 from .adapter import interpret_in_map
 from .schemas import InterpretationInMapRequest, InterpretationInMapResult
 # Public surface of the package: the adapter entry point and its request/result schemas.
 __all__ = [
    "interpret_in_map",
    "InterpretationInMapRequest",
    "InterpretationInMapResult",
 ]
@@ -0,0 +1,90 @@
 """Interpretation-in-Map adapter (step 5).
 Evaluates a customer interpretation WITHIN the already-built RegulatoryMap. It
 reuses the existing `assess_interpretation` (no new legal engine), restricts the
 affected regulations/obligations to those present in the map, and reports any
 touched unsupported domain (wastewater/chemicals/...) as future_corpus_needed
 rather than pseudo-evaluating it.
 """
 from __future__ import annotations
 from typing import Dict, List
 from compliance.reasoning.enums import InterpretationVerdict
 from compliance.reasoning.interpretation_engine import assess_interpretation
 from compliance.regulatory_map.schemas import RegulatoryMap
 from .schemas import InterpretationInMapResult
 # German display label per verdict. interpret_in_map looks the label up with
 # _LABEL[assessment], so a verdict without an entry here raises KeyError.
 _LABEL: Dict[InterpretationVerdict, str] = {
    InterpretationVerdict.PLAUSIBLE: "plausibel",
    InterpretationVerdict.TOO_NARROW: "zu eng",
    InterpretationVerdict.TOO_BROAD: "zu weit",
    InterpretationVerdict.PARTIALLY_CORRECT: "teilweise korrekt",
    InterpretationVerdict.UNSUPPORTED: "nicht belegt",
    InterpretationVerdict.UNCERTAIN: "unsicher",
 }
 # domain -> keywords that signal the interpretation is ABOUT that (uncovered) domain.
 _ENV_KEYWORDS: Dict[str, List[str]] = {
    "environment_water": ["abwasser", "wastewater", "gewässer", "gewaesser", "einleitung", "abfluss"],
    "chemicals": ["chemikalie", "reach", "clp", "reinigungsmittel", "biozid", "gefahrstoff", "detergenz", "lösemittel", "loesemittel"],
    "environment_air": ["luft", "emission", "voc", "immission", "abluft", "verbrennung"],
    "waste": ["abfall", "entsorgung", "weee", "recycling"],
    "energy_resources": ["energie", "ökodesign", "oekodesign", "verbrauch"],
 }
 def _touches(text: str, domain: str) -> bool:
    low = text.lower()
    return any(kw in low for kw in _ENV_KEYWORDS.get(domain, []))
 def _explain(label: str, detail: str, affected_regs: List[str], future_domains: List[str], in_scope: bool) -> str:
    base = "Ihre Interpretation ist wahrscheinlich %s." % label
    if detail:
        base += " " + detail
    if affected_regs:
        base += " Betroffen in Ihrer Map: %s." % ", ".join(affected_regs)
    if future_domains:
        base += (
            " Für %s liegt noch kein Regelkorpus vor — diese Aspekte werden nicht bewertet (future_corpus_needed)."
            % ", ".join(future_domains)
        )
    if not in_scope and not future_domains:
        base += " Diese Auslegung betrifft kein Regelwerk Ihrer aktuellen Produkt-Map."
    return base
 def interpret_in_map(reg_map: RegulatoryMap, interpretation: str) -> InterpretationInMapResult:
    """Assess a customer interpretation and narrow the outcome to what `reg_map` contains.

    The verdict comes from `assess_interpretation`. This adapter only intersects the
    affected regulations and obligations with the map, and lists the map's unsupported
    domains that the text mentions instead of evaluating them.

    Args:
        reg_map: The already-built regulatory map of the product.
        interpretation: The customer's free-text reading.

    Returns:
        An InterpretationInMapResult. `in_scope_of_map` is True when at least one affected
        regulation or obligation is present in the map.

    Raises:
        KeyError: if the engine returns a verdict that has no entry in `_LABEL`.
    """
    verdict = assess_interpretation(interpretation)  # the legal judgement stays with the existing engine
    # Regulation ids the map knows in ANY status (applicable, uncertain or excluded).
    known_reg_ids = set()
    for group in (reg_map.applicable_regulations, reg_map.uncertain_regulations, reg_map.excluded_regulations):
        known_reg_ids.update(entry.regulation_id for entry in group)
    # Obligation ids are collected from the applicable regulations only.
    known_ob_ids = set()
    for entry in reg_map.applicable_regulations:
        known_ob_ids.update(ob.obligation_id for ob in entry.obligations)
    uncertain_reg_ids = {entry.regulation_id for entry in reg_map.uncertain_regulations}
    regs_in_map = [reg for reg in verdict.affected_regulations if reg in known_reg_ids]
    obs_in_map = [ob for ob in verdict.affected_obligations if ob in known_ob_ids]
    touched_uncertain = [reg for reg in verdict.affected_regulations if reg in uncertain_reg_ids]
    uncovered = [dom for dom in reg_map.unsupported_domains if _touches(interpretation, dom.domain)]
    touches_map = bool(regs_in_map or obs_in_map)
    explanation = _explain(
        _LABEL[verdict.assessment],
        verdict.explanation,
        regs_in_map,
        [dom.domain for dom in uncovered],
        touches_map,
    )
    return InterpretationInMapResult(
        raw_interpretation=interpretation,
        assessment=verdict.assessment,
        in_scope_of_map=touches_map,
        affected_regulations=regs_in_map,
        affected_obligations=obs_in_map,
        related_uncertainties=touched_uncertain,
        future_corpus_domains=uncovered,
        corrected_interpretation=verdict.corrected_interpretation,
        risks=verdict.risks,
        legal_basis_refs=verdict.legal_basis_refs,
        explanation=explanation,
        confidence=verdict.confidence,
    )
@@ -0,0 +1,36 @@
 """Schemas for Interpretation-in-Map (step 5).
 A thin adapter that evaluates a customer interpretation WITHIN the already-built
 RegulatoryMap — it does not assess abstract legal questions. Application types
 only; no compliance-meta-model classes (freeze v1.0 untouched).
 """
 from __future__ import annotations
 from typing import List
 from pydantic import BaseModel, Field
 from compliance.product_scope.schemas import UnsupportedDomain
 from compliance.profile.canonical import CanonicalProductRegulatoryProfile
 from compliance.reasoning.enums import Confidence, InterpretationVerdict
 class InterpretationInMapRequest(BaseModel):
    # Request payload: the product profile plus the customer's free-text interpretation.
    # NOTE(review): interpret_in_map takes a RegulatoryMap, so presumably the caller derives
    # the map from product_profile first — confirm against the endpoint.
    product_profile: CanonicalProductRegulatoryProfile
    customer_interpretation: str
 class InterpretationInMapResult(BaseModel):
    # Result of interpret_in_map: the engine's verdict, narrowed to the product's map.
    raw_interpretation: str  # the input text, echoed unchanged
    assessment: InterpretationVerdict  # verdict returned by assess_interpretation
    in_scope_of_map: bool  # True if it touches a regulation/obligation present in the map
    affected_regulations: List[str] = Field(default_factory=list)  # intersected with the map
    affected_obligations: List[str] = Field(default_factory=list)  # intersected (registry-linked)
    related_uncertainties: List[str] = Field(default_factory=list)  # map-uncertain regs it touches
    future_corpus_domains: List[UnsupportedDomain] = Field(default_factory=list)  # NOT evaluated
    corrected_interpretation: str = ""  # passed through from the engine result
    risks: List[str] = Field(default_factory=list)  # passed through from the engine result
    legal_basis_refs: List[str] = Field(default_factory=list)  # passed through from the engine result
    explanation: str = ""  # German sentence assembled by the adapter
    confidence: Confidence = Confidence.MEDIUM  # the adapter always overwrites this with the engine's confidence
@@ -0,0 +1,30 @@
 """Journey Matcher — the Delta -> Journey function of the Capability Delta Engine.
 The third independent function of the pipeline (after Company 2A `Evidence -> Capability` and RS-005
 `Capability -> Delta`): given ONLY the Capability Delta, rank the known journeys that best EXPLAIN it.
 A Journey is an EXPLANATION of the delta, not its cause — order is `Goal -> Required -> Delta -> Journey`.
 Deliberately dumb + deterministic (pure set overlap; no ML/embeddings/LLM), fully auditable, signatures
 INJECTED (certificate-agnostic capability clusters). No new corpus, no graph (freeze v1.0). The Matcher
 is sanctioned as the last architectural building block; everything after is knowledge work.
 """
 from __future__ import annotations
 from .engine import match_journeys
 from .schemas import (
    JourneyMatch,
    JourneyMatchReason,
    JourneyMatchResult,
    JourneySignature,
    MatchContext,
 )
 # Public API of the package: the matcher entry point plus its input and output schemas.
 __all__ = [
    "match_journeys",
    "JourneySignature",
    "MatchContext",
    "JourneyMatch",
    "JourneyMatchReason",
    "JourneyMatchResult",
 ]
@@ -0,0 +1,94 @@
 """Journey Matcher — the Delta -> Journey function of the Capability Delta Engine.
 Three INDEPENDENT functions now compose the pipeline, each a different problem, all interchangeable:
  1. Evidence   -> Capability   (Company 2A)
  2. Capability -> Delta        (RS-005, transition_reasoning)
  3. Delta      -> Journey      (THIS module)
 The paradigm shift: a Journey is no longer the CAUSE (Goal -> Journey -> Delta) but the EXPLANATION
 (Goal -> Required -> Delta -> Journey). The matcher does NOT look at certifications, regulations,
 tenders, OEM specs or the goal — it looks ONLY at the Capability Delta and asks: which known journeys
 describe exactly this delta? Output is a ranked, auditable explanation ("Journey A explains 82% of the
 delta, because 8 of 10 missing capabilities are identical, same target type, ...").
 Deliberately DUMB and deterministic: pure set overlap, NO ML, NO embeddings, NO LLM. A learning ranker
 can be layered ON TOP later; this core stays auditable. Journey signatures are INJECTED (certificate-
 agnostic capability clusters), never loaded here — the engine stays hermetic. No new corpus, no
 graph/meta-model class (freeze v1.0). Python 3.9 compatible.
 Honesty: `score` is the share of the DELTA a journey explains (recall over the customer's missing
 capabilities), never a "fit" or a compliance verdict. `journey_only` documents where a journey reaches
 BEYOND this delta, so a broad journey that explains everything is not silently preferred.
 """
 from __future__ import annotations
 from typing import List, Optional, Sequence
 from .schemas import (
    JourneyMatch,
    JourneyMatchReason,
    JourneyMatchResult,
    JourneySignature,
    MatchContext,
 )
 def _context_signals(journey: JourneySignature, context: Optional[MatchContext]) -> List[str]:
    """Corroborating reasons only — these are documented, they never change the score."""
    if context is None:
        return []
    signals: List[str] = []
    if context.target_type and journey.target_type and context.target_type == journey.target_type:
        signals.append("gleiche Zielart")
    if context.industry and journey.industry and context.industry == journey.industry:
        signals.append("gleiche Branche")
    if context.product_type and journey.product_type and context.product_type == journey.product_type:
        signals.append("gleicher Produkttyp")
    return signals
 def match_journeys(
    delta: Sequence[str],
    journeys: Sequence[JourneySignature],
    context: Optional[MatchContext] = None,
 ) -> JourneyMatchResult:
    """Rank the injected journeys by how much of the capability delta each one explains.

    Args:
        delta: The missing capabilities; duplicates are collapsed before scoring.
        journeys: The known journey signatures to compare against.
        context: Optional corroborating context; it only adds documented reasons.

    Returns:
        A JourneyMatchResult whose `matches` are ordered by score (descending), then by the
        number of context signals (descending), then by journey_id (ascending). The score is
        |delta ∩ pattern| / |delta| rounded to two decimals, and 0.0 for an empty delta.
        `best` is the first match if its score is above zero, otherwise None.
    """
    missing = set(delta)
    total = len(missing)
    ranked: List[JourneyMatch] = []
    for signature in journeys:
        covered = set(signature.capability_pattern)
        explained = sorted(missing & covered)
        share = len(explained) / total if total else 0.0  # guard against an empty delta
        why = JourneyMatchReason(
            matched_capabilities=explained,
            unexplained_delta=sorted(missing - covered),
            journey_only=sorted(covered - missing),
            context_signals=_context_signals(signature, context),
        )
        ranked.append(
            JourneyMatch(
                journey_id=signature.journey_id,
                label=signature.label,
                score=round(share, 2),
                explains="%d von %d fehlenden Capabilities" % (len(explained), total),
                reason=why,
            )
        )
    ranked.sort(key=lambda m: (-m.score, -len(m.reason.context_signals), m.journey_id))
    best = None
    if ranked and ranked[0].score > 0.0:
        best = ranked[0]
    if best:
        explaining = sum(1 for m in ranked if m.score > 0.0)
        headline = "%d Journeys erklaeren das Delta; beste: %s (%d%% des Deltas)" % (
            explaining,
            best.label,
            round(best.score * 100),
        )
    else:
        headline = "Keine bekannte Journey erklaert dieses Delta (neue Journey-Kandidatin)"
    return JourneyMatchResult(delta_size=total, matches=ranked, best=best, headline=headline)
@@ -0,0 +1,66 @@
 """Schemas for the Journey Matcher — the Delta -> Journey function of the Capability Delta Engine.
 Derived views (computed-not-stored): nothing here is persisted; every match is recomputed from the
 input delta + injected journey signatures each call. No new corpus, no graph (freeze v1.0).
 Python 3.9 compatible (no `|` unions).
 """
 from __future__ import annotations
 from typing import List, Optional
 from pydantic import BaseModel, Field
 class JourneySignature(BaseModel):
    """A known journey described ONLY by its capability pattern (Input cluster -> Output cluster).
    Deliberately certificate-/regulation-agnostic: the match uses `capability_pattern` alone. `label`
    and the context fields exist for the human-auditable explanation, NEVER for the score. (Today the
    signatures are derived from the transition patterns; the IDs like "ISO27001->CRA" are just one way
    to describe the clusters — the matcher never reads them.)
    """
    journey_id: str                                                 # copied into the match; also the last tie-breaker of the ranking
    label: str                                                      # human-readable name, quoted in the result headline
    capability_pattern: List[str] = Field(default_factory=list)     # OUTPUT cluster: the delta this journey is about
    assumed_capabilities: List[str] = Field(default_factory=list)   # INPUT cluster: typically already present
    industry: Optional[str] = None           # context only: compared for equality with MatchContext.industry
    product_type: Optional[str] = None       # context only: compared for equality with MatchContext.product_type
    target_type: Optional[str] = None        # context only: regulation / certification / contract / environmental
 class MatchContext(BaseModel):
    """Optional corroborating context — surfaced as documented reasons, never part of the score."""
    # A field left as None (or empty) is skipped: a signal is reported only when the value is
    # set here AND on the journey, and both are equal.
    industry: Optional[str] = None
    product_type: Optional[str] = None
    target_type: Optional[str] = None
 class JourneyMatchReason(BaseModel):
    """The auditable WHY behind one match — everything a reviewer needs, no opaque score."""
    # The matcher fills the three capability lists in sorted order, so repeated runs over the
    # same input give identical output.
    matched_capabilities: List[str] = Field(default_factory=list)   # delta INTERSECT pattern (what it explains)
    unexplained_delta: List[str] = Field(default_factory=list)      # delta - pattern (what it does NOT explain)
    journey_only: List[str] = Field(default_factory=list)           # pattern - delta (journey covers, not needed here)
    context_signals: List[str] = Field(default_factory=list)        # "gleiche Zielart", "gleiche Branche", ...
 class JourneyMatch(BaseModel):
    """One known journey, ranked by how much of the delta it EXPLAINS (not how well it 'fits')."""
    journey_id: str                          # copied from the journey signature
    label: str                               # copied from the journey signature
    score: float = 0.0                       # |delta INTERSECT pattern| / |delta|, 0..1: share of the delta explained
                                             # (the matcher stores it rounded to two decimals; 0.0 for an empty delta)
    explains: str = ""                       # "8 von 10 fehlenden Capabilities"
    reason: JourneyMatchReason               # required: the set-level breakdown behind the score
 class JourneyMatchResult(BaseModel):
    """Ranked known journeys that EXPLAIN a Capability Delta. Journey = explanation, not cause."""
    delta_size: int = 0                                             # number of DISTINCT capabilities in the input delta
    matches: List[JourneyMatch] = Field(default_factory=list)       # ranked desc by score
                                                                    # (ties: more context signals first, then journey_id ascending)
    best: Optional[JourneyMatch] = None                             # first match if its score is > 0, else None
    headline: str = ""                                              # one-line German summary built by the matcher
@@ -0,0 +1,23 @@
 """Knowledge Intake — classify an incoming document and assess its impact on existing knowledge.
 The stage BEFORE the parser: no content extraction, only Einordnung. Intersects a document's signals
 (regulations + keywords) with an index of the existing knowledge to emit a `KnowledgePackage` — which
 capabilities / playbooks / patterns / reference scenarios / obligations it probably touches, whether
 it is a new domain, and how much review it warrants. Deterministic, no LLM, no new corpus (freeze v1.0).
 """
 from __future__ import annotations
 from .engine import assess_document_impact, build_knowledge_index
 from .schemas import (
    DocumentDescriptor, ImpactLevel, KnowledgeIndex, KnowledgePackage,
 )
 # Public API of the package: the two engine functions plus the intake schemas.
 __all__ = [
    "build_knowledge_index",
    "assess_document_impact",
    "DocumentDescriptor",
    "KnowledgeIndex",
    "KnowledgePackage",
    "ImpactLevel",
 ]
@@ -0,0 +1,111 @@
 """Knowledge Intake — classify a document and assess its impact on existing knowledge.
 The real Knowledge Production is not writing — it is TARGETED UPDATING: when 20 documents arrive,
 which 5 actually change our knowledge and which 15 are ignorable? Intake answers this deterministically
 by intersecting a document's signals (declared regulations + keywords) with an index of the existing
 knowledge (capabilities, playbooks, transition patterns, reference scenarios, injected obligations).
 It performs NO content extraction (that is the later parser stage) and uses NO LLM.
 Pipeline: Knowledge Intake -> Knowledge Package -> Parser -> Draft Generator -> Review -> Published.
 Pure, deterministic, computed-not-stored. No new corpus/meta-model class (freeze v1.0). Python 3.9.
 """
 from __future__ import annotations
 from typing import Any, Dict, List, Optional, Set
 from .schemas import DocumentDescriptor, ImpactLevel, KnowledgeIndex, KnowledgePackage
 def _targets(goal_to: Any) -> List[str]:
    """Extract target regulations from a transition_goal.to (single dict OR list of targets)."""
    out: List[str] = []
    items = goal_to if isinstance(goal_to, list) else [goal_to]
    for it in items:
        if isinstance(it, dict):
            reg = it.get("regulation") or it.get("target") or it.get("framework")
            if reg:
                out.append(str(reg))
    return out
 def build_knowledge_index(
    patterns: List[Dict[str, Any]],
    playbooks: List[Dict[str, Any]],
    reference_scenarios: List[Dict[str, Any]],
    obligation_index: Optional[Dict[str, List[str]]] = None,
 ) -> KnowledgeIndex:
    """Assemble the matching index from already-loaded knowledge dicts (file I/O stays in the caller).

    Args:
        patterns: Transition patterns. Each contributes `id -> target regulations`, and every
            capability in `likely_covered` / `delta_requirements` is indexed.
        playbooks: Playbook dicts; only their `capability_id` is read.
        reference_scenarios: Reference scenarios; indexed as `id -> target regulations`.
        obligation_index: Optional injected `regulation -> obligation ids` mapping.

    Returns:
        A KnowledgeIndex whose `regulations` is the sorted union of all regulations seen in
        the patterns, scenarios, capabilities and the obligation index.

    Keys that are present but explicitly null (`transition_goal`, `likely_covered`,
    `delta_requirements`, `covers_targets`) are treated like missing keys. `dict.get(key,
    default)` alone would return None for them and crash.
    """
    tp: Dict[str, List[str]] = {}
    cap_regs: Dict[str, List[str]] = {}
    for p in patterns:
        pid = str(p.get("id", ""))
        targets = _targets((p.get("transition_goal") or {}).get("to"))
        if pid:
            tp[pid] = targets
        for item in list(p.get("likely_covered") or []) + list(p.get("delta_requirements") or []):
            cap = item.get("capability")
            if not cap:
                continue
            # A capability without its own covers_targets inherits the pattern's targets.
            regs = [str(t) for t in (item.get("covers_targets") or [])] or targets
            key = str(cap)
            cap_regs[key] = sorted(set(cap_regs.get(key, [])) | set(regs))
    rts = {
        str(r.get("id", "")): _targets((r.get("transition_goal") or {}).get("to"))
        for r in reference_scenarios
    }
    rts.pop("", None)  # scenarios without an id cannot be referenced
    obl = obligation_index or {}
    regulations = sorted(
        {t for ts in tp.values() for t in ts}
        | {t for ts in rts.values() for t in ts}
        | {t for ts in cap_regs.values() for t in ts}
        | set(obl.keys())
    )
    return KnowledgeIndex(
        regulations=regulations, capability_regulations=cap_regs,
        playbook_capabilities=sorted({str(pb.get("capability_id", "")) for pb in playbooks} - {""}),
        transition_patterns=tp, reference_scenarios=rts, obligation_index=dict(obl),
    )
 def _kw_match(keywords: Set[str], capability: str) -> bool:
    tokens = set(capability.lower().split("_"))
    return bool(keywords & tokens) or capability.lower() in keywords
 def assess_document_impact(descriptor: DocumentDescriptor, index: KnowledgeIndex) -> KnowledgePackage:
    """Work out which parts of the indexed knowledge a document probably touches.

    Capabilities are hit by a shared regulation OR by a keyword match. Playbooks are the hit
    capabilities that have a playbook. Patterns, reference scenarios and obligations are hit
    through shared regulations only.

    Impact level, first rule that applies:
      * NEW_DOMAIN: the document declares regulations and none of them is known;
      * NONE: nothing at all is touched;
      * HIGH: at least 3 capabilities, any playbook, or at least 5 obligations;
      * LOW: everything else.
    """
    declared = set(descriptor.regulations)
    corpus_regs = set(index.regulations)
    unknown = sorted(declared - corpus_regs)
    is_new_domain = bool(declared) and declared.isdisjoint(corpus_regs)
    topic_words = {word.lower() for word in descriptor.keywords}

    def _ids_sharing_regulation(mapping: Dict[str, List[str]]) -> List[str]:
        # ids whose regulation list overlaps the document's declared regulations
        return sorted(key for key, regs in mapping.items() if not declared.isdisjoint(regs))

    caps = sorted(
        cap
        for cap, regs in index.capability_regulations.items()
        if not declared.isdisjoint(regs) or _kw_match(topic_words, cap)
    )
    playbooks = sorted(set(caps).intersection(index.playbook_capabilities))
    patterns = _ids_sharing_regulation(index.transition_patterns)
    scenarios = _ids_sharing_regulation(index.reference_scenarios)
    obligations = sorted({ob for reg in declared for ob in index.obligation_index.get(reg, [])})
    counts = (len(caps), len(playbooks), len(patterns), len(scenarios), len(obligations))
    if is_new_domain:
        verdict = (ImpactLevel.NEW_DOMAIN, "Neue Domäne — Corpus-Intake nötig (kein bestehendes Wissen betroffen).")
    elif sum(counts) == 0:
        verdict = (ImpactLevel.NONE, "Wahrscheinlich ignorierbar — betrifft keinen bekannten Wissensbaustein.")
    elif len(caps) >= 3 or playbooks or len(obligations) >= 5:
        verdict = (ImpactLevel.HIGH, "Gezielter Review priorisieren — hoher Impact auf bestehendes Wissen.")
    else:
        verdict = (ImpactLevel.LOW, "Gezielter Review — geringer, eingegrenzter Impact.")
    level, rec = verdict
    domain_note = "NEUE Domäne" if is_new_domain else "keine neue Domäne"
    summary = "Betrifft %d Capabilities, %d Playbooks, %d Patterns, %d Reference Scenarios, %d Obligations; %s." % (
        counts + (domain_note,)
    )
    classification = {
        "regulations": sorted(declared),
        "keywords": sorted(topic_words),
        "document_type": [descriptor.document_type] if descriptor.document_type else [],
    }
    return KnowledgePackage(
        document_id=descriptor.document_id,
        classification=classification,
        new_domain=is_new_domain,
        unknown_regulations=unknown,
        affected_capabilities=caps,
        affected_playbooks=playbooks,
        affected_transition_patterns=patterns,
        affected_reference_scenarios=scenarios,
        affected_obligations=obligations,
        impact_level=level,
        impact_summary=summary,
        recommendation=rec,
    )
@@ -0,0 +1,62 @@
 """Schemas for Knowledge Intake — classify a new document and assess its IMPACT (no extraction yet).
 Before the parser/draft stages, Intake answers „welche Teile unseres Wissensbestands sind überhaupt
 betroffen?". It does NOT extract content — it only classifies the document and intersects its signals
 with an index of the existing knowledge (capabilities, playbooks, transition patterns, reference
 scenarios, injected obligations) to emit a `KnowledgePackage` (an impact analysis). Deterministic,
 computed-not-stored, no new corpus, no new meta-model class (freeze v1.0). Python 3.9 compatible.
 """
 from __future__ import annotations
 from enum import Enum
 from typing import Dict, List
 from pydantic import BaseModel, Field
 class ImpactLevel(str, Enum):
    # Outcome categories of the impact assessment. Because of the `str` mixin every member
    # compares equal to its plain string value.
    NONE = "none"                # touches nothing known -> likely ignorable
    LOW = "low"                  # touches a little -> targeted review
    HIGH = "high"                # touches a lot -> prioritise review
    NEW_DOMAIN = "new_domain"    # references only unknown regulations -> domain intake
 class DocumentDescriptor(BaseModel):
    """Lightweight signals of an incoming document — NO content body, only classification inputs."""
    # assess_document_impact reads document_id, document_type, regulations and keywords;
    # title, source and product_types are carried along but not evaluated there.
    document_id: str                                      # required; echoed into the KnowledgePackage
    title: str = ""
    source: str = ""                                      # e.g. BSI, ENISA, EU
    document_type: str = ""                               # e.g. guidance, faq, regulation, recommendation
    regulations: List[str] = Field(default_factory=list)  # declared regulations it references
    keywords: List[str] = Field(default_factory=list)     # lightweight topic signals (e.g. sbom)
    product_types: List[str] = Field(default_factory=list)
 class KnowledgeIndex(BaseModel):
    """A deterministic index of the EXISTING knowledge to match an incoming document against."""
    # Produced by build_knowledge_index; every field defaults to an empty container.
    regulations: List[str] = Field(default_factory=list)               # all regulations the corpus knows
    capability_regulations: Dict[str, List[str]] = Field(default_factory=dict)   # capability -> covers_targets
    playbook_capabilities: List[str] = Field(default_factory=list)     # capabilities that HAVE a playbook
    transition_patterns: Dict[str, List[str]] = Field(default_factory=dict)      # pattern_id -> target regulations
    reference_scenarios: Dict[str, List[str]] = Field(default_factory=dict)      # rts_id -> regulations
    obligation_index: Dict[str, List[str]] = Field(default_factory=dict)         # regulation -> obligation ids (INJECTED)
 class KnowledgePackage(BaseModel):
    """The impact analysis for one document — what of our knowledge it probably touches, and how much."""
    document_id: str                                                     # copied from the descriptor
    classification: Dict[str, List[str]] = Field(default_factory=dict)   # echoed regulations/keywords/types
    new_domain: bool = False                                             # True when regulations are declared and none of them is known
    unknown_regulations: List[str] = Field(default_factory=list)         # declared regulations missing from the index
    affected_capabilities: List[str] = Field(default_factory=list)       # hit via shared regulation or keyword
    affected_playbooks: List[str] = Field(default_factory=list)          # affected capabilities that have a playbook
    affected_transition_patterns: List[str] = Field(default_factory=list)   # hit via shared regulation
    affected_reference_scenarios: List[str] = Field(default_factory=list)   # hit via shared regulation
    affected_obligations: List[str] = Field(default_factory=list)        # obligations of the declared regulations
    impact_level: ImpactLevel = ImpactLevel.NONE
    impact_summary: str = ""                                             # German one-line count summary
    recommendation: str = ""                                             # German recommendation matching impact_level
@@ -0,0 +1,19 @@
 """Knowledge Production — deterministically prepare the corpus, then curate it.
 The corpus is not written by hand: the Playbook Draft Generator structures drafts from data the
 software already owns (Transition Pattern + leverage + injected Execution controls), leaving the
 practitioner know-how as TODO for expert review. Mirrors the legal pipeline (Parser -> Review).
 Deterministic, no LLM in core, no new corpus, no new meta-model class (freeze v1.0).
 """
 from __future__ import annotations
 from .engine import drafts_from_pattern, generate_playbook_draft
 from .schemas import DraftStatus, PlaybookDraft
 # Public API of the package: the two draft-generator functions plus the draft schemas.
 __all__ = [
    "generate_playbook_draft",
    "drafts_from_pattern",
    "PlaybookDraft",
    "DraftStatus",
 ]
@@ -0,0 +1,91 @@
 """Knowledge Production — the Playbook Draft Generator (deterministic assembly + expert review).
 Mirrors the legal pipeline (Gesetz -> Parser -> Obligation -> Review) for BreakPilot's OWN knowledge:
 new Capability -> Registry -> Transition Pattern -> **Playbook Draft Generator** -> Expert Review ->
 versioned Playbook. The generator does not WRITE playbooks — it STRUCTURES drafts from data the
 software already owns (a transition/convergence pattern's delta requirement: why_asked, covers_targets,
 expected_evidence) plus injected Execution controls. The practitioner know-how (tools / process steps /
 how others do it) is left as an explicit TODO for the expert (or a separate offline-propose step).
 Fully deterministic, NO LLM in the core (deterministic-first: any model enrichment is offline,
 advisory, never in this assembly). No new corpus, no new meta-model class (freeze v1.0). Python 3.9.
 """
 from __future__ import annotations
 from typing import Any, Dict, List, Optional
 from .schemas import DraftStatus, PlaybookDraft
 # Field names that generate_playbook_draft appends to the `todo` list of EVERY draft.
 _SOFT_FIELDS = ["tools", "process_steps", "how_others_do_it"]   # practitioner know-how — expert/offline-propose
 # German disclaimer stored on every generated draft: it is a machine-assembled draft, not a
 # normative requirement, and needs expert curation before its status advances.
 _DISCLAIMER = (
    "Maschinell assemblierter ENTWURF aus vorhandenen Daten (Transition Pattern + Leverage + "
    "injizierte Controls). KEINE normative Anforderung; erfordert fachliche Kuratierung (TODO-Felder) "
    "und Statuswechsel draft_generated -> reviewed -> validated."
 )
 def generate_playbook_draft(
    capability_id: str,
    requirement: Optional[Dict[str, Any]] = None,
    control_links: Optional[List[str]] = None,
 ) -> PlaybookDraft:
    """Assemble a playbook draft for ONE capability from a pattern delta requirement (deterministic).

    Args:
        capability_id: The capability the draft is for; the title is this id with underscores
            replaced by spaces.
        requirement: A delta_requirement dict. Read keys: `why_asked` (fallback
            `missing_because`), `covers_targets`, `expected_evidence`. Missing keys and keys
            that are explicitly None are both treated as "no data".
        control_links: Injected Execution controls; defaults to none.

    Returns:
        A PlaybookDraft in status DRAFT_GENERATED. `provenance` names the source of every
        owned field that could be filled. `todo` lists `why` / `expected_evidence` when they
        are empty, and always ends with the practitioner fields from `_SOFT_FIELDS`.
    """
    req = requirement or {}
    why = str(req.get("why_asked") or req.get("missing_because") or "")
    # `or []` keeps explicit nulls from crashing the iteration, the same tolerance `why` already has.
    closes = sorted({str(t) for t in (req.get("covers_targets") or [])})
    evidence = [str(e) for e in (req.get("expected_evidence") or [])]
    controls = list(control_links or [])
    provenance: Dict[str, str] = {}
    todo: List[str] = []
    if why:
        provenance["why"] = "transition_pattern:why_asked"
    else:
        todo.append("why")
    if closes:
        provenance["closes_regulations"] = "leverage:covers_targets"
    if evidence:
        provenance["expected_evidence"] = "transition_pattern:expected_evidence"
    else:
        todo.append("expected_evidence")
    if controls:
        provenance["typical_controls"] = "execution:control_links"
    todo.extend(_SOFT_FIELDS)   # always expert-owned
    return PlaybookDraft(
        capability_id=capability_id,
        status=DraftStatus.DRAFT_GENERATED,
        title=capability_id.replace("_", " "),
        why=why,
        closes_regulations=closes,
        expected_evidence=evidence,
        typical_controls=controls,
        provenance=provenance,
        todo=todo,
        disclaimer=_DISCLAIMER,
    )
 def drafts_from_pattern(
    pattern: Dict[str, Any],
    control_links_by_cap: Optional[Dict[str, List[str]]] = None,
 ) -> List[PlaybookDraft]:
    """Build one playbook draft for each delta requirement of a pattern that names a capability.

    Args:
        pattern: A transition/convergence pattern dict; only `delta_requirements` is read.
        control_links_by_cap: Optional `capability id -> control links` mapping. A capability
            without an entry gets no controls.

    Returns:
        The drafts in the order of the pattern's delta requirements. Requirements without a
        truthy `capability` are skipped.
    """
    per_cap_controls = control_links_by_cap or {}
    named = [
        (str(req.get("capability")), req)
        for req in pattern.get("delta_requirements", [])
        if req.get("capability")
    ]
    return [
        generate_playbook_draft(cap_id, req, per_cap_controls.get(cap_id))
        for cap_id, req in named
    ]
@@ -0,0 +1,46 @@
 """Schemas for Knowledge Production — deterministic draft assembly + lifecycle.
 The corpus is no longer written by hand: it is deterministically PREPARED from data the software
 already owns (Capability, Transition Pattern, Controls, Evidence, leverage), then curated by an
 expert. A `PlaybookDraft` is a machine-assembled skeleton with per-field provenance and an explicit
 TODO list of what still needs human (or offline-propose) input. No LLM in the deterministic core.
 Python 3.9 compatible (no `|` unions).
 """
 from __future__ import annotations
 from enum import Enum
 from typing import Dict, List
 from pydantic import BaseModel, Field
 class DraftStatus(str, Enum):
    """Freigabestatus — the knowledge lifecycle from machine draft to proven (mirrors the
    transition-pattern / playbook maturity, with a machine-assembled pre-stage)."""
    # Members are listed in lifecycle order. The draft generator only ever emits DRAFT_GENERATED.
    DRAFT_GENERATED = "draft_generated"   # machine-assembled, NOT yet expert-touched
    IN_REVIEW = "in_review"               # an expert is curating it
    REVIEWED = "reviewed"                 # internally reviewed
    VALIDATED = "validated"               # domain expert confirmed
    PROVEN = "proven"                     # confirmed in the field
 class PlaybookDraft(BaseModel):
    """A deterministically assembled playbook draft for one capability.
    Owned fields (why / closes_regulations / expected_evidence / typical_controls) are filled from
    existing data with provenance; the practitioner know-how (tools / process_steps / how_others)
    is left as TODO. The expert reviews a draft instead of writing from a blank page.
    """
    capability_id: str                                     # required: the capability this draft belongs to
    status: DraftStatus = DraftStatus.DRAFT_GENERATED      # lifecycle stage; a fresh draft starts here
    title: str = ""                                        # the generator sets it to capability_id with "_" replaced by " "
    why: str = ""                                          # from the transition pattern (why_asked/missing_because)
    closes_regulations: List[str] = Field(default_factory=list)   # from leverage (covers_targets)
    expected_evidence: List[str] = Field(default_factory=list)    # from the transition pattern
    typical_controls: List[str] = Field(default_factory=list)     # injected from Execution (may be empty)
    provenance: Dict[str, str] = Field(default_factory=dict)      # field -> source it was assembled from
    todo: List[str] = Field(default_factory=list)          # fields the expert/offline-propose must still add
    disclaimer: str = ""                                   # machine draft, requires expert curation
@@ -0,0 +1,29 @@
 """Product Regulatory Navigator — thin missing-facts layer.
 Sits above the CanonicalProductRegulatoryProfile (prefilled from company-profile /
 ProductWizard) and reports only which facts are still missing + prioritized
 questions to collect them. It decides which facts are needed, NOT what regulation
 applies — that stays with the Scope Engine (step 3). No regulation logic, no UI,
 no Go, no RAG.
 """
 from __future__ import annotations
 from .engine import CompletenessSummary, NavigatorResult, apply_answers, navigate
 from .questions import (
    QUESTION_CATALOG,
    AnswerType,
    NavigatorQuestion,
    QuestionPriority,
 )
 # Public API of the package: the engine functions and result models, plus the question
 # catalog and its types.
 __all__ = [
    "navigate",
    "apply_answers",
    "NavigatorResult",
    "CompletenessSummary",
    "NavigatorQuestion",
    "AnswerType",
    "QuestionPriority",
    "QUESTION_CATALOG",
 ]
@@ -0,0 +1,116 @@
 """Product Regulatory Navigator engine — missing-facts only.
 `navigate(profile)` reports which canonical fields are still unknown and the
 prioritized questions to fill them. `apply_answers(profile, answers)` returns the
 updated profile. It NEVER decides what applies — that is the Scope Engine (step 3).
 Pure field-presence checking; no scope-engine import, no regulation evaluation.
 """
 from __future__ import annotations
 from typing import Any, Dict, List, Type
 from pydantic import BaseModel, Field
 from compliance.profile.canonical import (
    CanonicalLifecyclePhase,
    CanonicalProductRegulatoryProfile,
    EconomicOperatorRole,
    ProductComponent,
 )
 from .questions import QUESTION_CATALOG, NavigatorQuestion, QuestionPriority
 # Target fields whose raw answers _coerce() converts by calling the mapped enum class on the value.
 _ENUM_FIELDS: Dict[str, Type[Any]] = {
    "economic_operator_role": EconomicOperatorRole,
    "lifecycle_phase": CanonicalLifecyclePhase,
 }
 class CompletenessSummary(BaseModel):
    """Counts describing how many catalog questions are answered vs. still open (built by navigate())."""
    total_relevant: int  # navigate() sets this to len(QUESTION_CATALOG)
    answered: int  # total_relevant minus missing
    missing: int  # number of catalog questions whose target field is still unknown
    missing_by_priority: Dict[str, int] = Field(default_factory=dict)  # priority value ("P0"/"P1"/"P2") -> open count
    ready_for_scope: bool  # True once no P0 fact is missing
    note: str = ""  # human-readable one-line summary
 class NavigatorResult(BaseModel):
    """Result of navigate(): the open facts, the questions that collect them, and summary counts."""
    missing_facts: List[str] = Field(default_factory=list)  # canonical target fields
    suggested_questions: List[NavigatorQuestion] = Field(default_factory=list)  # open questions, sorted by priority rank
    completeness_summary: CompletenessSummary
 def _value(profile: CanonicalProductRegulatoryProfile, dotted: str) -> Any:
    """Read `dotted` from `profile`; the part after the first "." is looked up on the nested object.
    A missing final attribute yields None. Only the first "." is split, so one nesting level is resolved.
    """
    if "." not in dotted:
        return getattr(profile, dotted, None)
    parent_name, child_name = dotted.split(".", 1)
    parent = getattr(profile, parent_name)
    return getattr(parent, child_name, None)
 def _is_unknown(profile: CanonicalProductRegulatoryProfile, q: NavigatorQuestion) -> bool:
    """Return True when the question's target field holds no fact yet: it is None or an empty list.
    Any other value (including False and "") counts as answered.
    """
    current = _value(profile, q.target_field)
    return current is None or (isinstance(current, list) and not current)
 def navigate(profile: CanonicalProductRegulatoryProfile) -> NavigatorResult:
    """Report which catalog facts are still unknown on `profile` and the questions that collect them.
    Open questions are ordered by priority rank (stable within a rank, i.e. catalog order). The
    summary counts them per priority and flags `ready_for_scope` once no P0 question is open.
    """
    open_questions = sorted(
        (question for question in QUESTION_CATALOG if _is_unknown(profile, question)),
        key=lambda question: question.order(),
    )
    open_per_priority: Dict[str, int] = {}
    for question in open_questions:
        level = question.priority.value
        open_per_priority[level] = open_per_priority.get(level, 0) + 1
    scope_ready = QuestionPriority.P0.value not in open_per_priority
    catalog_size = len(QUESTION_CATALOG)
    open_count = len(open_questions)
    answered_count = catalog_size - open_count
    readiness = "ja" if scope_ready else "nein (P0 fehlt)"
    summary = CompletenessSummary(
        total_relevant=catalog_size,
        answered=answered_count,
        missing=open_count,
        missing_by_priority=open_per_priority,
        ready_for_scope=scope_ready,
        note=(
            "%d von %d Fakten vorhanden; %d offen. Scope-Engine startklar: %s."
            % (answered_count, catalog_size, open_count, readiness)
        ),
    )
    return NavigatorResult(
        missing_facts=[question.target_field for question in open_questions],
        suggested_questions=open_questions,
        completeness_summary=summary,
    )
 def _coerce(q: NavigatorQuestion, value: Any) -> Any:
    """Convert a raw answer into the value stored on the question's target field.
    - fields listed in `_ENUM_FIELDS` go through the enum constructor (an invalid value raises there);
    - "components" becomes a list of ProductComponent (dict items are expanded as keyword arguments);
    - country_list / multiselect answers become a list; a bare string is kept as ONE item instead of
      being split into characters;
    - bool answers accept the usual textual spellings, so "false" / "nein" / "no" / "0" are False
      rather than truthy non-empty strings;
    - anything else is returned unchanged.
    """
    if q.target_field in _ENUM_FIELDS:
        return _ENUM_FIELDS[q.target_field](value)
    if q.target_field == "components":
        return [c if isinstance(c, ProductComponent) else ProductComponent(**c) for c in (value or [])]
    if q.answer_type.value in {"country_list", "multiselect"}:
        if isinstance(value, str):
            # list("DE") would yield ["D", "E"]; a single textual answer is one entry
            return [value] if value else []
        return list(value or [])
    if q.answer_type.value == "bool":
        if isinstance(value, str):
            token = value.strip().lower()
            if token in ("false", "no", "nein", "0"):
                return False
            if token in ("true", "yes", "ja", "1"):
                return True
        # non-string values and unrecognised strings keep plain truthiness
        return bool(value)
    return value
 def apply_answers(
    profile: CanonicalProductRegulatoryProfile, answers: Dict[str, Any]
 ) -> CanonicalProductRegulatoryProfile:
    """Return a deep copy of `profile` with the given answers written to their target fields.
    `answers` maps question_id -> raw answer. Ids not in the catalog and None answers are skipped;
    every other answer is converted with _coerce() and set on the copy. The input is not modified.
    NOTE(review): for a dotted target the nested object is fetched with getattr and written to
    directly — presumably it is always present on the profile; confirm against the profile model.
    """
    updated = profile.model_copy(deep=True)
    catalog = {question.question_id: question for question in QUESTION_CATALOG}
    for question_id in answers:
        raw = answers[question_id]
        question = catalog.get(question_id)
        if question is None or raw is None:
            continue
        value = _coerce(question, raw)
        first, dot, rest = question.target_field.partition(".")
        if dot:
            # dotted path: write the attribute on the nested object
            setattr(getattr(updated, first), rest, value)
            continue
        setattr(updated, first, value)
    return updated
@@ -0,0 +1,171 @@
 """Product Regulatory Navigator — question catalog.
 The Navigator is a THIN missing-facts layer over CanonicalProductRegulatoryProfile.
 It does NOT decide what applies — `regulatory_domains_unblocked` is static metadata
 (which domains a fact would help the Scope Engine decide later), never an
 evaluation. No regulation logic, no UI, no Go, no RAG.
 `NavigatorQuestion` is an interaction type, NOT a compliance-meta-model class
 (architecture freeze v1.0 untouched).
 """
 from __future__ import annotations
 from enum import Enum
 from typing import List
 from pydantic import BaseModel, Field
 from compliance.profile.canonical import CanonicalLifecyclePhase, EconomicOperatorRole
 class AnswerType(str, Enum):
    """Kind of answer a NavigatorQuestion expects (string-valued enum)."""
    BOOL = "bool"
    ENUM = "enum"
    MULTISELECT = "multiselect"
    TEXT = "text"
    COUNTRY_LIST = "country_list"
    COMPONENT_LIST = "component_list"
 class QuestionPriority(str, Enum):
    """Priority tier of a question; the numeric sort rank lives in _PRIORITY_ORDER."""
    P0 = "P0"  # blocks scope: EU-vs-not, role, lifecycle, machine/component
    P1 = "P1"  # unblocks a specific domain: RED, Data Act, environment, security
    P2 = "P2"  # refinement: structured BOM
 # Sort rank per priority (lower sorts first); read by NavigatorQuestion.order().
 _PRIORITY_ORDER = {QuestionPriority.P0: 0, QuestionPriority.P1: 1, QuestionPriority.P2: 2}
 class NavigatorQuestion(BaseModel):
    """One catalog question that collects a single missing fact for the canonical profile."""
    question_id: str  # unique id of the question within the catalog
    target_field: str  # dotted path into the canonical profile
    label: str  # the question text
    why_needed: str  # short rationale for asking
    regulatory_domains_unblocked: List[str] = Field(default_factory=list)  # static metadata, never an evaluation
    answer_type: AnswerType
    options: List[str] = Field(default_factory=list)  # allowed values; the catalog fills it for its ENUM questions
    priority: QuestionPriority
    def order(self) -> int:
        """Return the numeric sort rank of this question's priority (P0 -> 0, P1 -> 1, P2 -> 2)."""
        return _PRIORITY_ORDER[self.priority]
 # Option lists for the two ENUM questions, taken from the canonical enums' values.
 _ROLE_OPTIONS = [e.value for e in EconomicOperatorRole]
 _PHASE_OPTIONS = [e.value for e in CanonicalLifecyclePhase]
 # The full question catalog: one entry per canonical profile fact the Navigator can ask for.
 # Entries are grouped by priority tier (P0, P1, P2); `target_field` is the dotted profile path
 # the answer is written to. Labels and rationales are runtime strings (German).
 QUESTION_CATALOG: List[NavigatorQuestion] = [
    # ── P0: block the scope decision itself ───────────────────────────
    NavigatorQuestion(
        question_id="markets",
        target_field="markets",
        label="In welche Märkte / Länder liefern Sie das Produkt?",
        why_needed="Bestimmt EU- vs. Nicht-EU-Anwendbarkeit und nationale Pflichten.",
        regulatory_domains_unblocked=["cyber", "machine_safety", "data", "radio", "emv", "environment"],
        answer_type=AnswerType.COUNTRY_LIST,
        priority=QuestionPriority.P0,
    ),
    NavigatorQuestion(
        question_id="economic_operator_role",
        target_field="economic_operator_role",
        label="Welche Rolle nehmen Sie ein?",
        why_needed="Pflichten hängen von der Rolle ab (Hersteller/Importeur/Händler/Betreiber/Service).",
        regulatory_domains_unblocked=["cyber", "machine_safety", "data"],
        answer_type=AnswerType.ENUM,
        options=_ROLE_OPTIONS,
        priority=QuestionPriority.P0,
    ),
    NavigatorQuestion(
        question_id="lifecycle_phase",
        target_field="lifecycle_phase",
        label="In welcher Lebenszyklusphase betrachten Sie das Produkt?",
        why_needed="Manche Pflichten greifen nur beim Inverkehrbringen oder in der Wartung.",
        regulatory_domains_unblocked=["cyber", "machine_safety"],
        answer_type=AnswerType.ENUM,
        options=_PHASE_OPTIONS,
        priority=QuestionPriority.P0,
    ),
    NavigatorQuestion(
        question_id="is_machine",
        target_field="is_machine",
        label="Ist das Produkt eine (vollständige) Maschine?",
        why_needed="Entscheidet die Anwendbarkeit der Maschinenverordnung.",
        regulatory_domains_unblocked=["machine_safety"],
        answer_type=AnswerType.BOOL,
        priority=QuestionPriority.P0,
    ),
    NavigatorQuestion(
        question_id="is_component",
        target_field="is_component",
        label="Ist das Produkt ein Bauteil / eine unvollständige Maschine?",
        why_needed="Sicherheitsbauteil vs. vollständige Maschine ändert die Pflichten.",
        regulatory_domains_unblocked=["machine_safety"],
        answer_type=AnswerType.BOOL,
        priority=QuestionPriority.P0,
    ),
    # ── P1: unblock one specific domain ───────────────────────────────
    NavigatorQuestion(
        question_id="has_radio_module",
        target_field="has_radio_module",
        label="Enthält das Produkt ein Funkmodul (WLAN/Bluetooth/Mobilfunk)?",
        why_needed="Ein Funkmodul löst die Funkanlagen-Richtlinie (RED) aus.",
        regulatory_domains_unblocked=["radio"],
        answer_type=AnswerType.BOOL,
        priority=QuestionPriority.P1,
    ),
    NavigatorQuestion(
        question_id="generates_usage_data",
        target_field="generates_usage_data",
        label="Erzeugt das vernetzte Produkt nutzbare Produkt-/Nutzungsdaten?",
        why_needed="Erzeugte Nutzungsdaten entscheiden über Data-Act-Pflichten.",
        regulatory_domains_unblocked=["data"],
        answer_type=AnswerType.BOOL,
        priority=QuestionPriority.P1,
    ),
    NavigatorQuestion(
        question_id="has_security_function",
        target_field="has_security_function",
        label="Hat das Produkt eine dedizierte Security-Funktion (gegen böswillige Akteure)?",
        why_needed="Trennt Security- von Safety-Funktion (CRA vs. MaschinenVO).",
        regulatory_domains_unblocked=["cyber", "machine_safety"],
        answer_type=AnswerType.BOOL,
        priority=QuestionPriority.P1,
    ),
    NavigatorQuestion(
        question_id="env_wastewater",
        target_field="environmental.discharges_to_wastewater",
        label="Gibt das Produkt Stoffe an Wasser / Abwasser ab?",
        why_needed="Abwassereinleitung löst Abwasser-/Gewässerrecht aus.",
        regulatory_domains_unblocked=["environment_water"],
        answer_type=AnswerType.BOOL,
        priority=QuestionPriority.P1,
    ),
    NavigatorQuestion(
        question_id="env_air",
        target_field="environmental.emits_to_air",
        label="Entstehen Luftemissionen (VOC, Staub, Verbrennung, Aerosole)?",
        why_needed="Luftemissionen lösen Immissionsschutzrecht aus.",
        regulatory_domains_unblocked=["environment_air"],
        answer_type=AnswerType.BOOL,
        priority=QuestionPriority.P1,
    ),
    NavigatorQuestion(
        question_id="env_chemicals",
        target_field="environmental.uses_cleaning_chemicals",
        label="Werden Reinigungs-, Desinfektions- oder Biozidmittel verwendet/mitgeliefert?",
        why_needed="Chemikalien lösen REACH/CLP/Detergenzien-/Biozidrecht aus.",
        regulatory_domains_unblocked=["chemicals"],
        answer_type=AnswerType.BOOL,
        priority=QuestionPriority.P1,
    ),
    # ── P2: refinement ────────────────────────────────────────────────
    NavigatorQuestion(
        question_id="components",
        target_field="components",
        label="Aus welchen wesentlichen Komponenten besteht das Produkt?",
        why_needed="Eine strukturierte Stückliste verfeinert komponenten-abgeleitete Pflichten.",
        regulatory_domains_unblocked=["radio", "emv", "environment_water", "chemicals"],
        answer_type=AnswerType.COMPONENT_LIST,
        priority=QuestionPriority.P2,
    ),
 ]
@@ -0,0 +1,72 @@
 """Smart Onboarding Advisor — the onboarding runtime step (orchestration over existing engines).
 Turns (company + products + certifications + target) into inferred assumptions, the next best questions
 (<=5, each self-explaining), the capability delta, top measures, evidence requests and completeness —
 with NO sales interpretation and NO regulation picking. Orchestrator only: no new engine/registry/
 meta-model; certificate->capability hypotheses and target requirements are INJECTED.
 """
 from __future__ import annotations
 from .engine import advisor_start, apply_answer
 from .hypotheses import (
    CapabilityHypothesis,
    inferred_hypotheses,
    resolve_for_certifications,
 )
 from .observations import (
    Observation,
    ObservationType,
    empirical_confidence,
    empirical_distribution,
    reviewed,
 )
 from .signals import (
    ProducedSignal,
    SignalVocabularyEntry,
    normalize_signals,
 )
 from .silent_intake import (
    DetectedCapability,
    IntakeSignal,
    ProductFact,
    SignalMapping,
    SilentIntakeResult,
    silent_intake,
 )
 from .schemas import (
    AdvisorMeasure,
    AdvisorQuestion,
    AdvisorResult,
    InferredAssumption,
    OnboardingInput,
    RejectedAssumption,
 )
 # Public names re-exported by this package (imported above from .engine, .hypotheses,
 # .observations, .signals, .silent_intake and .schemas).
 __all__ = [
    "advisor_start",
    "apply_answer",
    "OnboardingInput",
    "AdvisorResult",
    "AdvisorQuestion",
    "AdvisorMeasure",
    "InferredAssumption",
    "RejectedAssumption",
    "CapabilityHypothesis",
    "inferred_hypotheses",
    "resolve_for_certifications",
    "Observation",
    "ObservationType",
    "empirical_distribution",
    "empirical_confidence",
    "reviewed",
    "silent_intake",
    "IntakeSignal",
    "SignalMapping",
    "DetectedCapability",
    "ProductFact",
    "SilentIntakeResult",
    "ProducedSignal",
    "SignalVocabularyEntry",
    "normalize_signals",
 ]
@@ -0,0 +1,154 @@
 """Smart Onboarding Advisor — orchestration over the existing engines (the onboarding runtime step).
 The point of the whole platform, made usable: the user types company + products + certifications +
 target, and the system does the rest — no sales interpretation, no regulation picking. This is an
 ORCHESTRATOR, not a new engine: it wires Company 2A (Evidence -> Capability), RS-005 (Capability ->
 Delta), optimization (Delta -> Roadmap) and completeness into one onboarding flow.
 Three principles it must honour (acceptance criteria):
  - Multi-cert works; a profile is built from ALL certificates.
  - relevance(evidence, target): ISO 14001 is NOT falsely relevant to the CRA; ISO 27001/TISAX REDUCE
    questions but satisfy NOTHING automatically (Welt-1 -> verification_required).
  - Only the NEXT BEST questions (<= 5), each explaining WHY; every answer updates the profile.
 Certificate -> probable-capability hypotheses and the target's required capabilities are INJECTED (the
 hypotheses are curated knowledge, not in this code). No corpus loaded here. Python 3.9 compatible.
 """
 from __future__ import annotations
 from typing import Dict, List, Optional, Sequence
 from ..company import (
    CapabilityMappingEntry,
    Certification,
    CompanyCapabilityProfile,
    CompanyContext,
    build_company_profile,
 )
 from ..completeness import assess_completeness
 from ..optimization import roadmap_from_delta
 from ..reasoning.enums import Confidence
 from ..transition_reasoning import (
    CoverageStatus,
    TargetRequirement,
    TransitionContext,
    TransitionGoal,
    assess_transition,
 )
 from .schemas import (
    AdvisorMeasure,
    AdvisorQuestion,
    AdvisorResult,
    InferredAssumption,
    OnboardingInput,
    RejectedAssumption,
 )
 # Score contributions used when ranking questions in advisor_start():
 # _GAIN is looked up by a request's information_gain value (fallback 1),
 # _RISK by its priority value (fallback 0).
 _GAIN = {"high": 3, "medium": 2, "low": 1}
 _RISK = {"high": 2, "medium": 1, "low": 0}
 def _profile(
    inp: OnboardingInput, cert_hypotheses: Dict[str, List[str]],
    detected: Optional[Sequence[str]] = None,
 ) -> CompanyCapabilityProfile:
    """Build the company capability profile from held certificates plus silently detected capabilities.
    Only hypotheses for certificates listed in `inp.certifications` with a non-empty capability list are
    used (MEDIUM confidence). Detected capabilities, if any, are added under the synthetic certificate
    id "__detected__" with HIGH confidence, de-duplicated in first-seen order.
    """
    mapping = {}
    for cert_id, capability_ids in cert_hypotheses.items():
        if cert_id not in inp.certifications or not capability_ids:
            continue
        mapping[cert_id] = CapabilityMappingEntry(
            capability_ids=list(capability_ids), confidence=Confidence.MEDIUM)
    held = [Certification(certification_id=cert_id) for cert_id in mapping]
    if detected:
        # Silent Pass: concrete findings -> HIGH confidence
        unique_detected = list(dict.fromkeys(detected))
        mapping["__detected__"] = CapabilityMappingEntry(
            capability_ids=unique_detected, confidence=Confidence.HIGH)
        held.append(Certification(certification_id="__detected__"))
    context = CompanyContext(company_id=inp.company or "company", certifications=held)
    return build_company_profile(context, mapping)
 def advisor_start(
    inp: OnboardingInput,
    cert_hypotheses: Dict[str, List[str]],
    target_requirements: Sequence[TargetRequirement],
    target_id: str = "target",
    covers_targets: Optional[Dict[str, List[str]]] = None,
    corpus_status: Optional[Dict[str, str]] = None,
    uncertain: Optional[List[Dict[str, str]]] = None,
    detected_capabilities: Optional[Sequence[str]] = None,
 ) -> AdvisorResult:
    """Run the onboarding flow: (silent intake +) certs -> profile -> delta -> ranked questions + measures.
    Pure orchestration; deterministic. `cert_hypotheses` (cert -> probable cap ids), `target_requirements`
    and `detected_capabilities` (from the Silent Knowledge Pass) are INJECTED. Detected capabilities are
    recognised WITHOUT asking -> they shrink the delta and remove questions.
    Args:
        inp: the onboarding input; company, certifications, known_evidence and target are read here.
        cert_hypotheses: certificate id -> capability ids that certificate probably provides.
        target_requirements: the target's required capabilities (each exposes `capability_id`).
        target_id: id given to the transition goal; also the completeness target when `inp.target` is empty.
        covers_targets: capability id -> targets it covers; the list length is the leverage score.
        corpus_status / uncertain: passed through to assess_completeness.
        detected_capabilities: capability ids recognised by the Silent Pass.
    Returns:
        AdvisorResult with at most 5 questions and at most 5 measures.
    """
    max_questions = 5
    max_measures = 5
    covers_targets = covers_targets or {}
    # Materialise once: both inputs are read twice below, so a one-shot iterable would otherwise
    # arrive empty at its second use.
    requirements = list(target_requirements)
    detected = list(detected_capabilities or [])
    required = {r.capability_id for r in requirements}
    profile = _profile(inp, cert_hypotheses, detected)
    auto_detected = sorted(set(detected) & required)
    auto_detected_ids = set(auto_detected)                  # built once for the membership test below
    assess = assess_transition(
        TransitionContext(company_id=inp.company or "company", target=TransitionGoal(target_id=target_id)),
        requirements, profile)
    # inferred (Welt-1): per cert, the caps it probably provides that are RELEVANT to this target
    inferred: List[InferredAssumption] = []
    rejected: List[RejectedAssumption] = []
    for cert in inp.certifications:
        caps = set(cert_hypotheses.get(cert, []))
        relevant = sorted(caps & required)
        if relevant:
            inferred.append(InferredAssumption(
                certification=cert, capabilities=relevant,
                statement="%s legt %d relevante Fähigkeit(en) nahe — Verifikation erforderlich, nicht automatisch erfüllt"
                % (cert, len(relevant))))
        elif caps:
            # the cert has hypotheses, but none of them is required by this target
            rejected.append(RejectedAssumption(
                certification=cert,
                statement="%s ist für dieses Ziel nicht relevant" % cert,
                reason="relevance(evidence, target) = 0 — keine geforderte Fähigkeit abgedeckt"))
    # next best questions (<=5): re-rank the RS-005 requests by info gain + leverage + risk + evidence-gap
    known_ev = set(inp.known_evidence)
    scored = []
    for q in assess.question_requests:
        lev = len(covers_targets.get(q.capability_id, []))
        # +1 when the question expects evidence and none of it is already known
        ev_missing = 1 if (q.expected_evidence and not (set(q.expected_evidence) & known_ev)) else 0
        score = _GAIN.get(q.information_gain.value, 1) + lev + _RISK.get(q.priority.value, 0) + ev_missing
        scored.append((score, q))
    # highest score first; capability id breaks ties so the order is deterministic
    scored.sort(key=lambda x: (-x[0], x[1].capability_id))
    next_q = [
        AdvisorQuestion(capability_id=q.capability_id, question_intent=q.question_intent, why=q.reason,
                        information_value=float(s), priority=q.priority.value)
        for s, q in scored[:max_questions]
    ]
    delta = sorted({c.capability_id for c in assess.coverage if c.status == CoverageStatus.MISSING})
    plan = roadmap_from_delta(assess, {c: covers_targets.get(c, []) for c in delta})
    measures = [AdvisorMeasure(capability_id=m.capability_id, leverage=m.leverage, closes=m.covers)
                for m in plan.ranked_measures[:max_measures]]
    evidence = sorted({e for q in assess.question_requests for e in q.expected_evidence})
    applicable = list(inp.target) or [target_id]
    rep = assess_completeness(applicable, corpus_status or {}, uncertain=uncertain or [])
    unsupported = [e.subject for e in rep.exclusions]
    probably = [c for c in assess.summary.probably_covered if c not in auto_detected_ids]
    return AdvisorResult(
        inferred_assumptions=inferred, rejected_assumptions=rejected, auto_detected=auto_detected,
        next_best_questions=next_q, capability_delta=delta, top_measures=measures,
        evidence_requests=evidence, unsupported_domains=unsupported,
        completeness_summary=rep.completeness_summary,
        headline="%d Anforderungen erkannt · %d automatisch erkannt (Intake) · %d wahrscheinlich (Zertifikate) · %d zu klären"
        % (len(assess.coverage), len(auto_detected), len(probably), len(next_q)))
 def apply_answer(known_capabilities: Sequence[str], capability_id: str, answer: str) -> List[str]:
    """Fold one answer into the known-capability list and return the new list.
    The input is de-duplicated in first-seen order. Only the exact answer "confirmed" appends
    `capability_id` (when not already present); any other answer leaves the set as it is.
    The input sequence itself is never modified.
    """
    seen = set()
    ordered: List[str] = []
    for cap in known_capabilities:
        if cap in seen:
            continue
        seen.add(cap)
        ordered.append(cap)
    if answer != "confirmed" or capability_id in ordered:
        return ordered
    ordered.append(capability_id)
    return ordered
@@ -0,0 +1,54 @@
 """Certification Capability Hypotheses — capability-centric, with EMPIRICAL (computed) confidence.
 Each hypothesis is its own knowledge object: "IF a company holds one of `supported_by` certs, we EXPECT
 `capability` (verification required)" — Welt-1, never "erfüllt". Written ONCE per capability with a list
 of supporting certs (reuse, not redundancy), so multi-certification merges AUTOMATICALLY.
 `confidence` is NOT an expert/LLM score: it is COMPUTED from real-onboarding observations
 (confirmed / (confirmed+refuted)), `None` until any are seen. This is the empirical learning loop — the
 long-term moat. The library is DATA, loaded outside this module and injected. Python 3.9 compatible.
 """
 from __future__ import annotations
 from typing import Dict, List, Sequence
 from pydantic import BaseModel, Field
 class CapabilityHypothesis(BaseModel):
    """Curated knowledge only. Confidence is NOT stored here — it is computed from the reviewed
    observation stream (see observations.py); a raw answer never changes a hypothesis (review gate)."""
    id: str                                                      # hypothesis id
    capability: str                                              # the capability this hypothesis expects
    supported_by: List[str] = Field(default_factory=list)        # certifications that suggest this capability
    relationship: str = "supports"                               # supports / partially_supports
    verification_required: bool = True                           # Welt-1: never auto-satisfied
    question_intent: str = "verify_existence"
    expected_evidence: List[str] = Field(default_factory=list)
    kind: str = "shared"                                         # shared / specific
 def inferred_hypotheses(
    certifications: Sequence[str], library: Sequence[CapabilityHypothesis]
 ) -> List[CapabilityHypothesis]:
    """Select, in library order, each hypothesis supported by at least one held certification."""
    held = frozenset(certifications)
    matches: List[CapabilityHypothesis] = []
    for hypothesis in library:
        if not held.isdisjoint(hypothesis.supported_by):
            matches.append(hypothesis)
    return matches
 def resolve_for_certifications(
    certifications: Sequence[str], library: Sequence[CapabilityHypothesis]
 ) -> Dict[str, List[str]]:
    """Turn the capability-centric library into the Advisor's `cert -> [capability]` mapping.
    Only held certifications appear as keys (sorted); each capability list is de-duplicated and keeps
    the order in which the library first mentions the capability for that certification.
    """
    held = set(certifications)
    per_cert: Dict[str, List[str]] = {}
    for hypothesis in library:
        for cert in hypothesis.supported_by:
            if cert not in held:
                continue
            bucket = per_cert.setdefault(cert, [])
            if hypothesis.capability not in bucket:
                bucket.append(hypothesis.capability)
    return dict(sorted(per_cert.items(), key=lambda item: item[0]))
@@ -0,0 +1,85 @@
 """Observation Model — the empirical learning unit (Task 59a: model BEFORE persistence/API).
 The learning point is NOT the hypothesis, it is the QUESTION. A hypothesis ("ISO 27001 suggests supplier
 management") produces a question ("Is there a documented supplier-security process?"), and the answer is
 rarely binary — "yes" / "no" / "partial, only critical suppliers" / "certified but not lived" are very
 different observations. So the chain is:
    Hypothesis -> Question -> Observation -> (Review) -> Hypothesis
 Two principles (durable):
  - Richer than confirmed/refuted: an Observation carries an `observation_type` (confirmed / partial /
    refuted / not_applicable / unknown), a free-text answer, a scope_note ("only critical suppliers"),
    and whether evidence was uploaded.
  - REVIEW GATE: a raw answer NEVER changes a hypothesis directly. Only REVIEWED observations calibrate;
    otherwise the system learns from outliers. Hypotheses stay curated knowledge; confidence is COMPUTED
    from the reviewed observation stream (keyed by hypothesis id), not stored on the hypothesis.
 This module defines the model + the deterministic statistics it enables (a DISTRIBUTION, not a single
 %). Persistence (store), aggregation across customers and hypothesis calibration are later tasks
 (59b/c/d). Pure, no I/O. Python 3.9 compatible.
 """
 from __future__ import annotations
 from enum import Enum
 from typing import Dict, List, Optional, Sequence
 from pydantic import BaseModel, Field
 class ObservationType(str, Enum):
    """Classification of one observation; confirmed / partial / refuted feed empirical_confidence()."""
    CONFIRMED = "confirmed"
    PARTIAL = "partial"
    REFUTED = "refuted"
    NOT_APPLICABLE = "not_applicable"
    UNKNOWN = "unknown"
 class Observation(BaseModel):
    """One real-onboarding answer to one hypothesis-driven question. The raw empirical unit."""
    hypothesis_id: str                                  # id of the hypothesis this observation belongs to
    capability: str = ""                                # denormalised for convenient aggregation
    question: str = ""                                  # the question that was actually asked
    answer: str = ""                                    # the customer's raw answer (free text)
    observation_type: ObservationType = ObservationType.UNKNOWN  # defaults to UNKNOWN until classified
    scope_note: Optional[str] = None                    # "only critical suppliers" / "only DE" / "not lived"
    evidence_uploaded: bool = False
    reviewed: bool = False                              # the review gate: only reviewed obs calibrate
    reviewed_by: Optional[str] = None
 # observation types that count as evidence for/against the capability (n/a + unknown do not)
 # NOTE(review): the functions visible in this module spell these three types out inline rather
 # than reading this tuple — confirm whether it is used elsewhere.
 _FOR_AGAINST = (ObservationType.CONFIRMED, ObservationType.PARTIAL, ObservationType.REFUTED)
 def empirical_distribution(
    observations: Sequence[Observation], reviewed_only: bool = True
 ) -> Dict[str, int]:
    """Tally observations by type value; every ObservationType is present in the result, zero included.
    With `reviewed_only` (the default) unreviewed observations are skipped — the review gate."""
    counts: Dict[str, int] = {}
    for kind in ObservationType:
        counts[kind.value] = 0
    for obs in observations:
        if reviewed_only and not obs.reviewed:
            continue
        counts[obs.observation_type.value] += 1
    return counts
 def empirical_confidence(
    observations: Sequence[Observation], reviewed_only: bool = True
 ) -> Optional[float]:
    """Empirical confidence = (confirmed + 0.5 * partial) / (confirmed + partial + refuted), 2 decimals.
    Counts come from empirical_distribution(), so the review gate applies by default. `not_applicable`
    and `unknown` stay out of the denominator. Returns None while no for/against observation counts."""
    tally = empirical_distribution(observations, reviewed_only)
    confirmed = tally[ObservationType.CONFIRMED.value]
    partial = tally[ObservationType.PARTIAL.value]
    refuted = tally[ObservationType.REFUTED.value]
    decisive = confirmed + partial + refuted
    if decisive == 0:
        return None
    weighted = confirmed + 0.5 * partial
    return round(weighted / decisive, 2)
 def reviewed(observations: Sequence[Observation]) -> List[Observation]:
    """Return, in input order, only the observations whose `reviewed` flag is set (the calibration set)."""
    kept: List[Observation] = []
    for obs in observations:
        if obs.reviewed:
            kept.append(obs)
    return kept
@@ -0,0 +1,63 @@
 """Schemas for the Smart Onboarding Advisor — the onboarding RUNTIME step.
 DTOs only. The Advisor ORCHESTRATES the existing engines (Company 2A, RS-005, optimization,
 completeness) — no new reasoning engine, no new capability registry, no new meta-model. Welt-1
 discipline: a certificate yields PROBABLE capabilities (verification required), never "erfüllt".
 Python 3.9 compatible (no `|` unions).
 """
 from __future__ import annotations
 from typing import List, Optional
 from pydantic import BaseModel, Field
 class OnboardingInput(BaseModel):
    """What the user supplies to start onboarding.
    NOTE(review): advisor_start() as shown in engine.py reads only company, certifications,
    known_evidence and target — confirm whether industry / products / markets are consumed elsewhere.
    """
    company: str = ""                                        # empty falls back to "company" as company_id
    industry: Optional[str] = None
    products: List[str] = Field(default_factory=list)
    markets: List[str] = Field(default_factory=list)
    certifications: List[str] = Field(default_factory=list)  # held certificate ids, matched against the hypotheses
    known_evidence: List[str] = Field(default_factory=list)  # evidence already in hand; lowers a question's score
    target: List[str] = Field(default_factory=list)          # informational; the delta uses injected requirements
 class InferredAssumption(BaseModel):
    """A held certificate together with the target-relevant capabilities it probably provides."""
    certification: str
    capabilities: List[str] = Field(default_factory=list)    # RELEVANT-to-target caps the cert probably provides
    verification_required: bool = True                       # Welt-1: never auto-satisfied
    statement: str = ""                                      # human-readable sentence describing the assumption
 class RejectedAssumption(BaseModel):
    """A certificate whose hypothesised capabilities cover none of the target's requirements."""
    certification: Optional[str] = None
    statement: str = ""                                      # human-readable sentence
    reason: str = ""                                         # e.g. "relevance(evidence, target) = 0"
 class AdvisorQuestion(BaseModel):
    """One of the next best questions, with its rationale and rank score."""
    capability_id: str                                       # the capability the question is about
    question_intent: str
    why: str                                                 # every question explains itself
    information_value: float = 0.0                           # deterministic rank score
    priority: str = "medium"
 class AdvisorMeasure(BaseModel):
    """A ranked measure from the roadmap: the capability to build and what it closes."""
    capability_id: str
    leverage: int = 0                                        # copied from the roadmap measure's leverage
    closes: List[str] = Field(default_factory=list)          # copied from the roadmap measure's `covers`
 class AdvisorResult(BaseModel):
    """Everything the Advisor returns for one onboarding run."""
    inferred_assumptions: List[InferredAssumption] = Field(default_factory=list)
    rejected_assumptions: List[RejectedAssumption] = Field(default_factory=list)
    auto_detected: List[str] = Field(default_factory=list)                     # Silent Pass: recognised w/o asking
    next_best_questions: List[AdvisorQuestion] = Field(default_factory=list)   # max 5
    capability_delta: List[str] = Field(default_factory=list)                  # capability ids whose coverage is MISSING
    top_measures: List[AdvisorMeasure] = Field(default_factory=list)           # max 5
    evidence_requests: List[str] = Field(default_factory=list)                 # expected evidence across all question requests
    unsupported_domains: List[str] = Field(default_factory=list)               # subjects of the completeness exclusions
    completeness_summary: str = ""
    headline: str = ""                                       # "N Anforderungen erkannt · A automatisch erkannt (Intake) · M wahrscheinlich (Zertifikate) · K zu klären"
@@ -0,0 +1,61 @@
 """Signal Producer interface + Normalizer — one signal language for all sources (NOT new architecture).
 The platform already HAS scanners (website, repo/code, SBOM, security headers, TLS, SPF/DKIM/DMARC,
 document analysis, RAG over uploads, product classification). The Silent Pass does not want a
 WebsiteScanner or a RepoScanner — it wants their UNIFIED output. So every source (a scanner, a PDF
 parser, a tender parser, an API, or the user) emits the SAME `ProducedSignal`
 {signal_id, source_type, confidence, evidence, provenance}, and `normalize_signals` reduces producer-
 specific signal ids to ONE canonical signal id via a vocabulary (id + aliases) — exactly the
 Requirement-Source / MCAP / regulation-alias pattern. The Silent Pass then never gets per-scanner logic.
 A common DATA FORMAT, not a new module/framework. Later a tender (`requires_sbom`) or an OEM spec
 (`supplier_requires_psirt`) produces the same stream as a website — the Silent Pass cannot tell the
 difference. Pure, deterministic, no I/O. Python 3.9 compatible.
 """
 from __future__ import annotations
 from typing import Dict, List, Optional, Sequence
 from pydantic import BaseModel, Field
 from .silent_intake import IntakeSignal
 class ProducedSignal(BaseModel):
    """What ANY signal producer emits — the common interface every source agrees on."""
    signal_id: str                              # raw or canonical id the producer used
    source_type: str = ""                       # website / repository / document / product / tender / oem / user / api
    confidence: float = 1.0                     # copied unchanged onto the IntakeSignal by normalize_signals
    evidence: Optional[str] = None              # the artifact found (already in hand)
    provenance: str = ""                        # url / filename / tender clause / "customer statement"
 class SignalVocabularyEntry(BaseModel):
    """One canonical signal + the producer-specific aliases that mean the same thing."""
    id: str                                                 # the canonical signal id
    aliases: List[str] = Field(default_factory=list)        # producer ids that normalize_signals maps to `id`
 def normalize_signals(
    produced: Sequence[ProducedSignal], vocabulary: Sequence[SignalVocabularyEntry]
 ) -> List[IntakeSignal]:
    """Map each produced signal onto its canonical id and return the IntakeSignal stream, in input order.
    The lookup table holds every vocabulary id and each of its aliases; when two entries claim the same
    key, the entry later in `vocabulary` wins. Ids not in the table pass through unchanged, so a new
    producer's signal stays visible. Confidence, evidence and provenance are carried over as-is.
    """
    canonical_of: Dict[str, str] = {}
    for entry in vocabulary:
        canonical_of[entry.id] = entry.id
        canonical_of.update({name: entry.id for name in entry.aliases})
    return [
        IntakeSignal(
            source=signal.source_type,
            signal=canonical_of.get(signal.signal_id, signal.signal_id),
            confidence=signal.confidence,
            evidence=signal.evidence,
            provenance=signal.provenance,
        )
        for signal in produced
    ]
@@ -0,0 +1,106 @@
 """Silent Knowledge Pass — recognise everything possible BEFORE asking a single question (Phase 0).
 The Advisor can say "I need 5 answers" but does not yet decide WHAT it can find out by itself. The Silent
 Pass runs first: from signals that existing scanners/parsers already produce (website, repository,
 documents, product data) it deterministically derives capabilities the company demonstrably HAS and
 product facts that drive scope — so every recognised item shrinks the delta and removes a question.
 The customer then experiences "we already recognised 11 of 17 — only these 4 remain" instead of a
 question wall. This is NOT new architecture: it is one orchestration step in front of the Advisor:
  Company -> Silent Intake -> Company Profile -> Hypotheses -> Delta -> Top Questions
 All building blocks already exist. SIGNALS are INJECTED (the scanners produce them); the signal->capability
 map is curated DATA, also injected. Pure, deterministic, no I/O. Python 3.9 compatible.
 """
 from __future__ import annotations
 from typing import Dict, List, Optional, Sequence, Set
 from pydantic import BaseModel, Field
 # Input record of silent_intake(). That function reads `signal` (lookup key), `source`,
 # `confidence` and `provenance`; it does not read `evidence` or `detail`.
 class IntakeSignal(BaseModel):
    """A CANONICAL signal the Silent Pass consumes. Producer-agnostic: the same `signal` may have come
    from a website, a repo, a PDF, a tender or the user — normalize_signals() unified them (see signals.py)."""
    source: str                                 # source_type: website / repository / document / product / tender / user
    signal: str                                 # CANONICAL signal id, e.g. "sbom_file_found"
    confidence: float = 1.0                     # carried from the producer
    evidence: Optional[str] = None              # the artifact already in hand
    provenance: str = ""                        # where it came from (url / filename / tender clause) — audit trail
    detail: str = ""                            # free-text (kept for back-compat)
 # Curated rule looked up by `signal` id in silent_intake(). That function tests `capability` and
 # `product_fact` independently, so a mapping that sets both contributes both.
 class SignalMapping(BaseModel):
    """Curated: what a signal lets us conclude. A signal yields a capability OR a product fact."""
    signal: str                                 # canonical signal id this rule applies to
    capability: Optional[str] = None            # capability the signal evidences
    relationship: str = "detected"              # detected (concrete artifact) / partial (indicative)
    evidence: Optional[str] = None              # the artifact found (already in hand -> no upload needed)
    product_fact: Optional[str] = None          # e.g. "connected_to_internet"
    fact_value: str = "true"                    # value recorded for `product_fact` (a string, default "true")
 # One capability the Silent Pass recognised, together with its audit trail.
 # Built by silent_intake() from a SignalMapping (capability/relationship/evidence) and the
 # IntakeSignal that triggered it (source/confidence/provenance).
 class DetectedCapability(BaseModel):
    capability: str                             # capability id taken from the mapping
    relationship: str = "detected"              # copied from the mapping's `relationship`
    source: str = ""                            # which signal/source detected it (audit trail)
    evidence: Optional[str] = None              # the mapping's evidence artifact, if any
    confidence: float = 1.0                     # carried from the producing signal
    provenance: str = ""                        # where the signal came from
 # One product fact derived from a signal (SignalMapping.product_fact / fact_value).
 class ProductFact(BaseModel):
    key: str                # fact name, e.g. the mapping's `product_fact`
    value: str = "true"     # fact value as a string; defaults to "true"
    source: str = ""        # source of the signal that established the fact
 # Result of the Silent Knowledge Pass, as assembled by silent_intake().
 class SilentIntakeResult(BaseModel):
    detected_capabilities: List[DetectedCapability] = Field(default_factory=list)   # one entry per capability id
    product_facts: List[ProductFact] = Field(default_factory=list)                  # one entry per fact key
    evidence_found: List[str] = Field(default_factory=list)                         # evidence artifacts already in hand
    summary: str = ""                                                               # human-readable one-line summary
    def capability_ids(self) -> List[str]:
        """Return the distinct detected capability ids in ascending order.

        These ids are what the Advisor receives as already-present capabilities (delta-reducing).
        """
        unique_ids = set()
        for detected in self.detected_capabilities:
            unique_ids.add(detected.capability)
        return sorted(unique_ids)
 def silent_intake(
    signals: Sequence[IntakeSignal], signal_map: Sequence[SignalMapping]
 ) -> SilentIntakeResult:
    """Derive capabilities + product facts from injected scanner signals (deterministic, no questions).

    `signals` is the canonical signal stream; `signal_map` is the curated signal -> conclusion data.
    Each signal is matched to the mappings that share its `signal` id; a mapping may contribute a
    detected capability, a product fact, or both.

    Capabilities are deduplicated by id and the STRONGEST detection is kept, so the outcome does
    not depend on which producer happened to report first: a mapping with relationship "detected"
    beats any other relationship, within the same class the higher signal confidence wins, and on
    a full tie the first-seen detection stays. Evidence named by ANY capability mapping that
    matched is collected in `evidence_found`, even when that mapping did not win the dedup —
    the artifact is in hand either way.

    Product facts are deduplicated by key; when several signals set the same fact, the last
    matching one determines value and source.

    Returns a SilentIntakeResult with capabilities and facts sorted by id/key, the sorted evidence
    list and a one-line summary.
    """
    by_signal: Dict[str, List[SignalMapping]] = {}
    for m in signal_map:
        by_signal.setdefault(m.signal, []).append(m)
    caps: Dict[str, DetectedCapability] = {}
    facts: Dict[str, ProductFact] = {}
    evidence: Set[str] = set()
    for s in signals:
        for m in by_signal.get(s.signal, []):
            if m.capability:
                # Evidence is recorded for every matching capability mapping, not only the winner.
                if m.evidence:
                    evidence.add(m.evidence)
                known = caps.get(m.capability)
                # Rank = (is a concrete "detected" artifact, confidence); strict ">" keeps the
                # first-seen detection on ties.
                stronger = known is None or (
                    (m.relationship == "detected", s.confidence)
                    > (known.relationship == "detected", known.confidence)
                )
                if stronger:
                    caps[m.capability] = DetectedCapability(
                        capability=m.capability, relationship=m.relationship,
                        source="%s:%s" % (s.source, s.signal), evidence=m.evidence,
                        confidence=s.confidence, provenance=s.provenance)
            if m.product_fact:
                facts[m.product_fact] = ProductFact(key=m.product_fact, value=m.fact_value, source=s.source)
    detected = [caps[k] for k in sorted(caps)]
    product_facts = [facts[k] for k in sorted(facts)]
    summary = (
        "Stille Vorbefüllung: %d Fähigkeit(en) automatisch erkannt, %d Produktfakt(en), %d Nachweis(e) bereits vorhanden."
        % (len(detected), len(product_facts), len(evidence))
    )
    return SilentIntakeResult(
        detected_capabilities=detected, product_facts=product_facts,
        evidence_found=sorted(evidence), summary=summary)
@@ -0,0 +1,21 @@
 """Regulatory Optimization — the Roadmap / Management renderer of the Capability Delta Engine.
 Ranks the OPEN Capability Delta (from RS-005) by regulatory leverage: which measure closes the
 most regulatory requirements at once. Answers the Geschäftsführer question "Womit anfangen?".
 Pure, deterministic, computed-not-stored. Consumes the RS-005 delta (acyclic dependency); the
 delta engine stays hermetic. No new corpus, no new meta-model class (freeze v1.0).
 """
 from __future__ import annotations
 from .engine import regulatory_leverage, roadmap_from_delta, select_within_budget
 from .schemas import BudgetPlan, OptimizationPlan, RankedMeasure
 # Public surface of the optimization package: the three engine functions and the three
 # schema classes imported above from .engine and .schemas.
 __all__ = [
    "regulatory_leverage",
    "select_within_budget",
    "roadmap_from_delta",
    "OptimizationPlan",
    "RankedMeasure",
    "BudgetPlan",
 ]
@@ -0,0 +1,134 @@
 """Regulatory Optimization — the Roadmap / Management RENDERER of the Capability Delta Engine.
 GAP analysis and measure-prioritisation are TWO VIEWS OF THE SAME COMPUTATION. The Capability
 Delta Engine (`compliance/transition_reasoning`, RS-005) computes Required - Known = the
 Capability Delta once. Renderers read that ONE delta:
  - Interview Renderer  (missing INFORMATION -> questions)    = `TransitionQuestionRequest` (built)
  - Roadmap / Management Renderer (missing CAPABILITIES -> measures by leverage) = THIS module
  - Evidence Renderer   (missing EVIDENCE -> upload requests) = later
 There is one truth, not a Gap engine and a separate Roadmap engine.
 A measure (a capability to implement) has *regulatory leverage* = the number of distinct
 regulatory requirements it closes AT ONCE (e.g. patch management closes a CRA, a MaschinenVO,
 an IEC 62443 and an ISO 27001 requirement -> leverage 4). The product turns from "you have N
 obligations" into "of N identified requirements you only need M measures — and these K first".
 Fully deterministic, computed-not-stored, NO new corpus. `regulatory_leverage`/`select_within_budget`
 are pure math over `capability -> requirements`; `roadmap_from_delta` binds them to the RS-005
 delta (dependency optimization -> transition_reasoning, acyclic; the delta engine stays hermetic).
 No new graph/meta-model class (freeze v1.0). Python 3.9 compatible.
 Honesty (Welt-1): the percentages are exact count ratios over the IDENTIFIED requirements from
 the known patterns — never "% gesetzeskonform". Label outputs as "der identifizierten Anforderungen".
 """
 from __future__ import annotations
 from typing import Dict, List, Optional
 from ..transition_reasoning import CoverageStatus, TransitionAssessment
 from .schemas import BudgetPlan, OptimizationPlan, RankedMeasure
 def _ranked(
    capability_requirements: Dict[str, List[str]], in_scope: Optional[List[str]]
 ) -> List[RankedMeasure]:
    """Build the ranked measure list shared by the public entry points.

    A capability becomes a measure when it closes at least one requirement key that is in scope
    (`in_scope`, or every key seen in `capability_requirements` when `in_scope` is None).
    Measures are ordered by leverage descending, then capability id ascending, and each one
    carries the running total / running share of requirements closed up to its rank.
    """
    allowed = (
        {req for reqs in capability_requirements.values() for req in reqs}
        if in_scope is None
        else set(in_scope)
    )
    unsorted_measures = [
        RankedMeasure(capability_id=cap_id, covers=closed, leverage=len(closed))
        for cap_id, closed in (
            (cap_id, sorted(set(reqs) & allowed))
            for cap_id, reqs in capability_requirements.items()
        )
        if closed  # a capability that closes nothing in scope is not a measure here
    ]
    ranked = sorted(unsorted_measures, key=lambda item: (-item.leverage, item.capability_id))
    grand_total = sum(item.leverage for item in ranked)
    so_far = 0
    for item in ranked:
        so_far += item.leverage
        item.cumulative_requirements = so_far
        item.cumulative_coverage = (so_far / grand_total) if grand_total else 0.0
    return ranked
 def regulatory_leverage(
    capability_requirements: Dict[str, List[str]], in_scope: Optional[List[str]] = None
 ) -> OptimizationPlan:
    """Rank measures by regulatory leverage and report the compression (requirements -> measures).

    `capability_requirements` maps a measure (capability id) to the requirement keys it satisfies.
    `in_scope` restricts which requirement keys are counted; None means every key that appears in
    `capability_requirements`.

    The returned plan lists the sorted in-scope keys, the ranked measures, the number of measures,
    and `total_requirements` = the sum of all measures' leverage. The headline also states the
    average leverage per measure (0.0 when there are no measures).
    """
    ranked = _ranked(capability_requirements, in_scope)
    if in_scope is None:
        scope_keys = sorted({req for reqs in capability_requirements.values() for req in reqs})
    else:
        scope_keys = sorted(set(in_scope))
    measure_count = len(ranked)
    requirement_count = sum(item.leverage for item in ranked)
    mean_leverage = (requirement_count / measure_count) if measure_count else 0.0
    return OptimizationPlan(
        in_scope_requirements=scope_keys,
        total_measures=measure_count,
        total_requirements=requirement_count,
        ranked_measures=ranked,
        headline=(
            "%d identifizierte Anforderungen aus %d Regelwerken -> %d Massnahmen (Ø Hebel %.1f)."
            % (requirement_count, len(scope_keys), measure_count, mean_leverage)
        ),
    )
 def select_within_budget(
    capability_requirements: Dict[str, List[str]],
    budget: int,
    in_scope: Optional[List[str]] = None,
 ) -> BudgetPlan:
    """Answer the budget question: which K measures, and how much do they close?

    Takes the first `budget` measures of the leverage ranking (a negative budget selects none)
    and reports the requirements they close against the total over all ranked measures.
    `requirements_closed` is the sum of the selected measures' leverage. The returned plan stores
    `budget` exactly as passed in; the headline uses the number of measures actually selected.

    NOTE(review): treating the top-K by leverage as the optimal choice assumes each requirement
    key is closed by exactly one measure; nothing here checks that. With shared keys this would
    become a weighted set-cover problem.
    """
    ranked = _ranked(capability_requirements, in_scope)
    grand_total = sum(item.leverage for item in ranked)
    chosen = ranked[: max(0, budget)]
    closed = sum(item.leverage for item in chosen)
    share = (closed / grand_total) if grand_total else 0.0
    message = (
        "Mit den Top-%d Massnahmen (nach regulatorischem Hebel) schliessen Sie %d von %d "
        "identifizierten Anforderungen (%.0f%%)." % (len(chosen), closed, grand_total, share * 100)
    )
    return BudgetPlan(
        budget=budget,
        selected_capabilities=[item.capability_id for item in chosen],
        requirements_closed=closed,
        total_requirements=grand_total,
        coverage_ratio=share,
        headline=message,
    )
 def roadmap_from_delta(
    assessment: TransitionAssessment,
    capability_requirements: Dict[str, List[str]],
    in_scope: Optional[List[str]] = None,
    open_statuses: Optional[List[CoverageStatus]] = None,
 ) -> OptimizationPlan:
    """Render the Roadmap view from a Capability Delta (a `TransitionAssessment`).

    Selects the coverage entries of `assessment` whose status counts as open (`open_statuses`,
    or only `CoverageStatus.MISSING` when it is None), looks up each open capability's
    requirement keys in `capability_requirements` (none if the capability is not listed) and
    hands that reduced mapping to `regulatory_leverage`.
    """
    if open_statuses is None:
        wanted = {CoverageStatus.MISSING}
    else:
        wanted = set(open_statuses)
    open_requirements: Dict[str, List[str]] = {}
    for entry in assessment.coverage:
        if entry.status in wanted:
            cap_id = entry.capability_id
            open_requirements[cap_id] = capability_requirements.get(cap_id, [])
    return regulatory_leverage(open_requirements, in_scope)
@@ -0,0 +1,48 @@
 """Schemas for the Regulatory Optimization Engine.
 These DTOs are *derived views* (computed-not-stored): nothing here is persisted; every value
 is recomputed from the input each call. No new meta-model class, no graph (freeze v1.0).
 Python 3.9 compatible (no `|` unions).
 """
 from __future__ import annotations
 from typing import List
 from pydantic import BaseModel, Field
 # The two cumulative_* fields default to zero and are filled in by the ranking step once the
 # measures are in their final order.
 class RankedMeasure(BaseModel):
    """One measure (a capability to implement) ranked by its regulatory leverage."""
    capability_id: str                                      # the capability this measure implements
    covers: List[str] = Field(default_factory=list)        # the in-scope requirements it satisfies
    leverage: int = 0                                       # = len(covers): how many it closes at once
    cumulative_requirements: int = 0                        # running total of requirements closed (ranked order)
    cumulative_coverage: float = 0.0                        # cumulative_requirements / total_requirements (0..1)
 # Return type of regulatory_leverage() and roadmap_from_delta().
 class OptimizationPlan(BaseModel):
    """Measures ranked by regulatory leverage — greatest regulatory effect first.
    `total_requirements` counts the IDENTIFIED requirements in scope (the known delta from the
    patterns), NOT a company's total legal duties. The percentages are exact count ratios over
    this identified set — never a compliance verdict (Welt-1 discipline).
    """
    in_scope_requirements: List[str] = Field(default_factory=list)   # the distinct requirement keys counted
    total_measures: int = 0                                          # number of distinct measures (delta capabilities)
    total_requirements: int = 0                                      # Sum of leverage = identified requirements closable
    ranked_measures: List[RankedMeasure] = Field(default_factory=list)   # ordered: leverage desc, capability id asc
    headline: str = ""                                               # "N identifizierte Anforderungen -> M Massnahmen ..."
 # Return type of select_within_budget().
 class BudgetPlan(BaseModel):
    """The budget answer: with a budget of K measures, which K and how much do they close?"""
    budget: int = 0                                                  # the requested K, stored as the caller passed it
    selected_capabilities: List[str] = Field(default_factory=list)   # capability ids of the selected measures, in ranked order
    requirements_closed: int = 0                                     # requirements closed by the selected measures
    total_requirements: int = 0                                      # requirements closable by all ranked measures
    coverage_ratio: float = 0.0                                      # requirements_closed / total_requirements (0..1)
    headline: str = ""                                               # human-readable one-line summary
@@ -0,0 +1,20 @@
 """Implementation Playbook — the Berater renderer ("wie komme ich dort hin?").
 For one capability it assembles the full implementation journey (why / closes which regulations /
 tools / process / evidence / controls) from curated playbook knowledge + regulatory leverage +
 injected Execution links. `playbooks_for_plan` chains the Optimization Roadmap into per-measure
 playbooks. Pure, deterministic, computed-not-stored. No new corpus, no new meta-model class
 (freeze v1.0). Curated content = expert draft, never normative.
 """
 from __future__ import annotations
 from .engine import build_playbook, playbooks_for_plan
 from .schemas import Playbook, PlaybookStep
 # Public surface of the playbook package: the two renderer functions from .engine and the
 # two schema classes from .schemas.
 __all__ = [
    "build_playbook",
    "playbooks_for_plan",
    "Playbook",
    "PlaybookStep",
 ]
@@ -0,0 +1,96 @@
 """Implementation Playbook — the Berater renderer ("wie komme ich dort hin?").
 After the Capability Delta Engine says WHAT is missing and the Optimization renderer says WHICH
 measure first, the Playbook renderer says HOW to implement it. For one capability it assembles the
 full journey from three sources:
  - curated playbook KNOWLEDGE (why / tools / process steps / evidence / how others do it) — the
    Reasoning Knowledge Acquisition layer under `knowledge/implementation_playbooks/`,
  - the regulatory LEVERAGE (which regulations a delivered capability closes) — reused from the
    Optimization renderer,
  - injected Procedure/Control/Evidence links (Execution-owned; empty until linked).
 Pure, deterministic, computed-not-stored. Chains optimization -> playbook (acyclic). No new corpus,
 no new meta-model class (freeze v1.0). Python 3.9 compatible.
 The curated content is an EXPERT DRAFT, never a normative requirement. When no playbook knowledge
 exists for a capability yet, the renderer emits a `status: missing` stub — the honest signal that
 the bottleneck is CONTENT (Knowledge Acquisition), not software.
 """
 from __future__ import annotations
 from typing import Any, Dict, List, Optional
 from ..optimization import OptimizationPlan
 from .schemas import Playbook, PlaybookStep
 # Placeholder `why` text used by build_playbook() for a capability without curated knowledge.
 _MISSING_WHY = "(Playbook-Inhalt fehlt — Knowledge Acquisition offen.)"
 # Default disclaimer: used for `missing` stubs and whenever the curated knowledge does not
 # provide its own `disclaimer`.
 _DRAFT_DISCLAIMER = (
    "Kuratiertes Experten-Wissen (Erstentwurf), KEINE normative Anforderung. Tools/Schritte sind "
    "Empfehlungen, kein Pflichtkatalog; Controls werden aus der Execution-Schicht injiziert."
 )
 def _steps(raw: Any) -> List[PlaybookStep]:
    """Convert the curated `process_steps` value into numbered PlaybookStep objects.

    `raw` is the value found under `process_steps`; None or an empty list yields no steps.
    Each item is normally a mapping with optional `title` / `detail` keys. A missing or null
    key becomes an empty string (not the literal text "None"), and an item given as a plain
    string is accepted as a step with that title and no detail. Steps are numbered from 1 in
    input order.
    """
    steps: List[PlaybookStep] = []
    for order, item in enumerate(raw or [], 1):
        if isinstance(item, str):
            # Shorthand form: the step is just its title.
            title, detail = item, None
        else:
            title, detail = item.get("title"), item.get("detail")
        steps.append(PlaybookStep(
            order=order,
            title="" if title is None else str(title),
            detail="" if detail is None else str(detail)))
    return steps
 def build_playbook(
    capability_id: str,
    knowledge: Optional[Dict[str, Any]] = None,
    closes_regulations: Optional[List[str]] = None,
    control_links: Optional[List[str]] = None,
 ) -> Playbook:
    """Assemble the implementation journey for ONE capability.

    `knowledge` is the curated playbook dict; None or an empty dict yields a `missing` stub whose
    title is the capability id. `closes_regulations` are the regulations a delivered capability
    closes; they are deduplicated and sorted, and `leverage` is their count. `control_links` are
    control refs injected by the caller (default: none).

    Curated keys that are present but null are treated like absent keys: null `tools` /
    `expected_evidence` become empty lists, and null text fields fall back to their default
    (capability id for `title`, "draft" for `status`, the draft disclaimer for `disclaimer`,
    empty text otherwise) instead of raising or rendering as the text "None". An explicitly
    empty string is kept as given.
    """
    closes = sorted(set(closes_regulations or []))
    controls = list(control_links or [])
    if not knowledge:
        return Playbook(
            capability_id=capability_id, title=capability_id, why=_MISSING_WHY,
            closes_regulations=closes, leverage=len(closes), controls=controls,
            status="missing", disclaimer=_DRAFT_DISCLAIMER,
        )
    return Playbook(
        capability_id=capability_id,
        title=_text_or(knowledge.get("title"), capability_id),
        why=_text_or(knowledge.get("why"), ""),
        closes_regulations=closes,
        leverage=len(closes),
        # `or []` mirrors how process steps are read: a null key must not crash list().
        tools=list(knowledge.get("tools") or []),
        process_steps=_steps(knowledge.get("process_steps")),
        expected_evidence=list(knowledge.get("expected_evidence") or []),
        controls=controls,
        how_others_do_it=_text_or(knowledge.get("how_others_do_it"), ""),
        status=_text_or(knowledge.get("status"), "draft"),
        disclaimer=_text_or(knowledge.get("disclaimer"), _DRAFT_DISCLAIMER),
    )
 def _text_or(value: Any, default: str) -> str:
    """Return `value` as text, or `default` when the curated value is missing/null."""
    return default if value is None else str(value)
 def playbooks_for_plan(
    plan: OptimizationPlan,
    knowledge_by_cap: Dict[str, Dict[str, Any]],
    top_k: Optional[int] = None,
    control_links_by_cap: Optional[Dict[str, List[str]]] = None,
 ) -> List[Playbook]:
    """Render one playbook per ranked measure of an OptimizationPlan (Roadmap -> How).

    Walks `plan.ranked_measures` in order — all of them, or only the first `top_k` (a negative
    `top_k` selects none). Each playbook is built from the capability's entry in
    `knowledge_by_cap`, the measure's own `covers` as the regulations it closes, and the
    capability's entry in `control_links_by_cap`, if any. A capability without curated knowledge
    is rendered as a `missing` stub, which shows where playbook content is still owed.
    """
    control_map = control_links_by_cap or {}
    chosen = plan.ranked_measures
    if top_k is not None:
        chosen = chosen[: max(0, top_k)]
    rendered: List[Playbook] = []
    for measure in chosen:
        cap_id = measure.capability_id
        rendered.append(build_playbook(
            cap_id, knowledge_by_cap.get(cap_id),
            closes_regulations=measure.covers, control_links=control_map.get(cap_id),
        ))
    return rendered
@@ -0,0 +1,45 @@
 """Schemas for the Implementation Playbook renderer.
 A Playbook is a *derived view* (computed-not-stored): it assembles, for one capability, the full
 "wie komme ich dort hin?" journey from (a) curated playbook KNOWLEDGE, (b) the regulatory leverage
 (which regulations a delivered capability closes), and (c) injected Procedure/Control/Evidence links
 (Execution-owned). Nothing here is persisted. No new meta-model class, no graph (freeze v1.0).
 Python 3.9 compatible (no `|` unions).
 """
 from __future__ import annotations
 from typing import List
 from pydantic import BaseModel, Field
 class PlaybookStep(BaseModel):
    """One step in the recommended way to stand up a capability."""
    order: int              # position of the step within its playbook
    title: str              # short name of the step
    detail: str = ""        # optional longer description
 # Only `capability_id` is required; every other field has an empty default, so a stub without
 # curated content is still a valid Playbook.
 class Playbook(BaseModel):
    """The complete implementation journey for ONE capability — the Berater view.
    Answers, in order: Warum? -> Welche Regelwerke schliesst das? -> Welche Tools? -> Welche
    Prozesse? -> Welche Nachweise? -> Welche Controls? The curated parts (why/tools/steps/evidence/
    how-others) are an EXPERT DRAFT, not a normative requirement; controls are injected from
    Execution (may be empty until linked).
    """
    capability_id: str                                     # the capability this playbook implements
    title: str = ""                                        # display title
    why: str = ""                                          # why this is required (regulatory rationale)
    closes_regulations: List[str] = Field(default_factory=list)   # leverage: regulations a delivered cap closes
    leverage: int = 0                                      # = len(closes_regulations)
    tools: List[str] = Field(default_factory=list)         # typical tooling (curated knowledge)
    process_steps: List[PlaybookStep] = Field(default_factory=list)   # how to stand it up
    expected_evidence: List[str] = Field(default_factory=list)        # artifacts that prove it
    controls: List[str] = Field(default_factory=list)      # control refs (injected from Execution; may be empty)
    how_others_do_it: str = ""                             # "wie machen das andere?" (curated)
    status: str = "draft"                                  # draft -> reviewed -> validated -> proven
    disclaimer: str = ""                                   # expert draft, not a normative requirement
@@ -0,0 +1,26 @@
 """Product-scope orchestration (step 3).
 Connects the Navigator's fact-gate to the existing reasoning `discover_scope`:
 decide regulatory scope only once the minimum (P0) facts are present, otherwise
 return the missing facts. Reuses discover_scope unchanged — no new scope logic.
 """
 from __future__ import annotations
 from .orchestrator import resolve_product_scope
 from .schemas import (
    ProductScopeRequest,
    ProductScopeResponse,
    RegulatoryScopeResult,
    ScopeStatus,
    UnsupportedDomain,
 )
 # Public surface of the product_scope package: the orchestrator entry point and the
 # request/response schema types imported above.
 __all__ = [
    "resolve_product_scope",
    "ProductScopeRequest",
    "ProductScopeResponse",
    "RegulatoryScopeResult",
    "UnsupportedDomain",
    "ScopeStatus",
 ]
@@ -0,0 +1,77 @@
 """Product-scope orchestrator (step 3) — gate, then reuse discover_scope.
 THE rule: the Scope Engine decides only once the Navigator has released the
 minimum facts. If P0 facts are missing, return the missing facts/questions and do
 NOT run discover_scope. Otherwise project the canonical into the reasoning profile
 and run the EXISTING `discover_scope` exactly once.
 No new scope rules, no new regulations, no environmental-law evaluation (those
 domains are surfaced only as unsupported_domains / future_corpus_needed).
 """
 from __future__ import annotations
 from typing import List, Tuple
 from compliance.navigator.engine import navigate
 from compliance.profile.canonical import CanonicalProductRegulatoryProfile
 from compliance.profile.to_reasoning import to_reasoning_profile
 from compliance.reasoning.scope_engine import discover_scope
 from .schemas import (
    ProductScopeResponse,
    RegulatoryScopeResult,
    ScopeStatus,
    UnsupportedDomain,
 )
 # Lookup table of (environmental trigger field, domain, note) triples. Transparency only — not
 # a verdict. Several trigger fields can map to the same domain; the list order decides which
 # trigger/note is reported for a domain, because only the first matching row per domain is used.
 _ENV_DOMAINS: List[Tuple[str, str, str]] = [
    ("discharges_to_wastewater", "environment_water", "Abwasser-/Gewässerrecht (z. B. AbwV, WRRL) — noch nicht im Korpus."),
    ("has_cooling_or_spraying_water", "environment_water", "Wasserbezogene Anforderungen — noch nicht im Korpus."),
    ("emits_to_air", "environment_air", "Immissionsschutz-/Luftreinhalterecht (z. B. BImSchG, IED) — noch nicht im Korpus."),
    ("uses_solvents", "environment_air", "Lösemittel-/VOC-Recht (z. B. 31. BImSchV) — noch nicht im Korpus."),
    ("uses_cleaning_chemicals", "chemicals", "Chemikalienrecht (REACH/CLP/Detergenzien/Biozide) — noch nicht im Korpus."),
    ("supplies_chemicals", "chemicals", "Chemikalienrecht (REACH/CLP) — noch nicht im Korpus."),
    ("contains_restricted_substances", "chemicals", "Stoffbeschränkungen (REACH/RoHS) — noch nicht im Korpus."),
    ("creates_waste", "waste", "Abfall-/Entsorgungsrecht (u. a. WEEE) — noch nicht im Korpus."),
    ("consumes_energy_or_water", "energy_resources", "Energie-/Ökodesign-Recht — noch nicht im Korpus."),
 ]
 def _unsupported_domains(profile: CanonicalProductRegulatoryProfile) -> List[UnsupportedDomain]:
    """List the environmental domains the profile triggers, one entry per domain.

    Walks `_ENV_DOMAINS` in table order and reports a domain the first time one of its trigger
    fields on `profile.environmental` is exactly True; later triggers of an already reported
    domain are ignored. Entries are for transparency only and carry the table's note text.
    """
    environmental = profile.environmental
    reported = set()
    domains: List[UnsupportedDomain] = []
    for trigger, domain, note in _ENV_DOMAINS:
        if getattr(environmental, trigger) is not True:
            continue  # unset/False/unknown does not trigger the domain
        if domain in reported:
            continue  # the domain is already listed with an earlier trigger
        reported.add(domain)
        domains.append(UnsupportedDomain(domain=domain, trigger=trigger, note=note))
    return domains
 def resolve_product_scope(profile: CanonicalProductRegulatoryProfile) -> ProductScopeResponse:
    """Gate on the Navigator, then decide the regulatory scope.

    Runs `navigate(profile)` first. If its completeness summary is not `ready_for_scope`, the
    response has status NEEDS_FACTS and carries the Navigator's missing facts and suggested
    questions; `discover_scope` is not called. Otherwise the profile is projected with
    `to_reasoning_profile`, `discover_scope` runs exactly once, and the response has status
    RESOLVED with the scope result plus the unsupported environmental domains.
    """
    nav = navigate(profile)
    completeness = nav.completeness_summary
    if completeness.ready_for_scope:
        verdict = discover_scope(to_reasoning_profile(profile))  # exactly once
        return ProductScopeResponse(
            status=ScopeStatus.RESOLVED,
            completeness_summary=completeness,
            regulatory_scope=RegulatoryScopeResult(
                applicable_regulations=verdict.applicable_regulations,
                excluded_regulations=verdict.excluded_regulations,
                uncertain_regulations=verdict.uncertain_regulations,
                unsupported_domains=_unsupported_domains(profile),
                reasoning_summary=verdict.reasoning_summary,
                confidence=verdict.confidence,
            ),
        )
    # Minimum facts are missing: ask, do not decide.
    return ProductScopeResponse(
        status=ScopeStatus.NEEDS_FACTS,
        completeness_summary=completeness,
        missing_facts=nav.missing_facts,
        suggested_questions=nav.suggested_questions,
    )
@@ -0,0 +1,63 @@
 """Response schemas for the product-scope orchestrator (step 3).
 These are application/API types — NOT compliance-meta-model classes (architecture
 freeze v1.0 untouched). The scope verdict itself is produced by the existing
 `discover_scope`; nothing here adds scope rules.
 """
 from __future__ import annotations
 from enum import Enum
 from typing import List, Optional
 from pydantic import BaseModel, Field
 from compliance.navigator.engine import CompletenessSummary
 from compliance.navigator.questions import NavigatorQuestion
 from compliance.profile.canonical import CanonicalProductRegulatoryProfile
 from compliance.reasoning.enums import Confidence
 from compliance.reasoning.schemas import (
    ApplicableRegulation,
    ExcludedRegulation,
    UncertainRegulation,
 )
 # Outcome of the product-scope orchestration; a str-based enum, so each member is also its
 # string value.
 class ScopeStatus(str, Enum):
    NEEDS_FACTS = "needs_facts"  # P0 facts missing -> ask, do not decide
    RESOLVED = "resolved"  # minimum facts present -> scope decided
 class UnsupportedDomain(BaseModel):
    """A domain the product triggers but the corpus does not yet cover.
    Surfaced for transparency (no false completeness) — NEVER a legal evaluation.
    """
    domain: str                             # domain key, e.g. "environment_water"
    trigger: str                            # name of the profile field that triggered the domain
    status: str = "future_corpus_needed"    # fixed default marker: the corpus does not cover this yet
    note: str = ""                          # human-readable explanation
 # The scope verdict as returned to the caller. resolve_product_scope() copies the regulation
 # lists, summary and confidence from the discover_scope result and adds unsupported_domains.
 class RegulatoryScopeResult(BaseModel):
    applicable_regulations: List[ApplicableRegulation] = Field(default_factory=list)
    excluded_regulations: List[ExcludedRegulation] = Field(default_factory=list)
    uncertain_regulations: List[UncertainRegulation] = Field(default_factory=list)
    unsupported_domains: List[UnsupportedDomain] = Field(default_factory=list)   # triggered domains without corpus coverage
    reasoning_summary: str = ""
    confidence: Confidence = Confidence.MEDIUM   # default when not set explicitly
 # Request wrapper: carries the canonical product profile to be scoped.
 class ProductScopeRequest(BaseModel):
    product_profile: CanonicalProductRegulatoryProfile
 # Response of resolve_product_scope(). `status` tells which of the two field groups below is
 # populated; `completeness_summary` is set in both cases.
 class ProductScopeResponse(BaseModel):
    status: ScopeStatus
    completeness_summary: CompletenessSummary
    # case NEEDS_FACTS: what is missing and what to ask (empty lists otherwise)
    missing_facts: List[str] = Field(default_factory=list)
    suggested_questions: List[NavigatorQuestion] = Field(default_factory=list)
    # case RESOLVED: the scope verdict (None while facts are still missing)
    regulatory_scope: Optional[RegulatoryScopeResult] = None
@@ -0,0 +1,38 @@
 """Product profile convergence layer.
 ONE canonical product profile (`CanonicalProductRegulatoryProfile`) that the Go
 gap engine and the Python reasoning engine both project from — so "SPS mit
 Remote Access" means the same thing everywhere. gap.ProductProfile leads; the
 reasoning ProductProfile is an adapter/DTO. Types + mappers only — no regulation
 logic, no UI, no new questions.
 """
 from __future__ import annotations
 from .canonical import (
    CanonicalLifecyclePhase,
    CanonicalProductRegulatoryProfile,
    CanonicalProductType,
    ComponentKind,
    EconomicOperatorRole,
    EnvironmentalImpact,
    ProductComponent,
 )
 from .from_company_profile import from_company_profile
 from .from_product_wizard import from_product_wizard
 from .to_gap import to_gap_profile
 from .to_reasoning import to_reasoning_profile
 # Public surface of the profile package: the canonical profile types from .canonical and the
 # four mapper functions (from_* build the canonical profile, to_* project it).
 __all__ = [
    "CanonicalProductRegulatoryProfile",
    "CanonicalProductType",
    "EconomicOperatorRole",
    "CanonicalLifecyclePhase",
    "ComponentKind",
    "ProductComponent",
    "EnvironmentalImpact",
    "from_product_wizard",
    "from_company_profile",
    "to_gap_profile",
    "to_reasoning_profile",
 ]
@@ -0,0 +1,158 @@
 """CanonicalProductRegulatoryProfile — the single semantic product profile.
 Convergence layer (spec 2026-06-26): instead of letting the Go `gap.ProductProfile`
 and the Python reasoning `ProductProfile` drift, ONE canonical type is the source
 of truth. The Go gap engine LEADS (it carries real engine logic), so the canonical
 mirrors gap's field names and adds the Navigator gaps the audit found missing
 (economic-operator role, radio module, generates_usage_data, lifecycle phase,
 structured BOM, safety-vs-security split, machine-vs-component) plus a
 forward-looking Environmental-Impact domain.
 No regulation logic lives here — types only. Mappers live in sibling modules.
 Python 3.9 compatible (no `|` unions).
 """
 from __future__ import annotations
 from enum import Enum
 from typing import List, Optional
 from pydantic import BaseModel, Field
 class CanonicalProductType(str, Enum):  # mirrors gap.ProductType
    """Product category of the canonical profile.

    Subclasses ``str``, so each member compares equal to its raw string value.
    """
    SOFTWARE = "software"
    HARDWARE = "hardware"
    IOT = "iot"
    SAAS = "saas"
    EXCHANGE = "exchange"
    MEDICAL_DEVICE = "medical_device"
    MACHINERY = "machinery"
    OTHER = "other"
 class EconomicOperatorRole(str, Enum):  # CE/CRA role — gap.ProductProfile has none
    """Role the company plays for the product (string-valued enum).

    Canonical-only: per the inline note the Go gap profile has no such field.
    """
    MANUFACTURER = "manufacturer"
    IMPORTER = "importer"
    DISTRIBUTOR = "distributor"
    INTEGRATOR = "integrator"
    OPERATOR = "operator"
    SERVICE_PROVIDER = "service_provider"
 class CanonicalLifecyclePhase(str, Enum):
    """Lifecycle phase of the product, from development to end of life (string-valued enum)."""
    DEVELOPMENT = "development"
    PLACING_ON_MARKET = "placing_on_market"
    OPERATION = "operation"
    MAINTENANCE = "maintenance"
    UPDATE = "update"
    END_OF_LIFE = "end_of_life"
 class ComponentKind(str, Enum):
    """Kind of a structured BOM node (string-valued enum).

    ``OTHER`` is the default kind used by ``ProductComponent``.
    """
    MOTOR = "motor"
    PUMP = "pump"
    HEATING = "heating"
    COOLING = "cooling"
    CONTROLLER = "controller"
    PLC = "plc"
    HMI = "hmi"
    SENSOR = "sensor"
    ACTUATOR = "actuator"
    CAMERA = "camera"
    NETWORK_INTERFACE = "network_interface"
    RADIO_MODULE = "radio_module"
    CHEMICAL_DOSING = "chemical_dosing"
    WATER_INLET = "water_inlet"
    WASTEWATER_OUTLET = "wastewater_outlet"
    BATTERY = "battery"
    OTHER = "other"
 class ProductComponent(BaseModel):
    """One structured BOM node — these nodes are what later trigger domains."""
    # Required display name of the component.
    name: str
    # Component category; falls back to OTHER when not specified.
    kind: ComponentKind = ComponentKind.OTHER
    # Optional free-text remarks.
    notes: Optional[str] = None
 class EnvironmentalImpact(BaseModel):
    """Forward-looking environmental-media triggers (own Navigator domain).
    No regulation logic consumes these yet — profile fields only, so the model
    is not blind to wastewater/air/chemicals/waste questions when that domain
    is wired later (AbwV/WRRL/REACH/CLP/IED/BImSchG ...).
    """
    # Every flag is an optional boolean defaulting to None, i.e. "not answered yet".
    discharges_to_wastewater: Optional[bool] = None
    uses_cleaning_chemicals: Optional[bool] = None
    supplies_chemicals: Optional[bool] = None
    emits_to_air: Optional[bool] = None
    uses_solvents: Optional[bool] = None
    creates_waste: Optional[bool] = None
    contains_restricted_substances: Optional[bool] = None
    consumes_energy_or_water: Optional[bool] = None
    has_cooling_or_spraying_water: Optional[bool] = None
 class CanonicalProductRegulatoryProfile(BaseModel):
    """The single canonical product profile that both engines project from.

    Every field has a default, so an empty instance is valid. Lists default to
    empty, strings in the identity block default to "", and all booleans are
    tri-state (``None`` = unknown). The field groups below are delimited by the
    ``# ---`` markers; this class holds data only, no regulation logic.
    """
    # --- identity ---
    name: str = ""
    description: str = ""
    product_type: Optional[CanonicalProductType] = None
    product_profile_id: Optional[str] = None
    tenant_id: Optional[str] = None
    iace_project_id: Optional[str] = None
    # --- gap-native lists ---
    technologies: List[str] = Field(default_factory=list)
    data_processing: List[str] = Field(default_factory=list)
    markets: List[str] = Field(default_factory=list)  # real list — never hardcoded ['EU']
    existing_certifications: List[str] = Field(default_factory=list)
    applied_norms: List[str] = Field(default_factory=list)
    # --- gap-native product / IST-state booleans (tri-state: None = unknown) ---
    connected_to_internet: Optional[bool] = None
    has_software_updates: Optional[bool] = None
    uses_ai: Optional[bool] = None
    processes_personal_data: Optional[bool] = None
    is_critical_infra_supplier: Optional[bool] = None
    has_risk_assessment: Optional[bool] = None
    has_technical_file: Optional[bool] = None
    has_operating_manual: Optional[bool] = None
    has_sbom: Optional[bool] = None
    has_vuln_management: Optional[bool] = None
    has_update_mechanism: Optional[bool] = None
    has_incident_response: Optional[bool] = None
    has_supply_chain_mgmt: Optional[bool] = None
    ce_marking_since: Optional[str] = None
    product_age: Optional[str] = None
    # --- NEW Navigator-gap fields (audit 2026-06-26) ---
    economic_operator_role: Optional[EconomicOperatorRole] = None
    has_radio_module: Optional[bool] = None
    generates_usage_data: Optional[bool] = None
    lifecycle_phase: Optional[CanonicalLifecyclePhase] = None
    components: List[ProductComponent] = Field(default_factory=list)
    has_safety_function: Optional[bool] = None
    safety_function_description: Optional[str] = None
    has_security_function: Optional[bool] = None  # safety vs security split
    has_remote_access: Optional[bool] = None
    has_embedded_software: Optional[bool] = None
    is_machine: Optional[bool] = None
    is_component: Optional[bool] = None
    is_spare_part: Optional[bool] = None
    # --- company / market context (NIS2 + scope; from company-profile) ---
    b2b_or_b2c: Optional[str] = None
    sector_industry: Optional[str] = None
    company_size: Optional[str] = None
    primary_jurisdiction: Optional[str] = None
    # --- AI context (classification stays delegated to ai-act/ucca) ---
    ai_integration_type: List[str] = Field(default_factory=list)
    human_oversight_level: Optional[str] = None
    # --- forward-looking environmental domain ---
    environmental: EnvironmentalImpact = Field(default_factory=EnvironmentalImpact)
@@ -0,0 +1,59 @@
 """company-profile -> CanonicalProductRegulatoryProfile (prefill, acceptance #2).
 Pulls master data (industry, business model, size, markets) and the conditional
 `machine_builder` block (camelCase JSONB keys, defined frontend-side) so the user
 re-answers nothing. The machineBuilder block is the richest product/safety/
 connectivity source — note it is industry-gated in the UI, so a prefill may find
 it empty; that is fine (fields stay None = unknown).
 """
 from __future__ import annotations
 from typing import Any, Dict, List
 from .canonical import CanonicalProductRegulatoryProfile
 # Market codes / labels hinting at an EU or EEA market.
 # NOTE(review): no function visible in this module references this set, and
 # to_reasoning keeps its own `_EU_HINTS` — confirm whether this copy is still needed.
 _EU_MEMBER_HINTS = {"DE", "AT", "FR", "IT", "NL", "LU", "LI", "EU", "EWR", "EEA", "DACH"}
 def _markets(p: Dict[str, Any], mb: Dict[str, Any]) -> List[str]:
    out: List[str] = []
    for source in (p.get("target_markets"), mb.get("exportMarkets"), [p.get("primary_jurisdiction")], [p.get("headquarters_country")]):
        for m in source or []:
            if m and m not in out:
                out.append(m)
    return out
 def _is_machine(mb: Dict[str, Any]) -> Any:
    types = mb.get("productTypes")
    if types:
        return True
    return None
 def from_company_profile(profile: Dict[str, Any]) -> CanonicalProductRegulatoryProfile:
    """Prefill a canonical profile from a company-profile dict.

    Reads the top-level company fields plus the optional ``machine_builder``
    block. Anything absent stays None, or becomes an empty string/list where
    the mapper substitutes one.

    Args:
        profile: Company-profile data; ``machine_builder`` may be missing or None.

    Returns:
        A canonical profile populated with whatever the company profile provides.
    """
    builder = profile.get("machine_builder") or {}

    # The machine-builder AI answer takes precedence; the top-level flag is the fallback.
    ai_flag = builder.get("containsAI")
    if ai_flag is None:
        ai_flag = profile.get("uses_ai")

    return CanonicalProductRegulatoryProfile(
        # company / market context
        sector_industry=profile.get("industry") or None,
        b2b_or_b2c=profile.get("business_model") or None,
        company_size=profile.get("company_size") or None,
        primary_jurisdiction=profile.get("primary_jurisdiction") or None,
        markets=_markets(profile, builder),
        # product description and AI context
        description=builder.get("productDescription") or "",
        uses_ai=ai_flag,
        ai_integration_type=list(builder.get("aiIntegrationType") or []),
        human_oversight_level=builder.get("humanOversightLevel") or None,
        # safety / connectivity flags, passed through untouched (None = unknown)
        has_embedded_software=builder.get("containsFirmware"),
        has_safety_function=builder.get("hasSafetyFunction"),
        safety_function_description=builder.get("safetyFunctionDescription") or None,
        has_remote_access=builder.get("hasRemoteAccess"),
        connected_to_internet=builder.get("isNetworked"),
        has_software_updates=builder.get("hasOTAUpdates"),
        has_risk_assessment=builder.get("hasRiskAssessment"),
        is_machine=_is_machine(builder),
        is_critical_infra_supplier=builder.get("criticalSectorClients"),
    )
@@ -0,0 +1,50 @@
 """ProductWizard payload -> CanonicalProductRegulatoryProfile (lossless).
 The gap-analysis ProductWizard POSTs exactly the gap.ProductProfile JSON shape
 (see admin-compliance/.../ProductWizard.tsx handleSubmit). This mapper copies
 every gap field verbatim so that `to_gap_profile(from_product_wizard(p))`
 reproduces the gap subset of `p` byte-for-byte (acceptance #1). New Navigator
 fields the wizard does not ask stay None.
 """
 from __future__ import annotations
 from typing import Any, Dict, Optional
 from .canonical import CanonicalProductRegulatoryProfile, CanonicalProductType
 def _as_product_type(value: Any) -> Optional[CanonicalProductType]:
    """Convert a raw value to ``CanonicalProductType``; return None if it is not a member."""
    parsed: Optional[CanonicalProductType]
    try:
        parsed = CanonicalProductType(value)
    except ValueError:
        # Not a member value (includes a missing / None product type).
        parsed = None
    return parsed
 def from_product_wizard(payload: Dict[str, Any]) -> CanonicalProductRegulatoryProfile:
    """Map a ProductWizard payload (gap.ProductProfile JSON shape) onto the canonical profile.

    Gap fields are copied verbatim. Fields the wizard does not send keep their
    canonical defaults, and boolean flags missing from the payload stay None.

    Args:
        payload: The decoded wizard JSON. Missing keys and explicit nulls are
            both tolerated.

    Returns:
        The canonical profile carrying the gap subset of ``payload``.
    """
    g = payload.get
    return CanonicalProductRegulatoryProfile(
        # `or ""` rather than a .get default: an explicit JSON null would bypass
        # the default and fail validation on the `str` fields. This mirrors the
        # `or []` null-tolerance of the list fields below.
        name=g("name") or "",
        description=g("description") or "",
        product_type=_as_product_type(g("product_type")),
        technologies=list(g("technologies") or []),
        data_processing=list(g("data_processing") or []),
        markets=list(g("markets") or []),
        existing_certifications=list(g("existing_certifications") or []),
        applied_norms=list(g("applied_norms") or []),
        connected_to_internet=g("connected_to_internet"),
        has_software_updates=g("has_software_updates"),
        uses_ai=g("uses_ai"),
        processes_personal_data=g("processes_personal_data"),
        is_critical_infra_supplier=g("is_critical_infra_supplier"),
        has_risk_assessment=g("has_risk_assessment"),
        has_technical_file=g("has_technical_file"),
        has_operating_manual=g("has_operating_manual"),
        has_sbom=g("has_sbom"),
        has_vuln_management=g("has_vuln_management"),
        has_update_mechanism=g("has_update_mechanism"),
        has_incident_response=g("has_incident_response"),
        has_supply_chain_mgmt=g("has_supply_chain_mgmt"),
        ce_marking_since=g("ce_marking_since"),
        product_age=g("product_age"),
    )
@@ -0,0 +1,41 @@
 """CanonicalProductRegulatoryProfile -> gap.ProductProfile JSON shape.
 Emits exactly the keys the Go gap engine already consumes (gap/models.go json
 tags), so the gap engine runs UNCHANGED — the canonical is a superset and gap is
 its lossless projection. Canonical-only fields (role/radio/components/...) are
 intentionally not emitted here; they reach the reasoning side via to_reasoning.
 """
 from __future__ import annotations
 from typing import Any, Dict
 from .canonical import CanonicalProductRegulatoryProfile
 def to_gap_profile(c: CanonicalProductRegulatoryProfile) -> Dict[str, Any]:
    """Project the canonical profile onto the gap.ProductProfile JSON shape.

    Unknown (None) booleans collapse to False and unknown strings to "", since
    the emitted shape has no tri-state. Lists are copied so the result does not
    alias the profile. Key order matches the original literal.
    """
    gap: Dict[str, Any] = {
        "name": c.name,
        "description": c.description,
        "product_type": c.product_type.value if c.product_type else "",
    }
    for list_key in (
        "technologies",
        "data_processing",
        "markets",
        "existing_certifications",
        "applied_norms",
    ):
        gap[list_key] = list(getattr(c, list_key))
    for flag_key in (
        "connected_to_internet",
        "has_software_updates",
        "uses_ai",
        "processes_personal_data",
        "is_critical_infra_supplier",
        "has_risk_assessment",
        "has_technical_file",
        "has_operating_manual",
        "has_sbom",
        "has_vuln_management",
        "has_update_mechanism",
        "has_incident_response",
        "has_supply_chain_mgmt",
    ):
        gap[flag_key] = bool(getattr(c, flag_key))
    for text_key in ("ce_marking_since", "product_age"):
        text = getattr(c, text_key)
        gap[text_key] = "" if text is None else text
    return gap
@@ -0,0 +1,88 @@
 """CanonicalProductRegulatoryProfile -> reasoning ProductProfile (adapter/DTO).
 The reasoning engine stays the consumer, never the source of truth (spec): the
 canonical leads, this projects it into the Python reasoning ProductProfile so the
 Reasoning engine and the Go gap engine run off ONE semantic profile (acceptance
 #10). AI classification is NOT done here — only `uses_ai` is forwarded; risk
 classification stays delegated to ai-act/ucca (acceptance #3).
 This is the ONLY one-way coupling profile -> reasoning; reasoning never imports
 profile, so the reasoning layer stays hermetic.
 """
 from __future__ import annotations
 from typing import List, Optional
 from compliance.reasoning.enums import ManufacturerRole, MarketModel, ProductLifecyclePhase
 from compliance.reasoning.schemas import ProductProfile
 from .canonical import CanonicalProductRegulatoryProfile, CanonicalProductType
 # Product types that imply the product contains software.
 _SOFTWARE_TYPES = {CanonicalProductType.SOFTWARE, CanonicalProductType.SAAS, CanonicalProductType.IOT}
 # Technology tags that imply the product contains software.
 _SOFTWARE_TECH = {"ai", "api", "database", "encryption", "ota_updates", "cloud", "blockchain"}
 # Market codes / labels that signal an EU or EEA market. `_eu_market` reports False
 # for any non-empty market list that contains none of these, so the set must cover
 # ALL member states — otherwise e.g. ["ES"] or ["PL"] would read as "not EU".
 # "DACH" is kept for backward compatibility although it also spans Switzerland.
 _EU_HINTS = {
    # umbrella labels
    "EU", "EWR", "EEA", "DACH",
    # EU-27 member states (ISO 3166-1 alpha-2)
    "AT", "BE", "BG", "HR", "CY", "CZ", "DK", "EE", "FI",
    "FR", "DE", "GR", "HU", "IE", "IT", "LV", "LT", "LU",
    "MT", "NL", "PL", "PT", "RO", "SK", "SI", "ES", "SE",
    # EEA states outside the EU
    "IS", "LI", "NO",
 }
 # Company-profile business-model labels -> reasoning MarketModel.
 _B2X = {"B2B": MarketModel.B2B, "B2C": MarketModel.B2C, "B2B_B2C": MarketModel.BOTH, "B2B2C": MarketModel.BOTH}
 def _or_none(*values: Optional[bool]) -> Optional[bool]:
    """True if any value is truthy; None if all are None/absent; else False."""
    if any(v is True for v in values):
        return True
    if all(v is None for v in values):
        return None
    return False
 def _has_software(c: CanonicalProductRegulatoryProfile) -> Optional[bool]:
    """Derive the tri-state "has software" flag from explicit flags, product type and technologies.

    Product type and technology tags can only contribute a positive signal
    (True) or nothing (None); they never force the result to False.
    """
    signals: List[Optional[bool]] = [c.has_embedded_software, c.has_software_updates, c.uses_ai]
    signals.append(True if c.product_type in _SOFTWARE_TYPES else None)
    signals.append(True if (set(c.technologies) & _SOFTWARE_TECH) else None)
    return _or_none(*signals)
 def _eu_market(markets: List[str]) -> Optional[bool]:
    """Tri-state EU-market flag: None for an empty list, else whether any entry is an EU hint."""
    if markets:
        return bool(set(markets) & _EU_HINTS)
    return None
 def _has_radio(c: CanonicalProductRegulatoryProfile) -> Optional[bool]:
    if c.has_radio_module is not None:
        return c.has_radio_module
    if any(comp.kind.value == "radio_module" for comp in c.components):
        return True
    return None
 def to_reasoning_profile(c: CanonicalProductRegulatoryProfile) -> ProductProfile:
    """Project the canonical profile into the reasoning engine's ``ProductProfile``.

    Enum-typed fields are re-created in the reasoning enums from their string
    values. Tri-state flags pass through unchanged, and a few are inferred when
    the canonical profile leaves them unknown (see the inline notes).
    """
    role: Optional[ManufacturerRole] = None
    if c.economic_operator_role:
        role = ManufacturerRole(c.economic_operator_role.value)

    phase: Optional[ProductLifecyclePhase] = None
    if c.lifecycle_phase:
        phase = ProductLifecyclePhase(c.lifecycle_phase.value)

    # Unmapped business-model labels resolve to None via dict.get.
    market_model = _B2X.get(c.b2b_or_b2c) if c.b2b_or_b2c else None

    # Explicit answer first; otherwise a MACHINERY product type implies a machine.
    machine = c.is_machine
    if machine is None and c.product_type == CanonicalProductType.MACHINERY:
        machine = True

    # Explicit answer first; otherwise "telemetry" processing implies usage data.
    usage_data = c.generates_usage_data
    if usage_data is None and "telemetry" in c.data_processing:
        usage_data = True

    type_list = [c.product_type.value] if c.product_type else []

    return ProductProfile(
        product_name=c.name or "Produkt",
        product_profile_id=c.product_profile_id,
        manufacturer_role=role,
        product_type=type_list,
        has_software=_has_software(c),
        has_embedded_software=c.has_embedded_software,
        has_remote_access=c.has_remote_access,
        # Only a positive signal: no "cloud" tag means unknown, not False.
        has_cloud_connection=("cloud" in c.technologies) or None,
        has_ai_functionality=c.uses_ai,
        has_radio_module=_has_radio(c),
        has_safety_function=c.has_safety_function,
        generates_usage_data=usage_data,
        is_machine=machine,
        is_component=c.is_component,
        is_spare_part=c.is_spare_part,
        eu_market=_eu_market(c.markets),
        b2b_or_b2c=market_model,
        lifecycle_phase=phase,
        company_size=c.company_size,
        sector=c.sector_industry,
    )
@@ -0,0 +1,34 @@
 """Regulatory Change Intelligence (RCI) — delta layer over the product-first map.
 Answers "what changes relative to my existing Regulatory Map?" — NOT "what does
 the new law say in general". Snapshot the pipeline into a ComplianceBaseline, then
 assess a (simulated/provided) RegulatoryChange into per-obligation deltas + a
 management ChangeImpactSummary. Read/reasoning only — no UI, no ingestion, no RAG,
 no new regulations/controls, no legal evaluation outside the stored map.
 """
 from __future__ import annotations
 from .baseline import create_baseline
 from .delta_engine import assess_change
 from .schemas import (
    ChangeAssessment,
    ChangeImpactSummary,
    ChangeType,
    ComplianceBaseline,
    DeltaType,
    ObligationDelta,
    RegulatoryChange,
 )
 # Public surface of the RCI package: the two entry points followed by the
 # schema types imported above.
 __all__ = [
    "create_baseline",
    "assess_change",
    "ComplianceBaseline",
    "RegulatoryChange",
    "ObligationDelta",
    "ChangeImpactSummary",
    "ChangeAssessment",
    "DeltaType",
    "ChangeType",
 ]
@@ -0,0 +1,44 @@
 """Snapshot the current product-first pipeline into a ComplianceBaseline.
 This is the ONLY place RCI runs the pipeline — to freeze a point-in-time map +
 registry-linked obligations + their required evidence. Everything downstream
 (delta computation) works purely against this snapshot, never re-evaluating.
 """
 from __future__ import annotations
 from typing import Dict, List, Optional
 from compliance.profile.canonical import CanonicalProductRegulatoryProfile
 from compliance.profile.to_reasoning import to_reasoning_profile
 from compliance.reasoning.obligation_engine import derive_obligations
 from compliance.regulatory_map.renderer import render_regulatory_map
 from .schemas import ComplianceBaseline
 def create_baseline(
    profile: CanonicalProductRegulatoryProfile,
    evidence_refs: Optional[Dict[str, List[str]]] = None,
    baseline_id: str = "baseline",
    created_at: Optional[str] = None,
 ) -> ComplianceBaseline:
    """Freeze the current pipeline output for ``profile`` into a ``ComplianceBaseline``.

    Args:
        profile: Canonical product profile to snapshot.
        evidence_refs: Evidence already present, keyed by obligation id; copied
            into the baseline (None is treated as empty).
        baseline_id: Identifier stored on the baseline.
        created_at: Optional timestamp string, stored unchanged.

    Returns:
        A baseline holding the profile, the rendered regulatory map, the ids of
        all registry-linked obligations and their required evidence.
    """
    map_snapshot = render_regulatory_map(profile)
    derived = derive_obligations(to_reasoning_profile(profile))

    # Only registry-linked obligations enter the baseline.
    linked = [ob for ob in derived.applicable_obligations if ob.registry_anchor]
    obligation_ids: List[str] = [ob.obligation_id for ob in linked]
    evidence_required: Dict[str, List[str]] = {
        ob.obligation_id: list(ob.required_evidence) for ob in linked
    }

    return ComplianceBaseline(
        baseline_id=baseline_id,
        product_profile_snapshot=profile,
        regulatory_map_snapshot=map_snapshot,
        applicable_obligations=obligation_ids,
        obligation_evidence_required=evidence_required,
        evidence_refs=dict(evidence_refs or {}),
        created_at=created_at,
    )
@@ -0,0 +1,114 @@
 """RCI delta engine — assess a RegulatoryChange against a ComplianceBaseline.
 Answers "what changes relative to my existing Map?" deterministically, working
 ONLY against the stored baseline (no re-evaluation of scope, no new legal
 assessment outside the map). Per-obligation classification -> ObligationDelta;
 aggregate -> ChangeImpactSummary.
 """
 from __future__ import annotations
 from typing import List, Tuple
 from compliance.reasoning.enums import Confidence
 from .schemas import (
    ChangeAssessment,
    ChangeImpactSummary,
    ChangeType,
    ComplianceBaseline,
    DeltaType,
    ObligationDelta,
    RegulatoryChange,
 )
 # Delta types that call for action: missing evidence is only computed for these,
 # and they make up the summary's `what_matters_for_this_product` list.
 _ACTION = {DeltaType.NEW, DeltaType.CHANGED, DeltaType.NEEDS_REVIEW}
 def _classify(
    in_base: bool, has_ev: bool, change_type: ChangeType, rel_app: bool, rel_unc: bool
 ) -> Tuple[DeltaType, str, Confidence]:
    """Classify one affected obligation into (delta type, reason text, confidence).

    Args:
        in_base: The obligation is part of the baseline.
        has_ev: Evidence is already present for the obligation.
        change_type: Kind of regulatory change.
        rel_app: The change touches a regulation that is applicable in the map.
        rel_unc: The change touches a regulation that is uncertain in the map.

    Returns:
        The delta type, a user-facing reason and the confidence of the verdict.
    """
    # No applicable regulation touched: either irrelevant or only an uncertain one.
    if not rel_app:
        if rel_unc:
            return (
                DeltaType.NEEDS_REVIEW,
                "Betrifft ein für Ihr Produkt noch UNSICHERES Regelwerk — erst Anwendbarkeit klären.",
                Confidence.LOW,
            )
        return DeltaType.NOT_APPLICABLE, "Die Änderung betrifft kein Regelwerk Ihrer Map.", Confidence.HIGH

    # From here on the change touches an applicable regulation.
    if change_type == ChangeType.REPEAL:
        if not in_base:
            return DeltaType.NOT_APPLICABLE, "Aufhebung betrifft keine Ihrer bestehenden Pflichten.", Confidence.HIGH
        return DeltaType.REMOVED, "Regelwerk/Pflicht aufgehoben — entfällt für Ihr Produkt.", Confidence.HIGH

    if not in_base:
        return DeltaType.NEW, "Neue Pflicht durch die Änderung — bisher nicht in Ihrer Map.", Confidence.MEDIUM

    if change_type != ChangeType.GUIDANCE_UPDATE:
        return DeltaType.CHANGED, "Bestehende Pflicht inhaltlich geändert — Umsetzung und Nachweis prüfen.", Confidence.MEDIUM

    # Guidance update on an existing obligation: evidence decides the verdict.
    if not has_ev:
        return DeltaType.NEEDS_REVIEW, "Bestehende Pflicht ohne Nachweis — Leitlinien-Update prüfen.", Confidence.MEDIUM
    return (
        DeltaType.ALREADY_COVERED,
        "Bestehende Pflicht mit vorhandenen Nachweisen — Leitlinien-Update vermutlich abgedeckt.",
        Confidence.MEDIUM,
    )
 def assess_change(baseline: ComplianceBaseline, change: RegulatoryChange) -> ChangeAssessment:
    """Assess a regulatory change against a stored baseline.

    Relevance is decided once per change, by intersecting its affected
    regulations with the baseline map's applicable and uncertain regulations.
    Each affected obligation is then classified individually.

    Returns:
        The assessment with one delta per affected obligation and the
        aggregated summary.
    """
    snapshot = baseline.regulatory_map_snapshot
    affected_regs = set(change.affected_regulations)
    touches_applicable = any(v.regulation_id in affected_regs for v in snapshot.applicable_regulations)
    touches_uncertain = any(v.regulation_id in affected_regs for v in snapshot.uncertain_regulations)
    baseline_obligations = set(baseline.applicable_obligations)

    deltas: List[ObligationDelta] = []
    for obligation_id in change.affected_obligations:
        on_file = baseline.evidence_refs.get(obligation_id, [])
        needed = baseline.obligation_evidence_required.get(obligation_id, [])
        delta_type, reason, confidence = _classify(
            obligation_id in baseline_obligations,
            bool(on_file),
            change.change_type,
            touches_applicable,
            touches_uncertain,
        )
        # Missing evidence is only reported for deltas that call for action.
        if delta_type in _ACTION:
            gaps = [item for item in needed if item not in on_file]
        else:
            gaps = []
        deltas.append(
            ObligationDelta(
                obligation_id=obligation_id,
                delta_type=delta_type,
                reason=reason,
                affected_evidence=list(on_file),
                missing_evidence=gaps,
                confidence=confidence,
            )
        )

    unsupported = [d.domain for d in snapshot.unsupported_domains]
    return ChangeAssessment(
        change_id=change.change_id,
        affects_product=touches_applicable or touches_uncertain,
        deltas=deltas,
        summary=_summary(deltas, unsupported),
    )
 def _ids(deltas: List[ObligationDelta], *types: DeltaType) -> List[str]:
    wanted = set(types)
    return [d.obligation_id for d in deltas if d.delta_type in wanted]
 def _summary(deltas: List[ObligationDelta], unsupported: List[str]) -> ChangeImpactSummary:
    """Aggregate per-obligation deltas into the management-level summary.

    CHANGED deltas are listed under ``needs_review`` as well, so they count
    towards both the "geändert" and the "zu prüfen" figure of the headline.
    """
    new_ids = _ids(deltas, DeltaType.NEW)
    changed_ids = _ids(deltas, DeltaType.CHANGED)
    removed_ids = _ids(deltas, DeltaType.REMOVED)
    covered_ids = _ids(deltas, DeltaType.ALREADY_COVERED)
    review_ids = _ids(deltas, DeltaType.NEEDS_REVIEW, DeltaType.CHANGED)
    irrelevant_ids = _ids(deltas, DeltaType.NOT_APPLICABLE)

    counts = (
        len(new_ids),
        len(changed_ids),
        len(removed_ids),
        len(covered_ids),
        len(review_ids),
        len(irrelevant_ids),
    )
    headline = "%d neu, %d geändert, %d entfällt, %d bereits abgedeckt, %d zu prüfen, %d nicht relevant." % counts

    return ChangeImpactSummary(
        what_changed=headline,
        what_matters_for_this_product=_ids(deltas, *_ACTION),
        already_covered=covered_ids,
        needs_review=review_ids,
        not_relevant=irrelevant_ids,
        unsupported_domains=unsupported,
    )
@@ -0,0 +1,92 @@
 """Regulatory Change Intelligence (RCI) — domain objects.
 RCI is a read-/reasoning layer ON TOP of the product-first pipeline. It answers
 "what changes relative to my existing Regulatory Map?" — NOT "what does the new
 law say in general". A RegulatoryChange is simulated/provided INPUT (no ingestion,
 no newsletter/mailbox, no RAG); the delta is computed against a stored
 ComplianceBaseline (snapshot of the map).
 `delta_type` is a THIRD vocabulary — distinct from `ClaimCoverage` (Welt 1, what
 the customer claims) and `ComplianceStatus` (Welt 2, verified evidence). The three
 must never be conflated. These are application/reasoning types, NOT
 compliance-meta-model classes (architecture freeze v1.0 untouched).
 """
 from __future__ import annotations
 from enum import Enum
 from typing import Dict, List, Optional
 from pydantic import BaseModel, Field
 from compliance.profile.canonical import CanonicalProductRegulatoryProfile
 from compliance.reasoning.enums import AuthorityLevel, Confidence
 from compliance.regulatory_map.schemas import RegulatoryMap
 class DeltaType(str, Enum):
    """Per-obligation verdict of a change assessment (string-valued enum)."""
    NEW = "new"  # obligation now applies that was not in the baseline
    CHANGED = "changed"  # existing obligation substantively modified
    REMOVED = "removed"  # obligation no longer applies (repeal)
    ALREADY_COVERED = "already_covered"  # existing obligation, evidence likely suffices
    NEEDS_REVIEW = "needs_review"  # a human must check
    NOT_APPLICABLE = "not_applicable"  # change does not touch this product's map
 class ChangeType(str, Enum):
    """Kind of regulatory change carried by a ``RegulatoryChange`` (string-valued enum)."""
    NEW_REGULATION = "new_regulation"
    AMENDMENT = "amendment"
    REPEAL = "repeal"
    GUIDANCE_UPDATE = "guidance_update"
 # ── stored snapshot ──────────────────────────────────────────────────────
 class ComplianceBaseline(BaseModel):
    """Stored snapshot of profile, regulatory map, obligations and evidence.

    All collection fields default to empty, so only the id and the two
    snapshots are required.
    """
    baseline_id: str
    product_profile_snapshot: CanonicalProductRegulatoryProfile
    regulatory_map_snapshot: RegulatoryMap
    applicable_obligations: List[str] = Field(default_factory=list)  # registry-linked obligation_ids
    # required evidence per obligation (derived) — to compute missing_evidence
    obligation_evidence_required: Dict[str, List[str]] = Field(default_factory=dict)
    # evidence the customer ALREADY has, per obligation (provided)
    evidence_refs: Dict[str, List[str]] = Field(default_factory=dict)
    # Optional timestamp string; None when not provided.
    created_at: Optional[str] = None
 # ── simulated/provided change (INPUT — never ingested) ───────────────────
 class RegulatoryChange(BaseModel):
    """A regulatory change supplied as input to the delta assessment.

    Only ``change_id`` and ``change_type`` are required; every other field has
    a default.
    """
    change_id: str
    # Origin label of the change; defaults to "simulated".
    source: str = "simulated"
    # Ids of the regulations the change touches.
    affected_regulations: List[str] = Field(default_factory=list)
    # Ids of the obligations the change touches.
    affected_obligations: List[str] = Field(default_factory=list)
    change_type: ChangeType
    effective_date: Optional[str] = None
    authority_level: AuthorityLevel = AuthorityLevel.LEGAL_TEXT
    summary: str = ""
 # ── per-obligation delta ─────────────────────────────────────────────────
 class ObligationDelta(BaseModel):
    """Assessment result for one obligation: verdict, reason, evidence lists and confidence."""
    obligation_id: str
    delta_type: DeltaType
    # Human-readable explanation of the verdict.
    reason: str
    affected_evidence: List[str] = Field(default_factory=list)  # evidence already present for it
    missing_evidence: List[str] = Field(default_factory=list)  # required but not yet present
    confidence: Confidence
 # ── management-level summary ──────────────────────────────────────────────
 class ChangeImpactSummary(BaseModel):
    """Management-level roll-up of a change assessment.

    Holds one headline string plus lists of obligation ids grouped by outcome.
    Every field has an empty default.
    """
    what_changed: str = ""
    what_matters_for_this_product: List[str] = Field(default_factory=list)  # need action
    already_covered: List[str] = Field(default_factory=list)
    needs_review: List[str] = Field(default_factory=list)
    not_relevant: List[str] = Field(default_factory=list)
    unsupported_domains: List[str] = Field(default_factory=list)
 class ChangeAssessment(BaseModel):
    """Complete result of assessing one change: per-obligation deltas plus the summary."""
    change_id: str
    # Whether the change touches the product's map at all.
    affects_product: bool
    deltas: List[ObligationDelta] = Field(default_factory=list)
    summary: ChangeImpactSummary
@@ -0,0 +1,27 @@
 """Regulatory Reasoning Engine.
 A deterministic reasoning layer ON TOP of the Legal Knowledge Graph (obligation
 registry) and the Compliance Execution Graph (control mapping / evidence). It
 answers, for a concrete product: which regulations apply, which obligations
 follow, whether the customer's implementation covers them, and whether a
 customer interpretation is legally sound.
 No new RAG, no new controls, no DB schema changes — scope & reasoning metamodel
 only (spec §14).
 """
 from __future__ import annotations
 from .claim_normalizer import normalize_claim
 from .implementation_engine import reason_implementation_claim
 from .interpretation_engine import assess_interpretation
 from .obligation_engine import derive_obligations
 from .scope_engine import discover_scope
 # Public surface of the reasoning package: the five engine entry points
 # imported above.
 __all__ = [
    "discover_scope",
    "derive_obligations",
    "normalize_claim",
    "reason_implementation_claim",
    "assess_interpretation",
 ]
--- a/Show More
+++ b/Show More