Compare commits
136 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
| e54f3cde94 | |||
| 77459d06d6 | |||
| 3202e555ab | |||
| 978052b5a2 | |||
| 19931208a9 | |||
| c39787ad96 | |||
| b5b6cdddb3 | |||
| a4123ace71 | |||
| 3bb48f2147 | |||
| c2c8f7e424 | |||
| b70c1b7c37 | |||
| 9c33582412 | |||
| 23d977e26b | |||
| 88b83d4daf | |||
| 98d616d82b | |||
| 59b7006e5a | |||
| 2d2cb2a244 | |||
| 02c9fdb18e | |||
| 3ba90f49cf | |||
| 009083882a | |||
| a98076196b | |||
| afe5a98474 | |||
| 80f2e2f619 | |||
| 897e9464a7 | |||
| c160bb8291 | |||
| a2332fb13d | |||
| 90c3fe16b5 | |||
| e0d9816c99 | |||
| fbbd0957bd | |||
| 2805256c33 | |||
| cefacb87af | |||
| d0575d286f | |||
| 80bf1993e0 | |||
| 3c6e2a2acc | |||
| dbf7b9b587 | |||
| 5cba0504df | |||
| 77d6bc5551 | |||
| d196ad1cab | |||
| b71771e52e | |||
| 256bb0607d | |||
| ff9a66fb72 | |||
| 363c76d274 | |||
| dfb2c6dfdb | |||
| 16d6ad4122 | |||
| 3856bb3a4f | |||
| 0b962b41fa | |||
| b6c400902e | |||
| 98f67e75d9 | |||
| f652e2d4ed | |||
| ecae5bc7f1 | |||
| 23a6f02ec2 | |||
| 4a7412e4f2 | |||
| 0cb224a7f1 | |||
| d44f3672be | |||
| c98500c303 | |||
| 4efbfa45c4 | |||
| 86a783e72f | |||
| 1054facffa | |||
| 18f5d0cb05 | |||
| a2403eaed9 | |||
| 1a9439d013 | |||
| c737e1ad7d | |||
| 9c02c2c4a2 | |||
| c4e9ca8f4d | |||
| aa99111a87 | |||
| 0b0d262462 | |||
| 07e392913f | |||
| d51bcd77c7 | |||
| b6cfc0a503 | |||
| 1e1689f1f2 | |||
| 78f0ffa9de | |||
| 50d88d611d | |||
| cfafa31ea2 | |||
| ffff9bb592 | |||
| a0f72fc39b | |||
| 5fde7690a5 | |||
| 66be23f0c4 | |||
| caa9b8b609 | |||
| f78e03bd0a | |||
| 5412864705 | |||
| 0da093c046 | |||
| 3199d0d90e | |||
| 4bfd552da7 | |||
| cb18eac7ec | |||
| bea8559f78 | |||
| 81f8b56b48 | |||
| db2efe9f52 | |||
| 77de7e794c | |||
| fb4e14d9b9 | |||
| 07916df330 | |||
| 5e735e9e56 | |||
| 24fdde89c6 | |||
| f3d3255de1 | |||
| fe21c2f487 | |||
| e4695cf289 | |||
| d72dcbacfb | |||
| 4ad681741d | |||
| 88ca2b0b03 | |||
| 8a51db92ed | |||
| 16371f2909 | |||
| c7339e68df | |||
| 06efb9e61b | |||
| aaacec087c | |||
| ed64d92904 | |||
| 6ccc6c87c1 | |||
| 7eb7f61483 | |||
| 8c893ca783 | |||
| d1383227b2 | |||
| a5687bbc65 | |||
| da466b3821 | |||
| eca8ec43c5 | |||
| 37c9b8e773 | |||
| 50ae9e94d1 | |||
| 429ac957c1 | |||
| 9312ad18ef | |||
| 2063615d37 | |||
| 4d225f73a8 | |||
| c13aa9183a | |||
| 662aec209a | |||
| 8440ddfecb | |||
| 0ce4794767 | |||
| 8674b2cd9a | |||
| 80862e7073 | |||
| a8c61eb320 | |||
| 8f89fbf8a7 | |||
| 33790bb5e7 | |||
| 7287e989a6 | |||
| 63fe2d496e | |||
| 4e8eb2dc0e | |||
| 78aeedafae | |||
| 2e6eee6ba1 | |||
| f23ae32077 | |||
| 739a477d3f | |||
| 6673c8052b | |||
| 5e5002c883 | |||
| 1607c89459 |
@@ -1,4 +1,6 @@
|
|||||||
# Build stage
|
# Build stage
|
||||||
|
# ci-retrigger 2026-06-27: transient registry.meghsakha.com 502 on push (Runde 1) + last-build
|
||||||
|
# tag-bug skipped the rerun (Runde 2). No logic change — forces detect-changes to rebuild ai-sdk.
|
||||||
FROM golang:1.24-alpine AS builder
|
FROM golang:1.24-alpine AS builder
|
||||||
|
|
||||||
WORKDIR /app
|
WORKDIR /app
|
||||||
|
|||||||
@@ -34,6 +34,8 @@ func main() {
|
|||||||
cmdEcho(os.Args[2:])
|
cmdEcho(os.Args[2:])
|
||||||
case "hierarchy":
|
case "hierarchy":
|
||||||
cmdHierarchy(os.Args[2:])
|
cmdHierarchy(os.Args[2:])
|
||||||
|
case "propose":
|
||||||
|
cmdPropose(os.Args[2:])
|
||||||
default:
|
default:
|
||||||
usage()
|
usage()
|
||||||
os.Exit(2)
|
os.Exit(2)
|
||||||
@@ -41,7 +43,7 @@ func main() {
|
|||||||
}
|
}
|
||||||
|
|
||||||
func usage() {
|
func usage() {
|
||||||
fmt.Fprintln(os.Stderr, "Usage: iace-audit <reachability|consistency|vocabulary|echo|hierarchy> [args]")
|
fmt.Fprintln(os.Stderr, "Usage: iace-audit <reachability|consistency|vocabulary|echo|hierarchy|propose> [args]")
|
||||||
}
|
}
|
||||||
|
|
||||||
func cmdReachability(_ []string) {
|
func cmdReachability(_ []string) {
|
||||||
|
|||||||
@@ -0,0 +1,188 @@
|
|||||||
|
package main
|
||||||
|
|
||||||
|
import (
	"context"
	"encoding/json"
	"fmt"
	"os"
	"path/filepath"
	"strconv"
	"strings"

	"github.com/breakpilot/ai-compliance-sdk/internal/iace"
	"github.com/breakpilot/ai-compliance-sdk/internal/iace/audit"
	"github.com/breakpilot/ai-compliance-sdk/internal/llm"
)
|
||||||
|
|
||||||
|
// narrativeInput is the JSON document decoded from the <narrative.json>
// argument of the propose command.
type narrativeInput struct {
	// MachineType is handed to the proposer input builder, the judge selection
	// and every report renderer.
	MachineType string `json:"machine_type"`

	// Narrative is the free-text machine description; cmdPropose refuses to run
	// when it is empty.
	Narrative string `json:"narrative"`

	// MachineTypes is an optional list forwarded to iace.BuildProposerInput.
	MachineTypes []string `json:"machine_types,omitempty"`
}
|
||||||
|
|
||||||
|
// cmdPropose — Method P: offline dedup-candidate proposer.
|
||||||
|
//
|
||||||
|
// iace-audit propose <narrative.json> [<ground-truth.json>]
|
||||||
|
//
|
||||||
|
// Detect near-duplicate patterns, screen survivors against a ground truth (if
|
||||||
|
// given), judge them (heuristic by default, LLM when enabled), and write the
|
||||||
|
// human-review queue to audit-reports/proposals.{md,json}. Propose-only — it
|
||||||
|
// writes a report and never mutates the pattern library.
|
||||||
|
//
|
||||||
|
// Env:
|
||||||
|
//
|
||||||
|
// IACE_PROPOSE_THRESHOLD candidate score threshold (default 0.30)
|
||||||
|
// IACE_PROPOSE_LLM=1 use the offline LLM judge instead of the heuristic
|
||||||
|
// OLLAMA_URL ollama base URL (default http://localhost:11434)
|
||||||
|
// SELF_HOSTED_LLM_MODEL model name (default qwen2.5:32b-instruct)
|
||||||
|
func cmdPropose(args []string) {
|
||||||
|
if len(args) < 1 {
|
||||||
|
fmt.Fprintln(os.Stderr, "propose: usage: iace-audit propose <narrative.json> [<ground-truth.json>]")
|
||||||
|
os.Exit(2)
|
||||||
|
}
|
||||||
|
|
||||||
|
var in narrativeInput
|
||||||
|
must(readJSONFile(args[0], &in))
|
||||||
|
if in.Narrative == "" {
|
||||||
|
fmt.Fprintln(os.Stderr, "propose: narrative is empty")
|
||||||
|
os.Exit(2)
|
||||||
|
}
|
||||||
|
|
||||||
|
var gt *iace.GroundTruth
|
||||||
|
if len(args) >= 2 {
|
||||||
|
var g iace.GroundTruth
|
||||||
|
must(readJSONFile(args[1], &g))
|
||||||
|
gt = &g
|
||||||
|
}
|
||||||
|
|
||||||
|
threshold := envFloat("IACE_PROPOSE_THRESHOLD", 0.30)
|
||||||
|
hazards, mits, fired := iace.BuildProposerInput(in.Narrative, in.MachineType, in.MachineTypes)
|
||||||
|
candidates := iace.FindDedupCandidates(fired, threshold)
|
||||||
|
|
||||||
|
byID := make(map[string]iace.PatternMatch, len(fired))
|
||||||
|
for _, pm := range fired {
|
||||||
|
byID[pm.PatternID] = pm
|
||||||
|
}
|
||||||
|
|
||||||
|
judge := selectJudge(in.MachineType)
|
||||||
|
ctx := context.Background()
|
||||||
|
|
||||||
|
var proposals []iace.JudgedProposal
|
||||||
|
blocked := 0
|
||||||
|
for _, c := range candidates {
|
||||||
|
var sr iace.ScreenResult
|
||||||
|
if gt != nil {
|
||||||
|
sr = iace.ScreenSupersession(gt, hazards, mits, c.KeepHazardName, c.DropName)
|
||||||
|
if sr.RecallAfter < sr.RecallBefore || sr.DistinctGT {
|
||||||
|
blocked++
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
}
|
||||||
|
v, conf, rat := judge.Judge(ctx, c, byID[c.KeepPattern], byID[c.DropPattern])
|
||||||
|
proposals = append(proposals, iace.JudgedProposal{
|
||||||
|
Candidate: c, Screen: sr, Verdict: v, Confidence: conf, Rationale: rat, Judge: judge.Name(),
|
||||||
|
})
|
||||||
|
}
|
||||||
|
|
||||||
|
writeText("audit-reports/proposals.md", iace.RenderProposalQueue(in.MachineType, proposals))
|
||||||
|
writeJSON("audit-reports/proposals.json", proposals)
|
||||||
|
|
||||||
|
// Type 2: foreign-framing candidates (zone terms with no narrative echo).
|
||||||
|
framing := iace.FindFramingCandidates(fired, in.Narrative, envFloat("IACE_FRAMING_MIN_ORPHAN", 0.6))
|
||||||
|
writeText("audit-reports/framing.md", iace.RenderFramingQueue(in.MachineType, framing))
|
||||||
|
writeJSON("audit-reports/framing.json", framing)
|
||||||
|
|
||||||
|
// Type 3: vocab->tag proposals (unknown narrative tokens that pattern text
|
||||||
|
// names as a whole word, with a dominant shared required tag).
|
||||||
|
vocab := audit.RunVocabulary(map[string]any{"narrative": in.Narrative})
|
||||||
|
var vgaps []audit.DictionarySuggestion
|
||||||
|
for _, s := range vocab.SuggestedDictionaryEntries {
|
||||||
|
if len(s.SuggestedTags) > 0 {
|
||||||
|
vgaps = append(vgaps, s)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
writeText("audit-reports/vocab.md", renderVocabQueue(in.MachineType, vgaps))
|
||||||
|
writeJSON("audit-reports/vocab.json", vgaps)
|
||||||
|
|
||||||
|
// Type 4: coverage blind-spots (empty ISO 12100 groups A-G) + LLM expansion.
|
||||||
|
gaps := iace.FindCoverageGaps(hazards)
|
||||||
|
var missing []iace.MissingHazard
|
||||||
|
if lj, ok := judge.(iace.LLMJudge); ok {
|
||||||
|
missing = iace.ProposeMissingHazards(ctx, lj.Completer, in.MachineType, in.Narrative, hazards, gaps)
|
||||||
|
}
|
||||||
|
writeText("audit-reports/coverage.md", iace.RenderCoverageQueue(in.MachineType, gaps, missing))
|
||||||
|
writeJSON("audit-reports/coverage.json", gaps)
|
||||||
|
|
||||||
|
printSummary("Method P — Dedup Proposer ("+judge.Name()+")", map[string]int{
|
||||||
|
"fired_patterns": len(fired),
|
||||||
|
"candidates": len(candidates),
|
||||||
|
"in_queue": len(proposals),
|
||||||
|
"gt_blocked": blocked,
|
||||||
|
"framing_flags": len(framing),
|
||||||
|
"vocab_gaps": len(vgaps),
|
||||||
|
"coverage_gaps": len(gaps),
|
||||||
|
})
|
||||||
|
if gt == nil {
|
||||||
|
fmt.Fprintln(os.Stderr, "note: no ground truth provided — GT wall NOT applied (candidates not recall-screened)")
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func selectJudge(machineClass string) iace.CandidateJudge {
|
||||||
|
if os.Getenv("IACE_PROPOSE_LLM") != "1" {
|
||||||
|
return iace.HeuristicJudge{}
|
||||||
|
}
|
||||||
|
base := envStr("OLLAMA_URL", "http://localhost:11434")
|
||||||
|
model := envStr("SELF_HOSTED_LLM_MODEL", "qwen2.5:32b-instruct")
|
||||||
|
reg := llm.NewProviderRegistry("ollama", "")
|
||||||
|
reg.Register(llm.NewOllamaAdapter(base, model))
|
||||||
|
fmt.Printf("using LLM judge (ollama %s, model %s)\n", base, model)
|
||||||
|
return iace.LLMJudge{Completer: iace.NewRegistryCompleter(reg, model), MachineClass: machineClass}
|
||||||
|
}
|
||||||
|
|
||||||
|
// readJSONFile reads the file at path and decodes its JSON content into v,
// which must be a pointer accepted by json.Unmarshal.
//
// A read failure is returned unchanged (the os error already names the file).
// A decode failure is wrapped with the path, so the caller can tell which
// input file is malformed; the underlying error stays reachable through
// errors.As / errors.Is.
func readJSONFile(path string, v any) error {
	raw, err := os.ReadFile(path)
	if err != nil {
		return err
	}
	if err := json.Unmarshal(raw, v); err != nil {
		return fmt.Errorf("parse %s: %w", path, err)
	}
	return nil
}
|
||||||
|
|
||||||
|
// writeText writes content to path, creating the parent directory of path
// first. It is best-effort: a failure is reported as a warning on stderr and
// the function returns without writing; success is announced on stdout.
//
// The directory is derived from path rather than hard-coded, so callers may
// target locations other than audit-reports/.
func writeText(path, content string) {
	if err := os.MkdirAll(filepath.Dir(path), 0o755); err != nil {
		fmt.Fprintln(os.Stderr, "warn: could not create directory for", path, err)
		return
	}
	if err := os.WriteFile(path, []byte(content), 0o644); err != nil {
		fmt.Fprintln(os.Stderr, "warn: could not write", path, err)
		return
	}
	fmt.Println("→ wrote", path)
}
|
||||||
|
|
||||||
|
// envStr returns the value of the environment variable key, or def when the
// variable is unset or empty.
func envStr(key, def string) string {
	val := os.Getenv(key)
	if val == "" {
		return def
	}
	return val
}
|
||||||
|
|
||||||
|
// envFloat returns the environment variable key parsed as a float64, or def
// when the variable is unset or empty.
//
// A value that does not parse also yields def, but a warning naming the
// variable is printed to stderr so that a typo (for example "0,3") does not
// silently run with the default.
func envFloat(key string, def float64) float64 {
	raw := os.Getenv(key)
	if raw == "" {
		return def
	}
	f, err := strconv.ParseFloat(raw, 64)
	if err != nil {
		fmt.Fprintf(os.Stderr, "warn: ignoring %s=%q (not a number), using default %g\n", key, raw, def)
		return def
	}
	return f
}
|
||||||
|
|
||||||
|
func renderVocabQueue(machine string, entries []audit.DictionarySuggestion) string {
|
||||||
|
var b strings.Builder
|
||||||
|
fmt.Fprintf(&b, "# Vocab→tag review queue — %s\n\n", machine)
|
||||||
|
fmt.Fprintf(&b, "%d unknown token(s) appear in pattern text but map to no dictionary tag. Propose-only — a human (or the LLM) confirms the tag, then adds a keyword_dictionary entry and pins a GT case.\n\n", len(entries))
|
||||||
|
for i, s := range entries {
|
||||||
|
tag := "<tag>"
|
||||||
|
if len(s.SuggestedTags) > 0 {
|
||||||
|
tag = s.SuggestedTags[0]
|
||||||
|
}
|
||||||
|
fmt.Fprintf(&b, "## %d. \"%s\" → suggested tag(s): %s\n", i+1, s.Token, strings.Join(s.SuggestedTags, ", "))
|
||||||
|
fmt.Fprintf(&b, "- named by %d pattern(s): %s\n", len(s.PatternIDs), strings.Join(s.PatternIDs, ", "))
|
||||||
|
fmt.Fprintf(&b, "- suggested action: add keyword_dictionary entry {%q → %s} so narratives mentioning it trigger those patterns; human confirms\n\n", s.Token, tag)
|
||||||
|
}
|
||||||
|
return b.String()
|
||||||
|
}
|
||||||
@@ -36,6 +36,10 @@ type DictionarySuggestion struct {
|
|||||||
Token string `json:"token"`
|
Token string `json:"token"`
|
||||||
Field string `json:"field"`
|
Field string `json:"field"`
|
||||||
PatternIDs []string `json:"pattern_ids"`
|
PatternIDs []string `json:"pattern_ids"`
|
||||||
|
// SuggestedTags are the RequiredComponentTags shared by the naming patterns,
|
||||||
|
// ranked by frequency — the candidate tags a keyword_dictionary entry for this
|
||||||
|
// token would emit so narratives mentioning it can trigger those patterns.
|
||||||
|
SuggestedTags []string `json:"suggested_tags,omitempty"`
|
||||||
}
|
}
|
||||||
|
|
||||||
type VocabularyReport struct {
|
type VocabularyReport struct {
|
||||||
|
|||||||
@@ -66,14 +66,19 @@ func runVocabulary(form map[string]any) VocabularyReport {
|
|||||||
|
|
||||||
// For each unknown token check if any pattern names it
|
// For each unknown token check if any pattern names it
|
||||||
patterns := iace.AllPatterns()
|
patterns := iace.AllPatterns()
|
||||||
|
byID := make(map[string]iace.HazardPattern, len(patterns))
|
||||||
|
for _, p := range patterns {
|
||||||
|
byID[p.ID] = p
|
||||||
|
}
|
||||||
for _, tok := range report.UnknownTokens {
|
for _, tok := range report.UnknownTokens {
|
||||||
hits := patternsMentioning(tok, patterns)
|
hits := patternsMentioning(tok, patterns)
|
||||||
if len(hits) == 0 {
|
if len(hits) == 0 {
|
||||||
continue
|
continue
|
||||||
}
|
}
|
||||||
report.SuggestedDictionaryEntries = append(report.SuggestedDictionaryEntries, DictionarySuggestion{
|
report.SuggestedDictionaryEntries = append(report.SuggestedDictionaryEntries, DictionarySuggestion{
|
||||||
Token: tok,
|
Token: tok,
|
||||||
PatternIDs: hits,
|
PatternIDs: hits,
|
||||||
|
SuggestedTags: suggestTagsFor(hits, byID),
|
||||||
})
|
})
|
||||||
}
|
}
|
||||||
sort.Slice(report.SuggestedDictionaryEntries, func(i, j int) bool {
|
sort.Slice(report.SuggestedDictionaryEntries, func(i, j int) bool {
|
||||||
@@ -129,18 +134,24 @@ func dictTokenHit(tok string, dict map[string]bool) bool {
|
|||||||
return false
|
return false
|
||||||
}
|
}
|
||||||
|
|
||||||
// patternsMentioning returns up to 8 pattern IDs whose scenario/trigger/
|
// patternsMentioning returns up to 8 pattern IDs whose scenario/trigger/harm/
|
||||||
// harm/zone text contains the token (case-insensitive substring).
|
// zone text names the token as a WHOLE WORD. Whole-word (not substring) matching
|
||||||
|
// is essential: a substring match flags common fragments like "stehen" inside
|
||||||
|
// "entstehen", producing spurious hits and nonsensical tag suggestions.
|
||||||
func patternsMentioning(tok string, patterns []iace.HazardPattern) []string {
|
func patternsMentioning(tok string, patterns []iace.HazardPattern) []string {
|
||||||
tokLower := strings.ToLower(tok)
|
tokLower := strings.ToLower(tok)
|
||||||
seen := map[string]bool{}
|
seen := map[string]bool{}
|
||||||
var out []string
|
var out []string
|
||||||
for _, p := range patterns {
|
for _, p := range patterns {
|
||||||
hay := strings.ToLower(p.ScenarioDE + " " + p.TriggerDE + " " + p.HarmDE + " " + p.ZoneDE + " " + p.NameDE)
|
hay := strings.ToLower(p.ScenarioDE + " " + p.TriggerDE + " " + p.HarmDE + " " + p.ZoneDE + " " + p.NameDE)
|
||||||
if !strings.Contains(hay, tokLower) {
|
matched := false
|
||||||
continue
|
for _, w := range tokenRE.FindAllString(hay, -1) {
|
||||||
|
if w == tokLower {
|
||||||
|
matched = true
|
||||||
|
break
|
||||||
|
}
|
||||||
}
|
}
|
||||||
if seen[p.ID] {
|
if !matched || seen[p.ID] {
|
||||||
continue
|
continue
|
||||||
}
|
}
|
||||||
seen[p.ID] = true
|
seen[p.ID] = true
|
||||||
@@ -151,3 +162,57 @@ func patternsMentioning(tok string, patterns []iace.HazardPattern) []string {
|
|||||||
}
|
}
|
||||||
return out
|
return out
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// suggestTagsFor returns the RequiredComponentTags shared across the naming
|
||||||
|
// patterns, ranked by how many of them require each tag (ties broken by name),
|
||||||
|
// top 3. These are the candidate tags a dictionary entry for the token should
|
||||||
|
// emit so a narrative mentioning the token can trigger those patterns.
|
||||||
|
func suggestTagsFor(ids []string, byID map[string]iace.HazardPattern) []string {
|
||||||
|
freq := map[string]int{}
|
||||||
|
total := 0
|
||||||
|
for _, id := range ids {
|
||||||
|
p, ok := byID[id]
|
||||||
|
if !ok {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
total++
|
||||||
|
seen := map[string]bool{}
|
||||||
|
for _, tag := range p.RequiredComponentTags {
|
||||||
|
if seen[tag] {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
seen[tag] = true
|
||||||
|
freq[tag]++
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if total == 0 {
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
type tf struct {
|
||||||
|
tag string
|
||||||
|
n int
|
||||||
|
}
|
||||||
|
ranked := make([]tf, 0, len(freq))
|
||||||
|
for t, n := range freq {
|
||||||
|
ranked = append(ranked, tf{t, n})
|
||||||
|
}
|
||||||
|
sort.Slice(ranked, func(i, j int) bool {
|
||||||
|
if ranked[i].n != ranked[j].n {
|
||||||
|
return ranked[i].n > ranked[j].n
|
||||||
|
}
|
||||||
|
return ranked[i].tag < ranked[j].tag
|
||||||
|
})
|
||||||
|
// Only suggest a tag shared by >= 40% of the naming patterns. Diffuse tokens
|
||||||
|
// (common verbs spread across categories) get no dominant tag and are dropped.
|
||||||
|
var out []string
|
||||||
|
for _, x := range ranked {
|
||||||
|
if float64(x.n)/float64(total) < 0.4 {
|
||||||
|
break
|
||||||
|
}
|
||||||
|
out = append(out, x.tag)
|
||||||
|
if len(out) >= 3 {
|
||||||
|
break
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return out
|
||||||
|
}
|
||||||
|
|||||||
@@ -0,0 +1,36 @@
|
|||||||
|
package audit
|
||||||
|
|
||||||
|
import (
|
||||||
|
"testing"
|
||||||
|
|
||||||
|
"github.com/breakpilot/ai-compliance-sdk/internal/iace"
|
||||||
|
)
|
||||||
|
|
||||||
|
func TestSuggestTagsFor_RanksSharedRequiredTags(t *testing.T) {
|
||||||
|
byID := map[string]iace.HazardPattern{
|
||||||
|
"P1": {ID: "P1", RequiredComponentTags: []string{"backflow_risk", "dom_warewashing"}},
|
||||||
|
"P2": {ID: "P2", RequiredComponentTags: []string{"backflow_risk"}},
|
||||||
|
"P3": {ID: "P3", RequiredComponentTags: []string{"sharp_edge"}},
|
||||||
|
}
|
||||||
|
got := suggestTagsFor([]string{"P1", "P2", "P3"}, byID)
|
||||||
|
if len(got) == 0 || got[0] != "backflow_risk" {
|
||||||
|
t.Fatalf("want backflow_risk ranked first (2 patterns), got %v", got)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestSuggestTagsFor_TopThreeStableAlpha(t *testing.T) {
|
||||||
|
byID := map[string]iace.HazardPattern{
|
||||||
|
"P1": {ID: "P1", RequiredComponentTags: []string{"d", "b", "a", "c"}},
|
||||||
|
}
|
||||||
|
got := suggestTagsFor([]string{"P1"}, byID)
|
||||||
|
if len(got) != 3 || got[0] != "a" || got[1] != "b" || got[2] != "c" {
|
||||||
|
t.Fatalf("want stable alpha top-3 [a b c], got %v", got)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestSuggestTagsFor_UnknownPatternIgnored(t *testing.T) {
|
||||||
|
byID := map[string]iace.HazardPattern{}
|
||||||
|
if got := suggestTagsFor([]string{"missing"}, byID); len(got) != 0 {
|
||||||
|
t.Fatalf("want empty for unknown patterns, got %v", got)
|
||||||
|
}
|
||||||
|
}
|
||||||
@@ -7,8 +7,6 @@ import (
|
|||||||
"path/filepath"
|
"path/filepath"
|
||||||
"sort"
|
"sort"
|
||||||
"testing"
|
"testing"
|
||||||
|
|
||||||
"github.com/google/uuid"
|
|
||||||
)
|
)
|
||||||
|
|
||||||
// TestKistenhub_GTCoverage runs the Kistenhubgeraet ground truth (37 entries)
|
// TestKistenhub_GTCoverage runs the Kistenhubgeraet ground truth (37 entries)
|
||||||
@@ -110,65 +108,6 @@ func TestKistenhub_GTCoverage(t *testing.T) {
|
|||||||
// patternsToHazardsAndMitigations converts a pattern match output into the
|
// patternsToHazardsAndMitigations converts a pattern match output into the
|
||||||
// Hazard/Mitigation shapes that CompareBenchmark expects. Mirrors what
|
// Hazard/Mitigation shapes that CompareBenchmark expects. Mirrors what
|
||||||
// iace_handler_init.go does in production but without DB writes.
|
// iace_handler_init.go does in production but without DB writes.
|
||||||
func patternsToHazardsAndMitigations(out *MatchOutput) ([]Hazard, []Mitigation) {
|
|
||||||
hazards := make([]Hazard, 0, len(out.MatchedPatterns))
|
|
||||||
patternToHazard := make(map[string]uuid.UUID, len(out.MatchedPatterns))
|
|
||||||
|
|
||||||
for _, pm := range out.MatchedPatterns {
|
|
||||||
cat := ""
|
|
||||||
if len(pm.HazardCats) > 0 {
|
|
||||||
cat = pm.HazardCats[0]
|
|
||||||
}
|
|
||||||
zone := pm.ZoneDE
|
|
||||||
lifecycle := ""
|
|
||||||
if len(pm.ApplicableLifecycles) > 0 {
|
|
||||||
lifecycle = pm.ApplicableLifecycles[0]
|
|
||||||
}
|
|
||||||
h := Hazard{
|
|
||||||
ID: uuid.New(),
|
|
||||||
Name: pm.ScenarioDE,
|
|
||||||
Category: cat,
|
|
||||||
Description: pm.ScenarioDE,
|
|
||||||
Scenario: pm.ScenarioDE,
|
|
||||||
TriggerEvent: pm.TriggerDE,
|
|
||||||
PossibleHarm: pm.HarmDE,
|
|
||||||
AffectedPerson: pm.AffectedDE,
|
|
||||||
HazardousZone: zone,
|
|
||||||
LifecyclePhase: lifecycle,
|
|
||||||
}
|
|
||||||
if h.Name == "" {
|
|
||||||
h.Name = pm.PatternName
|
|
||||||
}
|
|
||||||
hazards = append(hazards, h)
|
|
||||||
patternToHazard[pm.PatternID] = h.ID
|
|
||||||
}
|
|
||||||
|
|
||||||
measureNames := make(map[string]string)
|
|
||||||
for _, m := range GetProtectiveMeasureLibrary() {
|
|
||||||
measureNames[m.ID] = m.Name
|
|
||||||
}
|
|
||||||
|
|
||||||
var mitigations []Mitigation
|
|
||||||
for _, sm := range out.SuggestedMeasures {
|
|
||||||
name := measureNames[sm.MeasureID]
|
|
||||||
if name == "" {
|
|
||||||
name = sm.MeasureID
|
|
||||||
}
|
|
||||||
for _, srcPattern := range sm.SourcePatterns {
|
|
||||||
hid, ok := patternToHazard[srcPattern]
|
|
||||||
if !ok {
|
|
||||||
continue
|
|
||||||
}
|
|
||||||
mitigations = append(mitigations, Mitigation{
|
|
||||||
ID: uuid.New(),
|
|
||||||
HazardID: hid,
|
|
||||||
Name: name,
|
|
||||||
})
|
|
||||||
}
|
|
||||||
}
|
|
||||||
return hazards, mitigations
|
|
||||||
}
|
|
||||||
|
|
||||||
func abbrev(s string, max int) string {
|
func abbrev(s string, max int) string {
|
||||||
if len(s) <= max {
|
if len(s) <= max {
|
||||||
return s
|
return s
|
||||||
|
|||||||
@@ -1,6 +1,7 @@
|
|||||||
package iace
|
package iace
|
||||||
|
|
||||||
import (
|
import (
|
||||||
|
"context"
|
||||||
"encoding/json"
|
"encoding/json"
|
||||||
"os"
|
"os"
|
||||||
"path/filepath"
|
"path/filepath"
|
||||||
@@ -45,7 +46,7 @@ var warewashingCyberCategories = map[string]bool{
|
|||||||
|
|
||||||
// warewashingEngineOutput runs the production chain and returns the filtered
|
// warewashingEngineOutput runs the production chain and returns the filtered
|
||||||
// hazards/mitigations the user would see for the UC-M.
|
// hazards/mitigations the user would see for the UC-M.
|
||||||
func warewashingEngineOutput() ([]Hazard, []Mitigation, int) {
|
func warewashingEngineOutput() ([]Hazard, []Mitigation, []PatternMatch) {
|
||||||
res := ParseNarrative(warewashingNarrative, "Gewerbliche Untertisch-Geschirrspuelmaschine (vernetzt)")
|
res := ParseNarrative(warewashingNarrative, "Gewerbliche Untertisch-Geschirrspuelmaschine (vernetzt)")
|
||||||
|
|
||||||
var compIDs, compNames []string
|
var compIDs, compNames []string
|
||||||
@@ -94,7 +95,7 @@ func warewashingEngineOutput() ([]Hazard, []Mitigation, int) {
|
|||||||
filtered := *out
|
filtered := *out
|
||||||
filtered.MatchedPatterns = kept
|
filtered.MatchedPatterns = kept
|
||||||
hazards, mitigations := patternsToHazardsAndMitigations(&filtered)
|
hazards, mitigations := patternsToHazardsAndMitigations(&filtered)
|
||||||
return hazards, mitigations, len(kept)
|
return hazards, mitigations, kept
|
||||||
}
|
}
|
||||||
|
|
||||||
func TestWarewashing_GTCoverage(t *testing.T) {
|
func TestWarewashing_GTCoverage(t *testing.T) {
|
||||||
@@ -119,8 +120,8 @@ func TestWarewashing_GTCoverage(t *testing.T) {
|
|||||||
t.Logf("Parsed components: %v", cn)
|
t.Logf("Parsed components: %v", cn)
|
||||||
}
|
}
|
||||||
|
|
||||||
hazards, mitigations, nPatterns := warewashingEngineOutput()
|
hazards, mitigations, keptPatterns := warewashingEngineOutput()
|
||||||
t.Logf("Engine: %d patterns kept (relevance+cyber filter) -> %d hazards", nPatterns, len(hazards))
|
t.Logf("Engine: %d patterns kept (relevance+cyber filter) -> %d hazards", len(keptPatterns), len(hazards))
|
||||||
|
|
||||||
result := CompareBenchmark(>, hazards, mitigations)
|
result := CompareBenchmark(>, hazards, mitigations)
|
||||||
precision := 0.0
|
precision := 0.0
|
||||||
@@ -180,3 +181,57 @@ func TestWarewashing_GTCoverage(t *testing.T) {
|
|||||||
t.Errorf("warewashing recall below 40%% floor: %.1f%%", result.CoverageScore*100)
|
t.Errorf("warewashing recall below 40%% floor: %.1f%%", result.CoverageScore*100)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// TestWarewashing_DedupProposer exercises the offline dedup-candidate proposer
|
||||||
|
// end-to-end on the real warewashing engine output: detect candidates, screen
|
||||||
|
// each against the GT, and log the human-review queue. It asserts the WALL is
|
||||||
|
// self-consistent — a PASS verdict may never coincide with a recall drop.
|
||||||
|
func TestWarewashing_DedupProposer(t *testing.T) {
|
||||||
|
raw, err := os.ReadFile(filepath.Join("testdata", "ground_truth_warewashing.json"))
|
||||||
|
if err != nil {
|
||||||
|
t.Fatalf("read GT: %v", err)
|
||||||
|
}
|
||||||
|
var gt GroundTruth
|
||||||
|
if err := json.Unmarshal(raw, >); err != nil {
|
||||||
|
t.Fatalf("parse GT: %v", err)
|
||||||
|
}
|
||||||
|
|
||||||
|
hazards, mits, kept := warewashingEngineOutput()
|
||||||
|
byID := map[string]PatternMatch{}
|
||||||
|
for _, pm := range kept {
|
||||||
|
byID[pm.PatternID] = pm
|
||||||
|
}
|
||||||
|
// 0.25 is a deliberately permissive candidate threshold: the proposer is meant
|
||||||
|
// to over-surface, because the deterministic GT wall below (and a human, and the
|
||||||
|
// LLM judge) is the precision filter — not the detector.
|
||||||
|
candidates := FindDedupCandidates(kept, 0.25)
|
||||||
|
t.Logf("Proposer: %d dedup candidate(s) from %d fired patterns", len(candidates), len(kept))
|
||||||
|
|
||||||
|
// Deterministic judge in the test; the dev-time CLI swaps in LLMJudge.
|
||||||
|
judge := HeuristicJudge{}
|
||||||
|
var judged []JudgedProposal
|
||||||
|
blocked := 0
|
||||||
|
for _, c := range candidates {
|
||||||
|
sr := ScreenSupersession(>, hazards, mits, c.KeepHazardName, c.DropName)
|
||||||
|
switch {
|
||||||
|
case sr.RecallAfter < sr.RecallBefore:
|
||||||
|
t.Logf("[BLOCK recall-load-bearing] keep %s / drop %s", c.KeepPattern, c.DropPattern)
|
||||||
|
blocked++
|
||||||
|
case sr.DistinctGT:
|
||||||
|
t.Logf("[BLOCK distinct GT %s vs %s] keep %s / drop %s", sr.KeepGT, sr.DropGT, c.KeepPattern, c.DropPattern)
|
||||||
|
blocked++
|
||||||
|
default:
|
||||||
|
if !sr.Safe {
|
||||||
|
t.Errorf("RECALL-SAFE branch but ScreenResult.Safe=false for drop %s", c.DropPattern)
|
||||||
|
}
|
||||||
|
v, conf, rat := judge.Judge(context.Background(), c, byID[c.KeepPattern], byID[c.DropPattern])
|
||||||
|
judged = append(judged, JudgedProposal{
|
||||||
|
Candidate: c, Screen: sr, Verdict: v, Confidence: conf, Rationale: rat, Judge: judge.Name(),
|
||||||
|
})
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
t.Logf("\n%s", RenderProposalQueue("Gewerbliche Geschirrspuelmaschine (vernetzt)", judged))
|
||||||
|
t.Logf("Proposer summary: %d candidate(s) in queue (judge=%s), %d BLOCKED by the GT wall — propose-only, nothing auto-applied",
|
||||||
|
len(judged), judge.Name(), blocked)
|
||||||
|
}
|
||||||
|
|||||||
@@ -0,0 +1,50 @@
|
|||||||
|
package iace
|
||||||
|
|
||||||
|
import "sort"
|
||||||
|
|
||||||
|
// isoCategoryRank assigns every known hazard category a sort key that follows
// the EN ISO 12100 hazard groups, so the hazard log can be presented top-down
// by type (A. mechanical, B. electrical, C. thermal, …) instead of in
// pattern-firing order. The grouping matches the frontend CATEGORY_LABELS.
// The tens digit selects the group; several categories may share one rank.
var isoCategoryRank = map[string]int{
	// A. Mechanical
	"mechanical_hazard":  10,
	"mechanical":         10,
	"maintenance_hazard": 11,

	// B. Electrical
	"electrical_hazard": 20,
	"electrical":        20,
	"emc_hazard":        21,

	// C. Thermal
	"thermal_hazard":   30,
	"thermal":          30,
	"high_temperature": 31,
	"fire_explosion":   32,

	// D. Pneumatics / hydraulics
	"pneumatic_hydraulic": 40,

	// E. Noise / vibration
	"noise_hazard":     50,
	"noise_vibration":  50,
	"vibration_hazard": 51,

	// F. Ergonomics
	"ergonomic_hazard": 60,
	"ergonomic":        60,

	// G. Substances / environment
	"material_environmental": 70,
	"chemical_risk":          71,
	"radiation_hazard":       72,

	// H. Software / control (functional safety)
	"software_control":        80,
	"software_fault":          80,
	"safety_function_failure": 81,
	"configuration_error":     82,
	"sensor_fault":            83,
	"hmi_error":               84,
	"mode_confusion":          85,
	"communication_failure":   86,
	"update_failure":          87,

	// I. Cyber / network (listed for ordering completeness; excluded from the CE log)
	"unauthorized_access":   90,
	"firmware_corruption":   91,
	"cyber_resilience":      92,
	"cyber_network":         93,
	"logging_audit_failure": 94,
	"sensor_spoofing":       95,

	// J. AI-specific
	"ai_specific":          100,
	"ai_misclassification": 100,
	"false_classification": 100,
	"model_drift":          100,
	"data_poisoning":       100,
	"unintended_bias":      100,
}
|
||||||
|
|
||||||
|
func categoryRank(cat string) int {
|
||||||
|
if r, ok := isoCategoryRank[cat]; ok {
|
||||||
|
return r
|
||||||
|
}
|
||||||
|
return 999 // unknown categories last
|
||||||
|
}
|
||||||
|
|
||||||
|
// SortHazardsByISO12100 groups hazards by ISO 12100 hazard group. Stable: the
|
||||||
|
// relative order within a group (creation/priority order from the engine) is
|
||||||
|
// preserved.
|
||||||
|
func SortHazardsByISO12100(hazards []Hazard) {
|
||||||
|
sort.SliceStable(hazards, func(i, j int) bool {
|
||||||
|
return categoryRank(hazards[i].Category) < categoryRank(hazards[j].Category)
|
||||||
|
})
|
||||||
|
}
|
||||||
@@ -157,7 +157,7 @@ func GetGTBremseHazardPatterns() []HazardPattern {
|
|||||||
// ════════════════════════════════════════════════════════════════
|
// ════════════════════════════════════════════════════════════════
|
||||||
{
|
{
|
||||||
ID: "HP1717", NameDE: "Verletzung durch unvermittelt austretende pneumatische Restenergie", NameEN: "Injury from unexpectedly released pneumatic stored energy",
|
ID: "HP1717", NameDE: "Verletzung durch unvermittelt austretende pneumatische Restenergie", NameEN: "Injury from unexpectedly released pneumatic stored energy",
|
||||||
RequiredComponentTags: []string{"stored_energy"},
|
RequiredComponentTags: []string{"pneumatic_part"},
|
||||||
RequiredEnergyTags: []string{"pneumatic_pressure"},
|
RequiredEnergyTags: []string{"pneumatic_pressure"},
|
||||||
GeneratedHazardCats: []string{"mechanical_hazard"},
|
GeneratedHazardCats: []string{"mechanical_hazard"},
|
||||||
SuggestedMeasureIDs: []string{"M485", "M534", "M527"},
|
SuggestedMeasureIDs: []string{"M485", "M534", "M527"},
|
||||||
|
|||||||
@@ -375,7 +375,7 @@ func GetSpecificMachinePatterns() []HazardPattern {
|
|||||||
// ================================================================
|
// ================================================================
|
||||||
{
|
{
|
||||||
ID: "HP753", NameDE: "Thermal Runaway bei Lithium-Batterie", NameEN: "Thermal runaway of lithium battery",
|
ID: "HP753", NameDE: "Thermal Runaway bei Lithium-Batterie", NameEN: "Thermal runaway of lithium battery",
|
||||||
RequiredComponentTags: []string{"stored_energy", "high_temperature"},
|
RequiredComponentTags: []string{"battery", "high_temperature"},
|
||||||
RequiredEnergyTags: []string{"electrical_energy", "thermal"},
|
RequiredEnergyTags: []string{"electrical_energy", "thermal"},
|
||||||
GeneratedHazardCats: []string{"thermal_hazard", "electrical_hazard"},
|
GeneratedHazardCats: []string{"thermal_hazard", "electrical_hazard"},
|
||||||
SuggestedMeasureIDs: []string{"M005", "M141"},
|
SuggestedMeasureIDs: []string{"M005", "M141"},
|
||||||
@@ -390,7 +390,7 @@ func GetSpecificMachinePatterns() []HazardPattern {
|
|||||||
},
|
},
|
||||||
{
|
{
|
||||||
ID: "HP754", NameDE: "Ausgasung giftiger Daempfe aus Batterie", NameEN: "Toxic gas emission from battery",
|
ID: "HP754", NameDE: "Ausgasung giftiger Daempfe aus Batterie", NameEN: "Toxic gas emission from battery",
|
||||||
RequiredComponentTags: []string{"stored_energy", "chemical_risk"},
|
RequiredComponentTags: []string{"battery", "chemical_risk"},
|
||||||
RequiredEnergyTags: []string{},
|
RequiredEnergyTags: []string{},
|
||||||
GeneratedHazardCats: []string{"material_environmental"},
|
GeneratedHazardCats: []string{"material_environmental"},
|
||||||
SuggestedMeasureIDs: []string{"M005", "M141"},
|
SuggestedMeasureIDs: []string{"M005", "M141"},
|
||||||
@@ -405,7 +405,7 @@ func GetSpecificMachinePatterns() []HazardPattern {
|
|||||||
},
|
},
|
||||||
{
|
{
|
||||||
ID: "HP755", NameDE: "Elektrischer Schlag an Hochvolt-Batteriespeicher", NameEN: "Electric shock from high-voltage battery storage",
|
ID: "HP755", NameDE: "Elektrischer Schlag an Hochvolt-Batteriespeicher", NameEN: "Electric shock from high-voltage battery storage",
|
||||||
RequiredComponentTags: []string{"stored_energy", "electrical_part"},
|
RequiredComponentTags: []string{"battery", "electrical_part"},
|
||||||
RequiredEnergyTags: []string{"electrical_energy"},
|
RequiredEnergyTags: []string{"electrical_energy"},
|
||||||
GeneratedHazardCats: []string{"electrical_hazard"},
|
GeneratedHazardCats: []string{"electrical_hazard"},
|
||||||
SuggestedMeasureIDs: []string{"M082", "M141"},
|
SuggestedMeasureIDs: []string{"M082", "M141"},
|
||||||
|
|||||||
@@ -137,7 +137,7 @@ func GetKeywordDictionary() []KeywordEntry {
|
|||||||
{Keywords: []string{"kreiselmaeher", "scheibenmaeher", "maehwerk"}, ExtraTags: []string{"agri_mower"}},
|
{Keywords: []string{"kreiselmaeher", "scheibenmaeher", "maehwerk"}, ExtraTags: []string{"agri_mower"}},
|
||||||
{Keywords: []string{"spruehduese", "spritzduese", "spruehkopf"}, ExtraTags: []string{"spray_nozzle"}},
|
{Keywords: []string{"spruehduese", "spritzduese", "spruehkopf"}, ExtraTags: []string{"spray_nozzle"}},
|
||||||
{Keywords: []string{"galvanikbad", "tauchbad", "beizbad", "chemiebad"}, ExtraTags: []string{"chemical_bath"}},
|
{Keywords: []string{"galvanikbad", "tauchbad", "beizbad", "chemiebad"}, ExtraTags: []string{"chemical_bath"}},
|
||||||
{Keywords: []string{"batterie", "akku", "akkumulator", "traktionsbatterie"}, ExtraTags: []string{"battery"}},
|
{Keywords: []string{"batterie", "akku", "akkumulator", "traktionsbatterie", "lithium", "batteriespeicher", "hochvoltbatterie", "lithium-batterie"}, ExtraTags: []string{"battery"}},
|
||||||
{Keywords: []string{"heizelement", "heizpatrone", "heizband"}, ExtraTags: []string{"heating_element"}},
|
{Keywords: []string{"heizelement", "heizpatrone", "heizband"}, ExtraTags: []string{"heating_element"}},
|
||||||
{Keywords: []string{"uv-lampe", "uv-strahler", "uv-c-strahler"}, ExtraTags: []string{"uv_source"}},
|
{Keywords: []string{"uv-lampe", "uv-strahler", "uv-c-strahler"}, ExtraTags: []string{"uv_source"}},
|
||||||
{Keywords: []string{"roentgen", "radioaktiv", "strahlenquelle", "gammastrahl", "isotop"}, ExtraTags: []string{"radiation_source"}},
|
{Keywords: []string{"roentgen", "radioaktiv", "strahlenquelle", "gammastrahl", "isotop"}, ExtraTags: []string{"radiation_source"}},
|
||||||
|
|||||||
@@ -42,3 +42,29 @@ func guardedLifecycles(p HazardPattern, tagSet map[string]bool) []string {
|
|||||||
}
|
}
|
||||||
return p.ApplicableLifecycles
|
return p.ApplicableLifecycles
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Domain-specific supersession.
//
// A generic pattern that fires through a broad tag (for example
// high_temperature) can duplicate a domain-specific pattern that describes the
// same hazard more precisely. When the domain is present, the specific pattern
// wins and the generic duplicate is dropped. The rule is scoped to the domain
// tag, so machines outside the domain keep the generic pattern.
//
// genericSupersededByWarewashing lists the generic pattern IDs that are dropped
// for the warewashing domain, together with the pattern that replaces each:
//
//	HP016 (generic hot surfaces)     -> HP2201 (Boiler/Tank/Spuelkammer)
//	HP018 (actuator burn)            -> HP2201 (same contact-burn hazard)
//	HP013 (stored electrical energy) -> HP144 (residual voltage; HP013's zone
//	    is framed for Batteriefaecher/USV-Anlagen, which a dishwasher does not
//	    have, while HP144 is the Frequenzumrichter/Zwischenkreis variant)
var genericSupersededByWarewashing = map[string]bool{
	"HP013": true,
	"HP016": true,
	"HP018": true,
}
|
||||||
|
|
||||||
|
// supersededByDomainSpecific reports whether a generic pattern is replaced by a
|
||||||
|
// more precise equivalent that the project's domain already provides.
|
||||||
|
func supersededByDomainSpecific(p HazardPattern, tagSet map[string]bool) bool {
|
||||||
|
return tagSet["dom_warewashing"] && genericSupersededByWarewashing[p.ID]
|
||||||
|
}
|
||||||
|
|||||||
@@ -416,6 +416,11 @@ func patternMatches(p HazardPattern, tagSet map[string]bool, input MatchInput) b
|
|||||||
return false
|
return false
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Domain-specific supersession (generic duplicate replaced by a precise one).
|
||||||
|
if supersededByDomainSpecific(p, tagSet) {
|
||||||
|
return false
|
||||||
|
}
|
||||||
|
|
||||||
return true
|
return true
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@@ -0,0 +1,143 @@
|
|||||||
|
package iace
|
||||||
|
|
||||||
|
import (
|
||||||
|
"context"
|
||||||
|
"encoding/json"
|
||||||
|
"fmt"
|
||||||
|
"strings"
|
||||||
|
)
|
||||||
|
|
||||||
|
// Coverage blind-spot proposer (P2 slice 6, type 4). DEV-TIME, propose-only.
|
||||||
|
//
|
||||||
|
// Deterministic skeleton: which EN ISO 12100 hazard groups (A-G, the classic CE
|
||||||
|
// groups; H-J are control/CRA and routinely routed elsewhere) did the engine
|
||||||
|
// leave with ZERO hazards for this machine? An empty group is a structural
|
||||||
|
// blind-spot signal — the machine may genuinely lack that hazard, or a pattern
|
||||||
|
// may be missing. The LLM then expands each gap into specific expected-but-missing
|
||||||
|
// hazards a safety assessor would name, for a human to confirm into a new pattern
|
||||||
|
// or GT case. The gaps alone are useful without any model.
|
||||||
|
|
||||||
|
// isoGroup ties one EN ISO 12100 hazard group to the hazard categories that
// count as coverage for it.
type isoGroup struct {
	// Key is the stable identifier of the group (e.g. "mechanical").
	Key string
	// Label is the display label of the group (e.g. "A. Mechanisch").
	Label string
	// Cats lists the hazard categories that belong to the group.
	Cats []string
}
|
||||||
|
|
||||||
|
var iso12100Groups = []isoGroup{
|
||||||
|
{"mechanical", "A. Mechanisch", []string{"mechanical_hazard", "mechanical", "maintenance_hazard"}},
|
||||||
|
{"electrical", "B. Elektrisch", []string{"electrical_hazard", "electrical", "emc_hazard"}},
|
||||||
|
{"thermal", "C. Thermisch", []string{"thermal_hazard", "thermal", "high_temperature", "fire_explosion"}},
|
||||||
|
{"pneumatic_hydraulic", "D. Pneumatik/Hydraulik", []string{"pneumatic_hydraulic"}},
|
||||||
|
{"noise_vibration", "E. Laerm/Vibration", []string{"noise_hazard", "noise_vibration", "vibration_hazard"}},
|
||||||
|
{"ergonomic", "F. Ergonomie", []string{"ergonomic_hazard", "ergonomic"}},
|
||||||
|
{"material", "G. Stoffe/Umwelt", []string{"material_environmental", "chemical_risk", "radiation_hazard"}},
|
||||||
|
}
|
||||||
|
|
||||||
|
// CoverageGap reports one ISO 12100 hazard group for which the engine produced
// no hazard.
type CoverageGap struct {
	// Group is the display label of the empty group.
	Group string `json:"group"`
	// Key is the stable identifier of the empty group.
	Key string `json:"key"`
	// Note is a reviewer hint explaining what the gap may mean.
	Note string `json:"note"`
}
|
||||||
|
|
||||||
|
// FindCoverageGaps returns the A-G hazard groups that produced zero hazards.
|
||||||
|
func FindCoverageGaps(hazards []Hazard) []CoverageGap {
|
||||||
|
present := make(map[string]bool, len(hazards))
|
||||||
|
for _, h := range hazards {
|
||||||
|
present[h.Category] = true
|
||||||
|
}
|
||||||
|
var gaps []CoverageGap
|
||||||
|
for _, g := range iso12100Groups {
|
||||||
|
covered := false
|
||||||
|
for _, c := range g.Cats {
|
||||||
|
if present[c] {
|
||||||
|
covered = true
|
||||||
|
break
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if !covered {
|
||||||
|
gaps = append(gaps, CoverageGap{
|
||||||
|
Group: g.Label, Key: g.Key,
|
||||||
|
Note: "no engine hazard in this ISO 12100 group — verify the machine truly lacks it, or a pattern is missing",
|
||||||
|
})
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return gaps
|
||||||
|
}
|
||||||
|
|
||||||
|
// MissingHazard is one hazard proposed by the LLM as expected for the machine
// but absent from the engine output.
type MissingHazard struct {
	// Group names the hazard group the proposal belongs to.
	Group string `json:"group"`
	// Hazard is the proposed hazard description.
	Hazard string `json:"hazard"`
	// Why is the LLM's justification for expecting the hazard.
	Why string `json:"why"`
}
|
||||||
|
|
||||||
|
// ProposeMissingHazards asks the LLM to expand the empty groups into specific
|
||||||
|
// expected hazards. Returns nil without a completer or on any error — propose-only,
|
||||||
|
// never breaks the run.
|
||||||
|
func ProposeMissingHazards(ctx context.Context, completer LLMCompleter, machineClass, narrative string, produced []Hazard, gaps []CoverageGap) []MissingHazard {
|
||||||
|
if completer == nil || len(gaps) == 0 {
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
system, user := BuildCoveragePrompt(machineClass, narrative, produced, gaps)
|
||||||
|
raw, err := completer.Complete(ctx, system, user)
|
||||||
|
if err != nil {
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
return parseMissingHazards(raw)
|
||||||
|
}
|
||||||
|
|
||||||
|
// BuildCoveragePrompt frames the "what is missing?" question for the LLM.
|
||||||
|
func BuildCoveragePrompt(machineClass, narrative string, produced []Hazard, gaps []CoverageGap) (system, user string) {
|
||||||
|
system = "Du bist Sachverstaendiger fuer Maschinensicherheit nach EN ISO 12100. " +
|
||||||
|
"Dir werden eine Maschine, die bereits erkannten Gefaehrdungen und Gefaehrdungsgruppen OHNE Eintrag genannt. " +
|
||||||
|
"Nenne nur Gefaehrdungen, die ein Sachverstaendiger fuer DIESE Maschine ERWARTET, die aber FEHLEN. " +
|
||||||
|
"Erfinde nichts Maschinenfremdes. Antworte AUSSCHLIESSLICH als JSON-Array: " +
|
||||||
|
`[{"group":"...","hazard":"...","why":"..."}].`
|
||||||
|
|
||||||
|
var have []string
|
||||||
|
seen := map[string]bool{}
|
||||||
|
for _, h := range produced {
|
||||||
|
if h.Category != "" && !seen[h.Category] {
|
||||||
|
seen[h.Category] = true
|
||||||
|
have = append(have, h.Category)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
var empty []string
|
||||||
|
for _, g := range gaps {
|
||||||
|
empty = append(empty, g.Group)
|
||||||
|
}
|
||||||
|
user = fmt.Sprintf("Maschinenklasse: %s\n\nBeschreibung:\n%s\n\nBereits erkannte Kategorien: %s\n\nGruppen OHNE Eintrag (Fokus): %s\n\nWelche erwarteten Gefaehrdungen fehlen?",
|
||||||
|
machineClass, narrative, strings.Join(have, ", "), strings.Join(empty, ", "))
|
||||||
|
return system, user
|
||||||
|
}
|
||||||
|
|
||||||
|
func parseMissingHazards(raw string) []MissingHazard {
|
||||||
|
start, end := strings.Index(raw, "["), strings.LastIndex(raw, "]")
|
||||||
|
if start < 0 || end <= start {
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
var out []MissingHazard
|
||||||
|
if err := json.Unmarshal([]byte(raw[start:end+1]), &out); err != nil {
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
return out
|
||||||
|
}
|
||||||
|
|
||||||
|
// RenderCoverageQueue renders the deterministic gaps plus any LLM-proposed missing
|
||||||
|
// hazards as a markdown review queue.
|
||||||
|
func RenderCoverageQueue(machine string, gaps []CoverageGap, missing []MissingHazard) string {
|
||||||
|
var b strings.Builder
|
||||||
|
fmt.Fprintf(&b, "# Coverage blind-spot queue — %s\n\n", machine)
|
||||||
|
fmt.Fprintf(&b, "%d ISO 12100 group(s) (A-G) have no engine hazard. Propose-only — a human confirms whether the machine truly lacks it or a pattern/GT case is missing.\n\n", len(gaps))
|
||||||
|
for _, g := range gaps {
|
||||||
|
fmt.Fprintf(&b, "- **%s** — %s\n", g.Group, g.Note)
|
||||||
|
}
|
||||||
|
if len(missing) > 0 {
|
||||||
|
fmt.Fprintf(&b, "\n## LLM-proposed expected-but-missing hazards (%d)\n\n", len(missing))
|
||||||
|
for i, m := range missing {
|
||||||
|
fmt.Fprintf(&b, "%d. [%s] %s\n - why: %s\n", i+1, m.Group, m.Hazard, m.Why)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return b.String()
|
||||||
|
}
|
||||||
@@ -0,0 +1,59 @@
|
|||||||
|
package iace
|
||||||
|
|
||||||
|
import (
|
||||||
|
"context"
|
||||||
|
"strings"
|
||||||
|
"testing"
|
||||||
|
)
|
||||||
|
|
||||||
|
func TestFindCoverageGaps(t *testing.T) {
|
||||||
|
hazards := []Hazard{
|
||||||
|
{Category: "mechanical_hazard"},
|
||||||
|
{Category: "thermal_hazard"},
|
||||||
|
{Category: "electrical_hazard"},
|
||||||
|
{Category: "material_environmental"},
|
||||||
|
}
|
||||||
|
gapKeys := map[string]bool{}
|
||||||
|
for _, g := range FindCoverageGaps(hazards) {
|
||||||
|
gapKeys[g.Key] = true
|
||||||
|
}
|
||||||
|
for _, want := range []string{"pneumatic_hydraulic", "noise_vibration", "ergonomic"} {
|
||||||
|
if !gapKeys[want] {
|
||||||
|
t.Errorf("expected gap %s", want)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
for _, notWant := range []string{"mechanical", "thermal", "electrical", "material"} {
|
||||||
|
if gapKeys[notWant] {
|
||||||
|
t.Errorf("did not expect gap %s (covered)", notWant)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestBuildCoveragePrompt_ContainsContext(t *testing.T) {
|
||||||
|
produced := []Hazard{{Category: "thermal_hazard"}}
|
||||||
|
gaps := []CoverageGap{{Group: "F. Ergonomie", Key: "ergonomic"}}
|
||||||
|
system, user := BuildCoveragePrompt("Geschirrspuelmaschine", "Eine Spuelmaschine mit Tank.", produced, gaps)
|
||||||
|
if !strings.Contains(system, "EN ISO 12100") || !strings.Contains(system, "JSON") {
|
||||||
|
t.Errorf("system prompt missing framing")
|
||||||
|
}
|
||||||
|
for _, want := range []string{"Geschirrspuelmaschine", "thermal_hazard", "F. Ergonomie", "Spuelmaschine mit Tank"} {
|
||||||
|
if !strings.Contains(user, want) {
|
||||||
|
t.Errorf("user prompt missing %q", want)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestProposeMissingHazards_ParsesAndDegrades(t *testing.T) {
|
||||||
|
gaps := []CoverageGap{{Group: "F. Ergonomie", Key: "ergonomic"}}
|
||||||
|
c := fakeCompleter{out: `Hier: [{"group":"F. Ergonomie","hazard":"Heben schwerer Koerbe","why":"manuelles Beladen"}] fertig`}
|
||||||
|
got := ProposeMissingHazards(context.Background(), c, "x", "n", nil, gaps)
|
||||||
|
if len(got) != 1 || got[0].Hazard != "Heben schwerer Koerbe" {
|
||||||
|
t.Fatalf("parse: got %+v", got)
|
||||||
|
}
|
||||||
|
if ProposeMissingHazards(context.Background(), nil, "x", "n", nil, gaps) != nil {
|
||||||
|
t.Errorf("nil completer must return nil")
|
||||||
|
}
|
||||||
|
if ProposeMissingHazards(context.Background(), fakeCompleter{err: context.DeadlineExceeded}, "x", "n", nil, gaps) != nil {
|
||||||
|
t.Errorf("error must return nil")
|
||||||
|
}
|
||||||
|
}
|
||||||
@@ -0,0 +1,152 @@
|
|||||||
|
package iace
|
||||||
|
|
||||||
|
import (
|
||||||
|
"fmt"
|
||||||
|
"math"
|
||||||
|
"regexp"
|
||||||
|
"sort"
|
||||||
|
"strings"
|
||||||
|
)
|
||||||
|
|
||||||
|
// Offline dedup-candidate proposer (P2, type 1). DEV-TIME ONLY.
|
||||||
|
//
|
||||||
|
// It inspects the patterns that fired for one machine and proposes which look
|
||||||
|
// like duplicates, so a human (later an LLM) can decide a supersession/merge. It
|
||||||
|
// NEVER mutates the pattern library or the runtime — it only surfaces candidates.
|
||||||
|
// The deterministic GT screen (ScreenSupersession, proposer_screen.go) is the
|
||||||
|
// wall that proves a proposal is safe before a human ever sees it.
|
||||||
|
//
|
||||||
|
// Detection here is purely structural (category + zone + measure + scenario
|
||||||
|
// overlap) and therefore reproducible. Two safety rules bake in what P1 taught
|
||||||
|
// us about the dishwasher review:
|
||||||
|
// - only patterns with the SAME primary category are ever compared;
|
||||||
|
// - a pair with DIFFERENT operational states is NEVER proposed, because
|
||||||
|
// normal-operation and maintenance are legitimately distinct contexts with
|
||||||
|
// different protective measures (e.g. HP011 vs HP077). Merging them would
|
||||||
|
// erase the maintenance view.
|
||||||
|
|
||||||
|
// DedupCandidate describes one proposed near-duplicate pattern pair: the
// pattern to keep, the pattern to supersede, and the overlap figures that led
// to the proposal.
type DedupCandidate struct {
	KeepPattern     string  `json:"keep_pattern"`     // ID of the higher-priority survivor
	DropPattern     string  `json:"drop_pattern"`     // ID of the supersession target
	KeepName        string  `json:"keep_name"`        // pattern name of the survivor
	KeepHazardName  string  `json:"keep_hazard_name"` // ScenarioDE of the survivor (for the GT-distinctness screen)
	DropName        string  `json:"drop_name"`        // ScenarioDE of the drop pattern, i.e. its generated hazard name
	Category        string  `json:"category"`         // primary category shared by both patterns
	ZoneJaccard     float64 `json:"zone_jaccard"`     // zone-term overlap, rounded to two decimals
	MeasureJaccard  float64 `json:"measure_jaccard"`  // suggested-measure overlap, rounded to two decimals
	ScenarioJaccard float64 `json:"scenario_jaccard"` // scenario-word overlap, rounded to two decimals
	Score           float64 `json:"score"`            // weighted combination of the three overlaps, rounded
	Rationale       string  `json:"rationale"`        // human-readable summary of the figures above
}
|
||||||
|
|
||||||
|
// FindDedupCandidates compares the fired patterns pairwise and returns near-dup
|
||||||
|
// candidates whose combined overlap score meets threshold, deterministically
|
||||||
|
// ordered (score desc, then drop-pattern id). The combined score weights measure
|
||||||
|
// overlap highest (shared measures are the strongest duplicate signal), then zone
|
||||||
|
// and scenario equally.
|
||||||
|
func FindDedupCandidates(fired []PatternMatch, threshold float64) []DedupCandidate {
|
||||||
|
var out []DedupCandidate
|
||||||
|
for i := 0; i < len(fired); i++ {
|
||||||
|
for j := i + 1; j < len(fired); j++ {
|
||||||
|
a, b := fired[i], fired[j]
|
||||||
|
ca := primaryCat(a)
|
||||||
|
if ca == "" || ca != primaryCat(b) {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
if !sameOpStateSet(a.OperationalStates, b.OperationalStates) {
|
||||||
|
continue // legitimate lifecycle variants — never propose a merge
|
||||||
|
}
|
||||||
|
zj := tokenJaccard(zoneTokenSet(a.ZoneDE), zoneTokenSet(b.ZoneDE))
|
||||||
|
mj := tokenJaccard(toSet(a.SuggestedMeasureIDs), toSet(b.SuggestedMeasureIDs))
|
||||||
|
sj := tokenJaccard(wordTokenSet(a.ScenarioDE), wordTokenSet(b.ScenarioDE))
|
||||||
|
score := 0.4*mj + 0.3*zj + 0.3*sj
|
||||||
|
if score < threshold {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
keep, drop := a, b
|
||||||
|
if b.Priority > a.Priority {
|
||||||
|
keep, drop = b, a
|
||||||
|
}
|
||||||
|
out = append(out, DedupCandidate{
|
||||||
|
KeepPattern: keep.PatternID, DropPattern: drop.PatternID,
|
||||||
|
KeepName: keep.PatternName, KeepHazardName: keep.ScenarioDE, DropName: drop.ScenarioDE,
|
||||||
|
Category: ca, ZoneJaccard: round2(zj), MeasureJaccard: round2(mj),
|
||||||
|
ScenarioJaccard: round2(sj), Score: round2(score),
|
||||||
|
Rationale: fmt.Sprintf(
|
||||||
|
"same category %q · measure overlap %.0f%% · zone overlap %.0f%% · scenario overlap %.0f%% → keep %s (P%d), supersede %s (P%d)",
|
||||||
|
ca, mj*100, zj*100, sj*100, keep.PatternID, keep.Priority, drop.PatternID, drop.Priority),
|
||||||
|
})
|
||||||
|
}
|
||||||
|
}
|
||||||
|
sort.SliceStable(out, func(i, j int) bool {
|
||||||
|
if out[i].Score != out[j].Score {
|
||||||
|
return out[i].Score > out[j].Score
|
||||||
|
}
|
||||||
|
return out[i].DropPattern < out[j].DropPattern
|
||||||
|
})
|
||||||
|
return out
|
||||||
|
}
|
||||||
|
|
||||||
|
func primaryCat(pm PatternMatch) string {
|
||||||
|
if len(pm.HazardCats) == 0 {
|
||||||
|
return ""
|
||||||
|
}
|
||||||
|
return pm.HazardCats[0]
|
||||||
|
}
|
||||||
|
|
||||||
|
func sameOpStateSet(a, b []string) bool {
|
||||||
|
sa, sb := toSet(a), toSet(b)
|
||||||
|
if len(sa) != len(sb) {
|
||||||
|
return false
|
||||||
|
}
|
||||||
|
for k := range sa {
|
||||||
|
if !sb[k] {
|
||||||
|
return false
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return true
|
||||||
|
}
|
||||||
|
|
||||||
|
// proposerWordSplit matches every run of characters that are not Unicode
// letters; it is used as the separator when splitting text into words.
var proposerWordSplit = regexp.MustCompile(`[^\p{L}]+`)

// zoneTokenSet lowercases zone, splits it at commas, trims surrounding
// whitespace from each term and returns the set of terms that are at least
// three runes long.
func zoneTokenSet(zone string) map[string]bool {
	isComma := func(r rune) bool { return r == ',' }

	terms := map[string]bool{}
	for _, raw := range strings.FieldsFunc(strings.ToLower(zone), isComma) {
		term := strings.TrimSpace(raw)
		if len([]rune(term)) < 3 {
			continue
		}
		terms[term] = true
	}
	return terms
}

// wordTokenSet lowercases s, splits it at every non-letter run and returns the
// set of words that are at least four runes long, which drops short
// connectives.
func wordTokenSet(s string) map[string]bool {
	words := map[string]bool{}
	for _, word := range proposerWordSplit.Split(strings.ToLower(s), -1) {
		if len([]rune(word)) < 4 {
			continue
		}
		words[word] = true
	}
	return words
}
|
||||||
|
|
||||||
|
// tokenJaccard returns the Jaccard similarity of two token sets: the number of
// keys of a that are set in b, divided by the size of the union.
//
// When either set is empty the result is 0. In particular two empty sets score
// 0 rather than 1, so two patterns that both lack zone terms, measures or
// scenario words earn no overlap credit for that dimension.
func tokenJaccard(a, b map[string]bool) float64 {
	// An empty side means an empty intersection, so the similarity is 0. This
	// guard also guarantees a non-zero union below, which is why no separate
	// division-by-zero check is needed.
	if len(a) == 0 || len(b) == 0 {
		return 0
	}
	shared := 0
	for tok := range a {
		if b[tok] {
			shared++
		}
	}
	return float64(shared) / float64(len(a)+len(b)-shared)
}
|
||||||
|
|
||||||
|
// round2 rounds x to two decimal places using math.Round.
func round2(x float64) float64 {
	const scale = 100
	return math.Round(x*scale) / scale
}
|
||||||
@@ -0,0 +1,67 @@
|
|||||||
|
package iace
|
||||||
|
|
||||||
|
import "testing"
|
||||||
|
|
||||||
|
func mkPM(id, cat, zone, scenario string, prio int, measures, opstates []string) PatternMatch {
|
||||||
|
return PatternMatch{
|
||||||
|
PatternID: id, PatternName: id, Priority: prio,
|
||||||
|
HazardCats: []string{cat}, ZoneDE: zone, ScenarioDE: scenario,
|
||||||
|
SuggestedMeasureIDs: measures, OperationalStates: opstates,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestFindDedupCandidates_FindsOverlappingPair(t *testing.T) {
|
||||||
|
fired := []PatternMatch{
|
||||||
|
mkPM("HPa", "update_failure", "Steuerung, SPS", "Software-Update der Steuerung scheitert nach Abbruch", 80,
|
||||||
|
[]string{"M138", "M146"}, nil),
|
||||||
|
mkPM("HPb", "update_failure", "Steuerung, Antriebsregler", "Software-Update der Steuerung schlaegt fehl", 75,
|
||||||
|
[]string{"M138", "M146", "M141"}, nil),
|
||||||
|
mkPM("HPc", "mechanical_hazard", "Tuer", "Quetschen der Finger an der Tuer", 70,
|
||||||
|
[]string{"M003"}, nil),
|
||||||
|
}
|
||||||
|
got := FindDedupCandidates(fired, 0.4)
|
||||||
|
if len(got) != 1 {
|
||||||
|
t.Fatalf("want 1 candidate, got %d: %+v", len(got), got)
|
||||||
|
}
|
||||||
|
// Higher-priority pattern survives, lower one is the drop target.
|
||||||
|
if got[0].KeepPattern != "HPa" || got[0].DropPattern != "HPb" {
|
||||||
|
t.Errorf("want keep HPa / drop HPb, got keep %s / drop %s", got[0].KeepPattern, got[0].DropPattern)
|
||||||
|
}
|
||||||
|
if got[0].DropName != "Software-Update der Steuerung schlaegt fehl" {
|
||||||
|
t.Errorf("DropName must equal drop pattern ScenarioDE, got %q", got[0].DropName)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestFindDedupCandidates_LifecycleGuard(t *testing.T) {
|
||||||
|
// Same category, zone and measures — but normal-operation vs maintenance.
|
||||||
|
// These are legitimate variants (HP011 vs HP077) and must NOT be proposed.
|
||||||
|
fired := []PatternMatch{
|
||||||
|
mkPM("HP011", "electrical_hazard", "Schaltschrank, Klemmenkasten", "Person beruehrt spannungsfuehrende Teile", 95,
|
||||||
|
[]string{"M481", "M482"}, nil),
|
||||||
|
mkPM("HP077", "electrical_hazard", "Schaltschrank, Klemmenkasten", "Person beruehrt spannungsfuehrende Teile", 80,
|
||||||
|
[]string{"M481", "M482"}, []string{"maintenance"}),
|
||||||
|
}
|
||||||
|
if got := FindDedupCandidates(fired, 0.4); len(got) != 0 {
|
||||||
|
t.Fatalf("lifecycle guard failed: want 0 candidates, got %d: %+v", len(got), got)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestFindDedupCandidates_DifferentCategoryIgnored(t *testing.T) {
|
||||||
|
fired := []PatternMatch{
|
||||||
|
mkPM("HPa", "thermal_hazard", "Boiler", "Heisse Oberflaeche am Boiler", 80, []string{"M071"}, nil),
|
||||||
|
mkPM("HPb", "mechanical_hazard", "Boiler", "Heisse Oberflaeche am Boiler", 80, []string{"M071"}, nil),
|
||||||
|
}
|
||||||
|
if got := FindDedupCandidates(fired, 0.3); len(got) != 0 {
|
||||||
|
t.Fatalf("cross-category pair must not be proposed, got %d", len(got))
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestFindDedupCandidates_BelowThresholdDropped(t *testing.T) {
|
||||||
|
fired := []PatternMatch{
|
||||||
|
mkPM("HPa", "mechanical_hazard", "Tuer", "Quetschen an der Tuer", 80, []string{"M003"}, nil),
|
||||||
|
mkPM("HPb", "mechanical_hazard", "Foerderband", "Einzug am Foerderband", 80, []string{"M540"}, nil),
|
||||||
|
}
|
||||||
|
if got := FindDedupCandidates(fired, 0.4); len(got) != 0 {
|
||||||
|
t.Fatalf("disjoint pair must be below threshold, got %d: %+v", len(got), got)
|
||||||
|
}
|
||||||
|
}
|
||||||
@@ -0,0 +1,154 @@
|
|||||||
|
package iace
|
||||||
|
|
||||||
|
import (
|
||||||
|
"fmt"
|
||||||
|
"sort"
|
||||||
|
"strings"
|
||||||
|
)
|
||||||
|
|
||||||
|
// Foreign-framing proposer (P2 slice 4, type 2). DEV-TIME, propose-only.
|
||||||
|
//
|
||||||
|
// A pattern can fire for a machine yet describe its hazard with a zone text
|
||||||
|
// framed for a DIFFERENT machine (e.g. a dishwasher hazard whose zone names
|
||||||
|
// "Walzen, Transportbaender" or "Bearbeitungszone"). Such foreign framing leaks
|
||||||
|
// through terms that are NOT yet in domainGateTerms — once a term is a gate term,
|
||||||
|
// the ghost-pattern invariant already fences the pattern out. So we surface the
|
||||||
|
// candidates structurally: zone terms a fired pattern names that the machine's
|
||||||
|
// narrative never mentions (minus generic hazard-location vocabulary). A human
|
||||||
|
// (or the LLM) then decides: add a dom_* gate term, or re-frame the zone text.
|
||||||
|
//
|
||||||
|
// This OVER-surfaces by design — the human/LLM is the precision filter, not the
|
||||||
|
// detector (same contract as the dedup proposer).
|
||||||
|
|
||||||
|
// genericHazardStop holds hazard-location words that legitimately appear in a
// zone text without being repeated in the narrative. A zone part containing one
// of these words is treated as echoed and is not evidence of foreign framing.
var genericHazardStop = map[string]bool{
	// Named hazard points.
	"quetschstelle": true,
	"einzugstelle":  true,
	"einzugsstelle": true,
	"scherstelle":   true,
	"schneidstelle": true,
	"stossstelle":   true,
	"fangstelle":    true,
	"klemmstelle":   true,
	"gefahrstelle":  true,
	"gefahrenstelle": true,

	// Areas and surroundings.
	"gefahrbereich":   true,
	"gefahrenbereich": true,
	"arbeitsbereich":  true,
	"wirkbereich":     true,
	"schutzbereich":   true,
	"umgebung":        true,
	"bereich":         true,
	"zugang":          true,

	// Generic machine parts.
	"oberflaeche":  true,
	"oberflaechen": true,
	"gehaeuse":     true,
	"bauteil":      true,
	"bauteile":     true,
	"komponente":   true,
	"maschine":     true,
}
|
||||||
|
|
||||||
|
// FramingCandidate describes one fired pattern whose zone text is largely not
// reflected in the machine narrative.
type FramingCandidate struct {
	Pattern        string   `json:"pattern"`         // pattern ID
	Name           string   `json:"name"`            // pattern name
	Category       string   `json:"category"`        // primary hazard category of the pattern
	Zone           string   `json:"zone"`            // zone text as stored on the pattern
	OrphanTerms    []string `json:"orphan_terms"`    // zone terms without a narrative echo
	OrphanFraction float64  `json:"orphan_fraction"` // orphan terms / all zone terms, rounded to two decimals
	Verdict        string   `json:"verdict"`         // heuristic lean: foreign | plausible
	Evidence       string   `json:"evidence"`        // human-readable summary of the orphan terms
}
|
||||||
|
|
||||||
|
// FindFramingCandidates returns fired patterns whose zone is mostly not echoed in
|
||||||
|
// the narrative, sorted by orphan fraction descending (deterministic).
|
||||||
|
func FindFramingCandidates(fired []PatternMatch, narrative string, minFraction float64) []FramingCandidate {
|
||||||
|
nar := strings.ToLower(narrative)
|
||||||
|
var narStems []string
|
||||||
|
for _, w := range proposerWordSplit.Split(nar, -1) {
|
||||||
|
if len([]rune(w)) >= 5 {
|
||||||
|
narStems = append(narStems, w)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
var out []FramingCandidate
|
||||||
|
for _, pm := range fired {
|
||||||
|
parts := zoneParts(pm.ZoneDE)
|
||||||
|
if len(parts) == 0 {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
var orphans []string
|
||||||
|
for _, p := range parts {
|
||||||
|
if !partEchoed(p, nar, narStems) {
|
||||||
|
orphans = append(orphans, p)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
frac := float64(len(orphans)) / float64(len(parts))
|
||||||
|
if len(orphans) == 0 || frac < minFraction {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
out = append(out, FramingCandidate{
|
||||||
|
Pattern: pm.PatternID, Name: pm.PatternName, Category: primaryCat(pm),
|
||||||
|
Zone: pm.ZoneDE, OrphanTerms: orphans, OrphanFraction: round2(frac),
|
||||||
|
Verdict: framingHeuristicVerdict(frac),
|
||||||
|
Evidence: fmt.Sprintf("%d/%d zone terms have no narrative echo: %s", len(orphans), len(parts), strings.Join(orphans, ", ")),
|
||||||
|
})
|
||||||
|
}
|
||||||
|
sort.SliceStable(out, func(i, j int) bool {
|
||||||
|
if out[i].OrphanFraction != out[j].OrphanFraction {
|
||||||
|
return out[i].OrphanFraction > out[j].OrphanFraction
|
||||||
|
}
|
||||||
|
return out[i].Pattern < out[j].Pattern
|
||||||
|
})
|
||||||
|
return out
|
||||||
|
}
|
||||||
|
|
||||||
|
// framingHeuristicVerdict maps an orphan fraction to a heuristic lean:
// "foreign" when frac is at least 0.99 (nothing in the zone is echoed by the
// narrative), otherwise "plausible" (partial echo, likely generic vocabulary,
// for a human to confirm).
func framingHeuristicVerdict(frac float64) string {
	verdict := "plausible"
	if frac >= 0.99 {
		verdict = "foreign"
	}
	return verdict
}
|
||||||
|
|
||||||
|
// zoneParts lowercases zone, splits it at commas, slashes, semicolons and
// parentheses, trims surrounding whitespace from each piece and returns, in
// order, the pieces that are at least four runes long.
func zoneParts(zone string) []string {
	isSeparator := func(r rune) bool {
		switch r {
		case ',', '/', ';', '(', ')':
			return true
		}
		return false
	}

	var terms []string
	for _, piece := range strings.FieldsFunc(strings.ToLower(zone), isSeparator) {
		term := strings.TrimSpace(piece)
		if len([]rune(term)) < 4 {
			continue
		}
		terms = append(terms, term)
	}
	return terms
}
|
||||||
|
|
||||||
|
// partEchoed reports whether a zone part is reflected in the narrative. Matching
|
||||||
|
// is bidirectional to survive German compounding: a zone word echoes if it is a
|
||||||
|
// generic hazard term, if it is a substring of the narrative, OR if any narrative
|
||||||
|
// stem (>= 5 chars) is a substring of the zone word (so narrative "Steuerung"
|
||||||
|
// echoes zone "Steuerungssystem").
|
||||||
|
func partEchoed(part, narrative string, narStems []string) bool {
|
||||||
|
for _, w := range strings.Fields(part) {
|
||||||
|
if genericHazardStop[w] {
|
||||||
|
return true
|
||||||
|
}
|
||||||
|
if len([]rune(w)) < 4 {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
if strings.Contains(narrative, w) {
|
||||||
|
return true
|
||||||
|
}
|
||||||
|
for _, ns := range narStems {
|
||||||
|
if strings.Contains(w, ns) {
|
||||||
|
return true
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return false
|
||||||
|
}
|
||||||
|
|
||||||
|
// RenderFramingQueue renders foreign-framing candidates as a markdown review queue.
|
||||||
|
func RenderFramingQueue(machine string, candidates []FramingCandidate) string {
|
||||||
|
var b strings.Builder
|
||||||
|
fmt.Fprintf(&b, "# Foreign-framing review queue — %s\n\n", machine)
|
||||||
|
fmt.Fprintf(&b, "%d fired pattern(s) name zone terms the narrative never mentions. Propose-only — a human (or the LLM) decides: add a dom_* gate term, or re-frame the zone.\n\n", len(candidates))
|
||||||
|
for i, c := range candidates {
|
||||||
|
fmt.Fprintf(&b, "## %d. %s — %s [%s, orphan %.0f%%]\n", i+1, c.Pattern, c.Name, c.Verdict, c.OrphanFraction*100)
|
||||||
|
fmt.Fprintf(&b, "- category: %s\n- zone: %s\n", c.Category, c.Zone)
|
||||||
|
fmt.Fprintf(&b, "- orphan terms (no narrative echo): %s\n", strings.Join(c.OrphanTerms, ", "))
|
||||||
|
fmt.Fprintf(&b, "- suggested action: %s\n\n", framingAction(c.Verdict))
|
||||||
|
}
|
||||||
|
return b.String()
|
||||||
|
}
|
||||||
|
|
||||||
|
// framingAction returns the suggested-action text for a framing verdict.
// The "foreign" verdict gets the gate-term / re-frame proposal; every other
// verdict gets the partial-echo confirmation text.
func framingAction(verdict string) string {
	if verdict == "foreign" {
		return "likely foreign-framed — propose a dom_* gate term for the orphan term(s), or re-frame the zone; human confirms + commits + pins a GT case"
	}
	return "partial echo — likely generic vocabulary; human to confirm whether any orphan term is a foreign-machine component"
}
|
||||||
@@ -0,0 +1,33 @@
|
|||||||
|
package iace
|
||||||
|
|
||||||
|
import "testing"
|
||||||
|
|
||||||
|
func TestFindFramingCandidates_FlagsForeignZone(t *testing.T) {
|
||||||
|
narrative := "Gewerbliche Geschirrspuelmaschine mit Boiler und Tank. Die Tuer ist verriegelt."
|
||||||
|
fired := []PatternMatch{
|
||||||
|
mkPM("HPforeign", "mechanical_hazard", "Walzen, Transportbaender, Bearbeitungszone", "Einzug", 80, nil, nil),
|
||||||
|
mkPM("HPlocal", "thermal_hazard", "Boiler, Tank, Tuer", "Verbrennung", 80, nil, nil),
|
||||||
|
mkPM("HPgeneric", "mechanical_hazard", "Quetschstelle, Gefahrbereich", "Quetschen", 80, nil, nil),
|
||||||
|
}
|
||||||
|
got := FindFramingCandidates(fired, narrative, 0.6)
|
||||||
|
if len(got) != 1 || got[0].Pattern != "HPforeign" {
|
||||||
|
t.Fatalf("want only HPforeign flagged, got %+v", got)
|
||||||
|
}
|
||||||
|
if got[0].Verdict != "foreign" {
|
||||||
|
t.Errorf("fully-orphan zone should be 'foreign', got %s", got[0].Verdict)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestFindFramingCandidates_PartialEchoIsPlausible(t *testing.T) {
|
||||||
|
narrative := "Maschine mit Boiler und Tank."
|
||||||
|
fired := []PatternMatch{
|
||||||
|
mkPM("HPx", "thermal_hazard", "Boiler, Tank, Auspuffleitung", "x", 80, nil, nil),
|
||||||
|
}
|
||||||
|
got := FindFramingCandidates(fired, narrative, 0.3)
|
||||||
|
if len(got) != 1 {
|
||||||
|
t.Fatalf("want 1 candidate (1/3 orphan >= 0.3), got %d", len(got))
|
||||||
|
}
|
||||||
|
if got[0].Verdict != "plausible" || len(got[0].OrphanTerms) != 1 || got[0].OrphanTerms[0] != "auspuffleitung" {
|
||||||
|
t.Errorf("want plausible + orphan [auspuffleitung], got %s %v", got[0].Verdict, got[0].OrphanTerms)
|
||||||
|
}
|
||||||
|
}
|
||||||
@@ -0,0 +1,123 @@
|
|||||||
|
package iace
|
||||||
|
|
||||||
|
import "github.com/google/uuid"
|
||||||
|
|
||||||
|
// Non-test plumbing for the offline proposer (P2 slice 3): run the engine for a
|
||||||
|
// narrative and produce the fired patterns + the engine-built hazards/mitigations
|
||||||
|
// the dedup proposer and GT screen consume. This is the same pipeline the GT
|
||||||
|
// benchmark tests use, lifted out of test scope so the dev-time CLI can call it.
|
||||||
|
|
||||||
|
// universalLifecyclePhases are appended to the parsed lifecycle phases so
// patterns gated to a specific lifecycle (maintenance/cleaning/setup/fault
// clearing) still fire — the proposer wants the full hazard picture, not only
// normal-operation hazards.
var universalLifecyclePhases = []string{"normal_operation", "maintenance", "cleaning", "setup", "fault_clearing"}
|
||||||
|
|
||||||
|
// BuildProposerInput parses a narrative, runs the pattern engine, keeps the
|
||||||
|
// narrative-relevant patterns, and returns the hazards, mitigations and fired
|
||||||
|
// patterns. NOTE: it does not apply the CE cyber-category skip, so the proposer
|
||||||
|
// view may include cyber/AI hazards that the CE log excludes — harmless for the
|
||||||
|
// GT recall screen (they match no CE ground-truth entry).
|
||||||
|
func BuildProposerInput(narrative, machineType string, extraMachineTypes []string) ([]Hazard, []Mitigation, []PatternMatch) {
|
||||||
|
res := ParseNarrative(narrative, machineType)
|
||||||
|
|
||||||
|
var compIDs, compNames, energyIDs []string
|
||||||
|
for _, c := range res.Components {
|
||||||
|
if c.Negated {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
compIDs = append(compIDs, c.LibraryID)
|
||||||
|
compNames = append(compNames, c.NameDE)
|
||||||
|
}
|
||||||
|
for _, e := range res.EnergySources {
|
||||||
|
energyIDs = append(energyIDs, e.SourceID)
|
||||||
|
}
|
||||||
|
|
||||||
|
machineTypes := append([]string{}, extraMachineTypes...)
|
||||||
|
if machineType != "" {
|
||||||
|
machineTypes = append(machineTypes, machineType)
|
||||||
|
}
|
||||||
|
lifecycles := append(append([]string{}, res.LifecyclePhases...), universalLifecyclePhases...)
|
||||||
|
|
||||||
|
out := NewPatternEngine().Match(MatchInput{
|
||||||
|
ComponentLibraryIDs: compIDs,
|
||||||
|
EnergySourceIDs: energyIDs,
|
||||||
|
LifecyclePhases: lifecycles,
|
||||||
|
CustomTags: res.CustomTags,
|
||||||
|
OperationalStates: res.OperationalStates,
|
||||||
|
StateTransitions: res.StateTransitions,
|
||||||
|
HumanRoles: res.Roles,
|
||||||
|
MachineTypes: machineTypes,
|
||||||
|
})
|
||||||
|
|
||||||
|
kept := make([]PatternMatch, 0, len(out.MatchedPatterns))
|
||||||
|
for _, pm := range out.MatchedPatterns {
|
||||||
|
if IsPatternRelevant(pm, narrative, compNames) {
|
||||||
|
kept = append(kept, pm)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
filtered := *out
|
||||||
|
filtered.MatchedPatterns = kept
|
||||||
|
hazards, mits := patternsToHazardsAndMitigations(&filtered)
|
||||||
|
return hazards, mits, kept
|
||||||
|
}
|
||||||
|
|
||||||
|
// patternsToHazardsAndMitigations converts engine output into the hazard/mitigation
|
||||||
|
// entities the benchmark + proposer compare on. Simplified vs InitializeProject
|
||||||
|
// (no risk estimation, no norm refs) — it only needs category/zone/scenario/measures.
|
||||||
|
func patternsToHazardsAndMitigations(out *MatchOutput) ([]Hazard, []Mitigation) {
|
||||||
|
hazards := make([]Hazard, 0, len(out.MatchedPatterns))
|
||||||
|
patternToHazard := make(map[string]uuid.UUID, len(out.MatchedPatterns))
|
||||||
|
|
||||||
|
for _, pm := range out.MatchedPatterns {
|
||||||
|
cat := ""
|
||||||
|
if len(pm.HazardCats) > 0 {
|
||||||
|
cat = pm.HazardCats[0]
|
||||||
|
}
|
||||||
|
lifecycle := ""
|
||||||
|
if len(pm.ApplicableLifecycles) > 0 {
|
||||||
|
lifecycle = pm.ApplicableLifecycles[0]
|
||||||
|
}
|
||||||
|
h := Hazard{
|
||||||
|
ID: uuid.New(),
|
||||||
|
Name: pm.ScenarioDE,
|
||||||
|
Category: cat,
|
||||||
|
Description: pm.ScenarioDE,
|
||||||
|
Scenario: pm.ScenarioDE,
|
||||||
|
TriggerEvent: pm.TriggerDE,
|
||||||
|
PossibleHarm: pm.HarmDE,
|
||||||
|
AffectedPerson: pm.AffectedDE,
|
||||||
|
HazardousZone: pm.ZoneDE,
|
||||||
|
LifecyclePhase: lifecycle,
|
||||||
|
}
|
||||||
|
if h.Name == "" {
|
||||||
|
h.Name = pm.PatternName
|
||||||
|
}
|
||||||
|
hazards = append(hazards, h)
|
||||||
|
patternToHazard[pm.PatternID] = h.ID
|
||||||
|
}
|
||||||
|
|
||||||
|
measureNames := make(map[string]string)
|
||||||
|
for _, m := range GetProtectiveMeasureLibrary() {
|
||||||
|
measureNames[m.ID] = m.Name
|
||||||
|
}
|
||||||
|
|
||||||
|
var mitigations []Mitigation
|
||||||
|
for _, sm := range out.SuggestedMeasures {
|
||||||
|
name := measureNames[sm.MeasureID]
|
||||||
|
if name == "" {
|
||||||
|
name = sm.MeasureID
|
||||||
|
}
|
||||||
|
for _, srcPattern := range sm.SourcePatterns {
|
||||||
|
hid, ok := patternToHazard[srcPattern]
|
||||||
|
if !ok {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
mitigations = append(mitigations, Mitigation{
|
||||||
|
ID: uuid.New(),
|
||||||
|
HazardID: hid,
|
||||||
|
Name: name,
|
||||||
|
})
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return hazards, mitigations
|
||||||
|
}
|
||||||
@@ -0,0 +1,25 @@
|
|||||||
|
package iace
|
||||||
|
|
||||||
|
import "testing"
|
||||||
|
|
||||||
|
func TestBuildProposerInput_WarewashingFires(t *testing.T) {
|
||||||
|
hazards, _, fired := BuildProposerInput(
|
||||||
|
warewashingNarrative,
|
||||||
|
"Gewerbliche Untertisch-Geschirrspuelmaschine (vernetzt)",
|
||||||
|
[]string{"food_processing"},
|
||||||
|
)
|
||||||
|
if len(fired) == 0 || len(hazards) == 0 {
|
||||||
|
t.Fatalf("want fired patterns + hazards, got %d patterns / %d hazards", len(fired), len(hazards))
|
||||||
|
}
|
||||||
|
has := func(id string) bool {
|
||||||
|
for _, pm := range fired {
|
||||||
|
if pm.PatternID == id {
|
||||||
|
return true
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return false
|
||||||
|
}
|
||||||
|
if !has("HP2201") {
|
||||||
|
t.Errorf("warewashing-specific HP2201 must fire via BuildProposerInput")
|
||||||
|
}
|
||||||
|
}
|
||||||
@@ -0,0 +1,174 @@
|
|||||||
|
package iace
|
||||||
|
|
||||||
|
import (
|
||||||
|
"context"
|
||||||
|
"encoding/json"
|
||||||
|
"fmt"
|
||||||
|
"strings"
|
||||||
|
|
||||||
|
"github.com/breakpilot/ai-compliance-sdk/internal/llm"
|
||||||
|
)
|
||||||
|
|
||||||
|
// Semantic judgement over RECALL-SAFE dedup candidates (P2 slice 2). DEV-TIME,
|
||||||
|
// propose-only. The deterministic GT wall (proposer_screen.go) has already
|
||||||
|
// removed candidates that would drop recall or that credit different GT entries;
|
||||||
|
// the judge only adds an opinion on whether the survivors are truly the same
|
||||||
|
// hazard, plus a rationale, for the human review queue. It NEVER mutates anything.
|
||||||
|
//
|
||||||
|
// The judge is pluggable behind CandidateJudge so the runtime/tests stay
|
||||||
|
// deterministic (HeuristicJudge) while the dev-time CLI can plug in the
|
||||||
|
// non-deterministic LLM (LLMJudge over the shared llm.ProviderRegistry).
|
||||||
|
|
||||||
|
// Verdict values a CandidateJudge may return for a dedup candidate.
const (
	// VerdictDuplicate: both patterns describe the same hazard.
	VerdictDuplicate = "duplicate"
	// VerdictDistinct: the patterns describe different hazards.
	VerdictDistinct = "distinct"
	// VerdictUncertain: no decision; also the fallback for judge errors.
	VerdictUncertain = "uncertain"
)
|
||||||
|
|
||||||
|
// JudgedProposal is one candidate with its GT-wall result and the judge's opinion.
|
||||||
|
type JudgedProposal struct {
|
||||||
|
Candidate DedupCandidate `json:"candidate"`
|
||||||
|
Screen ScreenResult `json:"screen"`
|
||||||
|
Verdict string `json:"verdict"`
|
||||||
|
Confidence string `json:"confidence"`
|
||||||
|
Rationale string `json:"rationale"`
|
||||||
|
Judge string `json:"judge"`
|
||||||
|
}
|
||||||
|
|
||||||
|
// CandidateJudge decides whether two near-duplicate patterns are the same hazard.
|
||||||
|
type CandidateJudge interface {
|
||||||
|
Name() string
|
||||||
|
Judge(ctx context.Context, c DedupCandidate, a, b PatternMatch) (verdict, confidence, rationale string)
|
||||||
|
}
|
||||||
|
|
||||||
|
// HeuristicJudge is the deterministic default/fallback. It only ever returns "low"
|
||||||
|
// confidence — it is a placeholder for the LLM, and it deliberately punts to
|
||||||
|
// "uncertain" on the hard cases (low text overlap, shared measures) so the queue
|
||||||
|
// makes clear exactly where the LLM earns its keep.
|
||||||
|
type HeuristicJudge struct{}
|
||||||
|
|
||||||
|
func (HeuristicJudge) Name() string { return "heuristic" }
|
||||||
|
|
||||||
|
func (HeuristicJudge) Judge(_ context.Context, c DedupCandidate, _, _ PatternMatch) (string, string, string) {
|
||||||
|
switch {
|
||||||
|
case c.ScenarioJaccard >= 0.5 || (c.ZoneJaccard >= 0.5 && c.MeasureJaccard >= 0.5):
|
||||||
|
return VerdictDuplicate, "low", "structural: high scenario, or combined zone+measure, overlap"
|
||||||
|
case c.MeasureJaccard >= 0.99 && c.ZoneJaccard == 0 && c.ScenarioJaccard < 0.3:
|
||||||
|
return VerdictDistinct, "low", "structural: identical measures but no zone/scenario overlap — likely distinct hazards sharing generic measures"
|
||||||
|
default:
|
||||||
|
return VerdictUncertain, "low", "structural signal inconclusive — needs the LLM judge"
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// LLMJudge asks an offline model to make the semantic call. Non-deterministic, so
|
||||||
|
// it lives only in the dev-time tool, never in tests or the runtime. It degrades
|
||||||
|
// to "uncertain" on any transport or parse error — it must never break the run.
|
||||||
|
type LLMJudge struct {
|
||||||
|
Completer LLMCompleter
|
||||||
|
MachineClass string
|
||||||
|
}
|
||||||
|
|
||||||
|
func (LLMJudge) Name() string { return "llm" }
|
||||||
|
|
||||||
|
func (j LLMJudge) Judge(ctx context.Context, c DedupCandidate, a, b PatternMatch) (string, string, string) {
|
||||||
|
system, user := BuildJudgePrompt(j.MachineClass, a, b)
|
||||||
|
raw, err := j.Completer.Complete(ctx, system, user)
|
||||||
|
if err != nil {
|
||||||
|
return VerdictUncertain, "low", "LLM error: " + err.Error()
|
||||||
|
}
|
||||||
|
return parseJudgeJSON(raw)
|
||||||
|
}
|
||||||
|
|
||||||
|
// BuildJudgePrompt is the real LLM artifact — built and unit-tested deterministically
|
||||||
|
// even though the call itself is not. It frames the ISO 12100 same-vs-distinct
|
||||||
|
// question and forces a JSON answer.
|
||||||
|
func BuildJudgePrompt(machineClass string, a, b PatternMatch) (system, user string) {
|
||||||
|
system = "Du bist Sachverstaendiger fuer Maschinensicherheit nach EN ISO 12100. " +
|
||||||
|
"Entscheide, ob zwei generierte Gefaehrdungen fuer DIESE Maschine DIESELBE Gefaehrdung " +
|
||||||
|
"beschreiben (Dublette) oder fachlich VERSCHIEDENE Gefaehrdungen sind, die nur zufaellig " +
|
||||||
|
"dieselben Schutzmassnahmen teilen. Verschieden, wenn Wirkort, Ausloeser oder " +
|
||||||
|
"Schadensmechanismus abweichen — auch bei gleicher Kategorie und gleichen Massnahmen. " +
|
||||||
|
"Antworte AUSSCHLIESSLICH als JSON: " +
|
||||||
|
`{"verdict":"duplicate|distinct|uncertain","confidence":"high|medium|low","rationale":"..."}.`
|
||||||
|
user = fmt.Sprintf(`Maschinenklasse: %s
|
||||||
|
|
||||||
|
Gefaehrdung A (%s):
|
||||||
|
Name: %s
|
||||||
|
Kategorie: %s
|
||||||
|
Zone: %s
|
||||||
|
Szenario: %s
|
||||||
|
Ausloeser: %s
|
||||||
|
Schaden: %s
|
||||||
|
Massnahmen: %s
|
||||||
|
|
||||||
|
Gefaehrdung B (%s):
|
||||||
|
Name: %s
|
||||||
|
Kategorie: %s
|
||||||
|
Zone: %s
|
||||||
|
Szenario: %s
|
||||||
|
Ausloeser: %s
|
||||||
|
Schaden: %s
|
||||||
|
Massnahmen: %s
|
||||||
|
|
||||||
|
Sind A und B dieselbe Gefaehrdung fuer diese Maschine?`,
|
||||||
|
machineClass,
|
||||||
|
a.PatternID, a.PatternName, primaryCat(a), a.ZoneDE, a.ScenarioDE, a.TriggerDE, a.HarmDE, strings.Join(a.SuggestedMeasureIDs, ", "),
|
||||||
|
b.PatternID, b.PatternName, primaryCat(b), b.ZoneDE, b.ScenarioDE, b.TriggerDE, b.HarmDE, strings.Join(b.SuggestedMeasureIDs, ", "))
|
||||||
|
return system, user
|
||||||
|
}
|
||||||
|
|
||||||
|
func parseJudgeJSON(raw string) (verdict, confidence, rationale string) {
|
||||||
|
start, end := strings.Index(raw, "{"), strings.LastIndex(raw, "}")
|
||||||
|
if start < 0 || end <= start {
|
||||||
|
return VerdictUncertain, "low", "unparseable LLM output"
|
||||||
|
}
|
||||||
|
var v struct {
|
||||||
|
Verdict string `json:"verdict"`
|
||||||
|
Confidence string `json:"confidence"`
|
||||||
|
Rationale string `json:"rationale"`
|
||||||
|
}
|
||||||
|
if err := json.Unmarshal([]byte(raw[start:end+1]), &v); err != nil {
|
||||||
|
return VerdictUncertain, "low", "unparseable LLM JSON: " + err.Error()
|
||||||
|
}
|
||||||
|
switch v.Verdict {
|
||||||
|
case VerdictDuplicate, VerdictDistinct, VerdictUncertain:
|
||||||
|
default:
|
||||||
|
v.Verdict = VerdictUncertain
|
||||||
|
}
|
||||||
|
if v.Confidence == "" {
|
||||||
|
v.Confidence = "low"
|
||||||
|
}
|
||||||
|
return v.Verdict, v.Confidence, v.Rationale
|
||||||
|
}
|
||||||
|
|
||||||
|
// LLMCompleter is the minimal text-in/text-out the LLM judge needs. Tests pass
// a stub; the dev-time tool passes a registry-backed adapter
// (NewRegistryCompleter).
type LLMCompleter interface {
	// Complete sends a system and a user prompt and returns the model's reply
	// text, or an error.
	Complete(ctx context.Context, system, user string) (string, error)
}
|
||||||
|
|
||||||
|
type registryCompleter struct {
|
||||||
|
reg *llm.ProviderRegistry
|
||||||
|
model string
|
||||||
|
}
|
||||||
|
|
||||||
|
// NewRegistryCompleter adapts the shared llm.ProviderRegistry to LLMCompleter so
|
||||||
|
// the proposer can reuse the platform's offline model wiring (e.g. self-hosted qwen).
|
||||||
|
func NewRegistryCompleter(reg *llm.ProviderRegistry, model string) LLMCompleter {
|
||||||
|
return ®istryCompleter{reg: reg, model: model}
|
||||||
|
}
|
||||||
|
|
||||||
|
func (rc *registryCompleter) Complete(ctx context.Context, system, user string) (string, error) {
|
||||||
|
resp, err := rc.reg.Chat(ctx, &llm.ChatRequest{
|
||||||
|
Model: rc.model,
|
||||||
|
Messages: []llm.Message{
|
||||||
|
{Role: "system", Content: system},
|
||||||
|
{Role: "user", Content: user},
|
||||||
|
},
|
||||||
|
Temperature: 0,
|
||||||
|
})
|
||||||
|
if err != nil {
|
||||||
|
return "", err
|
||||||
|
}
|
||||||
|
return resp.Message.Content, nil
|
||||||
|
}
|
||||||
@@ -0,0 +1,104 @@
|
|||||||
|
package iace
|
||||||
|
|
||||||
|
import (
|
||||||
|
"context"
|
||||||
|
"errors"
|
||||||
|
"strings"
|
||||||
|
"testing"
|
||||||
|
)
|
||||||
|
|
||||||
|
func TestHeuristicJudge_Verdicts(t *testing.T) {
|
||||||
|
tests := []struct {
|
||||||
|
name string
|
||||||
|
zone, meas float64
|
||||||
|
scenario float64
|
||||||
|
wantVerdict string
|
||||||
|
}{
|
||||||
|
{"high scenario overlap -> duplicate", 0, 0.3, 0.6, VerdictDuplicate},
|
||||||
|
{"high zone+measure -> duplicate", 0.6, 0.6, 0.1, VerdictDuplicate},
|
||||||
|
{"identical measures, no text -> distinct", 0, 1.0, 0.0, VerdictDistinct},
|
||||||
|
{"shared measures, low text -> uncertain", 0, 0.67, 0.19, VerdictUncertain},
|
||||||
|
}
|
||||||
|
for _, tt := range tests {
|
||||||
|
t.Run(tt.name, func(t *testing.T) {
|
||||||
|
c := DedupCandidate{ZoneJaccard: tt.zone, MeasureJaccard: tt.meas, ScenarioJaccard: tt.scenario}
|
||||||
|
v, conf, _ := HeuristicJudge{}.Judge(context.Background(), c, PatternMatch{}, PatternMatch{})
|
||||||
|
if v != tt.wantVerdict {
|
||||||
|
t.Errorf("verdict: want %s, got %s", tt.wantVerdict, v)
|
||||||
|
}
|
||||||
|
if conf != "low" {
|
||||||
|
t.Errorf("heuristic confidence must be low, got %s", conf)
|
||||||
|
}
|
||||||
|
})
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestBuildJudgePrompt_ContainsKeyFacts(t *testing.T) {
|
||||||
|
a := PatternMatch{PatternID: "HPa", PatternName: "Heisse Flaeche", HazardCats: []string{"thermal_hazard"},
|
||||||
|
ZoneDE: "Boiler", ScenarioDE: "Beruehrung heisser Boiler", SuggestedMeasureIDs: []string{"M071"}}
|
||||||
|
b := PatternMatch{PatternID: "HPb", PatternName: "Heisses Spuelgut", HazardCats: []string{"thermal_hazard"},
|
||||||
|
ZoneDE: "Spuelgut", ScenarioDE: "Beruehrung heisses Geschirr", SuggestedMeasureIDs: []string{"M071"}}
|
||||||
|
system, user := BuildJudgePrompt("Geschirrspuelmaschine", a, b)
|
||||||
|
|
||||||
|
for _, want := range []string{"EN ISO 12100", "JSON", "verdict"} {
|
||||||
|
if !strings.Contains(system, want) {
|
||||||
|
t.Errorf("system prompt missing %q", want)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
for _, want := range []string{"Geschirrspuelmaschine", "HPa", "HPb", "Boiler", "Spuelgut", "thermal_hazard"} {
|
||||||
|
if !strings.Contains(user, want) {
|
||||||
|
t.Errorf("user prompt missing %q", want)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// fakeCompleter is an LLMCompleter stub that returns a fixed reply and error.
type fakeCompleter struct {
	out string // reply text to return
	err error  // error to return
}

// Complete ignores its arguments and returns the configured reply and error.
func (f fakeCompleter) Complete(_ context.Context, _, _ string) (string, error) { return f.out, f.err }
|
||||||
|
|
||||||
|
func TestLLMJudge_ParsesAndDegrades(t *testing.T) {
|
||||||
|
cand := DedupCandidate{KeepPattern: "HPa", DropPattern: "HPb"}
|
||||||
|
|
||||||
|
// Well-formed JSON, even wrapped in chatter, parses.
|
||||||
|
j := LLMJudge{Completer: fakeCompleter{out: "Sicher. {\"verdict\":\"distinct\",\"confidence\":\"high\",\"rationale\":\"andere Wirkorte\"}"}, MachineClass: "x"}
|
||||||
|
if v, conf, r := j.Judge(context.Background(), cand, PatternMatch{}, PatternMatch{}); v != VerdictDistinct || conf != "high" || r != "andere Wirkorte" {
|
||||||
|
t.Errorf("parse: got %s/%s/%q", v, conf, r)
|
||||||
|
}
|
||||||
|
|
||||||
|
// Unknown verdict value normalises to uncertain.
|
||||||
|
j2 := LLMJudge{Completer: fakeCompleter{out: `{"verdict":"maybe","confidence":"medium","rationale":"x"}`}}
|
||||||
|
if v, _, _ := j2.Judge(context.Background(), cand, PatternMatch{}, PatternMatch{}); v != VerdictUncertain {
|
||||||
|
t.Errorf("unknown verdict must normalise to uncertain, got %s", v)
|
||||||
|
}
|
||||||
|
|
||||||
|
// Transport error degrades gracefully, never panics.
|
||||||
|
j3 := LLMJudge{Completer: fakeCompleter{err: errors.New("connection refused")}}
|
||||||
|
if v, _, r := j3.Judge(context.Background(), cand, PatternMatch{}, PatternMatch{}); v != VerdictUncertain || !strings.Contains(r, "LLM error") {
|
||||||
|
t.Errorf("error path: got %s / %q", v, r)
|
||||||
|
}
|
||||||
|
|
||||||
|
// Garbage (no JSON) degrades to uncertain.
|
||||||
|
j4 := LLMJudge{Completer: fakeCompleter{out: "no json here"}}
|
||||||
|
if v, _, _ := j4.Judge(context.Background(), cand, PatternMatch{}, PatternMatch{}); v != VerdictUncertain {
|
||||||
|
t.Errorf("garbage must degrade to uncertain, got %s", v)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestRenderProposalQueue_ShowsActions(t *testing.T) {
|
||||||
|
proposals := []JudgedProposal{
|
||||||
|
{
|
||||||
|
Candidate: DedupCandidate{KeepPattern: "HP807", DropPattern: "HP033", Category: "update_failure", Score: 0.32},
|
||||||
|
Screen: ScreenResult{RecallBefore: 1, RecallAfter: 1},
|
||||||
|
Verdict: VerdictDuplicate, Confidence: "medium", Rationale: "same update failure", Judge: "llm",
|
||||||
|
},
|
||||||
|
}
|
||||||
|
out := RenderProposalQueue("Geschirrspuelmaschine", proposals)
|
||||||
|
for _, want := range []string{"HP807", "HP033", "update_failure", "supersession", "Propose-only"} {
|
||||||
|
if !strings.Contains(out, want) {
|
||||||
|
t.Errorf("queue missing %q\n%s", want, out)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
@@ -0,0 +1,47 @@
|
|||||||
|
package iace
|
||||||
|
|
||||||
|
import (
|
||||||
|
"fmt"
|
||||||
|
"strings"
|
||||||
|
)
|
||||||
|
|
||||||
|
// RenderProposalQueue turns judged dedup proposals into the human-review queue
|
||||||
|
// (markdown). Deterministic. Nothing here applies a change — every entry is a
|
||||||
|
// suggestion for a human to confirm, edit, commit, and pin with a GT case.
|
||||||
|
func RenderProposalQueue(machine string, proposals []JudgedProposal) string {
|
||||||
|
var b strings.Builder
|
||||||
|
fmt.Fprintf(&b, "# Dedup proposal queue — %s\n\n", machine)
|
||||||
|
fmt.Fprintf(&b, "%d candidate(s) survived the deterministic GT wall. Propose-only — nothing is applied automatically.\n\n", len(proposals))
|
||||||
|
|
||||||
|
for i, p := range proposals {
|
||||||
|
c := p.Candidate
|
||||||
|
fmt.Fprintf(&b, "## %d. keep %s ⊃ drop %s [%s → %s (%s)]\n",
|
||||||
|
i+1, c.KeepPattern, c.DropPattern, p.Judge, p.Verdict, p.Confidence)
|
||||||
|
fmt.Fprintf(&b, "- category %s · score %.2f (measures %.0f%%, zone %.0f%%, scenario %.0f%%)\n",
|
||||||
|
c.Category, c.Score, c.MeasureJaccard*100, c.ZoneJaccard*100, c.ScenarioJaccard*100)
|
||||||
|
fmt.Fprintf(&b, "- GT recall %.1f%% → %.1f%% when %s is dropped (wall: %s)\n",
|
||||||
|
p.Screen.RecallBefore*100, p.Screen.RecallAfter*100, c.DropPattern, wallNote(p.Screen))
|
||||||
|
fmt.Fprintf(&b, "- keep: %s\n- drop: %s\n", c.KeepHazardName, c.DropName)
|
||||||
|
fmt.Fprintf(&b, "- judge rationale: %s\n", p.Rationale)
|
||||||
|
fmt.Fprintf(&b, "- suggested action: %s\n\n", suggestedAction(p))
|
||||||
|
}
|
||||||
|
return b.String()
|
||||||
|
}
|
||||||
|
|
||||||
|
func wallNote(s ScreenResult) string {
|
||||||
|
if s.DistinctGT {
|
||||||
|
return fmt.Sprintf("distinct GT %s vs %s", s.KeepGT, s.DropGT)
|
||||||
|
}
|
||||||
|
return "recall-safe"
|
||||||
|
}
|
||||||
|
|
||||||
|
func suggestedAction(p JudgedProposal) string {
|
||||||
|
switch p.Verdict {
|
||||||
|
case VerdictDuplicate:
|
||||||
|
return fmt.Sprintf("add %s to a supersession set, then a human confirms + commits + pins a GT case", p.Candidate.DropPattern)
|
||||||
|
case VerdictDistinct:
|
||||||
|
return "keep both — judge considers them distinct hazards"
|
||||||
|
default:
|
||||||
|
return "needs human (or higher-confidence LLM) review — no automatic action"
|
||||||
|
}
|
||||||
|
}
|
||||||
@@ -0,0 +1,61 @@
|
|||||||
|
package iace
|
||||||
|
|
||||||
|
import "github.com/google/uuid"
|
||||||
|
|
||||||
|
// ScreenResult is the deterministic GT verdict for one proposed supersession.
type ScreenResult struct {
	RecallBefore float64 `json:"recall_before"`     // coverage score with all hazards present
	RecallAfter  float64 `json:"recall_after"`      // coverage score with the drop hazard removed
	KeepGT       string  `json:"keep_gt,omitempty"` // GT entry the keeper credits (if any)
	DropGT       string  `json:"drop_gt,omitempty"` // GT entry the drop credits (if any)
	DistinctGT   bool    `json:"distinct_gt"`       // keep & drop credit DIFFERENT GT entries -> distinct hazards
	Safe         bool    `json:"safe"`              // recall preserved AND not distinct
}
|
||||||
|
|
||||||
|
// ScreenSupersession is the WALL between "propose" and "decide". A proposal is
|
||||||
|
// safe only if BOTH deterministic checks pass:
|
||||||
|
//
|
||||||
|
// 1. RECALL is not reduced when the drop-hazard (and its mitigations) is removed
|
||||||
|
// — otherwise the drop is load-bearing for GT coverage.
|
||||||
|
// 2. The two hazards do NOT credit DIFFERENT ground-truth entries. Recall alone
|
||||||
|
// is necessary but not sufficient: two genuinely distinct hazards that share
|
||||||
|
// the same measures (e.g. hot boiler surface vs hot ware on unloading) keep
|
||||||
|
// recall at 100% when one is dropped, yet must NOT be merged. If keep and
|
||||||
|
// drop each match a different GT entry, they are distinct.
|
||||||
|
//
|
||||||
|
// Whatever survives both is still only RECALL-SAFE — a candidate for a human (and
|
||||||
|
// in slice 2, an LLM) to confirm semantically. Deterministic; reuses
|
||||||
|
// CompareBenchmark; touches neither the library nor the runtime.
|
||||||
|
func ScreenSupersession(gt *GroundTruth, hazards []Hazard, mits []Mitigation, keepHazardName, dropHazardName string) ScreenResult {
|
||||||
|
before := CompareBenchmark(gt, hazards, mits)
|
||||||
|
|
||||||
|
gtOf := map[string]string{}
|
||||||
|
for _, p := range before.MatchedPairs {
|
||||||
|
gtOf[p.EngineHazard.Name] = p.GTEntry.Nr
|
||||||
|
}
|
||||||
|
keepGT, dropGT := gtOf[keepHazardName], gtOf[dropHazardName]
|
||||||
|
distinct := keepGT != "" && dropGT != "" && keepGT != dropGT
|
||||||
|
|
||||||
|
kept := make([]Hazard, 0, len(hazards))
|
||||||
|
dropped := map[uuid.UUID]bool{}
|
||||||
|
for _, h := range hazards {
|
||||||
|
if h.Name == dropHazardName {
|
||||||
|
dropped[h.ID] = true
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
kept = append(kept, h)
|
||||||
|
}
|
||||||
|
keptMits := make([]Mitigation, 0, len(mits))
|
||||||
|
for _, m := range mits {
|
||||||
|
if !dropped[m.HazardID] {
|
||||||
|
keptMits = append(keptMits, m)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
after := CompareBenchmark(gt, kept, keptMits)
|
||||||
|
|
||||||
|
return ScreenResult{
|
||||||
|
RecallBefore: before.CoverageScore, RecallAfter: after.CoverageScore,
|
||||||
|
KeepGT: keepGT, DropGT: dropGT, DistinctGT: distinct,
|
||||||
|
Safe: after.CoverageScore >= before.CoverageScore && !distinct,
|
||||||
|
}
|
||||||
|
}
|
||||||
@@ -160,6 +160,7 @@ func (s *Store) ListHazards(ctx context.Context, projectID uuid.UUID) ([]Hazard,
|
|||||||
hazards = append(hazards, h)
|
hazards = append(hazards, h)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
SortHazardsByISO12100(hazards)
|
||||||
return hazards, nil
|
return hazards, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@@ -110,9 +110,10 @@ type domainDef struct {
|
|||||||
// Deterministic order (slice, not map) — important for stable classification + tests.
|
// Deterministic order (slice, not map) — important for stable classification + tests.
|
||||||
var domains = []domainDef{
|
var domains = []domainDef{
|
||||||
{"data_protection",
|
{"data_protection",
|
||||||
[]string{"DSGVO", "GDPR", "BDSG", "EDPB", "DSK", "BfDI", "BayLfD", "DPF"},
|
[]string{"DSGVO", "GDPR", "BDSG", "TDDDG", "TTDSG", "EDPB", "DSK", "BfDI", "BayLfD", "DPF"},
|
||||||
[]string{"personenbezogen", "betroffene", "datenschutz", "datenschutzbeauftrag", "dsb",
|
[]string{"personenbezogen", "betroffene", "datenschutz", "datenschutzbeauftrag", "dsb",
|
||||||
"datenpanne", "auskunft", "loesch", "lösch", "einwilligung", "besondere kategorien", "auftragsverarbeit"}},
|
"datenpanne", "auskunft", "loesch", "lösch", "einwilligung", "besondere kategorien", "auftragsverarbeit",
|
||||||
|
"cookie", "endeinrichtung", "endgerät", "endgeraet", "tracking"}},
|
||||||
{"cyber",
|
{"cyber",
|
||||||
[]string{"CRA", "NIS2", "NIS-2", "ENISA", "DORA", "EUCC"},
|
[]string{"CRA", "NIS2", "NIS-2", "ENISA", "DORA", "EUCC"},
|
||||||
[]string{"security update", "sicherheitsupdate", "sicherheitsaktualisierung", "schwachstelle", "sbom",
|
[]string{"security update", "sicherheitsupdate", "sicherheitsaktualisierung", "schwachstelle", "sbom",
|
||||||
@@ -200,6 +201,11 @@ var topics = []topicDef{
|
|||||||
{[]string{"bussgeld", "geldbusse"}, []string{"Art. 83"}},
|
{[]string{"bussgeld", "geldbusse"}, []string{"Art. 83"}},
|
||||||
{[]string{"security update", "sicherheitsupdate", "schwachstelle", "sbom", "cybersicherheitsanforderung"}, []string{"CRA Anhang I"}},
|
{[]string{"security update", "sicherheitsupdate", "schwachstelle", "sbom", "cybersicherheitsanforderung"}, []string{"CRA Anhang I"}},
|
||||||
{[]string{"meldepflicht", "sicherheitsvorfall"}, []string{"Art. 14 CRA"}},
|
{[]string{"meldepflicht", "sicherheitsvorfall"}, []string{"Art. 14 CRA"}},
|
||||||
|
// ePrivacy / cookies: § 25 TDDDG (ex-TTDSG) is lex specialis for terminal-equipment access /
|
||||||
|
// cookie consent. Co-primary on a cookie/tracking query, so the subsidiarity rule does NOT
|
||||||
|
// demote it like general-DP DE law subsidiary to the DSGVO. Keywords are cookie-specific
|
||||||
|
// (NOT bare "Einwilligung") so a general consent question still resolves to Art. 7 DSGVO.
|
||||||
|
{[]string{"cookie", "endeinrichtung", "endgerät", "endgeraet", "tracking", "speicherung von informationen", "zugriff auf informationen"}, []string{"§ 25 TDDDG"}},
|
||||||
}
|
}
|
||||||
|
|
||||||
// resultMatchesTopic reports whether the result is a preferred norm of a topic the query hits.
|
// resultMatchesTopic reports whether the result is a preferred norm of a topic the query hits.
|
||||||
|
|||||||
@@ -123,6 +123,28 @@ func TestRerankByAuthority_Acceptance(t *testing.T) {
|
|||||||
}
|
}
|
||||||
})
|
})
|
||||||
|
|
||||||
|
t.Run("ePrivacy: a cookie query lifts §25 TDDDG above DSGVO consent (lex specialis topic)", func(t *testing.T) {
|
||||||
|
in := []LegalSearchResult{
|
||||||
|
bindingRes("Art. 7 DSGVO", "DSGVO", "EU", 0.70), // higher semantic
|
||||||
|
bindingRes("§ 25 TDDDG", "TDDDG", "DE", 0.66),
|
||||||
|
}
|
||||||
|
out := rerankByAuthority("Wann ist eine Einwilligung fuer das Speichern von Cookies auf Endgeraeten erforderlich?", in)
|
||||||
|
if out[0].RegulationShort != "TDDDG" {
|
||||||
|
t.Fatalf("§25 TDDDG must win a cookie question (lex specialis topic), got %q", out[0].ArticleLabel)
|
||||||
|
}
|
||||||
|
})
|
||||||
|
|
||||||
|
t.Run("a general consent question still resolves to DSGVO, not §25 TDDDG", func(t *testing.T) {
|
||||||
|
in := []LegalSearchResult{
|
||||||
|
bindingRes("§ 25 TDDDG", "TDDDG", "DE", 0.70), // higher semantic but no cookie topic
|
||||||
|
bindingRes("Art. 7 DSGVO", "DSGVO", "EU", 0.66),
|
||||||
|
}
|
||||||
|
out := rerankByAuthority("Welche Anforderungen gelten an eine wirksame Einwilligung?", in)
|
||||||
|
if out[0].RegulationShort != "DSGVO" {
|
||||||
|
t.Fatalf("a general consent question must resolve to DSGVO (TDDDG demoted), got %q", out[0].ArticleLabel)
|
||||||
|
}
|
||||||
|
})
|
||||||
|
|
||||||
t.Run("co-primary dp_01: BDSG §38 stays top on a DSB question (national special rule)", func(t *testing.T) {
|
t.Run("co-primary dp_01: BDSG §38 stays top on a DSB question (national special rule)", func(t *testing.T) {
|
||||||
in := []LegalSearchResult{
|
in := []LegalSearchResult{
|
||||||
bindingRes("§ 38 BDSG", "BDSG", "DE", 0.66),
|
bindingRes("§ 38 BDSG", "BDSG", "DE", 0.66),
|
||||||
|
|||||||
@@ -77,6 +77,8 @@ _ROUTER_MODULES = [
|
|||||||
"licenses_routes",
|
"licenses_routes",
|
||||||
"template_rule_routes",
|
"template_rule_routes",
|
||||||
"specialist_agent_routes",
|
"specialist_agent_routes",
|
||||||
|
"reasoning_routes",
|
||||||
|
"onboarding_routes",
|
||||||
]
|
]
|
||||||
|
|
||||||
_loaded_count = 0
|
_loaded_count = 0
|
||||||
|
|||||||
@@ -0,0 +1,74 @@
|
|||||||
|
"""Onboarding Advisor endpoint — exposes the existing Smart Onboarding Advisor at runtime.
|
||||||
|
|
||||||
|
This adds NO new reasoning logic. It exposes the already-built, tested orchestration (Signal Producers
|
||||||
|
-> Normalizer -> Silent Knowledge Pass -> Advisor) through one runtime endpoint. No DB, no persistence.
|
||||||
|
|
||||||
|
POST /onboarding/advisor-start — (company + certs + target + scanner findings) -> advisory payload
|
||||||
|
GET /onboarding/targets — the supported target ids
|
||||||
|
"""
|
||||||
|
|
||||||
|
import logging
|
||||||
|
from typing import List, Optional
|
||||||
|
|
||||||
|
from fastapi import APIRouter, HTTPException
|
||||||
|
from pydantic import BaseModel, Field
|
||||||
|
|
||||||
|
from compliance.onboarding import (
|
||||||
|
AdvisorMeasure,
|
||||||
|
AdvisorQuestion,
|
||||||
|
InferredAssumption,
|
||||||
|
ProducedSignal,
|
||||||
|
RejectedAssumption,
|
||||||
|
)
|
||||||
|
from compliance.services.onboarding_service import run_advisor, supported_targets
|
||||||
|
|
||||||
|
logger = logging.getLogger(__name__)
|
||||||
|
router = APIRouter(prefix="/onboarding", tags=["onboarding"])
|
||||||
|
|
||||||
|
|
||||||
|
class OnboardingAdvisorRequest(BaseModel):
    # Request body of POST /onboarding/advisor-start. Every field has a default,
    # so an empty JSON object validates.
    company: str = ""
    industry: Optional[str] = None  # the handler forwards "" to run_advisor when this is None
    products: List[str] = Field(default_factory=list)
    markets: List[str] = Field(default_factory=list)
    certifications: List[str] = Field(default_factory=list)
    known_evidence: List[str] = Field(default_factory=list)
    target: str = "CRA"  # checked against supported_targets() by the handler
    scanner_findings: List[ProducedSignal] = Field(default_factory=list)  # adapters upstream produced these
|
||||||
|
|
||||||
|
|
||||||
|
class AdvisorResponse(BaseModel):
    # Response payload of POST /onboarding/advisor-start. The handler fills
    # silent_intake_summary from the second value returned by run_advisor and
    # every other field from attributes of the advisor result.
    silent_intake_summary: str = ""
    headline: str = ""
    auto_detected: List[str] = Field(default_factory=list)
    indications: List[str] = Field(default_factory=list)  # partial signal: raises strength, still asked
    inferred_assumptions: List[InferredAssumption] = Field(default_factory=list)
    rejected_assumptions: List[RejectedAssumption] = Field(default_factory=list)
    top_5_questions: List[AdvisorQuestion] = Field(default_factory=list)  # filled from result.next_best_questions
    capability_delta: List[str] = Field(default_factory=list)
    top_measures: List[AdvisorMeasure] = Field(default_factory=list)
    evidence_requests: List[str] = Field(default_factory=list)
    unsupported_domains: List[str] = Field(default_factory=list)
    completeness_summary: str = ""
|
||||||
|
|
||||||
|
|
||||||
|
@router.get("/targets")
def list_targets() -> dict:
    # GET /onboarding/targets: report the target ids the onboarding service supports,
    # wrapped under the single key "targets".
    available = supported_targets()
    return dict(targets=available)
|
||||||
|
|
||||||
|
|
||||||
|
@router.post("/advisor-start", response_model=AdvisorResponse)
def advisor_start_endpoint(req: OnboardingAdvisorRequest) -> AdvisorResponse:
    # POST /onboarding/advisor-start: validate the requested target, run the advisor
    # orchestration once, and copy its result into the response model.
    #
    # Raises HTTPException(404) when req.target is not among supported_targets().
    # NOTE(review): 404 for a bad body value is unusual (422/400 is the common
    # choice); the status is kept unchanged so existing clients are not affected.

    # Take one snapshot so the membership check and the error message can never
    # disagree, and the lookup is not repeated.
    targets = supported_targets()
    if req.target not in targets:
        raise HTTPException(
            status_code=404,
            detail="unsupported target '%s'; supported: %s" % (req.target, targets),
        )

    result, si_summary = run_advisor(
        company=req.company,
        certifications=req.certifications,
        target=req.target,
        signals=req.scanner_findings,
        known_evidence=req.known_evidence,
        products=req.products,
        markets=req.markets,
        industry=req.industry or "",  # run_advisor is always given a string here
    )
    return AdvisorResponse(
        silent_intake_summary=si_summary,
        headline=result.headline,
        auto_detected=result.auto_detected,
        indications=result.indications,
        inferred_assumptions=result.inferred_assumptions,
        rejected_assumptions=result.rejected_assumptions,
        top_5_questions=result.next_best_questions,  # response field name differs from the result attribute
        capability_delta=result.capability_delta,
        top_measures=result.top_measures,
        evidence_requests=result.evidence_requests,
        unsupported_domains=result.unsupported_domains,
        completeness_summary=result.completeness_summary,
    )
|
||||||
@@ -0,0 +1,98 @@
|
|||||||
|
"""HTTP endpoints for the Regulatory Reasoning Engine (spec §7).
|
||||||
|
|
||||||
|
Thin handlers — all reasoning lives in `compliance.reasoning.*`. No DB, no RAG;
|
||||||
|
pure deterministic rule evaluation.
|
||||||
|
|
||||||
|
POST /reasoning/scope -> which regulations apply + missing facts
|
||||||
|
POST /reasoning/obligations -> obligations, overlaps, multi-evidence
|
||||||
|
POST /reasoning/implementation-reasoning -> claim->obligation mapping (Welt 1, no verdict)
|
||||||
|
POST /reasoning/interpretation-assessment -> verdict on a customer interpretation
|
||||||
|
POST /reasoning/product-scope -> gate on facts, else run discover_scope once
|
||||||
|
POST /reasoning/regulatory-map -> customer-readable read-model over the scope
|
||||||
|
POST /reasoning/interpretation-in-map -> judge a customer interpretation within the map
|
||||||
|
"""
|
||||||
|
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
from fastapi import APIRouter
|
||||||
|
|
||||||
|
from compliance.interpretation_map import (
|
||||||
|
InterpretationInMapRequest,
|
||||||
|
InterpretationInMapResult,
|
||||||
|
interpret_in_map,
|
||||||
|
)
|
||||||
|
from compliance.product_scope import (
|
||||||
|
ProductScopeRequest,
|
||||||
|
ProductScopeResponse,
|
||||||
|
resolve_product_scope,
|
||||||
|
)
|
||||||
|
from compliance.regulatory_map import RegulatoryMap, RegulatoryMapRequest, render_regulatory_map
|
||||||
|
from compliance.reasoning import (
|
||||||
|
assess_interpretation,
|
||||||
|
derive_obligations,
|
||||||
|
discover_scope,
|
||||||
|
reason_implementation_claim,
|
||||||
|
)
|
||||||
|
from compliance.reasoning.schemas import (
|
||||||
|
ImplementationReasoningRequest,
|
||||||
|
ImplementationReasoningResponse,
|
||||||
|
InterpretationRequest,
|
||||||
|
InterpretationResponse,
|
||||||
|
ObligationsRequest,
|
||||||
|
ObligationsResponse,
|
||||||
|
ScopeRequest,
|
||||||
|
ScopeResponse,
|
||||||
|
)
|
||||||
|
|
||||||
|
router = APIRouter(prefix="/reasoning", tags=["reasoning"])
|
||||||
|
|
||||||
|
|
||||||
|
@router.post("/scope", response_model=ScopeResponse)
def scope_discovery(req: ScopeRequest) -> ScopeResponse:
    # POST /reasoning/scope: run scope discovery on the product profile and surface the
    # scope's own missing_facts and confidence at the top level of the response.
    discovered = discover_scope(req.product_profile)
    payload = {
        "regulatory_scope": discovered,
        "missing_facts": discovered.missing_facts,
        "confidence": discovered.confidence,
    }
    return ScopeResponse(**payload)
|
||||||
|
|
||||||
|
|
||||||
|
@router.post("/obligations", response_model=ObligationsResponse)
def applicable_obligations(req: ObligationsRequest) -> ObligationsResponse:
    # POST /reasoning/obligations: thin pass-through to derive_obligations.
    profile, scope = req.product_profile, req.regulatory_scope
    return derive_obligations(profile, scope)
|
||||||
|
|
||||||
|
|
||||||
|
@router.post("/implementation-reasoning", response_model=ImplementationReasoningResponse)
def implementation_reasoning(req: ImplementationReasoningRequest) -> ImplementationReasoningResponse:
    # POST /reasoning/implementation-reasoning: thin pass-through to
    # reason_implementation_claim (profile first, then the customer's claim).
    profile, claim = req.product_profile, req.customer_claim
    return reason_implementation_claim(profile, claim)
|
||||||
|
|
||||||
|
|
||||||
|
@router.post("/product-scope", response_model=ProductScopeResponse)
def product_scope(req: ProductScopeRequest) -> ProductScopeResponse:
    # POST /reasoning/product-scope: thin pass-through to resolve_product_scope.
    profile = req.product_profile
    return resolve_product_scope(profile)
|
||||||
|
|
||||||
|
|
||||||
|
@router.post("/regulatory-map", response_model=RegulatoryMap)
def regulatory_map(req: RegulatoryMapRequest) -> RegulatoryMap:
    # POST /reasoning/regulatory-map: thin pass-through to render_regulatory_map.
    profile = req.product_profile
    return render_regulatory_map(profile)
|
||||||
|
|
||||||
|
|
||||||
|
@router.post("/interpretation-in-map", response_model=InterpretationInMapResult)
def interpretation_in_map(req: InterpretationInMapRequest) -> InterpretationInMapResult:
    # POST /reasoning/interpretation-in-map: render the regulatory map for the profile,
    # then judge the customer's interpretation against that map.
    return interpret_in_map(
        render_regulatory_map(req.product_profile),
        req.customer_interpretation,
    )
|
||||||
|
|
||||||
|
|
||||||
|
@router.post("/interpretation-assessment", response_model=InterpretationResponse)
def interpretation_assessment(req: InterpretationRequest) -> InterpretationResponse:
    # POST /reasoning/interpretation-assessment: assess the interpretation and copy the
    # verdict's attributes one-to-one into the response model.
    verdict = assess_interpretation(req.customer_interpretation, req.product_profile)
    copied_fields = (
        "assessment",
        "affected_regulations",
        "affected_obligations",
        "corrected_interpretation",
        "risks",
        "legal_basis_refs",
        "explanation",
        "confidence",
    )
    return InterpretationResponse(**{field: getattr(verdict, field) for field in copied_fields})
|
||||||
@@ -0,0 +1,70 @@
|
|||||||
|
"""Master Capability Registry v0 (Phase 2C) — Compliance Execution domain.
|
||||||
|
|
||||||
|
Registry + minting layer for Master Capabilities — the third instance of the
|
||||||
|
identity-machine pattern (Master Controls, Master Obligations, Master Capabilities).
|
||||||
|
|
||||||
|
STORED: identities, sources, relationship types, policy versions, lifecycle events,
|
||||||
|
provenance. DERIVED (never stored): confidence, coverage, gap.
|
||||||
|
|
||||||
|
v0 scope: types + minting + typed relations + versioned policy + identity lifecycle.
|
||||||
|
NOT here: Company-Gap, real ISO/cert mappings, certification derivations, UI, RAG,
|
||||||
|
new meta-model class, generic canonicalization engine.
|
||||||
|
"""
|
||||||
|
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
from .engine import (
|
||||||
|
CapabilityRegistry,
|
||||||
|
deprecate_capability,
|
||||||
|
evaluate_relation,
|
||||||
|
merge_capabilities,
|
||||||
|
mint_capability,
|
||||||
|
resolve,
|
||||||
|
split_capability,
|
||||||
|
)
|
||||||
|
from .policy import DEFAULT_POLICY, assert_no_certification_confirms
|
||||||
|
from .schemas import (
|
||||||
|
AssertionStatus,
|
||||||
|
CapabilityCandidate,
|
||||||
|
CapabilityRelation,
|
||||||
|
Confidence,
|
||||||
|
DerivedAssessment,
|
||||||
|
EvidenceKind,
|
||||||
|
IdentityLifecycleEvent,
|
||||||
|
LifecycleEventType,
|
||||||
|
LifecycleState,
|
||||||
|
MasterCapability,
|
||||||
|
PolicyRule,
|
||||||
|
PolicyVersion,
|
||||||
|
Provenance,
|
||||||
|
RelationType,
|
||||||
|
)
|
||||||
|
|
||||||
|
__all__ = [
|
||||||
|
# engine
|
||||||
|
"CapabilityRegistry",
|
||||||
|
"mint_capability",
|
||||||
|
"evaluate_relation",
|
||||||
|
"resolve",
|
||||||
|
"deprecate_capability",
|
||||||
|
"merge_capabilities",
|
||||||
|
"split_capability",
|
||||||
|
# policy
|
||||||
|
"DEFAULT_POLICY",
|
||||||
|
"assert_no_certification_confirms",
|
||||||
|
# schemas
|
||||||
|
"MasterCapability",
|
||||||
|
"CapabilityCandidate",
|
||||||
|
"CapabilityRelation",
|
||||||
|
"RelationType",
|
||||||
|
"EvidenceKind",
|
||||||
|
"AssertionStatus",
|
||||||
|
"Confidence",
|
||||||
|
"PolicyRule",
|
||||||
|
"PolicyVersion",
|
||||||
|
"IdentityLifecycleEvent",
|
||||||
|
"LifecycleEventType",
|
||||||
|
"LifecycleState",
|
||||||
|
"Provenance",
|
||||||
|
"DerivedAssessment",
|
||||||
|
]
|
||||||
@@ -0,0 +1,191 @@
|
|||||||
|
"""Master Capability Registry v0 — minting, derivation, identity lifecycle.
|
||||||
|
|
||||||
|
STORED on the registry: identities, sources, relation types, policy versions,
|
||||||
|
lifecycle events, provenance. DERIVED (never stored): confidence/status, via
|
||||||
|
`evaluate_relation` under a versioned policy.
|
||||||
|
|
||||||
|
Python 3.9 compatible (no `|` unions).
|
||||||
|
"""
|
||||||
|
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
from typing import Dict, List, Optional, Set
|
||||||
|
|
||||||
|
from pydantic import BaseModel, Field
|
||||||
|
|
||||||
|
from .policy import DEFAULT_POLICY
|
||||||
|
from .schemas import (
|
||||||
|
AssertionStatus,
|
||||||
|
CapabilityCandidate,
|
||||||
|
CapabilityRelation,
|
||||||
|
Confidence,
|
||||||
|
DerivedAssessment,
|
||||||
|
IdentityLifecycleEvent,
|
||||||
|
LifecycleEventType,
|
||||||
|
LifecycleState,
|
||||||
|
MasterCapability,
|
||||||
|
PolicyVersion,
|
||||||
|
Provenance,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
class CapabilityRegistry(BaseModel):
    # In-memory container for everything the registry STORES.
    # NOTE: no confidence/coverage field anywhere — those are DERIVED, never stored.
    capabilities: Dict[str, MasterCapability] = Field(default_factory=dict)  # keyed by capability_id
    relations: List[CapabilityRelation] = Field(default_factory=list)
    lifecycle_events: List[IdentityLifecycleEvent] = Field(default_factory=list)  # appended by deprecate/merge/split
    # NOTE(review): the factory returns the module-level DEFAULT_POLICY object itself,
    # not a copy — mutating registry.policy would presumably affect every registry
    # using the default; confirm this sharing is intended.
    policy: PolicyVersion = Field(default_factory=lambda: DEFAULT_POLICY)
    next_serial: int = 1  # serial used for the next minted MCAP id; incremented by mint_capability
|
||||||
|
|
||||||
|
|
||||||
|
def _mcap_id(serial: int) -> str:
|
||||||
|
return "MCAP-%05d" % serial
|
||||||
|
|
||||||
|
|
||||||
|
def _next_event_id(registry: "CapabilityRegistry") -> str:
|
||||||
|
return "evt-%d" % (len(registry.lifecycle_events) + 1)
|
||||||
|
|
||||||
|
|
||||||
|
def mint_capability(
    registry: CapabilityRegistry,
    candidate: CapabilityCandidate,
    provenance: Optional[Provenance] = None,
    name: str = "",
    definition: str = "",
    category: str = "",
    domains: Optional[List[str]] = None,
) -> MasterCapability:
    """Turn a candidate into a registered MasterCapability with the next MCAP id.

    The display name falls back from `name` to `candidate.normalized` to
    `candidate.raw_term`. When no provenance is supplied, a "system" provenance
    naming the candidate's raw term is attached. The registry is mutated: the new
    capability is stored under its id and `next_serial` is advanced by one.

    Returns:
        The newly created capability.
    """
    minted_id = _mcap_id(registry.next_serial)
    display_name = name or candidate.normalized or candidate.raw_term

    origin = provenance
    if not origin:
        origin = Provenance(
            author="system",
            basis="minted from candidate '%s'" % candidate.raw_term,
        )

    capability = MasterCapability(
        capability_id=minted_id,
        name=display_name,
        definition=definition,
        category=category,
        domains=domains or [],
        provenance=origin,
    )
    # Store first, then advance the serial, so a failed construction above leaves
    # the registry untouched.
    registry.capabilities[minted_id] = capability
    registry.next_serial += 1
    return capability
|
||||||
|
|
||||||
|
|
||||||
|
def evaluate_relation(
    relation: CapabilityRelation, policy: Optional[PolicyVersion] = None
) -> DerivedAssessment:
    """Compute the assessment for a relation from the first matching policy rule.

    A rule matches when both its relationship type and its evidence kind equal the
    relation's. With no match the result is UNKNOWN / LOW and the explanation ends
    in " (no rule)". `policy` defaults to DEFAULT_POLICY. Nothing is stored; the
    assessment is only returned.
    """
    active = DEFAULT_POLICY if policy is None else policy

    # First rule in list order wins.
    hit = next(
        (
            candidate
            for candidate in active.rules
            if candidate.relationship_type == relation.relationship_type
            and candidate.evidence_kind == relation.evidence_kind
        ),
        None,
    )
    if hit is None:
        status, confidence, suffix = AssertionStatus.UNKNOWN, Confidence.LOW, " (no rule)"
    else:
        status, confidence, suffix = hit.status, hit.confidence, ""

    explanation = "%s + %s under %s -> %s/%s%s" % (
        relation.relationship_type.value,
        relation.evidence_kind.value,
        active.policy_version,
        status.value,
        confidence.value,
        suffix,
    )
    return DerivedAssessment(
        target_capability_id=relation.target_capability_id,
        status=status,
        confidence=confidence,
        policy_version=active.policy_version,
        explanation=explanation,
    )
|
||||||
|
|
||||||
|
|
||||||
|
def resolve(
    registry: CapabilityRegistry, capability_id: str, _seen: Optional[Set[str]] = None
) -> Optional[MasterCapability]:
    """Walk the redirect chain from `capability_id` to its current canonical capability.

    Returns None when an id on the chain is not registered, when the chain revisits
    an id (redirect cycle), or when the chain ends on a capability that is not
    ACTIVE. `_seen` carries the ids already visited; a set passed in is updated in
    place.
    """
    visited = set() if _seen is None else _seen
    current_id = capability_id
    while True:
        if current_id in visited:
            return None  # redirect cycle guard
        visited.add(current_id)

        cap = registry.capabilities.get(current_id)
        if cap is None:
            return None
        if not cap.redirect_to:
            # terminal: only an ACTIVE capability resolves; a deprecated dead-end -> None
            return cap if cap.state == LifecycleState.ACTIVE else None
        current_id = cap.redirect_to
|
||||||
|
|
||||||
|
|
||||||
|
def deprecate_capability(
    registry: CapabilityRegistry,
    capability_id: str,
    redirect_to: Optional[str] = None,
    provenance: Optional[Provenance] = None,
) -> IdentityLifecycleEvent:
    """Mark a capability DEPRECATED, optionally pointing it at a successor id.

    The capability's `redirect_to` is overwritten with the given value. The recorded
    event is a REDIRECT listing the successor when `redirect_to` is truthy, otherwise
    a plain DEPRECATE with no target ids. Without an explicit provenance a "system"
    one is attached.

    Returns:
        The lifecycle event appended to the registry.

    Raises:
        KeyError: `capability_id` is not registered.
    """
    target = registry.capabilities.get(capability_id)
    if target is None:
        raise KeyError(capability_id)

    target.state = LifecycleState.DEPRECATED
    target.redirect_to = redirect_to

    if redirect_to:
        kind, successors = LifecycleEventType.REDIRECT, [redirect_to]
    else:
        kind, successors = LifecycleEventType.DEPRECATE, []

    event = IdentityLifecycleEvent(
        event_id=_next_event_id(registry),
        event_type=kind,
        from_ids=[capability_id],
        to_ids=successors,
        provenance=provenance or Provenance(author="system", basis="deprecate %s" % capability_id),
    )
    registry.lifecycle_events.append(event)
    return event
|
||||||
|
|
||||||
|
|
||||||
|
def merge_capabilities(
    registry: CapabilityRegistry,
    from_id: str,
    into_id: str,
    provenance: Optional[Provenance] = None,
) -> IdentityLifecycleEvent:
    """Merge `from_id` into `into_id`: deprecate `from_id` with a redirect to `into_id`.

    Args:
        registry: registry holding both capabilities; mutated in place.
        from_id: id of the capability being retired.
        into_id: id of the capability that survives.
        provenance: who/why for the event; a "system" provenance is used when omitted.

    Returns:
        The MERGE lifecycle event appended to the registry.

    Raises:
        KeyError: either id is not registered.
        ValueError: `from_id` and `into_id` are the same id.
    """
    if from_id not in registry.capabilities or into_id not in registry.capabilities:
        raise KeyError("%s or %s" % (from_id, into_id))
    if from_id == into_id:
        # A self-merge would deprecate the capability and redirect it to itself;
        # resolve() then hits its cycle guard and returns None, so the capability
        # would be lost while a MERGE event claims it survived. Refuse before
        # touching any state.
        raise ValueError("cannot merge capability %s into itself" % from_id)

    frm = registry.capabilities[from_id]
    frm.state = LifecycleState.DEPRECATED
    frm.redirect_to = into_id

    event = IdentityLifecycleEvent(
        event_id=_next_event_id(registry),
        event_type=LifecycleEventType.MERGE,
        from_ids=[from_id],
        to_ids=[into_id],
        provenance=provenance or Provenance(author="system", basis="merge %s -> %s" % (from_id, into_id)),
    )
    registry.lifecycle_events.append(event)
    return event
|
||||||
|
|
||||||
|
|
||||||
|
def split_capability(
    registry: CapabilityRegistry,
    from_id: str,
    into_ids: List[str],
    primary: Optional[str] = None,
    provenance: Optional[Provenance] = None,
) -> IdentityLifecycleEvent:
    """Record that `from_id` was split into `into_ids` and retire the old id.

    The old capability becomes DEPRECATED and its redirect is set to `primary`
    (None when no primary is given, in which case the old id no longer resolves
    because the split is ambiguous). Without an explicit provenance a "system"
    one is attached.

    Returns:
        The SPLIT lifecycle event appended to the registry.

    Raises:
        KeyError: `from_id` is not registered.
    """
    known = registry.capabilities
    if from_id not in known:
        raise KeyError(from_id)

    retired = known[from_id]
    retired.state = LifecycleState.DEPRECATED
    retired.redirect_to = primary

    stamp = provenance or Provenance(author="system", basis="split %s" % from_id)
    event = IdentityLifecycleEvent(
        event_id=_next_event_id(registry),
        event_type=LifecycleEventType.SPLIT,
        from_ids=[from_id],
        to_ids=list(into_ids),  # copy so the event does not alias the caller's list
        provenance=stamp,
    )
    registry.lifecycle_events.append(event)
    return event
|
||||||
@@ -0,0 +1,65 @@
|
|||||||
|
"""Derivation policy v0 for the Master Capability Registry.
|
||||||
|
|
||||||
|
Confidence + status are DERIVED from (relationship_type, evidence_kind) under a
|
||||||
|
versioned policy — never stored. HARD RULE baked in and structurally guarded: a
|
||||||
|
CERTIFICATION is a claim, never proof — no certification-backed rule may yield
|
||||||
|
CONFIRMED. CONFIRMED requires a CONFIRMS relation backed by a concrete ARTIFACT
|
||||||
|
(or an EXPERT assertion).
|
||||||
|
|
||||||
|
Python 3.9 compatible (no `|` unions).
|
||||||
|
"""
|
||||||
|
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
from .schemas import (
|
||||||
|
AssertionStatus,
|
||||||
|
Confidence,
|
||||||
|
EvidenceKind,
|
||||||
|
PolicyRule,
|
||||||
|
PolicyVersion,
|
||||||
|
RelationType,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
def _rule(rt: RelationType, ek: EvidenceKind, st: AssertionStatus, cf: Confidence) -> PolicyRule:
    """Positional shorthand for building one PolicyRule: (relation type, evidence kind) -> (status, confidence)."""
    fields = {
        "relationship_type": rt,
        "evidence_kind": ek,
        "status": st,
        "confidence": cf,
    }
    return PolicyRule(**fields)
|
||||||
|
|
||||||
|
|
||||||
|
# (relationship_type, evidence_kind) -> (status, confidence)
|
||||||
|
_V0_RULES = [
    # Each entry maps one (relationship_type, evidence_kind) pair to a (status, confidence).
    # Pairs not listed here have no rule.
    # concrete artifact / expert confirming the capability -> CONFIRMED
    _rule(RelationType.CONFIRMS, EvidenceKind.ARTIFACT, AssertionStatus.CONFIRMED, Confidence.HIGH),
    _rule(RelationType.CONFIRMS, EvidenceKind.EXPERT, AssertionStatus.CONFIRMED, Confidence.MEDIUM),
    # equivalent capability — certificate or artifact behind it -> INFERRED (never confirmed)
    _rule(RelationType.EQUIVALENT, EvidenceKind.CERTIFICATION, AssertionStatus.INFERRED, Confidence.HIGH),
    _rule(RelationType.EQUIVALENT, EvidenceKind.ARTIFACT, AssertionStatus.INFERRED, Confidence.HIGH),
    # supports — weaker than equivalent: a certificate only reaches LOW, an artifact MEDIUM
    _rule(RelationType.SUPPORTS, EvidenceKind.CERTIFICATION, AssertionStatus.INFERRED, Confidence.LOW),
    _rule(RelationType.SUPPORTS, EvidenceKind.ARTIFACT, AssertionStatus.INFERRED, Confidence.MEDIUM),
    # requires = an obligation NEEDS the capability (relevance, not possession)
    _rule(RelationType.REQUIRES, EvidenceKind.NONE, AssertionStatus.UNKNOWN, Confidence.LOW),
    # broader/narrower certificate -> weak inference
    _rule(RelationType.BROADER_THAN, EvidenceKind.CERTIFICATION, AssertionStatus.INFERRED, Confidence.LOW),
    _rule(RelationType.NARROWER_THAN, EvidenceKind.CERTIFICATION, AssertionStatus.INFERRED, Confidence.LOW),
    # merely related certificate -> no inference at all
    _rule(RelationType.RELATED_TO, EvidenceKind.CERTIFICATION, AssertionStatus.UNKNOWN, Confidence.LOW),
]
|
||||||
|
|
||||||
|
# The shipped policy: the v0 rule table under the version label "capability-policy-v0".
DEFAULT_POLICY = PolicyVersion(
    policy_version="capability-policy-v0",
    description="v0: certification never yields CONFIRMED; only CONFIRMS + ARTIFACT/EXPERT does.",
    rules=_V0_RULES,
)
|
||||||
|
|
||||||
|
|
||||||
|
def assert_no_certification_confirms(policy: PolicyVersion) -> None:
    """Check the hard rule that a CERTIFICATION-backed rule never yields CONFIRMED.

    Raises:
        ValueError: on the first rule (in list order) that pairs CERTIFICATION
            evidence with CONFIRMED status; the message names the policy version
            and that rule's relationship type.
    """
    violations = (
        rule
        for rule in policy.rules
        if rule.evidence_kind == EvidenceKind.CERTIFICATION
        and rule.status == AssertionStatus.CONFIRMED
    )
    first = next(violations, None)
    if first is None:
        return
    raise ValueError(
        "policy %s violates hard rule: certification -> confirmed (%s)"
        % (policy.policy_version, first.relationship_type.value)
    )
|
||||||
|
|
||||||
|
|
||||||
|
# Fail fast at import: the shipped default must satisfy the hard rule. If it does
# not, the ValueError raised here makes importing this module fail.
assert_no_certification_confirms(DEFAULT_POLICY)
|
||||||
@@ -0,0 +1,150 @@
|
|||||||
|
"""Master Capability Registry v0 — Compliance Execution domain (Phase 2C).
|
||||||
|
|
||||||
|
Built from the Reasoning session per user directive, but this IS the Compliance
|
||||||
|
Execution model (Execution owns Capability). Third real instance of the
|
||||||
|
identity-machine pattern (after Master Controls and Master Obligations):
|
||||||
|
|
||||||
|
Candidate -> Normalization -> Dedup -> Stable Identity (MCAP) -> Typed Relations
|
||||||
|
|
||||||
|
KEY SENTENCE (stored vs derived):
|
||||||
|
STORED : identities, sources, relationship types, policy versions, lifecycle
|
||||||
|
events, provenance.
|
||||||
|
DERIVED : confidence, coverage and gap statements — computed on demand, NEVER
|
||||||
|
stored (see policy.py / engine.evaluate_relation).
|
||||||
|
|
||||||
|
These are APPLICATION/registry types, NOT compliance-meta-model classes. In
|
||||||
|
particular `CapabilityRelation` is relation METADATA inside the registry — it does
|
||||||
|
NOT introduce a new meta-model class. Whether a reified relation must enter the
|
||||||
|
frozen meta-model is a Meta-Model-Owner decision (architecture freeze v1.0),
|
||||||
|
deferred until a demonstrable failure case exists.
|
||||||
|
|
||||||
|
Self-contained (no Reasoning import — Reasoning consumes Capability, not the other
|
||||||
|
way round). Python 3.9 compatible (no `|` unions).
|
||||||
|
"""
|
||||||
|
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
from enum import Enum
|
||||||
|
from typing import List, Optional
|
||||||
|
|
||||||
|
from pydantic import BaseModel, Field
|
||||||
|
|
||||||
|
|
||||||
|
class Confidence(str, Enum):
    # Three-step confidence label carried by policy rules and derived assessments.
    # Members are also `str` instances, so each compares equal to its lowercase value.
    HIGH = "high"
    MEDIUM = "medium"
    LOW = "low"
|
||||||
|
|
||||||
|
|
||||||
|
class AssertionStatus(str, Enum):
    """How well-established a capability claim is. A numeric score is presentation;
    THIS type is the truth (derived from relationship type + evidence + policy)."""

    # Members are also `str` instances equal to their lowercase value.
    DECLARED = "declared"
    INFERRED = "inferred"
    CONFIRMED = "confirmed"
    UNKNOWN = "unknown"  # also the fallback status when no policy rule matches a relation
|
||||||
|
|
||||||
|
|
||||||
|
class RelationType(str, Enum):
    # How a relation's source relates to its target capability. Stored on
    # CapabilityRelation and matched against PolicyRule.relationship_type.
    EQUIVALENT = "equivalent"
    SUPPORTS = "supports"
    REQUIRES = "requires"
    CONFIRMS = "confirms"
    BROADER_THAN = "broader_than"
    NARROWER_THAN = "narrower_than"
    RELATED_TO = "related_to"
|
||||||
|
|
||||||
|
|
||||||
|
class EvidenceKind(str, Enum):
    # What kind of evidence backs a relation; the second half of the policy lookup key.
    CERTIFICATION = "certification"  # a held certificate — a CLAIM, never proof
    ARTIFACT = "artifact"  # concrete doc/config/test/log
    EXPERT = "expert"  # human expert assertion
    NONE = "none"  # default evidence kind of a CapabilityRelation
|
||||||
|
|
||||||
|
|
||||||
|
class LifecycleState(str, Enum):
    # Lifecycle state of a MasterCapability identity.
    ACTIVE = "active"  # default state of a new MasterCapability
    DEPRECATED = "deprecated"
|
||||||
|
|
||||||
|
|
||||||
|
class LifecycleEventType(str, Enum):
    # Kind of identity change recorded in an IdentityLifecycleEvent.
    MERGE = "merge"
    SPLIT = "split"
    DEPRECATE = "deprecate"
    REDIRECT = "redirect"
|
||||||
|
|
||||||
|
|
||||||
|
class Provenance(BaseModel):
    """Every CURATED atom carries its own provenance (who / when / on what basis)."""

    author: str = ""  # who asserted it
    asserted_at: Optional[str] = None  # ISO timestamp passed in; never generated here
    basis: str = ""  # free-text reason / source of the assertion
|
||||||
|
|
||||||
|
|
||||||
|
# ── stored: identity ──────────────────────────────────────────────────────
|
||||||
|
class MasterCapability(BaseModel):
    # A stored capability identity. Only capability_id is required.
    capability_id: str  # stable MCAP-xxxxx
    name: str = ""
    definition: str = ""
    category: str = ""
    domains: List[str] = Field(default_factory=list)
    typical_evidence: List[str] = Field(default_factory=list)
    version: int = 1
    state: LifecycleState = LifecycleState.ACTIVE
    redirect_to: Optional[str] = None  # set on merge/deprecate; id of the capability to follow instead
    provenance: Provenance = Field(default_factory=Provenance)
|
||||||
|
|
||||||
|
|
||||||
|
class CapabilityCandidate(BaseModel):
    # A not-yet-registered capability term, before it is given an MCAP id.
    raw_term: str  # e.g. "Patch Management"
    source: str = ""  # e.g. "CRA:Annex I (2)(d)"
    normalized: str = ""  # normalized form of raw_term; empty when not yet normalized
|
||||||
|
|
||||||
|
|
||||||
|
# ── stored: typed relation metadata (NOT a meta-model class) ──────────────
|
||||||
|
class CapabilityRelation(BaseModel):
    # A typed link from an external source to one capability. Holds no status or
    # confidence field: those are derived from relationship_type + evidence_kind.
    relation_id: str
    source: str  # external term/obligation/certification id, e.g. "certification:ISO27001"
    target_capability_id: str  # MCAP-...
    relationship_type: RelationType
    evidence_kind: EvidenceKind = EvidenceKind.NONE
    provenance: Provenance = Field(default_factory=Provenance)
|
||||||
|
|
||||||
|
|
||||||
|
# ── stored: versioned derivation policy ───────────────────────────────────
|
||||||
|
class PolicyRule(BaseModel):
    # One derivation rule: (relationship_type, evidence_kind) is the lookup key,
    # (status, confidence) is the outcome.
    relationship_type: RelationType
    evidence_kind: EvidenceKind
    status: AssertionStatus
    confidence: Confidence
|
||||||
|
|
||||||
|
|
||||||
|
class PolicyVersion(BaseModel):
    """A versioned derivation policy. `policy_version` is recorded with every
    assessment so "why did you say X last year" is answerable with the policy
    as-of-then. Without this, `derived` and `auditable/reproducible` contradict."""

    policy_version: str  # version label, e.g. "capability-policy-v0"
    description: str = ""
    rules: List[PolicyRule] = Field(default_factory=list)
|
||||||
|
|
||||||
|
|
||||||
|
# ── stored: identity lifecycle ────────────────────────────────────────────
|
||||||
|
class IdentityLifecycleEvent(BaseModel):
    # One recorded identity change (merge / split / deprecate / redirect).
    event_id: str
    event_type: LifecycleEventType
    from_ids: List[str] = Field(default_factory=list)  # ids being retired or changed
    to_ids: List[str] = Field(default_factory=list)  # successor ids; may be empty
    at: Optional[str] = None  # presumably an ISO timestamp supplied by the caller — TODO confirm
    provenance: Provenance = Field(default_factory=Provenance)
|
||||||
|
|
||||||
|
|
||||||
|
# ── DERIVED — never stored ────────────────────────────────────────────────
|
||||||
|
class DerivedAssessment(BaseModel):
    # Result of evaluating one relation under a policy. Computed on demand, never stored.
    target_capability_id: str
    status: AssertionStatus
    confidence: Confidence
    policy_version: str  # version label of the policy that produced this assessment
    explanation: str = ""  # human-readable trace of the derivation
|
||||||
@@ -0,0 +1,46 @@
|
|||||||
|
"""Company Intelligence (Phase 2A) — Company Capability Profile foundation.
|
||||||
|
|
||||||
|
The HEAD of the spine Company -> Capability -> Product -> Regulation -> Obligation
|
||||||
|
-> Procedure -> Evidence. Builds a CompanyContext into a CompanyCapabilityProfile
|
||||||
|
with a four-state trust model (declared/inferred/confirmed/unknown). A certification
|
||||||
|
yields at most an INFERRED candidate — never "erfuellt".
|
||||||
|
|
||||||
|
Reasoning OWNS the container + trust-state; it CONSUMES the Certification->Capability
|
||||||
|
mapping (Execution-owned) via an injected contract — no mapping data in product code.
|
||||||
|
"""
|
||||||
|
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
from .contract import CapabilityMappingEntry, CertificationCapabilityMap, EMPTY_MAPPING
|
||||||
|
from .engine import build_company_profile
|
||||||
|
from .schemas import (
|
||||||
|
CapabilityEvidence,
|
||||||
|
Certification,
|
||||||
|
CompanyCapabilityProfile,
|
||||||
|
CompanyContext,
|
||||||
|
Declaration,
|
||||||
|
ExistingEvidence,
|
||||||
|
ExistingProcess,
|
||||||
|
ExistingSystem,
|
||||||
|
OperationalCapability,
|
||||||
|
OperationalCapabilityCandidate,
|
||||||
|
VerificationStatus,
|
||||||
|
)
|
||||||
|
|
||||||
|
__all__ = [
|
||||||
|
"build_company_profile",
|
||||||
|
"CompanyContext",
|
||||||
|
"CompanyCapabilityProfile",
|
||||||
|
"Certification",
|
||||||
|
"Declaration",
|
||||||
|
"ExistingProcess",
|
||||||
|
"ExistingSystem",
|
||||||
|
"ExistingEvidence",
|
||||||
|
"CapabilityEvidence",
|
||||||
|
"OperationalCapabilityCandidate",
|
||||||
|
"OperationalCapability",
|
||||||
|
"VerificationStatus",
|
||||||
|
"CapabilityMappingEntry",
|
||||||
|
"CertificationCapabilityMap",
|
||||||
|
"EMPTY_MAPPING",
|
||||||
|
]
|
||||||
@@ -0,0 +1,43 @@
|
|||||||
|
"""Consumption contract for the Certification -> Capability mapping.
|
||||||
|
|
||||||
|
OWNERSHIP BOUNDARY (hard): the Capability Registry, CapabilityDefinition and the
|
||||||
|
Certification->Capability / Feature->Capability mapping RULES live in the Compliance
|
||||||
|
Execution domain. This Reasoning layer defines ONLY the shape it consumes and never
|
||||||
|
ships mapping DATA in product code — tests inject mocks, so the real table can only
|
||||||
|
ever live in Execution.
|
||||||
|
|
||||||
|
Execution will eventually provide CapabilityRegistry / CapabilityMapping /
|
||||||
|
CapabilityDefinition; Reasoning consumes exactly `OperationalCapabilityCandidate`
|
||||||
|
{capability_id, source, confidence, verification_status} (see schemas.py) and the
|
||||||
|
minimal mapping SHAPE below — nothing more.
|
||||||
|
|
||||||
|
Python 3.9 compatible (no `|` unions).
|
||||||
|
"""
|
||||||
|
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
from typing import Dict, List
|
||||||
|
|
||||||
|
from pydantic import BaseModel, Field
|
||||||
|
|
||||||
|
from compliance.reasoning.enums import Confidence
|
||||||
|
|
||||||
|
|
||||||
|
class CapabilityMappingEntry(BaseModel):
    """Shape of a single mapping rule: one certification implies candidate capabilities.

    This is a contract type only. The real table (for example, which capabilities
    ISO27001 implies) is DATA owned by Compliance Execution and MUST NOT be
    hard-coded here or anywhere else in product code.
    """

    # Capability ids implied by the certification; empty list by default.
    capability_ids: List[str] = Field(default_factory=list)
    # Confidence given to every candidate inferred through this entry.
    confidence: Confidence = Confidence.MEDIUM
|
||||||
|
|
||||||
|
|
||||||
|
# Mapping type: certification_id -> mapping entry. It is injected at call time;
# product code itself holds NO entries.
CertificationCapabilityMap = Dict[str, CapabilityMappingEntry]

# Deliberately empty default. Without an injected mapping no candidate can be
# inferred, which is the architectural guarantee that the registry lives only in
# the Compliance Execution domain.
EMPTY_MAPPING: CertificationCapabilityMap = {}
|
||||||
@@ -0,0 +1,114 @@
|
|||||||
|
"""Company Intelligence engine (Phase 2A) — build the Company Capability Profile.
|
||||||
|
|
||||||
|
Deterministic, no LLM/RAG. Turns a raw CompanyContext into capability evidence,
|
||||||
|
candidates and (only via explicit verification) confirmed capabilities.
|
||||||
|
|
||||||
|
HARD RULE enforced here: a certification yields at most an INFERRED candidate; it
|
||||||
|
can NEVER produce a CONFIRMED capability on its own. Only real ExistingEvidence
|
||||||
|
(`proves_capability_id`) promotes a capability to CONFIRMED. Certifications without
|
||||||
|
a known mapping yield evidence-of-claim but NO inferred capability (the mapping is
|
||||||
|
Execution's data, injected — never hard-coded here).
|
||||||
|
|
||||||
|
Python 3.9 compatible (no `|` unions).
|
||||||
|
"""
|
||||||
|
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
from typing import Dict, List, Optional, Tuple
|
||||||
|
|
||||||
|
from compliance.reasoning.enums import Confidence
|
||||||
|
|
||||||
|
from .contract import EMPTY_MAPPING, CertificationCapabilityMap
|
||||||
|
from .schemas import (
|
||||||
|
CapabilityEvidence,
|
||||||
|
CompanyCapabilityProfile,
|
||||||
|
CompanyContext,
|
||||||
|
OperationalCapability,
|
||||||
|
OperationalCapabilityCandidate,
|
||||||
|
VerificationStatus,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
def _declared(context: CompanyContext) -> List[OperationalCapabilityCandidate]:
    """Turn every customer declaration into a DECLARED candidate.

    Args:
        context: company context whose `declarations` are converted.

    Returns:
        One candidate per declaration, in input order. Each carries the declared
        capability id, the source "declaration:<company_id>", MEDIUM confidence
        and the DECLARED verification status.
    """
    return [
        OperationalCapabilityCandidate(
            capability_id=declaration.capability_id,
            source="declaration:%s" % context.company_id,
            confidence=Confidence.MEDIUM,
            verification_status=VerificationStatus.DECLARED,
        )
        for declaration in context.declarations
    ]
|
||||||
|
|
||||||
|
|
||||||
|
def _from_certifications(
    context: CompanyContext, mapping: CertificationCapabilityMap
) -> Tuple[List[CapabilityEvidence], List[OperationalCapabilityCandidate]]:
    """Derive evidence-of-claim and INFERRED candidates from certifications.

    Refinement 1: certification -> evidence-of-capability (claim) -> inferred
    candidate. Every certification yields exactly one `CapabilityEvidence`.
    Candidates are produced only for certifications that have an entry in the
    injected `mapping`; a certification without an entry contributes evidence
    but no capability, because the mapping data belongs to Execution.

    Args:
        context: company context whose `certifications` are processed.
        mapping: injected certification_id -> mapping entry table.

    Returns:
        `(evidence, inferred)` — both lists follow the certification order, and
        candidates of one certification follow `entry.capability_ids`.
    """
    claims: List[CapabilityEvidence] = []
    candidates: List[OperationalCapabilityCandidate] = []

    for certification in context.certifications:
        cert_id = certification.certification_id
        origin = "certification:%s" % cert_id

        # Fall back to the id when the certification has no display name.
        claims.append(
            CapabilityEvidence(
                source=origin,
                claim="Company holds %s" % (certification.name or cert_id),
                certification_id=cert_id,
            )
        )

        rule = mapping.get(cert_id)
        if rule is not None:
            # A certification never yields more than INFERRED.
            for capability_id in rule.capability_ids:
                candidates.append(
                    OperationalCapabilityCandidate(
                        capability_id=capability_id,
                        source=origin,
                        confidence=rule.confidence,
                        verification_status=VerificationStatus.INFERRED,
                    )
                )

    return claims, candidates
|
||||||
|
|
||||||
|
|
||||||
|
def _confirmed_from_evidence(context: CompanyContext) -> List[OperationalCapability]:
    """Build CONFIRMED capabilities from evidence that names a capability.

    Evidence items with an empty or missing `proves_capability_id` are ignored.
    The remaining items are grouped per capability id.

    Args:
        context: company context whose `evidence` list is inspected.

    Returns:
        One `OperationalCapability` per proven capability id, in order of first
        appearance, with HIGH confidence, CONFIRMED status and the ids of all
        supporting evidence items as `sources`.
    """
    evidence_by_capability: Dict[str, List[str]] = {}
    for item in context.evidence:
        capability_id = item.proves_capability_id
        if capability_id:
            if capability_id not in evidence_by_capability:
                evidence_by_capability[capability_id] = []
            evidence_by_capability[capability_id].append(item.evidence_id)

    confirmed: List[OperationalCapability] = []
    for capability_id, evidence_ids in evidence_by_capability.items():
        confirmed.append(
            OperationalCapability(
                capability_id=capability_id,
                verification_status=VerificationStatus.CONFIRMED,
                confidence=Confidence.HIGH,
                sources=evidence_ids,
            )
        )
    return confirmed
|
||||||
|
|
||||||
|
|
||||||
|
def build_company_profile(
    context: CompanyContext, mapping: Optional[CertificationCapabilityMap] = None
) -> CompanyCapabilityProfile:
    """Assemble the Company Capability Profile from raw context and an injected mapping.

    Args:
        context: raw company inputs (certifications, declarations, evidence, ...).
        mapping: certification -> capability table. When omitted, the EMPTY
            mapping is used, so no candidate is inferred; the table can only ever
            come from the Compliance Execution domain.

    Returns:
        A profile holding the certification evidence, the declared and inferred
        candidates (declared first), and the confirmed capabilities. Candidates
        whose capability id is already confirmed are left out.
    """
    active_mapping = mapping if mapping is not None else EMPTY_MAPPING

    evidence, inferred = _from_certifications(context, active_mapping)
    declared = _declared(context)
    confirmed = _confirmed_from_evidence(context)

    # Once a capability is confirmed it is no longer listed as a mere candidate.
    confirmed_ids = set()
    for capability in confirmed:
        confirmed_ids.add(capability.capability_id)

    candidates = []
    for candidate in declared + inferred:
        if candidate.capability_id not in confirmed_ids:
            candidates.append(candidate)

    return CompanyCapabilityProfile(
        company_id=context.company_id,
        capability_evidence=evidence,
        candidate_capabilities=candidates,
        confirmed_capabilities=confirmed,
    )
|
||||||
@@ -0,0 +1,150 @@
|
|||||||
|
"""Company Intelligence (Phase 2A) — Company Capability Profile (domain objects).
|
||||||
|
|
||||||
|
This is the HEAD of the spine
|
||||||
|
|
||||||
|
Company -> (Operational) Capability -> Product -> Applicable Regulation ->
|
||||||
|
Obligation -> Procedure -> Evidence
|
||||||
|
|
||||||
|
and answers a DIFFERENT question than Regulatory Intelligence: not "which laws
|
||||||
|
apply to my product" but "which capabilities does my company already have, and
|
||||||
|
which regulatory obligations might they already cover".
|
||||||
|
|
||||||
|
HARD RULE (structural, not convention): a capability derived from a certification
|
||||||
|
is at most INFERRED — never CONFIRMED, never "erfuellt". A certification produces
|
||||||
|
EVIDENCE for a capability, an inference produces a CANDIDATE, and only checked
|
||||||
|
evidence produces a CONFIRMED capability. This keeps the company side inside
|
||||||
|
Welt 1 (potential), mirroring `ClaimCoverage` on the obligation side; it is NOT a
|
||||||
|
conformity verdict (`ComplianceStatus`, Welt 2, owned by Compliance Execution).
|
||||||
|
|
||||||
|
OWNERSHIP: Reasoning OWNS this CompanyContext container + the trust-state machine.
|
||||||
|
It does NOT own the Certification->Capability mapping RULES — those are the same
|
||||||
|
kind of rule as Feature->Capability and belong to the Compliance Execution
|
||||||
|
Capability Registry. This layer only CONSUMES `OperationalCapabilityCandidate`
|
||||||
|
{capability_id, source, confidence, verification_status} via an injected mapping
|
||||||
|
(see contract.py). No mapping DATA lives in product code (tests inject mocks).
|
||||||
|
|
||||||
|
Application/reasoning types, NOT compliance-meta-model classes (architecture
|
||||||
|
freeze v1.0 untouched). Python 3.9 compatible (no `|` unions).
|
||||||
|
"""
|
||||||
|
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
from enum import Enum
|
||||||
|
from typing import List, Optional
|
||||||
|
|
||||||
|
from pydantic import BaseModel, Field
|
||||||
|
|
||||||
|
from compliance.reasoning.enums import Confidence
|
||||||
|
|
||||||
|
|
||||||
|
class VerificationStatus(str, Enum):
    """How well-established a company CAPABILITY is — a fourth, separate vocabulary.

    It is disjoint from ClaimCoverage (World 1, customer claim vs. obligation),
    ComplianceStatus (World 2, verified conformity) and DeltaType (RCI). It never
    states whether an obligation is met.

    Progression: DECLARED (the customer says so) -> INFERRED (a certification
    implies it) -> CONFIRMED (checked against real evidence). UNKNOWN means there
    is no signal at all.
    """

    DECLARED = "declared"
    INFERRED = "inferred"
    CONFIRMED = "confirmed"
    UNKNOWN = "unknown"
|
||||||
|
|
||||||
|
|
||||||
|
# ── raw company inputs (the CompanyContext children) ──────────────────────
|
||||||
|
class Certification(BaseModel):
    """A certification held by the company (raw company input)."""

    # Identifier of the certification, e.g. "ISO27001".
    certification_id: str
    # Optional display name; empty by default.
    name: str = ""
    # What the certification covers, as stated by the customer.
    scope: str = ""
|
||||||
|
|
||||||
|
|
||||||
|
class Declaration(BaseModel):
    """A customer's own statement that a capability exists ("we do patch management")."""

    # Capability the customer claims to have.
    capability_id: str
    # Optional wording of the claim; empty by default.
    statement: str = ""
|
||||||
|
|
||||||
|
|
||||||
|
class ExistingProcess(BaseModel):
    """A process the company already has (raw company input)."""

    # Identifier of the process (required).
    process_id: str
    # Optional display name; empty by default.
    name: str = ""
|
||||||
|
|
||||||
|
|
||||||
|
class ExistingSystem(BaseModel):
    """A system the company already has (raw company input)."""

    # Identifier of the system (required).
    system_id: str
    # Optional display name; empty by default.
    name: str = ""
|
||||||
|
|
||||||
|
|
||||||
|
class ExistingEvidence(BaseModel):
    """A concrete artefact the company already holds (policy, audit log, SBOM, ...).

    `proves_capability_id` is the ONLY thing that may lift a capability to
    CONFIRMED — and only once a human or an engine has attached real evidence.
    """

    # Identifier of the artefact (required).
    evidence_id: str
    # Kind of artefact: config_export / test_report / policy / audit_log / ...
    evidence_type: str = ""
    # Capability this artefact proves; None when it proves none.
    proves_capability_id: Optional[str] = None
|
||||||
|
|
||||||
|
|
||||||
|
# ── intermediate: certification -> evidence-of-capability (refinement 1) ──
|
||||||
|
class CapabilityEvidence(BaseModel):
    """Evidence-of-claim produced by a certification.

    A certification never yields a capability directly, only EVIDENCE for one.
    "Company holds a certified ISMS" is such a claim; capabilities are then
    INFERRED from it through the injected, Execution-owned mapping.
    """

    # Provenance of the claim, e.g. "certification:ISO27001".
    source: str
    # Human-readable claim text; empty by default.
    claim: str = ""
    # Id of the certification behind the claim; empty by default.
    certification_id: str = ""
|
||||||
|
|
||||||
|
|
||||||
|
# ── consumed contract type (refinement 2) ─────────────────────────────────
|
||||||
|
class OperationalCapabilityCandidate(BaseModel):
    """The ONLY thing Reasoning consumes from Execution's capability mapping.

    "Operational" means an organisational ability and keeps this type apart
    from later Product/AI/Safety capabilities. A candidate always belongs to
    World 1: it is DECLARED or INFERRED and never CONFIRMED on its own.

    NOTE(review): the field type admits every `VerificationStatus` value; no
    validator rejecting CONFIRMED is visible here — confirm whether one is wanted.
    """

    # Capability this candidate refers to.
    capability_id: str
    # Where the candidate came from (a declaration or a certification).
    source: str
    # Confidence in the candidate; MEDIUM unless stated otherwise.
    confidence: Confidence = Confidence.MEDIUM
    # Trust state; INFERRED unless stated otherwise.
    verification_status: VerificationStatus = VerificationStatus.INFERRED
|
||||||
|
|
||||||
|
|
||||||
|
class OperationalCapability(BaseModel):
    """A capability the company really has, CONFIRMED against real evidence."""

    # Capability that has been established.
    capability_id: str
    # Trust state of the capability (required, no default).
    verification_status: VerificationStatus
    # Confidence in the capability; MEDIUM unless stated otherwise.
    confidence: Confidence = Confidence.MEDIUM
    # Identifiers of the supporting sources; empty list by default.
    sources: List[str] = Field(default_factory=list)
|
||||||
|
|
||||||
|
|
||||||
|
# ── the container Reasoning OWNS (raw inputs) ─────────────────────────────
|
||||||
|
class CompanyContext(BaseModel):
    """Container of raw company inputs, owned by the Reasoning layer.

    Every list defaults to a fresh empty list per instance.
    """

    # Identifier of the company (required).
    company_id: str
    # Certifications the company holds.
    certifications: List[Certification] = Field(default_factory=list)
    # Capabilities the customer states they have.
    declarations: List[Declaration] = Field(default_factory=list)
    # Existing processes.
    processes: List[ExistingProcess] = Field(default_factory=list)
    # Existing systems.
    systems: List[ExistingSystem] = Field(default_factory=list)
    # Concrete artefacts the company already holds.
    evidence: List[ExistingEvidence] = Field(default_factory=list)
|
||||||
|
|
||||||
|
|
||||||
|
# ── derived view (the Company Capability Profile) ─────────────────────────
|
||||||
|
class CompanyCapabilityProfile(BaseModel):
    """Derived view: capability evidence, candidates and confirmed capabilities.

    Entries of `candidate_capabilities` (declared or inferred) are NEVER promoted
    to `confirmed_capabilities` automatically; only explicit ExistingEvidence
    does that. The hard rule is enforced in engine.py.
    """

    # Company the profile belongs to.
    company_id: str
    # Evidence-of-claim derived from certifications.
    capability_evidence: List[CapabilityEvidence] = Field(default_factory=list)
    # Declared and inferred candidates.
    candidate_capabilities: List[OperationalCapabilityCandidate] = Field(default_factory=list)
    # Capabilities confirmed against real evidence.
    confirmed_capabilities: List[OperationalCapability] = Field(default_factory=list)
|
||||||
@@ -0,0 +1,24 @@
|
|||||||
|
"""Regulatory Completeness — auditable knowledge coverage, not confidence.
|
||||||
|
|
||||||
|
An internal quality machine: for an assessment it reports identified vs assessed regulations and
|
||||||
|
justifies every open or excluded domain (corpus gap -> future_corpus; applicability uncertain ->
|
||||||
|
query_required). The metric is counts, never a single percentage. The product never claims full
|
||||||
|
coverage — it makes its own knowledge state transparent and auditable. Deterministic, no LLM, no
|
||||||
|
new corpus/meta-model class (freeze v1.0).
|
||||||
|
"""
|
||||||
|
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
from .engine import assess_completeness
|
||||||
|
from .schemas import (
|
||||||
|
Assumption, CompletenessReport, CorpusStatus, DomainCoverage, Exclusion,
|
||||||
|
)
|
||||||
|
|
||||||
|
__all__ = [
|
||||||
|
"assess_completeness",
|
||||||
|
"CompletenessReport",
|
||||||
|
"CorpusStatus",
|
||||||
|
"DomainCoverage",
|
||||||
|
"Exclusion",
|
||||||
|
"Assumption",
|
||||||
|
]
|
||||||
@@ -0,0 +1,89 @@
|
|||||||
|
"""Regulatory Completeness Engine — measure auditable knowledge coverage for an assessment.
|
||||||
|
|
||||||
|
Separates what we IDENTIFIED (triggered regulations) from what we ASSESSED (validated corpus AND
|
||||||
|
determined applicability), and justifies every gap. Two kinds of „open":
|
||||||
|
- corpus gap — no validated corpus yet (e.g. Environmental) -> future_corpus
|
||||||
|
- applicability open — corpus exists but applicability is uncertain (Data Act) -> query_required
|
||||||
|
The metric is COUNTS, never a single percentage. The audit statement says plainly „wir bewerteten M
|
||||||
|
von N Domänen; K sind nicht im validierten Korpus und wurden bewusst nicht bewertet".
|
||||||
|
|
||||||
|
Deterministic, computed-not-stored, no LLM, no new corpus/meta-model class (freeze v1.0). Python 3.9.
|
||||||
|
"""
|
||||||
|
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
from typing import Any, Dict, List, Optional
|
||||||
|
|
||||||
|
from .schemas import (
|
||||||
|
Assumption, CompletenessReport, CorpusStatus, DomainCoverage, Exclusion,
|
||||||
|
)
|
||||||
|
|
||||||
|
# Raw string values accepted as a corpus status.
_VALID = set(member.value for member in CorpusStatus)


def _status(corpus_status: Dict[str, str], reg: str) -> CorpusStatus:
    """Look up the corpus status of one regulation.

    Args:
        corpus_status: regulation -> raw status string.
        reg: regulation to look up.

    Returns:
        The matching `CorpusStatus`; UNKNOWN when the regulation is not listed
        or its raw value is not a known status.
    """
    raw = corpus_status.get(reg, "unknown")
    if raw in _VALID:
        return CorpusStatus(raw)
    return CorpusStatus.UNKNOWN
|
||||||
|
|
||||||
|
|
||||||
|
def _text_or(value: Any, default: str = "") -> str:
    """Coerce an optional input value to text; None or empty text yields `default`."""
    if value is None:
        return default
    text = str(value)
    return text if text else default


def assess_completeness(
    identified_regulations: List[str],
    corpus_status: Dict[str, str],
    uncertain: Optional[List[Dict[str, Any]]] = None,
    assumptions: Optional[List[Dict[str, Any]]] = None,
    assessed_obligations: int = 0,
) -> CompletenessReport:
    """Build the auditable coverage report.

    Args:
        identified_regulations: regulations triggered/identified for this product.
            Duplicates are removed and the result is sorted.
        corpus_status: regulation -> one of validated/draft/unsupported/unknown
            (curated/injected corpus registry). Missing or unrecognised values
            count as unknown.
        uncertain: applicability-uncertain regulations, each a dict like
            {regulation, deciding_question, reason}. The key "subject" is
            accepted in place of "regulation". Entries naming neither are
            skipped for exclusions but still counted in `uncertainties_count`.
        assumptions: dicts like {key, value, note}.
        assessed_obligations: count from Execution (injected, default 0).

    Returns:
        A `CompletenessReport` that separates identified from assessed
        regulations (validated corpus AND applicability not uncertain) and lists
        an `Exclusion` with a reason for every open regulation.
    """
    ids = sorted(set(identified_regulations))
    unc = uncertain or []
    unc_subjects = {
        str(u.get("regulation") or u.get("subject"))
        for u in unc
        if (u.get("regulation") or u.get("subject"))
    }

    # Resolve each regulation's corpus status once instead of on every use.
    status_of = {r: _status(corpus_status, r) for r in ids}

    coverage = [DomainCoverage(regulation=r, status=status_of[r]) for r in ids]
    assessed = [
        r for r in ids
        if status_of[r] == CorpusStatus.VALIDATED and r not in unc_subjects
    ]
    assessed_set = set(assessed)
    open_regs = [r for r in ids if r not in assessed_set]
    open_corpora = [
        r for r in ids
        if status_of[r] in (CorpusStatus.UNSUPPORTED, CorpusStatus.UNKNOWN)
    ]

    exclusions: List[Exclusion] = []
    # Applicability-uncertain entries first: they need an answer, not a corpus.
    for u in unc:
        subj = str(u.get("regulation") or u.get("subject") or "")
        if not subj:
            continue
        exclusions.append(Exclusion(
            subject=subj,
            # An explicit None/empty reason must not reach the report as "None"
            # or as a missing reason.
            reason=_text_or(u.get("reason"), "Anwendbarkeit unsicher"),
            deciding_question=_text_or(u.get("deciding_question")),
            resolution="query_required"))
    # Remaining open regulations are corpus gaps (draft or missing).
    for r in open_regs:
        if r in unc_subjects:
            continue
        if status_of[r] == CorpusStatus.DRAFT:
            exclusions.append(Exclusion(subject=r, reason="Korpus in Bearbeitung (draft)", resolution="in_review"))
        else:
            exclusions.append(Exclusion(subject=r, reason="nicht im validierten Korpus", resolution="future_corpus"))

    # Self-check: every open regulation must be covered by an exclusion.
    covered_subjects = {e.subject for e in exclusions}
    justification = (not open_regs) or set(open_regs) <= covered_subjects
    assumptions_m = [
        Assumption(
            key=_text_or(a.get("key")),
            value=_text_or(a.get("value")),
            note=_text_or(a.get("note")),
        )
        for a in (assumptions or [])
    ]

    summary = "Identifiziert %d · bewertet %d · offen %d · Unsicherheiten %d · Begründung %s" % (
        len(ids), len(assessed), len(open_regs), len(unc), "ja" if justification else "nein")
    if open_regs:
        audit = (
            "Für dieses Produkt konnten wir %d von %d identifizierten regulatorischen Domänen vollständig "
            "bewerten. %d weitere %s noch nicht Bestandteil des validierten Korpus bzw. anwendungsunsicher "
            "und wurden deshalb bewusst nicht bewertet." % (
                len(assessed), len(ids), len(open_regs), "ist" if len(open_regs) == 1 else "sind"))
    else:
        audit = "Für dieses Produkt konnten wir alle %d identifizierten regulatorischen Domänen vollständig bewerten." % len(ids)

    return CompletenessReport(
        identified_regulations=ids, assessed_regulations=assessed, open_regulations=open_regs,
        open_corpora=open_corpora, coverage=coverage, assumptions=assumptions_m, exclusions=exclusions,
        uncertainties_count=len(unc), assessed_obligations=assessed_obligations,
        justification_present=justification, completeness_summary=summary, audit_statement=audit,
    )
|
||||||
@@ -0,0 +1,62 @@
|
|||||||
|
"""Schemas for the Regulatory Completeness Engine — auditable knowledge-coverage, not confidence.
|
||||||
|
|
||||||
|
For an assessment it answers „wie sicher sind wir, dass diese Antwort VOLLSTÄNDIG ist?" by separating
|
||||||
|
IDENTIFIED regulations from ASSESSED ones (those in the validated corpus) and listing every open or
|
||||||
|
excluded domain WITH a reason. The metric is counts, never a single „87%". This is an internal quality
|
||||||
|
machine: the product never claims full coverage — it makes its own knowledge state transparent.
|
||||||
|
Deterministic, computed-not-stored, no new meta-model class (freeze v1.0). Python 3.9 compatible.
|
||||||
|
"""
|
||||||
|
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
from enum import Enum
|
||||||
|
from typing import List
|
||||||
|
|
||||||
|
from pydantic import BaseModel, Field
|
||||||
|
|
||||||
|
|
||||||
|
class CorpusStatus(str, Enum):
    """Maturity of our knowledge corpus for one regulation or domain."""

    # The corpus is validated: the regulation can be fully assessed.
    VALIDATED = "validated"
    # Partial corpus, still under review.
    DRAFT = "draft"
    # The regulation was triggered, but no corpus exists yet.
    UNSUPPORTED = "unsupported"
    # The regulation is not in our registry at all.
    UNKNOWN = "unknown"
|
||||||
|
|
||||||
|
|
||||||
|
class DomainCoverage(BaseModel):
    """Corpus status recorded for one regulation."""

    # Regulation the entry describes.
    regulation: str
    # Maturity of the corpus for that regulation; UNKNOWN by default.
    status: CorpusStatus = CorpusStatus.UNKNOWN
    # Optional free-text remark; empty by default.
    note: str = ""
|
||||||
|
|
||||||
|
|
||||||
|
class Exclusion(BaseModel):
    """A domain or regulation that was DELIBERATELY not assessed.

    An exclusion always carries a reason; this is the heart of the engine.
    """

    # Regulation or domain that was left out.
    subject: str
    # Why it was not assessed (required).
    reason: str
    # Question whose answer would resolve it (only relevant for a query).
    deciding_question: str = ""
    # query_required | future_corpus | in_review | not_applicable
    # ("in_review" is what the engine emits for a draft corpus).
    resolution: str = "future_corpus"
|
||||||
|
|
||||||
|
|
||||||
|
class Assumption(BaseModel):
    """One assumption recorded alongside an assessment."""

    # Name of the assumption (required).
    key: str
    # Assumed value as text; empty by default.
    value: str = ""
    # Optional remark; empty by default.
    note: str = ""
|
||||||
|
|
||||||
|
|
||||||
|
class CompletenessReport(BaseModel):
    """Auditable coverage report for one assessment.

    It reports counts plus a justification and deliberately NO single percentage.
    """

    # Every regulation identified for the assessment.
    identified_regulations: List[str] = Field(default_factory=list)
    # Regulations in the validated corpus whose applicability is not uncertain.
    assessed_regulations: List[str] = Field(default_factory=list)
    # Identified regulations that were not assessed.
    open_regulations: List[str] = Field(default_factory=list)
    # Missing domains worth building a corpus for.
    open_corpora: List[str] = Field(default_factory=list)
    # Corpus status per identified regulation.
    coverage: List[DomainCoverage] = Field(default_factory=list)
    # Assumptions the assessment relied on.
    assumptions: List[Assumption] = Field(default_factory=list)
    # Every deliberate non-assessment, each with its reason.
    exclusions: List[Exclusion] = Field(default_factory=list)
    # Number of applicability uncertainties supplied.
    uncertainties_count: int = 0
    # Obligation count injected from Execution (Execution-owned).
    assessed_obligations: int = 0
    # Whether the open regulations are justified.
    justification_present: bool = False
    # One-line summary: "Identifiziert N · bewertet M · offen K · ..."
    completeness_summary: str = ""
    # The honest narrative sentence for the audit.
    audit_statement: str = ""
|
||||||
@@ -0,0 +1,18 @@
|
|||||||
|
"""Interpretation-in-Map — evaluate a customer interpretation within the map.
|
||||||
|
|
||||||
|
Thin adapter over the existing `assess_interpretation`: it judges the customer's
|
||||||
|
reading against the regulations/obligations actually present in the product's
|
||||||
|
RegulatoryMap, and flags touched unsupported domains as future_corpus_needed
|
||||||
|
instead of pseudo-evaluating them. No new legal reasoning, no RCI, no UI.
|
||||||
|
"""
|
||||||
|
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
from .adapter import interpret_in_map
|
||||||
|
from .schemas import InterpretationInMapRequest, InterpretationInMapResult
|
||||||
|
|
||||||
|
__all__ = [
|
||||||
|
"interpret_in_map",
|
||||||
|
"InterpretationInMapRequest",
|
||||||
|
"InterpretationInMapResult",
|
||||||
|
]
|
||||||
@@ -0,0 +1,90 @@
|
|||||||
|
"""Interpretation-in-Map adapter (step 5).
|
||||||
|
|
||||||
|
Evaluates a customer interpretation WITHIN the already-built RegulatoryMap. It
|
||||||
|
reuses the existing `assess_interpretation` (no new legal engine), restricts the
|
||||||
|
affected regulations/obligations to those present in the map, and reports any
|
||||||
|
touched unsupported domain (wastewater/chemicals/...) as future_corpus_needed
|
||||||
|
rather than pseudo-evaluating it.
|
||||||
|
"""
|
||||||
|
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
from typing import Dict, List
|
||||||
|
|
||||||
|
from compliance.reasoning.enums import InterpretationVerdict
|
||||||
|
from compliance.reasoning.interpretation_engine import assess_interpretation
|
||||||
|
from compliance.regulatory_map.schemas import RegulatoryMap
|
||||||
|
|
||||||
|
from .schemas import InterpretationInMapResult
|
||||||
|
|
||||||
|
# German display label for each interpretation verdict, used in the explanation text.
_LABEL: Dict[InterpretationVerdict, str] = {
    InterpretationVerdict.PLAUSIBLE: "plausibel",
    InterpretationVerdict.TOO_NARROW: "zu eng",
    InterpretationVerdict.TOO_BROAD: "zu weit",
    InterpretationVerdict.PARTIALLY_CORRECT: "teilweise korrekt",
    InterpretationVerdict.UNSUPPORTED: "nicht belegt",
    InterpretationVerdict.UNCERTAIN: "unsicher",
}
|
||||||
|
|
||||||
|
# domain -> keywords that signal the interpretation is ABOUT that (uncovered) domain.
|
||||||
|
_ENV_KEYWORDS: Dict[str, List[str]] = {
|
||||||
|
"environment_water": ["abwasser", "wastewater", "gewässer", "gewaesser", "einleitung", "abfluss"],
|
||||||
|
"chemicals": ["chemikalie", "reach", "clp", "reinigungsmittel", "biozid", "gefahrstoff", "detergenz", "lösemittel", "loesemittel"],
|
||||||
|
"environment_air": ["luft", "emission", "voc", "immission", "abluft", "verbrennung"],
|
||||||
|
"waste": ["abfall", "entsorgung", "weee", "recycling"],
|
||||||
|
"energy_resources": ["energie", "ökodesign", "oekodesign", "verbrauch"],
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
def _touches(text: str, domain: str) -> bool:
|
||||||
|
low = text.lower()
|
||||||
|
return any(kw in low for kw in _ENV_KEYWORDS.get(domain, []))
|
||||||
|
|
||||||
|
|
||||||
|
def _explain(label: str, detail: str, affected_regs: List[str], future_domains: List[str], in_scope: bool) -> str:
|
||||||
|
base = "Ihre Interpretation ist wahrscheinlich %s." % label
|
||||||
|
if detail:
|
||||||
|
base += " " + detail
|
||||||
|
if affected_regs:
|
||||||
|
base += " Betroffen in Ihrer Map: %s." % ", ".join(affected_regs)
|
||||||
|
if future_domains:
|
||||||
|
base += (
|
||||||
|
" Für %s liegt noch kein Regelkorpus vor — diese Aspekte werden nicht bewertet (future_corpus_needed)."
|
||||||
|
% ", ".join(future_domains)
|
||||||
|
)
|
||||||
|
if not in_scope and not future_domains:
|
||||||
|
base += " Diese Auslegung betrifft kein Regelwerk Ihrer aktuellen Produkt-Map."
|
||||||
|
return base
|
||||||
|
|
||||||
|
|
||||||
|
def interpret_in_map(reg_map: RegulatoryMap, interpretation: str) -> InterpretationInMapResult:
    """Evaluate a customer interpretation within an already-built RegulatoryMap.

    The verdict comes from the existing `assess_interpretation`; no new
    reasoning happens here. Its affected regulations and obligations are
    narrowed to those present in the map, and unsupported domains the text
    touches are reported as future-corpus domains instead of being evaluated.

    Args:
        reg_map: the product's regulatory map.
        interpretation: the customer's reading, as free text.

    Returns:
        The verdict together with the map-restricted regulations and
        obligations, related uncertain regulations, touched unsupported domains
        and a composed explanation.
    """
    verdict = assess_interpretation(interpretation)  # existing engine — no new reasoning

    # Regulation ids known to the map, whatever their applicability state.
    uncertain_ids = {entry.regulation_id for entry in reg_map.uncertain_regulations}
    known_reg_ids = {entry.regulation_id for entry in reg_map.applicable_regulations}
    known_reg_ids |= uncertain_ids
    known_reg_ids |= {entry.regulation_id for entry in reg_map.excluded_regulations}

    # Obligations are taken from applicable regulations only.
    known_ob_ids = set()
    for entry in reg_map.applicable_regulations:
        for obligation in entry.obligations:
            known_ob_ids.add(obligation.obligation_id)

    affected_regs = [reg for reg in verdict.affected_regulations if reg in known_reg_ids]
    affected_obs = [ob for ob in verdict.affected_obligations if ob in known_ob_ids]
    related_unc = [reg for reg in verdict.affected_regulations if reg in uncertain_ids]
    future = [dom for dom in reg_map.unsupported_domains if _touches(interpretation, dom.domain)]
    in_scope = bool(affected_regs or affected_obs)

    explanation = _explain(
        _LABEL[verdict.assessment],
        verdict.explanation,
        affected_regs,
        [dom.domain for dom in future],
        in_scope,
    )

    return InterpretationInMapResult(
        raw_interpretation=interpretation,
        assessment=verdict.assessment,
        in_scope_of_map=in_scope,
        affected_regulations=affected_regs,
        affected_obligations=affected_obs,
        related_uncertainties=related_unc,
        future_corpus_domains=future,
        corrected_interpretation=verdict.corrected_interpretation,
        risks=verdict.risks,
        legal_basis_refs=verdict.legal_basis_refs,
        explanation=explanation,
        confidence=verdict.confidence,
    )
|
||||||
@@ -0,0 +1,36 @@
|
|||||||
|
"""Schemas for Interpretation-in-Map (step 5).
|
||||||
|
|
||||||
|
A thin adapter that evaluates a customer interpretation WITHIN the already-built
|
||||||
|
RegulatoryMap — it does not assess abstract legal questions. Application types
|
||||||
|
only; no compliance-meta-model classes (freeze v1.0 untouched).
|
||||||
|
"""
|
||||||
|
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
from typing import List
|
||||||
|
|
||||||
|
from pydantic import BaseModel, Field
|
||||||
|
|
||||||
|
from compliance.product_scope.schemas import UnsupportedDomain
|
||||||
|
from compliance.profile.canonical import CanonicalProductRegulatoryProfile
|
||||||
|
from compliance.reasoning.enums import Confidence, InterpretationVerdict
|
||||||
|
|
||||||
|
|
||||||
|
class InterpretationInMapRequest(BaseModel):
    """Request payload: a product profile plus the customer's interpretation text."""

    # Canonical regulatory profile of the product.
    product_profile: CanonicalProductRegulatoryProfile
    # The customer's reading, as free text.
    customer_interpretation: str
|
||||||
|
|
||||||
|
|
||||||
|
class InterpretationInMapResult(BaseModel):
    # Result of evaluating one customer interpretation against the regulatory map.

    # The interpretation text exactly as it was received.
    raw_interpretation: str

    # Verdict on the interpretation.
    assessment: InterpretationVerdict

    # True if the interpretation touches a regulation/obligation present in the map.
    in_scope_of_map: bool

    # Affected regulations, intersected with the map.
    affected_regulations: List[str] = Field(default_factory=list)

    # Affected obligations, intersected with the map (registry-linked).
    affected_obligations: List[str] = Field(default_factory=list)

    # Regulations the interpretation touches that the map marks as uncertain.
    related_uncertainties: List[str] = Field(default_factory=list)

    # Unsupported domains the interpretation touches; these are NOT evaluated.
    future_corpus_domains: List[UnsupportedDomain] = Field(default_factory=list)

    corrected_interpretation: str = ""
    risks: List[str] = Field(default_factory=list)
    legal_basis_refs: List[str] = Field(default_factory=list)
    explanation: str = ""
    confidence: Confidence = Confidence.MEDIUM
|
||||||
@@ -0,0 +1,30 @@
|
|||||||
|
"""Journey Matcher — the Delta -> Journey function of the Capability Delta Engine.
|
||||||
|
|
||||||
|
The third independent function of the pipeline (after Company 2A `Evidence -> Capability` and RS-005
|
||||||
|
`Capability -> Delta`): given ONLY the Capability Delta, rank the known journeys that best EXPLAIN it.
|
||||||
|
A Journey is an EXPLANATION of the delta, not its cause — order is `Goal -> Required -> Delta -> Journey`.
|
||||||
|
|
||||||
|
Deliberately dumb + deterministic (pure set overlap; no ML/embeddings/LLM), fully auditable, signatures
|
||||||
|
INJECTED (certificate-agnostic capability clusters). No new corpus, no graph (freeze v1.0). The Matcher
|
||||||
|
is sanctioned as the last architectural building block; everything after is knowledge work.
|
||||||
|
"""
|
||||||
|
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
from .engine import match_journeys
|
||||||
|
from .schemas import (
|
||||||
|
JourneyMatch,
|
||||||
|
JourneyMatchReason,
|
||||||
|
JourneyMatchResult,
|
||||||
|
JourneySignature,
|
||||||
|
MatchContext,
|
||||||
|
)
|
||||||
|
|
||||||
|
__all__ = [
|
||||||
|
"match_journeys",
|
||||||
|
"JourneySignature",
|
||||||
|
"MatchContext",
|
||||||
|
"JourneyMatch",
|
||||||
|
"JourneyMatchReason",
|
||||||
|
"JourneyMatchResult",
|
||||||
|
]
|
||||||
@@ -0,0 +1,94 @@
|
|||||||
|
"""Journey Matcher — the Delta -> Journey function of the Capability Delta Engine.
|
||||||
|
|
||||||
|
Three INDEPENDENT functions now compose the pipeline, each a different problem, all interchangeable:
|
||||||
|
1. Evidence -> Capability (Company 2A)
|
||||||
|
2. Capability -> Delta (RS-005, transition_reasoning)
|
||||||
|
3. Delta -> Journey (THIS module)
|
||||||
|
|
||||||
|
The paradigm shift: a Journey is no longer the CAUSE (Goal -> Journey -> Delta) but the EXPLANATION
|
||||||
|
(Goal -> Required -> Delta -> Journey). The matcher does NOT look at certifications, regulations,
|
||||||
|
tenders, OEM specs or the goal — it looks ONLY at the Capability Delta and asks: which known journeys
|
||||||
|
describe exactly this delta? Output is a ranked, auditable explanation ("Journey A explains 82% of the
|
||||||
|
delta, because 8 of 10 missing capabilities are identical, same target type, ...").
|
||||||
|
|
||||||
|
Deliberately DUMB and deterministic: pure set overlap, NO ML, NO embeddings, NO LLM. A learning ranker
|
||||||
|
can be layered ON TOP later; this core stays auditable. Journey signatures are INJECTED (certificate-
|
||||||
|
agnostic capability clusters), never loaded here — the engine stays hermetic. No new corpus, no
|
||||||
|
graph/meta-model class (freeze v1.0). Python 3.9 compatible.
|
||||||
|
|
||||||
|
Honesty: `score` is the share of the DELTA a journey explains (recall over the customer's missing
|
||||||
|
capabilities), never a "fit" or a compliance verdict. `journey_only` documents where a journey reaches
|
||||||
|
BEYOND this delta, so a broad journey that explains everything is not silently preferred.
|
||||||
|
"""
|
||||||
|
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
from typing import List, Optional, Sequence
|
||||||
|
|
||||||
|
from .schemas import (
|
||||||
|
JourneyMatch,
|
||||||
|
JourneyMatchReason,
|
||||||
|
JourneyMatchResult,
|
||||||
|
JourneySignature,
|
||||||
|
MatchContext,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
def _context_signals(journey: JourneySignature, context: Optional[MatchContext]) -> List[str]:
    """Return the labels of the context attributes on which journey and request agree.

    The labels are corroborating, documented reasons only; nothing here feeds
    into the score. An attribute counts as agreeing only when it is set
    (truthy) on both sides and the two values are equal.

    Args:
        journey: The journey signature being compared.
        context: Optional match context; ``None`` yields an empty list.

    Returns:
        Labels in the fixed order target type, industry, product type.
    """
    if context is None:
        return []

    # (attribute name, label emitted when both sides agree) -- order is significant.
    comparisons = (
        ("target_type", "gleiche Zielart"),
        ("industry", "gleiche Branche"),
        ("product_type", "gleicher Produkttyp"),
    )
    agreed: List[str] = []
    for attribute, label in comparisons:
        requested = getattr(context, attribute)
        offered = getattr(journey, attribute)
        if requested and offered and requested == offered:
            agreed.append(label)
    return agreed
|
||||||
|
|
||||||
|
|
||||||
|
def match_journeys(
    delta: Sequence[str],
    journeys: Sequence[JourneySignature],
    context: Optional[MatchContext] = None,
) -> JourneyMatchResult:
    """Rank known journeys by the share of the Capability Delta they EXPLAIN.

    Args:
        delta: The customer's MISSING capabilities; duplicates are collapsed.
        journeys: Injected journey signatures to compare against.
        context: Optional corroborating context. It only contributes documented
            reasons and a tie-break, never the score.

    Returns:
        A ``JourneyMatchResult`` with every journey as a match, ranked, plus the
        best match (``None`` when no journey shares a capability with the delta)
        and a headline string.

    score = |delta INTERSECT pattern| / |delta|, reported rounded to two decimals.
    Ranking is deterministic: matched-capability count desc, then context-signal
    count desc, then journey_id asc.

    Ranking and the "explains the delta" decision use the exact matched count,
    not the rounded score. The delta size is the same for every journey, so this
    is the exact score order. For deltas of up to 100 capabilities it gives the
    same result as ranking on the rounded score. For larger deltas it keeps
    distinct overlaps from collapsing into ties, and keeps a journey with a real
    but tiny overlap (e.g. 1 of 300, which rounds to 0.0) from being reported as
    "no known journey".
    """
    delta_set = set(delta)
    n = len(delta_set)
    matches: List[JourneyMatch] = []
    for j in journeys:
        pattern = set(j.capability_pattern)
        matched = sorted(delta_set & pattern)
        # Guard the empty delta: nothing to explain means a score of 0.0.
        score = (len(matched) / n) if n else 0.0
        reason = JourneyMatchReason(
            matched_capabilities=matched,
            unexplained_delta=sorted(delta_set - pattern),
            journey_only=sorted(pattern - delta_set),
            context_signals=_context_signals(j, context),
        )
        matches.append(
            JourneyMatch(
                journey_id=j.journey_id,
                label=j.label,
                score=round(score, 2),
                explains="%d von %d fehlenden Capabilities" % (len(matched), n),
                reason=reason,
            )
        )

    # Sort on the integer overlap so rounding of the displayed score cannot
    # change the order.
    matches.sort(
        key=lambda m: (
            -len(m.reason.matched_capabilities),
            -len(m.reason.context_signals),
            m.journey_id,
        )
    )
    explaining = sum(1 for m in matches if m.reason.matched_capabilities)
    # After sorting, the first match has the largest overlap; it is "best" only
    # if that overlap is non-empty.
    best = matches[0] if explaining else None
    headline = (
        "%d Journeys erklaeren das Delta; beste: %s (%d%% des Deltas)"
        % (explaining, best.label, round(best.score * 100))
        if best
        else "Keine bekannte Journey erklaert dieses Delta (neue Journey-Kandidatin)"
    )
    return JourneyMatchResult(delta_size=n, matches=matches, best=best, headline=headline)
|
||||||
@@ -0,0 +1,66 @@
|
|||||||
|
"""Schemas for the Journey Matcher — the Delta -> Journey function of the Capability Delta Engine.
|
||||||
|
|
||||||
|
Derived views (computed-not-stored): nothing here is persisted; every match is recomputed from the
|
||||||
|
input delta + injected journey signatures each call. No new corpus, no graph (freeze v1.0).
|
||||||
|
Python 3.9 compatible (no `|` unions).
|
||||||
|
"""
|
||||||
|
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
from typing import List, Optional
|
||||||
|
|
||||||
|
from pydantic import BaseModel, Field
|
||||||
|
|
||||||
|
|
||||||
|
class JourneySignature(BaseModel):
    """A known journey described ONLY by its capability pattern (Input cluster -> Output cluster).

    Deliberately certificate-/regulation-agnostic: the match uses `capability_pattern` alone. `label`
    and the context fields exist for the human-auditable explanation, NEVER for the score. (Today the
    signatures are derived from the transition patterns; the IDs like "ISO27001->CRA" are just one way
    to describe the clusters — the matcher never reads them.)
    """

    journey_id: str
    label: str

    # OUTPUT cluster: the delta this journey is about.
    capability_pattern: List[str] = Field(default_factory=list)

    # INPUT cluster: capabilities that are typically already present.
    assumed_capabilities: List[str] = Field(default_factory=list)

    industry: Optional[str] = None
    product_type: Optional[str] = None

    # Context only: regulation / certification / contract / environmental.
    target_type: Optional[str] = None
|
||||||
|
|
||||||
|
|
||||||
|
class MatchContext(BaseModel):
    """Optional corroborating context — surfaced as documented reasons, never part of the score."""

    # All three attributes are optional; an unset attribute produces no signal.
    industry: Optional[str] = None
    product_type: Optional[str] = None
    target_type: Optional[str] = None
|
||||||
|
|
||||||
|
|
||||||
|
class JourneyMatchReason(BaseModel):
    """The auditable WHY behind one match — everything a reviewer needs, no opaque score."""

    # delta INTERSECT pattern: what the journey explains.
    matched_capabilities: List[str] = Field(default_factory=list)

    # delta - pattern: what the journey does NOT explain.
    unexplained_delta: List[str] = Field(default_factory=list)

    # pattern - delta: what the journey covers but this delta does not need.
    journey_only: List[str] = Field(default_factory=list)

    # Corroborating labels such as "gleiche Zielart", "gleiche Branche", ...
    context_signals: List[str] = Field(default_factory=list)
|
||||||
|
|
||||||
|
|
||||||
|
class JourneyMatch(BaseModel):
    """One known journey, ranked by how much of the delta it EXPLAINS (not how well it 'fits')."""

    journey_id: str
    label: str

    # |delta INTERSECT pattern| / |delta|, in 0..1: the share of the delta explained.
    score: float = 0.0

    # Human-readable count, e.g. "8 von 10 fehlenden Capabilities".
    explains: str = ""

    reason: JourneyMatchReason
|
||||||
|
|
||||||
|
|
||||||
|
class JourneyMatchResult(BaseModel):
    """Ranked known journeys that EXPLAIN a Capability Delta. Journey = explanation, not cause."""

    delta_size: int = 0

    # All matches, ranked descending by score.
    matches: List[JourneyMatch] = Field(default_factory=list)

    best: Optional[JourneyMatch] = None
    headline: str = ""
|
||||||
@@ -0,0 +1,23 @@
|
|||||||
|
"""Knowledge Intake — classify an incoming document and assess its impact on existing knowledge.
|
||||||
|
|
||||||
|
The stage BEFORE the parser: no content extraction, only classification ("Einordnung"). Intersects a document's signals
|
||||||
|
(regulations + keywords) with an index of the existing knowledge to emit a `KnowledgePackage` — which
|
||||||
|
capabilities / playbooks / patterns / reference scenarios / obligations it probably touches, whether
|
||||||
|
it is a new domain, and how much review it warrants. Deterministic, no LLM, no new corpus (freeze v1.0).
|
||||||
|
"""
|
||||||
|
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
from .engine import assess_document_impact, build_knowledge_index
|
||||||
|
from .schemas import (
|
||||||
|
DocumentDescriptor, ImpactLevel, KnowledgeIndex, KnowledgePackage,
|
||||||
|
)
|
||||||
|
|
||||||
|
__all__ = [
|
||||||
|
"build_knowledge_index",
|
||||||
|
"assess_document_impact",
|
||||||
|
"DocumentDescriptor",
|
||||||
|
"KnowledgeIndex",
|
||||||
|
"KnowledgePackage",
|
||||||
|
"ImpactLevel",
|
||||||
|
]
|
||||||
@@ -0,0 +1,111 @@
|
|||||||
|
"""Knowledge Intake — classify a document and assess its impact on existing knowledge.
|
||||||
|
|
||||||
|
The real Knowledge Production is not writing — it is TARGETED UPDATING: when 20 documents arrive,
|
||||||
|
which 5 actually change our knowledge and which 15 are ignorable? Intake answers this deterministically
|
||||||
|
by intersecting a document's signals (declared regulations + keywords) with an index of the existing
|
||||||
|
knowledge (capabilities, playbooks, transition patterns, reference scenarios, injected obligations).
|
||||||
|
It performs NO content extraction (that is the later parser stage) and uses NO LLM.
|
||||||
|
|
||||||
|
Pipeline: Knowledge Intake -> Knowledge Package -> Parser -> Draft Generator -> Review -> Published.
|
||||||
|
Pure, deterministic, computed-not-stored. No new corpus/meta-model class (freeze v1.0). Python 3.9.
|
||||||
|
"""
|
||||||
|
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
from typing import Any, Dict, List, Optional, Set
|
||||||
|
|
||||||
|
from .schemas import DocumentDescriptor, ImpactLevel, KnowledgeIndex, KnowledgePackage
|
||||||
|
|
||||||
|
|
||||||
|
def _targets(goal_to: Any) -> List[str]:
    """Pull the target regulation names out of a ``transition_goal.to`` value.

    Args:
        goal_to: Either a single target or a list of targets. Only dict entries
            are considered; anything else (including ``None``) is ignored.

    Returns:
        One string per dict entry that carries a truthy ``regulation``,
        ``target`` or ``framework`` value (first truthy key in that order wins),
        in input order.
    """
    candidates = goal_to if isinstance(goal_to, list) else [goal_to]
    named = (
        entry.get("regulation") or entry.get("target") or entry.get("framework")
        for entry in candidates
        if isinstance(entry, dict)
    )
    return [str(value) for value in named if value]
|
||||||
|
|
||||||
|
|
||||||
|
def build_knowledge_index(
    patterns: List[Dict[str, Any]],
    playbooks: List[Dict[str, Any]],
    reference_scenarios: List[Dict[str, Any]],
    obligation_index: Optional[Dict[str, List[str]]] = None,
) -> KnowledgeIndex:
    """Build the matching index from knowledge dicts the caller has already loaded.

    Args:
        patterns: Transition-pattern dicts. Reads ``id``, ``transition_goal.to``,
            ``likely_covered`` and ``delta_requirements``.
        playbooks: Playbook dicts. Reads ``capability_id``.
        reference_scenarios: Scenario dicts. Reads ``id`` and ``transition_goal.to``.
        obligation_index: Optional regulation -> obligation ids mapping.

    Returns:
        A ``KnowledgeIndex``. Its ``regulations`` list is the sorted union of
        every target seen in patterns, scenarios and capabilities plus the keys
        of ``obligation_index``.
    """
    pattern_targets: Dict[str, List[str]] = {}
    capability_targets: Dict[str, List[str]] = {}
    for pattern in patterns:
        pattern_id = str(pattern.get("id", ""))
        goal_targets = _targets(pattern.get("transition_goal", {}).get("to"))
        if pattern_id:
            pattern_targets[pattern_id] = goal_targets
        requirements = list(pattern.get("likely_covered", [])) + list(pattern.get("delta_requirements", []))
        for requirement in requirements:
            capability = requirement.get("capability")
            if not capability:
                continue
            key = str(capability)
            declared = [str(t) for t in requirement.get("covers_targets", [])]
            # A requirement without its own covers_targets inherits the pattern's targets.
            merged = set(capability_targets.get(key, [])) | set(declared or goal_targets)
            capability_targets[key] = sorted(merged)

    scenario_targets: Dict[str, List[str]] = {}
    for scenario in reference_scenarios:
        scenario_id = str(scenario.get("id", ""))
        resolved = _targets(scenario.get("transition_goal", {}).get("to"))
        if scenario_id:  # scenarios without an id are not indexed
            scenario_targets[scenario_id] = resolved

    obligations = obligation_index or {}

    known = set(obligations.keys())
    for mapping in (pattern_targets, scenario_targets, capability_targets):
        for targets in mapping.values():
            known.update(targets)

    playbook_caps = {str(pb.get("capability_id", "")) for pb in playbooks}
    playbook_caps.discard("")

    return KnowledgeIndex(
        regulations=sorted(known),
        capability_regulations=capability_targets,
        playbook_capabilities=sorted(playbook_caps),
        transition_patterns=pattern_targets,
        reference_scenarios=scenario_targets,
        obligation_index=dict(obligations),
    )
|
||||||
|
|
||||||
|
|
||||||
|
def _kw_match(keywords: Set[str], capability: str) -> bool:
    """Tell whether any keyword names the capability or one of its ``_``-separated tokens.

    The capability id is lower-cased before comparison; ``keywords`` is used as given.
    """
    lowered = capability.lower()
    if lowered in keywords:
        return True
    return not keywords.isdisjoint(lowered.split("_"))
|
||||||
|
|
||||||
|
|
||||||
|
def assess_document_impact(descriptor: DocumentDescriptor, index: KnowledgeIndex) -> KnowledgePackage:
    """Classify the document and compute which existing knowledge it probably touches, and how much.

    Args:
        descriptor: Signals of the incoming document (declared regulations, keywords, type).
        index: Index of the existing knowledge to intersect with.

    Returns:
        A ``KnowledgePackage`` listing the affected capabilities, playbooks,
        transition patterns, reference scenarios and obligations, plus an impact
        level, a summary and a recommendation.

    Impact level rules, in order:
      * NEW_DOMAIN: the document declares regulations and none of them is known.
      * NONE: nothing at all is affected.
      * HIGH: at least 3 capabilities, or any playbook, or at least 5 obligations.
      * LOW: everything else.

    A new-domain document can still hit existing capabilities through its
    keywords. The recommendation therefore only says that no existing knowledge
    is affected when that is actually true; otherwise it points at the affected
    items, so it no longer contradicts the ``affected_*`` lists in the same package.
    """
    doc_regs = set(descriptor.regulations)
    known = set(index.regulations)
    unknown = sorted(doc_regs - known)
    new_domain = bool(doc_regs) and not (doc_regs & known)
    kw = {k.lower() for k in descriptor.keywords}

    # A capability is affected by a shared regulation OR by a keyword hit on its id.
    caps = sorted(c for c, regs in index.capability_regulations.items() if (set(regs) & doc_regs) or _kw_match(kw, c))
    playbooks = sorted(set(caps) & set(index.playbook_capabilities))
    patterns = sorted(pid for pid, regs in index.transition_patterns.items() if set(regs) & doc_regs)
    scenarios = sorted(rid for rid, regs in index.reference_scenarios.items() if set(regs) & doc_regs)
    obligations = sorted({o for r in doc_regs for o in index.obligation_index.get(r, [])})

    total = len(caps) + len(playbooks) + len(patterns) + len(scenarios) + len(obligations)
    if new_domain:
        level = ImpactLevel.NEW_DOMAIN
        if total == 0:
            rec = "Neue Domäne — Corpus-Intake nötig (kein bestehendes Wissen betroffen)."
        else:
            # Keyword hits exist: do not claim that nothing is affected.
            rec = "Neue Domäne — Corpus-Intake nötig (berührt dennoch bestehendes Wissen — siehe betroffene Bausteine)."
    elif total == 0:
        level, rec = ImpactLevel.NONE, "Wahrscheinlich ignorierbar — betrifft keinen bekannten Wissensbaustein."
    elif len(caps) >= 3 or playbooks or len(obligations) >= 5:
        level, rec = ImpactLevel.HIGH, "Gezielter Review priorisieren — hoher Impact auf bestehendes Wissen."
    else:
        level, rec = ImpactLevel.LOW, "Gezielter Review — geringer, eingegrenzter Impact."

    summary = "Betrifft %d Capabilities, %d Playbooks, %d Patterns, %d Reference Scenarios, %d Obligations; %s." % (
        len(caps), len(playbooks), len(patterns), len(scenarios), len(obligations),
        "NEUE Domäne" if new_domain else "keine neue Domäne",
    )
    return KnowledgePackage(
        document_id=descriptor.document_id,
        classification={"regulations": sorted(doc_regs), "keywords": sorted(kw),
                        "document_type": [descriptor.document_type] if descriptor.document_type else []},
        new_domain=new_domain, unknown_regulations=unknown,
        affected_capabilities=caps, affected_playbooks=playbooks,
        affected_transition_patterns=patterns, affected_reference_scenarios=scenarios,
        affected_obligations=obligations, impact_level=level,
        impact_summary=summary, recommendation=rec,
    )
|
||||||
@@ -0,0 +1,62 @@
|
|||||||
|
"""Schemas for Knowledge Intake — classify a new document and assess its IMPACT (no extraction yet).
|
||||||
|
|
||||||
|
Before the parser/draft stages, Intake answers „welche Teile unseres Wissensbestands sind überhaupt
|
||||||
|
betroffen?". It does NOT extract content — it only classifies the document and intersects its signals
|
||||||
|
with an index of the existing knowledge (capabilities, playbooks, transition patterns, reference
|
||||||
|
scenarios, injected obligations) to emit a `KnowledgePackage` (an impact analysis). Deterministic,
|
||||||
|
computed-not-stored, no new corpus, no new meta-model class (freeze v1.0). Python 3.9 compatible.
|
||||||
|
"""
|
||||||
|
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
from enum import Enum
|
||||||
|
from typing import Dict, List
|
||||||
|
|
||||||
|
from pydantic import BaseModel, Field
|
||||||
|
|
||||||
|
|
||||||
|
class ImpactLevel(str, Enum):
    # Touches nothing known -> likely ignorable.
    NONE = "none"
    # Touches a little -> targeted review.
    LOW = "low"
    # Touches a lot -> prioritise review.
    HIGH = "high"
    # References only unknown regulations -> domain intake.
    NEW_DOMAIN = "new_domain"
|
||||||
|
|
||||||
|
|
||||||
|
class DocumentDescriptor(BaseModel):
    """Lightweight signals of an incoming document — NO content body, only classification inputs."""

    document_id: str
    title: str = ""

    # Issuer of the document, e.g. BSI, ENISA, EU.
    source: str = ""

    # Kind of document, e.g. guidance, faq, regulation, recommendation.
    document_type: str = ""

    # Regulations the document declares that it references.
    regulations: List[str] = Field(default_factory=list)

    # Lightweight topic signals (e.g. sbom).
    keywords: List[str] = Field(default_factory=list)

    product_types: List[str] = Field(default_factory=list)
|
||||||
|
|
||||||
|
|
||||||
|
class KnowledgeIndex(BaseModel):
    """A deterministic index of the EXISTING knowledge to match an incoming document against."""

    # Every regulation the corpus knows.
    regulations: List[str] = Field(default_factory=list)

    # capability -> covers_targets.
    capability_regulations: Dict[str, List[str]] = Field(default_factory=dict)

    # Capabilities that HAVE a playbook.
    playbook_capabilities: List[str] = Field(default_factory=list)

    # pattern_id -> target regulations.
    transition_patterns: Dict[str, List[str]] = Field(default_factory=dict)

    # rts_id -> regulations.
    reference_scenarios: Dict[str, List[str]] = Field(default_factory=dict)

    # regulation -> obligation ids (INJECTED by the caller).
    obligation_index: Dict[str, List[str]] = Field(default_factory=dict)
|
||||||
|
|
||||||
|
|
||||||
|
class KnowledgePackage(BaseModel):
    """The impact analysis for one document — what of our knowledge it probably touches, and how much."""

    document_id: str

    # Echo of the document's regulations / keywords / document types.
    classification: Dict[str, List[str]] = Field(default_factory=dict)

    new_domain: bool = False
    unknown_regulations: List[str] = Field(default_factory=list)

    # The existing knowledge items the document probably touches, per kind.
    affected_capabilities: List[str] = Field(default_factory=list)
    affected_playbooks: List[str] = Field(default_factory=list)
    affected_transition_patterns: List[str] = Field(default_factory=list)
    affected_reference_scenarios: List[str] = Field(default_factory=list)
    affected_obligations: List[str] = Field(default_factory=list)

    impact_level: ImpactLevel = ImpactLevel.NONE
    impact_summary: str = ""
    recommendation: str = ""
|
||||||
@@ -0,0 +1,19 @@
|
|||||||
|
"""Knowledge Production — deterministically prepare the corpus, then curate it.
|
||||||
|
|
||||||
|
The corpus is not written by hand: the Playbook Draft Generator structures drafts from data the
|
||||||
|
software already owns (Transition Pattern + leverage + injected Execution controls), leaving the
|
||||||
|
practitioner know-how as TODO for expert review. Mirrors the legal pipeline (Parser -> Review).
|
||||||
|
Deterministic, no LLM in core, no new corpus, no new meta-model class (freeze v1.0).
|
||||||
|
"""
|
||||||
|
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
from .engine import drafts_from_pattern, generate_playbook_draft
|
||||||
|
from .schemas import DraftStatus, PlaybookDraft
|
||||||
|
|
||||||
|
__all__ = [
|
||||||
|
"generate_playbook_draft",
|
||||||
|
"drafts_from_pattern",
|
||||||
|
"PlaybookDraft",
|
||||||
|
"DraftStatus",
|
||||||
|
]
|
||||||
@@ -0,0 +1,91 @@
|
|||||||
|
"""Knowledge Production — the Playbook Draft Generator (deterministic assembly + expert review).
|
||||||
|
|
||||||
|
Mirrors the legal pipeline (Gesetz -> Parser -> Obligation -> Review) for BreakPilot's OWN knowledge:
|
||||||
|
new Capability -> Registry -> Transition Pattern -> **Playbook Draft Generator** -> Expert Review ->
|
||||||
|
versioned Playbook. The generator does not WRITE playbooks — it STRUCTURES drafts from data the
|
||||||
|
software already owns (a transition/convergence pattern's delta requirement: why_asked, covers_targets,
|
||||||
|
expected_evidence) plus injected Execution controls. The practitioner know-how (tools / process steps /
|
||||||
|
how others do it) is left as an explicit TODO for the expert (or a separate offline-propose step).
|
||||||
|
|
||||||
|
Fully deterministic, NO LLM in the core (deterministic-first: any model enrichment is offline,
|
||||||
|
advisory, never in this assembly). No new corpus, no new meta-model class (freeze v1.0). Python 3.9.
|
||||||
|
"""
|
||||||
|
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
from typing import Any, Dict, List, Optional
|
||||||
|
|
||||||
|
from .schemas import DraftStatus, PlaybookDraft
|
||||||
|
|
||||||
|
# Practitioner know-how fields: never machine-filled, always left to the
# expert (or a separate offline-propose step).
_SOFT_FIELDS = [
    "tools",
    "process_steps",
    "how_others_do_it",
]

# Notice attached to every generated draft. Built from its two sentences,
# joined by a single space.
_DISCLAIMER = " ".join(
    (
        "Maschinell assemblierter ENTWURF aus vorhandenen Daten (Transition Pattern + Leverage + injizierte Controls).",
        "KEINE normative Anforderung; erfordert fachliche Kuratierung (TODO-Felder) und Statuswechsel draft_generated -> reviewed -> validated.",
    )
)
|
||||||
|
|
||||||
|
|
||||||
|
def generate_playbook_draft(
    capability_id: str,
    requirement: Optional[Dict[str, Any]] = None,
    control_links: Optional[List[str]] = None,
) -> PlaybookDraft:
    """Assemble a playbook draft for ONE capability from a pattern delta requirement (deterministic).

    Args:
        capability_id: Capability the draft is for; also used (with underscores
            replaced by spaces) as the draft title.
        requirement: A delta_requirement dict. Reads ``why_asked`` (falling back
            to ``missing_because``), ``covers_targets`` and ``expected_evidence``.
            ``None`` is treated as an empty dict.
        control_links: Injected Execution controls; defaults to none.

    Returns:
        A ``PlaybookDraft`` in status ``DRAFT_GENERATED``. Filled fields are
        recorded in ``provenance``. A missing ``why`` or ``expected_evidence``
        is listed in ``todo``, as are the soft practitioner fields, which are
        always listed.

    The provenance of ``why`` names the key the text was actually taken from
    (``why_asked`` or ``missing_because``). ``covers_targets`` and
    ``expected_evidence`` may be explicitly ``None`` and are then treated as empty.
    """
    req = requirement or {}

    # Remember WHICH key supplied the text so the provenance entry is truthful.
    why_key = next((key for key in ("why_asked", "missing_because") if req.get(key)), None)
    why = str(req[why_key]) if why_key else ""
    closes = sorted({str(t) for t in (req.get("covers_targets") or [])})
    evidence = [str(e) for e in (req.get("expected_evidence") or [])]
    controls = list(control_links or [])

    provenance: Dict[str, str] = {}
    todo: List[str] = []
    if why:
        provenance["why"] = "transition_pattern:" + str(why_key)
    else:
        todo.append("why")
    if closes:
        provenance["closes_regulations"] = "leverage:covers_targets"
    if evidence:
        provenance["expected_evidence"] = "transition_pattern:expected_evidence"
    else:
        todo.append("expected_evidence")
    if controls:
        provenance["typical_controls"] = "execution:control_links"
    todo.extend(_SOFT_FIELDS)  # always expert-owned

    return PlaybookDraft(
        capability_id=capability_id,
        status=DraftStatus.DRAFT_GENERATED,
        title=capability_id.replace("_", " "),
        why=why,
        closes_regulations=closes,
        expected_evidence=evidence,
        typical_controls=controls,
        provenance=provenance,
        todo=todo,
        disclaimer=_DISCLAIMER,
    )
|
||||||
|
|
||||||
|
|
||||||
|
def drafts_from_pattern(
    pattern: Dict[str, Any],
    control_links_by_cap: Optional[Dict[str, List[str]]] = None,
) -> List[PlaybookDraft]:
    """Produce one playbook draft for each delta requirement of a pattern that names a capability.

    Args:
        pattern: A transition/convergence pattern dict; its ``delta_requirements``
            entries are read in order.
        control_links_by_cap: Optional capability id -> injected control links.

    Returns:
        Drafts in pattern order. Requirements without a truthy ``capability``
        are skipped.
    """
    controls_by_capability = control_links_by_cap or {}
    named_requirements = (
        (str(requirement.get("capability")), requirement)
        for requirement in pattern.get("delta_requirements", [])
        if requirement.get("capability")
    )
    return [
        generate_playbook_draft(capability, requirement, controls_by_capability.get(capability))
        for capability, requirement in named_requirements
    ]
|
||||||
@@ -0,0 +1,46 @@
|
|||||||
|
"""Schemas for Knowledge Production — deterministic draft assembly + lifecycle.
|
||||||
|
|
||||||
|
The corpus is no longer written by hand: it is deterministically PREPARED from data the software
|
||||||
|
already owns (Capability, Transition Pattern, Controls, Evidence, leverage), then curated by an
|
||||||
|
expert. A `PlaybookDraft` is a machine-assembled skeleton with per-field provenance and an explicit
|
||||||
|
TODO list of what still needs human (or offline-propose) input. No LLM in the deterministic core.
|
||||||
|
Python 3.9 compatible (no `|` unions).
|
||||||
|
"""
|
||||||
|
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
from enum import Enum
|
||||||
|
from typing import Dict, List
|
||||||
|
|
||||||
|
from pydantic import BaseModel, Field
|
||||||
|
|
||||||
|
|
||||||
|
class DraftStatus(str, Enum):
    """Freigabestatus — the knowledge lifecycle from machine draft to proven (mirrors the
    transition-pattern / playbook maturity, with a machine-assembled pre-stage)."""

    # Machine-assembled, NOT yet touched by an expert.
    DRAFT_GENERATED = "draft_generated"
    # An expert is curating it.
    IN_REVIEW = "in_review"
    # Internally reviewed.
    REVIEWED = "reviewed"
    # Confirmed by a domain expert.
    VALIDATED = "validated"
    # Confirmed in the field.
    PROVEN = "proven"
|
||||||
|
|
||||||
|
|
||||||
|
class PlaybookDraft(BaseModel):
    """A deterministically assembled playbook draft for one capability.

    Owned fields (why / closes_regulations / expected_evidence / typical_controls) are filled from
    existing data with provenance; the practitioner know-how (tools / process_steps / how_others)
    is left as TODO. The expert reviews a draft instead of writing from a blank page.
    """

    capability_id: str
    status: DraftStatus = DraftStatus.DRAFT_GENERATED
    title: str = ""

    # Taken from the transition pattern (why_asked / missing_because).
    why: str = ""

    # Taken from leverage (covers_targets).
    closes_regulations: List[str] = Field(default_factory=list)

    # Taken from the transition pattern.
    expected_evidence: List[str] = Field(default_factory=list)

    # Injected from Execution; may be empty.
    typical_controls: List[str] = Field(default_factory=list)

    # field -> source it was assembled from.
    provenance: Dict[str, str] = Field(default_factory=dict)

    # Fields the expert / offline-propose step must still add.
    todo: List[str] = Field(default_factory=list)

    # States that this is a machine draft requiring expert curation.
    disclaimer: str = ""
|
||||||
@@ -0,0 +1,29 @@
|
|||||||
|
"""Product Regulatory Navigator — thin missing-facts layer.
|
||||||
|
|
||||||
|
Sits above the CanonicalProductRegulatoryProfile (prefilled from company-profile /
|
||||||
|
ProductWizard) and reports only which facts are still missing + prioritized
|
||||||
|
questions to collect them. It decides which facts are needed, NOT what regulation
|
||||||
|
applies — that stays with the Scope Engine (step 3). No regulation logic, no UI,
|
||||||
|
no Go, no RAG.
|
||||||
|
"""
|
||||||
|
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
from .engine import CompletenessSummary, NavigatorResult, apply_answers, navigate
|
||||||
|
from .questions import (
|
||||||
|
QUESTION_CATALOG,
|
||||||
|
AnswerType,
|
||||||
|
NavigatorQuestion,
|
||||||
|
QuestionPriority,
|
||||||
|
)
|
||||||
|
|
||||||
|
__all__ = [
|
||||||
|
"navigate",
|
||||||
|
"apply_answers",
|
||||||
|
"NavigatorResult",
|
||||||
|
"CompletenessSummary",
|
||||||
|
"NavigatorQuestion",
|
||||||
|
"AnswerType",
|
||||||
|
"QuestionPriority",
|
||||||
|
"QUESTION_CATALOG",
|
||||||
|
]
|
||||||
@@ -0,0 +1,116 @@
|
|||||||
|
"""Product Regulatory Navigator engine — missing-facts only.
|
||||||
|
|
||||||
|
`navigate(profile)` reports which canonical fields are still unknown and the
|
||||||
|
prioritized questions to fill them. `apply_answers(profile, answers)` returns the
|
||||||
|
updated profile. It NEVER decides what applies — that is the Scope Engine (step 3).
|
||||||
|
Pure field-presence checking; no scope-engine import, no regulation evaluation.
|
||||||
|
"""
|
||||||
|
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
from typing import Any, Dict, List, Type
|
||||||
|
|
||||||
|
from pydantic import BaseModel, Field
|
||||||
|
|
||||||
|
from compliance.profile.canonical import (
|
||||||
|
CanonicalLifecyclePhase,
|
||||||
|
CanonicalProductRegulatoryProfile,
|
||||||
|
EconomicOperatorRole,
|
||||||
|
ProductComponent,
|
||||||
|
)
|
||||||
|
|
||||||
|
from .questions import QUESTION_CATALOG, NavigatorQuestion, QuestionPriority
|
||||||
|
|
||||||
|
# Canonical profile fields whose answers must become enum members: maps the
# target field name to the enum class that `_coerce` calls with the raw answer.
_ENUM_FIELDS: Dict[str, Type[Any]] = {
    "economic_operator_role": EconomicOperatorRole,
    "lifecycle_phase": CanonicalLifecyclePhase,
}
|
||||||
|
|
||||||
|
|
||||||
|
# Counts of answered vs. still-missing catalog facts for one profile, as filled in by `navigate`.
class CompletenessSummary(BaseModel):
    total_relevant: int  # number of questions considered (navigate passes the catalog size)
    answered: int  # total_relevant minus missing
    missing: int  # questions whose target field is still unknown
    missing_by_priority: Dict[str, int] = Field(default_factory=dict)  # priority value -> open count
    ready_for_scope: bool  # True once no P0 fact is missing
    note: str = ""  # human-readable one-line summary
|
||||||
|
|
||||||
|
|
||||||
|
# Result of `navigate`: the open facts, the questions that would fill them, and the counts.
class NavigatorResult(BaseModel):
    missing_facts: List[str] = Field(default_factory=list)  # canonical target fields
    suggested_questions: List[NavigatorQuestion] = Field(default_factory=list)  # open questions, priority-ordered
    completeness_summary: CompletenessSummary
|
||||||
|
|
||||||
|
|
||||||
|
def _value(profile: CanonicalProductRegulatoryProfile, dotted: str) -> Any:
|
||||||
|
if "." in dotted:
|
||||||
|
head, tail = dotted.split(".", 1)
|
||||||
|
return getattr(getattr(profile, head), tail, None)
|
||||||
|
return getattr(profile, dotted, None)
|
||||||
|
|
||||||
|
|
||||||
|
def _is_unknown(profile: CanonicalProductRegulatoryProfile, q: NavigatorQuestion) -> bool:
    """Tell whether the fact targeted by question ``q`` is still missing.

    A fact counts as missing when its profile value is ``None`` or an empty
    list. Any other value, including ``False``, counts as known.
    """
    current = _value(profile, q.target_field)
    return current is None or (isinstance(current, list) and not current)
|
||||||
|
|
||||||
|
|
||||||
|
def navigate(profile: CanonicalProductRegulatoryProfile) -> NavigatorResult:
    """Report which catalog facts the profile still lacks.

    Every catalog question whose target field is unknown is collected and
    ordered by priority rank (the sort is stable, so catalog order is kept
    within one priority). The summary counts the open questions per priority
    and marks the profile ready for the scope step when no P0 question is open.
    """
    open_questions = sorted(
        (question for question in QUESTION_CATALOG if _is_unknown(profile, question)),
        key=lambda question: question.order(),
    )

    open_by_priority: Dict[str, int] = {}
    for question in open_questions:
        level = question.priority.value
        open_by_priority[level] = open_by_priority.get(level, 0) + 1
    scope_ready = QuestionPriority.P0.value not in open_by_priority

    catalog_size = len(QUESTION_CATALOG)
    open_count = len(open_questions)
    known_count = catalog_size - open_count
    readiness = "ja" if scope_ready else "nein (P0 fehlt)"

    return NavigatorResult(
        missing_facts=[question.target_field for question in open_questions],
        suggested_questions=open_questions,
        completeness_summary=CompletenessSummary(
            total_relevant=catalog_size,
            answered=known_count,
            missing=open_count,
            missing_by_priority=open_by_priority,
            ready_for_scope=scope_ready,
            note=(
                "%d von %d Fakten vorhanden; %d offen. Scope-Engine startklar: %s."
                % (known_count, catalog_size, open_count, readiness)
            ),
        ),
    )
|
||||||
|
|
||||||
|
|
||||||
|
def _coerce(q: NavigatorQuestion, value: Any) -> Any:
    """Convert one raw answer into the value stored on the profile.

    Args:
        q: the catalog question being answered; its ``target_field`` and
            ``answer_type`` select the conversion.
        value: the raw answer.

    Returns:
        * enum fields (see ``_ENUM_FIELDS``): the enum member built from ``value``;
        * ``components``: a list where dict items are expanded into ``ProductComponent``;
        * ``country_list`` / ``multiselect``: a list; a single string becomes a
          one-element list instead of being split into characters;
        * ``bool``: a bool; common textual answers such as "false", "no",
          "nein" or "0" map to ``False`` instead of being treated as truthy
          non-empty strings;
        * anything else: ``value`` unchanged.

    Raises:
        ValueError: from the enum constructor when ``value`` is not a valid member.
    """
    if q.target_field in _ENUM_FIELDS:
        return _ENUM_FIELDS[q.target_field](value)
    if q.target_field == "components":
        return [c if isinstance(c, ProductComponent) else ProductComponent(**c) for c in (value or [])]

    kind = q.answer_type.value
    if kind in {"country_list", "multiselect"}:
        if isinstance(value, str):
            # list("DE") would yield ["D", "E"]; a lone string is one entry.
            return [value] if value else []
        return list(value or [])
    if kind == "bool":
        if isinstance(value, str):
            # bool("false") is True in Python, so textual answers are parsed explicitly.
            token = value.strip().lower()
            if token in ("true", "1", "yes", "y", "ja", "j", "on"):
                return True
            if token in ("false", "0", "no", "n", "nein", "off", ""):
                return False
        # Unrecognised strings and non-string values keep plain truthiness.
        return bool(value)
    return value
|
||||||
|
|
||||||
|
|
||||||
|
def apply_answers(
    profile: CanonicalProductRegulatoryProfile, answers: Dict[str, Any]
) -> CanonicalProductRegulatoryProfile:
    """Return a deep copy of ``profile`` with the given answers written in.

    ``answers`` maps question ids to raw answers. Ids that are not in the
    catalog and answers that are ``None`` are skipped. Every other answer is
    coerced and assigned to the question's target field. For a dotted target
    the part before the first dot names the sub-object and the rest names the
    attribute set on it. The input profile itself is not modified.
    """
    catalog = {entry.question_id: entry for entry in QUESTION_CATALOG}
    result = profile.model_copy(deep=True)

    for answered_id, raw_answer in answers.items():
        question = catalog.get(answered_id)
        if question is None or raw_answer is None:
            continue
        coerced = _coerce(question, raw_answer)
        owner_name, has_dot, leaf_name = question.target_field.partition(".")
        if has_dot:
            setattr(getattr(result, owner_name), leaf_name, coerced)
        else:
            setattr(result, question.target_field, coerced)

    return result
|
||||||
@@ -0,0 +1,171 @@
|
|||||||
|
"""Product Regulatory Navigator — question catalog.
|
||||||
|
|
||||||
|
The Navigator is a THIN missing-facts layer over CanonicalProductRegulatoryProfile.
|
||||||
|
It does NOT decide what applies — `regulatory_domains_unblocked` is static metadata
|
||||||
|
(which domains a fact would help the Scope Engine decide later), never an
|
||||||
|
evaluation. No regulation logic, no UI, no Go, no RAG.
|
||||||
|
|
||||||
|
`NavigatorQuestion` is an interaction type, NOT a compliance-meta-model class
|
||||||
|
(architecture freeze v1.0 untouched).
|
||||||
|
"""
|
||||||
|
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
from enum import Enum
|
||||||
|
from typing import List
|
||||||
|
|
||||||
|
from pydantic import BaseModel, Field
|
||||||
|
|
||||||
|
from compliance.profile.canonical import CanonicalLifecyclePhase, EconomicOperatorRole
|
||||||
|
|
||||||
|
|
||||||
|
# Answer formats a NavigatorQuestion can ask for. Members also subclass `str`,
# so each one compares equal to its plain string value.
class AnswerType(str, Enum):
    BOOL = "bool"
    ENUM = "enum"
    MULTISELECT = "multiselect"
    TEXT = "text"
    COUNTRY_LIST = "country_list"
    COMPONENT_LIST = "component_list"
|
||||||
|
|
||||||
|
|
||||||
|
# Urgency tier of a catalog question; `_PRIORITY_ORDER` assigns the numeric sort rank.
class QuestionPriority(str, Enum):
    P0 = "P0"  # blocks scope: EU-vs-not, role, lifecycle, machine/component
    P1 = "P1"  # unblocks a specific domain: RED, Data Act, environment, security
    P2 = "P2"  # refinement: structured BOM
|
||||||
|
|
||||||
|
|
||||||
|
# Numeric sort rank per priority (lower sorts first); looked up by NavigatorQuestion.order().
_PRIORITY_ORDER = {QuestionPriority.P0: 0, QuestionPriority.P1: 1, QuestionPriority.P2: 2}
|
||||||
|
|
||||||
|
|
||||||
|
# One catalog question: which canonical profile field it fills and how to ask for it.
class NavigatorQuestion(BaseModel):
    question_id: str  # key under which an answer to this question is submitted
    target_field: str  # dotted path into the canonical profile
    label: str  # question text shown to the user
    why_needed: str  # short explanation of why the fact matters
    regulatory_domains_unblocked: List[str] = Field(default_factory=list)  # static metadata, not an evaluation
    answer_type: AnswerType
    options: List[str] = Field(default_factory=list)  # allowed values; the catalog fills it for ENUM questions
    priority: QuestionPriority

    def order(self) -> int:
        """Return the numeric sort rank of this question's priority (P0 is 0 and sorts first)."""
        return _PRIORITY_ORDER[self.priority]
|
||||||
|
|
||||||
|
|
||||||
|
# Option lists for the two enum questions: the values of the canonical enums, in definition order.
_ROLE_OPTIONS = [e.value for e in EconomicOperatorRole]
_PHASE_OPTIONS = [e.value for e in CanonicalLifecyclePhase]
|
||||||
|
|
||||||
|
# The full question catalog, grouped by priority tier. Each entry's `target_field`
# names the canonical profile field (dotted for nested fields) the answer fills.
QUESTION_CATALOG: List[NavigatorQuestion] = [
    # ── P0: block the scope decision itself ───────────────────────────
    NavigatorQuestion(
        question_id="markets",
        target_field="markets",
        label="In welche Märkte / Länder liefern Sie das Produkt?",
        why_needed="Bestimmt EU- vs. Nicht-EU-Anwendbarkeit und nationale Pflichten.",
        # NOTE(review): "environment" is used only here; the other entries use
        # "environment_water" / "environment_air" / "chemicals" — confirm the domain key.
        regulatory_domains_unblocked=["cyber", "machine_safety", "data", "radio", "emv", "environment"],
        answer_type=AnswerType.COUNTRY_LIST,
        priority=QuestionPriority.P0,
    ),
    NavigatorQuestion(
        question_id="economic_operator_role",
        target_field="economic_operator_role",
        label="Welche Rolle nehmen Sie ein?",
        why_needed="Pflichten hängen von der Rolle ab (Hersteller/Importeur/Händler/Betreiber/Service).",
        regulatory_domains_unblocked=["cyber", "machine_safety", "data"],
        answer_type=AnswerType.ENUM,
        options=_ROLE_OPTIONS,
        priority=QuestionPriority.P0,
    ),
    NavigatorQuestion(
        question_id="lifecycle_phase",
        target_field="lifecycle_phase",
        label="In welcher Lebenszyklusphase betrachten Sie das Produkt?",
        why_needed="Manche Pflichten greifen nur beim Inverkehrbringen oder in der Wartung.",
        regulatory_domains_unblocked=["cyber", "machine_safety"],
        answer_type=AnswerType.ENUM,
        options=_PHASE_OPTIONS,
        priority=QuestionPriority.P0,
    ),
    NavigatorQuestion(
        question_id="is_machine",
        target_field="is_machine",
        label="Ist das Produkt eine (vollständige) Maschine?",
        why_needed="Entscheidet die Anwendbarkeit der Maschinenverordnung.",
        regulatory_domains_unblocked=["machine_safety"],
        answer_type=AnswerType.BOOL,
        priority=QuestionPriority.P0,
    ),
    NavigatorQuestion(
        question_id="is_component",
        target_field="is_component",
        label="Ist das Produkt ein Bauteil / eine unvollständige Maschine?",
        why_needed="Sicherheitsbauteil vs. vollständige Maschine ändert die Pflichten.",
        regulatory_domains_unblocked=["machine_safety"],
        answer_type=AnswerType.BOOL,
        priority=QuestionPriority.P0,
    ),
    # ── P1: unblock one specific domain ───────────────────────────────
    NavigatorQuestion(
        question_id="has_radio_module",
        target_field="has_radio_module",
        label="Enthält das Produkt ein Funkmodul (WLAN/Bluetooth/Mobilfunk)?",
        why_needed="Ein Funkmodul löst die Funkanlagen-Richtlinie (RED) aus.",
        regulatory_domains_unblocked=["radio"],
        answer_type=AnswerType.BOOL,
        priority=QuestionPriority.P1,
    ),
    NavigatorQuestion(
        question_id="generates_usage_data",
        target_field="generates_usage_data",
        label="Erzeugt das vernetzte Produkt nutzbare Produkt-/Nutzungsdaten?",
        why_needed="Erzeugte Nutzungsdaten entscheiden über Data-Act-Pflichten.",
        regulatory_domains_unblocked=["data"],
        answer_type=AnswerType.BOOL,
        priority=QuestionPriority.P1,
    ),
    NavigatorQuestion(
        question_id="has_security_function",
        target_field="has_security_function",
        label="Hat das Produkt eine dedizierte Security-Funktion (gegen böswillige Akteure)?",
        why_needed="Trennt Security- von Safety-Funktion (CRA vs. MaschinenVO).",
        regulatory_domains_unblocked=["cyber", "machine_safety"],
        answer_type=AnswerType.BOOL,
        priority=QuestionPriority.P1,
    ),
    NavigatorQuestion(
        question_id="env_wastewater",
        target_field="environmental.discharges_to_wastewater",
        label="Gibt das Produkt Stoffe an Wasser / Abwasser ab?",
        why_needed="Abwassereinleitung löst Abwasser-/Gewässerrecht aus.",
        regulatory_domains_unblocked=["environment_water"],
        answer_type=AnswerType.BOOL,
        priority=QuestionPriority.P1,
    ),
    NavigatorQuestion(
        question_id="env_air",
        target_field="environmental.emits_to_air",
        label="Entstehen Luftemissionen (VOC, Staub, Verbrennung, Aerosole)?",
        why_needed="Luftemissionen lösen Immissionsschutzrecht aus.",
        regulatory_domains_unblocked=["environment_air"],
        answer_type=AnswerType.BOOL,
        priority=QuestionPriority.P1,
    ),
    NavigatorQuestion(
        question_id="env_chemicals",
        target_field="environmental.uses_cleaning_chemicals",
        label="Werden Reinigungs-, Desinfektions- oder Biozidmittel verwendet/mitgeliefert?",
        why_needed="Chemikalien lösen REACH/CLP/Detergenzien-/Biozidrecht aus.",
        regulatory_domains_unblocked=["chemicals"],
        answer_type=AnswerType.BOOL,
        priority=QuestionPriority.P1,
    ),
    # ── P2: refinement ────────────────────────────────────────────────
    NavigatorQuestion(
        question_id="components",
        target_field="components",
        label="Aus welchen wesentlichen Komponenten besteht das Produkt?",
        why_needed="Eine strukturierte Stückliste verfeinert komponenten-abgeleitete Pflichten.",
        regulatory_domains_unblocked=["radio", "emv", "environment_water", "chemicals"],
        answer_type=AnswerType.COMPONENT_LIST,
        priority=QuestionPriority.P2,
    ),
]
|
||||||
@@ -0,0 +1,72 @@
|
|||||||
|
"""Smart Onboarding Advisor — the onboarding runtime step (orchestration over existing engines).
|
||||||
|
|
||||||
|
Turns (company + products + certifications + target) into inferred assumptions, the next best questions
|
||||||
|
(<=5, each self-explaining), the capability delta, top measures, evidence requests and completeness —
|
||||||
|
with NO sales interpretation and NO regulation picking. Orchestrator only: no new engine/registry/
|
||||||
|
meta-model; certificate->capability hypotheses and target requirements are INJECTED.
|
||||||
|
"""
|
||||||
|
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
from .engine import advisor_start, apply_answer
|
||||||
|
from .hypotheses import (
|
||||||
|
CapabilityHypothesis,
|
||||||
|
inferred_hypotheses,
|
||||||
|
resolve_for_certifications,
|
||||||
|
)
|
||||||
|
from .observations import (
|
||||||
|
Observation,
|
||||||
|
ObservationType,
|
||||||
|
empirical_confidence,
|
||||||
|
empirical_distribution,
|
||||||
|
reviewed,
|
||||||
|
)
|
||||||
|
from .signals import (
|
||||||
|
ProducedSignal,
|
||||||
|
SignalVocabularyEntry,
|
||||||
|
normalize_signals,
|
||||||
|
)
|
||||||
|
from .silent_intake import (
|
||||||
|
DetectedCapability,
|
||||||
|
IntakeSignal,
|
||||||
|
ProductFact,
|
||||||
|
SignalMapping,
|
||||||
|
SilentIntakeResult,
|
||||||
|
silent_intake,
|
||||||
|
)
|
||||||
|
from .schemas import (
|
||||||
|
AdvisorMeasure,
|
||||||
|
AdvisorQuestion,
|
||||||
|
AdvisorResult,
|
||||||
|
InferredAssumption,
|
||||||
|
OnboardingInput,
|
||||||
|
RejectedAssumption,
|
||||||
|
)
|
||||||
|
|
||||||
|
__all__ = [
|
||||||
|
"advisor_start",
|
||||||
|
"apply_answer",
|
||||||
|
"OnboardingInput",
|
||||||
|
"AdvisorResult",
|
||||||
|
"AdvisorQuestion",
|
||||||
|
"AdvisorMeasure",
|
||||||
|
"InferredAssumption",
|
||||||
|
"RejectedAssumption",
|
||||||
|
"CapabilityHypothesis",
|
||||||
|
"inferred_hypotheses",
|
||||||
|
"resolve_for_certifications",
|
||||||
|
"Observation",
|
||||||
|
"ObservationType",
|
||||||
|
"empirical_distribution",
|
||||||
|
"empirical_confidence",
|
||||||
|
"reviewed",
|
||||||
|
"silent_intake",
|
||||||
|
"IntakeSignal",
|
||||||
|
"SignalMapping",
|
||||||
|
"DetectedCapability",
|
||||||
|
"ProductFact",
|
||||||
|
"SilentIntakeResult",
|
||||||
|
"ProducedSignal",
|
||||||
|
"SignalVocabularyEntry",
|
||||||
|
"normalize_signals",
|
||||||
|
]
|
||||||
@@ -0,0 +1,159 @@
|
|||||||
|
"""Smart Onboarding Advisor — orchestration over the existing engines (the onboarding runtime step).
|
||||||
|
|
||||||
|
The point of the whole platform, made usable: the user types company + products + certifications +
|
||||||
|
target, and the system does the rest — no sales interpretation, no regulation picking. This is an
|
||||||
|
ORCHESTRATOR, not a new engine: it wires Company 2A (Evidence -> Capability), RS-005 (Capability ->
|
||||||
|
Delta), optimization (Delta -> Roadmap) and completeness into one onboarding flow.
|
||||||
|
|
||||||
|
Three principles it must honour (acceptance criteria):
|
||||||
|
- Multi-cert works; a profile is built from ALL certificates.
|
||||||
|
- relevance(evidence, target): ISO 14001 is NOT falsely relevant to the CRA; ISO 27001/TISAX REDUCE
|
||||||
|
questions but satisfy NOTHING automatically (Welt-1 -> verification_required).
|
||||||
|
- Only the NEXT BEST questions (<= 5), each explaining WHY; every answer updates the profile.
|
||||||
|
|
||||||
|
Certificate -> probable-capability hypotheses and the target's required capabilities are INJECTED (the
|
||||||
|
hypotheses are curated knowledge, not in this code). No corpus loaded here. Python 3.9 compatible.
|
||||||
|
"""
|
||||||
|
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
from typing import Dict, List, Optional, Sequence
|
||||||
|
|
||||||
|
from ..company import (
|
||||||
|
CapabilityMappingEntry,
|
||||||
|
Certification,
|
||||||
|
CompanyCapabilityProfile,
|
||||||
|
CompanyContext,
|
||||||
|
build_company_profile,
|
||||||
|
)
|
||||||
|
from ..completeness import assess_completeness
|
||||||
|
from ..optimization import roadmap_from_delta
|
||||||
|
from ..reasoning.enums import Confidence
|
||||||
|
from ..transition_reasoning import (
|
||||||
|
CoverageStatus,
|
||||||
|
TargetRequirement,
|
||||||
|
TransitionContext,
|
||||||
|
TransitionGoal,
|
||||||
|
assess_transition,
|
||||||
|
)
|
||||||
|
from .schemas import (
|
||||||
|
AdvisorMeasure,
|
||||||
|
AdvisorQuestion,
|
||||||
|
AdvisorResult,
|
||||||
|
InferredAssumption,
|
||||||
|
OnboardingInput,
|
||||||
|
RejectedAssumption,
|
||||||
|
)
|
||||||
|
|
||||||
|
# Additive score weights used when ranking questions in advisor_start:
# _GAIN is looked up by a request's information_gain value (fallback 1),
# _RISK by its priority value (fallback 0).
_GAIN = {"high": 3, "medium": 2, "low": 1}
_RISK = {"high": 2, "medium": 1, "low": 0}
|
||||||
|
|
||||||
|
|
||||||
|
def _profile(
    inp: OnboardingInput, cert_hypotheses: Dict[str, List[str]],
    detected: Optional[Sequence[str]] = None,
) -> CompanyCapabilityProfile:
    """Build the company capability profile from held certificates and detected capabilities.

    Each hypothesis entry whose certificate the company holds and whose
    capability list is non-empty becomes a MEDIUM-confidence mapping. When
    ``detected`` is non-empty, its de-duplicated capabilities are added under
    the synthetic certificate id ``"__detected__"`` with HIGH confidence.
    """
    mapping = {}
    held = []
    for cert, caps in cert_hypotheses.items():
        if cert not in inp.certifications or not caps:
            continue
        mapping[cert] = CapabilityMappingEntry(capability_ids=list(caps), confidence=Confidence.MEDIUM)
        held.append(Certification(certification_id=cert))

    if detected:  # Silent Pass: concrete findings -> HIGH confidence
        unique_detected = list(dict.fromkeys(detected))
        mapping["__detected__"] = CapabilityMappingEntry(
            capability_ids=unique_detected, confidence=Confidence.HIGH)
        held.append(Certification(certification_id="__detected__"))

    context = CompanyContext(company_id=inp.company or "company", certifications=held)
    return build_company_profile(context, mapping)
|
||||||
|
|
||||||
|
|
||||||
|
def advisor_start(
    inp: OnboardingInput,
    cert_hypotheses: Dict[str, List[str]],
    target_requirements: Sequence[TargetRequirement],
    target_id: str = "target",
    covers_targets: Optional[Dict[str, List[str]]] = None,
    corpus_status: Optional[Dict[str, str]] = None,
    uncertain: Optional[List[Dict[str, str]]] = None,
    detected_capabilities: Optional[Sequence[str]] = None,
    indicative_capabilities: Optional[Sequence[str]] = None,
) -> AdvisorResult:
    """Run one onboarding pass: certs (+ detected capabilities) -> profile -> delta -> questions + measures.

    Args:
        inp: the onboarding input (company, certifications, known evidence, target).
        cert_hypotheses: certificate id -> capability ids it probably provides.
        target_requirements: the capabilities the target requires.
        target_id: id of the transition goal; also the fallback when ``inp.target`` is empty.
        covers_targets: capability id -> list whose length is added to a question's score as leverage.
        corpus_status: passed through to the completeness assessment.
        uncertain: passed through to the completeness assessment.
        detected_capabilities: capabilities fed into the profile without asking.
        indicative_capabilities: partial signals; reported but NOT fed into the profile.

    Returns:
        An ``AdvisorResult`` with inferred and rejected certificate assumptions, auto-detected
        capabilities, indications, at most five ranked questions, the missing-capability delta,
        at most five measures, the evidence requests, the unsupported domains, the completeness
        summary and a headline string.
    """
    leverage_by_cap = covers_targets or {}
    required = {req.capability_id for req in target_requirements}
    profile = _profile(inp, cert_hypotheses, detected_capabilities)

    auto_detected = sorted(set(detected_capabilities or []) & required)
    # Indicative signals never enter the profile, so their gaps stay open and are still asked.
    # Report only those that are required and not already auto-detected.
    indications = sorted((set(indicative_capabilities or []) & required).difference(auto_detected))

    goal = TransitionGoal(target_id=target_id)
    context = TransitionContext(company_id=inp.company or "company", target=goal)
    assess = assess_transition(context, list(target_requirements), profile)

    # Per held certificate: required capabilities it hints at (inferred), or none at all (rejected).
    inferred: List[InferredAssumption] = []
    rejected: List[RejectedAssumption] = []
    for cert in inp.certifications:
        hinted = set(cert_hypotheses.get(cert, []))
        matching = sorted(hinted & required)
        if matching:
            statement = (
                "%s legt %d relevante Fähigkeit(en) nahe — Verifikation erforderlich, nicht automatisch erfüllt"
                % (cert, len(matching)))
            inferred.append(InferredAssumption(
                certification=cert, capabilities=matching, statement=statement))
            continue
        if hinted:
            rejected.append(RejectedAssumption(
                certification=cert,
                statement="%s ist für dieses Ziel nicht relevant" % cert,
                reason="relevance(evidence, target) = 0 — keine geforderte Fähigkeit abgedeckt"))

    # Rank the question requests: information gain + leverage + risk + missing-evidence bonus.
    known_evidence = set(inp.known_evidence)
    scored = []
    for request in assess.question_requests:
        leverage = len(leverage_by_cap.get(request.capability_id, []))
        if request.expected_evidence and known_evidence.isdisjoint(request.expected_evidence):
            evidence_gap = 1
        else:
            evidence_gap = 0
        total = (
            _GAIN.get(request.information_gain.value, 1)
            + leverage
            + _RISK.get(request.priority.value, 0)
            + evidence_gap
        )
        scored.append((total, request))
    # Highest score first; ties broken by capability id.
    ranked = sorted(scored, key=lambda pair: (-pair[0], pair[1].capability_id))
    next_q = []
    for total, request in ranked[:5]:
        next_q.append(AdvisorQuestion(
            capability_id=request.capability_id, question_intent=request.question_intent,
            why=request.reason, information_value=float(total), priority=request.priority.value))

    delta = sorted({
        item.capability_id for item in assess.coverage if item.status == CoverageStatus.MISSING
    })
    plan = roadmap_from_delta(assess, {cap: leverage_by_cap.get(cap, []) for cap in delta})
    measures = [
        AdvisorMeasure(capability_id=step.capability_id, leverage=step.leverage, closes=step.covers)
        for step in plan.ranked_measures[:5]
    ]
    evidence = sorted({
        item for request in assess.question_requests for item in request.expected_evidence
    })

    applicable = list(inp.target) or [target_id]
    report = assess_completeness(applicable, corpus_status or {}, uncertain=uncertain or [])
    unsupported = [exclusion.subject for exclusion in report.exclusions]

    already_detected = set(auto_detected)
    probably = [cap for cap in assess.summary.probably_covered if cap not in already_detected]
    headline = (
        "%d Anforderungen erkannt · %d automatisch erkannt (Intake) · %d wahrscheinlich (Zertifikate) · %d zu klären"
        % (len(assess.coverage), len(auto_detected), len(probably), len(next_q)))

    return AdvisorResult(
        inferred_assumptions=inferred, rejected_assumptions=rejected, auto_detected=auto_detected,
        indications=indications,
        next_best_questions=next_q, capability_delta=delta, top_measures=measures,
        evidence_requests=evidence, unsupported_domains=unsupported,
        completeness_summary=report.completeness_summary,
        headline=headline)
|
||||||
|
|
||||||
|
|
||||||
|
def apply_answer(known_capabilities: Sequence[str], capability_id: str, answer: str) -> List[str]:
    """Fold one answer into the list of known capabilities.

    The input is de-duplicated with first-seen order preserved. Only the answer
    ``"confirmed"`` adds ``capability_id`` (appended at the end if not already
    present). Any other answer, e.g. ``"rejected"`` or ``"unknown"``, leaves
    the set as it is. A new list is returned and the input is not modified.
    """
    ordered = dict.fromkeys(known_capabilities)
    if answer == "confirmed":
        # setdefault leaves an existing key in place and appends a new one last.
        ordered.setdefault(capability_id)
    return list(ordered)
|
||||||
@@ -0,0 +1,54 @@
|
|||||||
|
"""Certification Capability Hypotheses — capability-centric, with EMPIRICAL (computed) confidence.
|
||||||
|
|
||||||
|
Each hypothesis is its own knowledge object: "IF a company holds one of `supported_by` certs, we EXPECT
|
||||||
|
`capability` (verification required)" — Welt-1, never "erfüllt". Written ONCE per capability with a list
|
||||||
|
of supporting certs (reuse, not redundancy), so multi-certification merges AUTOMATICALLY.
|
||||||
|
|
||||||
|
`confidence` is NOT an expert/LLM score: it is COMPUTED from real-onboarding observations
|
||||||
|
(confirmed / (confirmed+refuted)), `None` until any are seen. This is the empirical learning loop — the
|
||||||
|
long-term moat. The library is DATA, loaded outside this module and injected. Python 3.9 compatible.
|
||||||
|
"""
|
||||||
|
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
from typing import Dict, List, Sequence
|
||||||
|
|
||||||
|
from pydantic import BaseModel, Field
|
||||||
|
|
||||||
|
|
||||||
|
class CapabilityHypothesis(BaseModel):
    """Curated knowledge only. Confidence is NOT stored here — it is computed from the reviewed
    observation stream (see observations.py); a raw answer never changes a hypothesis (review gate)."""

    id: str  # hypothesis identifier
    capability: str  # the capability this hypothesis expects
    supported_by: List[str] = Field(default_factory=list)  # certifications that suggest this capability
    relationship: str = "supports"  # supports / partially_supports
    verification_required: bool = True  # Welt-1: never auto-satisfied
    question_intent: str = "verify_existence"
    expected_evidence: List[str] = Field(default_factory=list)
    kind: str = "shared"  # shared / specific
|
||||||
|
|
||||||
|
|
||||||
|
def inferred_hypotheses(
    certifications: Sequence[str], library: Sequence[CapabilityHypothesis]
) -> List[CapabilityHypothesis]:
    """Select the hypotheses supported by at least one of the company's certifications.

    Library order is preserved. A hypothesis with several supporting
    certificates is returned once, however many of them the company holds.
    """
    held = set(certifications)
    selected: List[CapabilityHypothesis] = []
    for hypothesis in library:
        if not held.isdisjoint(hypothesis.supported_by):
            selected.append(hypothesis)
    return selected
|
||||||
|
|
||||||
|
|
||||||
|
def resolve_for_certifications(
    certifications: Sequence[str], library: Sequence[CapabilityHypothesis]
) -> Dict[str, List[str]]:
    """Turn the capability-centric library into a ``cert -> [capability]`` mapping.

    Only held certifications appear as keys. Each capability list is
    de-duplicated and kept in library order, and the keys of the returned dict
    are in sorted order.
    """
    held = set(certifications)
    by_cert: Dict[str, List[str]] = {}
    for hypothesis in library:
        for cert in hypothesis.supported_by:
            if cert not in held:
                continue
            capabilities = by_cert.setdefault(cert, [])
            if hypothesis.capability not in capabilities:
                capabilities.append(hypothesis.capability)
    return {cert: by_cert[cert] for cert in sorted(by_cert)}
|
||||||
@@ -0,0 +1,85 @@
|
|||||||
|
"""Observation Model — the empirical learning unit (Task 59a: model BEFORE persistence/API).
|
||||||
|
|
||||||
|
The learning point is NOT the hypothesis, it is the QUESTION. A hypothesis ("ISO 27001 suggests supplier
|
||||||
|
management") produces a question ("Is there a documented supplier-security process?"), and the answer is
|
||||||
|
rarely binary — "yes" / "no" / "partial, only critical suppliers" / "certified but not lived" are very
|
||||||
|
different observations. So the chain is:
|
||||||
|
|
||||||
|
Hypothesis -> Question -> Observation -> (Review) -> Hypothesis
|
||||||
|
|
||||||
|
Two principles (durable):
|
||||||
|
- Richer than confirmed/refuted: an Observation carries an `observation_type` (confirmed / partial /
|
||||||
|
refuted / not_applicable / unknown), a free-text answer, a scope_note ("only critical suppliers"),
|
||||||
|
and whether evidence was uploaded.
|
||||||
|
- REVIEW GATE: a raw answer NEVER changes a hypothesis directly. Only REVIEWED observations calibrate;
|
||||||
|
otherwise the system learns from outliers. Hypotheses stay curated knowledge; confidence is COMPUTED
|
||||||
|
from the reviewed observation stream (keyed by hypothesis id), not stored on the hypothesis.
|
||||||
|
|
||||||
|
This module defines the model + the deterministic statistics it enables (a DISTRIBUTION, not a single
|
||||||
|
%). Persistence (store), aggregation across customers and hypothesis calibration are later tasks
|
||||||
|
(59b/c/d). Pure, no I/O. Python 3.9 compatible.
|
||||||
|
"""
|
||||||
|
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
from enum import Enum
|
||||||
|
from typing import Dict, List, Optional, Sequence
|
||||||
|
|
||||||
|
from pydantic import BaseModel, Field
|
||||||
|
|
||||||
|
|
||||||
|
# Outcome categories of one observation. Members also subclass `str`, so each
# one compares equal to its plain string value.
class ObservationType(str, Enum):
    CONFIRMED = "confirmed"
    PARTIAL = "partial"
    REFUTED = "refuted"
    NOT_APPLICABLE = "not_applicable"
    UNKNOWN = "unknown"
|
||||||
|
|
||||||
|
|
||||||
|
class Observation(BaseModel):
    """One real-onboarding answer to one hypothesis-driven question. The raw empirical unit."""

    hypothesis_id: str  # id of the hypothesis the question came from
    capability: str = ""  # denormalised for convenient aggregation
    question: str = ""  # the question that was actually asked
    answer: str = ""  # the customer's raw answer (free text)
    observation_type: ObservationType = ObservationType.UNKNOWN  # classified outcome; UNKNOWN until set
    scope_note: Optional[str] = None  # "only critical suppliers" / "only DE" / "not lived"
    evidence_uploaded: bool = False
    reviewed: bool = False  # the review gate: only reviewed obs calibrate
    reviewed_by: Optional[str] = None
|
||||||
|
|
||||||
|
|
||||||
|
# observation types that count as evidence for/against the capability (n/a + unknown do not)
# NOTE(review): not referenced in the visible part of this module — empirical_confidence spells
# the same three types out itself; confirm whether it is meant to use this tuple.
_FOR_AGAINST = (ObservationType.CONFIRMED, ObservationType.PARTIAL, ObservationType.REFUTED)
|
||||||
|
|
||||||
|
|
||||||
|
def empirical_distribution(
    observations: Sequence[Observation], reviewed_only: bool = True
) -> Dict[str, int]:
    """Count observations per observation type.

    The result has one key per ``ObservationType`` value, so types that never
    occur are reported as 0. With ``reviewed_only`` (the default) observations
    that have not been reviewed are ignored. Pass ``False`` to count all of them.
    """
    counts: Dict[str, int] = {}
    for kind in ObservationType:
        counts[kind.value] = 0
    for observation in observations:
        if reviewed_only and not observation.reviewed:
            continue
        counts[observation.observation_type.value] += 1
    return counts
|
||||||
|
|
||||||
|
|
||||||
|
def empirical_confidence(
    observations: Sequence[Observation], reviewed_only: bool = True
) -> Optional[float]:
    """Compute a confidence score from the observation counts.

    The score is ``(confirmed + 0.5 * partial) / (confirmed + partial + refuted)``,
    rounded to two decimals. ``not_applicable`` and ``unknown`` observations
    are left out of the denominator. Returns ``None`` when there is no
    confirmed, partial or refuted observation to base the score on.
    """
    counts = empirical_distribution(observations, reviewed_only)
    confirmed = counts[ObservationType.CONFIRMED.value]
    partial = counts[ObservationType.PARTIAL.value]
    refuted = counts[ObservationType.REFUTED.value]

    evidence_total = confirmed + partial + refuted
    if evidence_total == 0:
        return None
    return round((confirmed + 0.5 * partial) / evidence_total, 2)
|
||||||
|
|
||||||
|
|
||||||
|
def reviewed(observations: Sequence[Observation]) -> List[Observation]:
    """Return only the observations that passed review, in their original order."""
    kept: List[Observation] = []
    for observation in observations:
        if observation.reviewed:
            kept.append(observation)
    return kept
|
||||||
@@ -0,0 +1,64 @@
|
|||||||
|
"""Schemas for the Smart Onboarding Advisor — the onboarding RUNTIME step.
|
||||||
|
|
||||||
|
DTOs only. The Advisor ORCHESTRATES the existing engines (Company 2A, RS-005, optimization,
|
||||||
|
completeness) — no new reasoning engine, no new capability registry, no new meta-model. Welt-1
|
||||||
|
discipline: a certificate yields PROBABLE capabilities (verification required), never "erfüllt".
|
||||||
|
Python 3.9 compatible (no `|` unions).
|
||||||
|
"""
|
||||||
|
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
from typing import List, Optional
|
||||||
|
|
||||||
|
from pydantic import BaseModel, Field
|
||||||
|
|
||||||
|
|
||||||
|
class OnboardingInput(BaseModel):
    """Input DTO of the onboarding Advisor: what is known about the company up front.

    Every field is optional; list fields default to a fresh empty list per instance.
    """

    company: str = ""
    industry: Optional[str] = None
    products: List[str] = Field(default_factory=list)
    markets: List[str] = Field(default_factory=list)
    certifications: List[str] = Field(default_factory=list)
    known_evidence: List[str] = Field(default_factory=list)
    # Informational only; the delta is computed from injected requirements, not from this list.
    target: List[str] = Field(default_factory=list)
|
||||||
|
|
||||||
|
|
||||||
|
class InferredAssumption(BaseModel):
    """An assumption derived from a certification: capabilities it PROBABLY provides."""

    certification: str
    # Only the capabilities that are relevant to the target and that the certificate probably provides.
    capabilities: List[str] = Field(default_factory=list)
    # Defaults to True: an inferred capability is never treated as satisfied automatically.
    verification_required: bool = True
    statement: str = ""
|
||||||
|
|
||||||
|
|
||||||
|
class RejectedAssumption(BaseModel):
    """An assumption that was considered and discarded, together with the reason."""

    certification: Optional[str] = None
    statement: str = ""
    # Free-text justification, e.g. "relevance(evidence, target) = 0".
    reason: str = ""
|
||||||
|
|
||||||
|
|
||||||
|
class AdvisorQuestion(BaseModel):
    """One question the Advisor proposes to ask, tied to a single capability."""

    capability_id: str
    question_intent: str
    # Mandatory explanation: every question has to justify itself.
    why: str
    # Deterministic score used for ranking the questions.
    information_value: float = 0.0
    priority: str = "medium"
|
||||||
|
|
||||||
|
|
||||||
|
class AdvisorMeasure(BaseModel):
    """A recommended measure (capability to implement) as shown in the Advisor result."""

    capability_id: str
    leverage: int = 0
    # Items this measure closes; NOTE(review): presumably requirement/regulation keys — confirm.
    closes: List[str] = Field(default_factory=list)
|
||||||
|
|
||||||
|
|
||||||
|
class AdvisorResult(BaseModel):
    """Aggregate output DTO of the onboarding Advisor.

    All fields default to empty, so a result can be built up incrementally.
    """

    inferred_assumptions: List[InferredAssumption] = Field(default_factory=list)
    rejected_assumptions: List[RejectedAssumption] = Field(default_factory=list)
    # Detected via a concrete artifact: recognised without asking the customer.
    auto_detected: List[str] = Field(default_factory=list)
    # Partial signal only: strengthens the assumption, but the question is STILL asked.
    indications: List[str] = Field(default_factory=list)
    # Intended to hold at most 5 entries (the limit is not enforced by this schema).
    next_best_questions: List[AdvisorQuestion] = Field(default_factory=list)
    capability_delta: List[str] = Field(default_factory=list)
    top_measures: List[AdvisorMeasure] = Field(default_factory=list)
    evidence_requests: List[str] = Field(default_factory=list)
    unsupported_domains: List[str] = Field(default_factory=list)
    completeness_summary: str = ""
    # One-line summary in the form "N erkannt, M wahrscheinlich abgedeckt, K zu klären".
    headline: str = ""
|
||||||
@@ -0,0 +1,73 @@
|
|||||||
|
"""Signal Producer interface + Normalizer — one signal language, but TWO signal KINDS.
|
||||||
|
|
||||||
|
The platform already HAS scanners (website, repo/code, SBOM, security headers, TLS, SPF/DKIM/DMARC,
|
||||||
|
document analysis, RAG over uploads, product classification). The Silent Pass does not want a
|
||||||
|
WebsiteScanner or a RepoScanner — it wants their UNIFIED output. So every source (a scanner, a PDF
|
||||||
|
parser, a tender parser, an OEM spec, an API, or the user) emits the SAME `ProducedSignal`
|
||||||
|
{signal_id, source_type, kind, confidence, evidence, provenance}, and `normalize_signals` reduces
|
||||||
|
producer-specific ids to ONE canonical signal via a vocabulary (id + aliases + kind) — exactly the
|
||||||
|
Requirement-Source / MCAP / regulation-alias pattern. The Silent Pass then never gets per-scanner logic.
|
||||||
|
|
||||||
|
CRITICAL — a signal is one of two KINDS, and they NEVER substitute for each other:
|
||||||
|
observation = "I SAW X" — a repo with an SBOM, a published security.txt, a risk-assessment PDF.
|
||||||
|
requirement = "someone DEMANDS X" — a tender clause `requires_sbom`, an OEM spec `supplier_requires_psirt`.
|
||||||
|
A demanded SBOM is NOT a present SBOM. `kind` is carried on the canonical VOCABULARY entry (authoritative),
|
||||||
|
so even a mislabelled producer signal cannot collapse the two. The Silent Pass consumes ONLY observations;
|
||||||
|
requirement signals are preserved and feed the required-set / prioritisation later. This Observation-vs-
|
||||||
|
Requirement split is the very one the Requirements Verification Platform rests on: Observations (reality)
|
||||||
|
vs Requirements (targets); their comparison IS the delta. Pure, deterministic, no I/O. Python 3.9 compatible.
|
||||||
|
"""
|
||||||
|
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
from typing import Dict, List, Optional, Sequence
|
||||||
|
|
||||||
|
from pydantic import BaseModel, Field
|
||||||
|
|
||||||
|
from .silent_intake import IntakeSignal
|
||||||
|
|
||||||
|
|
||||||
|
class ProducedSignal(BaseModel):
    """The common record every signal producer emits, whatever its source."""

    # Id as used by the producer; may be a raw (alias) id or already the canonical one.
    signal_id: str
    # Origin category: website / repository / document / product / tender / oem / user / api.
    source_type: str = ""
    # "observation" or "requirement"; left empty it is resolved from the vocabulary.
    kind: str = ""
    confidence: float = 1.0
    # The artifact that was found (already in hand).
    evidence: Optional[str] = None
    # Where the signal came from: url / filename / tender clause / "customer statement".
    provenance: str = ""
|
||||||
|
|
||||||
|
|
||||||
|
class SignalVocabularyEntry(BaseModel):
    """A canonical signal id with its alias ids and its authoritative kind."""

    id: str
    # "observation" (something was SEEN) or "requirement" (something is DEMANDED).
    kind: str = "observation"
    # Producer-specific ids that resolve to `id`.
    aliases: List[str] = Field(default_factory=list)
|
||||||
|
|
||||||
|
|
||||||
|
def normalize_signals(
    produced: Sequence[ProducedSignal], vocabulary: Sequence[SignalVocabularyEntry]
) -> List[IntakeSignal]:
    """Map producer-specific signals onto the canonical IntakeSignal stream.

    Resolution rules:
      * a signal id that is a vocabulary id or alias is replaced by the canonical id;
      * for a known canonical id the vocabulary `kind` wins over whatever the producer declared,
        so a producer cannot relabel a requirement as an observation;
      * an unknown id passes through unchanged and keeps the producer's kind, falling back to
        "observation" when the producer left it empty.

    Args:
        produced: signals as emitted by the producers.
        vocabulary: canonical entries (id, kind, aliases). Later entries overwrite earlier
            ones when ids/aliases collide.

    Returns:
        One IntakeSignal per produced signal, in input order, carrying confidence, evidence
        and provenance over unchanged.
    """
    canonical_of: Dict[str, str] = {}
    vocab_kind: Dict[str, str] = {}
    for entry in vocabulary:
        canonical_of[entry.id] = entry.id
        vocab_kind[entry.id] = entry.kind
        canonical_of.update((name, entry.id) for name in entry.aliases)

    def _to_intake(sig: ProducedSignal) -> IntakeSignal:
        canonical_id = canonical_of.get(sig.signal_id, sig.signal_id)
        # Precedence: vocabulary kind, then producer-declared kind, then the default.
        resolved_kind = vocab_kind.get(canonical_id) or sig.kind or "observation"
        return IntakeSignal(
            source=sig.source_type,
            signal=canonical_id,
            kind=resolved_kind,
            confidence=sig.confidence,
            evidence=sig.evidence,
            provenance=sig.provenance,
        )

    return [_to_intake(sig) for sig in produced]
|
||||||
@@ -0,0 +1,124 @@
|
|||||||
|
"""Silent Knowledge Pass — recognise everything possible BEFORE asking a single question (Phase 0).
|
||||||
|
|
||||||
|
The Advisor can say "I need 5 answers" but does not yet decide WHAT it can find out by itself. The Silent
|
||||||
|
Pass runs first: from signals that existing scanners/parsers already produce (website, repository,
|
||||||
|
documents, product data) it deterministically derives capabilities the company demonstrably HAS and
|
||||||
|
product facts that drive scope — so every recognised item shrinks the delta and removes a question.
|
||||||
|
|
||||||
|
The customer then experiences "we already recognised 11 of 17 — only these 4 remain" instead of a
|
||||||
|
question wall. This is NOT new architecture: it is one orchestration step in front of the Advisor
|
||||||
|
Company -> Silent Intake -> Company Profile -> Hypotheses -> Delta -> Top Questions
|
||||||
|
All building blocks already exist. SIGNALS are INJECTED (the scanners produce them); the signal->capability
|
||||||
|
map is curated DATA, also injected. Pure, deterministic, no I/O. Python 3.9 compatible.
|
||||||
|
"""
|
||||||
|
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
from typing import Dict, List, Optional, Sequence, Set
|
||||||
|
|
||||||
|
from pydantic import BaseModel, Field
|
||||||
|
|
||||||
|
|
||||||
|
class IntakeSignal(BaseModel):
    """A canonical, producer-agnostic signal as consumed by the Silent Pass."""

    # Source type: website / repository / document / product / tender / user.
    source: str
    # Canonical signal id, e.g. "sbom_present".
    signal: str
    # "observation" (something was SEEN) or "requirement" (something is DEMANDED).
    kind: str = "observation"
    # Carried over from the producer.
    confidence: float = 1.0
    # The artifact that is already in hand.
    evidence: Optional[str] = None
    # Audit trail: where the signal came from (url / filename / tender clause).
    provenance: str = ""
    # Free text, kept for backward compatibility.
    detail: str = ""
|
||||||
|
|
||||||
|
|
||||||
|
class SignalMapping(BaseModel):
    """Curated rule: what may be concluded from one signal (a capability and/or a product fact)."""

    signal: str
    # Capability the signal is evidence for.
    capability: Optional[str] = None
    # "detected" (concrete artifact) or "partial" (merely indicative).
    relationship: str = "detected"
    # The artifact found; already in hand, so no upload is needed.
    evidence: Optional[str] = None
    # Product fact key, e.g. "connected_to_internet".
    product_fact: Optional[str] = None
    fact_value: str = "true"
    # Curated note explaining WHY the mapping is only indicative (mainly for partial mappings).
    rationale: str = ""
|
||||||
|
|
||||||
|
|
||||||
|
class DetectedCapability(BaseModel):
    """A capability derived from a signal, with its audit trail."""

    capability: str
    relationship: str = "detected"
    # Which source/signal produced it (audit trail).
    source: str = ""
    evidence: Optional[str] = None
    # Carried over from the producing signal.
    confidence: float = 1.0
    # Where the producing signal came from.
    provenance: str = ""
|
||||||
|
|
||||||
|
|
||||||
|
class ProductFact(BaseModel):
    """A key/value fact about the product, plus the source that yielded it."""

    key: str
    value: str = "true"
    source: str = ""
|
||||||
|
|
||||||
|
|
||||||
|
class SilentIntakeResult(BaseModel):
    """Everything the Silent Pass recognised before any question was asked."""

    detected_capabilities: List[DetectedCapability] = Field(default_factory=list)
    product_facts: List[ProductFact] = Field(default_factory=list)
    evidence_found: List[str] = Field(default_factory=list)
    # Ids of requirement-kind signals: preserved for later use, never counted as present.
    requirements_seen: List[str] = Field(default_factory=list)
    summary: str = ""

    def capability_ids(self) -> List[str]:
        """Sorted, de-duplicated ids of capabilities whose relationship is exactly "detected".

        These are the entries backed by a concrete artifact; partial/indicative entries are
        excluded and reported by `indicative_capability_ids` instead.
        """
        found = set()
        for item in self.detected_capabilities:
            if item.relationship == "detected":
                found.add(item.capability)
        return sorted(found)

    def indicative_capability_ids(self) -> List[str]:
        """Sorted, de-duplicated ids of capabilities whose relationship is anything but "detected".

        Such entries only indicate a capability; they do not stand in for a detection.
        """
        hinted = set()
        for item in self.detected_capabilities:
            if item.relationship != "detected":
                hinted.add(item.capability)
        return sorted(hinted)
|
||||||
|
|
||||||
|
|
||||||
|
def silent_intake(
    signals: Sequence[IntakeSignal], signal_map: Sequence[SignalMapping]
) -> SilentIntakeResult:
    """Derive capabilities and product facts from injected signals, without asking anything.

    Each observation-kind signal is matched by `signal` id against the curated mappings. A
    mapping may contribute a capability, an evidence artifact and/or a product fact.
    Requirement-kind signals are only collected in `requirements_seen`; they never become a
    capability, a fact or evidence.

    Capability de-duplication: one record per capability. A "detected" mapping takes precedence
    over a non-detected (partial) one regardless of the order in which the signals arrive;
    among records of equal strength the first one seen is kept. Product facts: the last
    matching mapping for a key wins.

    Args:
        signals: canonical signals (already normalised).
        signal_map: curated signal -> capability / product-fact mappings.

    Returns:
        A SilentIntakeResult with capabilities and facts sorted by key, evidence and
        requirement ids sorted, and a German one-line summary.
    """
    by_signal: Dict[str, List[SignalMapping]] = {}
    for m in signal_map:
        by_signal.setdefault(m.signal, []).append(m)

    caps: Dict[str, DetectedCapability] = {}
    facts: Dict[str, ProductFact] = {}
    evidence: Set[str] = set()
    requirements: Set[str] = set()
    for s in signals:
        if s.kind != "observation":
            # A requirement describes a TARGET, not the present state: keep it visible only.
            requirements.add(s.signal)
            continue
        for m in by_signal.get(s.signal, []):
            if m.capability:
                known = caps.get(m.capability)
                # A concrete detection must not be hidden by an earlier, merely partial hit;
                # otherwise the outcome would depend on the order of the incoming signals.
                upgrades = (
                    known is not None
                    and known.relationship != "detected"
                    and m.relationship == "detected"
                )
                if known is None or upgrades:
                    caps[m.capability] = DetectedCapability(
                        capability=m.capability, relationship=m.relationship,
                        source="%s:%s" % (s.source, s.signal), evidence=m.evidence,
                        confidence=s.confidence, provenance=s.provenance)
            # NOTE(review): indentation was lost in the extracted source; evidence is assumed
            # to be collected for every matched mapping (not only new capabilities) — confirm.
            if m.evidence:
                evidence.add(m.evidence)
            if m.product_fact:
                facts[m.product_fact] = ProductFact(key=m.product_fact, value=m.fact_value, source=s.source)

    detected = [caps[k] for k in sorted(caps)]
    product_facts = [facts[k] for k in sorted(facts)]
    requirements_seen = sorted(requirements)
    # Concrete artifacts count as auto-detected; everything else is an indication only.
    n_detected = sum(1 for d in detected if d.relationship == "detected")
    n_indication = len(detected) - n_detected
    summary = (
        "Stille Vorbefüllung: %d Fähigkeit(en) automatisch erkannt, %d Indikation(en), %d Produktfakt(en), "
        "%d Nachweis(e) bereits vorhanden, %d Anforderung(en) erkannt (nicht als vorhanden gewertet)."
        % (n_detected, n_indication, len(product_facts), len(evidence), len(requirements_seen))
    )
    return SilentIntakeResult(
        detected_capabilities=detected, product_facts=product_facts,
        evidence_found=sorted(evidence), requirements_seen=requirements_seen, summary=summary)
|
||||||
@@ -0,0 +1,21 @@
|
|||||||
|
"""Regulatory Optimization — the Roadmap / Management renderer of the Capability Delta Engine.
|
||||||
|
|
||||||
|
Ranks the OPEN Capability Delta (from RS-005) by regulatory leverage: which measure closes the
|
||||||
|
most regulatory requirements at once. Answers the Geschäftsführer question "Womit anfangen?".
|
||||||
|
Pure, deterministic, computed-not-stored. Consumes the RS-005 delta (acyclic dependency); the
|
||||||
|
delta engine stays hermetic. No new corpus, no new meta-model class (freeze v1.0).
|
||||||
|
"""
|
||||||
|
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
from .engine import regulatory_leverage, roadmap_from_delta, select_within_budget
|
||||||
|
from .schemas import BudgetPlan, OptimizationPlan, RankedMeasure
|
||||||
|
|
||||||
|
# Public names re-exported by this package: the engine entry points, then the result DTOs.
__all__ = [
    "regulatory_leverage",
    "select_within_budget",
    "roadmap_from_delta",
    "OptimizationPlan",
    "RankedMeasure",
    "BudgetPlan",
]
|
||||||
@@ -0,0 +1,134 @@
|
|||||||
|
"""Regulatory Optimization — the Roadmap / Management RENDERER of the Capability Delta Engine.
|
||||||
|
|
||||||
|
GAP analysis and measure-prioritisation are TWO VIEWS OF THE SAME COMPUTATION. The Capability
|
||||||
|
Delta Engine (`compliance/transition_reasoning`, RS-005) computes Required - Known = the
|
||||||
|
Capability Delta once. Renderers read that ONE delta:
|
||||||
|
- Interview Renderer (missing INFORMATION -> questions) = `TransitionQuestionRequest` (built)
|
||||||
|
- Roadmap / Management Renderer (missing CAPABILITIES -> measures by leverage) = THIS module
|
||||||
|
- Evidence Renderer (missing EVIDENCE -> upload requests) = later
|
||||||
|
There is one truth, not a Gap engine and a separate Roadmap engine.
|
||||||
|
|
||||||
|
A measure (a capability to implement) has *regulatory leverage* = the number of distinct
|
||||||
|
regulatory requirements it closes AT ONCE (e.g. patch management closes a CRA, a MaschinenVO,
|
||||||
|
an IEC 62443 and an ISO 27001 requirement -> leverage 4). The product turns from "you have N
|
||||||
|
obligations" into "of N identified requirements you only need M measures — and these K first".
|
||||||
|
|
||||||
|
Fully deterministic, computed-not-stored, NO new corpus. `regulatory_leverage`/`select_within_budget`
|
||||||
|
are pure math over `capability -> requirements`; `roadmap_from_delta` binds them to the RS-005
|
||||||
|
delta (dependency optimization -> transition_reasoning, acyclic; the delta engine stays hermetic).
|
||||||
|
No new graph/meta-model class (freeze v1.0). Python 3.9 compatible.
|
||||||
|
|
||||||
|
Honesty (Welt-1): the percentages are exact count ratios over the IDENTIFIED requirements from
|
||||||
|
the known patterns — never "% gesetzeskonform". Label outputs as "der identifizierten Anforderungen".
|
||||||
|
"""
|
||||||
|
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
from typing import Dict, List, Optional
|
||||||
|
|
||||||
|
from ..transition_reasoning import CoverageStatus, TransitionAssessment
|
||||||
|
from .schemas import BudgetPlan, OptimizationPlan, RankedMeasure
|
||||||
|
|
||||||
|
|
||||||
|
def _ranked(
    capability_requirements: Dict[str, List[str]], in_scope: Optional[List[str]]
) -> List[RankedMeasure]:
    """Build the ranked measure list shared by the public entry points.

    Ordering is leverage descending, then capability_id ascending, which makes the result
    deterministic. A capability that covers nothing inside the scope is left out. Each measure
    also receives the running total of requirements closed up to and including it, and that
    total as a fraction of the sum of all leverages (0.0 when there is nothing to close).

    Args:
        capability_requirements: capability id -> requirement keys it satisfies.
        in_scope: requirement keys to count; None means every key that occurs in the mapping.
    """
    if in_scope is None:
        scope = set()
        for keys in capability_requirements.values():
            scope.update(keys)
    else:
        scope = set(in_scope)

    ranked: List[RankedMeasure] = []
    for cap_id, keys in capability_requirements.items():
        closed = sorted(set(keys) & scope)
        if closed:
            ranked.append(RankedMeasure(capability_id=cap_id, covers=closed, leverage=len(closed)))
    ranked = sorted(ranked, key=lambda item: (-item.leverage, item.capability_id))

    grand_total = sum(item.leverage for item in ranked)
    closed_so_far = 0
    for item in ranked:
        closed_so_far += item.leverage
        item.cumulative_requirements = closed_so_far
        item.cumulative_coverage = (closed_so_far / grand_total) if grand_total else 0.0
    return ranked
|
||||||
|
|
||||||
|
|
||||||
|
def regulatory_leverage(
    capability_requirements: Dict[str, List[str]], in_scope: Optional[List[str]] = None
) -> OptimizationPlan:
    """Rank measures by regulatory leverage and report the compression requirements -> measures.

    Args:
        capability_requirements: measure (capability id) -> the requirement keys it satisfies.
        in_scope: restricts which requirement keys are counted; None counts every key seen.

    Returns:
        An OptimizationPlan holding the sorted scope keys, the ranked measures, the number of
        measures, the sum of their leverages and a German headline with the average leverage.
    """
    ranked = _ranked(capability_requirements, in_scope)

    if in_scope is None:
        scope_keys = {key for keys in capability_requirements.values() for key in keys}
    else:
        scope_keys = set(in_scope)
    ordered_scope = sorted(scope_keys)

    leverage_sum = sum(item.leverage for item in ranked)
    measure_count = len(ranked)
    mean_leverage = (leverage_sum / measure_count) if ranked else 0.0
    text = "%d identifizierte Anforderungen aus %d Regelwerken -> %d Massnahmen (Ø Hebel %.1f)." % (
        leverage_sum, len(ordered_scope), measure_count, mean_leverage)

    return OptimizationPlan(
        in_scope_requirements=ordered_scope,
        total_measures=measure_count,
        total_requirements=leverage_sum,
        ranked_measures=ranked,
        headline=text,
    )
|
||||||
|
|
||||||
|
|
||||||
|
def select_within_budget(
    capability_requirements: Dict[str, List[str]],
    budget: int,
    in_scope: Optional[List[str]] = None,
) -> BudgetPlan:
    """Answer the budget question: which K measures to take, and how much they close.

    The first `budget` measures of the leverage ranking are selected; a negative budget
    selects nothing. Coverage is the leverage closed by the selection divided by the sum of
    all leverages (0.0 when that sum is zero).

    Args:
        capability_requirements: measure (capability id) -> the requirement keys it satisfies.
        budget: number of measures that can be afforded; reported back unchanged.
        in_scope: restricts which requirement keys are counted; None counts every key seen.
    """
    ranked = _ranked(capability_requirements, in_scope)
    leverage_sum = sum(item.leverage for item in ranked)

    chosen = ranked[: max(0, budget)]
    # The running total stored on the last chosen measure is the amount the selection closes.
    closed = chosen[-1].cumulative_requirements if chosen else 0
    share = (closed / leverage_sum) if leverage_sum else 0.0

    text = (
        "Mit den Top-%d Massnahmen (nach regulatorischem Hebel) schliessen Sie %d von %d "
        "identifizierten Anforderungen (%.0f%%)." % (len(chosen), closed, leverage_sum, share * 100)
    )
    return BudgetPlan(
        budget=budget,
        selected_capabilities=[item.capability_id for item in chosen],
        requirements_closed=closed,
        total_requirements=leverage_sum,
        coverage_ratio=share,
        headline=text,
    )
|
||||||
|
|
||||||
|
|
||||||
|
def roadmap_from_delta(
    assessment: TransitionAssessment,
    capability_requirements: Dict[str, List[str]],
    in_scope: Optional[List[str]] = None,
    open_statuses: Optional[List[CoverageStatus]] = None,
) -> OptimizationPlan:
    """Render the roadmap view from a capability delta (a `TransitionAssessment`).

    The capabilities in `assessment.coverage` whose status counts as open are looked up in
    `capability_requirements` and handed to `regulatory_leverage` for ranking.

    Args:
        assessment: the delta to render.
        capability_requirements: capability id -> requirement keys; a capability without an
            entry is treated as closing nothing.
        in_scope: forwarded to `regulatory_leverage`.
        open_statuses: statuses regarded as open; None means only CoverageStatus.MISSING.
    """
    if open_statuses is None:
        wanted = {CoverageStatus.MISSING}
    else:
        wanted = set(open_statuses)

    open_requirements: Dict[str, List[str]] = {}
    for entry in assessment.coverage:
        if entry.status in wanted:
            open_requirements[entry.capability_id] = capability_requirements.get(entry.capability_id, [])
    return regulatory_leverage(open_requirements, in_scope)
|
||||||
@@ -0,0 +1,48 @@
|
|||||||
|
"""Schemas for the Regulatory Optimization Engine.
|
||||||
|
|
||||||
|
These DTOs are *derived views* (computed-not-stored): nothing here is persisted; every value
|
||||||
|
is recomputed from the input each call. No new meta-model class, no graph (freeze v1.0).
|
||||||
|
Python 3.9 compatible (no `|` unions).
|
||||||
|
"""
|
||||||
|
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
from typing import List
|
||||||
|
|
||||||
|
from pydantic import BaseModel, Field
|
||||||
|
|
||||||
|
|
||||||
|
class RankedMeasure(BaseModel):
    """A single measure (a capability to implement) with its regulatory leverage."""

    capability_id: str
    # The in-scope requirements this measure satisfies.
    covers: List[str] = Field(default_factory=list)
    # Equals len(covers): how many requirements are closed at once.
    leverage: int = 0
    # Running total of requirements closed, following the ranked order.
    cumulative_requirements: int = 0
    # cumulative_requirements / total_requirements, in the range 0..1.
    cumulative_coverage: float = 0.0
|
||||||
|
|
||||||
|
|
||||||
|
class OptimizationPlan(BaseModel):
    """Measures ordered by regulatory leverage, greatest effect first.

    `total_requirements` counts only the IDENTIFIED requirements in scope, not the full set of
    a company's legal duties. Ratios derived from it are plain count ratios over that
    identified set and must never be read as a compliance verdict.
    """

    # The distinct requirement keys that were counted.
    in_scope_requirements: List[str] = Field(default_factory=list)
    # Number of distinct measures (delta capabilities).
    total_measures: int = 0
    # Sum of all leverages = identified requirements that can be closed.
    total_requirements: int = 0
    ranked_measures: List[RankedMeasure] = Field(default_factory=list)
    # Summary line of the form "N identifizierte Anforderungen -> M Massnahmen ...".
    headline: str = ""
|
||||||
|
|
||||||
|
|
||||||
|
class BudgetPlan(BaseModel):
    """Result of the budget question: given K measures, which ones and how much do they close."""

    budget: int = 0
    selected_capabilities: List[str] = Field(default_factory=list)
    requirements_closed: int = 0
    total_requirements: int = 0
    # requirements_closed / total_requirements, in the range 0..1.
    coverage_ratio: float = 0.0
    headline: str = ""
|
||||||
@@ -0,0 +1,20 @@
|
|||||||
|
"""Implementation Playbook — the Berater renderer ("wie komme ich dort hin?").
|
||||||
|
|
||||||
|
For one capability it assembles the full implementation journey (why / closes which regulations /
|
||||||
|
tools / process / evidence / controls) from curated playbook knowledge + regulatory leverage +
|
||||||
|
injected Execution links. `playbooks_for_plan` chains the Optimization Roadmap into per-measure
|
||||||
|
playbooks. Pure, deterministic, computed-not-stored. No new corpus, no new meta-model class
|
||||||
|
(freeze v1.0). Curated content = expert draft, never normative.
|
||||||
|
"""
|
||||||
|
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
from .engine import build_playbook, playbooks_for_plan
|
||||||
|
from .schemas import Playbook, PlaybookStep
|
||||||
|
|
||||||
|
# Public names re-exported by this package: the two renderer functions, then the DTOs.
__all__ = [
    "build_playbook",
    "playbooks_for_plan",
    "Playbook",
    "PlaybookStep",
]
|
||||||
@@ -0,0 +1,96 @@
|
|||||||
|
"""Implementation Playbook — the Berater renderer ("wie komme ich dort hin?").
|
||||||
|
|
||||||
|
After the Capability Delta Engine says WHAT is missing and the Optimization renderer says WHICH
|
||||||
|
measure first, the Playbook renderer says HOW to implement it. For one capability it assembles the
|
||||||
|
full journey from three sources:
|
||||||
|
- curated playbook KNOWLEDGE (why / tools / process steps / evidence / how others do it) — the
|
||||||
|
Reasoning Knowledge Acquisition layer under `knowledge/implementation_playbooks/`,
|
||||||
|
- the regulatory LEVERAGE (which regulations a delivered capability closes) — reused from the
|
||||||
|
Optimization renderer,
|
||||||
|
- injected Procedure/Control/Evidence links (Execution-owned; empty until linked).
|
||||||
|
|
||||||
|
Pure, deterministic, computed-not-stored. Chains optimization -> playbook (acyclic). No new corpus,
|
||||||
|
no new meta-model class (freeze v1.0). Python 3.9 compatible.
|
||||||
|
|
||||||
|
The curated content is an EXPERT DRAFT, never a normative requirement. When no playbook knowledge
|
||||||
|
exists for a capability yet, the renderer emits a `status: missing` stub — the honest signal that
|
||||||
|
the bottleneck is CONTENT (Knowledge Acquisition), not software.
|
||||||
|
"""
|
||||||
|
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
from typing import Any, Dict, List, Optional
|
||||||
|
|
||||||
|
from ..optimization import OptimizationPlan
|
||||||
|
from .schemas import Playbook, PlaybookStep
|
||||||
|
|
||||||
|
# "Why" text of the stub playbook emitted when a capability has no curated knowledge yet.
_MISSING_WHY = "(Playbook-Inhalt fehlt — Knowledge Acquisition offen.)"
# Default disclaimer: curated content is an expert first draft, not a normative requirement.
_DRAFT_DISCLAIMER = (
    "Kuratiertes Experten-Wissen (Erstentwurf), KEINE normative Anforderung. Tools/Schritte sind "
    "Empfehlungen, kein Pflichtkatalog; Controls werden aus der Execution-Schicht injiziert."
)
|
||||||
|
|
||||||
|
|
||||||
|
def _steps(raw: Any) -> List[PlaybookStep]:
    """Turn the curated step entries into numbered PlaybookStep objects.

    Steps are numbered from 1 in input order. A falsy `raw` (None, empty list) yields an empty
    list. Each entry must offer `.get`; missing "title"/"detail" keys become empty strings.
    """
    return [
        PlaybookStep(order=position, title=str(entry.get("title", "")), detail=str(entry.get("detail", "")))
        for position, entry in enumerate(raw or [], start=1)
    ]
|
||||||
|
|
||||||
|
|
||||||
|
def _as_list(value: Any) -> List[str]:
    """Coerce a curated list field: None/empty -> [], a bare string -> one item, else a list copy."""
    if not value:
        return []
    if isinstance(value, str):
        # list("abc") would split the string into characters; keep it as a single entry.
        return [value]
    return list(value)


def build_playbook(
    capability_id: str,
    knowledge: Optional[Dict[str, Any]] = None,
    closes_regulations: Optional[List[str]] = None,
    control_links: Optional[List[str]] = None,
) -> Playbook:
    """Assemble the implementation journey for ONE capability.

    Args:
        capability_id: the capability the playbook is for.
        knowledge: the curated playbook dict. None or empty produces a stub with
            status "missing". Keys that are absent OR explicitly null/empty fall back to their
            defaults (title -> capability_id, status -> "draft", disclaimer -> the draft
            disclaimer, text fields -> "", list fields -> []).
        closes_regulations: regulations a delivered capability closes; de-duplicated and
            sorted, and their count becomes `leverage`.
        control_links: injected control references; None means no controls.

    Returns:
        The assembled Playbook.
    """
    closes = sorted(set(closes_regulations or []))
    controls = list(control_links or [])
    if not knowledge:
        # No curated content yet: emit a stub so the gap is visible instead of hidden.
        return Playbook(
            capability_id=capability_id, title=capability_id, why=_MISSING_WHY,
            closes_regulations=closes, leverage=len(closes), controls=controls,
            status="missing", disclaimer=_DRAFT_DISCLAIMER,
        )
    # `dict.get(key, default)` only applies the default when the key is ABSENT, so every field
    # is read with `or <default>` to also cover explicit nulls in the curated data.
    return Playbook(
        capability_id=capability_id,
        title=str(knowledge.get("title") or capability_id),
        why=str(knowledge.get("why") or ""),
        closes_regulations=closes,
        leverage=len(closes),
        tools=_as_list(knowledge.get("tools")),
        process_steps=_steps(knowledge.get("process_steps")),
        expected_evidence=_as_list(knowledge.get("expected_evidence")),
        controls=controls,
        how_others_do_it=str(knowledge.get("how_others_do_it") or ""),
        status=str(knowledge.get("status") or "draft"),
        disclaimer=str(knowledge.get("disclaimer") or _DRAFT_DISCLAIMER),
    )
|
||||||
|
|
||||||
|
|
||||||
|
def playbooks_for_plan(
    plan: OptimizationPlan,
    knowledge_by_cap: Dict[str, Dict[str, Any]],
    top_k: Optional[int] = None,
    control_links_by_cap: Optional[Dict[str, List[str]]] = None,
) -> List[Playbook]:
    """Build one playbook per ranked measure of an OptimizationPlan, in ranking order.

    Args:
        plan: the plan whose `ranked_measures` are walked.
        knowledge_by_cap: capability id -> curated playbook dict; a capability without an
            entry is passed on as None to `build_playbook`.
        top_k: only the first `top_k` measures are rendered (negative values render none);
            None renders all of them.
        control_links_by_cap: capability id -> control references; None means no links.

    Returns:
        The playbooks, each using its measure's `covers` as the regulations it closes.
    """
    control_lookup = control_links_by_cap or {}
    if top_k is None:
        wanted = plan.ranked_measures
    else:
        wanted = plan.ranked_measures[: max(0, top_k)]

    rendered: List[Playbook] = []
    for measure in wanted:
        cap_id = measure.capability_id
        rendered.append(
            build_playbook(
                cap_id,
                knowledge_by_cap.get(cap_id),
                closes_regulations=measure.covers,
                control_links=control_lookup.get(cap_id),
            )
        )
    return rendered
|
||||||
@@ -0,0 +1,45 @@
|
|||||||
|
"""Schemas for the Implementation Playbook renderer.
|
||||||
|
|
||||||
|
A Playbook is a *derived view* (computed-not-stored): it assembles, for one capability, the full
|
||||||
|
"wie komme ich dort hin?" journey from (a) curated playbook KNOWLEDGE, (b) the regulatory leverage
|
||||||
|
(which regulations a delivered capability closes), and (c) injected Procedure/Control/Evidence links
|
||||||
|
(Execution-owned). Nothing here is persisted. No new meta-model class, no graph (freeze v1.0).
|
||||||
|
Python 3.9 compatible (no `|` unions).
|
||||||
|
"""
|
||||||
|
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
from typing import List
|
||||||
|
|
||||||
|
from pydantic import BaseModel, Field
|
||||||
|
|
||||||
|
|
||||||
|
class PlaybookStep(BaseModel):
    """A single step of the recommended way to stand up a capability."""

    # Required fields: both must be supplied when a step is created.
    order: int
    title: str
    # Optional free text; defaults to an empty string.
    detail: str = ""
|
||||||
|
|
||||||
|
|
||||||
|
class Playbook(BaseModel):
    """The full implementation journey for ONE capability (the consultant's view).

    Read in order it answers: Why? -> Which regulations does it close? -> Which tools? ->
    Which processes? -> Which evidence? -> Which controls? The curated parts
    (why / tools / steps / evidence / how-others) are an EXPERT DRAFT, not a normative
    requirement. Controls are injected from Execution and may be empty until linked.
    """

    capability_id: str
    title: str = ""
    # Regulatory rationale: why this capability is required.
    why: str = ""
    # Leverage: the regulations a delivered capability closes.
    closes_regulations: List[str] = Field(default_factory=list)
    # Meant to equal len(closes_regulations); this model does not enforce it.
    leverage: int = 0
    # Typical tooling (curated knowledge).
    tools: List[str] = Field(default_factory=list)
    # How to stand the capability up.
    process_steps: List[PlaybookStep] = Field(default_factory=list)
    # Artifacts that prove the capability exists.
    expected_evidence: List[str] = Field(default_factory=list)
    # Control references injected from Execution; may be empty.
    controls: List[str] = Field(default_factory=list)
    # "How do others do it?" (curated).
    how_others_do_it: str = ""
    # Maturity ladder: draft -> reviewed -> validated -> proven.
    status: str = "draft"
    # Notice that this is an expert draft, not a normative requirement.
    disclaimer: str = ""
|
||||||
@@ -0,0 +1,26 @@
|
|||||||
|
"""Product-scope orchestration (step 3).
|
||||||
|
|
||||||
|
Connects the Navigator's fact-gate to the existing reasoning `discover_scope`:
|
||||||
|
decide regulatory scope only once the minimum (P0) facts are present, otherwise
|
||||||
|
return the missing facts. Reuses discover_scope unchanged — no new scope logic.
|
||||||
|
"""
|
||||||
|
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
from .orchestrator import resolve_product_scope
|
||||||
|
from .schemas import (
|
||||||
|
ProductScopeRequest,
|
||||||
|
ProductScopeResponse,
|
||||||
|
RegulatoryScopeResult,
|
||||||
|
ScopeStatus,
|
||||||
|
UnsupportedDomain,
|
||||||
|
)
|
||||||
|
|
||||||
|
__all__ = [
|
||||||
|
"resolve_product_scope",
|
||||||
|
"ProductScopeRequest",
|
||||||
|
"ProductScopeResponse",
|
||||||
|
"RegulatoryScopeResult",
|
||||||
|
"UnsupportedDomain",
|
||||||
|
"ScopeStatus",
|
||||||
|
]
|
||||||
@@ -0,0 +1,77 @@
|
|||||||
|
"""Product-scope orchestrator (step 3) — gate, then reuse discover_scope.
|
||||||
|
|
||||||
|
THE rule: the Scope Engine decides only once the Navigator has released the
|
||||||
|
minimum facts. If P0 facts are missing, return the missing facts/questions and do
|
||||||
|
NOT run discover_scope. Otherwise project the canonical into the reasoning profile
|
||||||
|
and run the EXISTING `discover_scope` exactly once.
|
||||||
|
|
||||||
|
No new scope rules, no new regulations, no environmental-law evaluation (those
|
||||||
|
domains are surfaced only as unsupported_domains / future_corpus_needed).
|
||||||
|
"""
|
||||||
|
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
from typing import List, Tuple
|
||||||
|
|
||||||
|
from compliance.navigator.engine import navigate
|
||||||
|
from compliance.profile.canonical import CanonicalProductRegulatoryProfile
|
||||||
|
from compliance.profile.to_reasoning import to_reasoning_profile
|
||||||
|
from compliance.reasoning.scope_engine import discover_scope
|
||||||
|
|
||||||
|
from .schemas import (
|
||||||
|
ProductScopeResponse,
|
||||||
|
RegulatoryScopeResult,
|
||||||
|
ScopeStatus,
|
||||||
|
UnsupportedDomain,
|
||||||
|
)
|
||||||
|
|
||||||
|
# environmental trigger field -> (domain, note). Transparency only — not a verdict.
|
||||||
|
_ENV_DOMAINS: List[Tuple[str, str, str]] = [
|
||||||
|
("discharges_to_wastewater", "environment_water", "Abwasser-/Gewässerrecht (z. B. AbwV, WRRL) — noch nicht im Korpus."),
|
||||||
|
("has_cooling_or_spraying_water", "environment_water", "Wasserbezogene Anforderungen — noch nicht im Korpus."),
|
||||||
|
("emits_to_air", "environment_air", "Immissionsschutz-/Luftreinhalterecht (z. B. BImSchG, IED) — noch nicht im Korpus."),
|
||||||
|
("uses_solvents", "environment_air", "Lösemittel-/VOC-Recht (z. B. 31. BImSchV) — noch nicht im Korpus."),
|
||||||
|
("uses_cleaning_chemicals", "chemicals", "Chemikalienrecht (REACH/CLP/Detergenzien/Biozide) — noch nicht im Korpus."),
|
||||||
|
("supplies_chemicals", "chemicals", "Chemikalienrecht (REACH/CLP) — noch nicht im Korpus."),
|
||||||
|
("contains_restricted_substances", "chemicals", "Stoffbeschränkungen (REACH/RoHS) — noch nicht im Korpus."),
|
||||||
|
("creates_waste", "waste", "Abfall-/Entsorgungsrecht (u. a. WEEE) — noch nicht im Korpus."),
|
||||||
|
("consumes_energy_or_water", "energy_resources", "Energie-/Ökodesign-Recht — noch nicht im Korpus."),
|
||||||
|
]
|
||||||
|
|
||||||
|
|
||||||
|
def _unsupported_domains(profile: CanonicalProductRegulatoryProfile) -> List[UnsupportedDomain]:
    """List the not-yet-covered domains that the profile's environmental flags trigger.

    Walks ``_ENV_DOMAINS`` in order. A row counts only when the flag is exactly ``True``
    (``None``/``False`` are ignored). Each domain is reported once, carrying the trigger
    field and note of the first row that fired for it.
    """
    impact = profile.environmental
    reported: List[UnsupportedDomain] = []
    domains_done = set()
    for trigger_field, domain_key, explanation in _ENV_DOMAINS:
        fired = getattr(impact, trigger_field) is True
        if not fired or domain_key in domains_done:
            continue
        domains_done.add(domain_key)
        reported.append(
            UnsupportedDomain(domain=domain_key, trigger=trigger_field, note=explanation)
        )
    return reported
|
||||||
|
|
||||||
|
|
||||||
|
def resolve_product_scope(profile: CanonicalProductRegulatoryProfile) -> ProductScopeResponse:
    """Gate on the Navigator, then run the existing ``discover_scope`` at most once.

    If the Navigator's completeness summary is not ``ready_for_scope``, the response has
    status NEEDS_FACTS and carries the missing facts and suggested questions;
    ``discover_scope`` is not called. Otherwise the canonical profile is projected into
    the reasoning profile, ``discover_scope`` runs once, and its result is returned with
    status RESOLVED, extended by the unsupported domains derived from the profile.
    """
    nav = navigate(profile)
    summary = nav.completeness_summary

    if summary.ready_for_scope:
        verdict = discover_scope(to_reasoning_profile(profile))  # the only scope evaluation
        return ProductScopeResponse(
            status=ScopeStatus.RESOLVED,
            completeness_summary=summary,
            regulatory_scope=RegulatoryScopeResult(
                applicable_regulations=verdict.applicable_regulations,
                excluded_regulations=verdict.excluded_regulations,
                uncertain_regulations=verdict.uncertain_regulations,
                unsupported_domains=_unsupported_domains(profile),
                reasoning_summary=verdict.reasoning_summary,
                confidence=verdict.confidence,
            ),
        )

    # Minimum facts are missing: ask instead of deciding.
    return ProductScopeResponse(
        status=ScopeStatus.NEEDS_FACTS,
        completeness_summary=summary,
        missing_facts=nav.missing_facts,
        suggested_questions=nav.suggested_questions,
    )
|
||||||
@@ -0,0 +1,63 @@
|
|||||||
|
"""Response schemas for the product-scope orchestrator (step 3).
|
||||||
|
|
||||||
|
These are application/API types — NOT compliance-meta-model classes (architecture
|
||||||
|
freeze v1.0 untouched). The scope verdict itself is produced by the existing
|
||||||
|
`discover_scope`; nothing here adds scope rules.
|
||||||
|
"""
|
||||||
|
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
from enum import Enum
|
||||||
|
from typing import List, Optional
|
||||||
|
|
||||||
|
from pydantic import BaseModel, Field
|
||||||
|
|
||||||
|
from compliance.navigator.engine import CompletenessSummary
|
||||||
|
from compliance.navigator.questions import NavigatorQuestion
|
||||||
|
from compliance.profile.canonical import CanonicalProductRegulatoryProfile
|
||||||
|
from compliance.reasoning.enums import Confidence
|
||||||
|
from compliance.reasoning.schemas import (
|
||||||
|
ApplicableRegulation,
|
||||||
|
ExcludedRegulation,
|
||||||
|
UncertainRegulation,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
class ScopeStatus(str, Enum):
    # Outcome of the product-scope gate; members are also plain strings.

    # P0 facts are missing -> ask, do not decide.
    NEEDS_FACTS = "needs_facts"
    # Minimum facts are present -> scope has been decided.
    RESOLVED = "resolved"
|
||||||
|
|
||||||
|
|
||||||
|
class UnsupportedDomain(BaseModel):
    """A domain that the product triggers but that the corpus does not cover yet.

    Reported for transparency (no false completeness) and NEVER a legal evaluation.
    """

    # Domain key and the profile field that triggered it.
    domain: str
    trigger: str
    # Fixed default marker for "corpus coverage still to come".
    status: str = "future_corpus_needed"
    # Optional human-readable explanation.
    note: str = ""
|
||||||
|
|
||||||
|
|
||||||
|
class RegulatoryScopeResult(BaseModel):
    # Scope verdict container: three regulation buckets plus the domains that are
    # triggered but not covered by the corpus. Every field has a default.
    applicable_regulations: List[ApplicableRegulation] = Field(default_factory=list)
    excluded_regulations: List[ExcludedRegulation] = Field(default_factory=list)
    uncertain_regulations: List[UncertainRegulation] = Field(default_factory=list)
    unsupported_domains: List[UnsupportedDomain] = Field(default_factory=list)

    # Free-text summary and overall confidence (MEDIUM unless set).
    reasoning_summary: str = ""
    confidence: Confidence = Confidence.MEDIUM
|
||||||
|
|
||||||
|
|
||||||
|
class ProductScopeRequest(BaseModel):
    # Request wrapper: the canonical product profile to resolve the scope for (required).
    product_profile: CanonicalProductRegulatoryProfile
|
||||||
|
|
||||||
|
|
||||||
|
class ProductScopeResponse(BaseModel):
    # Always present: the outcome and the Navigator's completeness summary.
    status: ScopeStatus
    completeness_summary: CompletenessSummary

    # Filled when status is NEEDS_FACTS.
    missing_facts: List[str] = Field(default_factory=list)
    suggested_questions: List[NavigatorQuestion] = Field(default_factory=list)

    # Filled when status is RESOLVED; otherwise None.
    regulatory_scope: Optional[RegulatoryScopeResult] = None
|
||||||
@@ -0,0 +1,38 @@
|
|||||||
|
"""Product profile convergence layer.
|
||||||
|
|
||||||
|
ONE canonical product profile (`CanonicalProductRegulatoryProfile`) that the Go
|
||||||
|
gap engine and the Python reasoning engine both project from — so "SPS mit
|
||||||
|
Remote Access" means the same thing everywhere. gap.ProductProfile leads; the
|
||||||
|
reasoning ProductProfile is an adapter/DTO. Types + mappers only — no regulation
|
||||||
|
logic, no UI, no new questions.
|
||||||
|
"""
|
||||||
|
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
from .canonical import (
|
||||||
|
CanonicalLifecyclePhase,
|
||||||
|
CanonicalProductRegulatoryProfile,
|
||||||
|
CanonicalProductType,
|
||||||
|
ComponentKind,
|
||||||
|
EconomicOperatorRole,
|
||||||
|
EnvironmentalImpact,
|
||||||
|
ProductComponent,
|
||||||
|
)
|
||||||
|
from .from_company_profile import from_company_profile
|
||||||
|
from .from_product_wizard import from_product_wizard
|
||||||
|
from .to_gap import to_gap_profile
|
||||||
|
from .to_reasoning import to_reasoning_profile
|
||||||
|
|
||||||
|
__all__ = [
|
||||||
|
"CanonicalProductRegulatoryProfile",
|
||||||
|
"CanonicalProductType",
|
||||||
|
"EconomicOperatorRole",
|
||||||
|
"CanonicalLifecyclePhase",
|
||||||
|
"ComponentKind",
|
||||||
|
"ProductComponent",
|
||||||
|
"EnvironmentalImpact",
|
||||||
|
"from_product_wizard",
|
||||||
|
"from_company_profile",
|
||||||
|
"to_gap_profile",
|
||||||
|
"to_reasoning_profile",
|
||||||
|
]
|
||||||
@@ -0,0 +1,158 @@
|
|||||||
|
"""CanonicalProductRegulatoryProfile — the single semantic product profile.
|
||||||
|
|
||||||
|
Convergence layer (spec 2026-06-26): instead of letting the Go `gap.ProductProfile`
|
||||||
|
and the Python reasoning `ProductProfile` drift, ONE canonical type is the source
|
||||||
|
of truth. The Go gap engine LEADS (it carries real engine logic), so the canonical
|
||||||
|
mirrors gap's field names and adds the Navigator gaps the audit found missing
|
||||||
|
(economic-operator role, radio module, generates_usage_data, lifecycle phase,
|
||||||
|
structured BOM, safety-vs-security split, machine-vs-component) plus a
|
||||||
|
forward-looking Environmental-Impact domain.
|
||||||
|
|
||||||
|
No regulation logic lives here — types only. Mappers live in sibling modules.
|
||||||
|
Python 3.9 compatible (no `|` unions).
|
||||||
|
"""
|
||||||
|
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
from enum import Enum
|
||||||
|
from typing import List, Optional
|
||||||
|
|
||||||
|
from pydantic import BaseModel, Field
|
||||||
|
|
||||||
|
|
||||||
|
class CanonicalProductType(str, Enum):
    # Product category; mirrors gap.ProductType. Members are also plain strings.
    SOFTWARE = "software"
    HARDWARE = "hardware"
    IOT = "iot"
    SAAS = "saas"
    EXCHANGE = "exchange"
    MEDICAL_DEVICE = "medical_device"
    MACHINERY = "machinery"
    OTHER = "other"
|
||||||
|
|
||||||
|
|
||||||
|
class EconomicOperatorRole(str, Enum):
    # CE/CRA economic-operator role; gap.ProductProfile has no equivalent field.
    MANUFACTURER = "manufacturer"
    IMPORTER = "importer"
    DISTRIBUTOR = "distributor"
    INTEGRATOR = "integrator"
    OPERATOR = "operator"
    SERVICE_PROVIDER = "service_provider"
|
||||||
|
|
||||||
|
|
||||||
|
class CanonicalLifecyclePhase(str, Enum):
    # Product lifecycle phase; members are also plain strings.
    DEVELOPMENT = "development"
    PLACING_ON_MARKET = "placing_on_market"
    OPERATION = "operation"
    MAINTENANCE = "maintenance"
    UPDATE = "update"
    END_OF_LIFE = "end_of_life"
|
||||||
|
|
||||||
|
|
||||||
|
class ComponentKind(str, Enum):
    # Kind of a structured BOM node; members are also plain strings.

    # Drives and thermal parts.
    MOTOR = "motor"
    PUMP = "pump"
    HEATING = "heating"
    COOLING = "cooling"

    # Control, sensing and actuation.
    CONTROLLER = "controller"
    PLC = "plc"
    HMI = "hmi"
    SENSOR = "sensor"
    ACTUATOR = "actuator"
    CAMERA = "camera"

    # Connectivity.
    NETWORK_INTERFACE = "network_interface"
    RADIO_MODULE = "radio_module"

    # Media handling and energy storage.
    CHEMICAL_DOSING = "chemical_dosing"
    WATER_INLET = "water_inlet"
    WASTEWATER_OUTLET = "wastewater_outlet"
    BATTERY = "battery"

    # Fallback for anything else.
    OTHER = "other"
|
||||||
|
|
||||||
|
|
||||||
|
class ProductComponent(BaseModel):
    """A single structured BOM node; these nodes are what later trigger domains."""

    # Required display name of the component.
    name: str
    # Component category; falls back to OTHER when not given.
    kind: ComponentKind = ComponentKind.OTHER
    # Optional free-text remarks.
    notes: Optional[str] = None
|
||||||
|
|
||||||
|
|
||||||
|
class EnvironmentalImpact(BaseModel):
    """Forward-looking environmental-media triggers (a Navigator domain of their own).

    No regulation logic consumes these yet. They are profile fields only, so that the
    model is not blind to wastewater/air/chemicals/waste questions once that domain is
    wired in later (AbwV/WRRL/REACH/CLP/IED/BImSchG ...).
    """

    # Every flag defaults to None.
    discharges_to_wastewater: Optional[bool] = None
    uses_cleaning_chemicals: Optional[bool] = None
    supplies_chemicals: Optional[bool] = None
    emits_to_air: Optional[bool] = None
    uses_solvents: Optional[bool] = None
    creates_waste: Optional[bool] = None
    contains_restricted_substances: Optional[bool] = None
    consumes_energy_or_water: Optional[bool] = None
    has_cooling_or_spraying_water: Optional[bool] = None
|
||||||
|
|
||||||
|
|
||||||
|
class CanonicalProductRegulatoryProfile(BaseModel):
    # The single canonical product profile. Types only; every field has a default.

    # ---- identity ----
    name: str = ""
    description: str = ""
    product_type: Optional[CanonicalProductType] = None
    product_profile_id: Optional[str] = None
    tenant_id: Optional[str] = None
    iace_project_id: Optional[str] = None

    # ---- gap-native lists ----
    technologies: List[str] = Field(default_factory=list)
    data_processing: List[str] = Field(default_factory=list)
    # A real list supplied by the caller; never a hardcoded ['EU'].
    markets: List[str] = Field(default_factory=list)
    existing_certifications: List[str] = Field(default_factory=list)
    applied_norms: List[str] = Field(default_factory=list)

    # ---- gap-native product / current-state booleans (tri-state: None = unknown) ----
    connected_to_internet: Optional[bool] = None
    has_software_updates: Optional[bool] = None
    uses_ai: Optional[bool] = None
    processes_personal_data: Optional[bool] = None
    is_critical_infra_supplier: Optional[bool] = None
    has_risk_assessment: Optional[bool] = None
    has_technical_file: Optional[bool] = None
    has_operating_manual: Optional[bool] = None
    has_sbom: Optional[bool] = None
    has_vuln_management: Optional[bool] = None
    has_update_mechanism: Optional[bool] = None
    has_incident_response: Optional[bool] = None
    has_supply_chain_mgmt: Optional[bool] = None
    ce_marking_since: Optional[str] = None
    product_age: Optional[str] = None

    # ---- Navigator-gap fields added by the audit of 2026-06-26 ----
    economic_operator_role: Optional[EconomicOperatorRole] = None
    has_radio_module: Optional[bool] = None
    generates_usage_data: Optional[bool] = None
    lifecycle_phase: Optional[CanonicalLifecyclePhase] = None
    components: List[ProductComponent] = Field(default_factory=list)
    has_safety_function: Optional[bool] = None
    safety_function_description: Optional[str] = None
    # Kept separate from has_safety_function (safety vs security split).
    has_security_function: Optional[bool] = None
    has_remote_access: Optional[bool] = None
    has_embedded_software: Optional[bool] = None
    is_machine: Optional[bool] = None
    is_component: Optional[bool] = None
    is_spare_part: Optional[bool] = None

    # ---- company / market context (NIS2 + scope; sourced from the company profile) ----
    b2b_or_b2c: Optional[str] = None
    sector_industry: Optional[str] = None
    company_size: Optional[str] = None
    primary_jurisdiction: Optional[str] = None

    # ---- AI context (classification stays delegated to ai-act/ucca) ----
    ai_integration_type: List[str] = Field(default_factory=list)
    human_oversight_level: Optional[str] = None

    # ---- forward-looking environmental domain ----
    environmental: EnvironmentalImpact = Field(default_factory=EnvironmentalImpact)
|
||||||
@@ -0,0 +1,59 @@
|
|||||||
|
"""company-profile -> CanonicalProductRegulatoryProfile (prefill, acceptance #2).
|
||||||
|
|
||||||
|
Pulls master data (industry, business model, size, markets) and the conditional
|
||||||
|
`machine_builder` block (camelCase JSONB keys, defined frontend-side) so the user
|
||||||
|
re-answers nothing. The machineBuilder block is the richest product/safety/
|
||||||
|
connectivity source — note it is industry-gated in the UI, so a prefill may find
|
||||||
|
it empty; that is fine (fields stay None = unknown).
|
||||||
|
"""
|
||||||
|
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
from typing import Any, Dict, List
|
||||||
|
|
||||||
|
from .canonical import CanonicalProductRegulatoryProfile
|
||||||
|
|
||||||
|
_EU_MEMBER_HINTS = {"DE", "AT", "FR", "IT", "NL", "LU", "LI", "EU", "EWR", "EEA", "DACH"}
|
||||||
|
|
||||||
|
|
||||||
|
def _markets(p: Dict[str, Any], mb: Dict[str, Any]) -> List[str]:
|
||||||
|
out: List[str] = []
|
||||||
|
for source in (p.get("target_markets"), mb.get("exportMarkets"), [p.get("primary_jurisdiction")], [p.get("headquarters_country")]):
|
||||||
|
for m in source or []:
|
||||||
|
if m and m not in out:
|
||||||
|
out.append(m)
|
||||||
|
return out
|
||||||
|
|
||||||
|
|
||||||
|
def _is_machine(mb: Dict[str, Any]) -> Any:
|
||||||
|
types = mb.get("productTypes")
|
||||||
|
if types:
|
||||||
|
return True
|
||||||
|
return None
|
||||||
|
|
||||||
|
|
||||||
|
def from_company_profile(profile: Dict[str, Any]) -> CanonicalProductRegulatoryProfile:
    """Prefill a canonical profile from a company-profile dict.

    Master data is read from the top level (snake_case keys). Product, safety and
    connectivity facts are read from the optional ``machine_builder`` block (camelCase
    keys). A missing block is treated as empty, so its fields arrive as None. Empty
    strings for text fields are normalised to None (``description`` to ``""``).
    """
    machine = profile.get("machine_builder") or {}
    company = profile.get
    builder = machine.get

    # The machine-builder AI flag wins; the company-level flag is used only when the
    # former is absent (None). An explicit False is kept.
    ai_flag = builder("containsAI")
    if ai_flag is None:
        ai_flag = company("uses_ai")

    return CanonicalProductRegulatoryProfile(
        # company master data
        description=builder("productDescription") or "",
        sector_industry=company("industry") or None,
        b2b_or_b2c=company("business_model") or None,
        company_size=company("company_size") or None,
        primary_jurisdiction=company("primary_jurisdiction") or None,
        markets=_markets(profile, machine),
        # AI context
        uses_ai=ai_flag,
        ai_integration_type=list(builder("aiIntegrationType") or []),
        human_oversight_level=builder("humanOversightLevel") or None,
        # product / safety / connectivity facts from the machine-builder block
        has_embedded_software=builder("containsFirmware"),
        has_safety_function=builder("hasSafetyFunction"),
        safety_function_description=builder("safetyFunctionDescription") or None,
        has_remote_access=builder("hasRemoteAccess"),
        connected_to_internet=builder("isNetworked"),
        has_software_updates=builder("hasOTAUpdates"),
        has_risk_assessment=builder("hasRiskAssessment"),
        is_machine=_is_machine(machine),
        is_critical_infra_supplier=builder("criticalSectorClients"),
    )
|
||||||
@@ -0,0 +1,50 @@
|
|||||||
|
"""ProductWizard payload -> CanonicalProductRegulatoryProfile (lossless).
|
||||||
|
|
||||||
|
The gap-analysis ProductWizard POSTs exactly the gap.ProductProfile JSON shape
|
||||||
|
(see admin-compliance/.../ProductWizard.tsx handleSubmit). This mapper copies
|
||||||
|
every gap field verbatim so that `to_gap_profile(from_product_wizard(p))`
|
||||||
|
reproduces the gap subset of `p` byte-for-byte (acceptance #1). New Navigator
|
||||||
|
fields the wizard does not ask stay None.
|
||||||
|
"""
|
||||||
|
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
from typing import Any, Dict, Optional
|
||||||
|
|
||||||
|
from .canonical import CanonicalProductRegulatoryProfile, CanonicalProductType
|
||||||
|
|
||||||
|
|
||||||
|
def _as_product_type(value: Any) -> Optional[CanonicalProductType]:
    """Convert a raw value to ``CanonicalProductType``; return None when the enum rejects it."""
    try:
        product_type = CanonicalProductType(value)
    except ValueError:
        product_type = None
    return product_type
|
||||||
|
|
||||||
|
|
||||||
|
def from_product_wizard(payload: Dict[str, Any]) -> CanonicalProductRegulatoryProfile:
    """Map a ProductWizard payload (gap.ProductProfile JSON shape) onto the canonical profile.

    Gap fields are copied verbatim. Canonical fields the wizard does not supply are left
    at their defaults.

    Null handling: list fields fall back to ``[]``, and ``name``/``description`` fall back
    to ``""``. Both guard against an explicit JSON ``null``, which the non-optional
    list/str fields of the canonical model would reject. An unrecognised ``product_type``
    becomes None.
    """
    g = payload.get
    return CanonicalProductRegulatoryProfile(
        # `or ""` (not a .get default): the default only covers a missing key,
        # not a key that is present with value None.
        name=g("name") or "",
        description=g("description") or "",
        product_type=_as_product_type(g("product_type")),
        technologies=list(g("technologies") or []),
        data_processing=list(g("data_processing") or []),
        markets=list(g("markets") or []),
        existing_certifications=list(g("existing_certifications") or []),
        applied_norms=list(g("applied_norms") or []),
        connected_to_internet=g("connected_to_internet"),
        has_software_updates=g("has_software_updates"),
        uses_ai=g("uses_ai"),
        processes_personal_data=g("processes_personal_data"),
        is_critical_infra_supplier=g("is_critical_infra_supplier"),
        has_risk_assessment=g("has_risk_assessment"),
        has_technical_file=g("has_technical_file"),
        has_operating_manual=g("has_operating_manual"),
        has_sbom=g("has_sbom"),
        has_vuln_management=g("has_vuln_management"),
        has_update_mechanism=g("has_update_mechanism"),
        has_incident_response=g("has_incident_response"),
        has_supply_chain_mgmt=g("has_supply_chain_mgmt"),
        ce_marking_since=g("ce_marking_since"),
        product_age=g("product_age"),
    )
|
||||||
@@ -0,0 +1,41 @@
|
|||||||
|
"""CanonicalProductRegulatoryProfile -> gap.ProductProfile JSON shape.
|
||||||
|
|
||||||
|
Emits exactly the keys the Go gap engine already consumes (gap/models.go json
|
||||||
|
tags), so the gap engine runs UNCHANGED — the canonical is a superset and gap is
|
||||||
|
its lossless projection. Canonical-only fields (role/radio/components/...) are
|
||||||
|
intentionally not emitted here; they reach the reasoning side via to_reasoning.
|
||||||
|
"""
|
||||||
|
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
from typing import Any, Dict
|
||||||
|
|
||||||
|
from .canonical import CanonicalProductRegulatoryProfile
|
||||||
|
|
||||||
|
|
||||||
|
def to_gap_profile(c: CanonicalProductRegulatoryProfile) -> Dict[str, Any]:
    """Project the canonical profile onto the gap.ProductProfile JSON key set.

    Tri-state values are flattened: boolean flags go through ``bool()`` (so None becomes
    False), a missing ``product_type`` becomes ``""``, and a None
    ``ce_marking_since``/``product_age`` becomes ``""``. List fields are copied.
    Canonical-only fields are not emitted.
    """
    list_keys = (
        "technologies",
        "data_processing",
        "markets",
        "existing_certifications",
        "applied_norms",
    )
    flag_keys = (
        "connected_to_internet",
        "has_software_updates",
        "uses_ai",
        "processes_personal_data",
        "is_critical_infra_supplier",
        "has_risk_assessment",
        "has_technical_file",
        "has_operating_manual",
        "has_sbom",
        "has_vuln_management",
        "has_update_mechanism",
        "has_incident_response",
        "has_supply_chain_mgmt",
    )
    text_keys = ("ce_marking_since", "product_age")

    gap: Dict[str, Any] = {
        "name": c.name,
        "description": c.description,
        "product_type": c.product_type.value if c.product_type else "",
    }
    for key in list_keys:
        gap[key] = list(getattr(c, key))
    for key in flag_keys:
        gap[key] = bool(getattr(c, key))
    for key in text_keys:
        raw = getattr(c, key)
        gap[key] = "" if raw is None else raw
    return gap
|
||||||
@@ -0,0 +1,88 @@
|
|||||||
|
"""CanonicalProductRegulatoryProfile -> reasoning ProductProfile (adapter/DTO).
|
||||||
|
|
||||||
|
The reasoning engine stays the consumer, never the source of truth (spec): the
|
||||||
|
canonical leads, this projects it into the Python reasoning ProductProfile so the
|
||||||
|
Reasoning engine and the Go gap engine run off ONE semantic profile (acceptance
|
||||||
|
#10). AI classification is NOT done here — only `uses_ai` is forwarded; risk
|
||||||
|
classification stays delegated to ai-act/ucca (acceptance #3).
|
||||||
|
|
||||||
|
This is the ONLY one-way coupling profile -> reasoning; reasoning never imports
|
||||||
|
profile, so the reasoning layer stays hermetic.
|
||||||
|
"""
|
||||||
|
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
from typing import List, Optional
|
||||||
|
|
||||||
|
from compliance.reasoning.enums import ManufacturerRole, MarketModel, ProductLifecyclePhase
|
||||||
|
from compliance.reasoning.schemas import ProductProfile
|
||||||
|
|
||||||
|
from .canonical import CanonicalProductRegulatoryProfile, CanonicalProductType
|
||||||
|
|
||||||
|
# Product types that count as a software signal on their own.
_SOFTWARE_TYPES = {
    CanonicalProductType.SOFTWARE,
    CanonicalProductType.SAAS,
    CanonicalProductType.IOT,
}

# Technology labels that count as a software signal.
_SOFTWARE_TECH = {
    "ai",
    "api",
    "blockchain",
    "cloud",
    "database",
    "encryption",
    "ota_updates",
}
|
||||||
|
_EU_HINTS = {"DE", "AT", "FR", "IT", "NL", "LU", "LI", "EU", "EWR", "EEA", "DACH"}
|
||||||
|
# Company-profile business-model label -> reasoning MarketModel.
_B2X = {
    "B2B": MarketModel.B2B,
    "B2C": MarketModel.B2C,
    "B2B_B2C": MarketModel.BOTH,
    "B2B2C": MarketModel.BOTH,
}
|
||||||
|
|
||||||
|
|
||||||
|
def _or_none(*values: Optional[bool]) -> Optional[bool]:
|
||||||
|
"""True if any value is truthy; None if all are None/absent; else False."""
|
||||||
|
if any(v is True for v in values):
|
||||||
|
return True
|
||||||
|
if all(v is None for v in values):
|
||||||
|
return None
|
||||||
|
return False
|
||||||
|
|
||||||
|
|
||||||
|
def _has_software(c: CanonicalProductRegulatoryProfile) -> Optional[bool]:
    """Derive the tri-state "has software" fact from explicit flags and indirect signals.

    Explicit flags: ``has_embedded_software``, ``has_software_updates``, ``uses_ai``.
    Indirect signals (product type in ``_SOFTWARE_TYPES``, a technology in
    ``_SOFTWARE_TECH``) can only contribute True, never False.
    """
    signals = [c.has_embedded_software, c.has_software_updates, c.uses_ai]
    signals.append(True if c.product_type in _SOFTWARE_TYPES else None)
    signals.append(None if _SOFTWARE_TECH.isdisjoint(c.technologies) else True)
    return _or_none(*signals)
|
||||||
|
|
||||||
|
|
||||||
|
def _eu_market(markets: List[str]) -> Optional[bool]:
    """Tell whether any listed market matches an EU hint.

    Returns None for an empty list; otherwise True or False depending on whether at
    least one entry is contained in ``_EU_HINTS`` (exact, case-sensitive match).
    """
    if not markets:
        return None
    return bool(set(markets) & _EU_HINTS)
|
||||||
|
|
||||||
|
|
||||||
|
def _has_radio(c: CanonicalProductRegulatoryProfile) -> Optional[bool]:
    """Resolve the tri-state radio-module fact.

    An explicit ``has_radio_module`` (True or False) always wins. Otherwise a component
    of kind ``radio_module`` yields True. Without either, the fact stays None.
    """
    explicit = c.has_radio_module
    if explicit is not None:
        return explicit
    for comp in c.components:
        if comp.kind.value == "radio_module":
            return True
    return None
|
||||||
|
|
||||||
|
|
||||||
|
def to_reasoning_profile(c: CanonicalProductRegulatoryProfile) -> ProductProfile:
    """Project the canonical profile into the reasoning ``ProductProfile``.

    Enum-valued fields are re-created in the reasoning enums from their string values.
    A few facts get a fallback when the explicit field is None: ``is_machine`` from the
    MACHINERY product type, ``generates_usage_data`` from a ``"telemetry"`` entry in
    ``data_processing``. Fallbacks only ever produce True, never False. An empty name
    is replaced by ``"Produkt"``.
    """
    role = None
    if c.economic_operator_role:
        role = ManufacturerRole(c.economic_operator_role.value)

    phase = None
    if c.lifecycle_phase:
        phase = ProductLifecyclePhase(c.lifecycle_phase.value)

    market_model = None
    if c.b2b_or_b2c:
        market_model = _B2X.get(c.b2b_or_b2c)  # unknown labels map to None

    machine_flag = c.is_machine
    if machine_flag is None and c.product_type == CanonicalProductType.MACHINERY:
        machine_flag = True

    usage_data_flag = c.generates_usage_data
    if usage_data_flag is None and "telemetry" in c.data_processing:
        usage_data_flag = True

    type_labels = [c.product_type.value] if c.product_type else []
    cloud_flag = True if "cloud" in c.technologies else None

    return ProductProfile(
        product_name=c.name or "Produkt",
        product_profile_id=c.product_profile_id,
        manufacturer_role=role,
        product_type=type_labels,
        has_software=_has_software(c),
        has_embedded_software=c.has_embedded_software,
        has_remote_access=c.has_remote_access,
        has_cloud_connection=cloud_flag,
        has_ai_functionality=c.uses_ai,
        has_radio_module=_has_radio(c),
        has_safety_function=c.has_safety_function,
        generates_usage_data=usage_data_flag,
        is_machine=machine_flag,
        is_component=c.is_component,
        is_spare_part=c.is_spare_part,
        eu_market=_eu_market(c.markets),
        b2b_or_b2c=market_model,
        lifecycle_phase=phase,
        company_size=c.company_size,
        sector=c.sector_industry,
    )
|
||||||
@@ -0,0 +1,34 @@
|
|||||||
|
"""Regulatory Change Intelligence (RCI) — delta layer over the product-first map.
|
||||||
|
|
||||||
|
Answers "what changes relative to my existing Regulatory Map?" — NOT "what does
|
||||||
|
the new law say in general". Snapshot the pipeline into a ComplianceBaseline, then
|
||||||
|
assess a (simulated/provided) RegulatoryChange into per-obligation deltas + a
|
||||||
|
management ChangeImpactSummary. Read/reasoning only — no UI, no ingestion, no RAG,
|
||||||
|
no new regulations/controls, no legal evaluation outside the stored map.
|
||||||
|
"""
|
||||||
|
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
from .baseline import create_baseline
|
||||||
|
from .delta_engine import assess_change
|
||||||
|
from .schemas import (
|
||||||
|
ChangeAssessment,
|
||||||
|
ChangeImpactSummary,
|
||||||
|
ChangeType,
|
||||||
|
ComplianceBaseline,
|
||||||
|
DeltaType,
|
||||||
|
ObligationDelta,
|
||||||
|
RegulatoryChange,
|
||||||
|
)
|
||||||
|
|
||||||
|
__all__ = [
|
||||||
|
"create_baseline",
|
||||||
|
"assess_change",
|
||||||
|
"ComplianceBaseline",
|
||||||
|
"RegulatoryChange",
|
||||||
|
"ObligationDelta",
|
||||||
|
"ChangeImpactSummary",
|
||||||
|
"ChangeAssessment",
|
||||||
|
"DeltaType",
|
||||||
|
"ChangeType",
|
||||||
|
]
|
||||||
@@ -0,0 +1,44 @@
|
|||||||
|
"""Snapshot the current product-first pipeline into a ComplianceBaseline.
|
||||||
|
|
||||||
|
This is the ONLY place RCI runs the pipeline — to freeze a point-in-time map +
|
||||||
|
registry-linked obligations + their required evidence. Everything downstream
|
||||||
|
(delta computation) works purely against this snapshot, never re-evaluating.
|
||||||
|
"""
|
||||||
|
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
from typing import Dict, List, Optional
|
||||||
|
|
||||||
|
from compliance.profile.canonical import CanonicalProductRegulatoryProfile
|
||||||
|
from compliance.profile.to_reasoning import to_reasoning_profile
|
||||||
|
from compliance.reasoning.obligation_engine import derive_obligations
|
||||||
|
from compliance.regulatory_map.renderer import render_regulatory_map
|
||||||
|
|
||||||
|
from .schemas import ComplianceBaseline
|
||||||
|
|
||||||
|
|
||||||
|
def create_baseline(
    profile: CanonicalProductRegulatoryProfile,
    evidence_refs: Optional[Dict[str, List[str]]] = None,
    baseline_id: str = "baseline",
    created_at: Optional[str] = None,
) -> ComplianceBaseline:
    """Run the pipeline once for ``profile`` and package the result as a baseline.

    Args:
        profile: Canonical product profile the regulatory map is rendered for.
        evidence_refs: Optional mapping obligation_id -> evidence the customer
            already holds. ``None`` (or an empty mapping) yields an empty dict.
        baseline_id: Identifier stored on the baseline.
        created_at: Optional timestamp string, stored as given.

    Returns:
        A ``ComplianceBaseline`` holding the profile, the rendered map, the ids
        of all registry-anchored applicable obligations and, per such
        obligation, a copy of its required-evidence list.
    """
    map_snapshot = render_regulatory_map(profile)
    derived = derive_obligations(to_reasoning_profile(profile))

    # Only registry-linked obligations enter the baseline.
    anchored = [ob for ob in derived.applicable_obligations if ob.registry_anchor]
    evidence_required: Dict[str, List[str]] = {
        ob.obligation_id: list(ob.required_evidence) for ob in anchored
    }

    return ComplianceBaseline(
        baseline_id=baseline_id,
        product_profile_snapshot=profile,
        regulatory_map_snapshot=map_snapshot,
        applicable_obligations=[ob.obligation_id for ob in anchored],
        obligation_evidence_required=evidence_required,
        evidence_refs=dict(evidence_refs) if evidence_refs else {},
        created_at=created_at,
    )
|
||||||
@@ -0,0 +1,114 @@
|
|||||||
|
"""RCI delta engine — assess a RegulatoryChange against a ComplianceBaseline.
|
||||||
|
|
||||||
|
Answers "what changes relative to my existing Map?" deterministically, working
|
||||||
|
ONLY against the stored baseline (no re-evaluation of scope, no new legal
|
||||||
|
assessment outside the map). Per-obligation classification -> ObligationDelta;
|
||||||
|
aggregate -> ChangeImpactSummary.
|
||||||
|
"""
|
||||||
|
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
from typing import List, Tuple
|
||||||
|
|
||||||
|
from compliance.reasoning.enums import Confidence
|
||||||
|
|
||||||
|
from .schemas import (
|
||||||
|
ChangeAssessment,
|
||||||
|
ChangeImpactSummary,
|
||||||
|
ChangeType,
|
||||||
|
ComplianceBaseline,
|
||||||
|
DeltaType,
|
||||||
|
ObligationDelta,
|
||||||
|
RegulatoryChange,
|
||||||
|
)
|
||||||
|
|
||||||
|
# Delta types treated as "needs action": assess_change only computes missing
# evidence for these, and _summary lists them under what_matters_for_this_product.
_ACTION = {DeltaType.NEW, DeltaType.CHANGED, DeltaType.NEEDS_REVIEW}
|
||||||
|
|
||||||
|
|
||||||
|
def _classify(
    in_base: bool, has_ev: bool, change_type: ChangeType, rel_app: bool, rel_unc: bool
) -> Tuple[DeltaType, str, Confidence]:
    """Classify one affected obligation against the baseline.

    Args:
        in_base: The obligation id is part of the baseline's obligations.
        has_ev: The baseline holds at least one evidence ref for the obligation.
        change_type: Kind of regulatory change being assessed.
        rel_app: The change touches a regulation that is applicable in the map.
        rel_unc: The change touches a regulation that is uncertain in the map.

    Returns:
        ``(delta_type, reason, confidence)``; ``reason`` is a German
        user-facing sentence.
    """
    if not rel_app:
        # The change does not reach any applicable regulation: it is either
        # about an uncertain one (review first) or irrelevant to this map.
        if rel_unc:
            return (
                DeltaType.NEEDS_REVIEW,
                "Betrifft ein für Ihr Produkt noch UNSICHERES Regelwerk — erst Anwendbarkeit klären.",
                Confidence.LOW,
            )
        return DeltaType.NOT_APPLICABLE, "Die Änderung betrifft kein Regelwerk Ihrer Map.", Confidence.HIGH

    if change_type == ChangeType.REPEAL:
        if not in_base:
            return DeltaType.NOT_APPLICABLE, "Aufhebung betrifft keine Ihrer bestehenden Pflichten.", Confidence.HIGH
        return DeltaType.REMOVED, "Regelwerk/Pflicht aufgehoben — entfällt für Ihr Produkt.", Confidence.HIGH

    if not in_base:
        return DeltaType.NEW, "Neue Pflicht durch die Änderung — bisher nicht in Ihrer Map.", Confidence.MEDIUM

    # Existing obligation from here on.
    if change_type != ChangeType.GUIDANCE_UPDATE:
        return (
            DeltaType.CHANGED,
            "Bestehende Pflicht inhaltlich geändert — Umsetzung und Nachweis prüfen.",
            Confidence.MEDIUM,
        )

    if not has_ev:
        return DeltaType.NEEDS_REVIEW, "Bestehende Pflicht ohne Nachweis — Leitlinien-Update prüfen.", Confidence.MEDIUM
    return (
        DeltaType.ALREADY_COVERED,
        "Bestehende Pflicht mit vorhandenen Nachweisen — Leitlinien-Update vermutlich abgedeckt.",
        Confidence.MEDIUM,
    )
|
||||||
|
|
||||||
|
|
||||||
|
def assess_change(baseline: ComplianceBaseline, change: RegulatoryChange) -> ChangeAssessment:
    """Assess ``change`` against the stored ``baseline``.

    Relevance is decided purely from the regulation ids in the baseline's map
    snapshot (applicable vs. uncertain). One ``ObligationDelta`` is produced
    per entry of ``change.affected_obligations``, in input order.

    Returns:
        A ``ChangeAssessment`` with the per-obligation deltas, the
        ``affects_product`` flag (change touches an applicable or uncertain
        regulation) and the aggregated summary.
    """
    regulatory_map = baseline.regulatory_map_snapshot
    touched_regulations = set(change.affected_regulations)

    applicable_ids = {entry.regulation_id for entry in regulatory_map.applicable_regulations}
    uncertain_ids = {entry.regulation_id for entry in regulatory_map.uncertain_regulations}
    hits_applicable = not touched_regulations.isdisjoint(applicable_ids)
    hits_uncertain = not touched_regulations.isdisjoint(uncertain_ids)

    known_obligations = set(baseline.applicable_obligations)

    deltas: List[ObligationDelta] = []
    for obligation_id in change.affected_obligations:
        evidence_present = baseline.evidence_refs.get(obligation_id, [])
        evidence_required = baseline.obligation_evidence_required.get(obligation_id, [])
        delta_type, reason, confidence = _classify(
            obligation_id in known_obligations,
            bool(evidence_present),
            change.change_type,
            hits_applicable,
            hits_uncertain,
        )
        # Missing evidence is only reported for deltas that call for action.
        if delta_type in _ACTION:
            missing = [item for item in evidence_required if item not in evidence_present]
        else:
            missing = []
        deltas.append(
            ObligationDelta(
                obligation_id=obligation_id,
                delta_type=delta_type,
                reason=reason,
                affected_evidence=list(evidence_present),
                missing_evidence=missing,
                confidence=confidence,
            )
        )

    unsupported = [entry.domain for entry in regulatory_map.unsupported_domains]
    return ChangeAssessment(
        change_id=change.change_id,
        affects_product=hits_applicable or hits_uncertain,
        deltas=deltas,
        summary=_summary(deltas, unsupported),
    )
|
||||||
|
|
||||||
|
|
||||||
|
def _ids(deltas: List[ObligationDelta], *types: DeltaType) -> List[str]:
|
||||||
|
wanted = set(types)
|
||||||
|
return [d.obligation_id for d in deltas if d.delta_type in wanted]
|
||||||
|
|
||||||
|
|
||||||
|
def _summary(deltas: List[ObligationDelta], unsupported: List[str]) -> ChangeImpactSummary:
    """Aggregate per-obligation deltas into the management summary.

    ``what_changed`` is a German one-line tally. Note that CHANGED deltas are
    counted both under "geändert" and under "zu prüfen", mirroring the
    ``needs_review`` list, which contains NEEDS_REVIEW and CHANGED ids.
    ``unsupported`` is passed through unchanged.
    """
    covered_ids = _ids(deltas, DeltaType.ALREADY_COVERED)
    review_ids = _ids(deltas, DeltaType.NEEDS_REVIEW, DeltaType.CHANGED)
    irrelevant_ids = _ids(deltas, DeltaType.NOT_APPLICABLE)

    # Order matches the placeholders of the tally sentence below.
    tally = (
        len(_ids(deltas, DeltaType.NEW)),
        len(_ids(deltas, DeltaType.CHANGED)),
        len(_ids(deltas, DeltaType.REMOVED)),
        len(covered_ids),
        len(review_ids),
        len(irrelevant_ids),
    )
    sentence = "%d neu, %d geändert, %d entfällt, %d bereits abgedeckt, %d zu prüfen, %d nicht relevant." % tally

    return ChangeImpactSummary(
        what_changed=sentence,
        what_matters_for_this_product=_ids(deltas, *_ACTION),
        already_covered=covered_ids,
        needs_review=review_ids,
        not_relevant=irrelevant_ids,
        unsupported_domains=unsupported,
    )
|
||||||
@@ -0,0 +1,92 @@
|
|||||||
|
"""Regulatory Change Intelligence (RCI) — domain objects.
|
||||||
|
|
||||||
|
RCI is a read-/reasoning layer ON TOP of the product-first pipeline. It answers
|
||||||
|
"what changes relative to my existing Regulatory Map?" — NOT "what does the new
|
||||||
|
law say in general". A RegulatoryChange is simulated/provided INPUT (no ingestion,
|
||||||
|
no newsletter/mailbox, no RAG); the delta is computed against a stored
|
||||||
|
ComplianceBaseline (snapshot of the map).
|
||||||
|
|
||||||
|
`delta_type` is a THIRD vocabulary — distinct from `ClaimCoverage` (Welt 1, what
|
||||||
|
the customer claims) and `ComplianceStatus` (Welt 2, verified evidence). The three
|
||||||
|
must never be conflated. These are application/reasoning types, NOT
|
||||||
|
compliance-meta-model classes (architecture freeze v1.0 untouched).
|
||||||
|
"""
|
||||||
|
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
from enum import Enum
|
||||||
|
from typing import Dict, List, Optional
|
||||||
|
|
||||||
|
from pydantic import BaseModel, Field
|
||||||
|
|
||||||
|
from compliance.profile.canonical import CanonicalProductRegulatoryProfile
|
||||||
|
from compliance.reasoning.enums import AuthorityLevel, Confidence
|
||||||
|
from compliance.regulatory_map.schemas import RegulatoryMap
|
||||||
|
|
||||||
|
|
||||||
|
class DeltaType(str, Enum):
    """Per-obligation outcome of assessing a regulatory change against a baseline.

    String-valued so members compare equal to their plain string values.
    """

    NEW = "new"  # obligation now applies that was not in the baseline
    CHANGED = "changed"  # existing obligation substantively modified
    REMOVED = "removed"  # obligation no longer applies (repeal)
    ALREADY_COVERED = "already_covered"  # existing obligation, evidence likely suffices
    NEEDS_REVIEW = "needs_review"  # a human must check
    NOT_APPLICABLE = "not_applicable"  # change does not touch this product's map
|
||||||
|
|
||||||
|
|
||||||
|
class ChangeType(str, Enum):
    """Kind of regulatory change supplied as input (string-valued enum)."""

    NEW_REGULATION = "new_regulation"
    AMENDMENT = "amendment"
    REPEAL = "repeal"
    GUIDANCE_UPDATE = "guidance_update"
|
||||||
|
|
||||||
|
|
||||||
|
# ── stored snapshot ──────────────────────────────────────────────────────
|
||||||
|
class ComplianceBaseline(BaseModel):
    """Stored snapshot that regulatory changes are assessed against.

    Holds the product profile, the rendered regulatory map, the ids of the
    obligations in the baseline and the evidence required / already present
    per obligation.
    """

    baseline_id: str
    product_profile_snapshot: CanonicalProductRegulatoryProfile
    regulatory_map_snapshot: RegulatoryMap
    applicable_obligations: List[str] = Field(default_factory=list)  # registry-linked obligation_ids
    # required evidence per obligation (derived) — to compute missing_evidence
    obligation_evidence_required: Dict[str, List[str]] = Field(default_factory=dict)
    # evidence the customer ALREADY has, per obligation (provided)
    evidence_refs: Dict[str, List[str]] = Field(default_factory=dict)
    # optional creation timestamp as a string; format not enforced here
    created_at: Optional[str] = None
|
||||||
|
|
||||||
|
|
||||||
|
# ── simulated/provided change (INPUT — never ingested) ───────────────────
|
||||||
|
class RegulatoryChange(BaseModel):
    """A regulatory change supplied as input to the delta assessment.

    ``affected_regulations`` and ``affected_obligations`` are lists of ids;
    only ``change_id`` and ``change_type`` are mandatory.
    """

    change_id: str
    source: str = "simulated"  # origin label; defaults to a simulated change
    affected_regulations: List[str] = Field(default_factory=list)
    affected_obligations: List[str] = Field(default_factory=list)
    change_type: ChangeType
    effective_date: Optional[str] = None  # free-form string; format not enforced here
    authority_level: AuthorityLevel = AuthorityLevel.LEGAL_TEXT
    summary: str = ""
|
||||||
|
|
||||||
|
|
||||||
|
# ── per-obligation delta ─────────────────────────────────────────────────
|
||||||
|
class ObligationDelta(BaseModel):
    """Assessment result for a single obligation touched by a change."""

    obligation_id: str
    delta_type: DeltaType
    reason: str  # human-readable justification for delta_type
    affected_evidence: List[str] = Field(default_factory=list)  # evidence already present for it
    missing_evidence: List[str] = Field(default_factory=list)  # required but not yet present
    confidence: Confidence
|
||||||
|
|
||||||
|
|
||||||
|
# ── management-level summary ──────────────────────────────────────────────
|
||||||
|
class ChangeImpactSummary(BaseModel):
    """Management-level roll-up of the per-obligation deltas.

    All list fields hold obligation ids except ``unsupported_domains``.
    """

    what_changed: str = ""  # one-line tally sentence
    what_matters_for_this_product: List[str] = Field(default_factory=list)  # need action
    already_covered: List[str] = Field(default_factory=list)
    needs_review: List[str] = Field(default_factory=list)
    not_relevant: List[str] = Field(default_factory=list)
    unsupported_domains: List[str] = Field(default_factory=list)
|
||||||
|
|
||||||
|
|
||||||
|
class ChangeAssessment(BaseModel):
    """Complete result of assessing one regulatory change against a baseline."""

    change_id: str
    affects_product: bool  # whether the change touches the product's map at all
    deltas: List[ObligationDelta] = Field(default_factory=list)
    summary: ChangeImpactSummary
|
||||||
@@ -0,0 +1,27 @@
|
|||||||
|
"""Regulatory Reasoning Engine.
|
||||||
|
|
||||||
|
A deterministic reasoning layer ON TOP of the Legal Knowledge Graph (obligation
|
||||||
|
registry) and the Compliance Execution Graph (control mapping / evidence). It
|
||||||
|
answers, for a concrete product: which regulations apply, which obligations
|
||||||
|
follow, whether the customer's implementation covers them, and whether a
|
||||||
|
customer interpretation is legally sound.
|
||||||
|
|
||||||
|
No new RAG, no new controls, no DB schema changes — scope & reasoning metamodel
|
||||||
|
only (spec §14).
|
||||||
|
"""
|
||||||
|
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
from .claim_normalizer import normalize_claim
|
||||||
|
from .implementation_engine import reason_implementation_claim
|
||||||
|
from .interpretation_engine import assess_interpretation
|
||||||
|
from .obligation_engine import derive_obligations
|
||||||
|
from .scope_engine import discover_scope
|
||||||
|
|
||||||
|
__all__ = [
|
||||||
|
"discover_scope",
|
||||||
|
"derive_obligations",
|
||||||
|
"normalize_claim",
|
||||||
|
"reason_implementation_claim",
|
||||||
|
"assess_interpretation",
|
||||||
|
]
|
||||||
@@ -0,0 +1,45 @@
|
|||||||
|
"""Customer implementation claim normaliser (spec §4.6).
|
||||||
|
|
||||||
|
Turns a free-text statement ("Wir haben einen Update-Prozess.") into structured
|
||||||
|
capabilities + related topics + weakness qualifiers. Deterministic substring
|
||||||
|
matching — the claim_id is a stable hash so the same statement always maps to
|
||||||
|
the same id (no randomness, replay-safe).
|
||||||
|
"""
|
||||||
|
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import hashlib
|
||||||
|
from typing import List, Optional
|
||||||
|
|
||||||
|
from .schemas import CustomerImplementationClaim
|
||||||
|
from .taxonomy_claims import match_capabilities, match_qualifiers, topics_for
|
||||||
|
|
||||||
|
|
||||||
|
def _claim_id(raw_statement: str) -> str:
|
||||||
|
digest = hashlib.sha1(raw_statement.strip().lower().encode("utf-8")).hexdigest()
|
||||||
|
return "claim_%s" % digest[:10]
|
||||||
|
|
||||||
|
|
||||||
|
def _normalized(capabilities: List[str], qualifiers: List[str]) -> str:
|
||||||
|
if not capabilities:
|
||||||
|
return "Keine bekannte Compliance-Fähigkeit aus der Aussage ableitbar."
|
||||||
|
text = "Fähigkeiten: " + ", ".join(capabilities)
|
||||||
|
if qualifiers:
|
||||||
|
text += " | Einschränkungen: " + ", ".join(qualifiers)
|
||||||
|
return text
|
||||||
|
|
||||||
|
|
||||||
|
def normalize_claim(
    raw_statement: str, claim_id: Optional[str] = None, evidence_refs: Optional[List[str]] = None
) -> CustomerImplementationClaim:
    """Turn a free-text customer statement into a structured claim.

    Args:
        raw_statement: The customer's statement, stored unchanged on the claim.
        claim_id: Explicit id; when falsy, a hash-based id is derived from the
            statement.
        evidence_refs: Evidence references to attach; falsy values become ``[]``.

    Returns:
        A ``CustomerImplementationClaim`` with matched capabilities, the topics
        related to them, matched qualifiers and a normalized summary line.
    """
    found_capabilities = match_capabilities(raw_statement)
    found_qualifiers = match_qualifiers(raw_statement)

    resolved_id = claim_id if claim_id else _claim_id(raw_statement)
    summary_line = _normalized(found_capabilities, found_qualifiers)
    topics = topics_for(found_capabilities)

    return CustomerImplementationClaim(
        claim_id=resolved_id,
        raw_statement=raw_statement,
        normalized_claim=summary_line,
        claimed_capability=found_capabilities,
        related_topics=topics,
        qualifiers=found_qualifiers,
        evidence_refs=evidence_refs if evidence_refs else [],
    )
|
||||||
@@ -0,0 +1,92 @@
|
|||||||
|
"""Enumerations for the Regulatory Reasoning Engine.
|
||||||
|
|
||||||
|
Kept dependency-free and Python 3.9 compatible (str-Enums, no `|` unions).
|
||||||
|
The reasoning layer sits ON TOP of the Legal Knowledge Graph (obligation
|
||||||
|
registry) and the Compliance Execution Graph (control mapping / evidence).
|
||||||
|
See memory `project_compliance_graph.md` for the cross-session contract.
|
||||||
|
"""
|
||||||
|
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
from enum import Enum
|
||||||
|
|
||||||
|
|
||||||
|
class ManufacturerRole(str, Enum):
    """Role a party holds with respect to a product (string-valued enum)."""

    MANUFACTURER = "manufacturer"
    IMPORTER = "importer"
    DISTRIBUTOR = "distributor"
    INTEGRATOR = "integrator"
    OPERATOR = "operator"
    SERVICE_PROVIDER = "service_provider"
|
||||||
|
|
||||||
|
|
||||||
|
class ProductLifecyclePhase(str, Enum):
    """Phase of a product's lifecycle (string-valued enum)."""

    DEVELOPMENT = "development"
    PLACING_ON_MARKET = "placing_on_market"
    OPERATION = "operation"
    MAINTENANCE = "maintenance"
    UPDATE = "update"
    END_OF_LIFE = "end_of_life"
|
||||||
|
|
||||||
|
|
||||||
|
class MarketModel(str, Enum):
    """Whether a product is sold B2B, B2C, or both (string-valued enum)."""

    B2B = "b2b"
    B2C = "b2c"
    BOTH = "both"
|
||||||
|
|
||||||
|
|
||||||
|
class ApplicabilityStatus(str, Enum):
    """Applicability verdict, including partial and uncertain outcomes."""

    APPLICABLE = "applicable"
    PARTIALLY_APPLICABLE = "partially_applicable"
    UNCERTAIN = "uncertain"
    NOT_APPLICABLE = "not_applicable"
|
||||||
|
|
||||||
|
|
||||||
|
class Confidence(str, Enum):
    """Three-level confidence attached to reasoning results."""

    HIGH = "high"
    MEDIUM = "medium"
    LOW = "low"
|
||||||
|
|
||||||
|
|
||||||
|
class AuthorityLevel(str, Enum):
    """How binding a statement is — answers MUST visibly separate these.

    Ranges from binding legal text down to an internal interpretation.
    """

    LEGAL_TEXT = "legal_text"
    RECITAL = "recital"
    GUIDANCE = "guidance"
    HARMONIZED_STANDARD = "harmonized_standard"
    TECHNICAL_STANDARD = "technical_standard"
    BEST_PRACTICE = "best_practice"
    INTERNAL_INTERPRETATION = "internal_interpretation"
|
||||||
|
|
||||||
|
|
||||||
|
class OverlapType(str, Enum):
    """How two overlapping obligations relate to each other."""

    IDENTICAL = "identical"
    SIMILAR = "similar"
    COMPLEMENTARY = "complementary"
    CONFLICTING = "conflicting"
    DIFFERENT_SCOPE = "different_scope"
|
||||||
|
|
||||||
|
|
||||||
|
class ClaimCoverage(str, Enum):
    """How a customer's *claim* relates to an obligation — Welt 1 (reasoning).

    This is NOT a conformity verdict. It judges only the customer's statement,
    never whether the obligation is actually met. The real compliance verdict
    (fulfilled/open/unclear, derived from verified evidence) is
    `ComplianceStatus`, owned by the Compliance Execution Graph — the two must
    never be conflated.
    """

    POTENTIALLY_ADDRESSES = "potentially_addresses"
    PARTIALLY_ADDRESSES = "partially_addresses"
    DOES_NOT_ADDRESS = "does_not_address"
    INSUFFICIENT_INFORMATION = "insufficient_information"
|
||||||
|
|
||||||
|
|
||||||
|
class InterpretationVerdict(str, Enum):
    """Verdict on a customer's legal interpretation (string-valued enum)."""

    PLAUSIBLE = "plausible"
    TOO_NARROW = "too_narrow"
    TOO_BROAD = "too_broad"
    PARTIALLY_CORRECT = "partially_correct"
    UNSUPPORTED = "unsupported"
    UNCERTAIN = "uncertain"
|
||||||
@@ -0,0 +1,158 @@
|
|||||||
|
"""Implementation reasoning (spec Modus 3) — Welt 1 only.
|
||||||
|
|
||||||
|
Maps a free-text claim ("Wir haben SBOMs und machen Updates, wenn Kunden Fehler
|
||||||
|
melden.") onto the product's applicable obligations and reports, per obligation,
|
||||||
|
whether the *claim* potentially/partially/does-not address it — plus the
|
||||||
|
evidence that WOULD be needed to prove real implementation.
|
||||||
|
|
||||||
|
This is NOT a conformity verdict. It judges the customer's statement, never
|
||||||
|
whether the obligation is met. The real verdict (ComplianceStatus: erfüllt/
|
||||||
|
offen/unklar from verified evidence) lives in the Compliance Execution Graph.
|
||||||
|
The four reasoning layers: claim -> interpretation (capabilities/topics on the
|
||||||
|
claim) -> potential obligation coverage (`claim_coverage`) -> evidence required.
|
||||||
|
"""
|
||||||
|
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
from typing import Dict, List
|
||||||
|
|
||||||
|
from .claim_normalizer import normalize_claim
|
||||||
|
from .enums import ClaimCoverage, Confidence
|
||||||
|
from .obligation_engine import derive_obligations
|
||||||
|
from .schemas import (
|
||||||
|
ClaimObligationMapping,
|
||||||
|
CustomerImplementationClaim,
|
||||||
|
ImplementationReasoningResponse,
|
||||||
|
ProductProfile,
|
||||||
|
)
|
||||||
|
from .taxonomy_claims import topics_for
|
||||||
|
|
||||||
|
# German user-facing disclaimer attached to every implementation-reasoning
# response: the result interprets the customer's claim only and is not a
# conformity statement.
DISCLAIMER = (
    "Diese Auswertung interpretiert ausschließlich die Kundenaussage (ClaimCoverage, Welt 1). "
    "Sie ist KEINE Konformitätsaussage — der tatsächliche Compliance-Status (ComplianceStatus, "
    "Welt 2) ergibt sich erst aus geprüften Nachweisen im Compliance Execution Graph."
)
|
||||||
|
|
||||||
|
# Typical sub-elements a capability still misses when only partially claimed.
# Keyed by capability id; values are German user-facing gap descriptions that
# _missing_for collects for an obligation's required capabilities.
STANDARD_GAPS: Dict[str, List[str]] = {
    "software_bill_of_materials": [
        "Vulnerability-Monitoring der Komponenten",
        "Bewertung betroffener Komponenten",
        "Lieferantenprozess",
    ],
    "secure_updates": [
        "aktive Schwachstellenüberwachung",
        "Patch-Bewertung",
        "Fristen und Verantwortlichkeiten",
        "Nachweis der Updatefähigkeit",
    ],
    "vulnerability_management": [
        "definierter Vulnerability-Handling-Prozess",
        "Priorisierung und Fristen",
    ],
    "authentication": ["MFA für privilegierte Zugänge", "keine Standard-Zugangsdaten"],
    "security_logging": ["Schutz der Logs vor Manipulation", "Monitoring/Alerting"],
    "software_integrity": ["Signierung der Updates", "Verifikation der Update-Signatur"],
    "secure_by_default": ["Härtung der Auslieferungskonfiguration", "Minimierung der Angriffsfläche"],
    "secure_communication": ["verschlüsselte Übertragung", "Integritätsschutz der Verbindung"],
    "risk_assessment": ["dokumentierte Risikobewertung", "Aufnahme in die technische Doku"],
    "technical_documentation": ["vollständige technische Unterlagen", "Aktualisierung über den Lebenszyklus"],
}
|
||||||
|
|
||||||
|
|
||||||
|
def _missing_for(capabilities: List[str]) -> List[str]:
    """Collect the standard gaps for ``capabilities``, de-duplicated.

    Gaps appear in order of first occurrence; capabilities without an entry in
    ``STANDARD_GAPS`` contribute nothing.
    """
    # A dict keeps insertion order, giving ordered de-duplication.
    collected: Dict[str, None] = {}
    for capability in capabilities:
        for gap in STANDARD_GAPS.get(capability, []):
            collected.setdefault(gap, None)
    return list(collected)
|
||||||
|
|
||||||
|
|
||||||
|
def _coverage(required: List[str], claimed: List[str], qualifiers: List[str]) -> ClaimCoverage:
    """Judge how the claimed capabilities relate to an obligation's required ones.

    Args:
        required: Capabilities the obligation requires.
        claimed: Capabilities matched in the customer's statement.
        qualifiers: Weakness qualifiers matched in the statement.

    Returns:
        INSUFFICIENT_INFORMATION when nothing is required; DOES_NOT_ADDRESS
        when no required capability is claimed or the claim is qualified as
        "absent"/"planned"; PARTIALLY_ADDRESSES for a "reactive" claim touching
        updates or vulnerability management, or when only some required
        capabilities are claimed; POTENTIALLY_ADDRESSES when all are claimed.
    """
    if not required:
        return ClaimCoverage.INSUFFICIENT_INFORMATION

    needed = set(required)
    offered = set(claimed)
    overlap = needed.intersection(offered)

    if not overlap or "absent" in qualifiers or "planned" in qualifiers:
        return ClaimCoverage.DOES_NOT_ADDRESS

    reactive_sensitive = {"secure_updates", "vulnerability_management"}
    if "reactive" in qualifiers and not overlap.isdisjoint(reactive_sensitive):
        return ClaimCoverage.PARTIALLY_ADDRESSES

    if needed.issubset(offered):
        return ClaimCoverage.POTENTIALLY_ADDRESSES
    return ClaimCoverage.PARTIALLY_ADDRESSES
|
||||||
|
|
||||||
|
|
||||||
|
def reason_implementation_claim(
    profile: ProductProfile, customer_claim: str
) -> ImplementationReasoningResponse:
    """Map a free-text customer claim onto the product's applicable obligations.

    The claim is normalized into capabilities, topics and qualifiers. Each
    applicable obligation whose required capabilities (or their topics) share
    something with the claim gets a ``ClaimObligationMapping``; unrelated
    obligations are skipped. This judges the statement only, never conformity.

    Args:
        profile: Product profile used to derive the applicable obligations.
        customer_claim: The customer's free-text statement.

    Returns:
        An ``ImplementationReasoningResponse`` with the normalized claim, the
        per-obligation mappings, the de-duplicated required evidence of every
        mapped obligation that is not "potentially addressed", a summary
        sentence and the standard disclaimer.
    """
    # Still a function-local import, but executed once per call instead of
    # once per obligation inside the loop.
    from .rules_obligations import obligation_rule

    claim = normalize_claim(customer_claim)
    obligations = derive_obligations(profile).applicable_obligations
    claimed = claim.claimed_capability
    # Loop-invariant: capabilities count as topics of the claim as well.
    claim_topics = set(claim.related_topics) | set(claimed)

    mappings: List[ClaimObligationMapping] = []
    missing_evidence: List[str] = []

    for ob in obligations:
        rule = obligation_rule(ob.obligation_id)
        required_caps = rule.required_capabilities if rule else []
        ob_topics = set(topics_for(required_caps)) | set(required_caps)
        # A directly claimed capability is contained in both topic sets, so
        # the topic intersection alone decides relevance.
        if not (ob_topics & claim_topics):
            continue  # unrelated to the claim -> don't reason about it

        coverage = _coverage(required_caps, claimed, claim.qualifiers)
        if coverage == ClaimCoverage.POTENTIALLY_ADDRESSES:
            missing: List[str] = []
        else:
            missing = _missing_for(required_caps)
            # Collect evidence still needed across obligations, keeping order.
            for ev in ob.required_evidence:
                if ev not in missing_evidence:
                    missing_evidence.append(ev)

        mappings.append(
            ClaimObligationMapping(
                claim_id=claim.claim_id,
                obligation_id=ob.obligation_id,
                claim_coverage=coverage,
                missing_elements=missing,
                required_evidence=ob.required_evidence,
                explanation=_explain(coverage, ob.title, claim.qualifiers),
                confidence=Confidence.MEDIUM,
            )
        )

    return ImplementationReasoningResponse(
        claim=claim,
        mappings=mappings,
        missing_evidence=missing_evidence,
        summary=_summary(claim, mappings),
        disclaimer=DISCLAIMER,
    )
|
||||||
|
|
||||||
|
|
||||||
|
def _explain(coverage: ClaimCoverage, title: str, qualifiers: List[str]) -> str:
    """Return the German explanation sentence for one claim/obligation mapping.

    For partial coverage a note is appended when the claim carries the
    "reactive" qualifier. Any coverage value other than the three addressed
    explicitly yields the "too little information" sentence.
    """
    if coverage == ClaimCoverage.POTENTIALLY_ADDRESSES:
        return "Die Aussage adressiert die Pflicht '%s' direkt — Nachweise erforderlich für eine Bewertung der Umsetzung." % title

    if coverage == ClaimCoverage.PARTIALLY_ADDRESSES:
        suffix = ""
        if "reactive" in qualifiers:
            suffix = " Der beschriebene Prozess wirkt reaktiv."
        return "Die Aussage adressiert die Pflicht '%s' nur teilweise.%s" % (title, suffix)

    if coverage != ClaimCoverage.DOES_NOT_ADDRESS:
        return "Zur Pflicht '%s' liegen zu wenige Angaben für eine Einordnung vor." % title
    return "Die Aussage adressiert die Pflicht '%s' nicht." % title
|
||||||
|
|
||||||
|
|
||||||
|
def _summary(claim: CustomerImplementationClaim, mappings: List[ClaimObligationMapping]) -> str:
    """Build the German one-paragraph summary of a claim assessment.

    A claim without any matched capability gets a "too unspecific" sentence;
    otherwise the mappings are tallied by coverage (direct, partial, not
    addressed).
    """
    if not claim.claimed_capability:
        return "Die Aussage ist zu unspezifisch — bitte konkretisieren, was umgesetzt wurde."

    coverages = [mapping.claim_coverage for mapping in mappings]
    direct = coverages.count(ClaimCoverage.POTENTIALLY_ADDRESSES)
    partly = coverages.count(ClaimCoverage.PARTIALLY_ADDRESSES)
    untouched = coverages.count(ClaimCoverage.DOES_NOT_ADDRESS)

    return (
        "Die beschriebene Maßnahme adressiert wahrscheinlich %d Pflicht(en) direkt und %d "
        "teilweise; %d werden durch die Aussage nicht berührt. Für eine Bewertung der tatsächlichen "
        "Umsetzung sind Nachweise erforderlich. Dies ist keine Konformitätsaussage." % (direct, partly, untouched)
    )
|
||||||
@@ -0,0 +1,65 @@
|
|||||||
|
"""Interpretation review engine (spec Modus 4).
|
||||||
|
|
||||||
|
Evaluates whether a customer's legal interpretation is plausible, too narrow,
|
||||||
|
too broad, etc. Matches the interpretation against a curated pattern library;
|
||||||
|
no match -> `uncertain` plus a request for the missing context (never invent a
|
||||||
|
verdict, spec §6.3).
|
||||||
|
"""
|
||||||
|
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import hashlib
|
||||||
|
from typing import Optional
|
||||||
|
|
||||||
|
from .enums import Confidence, InterpretationVerdict
|
||||||
|
from .schemas import InterpretationAssessment, ProductProfile
|
||||||
|
from .taxonomy_interpretations import INTERPRETATION_PATTERNS, InterpretationPattern
|
||||||
|
|
||||||
|
|
||||||
|
def _interpretation_id(raw: str) -> str:
|
||||||
|
digest = hashlib.sha1(raw.strip().lower().encode("utf-8")).hexdigest()
|
||||||
|
return "interp_%s" % digest[:10]
|
||||||
|
|
||||||
|
|
||||||
|
def _best_match(text: str) -> Optional[InterpretationPattern]:
    """Pick the pattern with the most triggers contained in ``text``.

    Matching is substring-based on the lower-cased text. Ties go to the pattern
    listed first; ``None`` is returned when no trigger matches at all.
    """
    haystack = text.lower()
    candidates = list(INTERPRETATION_PATTERNS)
    scores = [
        sum(1 for trigger in candidate.triggers if trigger in haystack)
        for candidate in candidates
    ]
    top = max(scores, default=0)
    if top == 0:
        return None
    # list.index returns the first position of the top score -> first wins ties.
    return candidates[scores.index(top)]
|
||||||
|
|
||||||
|
|
||||||
|
def assess_interpretation(
    raw_interpretation: str, profile: Optional[ProductProfile] = None
) -> InterpretationAssessment:
    """Assess a customer's legal interpretation against the pattern library.

    Args:
        raw_interpretation: The interpretation text as given by the customer.
        profile: Accepted for interface compatibility; not read by this function.

    Returns:
        The matched pattern's verdict, risks, correction and references, or —
        when no pattern matches — an ``UNCERTAIN`` assessment with low
        confidence asking for more context.
    """
    assessment_id = _interpretation_id(raw_interpretation)
    matched = _best_match(raw_interpretation)

    if matched is not None:
        return InterpretationAssessment(
            interpretation_id=assessment_id,
            raw_interpretation=raw_interpretation,
            affected_regulations=matched.affected_regulations,
            affected_obligations=matched.affected_obligations,
            assessment=matched.verdict,
            risks=matched.risks,
            corrected_interpretation=matched.corrected_interpretation,
            legal_basis_refs=matched.legal_basis_refs,
            explanation=matched.explanation,
            confidence=matched.confidence,
        )

    # No known pattern: never invent a verdict, ask for the missing context.
    request_for_context = (
        "Diese Auslegung lässt sich ohne weitere Angaben nicht bewerten. Bitte Produkt, "
        "Rolle, Marktzugang und die konkret betroffene Pflicht benennen."
    )
    return InterpretationAssessment(
        interpretation_id=assessment_id,
        raw_interpretation=raw_interpretation,
        assessment=InterpretationVerdict.UNCERTAIN,
        corrected_interpretation=request_for_context,
        explanation="Kein bekanntes Auslegungsmuster erkannt — bewusst keine Scheinsicherheit.",
        confidence=Confidence.LOW,
    )
|
||||||
@@ -0,0 +1,116 @@
|
|||||||
|
"""Applicable-obligation engine (spec Modus 2).
|
||||||
|
|
||||||
|
Maps a product profile (optionally a precomputed scope) to the concrete legal
|
||||||
|
obligations, the overlaps between them, and which evidence types satisfy more
|
||||||
|
than one obligation at once (the core USP, spec §16).
|
||||||
|
"""
|
||||||
|
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
from typing import Dict, List, Optional
|
||||||
|
|
||||||
|
from .predicates import evaluate, true_leaves
|
||||||
|
from .rules_obligations import ALL_OBLIGATIONS
|
||||||
|
from .rules_overlaps import OVERLAP_GROUPS
|
||||||
|
from .rules_regulations import FIELD_LABELS
|
||||||
|
from .rules_types import ObligationRule
|
||||||
|
from .schemas import (
|
||||||
|
ApplicableObligation,
|
||||||
|
ObligationOverlap,
|
||||||
|
ObligationsResponse,
|
||||||
|
ProductProfile,
|
||||||
|
RegulatoryScope,
|
||||||
|
)
|
||||||
|
from .scope_engine import discover_scope
|
||||||
|
|
||||||
|
|
||||||
|
def _applicable_regulation_ids(profile: ProductProfile, scope: Optional[RegulatoryScope]) -> List[str]:
|
||||||
|
if scope is None:
|
||||||
|
scope = discover_scope(profile)
|
||||||
|
return [r.regulation_id for r in scope.applicable_regulations]
|
||||||
|
|
||||||
|
|
||||||
|
def _applies_because(rule: ObligationRule, profile: ProductProfile) -> List[str]:
    """List the human-readable reasons why ``rule`` applies to ``profile``.

    Each true leaf of the rule's ``applies_if`` predicate is mapped to a label
    via its first element; leaves without a label and duplicates are dropped.
    If nothing remains, a generic sentence naming the source regulation is
    returned.
    """
    reasons: List[str] = []
    for leaf in true_leaves(rule.applies_if, profile):
        text = FIELD_LABELS.get(leaf[0])
        if not text or text in reasons:
            continue
        reasons.append(text)

    if reasons:
        return reasons
    return ["%s ist für dieses Produkt anwendbar." % rule.source_regulation]
|
||||||
|
|
||||||
|
|
||||||
|
def _role_ok(rule: ObligationRule, profile: ProductProfile) -> bool:
|
||||||
|
role = profile.manufacturer_role
|
||||||
|
if role is None:
|
||||||
|
return True # unknown role -> do not exclude
|
||||||
|
return role.value in rule.applies_to_role
|
||||||
|
|
||||||
|
|
||||||
|
def derive_obligations(
    profile: ProductProfile, scope: Optional[RegulatoryScope] = None
) -> ObligationsResponse:
    """Derive the obligations that apply to ``profile``.

    Every rule in ``ALL_OBLIGATIONS`` goes through these filters, in order:

    1. its source regulation must be among the applicable regulations of the
       scope (computed from the profile when ``scope`` is None);
    2. its ``applies_unless`` condition, if any, must not evaluate to True;
    3. its ``applies_if`` condition must evaluate to True. A definite False
       records the id in ``excluded_obligations``; an unknown (None) verdict
       drops the rule without recording it;
    4. the profile's manufacturer role must be accepted by ``_role_ok``.

    Surviving rules become ``ApplicableObligation`` entries. Overlaps and the
    evidence-shared-by-several-obligations map are computed from them.
    """
    in_scope = set(_applicable_regulation_ids(profile, scope))
    result = ObligationsResponse()
    matched_ids: List[str] = []

    for rule in ALL_OBLIGATIONS:
        if rule.source_regulation not in in_scope:
            continue

        exemption = rule.applies_unless
        if exemption is not None and evaluate(exemption, profile) is True:
            continue

        outcome = evaluate(rule.applies_if, profile)
        if outcome is False:
            # Only a definite "does not apply" is reported as excluded.
            result.excluded_obligations.append(rule.obligation_id)
            continue
        if outcome is not True:
            continue
        if not _role_ok(rule, profile):
            continue

        matched_ids.append(rule.obligation_id)
        entry = ApplicableObligation(
            obligation_id=rule.obligation_id,
            title=rule.title,
            source_regulation=rule.source_regulation,
            legal_basis_refs=rule.legal_basis_refs,
            obligation_text=rule.obligation_text,
            authority_level=rule.authority_level,
            applies_because=_applies_because(rule, profile),
            applies_to_role=rule.applies_to_role,
            lifecycle_phase=rule.lifecycle_phase,
            overlap_group_id=rule.overlap_group_id,
            required_evidence=rule.required_evidence,
            confidence=rule.base_confidence,
            registry_anchor=rule.registry_anchor,
            proposed=rule.proposed,
        )
        result.applicable_obligations.append(entry)

    result.overlaps = _overlaps(matched_ids)
    result.evidence_for_multiple = _evidence_for_multiple(result.applicable_obligations)
    return result
|
||||||
|
|
||||||
|
|
||||||
|
def _overlaps(applied_ids: List[str]) -> List[ObligationOverlap]:
    """Build the overlap entries for the obligations that actually apply.

    A group from ``OVERLAP_GROUPS`` is reported only when at least two of its
    members are in ``applied_ids``. The reported ``obligations`` are those
    members, in the group's own order; the remaining fields are copied from
    the group definition.
    """
    applicable = set(applied_ids)
    found: List[ObligationOverlap] = []
    for group in OVERLAP_GROUPS:
        hits = [member for member in group.members if member in applicable]
        if len(hits) < 2:
            continue
        overlap = ObligationOverlap(
            overlap_group_id=group.overlap_group_id,
            obligations=hits,
            overlap_type=group.overlap_type,
            canonical_obligation_id=group.canonical_obligation_id,
            explanation=group.explanation,
        )
        found.append(overlap)
    return found
|
||||||
|
|
||||||
|
|
||||||
|
def _evidence_for_multiple(obligations: List[ApplicableObligation]) -> Dict[str, List[str]]:
    """Map each evidence type to the obligations it serves, if more than one.

    Args:
        obligations: applicable obligations; each exposes ``obligation_id``
            and an iterable ``required_evidence``.

    Returns:
        ``{evidence_type: [obligation_id, ...]}`` restricted to evidence types
        required by at least two different obligations. Ids keep the order in
        which the obligations were passed in.
    """
    by_evidence: Dict[str, List[str]] = {}
    for ob in obligations:
        for ev in ob.required_evidence:
            ids = by_evidence.setdefault(ev, [])
            # An obligation listing the same evidence type twice must count
            # once, otherwise it would look like evidence shared by several
            # obligations.
            if ob.obligation_id not in ids:
                ids.append(ob.obligation_id)
    return {ev: ids for ev, ids in by_evidence.items() if len(ids) > 1}
|
||||||
@@ -0,0 +1,100 @@
|
|||||||
|
"""Safe, tri-state condition evaluator for applicability rules.
|
||||||
|
|
||||||
|
Conditions are plain data (no `eval`): a *leaf* is a 3-tuple
|
||||||
|
``(field, op, value)``; a *composite* is ``{"all": [...]}`` or
|
||||||
|
``{"any": [...]}``. Evaluation is tri-state — ``True`` / ``False`` /
|
||||||
|
``None`` (unknown) — so a missing product fact yields *uncertain*, never a
|
||||||
|
false negative.
|
||||||
|
"""
|
||||||
|
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
from enum import Enum
|
||||||
|
from typing import Any, Dict, List, Optional, Tuple, Union
|
||||||
|
|
||||||
|
Leaf = Tuple[str, str, Any]
|
||||||
|
Condition = Union[Leaf, Dict[str, Any]]
|
||||||
|
|
||||||
|
|
||||||
|
def _attr(profile: Any, field: str) -> Any:
    """Read ``field`` from ``profile``.

    A missing attribute reads as ``None``; an ``Enum`` member is unwrapped to
    its ``.value`` so rules can compare against plain values.
    """
    value = getattr(profile, field, None)
    if isinstance(value, Enum):
        return value.value
    return value


def _eval_leaf(leaf: Leaf, profile: Any) -> Optional[bool]:
    """Evaluate one ``(field, op, value)`` leaf against the profile.

    Returns:
        ``True`` / ``False`` for a definite answer, ``None`` when the fact is
        unknown (the profile value is ``None``) and the operator needs a value.

    Raises:
        ValueError: if ``op`` is not a known operator and the fact is known.
    """
    field, op, expected = leaf
    actual = _attr(profile, field)

    # Presence checks always have a definite answer, even for unknown facts.
    if op == "not_none":
        return actual is not None
    if op == "is_none":
        return actual is None

    if op == "contains_any":
        # Case-insensitive substring match of any keyword against the field's
        # items joined by spaces. An empty list is "known-empty" -> False.
        # NOTE(review): a None (unknown) value is also treated as empty here
        # and yields False rather than None; kept as-is, confirm it is intended.
        if isinstance(actual, str):
            # A plain string is a single item; iterating it would split it
            # into characters and no multi-character keyword could ever match.
            items = [actual]
        else:
            items = actual or []
        hay = " ".join(str(x).lower() for x in items)
        return any(str(k).lower() in hay for k in expected)

    if actual is None:
        return None  # unknown fact -> unknown result

    if op == "eq":
        return bool(actual == expected)
    if op == "ne":
        return bool(actual != expected)
    if op == "truthy":
        return bool(actual)
    if op == "falsy":
        return not bool(actual)
    if op == "in":
        return bool(actual in expected)
    if op == "not_in":
        return bool(actual not in expected)
    if op == "date_after":
        return bool(actual > expected)
    raise ValueError("unknown predicate op: %r" % (op,))
|
||||||
|
|
||||||
|
|
||||||
|
def evaluate(condition: Optional[Condition], profile: Any) -> Optional[bool]:
    """Evaluate a condition tree to True, False or None (unknown).

    ``None`` means "no condition" and is True. A tuple is a leaf and goes to
    ``_eval_leaf``. ``{"all": [...]}`` is False if any child is False, else
    unknown if any child is unknown, else True. ``{"any": [...]}`` is True if
    any child is True, else unknown if any child is unknown, else False.
    ``"all"`` takes precedence when both keys are present. Every child is
    evaluated; there is no short-circuiting.

    Raises:
        ValueError: if the condition has neither an "all" nor an "any" key.
    """
    if condition is None:
        return True
    if isinstance(condition, tuple):
        return _eval_leaf(condition, profile)

    # For "all" a single False decides the result; for "any" a single True.
    for key, decisive in (("all", False), ("any", True)):
        if key not in condition:
            continue
        outcomes = [evaluate(child, profile) for child in condition[key]]
        if any(outcome is decisive for outcome in outcomes):
            return decisive
        if any(outcome is None for outcome in outcomes):
            return None
        return not decisive

    raise ValueError("malformed condition: %r" % (condition,))
|
||||||
|
|
||||||
|
|
||||||
|
def true_leaves(condition: Optional[Condition], profile: Any) -> List[Leaf]:
    """Collect, depth-first, every leaf of the tree that evaluates True.

    ``None`` yields no leaves. A tuple leaf is returned only when
    ``_eval_leaf`` gives exactly True. For a composite, the children under
    "all" are walked, or those under "any" when "all" is missing or empty.
    """
    if condition is None:
        return []
    if isinstance(condition, tuple):
        if _eval_leaf(condition, profile) is True:
            return [condition]
        return []

    children = condition.get("all") or condition.get("any") or []
    return [leaf for child in children for leaf in true_leaves(child, profile)]
|
||||||
|
|
||||||
|
|
||||||
|
def unknown_fields(fields: List[str], profile: Any) -> List[str]:
    """Return, in input order, the names in ``fields`` that read as None on the profile."""
    missing: List[str] = []
    for name in fields:
        if _attr(profile, name) is None:
            missing.append(name)
    return missing
|
||||||
@@ -0,0 +1,23 @@
|
|||||||
|
"""Aggregated obligation scope rules + lookup helpers."""
|
||||||
|
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
from typing import Dict, List, Optional
|
||||||
|
|
||||||
|
from .rules_obligations_cra import CRA_OBLIGATIONS
|
||||||
|
from .rules_obligations_machine_data import DATA_ACT_OBLIGATIONS, MACHINE_OBLIGATIONS
|
||||||
|
from .rules_types import ObligationRule
|
||||||
|
|
||||||
|
# Every obligation rule known to the engine: CRA first, then MaschinenVO,
# then Data Act.
ALL_OBLIGATIONS: List[ObligationRule] = [
    *CRA_OBLIGATIONS,
    *MACHINE_OBLIGATIONS,
    *DATA_ACT_OBLIGATIONS,
]

# Lookup table by obligation_id; if an id occurred twice the later rule would win.
_BY_ID: Dict[str, ObligationRule] = dict(
    (rule.obligation_id, rule) for rule in ALL_OBLIGATIONS
)
|
||||||
|
|
||||||
|
|
||||||
|
def obligation_rule(obligation_id: str) -> Optional[ObligationRule]:
    """Look up a rule by its obligation id; returns None when the id is not registered."""
    try:
        return _BY_ID[obligation_id]
    except KeyError:
        return None
|
||||||
|
|
||||||
|
|
||||||
|
def obligations_for_regulation(regulation_id: str) -> List[ObligationRule]:
    """Return all rules whose ``source_regulation`` equals ``regulation_id``, in registry order."""
    selected: List[ObligationRule] = []
    for rule in ALL_OBLIGATIONS:
        if rule.source_regulation == regulation_id:
            selected.append(rule)
    return selected
|
||||||
@@ -0,0 +1,271 @@
|
|||||||
|
"""CRA obligation scope rules.
|
||||||
|
|
||||||
|
`obligation_id`s in the six CRA-P1 families (sbom/vuln/authentication/logging/
|
||||||
|
remote_access/updates) are RE-USED verbatim from the Legal-KG registry
|
||||||
|
(`obligations/obligation_join_keys.json`) — never re-minted (control_uuid trap,
|
||||||
|
memory `project_compliance_graph.md`). Cross-cutting CRA *process* obligations
|
||||||
|
(risk assessment, technical documentation, CE, instructions, secure-by-design
|
||||||
|
umbrella) are not yet in the registry and are flagged `proposed=True`.
|
||||||
|
"""
|
||||||
|
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
from typing import List
|
||||||
|
|
||||||
|
from .enums import AuthorityLevel, Confidence
|
||||||
|
from .rules_types import ObligationRule
|
||||||
|
|
||||||
|
# Shared trigger building blocks for the CRA rules.
_HAS_SW = ("has_software", "eq", True)
_EU = ("eu_market", "eq", True)
_REMOTE_OR_CLOUD = {"any": [("has_remote_access", "eq", True), ("has_cloud_connection", "eq", True)]}
_LM = AuthorityLevel.LEGAL_TEXT


def _cra_rule(applies_if=None, **fields) -> ObligationRule:
    """Build one CRA rule.

    Source regulation and authority level are the same for every CRA rule.
    When no trigger is given, the rule applies to products that have software
    and are placed on the EU market (a fresh condition dict per rule). All
    other fields are passed through to ``ObligationRule`` untouched.
    """
    if applies_if is None:
        applies_if = {"all": [_HAS_SW, _EU]}
    return ObligationRule(
        source_regulation="CRA",
        authority_level=_LM,
        applies_if=applies_if,
        **fields,
    )


CRA_OBLIGATIONS: List[ObligationRule] = [
    _cra_rule(
        obligation_id="sbom_creation",
        title="Software Bill of Materials erstellen",
        obligation_text="Eine SBOM erstellen, die mindestens die obersten Abhängigkeiten des Produkts dokumentiert.",
        legal_basis_refs=["CRA Annex I Part II (1)"],
        family="sbom",
        required_capabilities=["software_bill_of_materials"],
        required_evidence=["sbom", "repo_scan"],
        lifecycle_phase=["development", "placing_on_market", "maintenance"],
        registry_anchor=True,
    ),
    _cra_rule(
        obligation_id="provide_security_updates",
        title="Sicherheitsupdates bereitstellen",
        obligation_text="Sicherheitsrelevante Updates zeitnah und über den Supportzeitraum bereitstellen.",
        legal_basis_refs=["CRA Annex I (2)(c)", "CRA Art. 13"],
        family="updates",
        required_capabilities=["secure_updates"],
        required_evidence=["policy", "ticket", "test_report"],
        lifecycle_phase=["maintenance", "update"],
        overlap_group_id="SECURITY_UPDATES",
        registry_anchor=True,
    ),
    _cra_rule(
        obligation_id="support_period_maintenance",
        title="Supportzeitraum definieren und einhalten",
        obligation_text="Einen angemessenen Supportzeitraum festlegen, in dem Schwachstellen behandelt werden.",
        legal_basis_refs=["CRA Art. 13(8)"],
        family="updates",
        required_capabilities=["secure_updates"],
        required_evidence=["policy"],
        lifecycle_phase=["placing_on_market", "maintenance", "update"],
        registry_anchor=True,
    ),
    _cra_rule(
        obligation_id="signed_update_integrity",
        title="Integrität von Updates sicherstellen",
        obligation_text="Updates signieren und ihre Integrität bei der Verteilung verifizieren.",
        legal_basis_refs=["CRA Annex I (1)(3)(f)"],
        family="updates",
        required_capabilities=["software_integrity"],
        required_evidence=["config_export", "test_report"],
        lifecycle_phase=["development", "maintenance", "update"],
        overlap_group_id="SECURITY_UPDATES",
        registry_anchor=True,
    ),
    _cra_rule(
        obligation_id="vuln_handling_process",
        title="Schwachstellenbehandlungs-Prozess",
        obligation_text="Einen dokumentierten Prozess zur Identifikation, Bewertung und Behebung von Schwachstellen betreiben.",
        legal_basis_refs=["CRA Art. 13(8)", "CRA Annex VII"],
        family="vuln",
        required_capabilities=["vulnerability_management"],
        required_evidence=["policy", "ticket"],
        lifecycle_phase=["development", "operation", "maintenance"],
        overlap_group_id="VULNERABILITY_HANDLING",
        registry_anchor=True,
    ),
    _cra_rule(
        obligation_id="coordinated_vulnerability_disclosure",
        title="Coordinated Vulnerability Disclosure",
        obligation_text="Eine Richtlinie zur koordinierten Offenlegung von Schwachstellen bereitstellen.",
        legal_basis_refs=["CRA Annex I Part II (5)"],
        family="vuln",
        required_capabilities=["coordinated_disclosure"],
        required_evidence=["policy"],
        lifecycle_phase=["operation", "maintenance"],
        overlap_group_id="VULNERABILITY_HANDLING",
        registry_anchor=True,
    ),
    _cra_rule(
        obligation_id="exploited_vuln_reporting_authorities",
        title="Meldung aktiv ausgenutzter Schwachstellen / Vorfälle",
        obligation_text="Aktiv ausgenutzte Schwachstellen und schwerwiegende Vorfälle an die zuständigen Behörden melden.",
        legal_basis_refs=["CRA Art. 14", "CRA Art. 16"],
        family="vuln",
        required_capabilities=["incident_reporting"],
        required_evidence=["policy", "ticket"],
        lifecycle_phase=["operation", "maintenance"],
        registry_anchor=True,
    ),
    _cra_rule(
        obligation_id="user_authentication_required",
        title="Authentifizierung vorsehen",
        obligation_text="Den Zugang über einen geeigneten Authentifizierungsmechanismus schützen.",
        legal_basis_refs=["CRA Annex I (2)(d)"],
        family="authentication",
        required_capabilities=["authentication"],
        required_evidence=["config_export", "pentest"],
        lifecycle_phase=["development", "operation"],
        registry_anchor=True,
    ),
    _cra_rule(
        obligation_id="no_default_credentials",
        title="Keine unveränderlichen Standard-Zugangsdaten",
        obligation_text="Sichere Standardkonfiguration; keine fest hinterlegten oder unveränderlichen Standard-Passwörter.",
        legal_basis_refs=["CRA Annex I (2)(a)", "CRA Annex I (2)(b)"],
        family="authentication",
        required_capabilities=["secure_by_default"],
        required_evidence=["config_export", "test_report"],
        lifecycle_phase=["development", "placing_on_market"],
        registry_anchor=True,
    ),
    _cra_rule(
        obligation_id="event_logging_security_events",
        title="Sicherheitsrelevante Ereignisse protokollieren",
        obligation_text="Sicherheitsrelevante Ereignisse und Zugriffe aufzeichnen, um Vorfälle nachvollziehen zu können.",
        legal_basis_refs=["CRA Annex I Part I (2)(k)"],
        family="logging",
        required_capabilities=["security_logging"],
        required_evidence=["config_export", "audit_log"],
        lifecycle_phase=["operation", "maintenance"],
        registry_anchor=True,
    ),
    # The two remote-access rules are triggered by remote access or a cloud
    # connection instead of the generic "has software" fact.
    _cra_rule(
        {"all": [_REMOTE_OR_CLOUD, _EU]},
        obligation_id="remote_access_attack_surface_min",
        title="Angriffsfläche minimieren",
        obligation_text="Die Angriffsfläche begrenzen, insbesondere exponierte Remote-/Cloud-Schnittstellen.",
        legal_basis_refs=["CRA Annex I (1)(2)(a)"],
        family="remote_access",
        required_capabilities=["secure_by_default"],
        required_evidence=["config_export", "repo_scan", "pentest"],
        lifecycle_phase=["development", "operation"],
        registry_anchor=True,
    ),
    _cra_rule(
        {"all": [_REMOTE_OR_CLOUD, _EU]},
        obligation_id="remote_access_confidentiality_integrity",
        title="Vertraulichkeit/Integrität der Fernverbindung",
        obligation_text="Daten bei Fernzugriff/Cloud-Anbindung verschlüsselt und integritätsgeschützt übertragen.",
        legal_basis_refs=["CRA Annex I (1)(2)(b)", "CRA Annex I (1)(2)(c)"],
        family="remote_access",
        required_capabilities=["secure_communication"],
        required_evidence=["config_export", "pentest"],
        lifecycle_phase=["operation"],
        registry_anchor=True,
    ),
    # --- Cross-cutting CRA process obligations (not yet in registry) ---------
    _cra_rule(
        obligation_id="cra_secure_by_design",
        title="Security by Design",
        obligation_text="Das Produkt so entwerfen, entwickeln und herstellen, dass ein angemessenes Cybersicherheitsniveau gewährleistet ist.",
        legal_basis_refs=["CRA Annex I Part I (1)"],
        family="cra_process",
        required_capabilities=["secure_by_default", "risk_assessment"],
        required_evidence=["policy", "test_report"],
        lifecycle_phase=["development", "placing_on_market"],
        proposed=True,
    ),
    _cra_rule(
        obligation_id="cra_risk_assessment",
        title="Cybersicherheits-Risikobewertung",
        obligation_text="Eine Cybersicherheits-Risikobewertung durchführen und dokumentieren; in die technische Dokumentation aufnehmen.",
        legal_basis_refs=["CRA Art. 13(2)", "CRA Annex I Part I (1)"],
        family="cra_process",
        required_capabilities=["risk_assessment"],
        required_evidence=["policy"],
        lifecycle_phase=["development", "placing_on_market"],
        overlap_group_id="RISK_ASSESSMENT",
        proposed=True,
    ),
    _cra_rule(
        obligation_id="cra_technical_documentation",
        title="Technische Dokumentation",
        obligation_text="Technische Dokumentation erstellen und aktuell halten, die Konformität mit den Anforderungen belegt.",
        legal_basis_refs=["CRA Art. 31", "CRA Annex VII"],
        family="cra_process",
        required_capabilities=["technical_documentation"],
        required_evidence=["policy"],
        lifecycle_phase=["placing_on_market", "maintenance"],
        overlap_group_id="TECHNICAL_DOCUMENTATION",
        proposed=True,
    ),
    _cra_rule(
        obligation_id="cra_ce_conformity_assessment",
        title="Konformitätsbewertung / CE-Kennzeichnung",
        obligation_text="Vor dem Inverkehrbringen das passende Konformitätsbewertungsverfahren durchlaufen und CE kennzeichnen.",
        legal_basis_refs=["CRA Art. 32", "CRA Art. 28"],
        family="cra_process",
        required_capabilities=["conformity_assessment"],
        required_evidence=["test_report", "policy"],
        lifecycle_phase=["placing_on_market"],
        overlap_group_id="CE_CONFORMITY",
        proposed=True,
    ),
    _cra_rule(
        obligation_id="cra_instructions_for_use",
        title="Informationen und Anweisungen für Nutzer",
        obligation_text="Nutzern verständliche Sicherheitsinformationen und -anweisungen bereitstellen (z. B. zu Updates und Support-Ende).",
        legal_basis_refs=["CRA Annex II"],
        family="cra_process",
        required_capabilities=["technical_documentation"],
        required_evidence=["policy"],
        lifecycle_phase=["placing_on_market"],
        overlap_group_id="INSTRUCTIONS_FOR_USE",
        proposed=True,
    ),
]
|
||||||
@@ -0,0 +1,139 @@
|
|||||||
|
"""MaschinenVO and Data Act obligation scope rules.
|
||||||
|
|
||||||
|
These regulations are NOT yet in the Legal-KG registry (which currently covers
|
||||||
|
the six CRA-P1 families). Every obligation here is therefore `proposed=True`:
|
||||||
|
the reasoning layer proposes the snake_case id, the Obligation Registry session
|
||||||
|
remains the only authority that may canonicalise it (re-link, never re-mint).
|
||||||
|
"""
|
||||||
|
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
from typing import List
|
||||||
|
|
||||||
|
from .enums import AuthorityLevel, Confidence
|
||||||
|
from .rules_types import ObligationRule
|
||||||
|
|
||||||
|
# Shared trigger building blocks for the MaschinenVO / Data Act rules.
_EU = ("eu_market", "eq", True)
_IS_MACHINE = ("is_machine", "eq", True)
_LM = AuthorityLevel.LEGAL_TEXT


def _machine_rule(**fields) -> ObligationRule:
    """Build one MaschinenVO rule.

    Source regulation, authority level, family and the ``proposed`` flag are
    identical for every machine rule; everything else is passed through.
    """
    return ObligationRule(
        source_regulation="MaschinenVO",
        authority_level=_LM,
        family="machine_safety",
        proposed=True,
        **fields,
    )


MACHINE_OBLIGATIONS: List[ObligationRule] = [
    _machine_rule(
        obligation_id="machine_risk_assessment",
        title="Maschinen-Risikobeurteilung",
        obligation_text="Eine Risikobeurteilung der Maschine durchführen, um Gefährdungen zu ermitteln und zu mindern.",
        legal_basis_refs=["MaschinenVO (EU) 2023/1230 Anhang III (1.1.1)", "EN ISO 12100"],
        applies_if={"all": [_IS_MACHINE, _EU]},
        required_capabilities=["risk_assessment"],
        required_evidence=["policy"],
        lifecycle_phase=["development", "placing_on_market"],
        overlap_group_id="RISK_ASSESSMENT",
    ),
    _machine_rule(
        obligation_id="machine_safety_control_systems",
        title="Sichere Steuerungssysteme",
        obligation_text="Sicherheitsbezogene Teile der Steuerung so auslegen, dass Ausfälle nicht zu gefährlichen Zuständen führen.",
        legal_basis_refs=["MaschinenVO (EU) 2023/1230 Anhang III (1.2.1)", "EN ISO 13849-1"],
        applies_if={"all": [_IS_MACHINE, ("has_safety_function", "eq", True), _EU]},
        required_capabilities=["functional_safety"],
        required_evidence=["test_report", "policy"],
        lifecycle_phase=["development", "placing_on_market"],
    ),
    _machine_rule(
        obligation_id="machine_protection_against_corruption",
        title="Schutz gegen Korrumpierung sicherheitsrelevanter Funktionen",
        obligation_text="Sicherstellen, dass eine (auch beabsichtigte) Korrumpierung der Software/Verbindung keine gefährliche Situation auslöst.",
        legal_basis_refs=["MaschinenVO (EU) 2023/1230 Anhang III (1.1.9)"],
        # Machine with a safety function that is reachable through remote
        # access or contains software.
        applies_if={
            "all": [
                _IS_MACHINE,
                ("has_safety_function", "eq", True),
                {"any": [("has_remote_access", "eq", True), ("has_software", "eq", True)]},
                _EU,
            ]
        },
        required_capabilities=["software_integrity", "secure_by_default"],
        required_evidence=["test_report", "config_export"],
        lifecycle_phase=["development", "operation", "maintenance"],
        overlap_group_id="VULNERABILITY_HANDLING",
    ),
    _machine_rule(
        obligation_id="machine_instructions_for_use",
        title="Betriebsanleitung",
        obligation_text="Eine vollständige Betriebsanleitung mit Sicherheitshinweisen bereitstellen.",
        legal_basis_refs=["MaschinenVO (EU) 2023/1230 Anhang III (1.7.4)"],
        applies_if={"all": [_IS_MACHINE, _EU]},
        required_capabilities=["technical_documentation"],
        required_evidence=["policy"],
        lifecycle_phase=["placing_on_market"],
        overlap_group_id="INSTRUCTIONS_FOR_USE",
    ),
    _machine_rule(
        obligation_id="machine_ce_conformity",
        title="Konformitätsbewertung / CE (Maschine)",
        obligation_text="Das passende Konformitätsbewertungsverfahren der MaschinenVO durchlaufen und CE kennzeichnen.",
        legal_basis_refs=["MaschinenVO (EU) 2023/1230 Art. 25", "Anhang IV"],
        applies_if={"all": [_IS_MACHINE, _EU]},
        required_capabilities=["conformity_assessment"],
        required_evidence=["test_report", "policy"],
        lifecycle_phase=["placing_on_market"],
        overlap_group_id="CE_CONFORMITY",
    ),
]
|
||||||
|
|
||||||
|
def _data_act_rule(**fields) -> ObligationRule:
    """Build one Data Act rule.

    Source regulation, authority level, family and the ``proposed`` flag are
    identical for every Data Act rule; everything else is passed through.
    """
    return ObligationRule(
        source_regulation="DataAct",
        authority_level=_LM,
        family="data_act",
        proposed=True,
        **fields,
    )


DATA_ACT_OBLIGATIONS: List[ObligationRule] = [
    _data_act_rule(
        obligation_id="data_act_data_access_by_design",
        title="Datenzugang by design",
        obligation_text="Vernetzte Produkte so gestalten, dass die erzeugten Produktdaten standardmäßig zugänglich sind.",
        legal_basis_refs=["Data Act (EU) 2023/2854 Art. 3"],
        # Product generates usage data and is connected (cloud or remote access).
        applies_if={
            "all": [
                ("generates_usage_data", "eq", True),
                {"any": [("has_cloud_connection", "eq", True), ("has_remote_access", "eq", True)]},
                _EU,
            ]
        },
        required_capabilities=["data_access_provision"],
        required_evidence=["config_export", "policy"],
        lifecycle_phase=["development", "placing_on_market"],
    ),
    _data_act_rule(
        obligation_id="data_act_user_data_access",
        title="Datenzugang für Nutzer",
        obligation_text="Nutzern Zugang zu den von ihnen erzeugten Daten gewähren und Weitergabe an Dritte ermöglichen.",
        legal_basis_refs=["Data Act (EU) 2023/2854 Art. 4", "Art. 5"],
        applies_if={"all": [("generates_usage_data", "eq", True), _EU]},
        required_capabilities=["data_access_provision"],
        required_evidence=["policy"],
        lifecycle_phase=["operation"],
    ),
]
|
||||||
@@ -0,0 +1,91 @@
|
|||||||
|
"""Obligation overlap groups (spec §4.5 / Modus 2).
|
||||||
|
|
||||||
|
Overlaps are emitted only for the members that are actually applicable to the
|
||||||
|
product. `canonical_obligation_id` points at the strongest / most specific
|
||||||
|
obligation in the group (preferring a registry-anchored CRA id).
|
||||||
|
"""
|
||||||
|
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
from dataclasses import dataclass
|
||||||
|
from typing import List
|
||||||
|
|
||||||
|
from .enums import OverlapType
|
||||||
|
|
||||||
|
|
||||||
|
@dataclass(frozen=True)
class OverlapGroup:
    """Static definition of a set of obligations that overlap with each other.

    Instances are frozen: fields cannot be reassigned after construction.
    """

    # Identifier of the group, e.g. "RISK_ASSESSMENT".
    overlap_group_id: str
    # Obligation ids that belong to the group.
    members: List[str]
    # How the members relate to each other.
    overlap_type: OverlapType
    # Obligation id that represents the group.
    canonical_obligation_id: str
    # Human-readable explanation of the overlap.
    explanation: str
|
||||||
|
|
||||||
|
|
||||||
|
# Overlap groups in the order in which they are reported.
OVERLAP_GROUPS: List[OverlapGroup] = [
    OverlapGroup(
        overlap_group_id="VULNERABILITY_HANDLING",
        overlap_type=OverlapType.COMPLEMENTARY,
        canonical_obligation_id="vuln_handling_process",
        members=[
            "vuln_handling_process",
            "coordinated_vulnerability_disclosure",
            "machine_protection_against_corruption",
        ],
        explanation=(
            "CRA adressiert die Schwachstellenbehandlung des Produkts. "
            "Die MaschinenVO wird komplementär relevant, sobald eine Cyber-Schwachstelle "
            "eine Sicherheitsfunktion beeinflussen kann (Anhang III 1.1.9). "
            "Nicht identisch, aber gemeinsam zu erfüllen."
        ),
    ),
    OverlapGroup(
        overlap_group_id="SECURITY_UPDATES",
        overlap_type=OverlapType.COMPLEMENTARY,
        canonical_obligation_id="provide_security_updates",
        members=["provide_security_updates", "signed_update_integrity"],
        explanation=(
            "Updates bereitstellen und ihre Integrität sichern sind zwei Seiten "
            "desselben Update-Prozesses; "
            "ein Nachweis (Update-Policy, Release Notes) deckt teils beide ab."
        ),
    ),
    OverlapGroup(
        overlap_group_id="RISK_ASSESSMENT",
        overlap_type=OverlapType.DIFFERENT_SCOPE,
        canonical_obligation_id="cra_risk_assessment",
        members=["cra_risk_assessment", "machine_risk_assessment"],
        explanation=(
            "Zwei getrennte Risikobetrachtungen: "
            "CRA = Cybersicherheits-Risiko, "
            "MaschinenVO = Sicherheits-/Gefährdungsbeurteilung. "
            "Methodisch verwandt, inhaltlich unterschiedlich."
        ),
    ),
    # NOTE(review): "machine_risk_assessment" is listed here although its own
    # rule declares overlap_group_id "RISK_ASSESSMENT" -- confirm this is intended.
    OverlapGroup(
        overlap_group_id="TECHNICAL_DOCUMENTATION",
        overlap_type=OverlapType.SIMILAR,
        canonical_obligation_id="cra_technical_documentation",
        members=["cra_technical_documentation", "machine_risk_assessment"],
        explanation=(
            "Beide Regime verlangen eine technische Dokumentation; "
            "Teile (Risikobetrachtung, Konstruktionsunterlagen) lassen sich "
            "in einem konsolidierten technischen Dossier führen."
        ),
    ),
    OverlapGroup(
        overlap_group_id="CE_CONFORMITY",
        overlap_type=OverlapType.COMPLEMENTARY,
        canonical_obligation_id="machine_ce_conformity",
        members=["cra_ce_conformity_assessment", "machine_ce_conformity"],
        explanation=(
            "Ein Produkt kann zwei CE-Regime gleichzeitig erfüllen müssen "
            "(MaschinenVO + CRA). "
            "Eine gemeinsame CE-Kennzeichnung, aber getrennte Konformitätsbewertungen."
        ),
    ),
    OverlapGroup(
        overlap_group_id="INSTRUCTIONS_FOR_USE",
        overlap_type=OverlapType.SIMILAR,
        canonical_obligation_id="machine_instructions_for_use",
        members=["cra_instructions_for_use", "machine_instructions_for_use"],
        explanation=(
            "Betriebsanleitung (MaschinenVO) und Sicherheitsinformationen (CRA) "
            "überschneiden sich; "
            "ein integriertes Anleitungsdokument kann beide Pflichten bedienen."
        ),
    ),
]
|
||||||
@@ -0,0 +1,160 @@
|
|||||||
|
"""Regulation-level applicability trigger rules (scope discovery, spec Modus 1).
|
||||||
|
|
||||||
|
Each rule is pure data consumed by `scope_engine`. Triggers reference
|
||||||
|
`ProductProfile` fields through the safe predicate evaluator. `required_facts`
|
||||||
|
that are unknown turn the verdict *uncertain* and surface `fact_prompts`.
|
||||||
|
"""
|
||||||
|
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
from dataclasses import dataclass, field
|
||||||
|
from typing import Dict, List, Optional
|
||||||
|
|
||||||
|
from .enums import Confidence
|
||||||
|
from .predicates import Condition
|
||||||
|
|
||||||
|
# Positive, human-readable (German) label per profile fact. The scope engine's
# trigger-fact output looks up the label of each trigger leaf by field name.
FIELD_LABELS: Dict[str, str] = dict(
    has_software="Produkt enthält Software / digitale Elemente",
    has_embedded_software="Produkt enthält eingebettete Software",
    has_remote_access="Produkt besitzt Fernzugriff / Fernwartung",
    has_cloud_connection="Produkt ist mit einer Cloud verbunden",
    has_radio_module="Produkt enthält ein Funkmodul",
    has_safety_function="Produkt erfüllt eine Sicherheitsfunktion",
    generates_usage_data="Vernetztes Produkt erzeugt nutzbare Produktdaten",
    is_machine="Produkt ist eine Maschine",
    is_component="Produkt ist ein (Sicherheits-)Bauteil",
    eu_market="Produkt wird auf dem EU-Markt bereitgestellt",
    is_essential_or_important_entity="Unternehmen ist wesentliche/wichtige Einrichtung",
    manufacturer_role="Wirtschaftsakteur-Rolle (Hersteller/Importeur/Händler)",
)
|
||||||
|
|
||||||
|
|
||||||
|
@dataclass(frozen=True)
class RegulationRule:
    """Declarative applicability rule for a single regulation.

    Instances are plain data: the scope engine evaluates `trigger` against a
    product profile and derives the verdict from the result.
    """

    # Identity of the regulation.
    regulation_id: str
    name: str

    # Condition that has to evaluate true for the regulation to apply.
    trigger: Condition

    # Profile facts the rule wants known; the scope engine lowers the
    # confidence of an applicable verdict while any of them is still unknown.
    required_facts: List[str]

    # Fact name -> question presented to the user when that fact is unknown.
    fact_prompts: Dict[str, str]

    legal_basis_refs: List[str]
    summary: str
    confidence_when_applicable: Confidence = Confidence.HIGH

    # NOTE(review): the visible scope engine never reads `exclusion`.
    exclusion: Optional[Condition] = None

    # Status is downgraded to PARTIALLY_APPLICABLE / MEDIUM when the trigger
    # fires only via inference rather than a directly stated fact.
    inferred: bool = False

    # Role values for which the scope engine reports the regulation as excluded.
    excludable_roles: List[str] = field(default_factory=list)
|
||||||
|
|
||||||
|
|
||||||
|
# Economic-operator role names (manufacturer / importer / distributor).
# NOTE(review): not referenced anywhere else in the visible part of this module.
_ECONOMIC_ROLES = ["manufacturer", "importer", "distributor"]
|
||||||
|
|
||||||
|
# Ordered catalogue of regulation-level trigger rules; the scope engine walks
# this list once per product profile.
#
# Invariant kept by every entry: each name in `required_facts` has a matching
# entry in `fact_prompts`, so an unknown required fact can always be surfaced
# to the user as a concrete question.
REGULATION_RULES: List[RegulationRule] = [
    RegulationRule(
        regulation_id="CRA",
        name="Cyber Resilience Act (EU) 2024/2847",
        trigger={
            "all": [
                {"any": [("has_software", "eq", True), ("has_embedded_software", "eq", True)]},
                ("eu_market", "eq", True),
            ]
        },
        required_facts=["has_software", "eu_market", "manufacturer_role"],
        fact_prompts={
            "has_software": "Enthält das Produkt Software / digitale Elemente?",
            "eu_market": "Wird das Produkt auf dem EU-Markt bereitgestellt oder in Verkehr gebracht?",
            "manufacturer_role": "Welche Rolle nehmen Sie ein (Hersteller / Importeur / Händler)?",
        },
        legal_basis_refs=["CRA Art. 2(1)", "CRA Art. 3(1)"],
        summary="Produkte mit digitalen Elementen, die auf dem EU-Markt bereitgestellt werden.",
        confidence_when_applicable=Confidence.HIGH,
        excludable_roles=["operator"],
    ),
    RegulationRule(
        regulation_id="MaschinenVO",
        name="Maschinenverordnung (EU) 2023/1230",
        trigger={
            "any": [
                ("is_machine", "eq", True),
                {"all": [("is_component", "eq", True), ("has_safety_function", "eq", True)]},
            ]
        },
        required_facts=["is_machine", "eu_market"],
        fact_prompts={
            "is_machine": "Ist das Produkt eine Maschine oder ein Sicherheitsbauteil?",
            "has_safety_function": "Erfüllt das Bauteil eine Sicherheitsfunktion?",
            # `eu_market` is a required fact of this rule, so it needs a prompt;
            # the wording is identical to the CRA prompt so that the scope
            # engine's de-duplication shows the question only once.
            "eu_market": "Wird das Produkt auf dem EU-Markt bereitgestellt oder in Verkehr gebracht?",
        },
        legal_basis_refs=["MaschinenVO (EU) 2023/1230 Art. 2", "Anhang III"],
        summary="Maschinen oder Sicherheitsbauteile, ggf. mit sicherheitsrelevanter Steuerung.",
        confidence_when_applicable=Confidence.MEDIUM,
    ),
    RegulationRule(
        regulation_id="RED",
        name="Radio Equipment Directive 2014/53/EU",
        trigger=("has_radio_module", "eq", True),
        required_facts=["has_radio_module"],
        fact_prompts={
            "has_radio_module": "Besitzt das Produkt ein Funkmodul (WLAN, Bluetooth, Mobilfunk)?",
        },
        legal_basis_refs=["RED 2014/53/EU Art. 1", "Art. 3(3)(d-f)"],
        summary="Funkanlagen; Art. 3(3) deckt zusätzlich Cybersecurity-Anforderungen ab.",
        confidence_when_applicable=Confidence.HIGH,
    ),
    RegulationRule(
        regulation_id="EMV",
        name="EMV-Richtlinie 2014/30/EU",
        trigger={
            "any": [
                ("has_software", "eq", True),
                ("has_embedded_software", "eq", True),
                ("has_radio_module", "eq", True),
            ]
        },
        required_facts=[],
        # NOTE(review): `is_electrical` is neither a trigger field nor a
        # required fact of this rule, so this prompt is currently never shown.
        fact_prompts={
            "is_electrical": "Ist das Produkt ein elektrisches / elektronisches Betriebsmittel?",
        },
        legal_basis_refs=["EMV-RL 2014/30/EU Art. 2"],
        summary="Elektrische/elektronische Betriebsmittel (hier aus den digitalen Elementen abgeleitet).",
        confidence_when_applicable=Confidence.MEDIUM,
        inferred=True,
    ),
    RegulationRule(
        regulation_id="DataAct",
        name="Data Act (EU) 2023/2854",
        trigger={
            "all": [
                {"any": [("has_cloud_connection", "eq", True), ("has_remote_access", "eq", True)]},
                ("generates_usage_data", "eq", True),
            ]
        },
        required_facts=["generates_usage_data"],
        fact_prompts={
            "generates_usage_data": "Erzeugt das vernetzte Produkt nutzbare Produkt-/Nutzungsdaten?",
        },
        legal_basis_refs=["Data Act (EU) 2023/2854 Art. 2(5)", "Art. 3-5"],
        summary="Vernetzte Produkte, die Nutzungsdaten erzeugen und zugänglich machen.",
        confidence_when_applicable=Confidence.HIGH,
    ),
    RegulationRule(
        regulation_id="NIS2",
        name="NIS2-Richtlinie (EU) 2022/2555",
        trigger=("is_essential_or_important_entity", "eq", True),
        required_facts=["company_size", "sector", "is_essential_or_important_entity"],
        fact_prompts={
            "company_size": "Unternehmensgröße (Mitarbeiterzahl / Umsatz)?",
            "sector": "In welchem Sektor ist das Unternehmen tätig (Anhang I/II)?",
            "is_essential_or_important_entity": "Fällt das Unternehmen als wesentliche/wichtige Einrichtung unter NIS2?",
        },
        legal_basis_refs=["NIS2-RL (EU) 2022/2555 Art. 2", "Art. 3"],
        summary="Adressiert die ORGANISATION (Größe/Sektor/Rolle), nicht das Produkt.",
        confidence_when_applicable=Confidence.MEDIUM,
    ),
]
|
||||||
|
|
||||||
|
|
||||||
|
def regulation_rule(regulation_id: str) -> Optional[RegulationRule]:
    """Look up a regulation rule by its identifier.

    Returns the first entry of `REGULATION_RULES` whose `regulation_id` equals
    the argument, or `None` when no entry matches.
    """
    candidates = (entry for entry in REGULATION_RULES if entry.regulation_id == regulation_id)
    return next(candidates, None)
|
||||||
@@ -0,0 +1,58 @@
|
|||||||
|
"""Shared types for obligation scope rules.
|
||||||
|
|
||||||
|
`required_evidence` MUST draw from the framework-AGNOSTIC evidence catalog
|
||||||
|
owned by the Compliance Execution Graph (memory `project_compliance_graph.md`,
|
||||||
|
User-Direktive 2026-06-25). Do not invent framework-specific evidence types.
|
||||||
|
"""
|
||||||
|
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
from dataclasses import dataclass, field
|
||||||
|
from typing import List, Optional
|
||||||
|
|
||||||
|
from .enums import AuthorityLevel, Confidence
|
||||||
|
from .predicates import Condition
|
||||||
|
|
||||||
|
# Framework-agnostic shared evidence catalog (the only allowed tokens).
# Immutable on purpose: membership is the only operation performed on it.
EVIDENCE_CATALOG = frozenset(
    (
        "config_export",
        "test_report",
        "repo_scan",
        "sbom",
        "policy",
        "audit_log",
        "pentest",
        "ticket",
    )
)
|
||||||
|
|
||||||
|
|
||||||
|
@dataclass(frozen=True)
class ObligationRule:
    """Declarative description of one obligation and when it applies.

    Raises:
        ValueError: on construction, if `required_evidence` contains a token
            that is not part of `EVIDENCE_CATALOG`.
    """

    # Identity and legal source.
    obligation_id: str
    title: str
    source_regulation: str
    obligation_text: str
    legal_basis_refs: List[str]
    authority_level: AuthorityLevel
    family: str

    # Applicability condition.
    applies_if: Condition

    # What an implementation has to provide and how it is evidenced.
    required_capabilities: List[str]
    required_evidence: List[str]

    base_confidence: Confidence = Confidence.HIGH
    applies_unless: Optional[Condition] = None
    lifecycle_phase: List[str] = field(default_factory=list)
    applies_to_role: List[str] = field(default_factory=lambda: ["manufacturer", "importer"])
    overlap_group_id: Optional[str] = None

    # True => obligation_id is owned by the Legal-KG registry (re-link, never re-mint).
    registry_anchor: bool = False

    # True => Machine/Data-Act obligation the registry has not canonicalised yet.
    proposed: bool = False

    def __post_init__(self) -> None:
        """Reject evidence tokens that are not listed in `EVIDENCE_CATALOG`."""
        unknown_tokens = []
        for token in self.required_evidence:
            if token not in EVIDENCE_CATALOG:
                unknown_tokens.append(token)
        if not unknown_tokens:
            return
        raise ValueError(
            "obligation %s uses non-catalog evidence %r" % (self.obligation_id, unknown_tokens)
        )
|
||||||
@@ -0,0 +1,226 @@
|
|||||||
|
"""Pydantic domain objects for the Regulatory Reasoning Engine.
|
||||||
|
|
||||||
|
Trigger facts that drive scope are tri-state (`Optional[bool] = None`): `None`
|
||||||
|
means "fact unknown" and produces an *uncertain* verdict plus a concrete
|
||||||
|
missing-fact prompt — never silent false security (spec §6.3).
|
||||||
|
"""
|
||||||
|
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
from datetime import date
|
||||||
|
from typing import Dict, List, Optional
|
||||||
|
|
||||||
|
from pydantic import BaseModel, Field
|
||||||
|
|
||||||
|
from .enums import (
|
||||||
|
ApplicabilityStatus,
|
||||||
|
AuthorityLevel,
|
||||||
|
ClaimCoverage,
|
||||||
|
Confidence,
|
||||||
|
InterpretationVerdict,
|
||||||
|
ManufacturerRole,
|
||||||
|
MarketModel,
|
||||||
|
OverlapType,
|
||||||
|
ProductLifecyclePhase,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
# Input
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
class ProductProfile(BaseModel):
    """The customer's product / system. Tri-state booleans => unknown facts."""

    # Identification and economic-operator role.
    product_name: str
    product_profile_id: Optional[str] = None
    manufacturer_role: Optional[ManufacturerRole] = None
    product_type: List[str] = Field(default_factory=list)

    # Technical characteristics; None means the fact has not been stated.
    has_software: Optional[bool] = None
    has_embedded_software: Optional[bool] = None
    has_remote_access: Optional[bool] = None
    has_cloud_connection: Optional[bool] = None
    has_ai_functionality: Optional[bool] = None
    has_radio_module: Optional[bool] = None
    has_safety_function: Optional[bool] = None
    generates_usage_data: Optional[bool] = None

    # Product classification.
    is_machine: Optional[bool] = None
    is_component: Optional[bool] = None
    is_spare_part: Optional[bool] = None

    # Market placement and lifecycle.
    placed_on_market_after: Optional[date] = None
    intended_use: Optional[str] = None
    eu_market: Optional[bool] = None
    b2b_or_b2c: Optional[MarketModel] = None
    lifecycle_phase: Optional[ProductLifecyclePhase] = None

    # Organisation context — only needed for NIS2 (not a product fact).
    company_size: Optional[str] = None
    sector: Optional[str] = None
    is_essential_or_important_entity: Optional[bool] = None
|
||||||
|
|
||||||
|
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
# Scope
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
# One regulation judged applicable, together with the facts that triggered it,
# its legal basis and the confidence of the verdict.
class ApplicableRegulation(BaseModel):
    regulation_id: str
    name: str
    applicability_status: ApplicabilityStatus
    # Human-readable labels of the profile facts that made the trigger fire.
    trigger_facts: List[str] = Field(default_factory=list)
    legal_basis_refs: List[str] = Field(default_factory=list)
    confidence: Confidence
    explanation: str
|
||||||
|
|
||||||
|
|
||||||
|
# One regulation that was ruled out, with the reason for the exclusion.
class ExcludedRegulation(BaseModel):
    regulation_id: str
    name: str
    reason: str
|
||||||
|
|
||||||
|
|
||||||
|
# One regulation whose applicability could not be decided from known facts.
class UncertainRegulation(BaseModel):
    regulation_id: str
    name: str
    # Prompts for the facts that are still needed to reach a verdict.
    missing_facts: List[str] = Field(default_factory=list)
    explanation: str
|
||||||
|
|
||||||
|
|
||||||
|
# Complete result of scope discovery for one product profile: applicable,
# excluded and uncertain regulations plus the aggregated missing facts.
class RegulatoryScope(BaseModel):
    product_profile_id: Optional[str] = None
    applicable_regulations: List[ApplicableRegulation] = Field(default_factory=list)
    excluded_regulations: List[ExcludedRegulation] = Field(default_factory=list)
    uncertain_regulations: List[UncertainRegulation] = Field(default_factory=list)
    # Prompts collected across all rules.
    missing_facts: List[str] = Field(default_factory=list)
    confidence: Confidence = Confidence.MEDIUM
    reasoning_summary: str = ""
|
||||||
|
|
||||||
|
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
# Obligations
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
# One obligation that applies to the product, including why it applies, to
# whom, and which evidence is required.
class ApplicableObligation(BaseModel):
    # Identity and legal source.
    obligation_id: str
    title: str
    source_regulation: str
    legal_basis_refs: List[str] = Field(default_factory=list)
    obligation_text: str
    authority_level: AuthorityLevel

    # Applicability context.
    applies_because: List[str] = Field(default_factory=list)
    applies_to_role: List[str] = Field(default_factory=list)
    lifecycle_phase: List[str] = Field(default_factory=list)
    overlap_group_id: Optional[str] = None

    required_evidence: List[str] = Field(default_factory=list)
    confidence: Confidence

    # True only when obligation_id is owned by the Legal-KG registry (CRA P1).
    registry_anchor: bool = False

    # Machine/Data-Act obligations the registry has not canonicalised yet.
    proposed: bool = False
|
||||||
|
|
||||||
|
|
||||||
|
# A group of obligations that overlap, with the kind of overlap and the
# obligation chosen as canonical for the group.
class ObligationOverlap(BaseModel):
    overlap_group_id: str
    # Obligation ids that belong to the group.
    obligations: List[str] = Field(default_factory=list)
    overlap_type: OverlapType
    canonical_obligation_id: str
    explanation: str
|
||||||
|
|
||||||
|
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
# Customer claims & assessments
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
# A customer's free-text implementation statement plus its normalised form.
class CustomerImplementationClaim(BaseModel):
    claim_id: str
    # Statement exactly as supplied by the customer.
    raw_statement: str
    normalized_claim: str = ""
    claimed_capability: List[str] = Field(default_factory=list)
    related_topics: List[str] = Field(default_factory=list)
    qualifiers: List[str] = Field(default_factory=list)
    evidence_refs: List[str] = Field(default_factory=list)
|
||||||
|
|
||||||
|
|
||||||
|
class ClaimObligationMapping(BaseModel):
    """One row of Welt-1 reasoning: how a customer claim relates to an obligation.

    Layers (spec / architect): claim -> interpretation (on the claim object) ->
    *potential* obligation coverage (`claim_coverage`) -> evidence required.
    Carries NO compliance verdict.
    """

    # The two sides of the mapping.
    claim_id: str
    obligation_id: str

    # Potential coverage and what is still missing.
    claim_coverage: ClaimCoverage
    missing_elements: List[str] = Field(default_factory=list)
    required_evidence: List[str] = Field(default_factory=list)

    explanation: str
    confidence: Confidence
|
||||||
|
|
||||||
|
|
||||||
|
# Assessment of a customer's own reading of a regulation: verdict, affected
# regulations/obligations, risks and a corrected interpretation.
class InterpretationAssessment(BaseModel):
    interpretation_id: str
    # Interpretation exactly as supplied by the customer.
    raw_interpretation: str
    affected_regulations: List[str] = Field(default_factory=list)
    affected_obligations: List[str] = Field(default_factory=list)
    assessment: InterpretationVerdict
    risks: List[str] = Field(default_factory=list)
    corrected_interpretation: str = ""
    legal_basis_refs: List[str] = Field(default_factory=list)
    explanation: str
    confidence: Confidence
|
||||||
|
|
||||||
|
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
# API request / response envelopes
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
# Request envelope for scope discovery.
class ScopeRequest(BaseModel):
    product_profile: ProductProfile
|
||||||
|
|
||||||
|
|
||||||
|
# Response envelope for scope discovery. `missing_facts` and `confidence` are
# top-level fields in addition to the ones carried inside `regulatory_scope`.
class ScopeResponse(BaseModel):
    regulatory_scope: RegulatoryScope
    missing_facts: List[str] = Field(default_factory=list)
    confidence: Confidence
|
||||||
|
|
||||||
|
|
||||||
|
# Request envelope for obligation discovery; the scope is optional.
class ObligationsRequest(BaseModel):
    product_profile: ProductProfile
    regulatory_scope: Optional[RegulatoryScope] = None
|
||||||
|
|
||||||
|
|
||||||
|
# Response envelope for obligation discovery.
class ObligationsResponse(BaseModel):
    applicable_obligations: List[ApplicableObligation] = Field(default_factory=list)
    overlaps: List[ObligationOverlap] = Field(default_factory=list)
    # Ids of obligations that were ruled out.
    excluded_obligations: List[str] = Field(default_factory=list)
    # NOTE(review): presumably evidence token -> obligation ids it serves; confirm against the producer.
    evidence_for_multiple: Dict[str, List[str]] = Field(default_factory=dict)
|
||||||
|
|
||||||
|
|
||||||
|
# Request envelope for reasoning about a free-text customer claim.
class ImplementationReasoningRequest(BaseModel):
    product_profile: ProductProfile
    customer_claim: str
|
||||||
|
|
||||||
|
|
||||||
|
# Response envelope for claim reasoning: the normalised claim, its mappings to
# obligations and the evidence still missing.
class ImplementationReasoningResponse(BaseModel):
    claim: CustomerImplementationClaim
    mappings: List[ClaimObligationMapping] = Field(default_factory=list)
    missing_evidence: List[str] = Field(default_factory=list)
    summary: str = ""

    # Makes the Welt-1 boundary explicit: this is advisory claim-mapping, not a
    # conformity verdict (that is ComplianceStatus in the Execution Graph).
    disclaimer: str = ""
|
||||||
|
|
||||||
|
|
||||||
|
# Request envelope for assessing a customer interpretation; the product
# profile is optional.
class InterpretationRequest(BaseModel):
    product_profile: Optional[ProductProfile] = None
    customer_interpretation: str
|
||||||
|
|
||||||
|
|
||||||
|
# Response envelope for an interpretation assessment. Only `assessment` is
# mandatory; every other field has a default.
class InterpretationResponse(BaseModel):
    assessment: InterpretationVerdict
    affected_regulations: List[str] = Field(default_factory=list)
    affected_obligations: List[str] = Field(default_factory=list)
    corrected_interpretation: str = ""
    risks: List[str] = Field(default_factory=list)
    legal_basis_refs: List[str] = Field(default_factory=list)
    explanation: str = ""
    confidence: Confidence = Confidence.MEDIUM
|
||||||
@@ -0,0 +1,136 @@
|
|||||||
|
"""Scope discovery engine (spec Modus 1).
|
||||||
|
|
||||||
|
Answers "which regulations apply to my product?" — and, crucially, never says
|
||||||
|
"X applies" without the triggers, and never hides a missing fact behind a false
|
||||||
|
verdict. Pure rule evaluation, deterministic.
|
||||||
|
"""
|
||||||
|
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
from typing import List, Optional
|
||||||
|
|
||||||
|
from .enums import ApplicabilityStatus, Confidence
|
||||||
|
from .predicates import Condition, evaluate, true_leaves, unknown_fields
|
||||||
|
from .rules_regulations import REGULATION_RULES, FIELD_LABELS, RegulationRule
|
||||||
|
from .schemas import (
|
||||||
|
ApplicableRegulation,
|
||||||
|
ExcludedRegulation,
|
||||||
|
ProductProfile,
|
||||||
|
RegulatoryScope,
|
||||||
|
UncertainRegulation,
|
||||||
|
)
|
||||||
|
|
||||||
|
# One-step confidence downgrade: HIGH -> MEDIUM -> LOW; LOW stays LOW.
_DOWNGRADE = {Confidence.HIGH: Confidence.MEDIUM, Confidence.MEDIUM: Confidence.LOW, Confidence.LOW: Confidence.LOW}
|
||||||
|
|
||||||
|
|
||||||
|
def _fields_in(condition: Optional[Condition]) -> List[str]:
    """Return the field names referenced by a condition, in traversal order.

    A tuple is treated as a leaf whose first element is the field name. Any
    other non-None value is treated as a mapping whose children are taken from
    its "all" entry, or from "any" when "all" is missing or empty. Duplicates
    are kept; `None` yields an empty list.
    """
    if condition is None:
        return []
    if isinstance(condition, tuple):
        return [condition[0]]
    children = condition.get("all") or condition.get("any") or []
    return [name for child in children for name in _fields_in(child)]
|
||||||
|
|
||||||
|
|
||||||
|
def _trigger_facts(rule: RegulationRule, profile: ProductProfile) -> List[str]:
    """Return the labels of the trigger leaves reported by `true_leaves`.

    Each leaf's field name is looked up in `FIELD_LABELS`; fields without a
    label are skipped and every label appears once, in first-seen order.
    """
    seen = {}  # dict keys keep insertion order and give the de-duplication
    for leaf in true_leaves(rule.trigger, profile):
        text = FIELD_LABELS.get(leaf[0])
        if text:
            seen.setdefault(text, None)
    return list(seen)
|
||||||
|
|
||||||
|
|
||||||
|
def _missing_prompts(rule: RegulationRule, profile: ProductProfile) -> List[str]:
    """Collect one user-facing entry per unknown fact the rule depends on.

    The facts considered are the rule's `required_facts` followed by every
    field referenced in its trigger (duplicates removed, order preserved).
    Which of them are unknown is decided by `unknown_fields`.

    For each unknown fact the rule-specific prompt is used. If the rule has no
    prompt for it, the fact's `FIELD_LABELS` label — or, as a last resort, the
    raw field name — is used instead, so an unknown fact is never silently
    dropped from the result.

    Returns:
        The entries in fact order, without duplicates.
    """
    fields = list(dict.fromkeys(rule.required_facts + _fields_in(rule.trigger)))
    prompts: List[str] = []
    for name in unknown_fields(fields, profile):
        # Fall back to the generic label so the missing fact stays visible.
        prompt = rule.fact_prompts.get(name) or FIELD_LABELS.get(name, name)
        if prompt not in prompts:
            prompts.append(prompt)
    return prompts
|
||||||
|
|
||||||
|
|
||||||
|
def discover_scope(profile: ProductProfile) -> RegulatoryScope:
    """Evaluate every regulation rule against `profile` and build the scope.

    Per rule, in this order:

    * If the profile's role is listed in the rule's `excludable_roles`, the
      regulation is excluded without evaluating the trigger.
    * If the trigger evaluates to True, the regulation is applicable. Inferred
      rules are reported as PARTIALLY_APPLICABLE with MEDIUM confidence.
      Unknown required facts lower the confidence by one step and are added to
      `scope.missing_facts`.
    * If the trigger evaluates to None, the regulation is uncertain and its
      missing-fact prompts are reported.
    * Otherwise the regulation is excluded.

    NOTE(review): `rule.exclusion` is not consulted here.

    Returns:
        The populated `RegulatoryScope`, including overall confidence and summary.
    """
    scope = RegulatoryScope(product_profile_id=profile.product_profile_id)

    # The role does not depend on the rule, so resolve it once up front.
    role_value = profile.manufacturer_role.value if profile.manufacturer_role is not None else None

    for rule in REGULATION_RULES:
        if role_value is not None and role_value in rule.excludable_roles:
            scope.excluded_regulations.append(
                ExcludedRegulation(
                    regulation_id=rule.regulation_id,
                    name=rule.name,
                    reason="Rolle '%s' ist von dieser Regulierung nicht unmittelbar adressiert." % role_value,
                )
            )
            continue

        trig = evaluate(rule.trigger, profile)

        if trig is True:
            conf = Confidence.MEDIUM if rule.inferred else rule.confidence_when_applicable
            status = (
                ApplicabilityStatus.PARTIALLY_APPLICABLE if rule.inferred else ApplicabilityStatus.APPLICABLE
            )
            unresolved = unknown_fields(rule.required_facts, profile)
            if unresolved:
                conf = _DOWNGRADE[conf]
                for f in unresolved:
                    # A downgrade must never be silent: when the rule has no
                    # prompt for the fact, report its label (or its name).
                    prompt = rule.fact_prompts.get(f) or FIELD_LABELS.get(f, f)
                    if prompt not in scope.missing_facts:
                        scope.missing_facts.append(prompt)
            scope.applicable_regulations.append(
                ApplicableRegulation(
                    regulation_id=rule.regulation_id,
                    name=rule.name,
                    applicability_status=status,
                    trigger_facts=_trigger_facts(rule, profile),
                    legal_basis_refs=rule.legal_basis_refs,
                    confidence=conf,
                    explanation=rule.summary,
                )
            )
        elif trig is None:
            # Prompts are only needed for the uncertain verdict.
            missing = _missing_prompts(rule, profile)
            scope.uncertain_regulations.append(
                UncertainRegulation(
                    regulation_id=rule.regulation_id,
                    name=rule.name,
                    missing_facts=missing,
                    explanation=rule.summary,
                )
            )
            for m in missing:
                if m not in scope.missing_facts:
                    scope.missing_facts.append(m)
        else:  # trig is False -> definitively excluded by a known fact
            scope.excluded_regulations.append(
                ExcludedRegulation(
                    regulation_id=rule.regulation_id,
                    name=rule.name,
                    reason="Auslösende Voraussetzungen sind anhand der bekannten Fakten nicht erfüllt.",
                )
            )

    scope.confidence = _overall_confidence(scope)
    scope.reasoning_summary = _summary(scope)
    return scope
|
||||||
|
|
||||||
|
|
||||||
|
def _overall_confidence(scope: RegulatoryScope) -> Confidence:
    """Derive the overall confidence of a scope.

    LOW when nothing is applicable; MEDIUM when something is applicable but
    uncertain regulations or missing facts remain; HIGH otherwise.
    """
    if not scope.applicable_regulations:
        return Confidence.LOW
    if scope.uncertain_regulations or scope.missing_facts:
        return Confidence.MEDIUM
    return Confidence.HIGH
|
||||||
|
|
||||||
|
|
||||||
|
def _summary(scope: RegulatoryScope) -> str:
    """Build the one-line German summary of applicable and uncertain regulations.

    Each group is rendered as a comma-separated list of regulation ids, or as
    an em dash when that rendering is empty.
    """

    def _joined(entries) -> str:
        ids = [entry.regulation_id for entry in entries]
        return ", ".join(ids) or "—"

    return "Wahrscheinlich anwendbar: %s. Unsicher (fehlende Fakten): %s." % (
        _joined(scope.applicable_regulations),
        _joined(scope.uncertain_regulations),
    )
|
||||||
@@ -0,0 +1,104 @@
|
|||||||
|
"""Deterministic taxonomy for normalising free-text customer claims.
|
||||||
|
|
||||||
|
Capability names echo the planned Obligation -> Capability layer of the
|
||||||
|
Compliance Execution Graph (memory `project_compliance_graph.md`), so the
|
||||||
|
reasoning layer's claim capabilities line up with the registry's capabilities.
|
||||||
|
Matching is lowercase substring matching — deterministic, no LLM, no RAG.
|
||||||
|
"""
|
||||||
|
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
from typing import Dict, List
|
||||||
|
|
||||||
|
# capability -> trigger substrings (German + English), matched lowercase.
# German keywords containing an umlaut are listed together with their ASCII
# transliteration (ä -> ae, ö -> oe, ü -> ue) so that text typed without
# umlauts still matches.
CAPABILITY_KEYWORDS: Dict[str, List[str]] = {
    "software_bill_of_materials": [
        "sbom", "stückliste", "stueckliste", "bill of materials", "komponentenliste",
    ],
    "secure_updates": ["update", "patch", "aktualisier", "release", "rollout"],
    "software_integrity": ["signier", "signatur", "signed", "integrität", "integritaet", "hash"],
    "vulnerability_management": [
        "schwachstelle", "vulnerab", "cve", "schwachstellenmanagement", "vuln",
    ],
    "coordinated_disclosure": [
        "disclosure", "offenlegung", "security.txt", "responsible disclosure",
    ],
    "incident_reporting": [
        "incident", "vorfall", "behörde", "behoerde", "csirt", "meldepflicht", "an die behörde",
    ],
    "authentication": [
        "authentifizier", "login", "passwort", "password", "mfa", "2fa", "anmeldung",
    ],
    "secure_by_default": [
        "härtung", "haertung", "hardening", "default", "standardkonfig",
        "sichere konfiguration", "angriffsfläche", "angriffsflaeche",
    ],
    "security_logging": ["logging", "log ", "logs", "protokoll", "audit-trail", "ereignisprotokoll"],
    "secure_communication": ["verschlüssel", "verschluessel", "encryption", "tls", "vpn", "ssl"],
    "risk_assessment": [
        "risikoanalyse", "risikobeurteil", "risk assessment", "gefährdungsbeurteil",
        "gefaehrdungsbeurteil", "bedrohungsanalyse", "threat model",
    ],
    "technical_documentation": [
        "dokumentation", "technische unterlagen", "betriebsanleitung", "handbuch", "documentation",
    ],
    "conformity_assessment": ["konformität", "konformitaet", "conformity", "baumuster", "ce-kenn"],
    "functional_safety": [
        "performance level", "sil ", "iso 13849", "funktionale sicherheit", "safety control",
    ],
    "data_access_provision": [
        # "datenportabilitaet" added: transliteration of the umlaut keyword.
        "datenzugang", "data access", "datenportabilität", "datenportabilitaet",
        "datenexport", "data export",
    ],
}
|
||||||
|
|
||||||
|
# capability -> broader compliance topics it touches (spec related_topics).
# Keys are the capability names used as keys of CAPABILITY_KEYWORDS.
CAPABILITY_TOPICS: Dict[str, List[str]] = dict(
    software_bill_of_materials=["component_transparency", "supply_chain", "vulnerability_management"],
    secure_updates=["secure_updates", "vulnerability_remediation", "release_management"],
    software_integrity=["secure_updates", "supply_chain", "tamper_protection"],
    vulnerability_management=["vulnerability_handling", "monitoring", "patch_management"],
    coordinated_disclosure=["vulnerability_handling", "transparency"],
    incident_reporting=["incident_handling", "authority_notification"],
    authentication=["access_control", "identity"],
    secure_by_default=["hardening", "attack_surface", "configuration"],
    security_logging=["monitoring", "forensics", "incident_handling"],
    secure_communication=["confidentiality", "integrity", "remote_access"],
    risk_assessment=["risk_management", "secure_by_design"],
    technical_documentation=["documentation", "conformity"],
    conformity_assessment=["conformity", "ce_marking"],
    functional_safety=["machine_safety", "control_systems"],
    data_access_provision=["data_sharing", "portability"],
)
|
||||||
|
|
||||||
|
# qualifier -> substrings that signal a weak/incomplete implementation.
# Matched against the lowercased text. Umlaut keywords are listed together
# with their ASCII transliteration (ä -> ae, ü -> ue).
QUALIFIER_KEYWORDS: Dict[str, List[str]] = {
    "reactive": [
        "wenn kunden", "wenn ein kunde", "nach meldung", "auf anfrage", "auf nachfrage",
        "nur wenn", "reaktiv", "wenn fehler", "when customers", "on request", "when reported",
        "ad hoc", "ad-hoc", "bei bedarf",
    ],
    "manual": ["manuell", "von hand", "manual", "händisch", "haendisch"],
    "planned": [
        "geplant", "in planung", "wollen wir", "planen wir", "noch nicht", "zukünftig", "künftig",
        # Transliterations added for the two umlaut keywords above.
        "zukuenftig", "kuenftig",
    ],
    # NOTE(review): "keine" is a very broad substring and also matches harmless
    # phrases such as "keine Probleme" — confirm this is intended.
    "absent": ["haben wir nicht", "gibt es nicht", "nicht vorhanden", "keinen prozess", "keine"],
}
|
||||||
|
|
||||||
|
|
||||||
|
def match_capabilities(text: str) -> List[str]:
    """Return the capabilities whose keywords occur in `text`.

    The text is lowercased and each capability in `CAPABILITY_KEYWORDS` is
    reported once if at least one of its keywords is a substring of it. The
    result follows the iteration order of `CAPABILITY_KEYWORDS`.
    """
    haystack = text.lower()
    hits: List[str] = []
    for capability, needles in CAPABILITY_KEYWORDS.items():
        for needle in needles:
            if needle in haystack:
                hits.append(capability)
                break  # one matching keyword is enough
    return hits
|
||||||
|
|
||||||
|
|
||||||
|
def match_qualifiers(text: str) -> List[str]:
    """Return the qualifiers whose keywords occur in `text`.

    The text is lowercased and each qualifier in `QUALIFIER_KEYWORDS` is
    reported once if at least one of its keywords is a substring of it. The
    result follows the iteration order of `QUALIFIER_KEYWORDS`.
    """
    haystack = text.lower()
    hits: List[str] = []
    for qualifier, needles in QUALIFIER_KEYWORDS.items():
        for needle in needles:
            if needle in haystack:
                hits.append(qualifier)
                break  # one matching keyword is enough
    return hits
|
||||||
|
|
||||||
|
|
||||||
|
def topics_for(capabilities: List[str]) -> List[str]:
    """Return the topics of the given capabilities, de-duplicated.

    Topics are looked up in `CAPABILITY_TOPICS`; capabilities without an entry
    contribute nothing. Each topic appears once, at its first occurrence.
    """
    ordered = {}  # dict keys preserve first-insertion order
    for capability in capabilities:
        ordered.update(dict.fromkeys(CAPABILITY_TOPICS.get(capability, [])))
    return list(ordered)
|
||||||
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user